git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
Merge tag 'mac80211-for-davem-2017-02-28' of git://git.kernel.org/pub/scm/linux/kerne...
author    David S. Miller <davem@davemloft.net>
Wed, 1 Mar 2017 23:08:34 +0000 (15:08 -0800)
committer David S. Miller <davem@davemloft.net>
Wed, 1 Mar 2017 23:08:34 +0000 (15:08 -0800)
Johannes Berg says:

====================
First round of fixes - details in the commits:
 * use a valid hrtimer clock ID in mac80211_hwsim
 * don't reorder frames prior to BA session
 * flush a delayed work at suspend so the state is all valid before
   suspend/resume
 * fix packet statistics in fast-RX, the RX packets
   counter increment was simply missing
 * don't try to re-transmit filtered frames in an aggregation session
 * shorten (for tracing) a debug message
 * typo fix in another debug message
 * fix nul-termination with HWSIM_ATTR_RADIO_NAME in hwsim
 * fix mgmt RX processing when station is looked up by driver/device
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
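For context on the first bullet above: hrtimers only support a small set of clock IDs, and mac80211_hwsim had registered its beaconing timer against CLOCK_MONOTONIC_RAW, which is not one of them; the fix moves it to CLOCK_MONOTONIC. The following is a minimal illustrative sketch of the valid pattern, not the actual hwsim code; the timer name and interval are invented:

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    static struct hrtimer beacon_timer;        /* invented name */

    static enum hrtimer_restart beacon_expire(struct hrtimer *t)
    {
            hrtimer_forward_now(t, ms_to_ktime(100));  /* interval is made up */
            return HRTIMER_RESTART;
    }

    static void beacon_timer_setup(void)
    {
            /* CLOCK_MONOTONIC is a clock id hrtimers actually support;
             * CLOCK_MONOTONIC_RAW is not, which is what the fix corrects. */
            hrtimer_init(&beacon_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
            beacon_timer.function = beacon_expire;
            hrtimer_start(&beacon_timer, ms_to_ktime(100), HRTIMER_MODE_REL);
    }
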
1717 files changed:
Documentation/00-INDEX
Documentation/ABI/testing/sysfs-bus-i2c-devices-bq32k [new file with mode: 0644]
Documentation/DocBook/libata.tmpl
Documentation/IPMI.txt
Documentation/acpi/method-customizing.txt
Documentation/acpi/method-tracing.txt
Documentation/admin-guide/md.rst
Documentation/admin-guide/ras.rst
Documentation/blockdev/mflash.txt
Documentation/blockdev/zram.txt
Documentation/cgroup-v1/rdma.txt [new file with mode: 0644]
Documentation/cgroup-v2.txt
Documentation/device-mapper/dm-raid.txt
Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt
Documentation/devicetree/bindings/clock/exynos4415-clock.txt [deleted file]
Documentation/devicetree/bindings/clock/hi3660-clock.txt [new file with mode: 0644]
Documentation/devicetree/bindings/clock/idt,versaclock5.txt [new file with mode: 0644]
Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt
Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt
Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt [new file with mode: 0644]
Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.txt
Documentation/devicetree/bindings/clock/st,stm32-rcc.txt
Documentation/devicetree/bindings/clock/stericsson,abx500.txt [new file with mode: 0644]
Documentation/devicetree/bindings/clock/sun9i-de.txt [new file with mode: 0644]
Documentation/devicetree/bindings/clock/sun9i-usb.txt [new file with mode: 0644]
Documentation/devicetree/bindings/clock/sunxi-ccu.txt
Documentation/devicetree/bindings/clock/ti,cdce925.txt
Documentation/devicetree/bindings/clock/zx296718-clk.txt
Documentation/devicetree/bindings/display/ssd1307fb.txt
Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt
Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
Documentation/devicetree/bindings/i2c/i2c-stm32.txt [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/nvidia,tegra186-bpmp-i2c.txt [new file with mode: 0644]
Documentation/devicetree/bindings/mfd/qcom-rpm.txt
Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
Documentation/devicetree/bindings/opp/opp.txt
Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
Documentation/devicetree/bindings/power/pd-samsung.txt
Documentation/devicetree/bindings/rtc/armada-380-rtc.txt
Documentation/devicetree/bindings/rtc/cortina,gemini.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/imxdi-rtc.txt
Documentation/devicetree/bindings/rtc/maxim,ds3231.txt
Documentation/devicetree/bindings/rtc/pcf8563.txt
Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/sun6i-rtc.txt
Documentation/devicetree/bindings/soc/rockchip/grf.txt
Documentation/devicetree/bindings/sound/rockchip-i2s.txt
Documentation/devicetree/bindings/sound/rt5665.txt [changed mode: 0755->0644]
Documentation/devicetree/bindings/sound/sun4i-codec.txt
Documentation/devicetree/bindings/sound/sun4i-i2s.txt
Documentation/devicetree/bindings/watchdog/cortina,gemin-watchdog.txt [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
Documentation/devicetree/bindings/watchdog/zte,zx2967-wdt.txt [new file with mode: 0644]
Documentation/filesystems/autofs4-mount-control.txt
Documentation/filesystems/autofs4.txt
Documentation/filesystems/ceph.txt
Documentation/filesystems/quota.txt
Documentation/i2c/busses/i2c-i801
Documentation/i2c/muxes/i2c-mux-gpio
Documentation/kselftest.txt
Documentation/md-cluster.txt [deleted file]
Documentation/md/md-cluster.txt [new file with mode: 0644]
Documentation/md/raid5-cache.txt [new file with mode: 0644]
Documentation/media/dvb-drivers/ci.rst
Documentation/media/uapi/dvb/dvb-frontend-parameters.rst
Documentation/memory-hotplug.txt
Documentation/networking/cdc_mbim.txt
Documentation/scsi/ChangeLog.megaraid_sas
Documentation/sound/hd-audio/notes.rst
Documentation/sparc/console.txt [new file with mode: 0644]
Documentation/static-keys.txt
Documentation/sysctl/vm.txt
Documentation/vm/ksm.txt
Documentation/vm/userfaultfd.txt
Documentation/watchdog/watchdog-kernel-api.txt
Documentation/watchdog/watchdog-parameters.txt
Documentation/x86/intel_rdt_ui.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/Kbuild
arch/alpha/include/asm/current.h [deleted file]
arch/alpha/include/asm/dma-mapping.h
arch/alpha/kernel/pci-noop.c
arch/alpha/kernel/pci_iommu.c
arch/alpha/kernel/smp.c
arch/arc/include/asm/dma-mapping.h
arch/arc/include/asm/kprobes.h
arch/arc/kernel/smp.c
arch/arc/kernel/unwind.c
arch/arc/mm/dma.c
arch/arm/Kconfig
arch/arm/Kconfig-nommu
arch/arm/boot/compressed/decompress.c
arch/arm/boot/compressed/head.S
arch/arm/boot/dts/sun5i-a10s.dtsi
arch/arm/boot/dts/sun5i-a13.dtsi
arch/arm/boot/dts/sun5i-gr8.dtsi
arch/arm/boot/dts/sun5i-r8.dtsi
arch/arm/boot/dts/sun5i.dtsi
arch/arm/boot/dts/sun9i-a80.dtsi
arch/arm/common/dmabounce.c
arch/arm/common/mcpm_entry.c
arch/arm/include/asm/device.h
arch/arm/include/asm/dma-mapping.h
arch/arm/include/asm/hardware/cache-uniphier.h
arch/arm/include/asm/kprobes.h
arch/arm/include/asm/mach/flash.h
arch/arm/include/asm/memory.h
arch/arm/include/asm/pgtable-nommu.h
arch/arm/kernel/head-nommu.S
arch/arm/kernel/module.c
arch/arm/kernel/setup.c
arch/arm/kernel/smp.c
arch/arm/mach-alpine/platsmp.c
arch/arm/mach-axxia/platsmp.c
arch/arm/mach-bcm/bcm63xx_smp.c
arch/arm/mach-bcm/platsmp-brcmstb.c
arch/arm/mach-bcm/platsmp.c
arch/arm/mach-berlin/platsmp.c
arch/arm/mach-ep93xx/ts72xx.c
arch/arm/mach-ep93xx/ts72xx.h
arch/arm/mach-exynos/firmware.c
arch/arm/mach-exynos/mcpm-exynos.c
arch/arm/mach-exynos/platsmp.c
arch/arm/mach-exynos/pm.c
arch/arm/mach-exynos/suspend.c
arch/arm/mach-hisi/platmcpm.c
arch/arm/mach-hisi/platsmp.c
arch/arm/mach-imx/platsmp.c
arch/arm/mach-imx/pm-imx6.c
arch/arm/mach-imx/src.c
arch/arm/mach-mediatek/platsmp.c
arch/arm/mach-mvebu/pm.c
arch/arm/mach-mvebu/pmsu.c
arch/arm/mach-mvebu/system-controller.c
arch/arm/mach-omap2/control.c
arch/arm/mach-omap2/omap-mpuss-lowpower.c
arch/arm/mach-omap2/omap-smp.c
arch/arm/mach-omap2/omap_twl.c
arch/arm/mach-orion5x/ts78xx-setup.c
arch/arm/mach-prima2/platsmp.c
arch/arm/mach-prima2/pm.c
arch/arm/mach-pxa/palmz72.c
arch/arm/mach-pxa/pxa25x.c
arch/arm/mach-pxa/pxa27x.c
arch/arm/mach-pxa/pxa3xx.c
arch/arm/mach-realview/platsmp-dt.c
arch/arm/mach-rockchip/platsmp.c
arch/arm/mach-rockchip/pm.c
arch/arm/mach-s3c24xx/mach-jive.c
arch/arm/mach-s3c24xx/pm-s3c2410.c
arch/arm/mach-s3c24xx/pm-s3c2416.c
arch/arm/mach-s3c64xx/pm.c
arch/arm/mach-s5pv210/pm.c
arch/arm/mach-sa1100/pm.c
arch/arm/mach-shmobile/platsmp-apmu.c
arch/arm/mach-shmobile/platsmp-scu.c
arch/arm/mach-socfpga/platsmp.c
arch/arm/mach-spear/platsmp.c
arch/arm/mach-sti/platsmp.c
arch/arm/mach-sunxi/platsmp.c
arch/arm/mach-tango/platsmp.c
arch/arm/mach-tango/pm.c
arch/arm/mach-tegra/reset.c
arch/arm/mach-ux500/platsmp.c
arch/arm/mach-vexpress/dcscb.c
arch/arm/mach-vexpress/platsmp.c
arch/arm/mach-vexpress/tc2_pm.c
arch/arm/mach-zx/platsmp.c
arch/arm/mach-zynq/platsmp.c
arch/arm/mm/Kconfig
arch/arm/mm/Makefile
arch/arm/mm/cache-uniphier.c
arch/arm/mm/cache-v7.S
arch/arm/mm/cache-v7m.S
arch/arm/mm/dma-mapping.c
arch/arm/mm/dump.c
arch/arm/mm/flush.c
arch/arm/mm/init.c
arch/arm/mm/mmu.c
arch/arm/mm/nommu.c
arch/arm/mm/physaddr.c [new file with mode: 0644]
arch/arm/probes/decode.h
arch/arm/xen/mm.c
arch/arm64/include/asm/device.h
arch/arm64/include/asm/dma-mapping.h
arch/arm64/include/asm/kprobes.h
arch/arm64/kernel/armv8_deprecated.c
arch/arm64/kernel/insn.c
arch/arm64/kernel/probes/decode-insn.h
arch/arm64/kernel/smp.c
arch/arm64/lib/copy_template.S
arch/arm64/mm/dma-mapping.c
arch/avr32/include/asm/dma-mapping.h
arch/avr32/include/asm/kprobes.h
arch/avr32/mm/dma-coherent.c
arch/blackfin/include/asm/Kbuild
arch/blackfin/include/asm/dma-mapping.h
arch/blackfin/kernel/dma-mapping.c
arch/blackfin/mach-common/smp.c
arch/c6x/include/asm/Kbuild
arch/c6x/include/asm/dma-mapping.h
arch/c6x/kernel/dma.c
arch/cris/arch-v32/drivers/pci/dma.c
arch/cris/include/asm/Kbuild
arch/cris/include/asm/current.h [deleted file]
arch/cris/include/asm/dma-mapping.h
arch/frv/include/asm/Kbuild
arch/frv/include/asm/dma-mapping.h
arch/frv/mb93090-mb00/pci-dma-nommu.c
arch/frv/mb93090-mb00/pci-dma.c
arch/frv/mb93090-mb00/pci-frv.c
arch/frv/mm/mmu-context.c
arch/h8300/include/asm/Kbuild
arch/h8300/include/asm/dma-mapping.h
arch/h8300/kernel/dma.c
arch/hexagon/include/asm/Kbuild
arch/hexagon/include/asm/dma-mapping.h
arch/hexagon/kernel/dma.c
arch/hexagon/kernel/smp.c
arch/ia64/hp/common/hwsw_iommu.c
arch/ia64/hp/common/sba_iommu.c
arch/ia64/include/asm/dma-mapping.h
arch/ia64/include/asm/kprobes.h
arch/ia64/include/asm/machvec.h
arch/ia64/kernel/dma-mapping.c
arch/ia64/kernel/pci-dma.c
arch/ia64/kernel/pci-swiotlb.c
arch/ia64/kernel/setup.c
arch/ia64/sn/kernel/sn2/sn_hwperf.c
arch/m32r/Kconfig
arch/m32r/include/asm/Kbuild
arch/m32r/include/asm/device.h
arch/m32r/include/asm/dma-mapping.h
arch/m32r/kernel/setup.c
arch/m68k/68000/bootlogo-vz.h
arch/m68k/68000/bootlogo.h
arch/m68k/configs/amcore_defconfig
arch/m68k/ifpsp060/src/isp.S
arch/m68k/include/asm/Kbuild
arch/m68k/include/asm/MC68328.h
arch/m68k/include/asm/MC68EZ328.h
arch/m68k/include/asm/MC68VZ328.h
arch/m68k/include/asm/dma-mapping.h
arch/m68k/include/asm/natfeat.h
arch/m68k/kernel/dma.c
arch/m68k/lib/ashldi3.c
arch/m68k/lib/ashrdi3.c
arch/m68k/lib/lshrdi3.c
arch/m68k/lib/muldi3.c
arch/metag/include/asm/Kbuild
arch/metag/include/asm/dma-mapping.h
arch/metag/kernel/dma.c
arch/metag/kernel/smp.c
arch/microblaze/include/asm/Kbuild
arch/microblaze/include/asm/dma-mapping.h
arch/microblaze/kernel/dma.c
arch/microblaze/pci/pci-common.c
arch/mips/cavium-octeon/dma-octeon.c
arch/mips/include/asm/device.h
arch/mips/include/asm/dma-mapping.h
arch/mips/include/asm/kprobes.h
arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
arch/mips/include/asm/netlogic/common.h
arch/mips/kernel/traps.c
arch/mips/kernel/vdso.c
arch/mips/loongson64/common/dma-swiotlb.c
arch/mips/mm/dma-default.c
arch/mips/netlogic/common/nlm-dma.c
arch/mips/pci/pci-octeon.c
arch/mn10300/include/asm/dma-mapping.h
arch/mn10300/include/asm/kprobes.h
arch/mn10300/kernel/smp.c
arch/mn10300/mm/dma-alloc.c
arch/nios2/boot/dts/10m50_devboard.dts [changed mode: 0755->0644]
arch/nios2/configs/10m50_defconfig [changed mode: 0755->0644]
arch/nios2/include/asm/Kbuild
arch/nios2/include/asm/dma-mapping.h
arch/nios2/mm/dma-mapping.c
arch/openrisc/Kconfig
arch/openrisc/TODO.openrisc
arch/openrisc/include/asm/Kbuild
arch/openrisc/include/asm/atomic.h [new file with mode: 0644]
arch/openrisc/include/asm/bitops.h
arch/openrisc/include/asm/bitops/atomic.h [new file with mode: 0644]
arch/openrisc/include/asm/cmpxchg.h [new file with mode: 0644]
arch/openrisc/include/asm/cpuinfo.h
arch/openrisc/include/asm/dma-mapping.h
arch/openrisc/include/asm/futex.h [new file with mode: 0644]
arch/openrisc/include/asm/spr_defs.h
arch/openrisc/include/asm/string.h [new file with mode: 0644]
arch/openrisc/kernel/.gitignore [new file with mode: 0644]
arch/openrisc/kernel/dma.c
arch/openrisc/kernel/entry.S
arch/openrisc/kernel/head.S
arch/openrisc/kernel/or32_ksyms.c
arch/openrisc/kernel/process.c
arch/openrisc/kernel/ptrace.c
arch/openrisc/kernel/setup.c
arch/openrisc/kernel/traps.c
arch/openrisc/kernel/vmlinux.lds.S
arch/openrisc/lib/Makefile
arch/openrisc/lib/memcpy.c [new file with mode: 0644]
arch/openrisc/lib/memset.S [new file with mode: 0644]
arch/openrisc/mm/ioremap.c
arch/parisc/include/asm/Kbuild
arch/parisc/include/asm/dma-mapping.h
arch/parisc/kernel/drivers.c
arch/parisc/kernel/pci-dma.c
arch/parisc/kernel/smp.c
arch/powerpc/boot/dts/fsl/mpc8569mds.dts
arch/powerpc/include/asm/book3s/64/mmu.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/device.h
arch/powerpc/include/asm/dma-mapping.h
arch/powerpc/include/asm/fsl_hcalls.h
arch/powerpc/include/asm/kprobes.h
arch/powerpc/include/asm/pci.h
arch/powerpc/include/asm/ps3.h
arch/powerpc/include/asm/swiotlb.h
arch/powerpc/kernel/dma-swiotlb.c
arch/powerpc/kernel/dma.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/smp.c
arch/powerpc/kvm/book3s_64_vio.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/lib/code-patching.c
arch/powerpc/platforms/cell/iommu.c
arch/powerpc/platforms/cell/spufs/file.c
arch/powerpc/platforms/pasemi/iommu.c
arch/powerpc/platforms/pasemi/setup.c
arch/powerpc/platforms/powernv/npu-dma.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/ps3/system-bus.c
arch/powerpc/platforms/pseries/ibmebus.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/platforms/pseries/vio.c
arch/powerpc/xmon/ppc-opc.c
arch/s390/Kconfig
arch/s390/configs/default_defconfig
arch/s390/configs/performance_defconfig
arch/s390/crypto/Makefile
arch/s390/crypto/paes_s390.c [new file with mode: 0644]
arch/s390/defconfig
arch/s390/include/asm/cpacf.h
arch/s390/include/asm/device.h
arch/s390/include/asm/dma-mapping.h
arch/s390/include/asm/kprobes.h
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/pkey.h [new file with mode: 0644]
arch/s390/include/asm/processor.h
arch/s390/include/asm/uaccess.h
arch/s390/include/uapi/asm/Kbuild
arch/s390/include/uapi/asm/pkey.h [new file with mode: 0644]
arch/s390/kernel/crash_dump.c
arch/s390/kernel/entry.S
arch/s390/kernel/entry.h
arch/s390/kernel/nmi.c
arch/s390/kernel/process.c
arch/s390/kernel/processor.c
arch/s390/kernel/vtime.c
arch/s390/mm/gmap.c
arch/s390/mm/hugetlbpage.c
arch/s390/pci/pci.c
arch/s390/pci/pci_dma.c
arch/score/include/asm/Kbuild
arch/score/kernel/traps.c
arch/sh/boot/dts/j2_mimas_v2.dts [changed mode: 0755->0644]
arch/sh/include/asm/dma-mapping.h
arch/sh/include/asm/kprobes.h
arch/sh/kernel/dma-nommu.c
arch/sh/kernel/irq.c
arch/sh/kernel/smp.c
arch/sh/mm/consistent.c
arch/sparc/include/asm/dma-mapping.h
arch/sparc/include/asm/kprobes.h
arch/sparc/include/asm/page_64.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/include/asm/setup.h
arch/sparc/include/asm/switch_to_32.h
arch/sparc/include/asm/tlbflush_64.h
arch/sparc/include/asm/topology_64.h
arch/sparc/include/asm/uprobes.h
arch/sparc/kernel/iommu.c
arch/sparc/kernel/ioport.c
arch/sparc/kernel/leon_smp.c
arch/sparc/kernel/pci_sun4v.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/sun4d_smp.c
arch/sparc/kernel/sun4m_smp.c
arch/sparc/kernel/traps_32.c
arch/sparc/kernel/traps_64.c
arch/sparc/kernel/tsb.S
arch/sparc/kernel/visemul.c
arch/sparc/mm/hugetlbpage.c
arch/sparc/mm/init_64.c
arch/sparc/mm/srmmu.c
arch/sparc/mm/tlb.c
arch/sparc/mm/tsb.c
arch/tile/include/asm/device.h
arch/tile/include/asm/dma-mapping.h
arch/tile/include/asm/kprobes.h
arch/tile/kernel/pci-dma.c
arch/tile/kernel/smpboot.c
arch/tile/mm/elf.c
arch/um/include/asm/Kbuild
arch/unicore32/include/asm/Kbuild
arch/unicore32/include/asm/dma-mapping.h
arch/unicore32/mm/dma-swiotlb.c
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/entry/vdso/vma.c
arch/x86/include/asm/cacheflush.h
arch/x86/include/asm/desc_defs.h
arch/x86/include/asm/device.h
arch/x86/include/asm/dma-mapping.h
arch/x86/include/asm/intel_pmc_ipc.h
arch/x86/include/asm/iommu.h
arch/x86/include/asm/kprobes.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/pgtable-2level.h
arch/x86/include/asm/pgtable-3level.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pmc_atom.h [deleted file]
arch/x86/kernel/Makefile
arch/x86/kernel/amd_gart_64.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/mcheck/threshold.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_work.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/pci-calgary_64.c
arch/x86/kernel/pci-dma.c
arch/x86/kernel/pci-nommu.c
arch/x86/kernel/pci-swiotlb.c
arch/x86/kernel/setup.c
arch/x86/kernel/smp.c
arch/x86/kernel/test_rodata.c [deleted file]
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/mmu.c
arch/x86/mm/gup.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/mpx.c
arch/x86/mm/pgtable.c
arch/x86/pci/common.c
arch/x86/pci/sta2x11-fixup.c
arch/x86/platform/atom/Makefile
arch/x86/platform/atom/pmc_atom.c [deleted file]
arch/x86/xen/pci-swiotlb-xen.c
arch/xtensa/include/asm/Kbuild
arch/xtensa/include/asm/device.h
arch/xtensa/include/asm/dma-mapping.h
arch/xtensa/kernel/pci-dma.c
arch/xtensa/kernel/smp.c
block/Kconfig.iosched
block/bio.c
block/blk-mq-sched.c
block/blk-mq-sched.h
block/blk-mq.c
block/blk-throttle.c
block/bsg.c
block/elevator.c
block/genhd.c
block/sed-opal.c
crypto/lz4.c
crypto/lz4hc.c
crypto/testmgr.h
drivers/acpi/acpi_ipmi.c
drivers/acpi/acpi_lpss.c
drivers/acpi/acpica/dbconvert.c
drivers/acpi/acpica/nspredef.c
drivers/acpi/acpica/nsxfeval.c
drivers/acpi/resource.c
drivers/acpi/spcr.c
drivers/android/binder.c
drivers/ata/libata-eh.c
drivers/atm/ambassador.c
drivers/atm/eni.c
drivers/atm/firestream.c
drivers/atm/horizon.c
drivers/atm/iphase.c
drivers/atm/iphase.h
drivers/atm/lanai.c
drivers/atm/nicstar.c
drivers/base/core.c
drivers/base/dma-contiguous.c
drivers/base/memory.c
drivers/block/cciss_scsi.c
drivers/block/drbd/drbd_main.c
drivers/block/loop.c
drivers/block/nbd.c
drivers/block/rbd.c
drivers/block/rbd_types.h
drivers/block/sunvdc.c
drivers/block/zram/zram_drv.c
drivers/block/zram/zram_drv.h
drivers/char/agp/alpha-agp.c
drivers/char/ipmi/Kconfig
drivers/char/ipmi/bt-bmc.c
drivers/char/ipmi/ipmi_devintf.c
drivers/char/ipmi/ipmi_msghandler.c
drivers/char/ipmi/ipmi_powernv.c
drivers/char/ipmi/ipmi_watchdog.c
drivers/char/mspec.c
drivers/char/pcmcia/cm4000_cs.c
drivers/char/pcmcia/cm4040_cs.c
drivers/char/sonypi.c
drivers/clk/Kconfig
drivers/clk/Makefile
drivers/clk/axs10x/i2s_pll_clock.c
drivers/clk/bcm/clk-bcm2835.c
drivers/clk/clk-cdce925.c
drivers/clk/clk-conf.c
drivers/clk/clk-cs2000-cp.c
drivers/clk/clk-scpi.c
drivers/clk/clk-stm32f4.c
drivers/clk/clk-versaclock5.c [new file with mode: 0644]
drivers/clk/clk-wm831x.c
drivers/clk/hisilicon/Kconfig
drivers/clk/hisilicon/Makefile
drivers/clk/hisilicon/clk-hi3660.c [new file with mode: 0644]
drivers/clk/hisilicon/clkgate-separated.c
drivers/clk/imx/clk-imx6q.c
drivers/clk/imx/clk-imx7d.c
drivers/clk/imx/clk-pllv3.c
drivers/clk/imx/clk-vf610.c
drivers/clk/imx/clk.h
drivers/clk/mediatek/Kconfig
drivers/clk/meson/meson8b.c
drivers/clk/mvebu/Makefile
drivers/clk/mvebu/ap806-system-controller.c
drivers/clk/mvebu/armada-xp.c
drivers/clk/mvebu/clk-corediv.c
drivers/clk/mvebu/clk-cpu.c
drivers/clk/mvebu/cp110-system-controller.c
drivers/clk/mvebu/mv98dx3236.c [new file with mode: 0644]
drivers/clk/qcom/clk-smd-rpm.c
drivers/clk/qcom/common.c
drivers/clk/qcom/gcc-ipq4019.c
drivers/clk/qcom/gcc-mdm9615.c
drivers/clk/qcom/gcc-msm8994.c
drivers/clk/qcom/gcc-msm8996.c
drivers/clk/qcom/gdsc.c
drivers/clk/renesas/clk-mstp.c
drivers/clk/renesas/r8a7795-cpg-mssr.c
drivers/clk/renesas/r8a7796-cpg-mssr.c
drivers/clk/renesas/renesas-cpg-mssr.c
drivers/clk/rockchip/Makefile
drivers/clk/rockchip/clk-muxgrf.c [new file with mode: 0644]
drivers/clk/rockchip/clk-pll.c
drivers/clk/rockchip/clk-rk3188.c
drivers/clk/rockchip/clk-rk3288.c
drivers/clk/rockchip/clk-rk3328.c [new file with mode: 0644]
drivers/clk/rockchip/clk-rk3399.c
drivers/clk/rockchip/clk.c
drivers/clk/rockchip/clk.h
drivers/clk/samsung/Makefile
drivers/clk/samsung/clk-exynos-audss.c
drivers/clk/samsung/clk-exynos4415.c [deleted file]
drivers/clk/samsung/clk-exynos5433.c
drivers/clk/samsung/clk-pll.c
drivers/clk/samsung/clk-s3c2410.c
drivers/clk/samsung/clk-s3c2412.c
drivers/clk/samsung/clk-s3c2443.c
drivers/clk/samsung/clk-s3c64xx.c
drivers/clk/sunxi-ng/Kconfig
drivers/clk/sunxi-ng/Makefile
drivers/clk/sunxi-ng/ccu-sun5i.c [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun5i.h [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun6i-a31.c
drivers/clk/sunxi-ng/ccu-sun8i-a33.c
drivers/clk/sunxi-ng/ccu-sun8i-h3.c
drivers/clk/sunxi-ng/ccu-sun8i-v3s.c [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun8i-v3s.h [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun9i-a80-de.c [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun9i-a80-de.h [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.c [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.h [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun9i-a80.c [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun9i-a80.h [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu_common.c
drivers/clk/sunxi-ng/ccu_common.h
drivers/clk/sunxi-ng/ccu_div.c
drivers/clk/sunxi-ng/ccu_div.h
drivers/clk/sunxi-ng/ccu_mp.c
drivers/clk/sunxi-ng/ccu_mult.c
drivers/clk/sunxi-ng/ccu_mult.h
drivers/clk/sunxi-ng/ccu_mux.c
drivers/clk/sunxi-ng/ccu_nk.c
drivers/clk/sunxi-ng/ccu_nkm.c
drivers/clk/sunxi-ng/ccu_nkmp.c
drivers/clk/sunxi-ng/ccu_nm.c
drivers/clk/tegra/Kconfig
drivers/clk/tegra/Makefile
drivers/clk/tegra/clk-bpmp.c [new file with mode: 0644]
drivers/clk/ti/divider.c
drivers/clk/uniphier/clk-uniphier-core.c
drivers/clk/uniphier/clk-uniphier-cpugear.c
drivers/clk/uniphier/clk-uniphier-sys.c
drivers/clk/ux500/abx500-clk.c
drivers/clk/ux500/u8500_of_clk.c
drivers/clk/x86/Makefile
drivers/clk/x86/clk-pmc-atom.c [new file with mode: 0644]
drivers/clk/zte/clk-zx296718.c
drivers/clk/zte/clk.c
drivers/clk/zte/clk.h
drivers/crypto/Kconfig
drivers/crypto/caam/ctrl.c
drivers/crypto/chelsio/chcr_algo.c [changed mode: 0755->0644]
drivers/dax/dax.c
drivers/devfreq/devfreq.c
drivers/extcon/extcon-rt8973a.c
drivers/firewire/core-cdev.c
drivers/firewire/core-device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/include/atombios.h
drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
drivers/gpu/drm/armada/armada_gem.c
drivers/gpu/drm/drm_probe_helper.c
drivers/gpu/drm/drm_vm.c
drivers/gpu/drm/etnaviv/etnaviv_drv.h
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/exynos/exynos_drm_gem.c
drivers/gpu/drm/exynos/exynos_drm_gem.h
drivers/gpu/drm/gma500/framebuffer.c
drivers/gpu/drm/gma500/gem.c
drivers/gpu/drm/gma500/psb_drv.h
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_render_state.c
drivers/gpu/drm/i915/i915_gem_userptr.c
drivers/gpu/drm/mga/mga_drv.h
drivers/gpu/drm/msm/msm_drv.h
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/omapdrm/omap_drv.h
drivers/gpu/drm/omapdrm/omap_gem.c
drivers/gpu/drm/qxl/qxl_ttm.c
drivers/gpu/drm/radeon/atombios.h
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/tegra/gem.c
drivers/gpu/drm/ttm/ttm_bo_vm.c
drivers/gpu/drm/udl/udl_drv.h
drivers/gpu/drm/udl/udl_gem.c
drivers/gpu/drm/vgem/vgem_drv.c
drivers/gpu/drm/virtio/virtgpu_ttm.c
drivers/hid/hid-kye.c
drivers/hsi/clients/cmt_speech.c
drivers/hwmon/g762.c
drivers/hwtracing/intel_th/msu.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/Makefile
drivers/i2c/busses/i2c-at91.c
drivers/i2c/busses/i2c-bcm2835.c
drivers/i2c/busses/i2c-bfin-twi.c
drivers/i2c/busses/i2c-cros-ec-tunnel.c
drivers/i2c/busses/i2c-designware-core.c
drivers/i2c/busses/i2c-eg20t.c
drivers/i2c/busses/i2c-emev2.c
drivers/i2c/busses/i2c-exynos5.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-imx-lpi2c.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-mv64xxx.c
drivers/i2c/busses/i2c-nforce2.c
drivers/i2c/busses/i2c-octeon-core.h
drivers/i2c/busses/i2c-omap.c
drivers/i2c/busses/i2c-riic.c
drivers/i2c/busses/i2c-robotfuzz-osif.c
drivers/i2c/busses/i2c-sh_mobile.c
drivers/i2c/busses/i2c-st.c
drivers/i2c/busses/i2c-stm32f4.c [new file with mode: 0644]
drivers/i2c/busses/i2c-tegra-bpmp.c [new file with mode: 0644]
drivers/i2c/busses/i2c-thunderx-pcidrv.c
drivers/i2c/busses/i2c-xgene-slimpro.c
drivers/i2c/busses/i2c-xlp9xx.c
drivers/i2c/busses/i2c-xlr.c
drivers/i2c/i2c-core.c
drivers/i2c/muxes/i2c-mux-mlxcpld.c
drivers/i2c/muxes/i2c-mux-pca9541.c
drivers/i2c/muxes/i2c-mux-pca954x.c
drivers/ide/ide-acpi.c
drivers/ide/ide-tape.c
drivers/ide/palm_bk3710.c
drivers/infiniband/core/Makefile
drivers/infiniband/core/cgroup.c [new file with mode: 0644]
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucm.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/hfi1/dma.c [deleted file]
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/mad.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/qedr/main.c
drivers/infiniband/hw/qib/qib_dma.c [deleted file]
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_keys.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/sw/rdmavt/Kconfig
drivers/infiniband/sw/rdmavt/Makefile
drivers/infiniband/sw/rdmavt/dma.c [deleted file]
drivers/infiniband/sw/rdmavt/dma.h [deleted file]
drivers/infiniband/sw/rdmavt/mad.c
drivers/infiniband/sw/rdmavt/mr.c
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/sw/rdmavt/vt.h
drivers/infiniband/sw/rxe/Kconfig
drivers/infiniband/sw/rxe/Makefile
drivers/infiniband/sw/rxe/rxe_dma.c [deleted file]
drivers/infiniband/sw/rxe/rxe_loc.h
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/input/touchscreen/cyttsp4_core.c
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/amd_iommu_types.h
drivers/iommu/intel-iommu.c
drivers/iommu/intel-svm.c
drivers/isdn/hardware/eicon/debug.c
drivers/isdn/hardware/mISDN/mISDNipac.c
drivers/isdn/mISDN/dsp_core.c
drivers/md/dm-rq.c
drivers/md/faulty.c
drivers/md/linear.c
drivers/md/linear.h
drivers/md/md.c
drivers/md/md.h
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid1.h
drivers/md/raid10.c
drivers/md/raid5-cache.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/dvb-core/dvb_ringbuffer.h
drivers/media/dvb-frontends/drx39xyj/drx_driver.h
drivers/media/dvb-frontends/drx39xyj/drxj.c
drivers/media/dvb-frontends/drx39xyj/drxj.h
drivers/media/dvb-frontends/drxk_hard.c
drivers/media/dvb-frontends/helene.c
drivers/media/dvb-frontends/or51132.c
drivers/media/dvb-frontends/tda10048.c
drivers/media/i2c/adv7183_regs.h
drivers/media/pci/saa7164/saa7164-fw.c
drivers/media/platform/exynos4-is/fimc-core.h
drivers/media/tuners/xc5000.c
drivers/media/usb/dvb-usb/dib0700_devices.c
drivers/media/usb/gspca/t613.c
drivers/media/usb/tm6000/tm6000-input.c
drivers/media/v4l2-core/tuner-core.c
drivers/media/v4l2-core/videobuf-dma-sg.c
drivers/misc/cxl/context.c
drivers/misc/eeprom/at24.c
drivers/misc/mic/bus/mic_bus.c
drivers/misc/mic/bus/scif_bus.c
drivers/misc/mic/bus/scif_bus.h
drivers/misc/mic/bus/vop_bus.c
drivers/misc/mic/host/mic_boot.c
drivers/misc/sgi-gru/grumain.c
drivers/misc/sgi-gru/grutables.h
drivers/misc/vmw_vmci/vmci_context.c
drivers/misc/vmw_vmci/vmci_queue_pair.c
drivers/mmc/host/mmci_qcom_dml.c
drivers/mtd/devices/lart.c
drivers/net/arcnet/arcnet.c
drivers/net/ethernet/adi/bfin_mac.c
drivers/net/ethernet/broadcom/bcm63xx_enet.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/igb/e1000_phy.c
drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/micrel/ksz884x.c
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_sriov.c
drivers/net/ethernet/qlogic/qed/qed_sriov.h
drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
drivers/net/ethernet/sfc/mcdi_pcol.h
drivers/net/ethernet/sis/sis900.c
drivers/net/geneve.c
drivers/net/gtp.c
drivers/net/usb/asix_devices.c
drivers/net/usb/kalmia.c
drivers/net/usb/rndis_host.c
drivers/net/usb/sierra_net.c
drivers/net/vxlan.c
drivers/net/wimax/i2400m/usb-fw.c
drivers/net/wireless/ath/ath9k/ani.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/intel/ipw2x00/ipw2100.c
drivers/net/wireless/intel/ipw2x00/ipw2200.c
drivers/net/wireless/intel/iwlegacy/4965-mac.c
drivers/net/wireless/intel/iwlwifi/iwl-drv.c
drivers/net/wireless/marvell/mwifiex/txrx.c
drivers/net/wireless/marvell/mwifiex/wmm.c
drivers/net/wireless/realtek/rtlwifi/rtl8192se/fw.c
drivers/net/wireless/rsi/rsi_91x_usb.c
drivers/net/wireless/ti/wl18xx/main.c
drivers/net/wireless/ti/wlcore/init.c
drivers/nfc/pn533/pn533.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/core.c
drivers/nvme/target/discovery.c
drivers/nvme/target/fabrics-cmd.c
drivers/nvme/target/fc.c
drivers/nvme/target/loop.c
drivers/nvme/target/nvmet.h
drivers/nvme/target/rdma.c
drivers/parisc/ccio-dma.c
drivers/parisc/sba_iommu.c
drivers/parport/ieee1284_ops.c
drivers/parport/parport_pc.c
drivers/pci/host/vmd.c
drivers/pci/quirks.c
drivers/pinctrl/bcm/Kconfig
drivers/platform/x86/Kconfig
drivers/platform/x86/Makefile
drivers/platform/x86/acer-wmi.c
drivers/platform/x86/alienware-wmi.c
drivers/platform/x86/asus-wireless.c
drivers/platform/x86/dell-laptop.c
drivers/platform/x86/fujitsu-laptop.c
drivers/platform/x86/hp_accel.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/intel_mid_powerbtn.c
drivers/platform/x86/intel_mid_thermal.c
drivers/platform/x86/intel_pmc_core.c
drivers/platform/x86/intel_pmc_ipc.c
drivers/platform/x86/intel_turbo_max_3.c [new file with mode: 0644]
drivers/platform/x86/mlx-platform.c
drivers/platform/x86/pmc_atom.c [new file with mode: 0644]
drivers/platform/x86/silead_dmi.c [new file with mode: 0644]
drivers/platform/x86/thinkpad_acpi.c
drivers/power/avs/smartreflex.c
drivers/rapidio/devices/rio_mport_cdev.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/rtc-armada38x.c
drivers/rtc/rtc-au1xxx.c
drivers/rtc/rtc-bfin.c
drivers/rtc/rtc-bq32k.c
drivers/rtc/rtc-dm355evm.c
drivers/rtc/rtc-ds3232.c
drivers/rtc/rtc-gemini.c
drivers/rtc/rtc-imxdi.c
drivers/rtc/rtc-ls1x.c
drivers/rtc/rtc-m48t86.c
drivers/rtc/rtc-mcp795.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-rx8010.c
drivers/rtc/rtc-sh.c
drivers/rtc/rtc-snvs.c
drivers/rtc/rtc-stm32.c [new file with mode: 0644]
drivers/rtc/rtc-sun6i.c
drivers/rtc/rtc-tegra.c
drivers/rtc/rtc-tps65910.c
drivers/s390/block/dasd_eckd.c
drivers/s390/cio/ioasm.c
drivers/s390/crypto/Makefile
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_card.c
drivers/s390/crypto/ap_queue.c
drivers/s390/crypto/pkey_api.c [new file with mode: 0644]
drivers/s390/crypto/zcrypt_api.c
drivers/s390/crypto/zcrypt_api.h
drivers/scsi/aacraid/linit.c
drivers/scsi/bfa/bfi_ms.h
drivers/scsi/cxlflash/superpipe.c
drivers/scsi/fcoe/fcoe_ctlr.c
drivers/scsi/ipr.c
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/mpt3sas/mpt3sas_ctl.c
drivers/scsi/mpt3sas/mpt3sas_ctl.h
drivers/scsi/osd/osd_initiator.c
drivers/scsi/osst.c
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_transport_sas.c
drivers/scsi/sg.c
drivers/scsi/storvsc_drv.c
drivers/staging/android/ion/ion.c
drivers/staging/gs_fpgaboot/gs_fpgaboot.h
drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
drivers/staging/lustre/lustre/llite/llite_mmap.c
drivers/staging/lustre/lustre/llite/vvp_io.c
drivers/staging/rtl8192u/ieee80211/ieee80211.h
drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
drivers/staging/wilc1000/linux_wlan.c
drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
drivers/target/target_core_user.c
drivers/tty/n_hdlc.c
drivers/tty/serial/ioc4_serial.c
drivers/tty/serial/sunhv.c
drivers/uio/uio.c
drivers/usb/core/devio.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/udc/fsl_udc_core.c
drivers/usb/gadget/udc/renesas_usb3.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/fotg210-hcd.c
drivers/usb/host/ohci-hcd.c
drivers/usb/misc/adutux.c
drivers/usb/misc/legousbtower.c
drivers/usb/misc/uss720.c
drivers/usb/mon/mon_bin.c
drivers/usb/usbip/usbip_common.c
drivers/video/console/fbcon.c
drivers/video/fbdev/amba-clcd-nomadik.c
drivers/video/fbdev/amba-clcd-nomadik.h
drivers/video/fbdev/amba-clcd-versatile.c
drivers/video/fbdev/amba-clcd-versatile.h
drivers/video/fbdev/amba-clcd.c
drivers/video/fbdev/amifb.c
drivers/video/fbdev/aty/radeon_monitor.c
drivers/video/fbdev/core/fb_defio.c
drivers/video/fbdev/fsl-diu-fb.c
drivers/video/fbdev/imxfb.c
drivers/video/fbdev/matrox/matroxfb_DAC1064.c
drivers/video/fbdev/matrox/matroxfb_Ti3026.c
drivers/video/fbdev/maxinefb.c
drivers/video/fbdev/mbx/mbxdebugfs.c
drivers/video/fbdev/metronomefb.c
drivers/video/fbdev/offb.c
drivers/video/fbdev/omap/lcd_ams_delta.c
drivers/video/fbdev/omap/lcd_h3.c
drivers/video/fbdev/omap/lcd_htcherald.c
drivers/video/fbdev/omap/lcd_inn1510.c
drivers/video/fbdev/omap/lcd_inn1610.c
drivers/video/fbdev/omap/lcd_osk.c
drivers/video/fbdev/omap/lcd_palmte.c
drivers/video/fbdev/omap/lcd_palmtt.c
drivers/video/fbdev/omap/lcd_palmz71.c
drivers/video/fbdev/omap/omapfb_main.c
drivers/video/fbdev/pmag-ba-fb.c
drivers/video/fbdev/pmagb-b-fb.c
drivers/video/fbdev/sh_mobile_lcdcfb.c
drivers/video/fbdev/simplefb.c
drivers/video/fbdev/ssd1307fb.c
drivers/video/fbdev/stifb.c
drivers/video/fbdev/wm8505fb.c
drivers/virtio/virtio_balloon.c
drivers/watchdog/Kconfig
drivers/watchdog/Makefile
drivers/watchdog/asm9260_wdt.c
drivers/watchdog/aspeed_wdt.c
drivers/watchdog/atlas7_wdt.c
drivers/watchdog/bcm2835_wdt.c
drivers/watchdog/bcm47xx_wdt.c
drivers/watchdog/bcm7038_wdt.c
drivers/watchdog/bcm_kona_wdt.c
drivers/watchdog/booke_wdt.c
drivers/watchdog/cadence_wdt.c
drivers/watchdog/coh901327_wdt.c
drivers/watchdog/da9052_wdt.c
drivers/watchdog/da9055_wdt.c
drivers/watchdog/da9062_wdt.c
drivers/watchdog/da9063_wdt.c
drivers/watchdog/diag288_wdt.c
drivers/watchdog/digicolor_wdt.c
drivers/watchdog/dw_wdt.c
drivers/watchdog/ebc-c384_wdt.c
drivers/watchdog/ep93xx_wdt.c
drivers/watchdog/gemini_wdt.c [new file with mode: 0644]
drivers/watchdog/iTCO_wdt.c
drivers/watchdog/imgpdc_wdt.c
drivers/watchdog/intel-mid_wdt.c
drivers/watchdog/kempld_wdt.c
drivers/watchdog/lantiq_wdt.c
drivers/watchdog/lpc18xx_wdt.c
drivers/watchdog/mena21_wdt.c
drivers/watchdog/meson_wdt.c
drivers/watchdog/mt7621_wdt.c
drivers/watchdog/nic7018_wdt.c [new file with mode: 0644]
drivers/watchdog/orion_wdt.c
drivers/watchdog/pika_wdt.c
drivers/watchdog/rn5t618_wdt.c
drivers/watchdog/rt2880_wdt.c
drivers/watchdog/s3c2410_wdt.c
drivers/watchdog/sa1100_wdt.c
drivers/watchdog/sama5d4_wdt.c
drivers/watchdog/sbsa_gwdt.c
drivers/watchdog/sirfsoc_wdt.c
drivers/watchdog/softdog.c
drivers/watchdog/sun4v_wdt.c
drivers/watchdog/sunxi_wdt.c
drivers/watchdog/tangox_wdt.c
drivers/watchdog/tegra_wdt.c
drivers/watchdog/ts72xx_wdt.c
drivers/watchdog/w83627hf_wdt.c
drivers/watchdog/watchdog_dev.c
drivers/watchdog/wm831x_wdt.c
drivers/watchdog/zx2967_wdt.c [new file with mode: 0644]
drivers/xen/privcmd.c
fs/9p/vfs_file.c
fs/affs/affs.h
fs/affs/amigaffs.c
fs/affs/inode.c
fs/affs/namei.c
fs/affs/super.c
fs/afs/dir.c
fs/aio.c
fs/autofs4/dev-ioctl.c
fs/autofs4/root.c
fs/block_dev.c
fs/btrfs/backref.c
fs/btrfs/btrfs_inode.h
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/export.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/free-space-tree.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/props.c
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/raid56.c
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/tree-log.h
fs/btrfs/ulist.c
fs/btrfs/ulist.h
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/buffer.c
fs/ceph/addr.c
fs/ceph/cache.c
fs/ceph/caps.c
fs/ceph/debugfs.c
fs/ceph/dir.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/ioctl.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/super.c
fs/ceph/super.h
fs/cifs/file.c
fs/dax.c
fs/direct-io.c
fs/ecryptfs/kthread.c
fs/eventpoll.c
fs/ext2/file.c
fs/ext4/ext4.h
fs/ext4/extents_status.c
fs/ext4/file.c
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/move_extent.c
fs/f2fs/file.c
fs/fuse/file.c
fs/gfs2/file.c
fs/hfs/mdb.c
fs/hfsplus/wrapper.c
fs/iomap.c
fs/jfs/super.c
fs/kernfs/dir.c
fs/kernfs/file.c
fs/kernfs/kernfs-internal.h
fs/lockd/svc.c
fs/mpage.c
fs/ncpfs/mmap.c
fs/ncpfs/sock.c
fs/nfs/blocklayout/blocklayout.c
fs/nfs/callback_xdr.c
fs/nfs/file.c
fs/nfs/filelayout/filelayout.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/objlayout/objlayout.c
fs/nfsd/blocklayout.c
fs/nfsd/export.c
fs/nfsd/nfs2acl.c
fs/nfsd/nfs3acl.c
fs/nfsd/nfs3proc.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4idmap.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfscache.c
fs/nfsd/nfsctl.c
fs/nfsd/nfsd.h
fs/nfsd/nfsproc.c
fs/nfsd/nfssvc.c
fs/nfsd/state.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/nilfs2/alloc.c
fs/nilfs2/btnode.c
fs/nilfs2/btree.c
fs/nilfs2/file.c
fs/nilfs2/inode.c
fs/nilfs2/mdt.c
fs/nilfs2/segment.c
fs/ocfs2/aops.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/file.c
fs/ocfs2/mmap.c
fs/orangefs/devorangefs-req.c
fs/orangefs/inode.c
fs/orangefs/orangefs-bufmap.c
fs/orangefs/orangefs-debugfs.c
fs/orangefs/orangefs-dev-proto.h
fs/orangefs/orangefs-kernel.h
fs/orangefs/orangefs-mod.c
fs/orangefs/orangefs-sysfs.c
fs/orangefs/orangefs-utils.c
fs/orangefs/upcall.h
fs/proc/base.c
fs/proc/generic.c
fs/proc/inode.c
fs/proc/kcore.c
fs/proc/root.c
fs/proc/task_mmu.c
fs/proc/task_nommu.c
fs/proc/vmcore.c
fs/pstore/platform.c
fs/reiserfs/file.c
fs/reiserfs/inode.c
fs/reiserfs/super.c
fs/squashfs/lz4_wrapper.c
fs/stat.c
fs/ubifs/file.c
fs/udf/inode.c
fs/userfaultfd.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_file.c
fs/xfs/xfs_trace.h
include/asm-generic/atomic.h
include/asm-generic/kprobes.h [new file with mode: 0644]
include/asm-generic/pgtable.h
include/asm-generic/tlb.h
include/dt-bindings/clock/bcm2835.h
include/dt-bindings/clock/exynos4415.h [deleted file]
include/dt-bindings/clock/exynos5433.h
include/dt-bindings/clock/hi3660-clock.h [new file with mode: 0644]
include/dt-bindings/clock/imx7d-clock.h
include/dt-bindings/clock/qcom,gcc-ipq4019.h
include/dt-bindings/clock/qcom,gcc-mdm9615.h
include/dt-bindings/clock/qcom,gcc-msm8994.h
include/dt-bindings/clock/qcom,gcc-msm8996.h
include/dt-bindings/clock/qcom,rpmcc.h
include/dt-bindings/clock/rk3188-cru-common.h
include/dt-bindings/clock/rk3288-cru.h
include/dt-bindings/clock/rk3328-cru.h [new file with mode: 0644]
include/dt-bindings/clock/ste-ab8500.h [new file with mode: 0644]
include/dt-bindings/clock/stm32fx-clock.h
include/dt-bindings/clock/sun5i-ccu.h [new file with mode: 0644]
include/dt-bindings/clock/sun8i-v3s-ccu.h [new file with mode: 0644]
include/dt-bindings/clock/sun9i-a80-ccu.h [new file with mode: 0644]
include/dt-bindings/clock/sun9i-a80-de.h [new file with mode: 0644]
include/dt-bindings/clock/sun9i-a80-usb.h [new file with mode: 0644]
include/dt-bindings/reset/sun5i-ccu.h [new file with mode: 0644]
include/dt-bindings/reset/sun8i-v3s-ccu.h [new file with mode: 0644]
include/dt-bindings/reset/sun9i-a80-ccu.h [new file with mode: 0644]
include/dt-bindings/reset/sun9i-a80-de.h [new file with mode: 0644]
include/dt-bindings/reset/sun9i-a80-usb.h [new file with mode: 0644]
include/linux/bio.h
include/linux/blk-mq.h
include/linux/bug.h
include/linux/ceph/osd_client.h
include/linux/ceph/osdmap.h
include/linux/ceph/rados.h
include/linux/cgroup-defs.h
include/linux/cgroup.h
include/linux/cgroup_rdma.h [new file with mode: 0644]
include/linux/cgroup_subsys.h
include/linux/cma.h
include/linux/compat.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/crush/crush.h
include/linux/crush/mapper.h
include/linux/dax.h
include/linux/dcache.h
include/linux/device.h
include/linux/dma-contiguous.h
include/linux/dma-mapping.h
include/linux/fs.h
include/linux/fsl-diu-fb.h
include/linux/gfp.h
include/linux/huge_mm.h
include/linux/i2c.h
include/linux/idr.h
include/linux/iomap.h
include/linux/iopoll.h
include/linux/ipmi.h
include/linux/jump_label.h
include/linux/kasan.h
include/linux/kconfig.h
include/linux/kernel.h
include/linux/kernfs.h
include/linux/kprobes.h
include/linux/lz4.h
include/linux/memblock.h
include/linux/memory.h
include/linux/mfd/tps65910.h
include/linux/mic_bus.h
include/linux/migrate.h
include/linux/mlx4/driver.h
include/linux/mm.h
include/linux/mm_inline.h
include/linux/mm_types.h
include/linux/mmu_notifier.h
include/linux/mmzone.h
include/linux/mtd/qinfo.h
include/linux/netdevice.h
include/linux/nvme-rdma.h
include/linux/nvme.h
include/linux/pfn_t.h
include/linux/pid.h
include/linux/pid_namespace.h
include/linux/platform_data/rtc-m48t86.h [deleted file]
include/linux/platform_data/video-imxfb.h
include/linux/platform_data/x86/clk-pmc-atom.h [new file with mode: 0644]
include/linux/platform_data/x86/pmc_atom.h [new file with mode: 0644]
include/linux/radix-tree.h
include/linux/rbtree_augmented.h
include/linux/refcount.h
include/linux/rmap.h
include/linux/rodata_test.h [new file with mode: 0644]
include/linux/sched.h
include/linux/sed-opal.h
include/linux/sem.h
include/linux/shmem_fs.h
include/linux/spi/flash.h
include/linux/sunrpc/cache.h
include/linux/sunrpc/rpc_rdma.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/svc_rdma.h
include/linux/sunrpc/svc_xprt.h
include/linux/timer.h
include/linux/userfaultfd_k.h
include/linux/watchdog.h
include/linux/workqueue.h
include/linux/writeback.h
include/media/v4l2-ctrls.h
include/net/cfg80211.h
include/net/mac80211.h
include/rdma/ib_verbs.h
include/sound/rt5665.h [changed mode: 0755->0644]
include/trace/events/btrfs.h
include/trace/events/rxrpc.h
include/trace/events/timer.h
include/trace/events/writeback.h
include/uapi/linux/auto_dev-ioctl.h
include/uapi/linux/auto_fs.h
include/uapi/linux/auto_fs4.h
include/uapi/linux/mqueue.h
include/uapi/linux/netfilter.h
include/uapi/linux/netfilter/xt_hashlimit.h
include/uapi/linux/nfsd/export.h
include/uapi/linux/userfaultfd.h
include/xen/arm/hypervisor.h
include/xen/interface/grant_table.h
init/Kconfig
init/initramfs.c
init/main.c
ipc/mqueue.c
ipc/sem.c
ipc/shm.c
kernel/Makefile
kernel/bpf/verifier.c
kernel/cgroup.c [deleted file]
kernel/cgroup/Makefile [new file with mode: 0644]
kernel/cgroup/cgroup-internal.h [new file with mode: 0644]
kernel/cgroup/cgroup-v1.c [new file with mode: 0644]
kernel/cgroup/cgroup.c [new file with mode: 0644]
kernel/cgroup/cpuset.c [new file with mode: 0644]
kernel/cgroup/freezer.c [new file with mode: 0644]
kernel/cgroup/namespace.c [new file with mode: 0644]
kernel/cgroup/pids.c [new file with mode: 0644]
kernel/cgroup/rdma.c [new file with mode: 0644]
kernel/cgroup_freezer.c [deleted file]
kernel/cgroup_pids.c [deleted file]
kernel/configs/android-base.config
kernel/configs/android-recommended.config
kernel/cpuset.c [deleted file]
kernel/events/core.c
kernel/events/uprobes.c
kernel/exit.c
kernel/fork.c
kernel/futex.c
kernel/irq/manage.c
kernel/jump_label.c
kernel/ksysfs.c
kernel/memremap.c
kernel/notifier.c
kernel/panic.c
kernel/relay.c
kernel/sched/core.c
kernel/signal.c
kernel/torture.c
kernel/trace/ftrace.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_benchmark.c
kernel/trace/trace_branch.c
kernel/trace/trace_entries.h
kernel/trace/trace_hwlat.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_probe.c
kernel/trace/trace_uprobe.c
lib/Kconfig
lib/Kconfig.debug
lib/Makefile
lib/atomic64_test.c
lib/crc32.c
lib/crc32test.c [new file with mode: 0644]
lib/decompress_unlz4.c
lib/dma-noop.c
lib/dma-virt.c [new file with mode: 0644]
lib/find_bit.c
lib/fonts/Kconfig
lib/glob.c
lib/globtest.c [new file with mode: 0644]
lib/idr.c
lib/list_debug.c
lib/lz4/Makefile
lib/lz4/lz4_compress.c
lib/lz4/lz4_decompress.c
lib/lz4/lz4defs.h
lib/lz4/lz4hc_compress.c
lib/percpu_counter.c
lib/radix-tree.c
lib/rbtree.c
lib/refcount.c [new file with mode: 0644]
lib/scatterlist.c
lib/sort.c
lib/test_kasan.c
lib/test_sort.c [new file with mode: 0644]
lib/vsprintf.c
mm/Kconfig.debug
mm/Makefile
mm/cma.c
mm/cma_debug.c
mm/compaction.c
mm/dmapool.c
mm/filemap.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/kasan/kasan.c
mm/kasan/quarantine.c
mm/khugepaged.c
mm/ksm.c
mm/madvise.c
mm/memblock.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/migrate.c
mm/mincore.c
mm/mmap.c
mm/mmu_context.c
mm/mmu_notifier.c
mm/mprotect.c
mm/mremap.c
mm/nommu.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_idle.c
mm/page_vma_mapped.c [new file with mode: 0644]
mm/pagewalk.c
mm/percpu.c
mm/pgtable-generic.c
mm/rmap.c
mm/rodata_test.c [new file with mode: 0644]
mm/shmem.c
mm/slab_common.c
mm/swap.c
mm/swapfile.c
mm/truncate.c
mm/userfaultfd.c
mm/util.c
mm/vmalloc.c
mm/vmpressure.c
mm/vmscan.c
mm/workingset.c
mm/z3fold.c
mm/zsmalloc.c
mm/zswap.c
net/appletalk/ddp.c
net/atm/mpc.c
net/bluetooth/hci_sock.c
net/bridge/br_vlan.c
net/bridge/netfilter/ebt_among.c
net/ceph/cls_lock_client.c
net/ceph/crush/crush.c
net/ceph/crush/mapper.c
net/ceph/crypto.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/ceph/snapshot.c
net/core/dev.c
net/dccp/input.c
net/ieee802154/socket.c
net/ipv4/fib_frontend.c
net/ipv4/fib_trie.c
net/ipv4/ipmr.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
net/ipv4/netfilter/nf_log_arp.c
net/ipv4/tcp_input.c
net/ipv6/addrconf.c
net/ipv6/netfilter/nf_log_ipv6.c
net/ipv6/route.c
net/irda/irnet/irnet_ppp.c
net/l2tp/l2tp_core.c
net/mac80211/mesh.c
net/mac80211/status.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_dh.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_sh.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_ftp.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/nft_ct.c
net/netfilter/nft_set_bitmap.c
net/netfilter/x_tables.c
net/rds/ib.c
net/rds/ib.h
net/rds/ib_mr.h
net/rxrpc/af_rxrpc.c
net/rxrpc/ar-internal.h
net/rxrpc/call_accept.c
net/rxrpc/call_object.c
net/rxrpc/input.c
net/rxrpc/recvmsg.c
net/rxrpc/sendmsg.c
net/sctp/input.c
net/sctp/output.c
net/sctp/transport.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/cache.c
net/sunrpc/svc.c
net/sunrpc/svcsock.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
net/sunrpc/xprtrdma/svc_rdma_marshal.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtsock.c
scripts/checkpatch.pl
scripts/checkstack.pl
scripts/recordmcount.pl
scripts/spelling.txt
security/selinux/selinuxfs.c
security/selinux/ss/ebitmap.c
security/selinux/ss/policydb.c
sound/core/pcm_native.c
sound/pci/ac97/ac97_patch.c
sound/pci/cs46xx/cs46xx_dsp_task_types.h
sound/pci/hda/patch_ca0132.c
sound/pci/ice1712/wm8766.c
sound/pci/ice1712/wm8776.c
sound/pci/korg1212/korg1212.c
sound/pci/pcxhr/pcxhr_hwdep.c
sound/pcmcia/vx/vxp_ops.c
sound/ppc/snd_ps3.c
sound/soc/amd/acp-pcm-dma.c
sound/soc/codecs/wm_hubs.c
sound/soc/fsl/fsl_asrc.c
sound/soc/qcom/lpass.h
sound/soc/soc-core.c
sound/soc/soc-topology.c
sound/usb/usx2y/us122l.c
sound/usb/usx2y/usX2Yhwdep.c
sound/usb/usx2y/usx2yhwdeppcm.c
tools/build/Makefile
tools/build/Makefile.include
tools/include/asm-generic/bitops/atomic.h
tools/include/asm/bug.h
tools/include/linux/bitmap.h
tools/include/linux/bitops.h
tools/include/linux/compiler.h
tools/include/linux/spinlock.h [new file with mode: 0644]
tools/lib/bpf/bpf.c
tools/lib/find_bit.c
tools/lib/traceevent/event-parse.c
tools/lib/traceevent/event-parse.h
tools/objtool/arch.h
tools/objtool/arch/x86/decode.c
tools/objtool/builtin-check.c
tools/perf/Documentation/perf-annotate.txt
tools/perf/Documentation/perf-diff.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/tips.txt
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/builtin-annotate.c
tools/perf/builtin-diff.c
tools/perf/builtin-mem.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/pmu-events/json.c
tools/perf/tests/attr.c
tools/perf/tests/builtin-test.c
tools/perf/tests/code-reading.c
tools/perf/tests/fdarray.c
tools/perf/tests/llvm.c
tools/perf/tests/parse-events.c
tools/perf/tests/perf-record.c
tools/perf/tests/python-use.c
tools/perf/tests/thread-map.c
tools/perf/tests/topology.c
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/ui/browsers/map.c
tools/perf/ui/hist.c
tools/perf/util/annotate.c
tools/perf/util/cgroup.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/dso.c
tools/perf/util/env.c
tools/perf/util/header.c
tools/perf/util/hist.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.y
tools/perf/util/pmu.c
tools/perf/util/probe-event.c
tools/perf/util/probe-finder.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/setup.py
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/stat.c
tools/perf/util/symbol-elf.c
tools/power/cpupower/utils/cpufreq-info.c
tools/testing/ktest/ktest.pl
tools/testing/radix-tree/.gitignore
tools/testing/radix-tree/Makefile
tools/testing/radix-tree/benchmark.c
tools/testing/radix-tree/generated/autoconf.h
tools/testing/radix-tree/idr-test.c [new file with mode: 0644]
tools/testing/radix-tree/iteration_check.c
tools/testing/radix-tree/linux.c
tools/testing/radix-tree/linux/bitops.h [deleted file]
tools/testing/radix-tree/linux/bitops/__ffs.h [deleted file]
tools/testing/radix-tree/linux/bitops/ffs.h [deleted file]
tools/testing/radix-tree/linux/bitops/ffz.h [deleted file]
tools/testing/radix-tree/linux/bitops/find.h [deleted file]
tools/testing/radix-tree/linux/bitops/fls.h [deleted file]
tools/testing/radix-tree/linux/bitops/fls64.h [deleted file]
tools/testing/radix-tree/linux/bitops/hweight.h [deleted file]
tools/testing/radix-tree/linux/bitops/le.h [deleted file]
tools/testing/radix-tree/linux/bitops/non-atomic.h [deleted file]
tools/testing/radix-tree/linux/export.h [deleted file]
tools/testing/radix-tree/linux/gfp.h
tools/testing/radix-tree/linux/idr.h [new file with mode: 0644]
tools/testing/radix-tree/linux/init.h
tools/testing/radix-tree/linux/kernel.h
tools/testing/radix-tree/linux/mempool.h [deleted file]
tools/testing/radix-tree/linux/percpu.h
tools/testing/radix-tree/linux/preempt.h
tools/testing/radix-tree/linux/radix-tree.h
tools/testing/radix-tree/linux/types.h [deleted file]
tools/testing/radix-tree/main.c
tools/testing/radix-tree/multiorder.c
tools/testing/radix-tree/regression1.c
tools/testing/radix-tree/regression2.c
tools/testing/radix-tree/regression3.c
tools/testing/radix-tree/tag_check.c
tools/testing/radix-tree/test.c
tools/testing/radix-tree/test.h
tools/testing/selftests/Makefile
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/breakpoints/Makefile
tools/testing/selftests/capabilities/Makefile
tools/testing/selftests/cpufreq/Makefile [new file with mode: 0644]
tools/testing/selftests/cpufreq/cpu.sh [new file with mode: 0755]
tools/testing/selftests/cpufreq/cpufreq.sh [new file with mode: 0755]
tools/testing/selftests/cpufreq/governor.sh [new file with mode: 0755]
tools/testing/selftests/cpufreq/main.sh [new file with mode: 0755]
tools/testing/selftests/cpufreq/module.sh [new file with mode: 0755]
tools/testing/selftests/cpufreq/special-tests.sh [new file with mode: 0755]
tools/testing/selftests/efivarfs/Makefile
tools/testing/selftests/exec/Makefile
tools/testing/selftests/ftrace/Makefile
tools/testing/selftests/futex/Makefile
tools/testing/selftests/futex/functional/Makefile
tools/testing/selftests/futex/include/logging.h
tools/testing/selftests/gpio/.gitignore [new file with mode: 0644]
tools/testing/selftests/intel_pstate/Makefile
tools/testing/selftests/intel_pstate/aperf.c
tools/testing/selftests/ipc/.gitignore
tools/testing/selftests/ipc/Makefile
tools/testing/selftests/kcmp/Makefile
tools/testing/selftests/lib.mk
tools/testing/selftests/membarrier/Makefile
tools/testing/selftests/memfd/Makefile
tools/testing/selftests/mount/Makefile
tools/testing/selftests/mqueue/Makefile
tools/testing/selftests/net/Makefile
tools/testing/selftests/nsfs/Makefile
tools/testing/selftests/powerpc/Makefile
tools/testing/selftests/powerpc/alignment/Makefile
tools/testing/selftests/powerpc/benchmarks/Makefile
tools/testing/selftests/powerpc/context_switch/Makefile
tools/testing/selftests/powerpc/copyloops/Makefile
tools/testing/selftests/powerpc/dscr/Makefile
tools/testing/selftests/powerpc/math/Makefile
tools/testing/selftests/powerpc/mm/Makefile
tools/testing/selftests/powerpc/pmu/Makefile
tools/testing/selftests/powerpc/pmu/ebb/Makefile
tools/testing/selftests/powerpc/primitives/Makefile
tools/testing/selftests/powerpc/stringloops/Makefile
tools/testing/selftests/powerpc/switch_endian/Makefile
tools/testing/selftests/powerpc/syscalls/Makefile
tools/testing/selftests/powerpc/tm/Makefile
tools/testing/selftests/powerpc/vphn/Makefile
tools/testing/selftests/pstore/Makefile
tools/testing/selftests/ptrace/Makefile
tools/testing/selftests/seccomp/Makefile
tools/testing/selftests/sigaltstack/Makefile
tools/testing/selftests/sigaltstack/sas.c
tools/testing/selftests/size/Makefile
tools/testing/selftests/timers/Makefile
tools/testing/selftests/vm/Makefile
tools/testing/selftests/vm/userfaultfd.c
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/protection_keys.c
tools/testing/selftests/zram/Makefile
virt/kvm/async_pf.c
virt/kvm/kvm_main.c

index c8a8eb1a2b119c064f038559fa67f6511a31bce6..793acf999e9eac87057af3214ca1f98ad65b922f 100644 (file)
@@ -270,8 +270,8 @@ m68k/
        - directory with info about Linux on Motorola 68k architecture.
 mailbox.txt
        - How to write drivers for the common mailbox framework (IPC).
-md-cluster.txt
-       - info on shared-device RAID MD cluster.
+md/
+       - directory with info about Linux Software RAID
 media/
        - info on media drivers: uAPI, kAPI and driver documentation.
 memory-barriers.txt
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-bq32k b/Documentation/ABI/testing/sysfs-bus-i2c-devices-bq32k
new file mode 100644 (file)
index 0000000..398b258
--- /dev/null
@@ -0,0 +1,7 @@
+What:          /sys/bus/i2c/devices/.../trickle_charge_bypass
+Date:          Jan 2017
+KernelVersion: 4.11
+Contact:       Enric Balletbo i Serra <eballetbo@gmail.com>
+Description:   Attribute to enable/disable the trickle charge bypass.
+               The trickle_charge_bypass attribute allows userspace to
+               enable/disable the Trickle charge FET bypass.
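
A minimal userspace sketch for this attribute; the "1-0068" bus/address
component below is illustrative, only the attribute name comes from the
entry above:

/* Sketch: enable the trickle charge FET bypass from userspace.
 * The "1-0068" path component is a made-up bus/address pair. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/bus/i2c/devices/1-0068/trickle_charge_bypass",
                      O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, "1", 1) != 1)     /* "1" enables, "0" disables */
                perror("write");
        close(fd);
        return 0;
}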
index d7fcdc5a4379216b4279f287763ec938bf7fb2a4..0320910b866db2132a733b7b70d22a5d54c34107 100644 (file)
@@ -1020,7 +1020,7 @@ and other resources, etc.
        </itemizedlist>
 
        <para>
-       Of errors detected as above, the followings are not ATA/ATAPI
+       Of errors detected as above, the following are not ATA/ATAPI
        device errors but ATA bus errors and should be handled
        according to <xref linkend="excatATAbusErr"/>.
        </para>
index 72292308d0f5bb140412345e8f924ff243cd7bdb..6962cab997efd5cf740efd28c2edaa85141acc9c 100644 (file)
@@ -257,7 +257,7 @@ and tell you when they come and go.
 
 Creating the User
 
-To user the message handler, you must first create a user using
+To use the message handler, you must first create a user using
 ipmi_create_user.  The interface number specifies which SMI you want
 to connect to, and you must supply callback functions to be called
 when data comes in.  The callback function can run at interrupt level,
index 5f55373dd53bac20ec24554144fb81dd18e1d64d..a3f598e141f2eca4e844f868e9586dab657ec59d 100644 (file)
@@ -57,7 +57,7 @@ Note: To get the ACPI debug object output (Store (AAAA, Debug)),
 3. undo your changes
    The "undo" operation is not supported for a new inserted method
    right now, i.e. we can not remove a method currently.
-   For an overrided method, in order to undo your changes, please
+   For an overridden method, in order to undo your changes, please
    save a copy of the method original ASL code in step c) section 1,
    and redo step c) ~ g) to override the method with the original one.
 
index c2505eefc878b2a4c94e9d66c9bb459eed802f27..0aba14c8f459353a9d7efbbfda3f9f32a1bb3bbb 100644 (file)
@@ -152,7 +152,7 @@ tracing facility.
        Users can enable/disable this debug tracing feature by executing
        the following command:
            # echo string > /sys/module/acpi/parameters/trace_state
-       Where "string" should be one of the followings:
+       Where "string" should be one of the following:
        "disable"
            Disable the method tracing feature.
        "enable"
index e449fb5f277c25b9b31800561f73a2d2e0d63593..1e61bf50595c84c936cfe8788bb37114a6f7d5f0 100644 (file)
@@ -725,3 +725,8 @@ These currently include:
       to 1.  Setting this to 0 disables bypass accounting and
       requires preread stripes to wait until all full-width stripe-
       writes are complete.  Valid values are 0 to stripe_cache_size.
+
+  journal_mode (currently raid5 only)
+      The cache mode for raid5. raid5 can include an extra disk for
+      caching. The mode can be either "write-through" or "write-back".
+      The default is "write-through".
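
A short usage sketch; the /sys/block/md0/md/ location is assumed from the
usual md sysfs layout, and the array name is illustrative:

/* Sketch: switch a raid5 array's cache mode to write-back.  The
 * /sys/block/md0/md/ prefix is an assumption about the md sysfs layout. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/block/md0/md/journal_mode", "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        fputs("write-back", f); /* "write-through" is the default */
        return fclose(f) ? 1 : 0;
}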
index 9939348bd4a3dfae23dc2e3cd4b697dd0c8151dc..1b90c6f00a9290b14e365c5e07eed13b57942ac0 100644 (file)
@@ -81,7 +81,7 @@ That defines some categories of errors:
   still run, eventually replacing the affected hardware by a hot spare,
   if available.
 
-  Also, when an error happens on an userspace process, it is also possible to
+  Also, when an error happens on a userspace process, it is also possible to
   kill such process and let userspace restart it.
 
 The mechanism for handling non-fatal errors is usually complex and may
index 1f610ecf698a9670a478c4db3b01042e315612bd..f7e05055148715f1eca44617fecaf20193c799dd 100644 (file)
@@ -17,7 +17,7 @@ driver and currently works well under standard IDE subsystem. Actually it's
 one chip SSD. IO mode is ATA-like custom mode for the host that doesn't have
 IDE interface.
 
-Followings are brief descriptions about IO mode.
+The following are brief descriptions of the IO modes.
 A. IO mode based on ATA protocol and uses some custom command. (read confirm,
 write confirm)
 B. IO mode uses SRAM bus interface.
index 1c0c08d9206b1e145e8a8ccc1a784ac0b19156e7..4fced8a21307517d82b77329aa1705c7569787eb 100644 (file)
@@ -201,8 +201,8 @@ File /sys/block/zram<id>/mm_stat
 The stat file represents device's mm statistics. It consists of a single
 line of text and contains the following stats separated by whitespace:
  orig_data_size   uncompressed size of data stored in this disk.
-                  This excludes zero-filled pages (zero_pages) since no
-                  memory is allocated for them.
+                 This excludes same-element-filled pages (same_pages) since
+                 no memory is allocated for them.
                   Unit: bytes
  compr_data_size  compressed size of data stored in this disk
  mem_used_total   the amount of memory allocated for this disk. This
@@ -214,7 +214,7 @@ line of text and contains the following stats separated by whitespace:
                   the compressed data
  mem_used_max     the maximum amount of memory zram have consumed to
                   store the data
- zero_pages       the number of zero filled pages written to this disk.
+ same_pages       the number of same element filled pages written to this disk.
                   No memory is allocated for such pages.
  pages_compacted  the number of pages freed during compaction
 
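
A sketch of consuming mm_stat from userspace; the full field order (with
mem_limit sitting between mem_used_total and mem_used_max) is assumed from
the documented layout:

/* Sketch: read /sys/block/zram0/mm_stat and compute a compression ratio.
 * Assumed field order: orig_data_size compr_data_size mem_used_total
 * mem_limit mem_used_max same_pages pages_compacted. */
#include <stdio.h>

int main(void)
{
        unsigned long long orig, compr, used, limit, used_max, same, compacted;
        FILE *f = fopen("/sys/block/zram0/mm_stat", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%llu %llu %llu %llu %llu %llu %llu", &orig, &compr,
                   &used, &limit, &used_max, &same, &compacted) == 7)
                printf("ratio: %.2f, same-element pages: %llu\n",
                       compr ? (double)orig / compr : 0.0, same);
        fclose(f);
        return 0;
}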
diff --git a/Documentation/cgroup-v1/rdma.txt b/Documentation/cgroup-v1/rdma.txt
new file mode 100644 (file)
index 0000000..af61817
--- /dev/null
@@ -0,0 +1,109 @@
+                               RDMA Controller
+                               ----------------
+
+Contents
+--------
+
+1. Overview
+  1-1. What is the RDMA controller?
+  1-2. Why is the RDMA controller needed?
+  1-3. How is the RDMA controller implemented?
+2. Usage Examples
+
+1. Overview
+
+1-1. What is the RDMA controller?
+---------------------------------
+
+The RDMA controller allows users to limit the RDMA/IB specific resources
+that a given set of processes can use. These processes are grouped using
+the RDMA controller.
+
+The RDMA controller defines two resources which can be limited for the
+processes of a cgroup.
+
+1-2. Why is the RDMA controller needed?
+---------------------------------------
+
+Currently, user space applications can easily take away all the rdma verb
+specific resources such as AH, CQ, QP, MR, etc., so that other applications
+in other cgroups or kernel space ULPs may not even get a chance to allocate
+any rdma resources. This can lead to service unavailability.
+
+Therefore an RDMA controller is needed, through which the resource
+consumption of processes can be limited. Through this controller different
+rdma resources can be accounted for.
+
+1-3. How is the RDMA controller implemented?
+--------------------------------------------
+
+The rdma cgroup allows limit configuration of resources. It maintains
+resource accounting per cgroup, per device using a resource pool structure.
+Each such resource pool is limited to 64 resources by the rdma cgroup,
+which can be extended later if required.
+
+This resource pool object is linked to the cgroup css. Typically there
+are 0 to 4 resource pool instances per cgroup, per device in most use
+cases, but nothing prevents there being more. At present hundreds of RDMA
+devices per single cgroup may not be handled optimally; however, there is
+no known use case or requirement for such a configuration either.
+
+Since RDMA resources can be allocated by any process and can be freed by
+any of the child processes which share the address space, rdma resources
+are always owned by the creator cgroup css. This allows process migration
+from one cgroup to another without the complexity of transferring resource
+ownership, because such ownership is not really present due to the shared
+nature of rdma resources. Linking resources to the css also ensures that
+cgroups can be deleted after processes have migrated. This allows process
+migration with active resources as well, even though that is not a primary
+use case.
+
+Whenever RDMA resource charging occurs, the owner rdma cgroup is returned
+to the caller. The same rdma cgroup should be passed while uncharging the
+resource. This also allows a process migrated with an active RDMA resource
+to charge new resources to the new owner cgroup, and to uncharge a resource
+of a process from the previously charged cgroup after it has migrated to a
+new cgroup, even though that is not a primary use case.
+
+A resource pool object is created in the following situations.
+(a) The user sets a limit and no previous resource pool exists for the
+device of interest for the cgroup.
+(b) No resource limits were configured, but the IB/RDMA stack tries to
+charge the resource, so that resources charged while applications were
+running without limits are correctly uncharged later, once limits are
+enforced; otherwise the usage count would drop below zero.
+
+A resource pool is destroyed if all the resource limits are set to max and
+it is the last resource getting deallocated.
+
+Users should set all the limits to the max value if they intend to
+remove/unconfigure the resource pool for a particular device.
+
+The IB stack honors limits enforced by the rdma controller. When an
+application queries the maximum resource limits of an IB device, the
+minimum of what is configured by the user for the given cgroup and what is
+supported by the IB device is returned.
+
+The following resources can be accounted for by the rdma controller.
+  hca_handle   Maximum number of HCA Handles
+  hca_object   Maximum number of HCA Objects
+
+2. Usage Examples
+-----------------
+
+(a) Configure resource limit:
+echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
+echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
+
+(b) Query resource limit:
+cat /sys/fs/cgroup/rdma/2/rdma.max
+#Output:
+mlx4_0 hca_handle=2 hca_object=2000
+ocrdma1 hca_handle=3 hca_object=max
+
+(c) Query current usage:
+cat /sys/fs/cgroup/rdma/2/rdma.current
+#Output:
+mlx4_0 hca_handle=1 hca_object=20
+ocrdma1 hca_handle=1 hca_object=23
+
+(d) Delete resource limit:
+echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
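
The ownership contract above -- charging hands back the owner cgroup, and
that same cgroup must be passed on uncharge even after the process migrates
-- can be sketched as follows; all names and types here are illustrative,
not the kernel's actual API:

/* Illustrative-only sketch of the charge/uncharge ownership contract. */
#include <stdio.h>

struct rdma_cgroup {
        int usage;
        int limit;
};

/* Charging returns the owner cgroup through *owner. */
static int rdmacg_try_charge(struct rdma_cgroup **owner,
                             struct rdma_cgroup *cg)
{
        if (cg->usage >= cg->limit)
                return -1;      /* over the configured max for this cgroup */
        cg->usage++;
        *owner = cg;            /* caller must remember the owner */
        return 0;
}

/* The same owner must be passed back, even if the process migrated. */
static void rdmacg_uncharge(struct rdma_cgroup *owner)
{
        owner->usage--;
}

int main(void)
{
        struct rdma_cgroup cg = { .usage = 0, .limit = 2 };
        struct rdma_cgroup *owner;

        if (rdmacg_try_charge(&owner, &cg) == 0) {
                printf("charged, usage=%d\n", owner->usage);
                rdmacg_uncharge(owner);
        }
        return 0;
}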
index 4cc07ce3b8dd00ee109ef888b05931f4ce6e30e1..3b8449f8ac7e80a0ebeaf6dfe8c64b15503f3954 100644 (file)
@@ -47,6 +47,12 @@ CONTENTS
   5-3. IO
     5-3-1. IO Interface Files
     5-3-2. Writeback
+  5-4. PID
+    5-4-1. PID Interface Files
+  5-5. RDMA
+    5-5-1. RDMA Interface Files
+  5-6. Misc
+    5-6-1. perf_event
 6. Namespace
   6-1. Basics
   6-2. The Root and Views
@@ -328,14 +334,12 @@ a process with a non-root euid to migrate a target process into a
 cgroup by writing its PID to the "cgroup.procs" file, the following
 conditions must be met.
 
-- The writer's euid must match either uid or suid of the target process.
-
 - The writer must have write access to the "cgroup.procs" file.
 
 - The writer must have write access to the "cgroup.procs" file of the
   common ancestor of the source and destination cgroups.
 
-The above three constraints ensure that while a delegatee may migrate
+The above two constraints ensure that while a delegatee may migrate
 processes around freely in the delegated sub-hierarchy it can't pull
 in from or push out to outside the sub-hierarchy.
 
@@ -350,10 +354,10 @@ all processes under C0 and C1 belong to U0.
 
 Let's also say U0 wants to write the PID of a process which is
 currently in C10 into "C00/cgroup.procs".  U0 has write access to the
-file and uid match on the process; however, the common ancestor of the
-source cgroup C10 and the destination cgroup C00 is above the points
-of delegation and U0 would not have write access to its "cgroup.procs"
-files and thus the write will be denied with -EACCES.
+file; however, the common ancestor of the source cgroup C10 and the
+destination cgroup C00 is above the points of delegation and U0 would
+not have write access to its "cgroup.procs" files and thus the write
+will be denied with -EACCES.
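
Concretely, the denied migration surfaces to U0 as a failed write(2); a
sketch, with the cgroupfs layout and the PID made up:

/* Sketch: U0 writes a PID into C00/cgroup.procs.  Because the common
 * ancestor's cgroup.procs sits above the delegation point, the kernel
 * refuses with EACCES.  Mount point, path and PID are illustrative. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/fs/cgroup/C0/C00/cgroup.procs", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, "1234", 4) < 0 && errno == EACCES)
                fprintf(stderr, "migration denied as expected\n");
        close(fd);
        return 0;
}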
 
 
 2-6. Guidelines
@@ -1119,6 +1123,91 @@ writeback as follows.
        vm.dirty[_background]_ratio.
 
 
+5-4. PID
+
+The process number controller is used to allow a cgroup to stop any
+new tasks from being fork()'d or clone()'d after a specified limit is
+reached.
+
+The number of tasks in a cgroup can be exhausted in ways which other
+controllers cannot prevent, thus warranting its own controller.  For
+example, a fork bomb is likely to exhaust the number of tasks before
+hitting memory restrictions.
+
+Note that PIDs used in this controller refer to TIDs, process IDs as
+used by the kernel.
+
+
+5-4-1. PID Interface Files
+
+  pids.max
+
+       A read-write single value file which exists on non-root cgroups.
+       The default is "max".
+
+       Hard limit of number of processes.
+
+  pids.current
+
+       A read-only single value file which exists on all cgroups.
+
+       The number of processes currently in the cgroup and its descendants.
+
+Organisational operations are not blocked by cgroup policies, so it is
+possible to have pids.current > pids.max.  This can be done by either
+setting the limit to be smaller than pids.current, or attaching enough
+processes to the cgroup such that pids.current is larger than
+pids.max.  However, it is not possible to violate a cgroup PID policy
+through fork() or clone(). These will return -EAGAIN if the creation
+of a new process would cause a cgroup policy to be violated.
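
For instance, a process in a cgroup with a small pids.max sees fork() fail
with EAGAIN once the limit is reached; a sketch, meant to be run inside
such a cgroup:

/* Sketch: fork until the cgroup's pids.max is reached; the kernel then
 * fails fork() with EAGAIN as described above. */
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        for (;;) {
                pid_t pid = fork();

                if (pid < 0) {
                        if (errno == EAGAIN)
                                fprintf(stderr, "pids.max reached\n");
                        else
                                perror("fork");
                        return 1;
                }
                if (pid == 0)
                        pause();        /* child keeps its PID occupied */
        }
}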
+
+
+5-5. RDMA
+
+The "rdma" controller regulates the distribution and accounting of
+RDMA resources.
+
+5-5-1. RDMA Interface Files
+
+  rdma.max
+       A read-write nested-keyed file that exists for all cgroups except
+       root and describes the currently configured resource limits for
+       RDMA/IB devices.
+
+       Lines are keyed by device name and are not ordered.
+       Each line contains a space-separated resource name and its
+       configured limit that can be distributed.
+
+       The following nested keys are defined.
+
+         hca_handle    Maximum number of HCA Handles
+         hca_object    Maximum number of HCA Objects
+
+       An example for the mlx4 and ocrdma devices follows.
+
+         mlx4_0 hca_handle=2 hca_object=2000
+         ocrdma1 hca_handle=3 hca_object=max
+
+  rdma.current
+       A read-only file that describes current resource usage.
+       It exists for all cgroups except root.
+
+       An example for the mlx4 and ocrdma devices follows.
+
+         mlx4_0 hca_handle=1 hca_object=20
+         ocrdma1 hca_handle=1 hca_object=23
+
+
+5-6. Misc
+
+5-6-1. perf_event
+
+The perf_event controller, if not mounted on a legacy hierarchy, is
+automatically enabled on the v2 hierarchy so that perf events can
+always be filtered by cgroup v2 path.  The controller can still be
+moved to a legacy hierarchy after the v2 hierarchy is populated.
+
+
 6. Namespace
 
 6-1. Basics
index 0d199353e4776b60e40062cd3f57f98be6166cb7..cd2cb2fc85ead940394873d941b24a7858ea3336 100644 (file)
@@ -319,7 +319,7 @@ Version History
 1.5.2   'mismatch_cnt' is zero unless [last_]sync_action is "check".
 1.6.0   Add discard support (and devices_handle_discard_safely module param).
 1.7.0   Add support for MD RAID0 mappings.
-1.8.0   Explictely check for compatible flags in the superblock metadata
+1.8.0   Explicitly check for compatible flags in the superblock metadata
        and reject to start the raid set if any are set by a newer
        target version, thus avoiding data corruption on a raid set
        with a reshape in progress.
index e56a1df3a9d3ca7fefbc5058072ee392c49b4cfc..dd906db34b328a581e4f4d99d11284544ff817f4 100644 (file)
@@ -16,7 +16,20 @@ Required properties:
 - #clock-cells:        Should be <1>. The permitted clock-specifier values can be
                  found in include/dt-bindings/clock/bcm2835.h
 - reg:         Specifies base physical address and size of the registers
-- clocks:      The external oscillator clock phandle
+- clocks:      phandles to the parent clocks used as input to the module, in
+                 the following order:
+
+                 - External oscillator
+                 - DSI0 byte clock
+                 - DSI0 DDR2 clock
+                 - DSI0 DDR clock
+                 - DSI1 byte clock
+                 - DSI1 DDR2 clock
+                 - DSI1 DDR clock
+
+                 Only the external oscillator is required.  The DSI clocks may
+                 not be present, in which case their children will be
+                 unusable.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/clock/exynos4415-clock.txt b/Documentation/devicetree/bindings/clock/exynos4415-clock.txt
deleted file mode 100644 (file)
index 847d98b..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-* Samsung Exynos4415 Clock Controller
-
-The Exynos4415 clock controller generates and supplies clock to various
-consumer devices within the Exynos4415 SoC.
-
-Required properties:
-
-- compatible: should be one of the following:
-  - "samsung,exynos4415-cmu" - for the main system clocks controller
-    (CMU_LEFTBUS, CMU_RIGHTBUS, CMU_TOP, CMU_CPU clock domains).
-  - "samsung,exynos4415-cmu-dmc" - for the Exynos4415 SoC DRAM Memory
-    Controller (DMC) domain clock controller.
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos4415.h header and can be used in device
-tree sources.
-
-Example 1: An example of a clock controller node is listed below.
-
-       cmu: clock-controller@10030000 {
-               compatible = "samsung,exynos4415-cmu";
-               reg = <0x10030000 0x18000>;
-               #clock-cells = <1>;
-       };
-
-       cmu-dmc: clock-controller@105C0000 {
-               compatible = "samsung,exynos4415-cmu-dmc";
-               reg = <0x105C0000 0x3000>;
-               #clock-cells = <1>;
-       };
diff --git a/Documentation/devicetree/bindings/clock/hi3660-clock.txt b/Documentation/devicetree/bindings/clock/hi3660-clock.txt
new file mode 100644 (file)
index 0000000..cc9b86c
--- /dev/null
@@ -0,0 +1,42 @@
+* Hisilicon Hi3660 Clock Controller
+
+The Hi3660 clock controller generates and supplies clock to various
+controllers within the Hi3660 SoC.
+
+Required Properties:
+
+- compatible: the compatible should be one of the following strings to
+       indicate the clock controller functionality.
+
+       - "hisilicon,hi3660-crgctrl"
+       - "hisilicon,hi3660-pctrl"
+       - "hisilicon,hi3660-pmuctrl"
+       - "hisilicon,hi3660-sctrl"
+       - "hisilicon,hi3660-iomcu"
+
+- reg: physical base address of the controller and length of memory mapped
+  region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifiers can be found in <dt-bindings/clock/hi3660-clock.h>.
+
+Examples:
+       crg_ctrl: clock-controller@fff35000 {
+               compatible = "hisilicon,hi3660-crgctrl", "syscon";
+               reg = <0x0 0xfff35000 0x0 0x1000>;
+               #clock-cells = <1>;
+       };
+
+       uart0: serial@fdf02000 {
+               compatible = "arm,pl011", "arm,primecell";
+               reg = <0x0 0xfdf02000 0x0 0x1000>;
+               interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&crg_ctrl HI3660_CLK_MUX_UART0>,
+                        <&crg_ctrl HI3660_PCLK>;
+               clock-names = "uartclk", "apb_pclk";
+               status = "disabled";
+       };
diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
new file mode 100644 (file)
index 0000000..87e9c47
--- /dev/null
@@ -0,0 +1,65 @@
+Binding for IDT VersaClock5 programmable i2c clock generator.
+
+The IDT VersaClock5 is a family of programmable i2c clock generators
+providing from 3 to 12 output clocks.
+
+==I2C device node==
+
+Required properties:
+- compatible:  shall be one of "idt,5p49v5923", "idt,5p49v5933".
+- reg:         i2c device address, shall be 0x68 or 0x6a.
+- #clock-cells:        from common clock binding; shall be set to 1.
+- clocks:      from common clock binding; list of parent clock handles,
+               - 5p49v5923: (required) either or both of XTAL or CLKIN
+                                       reference clock.
+               - 5p49v5933: (optional) property not present (internal
+                                       Xtal used) or CLKIN reference
+                                       clock.
+- clock-names: from common clock binding; clock input names, can be
+               - 5p49v5923: (required) either or both of "xin", "clkin".
+               - 5p49v5933: (optional) property not present or "clkin".
+
+==Mapping between clock specifier and physical pins==
+
+When referencing the provided clock in the DT using phandle and
+clock specifier, the following mapping applies:
+
+5P49V5923:
+       0 -- OUT0_SEL_I2CB
+       1 -- OUT1
+       2 -- OUT2
+
+5P49V5933:
+       0 -- OUT0_SEL_I2CB
+       1 -- OUT1
+       2 -- OUT4
+
+==Example==
+
+/* 25MHz reference crystal */
+ref25: ref25m {
+       compatible = "fixed-clock";
+       #clock-cells = <0>;
+       clock-frequency = <25000000>;
+};
+
+i2c-master-node {
+
+       /* IDT 5P49V5923 i2c clock generator */
+       vc5: clock-generator@6a {
+               compatible = "idt,5p49v5923";
+               reg = <0x6a>;
+               #clock-cells = <1>;
+
+               /* Connect XIN input to 25MHz reference */
+               clocks = <&ref25m>;
+               clock-names = "xin";
+       };
+};
+
+/* Consumer referencing the 5P49V5923 pin OUT1 */
+consumer {
+       ...
+       clocks = <&vc5 1>;
+       ...
+}
index 520562a7dc2abedefd6a04c6e8b2870071136cb2..c7b4e3a6b2c629b6c753e695e611ccb7f9eb2b98 100644 (file)
@@ -7,6 +7,7 @@ Required properties:
 - compatible : must be "marvell,armada-370-corediv-clock",
                       "marvell,armada-375-corediv-clock",
                       "marvell,armada-380-corediv-clock",
+                       "marvell,mv98dx3236-corediv-clock",
 
 - reg : must be the register address of Core Divider control register
 - #clock-cells : from common clock binding; shall be set to 1
index 99c214660bdc7d13b3bedb19df003625b242563a..7f28506eaee77a652e9ccc37cb819e7a7b686489 100644 (file)
@@ -3,6 +3,7 @@ Device Tree Clock bindings for cpu clock of Marvell EBU platforms
 Required properties:
 - compatible : shall be one of the following:
        "marvell,armada-xp-cpu-clock" - cpu clocks for Armada XP
+       "marvell,mv98dx3236-cpu-clock" - cpu clocks for 98DX3236 SoC
 - reg : Address and length of the clock complex register set, followed
         by address and length of the PMU DFS registers
 - #clock-cells : should be set to 1.
index 87d3714b956a281e4e8ea68d086d11a5f9a09516..a7235e9e1c97d38fdc9ff95a036c403dfb583b28 100644 (file)
@@ -11,6 +11,7 @@ Required properties :
                compatible "qcom,rpmcc" should be also included.
 
                        "qcom,rpmcc-msm8916", "qcom,rpmcc"
+                       "qcom,rpmcc-msm8974", "qcom,rpmcc"
                        "qcom,rpmcc-apq8064", "qcom,rpmcc"
 
 - #clock-cells : shall contain 1
index c469194129536332ee88c8d8ed47e9a4d507336c..f4f944d8130818570e1b58eae012be77b4872935 100644 (file)
@@ -42,6 +42,10 @@ Required Properties:
        Domain bindings in
        Documentation/devicetree/bindings/power/power_domain.txt.
 
+  - #reset-cells: Must be 1
+      - The single reset specifier cell must be the module number, as defined
+       in the datasheet.
+
 
 Examples
 --------
@@ -55,6 +59,7 @@ Examples
                clock-names = "extal", "extalr";
                #clock-cells = <2>;
                #power-domain-cells = <0>;
+               #reset-cells = <1>;
        };
 
 
@@ -69,5 +74,6 @@ Examples
                dmas = <&dmac1 0x13>, <&dmac1 0x12>;
                dma-names = "tx", "rx";
                power-domains = <&cpg>;
+               resets = <&cpg 310>;
                status = "disabled";
        };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt
new file mode 100644 (file)
index 0000000..e71c675
--- /dev/null
@@ -0,0 +1,57 @@
+* Rockchip RK3328 Clock and Reset Unit
+
+The RK3328 clock controller generates and supplies clock to various
+controllers within the SoC and also implements a reset controller for SoC
+peripherals.
+
+Required Properties:
+
+- compatible: should be "rockchip,rk3328-cru"
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Optional Properties:
+
+- rockchip,grf: phandle to the syscon managing the "general register files"
+  If missing, pll rates are not changeable due to the missing pll lock status.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/rk3328-cru.h headers and can be
+used in device tree sources. Similar macros exist for the reset sources in
+these files.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with the following
+clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "clkin_i2s" - external I2S clock - optional,
+ - "gmac_clkin" - external GMAC clock - optional
+ - "phy_50m_out" - output clock of the pll in the mac phy
+
+Example: Clock controller node:
+
+       cru: clock-controller@ff440000 {
+               compatible = "rockchip,rk3328-cru";
+               reg = <0x0 0xff440000 0x0 0x1000>;
+               rockchip,grf = <&grf>;
+
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+       };
+
+Example: UART controller node that consumes the clock generated by the clock
+  controller:
+
+       uart0: serial@ff120000 {
+               compatible = "snps,dw-apb-uart";
+               reg = <0xff120000 0x100>;
+               interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+               reg-shift = <2>;
+               reg-io-width = <4>;
+               clocks = <&cru SCLK_UART0>;
+       };
index 3888dd33fcbd89cd9fdda39afbf90bdced614e39..3bc56fae90ac98b4cf626e7d433210b952db815a 100644 (file)
@@ -13,6 +13,12 @@ Required Properties:
 - #clock-cells: should be 1.
 - #reset-cells: should be 1.
 
+Optional Properties:
+
+- rockchip,grf: phandle to the syscon managing the "general register files".
+  It is used for GRF muxes; if missing, any muxes present in the GRF will not
+  be available.
+
 Each clock is assigned an identifier and client nodes can use this identifier
 to specify the clock which they consume. All available clocks are defined as
 preprocessor macros in the dt-bindings/clock/rk3399-cru.h headers and can be
index 8f19d87cbf2451e4668f918832c94922363c07d6..b240121d2ac940e4eef678b3769f1262a1d3cb1d 100644 (file)
@@ -10,6 +10,7 @@ Required properties:
 - compatible: Should be:
   "st,stm32f42xx-rcc"
   "st,stm32f469-rcc"
+  "st,stm32f746-rcc"
 - reg: should be register base and length as documented in the
   datasheet
 - #reset-cells: 1, see below
@@ -84,6 +85,25 @@ The secondary index is bound with the following magic numbers:
        12      CLK_I2SQ_PDIV   (post divisor of pll i2s q divisor)
        13      CLK_SAIQ_PDIV   (post divisor of pll sai q divisor)
 
+       14      CLK_HSI         (Internal oscillator clock)
+       15      CLK_SYSCLK      (System Clock)
+       16      CLK_HDMI_CEC    (HDMI-CEC clock)
+       17      CLK_SPDIF       (SPDIF-Rx clock)
+       18      CLK_USART1      (U(s)arts clocks)
+       19      CLK_USART2
+       20      CLK_USART3
+       21      CLK_UART4
+       22      CLK_UART5
+       23      CLK_USART6
+       24      CLK_UART7
+       25      CLK_UART8
+       26      CLK_I2C1        (I2C clocks)
+       27      CLK_I2C2
+       28      CLK_I2C3
+       29      CLK_I2C4
+       30      CLK_LPTIMER     (LPTimer1 clock)
+
 Example:
 
        /* Misc clock, FCLK */
diff --git a/Documentation/devicetree/bindings/clock/stericsson,abx500.txt b/Documentation/devicetree/bindings/clock/stericsson,abx500.txt
new file mode 100644 (file)
index 0000000..dbaa886
--- /dev/null
@@ -0,0 +1,20 @@
+Clock bindings for ST-Ericsson ABx500 clocks
+
+Required properties :
+- compatible : shall contain the following:
+  "stericsson,ab8500-clk"
+- #clock-cells : should be <1>
+
+The ABx500 clocks need to be placed as a subnode of an AB8500
+device node, see mfd/ab8500.txt
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/ste-ab8500.h header and can be used in device
+tree sources.
+
+Example:
+
+clock-controller {
+       compatible = "stericsson,ab8500-clk";
+       #clock-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/clock/sun9i-de.txt b/Documentation/devicetree/bindings/clock/sun9i-de.txt
new file mode 100644 (file)
index 0000000..fb18f32
--- /dev/null
@@ -0,0 +1,28 @@
+Allwinner A80 Display Engine Clock Control Binding
+--------------------------------------------------
+
+Required properties :
+- compatible: must contain one of the following compatibles:
+               - "allwinner,sun9i-a80-de-clks"
+
+- reg: Must contain the registers base address and length
+- clocks: phandle to the clocks feeding the display engine subsystem.
+         Three are needed:
+  - "mod": the display engine module clock
+  - "dram": the DRAM bus clock for the system
+  - "bus": the bus clock for the whole display engine subsystem
+- clock-names: Must contain the clock names described just above
+- resets: phandle to the reset control for the display engine subsystem.
+- #clock-cells : must contain 1
+- #reset-cells : must contain 1
+
+Example:
+de_clocks: clock@3000000 {
+       compatible = "allwinner,sun9i-a80-de-clks";
+       reg = <0x03000000 0x30>;
+       clocks = <&ccu CLK_DE>, <&ccu CLK_SDRAM>, <&ccu CLK_BUS_DE>;
+       clock-names = "mod", "dram", "bus";
+       resets = <&ccu RST_BUS_DE>;
+       #clock-cells = <1>;
+       #reset-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/clock/sun9i-usb.txt b/Documentation/devicetree/bindings/clock/sun9i-usb.txt
new file mode 100644 (file)
index 0000000..3564bd4
--- /dev/null
@@ -0,0 +1,24 @@
+Allwinner A80 USB Clock Control Binding
+---------------------------------------
+
+Required properties :
+- compatible: must contain one of the following compatibles:
+               - "allwinner,sun9i-a80-usb-clocks"
+
+- reg: Must contain the registers base address and length
+- clocks: phandle to the clocks feeding the USB subsystem. Two are needed:
+  - "bus": the bus clock for the whole USB subsystem
+  - "hosc": the high frequency oscillator (usually at 24MHz)
+- clock-names: Must contain the clock names described just above
+- #clock-cells : must contain 1
+- #reset-cells : must contain 1
+
+Example:
+usb_clocks: clock@a08000 {
+       compatible = "allwinner,sun9i-a80-usb-clks";
+       reg = <0x00a08000 0x8>;
+       clocks = <&ccu CLK_BUS_USB>, <&osc24M>;
+       clock-names = "bus", "hosc";
+       #clock-cells = <1>;
+       #reset-cells = <1>;
+};
index 74d44a4273f2d19b728989d85e852b1df0f2c2e7..bae5668cf427836642a81ebe2e1d9b8b386bee71 100644 (file)
@@ -7,6 +7,8 @@ Required properties :
                - "allwinner,sun8i-a23-ccu"
                - "allwinner,sun8i-a33-ccu"
                - "allwinner,sun8i-h3-ccu"
+               - "allwinner,sun8i-v3s-ccu"
+               - "allwinner,sun9i-a80-ccu"
                - "allwinner,sun50i-a64-ccu"
 
 - reg: Must contain the registers base address and length
index 4c7669ad681b8e0a1f8f97014e04affc5db39754..0d01f2d5cc36e8cd4aa636f365ed55482c50d1d1 100644 (file)
@@ -1,15 +1,22 @@
-Binding for TO CDCE925 programmable I2C clock synthesizers.
+Binding for TI CDCE913/925/937/949 programmable I2C clock synthesizers.
 
 Reference
 This binding uses the common clock binding[1].
 
 [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] http://www.ti.com/product/cdce925
+[2] http://www.ti.com/product/cdce913
+[3] http://www.ti.com/product/cdce925
+[4] http://www.ti.com/product/cdce937
+[5] http://www.ti.com/product/cdce949
 
 The driver provides clock sources for each output Y1 through Y5.
 
 Required properties:
- - compatible: Shall be "ti,cdce925"
+ - compatible: Shall be one of the following:
+       - "ti,cdce913": 1-PLL, 3 Outputs
+       - "ti,cdce925": 2-PLL, 5 Outputs
+       - "ti,cdce937": 3-PLL, 7 Outputs
+       - "ti,cdce949": 4-PLL, 9 Outputs
  - reg: I2C device address.
  - clocks: Points to a fixed parent clock that provides the input frequency.
  - #clock-cells: From common clock bindings: Shall be 1.
@@ -18,7 +25,7 @@ Optional properties:
  - xtal-load-pf: Crystal load-capacitor value to fine-tune performance on a
                  board, or to compensate for external influences.
 
-For both PLL1 and PLL2 an optional child node can be used to specify spread
+For each PLL (PLL1, PLL2, ...) an optional child node can be used to specify spread
 spectrum clocking parameters for a board.
   - spread-spectrum: SSC mode as defined in the data sheet.
   - spread-spectrum-center: Use "centered" mode instead of "max" mode. When
index 8c18b7b237bf271169645eca55d9620c717ed3c9..4ad7038084074e405bdeb5a69a4c1fcccb210d6b 100644 (file)
@@ -13,6 +13,9 @@ Required properties:
        "zte,zx296718-lsp1crm":
                zx296718 device level clock selection and gating
 
+       "zte,zx296718-audiocrm":
+               zx296718 audio clock selection, divider and gating
+
 - reg: Address and length of the register set
 
 The clock consumer should specify the desired clock by having the clock
index eb31ed47a2834ceccc16f2f029e8f69f22b39e15..209d931ef16c4e53e46e8378b09f5cf04b121586 100644 (file)
@@ -8,14 +8,15 @@ Required properties:
          0x3c or 0x3d
   - pwm: Should contain the pwm to use according to the OF device tree PWM
          specification [0]. Only required for the ssd1307.
-  - reset-gpios: Should contain the GPIO used to reset the OLED display
   - solomon,height: Height in pixel of the screen driven by the controller
   - solomon,width: Width in pixel of the screen driven by the controller
   - solomon,page-offset: Offset of pages (band of 8 pixels) that the screen is
     mapped to.
 
 Optional properties:
-  - reset-active-low: Is the reset gpio is active on physical low?
+  - reset-gpios: The GPIO used to reset the OLED display, if available. See
+                 Documentation/devicetree/bindings/gpio/gpio.txt for details.
+  - vbat-supply: The supply for VBAT
   - solomon,segment-no-remap: Display needs normal (non-inverted) data column
                               to segment mapping
   - solomon,com-seq: Display uses sequential COM pin configuration
index cf53d5fba20a0934c631b8320e538cbc5440e155..aa097045a10ece3e89c0b1f3ab76e33466aa0615 100644 (file)
@@ -19,7 +19,14 @@ Optional Properties:
   - i2c-mux-idle-disconnect: Boolean; if defined, forces mux to disconnect all
     children in idle state. This is necessary for example, if there are several
     multiplexers on the bus and the devices behind them use same I2C addresses.
-
+  - interrupt-parent: Phandle for the interrupt controller that services
+    interrupts for this device.
+  - interrupts: Interrupt mapping for IRQ.
+  - interrupt-controller: Marks the device node as an interrupt controller.
+  - #interrupt-cells : Should be two.
+    - first cell is the pin number
+    - second cell is used to specify flags.
+    See also Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
 
 Example:
 
@@ -29,6 +36,11 @@ Example:
                #size-cells = <0>;
                reg = <0x74>;
 
+               interrupt-parent = <&ipic>;
+               interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+
                i2c@2 {
                        #address-cells = <1>;
                        #size-cells = <0>;
index 7716acc55decdf86b1c107b1557a7cbe3b975553..ae9c2a735f39fd3ff0a6bd945d5ba64fba470afd 100644 (file)
@@ -10,6 +10,7 @@ Required properties:
                        - "renesas,iic-r8a7793" (R-Car M2-N)
                        - "renesas,iic-r8a7794" (R-Car E2)
                        - "renesas,iic-r8a7795" (R-Car H3)
+                       - "renesas,iic-r8a7796" (R-Car M3-W)
                        - "renesas,iic-sh73a0" (SH-Mobile AG5)
                        - "renesas,rcar-gen2-iic" (generic R-Car Gen2 compatible device)
                        - "renesas,rcar-gen3-iic" (generic R-Car Gen3 compatible device)
diff --git a/Documentation/devicetree/bindings/i2c/i2c-stm32.txt b/Documentation/devicetree/bindings/i2c/i2c-stm32.txt
new file mode 100644 (file)
index 0000000..78eaf7b
--- /dev/null
@@ -0,0 +1,33 @@
+* I2C controller embedded in STMicroelectronics STM32 platforms
+
+Required properties :
+- compatible : Must be "st,stm32f4-i2c"
+- reg : Offset and length of the register set for the device
+- interrupts : Must contain the interrupt id for I2C event and then the
+  interrupt id for I2C error.
+- resets: Must contain the phandle to the reset controller.
+- clocks: Must contain the input clock of the I2C instance.
+- A pinctrl state named "default" must be defined to set the pins in the
+  proper mode of operation for I2C transfers
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Optional properties :
+- clock-frequency : Desired I2C bus clock frequency in Hz. If not specified,
+  the default 100 kHz frequency will be used. As only Normal and Fast modes
+  are supported, possible values are 100000 and 400000.
+
+Example :
+
+       i2c@40005400 {
+               compatible = "st,stm32f4-i2c";
+               #address-cells = <1>;
+               #size-cells = <0>;
+               reg = <0x40005400 0x400>;
+               interrupts = <31>,
+                            <32>;
+               resets = <&rcc 277>;
+               clocks = <&rcc 0 149>;
+               pinctrl-0 = <&i2c1_sda_pin>, <&i2c1_scl_pin>;
+               pinctrl-names = "default";
+       };
diff --git a/Documentation/devicetree/bindings/i2c/nvidia,tegra186-bpmp-i2c.txt b/Documentation/devicetree/bindings/i2c/nvidia,tegra186-bpmp-i2c.txt
new file mode 100644 (file)
index 0000000..ab240e1
--- /dev/null
@@ -0,0 +1,42 @@
+NVIDIA Tegra186 BPMP I2C controller
+
+In Tegra186, the BPMP (Boot and Power Management Processor) owns certain HW
+devices, such as the I2C controller for the power management I2C bus. Software
+running on other CPUs must perform IPC to the BPMP in order to execute
+transactions on that I2C bus. This binding describes an I2C bus that is
+accessed in such a fashion.
+
+The BPMP I2C node must be located directly inside the main BPMP node. See
+../firmware/nvidia,tegra186-bpmp.txt for details of the BPMP binding.
+
+This node represents an I2C controller. See ../i2c/i2c.txt for details of the
+core I2C binding.
+
+Required properties:
+- compatible:
+    Array of strings.
+    One of:
+    - "nvidia,tegra186-bpmp-i2c".
+- #address-cells: Address cells for I2C device address.
+    Single-cell integer.
+    Must be <1>.
+- #size-cells:
+    Single-cell integer.
+    Must be <0>.
+- nvidia,bpmp-bus-id:
+    Single-cell integer.
+    Indicates the I2C bus number this DT node represents, as defined by the
+    BPMP firmware.
+
+Example:
+
+bpmp {
+       ...
+
+       i2c {
+               compatible = "nvidia,tegra186-bpmp-i2c";
+               #address-cells = <1>;
+               #size-cells = <0>;
+               nvidia,bpmp-bus-id = <5>;
+       };
+};
index 485bc59fcc4876a5fe974181edf62c0fed65df2c..3c91ad430eea3fcc0b725a3e5fc591027d5cfe46 100644 (file)
@@ -234,7 +234,7 @@ see regulator.txt - with additional custom properties described below:
 - qcom,switch-mode-frequency:
        Usage: required
        Value type: <u32>
-       Definition: Frequency (Hz) of the swith mode power supply;
+       Definition: Frequency (Hz) of the switch mode power supply;
                    must be one of:
                    19200000, 9600000, 6400000, 4800000, 3840000, 3200000,
                    2740000, 2400000, 2130000, 1920000, 1750000, 1600000,
index 7aa840c8768d2f699f99d3dcbd0e8983214f9419..ae4234ca4ee44e91fbfc9a4a9fd0e9ba74368ca1 100644 (file)
@@ -1,7 +1,7 @@
 * Marvell Armada 370 / Armada XP / Armada 3700 Ethernet Controller (NETA)
 
 Required properties:
-- compatible: could be one of the followings
+- compatible: could be one of the following:
        "marvell,armada-370-neta"
        "marvell,armada-xp-neta"
        "marvell,armada-3700-neta"
index 9f5ca4457b5f6096f5b0d3596c8ce8eabd804097..ecdcfb790704947f0bb5a3f87d6d54c7107a4b3a 100644 (file)
@@ -136,7 +136,7 @@ Optional properties:
   larger OPP table, based on what version of the hardware we are running on. We
   still can't have multiple nodes with the same opp-hz value in OPP table.
 
-  It's an user defined array containing a hierarchy of hardware version numbers,
+  It's a user defined array containing a hierarchy of hardware version numbers,
   supported by the OPP. For example: a platform with hierarchy of three levels
   of versions (A, B and C), this field should be like <X Y Z>, where X
   corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z
index 7c85dca4221abc71ab873158eef0cbb80d3d31c1..2fd688c8dbdb5ad1de269e2df6ca6261df231148 100644 (file)
@@ -6,7 +6,7 @@ the first two functions being GPIO in and out. The configuration on
 the pins includes drive strength and pull-up.
 
 Required properties:
-- compatible: Should be one of the followings (depending on you SoC):
+- compatible: Should be one of the following (depending on your SoC):
   "allwinner,sun4i-a10-pinctrl"
   "allwinner,sun5i-a10s-pinctrl"
   "allwinner,sun5i-a13-pinctrl"
index 7eb9674e968740cf2a98e24c05784803ebfc70a5..549f7dee9b9de0b33f851ab1bb34008ab9d1a51a 100644 (file)
@@ -23,7 +23,7 @@ Optional Properties:
 - clock-names: The following clocks can be specified:
        - oscclk: Oscillator clock.
        - clkN: Input clocks to the devices in this power domain. These clocks
-               will be reparented to oscclk before swithing power domain off.
+               will be reparented to oscclk before switching power domain off.
                Their original parent will be brought back after turning on
                the domain. Maximum of 4 clocks (N = 0 to 3) are supported.
        - asbN: Clocks required by asynchronous bridges (ASB) present in
index 2eb9d4ee7dc07d579cb25ac0dd135662cd682fe6..c3c9a1226f9aa622702d590f593618506509b3b6 100644 (file)
@@ -1,9 +1,11 @@
-* Real Time Clock of the Armada 38x SoCs
+* Real Time Clock of the Armada 38x/7K/8K SoCs
 
-RTC controller for the Armada 38x SoCs
+RTC controller for the Armada 38x, 7K and 8K SoCs
 
 Required properties:
-- compatible : Should be "marvell,armada-380-rtc"
+- compatible : Should be one of the following:
+       "marvell,armada-380-rtc" for Armada 38x SoC
+       "marvell,armada-8k-rtc" for Armada 7K/8K SoCs
 - reg: a list of base address and size pairs, one for each entry in
   reg-names
 - reg names: should contain:
diff --git a/Documentation/devicetree/bindings/rtc/cortina,gemini.txt b/Documentation/devicetree/bindings/rtc/cortina,gemini.txt
new file mode 100644 (file)
index 0000000..4ce4e79
--- /dev/null
@@ -0,0 +1,14 @@
+* Cortina Systems Gemini RTC
+
+Gemini SoC real-time clock.
+
+Required properties:
+- compatible : Should be "cortina,gemini-rtc"
+
+Examples:
+
+rtc@45000000 {
+       compatible = "cortina,gemini-rtc";
+       reg = <0x45000000 0x100>;
+       interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+};
index c9d80d7da14129ac656cabadcb8cbca5ba7339b9..323cf26374cb14dff4284fb8cdbe4e27270b8107 100644 (file)
@@ -8,10 +8,13 @@ Required properties:
   region.
 - interrupts: rtc alarm interrupt
 
+Optional properties:
+- interrupts: dryice security violation interrupt
+
 Example:
 
 rtc@80056000 {
        compatible = "fsl,imx53-rtc", "fsl,imx25-rtc";
        reg = <0x80056000 2000>;
-       interrupts = <29>;
+       interrupts = <29 56>;
 };
index 1ad4c1c2b3b378299fe376463f30308e41c5a89e..85be53a421809c0b8e04c6bf1dfa9833ff84a99d 100644 (file)
@@ -1,7 +1,8 @@
 * Maxim DS3231 Real Time Clock
 
 Required properties:
-see: Documentation/devicetree/bindings/i2c/trivial-admin-guide/devices.rst
+- compatible: Should contain "maxim,ds3231".
+- reg: I2C address for chip.
 
 Optional property:
 - #clock-cells: Should be 1.
index 086c998c556108134e111e69067156768d7e77f8..36984acbb383c275c57a25ff0d8cf756292ed45d 100644 (file)
@@ -3,7 +3,8 @@
 Philips PCF8563/Epson RTC8564 Real Time Clock
 
 Required properties:
-see: Documentation/devicetree/bindings/i2c/trivial-admin-guide/devices.rst
+- compatible: Should contain "nxp,pcf8563".
+- reg: I2C address for chip.
 
 Optional property:
 - #clock-cells: Should be 0.
diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt
new file mode 100644 (file)
index 0000000..e2837b9
--- /dev/null
@@ -0,0 +1,27 @@
+STM32 Real Time Clock
+
+Required properties:
+- compatible: "st,stm32-rtc".
+- reg: address range of rtc register set.
+- clocks: reference to the clock entry ck_rtc.
+- interrupt-parent: phandle for the interrupt controller.
+- interrupts: rtc alarm interrupt.
+- st,syscfg: phandle for pwrcfg, mandatory to disable/enable backup domain
+  (RTC registers) write protection.
+
+Optional properties (to override default ck_rtc parent clock):
+- assigned-clocks: reference to the ck_rtc clock entry.
+- assigned-clock-parents: phandle of the new parent clock of ck_rtc.
+
+Example:
+
+       rtc: rtc@40002800 {
+               compatible = "st,stm32-rtc";
+               reg = <0x40002800 0x400>;
+               clocks = <&rcc 1 CLK_RTC>;
+               assigned-clocks = <&rcc 1 CLK_RTC>;
+               assigned-clock-parents = <&rcc 1 CLK_LSE>;
+               interrupt-parent = <&exti>;
+               interrupts = <17 1>;
+               st,syscfg = <&pwrcfg>;
+       };
index f007e428a1ab277b6d38e2dae8c102a0d8ef3920..945934918b71fc127da6729f38b6498884ad5fca 100644 (file)
@@ -8,10 +8,20 @@ Required properties:
                  memory mapped region.
 - interrupts   : IRQ lines for the RTC alarm 0 and alarm 1, in that order.
 
+Required properties for new device trees:
+- clocks       : phandle to the 32kHz external oscillator
+- clock-output-names : name of the LOSC clock created
+- #clock-cells : must be equal to 1. The RTC provides two clocks: the
+                 LOSC and its external output, with index 0 and 1
+                 respectively.
+
 Example:
 
 rtc: rtc@01f00000 {
        compatible = "allwinner,sun6i-a31-rtc";
        reg = <0x01f00000 0x54>;
        interrupts = <0 40 4>, <0 41 4>;
+       clock-output-names = "osc32k";
+       clocks = <&ext_osc32k>;
+       #clock-cells = <1>;
 };
index c6e62cb30712ddc677f47298e2b1a0a27e57955d..a0685c2092184d225cbb25982c8af525f48259ce 100644 (file)
@@ -10,7 +10,7 @@ From RK3368 SoCs, the GRF is divided into two sections,
 
 Required Properties:
 
-- compatible: GRF should be one of the followings
+- compatible: GRF should be one of the following:
    - "rockchip,rk3036-grf", "syscon": for rk3036
    - "rockchip,rk3066-grf", "syscon": for rk3066
    - "rockchip,rk3188-grf", "syscon": for rk3188
@@ -18,7 +18,7 @@ Required Properties:
    - "rockchip,rk3288-grf", "syscon": for rk3288
    - "rockchip,rk3368-grf", "syscon": for rk3368
    - "rockchip,rk3399-grf", "syscon": for rk3399
-- compatible: PMUGRF should be one of the followings
+- compatible: PMUGRF should be one of the following:
    - "rockchip,rk3368-pmugrf", "syscon": for rk3368
    - "rockchip,rk3399-pmugrf", "syscon": for rk3399
 - compatible: SGRF should be one of the following
index 4ea29aa9af59a86d34577329835b8437e72bd86e..a6600f6dea64dd7c22188e05bc1a38e3ff4a0df0 100644 (file)
@@ -5,7 +5,7 @@ audio data transfer between devices in the system.
 
 Required properties:
 
-- compatible: should be one of the followings
+- compatible: should be one of the following:
    - "rockchip,rk3066-i2s": for rk3066
    - "rockchip,rk3188-i2s", "rockchip,rk3066-i2s": for rk3188
    - "rockchip,rk3288-i2s", "rockchip,rk3066-i2s": for rk3288
@@ -17,7 +17,7 @@ Required properties:
        Documentation/devicetree/bindings/dma/dma.txt
 - dma-names: should include "tx" and "rx".
 - clocks: a list of phandle + clock-specifer pairs, one for each entry in clock-names.
-- clock-names: should contain followings:
+- clock-names: should contain the following:
    - "i2s_hclk": clock for I2S BUS
    - "i2s_clk" : clock for I2S controller
 - rockchip,playback-channels: max playback channels, if not set, 8 channels default.
index 3033bd8aab0fdba90ff89c51fddc7ed3a7b4205a..3863531d1e6d52bb4e318f0e691c1314843a2bf0 100644 (file)
@@ -14,7 +14,7 @@ Required properties:
 - dma-names: should include "tx" and "rx".
 - clocks: a list of phandle + clock-specifer pairs, one for each entry
   in clock-names.
-- clock-names: should contain followings:
+- clock-names: should contain the following:
    - "apb": the parent APB clock for this controller
    - "codec": the parent module clock
 
index f4adc58f82baf04897313e2cf3cc981a0dc23e00..ee21da865771f303c9b21a288b5b1a293662aac7 100644 (file)
@@ -5,7 +5,7 @@ audio data transfer between devices in the system.
 
 Required properties:
 
-- compatible: should be one of the followings
+- compatible: should be one of the following:
    - "allwinner,sun4i-a10-i2s"
    - "allwinner,sun6i-a31-i2s"
 - reg: physical base address of the controller and length of memory mapped
@@ -15,7 +15,7 @@ Required properties:
        Documentation/devicetree/bindings/dma/dma.txt
 - dma-names: should include "tx" and "rx".
 - clocks: a list of phandle + clock-specifer pairs, one for each entry in clock-names.
-- clock-names: should contain followings:
+- clock-names: should contain the following:
    - "apb" : clock for the I2S bus interface
    - "mod" : module clock for the I2S controller
 - #sound-dai-cells : Must be equal to 0
diff --git a/Documentation/devicetree/bindings/watchdog/cortina,gemin-watchdog.txt b/Documentation/devicetree/bindings/watchdog/cortina,gemin-watchdog.txt
new file mode 100644 (file)
index 0000000..bc4b865
--- /dev/null
@@ -0,0 +1,17 @@
+Cortina Systems Gemini SoC Watchdog
+
+Required properties:
+- compatible : must be "cortina,gemini-watchdog"
+- reg : shall contain base register location and length
+- interrupts : shall contain the interrupt for the watchdog
+
+Optional properties:
+- timeout-sec : the default watchdog timeout in seconds.
+
+Example:
+
+watchdog@41000000 {
+       compatible = "cortina,gemini-watchdog";
+       reg = <0x41000000 0x1000>;
+       interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+};
index 8f3d96af81d70303e5bb449ecc366a85846bdd00..1f6e101e299a217875ea75d5e423a898be32c33e 100644 (file)
@@ -6,10 +6,11 @@ occurred.
 
 Required properties:
 - compatible : should be one among the following
-       (a) "samsung,s3c2410-wdt" for Exynos4 and previous SoCs
-       (b) "samsung,exynos5250-wdt" for Exynos5250
-       (c) "samsung,exynos5420-wdt" for Exynos5420
-       (c) "samsung,exynos7-wdt" for Exynos7
+       - "samsung,s3c2410-wdt" for S3C2410
+       - "samsung,s3c6410-wdt" for S3C6410, S5PV210 and Exynos4
+       - "samsung,exynos5250-wdt" for Exynos5250
+       - "samsung,exynos5420-wdt" for Exynos5420
+       - "samsung,exynos7-wdt" for Exynos7
 
 - reg : base physical address of the controller and length of memory mapped
        region.
diff --git a/Documentation/devicetree/bindings/watchdog/zte,zx2967-wdt.txt b/Documentation/devicetree/bindings/watchdog/zte,zx2967-wdt.txt
new file mode 100644 (file)
index 0000000..06ce677
--- /dev/null
@@ -0,0 +1,32 @@
+ZTE zx2967 Watchdog timer
+
+Required properties:
+
+- compatible : should be one of the following.
+       * zte,zx296718-wdt
+- reg : Specifies base physical address and size of the registers.
+- clocks : Pairs of phandle and specifier referencing the controller's clocks.
+- resets : Reference to the reset controller controlling the watchdog
+           controller.
+
+Optional properties:
+
+- timeout-sec : Contains the watchdog timeout in seconds.
+- zte,wdt-reset-sysctrl : Directs how the system is reset by the watchdog.
+       It is only required if the system should be restarted when the
+       watchdog is triggered.
+       It should include the following fields:
+         * phandle of aon-sysctrl.
+         * offset of the register to be written; should be 0xb0.
+         * configuration value to be written to aon-sysctrl.
+         * bit mask; the corresponding bits will be affected.
+
+Example:
+
+wdt: watchdog@1465000 {
+       compatible = "zte,zx296718-wdt";
+       reg = <0x1465000 0x1000>;
+       clocks = <&topcrm WDT_WCLK>;
+       resets = <&toprst 35>;
+       zte,wdt-reset-sysctrl = <&aon_sysctrl 0xb0 1 0x115>;
+};
index 50a3e01a36f80c14b85cb6c1307531a00065a733..e5177cb31a040be01b5bf169e9e06d513596c64a 100644 (file)
@@ -179,6 +179,7 @@ struct autofs_dev_ioctl {
                                 * including this struct */
        __s32 ioctlfd;          /* automount command fd */
 
+       /* Command parameters */
        union {
                struct args_protover            protover;
                struct args_protosubver         protosubver;
index 8fac3fe7b8c971c0c39e54283c2fa0698922aaff..f10dd590f69fe3e9384ebf0b9d9ef87cd91f88e7 100644 (file)
@@ -65,7 +65,7 @@ directory is a mount trap only if the filesystem is mounted *direct*
 and the root is empty.
 
 Directories created in the root directory are mount traps only if the
-filesystem is mounted  *indirect* and they are empty.
+filesystem is mounted *indirect* and they are empty.
 
 Directories further down the tree depend on the *maxproto* mount
 option and particularly whether it is less than five or not.
@@ -352,7 +352,7 @@ Communicating with autofs: root directory ioctls
 ------------------------------------------------
 
 The root directory of an autofs filesystem will respond to a number of
-ioctls.   The process issuing the ioctl must have the CAP_SYS_ADMIN
+ioctls.  The process issuing the ioctl must have the CAP_SYS_ADMIN
 capability, or must be the automount daemon.
 
 The available ioctl commands are:
@@ -425,8 +425,20 @@ Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:
                                          * including this struct */
                 __s32 ioctlfd;          /* automount command fd */
 
-                __u32 arg1;             /* Command parameters */
-                __u32 arg2;
+               /* Command parameters */
+               union {
+                       struct args_protover            protover;
+                       struct args_protosubver         protosubver;
+                       struct args_openmount           openmount;
+                       struct args_ready               ready;
+                       struct args_fail                fail;
+                       struct args_setpipefd           setpipefd;
+                       struct args_timeout             timeout;
+                       struct args_requester           requester;
+                       struct args_expire              expire;
+                       struct args_askumount           askumount;
+                       struct args_ismountpoint        ismountpoint;
+               };
 
                 char path[0];
         };
@@ -446,25 +458,22 @@ Commands are:
     set version numbers.
 - **AUTOFS_DEV_IOCTL_OPENMOUNT_CMD**: return an open file descriptor
     on the root of an autofs filesystem.  The filesystem is identified
-    by name and device number, which is stored in `arg1`.  Device
-    numbers for existing filesystems can be found in
+    by name and device number, which is stored in `openmount.devid`.
+    Device numbers for existing filesystems can be found in
     `/proc/self/mountinfo`.
 - **AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD**: same as `close(ioctlfd)`.
 - **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the filesystem is in
     catatonic mode, this can provide the write end of a new pipe
-    in `arg1` to re-establish communication with a daemon.  The
-    process group of the calling process is used to identify the
+    in `setpipefd.pipefd` to re-establish communication with a daemon.
+    The process group of the calling process is used to identify the
     daemon.
 - **AUTOFS_DEV_IOCTL_REQUESTER_CMD**: `path` should be a
     name within the filesystem that has been auto-mounted on.
-    arg1 is the dev number of the underlying autofs.  On successful
-    return, `arg1` and `arg2` will be the UID and GID of the process
-    which triggered that mount.
-
+    On successful return, `requester.uid` and `requester.gid` will be
+    the UID and GID of the process which triggered that mount.
 - **AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD**: Check if path is a
     mountpoint of a particular type - see separate documentation for
     details.
-
 - **AUTOFS_DEV_IOCTL_PROTOVER_CMD**:
 - **AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD**:
 - **AUTOFS_DEV_IOCTL_READY_CMD**:
@@ -474,7 +483,7 @@ Commands are:
 - **AUTOFS_DEV_IOCTL_EXPIRE_CMD**:
 - **AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD**:  These all have the same
     function as the similarly named **AUTOFS_IOC** ioctls, except
-    that **FAIL** can be given an explicit error number in `arg1`
+    that **FAIL** can be given an explicit error number in `fail.status`
     instead of assuming `ENOENT`, and this **EXPIRE** command
     corresponds to **AUTOFS_IOC_EXPIRE_MULTI**.
 
@@ -512,7 +521,7 @@ always be mounted "shared". e.g.
 
 > `mount --make-shared /autofs/mount/point`
 
-The automount daemon is only able to mange a single mount location for
+The automount daemon is only able to manage a single mount location for
 an autofs filesystem and if mounts on that are not 'shared', other
 locations will not behave as expected.  In particular access to those
 other locations will likely result in the `ELOOP` error
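
As a usage illustration of the parameter union introduced above, here is a
minimal userspace sketch that opens an ioctlfd with
AUTOFS_DEV_IOCTL_OPENMOUNT_CMD. It assumes the uapi header
<linux/auto_dev-ioctl.h> exposes the structure, version macros and ioctl
numbers; the helper name and error handling are illustrative only, not part
of the patch.

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/auto_dev-ioctl.h>

/* devfd is an fd on the control device, e.g. open("/dev/autofs", O_RDONLY) */
static int autofs_open_mount(int devfd, const char *path, __u32 devid)
{
	struct autofs_dev_ioctl *param;
	size_t size = sizeof(*param) + strlen(path) + 1;
	int ioctlfd = -1;

	param = calloc(1, size);
	if (!param)
		return -1;

	param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
	param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
	param->size = size;
	param->ioctlfd = -1;
	param->openmount.devid = devid;	/* union member replacing arg1 */
	strcpy(param->path, path);

	if (ioctl(devfd, AUTOFS_DEV_IOCTL_OPENMOUNT, param) >= 0)
		ioctlfd = param->ioctlfd;	/* fd on the mount root */

	free(param);
	return ioctlfd;
}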
index f5306ee40ea98216602d2214e8944a6aa11edadf..0b302a11718a43fd7ed44725390a5a4ceacb2229 100644 (file)
@@ -98,11 +98,10 @@ Mount Options
        size.
 
   rsize=X
-       Specify the maximum read size in bytes.  By default there is no
-       maximum.
+       Specify the maximum read size in bytes.  Default: 64 MB.
 
   rasize=X
-       Specify the maximum readahead.
+       Specify the maximum readahead.  Default: 8 MB.
 
   mount_timeout=X
        Specify the timeout value for mount (in seconds), in the case
index 29fc015526464608e6f1f1237fceeec11b9ead19..32874b06ebe917b2a17ae492a6c140645ab8167b 100644 (file)
@@ -6,7 +6,7 @@ Quota subsystem allows system administrator to set limits on used space and
 number of used inodes (inode is a filesystem structure which is associated with
 each file or directory) for users and/or groups. For both used space and number
 of used inodes there are actually two limits. The first one is called softlimit
-and the second one hardlimit.  An user can never exceed a hardlimit for any
+and the second one hardlimit.  A user can never exceed a hardlimit for any
 resource (unless he has CAP_SYS_RESOURCE capability). User is allowed to exceed
 softlimit but only for limited period of time. This period is called "grace
 period" or "grace time". When grace time is over, user is not able to allocate
index 1bba38dd263727cac57958e8f26a6ca75092c5e8..820d9040de1640865e6246f6dba6f758abb8e088 100644 (file)
@@ -33,6 +33,7 @@ Supported adapters:
   * Intel DNV (SOC)
   * Intel Broxton (SOC)
   * Intel Lewisburg (PCH)
+  * Intel Gemini Lake (SOC)
    Datasheets: Publicly available at the Intel website
 
 On Intel Patsburg and later chipsets, both the normal host SMBus controller
index d4d91a53fc39b4c4acd99b6964cf3c2129d601ab..7a8d7d261632e6d68a738bbb6395c6054eed44b8 100644 (file)
@@ -1,11 +1,11 @@
-Kernel driver i2c-gpio-mux
+Kernel driver i2c-mux-gpio
 
 Author: Peter Korsgaard <peter.korsgaard@barco.com>
 
 Description
 -----------
 
-i2c-gpio-mux is an i2c mux driver providing access to I2C bus segments
+i2c-mux-gpio is an i2c mux driver providing access to I2C bus segments
 from a master I2C bus and a hardware MUX controlled through GPIO pins.
 
 E.G.:
@@ -26,16 +26,16 @@ according to the settings of the GPIO pins 1..N.
 Usage
 -----
 
-i2c-gpio-mux uses the platform bus, so you need to provide a struct
+i2c-mux-gpio uses the platform bus, so you need to provide a struct
 platform_device with the platform_data pointing to a struct
-gpio_i2cmux_platform_data with the I2C adapter number of the master
+i2c_mux_gpio_platform_data with the I2C adapter number of the master
 bus, the number of bus segments to create and the GPIO pins used
-to control it. See include/linux/i2c-gpio-mux.h for details.
+to control it. See include/linux/i2c-mux-gpio.h for details.
 
 E.G. something like this for a MUX providing 4 bus segments
 controlled through 3 GPIO pins:
 
-#include <linux/i2c-gpio-mux.h>
+#include <linux/i2c-mux-gpio.h>
 #include <linux/platform_device.h>
 
 static const unsigned myboard_gpiomux_gpios[] = {
@@ -46,7 +46,7 @@ static const unsigned myboard_gpiomux_values[] = {
        0, 1, 2, 3
 };
 
-static struct gpio_i2cmux_platform_data myboard_i2cmux_data = {
+static struct i2c_mux_gpio_platform_data myboard_i2cmux_data = {
        .parent         = 1,
        .base_nr        = 2, /* optional */
        .values         = myboard_gpiomux_values,
@@ -57,7 +57,7 @@ static struct gpio_i2cmux_platform_data myboard_i2cmux_data = {
 };
 
 static struct platform_device myboard_i2cmux = {
-       .name           = "i2c-gpio-mux",
+       .name           = "i2c-mux-gpio",
        .id             = 0,
        .dev            = {
                .platform_data  = &myboard_i2cmux_data,
@@ -66,14 +66,14 @@ static struct platform_device myboard_i2cmux = {
 
 If you don't know the absolute GPIO pin numbers at registration time,
 you can instead provide a chip name (.chip_name) and relative GPIO pin
-numbers, and the i2c-gpio-mux driver will do the work for you,
+numbers, and the i2c-mux-gpio driver will do the work for you,
 including deferred probing if the GPIO chip isn't immediately
 available.
 
 Device Registration
 -------------------
 
-When registering your i2c-gpio-mux device, you should pass the number
+When registering your i2c-mux-gpio device, you should pass the number
 of any GPIO pin it uses as the device ID. This guarantees that every
 instance has a different ID.
 
index e5c7254e73d7d4c079638df02b376f81537bea48..5bd5903358392573d068ee457a26cbadfde7ad30 100644 (file)
@@ -59,14 +59,14 @@ Install selftests
 =================
 
 You can use kselftest_install.sh tool installs selftests in default
-location which is tools/testing/selftests/kselftest or an user specified
+location which is tools/testing/selftests/kselftest or a user specified
 location.
 
 To install selftests in default location:
    $ cd tools/testing/selftests
    $ ./kselftest_install.sh
 
-To install selftests in an user specified location:
+To install selftests in a user specified location:
    $ cd tools/testing/selftests
    $ ./kselftest_install.sh install_dir
 
@@ -95,3 +95,15 @@ In general, the rules for selftests are
 
  * Don't cause the top-level "make run_tests" to fail if your feature is
    unconfigured.
+
+Contributing new tests (details)
+================================
+
+ * Use TEST_GEN_XXX if such binaries or files are generated during
+   compiling.
+   TEST_PROGS and TEST_GEN_PROGS denote executables which are tested
+   by default.
+   TEST_PROGS_EXTENDED and TEST_GEN_PROGS_EXTENDED denote executables
+   which are not tested by default.
+   TEST_FILES and TEST_GEN_FILES denote files which are used by the
+   tests.
diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt
deleted file mode 100644 (file)
index 3888327..0000000
+++ /dev/null
@@ -1,324 +0,0 @@
-The cluster MD is a shared-device RAID for a cluster.
-
-
-1. On-disk format
-
-Separate write-intent-bitmaps are used for each cluster node.
-The bitmaps record all writes that may have been started on that node,
-and may not yet have finished. The on-disk layout is:
-
-0                    4k                     8k                    12k
--------------------------------------------------------------------
-| idle                | md super            | bm super [0] + bits |
-| bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
-| bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
-| bm bits [3, contd]  |                     |                     |
-
-During "normal" functioning we assume the filesystem ensures that only
-one node writes to any given block at a time, so a write request will
-
- - set the appropriate bit (if not already set)
- - commit the write to all mirrors
- - schedule the bit to be cleared after a timeout.
-
-Reads are just handled normally. It is up to the filesystem to ensure
-one node doesn't read from a location where another node (or the same
-node) is writing.
-
-
-2. DLM Locks for management
-
-There are three groups of locks for managing the device:
-
-2.1 Bitmap lock resource (bm_lockres)
-
- The bm_lockres protects individual node bitmaps. They are named in
- the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a
- node joins the cluster, it acquires the lock in PW mode and it stays
- so during the lifetime the node is part of the cluster. The lock
- resource number is based on the slot number returned by the DLM
- subsystem. Since DLM starts node count from one and bitmap slots
- start from zero, one is subtracted from the DLM slot number to arrive
- at the bitmap slot number.
-
- The LVB of the bitmap lock for a particular node records the range
- of sectors that are being re-synced by that node.  No other
- node may write to those sectors.  This is used when a new nodes
- joins the cluster.
-
-2.2 Message passing locks
-
- Each node has to communicate with other nodes when starting or ending
- resync, and for metadata superblock updates.  This communication is
- managed through three locks: "token", "message", and "ack", together
- with the Lock Value Block (LVB) of one of the "message" lock.
-
-2.3 new-device management
-
- A single lock: "no-new-dev" is used to co-ordinate the addition of
- new devices - this must be synchronized across the array.
- Normally all nodes hold a concurrent-read lock on this device.
-
-3. Communication
-
- Messages can be broadcast to all nodes, and the sender waits for all
- other nodes to acknowledge the message before proceeding.  Only one
- message can be processed at a time.
-
-3.1 Message Types
-
- There are six types of messages which are passed:
-
- 3.1.1 METADATA_UPDATED: informs other nodes that the metadata has
-   been updated, and the node must re-read the md superblock. This is
-   performed synchronously. It is primarily used to signal device
-   failure.
-
- 3.1.2 RESYNCING: informs other nodes that a resync is initiated or
-   ended so that each node may suspend or resume the region.  Each
-   RESYNCING message identifies a range of the devices that the
-   sending node is about to resync. This over-rides any pervious
-   notification from that node: only one ranged can be resynced at a
-   time per-node.
-
- 3.1.3 NEWDISK: informs other nodes that a device is being added to
-   the array. Message contains an identifier for that device.  See
-   below for further details.
-
- 3.1.4 REMOVE: A failed or spare device is being removed from the
-   array. The slot-number of the device is included in the message.
-
- 3.1.5 RE_ADD: A failed device is being re-activated - the assumption
-   is that it has been determined to be working again.
-
- 3.1.6 BITMAP_NEEDS_SYNC: if a node is stopped locally but the bitmap
-   isn't clean, then another node is informed to take the ownership of
-   resync.
-
-3.2 Communication mechanism
-
- The DLM LVB is used to communicate within nodes of the cluster. There
- are three resources used for the purpose:
-
-  3.2.1 token: The resource which protects the entire communication
-   system. The node having the token resource is allowed to
-   communicate.
-
-  3.2.2 message: The lock resource which carries the data to
-   communicate.
-
-  3.2.3 ack: The resource, acquiring which means the message has been
-   acknowledged by all nodes in the cluster. The BAST of the resource
-   is used to inform the receiving node that a node wants to
-   communicate.
-
-The algorithm is:
-
- 1. receive status - all nodes have concurrent-reader lock on "ack".
-
-   sender                         receiver                 receiver
-   "ack":CR                       "ack":CR                 "ack":CR
-
- 2. sender get EX on "token"
-    sender get EX on "message"
-    sender                        receiver                 receiver
-    "token":EX                    "ack":CR                 "ack":CR
-    "message":EX
-    "ack":CR
-
-    Sender checks that it still needs to send a message. Messages
-    received or other events that happened while waiting for the
-    "token" may have made this message inappropriate or redundant.
-
- 3. sender writes LVB.
-    sender down-convert "message" from EX to CW
-    sender try to get EX of "ack"
-    [ wait until all receivers have *processed* the "message" ]
-
-                                     [ triggered by bast of "ack" ]
-                                     receiver get CR on "message"
-                                     receiver read LVB
-                                     receiver processes the message
-                                     [ wait finish ]
-                                     receiver releases "ack"
-                                     receiver tries to get PR on "message"
-
-   sender                         receiver                  receiver
-   "token":EX                     "message":CR              "message":CR
-   "message":CW
-   "ack":EX
-
- 4. triggered by grant of EX on "ack" (indicating all receivers
-    have processed message)
-    sender down-converts "ack" from EX to CR
-    sender releases "message"
-    sender releases "token"
-                               receiver upconvert to PR on "message"
-                               receiver get CR of "ack"
-                               receiver release "message"
-
-   sender                      receiver                   receiver
-   "ack":CR                    "ack":CR                   "ack":CR
-
-
-4. Handling Failures
-
-4.1 Node Failure
-
- When a node fails, the DLM informs the cluster with the slot
- number. The node starts a cluster recovery thread. The cluster
- recovery thread:
-
-       - acquires the bitmap<number> lock of the failed node
-       - opens the bitmap
-       - reads the bitmap of the failed node
-       - copies the set bitmap to local node
-       - cleans the bitmap of the failed node
-       - releases bitmap<number> lock of the failed node
-       - initiates resync of the bitmap on the current node
-               md_check_recovery is invoked within recover_bitmaps,
-               then md_check_recovery -> metadata_update_start/finish,
-               it will lock the communication by lock_comm.
-               Which means when one node is resyncing it blocks all
-               other nodes from writing anywhere on the array.
-
- The resync process is the regular md resync. However, in a clustered
- environment when a resync is performed, it needs to tell other nodes
- of the areas which are suspended. Before a resync starts, the node
- send out RESYNCING with the (lo,hi) range of the area which needs to
- be suspended. Each node maintains a suspend_list, which contains the
- list of ranges which are currently suspended. On receiving RESYNCING,
- the node adds the range to the suspend_list. Similarly, when the node
- performing resync finishes, it sends RESYNCING with an empty range to
- other nodes and other nodes remove the corresponding entry from the
- suspend_list.
-
- A helper function, ->area_resyncing() can be used to check if a
- particular I/O range should be suspended or not.
-
-4.2 Device Failure
-
- Device failures are handled and communicated with the metadata update
- routine.  When a node detects a device failure it does not allow
- any further writes to that device until the failure has been
- acknowledged by all other nodes.
-
-5. Adding a new Device
-
- For adding a new device, it is necessary that all nodes "see" the new
- device to be added. For this, the following algorithm is used:
-
-    1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
-       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
-    2. Node 1 sends a NEWDISK message with uuid and slot number
-    3. Other nodes issue kobject_uevent_env with uuid and slot number
-       (Steps 4,5 could be a udev rule)
-    4. In userspace, the node searches for the disk, perhaps
-       using blkid -t SUB_UUID=""
-    5. Other nodes issue either of the following depending on whether
-       the disk was found:
-       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
-             disc.number set to slot number)
-       ioctl(CLUSTERED_DISK_NACK)
-    6. Other nodes drop lock on "no-new-devs" (CR) if device is found
-    7. Node 1 attempts EX lock on "no-new-dev"
-    8. If node 1 gets the lock, it sends METADATA_UPDATED after
-       unmarking the disk as SpareLocal
-    9. If not (get "no-new-dev" lock), it fails the operation and sends
-       METADATA_UPDATED.
-   10. Other nodes get the information whether a disk is added or not
-       by the following METADATA_UPDATED.
-
-6. Module interface.
-
- There are 17 call-backs which the md core can make to the cluster
- module.  Understanding these can give a good overview of the whole
- process.
-
-6.1 join(nodes) and leave()
-
- These are called when an array is started with a clustered bitmap,
- and when the array is stopped.  join() ensures the cluster is
- available and initializes the various resources.
- Only the first 'nodes' nodes in the cluster can use the array.
-
-6.2 slot_number()
-
- Reports the slot number advised by the cluster infrastructure.
- Range is from 0 to nodes-1.
-
-6.3 resync_info_update()
-
- This updates the resync range that is stored in the bitmap lock.
- The starting point is updated as the resync progresses.  The
- end point is always the end of the array.
- It does *not* send a RESYNCING message.
-
-6.4 resync_start(), resync_finish()
-
- These are called when resync/recovery/reshape starts or stops.
- They update the resyncing range in the bitmap lock and also
- send a RESYNCING message.  resync_start reports the whole
- array as resyncing, resync_finish reports none of it.
-
- resync_finish() also sends a BITMAP_NEEDS_SYNC message which
- allows some other node to take over.
-
-6.5 metadata_update_start(), metadata_update_finish(),
-    metadata_update_cancel().
-
- metadata_update_start is used to get exclusive access to
- the metadata.  If a change is still needed once that access is
- gained, metadata_update_finish() will send a METADATA_UPDATE
- message to all other nodes, otherwise metadata_update_cancel()
- can be used to release the lock.
-
-6.6 area_resyncing()
-
- This combines two elements of functionality.
-
- Firstly, it will check if any node is currently resyncing
- anything in a given range of sectors.  If any resync is found,
- then the caller will avoid writing or read-balancing in that
- range.
-
- Secondly, while node recovery is happening it reports that
- all areas are resyncing for READ requests.  This avoids races
- between the cluster-filesystem and the cluster-RAID handling
- a node failure.
-
-6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack()
-
- These are used to manage the new-disk protocol described above.
- When a new device is added, add_new_disk_start() is called before
- it is bound to the array and, if that succeeds, add_new_disk_finish()
- is called the device is fully added.
-
- When a device is added in acknowledgement to a previous
- request, or when the device is declared "unavailable",
- new_disk_ack() is called.
-
-6.8 remove_disk()
-
- This is called when a spare or failed device is removed from
- the array.  It causes a REMOVE message to be send to other nodes.
-
-6.9 gather_bitmaps()
-
- This sends a RE_ADD message to all other nodes and then
- gathers bitmap information from all bitmaps.  This combined
- bitmap is then used to recovery the re-added device.
-
-6.10 lock_all_bitmaps() and unlock_all_bitmaps()
-
- These are called when change bitmap to none. If a node plans
- to clear the cluster raid's bitmap, it need to make sure no other
- nodes are using the raid which is achieved by lock all bitmap
- locks within the cluster, and also those locks are unlocked
- accordingly.
-
-7. Unsupported features
-
-There are somethings which are not supported by cluster MD yet.
-
-- update size and change array_sectors.
diff --git a/Documentation/md/md-cluster.txt b/Documentation/md/md-cluster.txt
new file mode 100644 (file)
index 0000000..3888327
--- /dev/null
@@ -0,0 +1,324 @@
+The cluster MD is a shared-device RAID for a cluster.
+
+
+1. On-disk format
+
+Separate write-intent-bitmaps are used for each cluster node.
+The bitmaps record all writes that may have been started on that node,
+and may not yet have finished. The on-disk layout is:
+
+0                    4k                     8k                    12k
+-------------------------------------------------------------------
+| idle                | md super            | bm super [0] + bits |
+| bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
+| bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
+| bm bits [3, contd]  |                     |                     |
+
+During "normal" functioning we assume the filesystem ensures that only
+one node writes to any given block at a time, so a write request will
+
+ - set the appropriate bit (if not already set)
+ - commit the write to all mirrors
+ - schedule the bit to be cleared after a timeout.
+
+Reads are just handled normally. It is up to the filesystem to ensure
+one node doesn't read from a location where another node (or the same
+node) is writing.
+
+
+2. DLM Locks for management
+
+There are three groups of locks for managing the device:
+
+2.1 Bitmap lock resource (bm_lockres)
+
+ The bm_lockres protects individual node bitmaps. They are named in
+ the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a
+ node joins the cluster, it acquires the lock in PW mode and it stays
+ so during the lifetime the node is part of the cluster. The lock
+ resource number is based on the slot number returned by the DLM
+ subsystem. Since DLM starts node count from one and bitmap slots
+ start from zero, one is subtracted from the DLM slot number to arrive
+ at the bitmap slot number.
+
+ The LVB of the bitmap lock for a particular node records the range
+ of sectors that are being re-synced by that node.  No other
+ node may write to those sectors.  This is used when a new node
+ joins the cluster.
+
+2.2 Message passing locks
+
+ Each node has to communicate with other nodes when starting or ending
+ resync, and for metadata superblock updates.  This communication is
+ managed through three locks: "token", "message", and "ack", together
+ with the Lock Value Block (LVB) of the "message" lock.
+
+2.3 new-device management
+
+ A single lock: "no-new-dev" is used to co-ordinate the addition of
+ new devices - this must be synchronized across the array.
+ Normally all nodes hold a concurrent-read lock on this device.
+
+3. Communication
+
+ Messages can be broadcast to all nodes, and the sender waits for all
+ other nodes to acknowledge the message before proceeding.  Only one
+ message can be processed at a time.
+
+3.1 Message Types
+
+ There are six types of messages which are passed:
+
+ 3.1.1 METADATA_UPDATED: informs other nodes that the metadata has
+   been updated, and the node must re-read the md superblock. This is
+   performed synchronously. It is primarily used to signal device
+   failure.
+
+ 3.1.2 RESYNCING: informs other nodes that a resync is initiated or
+   ended so that each node may suspend or resume the region.  Each
+   RESYNCING message identifies a range of the devices that the
+   sending node is about to resync. This overrides any previous
+   notification from that node: only one range can be resynced at a
+   time per node.
+
+ 3.1.3 NEWDISK: informs other nodes that a device is being added to
+   the array. Message contains an identifier for that device.  See
+   below for further details.
+
+ 3.1.4 REMOVE: A failed or spare device is being removed from the
+   array. The slot-number of the device is included in the message.
+
+ 3.1.5 RE_ADD: A failed device is being re-activated - the assumption
+   is that it has been determined to be working again.
+
+ 3.1.6 BITMAP_NEEDS_SYNC: if a node is stopped locally but the bitmap
+   isn't clean, then another node is informed to take the ownership of
+   resync.
+
+3.2 Communication mechanism
+
+ The DLM LVB is used to communicate among the nodes of the cluster. There
+ are three resources used for the purpose:
+
+  3.2.1 token: The resource which protects the entire communication
+   system. The node having the token resource is allowed to
+   communicate.
+
+  3.2.2 message: The lock resource which carries the data to
+   communicate.
+
+  3.2.3 ack: The resource whose acquisition means the message has been
+   acknowledged by all nodes in the cluster. The BAST of the resource
+   is used to inform the receiving node that a node wants to
+   communicate.
+
+The algorithm is:
+
+ 1. receive status - all nodes have concurrent-reader lock on "ack".
+
+   sender                         receiver                 receiver
+   "ack":CR                       "ack":CR                 "ack":CR
+
+ 2. sender get EX on "token"
+    sender get EX on "message"
+    sender                        receiver                 receiver
+    "token":EX                    "ack":CR                 "ack":CR
+    "message":EX
+    "ack":CR
+
+    Sender checks that it still needs to send a message. Messages
+    received or other events that happened while waiting for the
+    "token" may have made this message inappropriate or redundant.
+
+ 3. sender writes LVB.
+    sender down-convert "message" from EX to CW
+    sender try to get EX of "ack"
+    [ wait until all receivers have *processed* the "message" ]
+
+                                     [ triggered by bast of "ack" ]
+                                     receiver get CR on "message"
+                                     receiver read LVB
+                                     receiver processes the message
+                                     [ wait finish ]
+                                     receiver releases "ack"
+                                     receiver tries to get PR on "message"
+
+   sender                         receiver                  receiver
+   "token":EX                     "message":CR              "message":CR
+   "message":CW
+   "ack":EX
+
+ 4. triggered by grant of EX on "ack" (indicating all receivers
+    have processed message)
+    sender down-converts "ack" from EX to CR
+    sender releases "message"
+    sender releases "token"
+                               receiver upconvert to PR on "message"
+                               receiver get CR of "ack"
+                               receiver release "message"
+
+   sender                      receiver                   receiver
+   "ack":CR                    "ack":CR                   "ack":CR
+
+
+4. Handling Failures
+
+4.1 Node Failure
+
+ When a node fails, the DLM informs the cluster with the slot
+ number. The node starts a cluster recovery thread. The cluster
+ recovery thread:
+
+       - acquires the bitmap<number> lock of the failed node
+       - opens the bitmap
+       - reads the bitmap of the failed node
+       - copies the set bitmap to local node
+       - cleans the bitmap of the failed node
+       - releases bitmap<number> lock of the failed node
+       - initiates resync of the bitmap on the current node
+               md_check_recovery is invoked within recover_bitmaps,
+               then md_check_recovery -> metadata_update_start/finish,
+               it will lock the communication by lock_comm, which
+               means that when one node is resyncing it blocks all
+               other nodes from writing anywhere on the array.
+
+ The resync process is the regular md resync. However, in a clustered
+ environment when a resync is performed, it needs to tell other nodes
+ of the areas which are suspended. Before a resync starts, the node
+ sends out RESYNCING with the (lo,hi) range of the area which needs to
+ be suspended. Each node maintains a suspend_list, which contains the
+ list of ranges which are currently suspended. On receiving RESYNCING,
+ the node adds the range to the suspend_list. Similarly, when the node
+ performing resync finishes, it sends RESYNCING with an empty range to
+ other nodes and other nodes remove the corresponding entry from the
+ suspend_list.
+
+ A helper function, ->area_resyncing() can be used to check if a
+ particular I/O range should be suspended or not.
+
+4.2 Device Failure
+
+ Device failures are handled and communicated with the metadata update
+ routine.  When a node detects a device failure it does not allow
+ any further writes to that device until the failure has been
+ acknowledged by all other nodes.
+
+5. Adding a new Device
+
+ For adding a new device, it is necessary that all nodes "see" the new
+ device to be added. For this, the following algorithm is used:
+
+    1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
+       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
+    2. Node 1 sends a NEWDISK message with uuid and slot number
+    3. Other nodes issue kobject_uevent_env with uuid and slot number
+       (Steps 4,5 could be a udev rule)
+    4. In userspace, the node searches for the disk, perhaps
+       using blkid -t SUB_UUID=""
+    5. Other nodes issue either of the following depending on whether
+       the disk was found:
+       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
+             disc.number set to slot number)
+       ioctl(CLUSTERED_DISK_NACK)
+    6. Other nodes drop lock on "no-new-devs" (CR) if device is found
+    7. Node 1 attempts EX lock on "no-new-dev"
+    8. If node 1 gets the lock, it sends METADATA_UPDATED after
+       unmarking the disk as SpareLocal
+    9. If not (get "no-new-dev" lock), it fails the operation and sends
+       METADATA_UPDATED.
+   10. Other nodes get the information whether a disk is added or not
+       by the following METADATA_UPDATED.
+
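A hedged userspace sketch of step 1 above: issuing ADD_NEW_DISK with
disc.state set to MD_DISK_CLUSTER_ADD, which is what mdadm does internally.
The md device and the disk's major/minor numbers are illustrative only.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>

int main(void)
{
	mdu_disk_info_t info = { 0 };
	int fd = open("/dev/md0", O_RDONLY);

	if (fd < 0) {
		perror("open(/dev/md0)");
		return 1;
	}
	info.major = 8;				/* illustrative: /dev/sdc */
	info.minor = 32;
	info.state = 1 << MD_DISK_CLUSTER_ADD;	/* cluster-wide add */
	if (ioctl(fd, ADD_NEW_DISK, &info) < 0) {
		perror("ADD_NEW_DISK");
		return 1;
	}
	return 0;
}
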
+6. Module interface.
+
+ There are 17 call-backs which the md core can make to the cluster
+ module.  Understanding these can give a good overview of the whole
+ process.
+
+6.1 join(nodes) and leave()
+
+ These are called when an array is started with a clustered bitmap,
+ and when the array is stopped.  join() ensures the cluster is
+ available and initializes the various resources.
+ Only the first 'nodes' nodes in the cluster can use the array.
+
+6.2 slot_number()
+
+ Reports the slot number advised by the cluster infrastructure.
+ Range is from 0 to nodes-1.
+
+6.3 resync_info_update()
+
+ This updates the resync range that is stored in the bitmap lock.
+ The starting point is updated as the resync progresses.  The
+ end point is always the end of the array.
+ It does *not* send a RESYNCING message.
+
+6.4 resync_start(), resync_finish()
+
+ These are called when resync/recovery/reshape starts or stops.
+ They update the resyncing range in the bitmap lock and also
+ send a RESYNCING message.  resync_start reports the whole
+ array as resyncing, resync_finish reports none of it.
+
+ resync_finish() also sends a BITMAP_NEEDS_SYNC message which
+ allows some other node to take over.
+
+6.5 metadata_update_start(), metadata_update_finish(),
+    metadata_update_cancel().
+
+ metadata_update_start is used to get exclusive access to
+ the metadata.  If a change is still needed once that access is
+ gained, metadata_update_finish() will send a METADATA_UPDATE
+ message to all other nodes, otherwise metadata_update_cancel()
+ can be used to release the lock.
+
+6.6 area_resyncing()
+
+ This combines two elements of functionality.
+
+ Firstly, it will check if any node is currently resyncing
+ anything in a given range of sectors.  If any resync is found,
+ then the caller will avoid writing or read-balancing in that
+ range.
+
+ Secondly, while node recovery is happening it reports that
+ all areas are resyncing for READ requests.  This avoids races
+ between the cluster-filesystem and the cluster-RAID handling
+ a node failure.
+
+6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack()
+
+ These are used to manage the new-disk protocol described above.
+ When a new device is added, add_new_disk_start() is called before
+ it is bound to the array and, if that succeeds, add_new_disk_finish()
+ is called when the device is fully added.
+
+ When a device is added in acknowledgement to a previous
+ request, or when the device is declared "unavailable",
+ new_disk_ack() is called.
+
+6.8 remove_disk()
+
+ This is called when a spare or failed device is removed from
+ the array.  It causes a REMOVE message to be sent to other nodes.
+
+6.9 gather_bitmaps()
+
+ This sends a RE_ADD message to all other nodes and then
+ gathers bitmap information from all bitmaps.  This combined
+ bitmap is then used to recover the re-added device.
+
+6.10 lock_all_bitmaps() and unlock_all_bitmaps()
+
+ These are called when changing the bitmap to none. If a node plans
+ to clear the cluster raid's bitmap, it needs to make sure no other
+ nodes are using the raid, which is achieved by taking all bitmap
+ locks within the cluster; those locks are unlocked accordingly
+ afterwards.
+
+7. Unsupported features
+
+There are some things which are not supported by cluster MD yet.
+
+- update size and change array_sectors.
diff --git a/Documentation/md/raid5-cache.txt b/Documentation/md/raid5-cache.txt
new file mode 100644 (file)
index 0000000..2b210f2
--- /dev/null
@@ -0,0 +1,109 @@
+RAID5 cache
+
+A RAID 4/5/6 array can include an extra disk as a data cache in addition to
+the normal RAID disks. The role of the RAID disks isn't changed by the cache
+disk; the cache disk caches data destined for the RAID disks. The cache can
+be in write-through (supported since 4.4) or write-back mode (supported since
+4.10). mdadm (since 3.4) has a new option '--write-journal' to create an
+array with a cache. Please refer to the mdadm manual for details. By default
+(when the RAID array starts), the cache is in write-through mode. A user can
+switch it to write-back mode by:
+
+echo "write-back" > /sys/block/md0/md/journal_mode
+
+And switch it back to write-through mode by:
+
+echo "write-through" > /sys/block/md0/md/journal_mode
+
+In both modes, all writes to the array will hit the cache disk first. This
+means the cache disk must be fast and able to sustain the full write load.
+
+-------------------------------------
+write-through mode:
+
+This mode mainly fixes the 'write hole' issue. For a RAID 4/5/6 array, an
+unclean shutdown can leave the data in some stripes in an inconsistent state,
+e.g. data and parity don't match. The reason is that a stripe write involves
+several RAID disks and it's possible that the writes haven't hit all RAID
+disks before the unclean shutdown. We call an array degraded if it has
+inconsistent data. MD tries to resync the array to bring it back to a normal
+state. But before the resync completes, any system crash risks real data
+corruption in the RAID array. This problem is called the 'write hole'.
+
+The write-through cache will cache all data on the cache disk first. After
+the data is safe on the cache disk, the data will be flushed onto the RAID
+disks. The two-step write guarantees that MD can recover correct data after
+an unclean shutdown even if the array is degraded. Thus the cache can close
+the 'write hole'.
+
+In write-through mode, MD reports IO completion to the upper layer (usually
+filesystems) after the data is safe on the RAID disks, so a cache disk
+failure doesn't cause data loss. Of course, a cache disk failure means the
+array is exposed to the 'write hole' again.
+
+In write-through mode, the cache disk isn't required to be big. A few hundred
+megabytes are enough.
+
+--------------------------------------
+write-back mode:
+
+Write-back mode fixes the 'write hole' issue too, since all write data is
+cached on the cache disk. But the main goal of the 'write-back' cache is to
+speed up writes. If a write crosses all RAID disks of a stripe, we call it a
+full-stripe write. For non-full-stripe writes, MD must read old data before
+the new parity can be calculated. These synchronous reads hurt write
+throughput. Some writes which are sequential but not dispatched at the same
+time will suffer from this overhead too. The write-back cache will aggregate
+the data and flush it to the RAID disks only after the data becomes a
+full-stripe write. This completely avoids the overhead, so it's very helpful
+for some workloads. A typical example is a workload which does sequential
+writes followed by fsync.
+
+In write-back mode, MD reports IO completion to the upper layer (usually
+filesystems) right after the data hits the cache disk. The data is flushed
+to the RAID disks later, after specific conditions are met. So a cache disk
+failure will cause data loss.
+
+In write-back mode, MD also caches data in memory. The memory cache holds
+the same data stored on the cache disk, so a power loss doesn't cause data
+loss. The memory cache size has a performance impact on the array, and a
+bigger size is recommended. A user can configure the size by:
+
+echo "2048" > /sys/block/md0/md/stripe_cache_size
+
+A cache disk that is too small will make the write aggregation less
+efficient in this mode, depending on the workloads. It's recommended to use
+a cache disk of at least several gigabytes in write-back mode.
+
+--------------------------------------
+The implementation:
+
+The write-through and write-back caches use the same disk format. The cache
+disk is organized as a simple write log. The log consists of 'meta data' and
+'data' pairs. The meta data describes the data. It also includes a checksum
+and a sequence ID for recovery identification. Data can be IO data or parity
+data. Data is checksummed too. The checksum is stored in the meta data ahead
+of the data. The checksum is an optimization because MD can write meta data
+and data freely without worrying about the order. The MD superblock has a
+field pointing to the valid meta data at the log head.
+
+The log implementation is pretty straightforward. The difficult part is the
+order in which MD writes data to the cache disk and the RAID disks.
+Specifically, in write-through mode, MD calculates parity for the IO data,
+writes both IO data and parity to the log, writes the data and parity to the
+RAID disks after the data and parity have settled down in the log, and
+finally the IO is finished. Reads just read from the RAID disks as usual.
+
+In write-back mode, MD writes IO data to the log and reports IO completion.
+The data is also fully cached in memory at that time, which means reads must
+query the memory cache. If some conditions are met, MD will flush the data
+to the RAID disks. MD will calculate parity for the data and write the
+parity into the log. After this is finished, MD will write both data and
+parity into the RAID disks, then MD can release the memory cache. The flush
+conditions could be: a stripe becomes a full-stripe write, free cache disk
+space is low, or free in-kernel memory cache space is low.
+
+After an unclean shutdown, MD does recovery. MD reads all meta data and data
+from the log. The sequence ID and checksum help detect corrupted meta data
+and data. If MD finds a stripe with data and valid parities (1 parity for
+raid4/5 and 2 for raid6), MD will write the data and parities to the RAID
+disks. If the parities are incomplete, they are discarded. If part of the
+data is corrupted, it is discarded too. MD then loads valid data and writes
+it to the RAID disks in the normal way.
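
For completeness, a hedged C equivalent of the echo commands above; it
assumes an array md0 with an attached journal and sufficient privileges.

#include <stdio.h>

/* Write "write-back" or "write-through" to the journal_mode attribute. */
static int set_journal_mode(const char *mode)
{
	FILE *f = fopen("/sys/block/md0/md/journal_mode", "w");
	int ok;

	if (!f)
		return -1;
	ok = fputs(mode, f) >= 0;
	if (fclose(f) != 0)	/* sysfs may report the error on close */
		ok = 0;
	return ok ? 0 : -1;
}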
index 8124bf5ce5ef826436057156d2b2ddd0359d841b..69b07e9d18163f9de676686ab9cb29ff0121ea44 100644 (file)
@@ -20,7 +20,7 @@ existing low level CI API.
 ca_zap
 ~~~~~~
 
-An userspace application, like ``ca_zap`` is required to handle encrypted
+A userspace application, like ``ca_zap`` is required to handle encrypted
 MPEG-TS streams.
 
 The ``ca_zap`` userland application is in charge of sending the
index bf31411fc9dfb290d5eeb3ba24155cf80537ad4c..899fd5c3545ee268e8950db257d9d092e29443ff 100644 (file)
@@ -9,7 +9,7 @@ frontend parameters
 The kind of parameters passed to the frontend device for tuning depend
 on the kind of hardware you are using.
 
-The struct ``dvb_frontend_parameters`` uses an union with specific
+The struct ``dvb_frontend_parameters`` uses a union with specific
 per-system parameters. However, as newer delivery systems required more
 data, the structure size weren't enough to fit, and just extending its
 size would break the existing applications. So, those parameters were
@@ -23,7 +23,7 @@ So, newer applications should use
 instead, in order to be able to support the newer System Delivery like
 DVB-S2, DVB-T2, DVB-C2, ISDB, etc.
 
-All kinds of parameters are combined as an union in the
+All kinds of parameters are combined as a union in the
 FrontendParameters structure:
 
 
index 5de846d3ecc077dc11f70e8c135349c581b6588a..670f3ded0802d83d60338ff91f624499d0ca2d59 100644 (file)
@@ -114,11 +114,11 @@ config options.
     Memory model -> Sparse Memory  (CONFIG_SPARSEMEM)
     Allow for memory hot-add       (CONFIG_MEMORY_HOTPLUG)
 
-- To enable memory removal, the followings are also necessary
+- To enable memory removal, the following are also necessary
     Allow for memory hot remove    (CONFIG_MEMORY_HOTREMOVE)
     Page Migration                 (CONFIG_MIGRATION)
 
-- For ACPI memory hotplug, the followings are also necessary
+- For ACPI memory hotplug, the following are also necessary
     Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
     This option can be kernel module.
 
index a15ea602aa5255dd1d160dfb9f7fff5eb1c8e68e..b9482ca1025457e12a0fed3b7e8e198ed2d18463 100644 (file)
@@ -38,7 +38,7 @@ Basic usage
 ===========
 
 MBIM functions are inactive when unmanaged. The cdc_mbim driver only
-provides an userspace interface to the MBIM control channel, and will
+provides a userspace interface to the MBIM control channel, and will
 not participate in the management of the function. This implies that a
 userspace MBIM management application always is required to enable a
 MBIM function.
@@ -200,7 +200,7 @@ structure described in section 10.5.29 of [1].
 The DSS VLAN subdevices are used as a practical interface between the
 shared MBIM data channel and a MBIM DSS aware userspace application.
 It is not intended to be presented as-is to an end user. The
-assumption is that an userspace application initiating a DSS session
+assumption is that a userspace application initiating a DSS session
 also takes care of the necessary framing of the DSS data, presenting
 the stream to the end user in an appropriate way for the stream type.
 
index 00ffdf187f0b3e85fc9e61898afd70dcf910915a..234ddabb23ef96da3a2c88a348e12eca67c7f5df 100644 (file)
@@ -549,7 +549,7 @@ ii. Reduced by 1 max cmds sent to FW from Driver to make the reply_q_sz same
 3 Older Version   : 00.00.03.02
 
 i.     Send stop adapter to FW & Dump pending FW cmds before declaring adapter dead.
-       New varible added to set dbg level.
+       New variable added to set dbg level.
 ii.    Disable interrupt made as fn pointer as they are different for 1068 / 1078
 iii.   Frame count optimization. Main frame can contain 2 SGE for 64 bit SGLs and
        3 SGE for 32 bit SGL
index 168d0cfab1cecb5e69896ec68b23e48532bdd976..9eeb9b468706ccd25ddc67fa457f5884d7d5a34d 100644 (file)
@@ -697,7 +697,7 @@ If it's a regression, at best, send alsa-info outputs of both working
 and non-working kernels.  This is really helpful because we can
 compare the codec registers directly.
 
-Send a bug report either the followings:
+Send a bug report to either of the following:
 
 kernel-bugzilla
     https://bugzilla.kernel.org/
diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt
new file mode 100644 (file)
index 0000000..5aa735a
--- /dev/null
@@ -0,0 +1,9 @@
+Steps for sending 'break' on sunhv console:
+===========================================
+
+On bare metal:
+   1. press   Esc + 'B'
+
+On LDOM:
+   1. press    Ctrl + ']'
+   2. telnet> send  break
index ea8d7b4e53f05c01116f57f45d28da3e9943bfd8..32a25fad0c1b4e6ea3fe4ea67e48ccfbf7a461e5 100644 (file)
@@ -155,7 +155,9 @@ or:
 
 There are a few functions and macros that architectures must implement in order
 to take advantage of this optimization. If there is no architecture support, we
-simply fall back to a traditional, load, test, and jump sequence.
+simply fall back to a traditional load, test, and jump sequence. Also, the
+struct jump_entry table must be at least 4-byte aligned because the
+static_key->entry field makes use of the two least significant bits.
 
 * select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig
 
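
As context for the machinery above, a minimal hedged sketch of the
consumer-side static branch API; the key and function names are invented
for the example.

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(foo_key);

void foo_hotpath(void)
{
	/* compiles to a patchable no-op while the key is disabled */
	if (static_branch_unlikely(&foo_key)) {
		/* rarely enabled slow path */
	}
}

void foo_enable(void)
{
	static_branch_enable(&foo_key);	/* patches the branch at runtime */
}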
index 95ccbe6d79ce6a37efc4f1b4890980974b43c603..b4ad97f10b8e6963878f64ff4828ca7ea56a4316 100644 (file)
@@ -376,8 +376,8 @@ max_map_count:
 
 This file contains the maximum number of memory map areas a process
 may have. Memory map areas are used as a side-effect of calling
-malloc, directly by mmap and mprotect, and also when loading shared
-libraries.
+malloc, directly by mmap, mprotect, and madvise, and also when loading
+shared libraries.
 
 While most applications need less than a thousand maps, certain
 programs, particularly malloc debuggers, may consume lots of them,
index f34a8ee6f8606e62d94b12d5dc9a9c6fb82dd36c..6b0ca7feb13573b7a40a9f3c7c145304cf158979 100644 (file)
@@ -38,6 +38,10 @@ the range for whenever the KSM daemon is started; even if the range
 cannot contain any pages which KSM could actually merge; even if
 MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
 
+If a region of memory must be split into at least one new MADV_MERGEABLE
+or MADV_UNMERGEABLE region, the madvise call may fail with ENOMEM if
+the process would exceed vm.max_map_count (see Documentation/sysctl/vm.txt).
+
 Like other madvise calls, they are intended for use on mapped areas of
 the user address space: they will report ENOMEM if the specified range
 includes unmapped gaps (though working on the intervening mapped areas),
@@ -80,6 +84,20 @@ run              - set 0 to stop ksmd from running but keep merged pages,
                    Default: 0 (must be changed to 1 to activate KSM,
                                except if CONFIG_SYSFS is disabled)
 
+use_zero_pages   - specifies whether empty pages (i.e. allocated pages
+                   that only contain zeroes) should be treated specially.
+                   When set to 1, empty pages are merged with the kernel
+                   zero page(s) instead of with each other, as would
+                   normally happen. This can improve the performance on
+                   architectures with coloured zero pages, depending on
+                   the workload. Care should be taken when enabling this
+                   setting, as it can potentially degrade the performance
+                   of KSM for some workloads, for example if the
+                   checksums of pages that are candidates for merging
+                   match the checksum of an empty page. This setting can
+                   be changed at any time; it is only effective for
+                   pages merged after the change.
+                   Default: 0 (normal KSM behaviour as in earlier releases)
+
 The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
 
 pages_shared     - how many shared pages are being used
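
To make the ENOMEM note above concrete, here is a minimal hedged sketch
that registers an anonymous mapping with MADV_MERGEABLE and reports the
failure mode described earlier.

#define _DEFAULT_SOURCE		/* for madvise() and MADV_MERGEABLE */
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4UL << 20;	/* 4 MiB anonymous region */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	if (madvise(p, len, MADV_MERGEABLE) != 0) {
		/* splitting the VMA may push the process past
		   vm.max_map_count, yielding ENOMEM as noted above */
		perror("madvise(MADV_MERGEABLE)");
		return 1;
	}
	return 0;
}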
index 70a3c94d19413b33d73b226e1b091698950343fe..0e5543a920e5b2595f4d194462ba722ce25bd80d 100644 (file)
@@ -54,6 +54,26 @@ uffdio_api.features and uffdio_api.ioctls two 64bit bitmasks of
 respectively all the available features of the read(2) protocol and
 the generic ioctl available.
 
+The uffdio_api.features bitmask returned by the UFFDIO_API ioctl
+defines what memory types are supported by the userfaultfd and what
+events, except page fault notifications, may be generated.
+
+If the kernel supports registering userfaultfd ranges on hugetlbfs
+virtual memory areas, UFFD_FEATURE_MISSING_HUGETLBFS will be set in
+uffdio_api.features. Similarly, UFFD_FEATURE_MISSING_SHMEM will be
+set if the kernel supports registering userfaultfd ranges on shared
+memory (covering all shmem APIs, i.e. tmpfs, IPCSHM, /dev/zero
+MAP_SHARED, memfd_create, etc).
+
+The userland application that wants to use userfaultfd with hugetlbfs
+or shared memory needs to set the corresponding flag in
+uffdio_api.features to enable those features.
+
+If the userland desires to receive notifications for events other than
+page faults, it has to verify that uffdio_api.features has appropriate
+UFFD_FEATURE_EVENT_* bits set. These events are described in more
+detail in the "Non-cooperative userfaultfd" section below.
+
 Once the userfaultfd has been enabled the UFFDIO_REGISTER ioctl should
 be invoked (if present in the returned uffdio_api.ioctls bitmask) to
 register a memory range in the userfaultfd by setting the
@@ -129,7 +149,7 @@ migration thread in the QEMU running in the destination node will
 receive the page that triggered the userfault and it'll map it as
 usual with the UFFDIO_COPY|ZEROPAGE (without actually knowing if it
 was spontaneously sent by the source or if it was an urgent page
-requested through an userfault).
+requested through a userfault).
 
 By the time the userfaults start, the QEMU in the destination node
 doesn't need to keep any per-page state bitmap relative to the live
@@ -142,3 +162,72 @@ course the bitmap is updated accordingly. It's also useful to avoid
 sending the same page twice (in case the userfault is read by the
 postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration
 thread).
+
+== Non-cooperative userfaultfd ==
+
+When the userfaultfd is monitored by an external manager, the manager
+must be able to track changes in the process virtual memory
+layout. Userfaultfd can notify the manager about such changes using
+the same read(2) protocol as for the page fault notifications. The
+manager has to explicitly enable these events by setting appropriate
+bits in uffdio_api.features passed to UFFDIO_API ioctl:
+
+UFFD_FEATURE_EVENT_EXIT - enable notification about exit() of the
+non-cooperative process. When the monitored process exits, the uffd
+manager will get UFFD_EVENT_EXIT.
+
+UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When
+this feature is enabled, the userfaultfd context of the parent process
+is duplicated into the newly created process. The manager receives
+UFFD_EVENT_FORK with file descriptor of the new userfaultfd context in
+the uffd_msg.fork.
+
+UFFD_FEATURE_EVENT_REMAP - enable notifications about mremap()
+calls. When the non-cooperative process moves a virtual memory area to
+a different location, the manager will receive UFFD_EVENT_REMAP. The
+uffd_msg.remap will contain the old and new addresses of the area and
+its original length.
+
+UFFD_FEATURE_EVENT_REMOVE - enable notifications about
+madvise(MADV_REMOVE) and madvise(MADV_DONTNEED) calls. The event
+UFFD_EVENT_REMOVE will be generated upon these calls to madvise. The
+uffd_msg.remove will contain start and end addresses of the removed
+area.
+
+UFFD_FEATURE_EVENT_UNMAP - enable notifications about memory
+unmapping. The manager will get UFFD_EVENT_UNMAP with uffd_msg.remove
+containing start and end addresses of the unmapped area.
+
+Although the UFFD_FEATURE_EVENT_REMOVE and UFFD_FEATURE_EVENT_UNMAP
+are pretty similar, they differ quite a bit in the action expected
+from the userfaultfd manager. In the former case, the virtual memory
+is removed, but the area is not: the area remains monitored by the
+userfaultfd, and if a page fault occurs in that area it will be
+delivered to the manager. The proper resolution for such page fault is
+to zeromap the faulting address. However, in the latter case, when an
+area is unmapped, either explicitly (with munmap() system call), or
+implicitly (e.g. during mremap()), the area is removed and in turn the
+userfaultfd context for such area disappears too and the manager will
+not get further userland page faults from the removed area. Still, the
+notification is required in order to prevent the manager from using
+UFFDIO_COPY on the unmapped area.
+
+Unlike userland page faults which have to be synchronous and require
+explicit or implicit wakeup, all the events are delivered
+asynchronously and the non-cooperative process resumes execution as
+soon as the manager executes read(). The userfaultfd manager should
+carefully synchronize calls to UFFDIO_COPY with the events
+processing. To aid the synchronization, the UFFDIO_COPY ioctl will
+return -ENOSPC when the monitored process exits at the time of
+UFFDIO_COPY, and -ENOENT, when the non-cooperative process has changed
+its virtual memory layout simultaneously with an outstanding
+UFFDIO_COPY operation.
+
+The current asynchronous model of the event delivery is optimal for
+single threaded non-cooperative userfaultfd manager implementations. A
+synchronous event delivery model can be added later as a new
+userfaultfd feature to facilitate multithreading enhancements of the
+non-cooperative manager, for example to allow UFFDIO_COPY ioctls to
+run in parallel to the event reception. Single threaded
+implementations should continue to use the current async event
+delivery model instead.
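
To make the handshake concrete, here is a minimal hedged sketch of a
manager requesting the non-cooperative events during UFFDIO_API; it assumes
a kernel and <linux/userfaultfd.h> that expose the feature bits listed
above.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

int main(void)
{
	struct uffdio_api api;
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	if (uffd < 0) {
		perror("userfaultfd");
		return 1;
	}

	api.api = UFFD_API;
	/* ask for fork/remap/remove events in addition to page faults */
	api.features = UFFD_FEATURE_EVENT_FORK |
		       UFFD_FEATURE_EVENT_REMAP |
		       UFFD_FEATURE_EVENT_REMOVE;
	if (ioctl(uffd, UFFDIO_API, &api) < 0) {
		perror("UFFDIO_API");	/* kernel lacks a requested feature */
		return 1;
	}
	/* api.ioctls now lists the ioctls valid on this userfaultfd */
	return 0;
}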
index ea277478982f5aef1e122f33f55140e8c5010fc4..9b93953f69cf866289fd8e49632860185bda1e6c 100644 (file)
@@ -280,6 +280,12 @@ To disable the watchdog on reboot, the user must call the following helper:
 
 static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd);
 
+To stop the watchdog when the device is unregistered, the user must call
+the following helper. Note that this will only stop the watchdog if the
+nowayout flag is not set.
+
+static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd);
+
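As an illustration of where the new helper sits, a hedged sketch of a
minimal driver using it; the foo_* names and the empty ops are invented
for the example.

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/watchdog.h>

static int foo_wdt_start(struct watchdog_device *wdd)
{
	/* illustrative: hardware poke omitted */
	return 0;
}

static int foo_wdt_stop(struct watchdog_device *wdd)
{
	return 0;
}

static const struct watchdog_ops foo_wdt_ops = {
	.owner = THIS_MODULE,
	.start = foo_wdt_start,
	.stop  = foo_wdt_stop,
};

static const struct watchdog_info foo_wdt_info = {
	.options  = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
	.identity = "foo-wdt",
};

static struct watchdog_device foo_wdd = {
	.info    = &foo_wdt_info,
	.ops     = &foo_wdt_ops,
	.timeout = 30,
};

static int foo_wdt_probe(struct platform_device *pdev)
{
	watchdog_init_timeout(&foo_wdd, 0, &pdev->dev);
	/* stop the hardware on unregister unless nowayout is set */
	watchdog_stop_on_unregister(&foo_wdd);
	return watchdog_register_device(&foo_wdd);
}

static int foo_wdt_remove(struct platform_device *pdev)
{
	watchdog_unregister_device(&foo_wdd);
	return 0;
}

static struct platform_driver foo_wdt_driver = {
	.probe  = foo_wdt_probe,
	.remove = foo_wdt_remove,
	.driver = {
		.name = "foo-wdt",
	},
};
module_platform_driver(foo_wdt_driver);

MODULE_LICENSE("GPL");
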
 To change the priority of the restart handler the following helper should be
 used:
 
index e21850e270a0555417c0615f128da609e2e1e1ce..4f7d86dd0a5d88802d85bb68ccf1f93715003a42 100644 (file)
@@ -209,6 +209,11 @@ timeout: Initial watchdog timeout in seconds (0<timeout<516, default=60)
 nowayout: Watchdog cannot be stopped once started
        (default=kernel config parameter)
 -------------------------------------------------
+nic7018_wdt:
+timeout: Initial watchdog timeout in seconds (0<timeout<464, default=80)
+nowayout: Watchdog cannot be stopped once started
+       (default=kernel config parameter)
+-------------------------------------------------
 nuc900_wdt:
 heartbeat: Watchdog heartbeats in seconds.
        (default = 15)
index d918d268cd72bd6e8c7ec839dc9235e75233a72b..51cf6fa5591f9e99d95e73212e19372821ae4d7f 100644 (file)
@@ -212,3 +212,117 @@ Finally we move core 4-7 over to the new group and make sure that the
 kernel and the tasks running there get 50% of the cache.
 
 # echo C0 > p0/cpus
+
+4) Locking between applications
+
+Certain operations on the resctrl filesystem, composed of read/writes
+to/from multiple files, must be atomic.
+
+As an example, the allocation of an exclusive reservation of L3 cache
+involves:
+
+  1. Read the cbmmasks from each directory
+  2. Find a contiguous set of bits in the global CBM bitmask that is clear
+     in all of the directory cbmmasks
+  3. Create a new directory
+  4. Set the bits found in step 2 to the new directory "schemata" file
+
+If two applications attempt to allocate space concurrently then they can
+end up allocating the same bits so the reservations are shared instead of
+exclusive.
+
+To coordinate atomic operations on the resctrlfs and to avoid the problem
+above, the following locking procedure is recommended:
+
+Locking is based on flock, which is available in libc and also as a shell
+script command.
+
+Write lock:
+
+ A) Take flock(LOCK_EX) on /sys/fs/resctrl
+ B) Read/write the directory structure.
+ C) Release the lock with flock(LOCK_UN)
+
+Read lock:
+
+ A) Take flock(LOCK_SH) on /sys/fs/resctrl
+ B) On success, read the directory structure.
+ C) Release the lock with flock(LOCK_UN)
+
+Example with bash:
+
+# Atomically read directory structure
+$ flock -s /sys/fs/resctrl/ find /sys/fs/resctrl
+
+# Read directory contents and create new subdirectory
+
+$ cat create-dir.sh
+find /sys/fs/resctrl/ > output.txt
+mask=$(function-of output.txt)  # derive an unused CBM from output.txt
+mkdir /sys/fs/resctrl/newres/
+echo "$mask" > /sys/fs/resctrl/newres/schemata
+
+$ flock /sys/fs/resctrl/ ./create-dir.sh
+
+Example with C:
+
+/*
+ * Example code to take advisory locks
+ * before accessing resctrl filesystem
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/file.h>
+
+void resctrl_take_shared_lock(int fd)
+{
+       int ret;
+
+       /* take shared lock on resctrl filesystem */
+       ret = flock(fd, LOCK_SH);
+       if (ret) {
+               perror("flock");
+               exit(-1);
+       }
+}
+
+void resctrl_take_exclusive_lock(int fd)
+{
+       int ret;
+
+       /* take exclusive lock on resctrl filesystem */
+       ret = flock(fd, LOCK_EX);
+       if (ret) {
+               perror("flock");
+               exit(-1);
+       }
+}
+
+void resctrl_release_lock(int fd)
+{
+       int ret;
+
+               /* release lock on resctrl filesystem */
+       ret = flock(fd, LOCK_UN);
+       if (ret) {
+               perror("flock");
+               exit(-1);
+       }
+}
+
+int main(void)
+{
+       int fd;
+
+       fd = open("/sys/fs/resctrl", O_DIRECTORY);
+       if (fd == -1) {
+               perror("open");
+               exit(-1);
+       }
+       resctrl_take_shared_lock(fd);
+       /* code to read directory contents */
+       resctrl_release_lock(fd);
+
+       resctrl_take_exclusive_lock(fd);
+       /* code to read and write directory contents */
+       resctrl_release_lock(fd);
+
+       return 0;
+}
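+
+Remember that the lock is tied to the open file descriptor: it is released
+when the descriptor (and any duplicate of it) is closed, so the descriptor
+must be kept open for as long as the lock is needed.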
index 4b03c4701030202c7ed91e2db62885e9d915e8ea..5a00239fd7b311b579dd8d6f004a768fe506f5c6 100644 (file)
@@ -3452,6 +3452,7 @@ B:        https://bugzilla.kernel.org
 F:     Documentation/cpu-freq/
 F:     drivers/cpufreq/
 F:     include/linux/cpufreq.h
+F:     tools/testing/selftests/cpufreq/
 
 CPU FREQUENCY DRIVERS - ARM BIG LITTLE
 M:     Viresh Kumar <viresh.kumar@linaro.org>
@@ -6011,9 +6012,8 @@ F:        include/linux/hsi/
 F:     include/uapi/linux/hsi/
 
 HSO 3G MODEM DRIVER
-M:     Jan Dumon <j.dumon@option.com>
-W:     http://www.pharscape.org
-S:     Maintained
+L:     linux-usb@vger.kernel.org
+S:     Orphan
 F:     drivers/net/usb/hso.c
 
 HSR NETWORK PROTOCOL
@@ -6270,6 +6270,11 @@ S:       Maintained
 F:     drivers/mfd/lpc_ich.c
 F:     drivers/gpio/gpio-ich.c
 
+IDT VersaClock 5 CLOCK DRIVER
+M:     Marek Vasut <marek.vasut@gmail.com>
+S:     Maintained
+F:     drivers/clk/clk-versaclock5.c
+
 IDE SUBSYSTEM
 M:     "David S. Miller" <davem@davemloft.net>
 L:     linux-ide@vger.kernel.org
@@ -7280,6 +7285,7 @@ M:        Masami Hiramatsu <mhiramat@kernel.org>
 S:     Maintained
 F:     Documentation/kprobes.txt
 F:     include/linux/kprobes.h
+F:     include/asm-generic/kprobes.h
 F:     kernel/kprobes.c
 
 KS0108 LCD CONTROLLER DRIVER
@@ -9315,6 +9321,7 @@ OPENRISC ARCHITECTURE
 M:     Jonas Bonn <jonas@southpole.se>
 M:     Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
 M:     Stafford Horne <shorne@gmail.com>
+T:     git git://github.com/openrisc/linux.git
 L:     openrisc@lists.librecores.org
 W:     http://openrisc.io
 S:     Maintained
@@ -11450,6 +11457,14 @@ F:     drivers/media/usb/siano/
 F:     drivers/media/usb/siano/
 F:     drivers/media/mmc/siano/
 
+SILEAD TOUCHSCREEN DRIVER
+M:     Hans de Goede <hdegoede@redhat.com>
+L:     linux-input@vger.kernel.org
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     drivers/input/touchscreen/silead.c
+F:     drivers/platform/x86/silead_dmi.c
+
 SIMPLEFB FB DRIVER
 M:     Hans de Goede <hdegoede@redhat.com>
 L:     linux-fbdev@vger.kernel.org
index b83109b5d217cc2652aba6cf99c6b4f92a5b8cea..4cb6b0a1152b5f57f783f0afa64207e956112c42 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -910,6 +910,18 @@ mod_sign_cmd = true
 endif
 export mod_sign_cmd
 
+ifdef CONFIG_STACK_VALIDATION
+  has_libelf := $(call try-run,\
+               echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf -,1,0)
+  ifeq ($(has_libelf),1)
+    objtool_target := tools/objtool FORCE
+  else
+    $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+    SKIP_STACK_VALIDATION := 1
+    export SKIP_STACK_VALIDATION
+  endif
+endif
+
 
 ifeq ($(KBUILD_EXTMOD),)
 core-y         += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
@@ -1037,18 +1049,6 @@ prepare0: archprepare gcc-plugins
 # All the preparing..
 prepare: prepare0 prepare-objtool
 
-ifdef CONFIG_STACK_VALIDATION
-  has_libelf := $(call try-run,\
-               echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf -,1,0)
-  ifeq ($(has_libelf),1)
-    objtool_target := tools/objtool FORCE
-  else
-    $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
-    SKIP_STACK_VALIDATION := 1
-    export SKIP_STACK_VALIDATION
-  endif
-endif
-
 PHONY += prepare-objtool
 prepare-objtool: $(objtool_target)
 
index f761142976e589a0b40d8225d32677d0f985bfff..cd211a14a88f7774bec3abbe3b371f0302e624a0 100644 (file)
@@ -29,7 +29,7 @@ config OPROFILE_EVENT_MULTIPLEX
          The number of hardware counters is limited. The multiplexing
          feature enables OProfile to gather more events than counters
          are provided by the hardware. This is realized by switching
-         between events at an user specified time interval.
+         between events at a user specified time interval.
 
          If unsure, say N.
 
@@ -571,6 +571,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
        bool
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+       bool
+
 config HAVE_ARCH_HUGE_VMAP
        bool
 
index baa152b9348e7dd230472c0218817bb97e89255e..d103db5af5ffdd19fbbc897e6050171b884e6d32 100644 (file)
@@ -9,3 +9,5 @@ generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
+generic-y += current.h
+generic-y += kprobes.h
diff --git a/arch/alpha/include/asm/current.h b/arch/alpha/include/asm/current.h
deleted file mode 100644 (file)
index 094d285..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _ALPHA_CURRENT_H
-#define _ALPHA_CURRENT_H
-
-#include <linux/thread_info.h>
-
-#define get_current()  (current_thread_info()->task)
-#define current                get_current()
-
-#endif /* _ALPHA_CURRENT_H */
index c63b6ac19ee5c8141e4499dff8bf2f5c3f858bb5..5d53666935e6bacf733bb4c03d46c1c241618ff0 100644 (file)
@@ -1,9 +1,9 @@
 #ifndef _ALPHA_DMA_MAPPING_H
 #define _ALPHA_DMA_MAPPING_H
 
-extern struct dma_map_ops *dma_ops;
+extern const struct dma_map_ops *dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return dma_ops;
 }
index bb152e21e5ae89960be20e51d37f7de4695b50cf..ffbdb3fb672f6b6d6e4f23a5d47fc519fb07203e 100644 (file)
@@ -128,7 +128,7 @@ static int alpha_noop_supported(struct device *dev, u64 mask)
        return mask < 0x00ffffffUL ? 0 : 1;
 }
 
-struct dma_map_ops alpha_noop_ops = {
+const struct dma_map_ops alpha_noop_ops = {
        .alloc                  = alpha_noop_alloc_coherent,
        .free                   = dma_noop_free_coherent,
        .map_page               = dma_noop_map_page,
@@ -137,5 +137,5 @@ struct dma_map_ops alpha_noop_ops = {
        .dma_supported          = alpha_noop_supported,
 };
 
-struct dma_map_ops *dma_ops = &alpha_noop_ops;
+const struct dma_map_ops *dma_ops = &alpha_noop_ops;
 EXPORT_SYMBOL(dma_ops);
index 451fc9cdd323a7bccc2c0f4f687b45192a288a9c..7fd2329038a3ef1664de33f4f9759ea15c56e7c7 100644 (file)
@@ -939,7 +939,7 @@ static int alpha_pci_mapping_error(struct device *dev, dma_addr_t dma_addr)
        return dma_addr == 0;
 }
 
-struct dma_map_ops alpha_pci_ops = {
+const struct dma_map_ops alpha_pci_ops = {
        .alloc                  = alpha_pci_alloc_coherent,
        .free                   = alpha_pci_free_coherent,
        .map_page               = alpha_pci_map_page,
@@ -950,5 +950,5 @@ struct dma_map_ops alpha_pci_ops = {
        .dma_supported          = alpha_pci_supported,
 };
 
-struct dma_map_ops *dma_ops = &alpha_pci_ops;
+const struct dma_map_ops *dma_ops = &alpha_pci_ops;
 EXPORT_SYMBOL(dma_ops);
index 46bf263c315318cabb1c1530e6584f2060512497..acb4b146a607959c3916d4c10690a3cf079e5e57 100644 (file)
@@ -144,7 +144,7 @@ smp_callin(void)
                alpha_mv.smp_callin();
 
        /* All kernel threads share the same mm context.  */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
        /* inform the notifiers about the new cpu */
index 266f11c9bd593e58775d12a5dac214817da3110e..94285031c4fb4c516649d658bfa1ba4cbdb7b1ac 100644 (file)
@@ -18,9 +18,9 @@
 #include <plat/dma.h>
 #endif
 
-extern struct dma_map_ops arc_dma_ops;
+extern const struct dma_map_ops arc_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &arc_dma_ops;
 }
index 944dbedb38b5949fdb61ebb6c59eff44b17642f0..00bdbe167615ec2d97c7bccec66595217741cd83 100644 (file)
@@ -9,6 +9,8 @@
 #ifndef _ARC_KPROBES_H
 #define _ARC_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
 #ifdef CONFIG_KPROBES
 
 typedef u16 kprobe_opcode_t;
@@ -55,6 +57,6 @@ void trap_is_kprobe(unsigned long address, struct pt_regs *regs);
 static void trap_is_kprobe(unsigned long address, struct pt_regs *regs)
 {
 }
-#endif
+#endif /* CONFIG_KPROBES */
 
-#endif
+#endif /* _ARC_KPROBES_H */
index 2afbafadb6ab529ebaa7af4e367d733fc7837e1e..b8e8d394448137e1efc7ce3581860b94f1f7f9b9 100644 (file)
@@ -139,8 +139,8 @@ void start_kernel_secondary(void)
        /* MMU, Caches, Vector Table, Interrupts etc */
        setup_processor();
 
-       atomic_inc(&mm->mm_users);
-       atomic_inc(&mm->mm_count);
+       mmget(mm);
+       mmgrab(mm);
        current->active_mm = mm;
        cpumask_set_cpu(cpu, mm_cpumask(mm));
 
index 61fd1ce63c56ac499130c6ffcf87471b0398fe06..b6e4f7a7419b3215d8b24103ee5a035ee50c96c7 100644 (file)
@@ -1051,9 +1051,9 @@ int arc_unwind(struct unwind_frame_info *frame)
                ++ptr;
        }
        if (cie != NULL) {
-               /* get code aligment factor */
+               /* get code alignment factor */
                state.codeAlign = get_uleb128(&ptr, end);
-               /* get data aligment factor */
+               /* get data alignment factor */
                state.dataAlign = get_sleb128(&ptr, end);
                if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
                        cie = NULL;
index 08450a1a5b5fd06f0cc0ef74b2a7de317473f6c8..2a07e6ecafbd768bcdca7c09fa4cc29bc1a6ff65 100644 (file)
@@ -218,7 +218,7 @@ static int arc_dma_supported(struct device *dev, u64 dma_mask)
        return dma_mask == DMA_BIT_MASK(32);
 }
 
-struct dma_map_ops arc_dma_ops = {
+const struct dma_map_ops arc_dma_ops = {
        .alloc                  = arc_dma_alloc,
        .free                   = arc_dma_free,
        .mmap                   = arc_dma_mmap,
index fda6a46d27cfe2bda02257a8a8b30d316cd14612..0d4e71b42c77da986a2dc471a1f335be5e958527 100644 (file)
@@ -2,6 +2,7 @@ config ARM
        bool
        default y
        select ARCH_CLOCKSOURCE_DATA
+       select ARCH_HAS_DEBUG_VIRTUAL
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_SET_MEMORY
index aed66d5df7f1546ac48cf93ee9c4f2aeb91d5b76..b7576349528c12a8ef7533b6b66bc9a68b8dc9f9 100644 (file)
@@ -34,8 +34,7 @@ config PROCESSOR_ID
          used instead of the auto-probing which utilizes the register.
 
 config REMAP_VECTORS_TO_RAM
-       bool 'Install vectors to the beginning of RAM' if DRAM_BASE
-       depends on DRAM_BASE
+       bool 'Install vectors to the beginning of RAM'
        help
          The kernel needs to change the hardware exception vectors.
          In nommu mode, the hardware exception vectors are normally
index a0765e7ed6c7dd2166b2cb95874fc076532d526c..ea7832702a8f4473ffbc90e5c1c58271fab1dd9a 100644 (file)
@@ -32,6 +32,7 @@ extern void error(char *);
 
 /* Not needed, but used in some headers pulled in by decompressors */
 extern char * strstr(const char * s1, const char *s2);
+extern size_t strlen(const char *s);
 
 #ifdef CONFIG_KERNEL_GZIP
 #include "../../../../lib/decompress_inflate.c"
index fc6d541549a2c53b38b3f64ce48eebabebba9a0f..9150f9732785aaec77d069f8d92ba6d65e941e2c 100644 (file)
@@ -1196,7 +1196,7 @@ skip:
                bgt     loop1
 finished:
                ldmfd   sp!, {r0-r7, r9-r11}
-               mov     r10, #0                 @ swith back to cache level 0
+               mov     r10, #0                 @ switch back to cache level 0
                mcr     p15, 2, r10, c0, c0, 0  @ select current cache level in cssr
 iflush:
                mcr     p15, 0, r10, c7, c10, 4 @ DSB
index 62b3ffe62df2a30cac25687afb207d658fc7e0af..24b0f5f556f8a99ea3bd34fdc70b70213e06baa7 100644 (file)
@@ -65,8 +65,9 @@
                        compatible = "allwinner,simple-framebuffer",
                                     "simple-framebuffer";
                        allwinner,pipeline = "de_be0-lcd0-hdmi";
-                       clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
-                                <&ahb_gates 43>, <&ahb_gates 44>;
+                       clocks = <&ccu CLK_AHB_LCD>, <&ccu CLK_AHB_HDMI>,
+                                <&ccu CLK_AHB_DE_BE>, <&ccu CLK_DRAM_DE_BE>,
+                                <&ccu CLK_DE_BE>, <&ccu CLK_HDMI>;
                        status = "disabled";
                };
 
@@ -74,8 +75,8 @@
                        compatible = "allwinner,simple-framebuffer",
                                     "simple-framebuffer";
                        allwinner,pipeline = "de_be0-lcd0";
-                       clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
-                                <&ahb_gates 44>;
+                       clocks = <&ccu CLK_AHB_LCD>, <&ccu CLK_AHB_DE_BE>, <&ccu CLK_DE_BE>,
+                                <&ccu CLK_TCON_CH0>, <&ccu CLK_DRAM_DE_BE>;
                        status = "disabled";
                };
 
                        compatible = "allwinner,simple-framebuffer",
                                     "simple-framebuffer";
                        allwinner,pipeline = "de_be0-lcd0-tve0";
-                       clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>,
-                                <&ahb_gates 36>, <&ahb_gates 44>;
+                       clocks = <&ccu CLK_AHB_TVE>, <&ccu CLK_AHB_LCD>,
+                                <&ccu CLK_AHB_DE_BE>, <&ccu CLK_DE_BE>,
+                                <&ccu CLK_TCON_CH1>, <&ccu CLK_DRAM_DE_BE>;
                        status = "disabled";
                };
        };
 
-       clocks {
-               ahb_gates: clk@01c20060 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a10s-ahb-gates-clk";
-                       reg = <0x01c20060 0x8>;
-                       clocks = <&ahb>;
-                       clock-indices = <0>, <1>,
-                                       <2>, <5>, <6>,
-                                       <7>, <8>, <9>,
-                                       <10>, <13>,
-                                       <14>, <17>, <18>,
-                                       <20>, <21>, <22>,
-                                       <26>, <28>, <32>,
-                                       <34>, <36>, <40>,
-                                       <43>, <44>,
-                                       <46>, <51>,
-                                       <52>;
-                       clock-output-names = "ahb_usbotg", "ahb_ehci",
-                                            "ahb_ohci", "ahb_ss", "ahb_dma",
-                                            "ahb_bist", "ahb_mmc0", "ahb_mmc1",
-                                            "ahb_mmc2", "ahb_nand",
-                                            "ahb_sdram", "ahb_emac", "ahb_ts",
-                                            "ahb_spi0", "ahb_spi1", "ahb_spi2",
-                                            "ahb_gps", "ahb_stimer", "ahb_ve",
-                                            "ahb_tve", "ahb_lcd", "ahb_csi",
-                                            "ahb_hdmi", "ahb_de_be",
-                                            "ahb_de_fe", "ahb_iep",
-                                            "ahb_mali400";
-               };
-
-               apb0_gates: clk@01c20068 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a10s-apb0-gates-clk";
-                       reg = <0x01c20068 0x4>;
-                       clocks = <&apb0>;
-                       clock-indices = <0>, <3>,
-                                       <5>, <6>,
-                                       <10>;
-                       clock-output-names = "apb0_codec", "apb0_iis",
-                                            "apb0_pio", "apb0_ir",
-                                            "apb0_keypad";
-               };
-
-               apb1_gates: clk@01c2006c {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a10s-apb1-gates-clk";
-                       reg = <0x01c2006c 0x4>;
-                       clocks = <&apb1>;
-                       clock-indices = <0>, <1>,
-                                       <2>, <16>,
-                                       <17>, <18>,
-                                       <19>;
-                       clock-output-names = "apb1_i2c0", "apb1_i2c1",
-                                            "apb1_i2c2", "apb1_uart0",
-                                            "apb1_uart1", "apb1_uart2",
-                                            "apb1_uart3";
-               };
-       };
-
        soc@01c00000 {
                emac: ethernet@01c0b000 {
                        compatible = "allwinner,sun4i-a10-emac";
                        reg = <0x01c0b000 0x1000>;
                        interrupts = <55>;
-                       clocks = <&ahb_gates 17>;
+                       clocks = <&ccu CLK_AHB_EMAC>;
                        allwinner,sram = <&emac_sram 1>;
                        status = "disabled";
                };
                pwm: pwm@01c20e00 {
                        compatible = "allwinner,sun5i-a10s-pwm";
                        reg = <0x01c20e00 0xc>;
-                       clocks = <&osc24M>;
+                       clocks = <&ccu CLK_HOSC>;
                        #pwm-cells = <3>;
                        status = "disabled";
                };
                        interrupts = <1>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 16>;
+                       clocks = <&ccu CLK_APB1_UART0>;
                        status = "disabled";
                };
 
                        interrupts = <3>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 18>;
+                       clocks = <&ccu CLK_APB1_UART2>;
                        status = "disabled";
                };
        };
 };
 
+&ccu {
+       compatible = "allwinner,sun5i-a10s-ccu";
+};
+
 &pio {
        compatible = "allwinner,sun5i-a10s-pinctrl";
 
index 4131ab44558bac931a3ccba70e30da9a7f8f069c..fb2ddb9a04c9e8487da6a14975aa9d8ec0c34f70 100644 (file)
@@ -61,8 +61,8 @@
                        compatible = "allwinner,simple-framebuffer",
                                     "simple-framebuffer";
                        allwinner,pipeline = "de_be0-lcd0";
-                       clocks = <&ahb_gates 36>, <&ahb_gates 44>, <&de_be_clk>,
-                                <&tcon_ch0_clk>, <&dram_gates 26>;
+                       clocks = <&ccu CLK_AHB_LCD>, <&ccu CLK_AHB_DE_BE>, <&ccu CLK_DE_BE>,
+                                <&ccu CLK_TCON_CH0>, <&ccu CLK_DRAM_DE_BE>;
                        status = "disabled";
                };
        };
                };
        };
 
-       clocks {
-               ahb_gates: clk@01c20060 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-ahb-gates-clk";
-                       reg = <0x01c20060 0x8>;
-                       clocks = <&ahb>;
-                       clock-indices = <0>, <1>,
-                                       <2>, <5>, <6>,
-                                       <7>, <8>, <9>,
-                                       <10>, <13>,
-                                       <14>, <20>,
-                                       <21>, <22>,
-                                       <28>, <32>, <34>,
-                                       <36>, <40>, <44>,
-                                       <46>, <51>,
-                                       <52>;
-                       clock-output-names = "ahb_usbotg", "ahb_ehci",
-                                            "ahb_ohci", "ahb_ss", "ahb_dma",
-                                            "ahb_bist", "ahb_mmc0", "ahb_mmc1",
-                                            "ahb_mmc2", "ahb_nand",
-                                            "ahb_sdram", "ahb_spi0",
-                                            "ahb_spi1", "ahb_spi2",
-                                            "ahb_stimer", "ahb_ve", "ahb_tve",
-                                            "ahb_lcd", "ahb_csi", "ahb_de_be",
-                                            "ahb_de_fe", "ahb_iep",
-                                            "ahb_mali400";
-               };
-
-               apb0_gates: clk@01c20068 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-apb0-gates-clk";
-                       reg = <0x01c20068 0x4>;
-                       clocks = <&apb0>;
-                       clock-indices = <0>, <5>,
-                                       <6>;
-                       clock-output-names = "apb0_codec", "apb0_pio",
-                                            "apb0_ir";
-               };
-
-               apb1_gates: clk@01c2006c {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-apb1-gates-clk";
-                       reg = <0x01c2006c 0x4>;
-                       clocks = <&apb1>;
-                       clock-indices = <0>, <1>,
-                                       <2>, <17>,
-                                       <19>;
-                       clock-output-names = "apb1_i2c0", "apb1_i2c1",
-                                            "apb1_i2c2", "apb1_uart1",
-                                            "apb1_uart3";
-               };
-
-               dram_gates: clk@01c20100 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-dram-gates-clk",
-                                    "allwinner,sun4i-a10-gates-clk";
-                       reg = <0x01c20100 0x4>;
-                       clocks = <&pll5 0>;
-                       clock-indices = <0>,
-                                       <1>,
-                                       <25>,
-                                       <26>,
-                                       <29>,
-                                       <31>;
-                       clock-output-names = "dram_ve",
-                                            "dram_csi",
-                                            "dram_de_fe",
-                                            "dram_de_be",
-                                            "dram_ace",
-                                            "dram_iep";
-               };
-
-               de_be_clk: clk@01c20104 {
-                       #clock-cells = <0>;
-                       #reset-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-display-clk";
-                       reg = <0x01c20104 0x4>;
-                       clocks = <&pll3>, <&pll7>, <&pll5 1>;
-                       clock-output-names = "de-be";
-               };
-
-               de_fe_clk: clk@01c2010c {
-                       #clock-cells = <0>;
-                       #reset-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-display-clk";
-                       reg = <0x01c2010c 0x4>;
-                       clocks = <&pll3>, <&pll7>, <&pll5 1>;
-                       clock-output-names = "de-fe";
-               };
-
-               tcon_ch0_clk: clk@01c20118 {
-                       #clock-cells = <0>;
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-tcon-ch0-clk";
-                       reg = <0x01c20118 0x4>;
-                       clocks = <&pll3>, <&pll7>, <&pll3x2>, <&pll7x2>;
-                       clock-output-names = "tcon-ch0-sclk";
-               };
-
-               tcon_ch1_clk: clk@01c2012c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-tcon-ch1-clk";
-                       reg = <0x01c2012c 0x4>;
-                       clocks = <&pll3>, <&pll7>, <&pll3x2>, <&pll7x2>;
-                       clock-output-names = "tcon-ch1-sclk";
-               };
-       };
-
        display-engine {
                compatible = "allwinner,sun5i-a13-display-engine";
                allwinner,pipelines = <&fe0>;
                        compatible = "allwinner,sun5i-a13-tcon";
                        reg = <0x01c0c000 0x1000>;
                        interrupts = <44>;
-                       resets = <&tcon_ch0_clk 1>;
+                       resets = <&ccu RST_LCD>;
                        reset-names = "lcd";
-                       clocks = <&ahb_gates 36>,
-                                <&tcon_ch0_clk>,
-                                <&tcon_ch1_clk>;
+                       clocks = <&ccu CLK_AHB_LCD>,
+                                <&ccu CLK_TCON_CH0>,
+                                <&ccu CLK_TCON_CH1>;
                        clock-names = "ahb",
                                      "tcon-ch0",
                                      "tcon-ch1";
                pwm: pwm@01c20e00 {
                        compatible = "allwinner,sun5i-a13-pwm";
                        reg = <0x01c20e00 0xc>;
-                       clocks = <&osc24M>;
+                       clocks = <&ccu CLK_HOSC>;
                        #pwm-cells = <3>;
                        status = "disabled";
                };
                        compatible = "allwinner,sun5i-a13-display-frontend";
                        reg = <0x01e00000 0x20000>;
                        interrupts = <47>;
-                       clocks = <&ahb_gates 46>, <&de_fe_clk>,
-                                <&dram_gates 25>;
+                       clocks = <&ccu CLK_DE_FE>, <&ccu CLK_DE_FE>,
+                                <&ccu CLK_DRAM_DE_FE>;
                        clock-names = "ahb", "mod",
                                      "ram";
-                       resets = <&de_fe_clk>;
+                       resets = <&ccu RST_DE_FE>;
                        status = "disabled";
 
                        ports {
                be0: display-backend@01e60000 {
                        compatible = "allwinner,sun5i-a13-display-backend";
                        reg = <0x01e60000 0x10000>;
-                       clocks = <&ahb_gates 44>, <&de_be_clk>,
-                                <&dram_gates 26>;
+                       clocks = <&ccu CLK_AHB_DE_BE>, <&ccu CLK_DE_BE>,
+                                <&ccu CLK_DRAM_DE_BE>;
                        clock-names = "ahb", "mod",
                                      "ram";
-                       resets = <&de_be_clk>;
+                       resets = <&ccu RST_DE_BE>;
                        status = "disabled";
 
-                       assigned-clocks = <&de_be_clk>;
+                       assigned-clocks = <&ccu CLK_DE_BE>;
                        assigned-clock-rates = <300000000>;
 
                        ports {
        };
 };
 
+&ccu {
+       compatible = "allwinner,sun5i-a13-ccu";
+};
+
 &cpu0 {
        clock-latency = <244144>; /* 8 32k periods */
        operating-points = <
index f83ae3fc632981634c8ba57e9798ad22c2463542..cb9b2aaf7297ab6cfecdd5620e78631410e1f03c 100644 (file)
  *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <dt-bindings/clock/sun4i-a10-pll2.h>
+#include <dt-bindings/clock/sun5i-ccu.h>
 #include <dt-bindings/dma/sun4i-a10.h>
 #include <dt-bindings/pinctrl/sun4i-a10.h>
+#include <dt-bindings/reset/sun5i-ccu.h>
 
 / {
        interrupt-parent = <&intc>;
@@ -59,7 +60,7 @@
                        device_type = "cpu";
                        compatible = "arm,cortex-a8";
                        reg = <0x0>;
-                       clocks = <&cpu>;
+                       clocks = <&ccu CLK_CPU>;
                };
        };
 
                #size-cells = <1>;
                ranges;
 
-               /*
-                * This is a dummy clock, to be used as placeholder on
-                * other mux clocks when a specific parent clock is not
-                * yet implemented. It should be dropped when the driver
-                * is complete.
-                */
-               dummy: dummy {
-                       #clock-cells = <0>;
-                       compatible = "fixed-clock";
-                       clock-frequency = <0>;
-               };
-
                osc24M: clk@01c20050 {
                        #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-osc-clk";
-                       reg = <0x01c20050 0x4>;
+                       compatible = "fixed-clock";
                        clock-frequency = <24000000>;
                        clock-output-names = "osc24M";
                };
 
-               osc3M: osc3M-clk {
-                       compatible = "fixed-factor-clock";
-                       #clock-cells = <0>;
-                       clock-div = <8>;
-                       clock-mult = <1>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "osc3M";
-               };
-
                osc32k: clk@0 {
                        #clock-cells = <0>;
                        compatible = "fixed-clock";
                        clock-frequency = <32768>;
                        clock-output-names = "osc32k";
                };
-
-               pll1: clk@01c20000 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-pll1-clk";
-                       reg = <0x01c20000 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll1";
-               };
-
-               pll2: clk@01c20008 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-pll2-clk";
-                       reg = <0x01c20008 0x8>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll2-1x", "pll2-2x",
-                                            "pll2-4x", "pll2-8x";
-               };
-
-               pll3: clk@01c20010 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-pll3-clk";
-                       reg = <0x01c20010 0x4>;
-                       clocks = <&osc3M>;
-                       clock-output-names = "pll3";
-               };
-
-               pll3x2: pll3x2-clk {
-                       compatible = "allwinner,sun4i-a10-pll3-2x-clk";
-                       #clock-cells = <0>;
-                       clock-div = <1>;
-                       clock-mult = <2>;
-                       clocks = <&pll3>;
-                       clock-output-names = "pll3-2x";
-               };
-
-               pll4: clk@01c20018 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-pll1-clk";
-                       reg = <0x01c20018 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll4";
-               };
-
-               pll5: clk@01c20020 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-pll5-clk";
-                       reg = <0x01c20020 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll5_ddr", "pll5_other";
-               };
-
-               pll6: clk@01c20028 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-pll6-clk";
-                       reg = <0x01c20028 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll6_sata", "pll6_other", "pll6";
-               };
-
-               pll7: clk@01c20030 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-pll3-clk";
-                       reg = <0x01c20030 0x4>;
-                       clocks = <&osc3M>;
-                       clock-output-names = "pll7";
-               };
-
-               pll7x2: pll7x2-clk {
-                       compatible = "allwinner,sun4i-a10-pll3-2x-clk";
-                       #clock-cells = <0>;
-                       clock-div = <1>;
-                       clock-mult = <2>;
-                       clocks = <&pll7>;
-                       clock-output-names = "pll7-2x";
-               };
-
-               /* dummy is 200M */
-               cpu: cpu@01c20054 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-cpu-clk";
-                       reg = <0x01c20054 0x4>;
-                       clocks = <&osc32k>, <&osc24M>, <&pll1>, <&dummy>;
-                       clock-output-names = "cpu";
-               };
-
-               axi: axi@01c20054 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-axi-clk";
-                       reg = <0x01c20054 0x4>;
-                       clocks = <&cpu>;
-                       clock-output-names = "axi";
-               };
-
-               ahb: ahb@01c20054 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun5i-a13-ahb-clk";
-                       reg = <0x01c20054 0x4>;
-                       clocks = <&axi>, <&cpu>, <&pll6 1>;
-                       clock-output-names = "ahb";
-                       /*
-                        * Use PLL6 as parent, instead of CPU/AXI
-                        * which has rate changes due to cpufreq
-                        */
-                       assigned-clocks = <&ahb>;
-                       assigned-clock-parents = <&pll6 1>;
-               };
-
-               apb0: apb0@01c20054 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-apb0-clk";
-                       reg = <0x01c20054 0x4>;
-                       clocks = <&ahb>;
-                       clock-output-names = "apb0";
-               };
-
-               apb1: clk@01c20058 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-apb1-clk";
-                       reg = <0x01c20058 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-                       clock-output-names = "apb1";
-               };
-
-               axi_gates: clk@01c2005c {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-gates-clk";
-                       reg = <0x01c2005c 0x4>;
-                       clocks = <&axi>;
-                       clock-indices = <0>;
-                       clock-output-names = "axi_dram";
-               };
-
-               ahb_gates: clk@01c20060 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-ahb-gates-clk";
-                       reg = <0x01c20060 0x8>;
-                       clocks = <&ahb>;
-                       clock-indices = <0>, <1>,
-                                       <2>, <5>, <6>,
-                                       <7>, <8>, <9>,
-                                       <10>, <13>,
-                                       <14>, <17>, <20>,
-                                       <21>, <22>,
-                                       <28>, <32>, <34>,
-                                       <36>, <40>, <44>,
-                                       <46>, <51>,
-                                       <52>;
-                       clock-output-names = "ahb_usbotg", "ahb_ehci",
-                                            "ahb_ohci", "ahb_ss", "ahb_dma",
-                                            "ahb_bist", "ahb_mmc0", "ahb_mmc1",
-                                            "ahb_mmc2", "ahb_nand",
-                                            "ahb_sdram", "ahb_emac", "ahb_spi0",
-                                            "ahb_spi1", "ahb_spi2",
-                                            "ahb_hstimer", "ahb_ve", "ahb_tve",
-                                            "ahb_lcd", "ahb_csi", "ahb_de_be",
-                                            "ahb_de_fe", "ahb_iep",
-                                            "ahb_mali400";
-               };
-
-               apb0_gates: clk@01c20068 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-gates-clk";
-                       reg = <0x01c20068 0x4>;
-                       clocks = <&apb0>;
-                       clock-indices = <0>, <3>,
-                                       <5>, <6>;
-                       clock-output-names = "apb0_codec", "apb0_i2s0",
-                                            "apb0_pio", "apb0_ir";
-               };
-
-               apb1_gates: clk@01c2006c {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-gates-clk";
-                       reg = <0x01c2006c 0x4>;
-                       clocks = <&apb1>;
-                       clock-indices = <0>, <1>,
-                                       <2>, <17>,
-                                       <18>, <19>;
-                       clock-output-names = "apb1_i2c0", "apb1_i2c1",
-                                            "apb1_i2c2", "apb1_uart1",
-                                            "apb1_uart2", "apb1_uart3";
-               };
-
-               nand_clk: clk@01c20080 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c20080 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "nand";
-               };
-
-               ms_clk: clk@01c20084 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c20084 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "ms";
-               };
-
-               mmc0_clk: clk@01c20088 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-mmc-clk";
-                       reg = <0x01c20088 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "mmc0",
-                                            "mmc0_output",
-                                            "mmc0_sample";
-               };
-
-               mmc1_clk: clk@01c2008c {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-mmc-clk";
-                       reg = <0x01c2008c 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "mmc1",
-                                            "mmc1_output",
-                                            "mmc1_sample";
-               };
-
-               mmc2_clk: clk@01c20090 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-mmc-clk";
-                       reg = <0x01c20090 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "mmc2",
-                                            "mmc2_output",
-                                            "mmc2_sample";
-               };
-
-               ts_clk: clk@01c20098 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c20098 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "ts";
-               };
-
-               ss_clk: clk@01c2009c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c2009c 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "ss";
-               };
-
-               spi0_clk: clk@01c200a0 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c200a0 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "spi0";
-               };
-
-               spi1_clk: clk@01c200a4 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c200a4 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "spi1";
-               };
-
-               spi2_clk: clk@01c200a8 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c200a8 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "spi2";
-               };
-
-               ir0_clk: clk@01c200b0 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c200b0 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "ir0";
-               };
-
-               i2s0_clk: clk@01c200b8 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod1-clk";
-                       reg = <0x01c200b8 0x4>;
-                       clocks = <&pll2 SUN4I_A10_PLL2_8X>,
-                                <&pll2 SUN4I_A10_PLL2_4X>,
-                                <&pll2 SUN4I_A10_PLL2_2X>,
-                                <&pll2 SUN4I_A10_PLL2_1X>;
-                       clock-output-names = "i2s0";
-               };
-
-               spdif_clk: clk@01c200c0 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod1-clk";
-                       reg = <0x01c200c0 0x4>;
-                       clocks = <&pll2 SUN4I_A10_PLL2_8X>,
-                                <&pll2 SUN4I_A10_PLL2_4X>,
-                                <&pll2 SUN4I_A10_PLL2_2X>,
-                                <&pll2 SUN4I_A10_PLL2_1X>;
-                       clock-output-names = "spdif";
-               };
-
-               usb_clk: clk@01c200cc {
-                       #clock-cells = <1>;
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-usb-clk";
-                       reg = <0x01c200cc 0x4>;
-                       clocks = <&pll6 1>;
-                       clock-output-names = "usb_ohci0", "usb_phy";
-               };
-
-               dram_gates: clk@01c20100 {
-                       #clock-cells = <1>;
-                       compatible = "nextthing,gr8-dram-gates-clk",
-                                    "allwinner,sun4i-a10-gates-clk";
-                       reg = <0x01c20100 0x4>;
-                       clocks = <&pll5 0>;
-                       clock-indices = <0>,
-                                       <1>,
-                                       <25>,
-                                       <26>,
-                                       <29>,
-                                       <31>;
-                       clock-output-names = "dram_ve",
-                                            "dram_csi",
-                                            "dram_de_fe",
-                                            "dram_de_be",
-                                            "dram_ace",
-                                            "dram_iep";
-               };
-
-               de_be_clk: clk@01c20104 {
-                       #clock-cells = <0>;
-                       #reset-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-display-clk";
-                       reg = <0x01c20104 0x4>;
-                       clocks = <&pll3>, <&pll7>, <&pll5 1>;
-                       clock-output-names = "de-be";
-               };
-
-               de_fe_clk: clk@01c2010c {
-                       #clock-cells = <0>;
-                       #reset-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-display-clk";
-                       reg = <0x01c2010c 0x4>;
-                       clocks = <&pll3>, <&pll7>, <&pll5 1>;
-                       clock-output-names = "de-fe";
-               };
-
-               tcon_ch0_clk: clk@01c20118 {
-                       #clock-cells = <0>;
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-tcon-ch0-clk";
-                       reg = <0x01c20118 0x4>;
-                       clocks = <&pll3>, <&pll7>, <&pll3x2>, <&pll7x2>;
-                       clock-output-names = "tcon-ch0-sclk";
-               };
-
-               tcon_ch1_clk: clk@01c2012c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-tcon-ch1-clk";
-                       reg = <0x01c2012c 0x4>;
-                       clocks = <&pll3>, <&pll7>, <&pll3x2>, <&pll7x2>;
-                       clock-output-names = "tcon-ch1-sclk";
-               };
-
-               codec_clk: clk@01c20140 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-codec-clk";
-                       reg = <0x01c20140 0x4>;
-                       clocks = <&pll2 SUN4I_A10_PLL2_1X>;
-                       clock-output-names = "codec";
-               };
-
-               mbus_clk: clk@01c2015c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun5i-a13-mbus-clk";
-                       reg = <0x01c2015c 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "mbus";
-               };
        };
 
        display-engine {
                        compatible = "allwinner,sun4i-a10-dma";
                        reg = <0x01c02000 0x1000>;
                        interrupts = <27>;
-                       clocks = <&ahb_gates 6>;
+                       clocks = <&ccu CLK_AHB_DMA>;
                        #dma-cells = <2>;
                };
 
                        compatible = "allwinner,sun4i-a10-nand";
                        reg = <0x01c03000 0x1000>;
                        interrupts = <37>;
-                       clocks = <&ahb_gates 13>, <&nand_clk>;
+                       clocks = <&ccu CLK_AHB_NAND>, <&ccu CLK_NAND>;
                        clock-names = "ahb", "mod";
                        dmas = <&dma SUN4I_DMA_DEDICATED 3>;
                        dma-names = "rxtx";
                        compatible = "allwinner,sun4i-a10-spi";
                        reg = <0x01c05000 0x1000>;
                        interrupts = <10>;
-                       clocks = <&ahb_gates 20>, <&spi0_clk>;
+                       clocks = <&ccu CLK_AHB_SPI0>, <&ccu CLK_SPI0>;
                        clock-names = "ahb", "mod";
                        dmas = <&dma SUN4I_DMA_DEDICATED 27>,
                               <&dma SUN4I_DMA_DEDICATED 26>;
                        compatible = "allwinner,sun4i-a10-spi";
                        reg = <0x01c06000 0x1000>;
                        interrupts = <11>;
-                       clocks = <&ahb_gates 21>, <&spi1_clk>;
+                       clocks = <&ccu CLK_AHB_SPI1>, <&ccu CLK_SPI1>;
                        clock-names = "ahb", "mod";
                        dmas = <&dma SUN4I_DMA_DEDICATED 9>,
                               <&dma SUN4I_DMA_DEDICATED 8>;
                tve0: tv-encoder@01c0a000 {
                        compatible = "allwinner,sun4i-a10-tv-encoder";
                        reg = <0x01c0a000 0x1000>;
-                       clocks = <&ahb_gates 34>;
-                       resets = <&tcon_ch0_clk 0>;
+                       clocks = <&ccu CLK_AHB_TVE>;
+                       resets = <&ccu RST_TVE>;
                        status = "disabled";
 
                        port {
                        compatible = "allwinner,sun5i-a13-tcon";
                        reg = <0x01c0c000 0x1000>;
                        interrupts = <44>;
-                       resets = <&tcon_ch0_clk 1>;
+                       resets = <&ccu RST_LCD>;
                        reset-names = "lcd";
-                       clocks = <&ahb_gates 36>,
-                                <&tcon_ch0_clk>,
-                                <&tcon_ch1_clk>;
+                       clocks = <&ccu CLK_AHB_LCD>,
+                                <&ccu CLK_TCON_CH0>,
+                                <&ccu CLK_TCON_CH1>;
                        clock-names = "ahb",
                                      "tcon-ch0",
                                      "tcon-ch1";
                mmc0: mmc@01c0f000 {
                        compatible = "allwinner,sun5i-a13-mmc";
                        reg = <0x01c0f000 0x1000>;
-                       clocks = <&ahb_gates 8>,
-                                <&mmc0_clk 0>,
-                                <&mmc0_clk 1>,
-                                <&mmc0_clk 2>;
-                       clock-names = "ahb",
-                                     "mmc",
-                                     "output",
-                                     "sample";
+                       clocks = <&ccu CLK_AHB_MMC0>, <&ccu CLK_MMC0>;
+                       clock-names = "ahb", "mmc";
                        interrupts = <32>;
                        status = "disabled";
                        #address-cells = <1>;
                mmc1: mmc@01c10000 {
                        compatible = "allwinner,sun5i-a13-mmc";
                        reg = <0x01c10000 0x1000>;
-                       clocks = <&ahb_gates 9>,
-                                <&mmc1_clk 0>,
-                                <&mmc1_clk 1>,
-                                <&mmc1_clk 2>;
-                       clock-names = "ahb",
-                                     "mmc",
-                                     "output",
-                                     "sample";
+                       clocks = <&ccu CLK_AHB_MMC1>, <&ccu CLK_MMC1>;
+                       clock-names = "ahb", "mmc";
                        interrupts = <33>;
                        status = "disabled";
                        #address-cells = <1>;
                mmc2: mmc@01c11000 {
                        compatible = "allwinner,sun5i-a13-mmc";
                        reg = <0x01c11000 0x1000>;
-                       clocks = <&ahb_gates 10>,
-                                <&mmc2_clk 0>,
-                                <&mmc2_clk 1>,
-                                <&mmc2_clk 2>;
-                       clock-names = "ahb",
-                                     "mmc",
-                                     "output",
-                                     "sample";
+                       clocks = <&ccu CLK_AHB_MMC2>, <&ccu CLK_MMC2>;
+                       clock-names = "ahb", "mmc";
                        interrupts = <34>;
                        status = "disabled";
                        #address-cells = <1>;
                usb_otg: usb@01c13000 {
                        compatible = "allwinner,sun4i-a10-musb";
                        reg = <0x01c13000 0x0400>;
-                       clocks = <&ahb_gates 0>;
+                       clocks = <&ccu CLK_AHB_OTG>;
                        interrupts = <38>;
                        interrupt-names = "mc";
                        phys = <&usbphy 0>;
                        compatible = "allwinner,sun5i-a13-usb-phy";
                        reg = <0x01c13400 0x10 0x01c14800 0x4>;
                        reg-names = "phy_ctrl", "pmu1";
-                       clocks = <&usb_clk 8>;
+                       clocks = <&ccu CLK_USB_PHY0>;
                        clock-names = "usb_phy";
-                       resets = <&usb_clk 0>, <&usb_clk 1>;
+                       resets = <&ccu RST_USB_PHY0>, <&ccu RST_USB_PHY1>;
                        reset-names = "usb0_reset", "usb1_reset";
                        status = "disabled";
                };
                        compatible = "allwinner,sun5i-a13-ehci", "generic-ehci";
                        reg = <0x01c14000 0x100>;
                        interrupts = <39>;
-                       clocks = <&ahb_gates 1>;
+                       clocks = <&ccu CLK_AHB_EHCI>;
                        phys = <&usbphy 1>;
                        phy-names = "usb";
                        status = "disabled";
                        compatible = "allwinner,sun5i-a13-ohci", "generic-ohci";
                        reg = <0x01c14400 0x100>;
                        interrupts = <40>;
-                       clocks = <&usb_clk 6>, <&ahb_gates 2>;
+                       clocks = <&ccu CLK_USB_OHCI>, <&ccu CLK_AHB_OHCI>;
                        phys = <&usbphy 1>;
                        phy-names = "usb";
                        status = "disabled";
                        compatible = "allwinner,sun4i-a10-spi";
                        reg = <0x01c17000 0x1000>;
                        interrupts = <12>;
-                       clocks = <&ahb_gates 22>, <&spi2_clk>;
+                       clocks = <&ccu CLK_AHB_SPI2>, <&ccu CLK_SPI2>;
                        clock-names = "ahb", "mod";
                        dmas = <&dma SUN4I_DMA_DEDICATED 29>,
                               <&dma SUN4I_DMA_DEDICATED 28>;
                        #size-cells = <0>;
                };
 
+               ccu: clock@01c20000 {
+                       compatible = "nextthing,gr8-ccu";
+                       reg = <0x01c20000 0x400>;
+                       clocks = <&osc24M>, <&osc32k>;
+                       clock-names = "hosc", "losc";
+                       #clock-cells = <1>;
+                       #reset-cells = <1>;
+               };
+
                intc: interrupt-controller@01c20400 {
                        compatible = "allwinner,sun4i-a10-ic";
                        reg = <0x01c20400 0x400>;
                        compatible = "nextthing,gr8-pinctrl";
                        reg = <0x01c20800 0x400>;
                        interrupts = <28>;
-                       clocks = <&apb0_gates 5>;
+                       clocks = <&ccu CLK_APB0_PIO>;
                        gpio-controller;
                        interrupt-controller;
                        #interrupt-cells = <3>;
                pwm: pwm@01c20e00 {
                        compatible = "allwinner,sun5i-a10s-pwm";
                        reg = <0x01c20e00 0xc>;
-                       clocks = <&osc24M>;
+                       clocks = <&ccu CLK_HOSC>;
                        #pwm-cells = <3>;
                        status = "disabled";
                };
                        compatible = "allwinner,sun4i-a10-timer";
                        reg = <0x01c20c00 0x90>;
                        interrupts = <22>;
-                       clocks = <&osc24M>;
+                       clocks = <&ccu CLK_HOSC>;
                };
 
                wdt: watchdog@01c20c90 {
                        compatible = "allwinner,sun4i-a10-spdif";
                        reg = <0x01c21000 0x400>;
                        interrupts = <13>;
-                       clocks = <&apb0_gates 1>, <&spdif_clk>;
+                       clocks = <&ccu CLK_APB0_SPDIF>, <&ccu CLK_SPDIF>;
                        clock-names = "apb", "spdif";
                        dmas = <&dma SUN4I_DMA_NORMAL 2>,
                               <&dma SUN4I_DMA_NORMAL 2>;
 
                ir0: ir@01c21800 {
                        compatible = "allwinner,sun4i-a10-ir";
-                       clocks = <&apb0_gates 6>, <&ir0_clk>;
+                       clocks = <&ccu CLK_APB0_IR>, <&ccu CLK_IR>;
                        clock-names = "apb", "ir";
                        interrupts = <5>;
                        reg = <0x01c21800 0x40>;
                        compatible = "allwinner,sun4i-a10-i2s";
                        reg = <0x01c22400 0x400>;
                        interrupts = <16>;
-                       clocks = <&apb0_gates 3>, <&i2s0_clk>;
+                       clocks = <&ccu CLK_APB0_I2S>, <&ccu CLK_I2S>;
                        clock-names = "apb", "mod";
                        dmas = <&dma SUN4I_DMA_NORMAL 3>,
                               <&dma SUN4I_DMA_NORMAL 3>;
                        compatible = "allwinner,sun4i-a10-codec";
                        reg = <0x01c22c00 0x40>;
                        interrupts = <30>;
-                       clocks = <&apb0_gates 0>, <&codec_clk>;
+                       clocks = <&ccu CLK_APB0_CODEC>, <&ccu CLK_CODEC>;
                        clock-names = "apb", "codec";
                        dmas = <&dma SUN4I_DMA_NORMAL 19>,
                               <&dma SUN4I_DMA_NORMAL 19>;
                        interrupts = <2>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 17>;
+                       clocks = <&ccu CLK_APB1_UART1>;
                        status = "disabled";
                };
 
                        interrupts = <3>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 18>;
+                       clocks = <&ccu CLK_APB1_UART2>;
                        status = "disabled";
                };
 
                        interrupts = <4>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 19>;
+                       clocks = <&ccu CLK_APB1_UART3>;
                        status = "disabled";
                };
 
                        compatible = "allwinner,sun4i-a10-i2c";
                        reg = <0x01c2ac00 0x400>;
                        interrupts = <7>;
-                       clocks = <&apb1_gates 0>;
+                       clocks = <&ccu CLK_APB1_I2C0>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun4i-a10-i2c";
                        reg = <0x01c2b000 0x400>;
                        interrupts = <8>;
-                       clocks = <&apb1_gates 1>;
+                       clocks = <&ccu CLK_APB1_I2C1>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun4i-a10-i2c";
                        reg = <0x01c2b400 0x400>;
                        interrupts = <9>;
-                       clocks = <&apb1_gates 2>;
+                       clocks = <&ccu CLK_APB1_I2C2>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun5i-a13-hstimer";
                        reg = <0x01c60000 0x1000>;
                        interrupts = <82>, <83>;
-                       clocks = <&ahb_gates 28>;
+                       clocks = <&ccu CLK_AHB_HSTIMER>;
                };
 
                fe0: display-frontend@01e00000 {
                        compatible = "allwinner,sun5i-a13-display-frontend";
                        reg = <0x01e00000 0x20000>;
                        interrupts = <47>;
-                       clocks = <&ahb_gates 46>, <&de_fe_clk>,
-                                <&dram_gates 25>;
+                       clocks = <&ccu CLK_AHB_DE_FE>, <&ccu CLK_DE_FE>,
+                                <&ccu CLK_DRAM_DE_FE>;
                        clock-names = "ahb", "mod",
                                      "ram";
-                       resets = <&de_fe_clk>;
+                       resets = <&ccu RST_DE_FE>;
                        status = "disabled";
 
                        ports {
                be0: display-backend@01e60000 {
                        compatible = "allwinner,sun5i-a13-display-backend";
                        reg = <0x01e60000 0x10000>;
-                       clocks = <&ahb_gates 44>, <&de_be_clk>,
-                                <&dram_gates 26>;
+                       clocks = <&ccu CLK_AHB_DE_BE>, <&ccu CLK_DE_BE>,
+                                <&ccu CLK_DRAM_DE_BE>;
                        clock-names = "ahb", "mod",
                                      "ram";
-                       resets = <&de_be_clk>;
+                       resets = <&ccu RST_DE_BE>;
                        status = "disabled";
 
-                       assigned-clocks = <&de_be_clk>;
+                       assigned-clocks = <&ccu CLK_DE_BE>;
                        assigned-clock-rates = <300000000>;
 
                        ports {
index 8b058f53b7dcee2fc0cdbbe3541b47e9cc3e4671..4c1141396c99e8e5dbad67d4a055fc82d77a097d 100644 (file)
@@ -51,9 +51,9 @@
                        compatible = "allwinner,simple-framebuffer",
                                     "simple-framebuffer";
                        allwinner,pipeline = "de_be0-lcd0-tve0";
-                       clocks = <&ahb_gates 34>, <&ahb_gates 36>,
-                                <&ahb_gates 44>, <&de_be_clk>,
-                                <&tcon_ch1_clk>, <&dram_gates 26>;
+                       clocks = <&ccu CLK_AHB_TVE>, <&ccu CLK_AHB_LCD>,
+                                <&ccu CLK_AHB_DE_BE>, <&ccu CLK_DE_BE>,
+                                <&ccu CLK_TCON_CH1>, <&ccu CLK_DRAM_DE_BE>;
                        status = "disabled";
                };
        };
@@ -62,8 +62,8 @@
                tve0: tv-encoder@01c0a000 {
                        compatible = "allwinner,sun4i-a10-tv-encoder";
                        reg = <0x01c0a000 0x1000>;
-                       clocks = <&ahb_gates 34>;
-                       resets = <&tcon_ch0_clk 0>;
+                       clocks = <&ccu CLK_AHB_TVE>;
+                       resets = <&ccu RST_TVE>;
                        status = "disabled";
 
                        port {
index c058d37d54336fc159c8a5276cee182df388ad7d..a9574a6cd95c6d5fcb84c0e9c66bee5a3fc550ec 100644 (file)
 
 #include "skeleton.dtsi"
 
-#include <dt-bindings/clock/sun4i-a10-pll2.h>
+#include <dt-bindings/clock/sun5i-ccu.h>
 #include <dt-bindings/dma/sun4i-a10.h>
 #include <dt-bindings/pinctrl/sun4i-a10.h>
+#include <dt-bindings/reset/sun5i-ccu.h>
 
 / {
        interrupt-parent = <&intc>;
@@ -59,7 +60,7 @@
                        device_type = "cpu";
                        compatible = "arm,cortex-a8";
                        reg = <0x0>;
-                       clocks = <&cpu>;
+                       clocks = <&ccu CLK_CPU>;
                };
        };
 
                #size-cells = <1>;
                ranges;
 
-               /*
-                * This is a dummy clock, to be used as placeholder on
-                * other mux clocks when a specific parent clock is not
-                * yet implemented. It should be dropped when the driver
-                * is complete.
-                */
-               dummy: dummy {
-                       #clock-cells = <0>;
-                       compatible = "fixed-clock";
-                       clock-frequency = <0>;
-               };
-
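+               /*
+                * osc24M becomes a plain fixed-clock: its gate and the
+                * rest of the clock tree are now owned by the CCU driver.
+                */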
                osc24M: clk@01c20050 {
                        #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-osc-clk";
-                       reg = <0x01c20050 0x4>;
+                       compatible = "fixed-clock";
                        clock-frequency = <24000000>;
                        clock-output-names = "osc24M";
                };
 
-               osc3M: osc3M_clk {
-                       compatible = "fixed-factor-clock";
-                       #clock-cells = <0>;
-                       clock-div = <8>;
-                       clock-mult = <1>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "osc3M";
-               };
-
                osc32k: clk@0 {
                        #clock-cells = <0>;
                        compatible = "fixed-clock";
                        clock-frequency = <32768>;
                        clock-output-names = "osc32k";
                };
-
-               pll1: clk@01c20000 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-pll1-clk";
-                       reg = <0x01c20000 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll1";
-               };
-
-               pll2: clk@01c20008 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-pll2-clk";
-                       reg = <0x01c20008 0x8>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll2-1x", "pll2-2x",
-                                            "pll2-4x", "pll2-8x";
-               };
-
-               pll3: clk@01c20010 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-pll3-clk";
-                       reg = <0x01c20010 0x4>;
-                       clocks = <&osc3M>;
-                       clock-output-names = "pll3";
-               };
-
-               pll3x2: pll3x2_clk {
-                       compatible = "allwinner,sun4i-a10-pll3-2x-clk", "fixed-factor-clock";
-                       #clock-cells = <0>;
-                       clock-div = <1>;
-                       clock-mult = <2>;
-                       clocks = <&pll3>;
-                       clock-output-names = "pll3-2x";
-               };
-
-               pll4: clk@01c20018 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-pll1-clk";
-                       reg = <0x01c20018 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll4";
-               };
-
-               pll5: clk@01c20020 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-pll5-clk";
-                       reg = <0x01c20020 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll5_ddr", "pll5_other";
-               };
-
-               pll6: clk@01c20028 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-pll6-clk";
-                       reg = <0x01c20028 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll6_sata", "pll6_other", "pll6";
-               };
-
-               pll7: clk@01c20030 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-pll3-clk";
-                       reg = <0x01c20030 0x4>;
-                       clocks = <&osc3M>;
-                       clock-output-names = "pll7";
-               };
-
-               pll7x2: pll7x2_clk {
-                       compatible = "fixed-factor-clock";
-                       #clock-cells = <0>;
-                       clock-div = <1>;
-                       clock-mult = <2>;
-                       clocks = <&pll7>;
-                       clock-output-names = "pll7-2x";
-               };
-
-               /* dummy is 200M */
-               cpu: cpu@01c20054 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-cpu-clk";
-                       reg = <0x01c20054 0x4>;
-                       clocks = <&osc32k>, <&osc24M>, <&pll1>, <&dummy>;
-                       clock-output-names = "cpu";
-               };
-
-               axi: axi@01c20054 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-axi-clk";
-                       reg = <0x01c20054 0x4>;
-                       clocks = <&cpu>;
-                       clock-output-names = "axi";
-               };
-
-               ahb: ahb@01c20054 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun5i-a13-ahb-clk";
-                       reg = <0x01c20054 0x4>;
-                       clocks = <&axi>, <&cpu>, <&pll6 1>;
-                       clock-output-names = "ahb";
-                       /*
-                        * Use PLL6 as parent, instead of CPU/AXI
-                        * which has rate changes due to cpufreq
-                        */
-                       assigned-clocks = <&ahb>;
-                       assigned-clock-parents = <&pll6 1>;
-               };
-
-               apb0: apb0@01c20054 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-apb0-clk";
-                       reg = <0x01c20054 0x4>;
-                       clocks = <&ahb>;
-                       clock-output-names = "apb0";
-               };
-
-               apb1: clk@01c20058 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-apb1-clk";
-                       reg = <0x01c20058 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-                       clock-output-names = "apb1";
-               };
-
-               axi_gates: clk@01c2005c {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-axi-gates-clk";
-                       reg = <0x01c2005c 0x4>;
-                       clocks = <&axi>;
-                       clock-indices = <0>;
-                       clock-output-names = "axi_dram";
-               };
-
-               nand_clk: clk@01c20080 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c20080 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "nand";
-               };
-
-               ms_clk: clk@01c20084 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c20084 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "ms";
-               };
-
-               mmc0_clk: clk@01c20088 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-mmc-clk";
-                       reg = <0x01c20088 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "mmc0",
-                                            "mmc0_output",
-                                            "mmc0_sample";
-               };
-
-               mmc1_clk: clk@01c2008c {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-mmc-clk";
-                       reg = <0x01c2008c 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "mmc1",
-                                            "mmc1_output",
-                                            "mmc1_sample";
-               };
-
-               mmc2_clk: clk@01c20090 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun4i-a10-mmc-clk";
-                       reg = <0x01c20090 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "mmc2",
-                                            "mmc2_output",
-                                            "mmc2_sample";
-               };
-
-               ts_clk: clk@01c20098 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c20098 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "ts";
-               };
-
-               ss_clk: clk@01c2009c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c2009c 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "ss";
-               };
-
-               spi0_clk: clk@01c200a0 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c200a0 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "spi0";
-               };
-
-               spi1_clk: clk@01c200a4 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c200a4 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "spi1";
-               };
-
-               spi2_clk: clk@01c200a8 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c200a8 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "spi2";
-               };
-
-               ir0_clk: clk@01c200b0 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-mod0-clk";
-                       reg = <0x01c200b0 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "ir0";
-               };
-
-               usb_clk: clk@01c200cc {
-                       #clock-cells = <1>;
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun5i-a13-usb-clk";
-                       reg = <0x01c200cc 0x4>;
-                       clocks = <&pll6 1>;
-                       clock-output-names = "usb_ohci0", "usb_phy";
-               };
-
-               codec_clk: clk@01c20140 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun4i-a10-codec-clk";
-                       reg = <0x01c20140 0x4>;
-                       clocks = <&pll2 SUN4I_A10_PLL2_1X>;
-                       clock-output-names = "codec";
-               };
-
-               mbus_clk: clk@01c2015c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun5i-a13-mbus-clk";
-                       reg = <0x01c2015c 0x4>;
-                       clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
-                       clock-output-names = "mbus";
-               };
        };
 
        soc@01c00000 {
                        compatible = "allwinner,sun4i-a10-dma";
                        reg = <0x01c02000 0x1000>;
                        interrupts = <27>;
-                       clocks = <&ahb_gates 6>;
+                       clocks = <&ccu CLK_AHB_DMA>;
                        #dma-cells = <2>;
                };
 
                        compatible = "allwinner,sun4i-a10-spi";
                        reg = <0x01c05000 0x1000>;
                        interrupts = <10>;
-                       clocks = <&ahb_gates 20>, <&spi0_clk>;
+                       clocks = <&ccu CLK_AHB_SPI0>, <&ccu CLK_SPI0>;
                        clock-names = "ahb", "mod";
                        dmas = <&dma SUN4I_DMA_DEDICATED 27>,
                               <&dma SUN4I_DMA_DEDICATED 26>;
                        compatible = "allwinner,sun4i-a10-spi";
                        reg = <0x01c06000 0x1000>;
                        interrupts = <11>;
-                       clocks = <&ahb_gates 21>, <&spi1_clk>;
+                       clocks = <&ccu CLK_AHB_SPI1>, <&ccu CLK_SPI1>;
                        clock-names = "ahb", "mod";
                        dmas = <&dma SUN4I_DMA_DEDICATED 9>,
                               <&dma SUN4I_DMA_DEDICATED 8>;
                mmc0: mmc@01c0f000 {
                        compatible = "allwinner,sun5i-a13-mmc";
                        reg = <0x01c0f000 0x1000>;
-                       clocks = <&ahb_gates 8>,
-                                <&mmc0_clk 0>,
-                                <&mmc0_clk 1>,
-                                <&mmc0_clk 2>;
-                       clock-names = "ahb",
-                                     "mmc",
-                                     "output",
-                                     "sample";
+                       clocks = <&ccu CLK_AHB_MMC0>, <&ccu CLK_MMC0>;
+                       clock-names = "ahb", "mmc";
                        interrupts = <32>;
                        status = "disabled";
                        #address-cells = <1>;
                mmc1: mmc@01c10000 {
                        compatible = "allwinner,sun5i-a13-mmc";
                        reg = <0x01c10000 0x1000>;
-                       clocks = <&ahb_gates 9>,
-                                <&mmc1_clk 0>,
-                                <&mmc1_clk 1>,
-                                <&mmc1_clk 2>;
-                       clock-names = "ahb",
-                                     "mmc",
-                                     "output",
-                                     "sample";
+                       clocks = <&ccu CLK_AHB_MMC1>, <&ccu CLK_MMC1>;
+                       clock-names = "ahb", "mmc";
                        interrupts = <33>;
                        status = "disabled";
                        #address-cells = <1>;
                mmc2: mmc@01c11000 {
                        compatible = "allwinner,sun5i-a13-mmc";
                        reg = <0x01c11000 0x1000>;
-                       clocks = <&ahb_gates 10>,
-                                <&mmc2_clk 0>,
-                                <&mmc2_clk 1>,
-                                <&mmc2_clk 2>;
-                       clock-names = "ahb",
-                                     "mmc",
-                                     "output",
-                                     "sample";
+                       clocks = <&ccu CLK_AHB_MMC2>, <&ccu CLK_MMC2>;
+                       clock-names = "ahb", "mmc";
                        interrupts = <34>;
                        status = "disabled";
                        #address-cells = <1>;
                usb_otg: usb@01c13000 {
                        compatible = "allwinner,sun4i-a10-musb";
                        reg = <0x01c13000 0x0400>;
-                       clocks = <&ahb_gates 0>;
+                       clocks = <&ccu CLK_AHB_OTG>;
                        interrupts = <38>;
                        interrupt-names = "mc";
                        phys = <&usbphy 0>;
                        compatible = "allwinner,sun5i-a13-usb-phy";
                        reg = <0x01c13400 0x10 0x01c14800 0x4>;
                        reg-names = "phy_ctrl", "pmu1";
-                       clocks = <&usb_clk 8>;
+                       clocks = <&ccu CLK_USB_PHY0>;
                        clock-names = "usb_phy";
-                       resets = <&usb_clk 0>, <&usb_clk 1>;
+                       resets = <&ccu RST_USB_PHY0>, <&ccu RST_USB_PHY1>;
                        reset-names = "usb0_reset", "usb1_reset";
                        status = "disabled";
                };
                        compatible = "allwinner,sun5i-a13-ehci", "generic-ehci";
                        reg = <0x01c14000 0x100>;
                        interrupts = <39>;
-                       clocks = <&ahb_gates 1>;
+                       clocks = <&ccu CLK_AHB_EHCI>;
                        phys = <&usbphy 1>;
                        phy-names = "usb";
                        status = "disabled";
                        compatible = "allwinner,sun5i-a13-ohci", "generic-ohci";
                        reg = <0x01c14400 0x100>;
                        interrupts = <40>;
-                       clocks = <&usb_clk 6>, <&ahb_gates 2>;
+                       clocks = <&ccu CLK_USB_OHCI>, <&ccu CLK_AHB_OHCI>;
                        phys = <&usbphy 1>;
                        phy-names = "usb";
                        status = "disabled";
                        compatible = "allwinner,sun4i-a10-spi";
                        reg = <0x01c17000 0x1000>;
                        interrupts = <12>;
-                       clocks = <&ahb_gates 22>, <&spi2_clk>;
+                       clocks = <&ccu CLK_AHB_SPI2>, <&ccu CLK_SPI2>;
                        clock-names = "ahb", "mod";
                        dmas = <&dma SUN4I_DMA_DEDICATED 29>,
                               <&dma SUN4I_DMA_DEDICATED 28>;
                        #size-cells = <0>;
                };
 
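+               /*
+                * No compatible here on purpose: the SoC-specific dtsi
+                * including this file sets it.
+                */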
+               ccu: clock@01c20000 {
+                       reg = <0x01c20000 0x400>;
+                       clocks = <&osc24M>, <&osc32k>;
+                       clock-names = "hosc", "losc";
+                       #clock-cells = <1>;
+                       #reset-cells = <1>;
+               };
+
                intc: interrupt-controller@01c20400 {
                        compatible = "allwinner,sun4i-a10-ic";
                        reg = <0x01c20400 0x400>;
                pio: pinctrl@01c20800 {
                        reg = <0x01c20800 0x400>;
                        interrupts = <28>;
-                       clocks = <&apb0_gates 5>, <&osc24M>, <&osc32k>;
+                       clocks = <&ccu CLK_APB0_PIO>, <&osc24M>, <&osc32k>;
                        clock-names = "apb", "hosc", "losc";
                        gpio-controller;
                        interrupt-controller;
                        compatible = "allwinner,sun4i-a10-timer";
                        reg = <0x01c20c00 0x90>;
                        interrupts = <22>;
-                       clocks = <&osc24M>;
+                       clocks = <&ccu CLK_HOSC>;
                };
 
                wdt: watchdog@01c20c90 {
                        compatible = "allwinner,sun4i-a10-codec";
                        reg = <0x01c22c00 0x40>;
                        interrupts = <30>;
-                       clocks = <&apb0_gates 0>, <&codec_clk>;
+                       clocks = <&ccu CLK_APB0_CODEC>, <&ccu CLK_CODEC>;
                        clock-names = "apb", "codec";
                        dmas = <&dma SUN4I_DMA_NORMAL 19>,
                               <&dma SUN4I_DMA_NORMAL 19>;
                        interrupts = <2>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 17>;
+                       clocks = <&ccu CLK_APB1_UART1>;
                        status = "disabled";
                };
 
                        interrupts = <4>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 19>;
+                       clocks = <&ccu CLK_APB1_UART3>;
                        status = "disabled";
                };
 
                        compatible = "allwinner,sun4i-a10-i2c";
                        reg = <0x01c2ac00 0x400>;
                        interrupts = <7>;
-                       clocks = <&apb1_gates 0>;
+                       clocks = <&ccu CLK_APB1_I2C0>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun4i-a10-i2c";
                        reg = <0x01c2b000 0x400>;
                        interrupts = <8>;
-                       clocks = <&apb1_gates 1>;
+                       clocks = <&ccu CLK_APB1_I2C1>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun4i-a10-i2c";
                        reg = <0x01c2b400 0x400>;
                        interrupts = <9>;
-                       clocks = <&apb1_gates 2>;
+                       clocks = <&ccu CLK_APB1_I2C2>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun5i-a13-hstimer";
                        reg = <0x01c60000 0x1000>;
                        interrupts = <82>, <83>;
-                       clocks = <&ahb_gates 28>;
+                       clocks = <&ccu CLK_AHB_HSTIMER>;
                };
        };
 };
index 03f2ab47ece0a24f203a3fa011a811dd0532d1a4..15b6d122f878ba7f29ec96e8d555fcc2687141fd 100644 (file)
 
 #include <dt-bindings/pinctrl/sun4i-a10.h>
 
+#include <dt-bindings/clock/sun9i-a80-ccu.h>
+#include <dt-bindings/clock/sun9i-a80-de.h>
+#include <dt-bindings/clock/sun9i-a80-usb.h>
+#include <dt-bindings/reset/sun9i-a80-ccu.h>
+#include <dt-bindings/reset/sun9i-a80-de.h>
+#include <dt-bindings/reset/sun9i-a80-usb.h>
+
 / {
        interrupt-parent = <&gic>;
 
                        clock-output-names = "osc32k";
                };
 
-               usb_mod_clk: clk@00a08000 {
-                       #clock-cells = <1>;
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-usb-mod-clk";
-                       reg = <0x00a08000 0x4>;
-                       clocks = <&ahb1_gates 1>;
-                       clock-output-names = "usb0_ahb", "usb_ohci0",
-                                            "usb1_ahb", "usb_ohci1",
-                                            "usb2_ahb", "usb_ohci2";
-               };
-
-               usb_phy_clk: clk@00a08004 {
-                       #clock-cells = <1>;
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-usb-phy-clk";
-                       reg = <0x00a08004 0x4>;
-                       clocks = <&ahb1_gates 1>;
-                       clock-output-names = "usb_phy0", "usb_hsic1_480M",
-                                            "usb_phy1", "usb_hsic2_480M",
-                                            "usb_phy2", "usb_hsic_12M";
-               };
-
-               pll3: clk@06000008 {
-                       /* placeholder until implemented */
-                       #clock-cells = <0>;
-                       compatible = "fixed-clock";
-                       clock-rate = <0>;
-                       clock-output-names = "pll3";
-               };
-
-               pll4: clk@0600000c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-pll4-clk";
-                       reg = <0x0600000c 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll4";
-               };
-
-               pll12: clk@0600002c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-pll4-clk";
-                       reg = <0x0600002c 0x4>;
-                       clocks = <&osc24M>;
-                       clock-output-names = "pll12";
-               };
-
-               gt_clk: clk@0600005c {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-gt-clk";
-                       reg = <0x0600005c 0x4>;
-                       clocks = <&osc24M>, <&pll4>, <&pll12>, <&pll12>;
-                       clock-output-names = "gt";
-               };
-
-               ahb0: clk@06000060 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-ahb-clk";
-                       reg = <0x06000060 0x4>;
-                       clocks = <&gt_clk>, <&pll4>, <&pll12>, <&pll12>;
-                       clock-output-names = "ahb0";
-               };
-
-               ahb1: clk@06000064 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-ahb-clk";
-                       reg = <0x06000064 0x4>;
-                       clocks = <&gt_clk>, <&pll4>, <&pll12>, <&pll12>;
-                       clock-output-names = "ahb1";
-               };
-
-               ahb2: clk@06000068 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-ahb-clk";
-                       reg = <0x06000068 0x4>;
-                       clocks = <&gt_clk>, <&pll4>, <&pll12>, <&pll12>;
-                       clock-output-names = "ahb2";
-               };
-
-               apb0: clk@06000070 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-apb0-clk";
-                       reg = <0x06000070 0x4>;
-                       clocks = <&osc24M>, <&pll4>;
-                       clock-output-names = "apb0";
-               };
-
-               apb1: clk@06000074 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-apb1-clk";
-                       reg = <0x06000074 0x4>;
-                       clocks = <&osc24M>, <&pll4>;
-                       clock-output-names = "apb1";
-               };
-
-               cci400_clk: clk@06000078 {
-                       #clock-cells = <0>;
-                       compatible = "allwinner,sun9i-a80-gt-clk";
-                       reg = <0x06000078 0x4>;
-                       clocks = <&osc24M>, <&pll4>, <&pll12>, <&pll12>;
-                       clock-output-names = "cci400";
-               };
-
-               mmc0_clk: clk@06000410 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-mmc-clk";
-                       reg = <0x06000410 0x4>;
-                       clocks = <&osc24M>, <&pll4>;
-                       clock-output-names = "mmc0", "mmc0_output",
-                                            "mmc0_sample";
-               };
-
-               mmc1_clk: clk@06000414 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-mmc-clk";
-                       reg = <0x06000414 0x4>;
-                       clocks = <&osc24M>, <&pll4>;
-                       clock-output-names = "mmc1", "mmc1_output",
-                                            "mmc1_sample";
-               };
-
-               mmc2_clk: clk@06000418 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-mmc-clk";
-                       reg = <0x06000418 0x4>;
-                       clocks = <&osc24M>, <&pll4>;
-                       clock-output-names = "mmc2", "mmc2_output",
-                                            "mmc2_sample";
-               };
-
-               mmc3_clk: clk@0600041c {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-mmc-clk";
-                       reg = <0x0600041c 0x4>;
-                       clocks = <&osc24M>, <&pll4>;
-                       clock-output-names = "mmc3", "mmc3_output",
-                                            "mmc3_sample";
-               };
-
-               ahb0_gates: clk@06000580 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-ahb0-gates-clk";
-                       reg = <0x06000580 0x4>;
-                       clocks = <&ahb0>;
-                       clock-indices = <0>, <1>, <3>,
-                                       <5>, <8>, <12>,
-                                       <13>, <14>,
-                                       <15>, <16>, <18>,
-                                       <20>, <21>, <22>,
-                                       <23>;
-                       clock-output-names = "ahb0_fd", "ahb0_ve", "ahb0_gpu",
-                                       "ahb0_ss", "ahb0_sd", "ahb0_nand1",
-                                       "ahb0_nand0", "ahb0_sdram",
-                                       "ahb0_mipi_hsi", "ahb0_sata", "ahb0_ts",
-                                       "ahb0_spi0", "ahb0_spi1", "ahb0_spi2",
-                                       "ahb0_spi3";
-               };
-
-               ahb1_gates: clk@06000584 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-ahb1-gates-clk";
-                       reg = <0x06000584 0x4>;
-                       clocks = <&ahb1>;
-                       clock-indices = <0>, <1>,
-                                       <17>, <21>,
-                                       <22>, <23>,
-                                       <24>;
-                       clock-output-names = "ahb1_usbotg", "ahb1_usbhci",
-                                       "ahb1_gmac", "ahb1_msgbox",
-                                       "ahb1_spinlock", "ahb1_hstimer",
-                                       "ahb1_dma";
-               };
-
-               ahb2_gates: clk@06000588 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-ahb2-gates-clk";
-                       reg = <0x06000588 0x4>;
-                       clocks = <&ahb2>;
-                       clock-indices = <0>, <1>,
-                                       <2>, <4>, <5>,
-                                       <7>, <8>, <11>;
-                       clock-output-names = "ahb2_lcd0", "ahb2_lcd1",
-                                       "ahb2_edp", "ahb2_csi", "ahb2_hdmi",
-                                       "ahb2_de", "ahb2_mp", "ahb2_mipi_dsi";
-               };
-
-               apb0_gates: clk@06000590 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-apb0-gates-clk";
-                       reg = <0x06000590 0x4>;
-                       clocks = <&apb0>;
-                       clock-indices = <1>, <5>,
-                                       <11>, <12>, <13>,
-                                       <15>, <17>, <18>,
-                                       <19>;
-                       clock-output-names = "apb0_spdif", "apb0_pio",
-                                       "apb0_ac97", "apb0_i2s0", "apb0_i2s1",
-                                       "apb0_lradc", "apb0_gpadc", "apb0_twd",
-                                       "apb0_cirtx";
-               };
-
-               apb1_gates: clk@06000594 {
-                       #clock-cells = <1>;
-                       compatible = "allwinner,sun9i-a80-apb1-gates-clk";
-                       reg = <0x06000594 0x4>;
-                       clocks = <&apb1>;
-                       clock-indices = <0>, <1>,
-                                       <2>, <3>, <4>,
-                                       <16>, <17>,
-                                       <18>, <19>,
-                                       <20>, <21>;
-                       clock-output-names = "apb1_i2c0", "apb1_i2c1",
-                                       "apb1_i2c2", "apb1_i2c3", "apb1_i2c4",
-                                       "apb1_uart0", "apb1_uart1",
-                                       "apb1_uart2", "apb1_uart3",
-                                       "apb1_uart4", "apb1_uart5";
-               };
-
                cpus_clk: clk@08001410 {
                        compatible = "allwinner,sun9i-a80-cpus-clk";
                        reg = <0x08001410 0x4>;
                        #clock-cells = <0>;
-                       clocks = <&osc32k>, <&osc24M>, <&pll4>, <&pll3>;
+                       clocks = <&osc32k>, <&osc24M>,
+                                <&ccu CLK_PLL_PERIPH0>,
+                                <&ccu CLK_PLL_AUDIO>;
                        clock-output-names = "cpus";
                };
 
                        compatible = "allwinner,sun9i-a80-ehci", "generic-ehci";
                        reg = <0x00a00000 0x100>;
                        interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&usb_mod_clk 1>;
-                       resets = <&usb_mod_clk 17>;
+                       clocks = <&usb_clocks CLK_BUS_HCI0>;
+                       resets = <&usb_clocks RST_USB0_HCI>;
                        phys = <&usbphy1>;
                        phy-names = "usb";
                        status = "disabled";
                        compatible = "allwinner,sun9i-a80-ohci", "generic-ohci";
                        reg = <0x00a00400 0x100>;
                        interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&usb_mod_clk 1>, <&usb_mod_clk 2>;
-                       resets = <&usb_mod_clk 17>;
+                       clocks = <&usb_clocks CLK_BUS_HCI0>,
+                                <&usb_clocks CLK_USB_OHCI0>;
+                       resets = <&usb_clocks RST_USB0_HCI>;
                        phys = <&usbphy1>;
                        phy-names = "usb";
                        status = "disabled";
                usbphy1: phy@00a00800 {
                        compatible = "allwinner,sun9i-a80-usb-phy";
                        reg = <0x00a00800 0x4>;
-                       clocks = <&usb_phy_clk 1>;
+                       clocks = <&usb_clocks CLK_USB0_PHY>;
                        clock-names = "phy";
-                       resets = <&usb_phy_clk 17>;
+                       resets = <&usb_clocks RST_USB0_PHY>;
                        reset-names = "phy";
                        status = "disabled";
                        #phy-cells = <0>;
                        compatible = "allwinner,sun9i-a80-ehci", "generic-ehci";
                        reg = <0x00a01000 0x100>;
                        interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&usb_mod_clk 3>;
-                       resets = <&usb_mod_clk 18>;
+                       clocks = <&usb_clocks CLK_BUS_HCI1>;
+                       resets = <&usb_clocks RST_USB1_HCI>;
                        phys = <&usbphy2>;
                        phy-names = "usb";
                        status = "disabled";
                usbphy2: phy@00a01800 {
                        compatible = "allwinner,sun9i-a80-usb-phy";
                        reg = <0x00a01800 0x4>;
-                       clocks = <&usb_phy_clk 2>, <&usb_phy_clk 10>,
-                                <&usb_phy_clk 3>;
-                       clock-names = "hsic_480M", "hsic_12M", "phy";
-                       resets = <&usb_phy_clk 18>, <&usb_phy_clk 19>;
-                       reset-names = "hsic", "phy";
+                       clocks = <&usb_clocks CLK_USB1_HSIC>,
+                                <&usb_clocks CLK_USB_HSIC>,
+                                <&usb_clocks CLK_USB1_PHY>;
+                       clock-names = "hsic_480M",
+                                     "hsic_12M",
+                                     "phy";
+                       resets = <&usb_clocks RST_USB1_HSIC>,
+                                <&usb_clocks RST_USB1_PHY>;
+                       reset-names = "hsic",
+                                     "phy";
                        status = "disabled";
                        #phy-cells = <0>;
                        /* usb1 is always used with HSIC */
                        compatible = "allwinner,sun9i-a80-ehci", "generic-ehci";
                        reg = <0x00a02000 0x100>;
                        interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&usb_mod_clk 5>;
-                       resets = <&usb_mod_clk 19>;
+                       clocks = <&usb_clocks CLK_BUS_HCI2>;
+                       resets = <&usb_clocks RST_USB2_HCI>;
                        phys = <&usbphy3>;
                        phy-names = "usb";
                        status = "disabled";
                        compatible = "allwinner,sun9i-a80-ohci", "generic-ohci";
                        reg = <0x00a02400 0x100>;
                        interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&usb_mod_clk 5>, <&usb_mod_clk 6>;
-                       resets = <&usb_mod_clk 19>;
+                       clocks = <&usb_clocks CLK_BUS_HCI2>,
+                                <&usb_clocks CLK_USB_OHCI2>;
+                       resets = <&usb_clocks RST_USB2_HCI>;
                        phys = <&usbphy3>;
                        phy-names = "usb";
                        status = "disabled";
                usbphy3: phy@00a02800 {
                        compatible = "allwinner,sun9i-a80-usb-phy";
                        reg = <0x00a02800 0x4>;
-                       clocks = <&usb_phy_clk 4>, <&usb_phy_clk 10>,
-                                <&usb_phy_clk 5>;
-                       clock-names = "hsic_480M", "hsic_12M", "phy";
-                       resets = <&usb_phy_clk 20>, <&usb_phy_clk 21>;
-                       reset-names = "hsic", "phy";
+                       clocks = <&usb_clocks CLK_USB2_HSIC>,
+                                <&usb_clocks CLK_USB_HSIC>,
+                                <&usb_clocks CLK_USB2_PHY>;
+                       clock-names = "hsic_480M",
+                                     "hsic_12M",
+                                     "phy";
+                       resets = <&usb_clocks RST_USB2_HSIC>,
+                                <&usb_clocks RST_USB2_PHY>;
+                       reset-names = "hsic",
+                                     "phy";
                        status = "disabled";
                        #phy-cells = <0>;
                };
 
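+               /*
+                * USB gates and resets live in a separate sub-CCU,
+                * itself fed from the main CCU's CLK_BUS_USB gate.
+                */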
+               usb_clocks: clock@00a08000 {
+                       compatible = "allwinner,sun9i-a80-usb-clks";
+                       reg = <0x00a08000 0x8>;
+                       clocks = <&ccu CLK_BUS_USB>, <&osc24M>;
+                       clock-names = "bus", "hosc";
+                       #clock-cells = <1>;
+                       #reset-cells = <1>;
+               };
+
                mmc0: mmc@01c0f000 {
                        compatible = "allwinner,sun9i-a80-mmc";
                        reg = <0x01c0f000 0x1000>;
-                       clocks = <&mmc_config_clk 0>, <&mmc0_clk 0>,
-                                <&mmc0_clk 1>, <&mmc0_clk 2>;
+                       clocks = <&mmc_config_clk 0>, <&ccu CLK_MMC0>,
+                                <&ccu CLK_MMC0_OUTPUT>,
+                                <&ccu CLK_MMC0_SAMPLE>;
                        clock-names = "ahb", "mmc", "output", "sample";
                        resets = <&mmc_config_clk 0>;
                        reset-names = "ahb";
                mmc1: mmc@01c10000 {
                        compatible = "allwinner,sun9i-a80-mmc";
                        reg = <0x01c10000 0x1000>;
-                       clocks = <&mmc_config_clk 1>, <&mmc1_clk 0>,
-                                <&mmc1_clk 1>, <&mmc1_clk 2>;
+                       clocks = <&mmc_config_clk 1>, <&ccu CLK_MMC1>,
+                                <&ccu CLK_MMC1_OUTPUT>,
+                                <&ccu CLK_MMC1_SAMPLE>;
                        clock-names = "ahb", "mmc", "output", "sample";
                        resets = <&mmc_config_clk 1>;
                        reset-names = "ahb";
                mmc2: mmc@01c11000 {
                        compatible = "allwinner,sun9i-a80-mmc";
                        reg = <0x01c11000 0x1000>;
-                       clocks = <&mmc_config_clk 2>, <&mmc2_clk 0>,
-                                <&mmc2_clk 1>, <&mmc2_clk 2>;
+                       clocks = <&mmc_config_clk 2>, <&ccu CLK_MMC2>,
+                                <&ccu CLK_MMC2_OUTPUT>,
+                                <&ccu CLK_MMC2_SAMPLE>;
                        clock-names = "ahb", "mmc", "output", "sample";
                        resets = <&mmc_config_clk 2>;
                        reset-names = "ahb";
                mmc3: mmc@01c12000 {
                        compatible = "allwinner,sun9i-a80-mmc";
                        reg = <0x01c12000 0x1000>;
-                       clocks = <&mmc_config_clk 3>, <&mmc3_clk 0>,
-                                <&mmc3_clk 1>, <&mmc3_clk 2>;
+                       clocks = <&mmc_config_clk 3>, <&ccu CLK_MMC3>,
+                                <&ccu CLK_MMC3_OUTPUT>,
+                                <&ccu CLK_MMC3_SAMPLE>;
                        clock-names = "ahb", "mmc", "output", "sample";
                        resets = <&mmc_config_clk 3>;
                        reset-names = "ahb";
                mmc_config_clk: clk@01c13000 {
                        compatible = "allwinner,sun9i-a80-mmc-config-clk";
                        reg = <0x01c13000 0x10>;
-                       clocks = <&ahb0_gates 8>;
+                       clocks = <&ccu CLK_BUS_MMC>;
                        clock-names = "ahb";
-                       resets = <&ahb0_resets 8>;
+                       resets = <&ccu RST_BUS_MMC>;
                        reset-names = "ahb";
                        #clock-cells = <1>;
                        #reset-cells = <1>;
                        interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
                };
 
-               ahb0_resets: reset@060005a0 {
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun6i-a31-clock-reset";
-                       reg = <0x060005a0 0x4>;
-               };
-
-               ahb1_resets: reset@060005a4 {
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun6i-a31-clock-reset";
-                       reg = <0x060005a4 0x4>;
-               };
-
-               ahb2_resets: reset@060005a8 {
-                       #reset-cells = <1>;
-                       compatible = "allwinner,sun6i-a31-clock-reset";
-                       reg = <0x060005a8 0x4>;
-               };
-
-               apb0_resets: reset@060005b0 {
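+               /*
+                * The standalone reset controllers above fold into the
+                * new CCUs; the display engine gets its own sub-CCU fed
+                * by the main one.
+                */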
+               de_clocks: clock@03000000 {
+                       compatible = "allwinner,sun9i-a80-de-clks";
+                       reg = <0x03000000 0x30>;
+                       clocks = <&ccu CLK_DE>,
+                                <&ccu CLK_SDRAM>,
+                                <&ccu CLK_BUS_DE>;
+                       clock-names = "mod",
+                                     "dram",
+                                     "bus";
+                       resets = <&ccu RST_BUS_DE>;
+                       #clock-cells = <1>;
                        #reset-cells = <1>;
-                       compatible = "allwinner,sun6i-a31-clock-reset";
-                       reg = <0x060005b0 0x4>;
                };
 
-               apb1_resets: reset@060005b4 {
+               ccu: clock@06000000 {
+                       compatible = "allwinner,sun9i-a80-ccu";
+                       reg = <0x06000000 0x800>;
+                       clocks = <&osc24M>, <&osc32k>;
+                       clock-names = "hosc", "losc";
+                       #clock-cells = <1>;
                        #reset-cells = <1>;
-                       compatible = "allwinner,sun6i-a31-clock-reset";
-                       reg = <0x060005b4 0x4>;
                };
 
                timer@06000c00 {
                                     <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&apb0_gates 5>, <&osc24M>, <&osc32k>;
+                       clocks = <&ccu CLK_BUS_PIO>, <&osc24M>, <&osc32k>;
                        clock-names = "apb", "hosc", "losc";
                        gpio-controller;
                        interrupt-controller;
                        interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 16>;
-                       resets = <&apb1_resets 16>;
+                       clocks = <&ccu CLK_BUS_UART0>;
+                       resets = <&ccu RST_BUS_UART0>;
                        status = "disabled";
                };
 
                        interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 17>;
-                       resets = <&apb1_resets 17>;
+                       clocks = <&ccu CLK_BUS_UART1>;
+                       resets = <&ccu RST_BUS_UART1>;
                        status = "disabled";
                };
 
                        interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 18>;
-                       resets = <&apb1_resets 18>;
+                       clocks = <&ccu CLK_BUS_UART2>;
+                       resets = <&ccu RST_BUS_UART2>;
                        status = "disabled";
                };
 
                        interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 19>;
-                       resets = <&apb1_resets 19>;
+                       clocks = <&ccu CLK_BUS_UART3>;
+                       resets = <&ccu RST_BUS_UART3>;
                        status = "disabled";
                };
 
                        interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 20>;
-                       resets = <&apb1_resets 20>;
+                       clocks = <&ccu CLK_BUS_UART4>;
+                       resets = <&ccu RST_BUS_UART4>;
                        status = "disabled";
                };
 
                        interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
                        reg-shift = <2>;
                        reg-io-width = <4>;
-                       clocks = <&apb1_gates 21>;
-                       resets = <&apb1_resets 21>;
+                       clocks = <&ccu CLK_BUS_UART5>;
+                       resets = <&ccu RST_BUS_UART5>;
                        status = "disabled";
                };
 
                        compatible = "allwinner,sun6i-a31-i2c";
                        reg = <0x07002800 0x400>;
                        interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&apb1_gates 0>;
-                       resets = <&apb1_resets 0>;
+                       clocks = <&ccu CLK_BUS_I2C0>;
+                       resets = <&ccu RST_BUS_I2C0>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun6i-a31-i2c";
                        reg = <0x07002c00 0x400>;
                        interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&apb1_gates 1>;
-                       resets = <&apb1_resets 1>;
+                       clocks = <&ccu CLK_BUS_I2C1>;
+                       resets = <&ccu RST_BUS_I2C1>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun6i-a31-i2c";
                        reg = <0x07003000 0x400>;
                        interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&apb1_gates 2>;
-                       resets = <&apb1_resets 2>;
+                       clocks = <&ccu CLK_BUS_I2C2>;
+                       resets = <&ccu RST_BUS_I2C2>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun6i-a31-i2c";
                        reg = <0x07003400 0x400>;
                        interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&apb1_gates 3>;
-                       resets = <&apb1_resets 3>;
+                       clocks = <&ccu CLK_BUS_I2C3>;
+                       resets = <&ccu RST_BUS_I2C3>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "allwinner,sun6i-a31-i2c";
                        reg = <0x07003800 0x400>;
                        interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
-                       clocks = <&apb1_gates 4>;
-                       resets = <&apb1_resets 4>;
+                       clocks = <&ccu CLK_BUS_I2C4>;
+                       resets = <&ccu RST_BUS_I2C4>;
                        status = "disabled";
                        #address-cells = <1>;
                        #size-cells = <0>;
index 75055df1cda30f39c42863d9bdd4fdcc391ff8f1..9b1b7be2ec0e494c650f4e2f2b80588066347414 100644 (file)
@@ -452,7 +452,7 @@ static int dmabounce_set_mask(struct device *dev, u64 dma_mask)
        return arm_dma_ops.set_dma_mask(dev, dma_mask);
 }
 
-static struct dma_map_ops dmabounce_ops = {
+static const struct dma_map_ops dmabounce_ops = {
        .alloc                  = arm_dma_alloc,
        .free                   = arm_dma_free,
        .mmap                   = arm_dma_mmap,
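Constifying dmabounce_ops is part of the tree-wide move to const struct dma_map_ops: a table of function pointers that is never written after init can live in read-only data, and the compiler rejects accidental retargeting. A minimal, self-contained illustration (plain userspace C, names invented for the sketch):

#include <stdio.h>

struct ops {
        void (*greet)(void);
};

static void greet_impl(void) { puts("hello"); }

/* const: the pointer table lands in .rodata and cannot be patched at runtime */
static const struct ops demo_ops = { .greet = greet_impl };

int main(void)
{
        demo_ops.greet();
        /* demo_ops.greet = NULL;  -- would fail to compile: read-only member */
        return 0;
}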
index a923524d1040734d1f5b94b4a6e2f5d59cab99d1..cf062472e07bcb4be470bf35ab029df3438dbc7e 100644 (file)
@@ -144,7 +144,7 @@ extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
 
 void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
 {
-       unsigned long val = ptr ? virt_to_phys(ptr) : 0;
+       unsigned long val = ptr ? __pa_symbol(ptr) : 0;
        mcpm_entry_vectors[cluster][cpu] = val;
        sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
 }
@@ -299,8 +299,8 @@ void mcpm_cpu_power_down(void)
         * the kernel as if the power_up method just had deasserted reset
         * on the CPU.
         */
-       phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
-       phys_reset(virt_to_phys(mcpm_entry_point));
+       phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
+       phys_reset(__pa_symbol(mcpm_entry_point));
 
        /* should never get here */
        BUG();
@@ -388,8 +388,8 @@ static int __init nocache_trampoline(unsigned long _arg)
        __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
        __mcpm_cpu_down(cpu, cluster);
 
-       phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
-       phys_reset(virt_to_phys(mcpm_entry_point));
+       phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
+       phys_reset(__pa_symbol(mcpm_entry_point));
        BUG();
 }
 
@@ -449,7 +449,7 @@ int __init mcpm_sync_init(
        sync_cache_w(&mcpm_sync);
 
        if (power_up_setup) {
-               mcpm_power_up_setup_phys = virt_to_phys(power_up_setup);
+               mcpm_power_up_setup_phys = __pa_symbol(power_up_setup);
                sync_cache_w(&mcpm_power_up_setup_phys);
        }
 
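The mcpm conversions above all apply one rule: addresses of kernel symbols (functions, entry points, section markers) are translated with __pa_symbol(), while virt_to_phys() stays reserved for linear-map (lowmem) addresses, so that CONFIG_DEBUG_VIRTUAL can check each translation against the right address range. A hedged sketch of the call-site pattern, with invented helper names:

#include <linux/io.h>
#include <asm/memory.h>

extern void secondary_startup(void);    /* assembly entry point: a kernel symbol */

static void program_boot_vector(void __iomem *reg)
{
        /* kernel text symbol: translated with __pa_symbol() */
        writel_relaxed(__pa_symbol(secondary_startup), reg);
}

static phys_addr_t buffer_phys(void *lowmem_buf)
{
        /* linear-map allocation (e.g. from kmalloc): still virt_to_phys() */
        return virt_to_phys(lowmem_buf);
}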
index 4111592f01301adfaf1b96e50397a152cfe7b625..220ba207be9130769d3effd8687b19722c32c9ca 100644 (file)
@@ -7,7 +7,6 @@
 #define ASMARM_DEVICE_H
 
 struct dev_archdata {
-       struct dma_map_ops      *dma_ops;
 #ifdef CONFIG_DMABOUNCE
        struct dmabounce_device_info *dmabounce;
 #endif
index bf02dbd9ccda3bd9addfbac5566df2a7b97c7ef8..71665692597541538c1c04e4475b4d718191cbc3 100644 (file)
 #include <asm/xen/hypervisor.h>
 
 #define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-extern struct dma_map_ops arm_dma_ops;
-extern struct dma_map_ops arm_coherent_dma_ops;
+extern const struct dma_map_ops arm_dma_ops;
+extern const struct dma_map_ops arm_coherent_dma_ops;
 
-static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
-       if (dev && dev->archdata.dma_ops)
-               return dev->archdata.dma_ops;
+       if (dev && dev->dma_ops)
+               return dev->dma_ops;
        return &arm_dma_ops;
 }
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        if (xen_initial_domain())
                return xen_dma_ops;
        else
-               return __generic_dma_ops(dev);
-}
-
-static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
-{
-       BUG_ON(!dev);
-       dev->archdata.dma_ops = ops;
+               return __generic_dma_ops(NULL);
 }
 
 #define HAVE_ARCH_DMA_SUPPORTED 1
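This header change is part of moving the per-device dma_map_ops pointer from arch-specific dev->archdata into generic struct device (dev->dma_ops), with set_dma_ops()/get_dma_ops() now supplied by common code and the architecture only providing the get_arch_dma_ops() fallback. A sketch of the resulting lookup order, assuming the generic helpers from <linux/dma-mapping.h>:

#include <linux/dma-mapping.h>

static const struct dma_map_ops *lookup_ops(struct device *dev)
{
        /* a per-device override wins ... */
        if (dev && dev->dma_ops)
                return dev->dma_ops;
        /* ... otherwise fall back to the bus/architecture default */
        return get_arch_dma_ops(dev ? dev->bus : NULL);
}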
index eaa60da7dac31fc8977776d9929dde934ccd52b6..0ef42ae75b6ca992d2e13a7a9725e4dcbda89439 100644 (file)
@@ -16,7 +16,7 @@
 #ifndef __CACHE_UNIPHIER_H
 #define __CACHE_UNIPHIER_H
 
-#include <linux/types.h>
+#include <linux/errno.h>
 
 #ifdef CONFIG_CACHE_UNIPHIER
 int uniphier_cache_init(void);
index 3ea9be559726ec06b547b4dda08dda98ea8edace..59655459da591bfb767231d10a8826cc784fd24c 100644 (file)
@@ -16,6 +16,9 @@
 #ifndef _ARM_KPROBES_H
 #define _ARM_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/notifier.h>
@@ -83,4 +86,5 @@ struct arch_optimized_insn {
         */
 };
 
+#endif /* CONFIG_KPROBES */
 #endif /* _ARM_KPROBES_H */
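The kprobes header now pulls in <asm-generic/kprobes.h> unconditionally, so annotations such as __kprobes and NOKPROBE_SYMBOL() remain visible even when CONFIG_KPROBES is off, while the heavyweight arch declarations compile out. The resulting header shape, in outline only:

/* arch/.../asm/kprobes.h -- shape after the change (outline, not the full header) */
#ifndef _ARCH_KPROBES_H
#define _ARCH_KPROBES_H

#include <asm-generic/kprobes.h>   /* always: __kprobes / NOKPROBE_SYMBOL definitions */

#ifdef CONFIG_KPROBES
/* struct kprobe, arch_prepare_kprobe(), pre/post handler plumbing, ... */
#endif

#endif /* _ARCH_KPROBES_H */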
index 4ca69fe2c850c0a995574ffe250444d8062d8d95..bada3f845a975e656683fe1645c197ade562bf74 100644 (file)
@@ -22,7 +22,7 @@ struct mtd_info;
  * set_vpp:    method called to enable or disable VPP
  * mmcontrol:  method called to enable or disable Sync. Burst Read in OneNAND
  * parts:      optional array of mtd_partitions for static partitioning
- * nr_parts:   number of mtd_partitions for static partitoning
+ * nr_parts:   number of mtd_partitions for static partitioning
  */
 struct flash_platform_data {
        const char      *map_name;
index 76cbd9c674dff7815e7f25a293a8b03e80e3b442..1f54e4e98c1e10cd77fd23a465268946c33d15c8 100644 (file)
 #define IOREMAP_MAX_ORDER      24
 #endif
 
+#define VECTORS_BASE           UL(0xffff0000)
+
 #else /* CONFIG_MMU */
 
+#ifndef __ASSEMBLY__
+extern unsigned long vectors_base;
+#define VECTORS_BASE           vectors_base
+#endif
+
 /*
  * The limitation of user task size can grow up to the end of free ram region.
  * It is difficult to define and perhaps will never meet the original meaning
 
 #endif /* !CONFIG_MMU */
 
+#ifdef CONFIG_XIP_KERNEL
+#define KERNEL_START           _sdata
+#else
+#define KERNEL_START           _stext
+#endif
+#define KERNEL_END             _end
+
 /*
  * We fix the TCM memories max 32 KiB ITCM resp DTCM at these
  * locations
@@ -206,7 +220,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
        : "r" (x), "I" (__PV_BITS_31_24)                \
        : "cc")
 
-static inline phys_addr_t __virt_to_phys(unsigned long x)
+static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
        phys_addr_t t;
 
@@ -238,7 +252,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
 #define PHYS_OFFSET    PLAT_PHYS_OFFSET
 #define PHYS_PFN_OFFSET        ((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
 
-static inline phys_addr_t __virt_to_phys(unsigned long x)
+static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
        return (phys_addr_t)x - PAGE_OFFSET + PHYS_OFFSET;
 }
@@ -254,6 +268,16 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
        ((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
         PHYS_PFN_OFFSET)
 
+#define __pa_symbol_nodebug(x) __virt_to_phys_nodebug((x))
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern phys_addr_t __virt_to_phys(unsigned long x);
+extern phys_addr_t __phys_addr_symbol(unsigned long x);
+#else
+#define __virt_to_phys(x)      __virt_to_phys_nodebug(x)
+#define __phys_addr_symbol(x)  __pa_symbol_nodebug(x)
+#endif
+
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
  * Note: Drivers should NOT use these.  They are the wrong
@@ -276,6 +300,7 @@ static inline void *phys_to_virt(phys_addr_t x)
  * Drivers should NOT use these either.
  */
 #define __pa(x)                        __virt_to_phys((unsigned long)(x))
+#define __pa_symbol(x)         __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
 #define __va(x)                        ((void *)__phys_to_virt((phys_addr_t)(x)))
 #define pfn_to_kaddr(pfn)      __va((phys_addr_t)(pfn) << PAGE_SHIFT)
 
index add094d09e3e2cec57c85cfc13349655cb93b9c1..302240c19a5aa688e7bdab1ece506dfbeaccea4e 100644 (file)
@@ -63,9 +63,9 @@ typedef pte_t *pte_addr_t;
 /*
  * Mark the prot value as uncacheable and unbufferable.
  */
-#define pgprot_noncached(prot) __pgprot(0)
-#define pgprot_writecombine(prot) __pgprot(0)
-#define pgprot_dmacoherent(prot) __pgprot(0)
+#define pgprot_noncached(prot) (prot)
+#define pgprot_writecombine(prot) (prot)
+#define pgprot_dmacoherent(prot) (prot)
 
 
 /*
index 6b4eb27b875863ffeb37a131fe445ee38f28a0f4..2e21e08de7478b5e19f1aee89521356e52d8c041 100644 (file)
@@ -151,11 +151,6 @@ __after_proc_init:
 #endif
 #ifdef CONFIG_CPU_ICACHE_DISABLE
        bic     r0, r0, #CR_I
-#endif
-#ifdef CONFIG_CPU_HIGH_VECTOR
-       orr     r0, r0, #CR_V
-#else
-       bic     r0, r0, #CR_V
 #endif
        mcr     p15, 0, r0, c1, c0, 0           @ write control reg
 #elif defined (CONFIG_CPU_V7M)
index 4f14b5ce6535f7a19215660ebb4b3e62bd6ea5ed..80254b47dc3420ec11cb6611f645d5b1faf55b66 100644 (file)
@@ -155,8 +155,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
                       break;
 
                case R_ARM_PREL31:
-                       offset = *(u32 *)loc + sym->st_value - loc;
-                       *(u32 *)loc = offset & 0x7fffffff;
+                       offset = (*(s32 *)loc << 1) >> 1; /* sign extend */
+                       offset += sym->st_value - loc;
+                       if (offset >= 0x40000000 || offset < -0x40000000) {
+                               pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
+                                      module->name, relindex, i, symname,
+                                      ELF32_R_TYPE(rel->r_info), loc,
+                                      sym->st_value);
+                               return -ENOEXEC;
+                       }
+                       *(u32 *)loc &= 0x80000000;
+                       *(u32 *)loc |= offset & 0x7fffffff;
                        break;
 
                case R_ARM_MOVW_ABS_NC:
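The R_ARM_PREL31 fix sign-extends the existing 31-bit addend before adding the symbol offset, then rejects any result outside the roughly ±1 GiB span a signed 31-bit field can encode. The shift pair relies on arithmetic right shift of a signed value, which is implementation-defined in ISO C but assumed by the kernel; a standalone demonstration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t loc_word = 0x7fffffffu;  /* 31-bit field holding -1, bit 31 clear */

        /* shift bit 30 into the sign bit, then arithmetic-shift it back down */
        int32_t offset = (int32_t)(loc_word << 1) >> 1;

        printf("%d\n", offset);           /* prints -1 */
        return 0;
}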
index 34e3f3c45634d96182cd94f2672375dd5de32643..f4e54503afa9587106fad19c0932273f81995993 100644 (file)
@@ -81,7 +81,7 @@ __setup("fpe=", fpe_setup);
 extern void init_default_cache_policy(unsigned long);
 extern void paging_init(const struct machine_desc *desc);
 extern void early_paging_init(const struct machine_desc *);
-extern void sanity_check_meminfo(void);
+extern void adjust_lowmem_bounds(void);
 extern enum reboot_mode reboot_mode;
 extern void setup_dma_zone(const struct machine_desc *desc);
 
@@ -1093,8 +1093,14 @@ void __init setup_arch(char **cmdline_p)
        setup_dma_zone(mdesc);
        xen_early_init();
        efi_init();
-       sanity_check_meminfo();
+       /*
+        * Make sure the calculation for lowmem/highmem is set appropriately
+        * before reserving/allocating any memory
+        */
+       adjust_lowmem_bounds();
        arm_memblock_init(mdesc);
+       /* Memory may have been removed so recalculate the bounds. */
+       adjust_lowmem_bounds();
 
        early_ioremap_reset();
 
index 7dd14e8395e62976b3083f677b0e4d2ea0d71d95..5a07c5a4b8943c68487cc8669eee0c20408670c2 100644 (file)
@@ -251,7 +251,7 @@ void __cpu_die(unsigned int cpu)
                pr_err("CPU%u: cpu didn't die\n", cpu);
                return;
        }
-       pr_notice("CPU%u: shutdown\n", cpu);
+       pr_debug("CPU%u: shutdown\n", cpu);
 
        /*
         * platform_cpu_kill() is generally expected to do the powering off
@@ -371,7 +371,7 @@ asmlinkage void secondary_start_kernel(void)
         * reference and switch to it.
         */
        cpu = smp_processor_id();
-       atomic_inc(&mm->mm_count);
+       mmgrab(mm);
        current->active_mm = mm;
        cpumask_set_cpu(cpu, mm_cpumask(mm));
 
index dd77ea25e7ca9f6ff0cdb215fd5d190fbcfa6d11..6dc6d491f88a03d02d643a59dda67c1c2b51cce1 100644 (file)
@@ -27,7 +27,7 @@ static int alpine_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
        phys_addr_t addr;
 
-       addr = virt_to_phys(secondary_startup);
+       addr = __pa_symbol(secondary_startup);
 
        if (addr > (phys_addr_t)(uint32_t)(-1)) {
                pr_err("FAIL: resume address over 32bit (%pa)", &addr);
index ffbd71d45008a00639d8a496605150a3755caac0..502e3df69f696e4a740cd09def122a923386e7c9 100644 (file)
@@ -25,7 +25,7 @@
 static void write_release_addr(u32 release_phys)
 {
        u32 *virt = (u32 *) phys_to_virt(release_phys);
-       writel_relaxed(virt_to_phys(secondary_startup), virt);
+       writel_relaxed(__pa_symbol(secondary_startup), virt);
        /* Make sure this store is visible to other CPUs */
        smp_wmb();
        __cpuc_flush_dcache_area(virt, sizeof(u32));
index 9b6727ed68cd711632f6dd9fb53d9519f0d02683..f5fb10b4376f7391bc93f5c3a44edad3612c9864 100644 (file)
@@ -135,7 +135,7 @@ static int bcm63138_smp_boot_secondary(unsigned int cpu,
        }
 
        /* Write the secondary init routine to the BootLUT reset vector */
-       val = virt_to_phys(secondary_startup);
+       val = __pa_symbol(secondary_startup);
        writel_relaxed(val, bootlut_base + BOOTLUT_RESET_VECT);
 
        /* Power up the core, will jump straight to its reset vector when we
index 40dc8448445e6e2741c8c9a91ac5cd7768809bd2..12379960e982de5ea427bfee05a28d05a78af680 100644 (file)
@@ -151,7 +151,7 @@ static void brcmstb_cpu_boot(u32 cpu)
         * Set the reset vector to point to the secondary_startup
         * routine
         */
-       cpu_set_boot_addr(cpu, virt_to_phys(secondary_startup));
+       cpu_set_boot_addr(cpu, __pa_symbol(secondary_startup));
 
        /* Unhalt the cpu */
        cpu_rst_cfg_set(cpu, 0);
index 3ac3a9bc663c5889a373a798883205894589d488..582886d0d02f7243d5117a59d88c356ceb4fd11a 100644 (file)
@@ -116,7 +116,7 @@ static int nsp_write_lut(unsigned int cpu)
                return -ENOMEM;
        }
 
-       secondary_startup_phy = virt_to_phys(secondary_startup);
+       secondary_startup_phy = __pa_symbol(secondary_startup);
        BUG_ON(secondary_startup_phy > (phys_addr_t)U32_MAX);
 
        writel_relaxed(secondary_startup_phy, sku_rom_lut);
@@ -189,7 +189,7 @@ static int kona_boot_secondary(unsigned int cpu, struct task_struct *idle)
         * Secondary cores will start in secondary_startup(),
         * defined in "arch/arm/kernel/head.S"
         */
-       boot_func = virt_to_phys(secondary_startup);
+       boot_func = __pa_symbol(secondary_startup);
        BUG_ON(boot_func & BOOT_ADDR_CPUID_MASK);
        BUG_ON(boot_func > (phys_addr_t)U32_MAX);
 
index 93f90688db18301b081d045a1089fe14e63ac2c1..7586b7aec272c0c5b3786d166d026c4f96af4afd 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/cp15.h>
+#include <asm/memory.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_scu.h>
 
@@ -75,7 +76,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus)
        if (!cpu_ctrl)
                goto unmap_scu;
 
-       vectors_base = ioremap(CONFIG_VECTORS_BASE, SZ_32K);
+       vectors_base = ioremap(VECTORS_BASE, SZ_32K);
        if (!vectors_base)
                goto unmap_scu;
 
@@ -92,7 +93,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus)
         * Write the secondary startup address into the SW reset address
         * vector. This is used by boot_inst.
         */
-       writel(virt_to_phys(secondary_startup), vectors_base + SW_RESET_ADDR);
+       writel(__pa_symbol(secondary_startup), vectors_base + SW_RESET_ADDR);
 
        iounmap(vectors_base);
 unmap_scu:
index 3b39ea353d3075b687a6f30a4688bab8994ab625..8a5b6f059498d4c53d664ab7fa0407026e02498c 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
-#include <linux/platform_data/rtc-m48t86.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 
@@ -45,16 +44,6 @@ static struct map_desc ts72xx_io_desc[] __initdata = {
                .pfn            = __phys_to_pfn(TS72XX_OPTIONS2_PHYS_BASE),
                .length         = TS72XX_OPTIONS2_SIZE,
                .type           = MT_DEVICE,
-       }, {
-               .virtual        = (unsigned long)TS72XX_RTC_INDEX_VIRT_BASE,
-               .pfn            = __phys_to_pfn(TS72XX_RTC_INDEX_PHYS_BASE),
-               .length         = TS72XX_RTC_INDEX_SIZE,
-               .type           = MT_DEVICE,
-       }, {
-               .virtual        = (unsigned long)TS72XX_RTC_DATA_VIRT_BASE,
-               .pfn            = __phys_to_pfn(TS72XX_RTC_DATA_PHYS_BASE),
-               .length         = TS72XX_RTC_DATA_SIZE,
-               .type           = MT_DEVICE,
        }
 };
 
@@ -179,31 +168,22 @@ static void __init ts72xx_register_flash(void)
        }
 }
 
+/*************************************************************************
+ * RTC M48T86
+ *************************************************************************/
+#define TS72XX_RTC_INDEX_PHYS_BASE     (EP93XX_CS1_PHYS_BASE + 0x00800000)
+#define TS72XX_RTC_DATA_PHYS_BASE      (EP93XX_CS1_PHYS_BASE + 0x01700000)
 
-static unsigned char ts72xx_rtc_readbyte(unsigned long addr)
-{
-       __raw_writeb(addr, TS72XX_RTC_INDEX_VIRT_BASE);
-       return __raw_readb(TS72XX_RTC_DATA_VIRT_BASE);
-}
-
-static void ts72xx_rtc_writebyte(unsigned char value, unsigned long addr)
-{
-       __raw_writeb(addr, TS72XX_RTC_INDEX_VIRT_BASE);
-       __raw_writeb(value, TS72XX_RTC_DATA_VIRT_BASE);
-}
-
-static struct m48t86_ops ts72xx_rtc_ops = {
-       .readbyte       = ts72xx_rtc_readbyte,
-       .writebyte      = ts72xx_rtc_writebyte,
+static struct resource ts72xx_rtc_resources[] = {
+       DEFINE_RES_MEM(TS72XX_RTC_INDEX_PHYS_BASE, 0x01),
+       DEFINE_RES_MEM(TS72XX_RTC_DATA_PHYS_BASE, 0x01),
 };
 
 static struct platform_device ts72xx_rtc_device = {
        .name           = "rtc-m48t86",
        .id             = -1,
-       .dev            = {
-               .platform_data  = &ts72xx_rtc_ops,
-       },
-       .num_resources  = 0,
+       .resource       = ts72xx_rtc_resources,
+       .num_resources  = ARRAY_SIZE(ts72xx_rtc_resources),
 };
 
 static struct resource ts72xx_wdt_resources[] = {
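This board file and the ts78xx one further down make the same conversion: the rtc-m48t86 driver no longer receives readbyte/writebyte callbacks through platform_data, but two one-byte memory resources (the index and data registers) that it maps itself. A hedged sketch of what the reworked driver side presumably does with them (function name invented for the sketch):

#include <linux/platform_device.h>
#include <linux/err.h>
#include <linux/io.h>

static int m48t86_probe_sketch(struct platform_device *pdev)
{
        void __iomem *index_reg, *data_reg;

        index_reg = devm_ioremap_resource(&pdev->dev,
                        platform_get_resource(pdev, IORESOURCE_MEM, 0));
        if (IS_ERR(index_reg))
                return PTR_ERR(index_reg);

        data_reg = devm_ioremap_resource(&pdev->dev,
                        platform_get_resource(pdev, IORESOURCE_MEM, 1));
        if (IS_ERR(data_reg))
                return PTR_ERR(data_reg);

        /* select a register, then read it: writeb(addr, index_reg); readb(data_reg); */
        return 0;
}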
index 071feaa30adc6094a995738a0b812a7f0b7b5a34..2255ba29fdd64d4bdc3102a69869184972a31f60 100644 (file)
@@ -9,8 +9,6 @@
  * febff000    22000000        4K      model number register (bits 0-2)
  * febfe000    22400000        4K      options register
  * febfd000    22800000        4K      options register #2
- * febf9000    10800000        4K      TS-5620 RTC index register
- * febf8000    11700000        4K      TS-5620 RTC data register
  */
 
 #define TS72XX_MODEL_PHYS_BASE         0x22000000
 #define TS72XX_OPTIONS2_TS9420         0x04
 #define TS72XX_OPTIONS2_TS9420_BOOT    0x02
 
-
-#define TS72XX_RTC_INDEX_VIRT_BASE     IOMEM(0xfebf9000)
-#define TS72XX_RTC_INDEX_PHYS_BASE     0x10800000
-#define TS72XX_RTC_INDEX_SIZE          0x00001000
-
-#define TS72XX_RTC_DATA_VIRT_BASE      IOMEM(0xfebf8000)
-#define TS72XX_RTC_DATA_PHYS_BASE      0x11700000
-#define TS72XX_RTC_DATA_SIZE           0x00001000
-
 #define TS72XX_WDT_CONTROL_PHYS_BASE   0x23800000
 #define TS72XX_WDT_FEED_PHYS_BASE      0x23c00000
 
index fd6da5419b5107e99346ae12c6c9d6d381ae0be6..e81a78b125d98af5f76568bb72f701400c45c2a9 100644 (file)
@@ -41,7 +41,7 @@ static int exynos_do_idle(unsigned long mode)
        case FW_DO_IDLE_AFTR:
                if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
                        exynos_save_cp15();
-               writel_relaxed(virt_to_phys(exynos_cpu_resume_ns),
+               writel_relaxed(__pa_symbol(exynos_cpu_resume_ns),
                               sysram_ns_base_addr + 0x24);
                writel_relaxed(EXYNOS_AFTR_MAGIC, sysram_ns_base_addr + 0x20);
                if (soc_is_exynos3250()) {
@@ -135,7 +135,7 @@ static int exynos_suspend(void)
                exynos_save_cp15();
 
        writel(EXYNOS_SLEEP_MAGIC, sysram_ns_base_addr + EXYNOS_BOOT_FLAG);
-       writel(virt_to_phys(exynos_cpu_resume_ns),
+       writel(__pa_symbol(exynos_cpu_resume_ns),
                sysram_ns_base_addr + EXYNOS_BOOT_ADDR);
 
        return cpu_suspend(0, exynos_cpu_suspend);
index 038fd8c993d0e6aa58acd7698be6c29cd287d4cc..b42622562ea79871b5c0ccaa960011cd6eef8c6d 100644 (file)
@@ -221,7 +221,7 @@ static void exynos_mcpm_setup_entry_point(void)
         */
        __raw_writel(0xe59f0000, ns_sram_base_addr);     /* ldr r0, [pc, #0] */
        __raw_writel(0xe12fff10, ns_sram_base_addr + 4); /* bx  r0 */
-       __raw_writel(virt_to_phys(mcpm_entry_point), ns_sram_base_addr + 8);
+       __raw_writel(__pa_symbol(mcpm_entry_point), ns_sram_base_addr + 8);
 }
 
 static struct syscore_ops exynos_mcpm_syscore_ops = {
index a5d68411a037994cfcf7f3c2b62c0afb5d91617f..5a03bffe7226030fe528eaf9c4c683b92822b4fd 100644 (file)
@@ -353,7 +353,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
 
                smp_rmb();
 
-               boot_addr = virt_to_phys(exynos4_secondary_startup);
+               boot_addr = __pa_symbol(exynos4_secondary_startup);
 
                ret = exynos_set_boot_addr(core_id, boot_addr);
                if (ret)
@@ -413,7 +413,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)
 
                mpidr = cpu_logical_map(i);
                core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-               boot_addr = virt_to_phys(exynos4_secondary_startup);
+               boot_addr = __pa_symbol(exynos4_secondary_startup);
 
                ret = exynos_set_boot_addr(core_id, boot_addr);
                if (ret)
index 487295f4a56bc07ab2412ca73538f539421234d6..1a7e5b5d08d83234ff58a6a75d6e0c777f0be9c6 100644 (file)
@@ -132,7 +132,7 @@ static void exynos_set_wakeupmask(long mask)
 
 static void exynos_cpu_set_boot_vector(long flags)
 {
-       writel_relaxed(virt_to_phys(exynos_cpu_resume),
+       writel_relaxed(__pa_symbol(exynos_cpu_resume),
                       exynos_boot_vector_addr());
        writel_relaxed(flags, exynos_boot_vector_flag());
 }
@@ -238,7 +238,7 @@ static int exynos_cpu0_enter_aftr(void)
 
 abort:
        if (cpu_online(1)) {
-               unsigned long boot_addr = virt_to_phys(exynos_cpu_resume);
+               unsigned long boot_addr = __pa_symbol(exynos_cpu_resume);
 
                /*
                 * Set the boot vector to something non-zero
@@ -330,7 +330,7 @@ cpu1_aborted:
 
 static void exynos_pre_enter_aftr(void)
 {
-       unsigned long boot_addr = virt_to_phys(exynos_cpu_resume);
+       unsigned long boot_addr = __pa_symbol(exynos_cpu_resume);
 
        (void)exynos_set_boot_addr(1, boot_addr);
 }
index adf4e8f182bd650eb0ce7e3ab148492fae53b58e..748cfb8d521247c2073b8cc064d963c1f7ab9eeb 100644 (file)
@@ -301,7 +301,7 @@ static void exynos_pm_prepare(void)
        exynos_pm_enter_sleep_mode();
 
        /* ensure at least INFORM0 has the resume address */
-       pmu_raw_writel(virt_to_phys(exynos_cpu_resume), S5P_INFORM0);
+       pmu_raw_writel(__pa_symbol(exynos_cpu_resume), S5P_INFORM0);
 }
 
 static void exynos3250_pm_prepare(void)
@@ -318,7 +318,7 @@ static void exynos3250_pm_prepare(void)
        exynos_pm_enter_sleep_mode();
 
        /* ensure at least INFORM0 has the resume address */
-       pmu_raw_writel(virt_to_phys(exynos_cpu_resume), S5P_INFORM0);
+       pmu_raw_writel(__pa_symbol(exynos_cpu_resume), S5P_INFORM0);
 }
 
 static void exynos5420_pm_prepare(void)
@@ -343,7 +343,7 @@ static void exynos5420_pm_prepare(void)
 
        /* ensure at least INFORM0 has the resume address */
        if (IS_ENABLED(CONFIG_EXYNOS5420_MCPM))
-               pmu_raw_writel(virt_to_phys(mcpm_entry_point), S5P_INFORM0);
+               pmu_raw_writel(__pa_symbol(mcpm_entry_point), S5P_INFORM0);
 
        tmp = pmu_raw_readl(EXYNOS_L2_OPTION(0));
        tmp &= ~EXYNOS_L2_USE_RETENTION;
index 4b653a8cb75ce82f2a5ff163bec263591c1a2983..a6c117622d67619ae4eccc7f720b785878935585 100644 (file)
@@ -327,7 +327,7 @@ static int __init hip04_smp_init(void)
         */
        writel_relaxed(hip04_boot_method[0], relocation);
        writel_relaxed(0xa5a5a5a5, relocation + 4);     /* magic number */
-       writel_relaxed(virt_to_phys(secondary_startup), relocation + 8);
+       writel_relaxed(__pa_symbol(secondary_startup), relocation + 8);
        writel_relaxed(0, relocation + 12);
        iounmap(relocation);
 
index e1d67648d5d02ed8d96b3ce6314691459ea2e0fe..91bb02dec20f15a63f438f3ab65fe5dc62a40af7 100644 (file)
@@ -28,7 +28,7 @@ void hi3xxx_set_cpu_jump(int cpu, void *jump_addr)
        cpu = cpu_logical_map(cpu);
        if (!cpu || !ctrl_base)
                return;
-       writel_relaxed(virt_to_phys(jump_addr), ctrl_base + ((cpu - 1) << 2));
+       writel_relaxed(__pa_symbol(jump_addr), ctrl_base + ((cpu - 1) << 2));
 }
 
 int hi3xxx_get_cpu_jump(int cpu)
@@ -118,7 +118,7 @@ static int hix5hd2_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
        phys_addr_t jumpaddr;
 
-       jumpaddr = virt_to_phys(secondary_startup);
+       jumpaddr = __pa_symbol(secondary_startup);
        hix5hd2_set_scu_boot_addr(HIX5HD2_BOOT_ADDRESS, jumpaddr);
        hix5hd2_set_cpu(cpu, true);
        arch_send_wakeup_ipi_mask(cpumask_of(cpu));
@@ -156,7 +156,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle)
        struct device_node *node;
 
 
-       jumpaddr = virt_to_phys(secondary_startup);
+       jumpaddr = __pa_symbol(secondary_startup);
        hip01_set_boot_addr(HIP01_BOOT_ADDRESS, jumpaddr);
 
        node = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl");
index 711dbbd5baddaac8953d2d90ac1232776efd9f16..c2d1b329fba13da1511af56d0305a01608d55511 100644 (file)
@@ -117,7 +117,7 @@ static void __init ls1021a_smp_prepare_cpus(unsigned int max_cpus)
        dcfg_base = of_iomap(np, 0);
        BUG_ON(!dcfg_base);
 
-       paddr = virt_to_phys(secondary_startup);
+       paddr = __pa_symbol(secondary_startup);
        writel_relaxed(cpu_to_be32(paddr), dcfg_base + DCFG_CCSR_SCRATCHRW1);
 
        iounmap(dcfg_base);
index 1515e498d348c6ea1636149e35665b4fd2e2b239..e61b1d1027e12d5c5284f74f4480dbba70b47b05 100644 (file)
@@ -499,7 +499,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata)
        memset(suspend_ocram_base, 0, sizeof(*pm_info));
        pm_info = suspend_ocram_base;
        pm_info->pbase = ocram_pbase;
-       pm_info->resume_addr = virt_to_phys(v7_cpu_resume);
+       pm_info->resume_addr = __pa_symbol(v7_cpu_resume);
        pm_info->pm_info_size = sizeof(*pm_info);
 
        /*
index 70b083fe934a8f7763cd4cfc4dca0fea73f66d3d..495d85d0fe7ef64daf560a9ef53914b4f82fde08 100644 (file)
@@ -99,7 +99,7 @@ void imx_enable_cpu(int cpu, bool enable)
 void imx_set_cpu_jump(int cpu, void *jump_addr)
 {
        cpu = cpu_logical_map(cpu);
-       writel_relaxed(virt_to_phys(jump_addr),
+       writel_relaxed(__pa_symbol(jump_addr),
                       src_base + SRC_GPR1 + cpu * 8);
 }
 
index b821e34474b6fc5a950e1739365b2442cac0a2c5..726eb69bb655decfbd7a817bdddcb8946c761dc4 100644 (file)
@@ -122,7 +122,7 @@ static void __init __mtk_smp_prepare_cpus(unsigned int max_cpus, int trustzone)
         * write the address of slave startup address into the system-wide
         * jump register
         */
-       writel_relaxed(virt_to_phys(secondary_startup_arm),
+       writel_relaxed(__pa_symbol(secondary_startup_arm),
                        mtk_smp_base + mtk_smp_info->jump_reg);
 }
 
index 2990c5269b18586be26dbf3405f0aa696d4d0973..c487be61d6d8c9f00ceb6e131f9ab364d4e56fdd 100644 (file)
@@ -110,7 +110,7 @@ static void mvebu_pm_store_armadaxp_bootinfo(u32 *store_addr)
 {
        phys_addr_t resume_pc;
 
-       resume_pc = virt_to_phys(armada_370_xp_cpu_resume);
+       resume_pc = __pa_symbol(armada_370_xp_cpu_resume);
 
        /*
         * The bootloader expects the first two words to be a magic
index f39bd51bce18b85784e3269a61ddf82a7119d3f8..27a78c80e5b17352aafc4924d04694217bfa5c15 100644 (file)
@@ -112,7 +112,7 @@ static const struct of_device_id of_pmsu_table[] = {
 
 void mvebu_pmsu_set_cpu_boot_addr(int hw_cpu, void *boot_addr)
 {
-       writel(virt_to_phys(boot_addr), pmsu_mp_base +
+       writel(__pa_symbol(boot_addr), pmsu_mp_base +
                PMSU_BOOT_ADDR_REDIRECT_OFFSET(hw_cpu));
 }
 
index 76cbc82a7407e48d03317db872ddad26b8e24de4..04d9ebe6a90a0ccfe427fae9922ef9198bab3a37 100644 (file)
@@ -153,7 +153,7 @@ void mvebu_system_controller_set_cpu_boot_addr(void *boot_addr)
        if (of_machine_is_compatible("marvell,armada375"))
                mvebu_armada375_smp_wa_init();
 
-       writel(virt_to_phys(boot_addr), system_controller_base +
+       writel(__pa_symbol(boot_addr), system_controller_base +
               mvebu_sc->resume_boot_addr);
 }
 #endif
index 1662071bb2cc8361023aa3a066ab963bbd411540..bd8089ff929f61847bf4fc49d4efe0c6fe9c6136 100644 (file)
@@ -315,15 +315,15 @@ void omap3_save_scratchpad_contents(void)
        scratchpad_contents.boot_config_ptr = 0x0;
        if (cpu_is_omap3630())
                scratchpad_contents.public_restore_ptr =
-                       virt_to_phys(omap3_restore_3630);
+                       __pa_symbol(omap3_restore_3630);
        else if (omap_rev() != OMAP3430_REV_ES3_0 &&
                                        omap_rev() != OMAP3430_REV_ES3_1 &&
                                        omap_rev() != OMAP3430_REV_ES3_1_2)
                scratchpad_contents.public_restore_ptr =
-                       virt_to_phys(omap3_restore);
+                       __pa_symbol(omap3_restore);
        else
                scratchpad_contents.public_restore_ptr =
-                       virt_to_phys(omap3_restore_es3);
+                       __pa_symbol(omap3_restore_es3);
 
        if (omap_type() == OMAP2_DEVICE_TYPE_GP)
                scratchpad_contents.secure_ram_restore_ptr = 0x0;
@@ -395,7 +395,7 @@ void omap3_save_scratchpad_contents(void)
        sdrc_block_contents.flags = 0x0;
        sdrc_block_contents.block_size = 0x0;
 
-       arm_context_addr = virt_to_phys(omap3_arm_context);
+       arm_context_addr = __pa_symbol(omap3_arm_context);
 
        /* Copy all the contents to the scratchpad location */
        scratchpad_address = OMAP2_L4_IO_ADDRESS(OMAP343X_SCRATCHPAD);
index 7d62ad48c7c9dd1dfb290cdcf5d61479b081d7a9..113ab2dd2ee91ccf9c238813bd6c4d7561ff7d97 100644 (file)
@@ -273,7 +273,7 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state)
        cpu_clear_prev_logic_pwrst(cpu);
        pwrdm_set_next_pwrst(pm_info->pwrdm, power_state);
        pwrdm_set_logic_retst(pm_info->pwrdm, cpu_logic_state);
-       set_cpu_wakeup_addr(cpu, virt_to_phys(omap_pm_ops.resume));
+       set_cpu_wakeup_addr(cpu, __pa_symbol(omap_pm_ops.resume));
        omap_pm_ops.scu_prepare(cpu, power_state);
        l2x0_pwrst_prepare(cpu, save_state);
 
@@ -325,7 +325,7 @@ int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state)
 
        pwrdm_clear_all_prev_pwrst(pm_info->pwrdm);
        pwrdm_set_next_pwrst(pm_info->pwrdm, power_state);
-       set_cpu_wakeup_addr(cpu, virt_to_phys(omap_pm_ops.hotplug_restart));
+       set_cpu_wakeup_addr(cpu, __pa_symbol(omap_pm_ops.hotplug_restart));
        omap_pm_ops.scu_prepare(cpu, power_state);
 
        /*
@@ -467,13 +467,13 @@ void __init omap4_mpuss_early_init(void)
        sar_base = omap4_get_sar_ram_base();
 
        if (cpu_is_omap443x())
-               startup_pa = virt_to_phys(omap4_secondary_startup);
+               startup_pa = __pa_symbol(omap4_secondary_startup);
        else if (cpu_is_omap446x())
-               startup_pa = virt_to_phys(omap4460_secondary_startup);
+               startup_pa = __pa_symbol(omap4460_secondary_startup);
        else if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE)
-               startup_pa = virt_to_phys(omap5_secondary_hyp_startup);
+               startup_pa = __pa_symbol(omap5_secondary_hyp_startup);
        else
-               startup_pa = virt_to_phys(omap5_secondary_startup);
+               startup_pa = __pa_symbol(omap5_secondary_startup);
 
        if (cpu_is_omap44xx())
                writel_relaxed(startup_pa, sar_base +
index b4de3da6dffa5e0593dad0ca305895fc9c341e6a..003353b0b7944d9363fb6446e683314823cd82f9 100644 (file)
@@ -316,9 +316,9 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
         * A barrier is added to ensure that write buffer is drained
         */
        if (omap_secure_apis_support())
-               omap_auxcoreboot_addr(virt_to_phys(cfg.startup_addr));
+               omap_auxcoreboot_addr(__pa_symbol(cfg.startup_addr));
        else
-               writel_relaxed(virt_to_phys(cfg.startup_addr),
+               writel_relaxed(__pa_symbol(cfg.startup_addr),
                               base + OMAP_AUX_CORE_BOOT_1);
 }
 
index 6bf6267005571b5fadc03d4bab9d8e60489d2d0c..1346b3ab34a5e39b319dcbb718fec2cfb2423d9a 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * OMAP and TWL PMIC specific intializations.
+ * OMAP and TWL PMIC specific initializations.
  *
  * Copyright (C) 2010 Texas Instruments Incorporated.
  * Thara Gopinath
index 8d597267d0c457530ca097b0730f251fd2d4c7bd..7ef80a8304c0c3ce96f3120e285914fce49b89e8 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/platform_device.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ata_platform.h>
-#include <linux/platform_data/rtc-m48t86.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/timeriomem-rng.h>
@@ -80,79 +79,38 @@ static struct mv_sata_platform_data ts78xx_sata_data = {
 /*****************************************************************************
  * RTC M48T86 - nicked^Wborrowed from arch/arm/mach-ep93xx/ts72xx.c
  ****************************************************************************/
-#define TS_RTC_CTRL    (TS78XX_FPGA_REGS_VIRT_BASE + 0x808)
-#define TS_RTC_DATA    (TS78XX_FPGA_REGS_VIRT_BASE + 0x80c)
+#define TS_RTC_CTRL    (TS78XX_FPGA_REGS_PHYS_BASE + 0x808)
+#define TS_RTC_DATA    (TS78XX_FPGA_REGS_PHYS_BASE + 0x80c)
 
-static unsigned char ts78xx_ts_rtc_readbyte(unsigned long addr)
-{
-       writeb(addr, TS_RTC_CTRL);
-       return readb(TS_RTC_DATA);
-}
-
-static void ts78xx_ts_rtc_writebyte(unsigned char value, unsigned long addr)
-{
-       writeb(addr, TS_RTC_CTRL);
-       writeb(value, TS_RTC_DATA);
-}
-
-static struct m48t86_ops ts78xx_ts_rtc_ops = {
-       .readbyte       = ts78xx_ts_rtc_readbyte,
-       .writebyte      = ts78xx_ts_rtc_writebyte,
+static struct resource ts78xx_ts_rtc_resources[] = {
+       DEFINE_RES_MEM(TS_RTC_CTRL, 0x01),
+       DEFINE_RES_MEM(TS_RTC_DATA, 0x01),
 };
 
 static struct platform_device ts78xx_ts_rtc_device = {
        .name           = "rtc-m48t86",
        .id             = -1,
-       .dev            = {
-               .platform_data  = &ts78xx_ts_rtc_ops,
-       },
-       .num_resources  = 0,
+       .resource       = ts78xx_ts_rtc_resources,
+       .num_resources  = ARRAY_SIZE(ts78xx_ts_rtc_resources),
 };
 
-/*
- * TS uses some of the user storage space on the RTC chip so see if it is
- * present; as it's an optional feature at purchase time and not all boards
- * will have it present
- *
- * I've used the method TS use in their rtc7800.c example for the detection
- *
- * TODO: track down a guinea pig without an RTC to see if we can work out a
- *             better RTC detection routine
- */
 static int ts78xx_ts_rtc_load(void)
 {
        int rc;
-       unsigned char tmp_rtc0, tmp_rtc1;
-
-       tmp_rtc0 = ts78xx_ts_rtc_readbyte(126);
-       tmp_rtc1 = ts78xx_ts_rtc_readbyte(127);
-
-       ts78xx_ts_rtc_writebyte(0x00, 126);
-       ts78xx_ts_rtc_writebyte(0x55, 127);
-       if (ts78xx_ts_rtc_readbyte(127) == 0x55) {
-               ts78xx_ts_rtc_writebyte(0xaa, 127);
-               if (ts78xx_ts_rtc_readbyte(127) == 0xaa
-                               && ts78xx_ts_rtc_readbyte(126) == 0x00) {
-                       ts78xx_ts_rtc_writebyte(tmp_rtc0, 126);
-                       ts78xx_ts_rtc_writebyte(tmp_rtc1, 127);
-
-                       if (ts78xx_fpga.supports.ts_rtc.init == 0) {
-                               rc = platform_device_register(&ts78xx_ts_rtc_device);
-                               if (!rc)
-                                       ts78xx_fpga.supports.ts_rtc.init = 1;
-                       } else
-                               rc = platform_device_add(&ts78xx_ts_rtc_device);
-
-                       if (rc)
-                               pr_info("RTC could not be registered: %d\n",
-                                       rc);
-                       return rc;
-               }
+
+       if (ts78xx_fpga.supports.ts_rtc.init == 0) {
+               rc = platform_device_register(&ts78xx_ts_rtc_device);
+               if (!rc)
+                       ts78xx_fpga.supports.ts_rtc.init = 1;
+       } else {
+               rc = platform_device_add(&ts78xx_ts_rtc_device);
        }
 
-       pr_info("RTC not found\n");
-       return -ENODEV;
-};
+       if (rc)
+               pr_info("RTC could not be registered: %d\n", rc);
+
+       return rc;
+}
 
 static void ts78xx_ts_rtc_unload(void)
 {
index 0875b99add1870dab41b3b6b029a9a1ef61f708d..75ef5d4be554ce9f8564f347e52da1e6766bf5ac 100644 (file)
@@ -65,7 +65,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
         * waiting for. This would wake up the secondary core from WFE
         */
 #define SIRFSOC_CPU1_JUMPADDR_OFFSET 0x2bc
-       __raw_writel(virt_to_phys(sirfsoc_secondary_startup),
+       __raw_writel(__pa_symbol(sirfsoc_secondary_startup),
                clk_base + SIRFSOC_CPU1_JUMPADDR_OFFSET);
 
 #define SIRFSOC_CPU1_WAKEMAGIC_OFFSET 0x2b8
index 83e94c95e314414a6d85e145c9e204f186f9cc75..b0bcf1ff02dd058687a00f173090bc3183a22b63 100644 (file)
@@ -54,7 +54,7 @@ static void sirfsoc_set_sleep_mode(u32 mode)
 
 static int sirfsoc_pre_suspend_power_off(void)
 {
-       u32 wakeup_entry = virt_to_phys(cpu_resume);
+       u32 wakeup_entry = __pa_symbol(cpu_resume);
 
        sirfsoc_rtc_iobrg_writel(wakeup_entry, sirfsoc_pwrc_base +
                SIRFSOC_PWRC_SCRATCH_PAD1);
index 9c308de158c6fa2c0cf07a331c68292f8d4788b6..29630061e7007f8365be536d836b617ebd57ae01 100644 (file)
@@ -249,7 +249,7 @@ static int palmz72_pm_suspend(void)
        store_ptr = *PALMZ72_SAVE_DWORD;
 
        /* Setting PSPR to a proper value */
-       PSPR = virt_to_phys(&palmz72_resume_info);
+       PSPR = __pa_symbol(&palmz72_resume_info);
 
        return 0;
 }
index c725baf119e1135e0b8f796c18e3684dc5b74862..ba431fad5c47fd4456871083c5f81092da6814da 100644 (file)
@@ -85,7 +85,7 @@ static void pxa25x_cpu_pm_enter(suspend_state_t state)
 static int pxa25x_cpu_pm_prepare(void)
 {
        /* set resume return address */
-       PSPR = virt_to_phys(cpu_resume);
+       PSPR = __pa_symbol(cpu_resume);
        return 0;
 }
 
index c0185c5c5a08b4bcc03ff110909bf7dca46c4732..9b69be4e9fe33156837fb7d520476d47cd1e23d1 100644 (file)
@@ -168,7 +168,7 @@ static int pxa27x_cpu_pm_valid(suspend_state_t state)
 static int pxa27x_cpu_pm_prepare(void)
 {
        /* set resume return address */
-       PSPR = virt_to_phys(cpu_resume);
+       PSPR = __pa_symbol(cpu_resume);
        return 0;
 }
 
index 87acc96388c7347949c685e55a5e3b2832a2daf2..0cc9f124c9ac3769c73d52973a34c959eb56ad0a 100644 (file)
@@ -123,7 +123,7 @@ static void pxa3xx_cpu_pm_suspend(void)
        PSPR = 0x5c014000;
 
        /* overwrite with the resume address */
-       *p = virt_to_phys(cpu_resume);
+       *p = __pa_symbol(cpu_resume);
 
        cpu_suspend(0, pxa3xx_finish_suspend);
 
index 70ca99eb52c6ce8d537d0148bd71bc8bbf04282e..c242423bf8db5a5e64d9818f01c42e0b23b699fd 100644 (file)
@@ -76,7 +76,7 @@ static void __init realview_smp_prepare_cpus(unsigned int max_cpus)
        }
        /* Put the boot address in this magic register */
        regmap_write(map, REALVIEW_SYS_FLAGSSET_OFFSET,
-                    virt_to_phys(versatile_secondary_startup));
+                    __pa_symbol(versatile_secondary_startup));
 }
 
 static const struct smp_operations realview_dt_smp_ops __initconst = {
index 4d827a069d49c7a9a5dcae3ce9b2f81d807b3578..3abafdbdd7f4a24d7afed4aa53f196aab838ac92 100644 (file)
@@ -156,7 +156,7 @@ static int rockchip_boot_secondary(unsigned int cpu, struct task_struct *idle)
                 */
                mdelay(1); /* ensure the cpus other than cpu0 to startup */
 
-               writel(virt_to_phys(secondary_startup), sram_base_addr + 8);
+               writel(__pa_symbol(secondary_startup), sram_base_addr + 8);
                writel(0xDEADBEAF, sram_base_addr + 4);
                dsb_sev();
        }
@@ -195,7 +195,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node)
        }
 
        /* set the boot function for the sram code */
-       rockchip_boot_fn = virt_to_phys(secondary_startup);
+       rockchip_boot_fn = __pa_symbol(secondary_startup);
 
        /* copy the trampoline to sram, that runs during startup of the core */
        memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz);
index bee8c80519299269cde5ba9c852243d6e5a47b05..0592534e0b88c47c203686faafc8ba5650cd7d54 100644 (file)
@@ -62,7 +62,7 @@ static inline u32 rk3288_l2_config(void)
 static void rk3288_config_bootdata(void)
 {
        rkpm_bootdata_cpusp = rk3288_bootram_phy + (SZ_4K - 8);
-       rkpm_bootdata_cpu_code = virt_to_phys(cpu_resume);
+       rkpm_bootdata_cpu_code = __pa_symbol(cpu_resume);
 
        rkpm_bootdata_l2ctlr_f  = 1;
        rkpm_bootdata_l2ctlr = rk3288_l2_config();
index 895aca225952d62f137aae798f603fa2006fc60e..f5b5c49b56ac0b561576ac64acef9c1846fa6020 100644 (file)
@@ -484,7 +484,7 @@ static int jive_pm_suspend(void)
         * correct address to resume from. */
 
        __raw_writel(0x2BED, S3C2412_INFORM0);
-       __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2412_INFORM1);
+       __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2412_INFORM1);
 
        return 0;
 }
index 20e481d8a33a60ff10ab3a3962bf78d5b0241c0c..a4588daeddb0f6ab94b85f837c2ccdb0791c5816 100644 (file)
@@ -45,7 +45,7 @@ static void s3c2410_pm_prepare(void)
 {
        /* ensure at least GSTATUS3 has the resume address */
 
-       __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2410_GSTATUS3);
+       __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2410_GSTATUS3);
 
        S3C_PMDBG("GSTATUS3 0x%08x\n", __raw_readl(S3C2410_GSTATUS3));
        S3C_PMDBG("GSTATUS4 0x%08x\n", __raw_readl(S3C2410_GSTATUS4));
index c0e328e37bd63927fd48b6cc415fef6bc86bdd44..b5bbf0d5985c818947e9b7a44c2a966e0c6c995b 100644 (file)
@@ -48,7 +48,7 @@ static void s3c2416_pm_prepare(void)
         * correct address to resume from.
         */
        __raw_writel(0x2BED, S3C2412_INFORM0);
-       __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2412_INFORM1);
+       __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2412_INFORM1);
 }
 
 static int s3c2416_pm_add(struct device *dev, struct subsys_interface *sif)
index b0be382ff6bb7fa7debce8a9f7cbc2b2384e2bc1..2f579be8fe677adc72032ca91a647ac046f02a4a 100644 (file)
@@ -304,7 +304,7 @@ static void s3c64xx_pm_prepare(void)
                              wake_irqs, ARRAY_SIZE(wake_irqs));
 
        /* store address of resume. */
-       __raw_writel(virt_to_phys(s3c_cpu_resume), S3C64XX_INFORM0);
+       __raw_writel(__pa_symbol(s3c_cpu_resume), S3C64XX_INFORM0);
 
        /* ensure previous wakeup state is cleared before sleeping */
        __raw_writel(__raw_readl(S3C64XX_WAKEUP_STAT), S3C64XX_WAKEUP_STAT);
index 7d69666de5ba2dd6b3c5c705d608ba4ad182767b..07cee14a363b05eb1ca99418eb8d3c0074d0b04d 100644 (file)
@@ -69,7 +69,7 @@ static void s5pv210_pm_prepare(void)
        __raw_writel(s5pv210_irqwake_intmask, S5P_WAKEUP_MASK);
 
        /* ensure at least INFORM0 has the resume address */
-       __raw_writel(virt_to_phys(s5pv210_cpu_resume), S5P_INFORM0);
+       __raw_writel(__pa_symbol(s5pv210_cpu_resume), S5P_INFORM0);
 
        tmp = __raw_readl(S5P_SLEEP_CFG);
        tmp &= ~(S5P_SLEEP_CFG_OSC_EN | S5P_SLEEP_CFG_USBOSC_EN);
index 34853d5dfda28b9e5d5f79205c30c1a71facf5d8..9a7079f565bd394c4ae1436e8a2b14cd52acba34 100644 (file)
@@ -73,7 +73,7 @@ static int sa11x0_pm_enter(suspend_state_t state)
        RCSR = RCSR_HWR | RCSR_SWR | RCSR_WDR | RCSR_SMR;
 
        /* set resume return address */
-       PSPR = virt_to_phys(cpu_resume);
+       PSPR = __pa_symbol(cpu_resume);
 
        /* go zzz */
        cpu_suspend(0, sa1100_finish_suspend);
index e19266844e16126b855bd9ab1f82e3ae5fa7a5f4..3ca2c13346f0cbc35d291cfdb28c966b71aba67e 100644 (file)
@@ -190,7 +190,7 @@ static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit))
 static void __init shmobile_smp_apmu_setup_boot(void)
 {
        /* install boot code shared by all CPUs */
-       shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
+       shmobile_boot_fn = __pa_symbol(shmobile_smp_boot);
 }
 
 void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus,
@@ -204,7 +204,7 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus,
 int shmobile_smp_apmu_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
        /* For this particular CPU register boot vector */
-       shmobile_smp_hook(cpu, virt_to_phys(secondary_startup), 0);
+       shmobile_smp_hook(cpu, __pa_symbol(secondary_startup), 0);
 
        return apmu_wrap(cpu, apmu_power_on);
 }
@@ -308,7 +308,7 @@ int shmobile_smp_apmu_cpu_kill(unsigned int cpu)
 #if defined(CONFIG_SUSPEND)
 static int shmobile_smp_apmu_do_suspend(unsigned long cpu)
 {
-       shmobile_smp_hook(cpu, virt_to_phys(cpu_resume), 0);
+       shmobile_smp_hook(cpu, __pa_symbol(cpu_resume), 0);
        shmobile_smp_apmu_cpu_shutdown(cpu);
        cpu_do_idle(); /* WFI selects Core Standby */
        return 1;
index d1ecaf37d1422d214d56177f42da9af9529454fb..f1a1efde4beb19c2b520188aa9dac6fed428c224 100644 (file)
@@ -24,7 +24,7 @@ static void __iomem *shmobile_scu_base;
 static int shmobile_scu_cpu_prepare(unsigned int cpu)
 {
        /* For this particular CPU register SCU SMP boot vector */
-       shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu),
+       shmobile_smp_hook(cpu, __pa_symbol(shmobile_boot_scu),
                          shmobile_scu_base_phys);
        return 0;
 }
@@ -33,7 +33,7 @@ void __init shmobile_smp_scu_prepare_cpus(phys_addr_t scu_base_phys,
                                          unsigned int max_cpus)
 {
        /* install boot code shared by all CPUs */
-       shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
+       shmobile_boot_fn = __pa_symbol(shmobile_smp_boot);
 
        /* enable SCU and cache coherency on booting CPU */
        shmobile_scu_base_phys = scu_base_phys;
index 07945748b57141f2c216d729a6f895d36227be7a..0ee76772b50743f099637e18b4859d5736644b23 100644 (file)
@@ -40,7 +40,7 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
 
                memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
 
-               writel(virt_to_phys(secondary_startup),
+               writel(__pa_symbol(secondary_startup),
                       sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff));
 
                flush_cache_all();
@@ -63,7 +63,7 @@ static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle
                       SOCFPGA_A10_RSTMGR_MODMPURST);
                memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
 
-               writel(virt_to_phys(secondary_startup),
+               writel(__pa_symbol(secondary_startup),
                       sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff));
 
                flush_cache_all();
index 8d1e2d55178684cd96c49828eb5390e24286ff7c..39038a03836acb8f3288488f063a99d5ef0f814c 100644 (file)
@@ -117,7 +117,7 @@ static void __init spear13xx_smp_prepare_cpus(unsigned int max_cpus)
         * (presently it is in SRAM). The BootMonitor waits until it receives a
         * soft interrupt, and then the secondary CPU branches to this address.
         */
-       __raw_writel(virt_to_phys(spear13xx_secondary_startup), SYS_LOCATION);
+       __raw_writel(__pa_symbol(spear13xx_secondary_startup), SYS_LOCATION);
 }
 
 const struct smp_operations spear13xx_smp_ops __initconst = {
index ea5a2277ee46b4132edfea00fd6e6c07a91be630..231f19e174365229f034c9897b6e61984b92d84c 100644 (file)
@@ -103,7 +103,7 @@ static void __init sti_smp_prepare_cpus(unsigned int max_cpus)
        u32 __iomem *cpu_strt_ptr;
        u32 release_phys;
        int cpu;
-       unsigned long entry_pa = virt_to_phys(sti_secondary_startup);
+       unsigned long entry_pa = __pa_symbol(sti_secondary_startup);
 
        np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-scu");
 
index 6642267812c96fc3f2b0e1f694b5bf3667dd20ed..8fb5088464db3dc932d367ff8272e303d1401b48 100644 (file)
@@ -80,7 +80,7 @@ static int sun6i_smp_boot_secondary(unsigned int cpu,
        spin_lock(&cpu_lock);
 
        /* Set CPU boot address */
-       writel(virt_to_phys(secondary_startup),
+       writel(__pa_symbol(secondary_startup),
               cpucfg_membase + CPUCFG_PRIVATE0_REG);
 
        /* Assert the CPU core in reset */
@@ -162,7 +162,7 @@ static int sun8i_smp_boot_secondary(unsigned int cpu,
        spin_lock(&cpu_lock);
 
        /* Set CPU boot address */
-       writel(virt_to_phys(secondary_startup),
+       writel(__pa_symbol(secondary_startup),
               cpucfg_membase + CPUCFG_PRIVATE0_REG);
 
        /* Assert the CPU core in reset */
index 98c62a4a8623df12f3e9a4d2fdd6118af7befd75..2f0c6c050fed742de088c13b74e70f1537ca70ae 100644 (file)
@@ -5,7 +5,7 @@
 
 static int tango_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-       tango_set_aux_boot_addr(virt_to_phys(secondary_startup));
+       tango_set_aux_boot_addr(__pa_symbol(secondary_startup));
        tango_start_aux_core(cpu);
        return 0;
 }
index b05c6d6f99d072b5ef006004475776d14513745c..406c0814eb6e6195bf61990d73febbec8109bdd7 100644 (file)
@@ -5,7 +5,7 @@
 
 static int tango_pm_powerdown(unsigned long arg)
 {
-       tango_suspend(virt_to_phys(cpu_resume));
+       tango_suspend(__pa_symbol(cpu_resume));
 
        return -EIO; /* tango_suspend has failed */
 }
index 6fd9db54887eeebd400e425a216bce2cce9399b2..dc558892753c69c3c12829d27f03282f9ae1e49b 100644 (file)
@@ -94,14 +94,14 @@ void __init tegra_cpu_reset_handler_init(void)
        __tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_PRESENT] =
                *((u32 *)cpu_possible_mask);
        __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_SECONDARY] =
-               virt_to_phys((void *)secondary_startup);
+               __pa_symbol((void *)secondary_startup);
 #endif
 
 #ifdef CONFIG_PM_SLEEP
        __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_LP1] =
                TEGRA_IRAM_LPx_RESUME_AREA;
        __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_LP2] =
-               virt_to_phys((void *)tegra_resume);
+               __pa_symbol((void *)tegra_resume);
 #endif
 
        tegra_cpu_reset_handler_enable();
index e0ee139fdebfeffbd51918948d3e94909ccc89b6..9b124c22035f4a336456eaaa5afc92799f4c68d2 100644 (file)
@@ -79,7 +79,7 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
         * backup ram register at offset 0x1FF0, which is what boot rom code
         * is waiting for. This will wake up the secondary core from WFE.
         */
-       writel(virt_to_phys(secondary_startup),
+       writel(__pa_symbol(secondary_startup),
               backupram + UX500_CPU1_JUMPADDR_OFFSET);
        writel(0xA1FEED01,
               backupram + UX500_CPU1_WAKEMAGIC_OFFSET);
index 5cedcf572104bcdf82b1b75ab61f89f4cdf19651..ee2a0faafaa19309ca660543b71819cee82fc72c 100644 (file)
@@ -166,7 +166,7 @@ static int __init dcscb_init(void)
         * Future entries into the kernel can now go
         * through the cluster entry vectors.
         */
-       vexpress_flags_set(virt_to_phys(mcpm_entry_point));
+       vexpress_flags_set(__pa_symbol(mcpm_entry_point));
 
        return 0;
 }
index 98e29dee91e865f7c76408c845e99ff2af14ca92..742499bac6d09f27eaed8580e2df4b06e63a3252 100644 (file)
@@ -79,7 +79,7 @@ static void __init vexpress_smp_dt_prepare_cpus(unsigned int max_cpus)
         * until it receives a soft interrupt, and then the
         * secondary CPU branches to this address.
         */
-       vexpress_flags_set(virt_to_phys(versatile_secondary_startup));
+       vexpress_flags_set(__pa_symbol(versatile_secondary_startup));
 }
 
 const struct smp_operations vexpress_smp_dt_ops __initconst = {
index 1aa4ccece69f97cb06dacb98f146d91bde6560d2..9b5f3c427086cd28f2e3781df87b560de7161167 100644 (file)
@@ -54,7 +54,7 @@ static int tc2_pm_cpu_powerup(unsigned int cpu, unsigned int cluster)
        if (cluster >= TC2_CLUSTERS || cpu >= tc2_nr_cpus[cluster])
                return -EINVAL;
        ve_spc_set_resume_addr(cluster, cpu,
-                              virt_to_phys(mcpm_entry_point));
+                              __pa_symbol(mcpm_entry_point));
        ve_spc_cpu_wakeup_irq(cluster, cpu, true);
        return 0;
 }
@@ -159,7 +159,7 @@ static int tc2_pm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
 
 static void tc2_pm_cpu_suspend_prepare(unsigned int cpu, unsigned int cluster)
 {
-       ve_spc_set_resume_addr(cluster, cpu, virt_to_phys(mcpm_entry_point));
+       ve_spc_set_resume_addr(cluster, cpu, __pa_symbol(mcpm_entry_point));
 }
 
 static void tc2_pm_cpu_is_up(unsigned int cpu, unsigned int cluster)
index 0297f92084e048234ed557443983735079d7ac2a..afb9a82dedc314b9b63f3bb7f0cfc8c81c8a6697 100644 (file)
@@ -76,7 +76,7 @@ void __init zx_smp_prepare_cpus(unsigned int max_cpus)
         * until it receives a soft interrupt, and then the
         * secondary CPU branches to this address.
         */
-       __raw_writel(virt_to_phys(zx_secondary_startup),
+       __raw_writel(__pa_symbol(zx_secondary_startup),
                     aonsysctrl_base + AON_SYS_CTRL_RESERVED1);
 
        iounmap(aonsysctrl_base);
@@ -94,7 +94,7 @@ void __init zx_smp_prepare_cpus(unsigned int max_cpus)
 
        /* Map the first 4 KB IRAM for suspend usage */
        sys_iram = __arm_ioremap_exec(ZX_IRAM_BASE, PAGE_SIZE, false);
-       zx_secondary_startup_pa = virt_to_phys(zx_secondary_startup);
+       zx_secondary_startup_pa = __pa_symbol(zx_secondary_startup);
        fncpy(sys_iram, &zx_resume_jump, zx_suspend_iram_sz);
 }
 
index 7cd9865bdeb7bed3c46c575fb5fea5a022ac251b..caa6d5fe9078326ea65d29d8d6359e1a82306969 100644 (file)
@@ -89,7 +89,7 @@ EXPORT_SYMBOL(zynq_cpun_start);
 
 static int zynq_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-       return zynq_cpun_start(virt_to_phys(secondary_startup), cpu);
+       return zynq_cpun_start(__pa_symbol(secondary_startup), cpu);
 }
 
 /*
index 35e3a56e5d865bc38fed7a86ce91563c08e3dfa7..c6c4c9c8824b0f4b3c354538306d1c195ad7e90d 100644 (file)
@@ -29,6 +29,7 @@ config CPU_ARM720T
        select CPU_COPY_V4WT if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WT if MMU
        help
          A 32-bit RISC processor with 8kByte Cache, Write Buffer and
@@ -46,6 +47,7 @@ config CPU_ARM740T
        select CPU_CACHE_V4
        select CPU_CP15_MPU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        help
          A 32-bit RISC processor with 8KB cache or 4KB variants,
          write buffer and MPU(Protection Unit) built around
@@ -79,6 +81,7 @@ config CPU_ARM920T
        select CPU_COPY_V4WB if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
        help
          The ARM920T is licensed to be produced by numerous vendors,
@@ -97,6 +100,7 @@ config CPU_ARM922T
        select CPU_COPY_V4WB if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
        help
          The ARM922T is a version of the ARM920T, but with smaller
@@ -116,6 +120,7 @@ config CPU_ARM925T
        select CPU_COPY_V4WB if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
        help
          The ARM925T is a mix between the ARM920T and ARM926T, but with
@@ -134,6 +139,7 @@ config CPU_ARM926T
        select CPU_COPY_V4WB if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
        help
          This is a variant of the ARM920.  It has slightly different
@@ -170,6 +176,7 @@ config CPU_ARM940T
        select CPU_CACHE_VIVT
        select CPU_CP15_MPU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        help
          ARM940T is a member of the ARM9TDMI family of general-
          purpose microprocessors with MPU and separate 4KB
@@ -188,6 +195,7 @@ config CPU_ARM946E
        select CPU_CACHE_VIVT
        select CPU_CP15_MPU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        help
          ARM946E-S is a member of the ARM9E-S family of high-
          performance, 32-bit system-on-chip processor solutions.
@@ -206,6 +214,7 @@ config CPU_ARM1020
        select CPU_COPY_V4WB if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
        help
          The ARM1020 is the 32K cached version of the ARM10 processor,
@@ -225,6 +234,7 @@ config CPU_ARM1020E
        select CPU_COPY_V4WB if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
 
 # ARM1022E
@@ -236,6 +246,7 @@ config CPU_ARM1022
        select CPU_COPY_V4WB if MMU # can probably do better
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
        help
          The ARM1022E is an implementation of the ARMv5TE architecture
@@ -254,6 +265,7 @@ config CPU_ARM1026
        select CPU_COPY_V4WB if MMU # can probably do better
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
        help
          The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture
@@ -302,6 +314,7 @@ config CPU_XSCALE
        select CPU_CACHE_VIVT
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
 
 # XScale Core Version 3
@@ -312,6 +325,7 @@ config CPU_XSC3
        select CPU_CACHE_VIVT
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
        select IO_36
 
@@ -324,6 +338,7 @@ config CPU_MOHAWK
        select CPU_COPY_V4WB if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V4WBI if MMU
 
 # Feroceon
@@ -335,6 +350,7 @@ config CPU_FEROCEON
        select CPU_COPY_FEROCEON if MMU
        select CPU_CP15_MMU
        select CPU_PABRT_LEGACY
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_FEROCEON if MMU
 
 config CPU_FEROCEON_OLD_ID
@@ -367,6 +383,7 @@ config CPU_V6
        select CPU_CP15_MMU
        select CPU_HAS_ASID if MMU
        select CPU_PABRT_V6
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V6 if MMU
 
 # ARMv6k
@@ -381,6 +398,7 @@ config CPU_V6K
        select CPU_CP15_MMU
        select CPU_HAS_ASID if MMU
        select CPU_PABRT_V6
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V6 if MMU
 
 # ARMv7
@@ -396,6 +414,7 @@ config CPU_V7
        select CPU_CP15_MPU if !MMU
        select CPU_HAS_ASID if MMU
        select CPU_PABRT_V7
+       select CPU_THUMB_CAPABLE
        select CPU_TLB_V7 if MMU
 
 # ARMv7M
@@ -410,11 +429,17 @@ config CPU_V7M
 
 config CPU_THUMBONLY
        bool
+       select CPU_THUMB_CAPABLE
        # There are no CPUs available with MMU that don't implement an ARM ISA:
        depends on !MMU
        help
          Select this if your CPU doesn't support the 32 bit ARM instructions.
 
+config CPU_THUMB_CAPABLE
+       bool
+       help
+         Select this if your CPU can support Thumb mode.
+
 # Figure out what processor architecture version we should be using.
 # This defines the compiler instruction set which depends on the machine type.
 config CPU_32v3
@@ -655,11 +680,7 @@ config ARCH_DMA_ADDR_T_64BIT
 
 config ARM_THUMB
        bool "Support Thumb user binaries" if !CPU_THUMBONLY
-       depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || \
-               CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || \
-               CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \
-               CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || \
-               CPU_V7 || CPU_FEROCEON || CPU_V7M
+       depends on CPU_THUMB_CAPABLE
        default y
        help
          Say Y if you want to include kernel support for running user space
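
The Kconfig hunks above replace the hand-maintained list of Thumb-capable CPUs with one capability symbol that each processor entry selects. A hedged sketch of how C code can now key off the consolidated symbol; the helper name is made up for illustration:

    #include <linux/kconfig.h>

    /* Hypothetical helper: one symbol instead of the old
     * CPU_ARM720T || ... || CPU_V7M dependency chain. */
    static inline bool cpu_supports_thumb(void)
    {
            return IS_ENABLED(CONFIG_CPU_THUMB_CAPABLE);
    }
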
index e8698241ece904180372e1cd06101037363347ff..b3dea80715b47d3930f25d5b403aafcec3a6fb16 100644 (file)
@@ -14,6 +14,7 @@ endif
 
 obj-$(CONFIG_ARM_PTDUMP)       += dump.o
 obj-$(CONFIG_MODULES)          += proc-syms.o
+obj-$(CONFIG_DEBUG_VIRTUAL)    += physaddr.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
 obj-$(CONFIG_HIGHMEM)          += highmem.o
index dfe97b40991609fc6f66629614b7a7dea0d8f044..f57b080b6fd4055bfe9e106f44cbf8352c96c5be 100644 (file)
@@ -15,6 +15,7 @@
 
 #define pr_fmt(fmt)            "uniphier: " fmt
 
+#include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/log2.h>
@@ -71,8 +72,7 @@
  * @ctrl_base: virtual base address of control registers
  * @rev_base: virtual base address of revision registers
  * @op_base: virtual base address of operation registers
- * @way_present_mask: each bit specifies if the way is present
- * @way_locked_mask: each bit specifies if the way is locked
+ * @way_mask: each bit specifies if the way is present
  * @nsets: number of associativity sets
  * @line_size: line size in bytes
  * @range_op_max_size: max size that can be handled by a single range operation
@@ -83,8 +83,7 @@ struct uniphier_cache_data {
        void __iomem *rev_base;
        void __iomem *op_base;
        void __iomem *way_ctrl_base;
-       u32 way_present_mask;
-       u32 way_locked_mask;
+       u32 way_mask;
        u32 nsets;
        u32 line_size;
        u32 range_op_max_size;
@@ -234,17 +233,13 @@ static void __uniphier_cache_enable(struct uniphier_cache_data *data, bool on)
        writel_relaxed(val, data->ctrl_base + UNIPHIER_SSCC);
 }
 
-static void __init __uniphier_cache_set_locked_ways(
-                                       struct uniphier_cache_data *data,
-                                       u32 way_mask)
+static void __init __uniphier_cache_set_active_ways(
+                                       struct uniphier_cache_data *data)
 {
        unsigned int cpu;
 
-       data->way_locked_mask = way_mask & data->way_present_mask;
-
        for_each_possible_cpu(cpu)
-               writel_relaxed(~data->way_locked_mask & data->way_present_mask,
-                              data->way_ctrl_base + 4 * cpu);
+               writel_relaxed(data->way_mask, data->way_ctrl_base + 4 * cpu);
 }
 
 static void uniphier_cache_maint_range(unsigned long start, unsigned long end,
@@ -307,7 +302,7 @@ static void __init uniphier_cache_enable(void)
 
        list_for_each_entry(data, &uniphier_cache_list, list) {
                __uniphier_cache_enable(data, true);
-               __uniphier_cache_set_locked_ways(data, 0);
+               __uniphier_cache_set_active_ways(data);
        }
 }
 
@@ -382,8 +377,8 @@ static int __init __uniphier_cache_init(struct device_node *np,
                goto err;
        }
 
-       data->way_present_mask =
-               ((u32)1 << cache_size / data->nsets / data->line_size) - 1;
+       data->way_mask = GENMASK(cache_size / data->nsets / data->line_size - 1,
+                                0);
 
        data->ctrl_base = of_iomap(np, 0);
        if (!data->ctrl_base) {
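
GENMASK(h, l) from <linux/bitops.h> builds a mask with bits h..l set, which states the intent more directly than the shift-and-subtract idiom it replaces above. A small equivalence sketch (function and parameter names invented for illustration):

    #include <linux/bitops.h>

    /* Both expressions yield a mask of the lowest 'nways' bits. */
    static u32 way_mask_of(u32 cache_size, u32 nsets, u32 line_size)
    {
            u32 nways = cache_size / nsets / line_size;

            /* old: ((u32)1 << nways) - 1
             * new: GENMASK(nways - 1, 0) - same value, clearer intent */
            return GENMASK(nways - 1, 0);
    }
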
index a134d8a13d001ee8ad4b1084c9a71c1dc9f8d832..de78109d002db1a5e7c94a6c1bc8bb94161d07b8 100644 (file)
@@ -164,7 +164,7 @@ skip:
        cmp     r3, r10
        bgt     flush_levels
 finished:
-       mov     r10, #0                         @ swith back to cache level 0
+       mov     r10, #0                         @ switch back to cache level 0
        mcr     p15, 2, r10, c0, c0, 0          @ select current cache level in cssr
        dsb     st
        isb
index 816a7e44e6f12c4337340454cdd47a902d9a4b76..788486e830d3e644bbf4c608af6c75e64b5bb84a 100644 (file)
@@ -217,7 +217,7 @@ skip:
        cmp     r3, r10
        bgt     flush_levels
 finished:
-       mov     r10, #0                         @ swith back to cache level 0
+       mov     r10, #0                         @ switch back to cache level 0
        write_csselr r10, r3                    @ select current cache level in cssr
        dsb     st
        isb
index 82d3e79ec82b231587b6002fa0aa15e2d70818c1..63eabb06f9f17551695e89efc0ed59e0ce6ba186 100644 (file)
@@ -180,7 +180,7 @@ static void arm_dma_sync_single_for_device(struct device *dev,
        __dma_page_cpu_to_dev(page, offset, size, dir);
 }
 
-struct dma_map_ops arm_dma_ops = {
+const struct dma_map_ops arm_dma_ops = {
        .alloc                  = arm_dma_alloc,
        .free                   = arm_dma_free,
        .mmap                   = arm_dma_mmap,
@@ -204,7 +204,7 @@ static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
                 void *cpu_addr, dma_addr_t dma_addr, size_t size,
                 unsigned long attrs);
 
-struct dma_map_ops arm_coherent_dma_ops = {
+const struct dma_map_ops arm_coherent_dma_ops = {
        .alloc                  = arm_coherent_dma_alloc,
        .free                   = arm_coherent_dma_free,
        .mmap                   = arm_coherent_dma_mmap,
@@ -349,7 +349,7 @@ static void __dma_free_buffer(struct page *page, size_t size)
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
                                     pgprot_t prot, struct page **ret_page,
                                     const void *caller, bool want_vaddr,
-                                    int coherent_flag);
+                                    int coherent_flag, gfp_t gfp);
 
 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
                                 pgprot_t prot, struct page **ret_page,
@@ -420,7 +420,8 @@ static int __init atomic_pool_init(void)
         */
        if (dev_get_cma_area(NULL))
                ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
-                                     &page, atomic_pool_init, true, NORMAL);
+                                     &page, atomic_pool_init, true, NORMAL,
+                                     GFP_KERNEL);
        else
                ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
                                           &page, atomic_pool_init, true);
@@ -594,14 +595,14 @@ static int __free_from_pool(void *start, size_t size)
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
                                     pgprot_t prot, struct page **ret_page,
                                     const void *caller, bool want_vaddr,
-                                    int coherent_flag)
+                                    int coherent_flag, gfp_t gfp)
 {
        unsigned long order = get_order(size);
        size_t count = size >> PAGE_SHIFT;
        struct page *page;
        void *ptr = NULL;
 
-       page = dma_alloc_from_contiguous(dev, count, order);
+       page = dma_alloc_from_contiguous(dev, count, order, gfp);
        if (!page)
                return NULL;
 
@@ -655,7 +656,7 @@ static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot)
 #define __get_dma_pgprot(attrs, prot)                          __pgprot(0)
 #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL
 #define __alloc_from_pool(size, ret_page)                      NULL
-#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag)    NULL
+#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag, gfp)       NULL
 #define __free_from_pool(cpu_addr, size)                       do { } while (0)
 #define __free_from_contiguous(dev, page, cpu_addr, size, wv)  do { } while (0)
 #define __dma_free_remap(cpu_addr, size)                       do { } while (0)
@@ -697,7 +698,8 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
 {
        return __alloc_from_contiguous(args->dev, args->size, args->prot,
                                       ret_page, args->caller,
-                                      args->want_vaddr, args->coherent_flag);
+                                      args->want_vaddr, args->coherent_flag,
+                                      args->gfp);
 }
 
 static void cma_allocator_free(struct arm_dma_free_args *args)
@@ -868,6 +870,9 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
                                      vma->vm_end - vma->vm_start,
                                      vma->vm_page_prot);
        }
+#else
+       ret = vm_iomap_memory(vma, vma->vm_start,
+                             (vma->vm_end - vma->vm_start));
 #endif /* CONFIG_MMU */
 
        return ret;
@@ -1067,7 +1072,7 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                enum dma_data_direction dir, unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        struct scatterlist *s;
        int i, j;
 
@@ -1101,7 +1106,7 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
                enum dma_data_direction dir, unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        struct scatterlist *s;
 
        int i;
@@ -1120,7 +1125,7 @@ void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                        int nents, enum dma_data_direction dir)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        struct scatterlist *s;
        int i;
 
@@ -1139,7 +1144,7 @@ void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                        int nents, enum dma_data_direction dir)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        struct scatterlist *s;
        int i;
 
@@ -1312,7 +1317,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
                unsigned long order = get_order(size);
                struct page *page;
 
-               page = dma_alloc_from_contiguous(dev, count, order);
+               page = dma_alloc_from_contiguous(dev, count, order, gfp);
                if (!page)
                        goto error;
 
@@ -2099,7 +2104,7 @@ static void arm_iommu_sync_single_for_device(struct device *dev,
        __dma_page_cpu_to_dev(page, offset, size, dir);
 }
 
-struct dma_map_ops iommu_ops = {
+const struct dma_map_ops iommu_ops = {
        .alloc          = arm_iommu_alloc_attrs,
        .free           = arm_iommu_free_attrs,
        .mmap           = arm_iommu_mmap_attrs,
@@ -2119,7 +2124,7 @@ struct dma_map_ops iommu_ops = {
        .unmap_resource         = arm_iommu_unmap_resource,
 };
 
-struct dma_map_ops iommu_coherent_ops = {
+const struct dma_map_ops iommu_coherent_ops = {
        .alloc          = arm_coherent_iommu_alloc_attrs,
        .free           = arm_coherent_iommu_free_attrs,
        .mmap           = arm_coherent_iommu_mmap_attrs,
@@ -2319,7 +2324,7 @@ void arm_iommu_detach_device(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
 
-static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
+static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
 {
        return coherent ? &iommu_coherent_ops : &iommu_ops;
 }
@@ -2374,7 +2379,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { }
 
 #endif /* CONFIG_ARM_DMA_USE_IOMMU */
 
-static struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
+static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
 {
        return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
 }
@@ -2382,7 +2387,7 @@ static struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                        const struct iommu_ops *iommu, bool coherent)
 {
-       struct dma_map_ops *dma_ops;
+       const struct dma_map_ops *dma_ops;
 
        dev->archdata.dma_coherent = coherent;
        if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu))
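
The const-ification in this file (repeated across architectures throughout the diff) lets the compiler place the method tables in read-only data. A standalone userspace sketch of the effect, with made-up names:

    #include <stdlib.h>

    struct ops {
            void *(*alloc)(size_t size);
            void  (*free)(void *p);
    };

    static void *my_alloc(size_t size) { return malloc(size); }
    static void  my_free(void *p)      { free(p); }

    /* const => emitted into .rodata: a stray write now fails to compile
     * (or faults at runtime) instead of silently retargeting a function
     * pointer in the ops table. */
    static const struct ops example_ops = {
            .alloc = my_alloc,
            .free  = my_free,
    };
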
index 9fe8e241335c6edcb0db5077f5d4621aefb68944..21192d6eda401a76787f377795c330199f881e48 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/fixmap.h>
+#include <asm/memory.h>
 #include <asm/pgtable.h>
 
 struct addr_marker {
@@ -31,8 +32,8 @@ static struct addr_marker address_markers[] = {
        { 0,                    "vmalloc() Area" },
        { VMALLOC_END,          "vmalloc() End" },
        { FIXADDR_START,        "Fixmap Area" },
-       { CONFIG_VECTORS_BASE,  "Vectors" },
-       { CONFIG_VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" },
+       { VECTORS_BASE, "Vectors" },
+       { VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" },
        { -1,                   NULL },
 };
 
index 3cced8455727953a2525571c5a62b5ad884e8bee..f1e6190aa7eaedf5adacca20c09cb7c3702bee96 100644 (file)
@@ -327,6 +327,12 @@ void flush_dcache_page(struct page *page)
        if (page == ZERO_PAGE(0))
                return;
 
+       if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) {
+               if (test_bit(PG_dcache_clean, &page->flags))
+                       clear_bit(PG_dcache_clean, &page->flags);
+               return;
+       }
+
        mapping = page_mapping(page);
 
        if (!cache_ops_need_broadcast() &&
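
On VIPT non-aliasing caches the hunk above defers the flush: it only clears PG_dcache_clean, and the real flush happens when the page is later mapped into user space. A simplified sketch of the consumer side, recalled from arch/arm/mm/flush.c, so treat the details as approximate:

    /* When an executable/user pte is installed, the clean bit is
     * re-tested and the dcache is flushed only then: */
    if (!test_and_set_bit(PG_dcache_clean, &page->flags))
            __flush_dcache_page(mapping, page);

The test-before-clear shape in the hunk itself avoids an atomic read-modify-write (and the cache-line dirtying it implies) when the bit is already clear.
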
index 4be0bee4c35700aacbaaa3994d5ae4a7ce92bfb5..bf4d3bc41a7a85e5144eeecb1231418f9870f54e 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/cp15.h>
 #include <asm/mach-types.h>
 #include <asm/memblock.h>
+#include <asm/memory.h>
 #include <asm/prom.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -227,41 +228,59 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
        return phys;
 }
 
-void __init arm_memblock_init(const struct machine_desc *mdesc)
+static void __init arm_initrd_init(void)
 {
-       /* Register the kernel text, kernel data and initrd with memblock. */
-#ifdef CONFIG_XIP_KERNEL
-       memblock_reserve(__pa(_sdata), _end - _sdata);
-#else
-       memblock_reserve(__pa(_stext), _end - _stext);
-#endif
 #ifdef CONFIG_BLK_DEV_INITRD
+       phys_addr_t start;
+       unsigned long size;
+
        /* FDT scan will populate initrd_start */
        if (initrd_start && !phys_initrd_size) {
                phys_initrd_start = __virt_to_phys(initrd_start);
                phys_initrd_size = initrd_end - initrd_start;
        }
+
        initrd_start = initrd_end = 0;
-       if (phys_initrd_size &&
-           !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) {
+
+       if (!phys_initrd_size)
+               return;
+
+       /*
+        * Round the memory region to page boundaries as per free_initrd_mem()
+        * This allows us to detect whether the pages overlapping the initrd
+        * are in use, but more importantly, reserves the entire set of pages
+        * as we don't want these pages allocated for other purposes.
+        */
+       start = round_down(phys_initrd_start, PAGE_SIZE);
+       size = phys_initrd_size + (phys_initrd_start - start);
+       size = round_up(size, PAGE_SIZE);
+
+       if (!memblock_is_region_memory(start, size)) {
                pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n",
-                      (u64)phys_initrd_start, phys_initrd_size);
-               phys_initrd_start = phys_initrd_size = 0;
+                      (u64)start, size);
+               return;
        }
-       if (phys_initrd_size &&
-           memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) {
+
+       if (memblock_is_region_reserved(start, size)) {
                pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n",
-                      (u64)phys_initrd_start, phys_initrd_size);
-               phys_initrd_start = phys_initrd_size = 0;
+                      (u64)start, size);
+               return;
        }
-       if (phys_initrd_size) {
-               memblock_reserve(phys_initrd_start, phys_initrd_size);
 
-               /* Now convert initrd to virtual addresses */
-               initrd_start = __phys_to_virt(phys_initrd_start);
-               initrd_end = initrd_start + phys_initrd_size;
-       }
+       memblock_reserve(start, size);
+
+       /* Now convert initrd to virtual addresses */
+       initrd_start = __phys_to_virt(phys_initrd_start);
+       initrd_end = initrd_start + phys_initrd_size;
 #endif
+}
+
+void __init arm_memblock_init(const struct machine_desc *mdesc)
+{
+       /* Register the kernel text, kernel data and initrd with memblock. */
+       memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START);
+
+       arm_initrd_init();
 
        arm_mm_memblock_reserve();
 
@@ -521,8 +540,7 @@ void __init mem_init(void)
                        "      .data : 0x%p" " - 0x%p" "   (%4td kB)\n"
                        "       .bss : 0x%p" " - 0x%p" "   (%4td kB)\n",
 
-                       MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) +
-                               (PAGE_SIZE)),
+                       MLK(VECTORS_BASE, VECTORS_BASE + PAGE_SIZE),
 #ifdef CONFIG_HAVE_TCM
                        MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
                        MLK(ITCM_OFFSET, (unsigned long) itcm_end),
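
The refactored arm_initrd_init() above rounds the reserved window outward to whole pages before the memblock checks, so partially-used edge pages cannot be handed out elsewhere. A worked fragment with hypothetical initrd placement to make the arithmetic concrete:

    /* Suppose the initrd sits at 0x80200123 and is 5000 bytes long. */
    phys_addr_t start = round_down(0x80200123, PAGE_SIZE); /* 0x80200000 */
    unsigned long size = 5000 + (0x80200123 - start);      /* 0x14ab     */

    size = round_up(size, PAGE_SIZE);                      /* 0x2000     */
    /* The reservation now covers both partially-used edge pages. */
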
index 4001dd15818d79aea7e586d19e70943c8b689975..4e016d7f37b3af6568282aa1a909a14406247bb2 100644 (file)
@@ -1152,13 +1152,12 @@ early_param("vmalloc", early_vmalloc);
 
 phys_addr_t arm_lowmem_limit __initdata = 0;
 
-void __init sanity_check_meminfo(void)
+void __init adjust_lowmem_bounds(void)
 {
        phys_addr_t memblock_limit = 0;
-       int highmem = 0;
        u64 vmalloc_limit;
        struct memblock_region *reg;
-       bool should_use_highmem = false;
+       phys_addr_t lowmem_limit = 0;
 
        /*
         * Let's use our own (unoptimized) equivalent of __pa() that is
@@ -1172,43 +1171,18 @@ void __init sanity_check_meminfo(void)
        for_each_memblock(memory, reg) {
                phys_addr_t block_start = reg->base;
                phys_addr_t block_end = reg->base + reg->size;
-               phys_addr_t size_limit = reg->size;
 
-               if (reg->base >= vmalloc_limit)
-                       highmem = 1;
-               else
-                       size_limit = vmalloc_limit - reg->base;
-
-
-               if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
-
-                       if (highmem) {
-                               pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
-                                         &block_start, &block_end);
-                               memblock_remove(reg->base, reg->size);
-                               should_use_highmem = true;
-                               continue;
-                       }
-
-                       if (reg->size > size_limit) {
-                               phys_addr_t overlap_size = reg->size - size_limit;
-
-                               pr_notice("Truncating RAM at %pa-%pa",
-                                         &block_start, &block_end);
-                               block_end = vmalloc_limit;
-                               pr_cont(" to -%pa", &block_end);
-                               memblock_remove(vmalloc_limit, overlap_size);
-                               should_use_highmem = true;
-                       }
-               }
-
-               if (!highmem) {
-                       if (block_end > arm_lowmem_limit) {
-                               if (reg->size > size_limit)
-                                       arm_lowmem_limit = vmalloc_limit;
-                               else
-                                       arm_lowmem_limit = block_end;
-                       }
+               if (reg->base < vmalloc_limit) {
+                       if (block_end > lowmem_limit)
+                               /*
+                                * Compare as u64 to ensure vmalloc_limit does
+                                * not get truncated. block_end should always
+                                * fit in phys_addr_t so there should be no
+                                * issue with assignment.
+                                */
+                               lowmem_limit = min_t(u64,
+                                                        vmalloc_limit,
+                                                        block_end);
 
                        /*
                         * Find the first non-pmd-aligned page, and point
@@ -1227,14 +1201,13 @@ void __init sanity_check_meminfo(void)
                                if (!IS_ALIGNED(block_start, PMD_SIZE))
                                        memblock_limit = block_start;
                                else if (!IS_ALIGNED(block_end, PMD_SIZE))
-                                       memblock_limit = arm_lowmem_limit;
+                                       memblock_limit = lowmem_limit;
                        }
 
                }
        }
 
-       if (should_use_highmem)
-               pr_notice("Consider using a HIGHMEM enabled kernel.\n");
+       arm_lowmem_limit = lowmem_limit;
 
        high_memory = __va(arm_lowmem_limit - 1) + 1;
 
@@ -1248,6 +1221,18 @@ void __init sanity_check_meminfo(void)
        if (!memblock_limit)
                memblock_limit = arm_lowmem_limit;
 
+       if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
+               if (memblock_end_of_DRAM() > arm_lowmem_limit) {
+                       phys_addr_t end = memblock_end_of_DRAM();
+
+                       pr_notice("Ignoring RAM at %pa-%pa\n",
+                                 &memblock_limit, &end);
+                       pr_notice("Consider using a HIGHMEM enabled kernel.\n");
+
+                       memblock_remove(memblock_limit, end - memblock_limit);
+               }
+       }
+
        memblock_set_current_limit(memblock_limit);
 }
 
@@ -1437,11 +1422,7 @@ static void __init kmap_init(void)
 static void __init map_lowmem(void)
 {
        struct memblock_region *reg;
-#ifdef CONFIG_XIP_KERNEL
-       phys_addr_t kernel_x_start = round_down(__pa(_sdata), SECTION_SIZE);
-#else
-       phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
-#endif
+       phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE);
        phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
 
        /* Map all the lowmem memory banks. */
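
The in-hunk comment in adjust_lowmem_bounds() is about integer width: without LPAE, phys_addr_t is 32-bit while vmalloc_limit is computed as u64, so the comparison must happen before any narrowing assignment. A toy illustration of the hazard (values made up):

    u64         vmalloc_limit = 0x100000000ULL; /* 4 GiB: needs > 32 bits */
    phys_addr_t block_end     = 0xf0000000;     /* fits in 32 bits        */

    /* (phys_addr_t)vmalloc_limit would truncate to 0 on a 32-bit
     * phys_addr_t; comparing as u64 first preserves the intended bound: */
    phys_addr_t lowmem = min_t(u64, vmalloc_limit, block_end); /* 0xf0000000 */
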
index 2740967727e2057ef8897e4d4335d38acaeaac2c..3b5c7aaf9c76c522f8c6cbd7890c5105b3a3e1d4 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cp15.h>
 #include <asm/sections.h>
 #include <asm/page.h>
 #include <asm/setup.h>
@@ -22,6 +23,8 @@
 
 #include "mm.h"
 
+unsigned long vectors_base;
+
 #ifdef CONFIG_ARM_MPU
 struct mpu_rgn_info mpu_rgn_info;
 
@@ -85,7 +88,7 @@ static unsigned long irbar_read(void)
 }
 
 /* MPU initialisation functions */
-void __init sanity_check_meminfo_mpu(void)
+void __init adjust_lowmem_bounds_mpu(void)
 {
        phys_addr_t phys_offset = PHYS_OFFSET;
        phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size;
@@ -274,19 +277,64 @@ void __init mpu_setup(void)
        }
 }
 #else
-static void sanity_check_meminfo_mpu(void) {}
+static void adjust_lowmem_bounds_mpu(void) {}
 static void __init mpu_setup(void) {}
 #endif /* CONFIG_ARM_MPU */
 
+#ifdef CONFIG_CPU_CP15
+#ifdef CONFIG_CPU_HIGH_VECTOR
+static unsigned long __init setup_vectors_base(void)
+{
+       unsigned long reg = get_cr();
+
+       set_cr(reg | CR_V);
+       return 0xffff0000;
+}
+#else /* CONFIG_CPU_HIGH_VECTOR */
+/* Write exception base address to VBAR */
+static inline void set_vbar(unsigned long val)
+{
+       asm("mcr p15, 0, %0, c12, c0, 0" : : "r" (val) : "cc");
+}
+
+/*
+ * Security extensions, bits[7:4], permitted values,
+ * 0b0000 - not implemented, 0b0001/0b0010 - implemented
+ */
+static inline bool security_extensions_enabled(void)
+{
+       return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4);
+}
+
+static unsigned long __init setup_vectors_base(void)
+{
+       unsigned long base = 0, reg = get_cr();
+
+       set_cr(reg & ~CR_V);
+       if (security_extensions_enabled()) {
+               if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM))
+                       base = CONFIG_DRAM_BASE;
+               set_vbar(base);
+       } else if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM)) {
+               if (CONFIG_DRAM_BASE != 0)
+                       pr_err("Security extensions not enabled, vectors cannot be remapped to RAM, vectors base will be 0x00000000\n");
+       }
+
+       return base;
+}
+#endif /* CONFIG_CPU_HIGH_VECTOR */
+#endif /* CONFIG_CPU_CP15 */
+
 void __init arm_mm_memblock_reserve(void)
 {
 #ifndef CONFIG_CPU_V7M
+       vectors_base = IS_ENABLED(CONFIG_CPU_CP15) ? setup_vectors_base() : 0;
        /*
         * Register the exception vector page.
         * some architectures which the DRAM is the exception vector to trap,
         * alloc_page breaks with error, although it is not NULL, but "0."
         */
-       memblock_reserve(CONFIG_VECTORS_BASE, 2 * PAGE_SIZE);
+       memblock_reserve(vectors_base, 2 * PAGE_SIZE);
 #else /* ifndef CONFIG_CPU_V7M */
        /*
         * There is no dedicated vector page on V7-M. So nothing needs to be
@@ -295,10 +343,10 @@ void __init arm_mm_memblock_reserve(void)
 #endif
 }
 
-void __init sanity_check_meminfo(void)
+void __init adjust_lowmem_bounds(void)
 {
        phys_addr_t end;
-       sanity_check_meminfo_mpu();
+       adjust_lowmem_bounds_mpu();
        end = memblock_end_of_DRAM();
        high_memory = __va(end - 1) + 1;
        memblock_set_current_limit(end);
@@ -310,7 +358,7 @@ void __init sanity_check_meminfo(void)
  */
 void __init paging_init(const struct machine_desc *mdesc)
 {
-       early_trap_init((void *)CONFIG_VECTORS_BASE);
+       early_trap_init((void *)vectors_base);
        mpu_setup();
        bootmem_init();
 }
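
setup_vectors_base() above programs the exception base through CP15: the high-vectors variant sets SCTLR.V to select 0xffff0000, while the other variant clears it and writes VBAR (c12). A hedged read-back counterpart, not part of the patch but occasionally useful during bring-up debugging:

    /* Read VBAR back (meaningful when the security extensions are
     * implemented, mirroring set_vbar() above). */
    static inline unsigned long get_vbar(void)
    {
            unsigned long val;

            asm("mrc p15, 0, %0, c12, c0, 0" : "=r" (val));
            return val;
    }
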
diff --git a/arch/arm/mm/physaddr.c b/arch/arm/mm/physaddr.c
new file mode 100644 (file)
index 0000000..02e60f4
--- /dev/null
@@ -0,0 +1,57 @@
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/mmdebug.h>
+#include <linux/mm.h>
+
+#include <asm/sections.h>
+#include <asm/memory.h>
+#include <asm/fixmap.h>
+#include <asm/dma.h>
+
+#include "mm.h"
+
+static inline bool __virt_addr_valid(unsigned long x)
+{
+       /*
+        * high_memory does not get immediately defined, and there
+        * are early callers of __pa() against PAGE_OFFSET
+        */
+       if (!high_memory && x >= PAGE_OFFSET)
+               return true;
+
+       if (high_memory && x >= PAGE_OFFSET && x < (unsigned long)high_memory)
+               return true;
+
+       /*
+        * MAX_DMA_ADDRESS is a virtual address that may not correspond to an
+        * actual physical address. Enough code relies on __pa(MAX_DMA_ADDRESS)
+        * that we just need to work around it and always return true.
+        */
+       if (x == MAX_DMA_ADDRESS)
+               return true;
+
+       return false;
+}
+
+phys_addr_t __virt_to_phys(unsigned long x)
+{
+       WARN(!__virt_addr_valid(x),
+            "virt_to_phys used for non-linear address: %pK (%pS)\n",
+            (void *)x, (void *)x);
+
+       return __virt_to_phys_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+       /* This is bounds checking against the kernel image only.
+        * __pa_symbol should only be used on kernel symbol addresses.
+        */
+       VIRTUAL_BUG_ON(x < (unsigned long)KERNEL_START ||
+                      x > (unsigned long)KERNEL_END);
+
+       return __pa_symbol_nodebug(x);
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
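
The new physaddr.c gives each translation helper its own sanity window, which is what makes the earlier virt_to_phys() to __pa_symbol() conversions matter. Condensed from the code above, with a hypothetical kmalloc()'d buffer kbuf for contrast:

    /* __virt_to_phys(x):     WARN unless PAGE_OFFSET <= x < high_memory
     *                        (the linear map; MAX_DMA_ADDRESS is
     *                        special-cased).
     * __phys_addr_symbol(x): VIRTUAL_BUG_ON unless KERNEL_START <= x
     *                        <= KERNEL_END (the kernel image itself).
     * Correct pairing after this series: */
    phys_addr_t text_pa = __pa_symbol(_stext);   /* kernel image symbol */
    phys_addr_t buf_pa  = virt_to_phys(kbuf);    /* linear-map buffer   */
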
index f9b08ba7fe73efd99d04572db6e6e092b9655c18..548d622a315993b02e7486650f9fee7a0178bb45 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <asm/probes.h>
+#include <asm/kprobes.h>
 
 void __init arm_probes_decode_init(void);
 
index bd62d94f8ac5b72cd54e73098ebd3eef89ca6415..ce18c91b50a1cbac3fb6d38af60c63af9b031185 100644 (file)
@@ -182,10 +182,10 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
-struct dma_map_ops *xen_dma_ops;
+const struct dma_map_ops *xen_dma_ops;
 EXPORT_SYMBOL(xen_dma_ops);
 
-static struct dma_map_ops xen_swiotlb_dma_ops = {
+static const struct dma_map_ops xen_swiotlb_dma_ops = {
        .alloc = xen_swiotlb_alloc_coherent,
        .free = xen_swiotlb_free_coherent,
        .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
index 243ef256b8c9e3a33f86ec3c19a8843cc3d1126a..73d5bab015ebb3c702daaf80d7f31bc1292b4eb4 100644 (file)
@@ -17,7 +17,6 @@
 #define __ASM_DEVICE_H
 
 struct dev_archdata {
-       struct dma_map_ops *dma_ops;
 #ifdef CONFIG_IOMMU_API
        void *iommu;                    /* private IOMMU data */
 #endif
index ccea82c2b089d01c3340711e9e219b8bba3ca89c..505756cdc67a1deba4ff49491392f95a8290af57 100644 (file)
 #include <asm/xen/hypervisor.h>
 
 #define DMA_ERROR_CODE (~(dma_addr_t)0)
-extern struct dma_map_ops dummy_dma_ops;
+extern const struct dma_map_ops dummy_dma_ops;
 
-static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
-       if (dev && dev->archdata.dma_ops)
-               return dev->archdata.dma_ops;
+       if (dev && dev->dma_ops)
+               return dev->dma_ops;
 
        /*
         * We expect no ISA devices, and all other DMA masters are expected to
@@ -39,12 +39,12 @@ static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
        return &dummy_dma_ops;
 }
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        if (xen_initial_domain())
                return xen_dma_ops;
        else
-               return __generic_dma_ops(dev);
+               return __generic_dma_ops(NULL);
 }
 
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
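
These two hunks are the arm64 side of moving the ops pointer off dev->archdata and onto struct device itself; the arch header now only supplies the bus-level fallback via get_arch_dma_ops(). For context, a sketch of the generic lookup this enables, close to the helper the series adds in <linux/dma-mapping.h> but reproduced from memory, so treat details as approximate:

    static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
    {
            if (dev && dev->dma_ops)
                    return dev->dma_ops;    /* per-device override */

            /* arch/bus default, e.g. the xen_initial_domain() case above */
            return get_arch_dma_ops(dev ? dev->bus : NULL);
    }
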
index 1737aecfcc5e462c78e6c47ab06ffe2e2fc12c60..6deb8d726041eb9763efa84d4689ca24881d1e54 100644 (file)
@@ -16,6 +16,9 @@
 #ifndef _ARM_KPROBES_H
 #define _ARM_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
@@ -57,4 +60,5 @@ int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr);
 void kretprobe_trampoline(void);
 void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
 
+#endif /* CONFIG_KPROBES */
 #endif /* _ARM_KPROBES_H */
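
The header is reorganized so that <asm/kprobes.h> always pulls in <asm-generic/kprobes.h> and exposes the probe machinery only under CONFIG_KPROBES. The point of the generic header is that annotations stay compilable in CONFIG_KPROBES=n builds; a sketch with a hypothetical function name:

    #include <asm/kprobes.h>   /* safe to include regardless of config */

    static int critical_path(void)
    {
            return 0;
    }
    /* Expands to a blacklist entry with kprobes enabled,
     * and to nothing when CONFIG_KPROBES=n. */
    NOKPROBE_SYMBOL(critical_path);
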
index 86032a01238861d526cfec9f7f7f1f2b4d52624c..657977e77ec8fa49e55fc9cacc1415db81a7cc2d 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
 #include <asm/traps.h>
+#include <asm/kprobes.h>
 #include <linux/uaccess.h>
 #include <asm/cpufeature.h>
 
index b6badff5a15114224eb7d3d08810d79979618602..3a63954a8b143e75f9ccde84ca783af3942e9726 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/fixmap.h>
 #include <asm/insn.h>
+#include <asm/kprobes.h>
 
 #define AARCH64_INSN_SF_BIT    BIT(31)
 #define AARCH64_INSN_N_BIT     BIT(22)
index 76d3f315407f7ce7e7e5b34b82b5b99ab10d86b3..192ab007bacb3c28e487d4823eeca2c1f7c1e8f8 100644 (file)
@@ -16,6 +16,8 @@
 #ifndef _ARM_KERNEL_KPROBES_ARM64_H
 #define _ARM_KERNEL_KPROBES_ARM64_H
 
+#include <asm/kprobes.h>
+
 /*
  * ARM strongly recommends a limit of 128 bytes between LoadExcl and
  * StoreExcl instructions in a single thread of execution. So keep the
index a8ec5da530af73987c550ee63eac5e9c91f5d611..827d52d78b67d1206a25789b87ee433fd24c50b1 100644 (file)
@@ -222,7 +222,7 @@ asmlinkage void secondary_start_kernel(void)
         * All kernel threads share the same mm context; grab a
         * reference and switch to it.
         */
-       atomic_inc(&mm->mm_count);
+       mmgrab(mm);
        current->active_mm = mm;
 
        /*
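
The atomic_inc(&mm->mm_count) sites are converted to the new named helper here (the same conversion recurs in several later hunks in this diff). The two mm reference counts pin different things; a short sketch of the pairing rules:

    /* mm_count ("grab"): keeps struct mm_struct itself alive; what
     * kernel threads need when adopting an active_mm.
     * Pair mmgrab() with mmdrop(). */
    mmgrab(mm);

    /* mm_users ("get"): additionally keeps the address space contents
     * (VMAs, page tables) alive.  Pair mmget() with mmput(). */
    mmget(mm);
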
index 410fbdb8163ffdba5af4a8742849bfd3f73efeea..f5b9210f1c83b2c5dc780ddf7252b991751b30d9 100644 (file)
@@ -62,7 +62,7 @@ D_h   .req    x14
        sub     count, count, tmp2
        /*
        * Copy the leading memory data from src to dst in an increasing
-       * address order.By this way,the risk of overwritting the source
+       * address order.By this way,the risk of overwriting the source
        * memory data is eliminated when the distance between src and
        * dst is less than 16. The memory accesses here are alignment.
        */
index 351f7595cb3ebdb9acf735a05d7e71fe56f1cf2d..81cdb2e844ed9fe80e4192315647ccc9245b8003 100644 (file)
@@ -107,7 +107,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
                void *addr;
 
                page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
-                                                       get_order(size));
+                                                get_order(size), flags);
                if (!page)
                        return NULL;
 
@@ -363,7 +363,7 @@ static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
        return 0;
 }
 
-static struct dma_map_ops swiotlb_dma_ops = {
+static const struct dma_map_ops swiotlb_dma_ops = {
        .alloc = __dma_alloc,
        .free = __dma_free,
        .mmap = __swiotlb_mmap,
@@ -390,7 +390,7 @@ static int __init atomic_pool_init(void)
 
        if (dev_get_cma_area(NULL))
                page = dma_alloc_from_contiguous(NULL, nr_pages,
-                                                       pool_size_order);
+                                                pool_size_order, GFP_KERNEL);
        else
                page = alloc_pages(GFP_DMA, pool_size_order);
 
@@ -516,7 +516,7 @@ static int __dummy_dma_supported(struct device *hwdev, u64 mask)
        return 0;
 }
 
-struct dma_map_ops dummy_dma_ops = {
+const struct dma_map_ops dummy_dma_ops = {
        .alloc                  = __dummy_alloc,
        .free                   = __dummy_free,
        .mmap                   = __dummy_mmap,
@@ -795,7 +795,7 @@ static void __iommu_unmap_sg_attrs(struct device *dev,
        iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
 }
 
-static struct dma_map_ops iommu_dma_ops = {
+static const struct dma_map_ops iommu_dma_ops = {
        .alloc = __iommu_alloc_attrs,
        .free = __iommu_free_attrs,
        .mmap = __iommu_mmap_attrs,
@@ -848,7 +848,7 @@ static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
                if (iommu_dma_init_domain(domain, dma_base, size, dev))
                        goto out_err;
 
-               dev->archdata.dma_ops = &iommu_dma_ops;
+               dev->dma_ops = &iommu_dma_ops;
        }
 
        return true;
@@ -958,7 +958,7 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 void arch_teardown_dma_ops(struct device *dev)
 {
-       dev->archdata.dma_ops = NULL;
+       dev->dma_ops = NULL;
 }
 
 #else
@@ -972,8 +972,8 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                        const struct iommu_ops *iommu, bool coherent)
 {
-       if (!dev->archdata.dma_ops)
-               dev->archdata.dma_ops = &swiotlb_dma_ops;
+       if (!dev->dma_ops)
+               dev->dma_ops = &swiotlb_dma_ops;
 
        dev->archdata.dma_coherent = coherent;
        __iommu_setup_dma_ops(dev, dma_base, size, iommu);
index 1115f2a645d1805de87396e49fc27de9b7700136..7388451f9905004f6bca38e8064562dcc47c4926 100644 (file)
@@ -4,9 +4,9 @@
 extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
        int direction);
 
-extern struct dma_map_ops avr32_dma_ops;
+extern const struct dma_map_ops avr32_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &avr32_dma_ops;
 }
index 45f563ed73fd51e6d4b0590a56d0f7fa3e5f4c48..28dfc61ad3840ebb61947f6cd78aa4ad6189294e 100644 (file)
 #ifndef __ASM_AVR32_KPROBES_H
 #define __ASM_AVR32_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0xd673  /* breakpoint */
+
+#ifdef CONFIG_KPROBES
 #include <linux/types.h>
 
 typedef u16    kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xd673  /* breakpoint */
 #define MAX_INSN_SIZE          2
 #define MAX_STACK_SIZE         64      /* 32 would probably be OK */
 
@@ -46,4 +50,5 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 
 #define flush_insn_slot(p)     do { } while (0)
 
+#endif /* CONFIG_KPROBES */
 #endif /* __ASM_AVR32_KPROBES_H */
index 54534e5d0781bb343a5ae72f482cea4fa228d104..555222d4f4143ac722b71e0f7557461e9da2d657 100644 (file)
@@ -191,7 +191,7 @@ static void avr32_dma_sync_sg_for_device(struct device *dev,
                dma_cache_sync(dev, sg_virt(sg), sg->length, direction);
 }
 
-struct dma_map_ops avr32_dma_ops = {
+const struct dma_map_ops avr32_dma_ops = {
        .alloc                  = avr32_dma_alloc,
        .free                   = avr32_dma_free,
        .map_page               = avr32_dma_map_page,
index d6fa60b158be250010e8d71b72cb1a59338645b0..625db8ac815e8a3ba644b8602cc947b9e0c71ddc 100644 (file)
@@ -46,3 +46,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 3490570aaa8284daffc6abb6d73d60d3bdcc92b4..04254ac36bed1dd3897c316c07c8c924ab08faca 100644 (file)
@@ -36,9 +36,9 @@ _dma_sync(dma_addr_t addr, size_t size, enum dma_data_direction dir)
                __dma_sync(addr, size, dir);
 }
 
-extern struct dma_map_ops bfin_dma_ops;
+extern const struct dma_map_ops bfin_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &bfin_dma_ops;
 }
index a27a74a18fb0f9413ae63cf9b5663b2d7af658d1..477bb29a7987558780609c2d6dba9588d461a254 100644 (file)
@@ -159,7 +159,7 @@ static inline void bfin_dma_sync_single_for_device(struct device *dev,
        _dma_sync(handle, size, dir);
 }
 
-struct dma_map_ops bfin_dma_ops = {
+const struct dma_map_ops bfin_dma_ops = {
        .alloc                  = bfin_dma_alloc,
        .free                   = bfin_dma_free,
 
index 23c4ef5f8bdced2fb30b461f94b53c56694db4a4..a2e6db2ce811c94c5f6bbbb5c5ae6581f8fb797e 100644 (file)
@@ -307,8 +307,8 @@ void secondary_start_kernel(void)
        local_irq_disable();
 
        /* Attach the new idle task to the global mm. */
-       atomic_inc(&mm->mm_users);
-       atomic_inc(&mm->mm_count);
+       mmget(mm);
+       mmgrab(mm);
        current->active_mm = mm;
 
        preempt_disable();
index 4e9f57433f3a405568f187cc6951e94318a65fac..82619c32d25bdb2e5cd0867c81ef6aa002594bbe 100644 (file)
@@ -61,3 +61,4 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 5717b1e52d96386bad6e3656acf23f804e571736..aca9f755e4f8e6564088069eeeafd62e8c9ff8dd 100644 (file)
@@ -17,9 +17,9 @@
  */
 #define DMA_ERROR_CODE ~0
 
-extern struct dma_map_ops c6x_dma_ops;
+extern const struct dma_map_ops c6x_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &c6x_dma_ops;
 }
index 6752df32ef06788487fbdb573a8712a618ee87d8..9fff8be75f585a8bf961aa5173b0957bf0ab7d31 100644 (file)
@@ -123,7 +123,7 @@ static void c6x_dma_sync_sg_for_device(struct device *dev,
 
 }
 
-struct dma_map_ops c6x_dma_ops = {
+const struct dma_map_ops c6x_dma_ops = {
        .alloc                  = c6x_dma_alloc,
        .free                   = c6x_dma_free,
        .map_page               = c6x_dma_map_page,
index 1f0636793f0c8b4b7fcc61c324427908adc5308f..7072341995ff23651e6ac748a58d314f2e831199 100644 (file)
@@ -69,7 +69,7 @@ static inline int v32_dma_supported(struct device *dev, u64 mask)
        return 1;
 }
 
-struct dma_map_ops v32_dma_ops = {
+const struct dma_map_ops v32_dma_ops = {
        .alloc                  = v32_dma_alloc,
        .free                   = v32_dma_free,
        .map_page               = v32_dma_map_page,
index 9f19e19bff9d39117871446675563f7f9fc6426c..0f5132b08896a07af70cbb564884024c95e5729d 100644 (file)
@@ -4,6 +4,7 @@ generic-y += barrier.h
 generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += cmpxchg.h
+generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += errno.h
@@ -44,3 +45,4 @@ generic-y += types.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/cris/include/asm/current.h b/arch/cris/include/asm/current.h
deleted file mode 100644 (file)
index 5f5c0ef..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _CRIS_CURRENT_H
-#define _CRIS_CURRENT_H
-
-#include <linux/thread_info.h>
-
-struct task_struct;
-
-static inline struct task_struct * get_current(void)
-{
-        return current_thread_info()->task;
-}
-#define current get_current()
-
-#endif /* !(_CRIS_CURRENT_H) */
index 5a370178a0e95a7463688ab591860aeb5050c94f..256169de3743defd576df3e5f119359e228a991a 100644 (file)
@@ -2,14 +2,14 @@
 #define _ASM_CRIS_DMA_MAPPING_H
 
 #ifdef CONFIG_PCI
-extern struct dma_map_ops v32_dma_ops;
+extern const struct dma_map_ops v32_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &v32_dma_ops;
 }
 #else
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        BUG();
        return NULL;
index 0f5b0d5d313ccc122142c2491e3048312ae0025c..c33b46715f6598b5e9dcb67f54aeb020f542c944 100644 (file)
@@ -7,3 +7,4 @@ generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
+generic-y += kprobes.h
index 9a82bfa4303b2ed4f1390906d62e85b5b7d26cb2..354900917585a220fe840aecb4b2a081616f6d42 100644 (file)
@@ -7,9 +7,9 @@
 extern unsigned long __nongprelbss dma_coherent_mem_start;
 extern unsigned long __nongprelbss dma_coherent_mem_end;
 
-extern struct dma_map_ops frv_dma_ops;
+extern const struct dma_map_ops frv_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &frv_dma_ops;
 }
index 187688128c65f94b29de1f751f0fee60988bd126..4a96de7f0af4ec4599d0888eb5ce5541267bf5fc 100644 (file)
@@ -164,7 +164,7 @@ static int frv_dma_supported(struct device *dev, u64 mask)
        return 1;
 }
 
-struct dma_map_ops frv_dma_ops = {
+const struct dma_map_ops frv_dma_ops = {
        .alloc                  = frv_dma_alloc,
        .free                   = frv_dma_free,
        .map_page               = frv_dma_map_page,
index dba7df918144059a4f3155dbd94a667dac02b35e..e7130abc0dae67eb04202da599b49b66dd33f8e5 100644 (file)
@@ -106,7 +106,7 @@ static int frv_dma_supported(struct device *dev, u64 mask)
        return 1;
 }
 
-struct dma_map_ops frv_dma_ops = {
+const struct dma_map_ops frv_dma_ops = {
        .alloc                  = frv_dma_alloc,
        .free                   = frv_dma_free,
        .map_page               = frv_dma_map_page,
index 34bb4b13e079811f79e76598119c272625f774db..c452ddb5620f220d0a3d35ead88e42f9eaad5ff8 100644 (file)
@@ -147,7 +147,7 @@ static void __init pcibios_allocate_resources(int pass)
 static void __init pcibios_assign_resources(void)
 {
        struct pci_dev *dev = NULL;
-       int idx;
+       int idx, err;
        struct resource *r;
 
        for_each_pci_dev(dev) {
@@ -172,8 +172,13 @@ static void __init pcibios_assign_resources(void)
                         *  the BIOS forgot to do so or because we have decided the old
                         *  address was unusable for some reason.
                         */
-                       if (!r->start && r->end)
-                               pci_assign_resource(dev, idx);
+                       if (!r->start && r->end) {
+                               err = pci_assign_resource(dev, idx);
+                               if (err)
+                                       dev_err(&dev->dev,
+                                               "Failed to assign new address to %d\n",
+                                               idx);
+                       }
                }
        }
 }
index 81757d55a5b592fed1e38281edc0cbb6a256dff7..3473bde77f566e196984f054264d42795a1ae249 100644 (file)
@@ -188,7 +188,7 @@ int cxn_pin_by_pid(pid_t pid)
                task_lock(tsk);
                if (tsk->mm) {
                        mm = tsk->mm;
-                       atomic_inc(&mm->mm_users);
+                       mmget(mm);
                        ret = 0;
                }
                task_unlock(tsk);
index 5efd0c87f3c0acf8f9fac4e8d36898f0a0191a2c..341740c3581c1e2c0934bd6d70394d5cb44ca75a 100644 (file)
@@ -74,3 +74,4 @@ generic-y += unaligned.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 7ac7fadffed07577e842d2a45912fae29630583c..847c7562e04607c19324ad7c8f720dcf8c9e4503 100644 (file)
@@ -1,9 +1,9 @@
 #ifndef _H8300_DMA_MAPPING_H
 #define _H8300_DMA_MAPPING_H
 
-extern struct dma_map_ops h8300_dma_map_ops;
+extern const struct dma_map_ops h8300_dma_map_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &h8300_dma_map_ops;
 }
index 3651da045806a9dd8764609df0290ed6a1c1a5d4..225dd0a188dc62f59ad1fb484cef657054f45e9a 100644 (file)
@@ -60,7 +60,7 @@ static int map_sg(struct device *dev, struct scatterlist *sgl,
        return nents;
 }
 
-struct dma_map_ops h8300_dma_map_ops = {
+const struct dma_map_ops h8300_dma_map_ops = {
        .alloc = dma_alloc,
        .free = dma_free,
        .map_page = map_page,
index a43a7c90e4af8a20ed9cae98b23af2471de25bea..797b64a4b80bac0aa188ccb074de472547f93c47 100644 (file)
@@ -59,3 +59,4 @@ generic-y += unaligned.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 7ef58df909fc2f51e23dc7e180a06b55ca88b1f4..d3a87bd9b68655bcf564b8a6ac85c754b9a76045 100644 (file)
@@ -32,13 +32,10 @@ struct device;
 extern int bad_dma_address;
 #define DMA_ERROR_CODE bad_dma_address
 
-extern struct dma_map_ops *dma_ops;
+extern const struct dma_map_ops *dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-       if (unlikely(dev == NULL))
-               return NULL;
-
        return dma_ops;
 }
 
index dbc4f1003da46e522b081b738ff2dded4e50aaf6..e74b65009587f51382e001c0f69154494d2e23b6 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include <asm/page.h>
 
-struct dma_map_ops *dma_ops;
+const struct dma_map_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 int bad_dma_address;  /*  globals are automatically initialized to zero  */
@@ -203,7 +203,7 @@ static void hexagon_sync_single_for_device(struct device *dev,
        dma_sync(dma_addr_to_virt(dma_handle), size, dir);
 }
 
-struct dma_map_ops hexagon_dma_ops = {
+const struct dma_map_ops hexagon_dma_ops = {
        .alloc          = hexagon_dma_alloc_coherent,
        .free           = hexagon_free_coherent,
        .map_sg         = hexagon_map_sg,
index 983bae7d2665cd6cc164e4584041b96f9e98f64e..c02a6455839e012319739ef273f8f6d128e4574a 100644 (file)
@@ -162,7 +162,7 @@ void start_secondary(void)
        );
 
        /*  Set the memory struct  */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
        cpu = smp_processor_id();
index 1e4cae5ae0532e5560834bdde6b6e6382977d7df..0310078a95f8a8a78bf360cc7aa7d0f6680a11a0 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/export.h>
 #include <asm/machvec.h>
 
-extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops;
+extern const struct dma_map_ops sba_dma_ops, swiotlb_dma_ops;
 
 /* swiotlb declarations & definitions: */
 extern int swiotlb_late_init_with_default_size (size_t size);
@@ -34,7 +34,7 @@ static inline int use_swiotlb(struct device *dev)
                !sba_dma_ops.dma_supported(dev, *dev->dma_mask);
 }
 
-struct dma_map_ops *hwsw_dma_get_ops(struct device *dev)
+const struct dma_map_ops *hwsw_dma_get_ops(struct device *dev)
 {
        if (use_swiotlb(dev))
                return &swiotlb_dma_ops;
index 630ee807389968af1c7a45d1ad6896ff4e6f158b..aec4a3354abe2c0b7b96b373e7f1c6d6da7c740b 100644 (file)
@@ -2096,7 +2096,7 @@ static int __init acpi_sba_ioc_init_acpi(void)
 /* This has to run before acpi_scan_init(). */
 arch_initcall(acpi_sba_ioc_init_acpi);
 
-extern struct dma_map_ops swiotlb_dma_ops;
+extern const struct dma_map_ops swiotlb_dma_ops;
 
 static int __init
 sba_init(void)
@@ -2216,7 +2216,7 @@ sba_page_override(char *str)
 
 __setup("sbapagesize=",sba_page_override);
 
-struct dma_map_ops sba_dma_ops = {
+const struct dma_map_ops sba_dma_ops = {
        .alloc                  = sba_alloc_coherent,
        .free                   = sba_free_coherent,
        .map_page               = sba_map_page,
index d472805edfa9da0df269effd38c09de607bdd291..73ec3c6f4cfe6869d9935d4f45bec063146d66e4 100644 (file)
@@ -14,7 +14,7 @@
 
 #define DMA_ERROR_CODE 0
 
-extern struct dma_map_ops *dma_ops;
+extern const struct dma_map_ops *dma_ops;
 extern struct ia64_machine_vector ia64_mv;
 extern void set_iommu_machvec(void);
 
@@ -23,7 +23,10 @@ extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t,
 extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int,
                                enum dma_data_direction);
 
-#define get_dma_ops(dev) platform_dma_get_ops(dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
+{
+       return platform_dma_get_ops(NULL);
+}
 
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
index d5505d6f2382bd00d298639595730000b9347021..0302b366478919700fe06d900c4363fa12bc2a06 100644 (file)
  * 2005-Apr     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
  *              <anil.s.keshavamurthy@intel.com> adapted from i386
  */
+#include <asm-generic/kprobes.h>
+#include <asm/break.h>
+
+#define BREAK_INST     (long)(__IA64_BREAK_KPROBE << 6)
+
+#ifdef CONFIG_KPROBES
+
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
-#include <asm/break.h>
 
 #define __ARCH_WANT_KPROBES_INSN_SLOT
 #define MAX_INSN_SIZE   2      /* last half is for kprobe-booster */
-#define BREAK_INST     (long)(__IA64_BREAK_KPROBE << 6)
 #define NOP_M_INST     (long)(1<<27)
 #define BRL_INST(i1, i2) ((long)((0xcL << 37) |        /* brl */ \
                                (0x1L << 12) |  /* many */ \
@@ -124,4 +129,5 @@ extern void invalidate_stacked_regs(void);
 extern void flush_register_stack(void);
 extern void arch_remove_kprobe(struct kprobe *p);
 
-#endif                         /* _ASM_KPROBES_H */
+#endif /* CONFIG_KPROBES */
+#endif /* _ASM_KPROBES_H */
index ed7f09089f12fb8e29f481fb63f97df04300a8bf..af285c423e1e4d7a339404beb6d6dd48c2e15d44 100644 (file)
@@ -44,7 +44,7 @@ typedef void ia64_mv_kernel_launch_event_t(void);
 /* DMA-mapping interface: */
 typedef void ia64_mv_dma_init (void);
 typedef u64 ia64_mv_dma_get_required_mask (struct device *);
-typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *);
+typedef const struct dma_map_ops *ia64_mv_dma_get_ops(struct device *);
 
 /*
  * WARNING: The legacy I/O space is _architected_.  Platforms are
@@ -248,7 +248,7 @@ extern void machvec_init_from_cmdline(const char *cmdline);
 # endif /* CONFIG_IA64_GENERIC */
 
 extern void swiotlb_dma_init(void);
-extern struct dma_map_ops *dma_get_ops(struct device *);
+extern const struct dma_map_ops *dma_get_ops(struct device *);
 
 /*
  * Define default versions so we can extend machvec for new platforms without having
index 7f7916238208cf13999c8ccfee9086e823cea8ca..e0dd97f4eb693fe131cddb668f4031f5c3291c99 100644 (file)
@@ -4,7 +4,7 @@
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly;
 
-struct dma_map_ops *dma_ops;
+const struct dma_map_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
@@ -17,7 +17,7 @@ static int __init dma_init(void)
 }
 fs_initcall(dma_init);
 
-struct dma_map_ops *dma_get_ops(struct device *dev)
+const struct dma_map_ops *dma_get_ops(struct device *dev)
 {
        return dma_ops;
 }
index 992c1098c522c2eecf6dacbef4a3b96f2ecf2de1..9094a73f996f30ebfa036d43122eb29410f6fa70 100644 (file)
@@ -90,11 +90,11 @@ void __init pci_iommu_alloc(void)
 {
        dma_ops = &intel_dma_ops;
 
-       dma_ops->sync_single_for_cpu = machvec_dma_sync_single;
-       dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg;
-       dma_ops->sync_single_for_device = machvec_dma_sync_single;
-       dma_ops->sync_sg_for_device = machvec_dma_sync_sg;
-       dma_ops->dma_supported = iommu_dma_supported;
+       intel_dma_ops.sync_single_for_cpu = machvec_dma_sync_single;
+       intel_dma_ops.sync_sg_for_cpu = machvec_dma_sync_sg;
+       intel_dma_ops.sync_single_for_device = machvec_dma_sync_single;
+       intel_dma_ops.sync_sg_for_device = machvec_dma_sync_sg;
+       intel_dma_ops.dma_supported = iommu_dma_supported;
 
        /*
         * The order of these functions is important for
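Worth noting: once dma_ops is a pointer-to-const, boot-time fixups like the ones above can no longer write through the pointer and must target the writable definition instead. As a generic sketch of the pattern (fixup_ops and fixup_dma_init are hypothetical stand-ins for intel_dma_ops and the init hook):

    static struct dma_map_ops fixup_ops;      /* writable definition, file-local */
    const struct dma_map_ops *dma_ops;        /* users only ever see const */

    void __init fixup_dma_init(void)
    {
            dma_ops = &fixup_ops;
            /* writes go via the struct; dma_ops-> is read-only now */
            fixup_ops.sync_single_for_cpu = machvec_dma_sync_single;
    }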
index 2933208c02855d28e7aa607f2e15f8fd11d9fdb4..a14989dacded81301734c3b12de8d17759c290f0 100644 (file)
@@ -30,7 +30,7 @@ static void ia64_swiotlb_free_coherent(struct device *dev, size_t size,
        swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 }
 
-struct dma_map_ops swiotlb_dma_ops = {
+const struct dma_map_ops swiotlb_dma_ops = {
        .alloc = ia64_swiotlb_alloc_coherent,
        .free = ia64_swiotlb_free_coherent,
        .map_page = swiotlb_map_page,
index c483ece3eb84c9b1ac3d271c7ffe43610a47931e..d68322966f33acba41b13b06395356bfd1f8b6d2 100644 (file)
@@ -994,7 +994,7 @@ cpu_init (void)
         */
        ia64_setreg(_IA64_REG_CR_DCR,  (  IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
                                        | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
        BUG_ON(current->mm);
 
index 4c3b84d8406a8b5b0cc1888681fe2f85a356dc99..52704f199dd68ed8406dcabf0e5cb0741924979a 100644 (file)
@@ -525,7 +525,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
                                /* both ends local to this partition */
                                seq_puts(s, " local");
                        else if (SN_HWPERF_FOREIGN(p))
-                               /* both ends of the link in foreign partiton */
+                               /* both ends of the link in foreign partition */
                                seq_puts(s, " foreign");
                        else
                                /* link straddles a partition */
index d227a6988d6b14b0bb3119d43ef4ed553fd231d3..95474460b367208a0dc327b1bf120af0eca2816c 100644 (file)
@@ -18,6 +18,7 @@ config M32R
        select MODULES_USE_ELF_RELA
        select HAVE_DEBUG_STACKOVERFLOW
        select CPU_NO_EFFICIENT_FFS
+       select DMA_NOOP_OPS
 
 config SBUS
        bool
index 8c24c5e1db66c1b4a74c04460ce460a9e3f80bc4..deb298777df2b96b5a51b6f6b3ec06cfdd423afa 100644 (file)
@@ -11,3 +11,4 @@ generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
+generic-y += kprobes.h
index 4a9f35e0973ff166956c123d594fd24c7dfafc92..5203fc87f080edaa854d685f7a7b3126d4151d66 100644 (file)
@@ -4,7 +4,6 @@
  * This file is released under the GPLv2
  */
 struct dev_archdata {
-       struct dma_map_ops *dma_ops;
 };
 
 struct pdev_archdata {
index 2c43a77fe94235512ad45111daac393732685dc4..c01d9f52d22875a9f4469c0da2addc3ebeff5418 100644 (file)
 
 #define DMA_ERROR_CODE (~(dma_addr_t)0x0)
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-       if (dev && dev->archdata.dma_ops)
-               return dev->archdata.dma_ops;
        return &dma_noop_ops;
 }
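m32r drops its per-device archdata pointer here and, together with the DMA_NOOP_OPS select in the Kconfig hunk above, falls back to the library dma_noop_ops, which simply hands back physical addresses on a cache-coherent machine with no IOMMU. Roughly, from lib/dma-noop.c:

    static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
                                        unsigned long offset, size_t size,
                                        enum dma_data_direction dir,
                                        unsigned long attrs)
    {
            /* no IOMMU, no bounce buffers: DMA address == physical address */
            return page_to_phys(page) + offset;
    }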
 
index 136c69f1fb8ab8b73c23e2a752ea371c6e484512..b18bc0bd65447044c8ebd996faad7c3bc69fb697 100644 (file)
@@ -403,7 +403,7 @@ void __init cpu_init (void)
        printk(KERN_INFO "Initializing CPU#%d\n", cpu_id);
 
        /* Set up and load the per-CPU TSS and LDT */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
        if (current->mm)
                BUG();
index b38e2b255142ef1cf6132f7293d7bb393a566864..6ff09beba1baef02f391a4d8bc0ca4dc2bdee27d 100644 (file)
@@ -1,6 +1,8 @@
+#include <linux/compiler.h>
+
 #define splash_width 640
 #define splash_height 480
-unsigned char __attribute__ ((aligned(16))) bootlogo_bits[] = {
+unsigned char __aligned(16) bootlogo_bits[] = {
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
index b896c933fafce4182b3af263690587f72aba2336..c466db3ca3a835289b29ab1d6cb29c4101d0b685 100644 (file)
@@ -1,6 +1,8 @@
+#include <linux/compiler.h>
+
 #define bootlogo_width 160
 #define bootlogo_height 160
-unsigned char __attribute__ ((aligned(16))) bootlogo_bits[] = {
+unsigned char __aligned(16) bootlogo_bits[] = {
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x01, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x40, 0x55, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
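These bootlogo hunks, and the __packed/__printf/__mode conversions further down, all trade raw GCC attribute syntax for the linux/compiler.h wrappers, which are believed to expand to roughly:

    #define __packed        __attribute__((packed))
    #define __aligned(x)    __attribute__((aligned(x)))
    #define __printf(a, b)  __attribute__((format(printf, a, b)))
    #define __mode(x)       __attribute__((mode(x)))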
index f108dd121e9a6d4ac76503036fa061a63970a85f..131b4101ae5df2e641247cce42243248f6e8f8b0 100644 (file)
@@ -1,19 +1,20 @@
-CONFIG_LOCALVERSION="amcore-001"
+CONFIG_LOCALVERSION="amcore-002"
 CONFIG_DEFAULT_HOSTNAME="amcore"
 CONFIG_SYSVIPC=y
 # CONFIG_FHANDLE is not set
 # CONFIG_USELIB is not set
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_NAMESPACES=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_AIO is not set
 # CONFIG_ADVISE_SYSCALLS is not set
 # CONFIG_MEMBARRIER is not set
 CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 # CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_CFQ is not set
 # CONFIG_MMU is not set
 CONFIG_M5307=y
 CONFIG_AMCORE=y
@@ -27,13 +28,14 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
+CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
-CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
+# CONFIG_FW_LOADER is not set
 # CONFIG_ALLOW_DEV_COREDUMP is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
@@ -53,6 +55,7 @@ CONFIG_MTD_UCLINUX=y
 CONFIG_MTD_PLATRAM=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_AMAZON is not set
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -89,14 +92,12 @@ CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 # CONFIG_I2C_HELPER_AUTO is not set
 CONFIG_I2C_IMX=y
-CONFIG_PPS=y
+CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_SYSTOHC is not set
 CONFIG_RTC_DRV_DS1307=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
 # CONFIG_FILE_LOCKING is not set
 # CONFIG_DNOTIFY is not set
 # CONFIG_INOTIFY_USER is not set
@@ -108,6 +109,7 @@ CONFIG_ROMFS_BACKED_BY_BOTH=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_PRINTK_TIME=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_PANIC_ON_OOPS=y
 # CONFIG_SCHED_DEBUG is not set
index 6dccda766e22be661235d9af581cdae7f86b6772..b865c1a052ba2c24bc5206f374e9497e4773977a 100644 (file)
@@ -3814,7 +3814,7 @@ CAS2W2_FILLER:
 #      (3) Save current DFC/SFC (ASSUMED TO BE EQUAL!!!); Then set     #
 #          SFC/DFC according to whether exception occurred in user or  #
 #          supervisor mode.                                            #
-#      (4) Use "plpaw" instruction to pre-load ATC with efective       #
+#      (4) Use "plpaw" instruction to pre-load ATC with effective      #
 #          address page(s). THIS SHOULD NOT FAULT!!! The relevant      #
 #          page(s) should have been made resident prior to entering    #
 #          this routine.                                               #
index 6c76d6c24b3d0d3f206d9c9600adf6eb040eed4f..d4f9ccbfa85cd3cd55d4efae978c94093d2393a4 100644 (file)
@@ -33,3 +33,4 @@ generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 1a8080c4cc402a9922ee873474c3c0010cf59655..b61230e74e636066da66ca895ef6ea7b90f0a41d 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright (C) 1998  Kenneth Albanowski <kjahds@kjahds.com>,
  *
  */
+#include <linux/compiler.h>
 
 #ifndef _MC68328_H_
 #define _MC68328_H_
@@ -993,7 +994,7 @@ typedef volatile struct {
   volatile unsigned short int pad1;
   volatile unsigned short int pad2;
   volatile unsigned short int pad3;
-} __attribute__((packed)) m68328_uart;
+} __packed m68328_uart;
 
 
 /**********
index fedac87c5d130858ad929118229b4680f588e568..703331ece32861c1c1d38f13361b44e02d61b8e3 100644 (file)
@@ -9,6 +9,7 @@
  *                     The Silver Hammer Group, Ltd.
  *
  */
+#include <linux/compiler.h>
 
 #ifndef _MC68EZ328_H_
 #define _MC68EZ328_H_
@@ -815,7 +816,7 @@ typedef volatile struct {
   volatile unsigned short int nipr;
   volatile unsigned short int pad1;
   volatile unsigned short int pad2;
-} __attribute__((packed)) m68328_uart;
+} __packed m68328_uart;
 
 
 /**********
index 34a51b2c784fb96e74924947df9bbeef426ef2fb..fbaed7ddfb41526a6a44bb849f1724724eb2bc61 100644 (file)
@@ -909,7 +909,7 @@ typedef struct {
   volatile unsigned short int nipr;
   volatile unsigned short int hmark;
   volatile unsigned short int unused;
-} __attribute__((packed)) m68328_uart;
+} __packed m68328_uart;
 
 
 
index 96c536194287d02d1802c114add933495c855736..9210e470771bd43e5a7be7a8df08f9c05487713b 100644 (file)
@@ -1,9 +1,9 @@
 #ifndef _M68K_DMA_MAPPING_H
 #define _M68K_DMA_MAPPING_H
 
-extern struct dma_map_ops m68k_dma_ops;
+extern const struct dma_map_ops m68k_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
         return &m68k_dma_ops;
 }
index a3521b80c3b970c1aa4ddadc38654344ed6e108b..2d2424de1d658218686f262bb57d00e5fd2c4158 100644 (file)
@@ -6,6 +6,7 @@
  * This software may be used and distributed according to the terms of
  * the GNU General Public License (GPL), incorporated herein by reference.
  */
+#include <linux/compiler.h>
 
 #ifndef _NATFEAT_H
 #define _NATFEAT_H
@@ -17,6 +18,6 @@ void nf_init(void);
 void nf_shutdown(void);
 
 void nfprint(const char *fmt, ...)
-       __attribute__ ((format (printf, 1, 2)));
+       __printf(1, 2);
 
 # endif /* _NATFEAT_H */
index 1e4f386ba31e22bfe96182b7253b620504360077..87ef73a9385672ba6af2b8af42fda3043e3d4eba 100644 (file)
@@ -158,7 +158,7 @@ static int m68k_dma_map_sg(struct device *dev, struct scatterlist *sglist,
        return nents;
 }
 
-struct dma_map_ops m68k_dma_ops = {
+const struct dma_map_ops m68k_dma_ops = {
        .alloc                  = m68k_dma_alloc,
        .free                   = m68k_dma_free,
        .map_page               = m68k_dma_map_page,
index 8dffd36ec4f2411f867629111844c907ac47f7bf..ac08f81413901b5db2c70f3a1f631cfebe0ba756 100644 (file)
@@ -18,10 +18,10 @@ GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
-typedef                 int SItype     __attribute__ ((mode (SI)));
-typedef unsigned int USItype   __attribute__ ((mode (SI)));
-typedef                 int DItype     __attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
+typedef                 int SItype     __mode(SI);
+typedef unsigned int USItype   __mode(SI);
+typedef                 int DItype     __mode(DI);
+typedef int word_type           __mode(__word__);
 
 struct DIstruct {SItype high, low;};
 
index e6565a3ee2c37065949cd92eed8deeb78c709f33..5837b1dd33343f8ffc5682f7e1064d01ac467724 100644 (file)
@@ -18,10 +18,10 @@ GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
-typedef                 int SItype     __attribute__ ((mode (SI)));
-typedef unsigned int USItype   __attribute__ ((mode (SI)));
-typedef                 int DItype     __attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
+typedef                 int SItype     __mode(SI);
+typedef unsigned int USItype   __mode(SI);
+typedef                 int DItype     __mode(DI);
+typedef int word_type           __mode(__word__);
 
 struct DIstruct {SItype high, low;};
 
index 039779737c7d28bfb1d8caf29995fa440c3f2a2a..7f40566be6c81825ff0db530d87d13834f7ae454 100644 (file)
@@ -18,10 +18,10 @@ GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
-typedef                 int SItype     __attribute__ ((mode (SI)));
-typedef unsigned int USItype   __attribute__ ((mode (SI)));
-typedef                 int DItype     __attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
+typedef                 int SItype     __mode(SI);
+typedef unsigned int USItype   __mode(SI);
+typedef                 int DItype     __mode(DI);
+typedef int word_type           __mode(__word__);
 
 struct DIstruct {SItype high, low;};
 
index 6459af5b2af0a9c312c8c04d1dfc2009e24c30a6..3fb05c698c41ebfb28e9101dde8bf6480cb17e52 100644 (file)
@@ -65,10 +65,10 @@ GNU General Public License for more details. */
     umul_ppmm (__w.s.high, __w.s.low, u, v);                           \
     __w.ll; })
 
-typedef         int SItype     __attribute__ ((mode (SI)));
-typedef unsigned int USItype   __attribute__ ((mode (SI)));
-typedef                 int DItype     __attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
+typedef         int SItype     __mode(SI);
+typedef unsigned int USItype   __mode(SI);
+typedef                 int DItype     __mode(DI);
+typedef int word_type           __mode(__word__);
 
 struct DIstruct {SItype high, low;};
 
index d3731f0db73b77af36207b07ff4b74a34341c1bf..f9b9df5d6de92767b34816e45b92a7fe2d095c38 100644 (file)
@@ -54,3 +54,4 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 27af5d479ce62fda67f29a604efd852a0ddee7ff..fad3dc3cb21008c59d612827c40282fae34d1827 100644 (file)
@@ -1,9 +1,9 @@
 #ifndef _ASM_METAG_DMA_MAPPING_H
 #define _ASM_METAG_DMA_MAPPING_H
 
-extern struct dma_map_ops metag_dma_ops;
+extern const struct dma_map_ops metag_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &metag_dma_ops;
 }
index 91968d92652b4a7f03fb9c7138d5685f569bcb64..f0ab3a498328529229ad2e9d6ad5f5ec69413fd2 100644 (file)
@@ -575,7 +575,7 @@ static void metag_dma_sync_sg_for_device(struct device *dev,
                dma_sync_for_device(sg_virt(sg), sg->length, direction);
 }
 
-struct dma_map_ops metag_dma_ops = {
+const struct dma_map_ops metag_dma_ops = {
        .alloc                  = metag_dma_alloc,
        .free                   = metag_dma_free,
        .map_page               = metag_dma_map_page,
index bad13232de51897c4040fda12772ae31a4dbcc1c..c622293254e4e409aeb57c77a38fad8e7c34dc5a 100644 (file)
@@ -344,8 +344,8 @@ asmlinkage void secondary_start_kernel(void)
         * All kernel threads share the same mm context; grab a
         * reference and switch to it.
         */
-       atomic_inc(&mm->mm_users);
-       atomic_inc(&mm->mm_count);
+       mmget(mm);
+       mmgrab(mm);
        current->active_mm = mm;
        cpumask_set_cpu(cpu, mm_cpumask(mm));
        enter_lazy_tlb(mm, current);
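The atomic_inc conversions on mm_count/mm_users throughout this series use the then-new helpers, which mostly document intent: mmgrab() pins the struct mm_struct itself (the kernel-thread style reference taken here), while mmget() pins the whole address space. Roughly:

    static inline void mmgrab(struct mm_struct *mm)
    {
            atomic_inc(&mm->mm_count);      /* keep struct mm_struct alive */
    }

    static inline void mmget(struct mm_struct *mm)
    {
            atomic_inc(&mm->mm_users);      /* keep the address space alive */
    }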
index 6275eb051801236cbdd235d5be84d6d1574cd838..1732ec13b211d5a672298461cd88d4cf409a55a2 100644 (file)
@@ -10,3 +10,4 @@ generic-y += preempt.h
 generic-y += syscalls.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
+generic-y += kprobes.h
index 1768d4bdc8d30fa4ff9ec91d228670700e5f5762..3fad5e722a6690b43ddae12092dfc53f73e52501 100644 (file)
@@ -36,9 +36,9 @@
 /*
  * Available generic sets of operations
  */
-extern struct dma_map_ops dma_direct_ops;
+extern const struct dma_map_ops dma_direct_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &dma_direct_ops;
 }
index 818daf230eb4e8d34989e938406d4b1445e37fc5..12e093a03e60374c9a10222e5755a0f63e143aa2 100644 (file)
@@ -187,7 +187,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 #endif
 }
 
-struct dma_map_ops dma_direct_ops = {
+const struct dma_map_ops dma_direct_ops = {
        .alloc          = dma_direct_alloc_coherent,
        .free           = dma_direct_free_coherent,
        .mmap           = dma_direct_mmap_coherent,
index 7f696f97f9ddc1828807bbab0bcd0e89161b0deb..13bc93242c0c571c71dc4afe5a7947a47bd7051d 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/mm.h>
+#include <linux/shmem_fs.h>
 #include <linux/list.h>
 #include <linux/syscalls.h>
 #include <linux/irq.h>
index 1226965e1e4f7527563450523f2be053265f377e..c64bd87f0b6ef06b5538459f32023cba29015289 100644 (file)
@@ -200,7 +200,7 @@ static phys_addr_t octeon_unity_dma_to_phys(struct device *dev, dma_addr_t daddr
 }
 
 struct octeon_dma_map_ops {
-       struct dma_map_ops dma_map_ops;
+       const struct dma_map_ops dma_map_ops;
        dma_addr_t (*phys_to_dma)(struct device *dev, phys_addr_t paddr);
        phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
 };
@@ -328,7 +328,7 @@ static struct octeon_dma_map_ops _octeon_pci_dma_map_ops = {
        },
 };
 
-struct dma_map_ops *octeon_pci_dma_map_ops;
+const struct dma_map_ops *octeon_pci_dma_map_ops;
 
 void __init octeon_pci_dma_init(void)
 {
index 21c2082a0dfbb3b1dec84125f9216d54accd4edf..6aa796f1081a90b0b2f1dded017cb29fce683c09 100644 (file)
@@ -6,12 +6,7 @@
 #ifndef _ASM_MIPS_DEVICE_H
 #define _ASM_MIPS_DEVICE_H
 
-struct dma_map_ops;
-
 struct dev_archdata {
-       /* DMA operations on that device */
-       struct dma_map_ops *dma_ops;
-
 #ifdef CONFIG_DMA_PERDEV_COHERENT
        /* Non-zero if DMA is coherent with CPU caches */
        bool dma_coherent;
index 7aa71b9b0258f1fc349fbc2cc78b1b928ac730f8..aba71385f9d15afcbdd82262df9b905c69ef9c22 100644 (file)
@@ -9,14 +9,11 @@
 #include <dma-coherence.h>
 #endif
 
-extern struct dma_map_ops *mips_dma_map_ops;
+extern const struct dma_map_ops *mips_dma_map_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-       if (dev && dev->archdata.dma_ops)
-               return dev->archdata.dma_ops;
-       else
-               return mips_dma_map_ops;
+       return mips_dma_map_ops;
 }
 
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
index daba1f9a4f7939070dd0a75c00ac67979055737e..291846d9ba8346ca915879ff6b7c1162cde67df8 100644 (file)
@@ -22,6 +22,9 @@
 #ifndef _ASM_KPROBES_H
 #define _ASM_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
 #include <linux/ptrace.h>
 #include <linux/types.h>
 
@@ -94,4 +97,5 @@ struct kprobe_ctlblk {
 extern int kprobe_exceptions_notify(struct notifier_block *self,
                                    unsigned long val, void *data);
 
-#endif                         /* _ASM_KPROBES_H */
+#endif /* CONFIG_KPROBES */
+#endif /* _ASM_KPROBES_H */
index 460042ee5d6fa292fb28998ad43001ad7f91f744..9110988b92a127c525d0d3f889b720a50cd2b160 100644 (file)
@@ -65,7 +65,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
 phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
 
 struct dma_map_ops;
-extern struct dma_map_ops *octeon_pci_dma_map_ops;
+extern const struct dma_map_ops *octeon_pci_dma_map_ops;
 extern char *octeon_swiotlb;
 
 #endif /* __ASM_MACH_CAVIUM_OCTEON_DMA_COHERENCE_H */
index be52c2125d7101e37b805aef738ffa252c03d341..e0717d10e650fd0ebcbf734bf7f59ddb0954cc50 100644 (file)
@@ -88,7 +88,7 @@ extern struct plat_smp_ops nlm_smp_ops;
 extern char nlm_reset_entry[], nlm_reset_entry_end[];
 
 /* SWIOTLB */
-extern struct dma_map_ops nlm_swiotlb_dma_ops;
+extern const struct dma_map_ops nlm_swiotlb_dma_ops;
 
 extern unsigned int nlm_threads_per_core;
 extern cpumask_t nlm_cpumask;
index cb479be31a500cec8827cbdfaf5e78da070bd416..49c6df20672a9dca6575cd0771bf72867ddfd869 100644 (file)
@@ -2232,7 +2232,7 @@ void per_cpu_trap_init(bool is_boot_cpu)
        if (!cpu_data[cpu].asid_cache)
                cpu_data[cpu].asid_cache = asid_first_version(cpu);
 
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
        BUG_ON(current->mm);
        enter_lazy_tlb(&init_mm, current);
index f9dbfb14af3358e67b3ef3e922709ab38ba0bdd8..093517e85a6cdca8a550042dbd06a7c4b2a93ae5 100644 (file)
@@ -111,7 +111,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
        base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
                           VM_READ|VM_WRITE|VM_EXEC|
                           VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-                          0);
+                          0, NULL);
        if (IS_ERR_VALUE(base)) {
                ret = base;
                goto out;
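The extra NULL here tracks a signature change in mm/mmap.c: mmap_region() grew a list parameter (for userfaultfd unmap notification, as far as this series shows), which arch callers like this vDSO setup simply pass as NULL. Assumed signature:

    unsigned long mmap_region(struct file *file, unsigned long addr,
                              unsigned long len, vm_flags_t vm_flags,
                              unsigned long pgoff, struct list_head *uf);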
index df7235e334997a2398b4ddf6af96347a5d2be764..178ca17a5667ee4d8584bfc585d69d3e88c7a6ba 100644 (file)
@@ -114,7 +114,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
        return daddr;
 }
 
-static struct dma_map_ops loongson_dma_map_ops = {
+static const struct dma_map_ops loongson_dma_map_ops = {
        .alloc = loongson_dma_alloc_coherent,
        .free = loongson_dma_free_coherent,
        .map_page = loongson_dma_map_page,
index a39c36af97adf371459c1df924d2e20aace50181..fe8df14b616984a702416dcdb442e5a2c2f3761d 100644 (file)
@@ -148,8 +148,8 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
        gfp = massage_gfp_flags(dev, gfp);
 
        if (IS_ENABLED(CONFIG_DMA_CMA) && gfpflags_allow_blocking(gfp))
-               page = dma_alloc_from_contiguous(dev,
-                                       count, get_order(size));
+               page = dma_alloc_from_contiguous(dev, count, get_order(size),
+                                                gfp);
        if (!page)
                page = alloc_pages(gfp, get_order(size));
 
@@ -417,7 +417,7 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 
 EXPORT_SYMBOL(dma_cache_sync);
 
-static struct dma_map_ops mips_default_dma_map_ops = {
+static const struct dma_map_ops mips_default_dma_map_ops = {
        .alloc = mips_dma_alloc_coherent,
        .free = mips_dma_free_coherent,
        .mmap = mips_dma_mmap,
@@ -433,7 +433,7 @@ static struct dma_map_ops mips_default_dma_map_ops = {
        .dma_supported = mips_dma_supported
 };
 
-struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
+const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
 EXPORT_SYMBOL(mips_dma_map_ops);
 
 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
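The extra gfp argument in the dma_alloc_from_contiguous() call above follows a CMA API change letting callers pass their allocation flags down, so non-blocking callers can be refused rather than made to sleep. The signature, as assumed here:

    struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
                                           unsigned int align, gfp_t gfp);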
index 0630693bec2aa5d491568f34fda84477056afc63..0ec9d9da6d51bddf440dfed4f19eb38559522c49 100644 (file)
@@ -67,7 +67,7 @@ static void nlm_dma_free_coherent(struct device *dev, size_t size,
        swiotlb_free_coherent(dev, size, vaddr, dma_handle);
 }
 
-struct dma_map_ops nlm_swiotlb_dma_ops = {
+const struct dma_map_ops nlm_swiotlb_dma_ops = {
        .alloc = nlm_dma_alloc_coherent,
        .free = nlm_dma_free_coherent,
        .map_page = swiotlb_map_page,
index 308d051fc45cd5d25e96c4157abbfeabd32cc5c0..9ee01936862ee1dfe11d8a21754bb16572230aac 100644 (file)
@@ -167,7 +167,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
                pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, dconfig);
        }
 
-       dev->dev.archdata.dma_ops = octeon_pci_dma_map_ops;
+       dev->dev.dma_ops = octeon_pci_dma_map_ops;
 
        return 0;
 }
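This is the flip side of the archdata removals above: the per-device override now lives directly in struct device, so bus code assigns dev->dma_ops and the generic get_dma_ops() (sketched earlier) picks it up before falling back to the arch default. Approximately:

    struct device {
            /* ... */
            const struct dma_map_ops *dma_ops;  /* per-device override, may be NULL */
            /* ... */
    };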
index 1dcd44757f323f26db9ae7e38562a9ea947996ab..737ef574b3eae5faf0e59970a7427f8138b65a40 100644 (file)
@@ -14,9 +14,9 @@
 #include <asm/cache.h>
 #include <asm/io.h>
 
-extern struct dma_map_ops mn10300_dma_ops;
+extern const struct dma_map_ops mn10300_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &mn10300_dma_ops;
 }
index c800b590183a8af19104f132cdb13b4a9972547a..7abea0bdb549a6fefb375fe0bdce64e1a6c11207 100644 (file)
 #ifndef _ASM_KPROBES_H
 #define _ASM_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0xff
+
+#ifdef CONFIG_KPROBES
 #include <linux/types.h>
 #include <linux/ptrace.h>
 
 struct kprobe;
 
 typedef unsigned char kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xff
 #define MAX_INSN_SIZE 8
 #define MAX_STACK_SIZE 128
 
@@ -47,4 +51,5 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 
 extern void arch_remove_kprobe(struct kprobe *p);
 
+#endif /* CONFIG_KPROBES */
 #endif /* _ASM_KPROBES_H */
index 426173c4b0b900c0315431ef1104932aa5f867a7..e65b5cc2fa67f1ace278fc2b418636568708af43 100644 (file)
@@ -589,7 +589,7 @@ static void __init smp_cpu_init(void)
        }
        printk(KERN_INFO "Initializing CPU#%d\n", cpu_id);
 
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
        BUG_ON(current->mm);
 
index 4f4b9029f0ea176c81faf729fe5dde9ed4078f87..86108d2496b35fa3727a6c9625b792fa47a21e90 100644 (file)
@@ -121,7 +121,7 @@ static int mn10300_dma_supported(struct device *dev, u64 mask)
        return 1;
 }
 
-struct dma_map_ops mn10300_dma_ops = {
+const struct dma_map_ops mn10300_dma_ops = {
        .alloc                  = mn10300_dma_alloc,
        .free                   = mn10300_dma_free,
        .map_page               = mn10300_dma_map_page,
old mode 100755 (executable)
new mode 100644 (file)
old mode 100755 (executable)
new mode 100644 (file)
index 35b0e883761a846e7cd63d8175e0aa304de73f9c..aaa3c218b56cceefeab7ffac2477a3c4a3d1f55b 100644 (file)
@@ -62,3 +62,4 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index bec8ac8e6ad2311cf1703334c71c367af6248ebb..7b3c6f280293aeb5cdd13b1de341f0e74f36bf2d 100644 (file)
@@ -10,9 +10,9 @@
 #ifndef _ASM_NIOS2_DMA_MAPPING_H
 #define _ASM_NIOS2_DMA_MAPPING_H
 
-extern struct dma_map_ops nios2_dma_ops;
+extern const struct dma_map_ops nios2_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &nios2_dma_ops;
 }
index f6a5dcf9d6825ff5fd0e750b9afd62a962295759..7040c1adbb5efecf1e1de93d34c19c66e67f6fec 100644 (file)
@@ -192,7 +192,7 @@ static void nios2_dma_sync_sg_for_device(struct device *dev,
 
 }
 
-struct dma_map_ops nios2_dma_ops = {
+const struct dma_map_ops nios2_dma_ops = {
        .alloc                  = nios2_dma_alloc,
        .free                   = nios2_dma_free,
        .map_page               = nios2_dma_map_page,
index 8d22015fde3e053dc95b5f82aaa94d6c8ee3d2ce..1e95920b07377417415c8622201d8e04bfd1f1a7 100644 (file)
@@ -12,6 +12,7 @@ config OPENRISC
        select HAVE_MEMBLOCK
        select GPIOLIB
         select HAVE_ARCH_TRACEHOOK
+       select SPARSE_IRQ
        select GENERIC_IRQ_CHIP
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
index 0eb04c8240f95fb6715703a439280d6c358e077e..c43d4e1d14eb9c576b1438d19e479d147a642c97 100644 (file)
@@ -10,4 +10,3 @@ that are due for investigation shortly, i.e. our TODO list:
    or1k and this change is slowly trickling through the stack.  For the time
    being, or32 is equivalent to or1k.
 
--- Implement optimized version of memcpy and memset
index ef8d1ccc3e450eaaaaaa8b199e7bf8b7a5b5436d..fb01873a5aad5c06d09867f8adb6256e851e0cb6 100644 (file)
@@ -1,7 +1,6 @@
 
 header-y += ucontext.h
 
-generic-y += atomic.h
 generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bitsperlong.h
@@ -10,8 +9,6 @@ generic-y += bugs.h
 generic-y += cacheflush.h
 generic-y += checksum.h
 generic-y += clkdev.h
-generic-y += cmpxchg-local.h
-generic-y += cmpxchg.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
@@ -22,12 +19,12 @@ generic-y += exec.h
 generic-y += fb.h
 generic-y += fcntl.h
 generic-y += ftrace.h
-generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
+generic-y += irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
@@ -70,3 +67,4 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
new file mode 100644 (file)
index 0000000..146e166
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_ATOMIC_H
+#define __ASM_OPENRISC_ATOMIC_H
+
+#include <linux/types.h>
+
+/* Atomically perform op with v->counter and i */
+#define ATOMIC_OP(op)                                                  \
+static inline void atomic_##op(int i, atomic_t *v)                     \
+{                                                                      \
+       int tmp;                                                        \
+                                                                       \
+       __asm__ __volatile__(                                           \
+               "1:     l.lwa   %0,0(%1)        \n"                     \
+               "       l." #op " %0,%0,%2      \n"                     \
+               "       l.swa   0(%1),%0        \n"                     \
+               "       l.bnf   1b              \n"                     \
+               "        l.nop                  \n"                     \
+               : "=&r"(tmp)                                            \
+               : "r"(&v->counter), "r"(i)                              \
+               : "cc", "memory");                                      \
+}
+
+/* Atomically perform op with v->counter and i, return the result */
+#define ATOMIC_OP_RETURN(op)                                           \
+static inline int atomic_##op##_return(int i, atomic_t *v)             \
+{                                                                      \
+       int tmp;                                                        \
+                                                                       \
+       __asm__ __volatile__(                                           \
+               "1:     l.lwa   %0,0(%1)        \n"                     \
+               "       l." #op " %0,%0,%2      \n"                     \
+               "       l.swa   0(%1),%0        \n"                     \
+               "       l.bnf   1b              \n"                     \
+               "        l.nop                  \n"                     \
+               : "=&r"(tmp)                                            \
+               : "r"(&v->counter), "r"(i)                              \
+               : "cc", "memory");                                      \
+                                                                       \
+       return tmp;                                                     \
+}
+
+/* Atomically perform op with v->counter and i, return orig v->counter */
+#define ATOMIC_FETCH_OP(op)                                            \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       int tmp, old;                                                   \
+                                                                       \
+       __asm__ __volatile__(                                           \
+               "1:     l.lwa   %0,0(%2)        \n"                     \
+               "       l." #op " %1,%0,%3      \n"                     \
+               "       l.swa   0(%2),%1        \n"                     \
+               "       l.bnf   1b              \n"                     \
+               "        l.nop                  \n"                     \
+               : "=&r"(old), "=&r"(tmp)                                \
+               : "r"(&v->counter), "r"(i)                              \
+               : "cc", "memory");                                      \
+                                                                       \
+       return old;                                                     \
+}
+
+ATOMIC_OP_RETURN(add)
+ATOMIC_OP_RETURN(sub)
+
+ATOMIC_FETCH_OP(add)
+ATOMIC_FETCH_OP(sub)
+ATOMIC_FETCH_OP(and)
+ATOMIC_FETCH_OP(or)
+ATOMIC_FETCH_OP(xor)
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#define atomic_add_return      atomic_add_return
+#define atomic_sub_return      atomic_sub_return
+#define atomic_fetch_add       atomic_fetch_add
+#define atomic_fetch_sub       atomic_fetch_sub
+#define atomic_fetch_and       atomic_fetch_and
+#define atomic_fetch_or                atomic_fetch_or
+#define atomic_fetch_xor       atomic_fetch_xor
+#define atomic_and     atomic_and
+#define atomic_or      atomic_or
+#define atomic_xor     atomic_xor
+
+/*
+ * Atomically add a to v->counter as long as v is not already u.
+ * Returns the original value at v->counter.
+ *
+ * This is often used through atomic_inc_not_zero()
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+       int old, tmp;
+
+       __asm__ __volatile__(
+               "1:     l.lwa %0, 0(%2)         \n"
+               "       l.sfeq %0, %4           \n"
+               "       l.bf 2f                 \n"
+               "        l.add %1, %0, %3       \n"
+               "       l.swa 0(%2), %1         \n"
+               "       l.bnf 1b                \n"
+               "        l.nop                  \n"
+               "2:                             \n"
+               : "=&r"(old), "=&r" (tmp)
+               : "r"(&v->counter), "r"(a), "r"(u)
+               : "cc", "memory");
+
+       return old;
+}
+#define __atomic_add_unless    __atomic_add_unless
+
+#include <asm-generic/atomic.h>
+
+#endif /* __ASM_OPENRISC_ATOMIC_H */
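For readers not fluent in the macros: l.lwa/l.swa are OpenRISC's load-linked/store-conditional pair, and a failed l.swa clears the flag so l.bnf loops back and retries. ATOMIC_OP_RETURN(add), for instance, expands to roughly:

    static inline int atomic_add_return(int i, atomic_t *v)
    {
            int tmp;

            __asm__ __volatile__(
                    "1:     l.lwa   %0,0(%1)        \n"  /* load, set reservation */
                    "       l.add   %0,%0,%2        \n"
                    "       l.swa   0(%1),%0        \n"  /* store iff reservation held */
                    "       l.bnf   1b              \n"  /* retry if it was lost */
                    "        l.nop                  \n"  /* delay slot */
                    : "=&r"(tmp)
                    : "r"(&v->counter), "r"(i)
                    : "cc", "memory");

            return tmp;
    }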
index 3003cdad561bdac6fa75031f2275373fbd9bec4d..689f56819d53b3f4815e46a1b44044485ea2f00c 100644 (file)
@@ -45,7 +45,7 @@
 #include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/lock.h>
 
-#include <asm-generic/bitops/atomic.h>
+#include <asm/bitops/atomic.h>
 #include <asm-generic/bitops/non-atomic.h>
 #include <asm-generic/bitops/le.h>
 #include <asm-generic/bitops/ext2-atomic.h>
diff --git a/arch/openrisc/include/asm/bitops/atomic.h b/arch/openrisc/include/asm/bitops/atomic.h
new file mode 100644 (file)
index 0000000..35fb85f
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_BITOPS_ATOMIC_H
+#define __ASM_OPENRISC_BITOPS_ATOMIC_H
+
+static inline void set_bit(int nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+       unsigned long tmp;
+
+       __asm__ __volatile__(
+               "1:     l.lwa   %0,0(%1)        \n"
+               "       l.or    %0,%0,%2        \n"
+               "       l.swa   0(%1),%0        \n"
+               "       l.bnf   1b              \n"
+               "        l.nop                  \n"
+               : "=&r"(tmp)
+               : "r"(p), "r"(mask)
+               : "cc", "memory");
+}
+
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+       unsigned long tmp;
+
+       __asm__ __volatile__(
+               "1:     l.lwa   %0,0(%1)        \n"
+               "       l.and   %0,%0,%2        \n"
+               "       l.swa   0(%1),%0        \n"
+               "       l.bnf   1b              \n"
+               "        l.nop                  \n"
+               : "=&r"(tmp)
+               : "r"(p), "r"(~mask)
+               : "cc", "memory");
+}
+
+static inline void change_bit(int nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+       unsigned long tmp;
+
+       __asm__ __volatile__(
+               "1:     l.lwa   %0,0(%1)        \n"
+               "       l.xor   %0,%0,%2        \n"
+               "       l.swa   0(%1),%0        \n"
+               "       l.bnf   1b              \n"
+               "        l.nop                  \n"
+               : "=&r"(tmp)
+               : "r"(p), "r"(mask)
+               : "cc", "memory");
+}
+
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+       unsigned long old;
+       unsigned long tmp;
+
+       __asm__ __volatile__(
+               "1:     l.lwa   %0,0(%2)        \n"
+               "       l.or    %1,%0,%3        \n"
+               "       l.swa   0(%2),%1        \n"
+               "       l.bnf   1b              \n"
+               "        l.nop                  \n"
+               : "=&r"(old), "=&r"(tmp)
+               : "r"(p), "r"(mask)
+               : "cc", "memory");
+
+       return (old & mask) != 0;
+}
+
+static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+       unsigned long old;
+       unsigned long tmp;
+
+       __asm__ __volatile__(
+               "1:     l.lwa   %0,0(%2)        \n"
+               "       l.and   %1,%0,%3        \n"
+               "       l.swa   0(%2),%1        \n"
+               "       l.bnf   1b              \n"
+               "        l.nop                  \n"
+               : "=&r"(old), "=&r"(tmp)
+               : "r"(p), "r"(~mask)
+               : "cc", "memory");
+
+       return (old & mask) != 0;
+}
+
+static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+       unsigned long old;
+       unsigned long tmp;
+
+       __asm__ __volatile__(
+               "1:     l.lwa   %0,0(%2)        \n"
+               "       l.xor   %1,%0,%3        \n"
+               "       l.swa   0(%2),%1        \n"
+               "       l.bnf   1b              \n"
+               "        l.nop                  \n"
+               : "=&r"(old), "=&r"(tmp)
+               : "r"(p), "r"(mask)
+               : "cc", "memory");
+
+       return (old & mask) != 0;
+}
+
+#endif /* __ASM_OPENRISC_BITOPS_ATOMIC_H */
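Callers use these through the standard kernel bitops contract; a minimal usage sketch (the flag word and bit number are made up for illustration):

    static unsigned long pending;                   /* hypothetical flag word */

    void mark_work(void)
    {
            set_bit(3, &pending);                   /* atomic l.lwa/l.swa loop */
    }

    int take_work(void)
    {
            return test_and_clear_bit(3, &pending); /* 1 if the bit was set */
    }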
diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
new file mode 100644 (file)
index 0000000..5fcb9ac
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_CMPXCHG_H
+#define __ASM_OPENRISC_CMPXCHG_H
+
+#include  <linux/types.h>
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg().
+ */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
+{
+       if (size != 4) {
+               __cmpxchg_called_with_bad_pointer();
+               return old;
+       }
+
+       __asm__ __volatile__(
+               "1:     l.lwa %0, 0(%1)         \n"
+               "       l.sfeq %0, %2           \n"
+               "       l.bnf 2f                \n"
+               "        l.nop                  \n"
+               "       l.swa 0(%1), %3         \n"
+               "       l.bnf 1b                \n"
+               "        l.nop                  \n"
+               "2:                             \n"
+               : "=&r"(old)
+               : "r"(ptr), "r"(old), "r"(new)
+               : "cc", "memory");
+
+       return old;
+}
+
+#define cmpxchg(ptr, o, n)                                             \
+       ({                                                              \
+               (__typeof__(*(ptr))) __cmpxchg((ptr),                   \
+                                              (unsigned long)(o),      \
+                                              (unsigned long)(n),      \
+                                              sizeof(*(ptr)));         \
+       })
+
+/*
+ * This function doesn't exist, so you'll get a linker error if
+ * something tries to do an invalidly-sized xchg().
+ */
+extern void __xchg_called_with_bad_pointer(void);
+
+static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
+                                  int size)
+{
+       if (size != 4) {
+               __xchg_called_with_bad_pointer();
+               return val;
+       }
+
+       __asm__ __volatile__(
+               "1:     l.lwa %0, 0(%1)         \n"
+               "       l.swa 0(%1), %2         \n"
+               "       l.bnf 1b                \n"
+               "        l.nop                  \n"
+               : "=&r"(val)
+               : "r"(ptr), "r"(val)
+               : "cc", "memory");
+
+       return val;
+}
+
+#define xchg(ptr, with) \
+       ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
+
+#endif /* __ASM_OPENRISC_CMPXCHG_H */
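A typical caller-side loop over this primitive, as a sketch (the counter and cap are invented for illustration; note this port only supports 4-byte operands, which unsigned long is on or1k):

    /* increment *ctr unless it already reached max; return the old value */
    static unsigned long inc_unless_max(unsigned long *ctr, unsigned long max)
    {
            unsigned long old;

            do {
                    old = *ctr;
                    if (old == max)
                            break;          /* leave it; caller sees max */
            } while (cmpxchg(ctr, old, old + 1) != old);

            return old;
    }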
index 917318b6a970f533fbd4c8d576f42d63327e7b00..ec10679d6429d5f67bf43f77e990c6eecf0a004f 100644 (file)
@@ -24,9 +24,11 @@ struct cpuinfo {
 
        u32 icache_size;
        u32 icache_block_size;
+       u32 icache_ways;
 
        u32 dcache_size;
        u32 dcache_block_size;
+       u32 dcache_ways;
 };
 
 extern struct cpuinfo cpuinfo;
index 1f260bccb36878d6445d12695f9f033787cbda48..0c0075f17145f526ec2e4abff84b45b83c4acf84 100644 (file)
@@ -28,9 +28,9 @@
 
 #define DMA_ERROR_CODE         (~(dma_addr_t)0x0)
 
-extern struct dma_map_ops or1k_dma_map_ops;
+extern const struct dma_map_ops or1k_dma_map_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &or1k_dma_map_ops;
 }
diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h
new file mode 100644 (file)
index 0000000..7780873
--- /dev/null
@@ -0,0 +1,135 @@
+#ifndef __ASM_OPENRISC_FUTEX_H
+#define __ASM_OPENRISC_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+({                                                             \
+       __asm__ __volatile__ (                                  \
+               "1:     l.lwa   %0, %2                  \n"     \
+                       insn                            "\n"    \
+               "2:     l.swa   %2, %1                  \n"     \
+               "       l.bnf   1b                      \n"     \
+               "        l.ori  %1, r0, 0               \n"     \
+               "3:                                     \n"     \
+               ".section .fixup,\"ax\"                 \n"     \
+               "4:     l.j     3b                      \n"     \
+               "        l.addi %1, r0, %3              \n"     \
+               ".previous                              \n"     \
+               ".section __ex_table,\"a\"              \n"     \
+               ".word  1b,4b,2b,4b                     \n"     \
+               ".previous                              \n"     \
+               : "=&r" (oldval), "=&r" (ret), "+m" (*uaddr)    \
+               : "i" (-EFAULT), "r" (oparg)                    \
+               : "cc", "memory"                                \
+               );                                              \
+})
+
+static inline int
+futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+{
+       int op = (encoded_op >> 28) & 7;
+       int cmp = (encoded_op >> 24) & 15;
+       int oparg = (encoded_op << 8) >> 20;
+       int cmparg = (encoded_op << 20) >> 20;
+       int oldval = 0, ret;
+
+       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+               oparg = 1 << oparg;
+
+       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+               return -EFAULT;
+
+       pagefault_disable();
+
+       switch (op) {
+       case FUTEX_OP_SET:
+               __futex_atomic_op("l.or %1,%4,%4", ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_ADD:
+               __futex_atomic_op("l.add %1,%0,%4", ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_OR:
+               __futex_atomic_op("l.or %1,%0,%4", ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_ANDN:
+               __futex_atomic_op("l.and %1,%0,%4", ret, oldval, uaddr, ~oparg);
+               break;
+       case FUTEX_OP_XOR:
+               __futex_atomic_op("l.xor %1,%0,%4", ret, oldval, uaddr, oparg);
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+
+       pagefault_enable();
+
+       if (!ret) {
+               switch (cmp) {
+               case FUTEX_OP_CMP_EQ:
+                       ret = (oldval == cmparg);
+                       break;
+               case FUTEX_OP_CMP_NE:
+                       ret = (oldval != cmparg);
+                       break;
+               case FUTEX_OP_CMP_LT:
+                       ret = (oldval < cmparg);
+                       break;
+               case FUTEX_OP_CMP_GE:
+                       ret = (oldval >= cmparg);
+                       break;
+               case FUTEX_OP_CMP_LE:
+                       ret = (oldval <= cmparg);
+                       break;
+               case FUTEX_OP_CMP_GT:
+                       ret = (oldval > cmparg);
+                       break;
+               default:
+                       ret = -ENOSYS;
+               }
+       }
+       return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+                             u32 oldval, u32 newval)
+{
+       int ret = 0;
+       u32 prev;
+
+       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+               return -EFAULT;
+
+       __asm__ __volatile__ (                          \
+               "1:     l.lwa   %1, %2          \n"     \
+               "       l.sfeq  %1, %3          \n"     \
+               "       l.bnf   3f              \n"     \
+               "        l.nop                  \n"     \
+               "2:     l.swa   %2, %4          \n"     \
+               "       l.bnf   1b              \n"     \
+               "        l.nop                  \n"     \
+               "3:                             \n"     \
+               ".section .fixup,\"ax\"         \n"     \
+               "4:     l.j     3b              \n"     \
+               "        l.addi %0, r0, %5      \n"     \
+               ".previous                      \n"     \
+               ".section __ex_table,\"a\"      \n"     \
+               ".word  1b,4b,2b,4b             \n"     \
+               ".previous                      \n"     \
+               : "+r" (ret), "=&r" (prev), "+m" (*uaddr) \
+               : "r" (oldval), "r" (newval), "i" (-EFAULT) \
+               : "cc", "memory"                        \
+               );
+
+       *uval = prev;
+       return ret;
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_OPENRISC_FUTEX_H */
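The shift-and-mask dance in futex_atomic_op_inuser() undoes the uapi packing; for reference, the encoding macro from include/uapi/linux/futex.h is, roughly:

    #define FUTEX_OP(op, oparg, cmp, cmparg)                \
            ((((op) & 0xf) << 28) | (((cmp) & 0xf) << 24) | \
             (((oparg) & 0xfff) << 12) | ((cmparg) & 0xfff))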
index 5dbc668865c48f24825bf4a98bc38b6eab70387c..367dac70326af01731e8d841fa8fd5ce49c36222 100644 (file)
 #define SPR_UPR_MP        0x00000020  /* MAC present */
 #define SPR_UPR_DUP       0x00000040  /* Debug unit present */
 #define SPR_UPR_PCUP      0x00000080  /* Performance counters unit present */
-#define SPR_UPR_PMP       0x00000100  /* Power management present */
-#define SPR_UPR_PICP      0x00000200  /* PIC present */
+#define SPR_UPR_PICP      0x00000100  /* PIC present */
+#define SPR_UPR_PMP       0x00000200  /* Power management present */
 #define SPR_UPR_TTP       0x00000400  /* Tick timer present */
 #define SPR_UPR_RES       0x00fe0000  /* Reserved */
 #define SPR_UPR_CUP       0xff000000  /* Context units present */
diff --git a/arch/openrisc/include/asm/string.h b/arch/openrisc/include/asm/string.h
new file mode 100644 (file)
index 0000000..64939cc
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __ASM_OPENRISC_STRING_H
+#define __ASM_OPENRISC_STRING_H
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *s, int c, __kernel_size_t n);
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *dest, __const void *src, __kernel_size_t n);
+
+#endif /* __ASM_OPENRISC_STRING_H */
diff --git a/arch/openrisc/kernel/.gitignore b/arch/openrisc/kernel/.gitignore
new file mode 100644 (file)
index 0000000..c5f676c
--- /dev/null
@@ -0,0 +1 @@
+vmlinux.lds
index 906998bac9575161314cc7b996eef4c4b813d41d..b10369b7e31b287796482fe65d83e89ed97e1d33 100644 (file)
@@ -232,7 +232,7 @@ or1k_sync_single_for_device(struct device *dev,
                mtspr(SPR_DCBFR, cl);
 }
 
-struct dma_map_ops or1k_dma_map_ops = {
+const struct dma_map_ops or1k_dma_map_ops = {
        .alloc = or1k_dma_alloc,
        .free = or1k_dma_free,
        .map_page = or1k_map_page,
index aac0bde3330c4daa7366aca30a6f8aa810a4b3aa..1b7160c79646be4e136d0f813b4b55c0bfd63827 100644 (file)
@@ -173,6 +173,11 @@ handler:                                                   ;\
        l.j     _ret_from_exception                             ;\
         l.nop
 
+/* clobbers 'reg' */
+#define CLEAR_LWA_FLAG(reg)            \
+       l.movhi reg,hi(lwa_flag)        ;\
+       l.ori   reg,reg,lo(lwa_flag)    ;\
+       l.sw    0(reg),r0
 /*
  * NOTE: one should never assume that SPR_EPC, SPR_ESR, SPR_EEAR
  *       contain the same values as when exception we're handling
@@ -193,6 +198,7 @@ EXCEPTION_ENTRY(_tng_kernel_start)
 /* ---[ 0x200: BUS exception ]------------------------------------------- */
 
 EXCEPTION_ENTRY(_bus_fault_handler)
+       CLEAR_LWA_FLAG(r3)
        /* r4: EA of fault (set by EXCEPTION_HANDLE) */
        l.jal   do_bus_fault
         l.addi  r3,r1,0 /* pt_regs */
@@ -202,11 +208,13 @@ EXCEPTION_ENTRY(_bus_fault_handler)
 
 /* ---[ 0x300: Data Page Fault exception ]------------------------------- */
 EXCEPTION_ENTRY(_dtlb_miss_page_fault_handler)
+       CLEAR_LWA_FLAG(r3)
        l.and   r5,r5,r0
        l.j     1f
         l.nop
 
 EXCEPTION_ENTRY(_data_page_fault_handler)
+       CLEAR_LWA_FLAG(r3)
        /* set up parameters for do_page_fault */
        l.ori   r5,r0,0x300                // exception vector
 1:
@@ -220,7 +228,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
         * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part
         */
 #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
-       l.lwz   r6,PT_PC(r3)                  // address of an offending insn
+       l.lwz   r6,PT_PC(r3)               // address of an offending insn
        l.lwz   r6,0(r6)                   // instruction that caused pf
 
        l.srli  r6,r6,26                   // check opcode for jump insn
@@ -236,57 +244,57 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
        l.bf    8f
        l.sfeqi r6,0x12                    // l.jalr
        l.bf    8f
-
-       l.nop
+        l.nop
 
        l.j     9f
-       l.nop
-8:
+        l.nop
 
-       l.lwz   r6,PT_PC(r3)                  // address of an offending insn
+8: // offending insn is in delay slot
+       l.lwz   r6,PT_PC(r3)               // address of an offending insn
        l.addi  r6,r6,4
        l.lwz   r6,0(r6)                   // instruction that caused pf
        l.srli  r6,r6,26                   // get opcode
-9:
+9: // offending instruction opcode loaded in r6
 
 #else
 
-       l.mfspr r6,r0,SPR_SR               // SR
-//     l.lwz   r6,PT_SR(r3)               // ESR
-       l.andi  r6,r6,SPR_SR_DSX           // check for delay slot exception
-       l.sfeqi r6,0x1                     // exception happened in delay slot
-       l.bnf   7f
-       l.lwz   r6,PT_PC(r3)               // address of an offending insn
+       l.lwz   r6,PT_SR(r3)               // SR
+       l.andi  r6,r6,SPR_SR_DSX           // check for delay slot exception
+       l.sfne  r6,r0                      // exception happened in delay slot
+       l.bnf   7f
+        l.lwz  r6,PT_PC(r3)               // address of an offending insn
 
-       l.addi  r6,r6,4                    // offending insn is in delay slot
+       l.addi  r6,r6,4                    // offending insn is in delay slot
 7:
        l.lwz   r6,0(r6)                   // instruction that caused pf
        l.srli  r6,r6,26                   // check opcode for write access
 #endif
 
-       l.sfgeui r6,0x33                   // check opcode for write access
+       l.sfgeui r6,0x33                   // check opcode for write access
        l.bnf   1f
        l.sfleui r6,0x37
        l.bnf   1f
        l.ori   r6,r0,0x1                  // write access
        l.j     2f
-       l.nop
+        l.nop
 1:     l.ori   r6,r0,0x0                  // !write access
 2:
 
        /* call fault.c handler in or32/mm/fault.c */
        l.jal   do_page_fault
-       l.nop
+        l.nop
        l.j     _ret_from_exception
-       l.nop
+        l.nop
 
 /* ---[ 0x400: Insn Page Fault exception ]------------------------------- */
 EXCEPTION_ENTRY(_itlb_miss_page_fault_handler)
+       CLEAR_LWA_FLAG(r3)
        l.and   r5,r5,r0
        l.j     1f
         l.nop
 
 EXCEPTION_ENTRY(_insn_page_fault_handler)
+       CLEAR_LWA_FLAG(r3)
        /* set up parameters for do_page_fault */
        l.ori   r5,r0,0x400                // exception vector
 1:
@@ -296,23 +304,25 @@ EXCEPTION_ENTRY(_insn_page_fault_handler)
 
        /* call fault.c handler in or32/mm/fault.c */
        l.jal   do_page_fault
-       l.nop
+        l.nop
        l.j     _ret_from_exception
-       l.nop
+        l.nop
 
 
 /* ---[ 0x500: Timer exception ]----------------------------------------- */
 
 EXCEPTION_ENTRY(_timer_handler)
+       CLEAR_LWA_FLAG(r3)
        l.jal   timer_interrupt
         l.addi r3,r1,0 /* pt_regs */
 
        l.j    _ret_from_intr
         l.nop
 
-/* ---[ 0x600: Aligment exception ]-------------------------------------- */
+/* ---[ 0x600: Alignment exception ]-------------------------------------- */
 
 EXCEPTION_ENTRY(_alignment_handler)
+       CLEAR_LWA_FLAG(r3)
        /* r4: EA of fault (set by EXCEPTION_HANDLE) */
        l.jal   do_unaligned_access
         l.addi  r3,r1,0 /* pt_regs */
@@ -321,8 +331,8 @@ EXCEPTION_ENTRY(_alignment_handler)
         l.nop
 
 #if 0
-EXCEPTION_ENTRY(_aligment_handler)
-//        l.mfspr r2,r0,SPR_EEAR_BASE     /* Load the efective addres */
+EXCEPTION_ENTRY(_alignment_handler)
+//        l.mfspr r2,r0,SPR_EEAR_BASE     /* Load the effective address */
        l.addi  r2,r4,0
 //        l.mfspr r5,r0,SPR_EPCR_BASE     /* Load the insn address */
        l.lwz   r5,PT_PC(r1)
@@ -509,6 +519,7 @@ EXCEPTION_ENTRY(_external_irq_handler)
 //     l.sw    PT_SR(r1),r4
 1:
 #endif
+       CLEAR_LWA_FLAG(r3)
        l.addi  r3,r1,0
        l.movhi r8,hi(do_IRQ)
        l.ori   r8,r8,lo(do_IRQ)
@@ -556,8 +567,12 @@ ENTRY(_sys_call_handler)
         * they should be clobbered, otherwise
         */
        l.sw    PT_GPR3(r1),r3
-       /* r4 already saved */
-       /* r4 holds the EEAR address of the fault, load the original r4 */
+       /*
+        * r4 already saved
+        * r4 holds the EEAR address of the fault, use it as scratch reg and
+        * then load the original r4
+        */
+       CLEAR_LWA_FLAG(r4)
        l.lwz   r4,PT_GPR4(r1)
        l.sw    PT_GPR5(r1),r5
        l.sw    PT_GPR6(r1),r6
@@ -776,6 +791,7 @@ UNHANDLED_EXCEPTION(_vector_0xd00,0xd00)
 /* ---[ 0xe00: Trap exception ]------------------------------------------ */
 
 EXCEPTION_ENTRY(_trap_handler)
+       CLEAR_LWA_FLAG(r3)
        /* r4: EA of fault (set by EXCEPTION_HANDLE) */
        l.jal   do_trap
         l.addi  r3,r1,0 /* pt_regs */
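
The write-access classification earlier in this file (l.sfgeui r6,0x33 followed by l.sfleui r6,0x37) treats any major opcode in the range 0x33..0x37 as a store. A minimal C sketch of the same test; the opcode names are from the OR1K ISA, with 0x34 (l.sd) assumed from the 64-bit extension:

	/* Mirror of the handler's range test: OR1K store opcodes
	 * occupy 0x33..0x37 (l.swa, l.sd, l.sw, l.sb, l.sh). */
	static int is_store_opcode(unsigned int insn)
	{
		unsigned int op = insn >> 26;
		return op >= 0x33 && op <= 0x37;
	}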
index f14793306b03f35681afebddf873e794148b40d9..1e87913576e304e47ea7da5518436d3e0fe3f94e 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm/pgtable.h>
+#include <asm/thread_info.h>
 #include <asm/cache.h>
 #include <asm/spr_defs.h>
 #include <asm/asm-offsets.h>
@@ -34,7 +35,7 @@
        l.add   rd,rd,rs
 
 #define CLEAR_GPR(gpr)                         \
-       l.or    gpr,r0,r0
+       l.movhi gpr,0x0
 
 #define LOAD_SYMBOL_2_GPR(gpr,symbol)          \
        l.movhi gpr,hi(symbol)                  ;\
@@ -324,7 +325,7 @@ _dispatch_do_ipage_fault:
     .org 0x500
        EXCEPTION_HANDLE(_timer_handler)
 
-/* ---[ 0x600: Aligment exception ]-------------------------------------- */
+/* ---[ 0x600: Alignment exception ]-------------------------------------- */
     .org 0x600
        EXCEPTION_HANDLE(_alignment_handler)
 
@@ -442,6 +443,9 @@ _dispatch_do_ipage_fault:
        __HEAD
        .global _start
 _start:
+       /* Init r0 to zero as per spec */
+       CLEAR_GPR(r0)
+
        /* save kernel parameters */
        l.or    r25,r0,r3       /* pointer to fdt */
 
@@ -486,7 +490,8 @@ _start:
        /*
         * set up initial ksp and current
         */
-       LOAD_SYMBOL_2_GPR(r1,init_thread_union+0x2000)  // setup kernel stack
+       /* setup kernel stack */
+       LOAD_SYMBOL_2_GPR(r1,init_thread_union + THREAD_SIZE)
        LOAD_SYMBOL_2_GPR(r10,init_thread_union)        // setup current
        tophys  (r31,r10)
        l.sw    TI_KSP(r31), r1
@@ -520,22 +525,8 @@ enable_dc:
         l.nop
 
 flush_tlb:
-       /*
-        *  I N V A L I D A T E   T L B   e n t r i e s
-        */
-       LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0))
-       LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0))
-       l.addi  r7,r0,128 /* Maximum number of sets */
-1:
-       l.mtspr r5,r0,0x0
-       l.mtspr r6,r0,0x0
-
-       l.addi  r5,r5,1
-       l.addi  r6,r6,1
-       l.sfeq  r7,r0
-       l.bnf   1b
-        l.addi r7,r7,-1
-
+       l.jal   _flush_tlb
+        l.nop
 
 /* The MMU needs to be enabled before or32_early_setup is called */
 
@@ -627,10 +618,30 @@ jump_start_kernel:
        l.jr    r30
         l.nop
 
+_flush_tlb:
+       /*
+        *  I N V A L I D A T E   T L B   e n t r i e s
+        */
+       LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0))
+       LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0))
+       l.addi  r7,r0,128 /* Maximum number of sets */
+1:
+       l.mtspr r5,r0,0x0
+       l.mtspr r6,r0,0x0
+
+       l.addi  r5,r5,1
+       l.addi  r6,r6,1
+       l.sfeq  r7,r0
+       l.bnf   1b
+        l.addi r7,r7,-1
+
+       l.jr    r9
+        l.nop
+
 /* ========================================[ cache ]=== */
 
-       /* aligment here so we don't change memory offsets with
-        * memory controler defined
+       /* alignment here so we don't change memory offsets with
+        * memory controller defined
         */
        .align 0x2000
 
@@ -971,8 +982,6 @@ ENTRY(dtlb_miss_handler)
        EXCEPTION_STORE_GPR2
        EXCEPTION_STORE_GPR3
        EXCEPTION_STORE_GPR4
-       EXCEPTION_STORE_GPR5
-       EXCEPTION_STORE_GPR6
        /*
         * get EA of the miss
         */
@@ -980,91 +989,70 @@ ENTRY(dtlb_miss_handler)
        /*
         * pmd = (pmd_t *)(current_pgd + pgd_index(daddr));
         */
-       GET_CURRENT_PGD(r3,r5)          // r3 is current_pgd, r5 is temp
+       GET_CURRENT_PGD(r3,r4)          // r3 is current_pgd, r4 is temp
        l.srli  r4,r2,0x18              // >> PAGE_SHIFT + (PAGE_SHIFT - 2)
        l.slli  r4,r4,0x2               // to get address << 2
-       l.add   r5,r4,r3                // r4 is pgd_index(daddr)
+       l.add   r3,r4,r3                // r4 is pgd_index(daddr)
        /*
         * if (pmd_none(*pmd))
         *   goto pmd_none:
         */
-       tophys  (r4,r5)
+       tophys  (r4,r3)
        l.lwz   r3,0x0(r4)              // get *pmd value
        l.sfne  r3,r0
        l.bnf   d_pmd_none
-        l.andi r3,r3,~PAGE_MASK //0x1fff               // ~PAGE_MASK
-       /*
-        * if (pmd_bad(*pmd))
-        *   pmd_clear(pmd)
-        *   goto pmd_bad:
-        */
-//     l.sfeq  r3,r0                   // check *pmd value
-//     l.bf    d_pmd_good
-       l.addi  r3,r0,0xffffe000        // PAGE_MASK
-//     l.j     d_pmd_bad
-//     l.sw    0x0(r4),r0              // clear pmd
+        l.addi r3,r0,0xffffe000        // PAGE_MASK
+
 d_pmd_good:
        /*
         * pte = *pte_offset(pmd, daddr);
         */
        l.lwz   r4,0x0(r4)              // get **pmd value
        l.and   r4,r4,r3                // & PAGE_MASK
-       l.srli  r5,r2,0xd               // >> PAGE_SHIFT, r2 == EEAR
-       l.andi  r3,r5,0x7ff             // (1UL << PAGE_SHIFT - 2) - 1
+       l.srli  r2,r2,0xd               // >> PAGE_SHIFT, r2 == EEAR
+       l.andi  r3,r2,0x7ff             // (1UL << PAGE_SHIFT - 2) - 1
        l.slli  r3,r3,0x2               // to get address << 2
        l.add   r3,r3,r4
-       l.lwz   r2,0x0(r3)              // this is pte at last
+       l.lwz   r3,0x0(r3)              // this is pte at last
        /*
         * if (!pte_present(pte))
         */
-       l.andi  r4,r2,0x1
+       l.andi  r4,r3,0x1
        l.sfne  r4,r0                   // is pte present
        l.bnf   d_pte_not_present
-       l.addi  r3,r0,0xffffe3fa        // PAGE_MASK | DTLB_UP_CONVERT_MASK
+       l.addi  r4,r0,0xffffe3fa        // PAGE_MASK | DTLB_UP_CONVERT_MASK
        /*
         * fill DTLB TR register
         */
-       l.and   r4,r2,r3                // apply the mask
+       l.and   r4,r3,r4                // apply the mask
        // Determine number of DMMU sets
-       l.mfspr r6, r0, SPR_DMMUCFGR
-       l.andi  r6, r6, SPR_DMMUCFGR_NTS
-       l.srli  r6, r6, SPR_DMMUCFGR_NTS_OFF
+       l.mfspr r2, r0, SPR_DMMUCFGR
+       l.andi  r2, r2, SPR_DMMUCFGR_NTS
+       l.srli  r2, r2, SPR_DMMUCFGR_NTS_OFF
        l.ori   r3, r0, 0x1
-       l.sll   r3, r3, r6      // r3 = number DMMU sets DMMUCFGR
-       l.addi  r6, r3, -1      // r6 = nsets mask
-       l.and   r5, r5, r6      // calc offset:  & (NUM_TLB_ENTRIES-1)
+       l.sll   r3, r3, r2      // r3 = number DMMU sets DMMUCFGR
+       l.addi  r2, r3, -1      // r2 = nsets mask
+       l.mfspr r3, r0, SPR_EEAR_BASE
+       l.srli  r3, r3, 0xd     // >> PAGE_SHIFT
+       l.and   r2, r3, r2      // calc offset:  & (NUM_TLB_ENTRIES-1)
                                                           //NUM_TLB_ENTRIES
-       l.mtspr r5,r4,SPR_DTLBTR_BASE(0)
+       l.mtspr r2,r4,SPR_DTLBTR_BASE(0)
        /*
         * fill DTLB MR register
         */
-       l.mfspr r2,r0,SPR_EEAR_BASE
-       l.addi  r3,r0,0xffffe000        // PAGE_MASK
-       l.and   r4,r2,r3                // apply PAGE_MASK to EA (__PHX__ do we really need this?)
-       l.ori   r4,r4,0x1               // set hardware valid bit: DTBL_MR entry
-       l.mtspr r5,r4,SPR_DTLBMR_BASE(0)
+       l.slli  r3, r3, 0xd             /* << PAGE_SHIFT => EA & PAGE_MASK */
+       l.ori   r4,r3,0x1               // set hardware valid bit: DTLB_MR entry
+       l.mtspr r2,r4,SPR_DTLBMR_BASE(0)
 
        EXCEPTION_LOAD_GPR2
        EXCEPTION_LOAD_GPR3
        EXCEPTION_LOAD_GPR4
-       EXCEPTION_LOAD_GPR5
-       EXCEPTION_LOAD_GPR6
-       l.rfe
-d_pmd_bad:
-       l.nop   1
-       EXCEPTION_LOAD_GPR2
-       EXCEPTION_LOAD_GPR3
-       EXCEPTION_LOAD_GPR4
-       EXCEPTION_LOAD_GPR5
-       EXCEPTION_LOAD_GPR6
        l.rfe
 d_pmd_none:
 d_pte_not_present:
        EXCEPTION_LOAD_GPR2
        EXCEPTION_LOAD_GPR3
        EXCEPTION_LOAD_GPR4
-       EXCEPTION_LOAD_GPR5
-       EXCEPTION_LOAD_GPR6
        EXCEPTION_HANDLE(_dtlb_miss_page_fault_handler)
 
 /* ==============================================[ ITLB miss handler ]=== */
@@ -1072,8 +1060,6 @@ ENTRY(itlb_miss_handler)
        EXCEPTION_STORE_GPR2
        EXCEPTION_STORE_GPR3
        EXCEPTION_STORE_GPR4
-       EXCEPTION_STORE_GPR5
-       EXCEPTION_STORE_GPR6
        /*
         * get EA of the miss
         */
@@ -1083,30 +1069,19 @@ ENTRY(itlb_miss_handler)
         * pmd = (pmd_t *)(current_pgd + pgd_index(daddr));
         *
         */
-       GET_CURRENT_PGD(r3,r5)          // r3 is current_pgd, r5 is temp
+       GET_CURRENT_PGD(r3,r4)          // r3 is current_pgd, r4 is temp
        l.srli  r4,r2,0x18              // >> PAGE_SHIFT + (PAGE_SHIFT - 2)
        l.slli  r4,r4,0x2               // to get address << 2
-       l.add   r5,r4,r3                // r4 is pgd_index(daddr)
+       l.add   r3,r4,r3                // r4 is pgd_index(daddr)
        /*
         * if (pmd_none(*pmd))
         *   goto pmd_none:
         */
-       tophys  (r4,r5)
+       tophys  (r4,r3)
        l.lwz   r3,0x0(r4)              // get *pmd value
        l.sfne  r3,r0
        l.bnf   i_pmd_none
-       l.andi  r3,r3,0x1fff            // ~PAGE_MASK
-       /*
-        * if (pmd_bad(*pmd))
-        *   pmd_clear(pmd)
-        *   goto pmd_bad:
-        */
-
-//     l.sfeq  r3,r0                   // check *pmd value
-//     l.bf    i_pmd_good
-       l.addi  r3,r0,0xffffe000        // PAGE_MASK
-//     l.j     i_pmd_bad
-//     l.sw    0x0(r4),r0              // clear pmd
+        l.addi r3,r0,0xffffe000        // PAGE_MASK
 
 i_pmd_good:
        /*
@@ -1115,35 +1090,36 @@ i_pmd_good:
         */
        l.lwz   r4,0x0(r4)              // get **pmd value
        l.and   r4,r4,r3                // & PAGE_MASK
-       l.srli  r5,r2,0xd               // >> PAGE_SHIFT, r2 == EEAR
-       l.andi  r3,r5,0x7ff             // (1UL << PAGE_SHIFT - 2) - 1
+       l.srli  r2,r2,0xd               // >> PAGE_SHIFT, r2 == EEAR
+       l.andi  r3,r2,0x7ff             // (1UL << PAGE_SHIFT - 2) - 1
        l.slli  r3,r3,0x2               // to get address << 2
        l.add   r3,r3,r4
-       l.lwz   r2,0x0(r3)              // this is pte at last
+       l.lwz   r3,0x0(r3)              // this is pte at last
        /*
         * if (!pte_present(pte))
         *
         */
-       l.andi  r4,r2,0x1
+       l.andi  r4,r3,0x1
        l.sfne  r4,r0                   // is pte present
        l.bnf   i_pte_not_present
-       l.addi  r3,r0,0xffffe03a        // PAGE_MASK | ITLB_UP_CONVERT_MASK
+        l.addi r4,r0,0xffffe03a        // PAGE_MASK | ITLB_UP_CONVERT_MASK
        /*
         * fill ITLB TR register
         */
-       l.and   r4,r2,r3                // apply the mask
-       l.andi  r3,r2,0x7c0             // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE |  _PAGE_URE | _PAGE_UWE
-//     l.andi  r3,r2,0x400             // _PAGE_EXEC
+       l.and   r4,r3,r4                // apply the mask
+       l.andi  r3,r3,0x7c0             // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE |  _PAGE_URE | _PAGE_UWE
        l.sfeq  r3,r0
        l.bf    itlb_tr_fill //_workaround
        // Determine number of IMMU sets
-       l.mfspr r6, r0, SPR_IMMUCFGR
-       l.andi  r6, r6, SPR_IMMUCFGR_NTS
-       l.srli  r6, r6, SPR_IMMUCFGR_NTS_OFF
+       l.mfspr r2, r0, SPR_IMMUCFGR
+       l.andi  r2, r2, SPR_IMMUCFGR_NTS
+       l.srli  r2, r2, SPR_IMMUCFGR_NTS_OFF
        l.ori   r3, r0, 0x1
-       l.sll   r3, r3, r6      // r3 = number IMMU sets IMMUCFGR
-       l.addi  r6, r3, -1      // r6 = nsets mask
-       l.and   r5, r5, r6      // calc offset:  & (NUM_TLB_ENTRIES-1)
+       l.sll   r3, r3, r2      // r3 = number IMMU sets IMMUCFGR
+       l.addi  r2, r3, -1      // r2 = nsets mask
+       l.mfspr r3, r0, SPR_EEAR_BASE
+       l.srli  r3, r3, 0xd     // >> PAGE_SHIFT
+       l.and   r2, r3, r2      // calc offset:  & (NUM_TLB_ENTRIES-1)
 
 /*
  * __PHX__ :: fixme
@@ -1155,38 +1131,24 @@ i_pmd_good:
 itlb_tr_fill_workaround:
        l.ori   r4,r4,0xc0              // | (SPR_ITLBTR_UXE | ITLBTR_SXE)
 itlb_tr_fill:
-       l.mtspr r5,r4,SPR_ITLBTR_BASE(0)
+       l.mtspr r2,r4,SPR_ITLBTR_BASE(0)
        /*
         * fill ITLB MR register
         */
-       l.mfspr r2,r0,SPR_EEAR_BASE
-       l.addi  r3,r0,0xffffe000        // PAGE_MASK
-       l.and   r4,r2,r3                // apply PAGE_MASK to EA (__PHX__ do we really need this?)
-       l.ori   r4,r4,0x1               // set hardware valid bit: DTBL_MR entry
-       l.mtspr r5,r4,SPR_ITLBMR_BASE(0)
+       l.slli  r3, r3, 0xd             /* << PAGE_SHIFT => EA & PAGE_MASK */
+       l.ori   r4,r3,0x1               // set hardware valid bit: ITLB_MR entry
+       l.mtspr r2,r4,SPR_ITLBMR_BASE(0)
 
        EXCEPTION_LOAD_GPR2
        EXCEPTION_LOAD_GPR3
        EXCEPTION_LOAD_GPR4
-       EXCEPTION_LOAD_GPR5
-       EXCEPTION_LOAD_GPR6
        l.rfe
 
-i_pmd_bad:
-       l.nop   1
-       EXCEPTION_LOAD_GPR2
-       EXCEPTION_LOAD_GPR3
-       EXCEPTION_LOAD_GPR4
-       EXCEPTION_LOAD_GPR5
-       EXCEPTION_LOAD_GPR6
-       l.rfe
 i_pmd_none:
 i_pte_not_present:
        EXCEPTION_LOAD_GPR2
        EXCEPTION_LOAD_GPR3
        EXCEPTION_LOAD_GPR4
-       EXCEPTION_LOAD_GPR5
-       EXCEPTION_LOAD_GPR6
        EXCEPTION_HANDLE(_itlb_miss_page_fault_handler)
 
 /* ==============================================[ boot tlb handlers ]=== */
@@ -1571,12 +1533,7 @@ ENTRY(_early_uart_init)
        l.jr    r9
        l.nop
 
-_string_copying_linux:
-       .string "\n\n\n\n\n\rCopying Linux... \0"
-
-_string_ok_booting:
-       .string "Ok, booting the kernel.\n\r\0"
-
+       .section .rodata
 _string_unhandled_exception:
        .string "\n\rRunarunaround: Unhandled exception 0x\0"
 
@@ -1586,11 +1543,6 @@ _string_epc_prefix:
 _string_nl:
        .string "\n\r\0"
 
-       .global _string_esr_irq_bug
-_string_esr_irq_bug:
-       .string "\n\rESR external interrupt bug, for details look into entry.S\n\r\0"
-
-
 
 /* ========================================[ page aligned structures ]=== */
 
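The TLB invalidation that flush_tlb now delegates to the _flush_tlb helper above clears the way-0 match register of every DTLB and ITLB set, invalidating the corresponding entries. A rough C rendering of that loop, assuming the mtspr() helper from <asm/spr_defs.h> and the 128-set maximum the code uses:

	/* Invalidate way 0 of all (up to 128) DTLB/ITLB sets by
	 * clearing each set's match register, as _flush_tlb does. */
	int set;

	for (set = 0; set < 128; set++) {
		mtspr(SPR_DTLBMR_BASE(0) + set, 0);
		mtspr(SPR_ITLBMR_BASE(0) + set, 0);
	}
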
index 86e31cf1de1d1d3c43e671b9337c9917219fa6e9..5c4695d13542fc003054995b728ac468e18bd94c 100644 (file)
@@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3);
 DECLARE_EXPORT(__lshrdi3);
 
 EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(memset);
index d7990df9025a6e1f77c1e98668a625700c323bea..6e9d1cb519f245777ada914e1a9e009ed5d7980a 100644 (file)
@@ -75,6 +75,17 @@ void machine_power_off(void)
        __asm__("l.nop 1");
 }
 
+/*
+ * Send the doze signal to the cpu if available.
+ * Make sure that all interrupts are enabled.
+ */
+void arch_cpu_idle(void)
+{
+       local_irq_enable();
+       if (mfspr(SPR_UPR) & SPR_UPR_PMP)
+               mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
+}
+
 void (*pm_power_off) (void) = machine_power_off;
 
 /*
@@ -226,6 +237,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
 
 extern struct thread_info *_switch(struct thread_info *old_ti,
                                   struct thread_info *new_ti);
+extern int lwa_flag;
 
 struct task_struct *__switch_to(struct task_struct *old,
                                struct task_struct *new)
@@ -243,6 +255,8 @@ struct task_struct *__switch_to(struct task_struct *old,
        new_ti = new->stack;
        old_ti = old->stack;
 
+       lwa_flag = 0;
+
        current_thread_info_set[smp_processor_id()] = new_ti;
        last = (_switch(old_ti, new_ti))->task;
 
index 4f59fa4e34e5f2c795c3d1e150520513dd78d7d3..228288887d74facd56c89f1f71e102e940ae540e 100644 (file)
@@ -16,7 +16,6 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <stddef.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/string.h>
index cb797a3beb47740717528a042c5205dd265fc375..dbf5ee95a0d5f2ba8e2e6f453234afa954e04f45 100644 (file)
@@ -117,13 +117,15 @@ static void print_cpuinfo(void)
        if (upr & SPR_UPR_DCP)
                printk(KERN_INFO
                       "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
-                      cpuinfo.dcache_size, cpuinfo.dcache_block_size, 1);
+                      cpuinfo.dcache_size, cpuinfo.dcache_block_size,
+                      cpuinfo.dcache_ways);
        else
                printk(KERN_INFO "-- dcache disabled\n");
        if (upr & SPR_UPR_ICP)
                printk(KERN_INFO
                       "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
-                      cpuinfo.icache_size, cpuinfo.icache_block_size, 1);
+                      cpuinfo.icache_size, cpuinfo.icache_block_size,
+                      cpuinfo.icache_ways);
        else
                printk(KERN_INFO "-- icache disabled\n");
 
@@ -155,25 +157,25 @@ void __init setup_cpuinfo(void)
 {
        struct device_node *cpu;
        unsigned long iccfgr, dccfgr;
-       unsigned long cache_set_size, cache_ways;
+       unsigned long cache_set_size;
 
        cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481");
        if (!cpu)
                panic("No compatible CPU found in device tree...\n");
 
        iccfgr = mfspr(SPR_ICCFGR);
-       cache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
+       cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
        cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
        cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
        cpuinfo.icache_size =
-           cache_set_size * cache_ways * cpuinfo.icache_block_size;
+           cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size;
 
        dccfgr = mfspr(SPR_DCCFGR);
-       cache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
+       cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
        cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
        cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
        cpuinfo.dcache_size =
-           cache_set_size * cache_ways * cpuinfo.dcache_block_size;
+           cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size;
 
        if (of_property_read_u32(cpu, "clock-frequency",
                                 &cpuinfo.clock_frequency)) {
@@ -308,30 +310,33 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        revision = vr & SPR_VR_REV;
 
        seq_printf(m,
-                  "cpu\t\t: OpenRISC-%x\n"
-                  "revision\t: %d\n"
-                  "frequency\t: %ld\n"
-                  "dcache size\t: %d bytes\n"
-                  "dcache block size\t: %d bytes\n"
-                  "icache size\t: %d bytes\n"
-                  "icache block size\t: %d bytes\n"
-                  "immu\t\t: %d entries, %lu ways\n"
-                  "dmmu\t\t: %d entries, %lu ways\n"
-                  "bogomips\t: %lu.%02lu\n",
-                  version,
-                  revision,
-                  loops_per_jiffy * HZ,
-                  cpuinfo.dcache_size,
-                  cpuinfo.dcache_block_size,
-                  cpuinfo.icache_size,
-                  cpuinfo.icache_block_size,
-                  1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
-                  1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
-                  1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
-                  1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
-                  (loops_per_jiffy * HZ) / 500000,
-                  ((loops_per_jiffy * HZ) / 5000) % 100);
-
+                 "cpu\t\t: OpenRISC-%x\n"
+                 "revision\t: %d\n"
+                 "frequency\t: %ld\n"
+                 "dcache size\t: %d bytes\n"
+                 "dcache block size\t: %d bytes\n"
+                 "dcache ways\t: %d\n"
+                 "icache size\t: %d bytes\n"
+                 "icache block size\t: %d bytes\n"
+                 "icache ways\t: %d\n"
+                 "immu\t\t: %d entries, %lu ways\n"
+                 "dmmu\t\t: %d entries, %lu ways\n"
+                 "bogomips\t: %lu.%02lu\n",
+                 version,
+                 revision,
+                 loops_per_jiffy * HZ,
+                 cpuinfo.dcache_size,
+                 cpuinfo.dcache_block_size,
+                 cpuinfo.dcache_ways,
+                 cpuinfo.icache_size,
+                 cpuinfo.icache_block_size,
+                 cpuinfo.icache_ways,
+                 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
+                 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
+                 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
+                 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
+                 (loops_per_jiffy * HZ) / 500000,
+                 ((loops_per_jiffy * HZ) / 5000) % 100);
        return 0;
 }
 
index d29c41bfbffaab232cf21d781aed6893fde12c06..7e81ad258bca39bf20867060cf012bd24bed346e 100644 (file)
@@ -40,6 +40,8 @@
 extern char _etext, _stext;
 
 int kstack_depth_to_print = 0x180;
+int lwa_flag;
+unsigned long __user *lwa_addr;
 
 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 {
@@ -334,10 +336,191 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address)
        }
 }
 
+static inline int in_delay_slot(struct pt_regs *regs)
+{
+#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
+       /* No delay slot flag, do the old way */
+       unsigned int op, insn;
+
+       insn = *((unsigned int *)regs->pc);
+       op = insn >> 26;
+       switch (op) {
+       case 0x00: /* l.j */
+       case 0x01: /* l.jal */
+       case 0x03: /* l.bnf */
+       case 0x04: /* l.bf */
+       case 0x11: /* l.jr */
+       case 0x12: /* l.jalr */
+               return 1;
+       default:
+               return 0;
+       }
+#else
+       return regs->sr & SPR_SR_DSX;
+#endif
+}
+
+static inline void adjust_pc(struct pt_regs *regs, unsigned long address)
+{
+       int displacement;
+       unsigned int rb, op, jmp;
+
+       if (unlikely(in_delay_slot(regs))) {
+               /* In delay slot, instruction at pc is a branch, simulate it */
+               jmp = *((unsigned int *)regs->pc);
+
+               displacement = sign_extend32(((jmp) & 0x3ffffff) << 2, 27);
+               rb = (jmp & 0x0000ffff) >> 11;
+               op = jmp >> 26;
+
+               switch (op) {
+               case 0x00: /* l.j */
+                       regs->pc += displacement;
+                       return;
+               case 0x01: /* l.jal */
+                       regs->pc += displacement;
+                       regs->gpr[9] = regs->pc + 8;
+                       return;
+               case 0x03: /* l.bnf */
+                       if (regs->sr & SPR_SR_F)
+                               regs->pc += 8;
+                       else
+                               regs->pc += displacement;
+                       return;
+               case 0x04: /* l.bf */
+                       if (regs->sr & SPR_SR_F)
+                               regs->pc += displacement;
+                       else
+                               regs->pc += 8;
+                       return;
+               case 0x11: /* l.jr */
+                       regs->pc = regs->gpr[rb];
+                       return;
+               case 0x12: /* l.jalr */
+                       regs->pc = regs->gpr[rb];
+                       regs->gpr[9] = regs->pc + 8;
+                       return;
+               default:
+                       break;
+               }
+       } else {
+               regs->pc += 4;
+       }
+}
+
+static inline void simulate_lwa(struct pt_regs *regs, unsigned long address,
+                               unsigned int insn)
+{
+       unsigned int ra, rd;
+       unsigned long value;
+       unsigned long orig_pc;
+       long imm;
+
+       const struct exception_table_entry *entry;
+
+       orig_pc = regs->pc;
+       adjust_pc(regs, address);
+
+       ra = (insn >> 16) & 0x1f;
+       rd = (insn >> 21) & 0x1f;
+       imm = (short)insn;
+       lwa_addr = (unsigned long __user *)(regs->gpr[ra] + imm);
+
+       if ((unsigned long)lwa_addr & 0x3) {
+               do_unaligned_access(regs, address);
+               return;
+       }
+
+       if (get_user(value, lwa_addr)) {
+               if (user_mode(regs)) {
+                       force_sig(SIGSEGV, current);
+                       return;
+               }
+
+               if ((entry = search_exception_tables(orig_pc))) {
+                       regs->pc = entry->fixup;
+                       return;
+               }
+
+               /* kernel access in kernel space, load it directly */
+               value = *((unsigned long *)lwa_addr);
+       }
+
+       lwa_flag = 1;
+       regs->gpr[rd] = value;
+}
+
+static inline void simulate_swa(struct pt_regs *regs, unsigned long address,
+                               unsigned int insn)
+{
+       unsigned long __user *vaddr;
+       unsigned long orig_pc;
+       unsigned int ra, rb;
+       long imm;
+
+       const struct exception_table_entry *entry;
+
+       orig_pc = regs->pc;
+       adjust_pc(regs, address);
+
+       ra = (insn >> 16) & 0x1f;
+       rb = (insn >> 11) & 0x1f;
+       imm = (short)(((insn & 0x3e00000) >> 10) | (insn & 0x7ff));
+       vaddr = (unsigned long __user *)(regs->gpr[ra] + imm);
+
+       if (!lwa_flag || vaddr != lwa_addr) {
+               regs->sr &= ~SPR_SR_F;
+               return;
+       }
+
+       if ((unsigned long)vaddr & 0x3) {
+               do_unaligned_access(regs, address);
+               return;
+       }
+
+       if (put_user(regs->gpr[rb], vaddr)) {
+               if (user_mode(regs)) {
+                       force_sig(SIGSEGV, current);
+                       return;
+               }
+
+               if ((entry = search_exception_tables(orig_pc))) {
+                       regs->pc = entry->fixup;
+                       return;
+               }
+
+               /* kernel access in kernel space, store it directly */
+               *((unsigned long *)vaddr) = regs->gpr[rb];
+       }
+
+       lwa_flag = 0;
+       regs->sr |= SPR_SR_F;
+}
+
+#define INSN_LWA       0x1b
+#define INSN_SWA       0x33
+
 asmlinkage void do_illegal_instruction(struct pt_regs *regs,
                                       unsigned long address)
 {
        siginfo_t info;
+       unsigned int op;
+       unsigned int insn = *((unsigned int *)address);
+
+       op = insn >> 26;
+
+       switch (op) {
+       case INSN_LWA:
+               simulate_lwa(regs, address, insn);
+               return;
+
+       case INSN_SWA:
+               simulate_swa(regs, address, insn);
+               return;
+
+       default:
+               break;
+       }
 
        if (user_mode(regs)) {
                /* Send a SIGILL */
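
Taken together, simulate_lwa() and simulate_swa() give cores without native l.lwa/l.swa the usual load-linked/store-conditional behaviour: the load records its address in lwa_addr and raises lwa_flag, and the store only completes (setting SR[F] for the caller's retry branch) when the flag is still up for the same address; __switch_to() clearing lwa_flag is what breaks the reservation across a context switch. A minimal sketch of the field decoding the load side relies on, with a hand-encoded, hypothetical instruction word:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* Hypothetical l.lwa r19,4(r3): opcode 0x1b, rD = 19, rA = 3,
		 * 16-bit immediate 4, packed as in simulate_lwa(). */
		uint32_t insn = 0x6e630004;
		uint32_t op = insn >> 26;
		uint32_t rd = (insn >> 21) & 0x1f;
		uint32_t ra = (insn >> 16) & 0x1f;
		int32_t imm = (int16_t)insn;

		printf("op=0x%x rd=r%u ra=r%u imm=%d\n", op, rd, ra, imm);
		return 0;
	}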
index 552544616b9d93ab6838a998ffb0b6ad9a7f1574..00ddb7804be4f66cec6e5644e96c6ea26501ab28 100644 (file)
@@ -19,8 +19,8 @@
 
 /* TODO
  *             - clean up __offset & stuff
- *             - change all 8192 aligment to PAGE !!!
- *             - recheck if all aligments are really needed
+ *             - change all 8192 alignments to PAGE !!!
+ *             - recheck if all alignments are really needed
  */
 
 #  define LOAD_OFFSET  PAGE_OFFSET
index 966f65dbc6f013ed02fb59233535e1b432cfabf2..17d9d37f32d2ec3cd7086548f0d4190010897a99 100644 (file)
@@ -2,4 +2,4 @@
 # Makefile for or32 specific library files..
 #
 
-obj-y  = string.o delay.o
+obj-y  := delay.o string.o memset.o memcpy.o
diff --git a/arch/openrisc/lib/memcpy.c b/arch/openrisc/lib/memcpy.c
new file mode 100644 (file)
index 0000000..669887a
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * arch/openrisc/lib/memcpy.c
+ *
+ * Optimized memory copy routines for openrisc.  These are mostly copied
+ * from other sources but slightly extended based on ideas discussed in
+ * #openrisc.
+ *
+ * The word unroll implementation is an extension to the arm byte
+ * unrolled implementation, but using word copies (if things are
+ * properly aligned)
+ *
+ * The great arm loop unroll algorithm can be found at:
+ *  arch/arm/boot/compressed/string.c
+ */
+
+#include <linux/export.h>
+
+#include <linux/string.h>
+
+#ifdef CONFIG_OR1K_1200
+/*
+ * Do memcpy with word copies and loop unrolling. This gives the
+ * best performance on the OR1200 and MOR1KX architectures
+ */
+void *memcpy(void *dest, __const void *src, __kernel_size_t n)
+{
+       int i = 0;
+       unsigned char *d, *s;
+       uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
+
+       /* If both source and dest are word aligned copy words */
+       if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
+               /* Copy 32 bytes per loop */
+               for (i = n >> 5; i > 0; i--) {
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+               }
+
+               if (n & 1 << 4) {
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+               }
+
+               if (n & 1 << 3) {
+                       *dest_w++ = *src_w++;
+                       *dest_w++ = *src_w++;
+               }
+
+               if (n & 1 << 2)
+                       *dest_w++ = *src_w++;
+
+               d = (unsigned char *)dest_w;
+               s = (unsigned char *)src_w;
+
+       } else {
+               d = (unsigned char *)dest_w;
+               s = (unsigned char *)src_w;
+
+               for (i = n >> 3; i > 0; i--) {
+                       *d++ = *s++;
+                       *d++ = *s++;
+                       *d++ = *s++;
+                       *d++ = *s++;
+                       *d++ = *s++;
+                       *d++ = *s++;
+                       *d++ = *s++;
+                       *d++ = *s++;
+               }
+
+               if (n & 1 << 2) {
+                       *d++ = *s++;
+                       *d++ = *s++;
+                       *d++ = *s++;
+                       *d++ = *s++;
+               }
+       }
+
+       if (n & 1 << 1) {
+               *d++ = *s++;
+               *d++ = *s++;
+       }
+
+       if (n & 1)
+               *d++ = *s++;
+
+       return dest;
+}
+#else
+/*
+ * Use word copies but no loop unrolling as we cannot assume there
+ * will be benefits on the architecture
+ */
+void *memcpy(void *dest, __const void *src, __kernel_size_t n)
+{
+       unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src;
+       uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
+
+       /* If both source and dest are word aligned copy words */
+       if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
+               for (; n >= 4; n -= 4)
+                       *dest_w++ = *src_w++;
+       }
+
+       d = (unsigned char *)dest_w;
+       s = (unsigned char *)src_w;
+
+       /* For remaining or if not aligned, copy bytes */
+       for (; n >= 1; n -= 1)
+               *d++ = *s++;
+
+       return dest;
+
+}
+#endif
+
+EXPORT_SYMBOL(memcpy);
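
A quick check of the unrolled path's tail handling: for a word-aligned copy of n = 53 bytes, the 32-byte loop runs n >> 5 = 1 time, the n & (1 << 4) block copies 16 more, n & (1 << 2) copies 4, and the final n & 1 byte copy finishes: 32 + 16 + 4 + 1 = 53, so every bit of n below 32 is consumed exactly once.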
diff --git a/arch/openrisc/lib/memset.S b/arch/openrisc/lib/memset.S
new file mode 100644 (file)
index 0000000..92cc2ea
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * OpenRISC memset.S
+ *
+ * Hand-optimized assembler version of memset for OpenRISC.
+ * Algorithm inspired by several other arch-specific memset routines
+ * in the kernel tree
+ *
+ * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+       .global memset
+       .type   memset, @function
+memset:
+       /* arguments:
+        * r3 = *s
+        * r4 = c
+        * r5 = n
+        * r13, r15, r17, r19 used as temp regs
+       */
+
+       /* Exit if n == 0 */
+       l.sfeqi         r5, 0
+       l.bf            4f
+
+       /* Truncate c to char */
+       l.andi          r13, r4, 0xff
+
+       /* Skip word extension if c is 0 */
+       l.sfeqi         r13, 0
+       l.bf            1f
+       /* Check for at least two whole words (8 bytes) */
+        l.sfleui       r5, 7
+
+       /* Extend char c to 32-bit word cccc in r13 */
+       l.slli          r15, r13, 16  // r13 = 000c, r15 = 0c00
+       l.or            r13, r13, r15 // r13 = 0c0c, r15 = 0c00
+       l.slli          r15, r13, 8   // r13 = 0c0c, r15 = c0c0
+       l.or            r13, r13, r15 // r13 = cccc, r15 = c0c0
+
+1:     l.addi          r19, r3, 0 // Set r19 = src
+       /* Jump to byte copy loop if less than two words */
+       l.bf            3f
+        l.or           r17, r5, r0 // Set r17 = n
+
+       /* Mask out two LSBs to check alignment */
+       l.andi          r15, r3, 0x3
+
+       /* lsb == 00, jump to word copy loop */
+       l.sfeqi         r15, 0
+       l.bf            2f
+        l.addi         r19, r3, 0 // Set r19 = src
+
+       /* lsb == 01,10 or 11 */
+       l.sb            0(r3), r13   // *src = c
+       l.addi          r17, r17, -1 // Decrease n
+
+       l.sfeqi         r15, 3
+       l.bf            2f
+        l.addi         r19, r3, 1  // src += 1
+
+       /* lsb == 01 or 10 */
+       l.sb            1(r3), r13   // *(src+1) = c
+       l.addi          r17, r17, -1 // Decrease n
+
+       l.sfeqi         r15, 2
+       l.bf            2f
+        l.addi         r19, r3, 2  // src += 2
+
+       /* lsb == 01 */
+       l.sb            2(r3), r13   // *(src+2) = c
+       l.addi          r17, r17, -1 // Decrease n
+       l.addi          r19, r3, 3   // src += 3
+
+       /* Word copy loop */
+2:     l.sw            0(r19), r13  // *src = cccc
+       l.addi          r17, r17, -4 // Decrease n
+       l.sfgeui        r17, 4
+       l.bf            2b
+        l.addi         r19, r19, 4  // Increase src
+
+       /* When n > 0, copy the remaining bytes, otherwise jump to exit */
+       l.sfeqi         r17, 0
+       l.bf            4f
+
+       /* Byte copy loop */
+3:     l.addi          r17, r17, -1 // Decrease n
+       l.sb            0(r19), r13  // *src = cccc
+       l.sfnei         r17, 0
+       l.bf            3b
+        l.addi         r19, r19, 1  // Increase src
+
+4:     l.jr            r9
+        l.ori          r11, r3, 0
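
The char-to-word widening at the top of memset (the r13/r15 shuffle) has a compact C equivalent; a minimal sketch, assuming 32-bit words as on OR1K:

	#include <stdint.h>

	/* Same widening as the l.slli/l.or pairs above:
	 * 0x000000cc -> 0x00cc00cc -> 0xcccccccc. */
	static uint32_t fill_word(uint8_t c)
	{
		uint32_t w = c;
		w |= w << 16;
		w |= w << 8;
		return w;
	}

fill_word(0xcc) yields 0xcccccccc, the value the word loop then stores four bytes at a time.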
index 8705a46218f9273263a0678db7647b1925edf4b7..2175e4bfd9fc0a28e80df5dca135493ec3728720 100644 (file)
@@ -80,6 +80,7 @@ __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot)
 
        return (void __iomem *)(offset + (char *)v);
 }
+EXPORT_SYMBOL(__ioremap);
 
 void iounmap(void *addr)
 {
@@ -106,6 +107,7 @@ void iounmap(void *addr)
 
        return vfree((void *)(PAGE_MASK & (unsigned long)addr));
 }
+EXPORT_SYMBOL(iounmap);
 
 /**
  * OK, this one's a bit tricky... ioremap can get called before memory is
index cc70b4116718ae67fcc9fa5eb83bf983d9dc3ec5..a9909c2d04c5cb4a2a949ddad8dcd48fb9f9583f 100644 (file)
@@ -28,3 +28,4 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 16e024602737085eee5c3bdfae979fc5e578e2a1..5404c6a726b227f57024afec81bbd199d567c810 100644 (file)
 */
 
 #ifdef CONFIG_PA11
-extern struct dma_map_ops pcxl_dma_ops;
-extern struct dma_map_ops pcx_dma_ops;
+extern const struct dma_map_ops pcxl_dma_ops;
+extern const struct dma_map_ops pcx_dma_ops;
 #endif
 
-extern struct dma_map_ops *hppa_dma_ops;
+extern const struct dma_map_ops *hppa_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return hppa_dma_ops;
 }
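
For context on the dma_map_ops churn in this and the following hunks: the series constifies the ops tables and retires the per-arch get_dma_ops()/set_dma_ops() in favour of a core helper that consults dev->dma_ops first and falls back to the bus-level get_arch_dma_ops(). A sketch of that core-side shape, assumed from the generic <linux/dma-mapping.h> of this kernel generation:

	static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
	{
		if (dev && dev->dma_ops)
			return dev->dma_ops;
		return get_arch_dma_ops(dev ? dev->bus : NULL);
	}

On parisc the fallback simply returns hppa_dma_ops, as the hunk above shows.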
index 700e2d2da0969cdfeb872071fe16b5f9c32e82cf..fa78419100c84b46a3f7ad9183ffd7748eb5b96b 100644 (file)
@@ -40,7 +40,7 @@
 #include <asm/parisc-device.h>
 
 /* See comments in include/asm-parisc/pci.h */
-struct dma_map_ops *hppa_dma_ops __read_mostly;
+const struct dma_map_ops *hppa_dma_ops __read_mostly;
 EXPORT_SYMBOL(hppa_dma_ops);
 
 static struct device root = {
index 697c53543a4d1c2112318c73aefd4edc518e41d0..5f0067a62738a77f5b62efb2ea45cc847c1d424d 100644 (file)
@@ -572,7 +572,7 @@ static void pa11_dma_sync_sg_for_device(struct device *dev, struct scatterlist *
                flush_kernel_vmap_range(sg_virt(sg), sg->length);
 }
 
-struct dma_map_ops pcxl_dma_ops = {
+const struct dma_map_ops pcxl_dma_ops = {
        .dma_supported =        pa11_dma_supported,
        .alloc =                pa11_dma_alloc,
        .free =                 pa11_dma_free,
@@ -608,7 +608,7 @@ static void pcx_dma_free(struct device *dev, size_t size, void *vaddr,
        return;
 }
 
-struct dma_map_ops pcx_dma_ops = {
+const struct dma_map_ops pcx_dma_ops = {
        .dma_supported =        pa11_dma_supported,
        .alloc =                pcx_dma_alloc,
        .free =                 pcx_dma_free,
index 75dab2871346ce969d82a9fbb09ffcc0d579fc5c..67b452b41ff6a6fad09722b829a97d04eb669670 100644 (file)
@@ -279,7 +279,7 @@ smp_cpu_init(int cpunum)
        set_cpu_online(cpunum, true);
 
        /* Initialise the idle task for this CPU */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
        BUG_ON(current->mm);
        enter_lazy_tlb(&init_mm, current);
index 8e94448f296c5a4c7053e841a7aff4c267ac715e..76b2bd6f77422e16e4089e267cdb9e6c5c651efe 100644 (file)
@@ -55,7 +55,7 @@
                                label = "kernel";
                                reg = <0x01c00000 0x002e0000>;
                        };
-                       partiton@1ee0000 {
+                       partition@1ee0000 {
                                label = "dtb";
                                reg = <0x01ee0000 0x00020000>;
                        };
index d73e9dfa5237f4e11d63810c5439ab93b1b4f645..1145dc8e726dbec93498f14bdbc3083f15aedbec 100644 (file)
@@ -30,7 +30,7 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
 #ifndef __ASSEMBLY__
 /*
- * ISA 3.0 partiton and process table entry format
+ * ISA 3.0 partition and process table entry format
  */
 struct prtb_entry {
        __be64 prtb0;
index fef738229a686e7e56b283ebadb34279e82e2d02..1eeeb72c70158aa07775444e2fe160e4ef15223b 100644 (file)
@@ -1,6 +1,9 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 
+#ifndef __ASSEMBLY__
+#include <linux/mmdebug.h>
+#endif
 /*
  * Common bits between hash and Radix page table
  */
@@ -434,15 +437,47 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte)
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 #ifdef CONFIG_NUMA_BALANCING
-/*
- * These work without NUMA balancing but the kernel does not care. See the
- * comment in include/asm-generic/pgtable.h . On powerpc, this will only
- * work for user pages and always return true for kernel pages.
- */
 static inline int pte_protnone(pte_t pte)
 {
-       return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
-               cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED);
+       return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX)) ==
+               cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
+}
+
+#define pte_mk_savedwrite pte_mk_savedwrite
+static inline pte_t pte_mk_savedwrite(pte_t pte)
+{
+       /*
+        * Used by Autonuma subsystem to preserve the write bit
+        * while marking the pte PROT_NONE. Only allow this
+        * on PROT_NONE pte
+        */
+       VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
+                 cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
+       return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED);
+}
+
+#define pte_clear_savedwrite pte_clear_savedwrite
+static inline pte_t pte_clear_savedwrite(pte_t pte)
+{
+       /*
+        * Used by the KSM subsystem to make a protnone pte readonly.
+        */
+       VM_BUG_ON(!pte_protnone(pte));
+       return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
+}
+
+#define pte_savedwrite pte_savedwrite
+static inline bool pte_savedwrite(pte_t pte)
+{
+       /*
+        * Saved write ptes are prot none ptes that don't have the
+        * privileged bit set. We mark prot none as one which has
+        * present and privileged bit set and RWX cleared. To mark
+        * protnone which used to have _PAGE_WRITE set we clear
+        * the privileged bit.
+        */
+       VM_BUG_ON(!pte_protnone(pte));
+       return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
@@ -873,6 +908,8 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_mkclean(pmd)       pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)       pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 #define pmd_mkwrite(pmd)       pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mk_savedwrite(pmd) pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
+#define pmd_clear_savedwrite(pmd)      pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 #define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
@@ -889,6 +926,7 @@ static inline int pmd_protnone(pmd_t pmd)
 
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)         pte_write(pmd_pte(pmd))
+#define pmd_savedwrite(pmd)    pte_savedwrite(pmd_pte(pmd))
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
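
In short, the savedwrite encoding distinguishes two protnone flavours through _PAGE_PRIVILEGED: a plain protnone pte keeps _PAGE_PRESENT | _PAGE_PTE with RWX clear and PRIVILEGED set, while a protnone pte that used to be writable clears PRIVILEGED as well. That is why pte_savedwrite() only has to verify that RWX and PRIVILEGED are all clear, and why pte_clear_savedwrite() can restore a read-only protnone pte by setting PRIVILEGED back.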
index 406c2b1ff82d67fd0465163ce05cfd76961456bb..0245bfcaac324c4ed0f42af59f8c25ab486a2b5e 100644 (file)
@@ -6,7 +6,6 @@
 #ifndef _ASM_POWERPC_DEVICE_H
 #define _ASM_POWERPC_DEVICE_H
 
-struct dma_map_ops;
 struct device_node;
 #ifdef CONFIG_PPC64
 struct pci_dn;
@@ -20,9 +19,6 @@ struct iommu_table;
  * drivers/macintosh/macio_asic.c
  */
 struct dev_archdata {
-       /* DMA operations on that device */
-       struct dma_map_ops      *dma_ops;
-
        /*
         * These two used to be a union. However, with the hybrid ops we need
         * both so here we store both a DMA offset for direct mappings and
index 84e3f8dd5e4f05f04485b657036a95434076e738..181a095468e444a4c0637377c29d962ded2ba91b 100644 (file)
@@ -76,24 +76,16 @@ static inline unsigned long device_to_mask(struct device *dev)
 #ifdef CONFIG_PPC64
 extern struct dma_map_ops dma_iommu_ops;
 #endif
-extern struct dma_map_ops dma_direct_ops;
+extern const struct dma_map_ops dma_direct_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        /* We don't handle the NULL dev case for ISA for now. We could
         * do it via an out of line call but it is not needed for now. The
         * only ISA DMA device we support is the floppy and we have a hack
         * in the floppy driver directly to get a device for us.
         */
-       if (unlikely(dev == NULL))
-               return NULL;
-
-       return dev->archdata.dma_ops;
-}
-
-static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
-{
-       dev->archdata.dma_ops = ops;
+       return NULL;
 }
 
 /*
index 3abb58394da4830d9d2b61ff6397e5f3090f1c69..b889d13547fdc533f6d9a90f1309bb7c77cc80f1 100644 (file)
@@ -109,7 +109,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
 #define FH_DTPROP_MAX_PROPLEN 32768
 
 /**
- * fh_partiton_get_dtprop - get a property from a guest device tree.
+ * fh_partition_get_dtprop - get a property from a guest device tree.
  * @handle: handle of partition whose device tree is to be accessed
  * @dtpath_addr: physical address of device tree path to access
  * @propname_addr: physical address of name of property
index d821835ade8620982e1f888411feacc9b367c6d1..0503c98b21172ff09044e2dcfbf847acdf6e9fb3 100644 (file)
@@ -1,5 +1,8 @@
 #ifndef _ASM_POWERPC_KPROBES_H
 #define _ASM_POWERPC_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
 #ifdef __KERNEL__
 /*
  *  Kernel Probes (KProbes)
index e9bd6cf0212fdbc33e14b0b9b7a192dbc775b5b0..93eded8d38431eaf34fc7e2d0e2da9279088dce9 100644 (file)
@@ -53,8 +53,8 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 }
 
 #ifdef CONFIG_PCI
-extern void set_pci_dma_ops(struct dma_map_ops *dma_ops);
-extern struct dma_map_ops *get_pci_dma_ops(void);
+extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops);
+extern const struct dma_map_ops *get_pci_dma_ops(void);
 #else  /* CONFIG_PCI */
 #define set_pci_dma_ops(d)
 #define get_pci_dma_ops()      NULL
index a19f831a4cc9acdc6b0974e1d84240646983b0a6..17ee719e799fa470171b33f901b1b18bfffcffc2 100644 (file)
@@ -435,7 +435,7 @@ static inline void *ps3_system_bus_get_drvdata(
        return dev_get_drvdata(&dev->core);
 }
 
-/* These two need global scope for get_dma_ops(). */
+/* These two need global scope for get_arch_dma_ops(). */
 
 extern struct bus_type ps3_system_bus_type;
 
index de99d6e29430341623fd8a0f6ee3d3a4c4e1a768..01d45a5fd00b3dcfe7404f2bdbb32ba33bab86bc 100644 (file)
@@ -13,7 +13,7 @@
 
 #include <linux/swiotlb.h>
 
-extern struct dma_map_ops swiotlb_dma_ops;
+extern const struct dma_map_ops swiotlb_dma_ops;
 
 static inline void dma_mark_clean(void *addr, size_t size) {}
 
index c6689f658b50fe93db23bdacc7e50b4e2af85968..d0ea7860e02bb973ffa37d26e91a0254ba6fc3bd 100644 (file)
@@ -46,7 +46,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev)
  * map_page, and unmap_page on highmem, use normal dma_ops
  * for everything else.
  */
-struct dma_map_ops swiotlb_dma_ops = {
+const struct dma_map_ops swiotlb_dma_ops = {
        .alloc = __dma_direct_alloc_coherent,
        .free = __dma_direct_free_coherent,
        .mmap = dma_direct_mmap_coherent,
index 6877e3fa95bbc2a3523c32e6437779721ee67e0b..41c749586bd2241d471d4f916eaefaab04a2e0c2 100644 (file)
@@ -33,7 +33,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev)
        struct dev_archdata __maybe_unused *sd = &dev->archdata;
 
 #ifdef CONFIG_SWIOTLB
-       if (sd->max_direct_dma_addr && sd->dma_ops == &swiotlb_dma_ops)
+       if (sd->max_direct_dma_addr && dev->dma_ops == &swiotlb_dma_ops)
                pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
 #endif
 
@@ -274,7 +274,7 @@ static inline void dma_direct_sync_single(struct device *dev,
 }
 #endif
 
-struct dma_map_ops dma_direct_ops = {
+const struct dma_map_ops dma_direct_ops = {
        .alloc                          = dma_direct_alloc_coherent,
        .free                           = dma_direct_free_coherent,
        .mmap                           = dma_direct_mmap_coherent,
@@ -316,7 +316,7 @@ EXPORT_SYMBOL(dma_set_coherent_mask);
 
 int __dma_set_mask(struct device *dev, u64 dma_mask)
 {
-       struct dma_map_ops *dma_ops = get_dma_ops(dev);
+       const struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
        if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
                return dma_ops->set_dma_mask(dev, dma_mask);
@@ -344,7 +344,7 @@ EXPORT_SYMBOL(dma_set_mask);
 
 u64 __dma_get_required_mask(struct device *dev)
 {
-       struct dma_map_ops *dma_ops = get_dma_ops(dev);
+       const struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
        if (unlikely(dma_ops == NULL))
                return 0;
index 74bec549897202e797b2f0c7de30ad7d83ed9cfb..8e6fde8d28f362649f61b859bcf99d20ec06a916 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
 #include <linux/mm.h>
+#include <linux/shmem_fs.h>
 #include <linux/list.h>
 #include <linux/syscalls.h>
 #include <linux/irq.h>
@@ -59,14 +60,14 @@ resource_size_t isa_mem_base;
 EXPORT_SYMBOL(isa_mem_base);
 
 
-static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
+static const struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
 
-void set_pci_dma_ops(struct dma_map_ops *dma_ops)
+void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
 {
        pci_dma_ops = dma_ops;
 }
 
-struct dma_map_ops *get_pci_dma_ops(void)
+const struct dma_map_ops *get_pci_dma_ops(void)
 {
        return pci_dma_ops;
 }
index 893bd7f79be682decd077a3e05a42e1bc7520452..573fb3a461b5d765da4908d8b04c7efd813b0e5e 100644 (file)
@@ -707,7 +707,7 @@ void start_secondary(void *unused)
        unsigned int cpu = smp_processor_id();
        int i, base;
 
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
        smp_store_cpu_info(cpu);
index 491c5d8120f75da870e8d030b1044ddfb9c8279d..ab9d14c0e4609ab727e37da83ec62c43cbddf977 100644 (file)
@@ -102,9 +102,9 @@ static void release_spapr_tce_table(struct rcu_head *head)
        kfree(stt);
 }
 
-static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kvm_spapr_tce_fault(struct vm_fault *vmf)
 {
-       struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
+       struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
        struct page *page;
 
        if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
index c42a7e63b39e6425b2d57a047a6ab6c90181d7b1..4d6c64b3041c3aa4f58d1a504ad868c27b6924df 100644 (file)
@@ -56,7 +56,8 @@ struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
 {
        VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
-       return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
+       return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES),
+                        GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
 
index 0899315e1434bba2d98f19f0a104af75e06ad276..0d3002b7e2b4e96cb177c2f4eb146fb3091f6da2 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/page.h>
 #include <asm/code-patching.h>
 #include <linux/uaccess.h>
+#include <linux/kprobes.h>
 
 
 int patch_instruction(unsigned int *addr, unsigned int instr)
index 7ff51f96a00e8222fd8720bb98deb0aae57458b6..71b995bbcae0ebeff195e9cb983cdc0128e9853a 100644 (file)
@@ -651,7 +651,7 @@ static int dma_fixed_dma_supported(struct device *dev, u64 mask)
 
 static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask);
 
-static struct dma_map_ops dma_iommu_fixed_ops = {
+static const struct dma_map_ops dma_iommu_fixed_ops = {
        .alloc          = dma_fixed_alloc_coherent,
        .free           = dma_fixed_free_coherent,
        .map_sg         = dma_fixed_map_sg,
@@ -692,7 +692,7 @@ static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
                return 0;
 
        /* We use the PCI DMA ops */
-       dev->archdata.dma_ops = get_pci_dma_ops();
+       dev->dma_ops = get_pci_dma_ops();
 
        cell_dma_dev_setup(dev);
 
@@ -1172,7 +1172,7 @@ __setup("iommu_fixed=", setup_iommu_fixed);
 
 static u64 cell_dma_get_required_mask(struct device *dev)
 {
-       struct dma_map_ops *dma_ops;
+       const struct dma_map_ops *dma_ops;
 
        if (!dev->dma_mask)
                return 0;
index a35e2c29d7eed3b0ee15f2e088e3c0d4aeb8b19a..e5ec1368f0cd7723a6f80c86fd5eadd516f8f6c0 100644 (file)
@@ -233,8 +233,9 @@ spufs_mem_write(struct file *file, const char __user *buffer,
 }
 
 static int
-spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_mem_mmap_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct spu_context *ctx = vma->vm_file->private_data;
        unsigned long pfn, offset;
 
@@ -311,12 +312,11 @@ static const struct file_operations spufs_mem_fops = {
        .mmap                   = spufs_mem_mmap,
 };
 
-static int spufs_ps_fault(struct vm_area_struct *vma,
-                                   struct vm_fault *vmf,
+static int spufs_ps_fault(struct vm_fault *vmf,
                                    unsigned long ps_offs,
                                    unsigned long ps_size)
 {
-       struct spu_context *ctx = vma->vm_file->private_data;
+       struct spu_context *ctx = vmf->vma->vm_file->private_data;
        unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
        int ret = 0;
 
@@ -354,7 +354,7 @@ static int spufs_ps_fault(struct vm_area_struct *vma,
                down_read(&current->mm->mmap_sem);
        } else {
                area = ctx->spu->problem_phys + ps_offs;
-               vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
+               vm_insert_pfn(vmf->vma, vmf->address, (area + offset) >> PAGE_SHIFT);
                spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
        }
 
@@ -367,10 +367,9 @@ refault:
 }
 
 #if SPUFS_MMAP_4K
-static int spufs_cntl_mmap_fault(struct vm_area_struct *vma,
-                                          struct vm_fault *vmf)
+static int spufs_cntl_mmap_fault(struct vm_fault *vmf)
 {
-       return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
+       return spufs_ps_fault(vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
 }
 
 static const struct vm_operations_struct spufs_cntl_mmap_vmops = {
@@ -1067,15 +1066,15 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
 }
 
 static int
-spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_signal1_mmap_fault(struct vm_fault *vmf)
 {
 #if SPUFS_SIGNAL_MAP_SIZE == 0x1000
-       return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
+       return spufs_ps_fault(vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
 #elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
        /* For 64k pages, both signal1 and signal2 can be used to mmap the whole
         * signal 1 and 2 area
         */
-       return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+       return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
 #else
 #error unsupported page size
 #endif
@@ -1205,15 +1204,15 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
 
 #if SPUFS_MMAP_4K
 static int
-spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_signal2_mmap_fault(struct vm_fault *vmf)
 {
 #if SPUFS_SIGNAL_MAP_SIZE == 0x1000
-       return spufs_ps_fault(vma, vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE);
+       return spufs_ps_fault(vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE);
 #elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
        /* For 64k pages, both signal1 and signal2 can be used to mmap the whole
         * signal 1 and 2 area
         */
-       return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+       return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
 #else
 #error unsupported page size
 #endif
@@ -1334,9 +1333,9 @@ DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
 
 #if SPUFS_MMAP_4K
 static int
-spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_mss_mmap_fault(struct vm_fault *vmf)
 {
-       return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
+       return spufs_ps_fault(vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
 }
 
 static const struct vm_operations_struct spufs_mss_mmap_vmops = {
@@ -1396,9 +1395,9 @@ static const struct file_operations spufs_mss_fops = {
 };
 
 static int
-spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_psmap_mmap_fault(struct vm_fault *vmf)
 {
-       return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_PS_MAP_SIZE);
+       return spufs_ps_fault(vmf, 0x0000, SPUFS_PS_MAP_SIZE);
 }
 
 static const struct vm_operations_struct spufs_psmap_mmap_vmops = {
@@ -1456,9 +1455,9 @@ static const struct file_operations spufs_psmap_fops = {
 
 #if SPUFS_MMAP_4K
 static int
-spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_mfc_mmap_fault(struct vm_fault *vmf)
 {
-       return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+       return spufs_ps_fault(vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
 }
 
 static const struct vm_operations_struct spufs_mfc_mmap_vmops = {
index e74adc4e7fd8ead2b6a8b9eb8bcf5a51be694041..7fec04de27fc0f15991690d05d0a72fccf61f385 100644 (file)
@@ -186,7 +186,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
         */
        if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
            !firmware_has_feature(FW_FEATURE_LPAR)) {
-               dev->dev.archdata.dma_ops = &dma_direct_ops;
+               dev->dev.dma_ops = &dma_direct_ops;
                /*
                 * Set the coherent DMA mask to prevent the iommu
                 * being used unnecessarily
index 3182400cf48ff181dafa5fc11ac49dfd8e3a7f5d..c4a3e93dc324a252ffbf1e20044fbab6cd426c06 100644 (file)
@@ -363,7 +363,7 @@ static int pcmcia_notify(struct notifier_block *nb, unsigned long action,
                return 0;
 
        /* We use the direct ops for localbus */
-       dev->archdata.dma_ops = &dma_direct_ops;
+       dev->dma_ops = &dma_direct_ops;
 
        return 0;
 }
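This hunk and the s390/sh ones below belong to one treewide series: per-device DMA ops move from dev->archdata.dma_ops into dev->dma_ops in struct device, and the ops tables become const. A rough sketch of the consumer side, modeled on the generic <linux/dma-mapping.h> of this kernel (the exact body may differ slightly):

```c
static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
{
	if (dev && dev->dma_ops)
		return dev->dma_ops;			/* per-device override */
	return get_arch_dma_ops(dev ? dev->bus : NULL);	/* arch/bus default */
}
```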
index 73b155fd4481595d6763929f76ea8df344e3b6b2..1c383f38031dfe3a9db91e6394f47cd0699f1d83 100644 (file)
@@ -115,7 +115,7 @@ static u64 dma_npu_get_required_mask(struct device *dev)
        return 0;
 }
 
-static struct dma_map_ops dma_npu_ops = {
+static const struct dma_map_ops dma_npu_ops = {
        .map_page               = dma_npu_map_page,
        .map_sg                 = dma_npu_map_sg,
        .alloc                  = dma_npu_alloc,
index 8278f43ad4b8f30be7abbf7d7e8c541b9af6f069..e0f83c204ccc79f22cc591c76d9b23387b3ef210 100644 (file)
@@ -3034,7 +3034,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
 /*
  * This function is supposed to be called on basis of PE from top
  * to bottom style. So the I/O or MMIO segment assigned to
- * parent PE could be overrided by its child PEs if necessary.
+ * parent PE could be overridden by its child PEs if necessary.
  */
 static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
 {
index 8af1c15aef85f585e73a2b1b327d3c9d35e7872e..2d2e5f80a3d3fd3854c1774e89113bef4c7fcb6d 100644 (file)
@@ -701,7 +701,7 @@ static u64 ps3_dma_get_required_mask(struct device *_dev)
        return DMA_BIT_MASK(32);
 }
 
-static struct dma_map_ops ps3_sb_dma_ops = {
+static const struct dma_map_ops ps3_sb_dma_ops = {
        .alloc = ps3_alloc_coherent,
        .free = ps3_free_coherent,
        .map_sg = ps3_sb_map_sg,
@@ -712,7 +712,7 @@ static struct dma_map_ops ps3_sb_dma_ops = {
        .unmap_page = ps3_unmap_page,
 };
 
-static struct dma_map_ops ps3_ioc0_dma_ops = {
+static const struct dma_map_ops ps3_ioc0_dma_ops = {
        .alloc = ps3_alloc_coherent,
        .free = ps3_free_coherent,
        .map_sg = ps3_ioc0_map_sg,
@@ -756,11 +756,11 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev)
 
        switch (dev->dev_type) {
        case PS3_DEVICE_TYPE_IOC0:
-               dev->core.archdata.dma_ops = &ps3_ioc0_dma_ops;
+               dev->core.dma_ops = &ps3_ioc0_dma_ops;
                dev_set_name(&dev->core, "ioc0_%02x", ++dev_ioc0_count);
                break;
        case PS3_DEVICE_TYPE_SB:
-               dev->core.archdata.dma_ops = &ps3_sb_dma_ops;
+               dev->core.dma_ops = &ps3_sb_dma_ops;
                dev_set_name(&dev->core, "sb_%02x", ++dev_sb_count);
 
                break;
index 614c2853714166e125f7206f8ad47fc0db76096f..99a6bf7f3bcf3508f6a452f9512757bd6649aeb8 100644 (file)
@@ -136,7 +136,7 @@ static u64 ibmebus_dma_get_required_mask(struct device *dev)
        return DMA_BIT_MASK(64);
 }
 
-static struct dma_map_ops ibmebus_dma_ops = {
+static const struct dma_map_ops ibmebus_dma_ops = {
        .alloc              = ibmebus_alloc_coherent,
        .free               = ibmebus_free_coherent,
        .map_sg             = ibmebus_map_sg,
@@ -169,7 +169,7 @@ static int ibmebus_create_device(struct device_node *dn)
                return -ENOMEM;
 
        dev->dev.bus = &ibmebus_bus_type;
-       dev->dev.archdata.dma_ops = &ibmebus_dma_ops;
+       dev->dev.dma_ops = &ibmebus_dma_ops;
 
        ret = of_device_add(dev);
        if (ret)
index 0024e451bb36f8ef7665835180d34a5e2807679b..4d757eaa46bf70cf2d1b757e6abe987fa7aa5647 100644 (file)
@@ -1020,7 +1020,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
        /* check largest block * page size > max memory hotplug addr */
        max_addr = memory_hotplug_max();
        if (query.largest_available_block < (max_addr >> page_shift)) {
-               dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u "
+               dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
                          "%llu-sized pages\n", max_addr,  query.largest_available_block,
                          1ULL << page_shift);
                goto out_failed;
index 2c8fb3ec989e08b98cbd0835c9674efdc0d90044..7204939324863566ef98ea6d387248a362cd1a87 100644 (file)
@@ -615,7 +615,7 @@ static u64 vio_dma_get_required_mask(struct device *dev)
         return dma_iommu_ops.get_required_mask(dev);
 }
 
-static struct dma_map_ops vio_dma_mapping_ops = {
+static const struct dma_map_ops vio_dma_mapping_ops = {
        .alloc             = vio_dma_iommu_alloc_coherent,
        .free              = vio_dma_iommu_free_coherent,
        .mmap              = dma_direct_mmap_coherent,
index 6845e91ba04a5ef8c48265de04391f6da9adf547..954dbf8222d71098f95d21aa4ce98184199cef79 100644 (file)
@@ -1587,7 +1587,7 @@ extract_tbr (unsigned long insn,
 #define CTX(op, xop)   (OP (op) | (((unsigned long)(xop)) & 0x7))
 #define CTX_MASK CTX(0x3f, 0x7)
 
-/* An User Context form instruction.  */
+/* A User Context form instruction.  */
 #define UCTX(op, xop)  (OP (op) | (((unsigned long)(xop)) & 0x1f))
 #define UCTX_MASK UCTX(0x3f, 0x1f)
 
index 2ef031bee7ab38324c2a426d2d7719d227d7dfc1..a2dcef0aacc76db2d318e7b8785f9f8dadbca02d 100644 (file)
@@ -134,9 +134,11 @@ config S390
        select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
        select HAVE_CMPXCHG_DOUBLE
        select HAVE_CMPXCHG_LOCAL
+       select HAVE_COPY_THREAD_TLS
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_API_DEBUG
        select HAVE_DMA_CONTIGUOUS
+       select DMA_NOOP_OPS
        select HAVE_DYNAMIC_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_REGS
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
index e00975361fec00fb89ad916f1b8a15539a449a6c..143b1e00b818493f4cb683c251e1d90ef6a5aa9e 100644 (file)
@@ -678,6 +678,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
index 2cf87343b59030f76267e47dc88672e536bf6e9b..2358bf33c5efcf2790643f0b8bbd2a8c80a2fc8f 100644 (file)
@@ -628,6 +628,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
index d1033de4c4ee04be807b326175e893dda169f99b..402c530c6da50d9bc724dcbcfa8c35c8085adef5 100644 (file)
@@ -6,7 +6,7 @@ obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
-obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
+obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
 obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
 obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
new file mode 100644 (file)
index 0000000..d69ea49
--- /dev/null
@@ -0,0 +1,619 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the AES Cipher Algorithm with protected keys.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2017
+ *   Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *             Harald Freudenberger <freude@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ */
+
+#define KMSG_COMPONENT "paes_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <crypto/xts.h>
+#include <asm/cpacf.h>
+#include <asm/pkey.h>
+
+static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+
+struct s390_paes_ctx {
+       struct pkey_seckey sk;
+       struct pkey_protkey pk;
+       unsigned long fc;
+};
+
+struct s390_pxts_ctx {
+       struct pkey_seckey sk[2];
+       struct pkey_protkey pk[2];
+       unsigned long fc;
+};
+
+static inline int __paes_convert_key(struct pkey_seckey *sk,
+                                    struct pkey_protkey *pk)
+{
+       int i, ret;
+
+       /* try three times in case of failure */
+       for (i = 0; i < 3; i++) {
+               ret = pkey_skey2pkey(sk, pk);
+               if (ret == 0)
+                       break;
+       }
+
+       return ret;
+}
+
+static int __paes_set_key(struct s390_paes_ctx *ctx)
+{
+       unsigned long fc;
+
+       if (__paes_convert_key(&ctx->sk, &ctx->pk))
+               return -EINVAL;
+
+       /* Pick the correct function code based on the protected key type */
+       fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 :
+               (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 :
+               (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0;
+
+       /* Check if the function code is available */
+       ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+       return ctx->fc ? 0 : -EINVAL;
+}
+
+static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+                           unsigned int key_len)
+{
+       struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       if (key_len != SECKEYBLOBSIZE)
+               return -EINVAL;
+
+       memcpy(ctx->sk.seckey, in_key, SECKEYBLOBSIZE);
+       if (__paes_set_key(ctx)) {
+               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int ecb_paes_crypt(struct blkcipher_desc *desc,
+                         unsigned long modifier,
+                         struct blkcipher_walk *walk)
+{
+       struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       unsigned int nbytes, n, k;
+       int ret;
+
+       ret = blkcipher_walk_virt(desc, walk);
+       while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+               /* only use complete blocks */
+               n = nbytes & ~(AES_BLOCK_SIZE - 1);
+               k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey,
+                            walk->dst.virt.addr, walk->src.virt.addr, n);
+               if (k)
+                       ret = blkcipher_walk_done(desc, walk, nbytes - k);
+               if (k < n) {
+                       if (__paes_set_key(ctx) != 0)
+                               return blkcipher_walk_done(desc, walk, -EIO);
+               }
+       }
+       return ret;
+}
+
+static int ecb_paes_encrypt(struct blkcipher_desc *desc,
+                           struct scatterlist *dst, struct scatterlist *src,
+                           unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk);
+}
+
+static int ecb_paes_decrypt(struct blkcipher_desc *desc,
+                           struct scatterlist *dst, struct scatterlist *src,
+                           unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg ecb_paes_alg = {
+       .cra_name               =       "ecb(paes)",
+       .cra_driver_name        =       "ecb-paes-s390",
+       .cra_priority           =       400,    /* combo: aes + ecb */
+       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          =       AES_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct s390_paes_ctx),
+       .cra_type               =       &crypto_blkcipher_type,
+       .cra_module             =       THIS_MODULE,
+       .cra_list               =       LIST_HEAD_INIT(ecb_paes_alg.cra_list),
+       .cra_u                  =       {
+               .blkcipher = {
+                       .min_keysize            =       SECKEYBLOBSIZE,
+                       .max_keysize            =       SECKEYBLOBSIZE,
+                       .setkey                 =       ecb_paes_set_key,
+                       .encrypt                =       ecb_paes_encrypt,
+                       .decrypt                =       ecb_paes_decrypt,
+               }
+       }
+};
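Note that "ecb(paes)" is keyed with a 64-byte secure key blob rather than clear AES key material, which is why min_keysize and max_keysize are both SECKEYBLOBSIZE. A hedged userspace sketch of driving it through AF_ALG (error handling trimmed; assumes a blob obtained via the pkey ioctls added later in this commit):

```c
#include <stdint.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

/* Illustrative only: bind to ecb(paes) and load a secure key blob. */
int bind_ecb_paes(const uint8_t blob[64])
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "skcipher",
		.salg_name   = "ecb(paes)",
	};
	int tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);

	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	/* the "key" here is the secure key blob, not clear key bytes */
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, blob, 64);
	return accept(tfmfd, NULL, 0);	/* operation socket */
}
```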
+
+static int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+{
+       unsigned long fc;
+
+       if (__paes_convert_key(&ctx->sk, &ctx->pk))
+               return -EINVAL;
+
+       /* Pick the correct function code based on the protected key type */
+       fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 :
+               (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 :
+               (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0;
+
+       /* Check if the function code is available */
+       ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+
+       return ctx->fc ? 0 : -EINVAL;
+}
+
+static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+                           unsigned int key_len)
+{
+       struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       memcpy(ctx->sk.seckey, in_key, SECKEYBLOBSIZE);
+       if (__cbc_paes_set_key(ctx)) {
+               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+                         struct blkcipher_walk *walk)
+{
+       struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       unsigned int nbytes, n, k;
+       int ret;
+       struct {
+               u8 iv[AES_BLOCK_SIZE];
+               u8 key[MAXPROTKEYSIZE];
+       } param;
+
+       ret = blkcipher_walk_virt(desc, walk);
+       memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+       memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+       while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+               /* only use complete blocks */
+               n = nbytes & ~(AES_BLOCK_SIZE - 1);
+               k = cpacf_kmc(ctx->fc | modifier, &param,
+                             walk->dst.virt.addr, walk->src.virt.addr, n);
+               if (k)
+                       ret = blkcipher_walk_done(desc, walk, nbytes - k);
+               if (k < n) {
+                       if (__cbc_paes_set_key(ctx) != 0)
+                               return blkcipher_walk_done(desc, walk, -EIO);
+                       memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+               }
+       }
+       memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
+       return ret;
+}
+
+static int cbc_paes_encrypt(struct blkcipher_desc *desc,
+                           struct scatterlist *dst, struct scatterlist *src,
+                           unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       return cbc_paes_crypt(desc, 0, &walk);
+}
+
+static int cbc_paes_decrypt(struct blkcipher_desc *desc,
+                           struct scatterlist *dst, struct scatterlist *src,
+                           unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg cbc_paes_alg = {
+       .cra_name               =       "cbc(paes)",
+       .cra_driver_name        =       "cbc-paes-s390",
+       .cra_priority           =       400,    /* combo: aes + cbc */
+       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          =       AES_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct s390_paes_ctx),
+       .cra_type               =       &crypto_blkcipher_type,
+       .cra_module             =       THIS_MODULE,
+       .cra_list               =       LIST_HEAD_INIT(cbc_paes_alg.cra_list),
+       .cra_u                  =       {
+               .blkcipher = {
+                       .min_keysize            =       SECKEYBLOBSIZE,
+                       .max_keysize            =       SECKEYBLOBSIZE,
+                       .ivsize                 =       AES_BLOCK_SIZE,
+                       .setkey                 =       cbc_paes_set_key,
+                       .encrypt                =       cbc_paes_encrypt,
+                       .decrypt                =       cbc_paes_decrypt,
+               }
+       }
+};
+
+static int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+{
+       unsigned long fc;
+
+       if (__paes_convert_key(&ctx->sk[0], &ctx->pk[0]) ||
+           __paes_convert_key(&ctx->sk[1], &ctx->pk[1]))
+               return -EINVAL;
+
+       if (ctx->pk[0].type != ctx->pk[1].type)
+               return -EINVAL;
+
+       /* Pick the correct function code based on the protected key type */
+       fc = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PXTS_128 :
+               (ctx->pk[0].type == PKEY_KEYTYPE_AES_256) ?
+               CPACF_KM_PXTS_256 : 0;
+
+       /* Check if the function code is available */
+       ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+       return ctx->fc ? 0 : -EINVAL;
+}
+
+static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+                           unsigned int key_len)
+{
+       struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+       u8 ckey[2 * AES_MAX_KEY_SIZE];
+       unsigned int ckey_len;
+
+       memcpy(ctx->sk[0].seckey, in_key, SECKEYBLOBSIZE);
+       memcpy(ctx->sk[1].seckey, in_key + SECKEYBLOBSIZE, SECKEYBLOBSIZE);
+       if (__xts_paes_set_key(ctx)) {
+               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
+
+       /*
+        * xts_check_key verifies the key length is not odd and makes
+        * sure that the two keys are not the same. This can be done
+        * on the two protected keys as well
+        */
+       ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
+               AES_KEYSIZE_128 : AES_KEYSIZE_256;
+       memcpy(ckey, ctx->pk[0].protkey, ckey_len);
+       memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
+       return xts_check_key(tfm, ckey, 2*ckey_len);
+}
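An xts(paes) key is simply two secure key blobs concatenated (hence the 2 * SECKEYBLOBSIZE key size in the algorithm definition below); setkey splits them into sk[0] (data key) and sk[1] (tweak key). Illustrative assembly of such a key, with hypothetical blob names:

```c
u8 xtskey[2 * SECKEYBLOBSIZE];

memcpy(xtskey, data_blob.seckey, SECKEYBLOBSIZE);	/* -> sk[0] */
memcpy(xtskey + SECKEYBLOBSIZE, tweak_blob.seckey,
       SECKEYBLOBSIZE);					/* -> sk[1] */
/* hand xtskey to setkey() / ALG_SET_KEY as one 128-byte key */
```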
+
+static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+                         struct blkcipher_walk *walk)
+{
+       struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       unsigned int keylen, offset, nbytes, n, k;
+       int ret;
+       struct {
+               u8 key[MAXPROTKEYSIZE]; /* key + verification pattern */
+               u8 tweak[16];
+               u8 block[16];
+               u8 bit[16];
+               u8 xts[16];
+       } pcc_param;
+       struct {
+               u8 key[MAXPROTKEYSIZE]; /* key + verification pattern */
+               u8 init[16];
+       } xts_param;
+
+       ret = blkcipher_walk_virt(desc, walk);
+       keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
+       offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
+retry:
+       memset(&pcc_param, 0, sizeof(pcc_param));
+       memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+       memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
+       cpacf_pcc(ctx->fc, pcc_param.key + offset);
+
+       memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
+       memcpy(xts_param.init, pcc_param.xts, 16);
+
+       while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+               /* only use complete blocks */
+               n = nbytes & ~(AES_BLOCK_SIZE - 1);
+               k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
+                            walk->dst.virt.addr, walk->src.virt.addr, n);
+               if (k)
+                       ret = blkcipher_walk_done(desc, walk, nbytes - k);
+               if (k < n) {
+                       if (__xts_paes_set_key(ctx) != 0)
+                               return blkcipher_walk_done(desc, walk, -EIO);
+                       goto retry;
+               }
+       }
+       return ret;
+}
+
+static int xts_paes_encrypt(struct blkcipher_desc *desc,
+                           struct scatterlist *dst, struct scatterlist *src,
+                           unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       return xts_paes_crypt(desc, 0, &walk);
+}
+
+static int xts_paes_decrypt(struct blkcipher_desc *desc,
+                           struct scatterlist *dst, struct scatterlist *src,
+                           unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       return xts_paes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg xts_paes_alg = {
+       .cra_name               =       "xts(paes)",
+       .cra_driver_name        =       "xts-paes-s390",
+       .cra_priority           =       400,    /* combo: aes + xts */
+       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          =       AES_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct s390_pxts_ctx),
+       .cra_type               =       &crypto_blkcipher_type,
+       .cra_module             =       THIS_MODULE,
+       .cra_list               =       LIST_HEAD_INIT(xts_paes_alg.cra_list),
+       .cra_u                  =       {
+               .blkcipher = {
+                       .min_keysize            =       2 * SECKEYBLOBSIZE,
+                       .max_keysize            =       2 * SECKEYBLOBSIZE,
+                       .ivsize                 =       AES_BLOCK_SIZE,
+                       .setkey                 =       xts_paes_set_key,
+                       .encrypt                =       xts_paes_encrypt,
+                       .decrypt                =       xts_paes_decrypt,
+               }
+       }
+};
+
+static int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+{
+       unsigned long fc;
+
+       if (__paes_convert_key(&ctx->sk, &ctx->pk))
+               return -EINVAL;
+
+       /* Pick the correct function code based on the protected key type */
+       fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 :
+               (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 :
+               (ctx->pk.type == PKEY_KEYTYPE_AES_256) ?
+               CPACF_KMCTR_PAES_256 : 0;
+
+       /* Check if the function code is available */
+       ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+
+       return ctx->fc ? 0 : -EINVAL;
+}
+
+static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+                           unsigned int key_len)
+{
+       struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       memcpy(ctx->sk.seckey, in_key, key_len);
+       if (__ctr_paes_set_key(ctx)) {
+               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+       unsigned int i, n;
+
+       /* only use complete blocks, max. PAGE_SIZE */
+       memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+       n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+       for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+               memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+               crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+               ctrptr += AES_BLOCK_SIZE;
+       }
+       return n;
+}
+
+static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+                         struct blkcipher_walk *walk)
+{
+       struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       u8 buf[AES_BLOCK_SIZE], *ctrptr;
+       unsigned int nbytes, n, k;
+       int ret, locked;
+
+       locked = spin_trylock(&ctrblk_lock);
+
+       ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+       while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+               n = AES_BLOCK_SIZE;
+               if (nbytes >= 2*AES_BLOCK_SIZE && locked)
+                       n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+               ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+               k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey,
+                               walk->dst.virt.addr, walk->src.virt.addr,
+                               n, ctrptr);
+               if (k) {
+                       if (ctrptr == ctrblk)
+                               memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+                                      AES_BLOCK_SIZE);
+                       crypto_inc(walk->iv, AES_BLOCK_SIZE);
+                       ret = blkcipher_walk_done(desc, walk, nbytes - n);
+               }
+               if (k < n) {
+                       if (__ctr_paes_set_key(ctx) != 0)
+                               return blkcipher_walk_done(desc, walk, -EIO);
+               }
+       }
+       if (locked)
+               spin_unlock(&ctrblk_lock);
+       /*
+        * final block may be < AES_BLOCK_SIZE, copy only nbytes
+        */
+       if (nbytes) {
+               while (1) {
+                       if (cpacf_kmctr(ctx->fc | modifier,
+                                       ctx->pk.protkey, buf,
+                                       walk->src.virt.addr, AES_BLOCK_SIZE,
+                                       walk->iv) == AES_BLOCK_SIZE)
+                               break;
+                       if (__ctr_paes_set_key(ctx) != 0)
+                               return blkcipher_walk_done(desc, walk, -EIO);
+               }
+               memcpy(walk->dst.virt.addr, buf, nbytes);
+               crypto_inc(walk->iv, AES_BLOCK_SIZE);
+               ret = blkcipher_walk_done(desc, walk, 0);
+       }
+
+       return ret;
+}
+
+static int ctr_paes_encrypt(struct blkcipher_desc *desc,
+                           struct scatterlist *dst, struct scatterlist *src,
+                           unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       return ctr_paes_crypt(desc, 0, &walk);
+}
+
+static int ctr_paes_decrypt(struct blkcipher_desc *desc,
+                           struct scatterlist *dst, struct scatterlist *src,
+                           unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg ctr_paes_alg = {
+       .cra_name               =       "ctr(paes)",
+       .cra_driver_name        =       "ctr-paes-s390",
+       .cra_priority           =       400,    /* combo: aes + ctr */
+       .cra_flags              =       CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          =       1,
+       .cra_ctxsize            =       sizeof(struct s390_paes_ctx),
+       .cra_type               =       &crypto_blkcipher_type,
+       .cra_module             =       THIS_MODULE,
+       .cra_list               =       LIST_HEAD_INIT(ctr_paes_alg.cra_list),
+       .cra_u                  =       {
+               .blkcipher = {
+                       .min_keysize            =       SECKEYBLOBSIZE,
+                       .max_keysize            =       SECKEYBLOBSIZE,
+                       .ivsize                 =       AES_BLOCK_SIZE,
+                       .setkey                 =       ctr_paes_set_key,
+                       .encrypt                =       ctr_paes_encrypt,
+                       .decrypt                =       ctr_paes_decrypt,
+               }
+       }
+};
+
+static inline void __crypto_unregister_alg(struct crypto_alg *alg)
+{
+       if (!list_empty(&alg->cra_list))
+               crypto_unregister_alg(alg);
+}
+
+static void paes_s390_fini(void)
+{
+       if (ctrblk)
+               free_page((unsigned long) ctrblk);
+       __crypto_unregister_alg(&ctr_paes_alg);
+       __crypto_unregister_alg(&xts_paes_alg);
+       __crypto_unregister_alg(&cbc_paes_alg);
+       __crypto_unregister_alg(&ecb_paes_alg);
+}
+
+static int __init paes_s390_init(void)
+{
+       int ret;
+
+       /* Query available functions for KM, KMC and KMCTR */
+       cpacf_query(CPACF_KM, &km_functions);
+       cpacf_query(CPACF_KMC, &kmc_functions);
+       cpacf_query(CPACF_KMCTR, &kmctr_functions);
+
+       if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
+           cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
+           cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
+               ret = crypto_register_alg(&ecb_paes_alg);
+               if (ret)
+                       goto out_err;
+       }
+
+       if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
+           cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
+           cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
+               ret = crypto_register_alg(&cbc_paes_alg);
+               if (ret)
+                       goto out_err;
+       }
+
+       if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
+           cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
+               ret = crypto_register_alg(&xts_paes_alg);
+               if (ret)
+                       goto out_err;
+       }
+
+       if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
+           cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
+           cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
+               ret = crypto_register_alg(&ctr_paes_alg);
+               if (ret)
+                       goto out_err;
+               ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+               if (!ctrblk) {
+                       ret = -ENOMEM;
+                       goto out_err;
+               }
+       }
+
+       return 0;
+out_err:
+       paes_s390_fini();
+       return ret;
+}
+
+module_init(paes_s390_init);
+module_exit(paes_s390_fini);
+
+MODULE_ALIAS_CRYPTO("aes-all");
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys");
+MODULE_LICENSE("GPL");
index d00e368fb5e6ef949b6fb070321fa3a6dc9c8501..68bfd09f1b02ec23dad7ba4931db828f1286d890 100644 (file)
@@ -229,6 +229,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
index 2c680db7e5c10a20f9182c1232032e9db9f54740..e2dfbf280d124aefefb6e82247820c23d8268c74 100644 (file)
@@ -28,8 +28,9 @@
 #define CPACF_PPNO             0xb93c          /* MSA5 */
 
 /*
- * Decryption modifier bit
+ * En/decryption modifier bits
  */
+#define CPACF_ENCRYPT          0x00
 #define CPACF_DECRYPT          0x80
 
 /*
 #define CPACF_KM_AES_128       0x12
 #define CPACF_KM_AES_192       0x13
 #define CPACF_KM_AES_256       0x14
+#define CPACF_KM_PAES_128      0x1a
+#define CPACF_KM_PAES_192      0x1b
+#define CPACF_KM_PAES_256      0x1c
 #define CPACF_KM_XTS_128       0x32
 #define CPACF_KM_XTS_256       0x34
+#define CPACF_KM_PXTS_128      0x3a
+#define CPACF_KM_PXTS_256      0x3c
 
 /*
  * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
@@ -56,6 +62,9 @@
 #define CPACF_KMC_AES_128      0x12
 #define CPACF_KMC_AES_192      0x13
 #define CPACF_KMC_AES_256      0x14
+#define CPACF_KMC_PAES_128     0x1a
+#define CPACF_KMC_PAES_192     0x1b
+#define CPACF_KMC_PAES_256     0x1c
 #define CPACF_KMC_PRNG         0x43
 
 /*
@@ -69,6 +78,9 @@
 #define CPACF_KMCTR_AES_128    0x12
 #define CPACF_KMCTR_AES_192    0x13
 #define CPACF_KMCTR_AES_256    0x14
+#define CPACF_KMCTR_PAES_128   0x1a
+#define CPACF_KMCTR_PAES_192   0x1b
+#define CPACF_KMCTR_PAES_256   0x1c
 
 /*
  * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
 #define CPACF_KMAC_TDEA_128    0x02
 #define CPACF_KMAC_TDEA_192    0x03
 
+/*
+ * Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT)
+ * instruction
+ */
+#define CPACF_PCKMO_QUERY              0x00
+#define CPACF_PCKMO_ENC_DES_KEY                0x01
+#define CPACF_PCKMO_ENC_TDES_128_KEY   0x02
+#define CPACF_PCKMO_ENC_TDES_192_KEY   0x03
+#define CPACF_PCKMO_ENC_AES_128_KEY    0x12
+#define CPACF_PCKMO_ENC_AES_192_KEY    0x13
+#define CPACF_PCKMO_ENC_AES_256_KEY    0x14
+
 /*
  * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
  * instruction
@@ -397,4 +421,24 @@ static inline void cpacf_pcc(unsigned long func, void *param)
                : "cc", "memory");
 }
 
+/**
+ * cpacf_pckmo() - executes the PCKMO (PERFORM CRYPTOGRAPHIC KEY
+ *               MANAGEMENT) instruction
+ * @func: the function code passed to PCKMO; see CPACF_PCKMO_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Returns: none; the result is placed in the parameter block.
+ */
+static inline void cpacf_pckmo(long func, void *param)
+{
+       register unsigned long r0 asm("0") = (unsigned long) func;
+       register unsigned long r1 asm("1") = (unsigned long) param;
+
+       asm volatile(
+               "       .insn   rre,%[opc] << 16,0,0\n" /* PCKMO opcode */
+               :
+               : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCKMO)
+               : "cc", "memory");
+}
+
 #endif /* _ASM_S390_CPACF_H */
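The new PCKMO function codes follow the same query/test pattern paes_s390.c uses for KM/KMC/KMCTR. A minimal availability probe might look like this (sketch only; the mask variable is local to the example):

```c
static cpacf_mask_t pckmo_functions;

static bool pckmo_aes128_available(void)
{
	cpacf_query(CPACF_PCKMO, &pckmo_functions);	/* query facility */
	return cpacf_test_func(&pckmo_functions,
			       CPACF_PCKMO_ENC_AES_128_KEY);
}
```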
index 4a9f35e0973ff166956c123d594fd24c7dfafc92..5203fc87f080edaa854d685f7a7b3126d4151d66 100644 (file)
@@ -4,7 +4,6 @@
  * This file is released under the GPLv2
  */
 struct dev_archdata {
-       struct dma_map_ops *dma_ops;
 };
 
 struct pdev_archdata {
index ffaba07f50ab5e583c76ee87a8c69ef5e48a8618..3108b8dbe266a52c145f7b03fd5ad9ece5bb83b2 100644 (file)
 
 #define DMA_ERROR_CODE         (~(dma_addr_t) 0x0)
 
-extern struct dma_map_ops s390_pci_dma_ops;
+extern const struct dma_map_ops s390_pci_dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-       if (dev && dev->archdata.dma_ops)
-               return dev->archdata.dma_ops;
        return &dma_noop_ops;
 }
 
index 591e5a5279b05e46395ab949cb9bfb88ce92b6d3..84c0f908648366cd5acad1db31deaa66d50cec9c 100644 (file)
  * 2005-Dec    Used as a template for s390 by Mike Grundy
  *             <grundym@us.ibm.com>
  */
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0x0002
+
+#ifdef CONFIG_KPROBES
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
@@ -37,7 +42,6 @@ struct pt_regs;
 struct kprobe;
 
 typedef u16 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0x0002
 
 /* Maximum instruction size is 3 (16bit) halfwords: */
 #define MAX_INSN_SIZE          0x0003
@@ -91,4 +95,5 @@ int probe_is_insn_relative_long(u16 *insn);
 
 #define flush_insn_slot(p)     do { } while (0)
 
+#endif /* CONFIG_KPROBES */
 #endif /* _ASM_S390_KPROBES_H */
index 67f7a991c929bb92731c6aafeef4a1255d69c959..9b828c073176dbf19658ffea7936544629d67777 100644 (file)
@@ -63,7 +63,7 @@ static inline void set_user_asce(struct mm_struct *mm)
        S390_lowcore.user_asce = mm->context.asce;
        if (current->thread.mm_segment.ar4)
                __ctl_load(S390_lowcore.user_asce, 7, 7);
-       set_cpu_flag(CIF_ASCE);
+       set_cpu_flag(CIF_ASCE_PRIMARY);
 }
 
 static inline void clear_user_asce(void)
@@ -81,7 +81,7 @@ static inline void load_kernel_asce(void)
        __ctl_store(asce, 1, 1);
        if (asce != S390_lowcore.kernel_asce)
                __ctl_load(S390_lowcore.kernel_asce, 1, 1);
-       set_cpu_flag(CIF_ASCE);
+       set_cpu_flag(CIF_ASCE_PRIMARY);
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
index 52511866fb14d1fb3a66cd609cdd968adfb091ec..7ed1972b1920eb45e8544f5b495db75bafa20636 100644 (file)
@@ -640,12 +640,12 @@ static inline int pud_bad(pud_t pud)
 
 static inline int pmd_present(pmd_t pmd)
 {
-       return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
+       return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
 }
 
 static inline int pmd_none(pmd_t pmd)
 {
-       return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
+       return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
@@ -803,7 +803,7 @@ static inline void pud_clear(pud_t *pud)
 
 static inline void pmd_clear(pmd_t *pmdp)
 {
-       pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID;
+       pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
 }
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -1357,7 +1357,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
                                            unsigned long addr, pmd_t *pmdp)
 {
-       return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
+       return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
@@ -1367,10 +1367,10 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
 {
        if (full) {
                pmd_t pmd = *pmdp;
-               *pmdp = __pmd(_SEGMENT_ENTRY_INVALID);
+               *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
                return pmd;
        }
-       return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
+       return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
@@ -1384,7 +1384,7 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
 static inline void pmdp_invalidate(struct vm_area_struct *vma,
                                   unsigned long addr, pmd_t *pmdp)
 {
-       pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
+       pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
 }
 
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
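The INVALID -> EMPTY switch in these pgtable hunks is a clarification rather than a behavioral change: _SEGMENT_ENTRY_EMPTY names the "no entry present at all" state, leaving _SEGMENT_ENTRY_INVALID for entries that exist but are not valid. In this kernel the definition reduces to (shown for reference):

```c
#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INVALID)
```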
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
new file mode 100644 (file)
index 0000000..b48aef4
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Kernelspace interface to the pkey device driver
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _KAPI_PKEY_H
+#define _KAPI_PKEY_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <uapi/asm/pkey.h>
+
+/*
+ * Generate (AES) random secure key.
+ * @param cardnr may be -1 (use default card)
+ * @param domain may be -1 (use default domain)
+ * @param keytype one of the PKEY_KEYTYPE values
+ * @param seckey pointer to buffer receiving the secure key
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_genseckey(__u16 cardnr, __u16 domain,
+                  __u32 keytype, struct pkey_seckey *seckey);
+
+/*
+ * Generate (AES) secure key with given key value.
+ * @param cardnr may be -1 (use default card)
+ * @param domain may be -1 (use default domain)
+ * @param keytype one of the PKEY_KEYTYPE values
+ * @param clrkey pointer to buffer with clear key data
+ * @param seckey pointer to buffer receiving the secure key
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_clr2seckey(__u16 cardnr, __u16 domain, __u32 keytype,
+                   const struct pkey_clrkey *clrkey,
+                   struct pkey_seckey *seckey);
+
+/*
+ * Derive (AES) protected key from the (AES) secure key blob.
+ * @param cardnr may be -1 (use default card)
+ * @param domain may be -1 (use default domain)
+ * @param seckey pointer to buffer with the input secure key
+ * @param protkey pointer to buffer receiving the protected key and
+ *       additional info (type, length)
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_sec2protkey(__u16 cardnr, __u16 domain,
+                    const struct pkey_seckey *seckey,
+                    struct pkey_protkey *protkey);
+
+/*
+ * Derive (AES) protected key from a given clear key value.
+ * @param keytype one of the PKEY_KEYTYPE values
+ * @param clrkey pointer to buffer with clear key data
+ * @param protkey pointer to buffer receiving the protected key and
+ *       additional info (type, length)
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_clr2protkey(__u32 keytype,
+                    const struct pkey_clrkey *clrkey,
+                    struct pkey_protkey *protkey);
+
+/*
+ * Search for a matching crypto card based on the Master Key
+ * Verification Pattern provided inside a secure key.
+ * @param seckey pointer to buffer with the input secure key
+ * @param cardnr pointer to cardnr, receives the card number on success
+ * @param domain pointer to domain, receives the domain number on success
+ * @param verify if set, always verify by fetching verification pattern
+ *       from card
+ * @return 0 on success, negative errno value on failure. If no card could be
+ *        found, -ENODEV is returned.
+ */
+int pkey_findcard(const struct pkey_seckey *seckey,
+                 __u16 *cardnr, __u16 *domain, int verify);
+
+/*
+ * Find card and transform secure key to protected key.
+ * @param seckey pointer to buffer with the input secure key
+ * @param protkey pointer to buffer receiving the protected key and
+ *       additional info (type, length)
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_skey2pkey(const struct pkey_seckey *seckey,
+                  struct pkey_protkey *protkey);
+
+#endif /* _KAPI_PKEY_H */
index dacba341e47594463a1ed12c44f21879e9d1a0a2..e4988710aa86219a2e4e7e0d9f2338584ae0077a 100644 (file)
 #include <linux/const.h>
 
 #define CIF_MCCK_PENDING       0       /* machine check handling is pending */
-#define CIF_ASCE               1       /* user asce needs fixup / uaccess */
-#define CIF_NOHZ_DELAY         2       /* delay HZ disable for a tick */
-#define CIF_FPU                        3       /* restore FPU registers */
-#define CIF_IGNORE_IRQ         4       /* ignore interrupt (for udelay) */
-#define CIF_ENABLED_WAIT       5       /* in enabled wait state */
+#define CIF_ASCE_PRIMARY       1       /* primary asce needs fixup / uaccess */
+#define CIF_ASCE_SECONDARY     2       /* secondary asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY         3       /* delay HZ disable for a tick */
+#define CIF_FPU                        4       /* restore FPU registers */
+#define CIF_IGNORE_IRQ         5       /* ignore interrupt (for udelay) */
+#define CIF_ENABLED_WAIT       6       /* in enabled wait state */
 
 #define _CIF_MCCK_PENDING      _BITUL(CIF_MCCK_PENDING)
-#define _CIF_ASCE              _BITUL(CIF_ASCE)
+#define _CIF_ASCE_PRIMARY      _BITUL(CIF_ASCE_PRIMARY)
+#define _CIF_ASCE_SECONDARY    _BITUL(CIF_ASCE_SECONDARY)
 #define _CIF_NOHZ_DELAY                _BITUL(CIF_NOHZ_DELAY)
 #define _CIF_FPU               _BITUL(CIF_FPU)
 #define _CIF_IGNORE_IRQ                _BITUL(CIF_IGNORE_IRQ)
@@ -89,7 +91,8 @@ extern void execve_tail(void);
  * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
  */
 
-#define TASK_SIZE_OF(tsk)      ((tsk)->mm->context.asce_limit)
+#define TASK_SIZE_OF(tsk)      ((tsk)->mm ? \
+                                (tsk)->mm->context.asce_limit : TASK_MAX_SIZE)
 #define TASK_UNMAPPED_BASE     (test_thread_flag(TIF_31BIT) ? \
                                        (1UL << 30) : (1UL << 41))
 #define TASK_SIZE              TASK_SIZE_OF(current)
@@ -200,10 +203,12 @@ struct stack_frame {
 struct task_struct;
 struct mm_struct;
 struct seq_file;
+struct pt_regs;
 
 typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
 void dump_trace(dump_trace_func_t func, void *data,
                struct task_struct *task, unsigned long sp);
+void show_registers(struct pt_regs *regs);
 
 void show_cacheinfo(struct seq_file *m);
 
index b2988fc60f65e53db21815e5e3536ee178db7d06..136932ff42502027820a94702a924d65b3049622 100644 (file)
@@ -14,6 +14,7 @@
  */
 #include <linux/sched.h>
 #include <linux/errno.h>
+#include <asm/processor.h>
 #include <asm/ctl_reg.h>
 
 #define VERIFY_READ     0
 
 #define get_ds()        (KERNEL_DS)
 #define get_fs()        (current->thread.mm_segment)
-
-#define set_fs(x)                                                      \
-do {                                                                   \
-       unsigned long __pto;                                            \
-       current->thread.mm_segment = (x);                               \
-       __pto = current->thread.mm_segment.ar4 ?                        \
-               S390_lowcore.user_asce : S390_lowcore.kernel_asce;      \
-       __ctl_load(__pto, 7, 7);                                        \
-} while (0)
-
 #define segment_eq(a,b) ((a).ar4 == (b).ar4)
 
+static inline void set_fs(mm_segment_t fs)
+{
+       current->thread.mm_segment = fs;
+       if (segment_eq(fs, KERNEL_DS)) {
+               set_cpu_flag(CIF_ASCE_SECONDARY);
+               __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+       } else {
+               clear_cpu_flag(CIF_ASCE_SECONDARY);
+               __ctl_load(S390_lowcore.user_asce, 7, 7);
+       }
+}
+
 static inline int __range_ok(unsigned long addr, unsigned long size)
 {
        return 1;
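The reworked set_fs() above now also tracks KERNEL_DS in CIF_ASCE_SECONDARY, which the entry.S hunks further down use to repair the secondary ASCE on the way back to user space. The caller pattern it serves is the classic one (sketch; the surrounding code is hypothetical):

```c
mm_segment_t old_fs = get_fs();

set_fs(KERNEL_DS);	/* secondary ASCE -> kernel, flag set above */
/* ... uaccess against a kernel buffer ... */
set_fs(old_fs);		/* restore USER_DS; flag cleared again */
```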
index bf736e764cb40cf6c439a6d9c9012138ebee70f1..6848ba5c1454f347c8f4053c276a41bd919506ea 100644 (file)
@@ -24,6 +24,7 @@ header-y += mman.h
 header-y += monwriter.h
 header-y += msgbuf.h
 header-y += param.h
+header-y += pkey.h
 header-y += poll.h
 header-y += posix_types.h
 header-y += ptrace.h
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
new file mode 100644 (file)
index 0000000..ed7f19c
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Userspace interface to the pkey device driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _UAPI_PKEY_H
+#define _UAPI_PKEY_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * Ioctl calls supported by the pkey device driver
+ */
+
+#define PKEY_IOCTL_MAGIC 'p'
+
+#define SECKEYBLOBSIZE 64     /* secure key blob size is always 64 bytes */
+#define MAXPROTKEYSIZE 64  /* a protected key blob may be up to 64 bytes */
+#define MAXCLRKEYSIZE  32     /* a clear key value may be up to 32 bytes */
+
+/* defines for the type field within the pkey_protkey struct */
+#define PKEY_KEYTYPE_AES_128  1
+#define PKEY_KEYTYPE_AES_192  2
+#define PKEY_KEYTYPE_AES_256  3
+
+/* Struct to hold a secure key blob */
+struct pkey_seckey {
+       __u8  seckey[SECKEYBLOBSIZE];             /* the secure key blob */
+};
+
+/* Struct to hold protected key and length info */
+struct pkey_protkey {
+       __u32 type;          /* key type, one of the PKEY_KEYTYPE values */
+       __u32 len;              /* bytes actually stored in protkey[]    */
+       __u8  protkey[MAXPROTKEYSIZE];         /* the protected key blob */
+};
+
+/* Struct to hold a clear key value */
+struct pkey_clrkey {
+       __u8  clrkey[MAXCLRKEYSIZE]; /* 16, 24, or 32 byte clear key value */
+};
+
+/*
+ * Generate secure key
+ */
+struct pkey_genseck {
+       __u16 cardnr;               /* in: card to use or FFFF for any   */
+       __u16 domain;               /* in: domain or FFFF for any        */
+       __u32 keytype;              /* in: key type to generate          */
+       struct pkey_seckey seckey;  /* out: the secure key blob          */
+};
+#define PKEY_GENSECK _IOWR(PKEY_IOCTL_MAGIC, 0x01, struct pkey_genseck)
+
+/*
+ * Construct secure key from clear key value
+ */
+struct pkey_clr2seck {
+       __u16 cardnr;               /* in: card to use or FFFF for any   */
+       __u16 domain;               /* in: domain or FFFF for any        */
+       __u32 keytype;              /* in: key type to generate          */
+       struct pkey_clrkey clrkey;  /* in: the clear key value           */
+       struct pkey_seckey seckey;  /* out: the secure key blob          */
+};
+#define PKEY_CLR2SECK _IOWR(PKEY_IOCTL_MAGIC, 0x02, struct pkey_clr2seck)
+
+/*
+ * Fabricate protected key from a secure key
+ */
+struct pkey_sec2protk {
+       __u16 cardnr;                /* in: card to use or FFFF for any   */
+       __u16 domain;                /* in: domain or FFFF for any        */
+       struct pkey_seckey seckey;   /* in: the secure key blob           */
+       struct pkey_protkey protkey; /* out: the protected key            */
+};
+#define PKEY_SEC2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x03, struct pkey_sec2protk)
+
+/*
+ * Fabricate protected key from a clear key value
+ */
+struct pkey_clr2protk {
+       __u32 keytype;               /* in: key type to generate          */
+       struct pkey_clrkey clrkey;   /* in: the clear key value           */
+       struct pkey_protkey protkey; /* out: the protected key            */
+};
+#define PKEY_CLR2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x04, struct pkey_clr2protk)
+
+/*
+ * Search for matching crypto card based on the Master Key
+ * Verification Pattern provided inside a secure key.
+ */
+struct pkey_findcard {
+       struct pkey_seckey seckey;             /* in: the secure key blob */
+       __u16  cardnr;                         /* out: card number        */
+       __u16  domain;                         /* out: domain number      */
+};
+#define PKEY_FINDCARD _IOWR(PKEY_IOCTL_MAGIC, 0x05, struct pkey_findcard)
+
+/*
+ * Combined together: findcard + sec2prot
+ */
+struct pkey_skey2pkey {
+       struct pkey_seckey seckey;   /* in: the secure key blob           */
+       struct pkey_protkey protkey; /* out: the protected key            */
+};
+#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
+
+#endif /* _UAPI_PKEY_H */
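A hedged userspace sketch of the ioctl interface defined above: generate a random AES-256 secure key through the pkey device (the /dev/pkey node name and the error handling are assumptions of this example):

```c
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/pkey.h>

int make_seckey(struct pkey_seckey *out)
{
	struct pkey_genseck gs = {
		.cardnr  = 0xffff,		/* FFFF: any card   */
		.domain  = 0xffff,		/* FFFF: any domain */
		.keytype = PKEY_KEYTYPE_AES_256,
	};
	int fd = open("/dev/pkey", O_RDWR);
	int rc;

	if (fd < 0)
		return -1;
	rc = ioctl(fd, PKEY_GENSECK, &gs);
	if (rc == 0)
		*out = gs.seckey;
	close(fd);
	return rc;
}
```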
index e2293c662bdfa6f76c2169247c5999e6fff88df2..dd1d5c62c374b70919a7b7c727291977e0c90cf2 100644 (file)
@@ -32,6 +32,7 @@ static struct memblock_type oldmem_type = {
        .max = 1,
        .total_size = 0,
        .regions = &oldmem_region,
+       .name = "oldmem",
 };
 
 struct save_area {
index db469fa11462c16a0f21676a4324c24befcee2b6..dff2152350a7ebaaf3df6c8b000eb36b03afd19e 100644 (file)
@@ -50,7 +50,8 @@ _TIF_WORK     = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
                   _TIF_UPROBE)
 _TIF_TRACE     = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
                   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK      = (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
+_CIF_WORK      = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
+                  _CIF_ASCE_SECONDARY | _CIF_FPU)
 _PIF_WORK      = (_PIF_PER_TRAP)
 
 #define BASED(name) name-cleanup_critical(%r13)
@@ -339,8 +340,8 @@ ENTRY(system_call)
        jo      .Lsysc_notify_resume
        TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
        jo      .Lsysc_vxrs
-       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE
-       jo      .Lsysc_uaccess
+       TSTMSK  __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
+       jnz     .Lsysc_asce
        j       .Lsysc_return           # beware of critical section cleanup
 
 #
@@ -358,12 +359,15 @@ ENTRY(system_call)
        jg      s390_handle_mcck        # TIF bit will be cleared by handler
 
 #
-# _CIF_ASCE is set, load user space asce
+# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY is set, load user space asce
 #
-.Lsysc_uaccess:
-       ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE
+.Lsysc_asce:
+       ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
-       j       .Lsysc_return
+       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
+       jz      .Lsysc_return
+       larl    %r14,.Lsysc_return
+       jg      set_fs_fixup
 
 #
 # CIF_FPU is set, restore floating-point controls and floating-point registers.
@@ -661,8 +665,8 @@ ENTRY(io_int_handler)
        jo      .Lio_notify_resume
        TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
        jo      .Lio_vxrs
-       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE
-       jo      .Lio_uaccess
+       TSTMSK  __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
+       jnz     .Lio_asce
        j       .Lio_return             # beware of critical section cleanup
 
 #
@@ -675,12 +679,15 @@ ENTRY(io_int_handler)
        j       .Lio_return
 
 #
-# _CIF_ASCE is set, load user space asce
+# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY is set, load user space asce
 #
-.Lio_uaccess:
-       ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE
+.Lio_asce:
+       ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
-       j       .Lio_return
+       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
+       jz      .Lio_return
+       larl    %r14,.Lio_return
+       jg      set_fs_fixup
 
 #
 # CIF_FPU is set, restore floating-point controls and floating-point registers.
index e79f030dd276381bfeea25643077c607be951f1c..33f9018653261c33e819ecdfc7ad8645b965cbde 100644 (file)
@@ -80,5 +80,6 @@ long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
 DECLARE_PER_CPU(u64, mt_cycles[8]);
 
 void verify_facilities(void);
+void set_fs_fixup(void);
 
 #endif /* _ENTRY_H */
index 56e14d073167407fe03b22ad8b0dac490c674d4d..80c093e0c6f1a3ef56fcc38e04d1af9a076f427f 100644 (file)
@@ -116,6 +116,19 @@ static int notrace s390_validate_registers(union mci mci, int umode)
                        s390_handle_damage();
                kill_task = 1;
        }
+       /* Validate control registers */
+       if (!mci.cr) {
+               /*
+                * Control registers have unknown contents.
+                * Can't recover and therefore stopping machine.
+                */
+               s390_handle_damage();
+       } else {
+               asm volatile(
+                       "       lctlg   0,15,0(%0)\n"
+                       "       ptlb\n"
+                       : : "a" (&S390_lowcore.cregs_save_area) : "memory");
+       }
        if (!mci.fp) {
                /*
                 * Floating point registers can't be restored. If the
@@ -208,18 +221,6 @@ static int notrace s390_validate_registers(union mci mci, int umode)
                 */
                kill_task = 1;
        }
-       /* Validate control registers */
-       if (!mci.cr) {
-               /*
-                * Control registers have unknown contents.
-                * Can't recover and therefore stopping machine.
-                */
-               s390_handle_damage();
-       } else {
-               asm volatile(
-                       "       lctlg   0,15,0(%0)"
-                       : : "a" (&S390_lowcore.cregs_save_area) : "memory");
-       }
        /*
         * We don't even try to validate the TOD register, since we simply
         * can't write something sensible into that register.
index c5b86b4a1a8b613716dbc7a8947ec31dbd8c147c..54281660582cb1d70f49ac432afe3a8af3d6e5ef 100644 (file)
@@ -100,8 +100,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
        return 0;
 }
 
-int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
-               unsigned long arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
+                   unsigned long arg, struct task_struct *p, unsigned long tls)
 {
        struct fake_frame
        {
@@ -156,7 +156,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 
        /* Set a new TLS ?  */
        if (clone_flags & CLONE_SETTLS) {
-               unsigned long tls = frame->childregs.gprs[6];
                if (is_compat_task()) {
                        p->thread.acrs[0] = (unsigned int)tls;
                } else {
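The copy_thread_tls() conversion (paired with the HAVE_COPY_THREAD_TLS select in the s390 Kconfig hunk earlier) hands TLS over as an explicit argument instead of fishing it out of the child's saved registers, as the deleted line above did. Roughly what the generic fork path does with it, simplified from kernel/fork.c of this era:

```c
#ifdef CONFIG_HAVE_COPY_THREAD_TLS
	retval = copy_thread_tls(clone_flags, stack_start, stack_size,
				 p, tls);
#else
	/* legacy: the architecture digs TLS out of the regs itself */
	retval = copy_thread(clone_flags, stack_start, stack_size, p);
#endif
```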
@@ -234,3 +233,16 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
        ret = PAGE_ALIGN(mm->brk + brk_rnd());
        return (ret > mm->brk) ? ret : mm->brk;
 }
+
+void set_fs_fixup(void)
+{
+       struct pt_regs *regs = current_pt_regs();
+       static bool warned;
+
+       set_fs(USER_DS);
+       if (warned)
+               return;
+       WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code);
+       show_registers(regs);
+       warned = true;
+}
index 21004aaac69b06974608ba8b7d39dfa8c0b08998..bc2b60dcb17828037b73440e7baefb5433b3beac 100644 (file)
@@ -73,7 +73,7 @@ void cpu_init(void)
        get_cpu_id(id);
        if (machine_has_cpu_mhz)
                update_cpu_mhz(NULL);
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
        BUG_ON(current->mm);
        enter_lazy_tlb(&init_mm, current);
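This hunk and the MIPS one below replace the open-coded mm_count increment with the then-new mmgrab() helper; for reference, the helper introduced in <linux/sched.h> in this merge window is essentially:

```c
static inline void mmgrab(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_count);
}
```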
index b4a3e9e06ef244967b440057a1ee46b05bdb0def..31bd96e811677546b8a668d9c6e1e85b8d980f7f 100644 (file)
@@ -350,7 +350,7 @@ static void __add_vtimer(struct vtimer_list *timer, int periodic)
 }
 
 /*
- * add_virt_timer - add an oneshot virtual CPU timer
+ * add_virt_timer - add a oneshot virtual CPU timer
  */
 void add_virt_timer(struct vtimer_list *timer)
 {
index 59ac93714fa47dd34e44fa0bbf25d08bcba87ab5..a07b1ec1391d537efec0e89edbe64e8b899bd1a9 100644 (file)
@@ -359,8 +359,8 @@ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
        spin_lock(&gmap->guest_table_lock);
        entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
        if (entry) {
-               flush = (*entry != _SEGMENT_ENTRY_INVALID);
-               *entry = _SEGMENT_ENTRY_INVALID;
+               flush = (*entry != _SEGMENT_ENTRY_EMPTY);
+               *entry = _SEGMENT_ENTRY_EMPTY;
        }
        spin_unlock(&gmap->guest_table_lock);
        return flush;
@@ -589,7 +589,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
                return rc;
        ptl = pmd_lock(mm, pmd);
        spin_lock(&gmap->guest_table_lock);
-       if (*table == _SEGMENT_ENTRY_INVALID) {
+       if (*table == _SEGMENT_ENTRY_EMPTY) {
                rc = radix_tree_insert(&gmap->host_to_guest,
                                       vmaddr >> PMD_SHIFT, table);
                if (!rc)
index a038162277195eaed7c65e92655096dd3ae36564..9b4050caa4e92f0465f61df52386a3ffc517ce3d 100644 (file)
@@ -62,7 +62,7 @@ static inline unsigned long __pte_to_rste(pte_t pte)
                rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
                                     _SEGMENT_ENTRY_NOEXEC);
        } else
-               rste = _SEGMENT_ENTRY_INVALID;
+               rste = _SEGMENT_ENTRY_EMPTY;
        return rste;
 }
 
index 4c0fa9b3b2a003cf4acf191631432f54001c6901..364b9d824be30bc7825bae3acce72231d8d1bbe1 100644 (file)
@@ -641,7 +641,7 @@ int pcibios_add_device(struct pci_dev *pdev)
        int i;
 
        pdev->dev.groups = zpci_attr_groups;
-       pdev->dev.archdata.dma_ops = &s390_pci_dma_ops;
+       pdev->dev.dma_ops = &s390_pci_dma_ops;
        zpci_map_resources(pdev);
 
        for (i = 0; i < PCI_BAR_COUNT; i++) {
index 1d7a9c71944a7737c16cdec2d6a68b798e6590b0..9081a57fa340ce40059565363a3c2f93ce202055 100644 (file)
@@ -650,7 +650,7 @@ static int __init dma_debug_do_init(void)
 }
 fs_initcall(dma_debug_do_init);
 
-struct dma_map_ops s390_pci_dma_ops = {
+const struct dma_map_ops s390_pci_dma_ops = {
        .alloc          = s390_dma_alloc,
        .free           = s390_dma_free,
        .map_sg         = s390_dma_map_sg,
index db3e28ca3ae27a093dcc2aa2ed08058a36984a89..926943a49ea5abf8925ca01399cbc9fe9da9006c 100644 (file)
@@ -13,3 +13,4 @@ generic-y += trace_clock.h
 generic-y += xor.h
 generic-y += serial.h
 generic-y += word-at-a-time.h
+generic-y += kprobes.h
index 2b22bcf02c27e8559adc3aee6e7c61bb770a6109..569ac02f68dfe57689156a78b3345bf2cf533ce9 100644 (file)
@@ -336,7 +336,7 @@ void __init trap_init(void)
        set_except_vector(18, handle_dbe);
        flush_icache_range(DEBUG_VECTOR_BASE_ADDR, IRQ_VECTOR_BASE_ADDR);
 
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
        cpu_cache_init();
 }
old mode 100755 (executable)
new mode 100644 (file)
index 0052ad40e86d33f04a2034fc33337d1e1859d035..d99008af5f73bde118fb7b513d7315c4bd80d7b4 100644 (file)
@@ -1,10 +1,10 @@
 #ifndef __ASM_SH_DMA_MAPPING_H
 #define __ASM_SH_DMA_MAPPING_H
 
-extern struct dma_map_ops *dma_ops;
+extern const struct dma_map_ops *dma_ops;
 extern void no_iommu_init(void);
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return dma_ops;
 }
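
This is part of the dma_map_ops consolidation visible throughout this section: the ops tables become const, per-device ops move out of archdata into struct device itself (see the s390 pci and tile hunks), and the per-arch hook is renamed to get_arch_dma_ops() and keyed by bus rather than device. The generic header can then resolve ops in one place; roughly, as a sketch of the consolidated include/linux/dma-mapping.h helper (not part of this diff):

        static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
        {
                if (dev && dev->dma_ops)
                        return dev->dma_ops;                    /* per-device override */
                return get_arch_dma_ops(dev ? dev->bus : NULL); /* arch/bus default */
        }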
index 134f3980e44a60ab9caf41796ad031f6bc966748..f0986f9b38445cb665ec9c1a15fdea5b8cafb7ac 100644 (file)
@@ -1,13 +1,16 @@
 #ifndef __ASM_SH_KPROBES_H
 #define __ASM_SH_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0xc33a
+
 #ifdef CONFIG_KPROBES
 
 #include <linux/types.h>
 #include <linux/ptrace.h>
 
 typedef insn_size_t kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xc33a
 
 #define MAX_INSN_SIZE 16
 #define MAX_STACK_SIZE 64
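
The kprobes.h churn here and in the sparc/tile headers below follows one pattern: BREAKPOINT_INSTRUCTION moves outside the CONFIG_KPROBES guard (it is needed regardless), everything else moves inside it, and every arch gains asm-generic/kprobes.h (hence the generic-y additions elsewhere in this section). An abbreviated, approximate sketch of what that generic header supplies — stubs so the annotations compile away when kprobes are off:

        #ifdef CONFIG_KPROBES
        # define __kprobes      __attribute__((__section__(".kprobes.text")))
        /* NOKPROBE_SYMBOL(fname) additionally records fname in a
         * _kprobe_blacklist section */
        #else
        # define __kprobes
        # define NOKPROBE_SYMBOL(fname)
        #endif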
index 47fee3b6e29c1311ac2f36779ff5560c8d98cb28..d24c707b21818a8d46ff9078373bc293691ab83b 100644 (file)
@@ -65,7 +65,7 @@ static void nommu_sync_sg(struct device *dev, struct scatterlist *sg,
 }
 #endif
 
-struct dma_map_ops nommu_dma_ops = {
+const struct dma_map_ops nommu_dma_ops = {
        .alloc                  = dma_generic_alloc_coherent,
        .free                   = dma_generic_free_coherent,
        .map_page               = nommu_map_page,
index bc3591125df7a51a43140e743160b6ee8a040dfb..04487e8fc9b12ef08383c88051a307329e241173 100644 (file)
@@ -99,7 +99,7 @@ static inline void handle_one_irq(unsigned int irq)
                        "mov    %0, r4          \n"
                        "mov    r15, r8         \n"
                        "jsr    @%1             \n"
-                       /* swith to the irq stack */
+                       /* switch to the irq stack */
                        " mov   %2, r15         \n"
                        /* restore the stack (ring zero) */
                        "mov    r8, r15         \n"
index 38e7860845db1e37f7519881337a5161f59afdfd..edc4769b047eee780c516b23d2d0a06e44de202d 100644 (file)
@@ -178,8 +178,8 @@ asmlinkage void start_secondary(void)
        struct mm_struct *mm = &init_mm;
 
        enable_mmu();
-       atomic_inc(&mm->mm_count);
-       atomic_inc(&mm->mm_users);
+       mmgrab(mm);
+       mmget(mm);
        current->active_mm = mm;
 #ifdef CONFIG_MMU
        enter_lazy_tlb(mm, current);
index 92b6976fde592008b29b0581a1b3e91b2b5633f7..d1275adfa0efb94c76bb880827103e921bb2d6e0 100644 (file)
@@ -22,7 +22,7 @@
 
 #define PREALLOC_DMA_DEBUG_ENTRIES     4096
 
-struct dma_map_ops *dma_ops;
+const struct dma_map_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static int __init dma_init(void)
index 1180ae25415489d1642ffc94a5db70011f250dd9..69cc627779f25ffa5c01f226f65adcd29be8a193 100644 (file)
@@ -18,20 +18,20 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
         */
 }
 
-extern struct dma_map_ops *dma_ops;
-extern struct dma_map_ops *leon_dma_ops;
-extern struct dma_map_ops pci32_dma_ops;
+extern const struct dma_map_ops *dma_ops;
+extern const struct dma_map_ops *leon_dma_ops;
+extern const struct dma_map_ops pci32_dma_ops;
 
 extern struct bus_type pci_bus_type;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
 #ifdef CONFIG_SPARC_LEON
        if (sparc_cpu_model == sparc_leon)
                return leon_dma_ops;
 #endif
 #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
-       if (dev->bus == &pci_bus_type)
+       if (bus == &pci_bus_type)
                return &pci32_dma_ops;
 #endif
        return dma_ops;
index a145d798e1123a9f189e4d546092f689551289f6..49f8402035d7d5bd8c223d140017db2fb52f9a37 100644 (file)
@@ -1,13 +1,17 @@
 #ifndef _SPARC64_KPROBES_H
 #define _SPARC64_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION   0x91d02070 /* ta 0x70 */
+#define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */
+
+#ifdef CONFIG_KPROBES
 #include <linux/types.h>
 #include <linux/percpu.h>
 
 typedef u32 kprobe_opcode_t;
 
-#define BREAKPOINT_INSTRUCTION   0x91d02070 /* ta 0x70 */
-#define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */
 #define MAX_INSN_SIZE 2
 
 #define kretprobe_blacklist_size 0
@@ -48,4 +52,6 @@ int kprobe_exceptions_notify(struct notifier_block *self,
 int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 asmlinkage void __kprobes kprobe_trap(unsigned long trap_level,
                                      struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
 #endif /* _SPARC64_KPROBES_H */
index c1263fc390db6d2f1672c9380d5a63d9b881d066..f294dd42fc7d3833ccc5b92fa6077e3b7d869d26 100644 (file)
@@ -17,7 +17,8 @@
 
 #define HPAGE_SHIFT            23
 #define REAL_HPAGE_SHIFT       22
-
+#define HPAGE_256MB_SHIFT      28
+#define HPAGE_64K_SHIFT                16
 #define REAL_HPAGE_SIZE                (_AC(1,UL) << REAL_HPAGE_SHIFT)
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
@@ -26,6 +27,7 @@
 #define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 #define REAL_HPAGE_PER_HPAGE   (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
+#define HUGE_MAX_HSTATE                3
 #endif
 
 #ifndef __ASSEMBLY__
index 314b66851348200f03970e7df655127ea87decad..7932a4a378176cfc697ba56758ad605f67bc16fb 100644 (file)
@@ -375,7 +375,10 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
 #define pgprot_noncached pgprot_noncached
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline unsigned long __pte_huge_mask(void)
+extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+                               struct page *page, int writable);
+#define arch_make_huge_pte arch_make_huge_pte
+static inline unsigned long __pte_default_huge_mask(void)
 {
        unsigned long mask;
 
@@ -395,12 +398,14 @@ static inline unsigned long __pte_huge_mask(void)
 
 static inline pte_t pte_mkhuge(pte_t pte)
 {
-       return __pte(pte_val(pte) | _PAGE_PMD_HUGE | __pte_huge_mask());
+       return __pte(pte_val(pte) | __pte_default_huge_mask());
 }
 
-static inline bool is_hugetlb_pte(pte_t pte)
+static inline bool is_default_hugetlb_pte(pte_t pte)
 {
-       return !!(pte_val(pte) & __pte_huge_mask());
+       unsigned long mask = __pte_default_huge_mask();
+
+       return (pte_val(pte) & mask) == mask;
 }
 
 static inline bool is_hugetlb_pmd(pmd_t pmd)
@@ -875,10 +880,12 @@ static inline unsigned long pud_pfn(pud_t pud)
 
 /* Actual page table PTE updates.  */
 void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
-                  pte_t *ptep, pte_t orig, int fullmm);
+                  pte_t *ptep, pte_t orig, int fullmm,
+                  unsigned int hugepage_shift);
 
 static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
-                               pte_t *ptep, pte_t orig, int fullmm)
+                               pte_t *ptep, pte_t orig, int fullmm,
+                               unsigned int hugepage_shift)
 {
        /* It is more efficient to let flush_tlb_kernel_range()
         * handle init_mm tlb flushes.
@@ -887,7 +894,7 @@ static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
         *             and SUN4V pte layout, so this inline test is fine.
         */
        if (likely(mm != &init_mm) && pte_accessible(mm, orig))
-               tlb_batch_add(mm, vaddr, ptep, orig, fullmm);
+               tlb_batch_add(mm, vaddr, ptep, orig, fullmm, hugepage_shift);
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -906,7 +913,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
        pte_t orig = *ptep;
 
        *ptep = pte;
-       maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm);
+       maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm, PAGE_SHIFT);
 }
 
 #define set_pte_at(mm,addr,ptep,pte)   \
index 29d64b1758ed2a0ceb795a07c10205e6f923e6c6..478bf6bb4598b345dd7f590beee86f43893e7645 100644 (file)
@@ -59,8 +59,11 @@ extern atomic_t dcpage_flushes;
 extern atomic_t dcpage_flushes_xcall;
 
 extern int sysctl_tsb_ratio;
-#endif
 
+#ifdef CONFIG_SERIAL_SUNHV
+void sunhv_migrate_hvcons_irq(int cpu);
+#endif
+#endif
 void sun_do_break(void);
 extern int stop_a_enabled;
 extern int scons_pwroff;
index 16f10374feb32b1468ec092e7d9cf180ef788cda..475dd4158ae4f6c2c295d1b42c172905196eef5b 100644 (file)
@@ -9,7 +9,7 @@ extern struct thread_info *current_set[NR_CPUS];
  * Flush windows so that the VM switch which follows
  * would not pull the stack from under us.
  *
- * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
+ * SWITCH_ENTER and SWITCH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
  * XXX WTF is the above comment? Found in late teen 2.4.x.
  */
 #ifdef CONFIG_SMP
index a8e192e907003dd855f9bb232dede7ae5eb069f3..54be88a6774c5cc64fd10aa381999a21bb071ec4 100644 (file)
@@ -8,7 +8,7 @@
 #define TLB_BATCH_NR   192
 
 struct tlb_batch {
-       bool huge;
+       unsigned int hugepage_shift;
        struct mm_struct *mm;
        unsigned long tlb_nr;
        unsigned long active;
@@ -17,7 +17,8 @@ struct tlb_batch {
 
 void flush_tsb_kernel_range(unsigned long start, unsigned long end);
 void flush_tsb_user(struct tlb_batch *tb);
-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge);
+void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
+                        unsigned int hugepage_shift);
 
 /* TLB flush operations. */
 
index 225543000122777847f686d418c62e6e6b92d1a5..ad5293f89680f4c3dd4405e413e2a2b4966bcd1b 100644 (file)
@@ -4,7 +4,6 @@
 #ifdef CONFIG_NUMA
 
 #include <asm/mmzone.h>
-#include <asm/cpudata.h>
 
 static inline int cpu_to_node(int cpu)
 {
@@ -42,6 +41,9 @@ int __node_distance(int, int);
 #endif /* !(CONFIG_NUMA) */
 
 #ifdef CONFIG_SMP
+
+#include <asm/cpudata.h>
+
 #define topology_physical_package_id(cpu)      (cpu_data(cpu).proc_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu).core_id)
 #define topology_core_cpumask(cpu)             (&cpu_core_sib_map[cpu])
index f87aae5a908e668a9d458a3c45724448b26bb72c..36196c17aff8ed886a4c1f6e11248ea06f8341b7 100644 (file)
@@ -42,8 +42,8 @@ struct arch_uprobe {
 };
 
 struct arch_uprobe_task {
-       u32 saved_tpc;
-       u32 saved_tnpc;
+       u64 saved_tpc;
+       u64 saved_tnpc;
 };
 
 struct task_struct;
index 9df997995f6b8970c64acd9d562d9ad8bb1db79f..c63ba99ca551a8b07794d9a02609a9e820943966 100644 (file)
@@ -741,7 +741,7 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
        spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
-static struct dma_map_ops sun4u_dma_ops = {
+static const struct dma_map_ops sun4u_dma_ops = {
        .alloc                  = dma_4u_alloc_coherent,
        .free                   = dma_4u_free_coherent,
        .map_page               = dma_4u_map_page,
@@ -752,7 +752,7 @@ static struct dma_map_ops sun4u_dma_ops = {
        .sync_sg_for_cpu        = dma_4u_sync_sg_for_cpu,
 };
 
-struct dma_map_ops *dma_ops = &sun4u_dma_ops;
+const struct dma_map_ops *dma_ops = &sun4u_dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 int dma_supported(struct device *dev, u64 device_mask)
index 6ffaec44931a6eee4bdba92e0cae91c431dfa1c6..cf20033a14584e97dfe472a749f5345471e6a84e 100644 (file)
@@ -401,7 +401,7 @@ static void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
        BUG();
 }
 
-static struct dma_map_ops sbus_dma_ops = {
+static const struct dma_map_ops sbus_dma_ops = {
        .alloc                  = sbus_alloc_coherent,
        .free                   = sbus_free_coherent,
        .map_page               = sbus_map_page,
@@ -637,7 +637,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *
        }
 }
 
-struct dma_map_ops pci32_dma_ops = {
+const struct dma_map_ops pci32_dma_ops = {
        .alloc                  = pci32_alloc_coherent,
        .free                   = pci32_free_coherent,
        .map_page               = pci32_map_page,
@@ -652,10 +652,10 @@ struct dma_map_ops pci32_dma_ops = {
 EXPORT_SYMBOL(pci32_dma_ops);
 
 /* leon re-uses pci32_dma_ops */
-struct dma_map_ops *leon_dma_ops = &pci32_dma_ops;
+const struct dma_map_ops *leon_dma_ops = &pci32_dma_ops;
 EXPORT_SYMBOL(leon_dma_ops);
 
-struct dma_map_ops *dma_ops = &sbus_dma_ops;
+const struct dma_map_ops *dma_ops = &sbus_dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 
index 71e16f2241c25f916734285b7f97e2deb7bb2c59..b99d33797e1df01cdf0ba194ee9ee11745076fd8 100644 (file)
@@ -93,7 +93,7 @@ void leon_cpu_pre_online(void *arg)
                             : "memory" /* paranoid */);
 
        /* Attach to the address space of init_task. */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
        while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
index f4daccd12bf549849d47d04bf048d9500fffcb27..68bec7c97cb8ff9a1e46c120c81542027f710210 100644 (file)
@@ -669,7 +669,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
        local_irq_restore(flags);
 }
 
-static struct dma_map_ops sun4v_dma_ops = {
+static const struct dma_map_ops sun4v_dma_ops = {
        .alloc                          = dma_4v_alloc_coherent,
        .free                           = dma_4v_free_coherent,
        .map_page                       = dma_4v_map_page,
index 0ce347f8e4ccf30ae94bb52ec0d4dca4642fa5c6..8e3e13924594c2cf8b8bdeef9ef156258ba873b9 100644 (file)
@@ -122,7 +122,7 @@ void smp_callin(void)
        current_thread_info()->new_child = 0;
 
        /* Attach to the address space of init_task. */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
        /* inform the notifiers about the new cpu */
@@ -1443,6 +1443,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
 
 static void stop_this_cpu(void *dummy)
 {
+       set_cpu_online(smp_processor_id(), false);
        prom_stopself();
 }
 
@@ -1451,9 +1452,15 @@ void smp_send_stop(void)
        int cpu;
 
        if (tlb_type == hypervisor) {
+               int this_cpu = smp_processor_id();
+#ifdef CONFIG_SERIAL_SUNHV
+               sunhv_migrate_hvcons_irq(this_cpu);
+#endif
                for_each_online_cpu(cpu) {
-                       if (cpu == smp_processor_id())
+                       if (cpu == this_cpu)
                                continue;
+
+                       set_cpu_online(cpu, false);
 #ifdef CONFIG_SUN_LDOMS
                        if (ldom_domaining_enabled) {
                                unsigned long hv_err;
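
The smp_send_stop() rework above keeps the survivor's bookkeeping honest: stop_this_cpu() and the hypervisor loop now clear each parked CPU from the online mask, and on sun4v the hypervisor console interrupt is migrated to the stopping CPU first (sunhv_migrate_hvcons_irq(), declared under CONFIG_SERIAL_SUNHV in the setup.h hunk earlier), so late panic/shutdown output still has a live CPU to be delivered to.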
index 9d98e5002a09a483eb87b23bd68b33a6e6f83e05..7b55c50eabe55adf3c05da74871d05bb71245024 100644 (file)
@@ -93,7 +93,7 @@ void sun4d_cpu_pre_online(void *arg)
        show_leds(cpuid);
 
        /* Attach to the address space of init_task. */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
        local_ops->cache_all();
index 278c40abce828f78650b5d5a52841867065c0750..633c4cf6fdb0bfd9f8990abf265666a30d691976 100644 (file)
@@ -59,7 +59,7 @@ void sun4m_cpu_pre_online(void *arg)
                             : "memory" /* paranoid */);
 
        /* Attach to the address space of init_task. */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
        while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
index 4f21df7d4f13bd32935f870ec26fa6e03b0f8a01..ecddac5a4c9628e28eb16022bc32c1335732f965 100644 (file)
@@ -448,7 +448,7 @@ void trap_init(void)
                thread_info_offsets_are_bolixed_pete();
 
        /* Attach to the address space of init_task. */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 
        /* NOTE: Other cpus have this done as they are started
index dfc97a47c9a08a330f31040fe120030ebe8cc098..e022d7b0039045e6eade6a1a31d8cae0fa2d274d 100644 (file)
@@ -2837,6 +2837,6 @@ void __init trap_init(void)
        /* Attach to the address space of init_task.  On SMP we
         * do this in smp.c:smp_callin for other cpus.
         */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
 }
index d568c8207af72ffbd15aae8e5f41f77401ba5397..10689cfd0ad40e6b12ae6b148f99ac2f5c7deb64 100644 (file)
@@ -117,26 +117,11 @@ tsb_miss_page_table_walk_sun4v_fastpath:
        /* Valid PTE is now in %g5.  */
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-661:   sethi           %uhi(_PAGE_SZALL_4U), %g7
+       sethi           %uhi(_PAGE_PMD_HUGE), %g7
        sllx            %g7, 32, %g7
-       .section        .sun4v_2insn_patch, "ax"
-       .word           661b
-       mov             _PAGE_SZALL_4V, %g7
-       nop
-       .previous
-
-       and             %g5, %g7, %g2
-
-661:   sethi           %uhi(_PAGE_SZHUGE_4U), %g7
-       sllx            %g7, 32, %g7
-       .section        .sun4v_2insn_patch, "ax"
-       .word           661b
-       mov             _PAGE_SZHUGE_4V, %g7
-       nop
-       .previous
 
-       cmp             %g2, %g7
-       bne,pt          %xcc, 60f
+       andcc           %g5, %g7, %g0
+       be,pt           %xcc, 60f
         nop
 
        /* It is a huge page, use huge page TSB entry address we
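
The rewritten TSB-miss fastpath drops the sun4v_2insn_patch sequence that masked out the TTE's size field (_PAGE_SZALL_*) and compared it against the single supported huge encoding (_PAGE_SZHUGE_*) — a scheme that cannot survive several coexisting hugepage sizes. Instead it tests the _PAGE_PMD_HUGE software bit, which the new TTE helpers later in this series set only on the PMD-level 8MB and 256MB mappings; 64K hugepages intentionally keep taking the base-TSB path.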
index c4ac58e483a4ebacad0b96b552176bc43d99a301..8f35eea2103aee1449410d9c97685af79b9104b7 100644 (file)
@@ -30,7 +30,7 @@
 /* 001001011 - two 32-bit merges */
 #define FPMERGE_OPF    0x04b
 
-/* 000110001 - 8-by-16-bit partitoned product  */
+/* 000110001 - 8-by-16-bit partitioned product  */
 #define FMUL8x16_OPF   0x031
 
 /* 000110011 - 8-by-16-bit upper alpha partitioned product  */
index 988acc8b1b80a387d9119782f53f1d41dbe53c4e..e98a3f2e8f0f4839c30a61c8bf583d0753a6219c 100644 (file)
@@ -28,6 +28,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
                                                        unsigned long pgoff,
                                                        unsigned long flags)
 {
+       struct hstate *h = hstate_file(filp);
        unsigned long task_size = TASK_SIZE;
        struct vm_unmapped_area_info info;
 
@@ -38,7 +39,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
        info.length = len;
        info.low_limit = TASK_UNMAPPED_BASE;
        info.high_limit = min(task_size, VA_EXCLUDE_START);
-       info.align_mask = PAGE_MASK & ~HPAGE_MASK;
+       info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);
 
@@ -58,6 +59,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                                  const unsigned long pgoff,
                                  const unsigned long flags)
 {
+       struct hstate *h = hstate_file(filp);
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        struct vm_unmapped_area_info info;
@@ -69,7 +71,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        info.length = len;
        info.low_limit = PAGE_SIZE;
        info.high_limit = mm->mmap_base;
-       info.align_mask = PAGE_MASK & ~HPAGE_MASK;
+       info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);
 
@@ -94,6 +96,7 @@ unsigned long
 hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
 {
+       struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long task_size = TASK_SIZE;
@@ -101,7 +104,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;
 
-       if (len & ~HPAGE_MASK)
+       if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > task_size)
                return -ENOMEM;
@@ -113,7 +116,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        }
 
        if (addr) {
-               addr = ALIGN(addr, HPAGE_SIZE);
+               addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
@@ -127,17 +130,141 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                pgoff, flags);
 }
 
+static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+       return entry;
+}
+
+static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+       unsigned long hugepage_size = _PAGE_SZ4MB_4V;
+
+       pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
+
+       switch (shift) {
+       case HPAGE_256MB_SHIFT:
+               hugepage_size = _PAGE_SZ256MB_4V;
+               pte_val(entry) |= _PAGE_PMD_HUGE;
+               break;
+       case HPAGE_SHIFT:
+               pte_val(entry) |= _PAGE_PMD_HUGE;
+               break;
+       case HPAGE_64K_SHIFT:
+               hugepage_size = _PAGE_SZ64K_4V;
+               break;
+       default:
+               WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
+       }
+
+       pte_val(entry) = pte_val(entry) | hugepage_size;
+       return entry;
+}
+
+static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+       if (tlb_type == hypervisor)
+               return sun4v_hugepage_shift_to_tte(entry, shift);
+       else
+               return sun4u_hugepage_shift_to_tte(entry, shift);
+}
+
+pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+                        struct page *page, int writeable)
+{
+       unsigned int shift = huge_page_shift(hstate_vma(vma));
+
+       return hugepage_shift_to_tte(entry, shift);
+}
+
+static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
+{
+       unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
+       unsigned int shift;
+
+       switch (tte_szbits) {
+       case _PAGE_SZ256MB_4V:
+               shift = HPAGE_256MB_SHIFT;
+               break;
+       case _PAGE_SZ4MB_4V:
+               shift = REAL_HPAGE_SHIFT;
+               break;
+       case _PAGE_SZ64K_4V:
+               shift = HPAGE_64K_SHIFT;
+               break;
+       default:
+               shift = PAGE_SHIFT;
+               break;
+       }
+       return shift;
+}
+
+static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
+{
+       unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
+       unsigned int shift;
+
+       switch (tte_szbits) {
+       case _PAGE_SZ256MB_4U:
+               shift = HPAGE_256MB_SHIFT;
+               break;
+       case _PAGE_SZ4MB_4U:
+               shift = REAL_HPAGE_SHIFT;
+               break;
+       case _PAGE_SZ64K_4U:
+               shift = HPAGE_64K_SHIFT;
+               break;
+       default:
+               shift = PAGE_SHIFT;
+               break;
+       }
+       return shift;
+}
+
+static unsigned int huge_tte_to_shift(pte_t entry)
+{
+       unsigned long shift;
+
+       if (tlb_type == hypervisor)
+               shift = sun4v_huge_tte_to_shift(entry);
+       else
+               shift = sun4u_huge_tte_to_shift(entry);
+
+       if (shift == PAGE_SHIFT)
+               WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n",
+                         pte_val(entry));
+
+       return shift;
+}
+
+static unsigned long huge_tte_to_size(pte_t pte)
+{
+       unsigned long size = 1UL << huge_tte_to_shift(pte);
+
+       if (size == REAL_HPAGE_SIZE)
+               size = HPAGE_SIZE;
+       return size;
+}
+
 pte_t *huge_pte_alloc(struct mm_struct *mm,
                        unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
+       pmd_t *pmd;
        pte_t *pte = NULL;
 
        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
-       if (pud)
-               pte = (pte_t *)pmd_alloc(mm, pud, addr);
+       if (pud) {
+               pmd = pmd_alloc(mm, pud, addr);
+               if (!pmd)
+                       return NULL;
+
+               if (sz == PMD_SHIFT)
+                       pte = (pte_t *)pmd;
+               else
+                       pte = pte_alloc_map(mm, pmd, addr);
+       }
 
        return pte;
 }
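
For orientation, the three shifts wired up here decode as 2^16 = 64KB, 2^23 = 8MB (HPAGE_SHIFT; still fabricated from two 4MB real hw pages, hence REAL_HPAGE_SHIFT = 22) and 2^28 = 256MB, and the unmapped-area helpers above now take their alignment from the hstate instead of the fixed HPAGE_MASK. One nit worth double-checking in huge_pte_alloc(): sz is a size in bytes yet is compared against PMD_SHIFT, a shift count, so as written the comparison can never be true and presumably wants PMD_SIZE.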
@@ -146,49 +273,83 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
        pgd_t *pgd;
        pud_t *pud;
+       pmd_t *pmd;
        pte_t *pte = NULL;
 
        pgd = pgd_offset(mm, addr);
        if (!pgd_none(*pgd)) {
                pud = pud_offset(pgd, addr);
-               if (!pud_none(*pud))
-                       pte = (pte_t *)pmd_offset(pud, addr);
+               if (!pud_none(*pud)) {
+                       pmd = pmd_offset(pud, addr);
+                       if (!pmd_none(*pmd)) {
+                               if (is_hugetlb_pmd(*pmd))
+                                       pte = (pte_t *)pmd;
+                               else
+                                       pte = pte_offset_map(pmd, addr);
+                       }
+               }
        }
+
        return pte;
 }
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
 {
+       unsigned int i, nptes, orig_shift, shift;
+       unsigned long size;
        pte_t orig;
 
+       size = huge_tte_to_size(entry);
+       shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
+       nptes = size >> shift;
+
        if (!pte_present(*ptep) && pte_present(entry))
-               mm->context.hugetlb_pte_count++;
+               mm->context.hugetlb_pte_count += nptes;
 
-       addr &= HPAGE_MASK;
+       addr &= ~(size - 1);
        orig = *ptep;
-       *ptep = entry;
+       orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);
+
+       for (i = 0; i < nptes; i++)
+               ptep[i] = __pte(pte_val(entry) + (i << shift));
 
-       /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
-       maybe_tlb_batch_add(mm, addr, ptep, orig, 0);
-       maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0);
+       maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
+       /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
+       if (size == HPAGE_SIZE)
+               maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
+                                   orig_shift);
 }
 
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
 {
+       unsigned int i, nptes, hugepage_shift;
+       unsigned long size;
        pte_t entry;
 
        entry = *ptep;
+       size = huge_tte_to_size(entry);
+       if (size >= HPAGE_SIZE)
+               nptes = size >> PMD_SHIFT;
+       else
+               nptes = size >> PAGE_SHIFT;
+
+       hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
+               huge_tte_to_shift(entry);
+
        if (pte_present(entry))
-               mm->context.hugetlb_pte_count--;
+               mm->context.hugetlb_pte_count -= nptes;
 
-       addr &= HPAGE_MASK;
-       *ptep = __pte(0UL);
+       addr &= ~(size - 1);
+       for (i = 0; i < nptes; i++)
+               ptep[i] = __pte(0UL);
 
-       /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
-       maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
-       maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0);
+       maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
+       /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
+       if (size == HPAGE_SIZE)
+               maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
+                                   hugepage_shift);
 
        return entry;
 }
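
The PTE fan-out in set_huge_pte_at()/huge_ptep_get_and_clear() is easiest to see with numbers. Below is a standalone model of the nptes arithmetic; the constants mirror the sparc64 values (8K base pages, PMD_SHIFT == HPAGE_SHIFT == 23) and nptes_for() is a hypothetical helper invented for illustration:

        #include <stdio.h>

        #define PAGE_SHIFT_     13      /* sparc64: 8K base pages */
        #define PMD_SHIFT_      23      /* == HPAGE_SHIFT, 8M regions */

        /* Same decision set_huge_pte_at() makes: sizes >= 8M are stored as
         * PMD-level entries, smaller hugepages as runs of PTE-level entries. */
        static unsigned int nptes_for(unsigned int hugepage_shift)
        {
                unsigned long size = 1UL << hugepage_shift;
                unsigned int shift = hugepage_shift >= PMD_SHIFT_ ? PMD_SHIFT_
                                                                  : PAGE_SHIFT_;
                return size >> shift;
        }

        int main(void)
        {
                printf("64K : %u entries\n", nptes_for(16));    /* 8 PTEs  */
                printf("8M  : %u entries\n", nptes_for(23));    /* 1 PMD   */
                printf("256M: %u entries\n", nptes_for(28));    /* 32 PMDs */
                return 0;
        }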
index 5d2f91511c60ce6099bc4e2ce3d383004febaffc..ccd4553289899ee2e585e409189206f67c7e77f2 100644 (file)
@@ -324,6 +324,50 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde
        tsb_insert(tsb, tag, tte);
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static int __init setup_hugepagesz(char *string)
+{
+       unsigned long long hugepage_size;
+       unsigned int hugepage_shift;
+       unsigned short hv_pgsz_idx;
+       unsigned int hv_pgsz_mask;
+       int rc = 0;
+
+       hugepage_size = memparse(string, &string);
+       hugepage_shift = ilog2(hugepage_size);
+
+       switch (hugepage_shift) {
+       case HPAGE_256MB_SHIFT:
+               hv_pgsz_mask = HV_PGSZ_MASK_256MB;
+               hv_pgsz_idx = HV_PGSZ_IDX_256MB;
+               break;
+       case HPAGE_SHIFT:
+               hv_pgsz_mask = HV_PGSZ_MASK_4MB;
+               hv_pgsz_idx = HV_PGSZ_IDX_4MB;
+               break;
+       case HPAGE_64K_SHIFT:
+               hv_pgsz_mask = HV_PGSZ_MASK_64K;
+               hv_pgsz_idx = HV_PGSZ_IDX_64K;
+               break;
+       default:
+               hv_pgsz_mask = 0;
+       }
+
+       if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
+               pr_warn("hugepagesz=%llu not supported by MMU.\n",
+                       hugepage_size);
+               goto out;
+       }
+
+       hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT);
+       rc = 1;
+
+out:
+       return rc;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+#endif /* CONFIG_HUGETLB_PAGE */
+
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
        struct mm_struct *mm;
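
Once an hstate is registered here, the extra sizes are requested the usual way on the kernel command line, e.g. booting with something like

        hugepagesz=256M hugepages=16

with the twist that the size must also appear in the MMU's advertised cpu_pgsz_mask; otherwise the pr_warn() above fires and no hstate is added.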
@@ -347,7 +391,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
-           is_hugetlb_pte(pte)) {
+           is_hugetlb_pmd(__pmd(pte_val(pte)))) {
                /* We are fabricating 8MB pages using 4MB real hw pages.  */
                pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
                __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
@@ -785,13 +829,23 @@ static void __init find_ramdisk(unsigned long phys_base)
 
 struct node_mem_mask {
        unsigned long mask;
-       unsigned long val;
+       unsigned long match;
 };
 static struct node_mem_mask node_masks[MAX_NUMNODES];
 static int num_node_masks;
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
 
+struct mdesc_mlgroup {
+       u64     node;
+       u64     latency;
+       u64     match;
+       u64     mask;
+};
+
+static struct mdesc_mlgroup *mlgroups;
+static int num_mlgroups;
+
 int numa_cpu_lookup_table[NR_CPUS];
 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
 
@@ -802,78 +856,129 @@ struct mdesc_mblock {
 };
 static struct mdesc_mblock *mblocks;
 static int num_mblocks;
-static int find_numa_node_for_addr(unsigned long pa,
-                                  struct node_mem_mask *pnode_mask);
 
-static unsigned long __init ra_to_pa(unsigned long addr)
+static struct mdesc_mblock * __init addr_to_mblock(unsigned long addr)
 {
+       struct mdesc_mblock *m = NULL;
        int i;
 
        for (i = 0; i < num_mblocks; i++) {
-               struct mdesc_mblock *m = &mblocks[i];
+               m = &mblocks[i];
 
                if (addr >= m->base &&
                    addr < (m->base + m->size)) {
-                       addr += m->offset;
                        break;
                }
        }
-       return addr;
+
+       return m;
 }
 
-static int __init find_node(unsigned long addr)
+static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid)
 {
-       static bool search_mdesc = true;
-       static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
-       static int last_index;
-       int i;
+       int prev_nid, new_nid;
 
-       addr = ra_to_pa(addr);
-       for (i = 0; i < num_node_masks; i++) {
-               struct node_mem_mask *p = &node_masks[i];
+       prev_nid = -1;
+       for ( ; start < end; start += PAGE_SIZE) {
+               for (new_nid = 0; new_nid < num_node_masks; new_nid++) {
+                       struct node_mem_mask *p = &node_masks[new_nid];
 
-               if ((addr & p->mask) == p->val)
-                       return i;
-       }
-       /* The following condition has been observed on LDOM guests because
-        * node_masks only contains the best latency mask and value.
-        * LDOM guest's mdesc can contain a single latency group to
-        * cover multiple address range. Print warning message only if the
-        * address cannot be found in node_masks nor mdesc.
-        */
-       if ((search_mdesc) &&
-           ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
-               /* find the available node in the mdesc */
-               last_index = find_numa_node_for_addr(addr, &last_mem_mask);
-               numadbg("find_node: latency group for address 0x%lx is %d\n",
-                       addr, last_index);
-               if ((last_index < 0) || (last_index >= num_node_masks)) {
-                       /* WARN_ONCE() and use default group 0 */
-                       WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
-                       search_mdesc = false;
-                       last_index = 0;
+                       if ((start & p->mask) == p->match) {
+                               if (prev_nid == -1)
+                                       prev_nid = new_nid;
+                               break;
+                       }
                }
+
+               if (new_nid == num_node_masks) {
+                       prev_nid = 0;
+                       WARN_ONCE(1, "addr[%Lx] doesn't match a NUMA node rule. Some memory will be owned by node 0.",
+                                 start);
+                       break;
+               }
+
+               if (prev_nid != new_nid)
+                       break;
        }
+       *nid = prev_nid;
 
-       return last_index;
+       return start > end ? end : start;
 }
 
 static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
 {
-       *nid = find_node(start);
-       start += PAGE_SIZE;
-       while (start < end) {
-               int n = find_node(start);
+       u64 ret_end, pa_start, m_mask, m_match, m_end;
+       struct mdesc_mblock *mblock;
+       int _nid, i;
+
+       if (tlb_type != hypervisor)
+               return memblock_nid_range_sun4u(start, end, nid);
+
+       mblock = addr_to_mblock(start);
+       if (!mblock) {
+               WARN_ONCE(1, "memblock_nid_range: Can't find mblock addr[%Lx]",
+                         start);
+
+               _nid = 0;
+               ret_end = end;
+               goto done;
+       }
+
+       pa_start = start + mblock->offset;
+       m_match = 0;
+       m_mask = 0;
+
+       for (_nid = 0; _nid < num_node_masks; _nid++) {
+               struct node_mem_mask *const m = &node_masks[_nid];
 
-               if (n != *nid)
+               if ((pa_start & m->mask) == m->match) {
+                       m_match = m->match;
+                       m_mask = m->mask;
                        break;
-               start += PAGE_SIZE;
+               }
        }
 
-       if (start > end)
-               start = end;
+       if (num_node_masks == _nid) {
+               /* We could not find NUMA group, so default to 0, but lets
+                * search for latency group, so we could calculate the correct
+                * end address that we return
+                */
+               _nid = 0;
+
+               for (i = 0; i < num_mlgroups; i++) {
+                       struct mdesc_mlgroup *const m = &mlgroups[i];
+
+                       if ((pa_start & m->mask) == m->match) {
+                               m_match = m->match;
+                               m_mask = m->mask;
+                               break;
+                       }
+               }
+
+               if (i == num_mlgroups) {
+                       WARN_ONCE(1, "memblock_nid_range: Can't find latency group addr[%Lx]",
+                                 start);
+
+                       ret_end = end;
+                       goto done;
+               }
+       }
 
-       return start;
+       /*
+        * Each latency group has match and mask, and each memory block has an
+        * offset.  An address belongs to a latency group if its address matches
+        * the following formula: ((addr + offset) & mask) == match
+        * It is, however, slow to check every single page if it matches a
+        * particular latency group. As optimization we calculate end value by
+        * using bit arithmetics.
+        */
+       m_end = m_match + (1ul << __ffs(m_mask)) - mblock->offset;
+       m_end += pa_start & ~((1ul << fls64(m_mask)) - 1);
+       ret_end = m_end > end ? end : m_end;
+
+done:
+       *nid = _nid;
+       return ret_end;
 }
 #endif
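
The block comment above compresses a fair bit of bit-twiddling. A standalone model with made-up numbers — a latency-group mask covering PA bits 32-35, a match selecting group 0x2, a zero mblock offset, and ffs_()/fls64_() standing in for the kernel's __ffs()/fls64() — shows the boundary it computes:

        #include <stdio.h>

        static unsigned long ffs_(unsigned long x)   { return __builtin_ctzl(x); }
        static unsigned long fls64_(unsigned long x) { return 64 - __builtin_clzl(x); }

        int main(void)
        {
                unsigned long mask     = 0x0f00000000UL; /* group mask (bits 32-35) */
                unsigned long match    = 0x0200000000UL; /* this group's match      */
                unsigned long offset   = 0;              /* mblock RA->PA offset    */
                unsigned long pa_start = 0x0210000000UL; /* PA we started from      */

                /* The same two lines as memblock_nid_range() above */
                unsigned long m_end = match + (1UL << ffs_(mask)) - offset;
                m_end += pa_start & ~((1UL << fls64_(mask)) - 1);

                /* Prints 0x300000000: the group covers [0x200000000, 0x300000000),
                 * so the caller clamps the range end there instead of probing
                 * every page. */
                printf("m_end = %#lx\n", m_end);
                return 0;
        }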
 
@@ -914,7 +1019,8 @@ static void init_node_masks_nonnuma(void)
 
        numadbg("Initializing tables for non-numa.\n");
 
-       node_masks[0].mask = node_masks[0].val = 0;
+       node_masks[0].mask = 0;
+       node_masks[0].match = 0;
        num_node_masks = 1;
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -932,15 +1038,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
 EXPORT_SYMBOL(numa_cpumask_lookup_table);
 EXPORT_SYMBOL(node_data);
 
-struct mdesc_mlgroup {
-       u64     node;
-       u64     latency;
-       u64     match;
-       u64     mask;
-};
-static struct mdesc_mlgroup *mlgroups;
-static int num_mlgroups;
-
 static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
                                   u32 cfg_handle)
 {
@@ -1029,6 +1126,10 @@ int of_node_to_nid(struct device_node *dp)
 static void __init add_node_ranges(void)
 {
        struct memblock_region *reg;
+       unsigned long prev_max;
+
+memblock_resized:
+       prev_max = memblock.memory.max;
 
        for_each_memblock(memory, reg) {
                unsigned long size = reg->size;
@@ -1048,6 +1149,8 @@ static void __init add_node_ranges(void)
 
                        memblock_set_node(start, this_end - start,
                                          &memblock.memory, nid);
+                       if (memblock.memory.max != prev_max)
+                               goto memblock_resized;
                        start = this_end;
                }
        }
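
The memblock_resized label guards against iterator invalidation rather than anything NUMA-specific: memblock_set_node() can split regions and, when the region array fills up, memblock doubles and reallocates it, leaving for_each_memblock()'s cursor dangling. Snapshotting memblock.memory.max and restarting the whole walk whenever it changes is the cheap, safe recovery.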
@@ -1182,41 +1285,6 @@ int __node_distance(int from, int to)
        return numa_latency[from][to];
 }
 
-static int find_numa_node_for_addr(unsigned long pa,
-                                  struct node_mem_mask *pnode_mask)
-{
-       struct mdesc_handle *md = mdesc_grab();
-       u64 node, arc;
-       int i = 0;
-
-       node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
-       if (node == MDESC_NODE_NULL)
-               goto out;
-
-       mdesc_for_each_node_by_name(md, node, "group") {
-               mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) {
-                       u64 target = mdesc_arc_target(md, arc);
-                       struct mdesc_mlgroup *m = find_mlgroup(target);
-
-                       if (!m)
-                               continue;
-                       if ((pa & m->mask) == m->match) {
-                               if (pnode_mask) {
-                                       pnode_mask->mask = m->mask;
-                                       pnode_mask->val = m->match;
-                               }
-                               mdesc_release(md);
-                               return i;
-                       }
-               }
-               i++;
-       }
-
-out:
-       mdesc_release(md);
-       return -1;
-}
-
 static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
 {
        int i;
@@ -1224,7 +1292,7 @@ static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
        for (i = 0; i < MAX_NUMNODES; i++) {
                struct node_mem_mask *n = &node_masks[i];
 
-               if ((grp->mask == n->mask) && (grp->match == n->val))
+               if ((grp->mask == n->mask) && (grp->match == n->match))
                        break;
        }
        return i;
@@ -1279,10 +1347,10 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
        n = &node_masks[num_node_masks++];
 
        n->mask = candidate->mask;
-       n->val = candidate->match;
+       n->match = candidate->match;
 
-       numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%llx])\n",
-               index, n->mask, n->val, candidate->latency);
+       numadbg("NUMA NODE[%d]: mask[%lx] match[%lx] (latency[%llx])\n",
+               index, n->mask, n->match, candidate->latency);
 
        return 0;
 }
@@ -1379,7 +1447,7 @@ static int __init numa_parse_jbus(void)
                numa_cpu_lookup_table[cpu] = index;
                cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
                node_masks[index].mask = ~((1UL << 36UL) - 1UL);
-               node_masks[index].val = cpu << 36UL;
+               node_masks[index].match = cpu << 36UL;
 
                index++;
        }
index c7f2a5295b3a54599b68e4932c1c68cf2bdab99f..def82f6d626f774772807427c6fe6e67fd343bf1 100644 (file)
@@ -1444,7 +1444,7 @@ static void poke_viking(void)
        srmmu_set_mmureg(mreg);
 }
 
-static struct sparc32_cachetlb_ops viking_ops = {
+static struct sparc32_cachetlb_ops viking_ops __ro_after_init = {
        .cache_all      = viking_flush_cache_all,
        .cache_mm       = viking_flush_cache_mm,
        .cache_page     = viking_flush_cache_page,
@@ -1475,7 +1475,7 @@ static struct sparc32_cachetlb_ops viking_ops = {
  * flushes going at once will require SMP locking anyways so there's
  * no real value in trying any harder than this.
  */
-static struct sparc32_cachetlb_ops viking_sun4d_smp_ops = {
+static struct sparc32_cachetlb_ops viking_sun4d_smp_ops __ro_after_init = {
        .cache_all      = viking_flush_cache_all,
        .cache_mm       = viking_flush_cache_mm,
        .cache_page     = viking_flush_cache_page,
@@ -1759,7 +1759,7 @@ static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
        local_ops->sig_insns(mm, insn_addr);
 }
 
-static struct sparc32_cachetlb_ops smp_cachetlb_ops = {
+static struct sparc32_cachetlb_ops smp_cachetlb_ops __ro_after_init = {
        .cache_all      = smp_flush_cache_all,
        .cache_mm       = smp_flush_cache_mm,
        .cache_page     = smp_flush_cache_page,
index c56a195c90719fc3eb1400ad14e6b3ae27bb8417..afda3bbf78542a0297849d65fe7470a5e73716f1 100644 (file)
@@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void)
 }
 
 static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
-                             bool exec, bool huge)
+                             bool exec, unsigned int hugepage_shift)
 {
        struct tlb_batch *tb = &get_cpu_var(tlb_batch);
        unsigned long nr;
@@ -84,19 +84,19 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
        }
 
        if (!tb->active) {
-               flush_tsb_user_page(mm, vaddr, huge);
+               flush_tsb_user_page(mm, vaddr, hugepage_shift);
                global_flush_tlb_page(mm, vaddr);
                goto out;
        }
 
        if (nr == 0) {
                tb->mm = mm;
-               tb->huge = huge;
+               tb->hugepage_shift = hugepage_shift;
        }
 
-       if (tb->huge != huge) {
+       if (tb->hugepage_shift != hugepage_shift) {
                flush_tlb_pending();
-               tb->huge = huge;
+               tb->hugepage_shift = hugepage_shift;
                nr = 0;
        }
 
@@ -110,10 +110,9 @@ out:
 }
 
 void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
-                  pte_t *ptep, pte_t orig, int fullmm)
+                  pte_t *ptep, pte_t orig, int fullmm,
+                  unsigned int hugepage_shift)
 {
-       bool huge = is_hugetlb_pte(orig);
-
        if (tlb_type != hypervisor &&
            pte_dirty(orig)) {
                unsigned long paddr, pfn = pte_pfn(orig);
@@ -139,7 +138,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
 
 no_cache_flush:
        if (!fullmm)
-               tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge);
+               tlb_batch_add_one(mm, vaddr, pte_exec(orig), hugepage_shift);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
index e20fbbafb0b04af0fa85b21188cd6c851c132e6e..23479c3d39f0221a98c86cf3923a040b3b3be658 100644 (file)
@@ -86,6 +86,33 @@ static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
                __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
 }
 
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static void __flush_huge_tsb_one_entry(unsigned long tsb, unsigned long v,
+                                      unsigned long hash_shift,
+                                      unsigned long nentries,
+                                      unsigned int hugepage_shift)
+{
+       unsigned int hpage_entries;
+       unsigned int i;
+
+       hpage_entries = 1 << (hugepage_shift - hash_shift);
+       for (i = 0; i < hpage_entries; i++)
+               __flush_tsb_one_entry(tsb, v + (i << hash_shift), hash_shift,
+                                     nentries);
+}
+
+static void __flush_huge_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
+                                unsigned long tsb, unsigned long nentries,
+                                unsigned int hugepage_shift)
+{
+       unsigned long i;
+
+       for (i = 0; i < tb->tlb_nr; i++)
+               __flush_huge_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift,
+                                          nentries, hugepage_shift);
+}
+#endif
+
 void flush_tsb_user(struct tlb_batch *tb)
 {
        struct mm_struct *mm = tb->mm;
@@ -93,45 +120,61 @@ void flush_tsb_user(struct tlb_batch *tb)
 
        spin_lock_irqsave(&mm->context.lock, flags);
 
-       if (!tb->huge) {
+       if (tb->hugepage_shift < HPAGE_SHIFT) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
-               __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+               if (tb->hugepage_shift == PAGE_SHIFT)
+                       __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+#if defined(CONFIG_HUGETLB_PAGE)
+               else
+                       __flush_huge_tsb_one(tb, PAGE_SHIFT, base, nentries,
+                                            tb->hugepage_shift);
+#endif
        }
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+       else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
-               __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
+               __flush_huge_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries,
+                                    tb->hugepage_shift);
        }
 #endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
+void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
+                        unsigned int hugepage_shift)
 {
        unsigned long nentries, base, flags;
 
        spin_lock_irqsave(&mm->context.lock, flags);
 
-       if (!huge) {
+       if (hugepage_shift < HPAGE_SHIFT) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
-               __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
+               if (hugepage_shift == PAGE_SHIFT)
+                       __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT,
+                                             nentries);
+#if defined(CONFIG_HUGETLB_PAGE)
+               else
+                       __flush_huge_tsb_one_entry(base, vaddr, PAGE_SHIFT,
+                                                  nentries, hugepage_shift);
+#endif
        }
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+       else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
-               __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
+               __flush_huge_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT,
+                                          nentries, hugepage_shift);
        }
 #endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
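
The flush fan-out mirrors the PTE fan-out in the hugetlbpage.c hunks: __flush_huge_tsb_one_entry() touches 1 << (hugepage_shift - hash_shift) consecutive TSB slots. With sparc64's 8K base pages, a 64K page hits 2^(16-13) = 8 entries of the base TSB, an 8MB page 2^(23-22) = 2 entries of the huge TSB (hashed at REAL_HPAGE_SHIFT), and a 256MB page 2^(28-22) = 64. The hugepage_shift < HPAGE_SHIFT test is what routes 64K pages to the base TSB while both PMD-level sizes share MM_TSB_HUGE.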
index 6ab8bf146d4c72cb830cc0fb263fd164a2ccec79..1cf45422a0dff1262be2ff1f8e15179eede80495 100644 (file)
@@ -17,9 +17,6 @@
 #define _ASM_TILE_DEVICE_H
 
 struct dev_archdata {
-       /* DMA operations on that device */
-        struct dma_map_ops     *dma_ops;
-
        /* Offset of the DMA address from the PA. */
        dma_addr_t              dma_offset;
 
index 01ceb4a895b09b21167fd269ee8874a5d87a00ce..bbc71a29b2c6a860abf9d7a9d81a6ad75a57f7a1 100644 (file)
 #define ARCH_HAS_DMA_GET_REQUIRED_MASK
 #endif
 
-extern struct dma_map_ops *tile_dma_map_ops;
-extern struct dma_map_ops *gx_pci_dma_map_ops;
-extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
-extern struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
+extern const struct dma_map_ops *tile_dma_map_ops;
+extern const struct dma_map_ops *gx_pci_dma_map_ops;
+extern const struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+extern const struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-       if (dev && dev->archdata.dma_ops)
-               return dev->archdata.dma_ops;
-       else
-               return tile_dma_map_ops;
+       return tile_dma_map_ops;
 }
 
 static inline dma_addr_t get_dma_offset(struct device *dev)
@@ -59,11 +56,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 
 static inline void dma_mark_clean(void *addr, size_t size) {}
 
-static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
-{
-       dev->archdata.dma_ops = ops;
-}
-
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
        if (!dev->dma_mask)
index d8f9a83943b137b57dd32cef2dbb60b5e2f39f63..4a8b1cadca249d0599ce8b563f052d7be4377139 100644 (file)
 #ifndef _ASM_TILE_KPROBES_H
 #define _ASM_TILE_KPROBES_H
 
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
+
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
-
 #include <arch/opcode.h>
 
 #define __ARCH_WANT_KPROBES_INSN_SLOT
@@ -76,4 +79,5 @@ void arch_remove_kprobe(struct kprobe *);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
                             unsigned long val, void *data);
 
+#endif /* CONFIG_KPROBES */
 #endif /* _ASM_TILE_KPROBES_H */
index 24e0f8c21f2f4b9734f027a06a688df76a9effe7..569bb6dd154a4ab2a6b933b8a2c15cb144df1c88 100644 (file)
@@ -329,7 +329,7 @@ tile_dma_supported(struct device *dev, u64 mask)
        return 1;
 }
 
-static struct dma_map_ops tile_default_dma_map_ops = {
+static const struct dma_map_ops tile_default_dma_map_ops = {
        .alloc = tile_dma_alloc_coherent,
        .free = tile_dma_free_coherent,
        .map_page = tile_dma_map_page,
@@ -344,7 +344,7 @@ static struct dma_map_ops tile_default_dma_map_ops = {
        .dma_supported = tile_dma_supported
 };
 
-struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
+const struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
 EXPORT_SYMBOL(tile_dma_map_ops);
 
 /* Generic PCI DMA mapping functions */
@@ -516,7 +516,7 @@ tile_pci_dma_supported(struct device *dev, u64 mask)
        return 1;
 }
 
-static struct dma_map_ops tile_pci_default_dma_map_ops = {
+static const struct dma_map_ops tile_pci_default_dma_map_ops = {
        .alloc = tile_pci_dma_alloc_coherent,
        .free = tile_pci_dma_free_coherent,
        .map_page = tile_pci_dma_map_page,
@@ -531,7 +531,7 @@ static struct dma_map_ops tile_pci_default_dma_map_ops = {
        .dma_supported = tile_pci_dma_supported
 };
 
-struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
+const struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
 EXPORT_SYMBOL(gx_pci_dma_map_ops);
 
 /* PCI DMA mapping functions for legacy PCI devices */
@@ -552,7 +552,7 @@ static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
        swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 }
 
-static struct dma_map_ops pci_swiotlb_dma_ops = {
+static const struct dma_map_ops pci_swiotlb_dma_ops = {
        .alloc = tile_swiotlb_alloc_coherent,
        .free = tile_swiotlb_free_coherent,
        .map_page = swiotlb_map_page,
@@ -567,7 +567,7 @@ static struct dma_map_ops pci_swiotlb_dma_ops = {
        .mapping_error = swiotlb_dma_mapping_error,
 };
 
-static struct dma_map_ops pci_hybrid_dma_ops = {
+static const struct dma_map_ops pci_hybrid_dma_ops = {
        .alloc = tile_swiotlb_alloc_coherent,
        .free = tile_swiotlb_free_coherent,
        .map_page = tile_pci_dma_map_page,
@@ -582,18 +582,18 @@ static struct dma_map_ops pci_hybrid_dma_ops = {
        .dma_supported = tile_pci_dma_supported
 };
 
-struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
-struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
+const struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
+const struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
 #else
-struct dma_map_ops *gx_legacy_pci_dma_map_ops;
-struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
+const struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+const struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
 #endif
 EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
 EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops);
 
 int dma_set_mask(struct device *dev, u64 mask)
 {
-       struct dma_map_ops *dma_ops = get_dma_ops(dev);
+       const struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
        /*
         * For PCI devices with 64-bit DMA addressing capability, promote
@@ -623,7 +623,7 @@ EXPORT_SYMBOL(dma_set_mask);
 #ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
 int dma_set_coherent_mask(struct device *dev, u64 mask)
 {
-       struct dma_map_ops *dma_ops = get_dma_ops(dev);
+       const struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
        /*
         * For PCI devices with 64-bit DMA addressing capability, promote
index 6c0abaacec335be522041bd4634dc0561f708375..53ce940a50169ab73b3be242156fb1b3923b68ac 100644 (file)
@@ -160,7 +160,7 @@ static void start_secondary(void)
        __this_cpu_write(current_asid, min_asid);
 
        /* Set up this thread as another owner of the init_mm */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        current->active_mm = &init_mm;
        if (current->mm)
                BUG();
index 6225cc998db1308b81a4c3abc1bd2edb0ebb2347..88990182440026d06c6d69634eb2cbd12788e6e2 100644 (file)
@@ -143,7 +143,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
                unsigned long addr = MEM_USER_INTRPT;
                addr = mmap_region(NULL, addr, INTRPT_SIZE,
                                   VM_READ|VM_EXEC|
-                                  VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
+                                  VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0, NULL);
                if (addr > (unsigned long) -PAGE_SIZE)
                        retval = (int) addr;
        }
index 90c281cd7e1dbcf18c46c6b68f65bead984352fc..e9d42aab76dcbd1dcc08bcba6965b87705393110 100644 (file)
@@ -25,3 +25,4 @@ generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 5d51ade89f4c5f8ca1875dbf3505853f007e8109..84205fe1cd790805ef0f347591f46fa9990f0a34 100644 (file)
@@ -63,3 +63,4 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
index 4749854afd03544481207d54569f47c5368edc60..518ba5848dd6c921dec86e84942edeb687c57f0a 100644 (file)
@@ -21,9 +21,9 @@
 #include <asm/memory.h>
 #include <asm/cacheflush.h>
 
-extern struct dma_map_ops swiotlb_dma_map_ops;
+extern const struct dma_map_ops swiotlb_dma_map_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
        return &swiotlb_dma_map_ops;
 }
index 3e9f6489ba380a72e575e53118e914d3077369e4..525413d6690eabbf96f93a5db1c84a0c2b827f6e 100644 (file)
@@ -31,7 +31,7 @@ static void unicore_swiotlb_free_coherent(struct device *dev, size_t size,
        swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 }
 
-struct dma_map_ops swiotlb_dma_map_ops = {
+const struct dma_map_ops swiotlb_dma_map_ops = {
        .alloc = unicore_swiotlb_alloc_coherent,
        .free = unicore_swiotlb_free_coherent,
        .map_sg = swiotlb_map_sg_attrs,
index 874c1238dffd8e58e007169e822e8be522b7dcae..cc98d5a294eee25c56209d92e38bdb08f379780d 100644 (file)
@@ -109,6 +109,7 @@ config X86
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
        select HAVE_ARCH_VMAP_STACK             if X86_64
        select HAVE_ARCH_WITHIN_STACK_FRAMES
        select HAVE_CC_STACKPROTECTOR
@@ -2786,10 +2787,6 @@ config X86_DMA_REMAP
        bool
        depends on STA2X11
 
-config PMC_ATOM
-       def_bool y
-        depends on PCI
-
 source "net/Kconfig"
 
 source "drivers/Kconfig"
index c4cba00dbdee5ec48af1fff44adbe0f7e6582c26..63c1d13aaf9f99934c4e02a7ca6eb8ced4389056 100644 (file)
@@ -74,14 +74,6 @@ config EFI_PGT_DUMP
          issues with the mapping of the EFI runtime regions into that
          table.
 
-config DEBUG_RODATA_TEST
-       bool "Testcase for the marking rodata read-only"
-       default y
-       ---help---
-         This option enables a testcase for the setting rodata read-only
-         as well as for the change_page_attr() infrastructure.
-         If in doubt, say "N"
-
 config DEBUG_WX
        bool "Warn on W+X mappings at boot"
        select X86_PTDUMP_CORE
index 10820f6cefbf020737d1729fb92a7217fe0827fe..572cee3fccffc59881cbaaa93eb7908836bc70bf 100644 (file)
@@ -186,7 +186,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
 
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
-               do_munmap(mm, text_start, image->size);
+               do_munmap(mm, text_start, image->size, NULL);
        } else {
                current->mm->context.vdso = (void __user *)text_start;
                current->mm->context.vdso_image = image;
index 872877d930de5018b16f3029ba780cbd1058e2e2..e7e1942edff7360bb36be03bd0f097c2f04e34e7 100644 (file)
@@ -90,18 +90,8 @@ void clflush_cache_range(void *addr, unsigned int size);
 
 #define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
 
-extern const int rodata_test_data;
 extern int kernel_set_to_readonly;
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
 
-#ifdef CONFIG_DEBUG_RODATA_TEST
-int rodata_test(void);
-#else
-static inline int rodata_test(void)
-{
-       return 0;
-}
-#endif
-
 #endif /* _ASM_X86_CACHEFLUSH_H */
index eb5deb42484d5e283adeaa191c004d1566f11a7f..49265345d4d223ffcf15e03ba2843ddaabe82668 100644 (file)
@@ -15,7 +15,7 @@
  * FIXME: Accessing the desc_struct through its fields is more elegant,
  * and should be the one valid thing to do. However, a lot of open code
  * still touches the a and b accessors, and doing this allows us to do it
- * incrementally. We keep the signature as a struct, rather than an union,
+ * incrementally. We keep the signature as a struct, rather than a union,
  * so we can get rid of it transparently in the future -- glommer
  */
 /* 8 byte segment descriptor */
index 684ed6c3aa679d15dac80a48cd832ecf721b52dd..1b3ef26e77df2b264542f23a2ac38ceb561cc39f 100644 (file)
@@ -2,9 +2,6 @@
 #define _ASM_X86_DEVICE_H
 
 struct dev_archdata {
-#ifdef CONFIG_X86_DEV_DMA_OPS
-       struct dma_map_ops *dma_ops;
-#endif
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
        void *iommu; /* hook for IOMMU specific extension */
 #endif
@@ -13,7 +10,7 @@ struct dev_archdata {
 #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
 struct dma_domain {
        struct list_head node;
-       struct dma_map_ops *dma_ops;
+       const struct dma_map_ops *dma_ops;
        int domain_nr;
 };
 void add_dma_domain(struct dma_domain *domain);
index 44461626830e4be5f04b8074fd7b3c0e8bc612c6..08a0838b83fb33ccbf04abce495c469e95ce4253 100644 (file)
@@ -25,18 +25,11 @@ extern int iommu_merge;
 extern struct device x86_dma_fallback_dev;
 extern int panic_on_overflow;
 
-extern struct dma_map_ops *dma_ops;
+extern const struct dma_map_ops *dma_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-#ifndef CONFIG_X86_DEV_DMA_OPS
        return dma_ops;
-#else
-       if (unlikely(!dev) || !dev->archdata.dma_ops)
-               return dma_ops;
-       else
-               return dev->archdata.dma_ops;
-#endif
 }
 
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
index cd0310e186f4af534b7e21312b81d092ff9ba8cd..4291b6a5ddf795ecee1da4aec4dd091d1b5fce48 100644 (file)
@@ -30,6 +30,7 @@ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
                u32 *out, u32 outlen, u32 dptr, u32 sptr);
 int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
                u32 *out, u32 outlen);
+int intel_pmc_s0ix_counter_read(u64 *data);
 
 #else
 
@@ -50,6 +51,11 @@ static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
        return -EINVAL;
 }
 
+static inline int intel_pmc_s0ix_counter_read(u64 *data)
+{
+       return -EINVAL;
+}
+
 #endif /*CONFIG_INTEL_PMC_IPC*/
 
 #endif
index 345c99cef15262dda6415b5eff3d2140c37bafcc..793869879464e0ca8fa872cae092e1d39cd15118 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_IOMMU_H
 #define _ASM_X86_IOMMU_H
 
-extern struct dma_map_ops nommu_dma_ops;
+extern const struct dma_map_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern int iommu_pass_through;
index d1d1e5094c2844d31ff7d2028c950bd42e396312..200581691c6e3b98b7540681ea9866456900142d 100644 (file)
  *
  * See arch/x86/kernel/kprobes.c for x86 kprobes history.
  */
+
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0xcc
+
+#ifdef CONFIG_KPROBES
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
@@ -32,7 +38,6 @@ struct pt_regs;
 struct kprobe;
 
 typedef u8 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xcc
 #define RELATIVEJUMP_OPCODE 0xe9
 #define RELATIVEJUMP_SIZE 5
 #define RELATIVECALL_OPCODE 0xe8
@@ -116,4 +121,6 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
                                    unsigned long val, void *data);
 extern int kprobe_int3_handler(struct pt_regs *regs);
 extern int kprobe_debug_handler(struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
 #endif /* _ASM_X86_KPROBES_H */
index f75fbfe550f20561e40d578a782618d67fafada3..0489884fdc440c14e68a1b961987163234cb1525 100644 (file)
@@ -475,6 +475,17 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                            native_pmd_val(pmd));
 }
 
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+                             pud_t *pudp, pud_t pud)
+{
+       if (sizeof(pudval_t) > sizeof(long))
+               /* 5 arg words */
+               pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
+       else
+               PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
+                           native_pud_val(pud));
+}
+
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
        pmdval_t val = native_pmd_val(pmd);
index bb2de45a60f296b2dc44bf0681c35e24f05e8475..b060f962d581684912a08feefd8354db7b943d2b 100644 (file)
@@ -249,6 +249,8 @@ struct pv_mmu_ops {
        void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
        void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
                           pmd_t *pmdp, pmd_t pmdval);
+       void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
+                          pud_t *pudp, pud_t pudval);
        void (*pte_update)(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep);
 
index fd74a11959de0db5f4c6026f522decc3f40ea7e0..a8b96e708c2b13053f17096c8a2b0628c7bf8050 100644 (file)
@@ -21,6 +21,10 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
        *pmdp = pmd;
 }
 
+static inline void native_set_pud(pud_t *pudp, pud_t pud)
+{
+}
+
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
        native_set_pte(ptep, pte);
@@ -31,6 +35,10 @@ static inline void native_pmd_clear(pmd_t *pmdp)
        native_set_pmd(pmdp, __pmd(0));
 }
 
+static inline void native_pud_clear(pud_t *pudp)
+{
+}
+
 static inline void native_pte_clear(struct mm_struct *mm,
                                    unsigned long addr, pte_t *xp)
 {
@@ -55,6 +63,15 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
 #endif
 
+#ifdef CONFIG_SMP
+static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+{
+       return __pud(xchg((pudval_t *)xp, 0));
+}
+#else
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
+#endif
+
 /* Bit manipulation helper on pte/pgoff entry */
 static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
                                      unsigned long mask, unsigned int leftshift)
index cdaa58c9b39ed3a31054686f68f934e488990e2f..72277b1028a5f54551962555fa56bfd5aebab15c 100644 (file)
@@ -121,6 +121,13 @@ static inline void native_pmd_clear(pmd_t *pmd)
        *(tmp + 1) = 0;
 }
 
+#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \
+               defined(CONFIG_PARAVIRT))
+static inline void native_pud_clear(pud_t *pudp)
+{
+}
+#endif
+
 static inline void pud_clear(pud_t *pudp)
 {
        set_pud(pudp, __pud(0));
@@ -176,6 +183,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
 #endif
 
+#ifdef CONFIG_SMP
+union split_pud {
+       struct {
+               u32 pud_low;
+               u32 pud_high;
+       };
+       pud_t pud;
+};
+
+static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
+{
+       union split_pud res, *orig = (union split_pud *)pudp;
+
+       /* xchg acts as a barrier before setting of the high bits */
+       res.pud_low = xchg(&orig->pud_low, 0);
+       res.pud_high = orig->pud_high;
+       orig->pud_high = 0;
+
+       return res.pud;
+}
+#else
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
+#endif
+
 /* Encode and de-code a swap entry */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
 #define __swp_type(x)                  (((x).val) & 0x1f)
index 437feb436efa666dbe13732c7d4269160fed49e3..1cfb36b8c024ab07b8334121fc56ac79f2a35371 100644 (file)
@@ -46,6 +46,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 #define set_pte(ptep, pte)             native_set_pte(ptep, pte)
 #define set_pte_at(mm, addr, ptep, pte)        native_set_pte_at(mm, addr, ptep, pte)
 #define set_pmd_at(mm, addr, pmdp, pmd)        native_set_pmd_at(mm, addr, pmdp, pmd)
+#define set_pud_at(mm, addr, pudp, pud)        native_set_pud_at(mm, addr, pudp, pud)
 
 #define set_pte_atomic(ptep, pte)                                      \
        native_set_pte_atomic(ptep, pte)
@@ -128,6 +129,16 @@ static inline int pmd_young(pmd_t pmd)
        return pmd_flags(pmd) & _PAGE_ACCESSED;
 }
 
+static inline int pud_dirty(pud_t pud)
+{
+       return pud_flags(pud) & _PAGE_DIRTY;
+}
+
+static inline int pud_young(pud_t pud)
+{
+       return pud_flags(pud) & _PAGE_ACCESSED;
+}
+
 static inline int pte_write(pte_t pte)
 {
        return pte_flags(pte) & _PAGE_RW;
@@ -181,6 +192,13 @@ static inline int pmd_trans_huge(pmd_t pmd)
        return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
 }
 
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline int pud_trans_huge(pud_t pud)
+{
+       return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
+}
+#endif
+
 #define has_transparent_hugepage has_transparent_hugepage
 static inline int has_transparent_hugepage(void)
 {
@@ -192,6 +210,18 @@ static inline int pmd_devmap(pmd_t pmd)
 {
        return !!(pmd_val(pmd) & _PAGE_DEVMAP);
 }
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline int pud_devmap(pud_t pud)
+{
+       return !!(pud_val(pud) & _PAGE_DEVMAP);
+}
+#else
+static inline int pud_devmap(pud_t pud)
+{
+       return 0;
+}
+#endif
 #endif
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -333,6 +363,65 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
        return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+       pudval_t v = native_pud_val(pud);
+
+       return __pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+       pudval_t v = native_pud_val(pud);
+
+       return __pud(v & ~clear);
+}
+
+static inline pud_t pud_mkold(pud_t pud)
+{
+       return pud_clear_flags(pud, _PAGE_ACCESSED);
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+       return pud_clear_flags(pud, _PAGE_DIRTY);
+}
+
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+       return pud_clear_flags(pud, _PAGE_RW);
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+       return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+}
+
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+       return pud_set_flags(pud, _PAGE_DEVMAP);
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+       return pud_set_flags(pud, _PAGE_PSE);
+}
+
+static inline pud_t pud_mkyoung(pud_t pud)
+{
+       return pud_set_flags(pud, _PAGE_ACCESSED);
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+       return pud_set_flags(pud, _PAGE_RW);
+}
+
+static inline pud_t pud_mknotpresent(pud_t pud)
+{
+       return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE);
+}
+
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline int pte_soft_dirty(pte_t pte)
 {
@@ -344,6 +433,11 @@ static inline int pmd_soft_dirty(pmd_t pmd)
        return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
 }
 
+static inline int pud_soft_dirty(pud_t pud)
+{
+       return pud_flags(pud) & _PAGE_SOFT_DIRTY;
+}
+
 static inline pte_t pte_mksoft_dirty(pte_t pte)
 {
        return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
@@ -354,6 +448,11 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
        return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pud_t pud_mksoft_dirty(pud_t pud)
+{
+       return pud_set_flags(pud, _PAGE_SOFT_DIRTY);
+}
+
 static inline pte_t pte_clear_soft_dirty(pte_t pte)
 {
        return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
@@ -364,6 +463,11 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
        return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pud_t pud_clear_soft_dirty(pud_t pud)
+{
+       return pud_clear_flags(pud, _PAGE_SOFT_DIRTY);
+}
+
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 /*
@@ -392,6 +496,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
                     massage_pgprot(pgprot));
 }
 
+static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+{
+       return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
+                    massage_pgprot(pgprot));
+}
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
        pteval_t val = pte_val(pte);
@@ -771,6 +881,14 @@ static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
        return res;
 }
 
+static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
+{
+       pud_t res = *pudp;
+
+       native_pud_clear(pudp);
+       return res;
+}
+
 static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
                                     pte_t *ptep , pte_t pte)
 {
@@ -783,6 +901,12 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
        native_set_pmd(pmdp, pmd);
 }
 
+static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
+                                    pud_t *pudp, pud_t pud)
+{
+       native_set_pud(pudp, pud);
+}
+
 #ifndef CONFIG_PARAVIRT
 /*
  * Rules for using pte_update - it must be called after any PTE update which
@@ -861,10 +985,15 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp,
                                 pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+                                unsigned long address, pud_t *pudp,
+                                pud_t entry, int dirty);
 
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                     unsigned long addr, pmd_t *pmdp);
+extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
+                                    unsigned long addr, pud_t *pudp);
 
 #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
 extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
@@ -884,6 +1013,13 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
        return native_pmdp_get_and_clear(pmdp);
 }
 
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+                                       unsigned long addr, pud_t *pudp)
+{
+       return native_pudp_get_and_clear(pudp);
+}
+
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long addr, pmd_t *pmdp)
@@ -932,6 +1068,10 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
                unsigned long addr, pmd_t *pmd)
 {
 }
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+               unsigned long addr, pud_t *pud)
+{
+}
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
index 62b775926045edb68aa935d7142067e2c00a03bd..73c7ccc389122d074399e9fc776ec33ab5332baf 100644 (file)
@@ -106,6 +106,21 @@ static inline void native_pud_clear(pud_t *pud)
        native_set_pud(pud, native_make_pud(0));
 }
 
+static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+{
+#ifdef CONFIG_SMP
+       return native_make_pud(xchg(&xp->pud, 0));
+#else
+       /* native_local_pudp_get_and_clear,
+        * but duplicated because of cyclic dependency
+        */
+       pud_t ret = *xp;
+
+       native_pud_clear(xp);
+       return ret;
+#endif
+}
+
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
        *pgdp = pgd;
diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h
deleted file mode 100644 (file)
index aa8744c..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Intel Atom SOC Power Management Controller Header File
- * Copyright (c) 2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- */
-
-#ifndef PMC_ATOM_H
-#define PMC_ATOM_H
-
-/* ValleyView Power Control Unit PCI Device ID */
-#define        PCI_DEVICE_ID_VLV_PMC   0x0F1C
-/* CherryTrail Power Control Unit PCI Device ID */
-#define        PCI_DEVICE_ID_CHT_PMC   0x229C
-
-/* PMC Memory mapped IO registers */
-#define        PMC_BASE_ADDR_OFFSET    0x44
-#define        PMC_BASE_ADDR_MASK      0xFFFFFE00
-#define        PMC_MMIO_REG_LEN        0x100
-#define        PMC_REG_BIT_WIDTH       32
-
-/* BIOS uses FUNC_DIS to disable specific function */
-#define        PMC_FUNC_DIS            0x34
-#define        PMC_FUNC_DIS_2          0x38
-
-/* CHT specific bits in FUNC_DIS2 register */
-#define        BIT_FD_GMM              BIT(3)
-#define        BIT_FD_ISH              BIT(4)
-
-/* S0ix wake event control */
-#define        PMC_S0IX_WAKE_EN        0x3C
-
-#define        BIT_LPC_CLOCK_RUN               BIT(4)
-#define        BIT_SHARED_IRQ_GPSC             BIT(5)
-#define        BIT_ORED_DEDICATED_IRQ_GPSS     BIT(18)
-#define        BIT_ORED_DEDICATED_IRQ_GPSC     BIT(19)
-#define        BIT_SHARED_IRQ_GPSS             BIT(20)
-
-#define        PMC_WAKE_EN_SETTING     ~(BIT_LPC_CLOCK_RUN | \
-                               BIT_SHARED_IRQ_GPSC | \
-                               BIT_ORED_DEDICATED_IRQ_GPSS | \
-                               BIT_ORED_DEDICATED_IRQ_GPSC | \
-                               BIT_SHARED_IRQ_GPSS)
-
-/* The timers accumulate time spent in sleep state */
-#define        PMC_S0IR_TMR            0x80
-#define        PMC_S0I1_TMR            0x84
-#define        PMC_S0I2_TMR            0x88
-#define        PMC_S0I3_TMR            0x8C
-#define        PMC_S0_TMR              0x90
-/* Sleep state counter is in units of 32us */
-#define        PMC_TMR_SHIFT           5
-
-/* Power status of power islands */
-#define        PMC_PSS                 0x98
-
-#define PMC_PSS_BIT_GBE                        BIT(0)
-#define PMC_PSS_BIT_SATA               BIT(1)
-#define PMC_PSS_BIT_HDA                        BIT(2)
-#define PMC_PSS_BIT_SEC                        BIT(3)
-#define PMC_PSS_BIT_PCIE               BIT(4)
-#define PMC_PSS_BIT_LPSS               BIT(5)
-#define PMC_PSS_BIT_LPE                        BIT(6)
-#define PMC_PSS_BIT_DFX                        BIT(7)
-#define PMC_PSS_BIT_USH_CTRL           BIT(8)
-#define PMC_PSS_BIT_USH_SUS            BIT(9)
-#define PMC_PSS_BIT_USH_VCCS           BIT(10)
-#define PMC_PSS_BIT_USH_VCCA           BIT(11)
-#define PMC_PSS_BIT_OTG_CTRL           BIT(12)
-#define PMC_PSS_BIT_OTG_VCCS           BIT(13)
-#define PMC_PSS_BIT_OTG_VCCA_CLK       BIT(14)
-#define PMC_PSS_BIT_OTG_VCCA           BIT(15)
-#define PMC_PSS_BIT_USB                        BIT(16)
-#define PMC_PSS_BIT_USB_SUS            BIT(17)
-
-/* CHT specific bits in PSS register */
-#define        PMC_PSS_BIT_CHT_UFS             BIT(7)
-#define        PMC_PSS_BIT_CHT_UXD             BIT(11)
-#define        PMC_PSS_BIT_CHT_UXD_FD          BIT(12)
-#define        PMC_PSS_BIT_CHT_UX_ENG          BIT(15)
-#define        PMC_PSS_BIT_CHT_USB_SUS         BIT(16)
-#define        PMC_PSS_BIT_CHT_GMM             BIT(17)
-#define        PMC_PSS_BIT_CHT_ISH             BIT(18)
-#define        PMC_PSS_BIT_CHT_DFX_MASTER      BIT(26)
-#define        PMC_PSS_BIT_CHT_DFX_CLUSTER1    BIT(27)
-#define        PMC_PSS_BIT_CHT_DFX_CLUSTER2    BIT(28)
-#define        PMC_PSS_BIT_CHT_DFX_CLUSTER3    BIT(29)
-#define        PMC_PSS_BIT_CHT_DFX_CLUSTER4    BIT(30)
-#define        PMC_PSS_BIT_CHT_DFX_CLUSTER5    BIT(31)
-
-/* These registers reflect D3 status of functions */
-#define        PMC_D3_STS_0            0xA0
-
-#define        BIT_LPSS1_F0_DMA        BIT(0)
-#define        BIT_LPSS1_F1_PWM1       BIT(1)
-#define        BIT_LPSS1_F2_PWM2       BIT(2)
-#define        BIT_LPSS1_F3_HSUART1    BIT(3)
-#define        BIT_LPSS1_F4_HSUART2    BIT(4)
-#define        BIT_LPSS1_F5_SPI        BIT(5)
-#define        BIT_LPSS1_F6_XXX        BIT(6)
-#define        BIT_LPSS1_F7_XXX        BIT(7)
-#define        BIT_SCC_EMMC            BIT(8)
-#define        BIT_SCC_SDIO            BIT(9)
-#define        BIT_SCC_SDCARD          BIT(10)
-#define        BIT_SCC_MIPI            BIT(11)
-#define        BIT_HDA                 BIT(12)
-#define        BIT_LPE                 BIT(13)
-#define        BIT_OTG                 BIT(14)
-#define        BIT_USH                 BIT(15)
-#define        BIT_GBE                 BIT(16)
-#define        BIT_SATA                BIT(17)
-#define        BIT_USB_EHCI            BIT(18)
-#define        BIT_SEC                 BIT(19)
-#define        BIT_PCIE_PORT0          BIT(20)
-#define        BIT_PCIE_PORT1          BIT(21)
-#define        BIT_PCIE_PORT2          BIT(22)
-#define        BIT_PCIE_PORT3          BIT(23)
-#define        BIT_LPSS2_F0_DMA        BIT(24)
-#define        BIT_LPSS2_F1_I2C1       BIT(25)
-#define        BIT_LPSS2_F2_I2C2       BIT(26)
-#define        BIT_LPSS2_F3_I2C3       BIT(27)
-#define        BIT_LPSS2_F4_I2C4       BIT(28)
-#define        BIT_LPSS2_F5_I2C5       BIT(29)
-#define        BIT_LPSS2_F6_I2C6       BIT(30)
-#define        BIT_LPSS2_F7_I2C7       BIT(31)
-
-#define        PMC_D3_STS_1            0xA4
-#define        BIT_SMB                 BIT(0)
-#define        BIT_OTG_SS_PHY          BIT(1)
-#define        BIT_USH_SS_PHY          BIT(2)
-#define        BIT_DFX                 BIT(3)
-
-/* CHT specific bits in PMC_D3_STS_1 register */
-#define        BIT_STS_GMM             BIT(1)
-#define        BIT_STS_ISH             BIT(2)
-
-/* PMC I/O Registers */
-#define        ACPI_BASE_ADDR_OFFSET   0x40
-#define        ACPI_BASE_ADDR_MASK     0xFFFFFE00
-#define        ACPI_MMIO_REG_LEN       0x100
-
-#define        PM1_CNT                 0x4
-#define        SLEEP_TYPE_MASK         0xFFFFECFF
-#define        SLEEP_TYPE_S5           0x1C00
-#define        SLEEP_ENABLE            0x2000
-
-extern int pmc_atom_read(int offset, u32 *value);
-extern int pmc_atom_write(int offset, u32 value);
-
-#endif /* PMC_ATOM_H */
index bdcdb3b3a219308544c0e669ca7dd9831f3f5146..84c00592d3598a2dc851202893bcc3e62cf8e669 100644 (file)
@@ -100,7 +100,6 @@ obj-$(CONFIG_HPET_TIMER)    += hpet.o
 obj-$(CONFIG_APB_TIMER)                += apb_timer.o
 
 obj-$(CONFIG_AMD_NB)           += amd_nb.o
-obj-$(CONFIG_DEBUG_RODATA_TEST)        += test_rodata.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)                += kvm.o kvmclock.o
index 63ff468a7986659b1c082165860065fab16391b5..82dfe32faaf41b17ea92dd21b72b1783b0f887b1 100644 (file)
@@ -695,7 +695,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
        return -1;
 }
 
-static struct dma_map_ops gart_dma_ops = {
+static const struct dma_map_ops gart_dma_ops = {
        .map_sg                         = gart_map_sg,
        .unmap_sg                       = gart_unmap_sg,
        .map_page                       = gart_map_page,
index 8567c851172c78535e5d6b2fee9c96003155169d..4261b3282ad99dd87799683e33b2945bcfb20746 100644 (file)
@@ -1865,14 +1865,14 @@ static void __smp_spurious_interrupt(u8 vector)
                "should never happen.\n", vector, smp_processor_id());
 }
 
-__visible void smp_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
 {
        entering_irq();
        __smp_spurious_interrupt(~regs->orig_ax);
        exiting_irq();
 }
 
-__visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
 {
        u8 vector = ~regs->orig_ax;
 
@@ -1923,14 +1923,14 @@ static void __smp_error_interrupt(struct pt_regs *regs)
 
 }
 
-__visible void smp_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
 {
        entering_irq();
        __smp_error_interrupt(regs);
        exiting_irq();
 }
 
-__visible void smp_trace_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
 {
        entering_irq();
        trace_error_apic_entry(ERROR_APIC_VECTOR);
index 5d30c5e42bb13939b9164ac575d9202a26d772a7..f3557a1eb562fbe6e46b2e3db3289ca8535b1b6c 100644 (file)
@@ -559,7 +559,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
                __send_cleanup_vector(data);
 }
 
-asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
 {
        unsigned vector, me;
 
index f07005e6f4616f3b2504d59e8cbfff9efca1b127..c64ca5929cb5e00e6c7b694f1663ea0b338d5447 100644 (file)
@@ -1510,7 +1510,7 @@ void cpu_init(void)
        for (i = 0; i <= IO_BITMAP_LONGS; i++)
                t->io_bitmap[i] = ~0UL;
 
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        me->active_mm = &init_mm;
        BUG_ON(me->mm);
        enter_lazy_tlb(&init_mm, me);
@@ -1561,7 +1561,7 @@ void cpu_init(void)
        /*
         * Set up and load the per-CPU TSS and LDT
         */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        curr->active_mm = &init_mm;
        BUG_ON(curr->mm);
        enter_lazy_tlb(&init_mm, curr);
index 9e5427df3243430a752e4f1425d906de45a458e7..524cc5780a779630d3203d834b0a508097340c67 100644 (file)
@@ -816,14 +816,14 @@ static inline void __smp_deferred_error_interrupt(void)
        deferred_error_int_vector();
 }
 
-asmlinkage __visible void smp_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
 {
        entering_irq();
        __smp_deferred_error_interrupt();
        exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
 {
        entering_irq();
        trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
index 85469f84c9214027aab98273ac952a94291173b1..d7cc190ae45719bf84d721e781dc7fde00c85e14 100644 (file)
@@ -396,14 +396,16 @@ static inline void __smp_thermal_interrupt(void)
        smp_thermal_vector();
 }
 
-asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry
+smp_thermal_interrupt(struct pt_regs *regs)
 {
        entering_irq();
        __smp_thermal_interrupt();
        exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry
+smp_trace_thermal_interrupt(struct pt_regs *regs)
 {
        entering_irq();
        trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
index 9beb092d68a514b7572ae7c906084a80411dbef8..bb0e75eed10a10a1b19daf82eeef12aefd8be2db 100644 (file)
@@ -23,14 +23,14 @@ static inline void __smp_threshold_interrupt(void)
        mce_threshold_vector();
 }
 
-asmlinkage __visible void smp_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
 {
        entering_irq();
        __smp_threshold_interrupt();
        exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
 {
        entering_irq();
        trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
index 7c6e9ffe4424d9d4cc8c88b3ed1166dd299409e1..4d8183b5f11323789f133de97bd32a6c8c403524 100644 (file)
@@ -264,7 +264,7 @@ void __smp_x86_platform_ipi(void)
                x86_platform_ipi_callback();
 }
 
-__visible void smp_x86_platform_ipi(struct pt_regs *regs)
+__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
 
@@ -315,7 +315,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
 }
 #endif
 
-__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
 
index 3512ba607361403e587f417cbce2775cdec428a1..275487872be2b35e7c6d01335fb08170b2cf7fa0 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/hardirq.h>
 #include <asm/apic.h>
 #include <asm/trace/irq_vectors.h>
+#include <linux/interrupt.h>
 
 static inline void __smp_irq_work_interrupt(void)
 {
@@ -16,14 +17,14 @@ static inline void __smp_irq_work_interrupt(void)
        irq_work_run();
 }
 
-__visible void smp_irq_work_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
 {
        ipi_entering_ack_irq();
        __smp_irq_work_interrupt();
        exiting_irq();
 }
 
-__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
 {
        ipi_entering_ack_irq();
        trace_irq_work_entry(IRQ_WORK_VECTOR);
index a1bfba0f7234d3c0f9fe549a61d8bac5198ef1d4..4797e87b0fb6a6a0b27817f7342bd6f65dcc15c2 100644 (file)
@@ -425,6 +425,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
        .pmd_clear = native_pmd_clear,
 #endif
        .set_pud = native_set_pud,
+       .set_pud_at = native_set_pud_at,
 
        .pmd_val = PTE_IDENT,
        .make_pmd = PTE_IDENT,
index d47517941bbc03ee288848561c54b7f791a97e76..0c150c06fa5a5642a6630cfeed2b6a74a1acab17 100644 (file)
@@ -478,7 +478,7 @@ static void calgary_free_coherent(struct device *dev, size_t size,
        free_pages((unsigned long)vaddr, get_order(size));
 }
 
-static struct dma_map_ops calgary_dma_ops = {
+static const struct dma_map_ops calgary_dma_ops = {
        .alloc = calgary_alloc_coherent,
        .free = calgary_free_coherent,
        .map_sg = calgary_map_sg,
@@ -1177,7 +1177,7 @@ static int __init calgary_init(void)
                tbl = find_iommu_table(&dev->dev);
 
                if (translation_enabled(tbl))
-                       dev->dev.archdata.dma_ops = &calgary_dma_ops;
+                       dev->dev.dma_ops = &calgary_dma_ops;
        }
 
        return ret;
@@ -1201,7 +1201,7 @@ error:
                calgary_disable_translation(dev);
                calgary_free_bus(dev);
                pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
-               dev->dev.archdata.dma_ops = NULL;
+               dev->dev.dma_ops = NULL;
        } while (1);
 
        return ret;
index d30c37750765675f58b05ead98f1cbdfb9f854ec..3a216ec869cdbafc00ec89ab88fb412b0bb3f462 100644 (file)
@@ -17,7 +17,7 @@
 
 static int forbid_dac __read_mostly;
 
-struct dma_map_ops *dma_ops = &nommu_dma_ops;
+const struct dma_map_ops *dma_ops = &nommu_dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static int iommu_sac_force __read_mostly;
@@ -91,7 +91,8 @@ again:
        page = NULL;
        /* CMA can be used only in the context which permits sleeping */
        if (gfpflags_allow_blocking(flag)) {
-               page = dma_alloc_from_contiguous(dev, count, get_order(size));
+               page = dma_alloc_from_contiguous(dev, count, get_order(size),
+                                                flag);
                if (page && page_to_phys(page) + size > dma_mask) {
                        dma_release_from_contiguous(dev, page, count);
                        page = NULL;
@@ -214,7 +215,7 @@ early_param("iommu", iommu_setup);
 
 int dma_supported(struct device *dev, u64 mask)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
 #ifdef CONFIG_PCI
        if (mask > 0xffffffff && forbid_dac > 0) {
index 00e71ce396a83f3c0c7f7c238139ef70beb8400c..a88952ef371ceb29f21e11e95717c45e321a7544 100644 (file)
@@ -88,7 +88,7 @@ static void nommu_sync_sg_for_device(struct device *dev,
        flush_write_buffers();
 }
 
-struct dma_map_ops nommu_dma_ops = {
+const struct dma_map_ops nommu_dma_ops = {
        .alloc                  = dma_generic_alloc_coherent,
        .free                   = dma_generic_free_coherent,
        .map_sg                 = nommu_map_sg,
index 410efb2c7b80028ba63367e5ed6d5e27518b0f24..1e23577e17cf10f87d584e4cdc38f4691db57862 100644 (file)
@@ -45,7 +45,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size,
                dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
 }
 
-static struct dma_map_ops swiotlb_dma_ops = {
+static const struct dma_map_ops swiotlb_dma_ops = {
        .mapping_error = swiotlb_dma_mapping_error,
        .alloc = x86_swiotlb_alloc_coherent,
        .free = x86_swiotlb_free_coherent,
index 69780edf0dde90c8c5b04211525dcd1137d73e73..4bf0c8926a1c061bb22c1973962a0d668bfa3d98 100644 (file)
@@ -575,7 +575,9 @@ static void __init reserve_crashkernel(void)
        /* 0 means: find the address automatically */
        if (crash_base <= 0) {
                /*
-                *  kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
+                * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
+                * as old kexec-tools loads bzImage below that, unless
+                * "crashkernel=size[KMG],high" is specified.
                 */
                crash_base = memblock_find_in_range(CRASH_ALIGN,
                                                    high ? CRASH_ADDR_HIGH_MAX
index 68f8cc222f255aa1cf5266e2d84d7ceeb2417977..d3c66a15bbde00e254461922a5a0cb8f1018c166 100644 (file)
@@ -259,7 +259,7 @@ static inline void __smp_reschedule_interrupt(void)
        scheduler_ipi();
 }
 
-__visible void smp_reschedule_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
 {
        ack_APIC_irq();
        __smp_reschedule_interrupt();
@@ -268,7 +268,7 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs)
         */
 }
 
-__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
 {
        /*
         * Need to call irq_enter() before calling the trace point.
@@ -292,14 +292,15 @@ static inline void __smp_call_function_interrupt(void)
        inc_irq_stat(irq_call_count);
 }
 
-__visible void smp_call_function_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
 {
        ipi_entering_ack_irq();
        __smp_call_function_interrupt();
        exiting_irq();
 }
 
-__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_trace_call_function_interrupt(struct pt_regs *regs)
 {
        ipi_entering_ack_irq();
        trace_call_function_entry(CALL_FUNCTION_VECTOR);
@@ -314,14 +315,16 @@ static inline void __smp_call_function_single_interrupt(void)
        inc_irq_stat(irq_call_count);
 }
 
-__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_call_function_single_interrupt(struct pt_regs *regs)
 {
        ipi_entering_ack_irq();
        __smp_call_function_single_interrupt();
        exiting_irq();
 }
 
-__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_trace_call_function_single_interrupt(struct pt_regs *regs)
 {
        ipi_entering_ack_irq();
        trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
deleted file mode 100644 (file)
index 222e84e..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * test_rodata.c: functional test for mark_rodata_ro function
- *
- * (C) Copyright 2008 Intel Corporation
- * Author: Arjan van de Ven <arjan@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <asm/cacheflush.h>
-#include <asm/sections.h>
-#include <asm/asm.h>
-
-int rodata_test(void)
-{
-       unsigned long result;
-       unsigned long start, end;
-
-       /* test 1: read the value */
-       /* If this test fails, some previous testrun has clobbered the state */
-       if (!rodata_test_data) {
-               printk(KERN_ERR "rodata_test: test 1 fails (start data)\n");
-               return -ENODEV;
-       }
-
-       /* test 2: write to the variable; this should fault */
-       /*
-        * If this test fails, we managed to overwrite the data
-        *
-        * This is written in assembly to be able to catch the
-        * exception that is supposed to happen in the correct
-        * case
-        */
-
-       result = 1;
-       asm volatile(
-               "0:     mov %[zero],(%[rodata_test])\n"
-               "       mov %[zero], %[rslt]\n"
-               "1:\n"
-               ".section .fixup,\"ax\"\n"
-               "2:     jmp 1b\n"
-               ".previous\n"
-               _ASM_EXTABLE(0b,2b)
-               : [rslt] "=r" (result)
-               : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
-       );
-
-
-       if (!result) {
-               printk(KERN_ERR "rodata_test: test data was not read only\n");
-               return -ENODEV;
-       }
-
-       /* test 3: check the value hasn't changed */
-       /* If this test fails, we managed to overwrite the data */
-       if (!rodata_test_data) {
-               printk(KERN_ERR "rodata_test: Test 3 fails (end data)\n");
-               return -ENODEV;
-       }
-       /* test 4: check if the rodata section is 4Kb aligned */
-       start = (unsigned long)__start_rodata;
-       end = (unsigned long)__end_rodata;
-       if (start & (PAGE_SIZE - 1)) {
-               printk(KERN_ERR "rodata_test: .rodata is not 4k aligned\n");
-               return -ENODEV;
-       }
-       if (end & (PAGE_SIZE - 1)) {
-               printk(KERN_ERR "rodata_test: .rodata end is not 4k aligned\n");
-               return -ENODEV;
-       }
-
-       return 0;
-}
index e79f15f108a8d43d8d5644f7fc967f32c3282b47..ad0118fbce90d79e80211a923fced106c39f9316 100644 (file)
@@ -346,6 +346,7 @@ SECTIONS
        /DISCARD/ : {
                *(.eh_frame)
                *(__func_stack_frame_non_standard)
+               *(__unreachable)
        }
 }
 
index 2fd7586aad4d60e35bf26ae772e543a5ef222595..1cda35277278ab80b46effdd24e94e5e1596b9fb 100644 (file)
@@ -4102,7 +4102,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
                                 * as a SMAP violation if all of the following
                                 * conditions are true:
                                 *   - X86_CR4_SMAP is set in CR4
-                                *   - An user page is accessed
+                                *   - A user page is accessed
                                 *   - Page fault in kernel mode
                                 *   - if CPL = 3 or X86_EFLAGS_AC is clear
                                 *
index 0d4fb3ebbbac9872aaaf26514211ae543a253299..99c7805a96937c17fffa7b92eb72a8b8c776ccbb 100644 (file)
@@ -154,14 +154,12 @@ static inline void get_head_page_multiple(struct page *page, int nr)
        SetPageReferenced(page);
 }
 
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge(unsigned long pfn, unsigned long addr,
                unsigned long end, struct page **pages, int *nr)
 {
        int nr_start = *nr;
-       unsigned long pfn = pmd_pfn(pmd);
        struct dev_pagemap *pgmap = NULL;
 
-       pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
        do {
                struct page *page = pfn_to_page(pfn);
 
@@ -180,6 +178,24 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
        return 1;
 }
 
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+               unsigned long end, struct page **pages, int *nr)
+{
+       unsigned long fault_pfn;
+
+       fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+       return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+               unsigned long end, struct page **pages, int *nr)
+{
+       unsigned long fault_pfn;
+
+       fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+       return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
 {
@@ -251,9 +267,13 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
 
        if (!pte_allows_gup(pud_val(pud), write))
                return 0;
+
+       VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
+       if (pud_devmap(pud))
+               return __gup_device_huge_pud(pud, addr, end, pages, nr);
+
        /* hugepages are never "special" */
        VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
-       VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
 
        refs = 0;
        head = pud_page(pud);
index 928d657de8295d8f617f3e0b7ba69be81006be4b..2b4b53e6793f16b24d2e96199166c724d068f339 100644 (file)
@@ -864,9 +864,6 @@ static noinline int do_test_wp_bit(void)
        return flag;
 }
 
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
 int kernel_set_to_readonly __read_mostly;
 
 void set_kernel_text_rw(void)
@@ -939,7 +936,6 @@ void mark_rodata_ro(void)
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
                size >> 10);
-       rodata_test();
 
 #ifdef CONFIG_CPA_DEBUG
        printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
index 97346f987ef20d80ab74eba8801f3475906a0c32..15173d37f399610caf8969fa942b17b175dd466b 100644 (file)
@@ -1000,9 +1000,6 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
 }
 
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
 int kernel_set_to_readonly;
 
 void set_kernel_text_rw(void)
@@ -1071,8 +1068,6 @@ void mark_rodata_ro(void)
        all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
        set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
 
-       rodata_test();
-
 #ifdef CONFIG_CPA_DEBUG
        printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
        set_memory_rw(start, (end-start) >> PAGE_SHIFT);
index aad4ac386f98c34d7cec84a35974b22207bcea6a..c98079684bdb29d5a1248e6c0b804ffc3477eb90 100644 (file)
@@ -51,7 +51,7 @@ static unsigned long mpx_mmap(unsigned long len)
 
        down_write(&mm->mmap_sem);
        addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
-                       MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
+                      MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
        up_write(&mm->mmap_sem);
        if (populate)
                mm_populate(addr, populate);
@@ -893,7 +893,7 @@ static int unmap_entire_bt(struct mm_struct *mm,
         * avoid recursion, do_munmap() will check whether it comes
         * from one bounds table through VM_MPX flag.
         */
-       return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
+       return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
 }
 
 static int try_unmap_single_bt(struct mm_struct *mm,
index 3feec5af4e67c096b9bd663edc4a94fb587f67bb..6cbdff26bb96a25939a0e818d4895b42b1a5a130 100644 (file)
@@ -445,6 +445,26 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
 
        return changed;
 }
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+                         pud_t *pudp, pud_t entry, int dirty)
+{
+       int changed = !pud_same(*pudp, entry);
+
+       VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+
+       if (changed && dirty) {
+               *pudp = entry;
+               /*
+                * We had a write-protection fault here and changed the pud
+                * to be more permissive. No need to flush the TLB for that,
+                * #PF is architecturally guaranteed to do that and in the
+                * worst-case we'll generate a spurious fault.
+                */
+       }
+
+       return changed;
+}
 #endif
 
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -474,6 +494,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 
        return ret;
 }
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+                             unsigned long addr, pud_t *pudp)
+{
+       int ret = 0;
+
+       if (pud_young(*pudp))
+               ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+                                        (unsigned long *)pudp);
+
+       return ret;
+}
 #endif
 
 int ptep_clear_flush_young(struct vm_area_struct *vma,
index a4fdfa7dcc1bc414af628411ac7e447651635b1d..0cb52ae0a8f07521ee1cdf6a1075a4221f920884 100644 (file)
@@ -667,7 +667,7 @@ static void set_dma_domain_ops(struct pci_dev *pdev)
        spin_lock(&dma_domain_list_lock);
        list_for_each_entry(domain, &dma_domain_list, node) {
                if (pci_domain_nr(pdev->bus) == domain->domain_nr) {
-                       pdev->dev.archdata.dma_ops = domain->dma_ops;
+                       pdev->dev.dma_ops = domain->dma_ops;
                        break;
                }
        }
index 052c1cb7630538af82e45692ae27d543469b9b9f..ec008e800b45038794ce7beb41cea391dc176c9a 100644 (file)
@@ -179,7 +179,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
 }
 
 /* We have our own dma_ops: the same as swiotlb but from alloc (above) */
-static struct dma_map_ops sta2x11_dma_ops = {
+static const struct dma_map_ops sta2x11_dma_ops = {
        .alloc = sta2x11_swiotlb_alloc_coherent,
        .free = x86_swiotlb_free_coherent,
        .map_page = swiotlb_map_page,
@@ -203,7 +203,7 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
                return;
        pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
        pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
-       pdev->dev.archdata.dma_ops = &sta2x11_dma_ops;
+       pdev->dev.dma_ops = &sta2x11_dma_ops;
 
        /* We must enable all devices as master, for audio DMA to work */
        pci_set_master(pdev);
@@ -223,7 +223,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
        struct sta2x11_mapping *map;
 
-       if (dev->archdata.dma_ops != &sta2x11_dma_ops) {
+       if (dev->dma_ops != &sta2x11_dma_ops) {
                if (!dev->dma_mask)
                        return false;
                return addr + size - 1 <= *dev->dma_mask;
@@ -247,7 +247,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
  */
 dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-       if (dev->archdata.dma_ops != &sta2x11_dma_ops)
+       if (dev->dma_ops != &sta2x11_dma_ops)
                return paddr;
        return p2a(paddr, to_pci_dev(dev));
 }
@@ -259,7 +259,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
  */
 phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-       if (dev->archdata.dma_ops != &sta2x11_dma_ops)
+       if (dev->dma_ops != &sta2x11_dma_ops)
                return daddr;
        return a2p(daddr, to_pci_dev(dev));
 }
index 40983f5b0858355e42a28d4ed5552aa086e859b6..57be88fa34bb7f1f3d3f0d36ff9800393a89eb8d 100644 (file)
@@ -1,2 +1 @@
-obj-$(CONFIG_PMC_ATOM)         += pmc_atom.o
 obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o
diff --git a/arch/x86/platform/atom/pmc_atom.c b/arch/x86/platform/atom/pmc_atom.c
deleted file mode 100644 (file)
index 964ff4f..0000000
+++ /dev/null
@@ -1,460 +0,0 @@
-/*
- * Intel Atom SOC Power Management Controller Driver
- * Copyright (c) 2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/device.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/io.h>
-
-#include <asm/pmc_atom.h>
-
-struct pmc_bit_map {
-       const char *name;
-       u32 bit_mask;
-};
-
-struct pmc_reg_map {
-       const struct pmc_bit_map *d3_sts_0;
-       const struct pmc_bit_map *d3_sts_1;
-       const struct pmc_bit_map *func_dis;
-       const struct pmc_bit_map *func_dis_2;
-       const struct pmc_bit_map *pss;
-};
-
-struct pmc_dev {
-       u32 base_addr;
-       void __iomem *regmap;
-       const struct pmc_reg_map *map;
-#ifdef CONFIG_DEBUG_FS
-       struct dentry *dbgfs_dir;
-#endif /* CONFIG_DEBUG_FS */
-       bool init;
-};
-
-static struct pmc_dev pmc_device;
-static u32 acpi_base_addr;
-
-static const struct pmc_bit_map d3_sts_0_map[] = {
-       {"LPSS1_F0_DMA",        BIT_LPSS1_F0_DMA},
-       {"LPSS1_F1_PWM1",       BIT_LPSS1_F1_PWM1},
-       {"LPSS1_F2_PWM2",       BIT_LPSS1_F2_PWM2},
-       {"LPSS1_F3_HSUART1",    BIT_LPSS1_F3_HSUART1},
-       {"LPSS1_F4_HSUART2",    BIT_LPSS1_F4_HSUART2},
-       {"LPSS1_F5_SPI",        BIT_LPSS1_F5_SPI},
-       {"LPSS1_F6_Reserved",   BIT_LPSS1_F6_XXX},
-       {"LPSS1_F7_Reserved",   BIT_LPSS1_F7_XXX},
-       {"SCC_EMMC",            BIT_SCC_EMMC},
-       {"SCC_SDIO",            BIT_SCC_SDIO},
-       {"SCC_SDCARD",          BIT_SCC_SDCARD},
-       {"SCC_MIPI",            BIT_SCC_MIPI},
-       {"HDA",                 BIT_HDA},
-       {"LPE",                 BIT_LPE},
-       {"OTG",                 BIT_OTG},
-       {"USH",                 BIT_USH},
-       {"GBE",                 BIT_GBE},
-       {"SATA",                BIT_SATA},
-       {"USB_EHCI",            BIT_USB_EHCI},
-       {"SEC",                 BIT_SEC},
-       {"PCIE_PORT0",          BIT_PCIE_PORT0},
-       {"PCIE_PORT1",          BIT_PCIE_PORT1},
-       {"PCIE_PORT2",          BIT_PCIE_PORT2},
-       {"PCIE_PORT3",          BIT_PCIE_PORT3},
-       {"LPSS2_F0_DMA",        BIT_LPSS2_F0_DMA},
-       {"LPSS2_F1_I2C1",       BIT_LPSS2_F1_I2C1},
-       {"LPSS2_F2_I2C2",       BIT_LPSS2_F2_I2C2},
-       {"LPSS2_F3_I2C3",       BIT_LPSS2_F3_I2C3},
-       {"LPSS2_F3_I2C4",       BIT_LPSS2_F4_I2C4},
-       {"LPSS2_F5_I2C5",       BIT_LPSS2_F5_I2C5},
-       {"LPSS2_F6_I2C6",       BIT_LPSS2_F6_I2C6},
-       {"LPSS2_F7_I2C7",       BIT_LPSS2_F7_I2C7},
-       {},
-};
-
-static struct pmc_bit_map byt_d3_sts_1_map[] = {
-       {"SMB",                 BIT_SMB},
-       {"OTG_SS_PHY",          BIT_OTG_SS_PHY},
-       {"USH_SS_PHY",          BIT_USH_SS_PHY},
-       {"DFX",                 BIT_DFX},
-       {},
-};
-
-static struct pmc_bit_map cht_d3_sts_1_map[] = {
-       {"SMB",                 BIT_SMB},
-       {"GMM",                 BIT_STS_GMM},
-       {"ISH",                 BIT_STS_ISH},
-       {},
-};
-
-static struct pmc_bit_map cht_func_dis_2_map[] = {
-       {"SMB",                 BIT_SMB},
-       {"GMM",                 BIT_FD_GMM},
-       {"ISH",                 BIT_FD_ISH},
-       {},
-};
-
-static const struct pmc_bit_map byt_pss_map[] = {
-       {"GBE",                 PMC_PSS_BIT_GBE},
-       {"SATA",                PMC_PSS_BIT_SATA},
-       {"HDA",                 PMC_PSS_BIT_HDA},
-       {"SEC",                 PMC_PSS_BIT_SEC},
-       {"PCIE",                PMC_PSS_BIT_PCIE},
-       {"LPSS",                PMC_PSS_BIT_LPSS},
-       {"LPE",                 PMC_PSS_BIT_LPE},
-       {"DFX",                 PMC_PSS_BIT_DFX},
-       {"USH_CTRL",            PMC_PSS_BIT_USH_CTRL},
-       {"USH_SUS",             PMC_PSS_BIT_USH_SUS},
-       {"USH_VCCS",            PMC_PSS_BIT_USH_VCCS},
-       {"USH_VCCA",            PMC_PSS_BIT_USH_VCCA},
-       {"OTG_CTRL",            PMC_PSS_BIT_OTG_CTRL},
-       {"OTG_VCCS",            PMC_PSS_BIT_OTG_VCCS},
-       {"OTG_VCCA_CLK",        PMC_PSS_BIT_OTG_VCCA_CLK},
-       {"OTG_VCCA",            PMC_PSS_BIT_OTG_VCCA},
-       {"USB",                 PMC_PSS_BIT_USB},
-       {"USB_SUS",             PMC_PSS_BIT_USB_SUS},
-       {},
-};
-
-static const struct pmc_bit_map cht_pss_map[] = {
-       {"SATA",                PMC_PSS_BIT_SATA},
-       {"HDA",                 PMC_PSS_BIT_HDA},
-       {"SEC",                 PMC_PSS_BIT_SEC},
-       {"PCIE",                PMC_PSS_BIT_PCIE},
-       {"LPSS",                PMC_PSS_BIT_LPSS},
-       {"LPE",                 PMC_PSS_BIT_LPE},
-       {"UFS",                 PMC_PSS_BIT_CHT_UFS},
-       {"UXD",                 PMC_PSS_BIT_CHT_UXD},
-       {"UXD_FD",              PMC_PSS_BIT_CHT_UXD_FD},
-       {"UX_ENG",              PMC_PSS_BIT_CHT_UX_ENG},
-       {"USB_SUS",             PMC_PSS_BIT_CHT_USB_SUS},
-       {"GMM",                 PMC_PSS_BIT_CHT_GMM},
-       {"ISH",                 PMC_PSS_BIT_CHT_ISH},
-       {"DFX_MASTER",          PMC_PSS_BIT_CHT_DFX_MASTER},
-       {"DFX_CLUSTER1",        PMC_PSS_BIT_CHT_DFX_CLUSTER1},
-       {"DFX_CLUSTER2",        PMC_PSS_BIT_CHT_DFX_CLUSTER2},
-       {"DFX_CLUSTER3",        PMC_PSS_BIT_CHT_DFX_CLUSTER3},
-       {"DFX_CLUSTER4",        PMC_PSS_BIT_CHT_DFX_CLUSTER4},
-       {"DFX_CLUSTER5",        PMC_PSS_BIT_CHT_DFX_CLUSTER5},
-       {},
-};
-
-static const struct pmc_reg_map byt_reg_map = {
-       .d3_sts_0       = d3_sts_0_map,
-       .d3_sts_1       = byt_d3_sts_1_map,
-       .func_dis       = d3_sts_0_map,
-       .func_dis_2     = byt_d3_sts_1_map,
-       .pss            = byt_pss_map,
-};
-
-static const struct pmc_reg_map cht_reg_map = {
-       .d3_sts_0       = d3_sts_0_map,
-       .d3_sts_1       = cht_d3_sts_1_map,
-       .func_dis       = d3_sts_0_map,
-       .func_dis_2     = cht_func_dis_2_map,
-       .pss            = cht_pss_map,
-};
-
-static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset)
-{
-       return readl(pmc->regmap + reg_offset);
-}
-
-static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val)
-{
-       writel(val, pmc->regmap + reg_offset);
-}
-
-int pmc_atom_read(int offset, u32 *value)
-{
-       struct pmc_dev *pmc = &pmc_device;
-
-       if (!pmc->init)
-               return -ENODEV;
-
-       *value = pmc_reg_read(pmc, offset);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(pmc_atom_read);
-
-int pmc_atom_write(int offset, u32 value)
-{
-       struct pmc_dev *pmc = &pmc_device;
-
-       if (!pmc->init)
-               return -ENODEV;
-
-       pmc_reg_write(pmc, offset, value);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(pmc_atom_write);
-
-static void pmc_power_off(void)
-{
-       u16     pm1_cnt_port;
-       u32     pm1_cnt_value;
-
-       pr_info("Preparing to enter system sleep state S5\n");
-
-       pm1_cnt_port = acpi_base_addr + PM1_CNT;
-
-       pm1_cnt_value = inl(pm1_cnt_port);
-       pm1_cnt_value &= SLEEP_TYPE_MASK;
-       pm1_cnt_value |= SLEEP_TYPE_S5;
-       pm1_cnt_value |= SLEEP_ENABLE;
-
-       outl(pm1_cnt_value, pm1_cnt_port);
-}
-
-static void pmc_hw_reg_setup(struct pmc_dev *pmc)
-{
-       /*
-        * Disable PMC S0IX_WAKE_EN events coming from:
-        * - LPC clock run
-        * - GPIO_SUS ored dedicated IRQs
-        * - GPIO_SCORE ored dedicated IRQs
-        * - GPIO_SUS shared IRQ
-        * - GPIO_SCORE shared IRQ
-        */
-       pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static void pmc_dev_state_print(struct seq_file *s, int reg_index,
-                               u32 sts, const struct pmc_bit_map *sts_map,
-                               u32 fd, const struct pmc_bit_map *fd_map)
-{
-       int offset = PMC_REG_BIT_WIDTH * reg_index;
-       int index;
-
-       for (index = 0; sts_map[index].name; index++) {
-               seq_printf(s, "Dev: %-2d - %-32s\tState: %s [%s]\n",
-                       offset + index, sts_map[index].name,
-                       fd_map[index].bit_mask & fd ?  "Disabled" : "Enabled ",
-                       sts_map[index].bit_mask & sts ?  "D3" : "D0");
-       }
-}
-
-static int pmc_dev_state_show(struct seq_file *s, void *unused)
-{
-       struct pmc_dev *pmc = s->private;
-       const struct pmc_reg_map *m = pmc->map;
-       u32 func_dis, func_dis_2;
-       u32 d3_sts_0, d3_sts_1;
-
-       func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS);
-       func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2);
-       d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0);
-       d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1);
-
-       /* Low part */
-       pmc_dev_state_print(s, 0, d3_sts_0, m->d3_sts_0, func_dis, m->func_dis);
-
-       /* High part */
-       pmc_dev_state_print(s, 1, d3_sts_1, m->d3_sts_1, func_dis_2, m->func_dis_2);
-
-       return 0;
-}
-
-static int pmc_dev_state_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, pmc_dev_state_show, inode->i_private);
-}
-
-static const struct file_operations pmc_dev_state_ops = {
-       .open           = pmc_dev_state_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static int pmc_pss_state_show(struct seq_file *s, void *unused)
-{
-       struct pmc_dev *pmc = s->private;
-       const struct pmc_bit_map *map = pmc->map->pss;
-       u32 pss = pmc_reg_read(pmc, PMC_PSS);
-       int index;
-
-       for (index = 0; map[index].name; index++) {
-               seq_printf(s, "Island: %-2d - %-32s\tState: %s\n",
-                       index, map[index].name,
-                       map[index].bit_mask & pss ? "Off" : "On");
-       }
-       return 0;
-}
-
-static int pmc_pss_state_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, pmc_pss_state_show, inode->i_private);
-}
-
-static const struct file_operations pmc_pss_state_ops = {
-       .open           = pmc_pss_state_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static int pmc_sleep_tmr_show(struct seq_file *s, void *unused)
-{
-       struct pmc_dev *pmc = s->private;
-       u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr;
-
-       s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT;
-       s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT;
-       s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT;
-       s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT;
-       s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT;
-
-       seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr);
-       seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr);
-       seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr);
-       seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr);
-       seq_printf(s, "S0   Residency:\t%lldus\n", s0_tmr);
-       return 0;
-}
-
-static int pmc_sleep_tmr_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, pmc_sleep_tmr_show, inode->i_private);
-}
-
-static const struct file_operations pmc_sleep_tmr_ops = {
-       .open           = pmc_sleep_tmr_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static void pmc_dbgfs_unregister(struct pmc_dev *pmc)
-{
-       debugfs_remove_recursive(pmc->dbgfs_dir);
-}
-
-static int pmc_dbgfs_register(struct pmc_dev *pmc)
-{
-       struct dentry *dir, *f;
-
-       dir = debugfs_create_dir("pmc_atom", NULL);
-       if (!dir)
-               return -ENOMEM;
-
-       pmc->dbgfs_dir = dir;
-
-       f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO,
-                               dir, pmc, &pmc_dev_state_ops);
-       if (!f)
-               goto err;
-
-       f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO,
-                               dir, pmc, &pmc_pss_state_ops);
-       if (!f)
-               goto err;
-
-       f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO,
-                               dir, pmc, &pmc_sleep_tmr_ops);
-       if (!f)
-               goto err;
-
-       return 0;
-err:
-       pmc_dbgfs_unregister(pmc);
-       return -ENODEV;
-}
-#else
-static int pmc_dbgfs_register(struct pmc_dev *pmc)
-{
-       return 0;
-}
-#endif /* CONFIG_DEBUG_FS */
-
-static int pmc_setup_dev(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-       struct pmc_dev *pmc = &pmc_device;
-       const struct pmc_reg_map *map = (struct pmc_reg_map *)ent->driver_data;
-       int ret;
-
-       /* Obtain ACPI base address */
-       pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr);
-       acpi_base_addr &= ACPI_BASE_ADDR_MASK;
-
-       /* Install power off function */
-       if (acpi_base_addr != 0 && pm_power_off == NULL)
-               pm_power_off = pmc_power_off;
-
-       pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr);
-       pmc->base_addr &= PMC_BASE_ADDR_MASK;
-
-       pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN);
-       if (!pmc->regmap) {
-               dev_err(&pdev->dev, "error: ioremap failed\n");
-               return -ENOMEM;
-       }
-
-       pmc->map = map;
-
-       /* PMC hardware registers setup */
-       pmc_hw_reg_setup(pmc);
-
-       ret = pmc_dbgfs_register(pmc);
-       if (ret)
-               dev_warn(&pdev->dev, "debugfs register failed\n");
-
-       pmc->init = true;
-       return ret;
-}
-
-/*
- * Data for PCI driver interface
- *
- * used by pci_match_id() call below.
- */
-static const struct pci_device_id pmc_pci_ids[] = {
-       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_VLV_PMC), (kernel_ulong_t)&byt_reg_map },
-       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_CHT_PMC), (kernel_ulong_t)&cht_reg_map },
-       { 0, },
-};
-
-static int __init pmc_atom_init(void)
-{
-       struct pci_dev *pdev = NULL;
-       const struct pci_device_id *ent;
-
-       /* We look for our device - PCU PMC
-        * we assume that there is max. one device.
-        *
-        * We can't use plain pci_driver mechanism,
-        * as the device is really a multiple function device,
-        * main driver that binds to the pci_device is lpc_ich
-        * and have to find & bind to the device this way.
-        */
-       for_each_pci_dev(pdev) {
-               ent = pci_match_id(pmc_pci_ids, pdev);
-               if (ent)
-                       return pmc_setup_dev(pdev, ent);
-       }
-       /* Device not found. */
-       return -ENODEV;
-}
-
-device_initcall(pmc_atom_init);
-
-/*
-MODULE_AUTHOR("Aubrey Li <aubrey.li@linux.intel.com>");
-MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface");
-MODULE_LICENSE("GPL v2");
-*/
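
The hunks above delete arch/x86/platform/atom/pmc_atom.c outright, and the Makefile hunk drops its build rule. One pattern worth keeping from the deleted code is its manual PCI bind: because the lpc_ich driver already claims this PCI function, pmc_atom matched its ID table by hand at initcall time instead of registering a pci_driver. A condensed, hypothetical rendition of that pattern (the device ID and setup hook are placeholders):

#include <linux/init.h>
#include <linux/pci.h>

static const struct pci_device_id my_pmc_ids[] = {
        { PCI_VDEVICE(INTEL, 0x0f1c) },  /* placeholder device ID */
        { 0, },
};

static int __init my_pmc_init(void)
{
        struct pci_dev *pdev = NULL;
        const struct pci_device_id *ent;

        /* Walk every PCI device; we cannot bind normally because another
         * driver (lpc_ich) owns this function. */
        for_each_pci_dev(pdev) {
                ent = pci_match_id(my_pmc_ids, pdev);
                if (ent)
                        return 0;  /* found it: do device setup here */
        }
        return -ENODEV;
}
device_initcall(my_pmc_init);
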
index a0b36a9d5df149e6370fb3b6be0d5894bae72bd7..42b08f8fc2cae906d5c5101bcd8f6c59e51d4568 100644
@@ -18,7 +18,7 @@
 
 int xen_swiotlb __read_mostly;
 
-static struct dma_map_ops xen_swiotlb_dma_ops = {
+static const struct dma_map_ops xen_swiotlb_dma_ops = {
        .alloc = xen_swiotlb_alloc_coherent,
        .free = xen_swiotlb_free_coherent,
        .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
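
The xen-swiotlb hunk is another piece of the same DMA-ops series: once struct device holds a const pointer, every dma_map_ops table in the tree can be const-qualified and placed in read-only data. The idiom in isolation, with a placeholder callback:

#include <linux/dma-mapping.h>

static void *my_alloc(struct device *dev, size_t size, dma_addr_t *handle,
                      gfp_t gfp, unsigned long attrs)
{
        return NULL;  /* placeholder; a real driver allocates here */
}

static const struct dma_map_ops my_dma_ops = {
        .alloc = my_alloc,  /* const: the table lands in .rodata */
};
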
index 9e9760b20be583689bc140a9559a6b83f49acd0c..f41408c53fe1e029e379b0017b3029873f7058bf 100644
@@ -31,3 +31,4 @@ generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += kprobes.h
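
The one-line Kbuild addition above tells the build system to synthesize asm/kprobes.h for this architecture rather than requiring a hand-written stub. The generated wrapper is, in essence, a single include:

/* include/generated/asm/kprobes.h, as emitted by Kbuild for a
 * 'generic-y += kprobes.h' entry (approximate content) */
#include <asm-generic/kprobes.h>
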
index fe1f5c878493845703b188747857aedc5cda7a21..1deeb8ebbb1bc30f236e3592d9920c678ba533ba 100644
@@ -6,11 +6,7 @@
 #ifndef _ASM_XTENSA_DEVICE_H
 #define _ASM_XTENSA_DEVICE_H
 
-struct dma_map_ops;
-
 struct dev_archdata {
-       /* DMA operations on that device */
-       struct dma_map_ops *dma_ops;
 };
 
 struct pdev_archdata {
index 3fc1170a64880cdfc495fb3aca16fe8904d807b3..c6140fa8c0beb7512f9973218399ee03f1b38cc1 100644
 
 #define DMA_ERROR_CODE         (~(dma_addr_t)0x0)
 
-extern struct dma_map_ops xtensa_dma_map_ops;
+extern const struct dma_map_ops xtensa_dma_map_ops;
 
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-       if (dev && dev->archdata.dma_ops)
-               return dev->archdata.dma_ops;
-       else
-               return &xtensa_dma_map_ops;
+       return &xtensa_dma_map_ops;
 }
 
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
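
With dev->dma_ops living on struct device, xtensa's own get_dma_ops() disappears and the architecture only supplies the fallback via get_arch_dma_ops(). The common helper that replaces it in <linux/dma-mapping.h> looks roughly like this in this series:

static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
{
        if (dev && dev->dma_ops)
                return dev->dma_ops;                    /* per-device override */
        return get_arch_dma_ops(dev ? dev->bus : NULL); /* arch-wide default */
}
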
index 70e362e6038e80c0e802bf6f2a1ebb3360292c07..cec86a1c2acce6845238da7ef1994e1e1ef1a6f0 100644
@@ -158,7 +158,8 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
                flag |= GFP_DMA;
 
        if (gfpflags_allow_blocking(flag))
-               page = dma_alloc_from_contiguous(dev, count, get_order(size));
+               page = dma_alloc_from_contiguous(dev, count, get_order(size),
+                                                flag);
 
        if (!page)
                page = alloc_pages(flag, get_order(size));
@@ -249,7 +250,7 @@ int xtensa_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
        return 0;
 }
 
-struct dma_map_ops xtensa_dma_map_ops = {
+const struct dma_map_ops xtensa_dma_map_ops = {
        .alloc = xtensa_dma_alloc,
        .free = xtensa_dma_free,
        .map_page = xtensa_map_page,
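
dma_alloc_from_contiguous() gained a gfp_t parameter in this cycle, and the first hunk above is one of the call-site updates. A self-contained sketch of the CMA-then-buddy allocation flow it implements:

#include <linux/dma-contiguous.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* Sketch: mirror of the xtensa_dma_alloc() page-allocation flow above. */
static struct page *alloc_dma_pages(struct device *dev, size_t size,
                                    gfp_t flag)
{
        size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        struct page *page = NULL;

        if (gfpflags_allow_blocking(flag))      /* CMA allocation may sleep */
                page = dma_alloc_from_contiguous(dev, count,
                                                 get_order(size), flag);
        if (!page)                              /* fall back to the buddy allocator */
                page = alloc_pages(flag, get_order(size));
        return page;
}
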
index fc4ad21a5ed44f9b170d9587c0934e581f75cd01..fcea72019df798110aa9195fe36219fe668a6e38 100644
@@ -135,8 +135,8 @@ void secondary_start_kernel(void)
 
        /* All kernel threads share the same mm context. */
 
-       atomic_inc(&mm->mm_users);
-       atomic_inc(&mm->mm_count);
+       mmget(mm);
+       mmgrab(mm);
        current->active_mm = mm;
        cpumask_set_cpu(cpu, mm_cpumask(mm));
        enter_lazy_tlb(mm, current);
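
The xtensa SMP bring-up code switches to the mm refcount helpers introduced this cycle: mmgrab() takes a reference on the mm_struct itself (mm_count), while mmget() takes a reference on the address space (mm_users). Both are thin wrappers, approximately:

static inline void mmgrab(struct mm_struct *mm)
{
        atomic_inc(&mm->mm_count);      /* pins the struct itself */
}

static inline void mmget(struct mm_struct *mm)
{
        atomic_inc(&mm->mm_users);      /* pins the address space too */
}
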
index 0715ce93daef42001407f690912a1b2a437e5a6e..58fc8684788d1f9fe7894c5afefa46c05692bf27 100644
@@ -69,50 +69,6 @@ config MQ_IOSCHED_DEADLINE
        ---help---
          MQ version of the deadline IO scheduler.
 
-config MQ_IOSCHED_NONE
-       bool
-       default y
-
-choice
-       prompt "Default single-queue blk-mq I/O scheduler"
-       default DEFAULT_SQ_NONE
-       help
-         Select the I/O scheduler which will be used by default for blk-mq
-         managed block devices with a single queue.
-
-       config DEFAULT_SQ_DEADLINE
-               bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
-
-       config DEFAULT_SQ_NONE
-               bool "None"
-
-endchoice
-
-config DEFAULT_SQ_IOSCHED
-       string
-       default "mq-deadline" if DEFAULT_SQ_DEADLINE
-       default "none" if DEFAULT_SQ_NONE
-
-choice
-       prompt "Default multi-queue blk-mq I/O scheduler"
-       default DEFAULT_MQ_NONE
-       help
-         Select the I/O scheduler which will be used by default for blk-mq
-         managed block devices with multiple queues.
-
-       config DEFAULT_MQ_DEADLINE
-               bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
-
-       config DEFAULT_MQ_NONE
-               bool "None"
-
-endchoice
-
-config DEFAULT_MQ_IOSCHED
-       string
-       default "mq-deadline" if DEFAULT_MQ_DEADLINE
-       default "none" if DEFAULT_MQ_NONE
-
 endmenu
 
 endif
index 4b564d0c3e29a4c15becf545a996794b296ae622..5eec5e08417f6ff1989e3e2a07b31c62901953d5 100644
@@ -625,21 +625,20 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
 }
 EXPORT_SYMBOL(bio_clone_fast);
 
-/**
- *     bio_clone_bioset - clone a bio
- *     @bio_src: bio to clone
- *     @gfp_mask: allocation priority
- *     @bs: bio_set to allocate from
- *
- *     Clone bio. Caller will own the returned bio, but not the actual data it
- *     points to. Reference count of returned bio will be one.
- */
-struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
-                            struct bio_set *bs)
+static struct bio *__bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
+                                     struct bio_set *bs, int offset,
+                                     int size)
 {
        struct bvec_iter iter;
        struct bio_vec bv;
        struct bio *bio;
+       struct bvec_iter iter_src = bio_src->bi_iter;
+
+       /* for supporting partial clone */
+       if (offset || size != bio_src->bi_iter.bi_size) {
+               bio_advance_iter(bio_src, &iter_src, offset);
+               iter_src.bi_size = size;
+       }
 
        /*
         * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
@@ -663,7 +662,8 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
         *    __bio_clone_fast() anyways.
         */
 
-       bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
+       bio = bio_alloc_bioset(gfp_mask, __bio_segments(bio_src,
+                              &iter_src), bs);
        if (!bio)
                return NULL;
        bio->bi_bdev            = bio_src->bi_bdev;
@@ -680,7 +680,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
                bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
                break;
        default:
-               bio_for_each_segment(bv, bio_src, iter)
+               __bio_for_each_segment(bv, bio_src, iter, iter_src)
                        bio->bi_io_vec[bio->bi_vcnt++] = bv;
                break;
        }
@@ -699,8 +699,43 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
 
        return bio;
 }
+
+/**
+ *     bio_clone_bioset - clone a bio
+ *     @bio_src: bio to clone
+ *     @gfp_mask: allocation priority
+ *     @bs: bio_set to allocate from
+ *
+ *     Clone bio. Caller will own the returned bio, but not the actual data it
+ *     points to. Reference count of returned bio will be one.
+ */
+struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
+                            struct bio_set *bs)
+{
+       return __bio_clone_bioset(bio_src, gfp_mask, bs, 0,
+                                 bio_src->bi_iter.bi_size);
+}
 EXPORT_SYMBOL(bio_clone_bioset);
 
+/**
+ *     bio_clone_bioset_partial - clone a partial bio
+ *     @bio_src: bio to clone
+ *     @gfp_mask: allocation priority
+ *     @bs: bio_set to allocate from
+ *     @offset: cloned starting from the offset
+ *     @size: size for the cloned bio
+ *
+ *     Clone bio. Caller will own the returned bio, but not the actual data it
+ *     points to. Reference count of returned bio will be one.
+ */
+struct bio *bio_clone_bioset_partial(struct bio *bio_src, gfp_t gfp_mask,
+                                    struct bio_set *bs, int offset,
+                                    int size)
+{
+       return __bio_clone_bioset(bio_src, gfp_mask, bs, offset, size);
+}
+EXPORT_SYMBOL(bio_clone_bioset_partial);
+
 /**
  *     bio_add_pc_page -       attempt to add page to bio
  *     @q: the target queue
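
The new bio_clone_bioset_partial() export lets a caller clone just a byte range of the source bio; internally, __bio_clone_bioset() advances a private copy of bi_iter before walking the segments. A hedged usage sketch, assuming a caller that already owns a bio_set:

#include <linux/bio.h>

/* Sketch: clone only the back half of 'src'. Offset and size are in
 * bytes, per the kerneldoc above. */
static struct bio *clone_back_half(struct bio *src, struct bio_set *bs)
{
        int half = src->bi_iter.bi_size / 2;

        return bio_clone_bioset_partial(src, GFP_NOIO, bs,
                                        src->bi_iter.bi_size - half, half);
}
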
index 9e8d6795a8c1be7eee1c727bb376f209da432ecf..98c7b061781e55f0176bfc24c6345c0c0611f1f3 100644
@@ -205,7 +205,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
         * needing a restart in that case.
         */
        if (!list_empty(&rq_list)) {
-               blk_mq_sched_mark_restart(hctx);
+               blk_mq_sched_mark_restart_hctx(hctx);
                did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
        } else if (!has_sched_dispatch) {
                blk_mq_flush_busy_ctxs(hctx, &rq_list);
@@ -331,20 +331,16 @@ static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 
 void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
 {
+       struct request_queue *q = hctx->queue;
        unsigned int i;
 
-       if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+       if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
+               if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
+                       queue_for_each_hw_ctx(q, hctx, i)
+                               blk_mq_sched_restart_hctx(hctx);
+               }
+       } else {
                blk_mq_sched_restart_hctx(hctx);
-       else {
-               struct request_queue *q = hctx->queue;
-
-               if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
-                       return;
-
-               clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
-
-               queue_for_each_hw_ctx(q, hctx, i)
-                       blk_mq_sched_restart_hctx(hctx);
        }
 }
 
@@ -498,15 +494,6 @@ int blk_mq_sched_init(struct request_queue *q)
 {
        int ret;
 
-#if defined(CONFIG_DEFAULT_SQ_NONE)
-       if (q->nr_hw_queues == 1)
-               return 0;
-#endif
-#if defined(CONFIG_DEFAULT_MQ_NONE)
-       if (q->nr_hw_queues > 1)
-               return 0;
-#endif
-
        mutex_lock(&q->sysfs_lock);
        ret = elevator_init(q, NULL);
        mutex_unlock(&q->sysfs_lock);
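
The rewritten restart path in blk_mq_sched_restart_queues() reads the queue flag with a plain test_bit() before attempting the atomic test_and_clear_bit(), so the common no-restart case never dirties the cacheline. The idiom in isolation, with a placeholder bit number:

#include <linux/bitops.h>

static void maybe_run_once(unsigned long *flags)
{
        if (test_bit(0, flags) &&               /* cheap read-only probe */
            test_and_clear_bit(0, flags)) {     /* atomic claim */
                /* exactly one CPU gets here per setting of the bit */
        }
}
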
index 7b5f3b95c78e93c41d7e680090fa67b638ea5be2..a75b16b123f7aadac672651a7eef5c79f5553e16 100644
@@ -122,17 +122,27 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
        return false;
 }
 
-static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
+/*
+ * Mark a hardware queue as needing a restart.
+ */
+static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
-       if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
+       if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
                set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-               if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-                       struct request_queue *q = hctx->queue;
+}
+
+/*
+ * Mark a hardware queue and the request queue it belongs to as needing a
+ * restart.
+ */
+static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx)
+{
+       struct request_queue *q = hctx->queue;
 
-                       if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
-                               set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
-               }
-       }
+       if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+               set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+       if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
+               set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
 }
 
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
index b29e7dc7b309e4cf939cd2c011b26b38dfd4df73..9e6b064e533979446a936c45c18f500c6f87725b 100644
@@ -904,6 +904,44 @@ static bool reorder_tags_to_front(struct list_head *list)
        return first != NULL;
 }
 
+static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags,
+                               void *key)
+{
+       struct blk_mq_hw_ctx *hctx;
+
+       hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
+
+       list_del(&wait->task_list);
+       clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
+       blk_mq_run_hw_queue(hctx, true);
+       return 1;
+}
+
+static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
+{
+       struct sbq_wait_state *ws;
+
+       /*
+        * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.
+        * The thread which wins the race to grab this bit adds the hardware
+        * queue to the wait queue.
+        */
+       if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||
+           test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))
+               return false;
+
+       init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
+       ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
+
+       /*
+        * As soon as this returns, it's no longer safe to fiddle with
+        * hctx->dispatch_wait, since a completion can wake up the wait queue
+        * and unlock the bit.
+        */
+       add_wait_queue(&ws->wait, &hctx->dispatch_wait);
+       return true;
+}
+
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 {
        struct request_queue *q = hctx->queue;
@@ -931,15 +969,22 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
                                continue;
 
                        /*
-                        * We failed getting a driver tag. Mark the queue(s)
-                        * as needing a restart. Retry getting a tag again,
-                        * in case the needed IO completed right before we
-                        * marked the queue as needing a restart.
+                        * The initial allocation attempt failed, so we need to
+                        * rerun the hardware queue when a tag is freed.
                         */
-                       blk_mq_sched_mark_restart(hctx);
-                       if (!blk_mq_get_driver_tag(rq, &hctx, false))
+                       if (blk_mq_dispatch_wait_add(hctx)) {
+                               /*
+                                * It's possible that a tag was freed in the
+                                * window between the allocation failure and
+                                * adding the hardware queue to the wait queue.
+                                */
+                               if (!blk_mq_get_driver_tag(rq, &hctx, false))
+                                       break;
+                       } else {
                                break;
+                       }
                }
+
                list_del_init(&rq->queuelist);
 
                bd.rq = rq;
@@ -995,10 +1040,11 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
                 *
                 * blk_mq_run_hw_queue() already checks the STOPPED bit
                 *
-                * If RESTART is set, then let completion restart the queue
-                * instead of potentially looping here.
+                * If RESTART or TAG_WAITING is set, then let completion restart
+                * the queue instead of potentially looping here.
                 */
-               if (!blk_mq_sched_needs_restart(hctx))
+               if (!blk_mq_sched_needs_restart(hctx) &&
+                   !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
                        blk_mq_run_hw_queue(hctx, true);
        }
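
The TAG_WAITING machinery above leans on a generic waitqueue feature: a wait_queue_t can carry an arbitrary callback instead of a sleeping task, and the callback runs in the waker's context. A stripped-down sketch of that pattern (4.11-era wait_queue_t, with a hypothetical work kick):

#include <linux/wait.h>

static int my_wake(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
        list_del(&wait->task_list);  /* detach, as blk_mq_dispatch_wake does */
        /* ... kick the deferred work that was waiting for this event ... */
        return 1;                    /* report one waiter handled */
}

static void arm_wait(wait_queue_head_t *wq, wait_queue_t *wait)
{
        init_waitqueue_func_entry(wait, my_wake);
        add_wait_queue(wq, wait);
}
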
 
index 82fd0cc394ebd9d6ad3a0e02b9d5556baccc14c4..8fab716e40596199d680dba33230d8c01d37b45c 100644
@@ -185,7 +185,7 @@ static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq)
  * sq_to_td - return throtl_data the specified service queue belongs to
  * @sq: the throtl_service_queue of interest
  *
- * A service_queue can be embeded in either a throtl_grp or throtl_data.
+ * A service_queue can be embedded in either a throtl_grp or throtl_data.
  * Determine the associated throtl_data accordingly and return it.
  */
 static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
index a9a8b8e0446f4434616a4f4d7f5513bd3a8936c9..74835dbf0c47c3fc1c6f2f80c9da53ee112402da 100644
@@ -573,7 +573,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        int ret;
        ssize_t bytes_read;
 
-       dprintk("%s: read %Zd bytes\n", bd->name, count);
+       dprintk("%s: read %zd bytes\n", bd->name, count);
 
        bsg_set_block(bd, file);
 
@@ -648,7 +648,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
        ssize_t bytes_written;
        int ret;
 
-       dprintk("%s: write %Zd bytes\n", bd->name, count);
+       dprintk("%s: write %zd bytes\n", bd->name, count);
 
        if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
                return -EINVAL;
@@ -667,7 +667,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
        if (!bytes_written || err_block_err(ret))
                bytes_written = ret;
 
-       dprintk("%s: returning %Zd\n", bd->name, bytes_written);
+       dprintk("%s: returning %zd\n", bd->name, bytes_written);
        return bytes_written;
 }
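
The bsg hunks are a format-string cleanup: %Zd was a kernel-private alias that is apparently being retired, while %zd is the C99 length modifier for ssize_t (and %zu for size_t). For example:

size_t len = 2048;
ssize_t ret = -22;

printk(KERN_DEBUG "buffer holds %zu bytes\n", len);   /* size_t  -> %zu */
printk(KERN_DEBUG "call returned %zd\n", ret);        /* ssize_t -> %zd */
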
 
index 699d10f71a2cac3f871bb6879f68f1fc5fafe714..ac1c9f481a9895525b98601cf96837fd5b4015b6 100644
@@ -220,17 +220,24 @@ int elevator_init(struct request_queue *q, char *name)
        }
 
        if (!e) {
-               if (q->mq_ops && q->nr_hw_queues == 1)
-                       e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
-               else if (q->mq_ops)
-                       e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
-               else
+               /*
+                * For blk-mq devices, we default to using mq-deadline,
+                * if available, for single queue devices. If deadline
+                * isn't available OR we have multiple queues, default
+                * to "none".
+                */
+               if (q->mq_ops) {
+                       if (q->nr_hw_queues == 1)
+                               e = elevator_get("mq-deadline", false);
+                       if (!e)
+                               return 0;
+               } else
                        e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
 
                if (!e) {
                        printk(KERN_ERR
                                "Default I/O scheduler not found. " \
-                               "Using noop/none.\n");
+                               "Using noop.\n");
                        e = elevator_get("noop", false);
                }
        }
index 3631cd4802955247d27316c10b172744700e6340..2f444b87a5f244db1bd65fea29eea1daae265c1a 100644
@@ -669,14 +669,14 @@ void del_gendisk(struct gendisk *disk)
        disk_part_iter_init(&piter, disk,
                             DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
        while ((part = disk_part_iter_next(&piter))) {
-               bdev_unhash_inode(MKDEV(disk->major,
-                                       disk->first_minor + part->partno));
                invalidate_partition(disk, part->partno);
+               bdev_unhash_inode(part_devt(part));
                delete_partition(disk, part->partno);
        }
        disk_part_iter_exit(&piter);
 
        invalidate_partition(disk, 0);
+       bdev_unhash_inode(disk_devt(disk));
        set_capacity(disk, 0);
        disk->flags &= ~GENHD_FL_UP;
 
index d1c52ba4d62dcf7b7a34870ec6b3095eed8b5c15..1e18dca360fc501033762d4c505c2e32c4674ee6 100644
 #define IO_BUFFER_LENGTH 2048
 #define MAX_TOKS 64
 
-typedef int (*opal_step)(struct opal_dev *dev);
+struct opal_step {
+       int (*fn)(struct opal_dev *dev, void *data);
+       void *data;
+};
+typedef int (cont_fn)(struct opal_dev *dev);
 
 enum opal_atom_width {
        OPAL_WIDTH_TINY,
@@ -80,9 +84,7 @@ struct opal_dev {
        void *data;
        sec_send_recv *send_recv;
 
-       const opal_step *funcs;
-       void **func_data;
-       int state;
+       const struct opal_step *steps;
        struct mutex dev_lock;
        u16 comid;
        u32 hsn;
@@ -213,8 +215,6 @@ static const u8 opalmethod[][OPAL_UID_LENGTH] = {
                { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 },
 };
 
-typedef int (cont_fn)(struct opal_dev *dev);
-
 static int end_opal_session_error(struct opal_dev *dev);
 
 struct opal_suspend_data {
@@ -375,18 +375,18 @@ static void check_geometry(struct opal_dev *dev, const void *data)
 
 static int next(struct opal_dev *dev)
 {
-       opal_step func;
-       int error = 0;
+       const struct opal_step *step;
+       int state = 0, error = 0;
 
        do {
-               func = dev->funcs[dev->state];
-               if (!func)
+               step = &dev->steps[state];
+               if (!step->fn)
                        break;
 
-               error = func(dev);
+               error = step->fn(dev, step->data);
                if (error) {
                        pr_err("Error on step function: %d with error %d: %s\n",
-                              dev->state, error,
+                              state, error,
                               opal_error_to_human(error));
 
                        /* For each OPAL command we do a discovery0 then we
@@ -396,10 +396,13 @@ static int next(struct opal_dev *dev)
                         * session. Therefore we shouldn't attempt to terminate
                         * a session, as one has not yet been created.
                         */
-                       if (dev->state > 1)
-                               return end_opal_session_error(dev);
+                       if (state > 1) {
+                               end_opal_session_error(dev);
+                               return error;
+                       }
+
                }
-               dev->state++;
+               state++;
        } while (!error);
 
        return error;
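
The reworked next() walks a NULL-terminated table of { fn, data } pairs, and on a failure past step 1 it now tears down the session but returns the step's own error rather than whatever end_opal_session_error() produced. Composing a command then amounts to declaring a table, as the later hunks in this file do; a minimal sketch reusing this file's static helpers (callers take dev->dev_lock around this in the real code):

static int my_opal_command(struct opal_dev *dev,
                           struct opal_session_info *si)
{
        const struct opal_step steps[] = {
                { opal_discovery0, },            /* no per-step data */
                { start_auth_opal_session, si }, /* data handed to fn */
                { end_opal_session, },
                { NULL, }                        /* terminator */
        };

        setup_opal_dev(dev, steps);
        return next(dev);
}
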
@@ -411,10 +414,17 @@ static int opal_discovery0_end(struct opal_dev *dev)
        const struct d0_header *hdr = (struct d0_header *)dev->resp;
        const u8 *epos = dev->resp, *cpos = dev->resp;
        u16 comid = 0;
+       u32 hlen = be32_to_cpu(hdr->length);
+
+       print_buffer(dev->resp, hlen);
 
-       print_buffer(dev->resp, be32_to_cpu(hdr->length));
+       if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
+               pr_warn("Discovery length overflows buffer (%zu+%u)/%u\n",
+                       sizeof(*hdr), hlen, IO_BUFFER_LENGTH);
+               return -EFAULT;
+       }
 
-       epos += be32_to_cpu(hdr->length); /* end of buffer */
+       epos += hlen; /* end of buffer */
        cpos += sizeof(*hdr); /* current position on buffer */
 
        while (cpos < epos && supported) {
@@ -476,7 +486,7 @@ static int opal_discovery0_end(struct opal_dev *dev)
        return 0;
 }
 
-static int opal_discovery0(struct opal_dev *dev)
+static int opal_discovery0(struct opal_dev *dev, void *data)
 {
        int ret;
 
@@ -662,52 +672,29 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn)
        return 0;
 }
 
-static enum opal_response_token token_type(const struct parsed_resp *resp,
-                                          int n)
-{
-       const struct opal_resp_tok *tok;
-
-       if (n >= resp->num) {
-               pr_err("Token number doesn't exist: %d, resp: %d\n",
-                      n, resp->num);
-               return OPAL_DTA_TOKENID_INVALID;
-       }
-
-       tok = &resp->toks[n];
-       if (tok->len == 0) {
-               pr_err("Token length must be non-zero\n");
-               return OPAL_DTA_TOKENID_INVALID;
-       }
-
-       return tok->type;
-}
-
-/*
- * This function returns 0 in case of invalid token. One should call
- * token_type() first to find out if the token is valid or not.
- */
-static enum opal_token response_get_token(const struct parsed_resp *resp,
-                                         int n)
+static const struct opal_resp_tok *response_get_token(
+                               const struct parsed_resp *resp,
+                               int n)
 {
        const struct opal_resp_tok *tok;
 
        if (n >= resp->num) {
                pr_err("Token number doesn't exist: %d, resp: %d\n",
                       n, resp->num);
-               return 0;
+               return ERR_PTR(-EINVAL);
        }
 
        tok = &resp->toks[n];
        if (tok->len == 0) {
                pr_err("Token length must be non-zero\n");
-               return 0;
+               return ERR_PTR(-EINVAL);
        }
 
-       return tok->pos[0];
+       return tok;
 }
 
-static size_t response_parse_tiny(struct opal_resp_tok *tok,
-                                 const u8 *pos)
+static ssize_t response_parse_tiny(struct opal_resp_tok *tok,
+                                  const u8 *pos)
 {
        tok->pos = pos;
        tok->len = 1;
@@ -723,8 +710,8 @@ static size_t response_parse_tiny(struct opal_resp_tok *tok,
        return tok->len;
 }
 
-static size_t response_parse_short(struct opal_resp_tok *tok,
-                                  const u8 *pos)
+static ssize_t response_parse_short(struct opal_resp_tok *tok,
+                                   const u8 *pos)
 {
        tok->pos = pos;
        tok->len = (pos[0] & SHORT_ATOM_LEN_MASK) + 1;
@@ -736,7 +723,7 @@ static size_t response_parse_short(struct opal_resp_tok *tok,
                tok->type = OPAL_DTA_TOKENID_SINT;
        } else {
                u64 u_integer = 0;
-               int i, b = 0;
+               ssize_t i, b = 0;
 
                tok->type = OPAL_DTA_TOKENID_UINT;
                if (tok->len > 9) {
@@ -753,8 +740,8 @@ static size_t response_parse_short(struct opal_resp_tok *tok,
        return tok->len;
 }
 
-static size_t response_parse_medium(struct opal_resp_tok *tok,
-                                   const u8 *pos)
+static ssize_t response_parse_medium(struct opal_resp_tok *tok,
+                                    const u8 *pos)
 {
        tok->pos = pos;
        tok->len = (((pos[0] & MEDIUM_ATOM_LEN_MASK) << 8) | pos[1]) + 2;
@@ -770,8 +757,8 @@ static size_t response_parse_medium(struct opal_resp_tok *tok,
        return tok->len;
 }
 
-static size_t response_parse_long(struct opal_resp_tok *tok,
-                                 const u8 *pos)
+static ssize_t response_parse_long(struct opal_resp_tok *tok,
+                                  const u8 *pos)
 {
        tok->pos = pos;
        tok->len = ((pos[1] << 16) | (pos[2] << 8) | pos[3]) + 4;
@@ -787,8 +774,8 @@ static size_t response_parse_long(struct opal_resp_tok *tok,
        return tok->len;
 }
 
-static size_t response_parse_token(struct opal_resp_tok *tok,
-                                  const u8 *pos)
+static ssize_t response_parse_token(struct opal_resp_tok *tok,
+                                   const u8 *pos)
 {
        tok->pos = pos;
        tok->len = 1;
@@ -805,8 +792,9 @@ static int response_parse(const u8 *buf, size_t length,
        struct opal_resp_tok *iter;
        int num_entries = 0;
        int total;
-       size_t token_length;
+       ssize_t token_length;
        const u8 *pos;
+       u32 clen, plen, slen;
 
        if (!buf)
                return -EFAULT;
@@ -818,17 +806,16 @@ static int response_parse(const u8 *buf, size_t length,
        pos = buf;
        pos += sizeof(*hdr);
 
-       pr_debug("Response size: cp: %d, pkt: %d, subpkt: %d\n",
-                be32_to_cpu(hdr->cp.length),
-                be32_to_cpu(hdr->pkt.length),
-                be32_to_cpu(hdr->subpkt.length));
-
-       if (hdr->cp.length == 0 || hdr->pkt.length == 0 ||
-           hdr->subpkt.length == 0) {
-               pr_err("Bad header length. cp: %d, pkt: %d, subpkt: %d\n",
-                      be32_to_cpu(hdr->cp.length),
-                      be32_to_cpu(hdr->pkt.length),
-                      be32_to_cpu(hdr->subpkt.length));
+       clen = be32_to_cpu(hdr->cp.length);
+       plen = be32_to_cpu(hdr->pkt.length);
+       slen = be32_to_cpu(hdr->subpkt.length);
+       pr_debug("Response size: cp: %u, pkt: %u, subpkt: %u\n",
+                clen, plen, slen);
+
+       if (clen == 0 || plen == 0 || slen == 0 ||
+           slen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
+               pr_err("Bad header length. cp: %u, pkt: %u, subpkt: %u\n",
+                      clen, plen, slen);
                print_buffer(pos, sizeof(*hdr));
                return -EINVAL;
        }
@@ -837,7 +824,7 @@ static int response_parse(const u8 *buf, size_t length,
                return -EFAULT;
 
        iter = resp->toks;
-       total = be32_to_cpu(hdr->subpkt.length);
+       total = slen;
        print_buffer(pos, total);
        while (total > 0) {
                if (pos[0] <= TINY_ATOM_BYTE) /* tiny atom */
@@ -851,8 +838,8 @@ static int response_parse(const u8 *buf, size_t length,
                else /* TOKEN */
                        token_length = response_parse_token(iter, pos);
 
-               if (token_length == -EINVAL)
-                       return -EINVAL;
+               if (token_length < 0)
+                       return token_length;
 
                pos += token_length;
                total -= token_length;
@@ -922,20 +909,32 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n)
        return resp->toks[n].stored.u;
 }
 
+static bool response_token_matches(const struct opal_resp_tok *token, u8 match)
+{
+       if (IS_ERR(token) ||
+           token->type != OPAL_DTA_TOKENID_TOKEN ||
+           token->pos[0] != match)
+               return false;
+       return true;
+}
+
 static u8 response_status(const struct parsed_resp *resp)
 {
-       if (token_type(resp, 0) == OPAL_DTA_TOKENID_TOKEN &&
-           response_get_token(resp, 0) == OPAL_ENDOFSESSION) {
+       const struct opal_resp_tok *tok;
+
+       tok = response_get_token(resp, 0);
+       if (response_token_matches(tok, OPAL_ENDOFSESSION))
                return 0;
-       }
 
        if (resp->num < 5)
                return DTAERROR_NO_METHOD_STATUS;
 
-       if (token_type(resp, resp->num - 1) != OPAL_DTA_TOKENID_TOKEN ||
-           token_type(resp, resp->num - 5) != OPAL_DTA_TOKENID_TOKEN ||
-           response_get_token(resp, resp->num - 1) != OPAL_ENDLIST ||
-           response_get_token(resp, resp->num - 5) != OPAL_STARTLIST)
+       tok = response_get_token(resp, resp->num - 5);
+       if (!response_token_matches(tok, OPAL_STARTLIST))
+               return DTAERROR_NO_METHOD_STATUS;
+
+       tok = response_get_token(resp, resp->num - 1);
+       if (!response_token_matches(tok, OPAL_ENDLIST))
                return DTAERROR_NO_METHOD_STATUS;
 
        return response_get_u64(resp, resp->num - 4);
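
response_get_token() now reports failure with ERR_PTR(-EINVAL) instead of a sentinel value, and response_token_matches() folds the IS_ERR() check into the comparison. The general idiom, for reference:

#include <linux/err.h>

/* Errors travel inside the pointer, so no separate status parameter. */
static const struct opal_resp_tok *get_tok(const struct parsed_resp *resp,
                                           int n)
{
        if (n >= resp->num)
                return ERR_PTR(-EINVAL);
        return &resp->toks[n];
}

/* Caller side:
 *      tok = get_tok(resp, 0);
 *      if (IS_ERR(tok))
 *              return PTR_ERR(tok);
 */
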
@@ -1022,7 +1021,7 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
        return opal_send_recv(dev, cont);
 }
 
-static int gen_key(struct opal_dev *dev)
+static int gen_key(struct opal_dev *dev, void *data)
 {
        const u8 *method;
        u8 uid[OPAL_UID_LENGTH];
@@ -1076,15 +1075,14 @@ static int get_active_key_cont(struct opal_dev *dev)
        return 0;
 }
 
-static int get_active_key(struct opal_dev *dev)
+static int get_active_key(struct opal_dev *dev, void *data)
 {
        u8 uid[OPAL_UID_LENGTH];
        int err = 0;
-       u8 *lr;
+       u8 *lr = data;
 
        clear_opal_cmd(dev);
        set_comid(dev, dev->comid);
-       lr = dev->func_data[dev->state];
 
        err = build_locking_range(uid, sizeof(uid), *lr);
        if (err)
@@ -1167,17 +1165,16 @@ static inline int enable_global_lr(struct opal_dev *dev, u8 *uid,
        return err;
 }
 
-static int setup_locking_range(struct opal_dev *dev)
+static int setup_locking_range(struct opal_dev *dev, void *data)
 {
        u8 uid[OPAL_UID_LENGTH];
-       struct opal_user_lr_setup *setup;
+       struct opal_user_lr_setup *setup = data;
        u8 lr;
        int err = 0;
 
        clear_opal_cmd(dev);
        set_comid(dev, dev->comid);
 
-       setup = dev->func_data[dev->state];
        lr = setup->session.opal_key.lr;
        err = build_locking_range(uid, sizeof(uid), lr);
        if (err)
@@ -1290,20 +1287,19 @@ static int start_generic_opal_session(struct opal_dev *dev,
        return finalize_and_send(dev, start_opal_session_cont);
 }
 
-static int start_anybodyASP_opal_session(struct opal_dev *dev)
+static int start_anybodyASP_opal_session(struct opal_dev *dev, void *data)
 {
        return start_generic_opal_session(dev, OPAL_ANYBODY_UID,
                                          OPAL_ADMINSP_UID, NULL, 0);
 }
 
-static int start_SIDASP_opal_session(struct opal_dev *dev)
+static int start_SIDASP_opal_session(struct opal_dev *dev, void *data)
 {
        int ret;
        const u8 *key = dev->prev_data;
-       struct opal_key *okey;
 
        if (!key) {
-               okey = dev->func_data[dev->state];
+               const struct opal_key *okey = data;
                ret = start_generic_opal_session(dev, OPAL_SID_UID,
                                                 OPAL_ADMINSP_UID,
                                                 okey->key,
@@ -1318,22 +1314,21 @@ static int start_SIDASP_opal_session(struct opal_dev *dev)
        return ret;
 }
 
-static inline int start_admin1LSP_opal_session(struct opal_dev *dev)
+static int start_admin1LSP_opal_session(struct opal_dev *dev, void *data)
 {
-       struct opal_key *key = dev->func_data[dev->state];
-
+       struct opal_key *key = data;
        return start_generic_opal_session(dev, OPAL_ADMIN1_UID,
                                          OPAL_LOCKINGSP_UID,
                                          key->key, key->key_len);
 }
 
-static int start_auth_opal_session(struct opal_dev *dev)
+static int start_auth_opal_session(struct opal_dev *dev, void *data)
 {
+       struct opal_session_info *session = data;
        u8 lk_ul_user[OPAL_UID_LENGTH];
+       size_t keylen = session->opal_key.key_len;
        int err = 0;
 
-       struct opal_session_info *session = dev->func_data[dev->state];
-       size_t keylen = session->opal_key.key_len;
        u8 *key = session->opal_key.key;
        u32 hsn = GENERIC_HOST_SESSION_NUM;
 
@@ -1383,7 +1378,7 @@ static int start_auth_opal_session(struct opal_dev *dev)
        return finalize_and_send(dev, start_opal_session_cont);
 }
 
-static int revert_tper(struct opal_dev *dev)
+static int revert_tper(struct opal_dev *dev, void *data)
 {
        int err = 0;
 
@@ -1405,9 +1400,9 @@ static int revert_tper(struct opal_dev *dev)
        return finalize_and_send(dev, parse_and_check_status);
 }
 
-static int internal_activate_user(struct opal_dev *dev)
+static int internal_activate_user(struct opal_dev *dev, void *data)
 {
-       struct opal_session_info *session = dev->func_data[dev->state];
+       struct opal_session_info *session = data;
        u8 uid[OPAL_UID_LENGTH];
        int err = 0;
 
@@ -1440,15 +1435,14 @@ static int internal_activate_user(struct opal_dev *dev)
        return finalize_and_send(dev, parse_and_check_status);
 }
 
-static int erase_locking_range(struct opal_dev *dev)
+static int erase_locking_range(struct opal_dev *dev, void *data)
 {
-       struct opal_session_info *session;
+       struct opal_session_info *session = data;
        u8 uid[OPAL_UID_LENGTH];
        int err = 0;
 
        clear_opal_cmd(dev);
        set_comid(dev, dev->comid);
-       session = dev->func_data[dev->state];
 
        if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0)
                return -ERANGE;
@@ -1467,9 +1461,9 @@ static int erase_locking_range(struct opal_dev *dev)
        return finalize_and_send(dev, parse_and_check_status);
 }
 
-static int set_mbr_done(struct opal_dev *dev)
+static int set_mbr_done(struct opal_dev *dev, void *data)
 {
-       u8 mbr_done_tf = *(u8 *)dev->func_data[dev->state];
+       u8 *mbr_done_tf = data;
        int err = 0;
 
        clear_opal_cmd(dev);
@@ -1485,7 +1479,7 @@ static int set_mbr_done(struct opal_dev *dev)
        add_token_u8(&err, dev, OPAL_STARTLIST);
        add_token_u8(&err, dev, OPAL_STARTNAME);
        add_token_u8(&err, dev, 2); /* Done */
-       add_token_u8(&err, dev, mbr_done_tf); /* Done T or F */
+       add_token_u8(&err, dev, *mbr_done_tf); /* Done T or F */
        add_token_u8(&err, dev, OPAL_ENDNAME);
        add_token_u8(&err, dev, OPAL_ENDLIST);
        add_token_u8(&err, dev, OPAL_ENDNAME);
@@ -1499,9 +1493,9 @@ static int set_mbr_done(struct opal_dev *dev)
        return finalize_and_send(dev, parse_and_check_status);
 }
 
-static int set_mbr_enable_disable(struct opal_dev *dev)
+static int set_mbr_enable_disable(struct opal_dev *dev, void *data)
 {
-       u8 mbr_en_dis = *(u8 *)dev->func_data[dev->state];
+       u8 *mbr_en_dis = data;
        int err = 0;
 
        clear_opal_cmd(dev);
@@ -1517,7 +1511,7 @@ static int set_mbr_enable_disable(struct opal_dev *dev)
        add_token_u8(&err, dev, OPAL_STARTLIST);
        add_token_u8(&err, dev, OPAL_STARTNAME);
        add_token_u8(&err, dev, 1);
-       add_token_u8(&err, dev, mbr_en_dis);
+       add_token_u8(&err, dev, *mbr_en_dis);
        add_token_u8(&err, dev, OPAL_ENDNAME);
        add_token_u8(&err, dev, OPAL_ENDLIST);
        add_token_u8(&err, dev, OPAL_ENDNAME);
@@ -1558,11 +1552,10 @@ static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid,
        return err;
 }
 
-static int set_new_pw(struct opal_dev *dev)
+static int set_new_pw(struct opal_dev *dev, void *data)
 {
        u8 cpin_uid[OPAL_UID_LENGTH];
-       struct opal_session_info *usr = dev->func_data[dev->state];
-
+       struct opal_session_info *usr = data;
 
        memcpy(cpin_uid, opaluid[OPAL_C_PIN_ADMIN1], OPAL_UID_LENGTH);
 
@@ -1583,10 +1576,10 @@ static int set_new_pw(struct opal_dev *dev)
        return finalize_and_send(dev, parse_and_check_status);
 }
 
-static int set_sid_cpin_pin(struct opal_dev *dev)
+static int set_sid_cpin_pin(struct opal_dev *dev, void *data)
 {
        u8 cpin_uid[OPAL_UID_LENGTH];
-       struct opal_key *key = dev->func_data[dev->state];
+       struct opal_key *key = data;
 
        memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH);
 
@@ -1597,18 +1590,16 @@ static int set_sid_cpin_pin(struct opal_dev *dev)
        return finalize_and_send(dev, parse_and_check_status);
 }
 
-static int add_user_to_lr(struct opal_dev *dev)
+static int add_user_to_lr(struct opal_dev *dev, void *data)
 {
        u8 lr_buffer[OPAL_UID_LENGTH];
        u8 user_uid[OPAL_UID_LENGTH];
-       struct opal_lock_unlock *lkul;
+       struct opal_lock_unlock *lkul = data;
        int err = 0;
 
        clear_opal_cmd(dev);
        set_comid(dev, dev->comid);
 
-       lkul = dev->func_data[dev->state];
-
        memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED],
               OPAL_UID_LENGTH);
 
@@ -1675,11 +1666,11 @@ static int add_user_to_lr(struct opal_dev *dev)
        return finalize_and_send(dev, parse_and_check_status);
 }
 
-static int lock_unlock_locking_range(struct opal_dev *dev)
+static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
 {
        u8 lr_buffer[OPAL_UID_LENGTH];
        const u8 *method;
-       struct opal_lock_unlock *lkul;
+       struct opal_lock_unlock *lkul = data;
        u8 read_locked = 1, write_locked = 1;
        int err = 0;
 
@@ -1687,7 +1678,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev)
        set_comid(dev, dev->comid);
 
        method = opalmethod[OPAL_SET];
-       lkul = dev->func_data[dev->state];
        if (build_locking_range(lr_buffer, sizeof(lr_buffer),
                                lkul->session.opal_key.lr) < 0)
                return -ERANGE;
@@ -1739,19 +1729,18 @@ static int lock_unlock_locking_range(struct opal_dev *dev)
 }
 
 
-static int lock_unlock_locking_range_sum(struct opal_dev *dev)
+static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data)
 {
        u8 lr_buffer[OPAL_UID_LENGTH];
        u8 read_locked = 1, write_locked = 1;
        const u8 *method;
-       struct opal_lock_unlock *lkul;
+       struct opal_lock_unlock *lkul = data;
        int ret;
 
        clear_opal_cmd(dev);
        set_comid(dev, dev->comid);
 
        method = opalmethod[OPAL_SET];
-       lkul = dev->func_data[dev->state];
        if (build_locking_range(lr_buffer, sizeof(lr_buffer),
                                lkul->session.opal_key.lr) < 0)
                return -ERANGE;
@@ -1782,9 +1771,9 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev)
        return finalize_and_send(dev, parse_and_check_status);
 }
 
-static int activate_lsp(struct opal_dev *dev)
+static int activate_lsp(struct opal_dev *dev, void *data)
 {
-       struct opal_lr_act *opal_act;
+       struct opal_lr_act *opal_act = data;
        u8 user_lr[OPAL_UID_LENGTH];
        u8 uint_3 = 0x83;
        int err = 0, i;
@@ -1792,8 +1781,6 @@ static int activate_lsp(struct opal_dev *dev)
        clear_opal_cmd(dev);
        set_comid(dev, dev->comid);
 
-       opal_act = dev->func_data[dev->state];
-
        add_token_u8(&err, dev, OPAL_CALL);
        add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID],
                             OPAL_UID_LENGTH);
@@ -1858,7 +1845,7 @@ static int get_lsp_lifecycle_cont(struct opal_dev *dev)
 }
 
 /* Determine if we're in the Manufactured Inactive or Active state */
-static int get_lsp_lifecycle(struct opal_dev *dev)
+static int get_lsp_lifecycle(struct opal_dev *dev, void *data)
 {
        int err = 0;
 
@@ -1919,14 +1906,13 @@ static int get_msid_cpin_pin_cont(struct opal_dev *dev)
        return 0;
 }
 
-static int get_msid_cpin_pin(struct opal_dev *dev)
+static int get_msid_cpin_pin(struct opal_dev *dev, void *data)
 {
        int err = 0;
 
        clear_opal_cmd(dev);
        set_comid(dev, dev->comid);
 
-
        add_token_u8(&err, dev, OPAL_CALL);
        add_token_bytestring(&err, dev, opaluid[OPAL_C_PIN_MSID],
                             OPAL_UID_LENGTH);
@@ -1956,64 +1942,76 @@ static int get_msid_cpin_pin(struct opal_dev *dev)
        return finalize_and_send(dev, get_msid_cpin_pin_cont);
 }
 
-static int build_end_opal_session(struct opal_dev *dev)
+static int end_opal_session(struct opal_dev *dev, void *data)
 {
        int err = 0;
 
        clear_opal_cmd(dev);
-
        set_comid(dev, dev->comid);
        add_token_u8(&err, dev, OPAL_ENDOFSESSION);
-       return err;
-}
 
-static int end_opal_session(struct opal_dev *dev)
-{
-       int ret = build_end_opal_session(dev);
-
-       if (ret < 0)
-               return ret;
+       if (err < 0)
+               return err;
        return finalize_and_send(dev, end_session_cont);
 }
 
 static int end_opal_session_error(struct opal_dev *dev)
 {
-       const opal_step error_end_session[] = {
-               end_opal_session,
-               NULL,
+       const struct opal_step error_end_session[] = {
+               { end_opal_session, },
+               { NULL, }
        };
-       dev->funcs = error_end_session;
-       dev->state = 0;
+       dev->steps = error_end_session;
        return next(dev);
 }
 
 static inline void setup_opal_dev(struct opal_dev *dev,
-                                 const opal_step *funcs)
+                                 const struct opal_step *steps)
 {
-       dev->state = 0;
-       dev->funcs = funcs;
+       dev->steps = steps;
        dev->tsn = 0;
        dev->hsn = 0;
-       dev->func_data = NULL;
        dev->prev_data = NULL;
 }
 
 static int check_opal_support(struct opal_dev *dev)
 {
-       static const opal_step funcs[] = {
-               opal_discovery0,
-               NULL
+       const struct opal_step steps[] = {
+               { opal_discovery0, },
+               { NULL, }
        };
        int ret;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, funcs);
+       setup_opal_dev(dev, steps);
        ret = next(dev);
        dev->supported = !ret;
        mutex_unlock(&dev->dev_lock);
        return ret;
 }
 
+static void clean_opal_dev(struct opal_dev *dev)
+{
+
+       struct opal_suspend_data *suspend, *next;
+
+       mutex_lock(&dev->dev_lock);
+       list_for_each_entry_safe(suspend, next, &dev->unlk_lst, node) {
+               list_del(&suspend->node);
+               kfree(suspend);
+       }
+       mutex_unlock(&dev->dev_lock);
+}
+
+void free_opal_dev(struct opal_dev *dev)
+{
+       if (!dev)
+               return;
+       clean_opal_dev(dev);
+       kfree(dev);
+}
+EXPORT_SYMBOL(free_opal_dev);
+
 struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv)
 {
        struct opal_dev *dev;
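
free_opal_dev() is the new exported teardown entry point; it drains the saved-unlock list via clean_opal_dev() before freeing the device, and is NULL-safe. A hedged sketch of how a storage driver might pair it with init_opal_dev(); my_sec_send_recv is a hypothetical callback, not a symbol from this tree:

    /* Hedged usage sketch; my_sec_send_recv is hypothetical. */
    static struct opal_dev *my_opal_setup(void *ctrl_data)
    {
            struct opal_dev *opal = init_opal_dev(ctrl_data, my_sec_send_recv);

            /* NULL means allocation failed; caller treats Opal as absent. */
            return opal;
    }

    static void my_opal_teardown(struct opal_dev *opal)
    {
            free_opal_dev(opal);    /* NULL-safe; frees saved unlock state */
    }
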
@@ -2038,24 +2036,18 @@ EXPORT_SYMBOL(init_opal_dev);
 static int opal_secure_erase_locking_range(struct opal_dev *dev,
                                           struct opal_session_info *opal_session)
 {
-       void *data[3] = { NULL };
-       static const opal_step erase_funcs[] = {
-               opal_discovery0,
-               start_auth_opal_session,
-               get_active_key,
-               gen_key,
-               end_opal_session,
-               NULL,
+       const struct opal_step erase_steps[] = {
+               { opal_discovery0, },
+               { start_auth_opal_session, opal_session },
+               { get_active_key, &opal_session->opal_key.lr },
+               { gen_key, },
+               { end_opal_session, },
+               { NULL, }
        };
        int ret;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, erase_funcs);
-
-       dev->func_data = data;
-       dev->func_data[1] = opal_session;
-       dev->func_data[2] = &opal_session->opal_key.lr;
-
+       setup_opal_dev(dev, erase_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2064,23 +2056,17 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev,
 static int opal_erase_locking_range(struct opal_dev *dev,
                                    struct opal_session_info *opal_session)
 {
-       void *data[3] = { NULL };
-       static const opal_step erase_funcs[] = {
-               opal_discovery0,
-               start_auth_opal_session,
-               erase_locking_range,
-               end_opal_session,
-               NULL,
+       const struct opal_step erase_steps[] = {
+               { opal_discovery0, },
+               { start_auth_opal_session, opal_session },
+               { erase_locking_range, opal_session },
+               { end_opal_session, },
+               { NULL, }
        };
        int ret;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, erase_funcs);
-
-       dev->func_data = data;
-       dev->func_data[1] = opal_session;
-       dev->func_data[2] = opal_session;
-
+       setup_opal_dev(dev, erase_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2089,16 +2075,15 @@ static int opal_erase_locking_range(struct opal_dev *dev,
 static int opal_enable_disable_shadow_mbr(struct opal_dev *dev,
                                          struct opal_mbr_data *opal_mbr)
 {
-       void *func_data[6] = { NULL };
-       static const opal_step mbr_funcs[] = {
-               opal_discovery0,
-               start_admin1LSP_opal_session,
-               set_mbr_done,
-               end_opal_session,
-               start_admin1LSP_opal_session,
-               set_mbr_enable_disable,
-               end_opal_session,
-               NULL,
+       const struct opal_step mbr_steps[] = {
+               { opal_discovery0, },
+               { start_admin1LSP_opal_session, &opal_mbr->key },
+               { set_mbr_done, &opal_mbr->enable_disable },
+               { end_opal_session, },
+               { start_admin1LSP_opal_session, &opal_mbr->key },
+               { set_mbr_enable_disable, &opal_mbr->enable_disable },
+               { end_opal_session, },
+               { NULL, }
        };
        int ret;
 
@@ -2107,12 +2092,7 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev,
                return -EINVAL;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, mbr_funcs);
-       dev->func_data = func_data;
-       dev->func_data[1] = &opal_mbr->key;
-       dev->func_data[2] = &opal_mbr->enable_disable;
-       dev->func_data[4] = &opal_mbr->key;
-       dev->func_data[5] = &opal_mbr->enable_disable;
+       setup_opal_dev(dev, mbr_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2139,13 +2119,12 @@ static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk)
 static int opal_add_user_to_lr(struct opal_dev *dev,
                               struct opal_lock_unlock *lk_unlk)
 {
-       void *func_data[3] = { NULL };
-       static const opal_step funcs[] = {
-               opal_discovery0,
-               start_admin1LSP_opal_session,
-               add_user_to_lr,
-               end_opal_session,
-               NULL
+       const struct opal_step steps[] = {
+               { opal_discovery0, },
+               { start_admin1LSP_opal_session, &lk_unlk->session.opal_key },
+               { add_user_to_lr, lk_unlk },
+               { end_opal_session, },
+               { NULL, }
        };
        int ret;
 
@@ -2167,10 +2146,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
        }
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, funcs);
-       dev->func_data = func_data;
-       dev->func_data[1] = &lk_unlk->session.opal_key;
-       dev->func_data[2] = lk_unlk;
+       setup_opal_dev(dev, steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2178,55 +2154,54 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
 
 static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal)
 {
-       void *data[2] = { NULL };
-       static const opal_step revert_funcs[] = {
-               opal_discovery0,
-               start_SIDASP_opal_session,
-               revert_tper, /* controller will terminate session */
-               NULL,
+       const struct opal_step revert_steps[] = {
+               { opal_discovery0, },
+               { start_SIDASP_opal_session, opal },
+               { revert_tper, }, /* controller will terminate session */
+               { NULL, }
        };
        int ret;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, revert_funcs);
-       dev->func_data = data;
-       dev->func_data[1] = opal;
+       setup_opal_dev(dev, revert_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
-       return ret;
-}
 
-static int __opal_lock_unlock_sum(struct opal_dev *dev)
-{
-       static const opal_step ulk_funcs_sum[] = {
-               opal_discovery0,
-               start_auth_opal_session,
-               lock_unlock_locking_range_sum,
-               end_opal_session,
-               NULL
-       };
+       /*
+        * If we successfully reverted, let's clean up
+        * any saved locking ranges.
+        */
+       if (!ret)
+               clean_opal_dev(dev);
 
-       dev->funcs = ulk_funcs_sum;
-       return next(dev);
+       return ret;
 }
 
-static int __opal_lock_unlock(struct opal_dev *dev)
+static int __opal_lock_unlock(struct opal_dev *dev,
+                             struct opal_lock_unlock *lk_unlk)
 {
-       static const opal_step _unlock_funcs[] = {
-               opal_discovery0,
-               start_auth_opal_session,
-               lock_unlock_locking_range,
-               end_opal_session,
-               NULL
+       const struct opal_step unlock_steps[] = {
+               { opal_discovery0, },
+               { start_auth_opal_session, &lk_unlk->session },
+               { lock_unlock_locking_range, lk_unlk },
+               { end_opal_session, },
+               { NULL, }
+       };
+       const struct opal_step unlock_sum_steps[] = {
+               { opal_discovery0, },
+               { start_auth_opal_session, &lk_unlk->session },
+               { lock_unlock_locking_range_sum, lk_unlk },
+               { end_opal_session, },
+               { NULL, }
        };
 
-       dev->funcs = _unlock_funcs;
+       dev->steps = lk_unlk->session.sum ? unlock_sum_steps : unlock_steps;
        return next(dev);
 }
 
-static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk)
+static int opal_lock_unlock(struct opal_dev *dev,
+                           struct opal_lock_unlock *lk_unlk)
 {
-       void *func_data[3] = { NULL };
        int ret;
 
        if (lk_unlk->session.who < OPAL_ADMIN1 ||
@@ -2234,43 +2209,30 @@ static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_un
                return -EINVAL;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, NULL);
-       dev->func_data = func_data;
-       dev->func_data[1] = &lk_unlk->session;
-       dev->func_data[2] = lk_unlk;
-
-       if (lk_unlk->session.sum)
-               ret = __opal_lock_unlock_sum(dev);
-       else
-               ret = __opal_lock_unlock(dev);
-
+       ret = __opal_lock_unlock(dev, lk_unlk);
        mutex_unlock(&dev->dev_lock);
        return ret;
 }
 
 static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal)
 {
-       static const opal_step owner_funcs[] = {
-               opal_discovery0,
-               start_anybodyASP_opal_session,
-               get_msid_cpin_pin,
-               end_opal_session,
-               start_SIDASP_opal_session,
-               set_sid_cpin_pin,
-               end_opal_session,
-               NULL
+       const struct opal_step owner_steps[] = {
+               { opal_discovery0, },
+               { start_anybodyASP_opal_session, },
+               { get_msid_cpin_pin, },
+               { end_opal_session, },
+               { start_SIDASP_opal_session, opal },
+               { set_sid_cpin_pin, opal },
+               { end_opal_session, },
+               { NULL, }
        };
-       void *data[6] = { NULL };
        int ret;
 
        if (!dev)
                return -ENODEV;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, owner_funcs);
-       dev->func_data = data;
-       dev->func_data[4] = opal;
-       dev->func_data[5] = opal;
+       setup_opal_dev(dev, owner_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2278,14 +2240,13 @@ static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal)
 
 static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_act)
 {
-       void *data[4] = { NULL };
-       static const opal_step active_funcs[] = {
-               opal_discovery0,
-               start_SIDASP_opal_session, /* Open session as SID auth */
-               get_lsp_lifecycle,
-               activate_lsp,
-               end_opal_session,
-               NULL
+       const struct opal_step active_steps[] = {
+               { opal_discovery0, },
+               { start_SIDASP_opal_session, &opal_lr_act->key },
+               { get_lsp_lifecycle, },
+               { activate_lsp, opal_lr_act },
+               { end_opal_session, },
+               { NULL, }
        };
        int ret;
 
@@ -2293,10 +2254,7 @@ static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_a
                return -EINVAL;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, active_funcs);
-       dev->func_data = data;
-       dev->func_data[1] = &opal_lr_act->key;
-       dev->func_data[3] = opal_lr_act;
+       setup_opal_dev(dev, active_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2305,21 +2263,17 @@ static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_a
 static int opal_setup_locking_range(struct opal_dev *dev,
                                    struct opal_user_lr_setup *opal_lrs)
 {
-       void *data[3] = { NULL };
-       static const opal_step lr_funcs[] = {
-               opal_discovery0,
-               start_auth_opal_session,
-               setup_locking_range,
-               end_opal_session,
-               NULL,
+       const struct opal_step lr_steps[] = {
+               { opal_discovery0, },
+               { start_auth_opal_session, &opal_lrs->session },
+               { setup_locking_range, opal_lrs },
+               { end_opal_session, },
+               { NULL, }
        };
        int ret;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, lr_funcs);
-       dev->func_data = data;
-       dev->func_data[1] = &opal_lrs->session;
-       dev->func_data[2] = opal_lrs;
+       setup_opal_dev(dev, lr_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2327,14 +2281,13 @@ static int opal_setup_locking_range(struct opal_dev *dev,
 
 static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
 {
-       static const opal_step pw_funcs[] = {
-               opal_discovery0,
-               start_auth_opal_session,
-               set_new_pw,
-               end_opal_session,
-               NULL
+       const struct opal_step pw_steps[] = {
+               { opal_discovery0, },
+               { start_auth_opal_session, &opal_pw->session },
+               { set_new_pw, &opal_pw->new_user_pw },
+               { end_opal_session, },
+               { NULL }
        };
-       void *data[3] = { NULL };
        int ret;
 
        if (opal_pw->session.who < OPAL_ADMIN1 ||
@@ -2344,11 +2297,7 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
                return -EINVAL;
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, pw_funcs);
-       dev->func_data = data;
-       dev->func_data[1] = (void *) &opal_pw->session;
-       dev->func_data[2] = (void *) &opal_pw->new_user_pw;
-
+       setup_opal_dev(dev, pw_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2357,14 +2306,13 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
 static int opal_activate_user(struct opal_dev *dev,
                              struct opal_session_info *opal_session)
 {
-       static const opal_step act_funcs[] = {
-               opal_discovery0,
-               start_admin1LSP_opal_session,
-               internal_activate_user,
-               end_opal_session,
-               NULL
+       const struct opal_step act_steps[] = {
+               { opal_discovery0, },
+               { start_admin1LSP_opal_session, &opal_session->opal_key },
+               { internal_activate_user, opal_session },
+               { end_opal_session, },
+               { NULL, }
        };
-       void *data[3] = { NULL };
        int ret;
 
        /* We can't activate Admin1; it's active as manufactured */
@@ -2375,10 +2323,7 @@ static int opal_activate_user(struct opal_dev *dev,
        }
 
        mutex_lock(&dev->dev_lock);
-       setup_opal_dev(dev, act_funcs);
-       dev->func_data = data;
-       dev->func_data[1] = &opal_session->opal_key;
-       dev->func_data[2] = opal_session;
+       setup_opal_dev(dev, act_steps);
        ret = next(dev);
        mutex_unlock(&dev->dev_lock);
        return ret;
@@ -2387,7 +2332,6 @@ static int opal_activate_user(struct opal_dev *dev,
 bool opal_unlock_from_suspend(struct opal_dev *dev)
 {
        struct opal_suspend_data *suspend;
-       void *func_data[3] = { NULL };
        bool was_failure = false;
        int ret = 0;
 
@@ -2398,19 +2342,12 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
 
        mutex_lock(&dev->dev_lock);
        setup_opal_dev(dev, NULL);
-       dev->func_data = func_data;
 
        list_for_each_entry(suspend, &dev->unlk_lst, node) {
-               dev->state = 0;
-               dev->func_data[1] = &suspend->unlk.session;
-               dev->func_data[2] = &suspend->unlk;
                dev->tsn = 0;
                dev->hsn = 0;
 
-               if (suspend->unlk.session.sum)
-                       ret = __opal_lock_unlock_sum(dev);
-               else
-                       ret = __opal_lock_unlock(dev);
+               ret = __opal_lock_unlock(dev, &suspend->unlk);
                if (ret) {
                        pr_warn("Failed to unlock LR %hhu with sum %d\n",
                                suspend->unlk.session.opal_key.lr,
@@ -2437,7 +2374,7 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
                return -ENOTSUPP;
        }
 
-       p = memdup_user(arg,  _IOC_SIZE(cmd));
+       p = memdup_user(arg, _IOC_SIZE(cmd));
        if (IS_ERR(p))
                return PTR_ERR(p);
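
Taken together, the sed-opal hunks above replace the shared dev->func_data[] side channel with per-call step tables: each table entry pairs a step function with its argument, so there is no slot numbering to keep in sync with the step order. A condensed sketch of the call shape now used throughout this file; the function names match the diff, the table itself is illustrative:

    /* Hedged sketch of the table-driven shape used throughout sed-opal. */
    static int opal_do_something(struct opal_dev *dev, struct opal_key *key)
    {
            const struct opal_step steps[] = {
                    { opal_discovery0, },
                    { start_SIDASP_opal_session, key }, /* data rides along */
                    { end_opal_session, },
                    { NULL, }
            };
            int ret;

            mutex_lock(&dev->dev_lock);
            setup_opal_dev(dev, steps);
            ret = next(dev);      /* runs steps until fn == NULL or error */
            mutex_unlock(&dev->dev_lock);
            return ret;
    }
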
 
index 99c1b2cc2976ad31aca9cfef182bf0318b6e95af..71eff9b01b12296405f8d52a10f8427b3cbc5ca6 100644 (file)
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
                                 u8 *dst, unsigned int *dlen, void *ctx)
 {
-       size_t tmp_len = *dlen;
-       int err;
+       int out_len = LZ4_compress_default(src, dst,
+               slen, *dlen, ctx);
 
-       err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
-       if (err < 0)
+       if (!out_len)
                return -EINVAL;
 
-       *dlen = tmp_len;
+       *dlen = out_len;
        return 0;
 }
 
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
                                   u8 *dst, unsigned int *dlen, void *ctx)
 {
-       int err;
-       size_t tmp_len = *dlen;
-       size_t __slen = slen;
+       int out_len = LZ4_decompress_safe(src, dst, slen, *dlen);
 
-       err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-       if (err < 0)
-               return -EINVAL;
+       if (out_len < 0)
+               return out_len;
 
-       *dlen = tmp_len;
-       return err;
+       *dlen = out_len;
+       return 0;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
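
The lz4.c rewrite drops the old lz4_compress()/lz4_decompress_unknownoutputsize() wrappers for the upstream-style LZ4_compress_default()/LZ4_decompress_safe() calls, whose return value is the produced length: 0 means the compressed output did not fit, negative means corrupt decompression input. A hedged userspace illustration of the same convention using liblz4; note the in-kernel LZ4_compress_default() additionally takes a work-memory pointer, as the hunk shows:

    /* Hedged userspace sketch of the LZ4_* return conventions (liblz4). */
    #include <lz4.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char src[] = "Join us now and share the software ";
            char comp[128], back[128];

            int clen = LZ4_compress_default(src, comp, strlen(src), sizeof(comp));
            if (clen == 0)          /* 0: output did not fit / failed */
                    return 1;

            int dlen = LZ4_decompress_safe(comp, back, clen, sizeof(back));
            if (dlen < 0)           /* negative: corrupt compressed input */
                    return 1;

            printf("round trip: %zu -> %d -> %d bytes\n", strlen(src), clen, dlen);
            return 0;
    }
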
index 75ffc4a3f786c84f867670147e73427c15a20525..03a34a8109c0845440cacd5d21d61daa093bff51 100644 (file)
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
                                   u8 *dst, unsigned int *dlen, void *ctx)
 {
-       size_t tmp_len = *dlen;
-       int err;
+       int out_len = LZ4_compress_HC(src, dst, slen,
+               *dlen, LZ4HC_DEFAULT_CLEVEL, ctx);
 
-       err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
-       if (err < 0)
+       if (!out_len)
                return -EINVAL;
 
-       *dlen = tmp_len;
+       *dlen = out_len;
        return 0;
 }
 
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
                                     u8 *dst, unsigned int *dlen, void *ctx)
 {
-       int err;
-       size_t tmp_len = *dlen;
-       size_t __slen = slen;
+       int out_len = LZ4_decompress_safe(src, dst, slen, *dlen);
 
-       err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-       if (err < 0)
-               return -EINVAL;
+       if (out_len < 0)
+               return out_len;
 
-       *dlen = tmp_len;
-       return err;
+       *dlen = out_len;
+       return 0;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
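
lz4hc.c gets the same conversion, with LZ4_compress_HC() taking the compression level explicitly; the kernel macro here is LZ4HC_DEFAULT_CLEVEL, while userspace liblz4 spells it LZ4HC_CLEVEL_DEFAULT. A hedged liblz4 sketch of the level parameter, which trades speed for ratio:

    /* Hedged sketch: explicit HC compression level (userspace liblz4). */
    #include <lz4hc.h>

    static int compress_hc(const char *src, int slen, char *dst, int cap)
    {
            /* LZ4HC_CLEVEL_DEFAULT is 9 in liblz4; larger is slower/denser. */
            return LZ4_compress_HC(src, dst, slen, cap, LZ4HC_CLEVEL_DEFAULT);
    }
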
index f85e51cf7dcca639873cef179d21066202c7079b..006ecc434351808382b6fbb16c751ca16817c289 100644 (file)
@@ -34293,61 +34293,123 @@ static struct hash_testvec bfin_crc_tv_template[] = {
 
 static struct comp_testvec lz4_comp_tv_template[] = {
        {
-               .inlen  = 70,
-               .outlen = 45,
-               .input  = "Join us now and share the software "
-                         "Join us now and share the software ",
-               .output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-                         "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-                         "\x64\x20\x73\x68\x61\x72\x65\x20"
-                         "\x74\x68\x65\x20\x73\x6f\x66\x74"
-                         "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-                         "\x77\x61\x72\x65\x20",
+               .inlen  = 255,
+               .outlen = 218,
+               .input  = "LZ4 is lossless compression algorithm, providing"
+                        " compression speed at 400 MB/s per core, scalable "
+                        "with multi-cores CPU. It features an extremely fast "
+                        "decoder, with speed in multiple GB/s per core, "
+                        "typically reaching RAM speed limits on multi-core "
+                        "systems.",
+               .output = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+                         "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+                         "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+                         "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+                         "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+                         "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+                         "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+                         "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+                         "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+                         "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+                         "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+                         "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+                         "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+                         "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+                         "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83"
+                         "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00"
+                         "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e",
+
        },
 };
 
 static struct comp_testvec lz4_decomp_tv_template[] = {
        {
-               .inlen  = 45,
-               .outlen = 70,
-               .input  = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-                         "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-                         "\x64\x20\x73\x68\x61\x72\x65\x20"
-                         "\x74\x68\x65\x20\x73\x6f\x66\x74"
-                         "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-                         "\x77\x61\x72\x65\x20",
-               .output = "Join us now and share the software "
-                         "Join us now and share the software ",
+               .inlen  = 218,
+               .outlen = 255,
+               .input  = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+                         "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+                         "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+                         "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+                         "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+                         "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+                         "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+                         "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+                         "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+                         "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+                         "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+                         "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+                         "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+                         "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+                         "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83"
+                         "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00"
+                         "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e",
+               .output = "LZ4 is lossless compression algorithm, providing"
+                        " compression speed at 400 MB/s per core, scalable "
+                        "with multi-cores CPU. It features an extremely fast "
+                        "decoder, with speed in multiple GB/s per core, "
+                        "typically reaching RAM speed limits on multi-core "
+                        "systems.",
        },
 };
 
 static struct comp_testvec lz4hc_comp_tv_template[] = {
        {
-               .inlen  = 70,
-               .outlen = 45,
-               .input  = "Join us now and share the software "
-                         "Join us now and share the software ",
-               .output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-                         "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-                         "\x64\x20\x73\x68\x61\x72\x65\x20"
-                         "\x74\x68\x65\x20\x73\x6f\x66\x74"
-                         "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-                         "\x77\x61\x72\x65\x20",
+               .inlen  = 255,
+               .outlen = 216,
+               .input  = "LZ4 is lossless compression algorithm, providing"
+                        " compression speed at 400 MB/s per core, scalable "
+                        "with multi-cores CPU. It features an extremely fast "
+                        "decoder, with speed in multiple GB/s per core, "
+                        "typically reaching RAM speed limits on multi-core "
+                        "systems.",
+               .output = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+                         "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+                         "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+                         "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+                         "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+                         "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+                         "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+                         "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+                         "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+                         "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+                         "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+                         "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+                         "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+                         "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+                         "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97"
+                         "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20"
+                         "\x73\x79\x73\x74\x65\x6d\x73\x2e",
+
        },
 };
 
 static struct comp_testvec lz4hc_decomp_tv_template[] = {
        {
-               .inlen  = 45,
-               .outlen = 70,
-               .input  = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-                         "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-                         "\x64\x20\x73\x68\x61\x72\x65\x20"
-                         "\x74\x68\x65\x20\x73\x6f\x66\x74"
-                         "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-                         "\x77\x61\x72\x65\x20",
-               .output = "Join us now and share the software "
-                         "Join us now and share the software ",
+               .inlen  = 216,
+               .outlen = 255,
+               .input  = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+                         "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+                         "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+                         "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+                         "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+                         "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+                         "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+                         "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+                         "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+                         "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+                         "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+                         "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+                         "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+                         "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+                         "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97"
+                         "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20"
+                         "\x73\x79\x73\x74\x65\x6d\x73\x2e",
+               .output = "LZ4 is lossless compression algorithm, providing"
+                        " compression speed at 400 MB/s per core, scalable "
+                        "with multi-cores CPU. It features an extremely fast "
+                        "decoder, with speed in multiple GB/s per core, "
+                        "typically reaching RAM speed limits on multi-core "
+                        "systems.",
        },
 };
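
The 70-byte "Join us now..." vectors are replaced with 255-byte ones because the new LZ4 core produces different compressed framing. Only the decompression direction is stable across encoder versions, so a hedged out-of-tree sanity check would decompress the .input of a decomp vector and compare; a sketch, assuming userspace liblz4:

    /* Hedged sketch: verify a decomp test vector with userspace liblz4. */
    #include <lz4.h>
    #include <string.h>

    int vector_ok(const char *comp, int inlen, const char *expect, int outlen)
    {
            char buf[512];
            int n = LZ4_decompress_safe(comp, buf, inlen, sizeof(buf));

            return n == outlen && memcmp(buf, expect, n) == 0;
    }
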
 
index f77956c3fd45f2756694f16afd688bba77346ceb..747c2ba98534f102bc90fa26ee86e29652af7318 100644 (file)
@@ -56,7 +56,7 @@ struct acpi_ipmi_device {
 struct ipmi_driver_data {
        struct list_head ipmi_devices;
        struct ipmi_smi_watcher bmc_events;
-       struct ipmi_user_hndl ipmi_hndlrs;
+       const struct ipmi_user_hndl ipmi_hndlrs;
        struct mutex ipmi_lock;
 
        /*
index 8ea836c046f8be056623f109fedb6494d23dc586..90d112a3063a4a9fb247a08433b5bf13360a3696 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/clk-lpss.h>
+#include <linux/platform_data/x86/pmc_atom.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
 #include <linux/delay.h>
@@ -31,7 +32,6 @@ ACPI_MODULE_NAME("acpi_lpss");
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include <asm/iosf_mbi.h>
-#include <asm/pmc_atom.h>
 
 #define LPSS_ADDR(desc) ((unsigned long)&desc)
 
index 251f9477a98411ef0cc7d116dfffb6c24eb595a1..857dbc43a9b133cec8f4f9b27ca91434806fe7f1 100644 (file)
@@ -242,7 +242,7 @@ acpi_status acpi_db_convert_to_package(char *string, union acpi_object *object)
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Convert a typed and tokenized string to an union acpi_object. Typing:
+ * DESCRIPTION: Convert a typed and tokenized string to a union acpi_object. Typing:
  *              1) String objects were surrounded by quotes.
  *              2) Buffer objects were surrounded by parentheses.
  *              3) Package objects were surrounded by brackets "[]".
index 3dbbecf220879cc7bd551b807f35d9b1fb8ad390..9d14b509529e3067d13aa68923599e83bee419dc 100644 (file)
@@ -323,7 +323,7 @@ acpi_ns_check_reference(struct acpi_evaluate_info *info,
 
        /*
         * Check the reference object for the correct reference type (opcode).
-        * The only type of reference that can be converted to an union acpi_object is
+        * The only type of reference that can be converted to a union acpi_object is
         * a reference to a named object (reference class: NAME)
         */
        if (return_object->reference.class == ACPI_REFCLASS_NAME) {
index 8e365c0e766bfa85aedcd9e7945148d2d26ce3c0..c944ff5c9c3d0a7d07df9b3c5593ec075c3c8ef8 100644 (file)
@@ -495,9 +495,9 @@ static void acpi_ns_resolve_references(struct acpi_evaluate_info *info)
        /*
         * Two types of references are supported - those created by Index and
         * ref_of operators. A name reference (AML_NAMEPATH_OP) can be converted
-        * to an union acpi_object, so it is not dereferenced here. A ddb_handle
+        * to a union acpi_object, so it is not dereferenced here. A ddb_handle
         * (AML_LOAD_OP) cannot be dereferenced, nor can it be converted to
-        * an union acpi_object.
+        * a union acpi_object.
         */
        switch (info->return_object->reference.class) {
        case ACPI_REFCLASS_INDEX:
index 8b11d6d385dcd22a75f9f8bfdf9bd81856bf87ba..cd4c4271dc4cd6ec5c676eb849cabcdc6a1736b7 100644 (file)
@@ -406,7 +406,7 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
        }
 
        /*
-        * In IO-APIC mode, use overrided attribute. Two reasons:
+        * In IO-APIC mode, use overridden attribute. Two reasons:
         * 1. BIOS bug in DSDT
         * 2. BIOS uses IO-APIC mode Interrupt Source Override
         *
index 2b5d0fac81f072a7649f97e402443da643867dd0..01c94669a2b0ad91976daf9f3c7ef3338b48e965 100644 (file)
@@ -46,7 +46,7 @@ static bool qdf2400_erratum_44_present(struct acpi_table_header *h)
  * console is registered and if @earlycon is true, earlycon is set up.
  *
  * When CONFIG_ACPI_SPCR_TABLE is defined, this function should be called
- * from arch inintialization code as soon as the DT/ACPI decision is made.
+ * from arch initialization code as soon as the DT/ACPI decision is made.
  *
  */
 int __init parse_spcr(bool earlycon)
index 15b263a420e8c00168faea63965bf25208980921..2bbcdc6fdfeec96b0c31ef43f3cf0024cfbd4a38 100644 (file)
@@ -3342,7 +3342,7 @@ static void binder_vma_close(struct vm_area_struct *vma)
        binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES);
 }
 
-static int binder_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int binder_vm_fault(struct vm_fault *vmf)
 {
        return VM_FAULT_SIGBUS;
 }
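
The binder_vm_fault() hunk tracks a tree-wide ->fault signature change: the VMA is now reachable as vmf->vma, so the separate parameter is gone. For a handler that actually uses the VMA, the mechanical conversion looks like this; a hedged sketch, not taken from this diff:

    /* Hedged sketch of the ->fault conversion when the VMA is needed. */
    static int my_vm_fault(struct vm_fault *vmf)
    {
            struct vm_area_struct *vma = vmf->vma;  /* was a parameter */

            pr_debug("fault at %#lx in [%#lx, %#lx)\n",
                     vmf->address, vma->vm_start, vma->vm_end);
            return VM_FAULT_SIGBUS;
    }
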
index 4e5bf36c5f4680fb079b7d4391d65e6a07cb0254..ef68232b52228f7b93c552c4cae6fba59862c928 100644 (file)
@@ -2034,7 +2034,7 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
  *     This is to expedite speed down decisions right after device is
  *     initially configured.
  *
- *     The followings are speed down rules.  #1 and #2 deal with
+ *     The following are speed down rules.  #1 and #2 deal with
  *     DUBIOUS errors.
  *
  *     1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
index f1a9198dfe5a4966cbe7f433b357005501eb002a..4a610795b585fd41765676529eaaccc0685cda93 100644 (file)
@@ -2394,12 +2394,7 @@ static int __init amb_module_init (void)
 {
   PRINTD (DBG_FLOW|DBG_INIT, "init_module");
   
-  // sanity check - cast needed as printk does not support %Zu
-  if (sizeof(amb_mem) != 4*16 + 4*12) {
-    PRINTK (KERN_ERR, "Fix amb_mem (is %lu words).",
-           (unsigned long) sizeof(amb_mem));
-    return -ENOMEM;
-  }
+  BUILD_BUG_ON(sizeof(amb_mem) != 4*16 + 4*12);
   
   show_version();
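
Converting the runtime sizeof check to BUILD_BUG_ON() moves the sanity test to compile time: a wrong structure layout now breaks the build instead of failing module init with a misleading -ENOMEM. A sketch of the idiom; the struct is illustrative:

    /* Hedged sketch: BUILD_BUG_ON() fails the build if the condition is true. */
    #include <linux/bug.h>
    #include <linux/init.h>

    struct wire_hdr { unsigned int magic; unsigned int len; };

    static int __init my_init(void)
    {
            BUILD_BUG_ON(sizeof(struct wire_hdr) != 8); /* zero runtime cost */
            return 0;
    }
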
   
index 623359e407aa20543d5f5e73d7999b48a2322262..b042ec45854407e4559a44903bb2f0235b463535 100644 (file)
@@ -2326,11 +2326,7 @@ static int __init eni_init(void)
 {
        struct sk_buff *skb; /* dummy for sizeof */
 
-       if (sizeof(skb->cb) < sizeof(struct eni_skb_prv)) {
-               printk(KERN_ERR "eni_detect: skb->cb is too small (%Zd < %Zd)\n",
-                   sizeof(skb->cb),sizeof(struct eni_skb_prv));
-               return -EIO;
-       }
+       BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct eni_skb_prv));
        return pci_register_driver(&eni_driver);
 }
 
index 80c2ddcfa92c514af55d200d57ce8e8cda280476..22dcab952a24f4bdd267c508ec89ff7d76399d50 100644 (file)
@@ -895,7 +895,7 @@ static int fs_open(struct atm_vcc *atm_vcc)
        /* XXX handle qos parameters (rate limiting) ? */
 
        vcc = kmalloc(sizeof(struct fs_vcc), GFP_KERNEL);
-       fs_dprintk (FS_DEBUG_ALLOC, "Alloc VCC: %p(%Zd)\n", vcc, sizeof(struct fs_vcc));
+       fs_dprintk (FS_DEBUG_ALLOC, "Alloc VCC: %p(%zd)\n", vcc, sizeof(struct fs_vcc));
        if (!vcc) {
                clear_bit(ATM_VF_ADDR, &atm_vcc->flags);
                return -ENOMEM;
@@ -946,7 +946,7 @@ static int fs_open(struct atm_vcc *atm_vcc)
 
        if (DO_DIRECTION (txtp)) {
                tc = kmalloc (sizeof (struct fs_transmit_config), GFP_KERNEL);
-               fs_dprintk (FS_DEBUG_ALLOC, "Alloc tc: %p(%Zd)\n",
+               fs_dprintk (FS_DEBUG_ALLOC, "Alloc tc: %p(%zd)\n",
                            tc, sizeof (struct fs_transmit_config));
                if (!tc) {
                        fs_dprintk (FS_DEBUG_OPEN, "fs: can't alloc transmit_config.\n");
@@ -1185,7 +1185,7 @@ static int fs_send (struct atm_vcc *atm_vcc, struct sk_buff *skb)
        vcc->last_skb = skb;
 
        td = kmalloc (sizeof (struct FS_BPENTRY), GFP_ATOMIC);
-       fs_dprintk (FS_DEBUG_ALLOC, "Alloc transd: %p(%Zd)\n", td, sizeof (struct FS_BPENTRY));
+       fs_dprintk (FS_DEBUG_ALLOC, "Alloc transd: %p(%zd)\n", td, sizeof (struct FS_BPENTRY));
        if (!td) {
                /* Oops out of mem */
                return -ENOMEM;
@@ -1492,7 +1492,7 @@ static void top_off_fp (struct fs_dev *dev, struct freepool *fp,
                fs_dprintk (FS_DEBUG_ALLOC, "Alloc rec-skb: %p(%d)\n", skb, fp->bufsize);
                if (!skb) break;
                ne = kmalloc (sizeof (struct FS_BPENTRY), gfp_flags);
-               fs_dprintk (FS_DEBUG_ALLOC, "Alloc rec-d: %p(%Zd)\n", ne, sizeof (struct FS_BPENTRY));
+               fs_dprintk (FS_DEBUG_ALLOC, "Alloc rec-d: %p(%zd)\n", ne, sizeof (struct FS_BPENTRY));
                if (!ne) {
                        fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p\n", skb);
                        dev_kfree_skb_any (skb);
@@ -1803,7 +1803,7 @@ static int fs_init(struct fs_dev *dev)
        }
        dev->atm_vccs = kcalloc (dev->nchannels, sizeof (struct atm_vcc *),
                                 GFP_KERNEL);
-       fs_dprintk (FS_DEBUG_ALLOC, "Alloc atmvccs: %p(%Zd)\n",
+       fs_dprintk (FS_DEBUG_ALLOC, "Alloc atmvccs: %p(%zd)\n",
                    dev->atm_vccs, dev->nchannels * sizeof (struct atm_vcc *));
 
        if (!dev->atm_vccs) {
@@ -1911,7 +1911,7 @@ static int firestream_init_one(struct pci_dev *pci_dev,
                goto err_out;
 
        fs_dev = kzalloc (sizeof (struct fs_dev), GFP_KERNEL);
-       fs_dprintk (FS_DEBUG_ALLOC, "Alloc fs-dev: %p(%Zd)\n",
+       fs_dprintk (FS_DEBUG_ALLOC, "Alloc fs-dev: %p(%zd)\n",
                    fs_dev, sizeof (struct fs_dev));
        if (!fs_dev)
                goto err_out;
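
The firestream hunks replace the nonstandard %Zd length modifier with C99's %zd: z is the standard printf modifier for size_t/ssize_t, and compiler format checking warns on the old capital-Z GNUism. A trivial illustration:

    /* Hedged sketch: %zu/%zd are the C99 conversions for size_t/ssize_t. */
    #include <stdio.h>

    int main(void)
    {
            size_t n = sizeof(long);

            printf("sizeof(long) = %zu\n", n);  /* was often %Zu in old code */
            return 0;
    }
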
index 584aa881882b9b8961f3eaaae10bf21bdb4664be..2bf1ef1c3c786e9d9546f4acb802974735fda470 100644 (file)
@@ -2884,12 +2884,7 @@ static struct pci_driver hrz_driver = {
 /********** module entry **********/
 
 static int __init hrz_module_init (void) {
-  // sanity check - cast is needed since printk does not support %Zu
-  if (sizeof(struct MEMMAP) != 128*1024/4) {
-    PRINTK (KERN_ERR, "Fix struct MEMMAP (is %lu fakewords).",
-           (unsigned long) sizeof(struct MEMMAP));
-    return -ENOMEM;
-  }
+  BUILD_BUG_ON(sizeof(struct MEMMAP) != 128*1024/4);
   
   show_version();
   
index 8640bafeb471eec5a3b8925cc0753d175b0ea6f6..a4fa6c82261e87886250a988cd0a27590e2594cc 100644 (file)
@@ -21,7 +21,7 @@
      supports a variety of variants of Interphase ATM PCI (i)Chip adapter 
       card family (See www.iphase.com/products/ClassSheet.cfm?ClassID=ATM) 
       in terms of PHY type, the size of control memory and the size of 
-      packet memory. The followings are the change log and history:
+      packet memory. The following are the change log and history:
      
           Bugfix the Mona's UBR driver.
           Modify the basic memory allocation and dma logic.
index 53ecac5a2161d729fc957b8b4379a526ca89e0d3..2beacf2fc1ecb0be03959f20288b85f2b7233f3f 100644 (file)
@@ -21,7 +21,7 @@
      supports a variety of variants of Interphase ATM PCI (i)Chip adapter 
       card family (See www.iphase.com/products/ClassSheet.cfm?ClassID=ATM) 
       in terms of PHY type, the size of control memory and the size of 
-      packet memory. The followings are the change log and history:
+      packet memory. The following are the change log and history:
      
           Bugfix the Mona's UBR driver.
           Modify the basic memory allocation and dma logic.
index 445505d9ea07187902ed95c6ae121591b23cef5c..1a9bc51284b0d547bd85bb1eb6eeebf5dac1492b 100644 (file)
@@ -1389,7 +1389,7 @@ static void vcc_rx_aal5(struct lanai_vcc *lvcc, int endptr)
        if (n < 0)
                n += lanai_buf_size(&lvcc->rx.buf);
        APRINTK(n >= 0 && n < lanai_buf_size(&lvcc->rx.buf) && !(n & 15),
-           "vcc_rx_aal5: n out of range (%d/%Zu)\n",
+           "vcc_rx_aal5: n out of range (%d/%zu)\n",
            n, lanai_buf_size(&lvcc->rx.buf));
        /* Recover the second-to-last word to get true pdu length */
        if ((x = &end[-2]) < lvcc->rx.buf.start)
@@ -1493,9 +1493,9 @@ static int lanai_get_sized_buffer(struct lanai_dev *lanai,
                return -ENOMEM;
        if (unlikely(lanai_buf_size(buf) < size))
                printk(KERN_WARNING DEV_LABEL "(itf %d): wanted %d bytes "
-                   "for %s buffer, got only %Zu\n", lanai->number, size,
+                   "for %s buffer, got only %zu\n", lanai->number, size,
                    name, lanai_buf_size(buf));
-       DPRINTK("Allocated %Zu byte %s buffer\n", lanai_buf_size(buf), name);
+       DPRINTK("Allocated %zu byte %s buffer\n", lanai_buf_size(buf), name);
        return 0;
 }
 
@@ -1586,7 +1586,7 @@ static int service_buffer_allocate(struct lanai_dev *lanai)
            lanai->pci);
        if (unlikely(lanai->service.start == NULL))
                return -ENOMEM;
-       DPRINTK("allocated service buffer at 0x%08lX, size %Zu(%d)\n",
+       DPRINTK("allocated service buffer at 0x%08lX, size %zu(%d)\n",
            (unsigned long) lanai->service.start,
            lanai_buf_size(&lanai->service),
            lanai_buf_size_cardorder(&lanai->service));
@@ -2467,8 +2467,8 @@ static int lanai_proc_read(struct atm_dev *atmdev, loff_t *pos, char *page)
                    (lanai->status & STATUS_LED) ? 1 : 0,
                    (lanai->status & STATUS_GPIN) ? 1 : 0);
        if (left-- == 0)
-               return sprintf(page, "global buffer sizes: service=%Zu, "
-                   "aal0_rx=%Zu\n", lanai_buf_size(&lanai->service),
+               return sprintf(page, "global buffer sizes: service=%zu, "
+                   "aal0_rx=%zu\n", lanai_buf_size(&lanai->service),
                    lanai->naal0 ? lanai_buf_size(&lanai->aal0buf) : 0);
        if (left-- == 0) {
                get_statistics(lanai);
@@ -2513,7 +2513,7 @@ static int lanai_proc_read(struct atm_dev *atmdev, loff_t *pos, char *page)
                left += sprintf(&page[left], ",\n          rx_AAL=%d",
                    lvcc->rx.atmvcc->qos.aal == ATM_AAL5 ? 5 : 0);
                if (lvcc->rx.atmvcc->qos.aal == ATM_AAL5)
-                       left += sprintf(&page[left], ", rx_buf_size=%Zu, "
+                       left += sprintf(&page[left], ", rx_buf_size=%zu, "
                            "rx_bad_len=%u,\n          rx_service_trash=%u, "
                            "rx_service_stream=%u, rx_bad_crc=%u",
                            lanai_buf_size(&lvcc->rx.buf),
@@ -2524,7 +2524,7 @@ static int lanai_proc_read(struct atm_dev *atmdev, loff_t *pos, char *page)
        }
        if (lvcc->tx.atmvcc != NULL)
                left += sprintf(&page[left], ",\n          tx_AAL=%d, "
-                   "tx_buf_size=%Zu, tx_qos=%cBR, tx_backlogged=%c",
+                   "tx_buf_size=%zu, tx_qos=%cBR, tx_backlogged=%c",
                    lvcc->tx.atmvcc->qos.aal == ATM_AAL5 ? 5 : 0,
                    lanai_buf_size(&lvcc->tx.buf),
                    lvcc->tx.atmvcc == lanai->cbrvcc ? 'C' : 'U',
index cb28579e8a946f6820fdff21433fd715870a6436..d879f3bca1076dbbcfe1d3313f8286d984e1bbd5 100644 (file)
@@ -1980,13 +1980,12 @@ static void dequeue_rx(ns_dev * card, ns_rsqe * rsqe)
        card->lbfqc = ns_stat_lfbqc_get(stat);
 
        id = le32_to_cpu(rsqe->buffer_handle);
-       skb = idr_find(&card->idr, id);
+       skb = idr_remove(&card->idr, id);
        if (!skb) {
                RXPRINTK(KERN_ERR
-                        "nicstar%d: idr_find() failed!\n", card->index);
+                        "nicstar%d: skb not found!\n", card->index);
                return;
        }
-       idr_remove(&card->idr, id);
        dma_sync_single_for_cpu(&card->pcidev->dev,
                                NS_PRV_DMA(skb),
                                (NS_PRV_BUFTYPE(skb) == BUF_SM
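
idr_remove() now returns the pointer it removed, letting the nicstar driver fold the idr_find() + idr_remove() pair into one call and one error check, with no window between lookup and removal. A hedged sketch of the pattern:

    /* Hedged sketch: idr_remove() hands back the removed entry (or NULL). */
    static struct sk_buff *take_skb(struct idr *idr, int id)
    {
            struct sk_buff *skb = idr_remove(idr, id);

            if (!skb)
                    pr_err("no skb registered under id %d\n", id);
            return skb;
    }
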
index 8c25e68e67d7354005dc81a92342eb10519449ed..3050e6f994031ffb308976eb8dd19a6a07cb2b28 100644 (file)
@@ -638,6 +638,11 @@ int lock_device_hotplug_sysfs(void)
        return restart_syscall();
 }
 
+void assert_held_device_hotplug(void)
+{
+       lockdep_assert_held(&device_hotplug_lock);
+}
+
 #ifdef CONFIG_BLOCK
 static inline int device_is_not_partition(struct device *dev)
 {
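
assert_held_device_hotplug() lets code outside drivers/base assert, under lockdep, that the private device_hotplug_lock is held, without exporting the lock itself. A hedged sketch of a hypothetical caller:

    /* Hedged sketch: a hypothetical caller asserting the hotplug lock. */
    static void my_hotplug_protected_op(void)
    {
            assert_held_device_hotplug();   /* lockdep splat if not held */
            /* ... touch state guarded by device_hotplug_lock ... */
    }
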
index e167a1e1bccb062efef2595fcd5299301a97df80..b55804cac4c46ef1fd990a2d89a2eb1c917fc81f 100644 (file)
@@ -181,6 +181,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
  * @dev:   Pointer to device for which the allocation is performed.
  * @count: Requested number of pages.
  * @align: Requested alignment of pages (in PAGE_SIZE order).
+ * @gfp_mask: GFP flags to use for this allocation.
  *
  * This function allocates memory buffer for specified device. It uses
  * device specific contiguous memory area if available or the default
@@ -188,12 +189,12 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
  * function.
  */
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-                                      unsigned int align)
+                                      unsigned int align, gfp_t gfp_mask)
 {
        if (align > CONFIG_CMA_ALIGNMENT)
                align = CONFIG_CMA_ALIGNMENT;
 
-       return cma_alloc(dev_get_cma_area(dev), count, align);
+       return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask);
 }
 
 /**
index fa26ffd25fa61bae95bd441699a54ee8e16818d2..cc4f1d0cbffe073a8cad0257d22945c3eedad227 100644 (file)
@@ -249,7 +249,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t
        return ret;
 }
 
-int memory_block_change_state(struct memory_block *mem,
+static int memory_block_change_state(struct memory_block *mem,
                unsigned long to_state, unsigned long from_state_req)
 {
        int ret = 0;
index a18de9d727b096cac76b14c64cb6991608d02c30..01a1f7e249782499e66712ccf3abd854344bc5dc 100644 (file)
  *    02111-1307, USA.
  *
  *    Questions/Comments/Bugfixes to iss_storagedev@hp.com
- *    
+ *
  *    Author: Stephen M. Cameron
  */
 #ifdef CONFIG_CISS_SCSI_TAPE
 
-/* Here we have code to present the driver as a scsi driver 
-   as it is simultaneously presented as a block driver.  The 
+/* Here we have code to present the driver as a scsi driver
+   as it is simultaneously presented as a block driver.  The
    reason for doing this is to allow access to SCSI tape drives
-   through the array controller.  Note in particular, neither 
+   through the array controller.  Note in particular, neither
    physical nor logical disks are presented through the scsi layer. */
 
 #include <linux/timer.h>
@@ -37,7 +37,7 @@
 
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
-#include <scsi/scsi_host.h> 
+#include <scsi/scsi_host.h>
 
 #include "cciss_scsi.h"
 
@@ -120,7 +120,7 @@ struct cciss_scsi_adapter_data_t {
        struct cciss_scsi_cmd_stack_t cmd_stack;
        SGDescriptor_struct **cmd_sg_list;
        int registered;
-       spinlock_t lock; // to protect ccissscsi[ctlr]; 
+       spinlock_t lock; // to protect ccissscsi[ctlr];
 };
 
 #define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \
@@ -143,36 +143,36 @@ scsi_cmd_alloc(ctlr_info_t *h)
        u64bit temp64;
 
        sa = h->scsi_ctlr;
-       stk = &sa->cmd_stack; 
+       stk = &sa->cmd_stack;
 
-       if (stk->top < 0) 
+       if (stk->top < 0)
                return NULL;
-       c = stk->elem[stk->top];        
+       c = stk->elem[stk->top];
        /* memset(c, 0, sizeof(*c)); */
        memset(&c->cmd, 0, sizeof(c->cmd));
        memset(&c->Err, 0, sizeof(c->Err));
        /* set physical addr of cmd and addr of scsi parameters */
-       c->cmd.busaddr = c->busaddr; 
+       c->cmd.busaddr = c->busaddr;
        c->cmd.cmdindex = c->cmdindex;
-       /* (__u32) (stk->cmd_pool_handle + 
+       /* (__u32) (stk->cmd_pool_handle +
                (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top)); */
 
        temp64.val = (__u64) (c->busaddr + sizeof(CommandList_struct));
-       /* (__u64) (stk->cmd_pool_handle + 
+       /* (__u64) (stk->cmd_pool_handle +
                (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top) +
                 sizeof(CommandList_struct)); */
        stk->top--;
        c->cmd.ErrDesc.Addr.lower = temp64.val32.lower;
        c->cmd.ErrDesc.Addr.upper = temp64.val32.upper;
        c->cmd.ErrDesc.Len = sizeof(ErrorInfo_struct);
-       
+
        c->cmd.ctlr = h->ctlr;
        c->cmd.err_info = &c->Err;
 
        return (CommandList_struct *) c;
 }
 
-static void 
+static void
 scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
 {
        /* assume only one process in here at a time, locking done by caller. */
@@ -183,7 +183,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
        struct cciss_scsi_cmd_stack_t *stk;
 
        sa = h->scsi_ctlr;
-       stk = &sa->cmd_stack; 
+       stk = &sa->cmd_stack;
        stk->top++;
        if (stk->top >= stk->nelems) {
                dev_err(&h->pdev->dev,
@@ -228,7 +228,7 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
        }
        for (i = 0; i < stk->nelems; i++) {
                stk->elem[i] = &stk->pool[i];
-               stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + 
+               stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle +
                        (sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
                stk->elem[i]->cmdindex = i;
        }
@@ -244,7 +244,7 @@ scsi_cmd_stack_free(ctlr_info_t *h)
        size_t size;
 
        sa = h->scsi_ctlr;
-       stk = &sa->cmd_stack; 
+       stk = &sa->cmd_stack;
        if (stk->top != stk->nelems-1) {
                dev_warn(&h->pdev->dev,
                        "bug: %d scsi commands are still outstanding.\n",
@@ -266,7 +266,7 @@ print_cmd(CommandList_struct *cp)
        printk("queue:%d\n", cp->Header.ReplyQueue);
        printk("sglist:%d\n", cp->Header.SGList);
        printk("sgtot:%d\n", cp->Header.SGTotal);
-       printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper, 
+       printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper,
                        cp->Header.Tag.lower);
        printk("LUN:0x%8phN\n", cp->Header.LUN.LunAddrBytes);
        printk("CDBLen:%d\n", cp->Request.CDBLen);
@@ -275,8 +275,8 @@ print_cmd(CommandList_struct *cp)
        printk(" Dir:%d\n",cp->Request.Type.Direction);
        printk("Timeout:%d\n",cp->Request.Timeout);
        printk("CDB: %16ph\n", cp->Request.CDB);
-       printk("edesc.Addr: 0x%08x/0%08x, Len  = %d\n", 
-               cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower, 
+       printk("edesc.Addr: 0x%08x/0%08x, Len  = %d\n",
+               cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower,
                        cp->ErrDesc.Len);
        printk("sgs..........Errorinfo:\n");
        printk("scsistatus:%d\n", cp->err_info->ScsiStatus);
@@ -289,7 +289,7 @@ print_cmd(CommandList_struct *cp)
 }
 #endif
 
-static int 
+static int
 find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
 {
        /* finds an unused bus, target, lun for a new device */
@@ -299,24 +299,24 @@ find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
 
        memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA);
 
-       target_taken[SELF_SCSI_ID] = 1; 
+       target_taken[SELF_SCSI_ID] = 1;
        for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++)
                target_taken[ccissscsi[h->ctlr].dev[i].target] = 1;
-       
+
        for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) {
                if (!target_taken[i]) {
                        *bus = 0; *target=i; *lun = 0; found=1;
                        break;
                }
        }
-       return (!found);        
+       return (!found);
 }
 struct scsi2map {
        char scsi3addr[8];
        int bus, target, lun;
 };
 
-static int 
+static int
 cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
                struct cciss_scsi_dev_t *device,
                struct scsi2map *added, int *nadded)
@@ -381,8 +381,8 @@ cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
 
        ccissscsi[h->ctlr].ndevices++;
 
-       /* initially, (before registering with scsi layer) we don't 
-          know our hostno and we don't want to print anything first 
+       /* initially, (before registering with scsi layer) we don't
+          know our hostno and we don't want to print anything first
           time anyway (the scsi layer's inquiries will show that info) */
        if (hostno != -1)
                dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
@@ -467,7 +467,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
        /* sd contains scsi3 addresses and devtypes, but
           bus, target and lun are not filled in.  This function
           takes what's in sd to be the current and adjusts
-          ccissscsi[] to be in line with what's in sd. */ 
+          ccissscsi[] to be in line with what's in sd. */
 
        int i,j, found, changes=0;
        struct cciss_scsi_dev_t *csd;
@@ -492,7 +492,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
        if (hostno != -1)  /* if it's not the first time... */
                sh = h->scsi_ctlr->scsi_host;
 
-       /* find any devices in ccissscsi[] that are not in 
+       /* find any devices in ccissscsi[] that are not in
           sd[] and remove them from ccissscsi[] */
 
        i = 0;
@@ -512,7 +512,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
                        }
                }
 
-               if (found == 0) { /* device no longer present. */ 
+               if (found == 0) { /* device no longer present. */
                        changes++;
                        cciss_scsi_remove_entry(h, hostno, i,
                                removed, &nremoved);
@@ -641,14 +641,13 @@ lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr)
        return -1;
 }
 
-static void 
+static void
 cciss_scsi_setup(ctlr_info_t *h)
 {
        struct cciss_scsi_adapter_data_t * shba;
 
        ccissscsi[h->ctlr].ndevices = 0;
-       shba = (struct cciss_scsi_adapter_data_t *)
-               kmalloc(sizeof(*shba), GFP_KERNEL);     
+       shba = kmalloc(sizeof(*shba), GFP_KERNEL);
        if (shba == NULL)
                return;
        shba->scsi_host = NULL;
@@ -693,20 +692,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
 
        /* copy the sense data whether we need to or not. */
 
-       memcpy(cmd->sense_buffer, ei->SenseInfo, 
+       memcpy(cmd->sense_buffer, ei->SenseInfo,
                ei->SenseLen > SCSI_SENSE_BUFFERSIZE ?
-                       SCSI_SENSE_BUFFERSIZE : 
+                       SCSI_SENSE_BUFFERSIZE :
                        ei->SenseLen);
        scsi_set_resid(cmd, ei->ResidualCnt);
 
-       if(ei->CommandStatus != 0) 
-       { /* an error has occurred */ 
-               switch(ei->CommandStatus)
-               {
+       if (ei->CommandStatus != 0) { /* an error has occurred */
+               switch (ei->CommandStatus) {
                        case CMD_TARGET_STATUS:
                                /* Pass it up to the upper layers... */
                                if (!ei->ScsiStatus) {
-                                       
+
        /* Ordinarily, this case should never happen, but there is a bug
           in some released firmware revisions that allows it to happen
           if, for example, a 4100 backplane loses power and the tape
@@ -731,7 +728,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
                                print_cmd(c);
                                 */
      /* We get CMD_INVALID if you address a non-existent tape drive instead
-       of a selection timeout (no response).  You will see this if you yank 
+       of a selection timeout (no response).  You will see this if you yank
        out a tape drive, then try to access it. This is kind of a shame
        because it means that any other CMD_INVALID (e.g. driver bug) will
        get interpreted as a missing target. */
@@ -780,7 +777,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
                                cmd->result = DID_ERROR << 16;
                                dev_warn(&h->pdev->dev,
                                        "%p returned unknown status %x\n", c,
-                                               ei->CommandStatus); 
+                                               ei->CommandStatus);
                }
        }
        cmd->scsi_done(cmd);
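
The sense-data copy above clamps the length with an open-coded ternary; the kernel's min_t() expresses the same bound. A one-line sketch using the same fields as the hunk (no behavioral change):

        memcpy(cmd->sense_buffer, ei->SenseInfo,
               min_t(unsigned int, ei->SenseLen, SCSI_SENSE_BUFFERSIZE));
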
@@ -796,15 +793,15 @@ cciss_scsi_detect(ctlr_info_t *h)
        sh = scsi_host_alloc(&cciss_driver_template, sizeof(struct ctlr_info *));
        if (sh == NULL)
                goto fail;
-       sh->io_port = 0;        // good enough?  FIXME, 
+       sh->io_port = 0;        // good enough?  FIXME,
        sh->n_io_port = 0;      // I don't think we use these two...
-       sh->this_id = SELF_SCSI_ID;  
+       sh->this_id = SELF_SCSI_ID;
        sh->can_queue = cciss_tape_cmds;
        sh->sg_tablesize = h->maxsgentries;
        sh->max_cmd_len = MAX_COMMAND_SIZE;
        sh->max_sectors = h->cciss_max_sectors;
 
-       ((struct cciss_scsi_adapter_data_t *) 
+       ((struct cciss_scsi_adapter_data_t *)
                h->scsi_ctlr)->scsi_host = sh;
        sh->hostdata[0] = (unsigned long) h;
        sh->irq = h->intr[SIMPLE_MODE_INT];
@@ -856,7 +853,7 @@ cciss_map_one(struct pci_dev *pdev,
 static int
 cciss_scsi_do_simple_cmd(ctlr_info_t *h,
                        CommandList_struct *c,
-                       unsigned char *scsi3addr, 
+                       unsigned char *scsi3addr,
                        unsigned char *cdb,
                        unsigned char cdblen,
                        unsigned char *buf, int bufsize,
@@ -871,7 +868,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
        c->Header.Tag.lower = c->busaddr;  /* Use k. address of cmd as tag */
        // Fill in the request block...
 
-       /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n", 
+       /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n",
                scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
                scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */
 
@@ -885,7 +882,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
 
        /* Fill in the SG list and do dma mapping */
        cciss_map_one(h->pdev, c, (unsigned char *) buf,
-                       bufsize, DMA_FROM_DEVICE); 
+                       bufsize, DMA_FROM_DEVICE);
 
        c->waiting = &wait;
        enqueue_cmd_and_start_io(h, c);
@@ -896,14 +893,13 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h,
        return(0);
 }
 
-static void 
+static void
 cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
 {
        ErrorInfo_struct *ei;
 
        ei = c->err_info;
-       switch(ei->CommandStatus)
-       {
+       switch (ei->CommandStatus) {
                case CMD_TARGET_STATUS:
                        dev_warn(&h->pdev->dev,
                                "cmd %p has completed with errors\n", c);
@@ -1005,7 +1001,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
 
        if (rc != 0) return rc; /* something went wrong */
 
-       if (ei->CommandStatus != 0 && 
+       if (ei->CommandStatus != 0 &&
            ei->CommandStatus != CMD_DATA_UNDERRUN) {
                cciss_scsi_interpret_error(h, c);
                rc = -1;
@@ -1013,7 +1009,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
        spin_lock_irqsave(&h->lock, flags);
        scsi_cmd_free(h, c);
        spin_unlock_irqrestore(&h->lock, flags);
-       return rc;      
+       return rc;
 }
 
 /* Get the device id from inquiry page 0x83 */
@@ -1042,7 +1038,7 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
        int rc;
        CommandList_struct *c;
        unsigned char cdb[12];
-       unsigned char scsi3addr[8]; 
+       unsigned char scsi3addr[8];
        ErrorInfo_struct *ei;
        unsigned long flags;
 
@@ -1069,14 +1065,14 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
        cdb[11] = 0;
 
        rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr,
-                               cdb, 12, 
-                               (unsigned char *) buf, 
+                               cdb, 12,
+                               (unsigned char *) buf,
                                bufsize, XFER_READ);
 
        if (rc != 0) return rc; /* something went wrong */
 
        ei = c->err_info;
-       if (ei->CommandStatus != 0 && 
+       if (ei->CommandStatus != 0 &&
            ei->CommandStatus != CMD_DATA_UNDERRUN) {
                cciss_scsi_interpret_error(h, c);
                rc = -1;
@@ -1084,36 +1080,36 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
        spin_lock_irqsave(&h->lock, flags);
        scsi_cmd_free(h, c);
        spin_unlock_irqrestore(&h->lock, flags);
-       return rc;      
+       return rc;
 }
 
 static void
 cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
 {
        /* the idea here is we could get notified from /proc
-          that some devices have changed, so we do a report 
-          physical luns cmd, and adjust our list of devices 
+          that some devices have changed, so we do a report
+          physical luns cmd, and adjust our list of devices
           accordingly.  (We can't rely on the scsi-mid layer just
-          doing inquiries, because the "busses" that the scsi 
+          doing inquiries, because the "busses" that the scsi
           mid-layer probes are totally fabricated by this driver,
           so new devices wouldn't show up.)
 
-          the scsi3addr's of devices won't change so long as the 
-          adapter is not reset.  That means we can rescan and 
-          tell which devices we already know about, vs. new 
+          the scsi3addr's of devices won't change so long as the
+          adapter is not reset.  That means we can rescan and
+          tell which devices we already know about, vs. new
           devices, vs.  disappearing devices.
 
           Also, if you yank out a tape drive, then put in a disk
-          in it's place, (say, a configured volume from another 
-          array controller for instance)  _don't_ poke this driver 
-           (so it thinks it's still a tape, but _do_ poke the scsi 
-           mid layer, so it does an inquiry... the scsi mid layer 
+          in its place (say, a configured volume from another
+          array controller for instance)  _don't_ poke this driver
+           (so it thinks it's still a tape), but _do_ poke the scsi
+           mid layer, so it does an inquiry... the scsi mid layer
            will see the physical disk.  This would be bad.  Need to
-          think about how to prevent that.  One idea would be to 
+          think about how to prevent that.  One idea would be to
           snoop all scsi responses and if an inquiry response comes
           back that reports a disk, chuck it and return selection
          timeout instead and adjust our table...  Not sure I like
-          that though.  
+          that though.
 
         */
 #define OBDR_TAPE_INQ_SIZE 49
@@ -1141,9 +1137,9 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
                ch = &ld_buff->LUNListLength[0];
                num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
                if (num_luns > CISS_MAX_PHYS_LUN) {
-                       printk(KERN_WARNING 
+                       printk(KERN_WARNING
                                "cciss: Maximum physical LUNs (%d) exceeded.  "
-                               "%d LUNs ignored.\n", CISS_MAX_PHYS_LUN, 
+                               "%d LUNs ignored.\n", CISS_MAX_PHYS_LUN,
                                num_luns - CISS_MAX_PHYS_LUN);
                        num_luns = CISS_MAX_PHYS_LUN;
                }
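
The shift-and-or above assembles a big-endian 32-bit byte count and divides by 8 because each REPORT PHYSICAL LUNS entry is 8 bytes long. A sketch of the same decode using the kernel's unaligned big-endian helper:

        #include <asm/unaligned.h>

        num_luns = get_unaligned_be32(&ld_buff->LUNListLength[0]) / 8;
        if (num_luns > CISS_MAX_PHYS_LUN)
                num_luns = CISS_MAX_PHYS_LUN;   /* clamp; warning elided */
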
@@ -1154,7 +1150,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
        }
 
 
-       /* adjust our table of devices */       
+       /* adjust our table of devices */
        for (i = 0; i < num_luns; i++) {
                /* for each physical lun, do an inquiry */
                if (ld_buff->LUN[i][3] & 0xC0) continue;
@@ -1182,8 +1178,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
                cciss_scsi_get_device_id(h, scsi3addr,
                        this_device->device_id, sizeof(this_device->device_id));
 
-               switch (this_device->devtype)
-               {
+               switch (this_device->devtype) {
                  case 0x05: /* CD-ROM */ {
 
                        /* We don't *really* support actual CD-ROM devices,
@@ -1213,7 +1208,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
                        currentsd[ncurrent] = *this_device;
                        ncurrent++;
                        break;
-                 default: 
+                 default:
                        break;
                }
        }
@@ -1258,8 +1253,8 @@ cciss_scsi_write_info(struct Scsi_Host *sh,
                return -EINVAL;
 
        return cciss_scsi_user_command(h, sh->host_no,
-                       buffer, length);        
-} 
+                       buffer, length);
+}
 
 static int
 cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh)
@@ -1297,8 +1292,8 @@ cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh)
        return 0;
 }
 
-/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci 
-   dma mapping  and fills in the scatter gather entries of the 
+/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
+   dma mapping  and fills in the scatter gather entries of the
    cciss command, c. */
 
 static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
@@ -1394,7 +1389,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
 
        // Fill in the command list header
 
-       cmd->scsi_done = done;    // save this for use by completion code 
+       cmd->scsi_done = done;    // save this for use by completion code
 
        /* save c in case we have to abort it */
        cmd->host_scribble = (unsigned char *) c;
@@ -1404,7 +1399,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
        c->Header.ReplyQueue = 0;  /* unused in simple mode */
        memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
        c->Header.Tag.lower = c->busaddr;  /* Use k. address of cmd as tag */
-       
+
        // Fill in the request block...
 
        c->Request.Timeout = 0;
@@ -1414,8 +1409,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
        memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len);
        c->Request.Type.Type = TYPE_CMD;
        c->Request.Type.Attribute = ATTR_SIMPLE;
-       switch(cmd->sc_data_direction)
-       {
+       switch (cmd->sc_data_direction) {
          case DMA_TO_DEVICE:
                c->Request.Type.Direction = XFER_WRITE;
                break;
@@ -1432,15 +1426,15 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
 
                c->Request.Type.Direction = XFER_RSVD;
                // This is technically wrong, and cciss controllers should
-               // reject it with CMD_INVALID, which is the most correct 
-               // response, but non-fibre backends appear to let it 
+               // reject it with CMD_INVALID, which is the most correct
+               // response, but non-fibre backends appear to let it
                // slide by, and give the same results as if this field
                // were set correctly.  Either way is acceptable for
                // our purposes here.
 
                break;
 
-         default: 
+         default:
                dev_warn(&h->pdev->dev, "unknown data direction: %d\n",
                        cmd->sc_data_direction);
                BUG();
@@ -1464,9 +1458,9 @@ static void cciss_unregister_scsi(ctlr_info_t *h)
 
        spin_lock_irqsave(&h->lock, flags);
        sa = h->scsi_ctlr;
-       stk = &sa->cmd_stack; 
+       stk = &sa->cmd_stack;
 
-       /* if we weren't ever actually registered, don't unregister */ 
+       /* if we weren't ever actually registered, don't unregister */
        if (sa->registered) {
                spin_unlock_irqrestore(&h->lock, flags);
                scsi_remove_host(sa->scsi_host);
@@ -1474,7 +1468,7 @@ static void cciss_unregister_scsi(ctlr_info_t *h)
                spin_lock_irqsave(&h->lock, flags);
        }
 
-       /* set scsi_host to NULL so our detect routine will 
+       /* set scsi_host to NULL so our detect routine will
           find us on register */
        sa->scsi_host = NULL;
        spin_unlock_irqrestore(&h->lock, flags);
@@ -1490,7 +1484,7 @@ static int cciss_engage_scsi(ctlr_info_t *h)
 
        spin_lock_irqsave(&h->lock, flags);
        sa = h->scsi_ctlr;
-       stk = &sa->cmd_stack; 
+       stk = &sa->cmd_stack;
 
        if (sa->registered) {
                dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n");
@@ -1586,13 +1580,13 @@ retry_tur:
        return rc;
 }
 
-/* Need at least one of these error handlers to keep ../scsi/hosts.c from 
- * complaining.  Doing a host- or bus-reset can't do anything good here. 
+/* Need at least one of these error handlers to keep ../scsi/hosts.c from
+ * complaining.  Doing a host- or bus-reset can't do anything good here.
  * Despite what it might say in scsi_error.c, there may well be commands
  * on the controller, as the cciss driver registers twice, once as a block
  * device for the logical drives, and once as a scsi device, for any tape
  * drives.  So we know there are no commands out on the tape drives, but we
- * don't know there are no commands on the controller, and it is likely 
+ * don't know there are no commands on the controller, and it is likely
  * that there probably are, as the cciss block device is most commonly used
  * as a boot device (embedded controller on HP/Compaq systems.)
 */
index 615e5b5178a0541852820e67698d7a8b41cc268d..116509852a34da5730402edfedd025afd1b4a934 100644 (file)
@@ -2915,11 +2915,9 @@ out_idr_remove_vol:
        idr_remove(&connection->peer_devices, vnr);
 out_idr_remove_from_resource:
        for_each_connection(connection, resource) {
-               peer_device = idr_find(&connection->peer_devices, vnr);
-               if (peer_device) {
-                       idr_remove(&connection->peer_devices, vnr);
+               peer_device = idr_remove(&connection->peer_devices, vnr);
+               if (peer_device)
                        kref_put(&connection->kref, drbd_destroy_connection);
-               }
        }
        for_each_peer_device_safe(peer_device, tmp_peer_device, device) {
                list_del(&peer_device->peer_devices);
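
The hunk above relies on idr_remove() returning the pointer that was stored under the id (or NULL if the slot was empty), which folds the old idr_find()-then-idr_remove() pair into a single call. A standalone sketch of the idiom, with put_peer() as a hypothetical stand-in for the kref_put():

        struct peer *p = idr_remove(&some_idr, id);

        if (p)
                put_peer(p);    /* drop the reference the idr held */
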
index 304377182c1ad462b9832b7a5e9b1fb930ca62fa..4b52a16903298c5fe22cd6529578dfdf47fd27b9 100644 (file)
@@ -186,7 +186,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
         *
         * TODO: the above condition may be loosened in the future, and
         * direct I/O may be switched runtime at that time because most
-        * of requests in sane appplications should be PAGE_SIZE algined
+        * of requests in sane applications should be PAGE_SIZE aligned
         */
        if (dio) {
                if (queue_logical_block_size(lo->lo_queue) >= sb_bsize &&
index 0be84a3cb6d7bbb605a252ff5666c1642a1c201a..0bf2b21a62cb770a3129889b59a0b5f735eddc52 100644 (file)
@@ -96,6 +96,10 @@ static int max_part;
 static struct workqueue_struct *recv_workqueue;
 static int part_shift;
 
+static int nbd_dev_dbg_init(struct nbd_device *nbd);
+static void nbd_dev_dbg_close(struct nbd_device *nbd);
+
+
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
        return disk_to_dev(nbd->disk);
@@ -120,7 +124,7 @@ static const char *nbdcmd_to_ascii(int cmd)
 
 static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
 {
-       bdev->bd_inode->i_size = 0;
+       bd_set_size(bdev, 0);
        set_capacity(nbd->disk, 0);
        kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 
@@ -129,29 +133,20 @@ static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
 
 static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
 {
-       if (!nbd_is_connected(nbd))
-               return;
-
-       bdev->bd_inode->i_size = nbd->bytesize;
+       blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
+       blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
+       bd_set_size(bdev, nbd->bytesize);
        set_capacity(nbd->disk, nbd->bytesize >> 9);
        kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 }
 
-static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
+static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
                        loff_t blocksize, loff_t nr_blocks)
 {
-       int ret;
-
-       ret = set_blocksize(bdev, blocksize);
-       if (ret)
-               return ret;
-
        nbd->blksize = blocksize;
        nbd->bytesize = blocksize * nr_blocks;
-
-       nbd_size_update(nbd, bdev);
-
-       return 0;
+       if (nbd_is_connected(nbd))
+               nbd_size_update(nbd, bdev);
 }
 
 static void nbd_end_request(struct nbd_cmd *cmd)
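
nbd_size_update() now derives everything from blksize and bytesize: the queue's logical and physical block size, the bdev size via bd_set_size(), and the gendisk capacity in 512-byte sectors (the >> 9). A worked sketch with illustrative numbers:

        /* blksize = 4096, nr_blocks = 262144 (values are illustrative) */
        loff_t blksize = 4096, nr_blocks = 262144;
        loff_t bytesize = blksize * nr_blocks;  /* 1073741824 bytes, 1 GiB */
        sector_t capacity = bytesize >> 9;      /* 2097152 512-byte sectors,
                                                   what set_capacity() takes */
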
@@ -571,10 +566,17 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
        return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
+static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
+                         unsigned long arg)
 {
+       struct socket *sock;
        struct nbd_sock **socks;
        struct nbd_sock *nsock;
+       int err;
+
+       sock = sockfd_lookup(arg, &err);
+       if (!sock)
+               return err;
 
        if (!nbd->task_setup)
                nbd->task_setup = current;
@@ -598,26 +600,20 @@ static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
        nsock->sock = sock;
        socks[nbd->num_connections++] = nsock;
 
+       if (max_part)
+               bdev->bd_invalidated = 1;
        return 0;
 }
 
 /* Reset all properties of an NBD device */
 static void nbd_reset(struct nbd_device *nbd)
 {
-       int i;
-
-       for (i = 0; i < nbd->num_connections; i++)
-               kfree(nbd->socks[i]);
-       kfree(nbd->socks);
-       nbd->socks = NULL;
        nbd->runtime_flags = 0;
        nbd->blksize = 1024;
        nbd->bytesize = 0;
        set_capacity(nbd->disk, 0);
        nbd->flags = 0;
        nbd->tag_set.timeout = 0;
-       nbd->num_connections = 0;
-       nbd->task_setup = NULL;
        queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 }
 
@@ -659,81 +655,143 @@ static void send_disconnects(struct nbd_device *nbd)
        }
 }
 
-static int nbd_dev_dbg_init(struct nbd_device *nbd);
-static void nbd_dev_dbg_close(struct nbd_device *nbd);
+static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
+{
+       dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
+       if (!nbd->socks)
+               return -EINVAL;
 
-/* Must be called with config_lock held */
-static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
-                      unsigned int cmd, unsigned long arg)
+       mutex_unlock(&nbd->config_lock);
+       fsync_bdev(bdev);
+       mutex_lock(&nbd->config_lock);
+
+       /* Check again after getting mutex back.  */
+       if (!nbd->socks)
+               return -EINVAL;
+
+       if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
+                             &nbd->runtime_flags))
+               send_disconnects(nbd);
+       return 0;
+}
+
+static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
 {
-       switch (cmd) {
-       case NBD_DISCONNECT: {
-               dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-               if (!nbd->socks)
-                       return -EINVAL;
-
-               mutex_unlock(&nbd->config_lock);
-               fsync_bdev(bdev);
-               mutex_lock(&nbd->config_lock);
-
-               /* Check again after getting mutex back.  */
-               if (!nbd->socks)
-                       return -EINVAL;
-
-               if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
-                                     &nbd->runtime_flags))
-                       send_disconnects(nbd);
-               return 0;
+       sock_shutdown(nbd);
+       nbd_clear_que(nbd);
+       kill_bdev(bdev);
+       nbd_bdev_reset(bdev);
+       /*
+        * We want to give the run thread a chance to wait for everybody
+        * to clean up and then do its own cleanup.
+        */
+       if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
+           nbd->num_connections) {
+               int i;
+
+               for (i = 0; i < nbd->num_connections; i++)
+                       kfree(nbd->socks[i]);
+               kfree(nbd->socks);
+               nbd->socks = NULL;
+               nbd->num_connections = 0;
        }
+       nbd->task_setup = NULL;
 
-       case NBD_CLEAR_SOCK:
-               sock_shutdown(nbd);
-               nbd_clear_que(nbd);
-               kill_bdev(bdev);
-               nbd_bdev_reset(bdev);
-               /*
-                * We want to give the run thread a chance to wait for everybody
-                * to clean up and then do it's own cleanup.
-                */
-               if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) {
-                       int i;
-
-                       for (i = 0; i < nbd->num_connections; i++)
-                               kfree(nbd->socks[i]);
-                       kfree(nbd->socks);
-                       nbd->socks = NULL;
-                       nbd->num_connections = 0;
-                       nbd->task_setup = NULL;
-               }
-               return 0;
+       return 0;
+}
+
+static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
+{
+       struct recv_thread_args *args;
+       int num_connections = nbd->num_connections;
+       int error = 0, i;
 
-       case NBD_SET_SOCK: {
-               int err;
-               struct socket *sock = sockfd_lookup(arg, &err);
+       if (nbd->task_recv)
+               return -EBUSY;
+       if (!nbd->socks)
+               return -EINVAL;
+       if (num_connections > 1 &&
+           !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
+               dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
+               error = -EINVAL;
+               goto out_err;
+       }
 
-               if (!sock)
-                       return err;
+       set_bit(NBD_RUNNING, &nbd->runtime_flags);
+       blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
+       args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
+       if (!args) {
+               error = -ENOMEM;
+               goto out_err;
+       }
+       nbd->task_recv = current;
+       mutex_unlock(&nbd->config_lock);
 
-               err = nbd_add_socket(nbd, sock);
-               if (!err && max_part)
-                       bdev->bd_invalidated = 1;
+       nbd_parse_flags(nbd, bdev);
 
-               return err;
+       error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+       if (error) {
+               dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+               goto out_recv;
        }
 
-       case NBD_SET_BLKSIZE: {
-               loff_t bsize = div_s64(nbd->bytesize, arg);
+       nbd_size_update(nbd, bdev);
 
-               return nbd_size_set(nbd, bdev, arg, bsize);
+       nbd_dev_dbg_init(nbd);
+       for (i = 0; i < num_connections; i++) {
+               sk_set_memalloc(nbd->socks[i]->sock->sk);
+               atomic_inc(&nbd->recv_threads);
+               INIT_WORK(&args[i].work, recv_work);
+               args[i].nbd = nbd;
+               args[i].index = i;
+               queue_work(recv_workqueue, &args[i].work);
        }
+       wait_event_interruptible(nbd->recv_wq,
+                                atomic_read(&nbd->recv_threads) == 0);
+       for (i = 0; i < num_connections; i++)
+               flush_work(&args[i].work);
+       nbd_dev_dbg_close(nbd);
+       nbd_size_clear(nbd, bdev);
+       device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+out_recv:
+       mutex_lock(&nbd->config_lock);
+       nbd->task_recv = NULL;
+out_err:
+       clear_bit(NBD_RUNNING, &nbd->runtime_flags);
+       nbd_clear_sock(nbd, bdev);
 
-       case NBD_SET_SIZE:
-               return nbd_size_set(nbd, bdev, nbd->blksize,
-                                       div_s64(arg, nbd->blksize));
+       /* user requested, ignore socket errors */
+       if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
+               error = 0;
+       if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
+               error = -ETIMEDOUT;
 
-       case NBD_SET_SIZE_BLOCKS:
-               return nbd_size_set(nbd, bdev, nbd->blksize, arg);
+       nbd_reset(nbd);
+       return error;
+}
 
+/* Must be called with config_lock held */
+static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
+                      unsigned int cmd, unsigned long arg)
+{
+       switch (cmd) {
+       case NBD_DISCONNECT:
+               return nbd_disconnect(nbd, bdev);
+       case NBD_CLEAR_SOCK:
+               return nbd_clear_sock(nbd, bdev);
+       case NBD_SET_SOCK:
+               return nbd_add_socket(nbd, bdev, arg);
+       case NBD_SET_BLKSIZE:
+               nbd_size_set(nbd, bdev, arg,
+                            div_s64(nbd->bytesize, arg));
+               return 0;
+       case NBD_SET_SIZE:
+               nbd_size_set(nbd, bdev, nbd->blksize,
+                            div_s64(arg, nbd->blksize));
+               return 0;
+       case NBD_SET_SIZE_BLOCKS:
+               nbd_size_set(nbd, bdev, nbd->blksize, arg);
+               return 0;
        case NBD_SET_TIMEOUT:
                nbd->tag_set.timeout = arg * HZ;
                return 0;
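
After the refactor each ioctl case is a one-line delegation, and the size ioctls reduce to arithmetic: NBD_SET_BLKSIZE keeps the byte size and rederives the block count, NBD_SET_SIZE does the inverse. A worked sketch with illustrative values:

        /* 1 GiB device, new block size of 1024 */
        loff_t bytesize = 1073741824, arg = 1024;
        loff_t nr_blocks = div_s64(bytesize, arg);      /* 1048576 blocks */
        /* NBD_SET_SIZE instead computes: nr_blocks = div_s64(arg, blksize) */
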
@@ -741,85 +799,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
        case NBD_SET_FLAGS:
                nbd->flags = arg;
                return 0;
-
-       case NBD_DO_IT: {
-               struct recv_thread_args *args;
-               int num_connections = nbd->num_connections;
-               int error = 0, i;
-
-               if (nbd->task_recv)
-                       return -EBUSY;
-               if (!nbd->socks)
-                       return -EINVAL;
-               if (num_connections > 1 &&
-                   !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
-                       dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
-                       error = -EINVAL;
-                       goto out_err;
-               }
-
-               set_bit(NBD_RUNNING, &nbd->runtime_flags);
-               blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
-               args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
-               if (!args) {
-                       error = -ENOMEM;
-                       goto out_err;
-               }
-               nbd->task_recv = current;
-               mutex_unlock(&nbd->config_lock);
-
-               nbd_parse_flags(nbd, bdev);
-
-               error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
-               if (error) {
-                       dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
-                       goto out_recv;
-               }
-
-               nbd_size_update(nbd, bdev);
-
-               nbd_dev_dbg_init(nbd);
-               for (i = 0; i < num_connections; i++) {
-                       sk_set_memalloc(nbd->socks[i]->sock->sk);
-                       atomic_inc(&nbd->recv_threads);
-                       INIT_WORK(&args[i].work, recv_work);
-                       args[i].nbd = nbd;
-                       args[i].index = i;
-                       queue_work(recv_workqueue, &args[i].work);
-               }
-               wait_event_interruptible(nbd->recv_wq,
-                                        atomic_read(&nbd->recv_threads) == 0);
-               for (i = 0; i < num_connections; i++)
-                       flush_work(&args[i].work);
-               nbd_dev_dbg_close(nbd);
-               nbd_size_clear(nbd, bdev);
-               device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
-out_recv:
-               mutex_lock(&nbd->config_lock);
-               nbd->task_recv = NULL;
-out_err:
-               sock_shutdown(nbd);
-               nbd_clear_que(nbd);
-               kill_bdev(bdev);
-               nbd_bdev_reset(bdev);
-
-               /* user requested, ignore socket errors */
-               if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
-                       error = 0;
-               if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
-                       error = -ETIMEDOUT;
-
-               nbd_reset(nbd);
-               return error;
-       }
-
+       case NBD_DO_IT:
+               return nbd_start_device(nbd, bdev);
        case NBD_CLEAR_QUE:
                /*
                 * This is for compatibility only.  The queue is always cleared
                 * by NBD_DO_IT or NBD_CLEAR_SOCK.
                 */
                return 0;
-
        case NBD_PRINT_DEBUG:
                /*
                 * For compatibility only, we no longer keep a list of
@@ -1134,8 +1121,10 @@ static int __init nbd_init(void)
        if (!recv_workqueue)
                return -ENOMEM;
 
-       if (register_blkdev(NBD_MAJOR, "nbd"))
+       if (register_blkdev(NBD_MAJOR, "nbd")) {
+               destroy_workqueue(recv_workqueue);
                return -EIO;
+       }
 
        nbd_dbg_init();
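
The nbd_init() fix above follows the usual rule that init steps are undone in reverse order on failure: the workqueue created first is destroyed when register_blkdev() fails. A minimal sketch of the shape (the names and major number are hypothetical):

        static struct workqueue_struct *wq;

        static int __init example_init(void)
        {
                wq = create_workqueue("example");               /* step 1 */
                if (!wq)
                        return -ENOMEM;
                if (register_blkdev(EXAMPLE_MAJOR, "example")) { /* step 2 */
                        destroy_workqueue(wq);          /* undo step 1 */
                        return -EIO;
                }
                return 0;
        }
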
 
index 362cecc77130260459d81d18d8853f39a7eb35eb..4d680772379828423d8605b1cae8c5da271ec5b8 100644 (file)
@@ -123,9 +123,11 @@ static int atomic_dec_return_safe(atomic_t *v)
 #define RBD_FEATURE_LAYERING   (1<<0)
 #define RBD_FEATURE_STRIPINGV2 (1<<1)
 #define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2)
+#define RBD_FEATURE_DATA_POOL (1<<7)
 #define RBD_FEATURES_ALL       (RBD_FEATURE_LAYERING |         \
                                 RBD_FEATURE_STRIPINGV2 |       \
-                                RBD_FEATURE_EXCLUSIVE_LOCK)
+                                RBD_FEATURE_EXCLUSIVE_LOCK |   \
+                                RBD_FEATURE_DATA_POOL)
 
 /* Features supported by this (client software) implementation. */
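
Each feature is a single bit (DATA_POOL is bit 7; bits 3-6 belong to features this client does not implement), and RBD_FEATURES_ALL is the OR of everything understood. Any bit outside that mask marks an image that must be refused; a sketch of the check:

        /* features as read from the image header */
        u64 unsup = features & ~RBD_FEATURES_ALL;

        if (unsup) {
                pr_err("image uses unsupported features: 0x%llx\n", unsup);
                return -ENXIO;  /* refuse to map the image */
        }
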
 
@@ -144,10 +146,9 @@ struct rbd_image_header {
        /* These six fields never change for a given rbd image */
        char *object_prefix;
        __u8 obj_order;
-       __u8 crypt_type;
-       __u8 comp_type;
        u64 stripe_unit;
        u64 stripe_count;
+       s64 data_pool_id;
        u64 features;           /* Might be changeable someday? */
 
        /* The remaining fields need to be updated occasionally */
@@ -230,7 +231,7 @@ enum obj_req_flags {
 };
 
 struct rbd_obj_request {
-       const char              *object_name;
+       u64                     object_no;
        u64                     offset;         /* object start byte */
        u64                     length;         /* bytes from offset */
        unsigned long           flags;
@@ -438,7 +439,6 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
 
 static struct kmem_cache       *rbd_img_request_cache;
 static struct kmem_cache       *rbd_obj_request_cache;
-static struct kmem_cache       *rbd_segment_name_cache;
 
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
@@ -972,6 +972,30 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
        return true;
 }
 
+/*
+ * returns the size of an object in the image
+ */
+static u32 rbd_obj_bytes(struct rbd_image_header *header)
+{
+       return 1U << header->obj_order;
+}
+
+static void rbd_init_layout(struct rbd_device *rbd_dev)
+{
+       if (rbd_dev->header.stripe_unit == 0 ||
+           rbd_dev->header.stripe_count == 0) {
+               rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header);
+               rbd_dev->header.stripe_count = 1;
+       }
+
+       rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit;
+       rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count;
+       rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header);
+       rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ?
+                         rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id;
+       RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
+}
+
 /*
  * Fill an rbd image header with information from the given format 1
  * on-disk header.
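
rbd_obj_bytes() turns the on-disk order into a byte size (1 << obj_order), and rbd_init_layout() falls back to one whole object per stripe when an image carries no striping metadata. Worked numbers for the common default order of 22:

        u8 obj_order = 22;                      /* rbd's usual default */
        u32 obj_bytes = 1U << obj_order;        /* 4194304 bytes, 4 MiB */
        /* no striping metadata => stripe_unit = 4 MiB, stripe_count = 1 */
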
@@ -992,15 +1016,11 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
        /* Allocate this now to avoid having to handle failure below */
 
        if (first_time) {
-               size_t len;
-
-               len = strnlen(ondisk->object_prefix,
-                               sizeof (ondisk->object_prefix));
-               object_prefix = kmalloc(len + 1, GFP_KERNEL);
+               object_prefix = kstrndup(ondisk->object_prefix,
+                                        sizeof(ondisk->object_prefix),
+                                        GFP_KERNEL);
                if (!object_prefix)
                        return -ENOMEM;
-               memcpy(object_prefix, ondisk->object_prefix, len);
-               object_prefix[len] = '\0';
        }
 
        /* Allocate the snapshot context and fill it in */
@@ -1051,12 +1071,7 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
        if (first_time) {
                header->object_prefix = object_prefix;
                header->obj_order = ondisk->options.order;
-               header->crypt_type = ondisk->options.crypt_type;
-               header->comp_type = ondisk->options.comp_type;
-               /* The rest aren't used for format 1 images */
-               header->stripe_unit = 0;
-               header->stripe_count = 0;
-               header->features = 0;
+               rbd_init_layout(rbd_dev);
        } else {
                ceph_put_snap_context(header->snapc);
                kfree(header->snap_names);
@@ -1232,42 +1247,9 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
        rbd_dev->mapping.features = 0;
 }
 
-static void rbd_segment_name_free(const char *name)
-{
-       /* The explicit cast here is needed to drop the const qualifier */
-
-       kmem_cache_free(rbd_segment_name_cache, (void *)name);
-}
-
-static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
-{
-       char *name;
-       u64 segment;
-       int ret;
-       char *name_format;
-
-       name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO);
-       if (!name)
-               return NULL;
-       segment = offset >> rbd_dev->header.obj_order;
-       name_format = "%s.%012llx";
-       if (rbd_dev->image_format == 2)
-               name_format = "%s.%016llx";
-       ret = snprintf(name, CEPH_MAX_OID_NAME_LEN + 1, name_format,
-                       rbd_dev->header.object_prefix, segment);
-       if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) {
-               pr_err("error formatting segment name for #%llu (%d)\n",
-                       segment, ret);
-               rbd_segment_name_free(name);
-               name = NULL;
-       }
-
-       return name;
-}
-
 static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
 {
-       u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
+       u64 segment_size = rbd_obj_bytes(&rbd_dev->header);
 
        return offset & (segment_size - 1);
 }
@@ -1275,7 +1257,7 @@ static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
 static u64 rbd_segment_length(struct rbd_device *rbd_dev,
                                u64 offset, u64 length)
 {
-       u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
+       u64 segment_size = rbd_obj_bytes(&rbd_dev->header);
 
        offset &= segment_size - 1;
 
@@ -1286,14 +1268,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev,
        return length;
 }
 
-/*
- * returns the size of an object in the image
- */
-static u64 rbd_obj_bytes(struct rbd_image_header *header)
-{
-       return 1 << header->obj_order;
-}
-
 /*
  * bio helpers
  */
@@ -1623,7 +1597,9 @@ static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
 {
        struct ceph_osd_request *osd_req = obj_request->osd_req;
 
-       dout("%s %p osd_req %p\n", __func__, obj_request, osd_req);
+       dout("%s %p object_no %016llx %llu~%llu osd_req %p\n", __func__,
+            obj_request, obj_request->object_no, obj_request->offset,
+            obj_request->length, osd_req);
        if (obj_request_img_data_test(obj_request)) {
                WARN_ON(obj_request->callback != rbd_img_obj_callback);
                rbd_img_request_get(obj_request->img_request);
@@ -1631,44 +1607,6 @@ static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
        ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
 }
 
-static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
-{
-       dout("%s %p\n", __func__, obj_request);
-       ceph_osdc_cancel_request(obj_request->osd_req);
-}
-
-/*
- * Wait for an object request to complete.  If interrupted, cancel the
- * underlying osd request.
- *
- * @timeout: in jiffies, 0 means "wait forever"
- */
-static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request,
-                                 unsigned long timeout)
-{
-       long ret;
-
-       dout("%s %p\n", __func__, obj_request);
-       ret = wait_for_completion_interruptible_timeout(
-                                       &obj_request->completion,
-                                       ceph_timeout_jiffies(timeout));
-       if (ret <= 0) {
-               if (ret == 0)
-                       ret = -ETIMEDOUT;
-               rbd_obj_request_end(obj_request);
-       } else {
-               ret = 0;
-       }
-
-       dout("%s %p ret %d\n", __func__, obj_request, (int)ret);
-       return ret;
-}
-
-static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
-{
-       return __rbd_obj_request_wait(obj_request, 0);
-}
-
 static void rbd_img_request_complete(struct rbd_img_request *img_request)
 {
 
@@ -1955,8 +1893,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
                rbd_osd_call_callback(obj_request);
                break;
        default:
-               rbd_warn(NULL, "%s: unsupported op %hu",
-                       obj_request->object_name, (unsigned short) opcode);
+               rbd_warn(NULL, "unexpected OSD op: object_no %016llx opcode %d",
+                        obj_request->object_no, opcode);
                break;
        }
 
@@ -1980,6 +1918,40 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
        osd_req->r_data_offset = obj_request->offset;
 }
 
+static struct ceph_osd_request *
+__rbd_osd_req_create(struct rbd_device *rbd_dev,
+                    struct ceph_snap_context *snapc,
+                    int num_ops, unsigned int flags,
+                    struct rbd_obj_request *obj_request)
+{
+       struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       struct ceph_osd_request *req;
+       const char *name_format = rbd_dev->image_format == 1 ?
+                                     RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT;
+
+       req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO);
+       if (!req)
+               return NULL;
+
+       req->r_flags = flags;
+       req->r_callback = rbd_osd_req_callback;
+       req->r_priv = obj_request;
+
+       req->r_base_oloc.pool = rbd_dev->layout.pool_id;
+       if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
+                       rbd_dev->header.object_prefix, obj_request->object_no))
+               goto err_req;
+
+       if (ceph_osdc_alloc_messages(req, GFP_NOIO))
+               goto err_req;
+
+       return req;
+
+err_req:
+       ceph_osdc_put_request(req);
+       return NULL;
+}
+
 /*
  * Create an osd request.  A read request has one osd op (read).
  * A write request has either one (watch) or two (hint+write) osd ops.
@@ -1993,8 +1965,6 @@ static struct ceph_osd_request *rbd_osd_req_create(
                                        struct rbd_obj_request *obj_request)
 {
        struct ceph_snap_context *snapc = NULL;
-       struct ceph_osd_client *osdc;
-       struct ceph_osd_request *osd_req;
 
        if (obj_request_img_data_test(obj_request) &&
                (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) {
@@ -2009,35 +1979,9 @@ static struct ceph_osd_request *rbd_osd_req_create(
 
        rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2));
 
-       /* Allocate and initialize the request, for the num_ops ops */
-
-       osdc = &rbd_dev->rbd_client->client->osdc;
-       osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false,
-                                         GFP_NOIO);
-       if (!osd_req)
-               goto fail;
-
-       if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
-               osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
-       else
-               osd_req->r_flags = CEPH_OSD_FLAG_READ;
-
-       osd_req->r_callback = rbd_osd_req_callback;
-       osd_req->r_priv = obj_request;
-
-       osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id;
-       if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s",
-                            obj_request->object_name))
-               goto fail;
-
-       if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO))
-               goto fail;
-
-       return osd_req;
-
-fail:
-       ceph_osdc_put_request(osd_req);
-       return NULL;
+       return __rbd_osd_req_create(rbd_dev, snapc, num_ops,
+           (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) ?
+           CEPH_OSD_FLAG_WRITE : CEPH_OSD_FLAG_READ, obj_request);
 }
 
 /*
@@ -2050,10 +1994,6 @@ static struct ceph_osd_request *
 rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
 {
        struct rbd_img_request *img_request;
-       struct ceph_snap_context *snapc;
-       struct rbd_device *rbd_dev;
-       struct ceph_osd_client *osdc;
-       struct ceph_osd_request *osd_req;
        int num_osd_ops = 3;
 
        rbd_assert(obj_request_img_data_test(obj_request));
@@ -2065,77 +2005,34 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
        if (img_request_discard_test(img_request))
                num_osd_ops = 2;
 
-       /* Allocate and initialize the request, for all the ops */
-
-       snapc = img_request->snapc;
-       rbd_dev = img_request->rbd_dev;
-       osdc = &rbd_dev->rbd_client->client->osdc;
-       osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops,
-                                               false, GFP_NOIO);
-       if (!osd_req)
-               goto fail;
-
-       osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
-       osd_req->r_callback = rbd_osd_req_callback;
-       osd_req->r_priv = obj_request;
-
-       osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id;
-       if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s",
-                            obj_request->object_name))
-               goto fail;
-
-       if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO))
-               goto fail;
-
-       return osd_req;
-
-fail:
-       ceph_osdc_put_request(osd_req);
-       return NULL;
+       return __rbd_osd_req_create(img_request->rbd_dev,
+                                   img_request->snapc, num_osd_ops,
+                                   CEPH_OSD_FLAG_WRITE, obj_request);
 }
 
-
 static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
 {
        ceph_osdc_put_request(osd_req);
 }
 
-/* object_name is assumed to be a non-null pointer and NUL-terminated */
-
-static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
-                                               u64 offset, u64 length,
-                                               enum obj_request_type type)
+static struct rbd_obj_request *
+rbd_obj_request_create(enum obj_request_type type)
 {
        struct rbd_obj_request *obj_request;
-       size_t size;
-       char *name;
 
        rbd_assert(obj_request_type_valid(type));
 
-       size = strlen(object_name) + 1;
-       name = kmalloc(size, GFP_NOIO);
-       if (!name)
-               return NULL;
-
        obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
-       if (!obj_request) {
-               kfree(name);
+       if (!obj_request)
                return NULL;
-       }
 
-       obj_request->object_name = memcpy(name, object_name, size);
-       obj_request->offset = offset;
-       obj_request->length = length;
-       obj_request->flags = 0;
        obj_request->which = BAD_WHICH;
        obj_request->type = type;
        INIT_LIST_HEAD(&obj_request->links);
        init_completion(&obj_request->completion);
        kref_init(&obj_request->kref);
 
-       dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name,
-               offset, length, (int)type, obj_request);
-
+       dout("%s %p\n", __func__, obj_request);
        return obj_request;
 }
 
@@ -2170,8 +2067,6 @@ static void rbd_obj_request_destroy(struct kref *kref)
                break;
        }
 
-       kfree(obj_request->object_name);
-       obj_request->object_name = NULL;
        kmem_cache_free(rbd_obj_request_cache, obj_request);
 }
 
@@ -2546,22 +2441,18 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 
        while (resid) {
                struct ceph_osd_request *osd_req;
-               const char *object_name;
-               u64 offset;
-               u64 length;
+               u64 object_no = img_offset >> rbd_dev->header.obj_order;
+               u64 offset = rbd_segment_offset(rbd_dev, img_offset);
+               u64 length = rbd_segment_length(rbd_dev, img_offset, resid);
 
-               object_name = rbd_segment_name(rbd_dev, img_offset);
-               if (!object_name)
-                       goto out_unwind;
-               offset = rbd_segment_offset(rbd_dev, img_offset);
-               length = rbd_segment_length(rbd_dev, img_offset, resid);
-               obj_request = rbd_obj_request_create(object_name,
-                                               offset, length, type);
-               /* object request has its own copy of the object name */
-               rbd_segment_name_free(object_name);
+               obj_request = rbd_obj_request_create(type);
                if (!obj_request)
                        goto out_unwind;
 
+               obj_request->object_no = object_no;
+               obj_request->offset = offset;
+               obj_request->length = length;
+
                /*
                 * set obj_request->img_request before creating the
                 * osd_request so that it gets the right snapc
@@ -2771,7 +2662,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
         * child image to which the original request was to be sent.
         */
        img_offset = obj_request->img_offset - obj_request->offset;
-       length = (u64)1 << rbd_dev->header.obj_order;
+       length = rbd_obj_bytes(&rbd_dev->header);
 
        /*
         * There is no defined parent data beyond the parent
@@ -2900,11 +2791,12 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
        size_t size;
        int ret;
 
-       stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
-                                             OBJ_REQUEST_PAGES);
+       stat_request = rbd_obj_request_create(OBJ_REQUEST_PAGES);
        if (!stat_request)
                return -ENOMEM;
 
+       stat_request->object_no = obj_request->object_no;
+
        stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
                                                   stat_request);
        if (!stat_request->osd_req) {
@@ -3983,17 +3875,17 @@ out:
  * returned in the outbound buffer, or a negative error code.
  */
 static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
-                            const char *object_name,
-                            const char *class_name,
+                            struct ceph_object_id *oid,
+                            struct ceph_object_locator *oloc,
                             const char *method_name,
                             const void *outbound,
                             size_t outbound_size,
                             void *inbound,
                             size_t inbound_size)
 {
-       struct rbd_obj_request *obj_request;
-       struct page **pages;
-       u32 page_count;
+       struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       struct page *req_page = NULL;
+       struct page *reply_page;
        int ret;
 
        /*
@@ -4003,61 +3895,35 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
         * method.  Currently if this is present it will be a
         * snapshot id.
         */
-       page_count = (u32)calc_pages_for(0, inbound_size);
-       pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
-       if (IS_ERR(pages))
-               return PTR_ERR(pages);
-
-       ret = -ENOMEM;
-       obj_request = rbd_obj_request_create(object_name, 0, inbound_size,
-                                                       OBJ_REQUEST_PAGES);
-       if (!obj_request)
-               goto out;
+       if (outbound) {
+               if (outbound_size > PAGE_SIZE)
+                       return -E2BIG;
 
-       obj_request->pages = pages;
-       obj_request->page_count = page_count;
-
-       obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
-                                                 obj_request);
-       if (!obj_request->osd_req)
-               goto out;
-
-       osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL,
-                                       class_name, method_name);
-       if (outbound_size) {
-               struct ceph_pagelist *pagelist;
-
-               pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
-               if (!pagelist)
-                       goto out;
+               req_page = alloc_page(GFP_KERNEL);
+               if (!req_page)
+                       return -ENOMEM;
 
-               ceph_pagelist_init(pagelist);
-               ceph_pagelist_append(pagelist, outbound, outbound_size);
-               osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0,
-                                               pagelist);
+               memcpy(page_address(req_page), outbound, outbound_size);
        }
-       osd_req_op_cls_response_data_pages(obj_request->osd_req, 0,
-                                       obj_request->pages, inbound_size,
-                                       0, false, false);
-
-       rbd_obj_request_submit(obj_request);
-       ret = rbd_obj_request_wait(obj_request);
-       if (ret)
-               goto out;
 
-       ret = obj_request->result;
-       if (ret < 0)
-               goto out;
+       reply_page = alloc_page(GFP_KERNEL);
+       if (!reply_page) {
+               if (req_page)
+                       __free_page(req_page);
+               return -ENOMEM;
+       }
 
-       rbd_assert(obj_request->xferred < (u64)INT_MAX);
-       ret = (int)obj_request->xferred;
-       ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
-out:
-       if (obj_request)
-               rbd_obj_request_put(obj_request);
-       else
-               ceph_release_page_vector(pages, page_count);
+       ret = ceph_osdc_call(osdc, oid, oloc, RBD_DRV_NAME, method_name,
+                            CEPH_OSD_FLAG_READ, req_page, outbound_size,
+                            reply_page, &inbound_size);
+       if (!ret) {
+               memcpy(inbound, page_address(reply_page), inbound_size);
+               ret = inbound_size;
+       }
 
+       if (req_page)
+               __free_page(req_page);
+       __free_page(reply_page);
        return ret;
 }
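
rbd_obj_method_sync() now bounces the class-method payload through single pages: outbound data (if any) is copied into req_page, ceph_osdc_call() fills reply_page and updates inbound_size, and both pages are freed afterwards, which caps either direction at PAGE_SIZE (hence the -E2BIG check). A sketch of the request-page half alone:

        struct page *req_page = alloc_page(GFP_KERNEL);

        if (!req_page)
                return -ENOMEM;
        memcpy(page_address(req_page), outbound, outbound_size); /* <= PAGE_SIZE */
        /* ... hand req_page (and a reply page) to ceph_osdc_call() ... */
        __free_page(req_page);
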
 
@@ -4256,63 +4122,46 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
 }
 
 static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
-                               const char *object_name,
-                               u64 offset, u64 length, void *buf)
+                            struct ceph_object_id *oid,
+                            struct ceph_object_locator *oloc,
+                            void *buf, int buf_len)
 
 {
-       struct rbd_obj_request *obj_request;
-       struct page **pages = NULL;
-       u32 page_count;
-       size_t size;
+       struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       struct ceph_osd_request *req;
+       struct page **pages;
+       int num_pages = calc_pages_for(0, buf_len);
        int ret;
 
-       page_count = (u32) calc_pages_for(offset, length);
-       pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
-       if (IS_ERR(pages))
-               return PTR_ERR(pages);
-
-       ret = -ENOMEM;
-       obj_request = rbd_obj_request_create(object_name, offset, length,
-                                                       OBJ_REQUEST_PAGES);
-       if (!obj_request)
-               goto out;
-
-       obj_request->pages = pages;
-       obj_request->page_count = page_count;
-
-       obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
-                                                 obj_request);
-       if (!obj_request->osd_req)
-               goto out;
+       req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+       if (!req)
+               return -ENOMEM;
 
-       osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ,
-                                       offset, length, 0, 0);
-       osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0,
-                                       obj_request->pages,
-                                       obj_request->length,
-                                       obj_request->offset & ~PAGE_MASK,
-                                       false, false);
+       ceph_oid_copy(&req->r_base_oid, oid);
+       ceph_oloc_copy(&req->r_base_oloc, oloc);
+       req->r_flags = CEPH_OSD_FLAG_READ;
 
-       rbd_obj_request_submit(obj_request);
-       ret = rbd_obj_request_wait(obj_request);
+       ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
        if (ret)
-               goto out;
+               goto out_req;
 
-       ret = obj_request->result;
-       if (ret < 0)
-               goto out;
+       pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+       if (IS_ERR(pages)) {
+               ret = PTR_ERR(pages);
+               goto out_req;
+       }
 
-       rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
-       size = (size_t) obj_request->xferred;
-       ceph_copy_from_page_vector(pages, buf, 0, size);
-       rbd_assert(size <= (size_t)INT_MAX);
-       ret = (int)size;
-out:
-       if (obj_request)
-               rbd_obj_request_put(obj_request);
-       else
-               ceph_release_page_vector(pages, page_count);
+       osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, 0, buf_len, 0, 0);
+       osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false,
+                                        true);
+
+       ceph_osdc_start_request(osdc, req, false);
+       ret = ceph_osdc_wait_request(osdc, req);
+       if (ret >= 0)
+               ceph_copy_from_page_vector(pages, buf, 0, ret);
 
+out_req:
+       ceph_osdc_put_request(req);
        return ret;
 }
 
@@ -4348,8 +4197,8 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
                if (!ondisk)
                        return -ENOMEM;
 
-               ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_oid.name,
-                                      0, size, ondisk);
+               ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid,
+                                       &rbd_dev->header_oloc, ondisk, size);
                if (ret < 0)
                        goto out;
                if ((size_t)ret < size) {
@@ -4781,7 +4630,7 @@ static const struct attribute_group *rbd_attr_groups[] = {
 
 static void rbd_dev_release(struct device *dev);
 
-static struct device_type rbd_device_type = {
+static const struct device_type rbd_device_type = {
        .name           = "rbd",
        .groups         = rbd_attr_groups,
        .release        = rbd_dev_release,
@@ -4876,8 +4725,9 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
        INIT_LIST_HEAD(&rbd_dev->node);
        init_rwsem(&rbd_dev->header_rwsem);
 
+       rbd_dev->header.data_pool_id = CEPH_NOPOOL;
        ceph_oid_init(&rbd_dev->header_oid);
-       ceph_oloc_init(&rbd_dev->header_oloc);
+       rbd_dev->header_oloc.pool = spec->pool_id;
 
        mutex_init(&rbd_dev->watch_mutex);
        rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
@@ -4899,12 +4749,6 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
        rbd_dev->rbd_client = rbdc;
        rbd_dev->spec = spec;
 
-       rbd_dev->layout.stripe_unit = 1 << RBD_MAX_OBJ_ORDER;
-       rbd_dev->layout.stripe_count = 1;
-       rbd_dev->layout.object_size = 1 << RBD_MAX_OBJ_ORDER;
-       rbd_dev->layout.pool_id = spec->pool_id;
-       RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
-
        return rbd_dev;
 }
 
@@ -4970,10 +4814,10 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
                __le64 size;
        } __attribute__ ((packed)) size_buf = { 0 };
 
-       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
-                               "rbd", "get_size",
-                               &snapid, sizeof (snapid),
-                               &size_buf, sizeof (size_buf));
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                                 &rbd_dev->header_oloc, "get_size",
+                                 &snapid, sizeof(snapid),
+                                 &size_buf, sizeof(size_buf));
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                return ret;
@@ -5010,9 +4854,9 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
        if (!reply_buf)
                return -ENOMEM;
 
-       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
-                               "rbd", "get_object_prefix", NULL, 0,
-                               reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                                 &rbd_dev->header_oloc, "get_object_prefix",
+                                 NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out;
@@ -5045,10 +4889,10 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
        u64 unsup;
        int ret;
 
-       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
-                               "rbd", "get_features",
-                               &snapid, sizeof (snapid),
-                               &features_buf, sizeof (features_buf));
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                                 &rbd_dev->header_oloc, "get_features",
+                                 &snapid, sizeof(snapid),
+                                 &features_buf, sizeof(features_buf));
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                return ret;
@@ -5107,10 +4951,9 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
        }
 
        snapid = cpu_to_le64(rbd_dev->spec->snap_id);
-       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
-                               "rbd", "get_parent",
-                               &snapid, sizeof (snapid),
-                               reply_buf, size);
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                                 &rbd_dev->header_oloc, "get_parent",
+                                 &snapid, sizeof(snapid), reply_buf, size);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out_err;
@@ -5210,9 +5053,9 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
        u64 stripe_count;
        int ret;
 
-       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
-                               "rbd", "get_stripe_unit_count", NULL, 0,
-                               (char *)&striping_info_buf, size);
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                               &rbd_dev->header_oloc, "get_stripe_unit_count",
+                               NULL, 0, &striping_info_buf, size);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                return ret;
@@ -5226,7 +5069,7 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
         * out, and only fail if the image has non-default values.
         */
        ret = -EINVAL;
-       obj_size = (u64)1 << rbd_dev->header.obj_order;
+       obj_size = rbd_obj_bytes(&rbd_dev->header);
        p = &striping_info_buf;
        stripe_unit = ceph_decode_64(&p);
        if (stripe_unit != obj_size) {
@@ -5247,8 +5090,27 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
        return 0;
 }
 
+static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
+{
+       __le64 data_pool_id;
+       int ret;
+
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                                 &rbd_dev->header_oloc, "get_data_pool",
+                                 NULL, 0, &data_pool_id, sizeof(data_pool_id));
+       if (ret < 0)
+               return ret;
+       if (ret < sizeof(data_pool_id))
+               return -EBADMSG;
+
+       rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
+       WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
+       return 0;
+}
+
 static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
 {
+       CEPH_DEFINE_OID_ONSTACK(oid);
        size_t image_id_size;
        char *image_id;
        void *p;
@@ -5276,10 +5138,10 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
        if (!reply_buf)
                goto out;
 
-       ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY,
-                               "rbd", "dir_get_name",
-                               image_id, image_id_size,
-                               reply_buf, size);
+       ceph_oid_printf(&oid, "%s", RBD_DIRECTORY);
+       ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
+                                 "dir_get_name", image_id, image_id_size,
+                                 reply_buf, size);
        if (ret < 0)
                goto out;
        p = reply_buf;
@@ -5458,9 +5320,9 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
        if (!reply_buf)
                return -ENOMEM;
 
-       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
-                               "rbd", "get_snapcontext", NULL, 0,
-                               reply_buf, size);
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                                 &rbd_dev->header_oloc, "get_snapcontext",
+                                 NULL, 0, reply_buf, size);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out;
@@ -5523,10 +5385,9 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
                return ERR_PTR(-ENOMEM);
 
        snapid = cpu_to_le64(snap_id);
-       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
-                               "rbd", "get_snapshot_name",
-                               &snapid, sizeof (snapid),
-                               reply_buf, size);
+       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+                                 &rbd_dev->header_oloc, "get_snapshot_name",
+                                 &snapid, sizeof(snapid), reply_buf, size);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0) {
                snap_name = ERR_PTR(ret);
@@ -5833,7 +5694,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 {
        int ret;
        size_t size;
-       char *object_name;
+       CEPH_DEFINE_OID_ONSTACK(oid);
        void *response;
        char *image_id;
 
@@ -5853,12 +5714,12 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
         * First, see if the format 2 image id file exists, and if
         * so, get the image's persistent id from it.
         */
-       size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name);
-       object_name = kmalloc(size, GFP_NOIO);
-       if (!object_name)
-               return -ENOMEM;
-       sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name);
-       dout("rbd id object name is %s\n", object_name);
+       ret = ceph_oid_aprintf(&oid, GFP_KERNEL, "%s%s", RBD_ID_PREFIX,
+                              rbd_dev->spec->image_name);
+       if (ret)
+               return ret;
+
+       dout("rbd id object name is %s\n", oid.name);
 
        /* Response will be an encoded string, which includes a length */
 
@@ -5871,9 +5732,9 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 
        /* If it doesn't exist we'll assume it's a format 1 image */
 
-       ret = rbd_obj_method_sync(rbd_dev, object_name,
-                               "rbd", "get_id", NULL, 0,
-                               response, RBD_IMAGE_ID_LEN_MAX);
+       ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
+                                 "get_id", NULL, 0,
+                                 response, RBD_IMAGE_ID_LEN_MAX);
        dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret == -ENOENT) {
                image_id = kstrdup("", GFP_KERNEL);
@@ -5896,8 +5757,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
        }
 out:
        kfree(response);
-       kfree(object_name);
-
+       ceph_oid_destroy(&oid);
        return ret;
 }
 
@@ -5944,14 +5804,20 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
                if (ret < 0)
                        goto out_err;
        }
-       /* No support for crypto and compression type format 2 images */
 
+       if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
+               ret = rbd_dev_v2_data_pool(rbd_dev);
+               if (ret)
+                       goto out_err;
+       }
+
+       rbd_init_layout(rbd_dev);
        return 0;
+
 out_err:
        rbd_dev->header.features = 0;
        kfree(rbd_dev->header.object_prefix);
        rbd_dev->header.object_prefix = NULL;
-
        return ret;
 }
 
@@ -6077,8 +5943,6 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
        /* Record the header object name for this rbd image. */
 
        rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
-
-       rbd_dev->header_oloc.pool = rbd_dev->layout.pool_id;
        if (rbd_dev->image_format == 1)
                ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
                                       spec->image_name, RBD_SUFFIX);
@@ -6471,27 +6335,16 @@ static int rbd_slab_init(void)
        if (!rbd_obj_request_cache)
                goto out_err;
 
-       rbd_assert(!rbd_segment_name_cache);
-       rbd_segment_name_cache = kmem_cache_create("rbd_segment_name",
-                                       CEPH_MAX_OID_NAME_LEN + 1, 1, 0, NULL);
-       if (rbd_segment_name_cache)
-               return 0;
-out_err:
-       kmem_cache_destroy(rbd_obj_request_cache);
-       rbd_obj_request_cache = NULL;
+       return 0;
 
+out_err:
        kmem_cache_destroy(rbd_img_request_cache);
        rbd_img_request_cache = NULL;
-
        return -ENOMEM;
 }
 
 static void rbd_slab_exit(void)
 {
-       rbd_assert(rbd_segment_name_cache);
-       kmem_cache_destroy(rbd_segment_name_cache);
-       rbd_segment_name_cache = NULL;
-
        rbd_assert(rbd_obj_request_cache);
        kmem_cache_destroy(rbd_obj_request_cache);
        rbd_obj_request_cache = NULL;
index 94f367db27b0b816e9585da18f0063a1523b23ca..62ff50d3e7a6f1f4da2d10249512f9c2cff1859f 100644 (file)
@@ -25,8 +25,8 @@
  */
 
 #define RBD_HEADER_PREFIX      "rbd_header."
-#define RBD_DATA_PREFIX        "rbd_data."
 #define RBD_ID_PREFIX          "rbd_id."
+#define RBD_V2_DATA_FORMAT     "%s.%016llx"
 
 #define RBD_LOCK_NAME          "rbd_lock"
 #define RBD_LOCK_TAG           "internal"
@@ -42,13 +42,14 @@ enum rbd_notify_op {
 /*
  * For format version 1, rbd image 'foo' consists of objects
  *   foo.rbd           - image metadata
- *   rb.<idhi>.<idlo>.00000000
- *   rb.<idhi>.<idlo>.00000001
+ *   rb.<idhi>.<idlo>.<extra>.000000000000
+ *   rb.<idhi>.<idlo>.<extra>.000000000001
  *   ...               - data
  * There is no notion of a persistent image id in rbd format 1.
  */
 
 #define RBD_SUFFIX             ".rbd"
+#define RBD_V1_DATA_FORMAT     "%s.%012llx"
 
 #define RBD_DIRECTORY           "rbd_directory"
 #define RBD_INFO                "rbd_info"
@@ -57,9 +58,6 @@ enum rbd_notify_op {
 #define RBD_MIN_OBJ_ORDER       16
 #define RBD_MAX_OBJ_ORDER       30
 
-#define RBD_COMP_NONE          0
-#define RBD_CRYPT_NONE         0
-
 #define RBD_HEADER_TEXT                "<<< Rados Block Device Image >>>\n"
 #define RBD_HEADER_SIGNATURE   "RBD"
 #define RBD_HEADER_VERSION     "001.005"
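
The two data-object formats defined above pin down how rbd maps image offsets to RADOS object names: format 1 appends a 12-hex-digit object number to the image's block-name prefix, while format 2 appends a 16-hex-digit number to "rbd_data.<image id>". A small standalone check of the formats (the prefix and image id below are invented examples):

#include <stdio.h>

#define RBD_V1_DATA_FORMAT	"%s.%012llx"
#define RBD_V2_DATA_FORMAT	"%s.%016llx"

int main(void)
{
	char name[64];

	/* format 1: <block name prefix>.<object number> */
	snprintf(name, sizeof(name), RBD_V1_DATA_FORMAT,
		 "rb.0.1029.74b0dc51", 1ULL);
	printf("%s\n", name);	/* rb.0.1029.74b0dc51.000000000001 */

	/* format 2: rbd_data.<image id>.<object number> */
	snprintf(name, sizeof(name), RBD_V2_DATA_FORMAT,
		 "rbd_data.abc123", 5ULL);
	printf("%s\n", name);	/* rbd_data.abc123.0000000000000005 */
	return 0;
}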
index cab157331c4eae29a65d50df6204a5a46c2587a6..3f3a3ab3d50ae02b418c27dc4a34d9ef8a44e9c4 100644 (file)
@@ -34,6 +34,7 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
 
 #define VDC_TX_RING_SIZE       512
+#define VDC_DEFAULT_BLK_SIZE   512
 
 #define WAITING_FOR_LINK_UP    0x01
 #define WAITING_FOR_TX_SPACE   0x02
@@ -73,6 +74,7 @@ struct vdc_port {
        u32                     vdisk_size;
        u8                      vdisk_type;
        u8                      vdisk_mtype;
+       u32                     vdisk_phys_blksz;
 
        char                    disk_name[32];
 };
@@ -88,6 +90,7 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
 
 /* Ordered from largest major to lowest */
 static struct vio_version vdc_versions[] = {
+       { .major = 1, .minor = 2 },
        { .major = 1, .minor = 1 },
        { .major = 1, .minor = 0 },
 };
@@ -271,6 +274,11 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
                if (pkt->max_xfer_size < port->max_xfer_size)
                        port->max_xfer_size = pkt->max_xfer_size;
                port->vdisk_block_size = pkt->vdisk_block_size;
+
+               port->vdisk_phys_blksz = VDC_DEFAULT_BLK_SIZE;
+               if (vdc_version_supported(port, 1, 2))
+                       port->vdisk_phys_blksz = pkt->phys_block_size;
+
                return 0;
        } else {
                printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
@@ -754,6 +762,12 @@ static int probe_disk(struct vdc_port *port)
        if (err)
                return err;
 
+       /* Using version 1.2 means vdisk_phys_blksz should be set unless the
+        * disk is reserved by another system.
+        */
+       if (vdc_version_supported(port, 1, 2) && !port->vdisk_phys_blksz)
+               return -ENODEV;
+
        if (vdc_version_supported(port, 1, 1)) {
                /* vdisk_size should be set during the handshake, if it wasn't
                 * then the underlying disk is reserved by another system
@@ -829,6 +843,8 @@ static int probe_disk(struct vdc_port *port)
                }
        }
 
+       blk_queue_physical_block_size(q, port->vdisk_phys_blksz);
+
        pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
               g->disk_name,
               port->vdisk_size, (port->vdisk_size >> (20 - 9)),
@@ -910,7 +926,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        if (err)
                goto err_out_free_port;
 
-       port->vdisk_block_size = 512;
+       port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE;
        port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size);
        port->ring_cookies = ((port->max_xfer_size *
                               port->vdisk_block_size) / PAGE_SIZE) + 2;
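
The ring sizing at the end of this hunk derives everything from the block size: a 128 KiB transfer cap expressed in blocks, then enough DMA cookies to cover one full transfer plus two spares. Worked out standalone (sparc64, the only user of sunvdc, has 8 KiB pages):

#include <stdio.h>

#define VDC_DEFAULT_BLK_SIZE	512
#define PAGE_SIZE		8192	/* sparc64 page size */

int main(void)
{
	unsigned int blk = VDC_DEFAULT_BLK_SIZE;
	unsigned int max_xfer = (128 * 1024) / blk;		  /* 256 blocks */
	unsigned int cookies = (max_xfer * blk) / PAGE_SIZE + 2; /* 16 + 2 */

	printf("max_xfer_size=%u ring_cookies=%u\n", max_xfer, cookies);
	return 0;
}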
index c73fede582f7292ae7193719e0368ea91acceb43..e27d89a36c34170d1c894b60f43ab3903a5fbf70 100644 (file)
@@ -74,6 +74,17 @@ static void zram_clear_flag(struct zram_meta *meta, u32 index,
        meta->table[index].value &= ~BIT(flag);
 }
 
+static inline void zram_set_element(struct zram_meta *meta, u32 index,
+                       unsigned long element)
+{
+       meta->table[index].element = element;
+}
+
+static inline void zram_clear_element(struct zram_meta *meta, u32 index)
+{
+       meta->table[index].element = 0;
+}
+
 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
 {
        return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
@@ -146,31 +157,46 @@ static inline void update_used_max(struct zram *zram,
        } while (old_max != cur_max);
 }
 
-static bool page_zero_filled(void *ptr)
+static inline void zram_fill_page(char *ptr, unsigned long len,
+                                       unsigned long value)
+{
+       int i;
+       unsigned long *page = (unsigned long *)ptr;
+
+       WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
+
+       if (likely(value == 0)) {
+               memset(ptr, 0, len);
+       } else {
+               for (i = 0; i < len / sizeof(*page); i++)
+                       page[i] = value;
+       }
+}
+
+static bool page_same_filled(void *ptr, unsigned long *element)
 {
        unsigned int pos;
        unsigned long *page;
 
        page = (unsigned long *)ptr;
 
-       for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
-               if (page[pos])
+       for (pos = 0; pos < PAGE_SIZE / sizeof(*page) - 1; pos++) {
+               if (page[pos] != page[pos + 1])
                        return false;
        }
 
+       *element = page[pos];
+
        return true;
 }
 
-static void handle_zero_page(struct bio_vec *bvec)
+static void handle_same_page(struct bio_vec *bvec, unsigned long element)
 {
        struct page *page = bvec->bv_page;
        void *user_mem;
 
        user_mem = kmap_atomic(page);
-       if (is_partial_io(bvec))
-               memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
-       else
-               clear_page(user_mem);
+       zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element);
        kunmap_atomic(user_mem);
 
        flush_dcache_page(page);
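
page_same_filled() generalizes the old zero-page check: instead of testing every word against zero, it tests each word against its neighbour, so any page made of one repeating machine word qualifies, and that word is stashed in place of the allocation handle. A userspace sketch of the same detection logic:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096

/* True when the page is one repeated word; *element receives that word. */
static bool page_same_filled(const void *ptr, unsigned long *element)
{
	const unsigned long *page = ptr;
	size_t pos;

	for (pos = 0; pos < PAGE_SIZE / sizeof(*page) - 1; pos++) {
		if (page[pos] != page[pos + 1])
			return false;
	}
	*element = page[pos];
	return true;
}

int main(void)
{
	unsigned long buf[PAGE_SIZE / sizeof(unsigned long)];
	unsigned long element;
	size_t i;

	for (i = 0; i < PAGE_SIZE / sizeof(unsigned long); i++)
		buf[i] = 0xdeadbeefUL;

	if (page_same_filled(buf, &element))
		printf("same-filled, element=%#lx\n", element);
	return 0;
}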
@@ -363,7 +389,7 @@ static ssize_t mm_stat_show(struct device *dev,
                        mem_used << PAGE_SHIFT,
                        zram->limit_pages << PAGE_SHIFT,
                        max_used << PAGE_SHIFT,
-                       (u64)atomic64_read(&zram->stats.zero_pages),
+                       (u64)atomic64_read(&zram->stats.same_pages),
                        pool_stats.pages_compacted);
        up_read(&zram->init_lock);
 
@@ -391,18 +417,6 @@ static DEVICE_ATTR_RO(io_stat);
 static DEVICE_ATTR_RO(mm_stat);
 static DEVICE_ATTR_RO(debug_stat);
 
-static inline bool zram_meta_get(struct zram *zram)
-{
-       if (atomic_inc_not_zero(&zram->refcount))
-               return true;
-       return false;
-}
-
-static inline void zram_meta_put(struct zram *zram)
-{
-       atomic_dec(&zram->refcount);
-}
-
 static void zram_meta_free(struct zram_meta *meta, u64 disksize)
 {
        size_t num_pages = disksize >> PAGE_SHIFT;
@@ -411,8 +425,11 @@ static void zram_meta_free(struct zram_meta *meta, u64 disksize)
        /* Free all pages that are still in this zram device */
        for (index = 0; index < num_pages; index++) {
                unsigned long handle = meta->table[index].handle;
-
-               if (!handle)
+               /*
+                * No memory is allocated for same element filled pages,
+                * so there is nothing to free for them here.
+                */
+               if (!handle || zram_test_flag(meta, index, ZRAM_SAME))
                        continue;
 
                zs_free(meta->mem_pool, handle);
@@ -462,18 +479,20 @@ static void zram_free_page(struct zram *zram, size_t index)
        struct zram_meta *meta = zram->meta;
        unsigned long handle = meta->table[index].handle;
 
-       if (unlikely(!handle)) {
-               /*
-                * No memory is allocated for zero filled pages.
-                * Simply clear zero page flag.
-                */
-               if (zram_test_flag(meta, index, ZRAM_ZERO)) {
-                       zram_clear_flag(meta, index, ZRAM_ZERO);
-                       atomic64_dec(&zram->stats.zero_pages);
-               }
+       /*
+        * No memory is allocated for same element filled pages.
+        * Simply clear same page flag.
+        */
+       if (zram_test_flag(meta, index, ZRAM_SAME)) {
+               zram_clear_flag(meta, index, ZRAM_SAME);
+               zram_clear_element(meta, index);
+               atomic64_dec(&zram->stats.same_pages);
                return;
        }
 
+       if (!handle)
+               return;
+
        zs_free(meta->mem_pool, handle);
 
        atomic64_sub(zram_get_obj_size(meta, index),
@@ -496,9 +515,9 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
        handle = meta->table[index].handle;
        size = zram_get_obj_size(meta, index);
 
-       if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
+       if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
                bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
-               clear_page(mem);
+               zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);
                return 0;
        }
 
@@ -534,9 +553,9 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
 
        bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        if (unlikely(!meta->table[index].handle) ||
-                       zram_test_flag(meta, index, ZRAM_ZERO)) {
+                       zram_test_flag(meta, index, ZRAM_SAME)) {
                bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
-               handle_zero_page(bvec);
+               handle_same_page(bvec, meta->table[index].element);
                return 0;
        }
        bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
@@ -584,6 +603,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
        struct zram_meta *meta = zram->meta;
        struct zcomp_strm *zstrm = NULL;
        unsigned long alloced_pages;
+       unsigned long element;
 
        page = bvec->bv_page;
        if (is_partial_io(bvec)) {
@@ -612,16 +632,17 @@ compress_again:
                uncmem = user_mem;
        }
 
-       if (page_zero_filled(uncmem)) {
+       if (page_same_filled(uncmem, &element)) {
                if (user_mem)
                        kunmap_atomic(user_mem);
                /* Free memory associated with this sector now. */
                bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
                zram_free_page(zram, index);
-               zram_set_flag(meta, index, ZRAM_ZERO);
+               zram_set_flag(meta, index, ZRAM_SAME);
+               zram_set_element(meta, index, element);
                bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
-               atomic64_inc(&zram->stats.zero_pages);
+               atomic64_inc(&zram->stats.same_pages);
                ret = 0;
                goto out;
        }
@@ -859,22 +880,17 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
 {
        struct zram *zram = queue->queuedata;
 
-       if (unlikely(!zram_meta_get(zram)))
-               goto error;
-
        blk_queue_split(queue, &bio, queue->bio_split);
 
        if (!valid_io_request(zram, bio->bi_iter.bi_sector,
                                        bio->bi_iter.bi_size)) {
                atomic64_inc(&zram->stats.invalid_io);
-               goto put_zram;
+               goto error;
        }
 
        __zram_make_request(zram, bio);
-       zram_meta_put(zram);
        return BLK_QC_T_NONE;
-put_zram:
-       zram_meta_put(zram);
+
 error:
        bio_io_error(bio);
        return BLK_QC_T_NONE;
@@ -904,13 +920,11 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
        struct bio_vec bv;
 
        zram = bdev->bd_disk->private_data;
-       if (unlikely(!zram_meta_get(zram)))
-               goto out;
 
        if (!valid_io_request(zram, sector, PAGE_SIZE)) {
                atomic64_inc(&zram->stats.invalid_io);
                err = -EINVAL;
-               goto put_zram;
+               goto out;
        }
 
        index = sector >> SECTORS_PER_PAGE_SHIFT;
@@ -921,8 +935,6 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
        bv.bv_offset = 0;
 
        err = zram_bvec_rw(zram, &bv, index, offset, is_write);
-put_zram:
-       zram_meta_put(zram);
 out:
        /*
         * If I/O fails, just return error(ie, non-zero) without
@@ -955,17 +967,6 @@ static void zram_reset_device(struct zram *zram)
        meta = zram->meta;
        comp = zram->comp;
        disksize = zram->disksize;
-       /*
-        * Refcount will go down to 0 eventually and r/w handler
-        * cannot handle further I/O so it will bail out by
-        * check zram_meta_get.
-        */
-       zram_meta_put(zram);
-       /*
-        * We want to free zram_meta in process context to avoid
-        * deadlock between reclaim path and any other locks.
-        */
-       wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
 
        /* Reset stats */
        memset(&zram->stats, 0, sizeof(zram->stats));
@@ -1013,8 +1014,6 @@ static ssize_t disksize_store(struct device *dev,
                goto out_destroy_comp;
        }
 
-       init_waitqueue_head(&zram->io_done);
-       atomic_set(&zram->refcount, 1);
        zram->meta = meta;
        zram->comp = comp;
        zram->disksize = disksize;
index 74fcf10da37499a610d99f3381878b75546ea24d..caeff51f1571af0957112bc28fe020f522503f94 100644 (file)
@@ -61,7 +61,7 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
 /* Flags for zram pages (table[page_no].value) */
 enum zram_pageflags {
-       /* Page consists entirely of zeros */
-       ZRAM_ZERO = ZRAM_FLAG_SHIFT,
+       /* Page consists entirely of the same element */
+       ZRAM_SAME = ZRAM_FLAG_SHIFT,
        ZRAM_ACCESS,    /* page is now accessed */
 
        __NR_ZRAM_PAGEFLAGS,
@@ -71,7 +71,10 @@ enum zram_pageflags {
 
 /* Allocated for each disk page */
 struct zram_table_entry {
-       unsigned long handle;
+       union {
+               unsigned long handle;
+               unsigned long element;
+       };
        unsigned long value;
 };
 
@@ -83,7 +86,7 @@ struct zram_stats {
        atomic64_t failed_writes;       /* can happen when memory is too low */
        atomic64_t invalid_io;  /* non-page-aligned I/O requests */
        atomic64_t notify_free; /* no. of swap slot free notifications */
-       atomic64_t zero_pages;          /* no. of zero filled pages */
+       atomic64_t same_pages;          /* no. of same element filled pages */
        atomic64_t pages_stored;        /* no. of pages currently stored */
        atomic_long_t max_used_pages;   /* no. of maximum pages stored */
        atomic64_t writestall;          /* no. of write slow paths */
@@ -106,9 +109,6 @@ struct zram {
        unsigned long limit_pages;
 
        struct zram_stats stats;
-       atomic_t refcount; /* refcount for zram_meta */
-       /* wait all IO under all of cpu are done */
-       wait_queue_head_t io_done;
        /*
         * This is the limit on amount of *uncompressed* worth of data
         * we can store in a disk.
index 737187865269e9470ae6ef59556727144afb54d4..53fe633df1e8d9c1187e862b6a305a1905bbc2bb 100644 (file)
 
 #include "agp.h"
 
-static int alpha_core_agp_vm_fault(struct vm_area_struct *vma,
-                                       struct vm_fault *vmf)
+static int alpha_core_agp_vm_fault(struct vm_fault *vmf)
 {
        alpha_agp_info *agp = agp_bridge->dev_private_data;
        dma_addr_t dma_addr;
        unsigned long pa;
        struct page *page;
 
-       dma_addr = vmf->address - vma->vm_start + agp->aperture.bus_base;
+       dma_addr = vmf->address - vmf->vma->vm_start + agp->aperture.bus_base;
        pa = agp->ops->translate(agp, dma_addr);
 
        if (pa == (unsigned long)-EINVAL)
index 7f816655cbbfafc9ea3f16097ba4a6f8a0f4b91b..90f3edffb0677690b732a9d3b2289ee0fa276498 100644 (file)
@@ -78,7 +78,8 @@ config IPMI_POWEROFF
 endif # IPMI_HANDLER
 
 config ASPEED_BT_IPMI_BMC
-       depends on ARCH_ASPEED
+       depends on ARCH_ASPEED || COMPILE_TEST
+       depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
        tristate "BT IPMI bmc driver"
        help
          Provides a driver for the BT (Block Transfer) IPMI interface
index fc9e8891eae36458205624bce3bef9cc84be8f12..d6f5d9eb102dd5dce22179cc4cc8290d4bbce927 100644 (file)
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/mfd/syscon.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/poll.h>
+#include <linux/regmap.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 
@@ -60,7 +63,8 @@
 struct bt_bmc {
        struct device           dev;
        struct miscdevice       miscdev;
-       void __iomem            *base;
+       struct regmap           *map;
+       int                     offset;
        int                     irq;
        wait_queue_head_t       queue;
        struct timer_list       poll_timer;
@@ -69,14 +73,29 @@ struct bt_bmc {
 
 static atomic_t open_count = ATOMIC_INIT(0);
 
+static const struct regmap_config bt_regmap_cfg = {
+       .reg_bits = 32,
+       .val_bits = 32,
+       .reg_stride = 4,
+};
+
 static u8 bt_inb(struct bt_bmc *bt_bmc, int reg)
 {
-       return ioread8(bt_bmc->base + reg);
+       uint32_t val = 0;
+       int rc;
+
+       rc = regmap_read(bt_bmc->map, bt_bmc->offset + reg, &val);
+       WARN(rc != 0, "regmap_read() failed: %d\n", rc);
+
+       return rc == 0 ? (u8) val : 0;
 }
 
 static void bt_outb(struct bt_bmc *bt_bmc, u8 data, int reg)
 {
-       iowrite8(data, bt_bmc->base + reg);
+       int rc;
+
+       rc = regmap_write(bt_bmc->map, bt_bmc->offset + reg, data);
+       WARN(rc != 0, "regmap_write() failed: %d\n", rc);
 }
 
 static void clr_rd_ptr(struct bt_bmc *bt_bmc)
@@ -367,14 +386,18 @@ static irqreturn_t bt_bmc_irq(int irq, void *arg)
 {
        struct bt_bmc *bt_bmc = arg;
        u32 reg;
+       int rc;
+
+       rc = regmap_read(bt_bmc->map, bt_bmc->offset + BT_CR2, &reg);
+       if (rc)
+               return IRQ_NONE;
 
-       reg = ioread32(bt_bmc->base + BT_CR2);
        reg &= BT_CR2_IRQ_H2B | BT_CR2_IRQ_HBUSY;
        if (!reg)
                return IRQ_NONE;
 
        /* ack pending IRQs */
-       iowrite32(reg, bt_bmc->base + BT_CR2);
+       regmap_write(bt_bmc->map, bt_bmc->offset + BT_CR2, reg);
 
        wake_up(&bt_bmc->queue);
        return IRQ_HANDLED;
@@ -384,7 +407,6 @@ static int bt_bmc_config_irq(struct bt_bmc *bt_bmc,
                             struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
-       u32 reg;
        int rc;
 
        bt_bmc->irq = platform_get_irq(pdev, 0);
@@ -405,18 +427,17 @@ static int bt_bmc_config_irq(struct bt_bmc *bt_bmc,
         * will be cleared (along with B2H) when we can write the next
         * message to the BT buffer
         */
-       reg = ioread32(bt_bmc->base + BT_CR1);
-       reg |= BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY;
-       iowrite32(reg, bt_bmc->base + BT_CR1);
+       rc = regmap_update_bits(bt_bmc->map, bt_bmc->offset + BT_CR1,
+                               (BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY),
+                               (BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY));
 
-       return 0;
+       return rc;
 }
 
 static int bt_bmc_probe(struct platform_device *pdev)
 {
        struct bt_bmc *bt_bmc;
        struct device *dev;
-       struct resource *res;
        int rc;
 
        if (!pdev || !pdev->dev.of_node)
@@ -431,10 +452,27 @@ static int bt_bmc_probe(struct platform_device *pdev)
 
        dev_set_drvdata(&pdev->dev, bt_bmc);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       bt_bmc->base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(bt_bmc->base))
-               return PTR_ERR(bt_bmc->base);
+       bt_bmc->map = syscon_node_to_regmap(pdev->dev.parent->of_node);
+       if (IS_ERR(bt_bmc->map)) {
+               struct resource *res;
+               void __iomem *base;
+
+               /*
+                * Assume it's not the MFD-based devicetree description;
+                * in that case, ioremap the resource and generate a
+                * regmap ourselves.
+                */
+               res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+               base = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(base))
+                       return PTR_ERR(base);
+
+               bt_bmc->map = devm_regmap_init_mmio(dev, base, &bt_regmap_cfg);
+               bt_bmc->offset = 0;
+       } else {
+               rc = of_property_read_u32(dev->of_node, "reg", &bt_bmc->offset);
+               if (rc)
+                       return rc;
+       }
 
        mutex_init(&bt_bmc->mutex);
        init_waitqueue_head(&bt_bmc->queue);
@@ -461,12 +499,12 @@ static int bt_bmc_probe(struct platform_device *pdev)
                add_timer(&bt_bmc->poll_timer);
        }
 
-       iowrite32((BT_IO_BASE << BT_CR0_IO_BASE) |
-                 (BT_IRQ << BT_CR0_IRQ) |
-                 BT_CR0_EN_CLR_SLV_RDP |
-                 BT_CR0_EN_CLR_SLV_WRP |
-                 BT_CR0_ENABLE_IBT,
-                 bt_bmc->base + BT_CR0);
+       regmap_write(bt_bmc->map, bt_bmc->offset + BT_CR0,
+                    (BT_IO_BASE << BT_CR0_IO_BASE) |
+                    (BT_IRQ << BT_CR0_IRQ) |
+                    BT_CR0_EN_CLR_SLV_RDP |
+                    BT_CR0_EN_CLR_SLV_WRP |
+                    BT_CR0_ENABLE_IBT);
 
        clr_b_busy(bt_bmc);
 
index a21407de46aeb859eb7a0c11da17af887bb53175..f45119c5337d7b3bdf5eb3b5663f5873dd8ac6c1 100644 (file)
@@ -108,7 +108,7 @@ static int ipmi_fasync(int fd, struct file *file, int on)
        return (result);
 }
 
-static struct ipmi_user_hndl ipmi_hndlrs =
+static const struct ipmi_user_hndl ipmi_hndlrs =
 {
        .ipmi_recv_hndl = file_receive_handler,
 };
index 92e53acf2cd201bfc519044149651707d5370284..9f699951b75aa40a15ed3aec78a97b1daf7cab41 100644 (file)
@@ -102,7 +102,7 @@ struct ipmi_user {
        struct kref refcount;
 
        /* The upper layer that handles receive messages. */
-       struct ipmi_user_hndl *handler;
+       const struct ipmi_user_hndl *handler;
        void             *handler_data;
 
        /* The interface this user is bound to. */
@@ -919,7 +919,7 @@ static int intf_err_seq(ipmi_smi_t   intf,
 
 
 int ipmi_create_user(unsigned int          if_num,
-                    struct ipmi_user_hndl *handler,
+                    const struct ipmi_user_hndl *handler,
                     void                  *handler_data,
                     ipmi_user_t           *user)
 {
index 6e658aa114f19da873b5d201d6c131b7350311cd..b338a4becbf8c72baa8a8e9b6b0396c1e6ee3c60 100644 (file)
@@ -196,7 +196,7 @@ static void ipmi_powernv_poll(void *send_info)
        ipmi_powernv_recv(smi);
 }
 
-static struct ipmi_smi_handlers ipmi_powernv_smi_handlers = {
+static const struct ipmi_smi_handlers ipmi_powernv_smi_handlers = {
        .owner                  = THIS_MODULE,
        .start_processing       = ipmi_powernv_start_processing,
        .sender                 = ipmi_powernv_send,
index 4035495f3a86c8bc83c0ab52ffae9a59f6a9c8bc..30b9e83bf1bfc668f4093ba2d79c973a1a2c32d3 100644 (file)
@@ -985,7 +985,7 @@ static void ipmi_wdog_pretimeout_handler(void *handler_data)
        pretimeout_since_last_heartbeat = 1;
 }
 
-static struct ipmi_user_hndl ipmi_hndlrs = {
+static const struct ipmi_user_hndl ipmi_hndlrs = {
        .ipmi_recv_hndl           = ipmi_wdog_msg_handler,
        .ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler
 };
index a697ca0cab1e1e2ff56fcf86c41793aad507c524..a9c2fa3c81e5385def9e50b3d35cd16ac747efde 100644 (file)
@@ -191,12 +191,12 @@ mspec_close(struct vm_area_struct *vma)
  * Creates a mspec page and maps it to user space.
  */
 static int
-mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+mspec_fault(struct vm_fault *vmf)
 {
        unsigned long paddr, maddr;
        unsigned long pfn;
        pgoff_t index = vmf->pgoff;
-       struct vma_data *vdata = vma->vm_private_data;
+       struct vma_data *vdata = vmf->vma->vm_private_data;
 
        maddr = (volatile unsigned long) vdata->maddr[index];
        if (maddr == 0) {
@@ -227,7 +227,7 @@ mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         * be because another thread has installed the pte first, so it
         * is no problem.
         */
-       vm_insert_pfn(vma, vmf->address, pfn);
+       vm_insert_pfn(vmf->vma, vmf->address, pfn);
 
        return VM_FAULT_NOPAGE;
 }
index e051fc8aa7d7748871785b765aa2962261eeef53..cd53771b9ae7629ed1fd85ab103cedac7cbd0994 100644 (file)
@@ -655,7 +655,7 @@ static void terminate_monitor(struct cm4000_dev *dev)
  * monitor the card every 50msec. as a side-effect, retrieve the
  * atr once a card is inserted. another side-effect of retrieving the
  * atr is that the card will be powered on, so there is no need to
- * power on the card explictely from the application: the driver
+ * power on the card explicitly from the application: the driver
  * is already doing that for you.
  */
 
@@ -1037,7 +1037,7 @@ release_io:
        clear_bit(LOCK_IO, &dev->flags);
        wake_up_interruptible(&dev->ioq);
 
-       DEBUGP(2, dev, "<- cmm_read returns: rc = %Zi\n",
+       DEBUGP(2, dev, "<- cmm_read returns: rc = %zi\n",
               (rc < 0 ? rc : count));
        return rc < 0 ? rc : count;
 }
index d7123259143e9e3c849698068174a92589490d86..d4dbd8d8e524d7b712f9668cbee57c7d722440b2 100644 (file)
@@ -331,7 +331,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
        }
 
        if ((count < 5) || (count > READ_WRITE_BUFFER_SIZE)) {
-               DEBUGP(2, dev, "<- cm4040_write buffersize=%Zd < 5\n", count);
+               DEBUGP(2, dev, "<- cm4040_write buffersize=%zd < 5\n", count);
                return -EIO;
        }
 
index 4fa7fcd8af36aaecd214ee4287f84792e9ef5350..f4f866ee54bcb1ca05b8ac5c401e993e36c54b6a 100644 (file)
@@ -603,7 +603,7 @@ static void sonypi_type3_srs(void)
        u16 v16;
        u8  v8;
 
-       /* This model type uses the same initialiazation of
+       /* This model type uses the same initialization of
         * the embedded controller as the type2 models. */
        sonypi_type2_srs();
 
index 56c1998ced3e12183e2a3a58e1ef27c0d7135556..9356ab4b7d76e7c95057d76c83259d7a2fa2ce63 100644 (file)
@@ -95,16 +95,17 @@ config COMMON_CLK_CDCE706
          This driver supports TI CDCE706 programmable 3-PLL clock synthesizer.
 
 config COMMON_CLK_CDCE925
-       tristate "Clock driver for TI CDCE925 devices"
+       tristate "Clock driver for TI CDCE913/925/937/949 devices"
        depends on I2C
        depends on OF
        select REGMAP_I2C
        help
        ---help---
-         This driver supports the TI CDCE925 programmable clock synthesizer.
-         The chip contains two PLLs with spread-spectrum clocking support and
-         five output dividers. The driver only supports the following setup,
-         and uses a fixed setting for the output muxes.
+         This driver supports the TI CDCE913/925/937/949 programmable clock
+         synthesizer. Each chip has a different number of PLLs and outputs.
+         For example, the CDCE925 contains two PLLs with spread-spectrum
+         clocking support and five output dividers. The driver only supports
+         the following setup, and uses a fixed setting for the output muxes.
          Y1 is derived from the input clock
          Y2 and Y3 derive from PLL1
          Y4 and Y5 derive from PLL2
@@ -198,6 +199,16 @@ config COMMON_CLK_OXNAS
        ---help---
          Support for the OXNAS SoC Family clocks.
 
+config COMMON_CLK_VC5
+       tristate "Clock driver for IDT VersaClock5 devices"
+       depends on I2C
+       depends on OF
+       select REGMAP_I2C
+       help
+         This driver supports the IDT VersaClock5 programmable clock
+         generator.
+
 source "drivers/clk/bcm/Kconfig"
 source "drivers/clk/hisilicon/Kconfig"
 source "drivers/clk/mediatek/Kconfig"
index 925081ec14c04935e1eb17bf2709640c7ae3ae10..92c12b86c2e86f2022faeb434bb961f2410faa3e 100644 (file)
@@ -46,6 +46,7 @@ obj-$(CONFIG_ARCH_TANGO)              += clk-tango4.o
 obj-$(CONFIG_CLK_TWL6040)              += clk-twl6040.o
 obj-$(CONFIG_ARCH_U300)                        += clk-u300.o
 obj-$(CONFIG_ARCH_VT8500)              += clk-vt8500.o
+obj-$(CONFIG_COMMON_CLK_VC5)           += clk-versaclock5.o
 obj-$(CONFIG_COMMON_CLK_WM831X)                += clk-wm831x.o
 obj-$(CONFIG_COMMON_CLK_XGENE)         += clk-xgene.o
 
@@ -87,6 +88,8 @@ obj-y                                 += ti/
 obj-$(CONFIG_CLK_UNIPHIER)             += uniphier/
 obj-$(CONFIG_ARCH_U8500)               += ux500/
 obj-$(CONFIG_COMMON_CLK_VERSATILE)     += versatile/
+ifeq ($(CONFIG_COMMON_CLK), y)
 obj-$(CONFIG_X86)                      += x86/
+endif
 obj-$(CONFIG_ARCH_ZX)                  += zte/
 obj-$(CONFIG_ARCH_ZYNQ)                        += zynq/
index 411310d295816cb3ef645edb7562689e83ff141a..02d3bcd6216cbac49ea584a0e205c6d664c58ce7 100644 (file)
@@ -182,6 +182,7 @@ static int i2s_pll_clk_probe(struct platform_device *pdev)
        if (IS_ERR(pll_clk->base))
                return PTR_ERR(pll_clk->base);
 
+       memset(&init, 0, sizeof(init));
        clk_name = node->name;
        init.name = clk_name;
        init.ops = &i2s_pll_ops;
index 0d14409097e777ce4546de30e9278fdebf74ec44..02585387061967ac9408e18ac1bce67e9e9414c0 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/clk.h>
 #include <linux/clk/bcm2835.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
@@ -98,7 +99,8 @@
 #define CM_SMIDIV              0x0b4
 /* no definition for 0x0b8  and 0x0bc */
 #define CM_TCNTCTL             0x0c0
-#define CM_TCNTDIV             0x0c4
+# define CM_TCNT_SRC1_SHIFT            12
+#define CM_TCNTCNT             0x0c4
 #define CM_TECCTL              0x0c8
 #define CM_TECDIV              0x0cc
 #define CM_TD0CTL              0x0d0
 #define LOCK_TIMEOUT_NS                100000000
 #define BCM2835_MAX_FB_RATE    1750000000u
 
+/*
+ * Names of clocks used within the driver that need to be replaced
+ * with an external parent's name.  This array is in the order that
+ * the clocks node in the DT references external clocks.
+ */
+static const char *const cprman_parent_names[] = {
+       "xosc",
+       "dsi0_byte",
+       "dsi0_ddr2",
+       "dsi0_ddr",
+       "dsi1_byte",
+       "dsi1_ddr2",
+       "dsi1_ddr",
+};
+
 struct bcm2835_cprman {
        struct device *dev;
        void __iomem *regs;
        spinlock_t regs_lock; /* spinlock for all clocks */
-       const char *osc_name;
+
+       /*
+        * Real names of cprman clock parents looked up through
+        * of_clk_get_parent_name(), which will be used in the
+        * parent_names[] arrays for clock registration.
+        */
+       const char *real_parent_names[ARRAY_SIZE(cprman_parent_names)];
 
        /* Must be last */
        struct clk_hw_onecell_data onecell;
@@ -317,6 +340,61 @@ static inline u32 cprman_read(struct bcm2835_cprman *cprman, u32 reg)
        return readl(cprman->regs + reg);
 }
 
+/*
+ * Do one measurement cycle of a clock through the TCNT counter, which
+ * can source from many other clocks in the system.
+ */
+static unsigned long bcm2835_measure_tcnt_mux(struct bcm2835_cprman *cprman,
+                                             u32 tcnt_mux)
+{
+       u32 osccount = 19200; /* 1ms */
+       u32 count;
+       ktime_t timeout;
+
+       spin_lock(&cprman->regs_lock);
+
+       cprman_write(cprman, CM_TCNTCTL, CM_KILL);
+
+       cprman_write(cprman, CM_TCNTCTL,
+                    (tcnt_mux & CM_SRC_MASK) |
+                    (tcnt_mux >> CM_SRC_BITS) << CM_TCNT_SRC1_SHIFT);
+
+       cprman_write(cprman, CM_OSCCOUNT, osccount);
+
+       /* do a short delay at the start */
+       mdelay(1);
+
+       /* Finish off whatever is left of OSCCOUNT */
+       timeout = ktime_add_ns(ktime_get(), LOCK_TIMEOUT_NS);
+       while (cprman_read(cprman, CM_OSCCOUNT)) {
+               if (ktime_after(ktime_get(), timeout)) {
+                       dev_err(cprman->dev, "timeout waiting for OSCCOUNT\n");
+                       count = 0;
+                       goto out;
+               }
+               cpu_relax();
+       }
+
+       /* Wait for BUSY to clear. */
+       timeout = ktime_add_ns(ktime_get(), LOCK_TIMEOUT_NS);
+       while (cprman_read(cprman, CM_TCNTCTL) & CM_BUSY) {
+               if (ktime_after(ktime_get(), timeout)) {
+                       dev_err(cprman->dev, "timeout waiting for !BUSY\n");
+                       count = 0;
+                       goto out;
+               }
+               cpu_relax();
+       }
+
+       count = cprman_read(cprman, CM_TCNTCNT);
+
+       cprman_write(cprman, CM_TCNTCTL, 0);
+
+out:
+       spin_unlock(&cprman->regs_lock);
+
+       return count * 1000;
+}
+
 static int bcm2835_debugfs_regset(struct bcm2835_cprman *cprman, u32 base,
                                  struct debugfs_reg32 *regs, size_t nregs,
                                  struct dentry *dentry)
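
For reference on the arithmetic in the function above: OSCCOUNT is loaded with 19200 cycles of the 19.2 MHz crystal, so the measurement window is 1 ms and the final edge count times 1000 is the measured rate in Hz. The two wait loops follow the standard deadline-polling shape; a userspace sketch of that pattern, with clock_gettime() standing in for ktime_get() and a dummy predicate standing in for the register read:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define LOCK_TIMEOUT_NS 100000000ULL	/* 100 ms, matching the driver */

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Poll until the busy predicate clears or the deadline passes. */
static bool wait_until_idle(bool (*busy)(void))
{
	unsigned long long deadline = now_ns() + LOCK_TIMEOUT_NS;

	while (busy()) {
		if (now_ns() > deadline)
			return false;	/* timed out */
	}
	return true;
}

static bool never_busy(void)	/* stand-in for a register read */
{
	return false;
}

int main(void)
{
	printf("idle: %d\n", wait_until_idle(never_busy));
	return 0;
}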
@@ -428,6 +506,7 @@ struct bcm2835_pll_divider_data {
        u32 load_mask;
        u32 hold_mask;
        u32 fixed_divider;
+       u32 flags;
 };
 
 struct bcm2835_clock_data {
@@ -451,6 +530,8 @@ struct bcm2835_clock_data {
 
        bool is_vpu_clock;
        bool is_mash_clock;
+
+       u32 tcnt_mux;
 };
 
 struct bcm2835_gate_data {
@@ -906,6 +987,9 @@ static long bcm2835_clock_rate_from_divisor(struct bcm2835_clock *clock,
        const struct bcm2835_clock_data *data = clock->data;
        u64 temp;
 
+       if (data->int_bits == 0 && data->frac_bits == 0)
+               return parent_rate;
+
        /*
         * The divisor is a 12.12 fixed point field, but only some of
         * the bits are populated in any given clock.
@@ -929,7 +1013,12 @@ static unsigned long bcm2835_clock_get_rate(struct clk_hw *hw,
        struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw);
        struct bcm2835_cprman *cprman = clock->cprman;
        const struct bcm2835_clock_data *data = clock->data;
-       u32 div = cprman_read(cprman, data->div_reg);
+       u32 div;
+
+       if (data->int_bits == 0 && data->frac_bits == 0)
+               return parent_rate;
+
+       div = cprman_read(cprman, data->div_reg);
 
        return bcm2835_clock_rate_from_divisor(clock, parent_rate, div);
 }
@@ -978,6 +1067,17 @@ static int bcm2835_clock_on(struct clk_hw *hw)
                     CM_GATE);
        spin_unlock(&cprman->regs_lock);
 
+       /* Debug code to measure the clock once it's turned on to see
+        * if it's ticking at the rate we expect.
+        */
+       if (data->tcnt_mux && false) {
+               dev_info(cprman->dev,
+                        "clk %s: rate %ld, measure %ld\n",
+                        data->name,
+                        clk_hw_get_rate(hw),
+                        bcm2835_measure_tcnt_mux(cprman, data->tcnt_mux));
+       }
+
        return 0;
 }
 
@@ -1208,7 +1308,7 @@ static struct clk_hw *bcm2835_register_pll(struct bcm2835_cprman *cprman,
        memset(&init, 0, sizeof(init));
 
        /* All of the PLLs derive from the external oscillator. */
-       init.parent_names = &cprman->osc_name;
+       init.parent_names = &cprman->real_parent_names[0];
        init.num_parents = 1;
        init.name = data->name;
        init.ops = &bcm2835_pll_clk_ops;
@@ -1252,7 +1352,7 @@ bcm2835_register_pll_divider(struct bcm2835_cprman *cprman,
        init.num_parents = 1;
        init.name = divider_name;
        init.ops = &bcm2835_pll_divider_clk_ops;
-       init.flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED;
+       init.flags = data->flags | CLK_IGNORE_UNUSED;
 
        divider = devm_kzalloc(cprman->dev, sizeof(*divider), GFP_KERNEL);
        if (!divider)
@@ -1294,18 +1394,22 @@ static struct clk_hw *bcm2835_register_clock(struct bcm2835_cprman *cprman,
        struct bcm2835_clock *clock;
        struct clk_init_data init;
        const char *parents[1 << CM_SRC_BITS];
-       size_t i;
+       size_t i, j;
        int ret;
 
        /*
-        * Replace our "xosc" references with the oscillator's
-        * actual name.
+        * Replace our strings referencing parent clocks with the
+        * actual clock-output-name of the parent.
         */
        for (i = 0; i < data->num_mux_parents; i++) {
-               if (strcmp(data->parents[i], "xosc") == 0)
-                       parents[i] = cprman->osc_name;
-               else
-                       parents[i] = data->parents[i];
+               parents[i] = data->parents[i];
+
+               for (j = 0; j < ARRAY_SIZE(cprman_parent_names); j++) {
+                       if (strcmp(parents[i], cprman_parent_names[j]) == 0) {
+                               parents[i] = cprman->real_parent_names[j];
+                               break;
+                       }
+               }
        }
 
        memset(&init, 0, sizeof(init));
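
The loop above swaps each internal parent-name string for the clock-output-name actually found in the device tree, so the mux tables stay readable while boards keep their own names. A standalone sketch of that substitution (the "real" names here are invented; in the driver they come from of_clk_get_parent_name()):

#include <stdio.h>
#include <string.h>

static const char *const cprman_parent_names[] = {
	"xosc", "dsi0_byte", "dsi0_ddr2", "dsi0_ddr",
	"dsi1_byte", "dsi1_ddr2", "dsi1_ddr",
};

static const char *const real_parent_names[] = {	/* example values */
	"osc", "dsi0b", "dsi0d2", "dsi0d", "dsi1b", "dsi1d2", "dsi1d",
};

int main(void)
{
	const char *parents[] = { "gnd", "xosc", "dsi1_ddr" };
	size_t i, j;

	for (i = 0; i < sizeof(parents) / sizeof(parents[0]); i++) {
		for (j = 0; j < sizeof(cprman_parent_names) /
			    sizeof(cprman_parent_names[0]); j++) {
			if (!strcmp(parents[i], cprman_parent_names[j])) {
				parents[i] = real_parent_names[j];
				break;
			}
		}
		printf("%s\n", parents[i]);	/* gnd, osc, dsi1d */
	}
	return 0;
}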
@@ -1431,6 +1535,47 @@ static const char *const bcm2835_clock_vpu_parents[] = {
        .parents = bcm2835_clock_vpu_parents,                           \
        __VA_ARGS__)
 
+/*
+ * DSI parent clocks.  The DSI byte/DDR/DDR2 clocks come from the DSI
+ * analog PHY.  The _inv variants are generated internally to cprman,
+ * but we don't use them so they aren't hooked up.
+ */
+static const char *const bcm2835_clock_dsi0_parents[] = {
+       "gnd",
+       "xosc",
+       "testdebug0",
+       "testdebug1",
+       "dsi0_ddr",
+       "dsi0_ddr_inv",
+       "dsi0_ddr2",
+       "dsi0_ddr2_inv",
+       "dsi0_byte",
+       "dsi0_byte_inv",
+};
+
+static const char *const bcm2835_clock_dsi1_parents[] = {
+       "gnd",
+       "xosc",
+       "testdebug0",
+       "testdebug1",
+       "dsi1_ddr",
+       "dsi1_ddr_inv",
+       "dsi1_ddr2",
+       "dsi1_ddr2_inv",
+       "dsi1_byte",
+       "dsi1_byte_inv",
+};
+
+#define REGISTER_DSI0_CLK(...) REGISTER_CLK(                           \
+       .num_mux_parents = ARRAY_SIZE(bcm2835_clock_dsi0_parents),      \
+       .parents = bcm2835_clock_dsi0_parents,                          \
+       __VA_ARGS__)
+
+#define REGISTER_DSI1_CLK(...) REGISTER_CLK(                           \
+       .num_mux_parents = ARRAY_SIZE(bcm2835_clock_dsi1_parents),      \
+       .parents = bcm2835_clock_dsi1_parents,                          \
+       __VA_ARGS__)
+
 /*
  * the real definition of all the pll, pll_dividers and clocks
  * these make use of the above REGISTER_* macros
@@ -1466,7 +1611,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLA_CORE,
                .load_mask = CM_PLLA_LOADCORE,
                .hold_mask = CM_PLLA_HOLDCORE,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLA_PER]      = REGISTER_PLL_DIV(
                .name = "plla_per",
                .source_pll = "plla",
@@ -1474,7 +1620,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLA_PER,
                .load_mask = CM_PLLA_LOADPER,
                .hold_mask = CM_PLLA_HOLDPER,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLA_DSI0]     = REGISTER_PLL_DIV(
                .name = "plla_dsi0",
                .source_pll = "plla",
@@ -1490,7 +1637,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLA_CCP2,
                .load_mask = CM_PLLA_LOADCCP2,
                .hold_mask = CM_PLLA_HOLDCCP2,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
 
        /* PLLB is used for the ARM's clock. */
        [BCM2835_PLLB]          = REGISTER_PLL(
@@ -1514,7 +1662,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLB_ARM,
                .load_mask = CM_PLLB_LOADARM,
                .hold_mask = CM_PLLB_HOLDARM,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
 
        /*
         * PLLC is the core PLL, used to drive the core VPU clock.
@@ -1543,7 +1692,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLC_CORE0,
                .load_mask = CM_PLLC_LOADCORE0,
                .hold_mask = CM_PLLC_HOLDCORE0,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLC_CORE1]    = REGISTER_PLL_DIV(
                .name = "pllc_core1",
                .source_pll = "pllc",
@@ -1551,7 +1701,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLC_CORE1,
                .load_mask = CM_PLLC_LOADCORE1,
                .hold_mask = CM_PLLC_HOLDCORE1,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLC_CORE2]    = REGISTER_PLL_DIV(
                .name = "pllc_core2",
                .source_pll = "pllc",
@@ -1559,7 +1710,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLC_CORE2,
                .load_mask = CM_PLLC_LOADCORE2,
                .hold_mask = CM_PLLC_HOLDCORE2,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLC_PER]      = REGISTER_PLL_DIV(
                .name = "pllc_per",
                .source_pll = "pllc",
@@ -1567,7 +1719,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLC_PER,
                .load_mask = CM_PLLC_LOADPER,
                .hold_mask = CM_PLLC_HOLDPER,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
 
        /*
         * PLLD is the display PLL, used to drive DSI display panels.
@@ -1596,7 +1749,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLD_CORE,
                .load_mask = CM_PLLD_LOADCORE,
                .hold_mask = CM_PLLD_HOLDCORE,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLD_PER]      = REGISTER_PLL_DIV(
                .name = "plld_per",
                .source_pll = "plld",
@@ -1604,7 +1758,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLD_PER,
                .load_mask = CM_PLLD_LOADPER,
                .hold_mask = CM_PLLD_HOLDPER,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLD_DSI0]     = REGISTER_PLL_DIV(
                .name = "plld_dsi0",
                .source_pll = "plld",
@@ -1649,7 +1804,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLH_RCAL,
                .load_mask = CM_PLLH_LOADRCAL,
                .hold_mask = 0,
-               .fixed_divider = 10),
+               .fixed_divider = 10,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLH_AUX]      = REGISTER_PLL_DIV(
                .name = "pllh_aux",
                .source_pll = "pllh",
@@ -1657,7 +1813,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLH_AUX,
                .load_mask = CM_PLLH_LOADAUX,
                .hold_mask = 0,
-               .fixed_divider = 1),
+               .fixed_divider = 1,
+               .flags = CLK_SET_RATE_PARENT),
        [BCM2835_PLLH_PIX]      = REGISTER_PLL_DIV(
                .name = "pllh_pix",
                .source_pll = "pllh",
@@ -1665,7 +1822,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .a2w_reg = A2W_PLLH_PIX,
                .load_mask = CM_PLLH_LOADPIX,
                .hold_mask = 0,
-               .fixed_divider = 10),
+               .fixed_divider = 10,
+               .flags = CLK_SET_RATE_PARENT),
 
        /* the clocks */
 
@@ -1677,7 +1835,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .ctl_reg = CM_OTPCTL,
                .div_reg = CM_OTPDIV,
                .int_bits = 4,
-               .frac_bits = 0),
+               .frac_bits = 0,
+               .tcnt_mux = 6),
        /*
         * Used for a 1 MHz clock for the system clocksource, and also used
         * by the watchdog timer and the camera pulse generator.
@@ -1711,13 +1870,15 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .ctl_reg = CM_H264CTL,
                .div_reg = CM_H264DIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 1),
        [BCM2835_CLOCK_ISP]     = REGISTER_VPU_CLK(
                .name = "isp",
                .ctl_reg = CM_ISPCTL,
                .div_reg = CM_ISPDIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 2),
 
        /*
         * Secondary SDRAM clock.  Used for low-voltage modes when the PLL
@@ -1728,13 +1889,15 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .ctl_reg = CM_SDCCTL,
                .div_reg = CM_SDCDIV,
                .int_bits = 6,
-               .frac_bits = 0),
+               .frac_bits = 0,
+               .tcnt_mux = 3),
        [BCM2835_CLOCK_V3D]     = REGISTER_VPU_CLK(
                .name = "v3d",
                .ctl_reg = CM_V3DCTL,
                .div_reg = CM_V3DDIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 4),
        /*
         * VPU clock.  This doesn't have an enable bit, since it drives
         * the bus for everything else, and is special so it doesn't need
@@ -1748,7 +1911,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .int_bits = 12,
                .frac_bits = 8,
                .flags = CLK_IS_CRITICAL,
-               .is_vpu_clock = true),
+               .is_vpu_clock = true,
+               .tcnt_mux = 5),
 
        /* clocks with per parent mux */
        [BCM2835_CLOCK_AVEO]    = REGISTER_PER_CLK(
@@ -1756,19 +1920,22 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .ctl_reg = CM_AVEOCTL,
                .div_reg = CM_AVEODIV,
                .int_bits = 4,
-               .frac_bits = 0),
+               .frac_bits = 0,
+               .tcnt_mux = 38),
        [BCM2835_CLOCK_CAM0]    = REGISTER_PER_CLK(
                .name = "cam0",
                .ctl_reg = CM_CAM0CTL,
                .div_reg = CM_CAM0DIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 14),
        [BCM2835_CLOCK_CAM1]    = REGISTER_PER_CLK(
                .name = "cam1",
                .ctl_reg = CM_CAM1CTL,
                .div_reg = CM_CAM1DIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 15),
        [BCM2835_CLOCK_DFT]     = REGISTER_PER_CLK(
                .name = "dft",
                .ctl_reg = CM_DFTCTL,
@@ -1780,7 +1947,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .ctl_reg = CM_DPICTL,
                .div_reg = CM_DPIDIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 17),
 
        /* Arasan EMMC clock */
        [BCM2835_CLOCK_EMMC]    = REGISTER_PER_CLK(
@@ -1788,7 +1956,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .ctl_reg = CM_EMMCCTL,
                .div_reg = CM_EMMCDIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 39),
 
        /* General purpose (GPIO) clocks */
        [BCM2835_CLOCK_GP0]     = REGISTER_PER_CLK(
@@ -1797,7 +1966,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .div_reg = CM_GP0DIV,
                .int_bits = 12,
                .frac_bits = 12,
-               .is_mash_clock = true),
+               .is_mash_clock = true,
+               .tcnt_mux = 20),
        [BCM2835_CLOCK_GP1]     = REGISTER_PER_CLK(
                .name = "gp1",
                .ctl_reg = CM_GP1CTL,
@@ -1805,7 +1975,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .int_bits = 12,
                .frac_bits = 12,
                .flags = CLK_IS_CRITICAL,
-               .is_mash_clock = true),
+               .is_mash_clock = true,
+               .tcnt_mux = 21),
        [BCM2835_CLOCK_GP2]     = REGISTER_PER_CLK(
                .name = "gp2",
                .ctl_reg = CM_GP2CTL,
@@ -1820,40 +1991,46 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .ctl_reg = CM_HSMCTL,
                .div_reg = CM_HSMDIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 22),
        [BCM2835_CLOCK_PCM]     = REGISTER_PER_CLK(
                .name = "pcm",
                .ctl_reg = CM_PCMCTL,
                .div_reg = CM_PCMDIV,
                .int_bits = 12,
                .frac_bits = 12,
-               .is_mash_clock = true),
+               .is_mash_clock = true,
+               .tcnt_mux = 23),
        [BCM2835_CLOCK_PWM]     = REGISTER_PER_CLK(
                .name = "pwm",
                .ctl_reg = CM_PWMCTL,
                .div_reg = CM_PWMDIV,
                .int_bits = 12,
                .frac_bits = 12,
-               .is_mash_clock = true),
+               .is_mash_clock = true,
+               .tcnt_mux = 24),
        [BCM2835_CLOCK_SLIM]    = REGISTER_PER_CLK(
                .name = "slim",
                .ctl_reg = CM_SLIMCTL,
                .div_reg = CM_SLIMDIV,
                .int_bits = 12,
                .frac_bits = 12,
-               .is_mash_clock = true),
+               .is_mash_clock = true,
+               .tcnt_mux = 25),
        [BCM2835_CLOCK_SMI]     = REGISTER_PER_CLK(
                .name = "smi",
                .ctl_reg = CM_SMICTL,
                .div_reg = CM_SMIDIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 27),
        [BCM2835_CLOCK_UART]    = REGISTER_PER_CLK(
                .name = "uart",
                .ctl_reg = CM_UARTCTL,
                .div_reg = CM_UARTDIV,
                .int_bits = 10,
-               .frac_bits = 12),
+               .frac_bits = 12,
+               .tcnt_mux = 28),
 
        /* TV encoder clock.  Its only operating frequency is 108 MHz.  */
        [BCM2835_CLOCK_VEC]     = REGISTER_PER_CLK(
@@ -1866,7 +2043,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                 * Allow rate change propagation only on PLLH_AUX which is
                 * assigned index 7 in the parent array.
                 */
-               .set_rate_parent = BIT(7)),
+               .set_rate_parent = BIT(7),
+               .tcnt_mux = 29),
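The comment above is worth restating: set_rate_parent is a per-parent bitmask rather than a boolean, so each set bit permits rate-change propagation through the parent at that index. A minimal sketch with a hypothetical eight-parent mux (not an entry from this table):

	/* Hypothetical mux: allow rate propagation only through
	 * parents 3 and 7; all other parents are never reprogrammed. */
	.set_rate_parent = BIT(3) | BIT(7),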
 
        /* dsi clocks */
        [BCM2835_CLOCK_DSI0E]   = REGISTER_PER_CLK(
@@ -1874,13 +2052,29 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
                .ctl_reg = CM_DSI0ECTL,
                .div_reg = CM_DSI0EDIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 18),
        [BCM2835_CLOCK_DSI1E]   = REGISTER_PER_CLK(
                .name = "dsi1e",
                .ctl_reg = CM_DSI1ECTL,
                .div_reg = CM_DSI1EDIV,
                .int_bits = 4,
-               .frac_bits = 8),
+               .frac_bits = 8,
+               .tcnt_mux = 19),
+       [BCM2835_CLOCK_DSI0P]   = REGISTER_DSI0_CLK(
+               .name = "dsi0p",
+               .ctl_reg = CM_DSI0PCTL,
+               .div_reg = CM_DSI0PDIV,
+               .int_bits = 0,
+               .frac_bits = 0,
+               .tcnt_mux = 12),
+       [BCM2835_CLOCK_DSI1P]   = REGISTER_DSI1_CLK(
+               .name = "dsi1p",
+               .ctl_reg = CM_DSI1PCTL,
+               .div_reg = CM_DSI1PDIV,
+               .int_bits = 0,
+               .frac_bits = 0,
+               .tcnt_mux = 13),
 
        /* the gates */
 
@@ -1939,8 +2133,19 @@ static int bcm2835_clk_probe(struct platform_device *pdev)
        if (IS_ERR(cprman->regs))
                return PTR_ERR(cprman->regs);
 
-       cprman->osc_name = of_clk_get_parent_name(dev->of_node, 0);
-       if (!cprman->osc_name)
+       memcpy(cprman->real_parent_names, cprman_parent_names,
+              sizeof(cprman_parent_names));
+       of_clk_parent_fill(dev->of_node, cprman->real_parent_names,
+                          ARRAY_SIZE(cprman_parent_names));
+
+       /*
+        * Make sure the external oscillator has been registered.
+        *
+        * The other (DSI) clocks are not present on older device
+        * trees, which we still need to support for backwards
+        * compatibility.
+        */
+       if (!cprman->real_parent_names[0])
                return -ENODEV;
 
        platform_set_drvdata(pdev, cprman);
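The probe rework above follows a seed-then-override pattern: the parent-name table starts from compile-time defaults, and of_clk_parent_fill() overwrites only the entries that the device tree's "clocks" property actually names, which is what keeps older device trees (oscillator only) working. A minimal sketch of the pattern, with illustrative names rather than the driver's own:

	static const char *const defaults[] = { "osc", "dsi0_byte", "dsi1_byte" };

	static void fill_parents_with_fallback(struct device_node *np,
					       const char *parents[])
	{
		/* Start from the static defaults... */
		memcpy(parents, defaults, sizeof(defaults));
		/* ...then overwrite only the entries listed in "clocks";
		 * missing entries keep their default names. */
		of_clk_parent_fill(np, parents, ARRAY_SIZE(defaults));
	}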
index f793b2d9238cea8df7d135e3a2d4fcbe20a37da4..c933be01c7db66f999331b9d187c1125682b127e 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Driver for TI Dual PLL CDCE925 clock synthesizer
+ * Driver for TI Multi PLL CDCE913/925/937/949 clock synthesizer
  *
- * This driver always connects the Y1 to the input clock, Y2/Y3 to PLL1
- * and Y4/Y5 to PLL2. PLL frequency is set on a first-come-first-serve
+ * This driver always connects Y1 to the input clock, Y2/Y3 to PLL1,
+ * Y4/Y5 to PLL2, and so on. PLL frequency is set on a first-come-first-served
  * basis. Clients can directly request any frequency that the chip can
  * deliver using the standard clk framework. In addition, the device can
  * be configured and activated via the devicetree.
 #include <linux/slab.h>
 #include <linux/gcd.h>
 
-/* The chip has 2 PLLs which can be routed through dividers to 5 outputs.
+/* Each chip has a different number of PLLs and outputs, for example:
+ * the CDCE925 has 2 PLLs which can be routed through dividers to 5 outputs.
  * Model this as 2 PLL clocks which are parents to the outputs.
  */
-#define NUMBER_OF_PLLS 2
-#define NUMBER_OF_OUTPUTS      5
+
+enum {
+       CDCE913,
+       CDCE925,
+       CDCE937,
+       CDCE949,
+};
+
+struct clk_cdce925_chip_info {
+       int num_plls;
+       int num_outputs;
+};
+
+static const struct clk_cdce925_chip_info clk_cdce925_chip_info_tbl[] = {
+       [CDCE913] = { .num_plls = 1, .num_outputs = 3 },
+       [CDCE925] = { .num_plls = 2, .num_outputs = 5 },
+       [CDCE937] = { .num_plls = 3, .num_outputs = 7 },
+       [CDCE949] = { .num_plls = 4, .num_outputs = 9 },
+};
+
+#define MAX_NUMBER_OF_PLLS     4
+#define MAX_NUMBER_OF_OUTPUTS  9
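The table above is indexed by the enum value carried in i2c_device_id.driver_data (see the id table near the end of this file), so the probe path can pick up per-chip limits without any conditionals; for example:

	const struct clk_cdce925_chip_info *info =
		&clk_cdce925_chip_info_tbl[CDCE949];
	/* info->num_plls == 4, info->num_outputs == 9 */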
 
 #define CDCE925_REG_GLOBAL1    0x01
 #define CDCE925_REG_Y1SPIPDIVH 0x02
@@ -43,7 +64,7 @@ struct clk_cdce925_output {
        struct clk_hw hw;
        struct clk_cdce925_chip *chip;
        u8 index;
-       u16 pdiv; /* 1..127 for Y2-Y5; 1..1023 for Y1 */
+       u16 pdiv; /* 1..127 for Y2-Y9; 1..1023 for Y1 */
 };
 #define to_clk_cdce925_output(_hw) \
        container_of(_hw, struct clk_cdce925_output, hw)
@@ -60,8 +81,9 @@ struct clk_cdce925_pll {
 struct clk_cdce925_chip {
        struct regmap *regmap;
        struct i2c_client *i2c_client;
-       struct clk_cdce925_pll pll[NUMBER_OF_PLLS];
-       struct clk_cdce925_output clk[NUMBER_OF_OUTPUTS];
+       const struct clk_cdce925_chip_info *chip_info;
+       struct clk_cdce925_pll pll[MAX_NUMBER_OF_PLLS];
+       struct clk_cdce925_output clk[MAX_NUMBER_OF_OUTPUTS];
 };
 
 /* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */
@@ -284,6 +306,18 @@ static void cdce925_clk_set_pdiv(struct clk_cdce925_output *data, u16 pdiv)
        case 4:
                regmap_update_bits(data->chip->regmap, 0x27, 0x7F, pdiv);
                break;
+       case 5:
+               regmap_update_bits(data->chip->regmap, 0x36, 0x7F, pdiv);
+               break;
+       case 6:
+               regmap_update_bits(data->chip->regmap, 0x37, 0x7F, pdiv);
+               break;
+       case 7:
+               regmap_update_bits(data->chip->regmap, 0x46, 0x7F, pdiv);
+               break;
+       case 8:
+               regmap_update_bits(data->chip->regmap, 0x47, 0x7F, pdiv);
+               break;
        }
 }
 
@@ -302,6 +336,14 @@ static void cdce925_clk_activate(struct clk_cdce925_output *data)
        case 4:
                regmap_update_bits(data->chip->regmap, 0x24, 0x03, 0x03);
                break;
+       case 5:
+       case 6:
+               regmap_update_bits(data->chip->regmap, 0x34, 0x03, 0x03);
+               break;
+       case 7:
+       case 8:
+               regmap_update_bits(data->chip->regmap, 0x44, 0x03, 0x03);
+               break;
        }
 }
 
@@ -474,15 +516,6 @@ static const struct clk_ops cdce925_clk_y1_ops = {
        .set_rate = cdce925_clk_y1_set_rate,
 };
 
-
-static struct regmap_config cdce925_regmap_config = {
-       .name = "configuration0",
-       .reg_bits = 8,
-       .val_bits = 8,
-       .cache_type = REGCACHE_RBTREE,
-       .max_register = 0x2F,
-};
-
 #define CDCE925_I2C_COMMAND_BLOCK_TRANSFER     0x00
 #define CDCE925_I2C_COMMAND_BYTE_TRANSFER      0x80
 
@@ -582,13 +615,19 @@ static int cdce925_probe(struct i2c_client *client,
        struct clk_cdce925_chip *data;
        struct device_node *node = client->dev.of_node;
        const char *parent_name;
-       const char *pll_clk_name[NUMBER_OF_PLLS] = {NULL,};
+       const char *pll_clk_name[MAX_NUMBER_OF_PLLS] = {NULL,};
        struct clk_init_data init;
        u32 value;
        int i;
        int err;
        struct device_node *np_output;
        char child_name[6];
+       struct regmap_config config = {
+               .name = "configuration0",
+               .reg_bits = 8,
+               .val_bits = 8,
+               .cache_type = REGCACHE_RBTREE,
+       };
 
        dev_dbg(&client->dev, "%s\n", __func__);
        data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
@@ -596,8 +635,11 @@ static int cdce925_probe(struct i2c_client *client,
                return -ENOMEM;
 
        data->i2c_client = client;
+       data->chip_info = &clk_cdce925_chip_info_tbl[id->driver_data];
+       config.max_register = CDCE925_OFFSET_PLL +
+               data->chip_info->num_plls * 0x10 - 1;
        data->regmap = devm_regmap_init(&client->dev, &regmap_cdce925_bus,
-                       &client->dev, &cdce925_regmap_config);
+                       &client->dev, &config);
        if (IS_ERR(data->regmap)) {
                dev_err(&client->dev, "failed to allocate register map\n");
                return PTR_ERR(data->regmap);
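max_register is now computed at probe time because the register space scales with the PLL count: each PLL occupies a 0x10-byte bank above CDCE925_OFFSET_PLL. A small helper restating the arithmetic used above (a sketch, not code from the patch):

	static unsigned int
	cdce9xx_max_register(const struct clk_cdce925_chip_info *info)
	{
		/* Last byte of the last PLL's 0x10-byte register bank. */
		return CDCE925_OFFSET_PLL + info->num_plls * 0x10 - 1;
	}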
@@ -626,7 +668,7 @@ static int cdce925_probe(struct i2c_client *client,
        init.num_parents = parent_name ? 1 : 0;
 
        /* Register PLL clocks */
-       for (i = 0; i < NUMBER_OF_PLLS; ++i) {
+       for (i = 0; i < data->chip_info->num_plls; ++i) {
                pll_clk_name[i] = kasprintf(GFP_KERNEL, "%s.pll%d",
                        client->dev.of_node->name, i);
                init.name = pll_clk_name[i];
@@ -684,7 +726,7 @@ static int cdce925_probe(struct i2c_client *client,
        init.ops = &cdce925_clk_ops;
        init.flags = CLK_SET_RATE_PARENT;
        init.num_parents = 1;
-       for (i = 1; i < NUMBER_OF_OUTPUTS; ++i) {
+       for (i = 1; i < data->chip_info->num_outputs; ++i) {
                init.name = kasprintf(GFP_KERNEL, "%s.Y%d",
                        client->dev.of_node->name, i+1);
                data->clk[i].chip = data;
@@ -702,6 +744,16 @@ static int cdce925_probe(struct i2c_client *client,
                        /* Mux Y4/5 to PLL2 */
                        init.parent_names = &pll_clk_name[1];
                        break;
+               case 5:
+               case 6:
+                       /* Mux Y6/7 to PLL3 */
+                       init.parent_names = &pll_clk_name[2];
+                       break;
+               case 7:
+               case 8:
+                       /* Mux Y8/9 to PLL4 */
+                       init.parent_names = &pll_clk_name[3];
+                       break;
                }
                err = devm_clk_hw_register(&client->dev, &data->clk[i].hw);
                kfree(init.name); /* clock framework made a copy of the name */
@@ -720,7 +772,7 @@ static int cdce925_probe(struct i2c_client *client,
        err = 0;
 
 error:
-       for (i = 0; i < NUMBER_OF_PLLS; ++i)
+       for (i = 0; i < data->chip_info->num_plls; ++i)
                /* clock framework made a copy of the name */
                kfree(pll_clk_name[i]);
 
@@ -728,13 +780,19 @@ error:
 }
 
 static const struct i2c_device_id cdce925_id[] = {
-       { "cdce925", 0 },
+       { "cdce913", CDCE913 },
+       { "cdce925", CDCE925 },
+       { "cdce937", CDCE937 },
+       { "cdce949", CDCE949 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, cdce925_id);
 
 static const struct of_device_id clk_cdce925_of_match[] = {
+       { .compatible = "ti,cdce913" },
        { .compatible = "ti,cdce925" },
+       { .compatible = "ti,cdce937" },
+       { .compatible = "ti,cdce949" },
        { },
 };
 MODULE_DEVICE_TABLE(of, clk_cdce925_of_match);
@@ -750,5 +808,5 @@ static struct i2c_driver cdce925_driver = {
 module_i2c_driver(cdce925_driver);
 
 MODULE_AUTHOR("Mike Looijmans <mike.looijmans@topic.nl>");
-MODULE_DESCRIPTION("cdce925 driver");
+MODULE_DESCRIPTION("TI CDCE913/925/937/949 driver");
 MODULE_LICENSE("GPL");
index 674785d968a3e1c507637ebbeb3865e17fcb5def..e0e02a6e59009e7ac56ae85ddaecc8624568e5b3 100644 (file)
@@ -40,8 +40,9 @@ static int __set_clk_parents(struct device_node *node, bool clk_supplier)
                        return 0;
                pclk = of_clk_get_from_provider(&clkspec);
                if (IS_ERR(pclk)) {
-                       pr_warn("clk: couldn't get parent clock %d for %s\n",
-                               index, node->full_name);
+                       if (PTR_ERR(pclk) != -EPROBE_DEFER)
+                               pr_warn("clk: couldn't get parent clock %d for %s\n",
+                                       index, node->full_name);
                        return PTR_ERR(pclk);
                }
 
@@ -55,8 +56,9 @@ static int __set_clk_parents(struct device_node *node, bool clk_supplier)
                }
                clk = of_clk_get_from_provider(&clkspec);
                if (IS_ERR(clk)) {
-                       pr_warn("clk: couldn't get assigned clock %d for %s\n",
-                               index, node->full_name);
+                       if (PTR_ERR(clk) != -EPROBE_DEFER)
+                               pr_warn("clk: couldn't get assigned clock %d for %s\n",
+                                       index, node->full_name);
                        rc = PTR_ERR(clk);
                        goto err;
                }
@@ -99,8 +101,9 @@ static int __set_clk_rates(struct device_node *node, bool clk_supplier)
 
                        clk = of_clk_get_from_provider(&clkspec);
                        if (IS_ERR(clk)) {
-                               pr_warn("clk: couldn't get clock %d for %s\n",
-                                       index, node->full_name);
+                               if (PTR_ERR(clk) != -EPROBE_DEFER)
+                                       pr_warn("clk: couldn't get clock %d for %s\n",
+                                               index, node->full_name);
                                return PTR_ERR(clk);
                        }
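All three hunks in this file apply the same rule: -EPROBE_DEFER means "provider not registered yet, try again later" and is routine during boot, so only real failures deserve a warning. The pattern, restated generically:

	clk = of_clk_get_from_provider(&clkspec);
	if (IS_ERR(clk)) {
		/* Providers may register later; warn only on real errors. */
		if (PTR_ERR(clk) != -EPROBE_DEFER)
			pr_warn("clk: lookup failed (%ld)\n", PTR_ERR(clk));
		return PTR_ERR(clk);
	}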
 
index 021f3daf34e140edef6b678fd99d2de0f5eee3b6..3fca0526d940af68fc2a9b550b5951014f2a7dcc 100644 (file)
@@ -59,6 +59,10 @@ struct cs2000_priv {
        struct i2c_client *client;
        struct clk *clk_in;
        struct clk *ref_clk;
+
+       /* suspend/resume */
+       unsigned long saved_rate;
+       unsigned long saved_parent_rate;
 };
 
 static const struct of_device_id cs2000_of_match[] = {
@@ -286,6 +290,9 @@ static int __cs2000_set_rate(struct cs2000_priv *priv, int ch,
        if (ret < 0)
                return ret;
 
+       priv->saved_rate        = rate;
+       priv->saved_parent_rate = parent_rate;
+
        return 0;
 }
 
@@ -489,9 +496,24 @@ probe_err:
        return ret;
 }
 
+static int cs2000_resume(struct device *dev)
+{
+       struct cs2000_priv *priv = dev_get_drvdata(dev);
+       int ch = 0; /* the driver uses only ch0 at this point */
+
+       return __cs2000_set_rate(priv, ch,
+                                priv->saved_rate,
+                                priv->saved_parent_rate);
+}
+
+static const struct dev_pm_ops cs2000_pm_ops = {
+       .resume_early   = cs2000_resume,
+};
+
 static struct i2c_driver cs2000_driver = {
        .driver = {
                .name = "cs2000-cp",
+               .pm     = &cs2000_pm_ops,
                .of_match_table = cs2000_of_match,
        },
        .probe          = cs2000_probe,
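The saved rate is replayed from .resume_early, which runs in the early device-resume phase ahead of ordinary .resume callbacks, so the clock is already back at its pre-suspend rate before consumer drivers resume. An equivalent spelling (a sketch, not the patch's code) would use the helper macro:

	static const struct dev_pm_ops cs2000_pm_ops = {
		/* Expands to .suspend_late = NULL, .resume_early = cs2000_resume */
		SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, cs2000_resume)
	};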
index 2a3e9d8e88b0ed0151b513f452cf5bd2ecbcf0f5..96d37175d0ad59f47abd218db271f558c5921a4d 100644 (file)
@@ -290,13 +290,15 @@ static int scpi_clocks_probe(struct platform_device *pdev)
                        of_node_put(child);
                        return ret;
                }
-       }
-       /* Add the virtual cpufreq device */
-       cpufreq_dev = platform_device_register_simple("scpi-cpufreq",
-                                                     -1, NULL, 0);
-       if (IS_ERR(cpufreq_dev))
-               pr_warn("unable to register cpufreq device");
 
+               if (match->data != &scpi_dvfs_ops)
+                       continue;
+               /* Add the virtual cpufreq device if it's the DVFS clock provider */
+               cpufreq_dev = platform_device_register_simple("scpi-cpufreq",
+                                                             -1, NULL, 0);
+               if (IS_ERR(cpufreq_dev))
+                       pr_warn("unable to register cpufreq device");
+       }
        return 0;
 }
 
index fc585f370549a336ea12cc5c323dbc77aec69825..ab609a76706f7bb0258ce47dda61366a0602d5cc 100644 (file)
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
 
+/*
+ * Include the list of clocks which are not derived from the system clock
+ * (SYSCLOCK). The index of these clocks is the secondary index in the DT
+ * bindings.
+ */
+#include <dt-bindings/clock/stm32fx-clock.h>
+
+#define STM32F4_RCC_CR                 0x00
 #define STM32F4_RCC_PLLCFGR            0x04
 #define STM32F4_RCC_CFGR               0x08
 #define STM32F4_RCC_AHB1ENR            0x30
 #define STM32F4_RCC_APB2ENR            0x44
 #define STM32F4_RCC_BDCR               0x70
 #define STM32F4_RCC_CSR                        0x74
+#define STM32F4_RCC_PLLI2SCFGR         0x84
+#define STM32F4_RCC_PLLSAICFGR         0x88
+#define STM32F4_RCC_DCKCFGR            0x8c
+#define STM32F7_RCC_DCKCFGR2           0x90
+
+#define NONE -1
+#define NO_IDX  NONE
+#define NO_MUX  NONE
+#define NO_GATE NONE
 
 struct stm32f4_gate_data {
        u8      offset;
@@ -195,7 +212,7 @@ static const struct stm32f4_gate_data stm32f469_gates[] __initconst = {
        { STM32F4_RCC_APB2ENR,  8,      "adc1",         "apb2_div" },
        { STM32F4_RCC_APB2ENR,  9,      "adc2",         "apb2_div" },
        { STM32F4_RCC_APB2ENR, 10,      "adc3",         "apb2_div" },
-       { STM32F4_RCC_APB2ENR, 11,      "sdio",         "pll48" },
+       { STM32F4_RCC_APB2ENR, 11,      "sdio",         "sdmux" },
        { STM32F4_RCC_APB2ENR, 12,      "spi1",         "apb2_div" },
        { STM32F4_RCC_APB2ENR, 13,      "spi4",         "apb2_div" },
        { STM32F4_RCC_APB2ENR, 14,      "syscfg",       "apb2_div" },
@@ -208,7 +225,79 @@ static const struct stm32f4_gate_data stm32f469_gates[] __initconst = {
        { STM32F4_RCC_APB2ENR, 26,      "ltdc",         "apb2_div" },
 };
 
-enum { SYSTICK, FCLK, CLK_LSI, CLK_LSE, CLK_HSE_RTC, CLK_RTC, END_PRIMARY_CLK };
+static const struct stm32f4_gate_data stm32f746_gates[] __initconst = {
+       { STM32F4_RCC_AHB1ENR,  0,      "gpioa",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  1,      "gpiob",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  2,      "gpioc",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  3,      "gpiod",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  4,      "gpioe",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  5,      "gpiof",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  6,      "gpiog",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  7,      "gpioh",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  8,      "gpioi",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR,  9,      "gpioj",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 10,      "gpiok",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 12,      "crc",          "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 18,      "bkpsra",       "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 20,      "dtcmram",      "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 21,      "dma1",         "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 22,      "dma2",         "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 23,      "dma2d",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 25,      "ethmac",       "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 26,      "ethmactx",     "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 27,      "ethmacrx",     "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 28,      "ethmacptp",    "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 29,      "otghs",        "ahb_div" },
+       { STM32F4_RCC_AHB1ENR, 30,      "otghsulpi",    "ahb_div" },
+
+       { STM32F4_RCC_AHB2ENR,  0,      "dcmi",         "ahb_div" },
+       { STM32F4_RCC_AHB2ENR,  4,      "cryp",         "ahb_div" },
+       { STM32F4_RCC_AHB2ENR,  5,      "hash",         "ahb_div" },
+       { STM32F4_RCC_AHB2ENR,  6,      "rng",          "pll48"   },
+       { STM32F4_RCC_AHB2ENR,  7,      "otgfs",        "pll48"   },
+
+       { STM32F4_RCC_AHB3ENR,  0,      "fmc",          "ahb_div",
+               CLK_IGNORE_UNUSED },
+       { STM32F4_RCC_AHB3ENR,  1,      "qspi",         "ahb_div",
+               CLK_IGNORE_UNUSED },
+
+       { STM32F4_RCC_APB1ENR,  0,      "tim2",         "apb1_mul" },
+       { STM32F4_RCC_APB1ENR,  1,      "tim3",         "apb1_mul" },
+       { STM32F4_RCC_APB1ENR,  2,      "tim4",         "apb1_mul" },
+       { STM32F4_RCC_APB1ENR,  3,      "tim5",         "apb1_mul" },
+       { STM32F4_RCC_APB1ENR,  4,      "tim6",         "apb1_mul" },
+       { STM32F4_RCC_APB1ENR,  5,      "tim7",         "apb1_mul" },
+       { STM32F4_RCC_APB1ENR,  6,      "tim12",        "apb1_mul" },
+       { STM32F4_RCC_APB1ENR,  7,      "tim13",        "apb1_mul" },
+       { STM32F4_RCC_APB1ENR,  8,      "tim14",        "apb1_mul" },
+       { STM32F4_RCC_APB1ENR, 11,      "wwdg",         "apb1_div" },
+       { STM32F4_RCC_APB1ENR, 14,      "spi2",         "apb1_div" },
+       { STM32F4_RCC_APB1ENR, 15,      "spi3",         "apb1_div" },
+       { STM32F4_RCC_APB1ENR, 16,      "spdifrx",      "apb1_div" },
+       { STM32F4_RCC_APB1ENR, 25,      "can1",         "apb1_div" },
+       { STM32F4_RCC_APB1ENR, 26,      "can2",         "apb1_div" },
+       { STM32F4_RCC_APB1ENR, 27,      "cec",          "apb1_div" },
+       { STM32F4_RCC_APB1ENR, 28,      "pwr",          "apb1_div" },
+       { STM32F4_RCC_APB1ENR, 29,      "dac",          "apb1_div" },
+
+       { STM32F4_RCC_APB2ENR,  0,      "tim1",         "apb2_mul" },
+       { STM32F4_RCC_APB2ENR,  1,      "tim8",         "apb2_mul" },
+       { STM32F4_RCC_APB2ENR,  8,      "adc1",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR,  9,      "adc2",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 10,      "adc3",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 11,      "sdmmc",        "sdmux"    },
+       { STM32F4_RCC_APB2ENR, 12,      "spi1",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 13,      "spi4",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 14,      "syscfg",       "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 16,      "tim9",         "apb2_mul" },
+       { STM32F4_RCC_APB2ENR, 17,      "tim10",        "apb2_mul" },
+       { STM32F4_RCC_APB2ENR, 18,      "tim11",        "apb2_mul" },
+       { STM32F4_RCC_APB2ENR, 20,      "spi5",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 21,      "spi6",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 22,      "sai1",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 23,      "sai2",         "apb2_div" },
+       { STM32F4_RCC_APB2ENR, 26,      "ltdc",         "apb2_div" },
+};
 
 /*
  * This bitmask tells us which bit offsets (0..192) on STM32F4[23]xxx
@@ -224,6 +313,10 @@ static const u64 stm32f46xx_gate_map[MAX_GATE_MAP] = { 0x000000f17ef417ffull,
                                                       0x0000000000000003ull,
                                                       0x0c777f33f6fec9ffull };
 
+static const u64 stm32f746_gate_map[MAX_GATE_MAP] = { 0x000000f17ef417ffull,
+                                                     0x0000000000000003ull,
+                                                     0x04f77f033e01c9ffull };
+
 static const u64 *stm32f4_gate_map;
 
 static struct clk_hw **clks;
@@ -233,6 +326,8 @@ static void __iomem *base;
 
 static struct regmap *pdrm;
 
+static int stm32fx_end_primary_clk;
+
 /*
  * "Multiplier" device for APBx clocks.
  *
@@ -324,23 +419,342 @@ static struct clk *clk_register_apb_mul(struct device *dev, const char *name,
        return clk;
 }
 
-/*
- * Decode current PLL state and (statically) model the state we inherit from
- * the bootloader.
- */
-static void stm32f4_rcc_register_pll(const char *hse_clk, const char *hsi_clk)
+enum {
+       PLL,
+       PLL_I2S,
+       PLL_SAI,
+};
+
+static const struct clk_div_table pll_divp_table[] = {
+       { 0, 2 }, { 1, 4 }, { 2, 6 }, { 3, 8 }, { 0 }
+};
+
+static const struct clk_div_table pll_divr_table[] = {
+       { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 }, { 0 }
+};
+
+struct stm32f4_pll {
+       spinlock_t *lock;
+       struct  clk_gate gate;
+       u8 offset;
+       u8 bit_rdy_idx;
+       u8 status;
+       u8 n_start;
+};
+
+#define to_stm32f4_pll(_gate) container_of(_gate, struct stm32f4_pll, gate)
+
+struct stm32f4_pll_post_div_data {
+       int idx;
+       u8 pll_num;
+       const char *name;
+       const char *parent;
+       u8 flag;
+       u8 offset;
+       u8 shift;
+       u8 width;
+       u8 flag_div;
+       const struct clk_div_table *div_table;
+};
+
+struct stm32f4_vco_data {
+       const char *vco_name;
+       u8 offset;
+       u8 bit_idx;
+       u8 bit_rdy_idx;
+};
+
+static const struct stm32f4_vco_data  vco_data[] = {
+       { "vco",     STM32F4_RCC_PLLCFGR,    24, 25 },
+       { "vco-i2s", STM32F4_RCC_PLLI2SCFGR, 26, 27 },
+       { "vco-sai", STM32F4_RCC_PLLSAICFGR, 28, 29 },
+};
+
+static const struct clk_div_table post_divr_table[] = {
+       { 0, 2 }, { 1, 4 }, { 2, 8 }, { 3, 16 }, { 0 }
+};
+
+#define MAX_POST_DIV 3
+static const struct stm32f4_pll_post_div_data  post_div_data[MAX_POST_DIV] = {
+       { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q",
+               CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL},
+
+       { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q",
+               CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL },
+
+       { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
+               STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table },
+};
+
+struct stm32f4_div_data {
+       u8 shift;
+       u8 width;
+       u8 flag_div;
+       const struct clk_div_table *div_table;
+};
+
+#define MAX_PLL_DIV 3
+static const struct stm32f4_div_data  div_data[MAX_PLL_DIV] = {
+       { 16, 2, 0,                     pll_divp_table  },
+       { 24, 4, CLK_DIVIDER_ONE_BASED, NULL            },
+       { 28, 3, 0,                     pll_divr_table  },
+};
+
+struct stm32f4_pll_data {
+       u8 pll_num;
+       u8 n_start;
+       const char *div_name[MAX_PLL_DIV];
+};
+
+static const struct stm32f4_pll_data stm32f429_pll[MAX_PLL_DIV] = {
+       { PLL,     192, { "pll", "pll48",    NULL       } },
+       { PLL_I2S, 192, { NULL,  "plli2s-q", "plli2s-r" } },
+       { PLL_SAI,  49, { NULL,  "pllsai-q", "pllsai-r" } },
+};
+
+static const struct stm32f4_pll_data stm32f469_pll[MAX_PLL_DIV] = {
+       { PLL,     50, { "pll",      "pll-q",    NULL       } },
+       { PLL_I2S, 50, { "plli2s-p", "plli2s-q", "plli2s-r" } },
+       { PLL_SAI, 50, { "pllsai-p", "pllsai-q", "pllsai-r" } },
+};
+
+static int stm32f4_pll_is_enabled(struct clk_hw *hw)
+{
+       return clk_gate_ops.is_enabled(hw);
+}
+
+static int stm32f4_pll_enable(struct clk_hw *hw)
+{
+       struct clk_gate *gate = to_clk_gate(hw);
+       struct stm32f4_pll *pll = to_stm32f4_pll(gate);
+       int ret = 0;
+       unsigned long reg;
+
+       ret = clk_gate_ops.enable(hw);
+
+       ret = readl_relaxed_poll_timeout_atomic(base + STM32F4_RCC_CR, reg,
+                       reg & (1 << pll->bit_rdy_idx), 0, 10000);
+
+       return ret;
+}
+
+static void stm32f4_pll_disable(struct clk_hw *hw)
+{
+       clk_gate_ops.disable(hw);
+}
+
+static unsigned long stm32f4_pll_recalc(struct clk_hw *hw,
+               unsigned long parent_rate)
+{
+       struct clk_gate *gate = to_clk_gate(hw);
+       struct stm32f4_pll *pll = to_stm32f4_pll(gate);
+       unsigned long n;
+
+       n = (readl(base + pll->offset) >> 6) & 0x1ff;
+
+       return parent_rate * n;
+}
+
+static long stm32f4_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+               unsigned long *prate)
+{
+       struct clk_gate *gate = to_clk_gate(hw);
+       struct stm32f4_pll *pll = to_stm32f4_pll(gate);
+       unsigned long n;
+
+       n = rate / *prate;
+
+       if (n < pll->n_start)
+               n = pll->n_start;
+       else if (n > 432)
+               n = 432;
+
+       return *prate * n;
+}
+
+static int stm32f4_pll_set_rate(struct clk_hw *hw, unsigned long rate,
+                               unsigned long parent_rate)
+{
+       struct clk_gate *gate = to_clk_gate(hw);
+       struct stm32f4_pll *pll = to_stm32f4_pll(gate);
+
+       unsigned long n;
+       unsigned long val;
+       int pll_state;
+
+       pll_state = stm32f4_pll_is_enabled(hw);
+
+       if (pll_state)
+               stm32f4_pll_disable(hw);
+
+       n = rate  / parent_rate;
+
+       val = readl(base + pll->offset) & ~(0x1ff << 6);
+
+       writel(val | ((n & 0x1ff) <<  6), base + pll->offset);
+
+       if (pll_state)
+               stm32f4_pll_enable(hw);
+
+       return 0;
+}
+
+static const struct clk_ops stm32f4_pll_gate_ops = {
+       .enable         = stm32f4_pll_enable,
+       .disable        = stm32f4_pll_disable,
+       .is_enabled     = stm32f4_pll_is_enabled,
+       .recalc_rate    = stm32f4_pll_recalc,
+       .round_rate     = stm32f4_pll_round_rate,
+       .set_rate       = stm32f4_pll_set_rate,
+};
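These ops model the VCO as parent_rate * N, with N held in bits [14:6] of the PLL's configuration register and clamped to [n_start, 432]; set_rate gates the PLL off around the write because N must not change while the PLL is running. A worked example, assuming an 8 MHz HSE and the F469 tables (n_start = 50):

	unsigned long vco_in = 8000000 / 8;	/* HSE / PLLM = 1 MHz */
	unsigned long n = 336000000 / vco_in;	/* N = 336, inside [50, 432] */
	unsigned long vco = vco_in * n;		/* VCO runs at 336 MHz */
	unsigned long sysclk = vco / 2;		/* divp = 2 -> 168 MHz "pll" */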
+
+struct stm32f4_pll_div {
+       struct clk_divider div;
+       struct clk_hw *hw_pll;
+};
+
+#define to_pll_div_clk(_div) container_of(_div, struct stm32f4_pll_div, div)
+
+static unsigned long stm32f4_pll_div_recalc_rate(struct clk_hw *hw,
+               unsigned long parent_rate)
+{
+       return clk_divider_ops.recalc_rate(hw, parent_rate);
+}
+
+static long stm32f4_pll_div_round_rate(struct clk_hw *hw, unsigned long rate,
+                               unsigned long *prate)
+{
+       return clk_divider_ops.round_rate(hw, rate, prate);
+}
+
+static int stm32f4_pll_div_set_rate(struct clk_hw *hw, unsigned long rate,
+                               unsigned long parent_rate)
 {
-       unsigned long pllcfgr = readl(base + STM32F4_RCC_PLLCFGR);
+       int pll_state, ret;
+
+       struct clk_divider *div = to_clk_divider(hw);
+       struct stm32f4_pll_div *pll_div = to_pll_div_clk(div);
+
+       pll_state = stm32f4_pll_is_enabled(pll_div->hw_pll);
+
+       if (pll_state)
+               stm32f4_pll_disable(pll_div->hw_pll);
+
+       ret = clk_divider_ops.set_rate(hw, rate, parent_rate);
 
-       unsigned long pllm   = pllcfgr & 0x3f;
-       unsigned long plln   = (pllcfgr >> 6) & 0x1ff;
-       unsigned long pllp   = BIT(((pllcfgr >> 16) & 3) + 1);
-       const char   *pllsrc = pllcfgr & BIT(22) ? hse_clk : hsi_clk;
-       unsigned long pllq   = (pllcfgr >> 24) & 0xf;
+       if (pll_state)
+               stm32f4_pll_enable(pll_div->hw_pll);
 
-       clk_register_fixed_factor(NULL, "vco", pllsrc, 0, plln, pllm);
-       clk_register_fixed_factor(NULL, "pll", "vco", 0, 1, pllp);
-       clk_register_fixed_factor(NULL, "pll48", "vco", 0, 1, pllq);
+       return ret;
+}
+
+static const struct clk_ops stm32f4_pll_div_ops = {
+       .recalc_rate = stm32f4_pll_div_recalc_rate,
+       .round_rate = stm32f4_pll_div_round_rate,
+       .set_rate = stm32f4_pll_div_set_rate,
+};
+
+static struct clk_hw *clk_register_pll_div(const char *name,
+               const char *parent_name, unsigned long flags,
+               void __iomem *reg, u8 shift, u8 width,
+               u8 clk_divider_flags, const struct clk_div_table *table,
+               struct clk_hw *pll_hw, spinlock_t *lock)
+{
+       struct stm32f4_pll_div *pll_div;
+       struct clk_hw *hw;
+       struct clk_init_data init;
+       int ret;
+
+       /* allocate the divider */
+       pll_div = kzalloc(sizeof(*pll_div), GFP_KERNEL);
+       if (!pll_div)
+               return ERR_PTR(-ENOMEM);
+
+       init.name = name;
+       init.ops = &stm32f4_pll_div_ops;
+       init.flags = flags;
+       init.parent_names = (parent_name ? &parent_name : NULL);
+       init.num_parents = (parent_name ? 1 : 0);
+
+       /* struct clk_divider assignments */
+       pll_div->div.reg = reg;
+       pll_div->div.shift = shift;
+       pll_div->div.width = width;
+       pll_div->div.flags = clk_divider_flags;
+       pll_div->div.lock = lock;
+       pll_div->div.table = table;
+       pll_div->div.hw.init = &init;
+
+       pll_div->hw_pll = pll_hw;
+
+       /* register the clock */
+       hw = &pll_div->div.hw;
+       ret = clk_hw_register(NULL, hw);
+       if (ret) {
+               kfree(pll_div);
+               hw = ERR_PTR(ret);
+       }
+
+       return hw;
+}
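This helper wraps a standard clk_divider but routes set_rate through ops that gate the owning PLL off first, since the dividers live in the PLL configuration registers and must not be reprogrammed while the PLL is locked. A hedged usage sketch, registering the main PLL's Q divider with the parameters from div_data[1] above:

	hw = clk_register_pll_div("pll-q", "vco", 0,
				  base + STM32F4_RCC_PLLCFGR,
				  24, 4,		/* PLLQ field: bits [27:24] */
				  CLK_DIVIDER_ONE_BASED, NULL,
				  pll_hw, &stm32f4_clk_lock);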
+
+static struct clk_hw *stm32f4_rcc_register_pll(const char *pllsrc,
+               const struct stm32f4_pll_data *data,  spinlock_t *lock)
+{
+       struct stm32f4_pll *pll;
+       struct clk_init_data init = { NULL };
+       void __iomem *reg;
+       struct clk_hw *pll_hw;
+       int ret;
+       int i;
+       const struct stm32f4_vco_data *vco;
+
+       pll = kzalloc(sizeof(*pll), GFP_KERNEL);
+       if (!pll)
+               return ERR_PTR(-ENOMEM);
+
+       vco = &vco_data[data->pll_num];
+
+       init.name = vco->vco_name;
+       init.ops = &stm32f4_pll_gate_ops;
+       init.flags = CLK_SET_RATE_GATE;
+       init.parent_names = &pllsrc;
+       init.num_parents = 1;
+
+       pll->gate.lock = lock;
+       pll->gate.reg = base + STM32F4_RCC_CR;
+       pll->gate.bit_idx = vco->bit_idx;
+       pll->gate.hw.init = &init;
+
+       pll->offset = vco->offset;
+       pll->n_start = data->n_start;
+       pll->bit_rdy_idx = vco->bit_rdy_idx;
+       pll->status = (readl(base + STM32F4_RCC_CR) >> vco->bit_idx) & 0x1;
+
+       reg = base + pll->offset;
+
+       pll_hw = &pll->gate.hw;
+       ret = clk_hw_register(NULL, pll_hw);
+       if (ret) {
+               kfree(pll);
+               return ERR_PTR(ret);
+       }
+
+       for (i = 0; i < MAX_PLL_DIV; i++)
+               if (data->div_name[i])
+                       clk_register_pll_div(data->div_name[i],
+                                       vco->vco_name,
+                                       0,
+                                       reg,
+                                       div_data[i].shift,
+                                       div_data[i].width,
+                                       div_data[i].flag_div,
+                                       div_data[i].div_table,
+                                       pll_hw,
+                                       lock);
+       return pll_hw;
 }
 
 /*
@@ -352,7 +766,7 @@ static int stm32f4_rcc_lookup_clk_idx(u8 primary, u8 secondary)
        u64 table[MAX_GATE_MAP];
 
        if (primary == 1) {
-               if (WARN_ON(secondary >= END_PRIMARY_CLK))
+               if (WARN_ON(secondary >= stm32fx_end_primary_clk))
                        return -EINVAL;
                return secondary;
        }
@@ -369,7 +783,7 @@ static int stm32f4_rcc_lookup_clk_idx(u8 primary, u8 secondary)
        table[BIT_ULL_WORD(secondary)] &=
            GENMASK_ULL(secondary % BITS_PER_LONG_LONG, 0);
 
-       return END_PRIMARY_CLK - 1 + hweight64(table[0]) +
+       return stm32fx_end_primary_clk - 1 + hweight64(table[0]) +
               (BIT_ULL_WORD(secondary) >= 1 ? hweight64(table[1]) : 0) +
               (BIT_ULL_WORD(secondary) >= 2 ? hweight64(table[2]) : 0);
 }
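The lookup turns a (primary, secondary) clock specifier into a clks[] index by counting the implemented gate bits at or below the requested offset; swapping the old END_PRIMARY_CLK constant for the per-variant stm32fx_end_primary_clk is what lets the F7 data add primary clocks. The counting idea, restated as a sketch (assuming the requested bit is itself set in the map):

	static int gate_index(const u64 map[3], int n_primary, unsigned int bit)
	{
		int idx = n_primary - 1; /* gates follow the primary clocks */
		unsigned int i;

		for (i = 0; i <= bit; i++) /* inclusive, like GENMASK_ULL above */
			if (map[i / 64] & BIT_ULL(i % 64))
				idx++;
		return idx;
	}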
@@ -611,22 +1025,291 @@ static const char *rtc_parents[4] = {
        "no-clock", "lse", "lsi", "hse-rtc"
 };
 
+static const char *lcd_parent[1] = { "pllsai-r-div" };
+
+static const char *i2s_parents[2] = { "plli2s-r", NULL };
+
+static const char *sai_parents[4] = { "pllsai-q-div", "plli2s-q-div", NULL,
+       "no-clock" };
+
+static const char *pll48_parents[2] = { "pll-q", "pllsai-p" };
+
+static const char *sdmux_parents[2] = { "pll48", "sys" };
+
+static const char *hdmi_parents[2] = { "lse", "hsi_div488" };
+
+static const char *spdif_parent[1] = { "plli2s-p" };
+
+static const char *lptim_parent[4] = { "apb1_mul", "lsi", "hsi", "lse" };
+
+static const char *uart_parents1[4] = { "apb2_div", "sys", "hsi", "lse" };
+static const char *uart_parents2[4] = { "apb1_div", "sys", "hsi", "lse" };
+
+static const char *i2c_parents[4] = { "apb1_div", "sys", "hsi", "no-clock" };
+
+struct stm32_aux_clk {
+       int idx;
+       const char *name;
+       const char * const *parent_names;
+       int num_parents;
+       int offset_mux;
+       u8 shift;
+       u8 mask;
+       int offset_gate;
+       u8 bit_idx;
+       unsigned long flags;
+};
+
 struct stm32f4_clk_data {
        const struct stm32f4_gate_data *gates_data;
        const u64 *gates_map;
        int gates_num;
+       const struct stm32f4_pll_data *pll_data;
+       const struct stm32_aux_clk *aux_clk;
+       int aux_clk_num;
+       int end_primary;
+};
+
+static const struct stm32_aux_clk stm32f429_aux_clk[] = {
+       {
+               CLK_LCD, "lcd-tft", lcd_parent, ARRAY_SIZE(lcd_parent),
+               NO_MUX, 0, 0,
+               STM32F4_RCC_APB2ENR, 26,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_I2S, "i2s", i2s_parents, ARRAY_SIZE(i2s_parents),
+               STM32F4_RCC_CFGR, 23, 1,
+               NO_GATE, 0,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_SAI1, "sai1-a", sai_parents, ARRAY_SIZE(sai_parents),
+               STM32F4_RCC_DCKCFGR, 20, 3,
+               STM32F4_RCC_APB2ENR, 22,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_SAI2, "sai1-b", sai_parents, ARRAY_SIZE(sai_parents),
+               STM32F4_RCC_DCKCFGR, 22, 3,
+               STM32F4_RCC_APB2ENR, 22,
+               CLK_SET_RATE_PARENT
+       },
+};
+
+static const struct stm32_aux_clk stm32f469_aux_clk[] = {
+       {
+               CLK_LCD, "lcd-tft", lcd_parent, ARRAY_SIZE(lcd_parent),
+               NO_MUX, 0, 0,
+               STM32F4_RCC_APB2ENR, 26,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_I2S, "i2s", i2s_parents, ARRAY_SIZE(i2s_parents),
+               STM32F4_RCC_CFGR, 23, 1,
+               NO_GATE, 0,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_SAI1, "sai1-a", sai_parents, ARRAY_SIZE(sai_parents),
+               STM32F4_RCC_DCKCFGR, 20, 3,
+               STM32F4_RCC_APB2ENR, 22,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_SAI2, "sai1-b", sai_parents, ARRAY_SIZE(sai_parents),
+               STM32F4_RCC_DCKCFGR, 22, 3,
+               STM32F4_RCC_APB2ENR, 22,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               NO_IDX, "pll48", pll48_parents, ARRAY_SIZE(pll48_parents),
+               STM32F4_RCC_DCKCFGR, 27, 1,
+               NO_GATE, 0,
+               0
+       },
+       {
+               NO_IDX, "sdmux", sdmux_parents, ARRAY_SIZE(sdmux_parents),
+               STM32F4_RCC_DCKCFGR, 28, 1,
+               NO_GATE, 0,
+               0
+       },
+};
+
+static const struct stm32_aux_clk stm32f746_aux_clk[] = {
+       {
+               CLK_LCD, "lcd-tft", lcd_parent, ARRAY_SIZE(lcd_parent),
+               NO_MUX, 0, 0,
+               STM32F4_RCC_APB2ENR, 26,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_I2S, "i2s", i2s_parents, ARRAY_SIZE(i2s_parents),
+               STM32F4_RCC_CFGR, 23, 1,
+               NO_GATE, 0,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_SAI1, "sai1_clk", sai_parents, ARRAY_SIZE(sai_parents),
+               STM32F4_RCC_DCKCFGR, 20, 3,
+               STM32F4_RCC_APB2ENR, 22,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_SAI2, "sai2_clk", sai_parents, ARRAY_SIZE(sai_parents),
+               STM32F4_RCC_DCKCFGR, 22, 3,
+               STM32F4_RCC_APB2ENR, 23,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               NO_IDX, "pll48", pll48_parents, ARRAY_SIZE(pll48_parents),
+               STM32F7_RCC_DCKCFGR2, 27, 1,
+               NO_GATE, 0,
+               0
+       },
+       {
+               NO_IDX, "sdmux", sdmux_parents, ARRAY_SIZE(sdmux_parents),
+               STM32F7_RCC_DCKCFGR2, 28, 1,
+               NO_GATE, 0,
+               0
+       },
+       {
+               CLK_HDMI_CEC, "hdmi-cec",
+               hdmi_parents, ARRAY_SIZE(hdmi_parents),
+               STM32F7_RCC_DCKCFGR2, 26, 1,
+               NO_GATE, 0,
+               0
+       },
+       {
+               CLK_SPDIF, "spdif-rx",
+               spdif_parent, ARRAY_SIZE(spdif_parent),
+               STM32F7_RCC_DCKCFGR2, 22, 3,
+               STM32F4_RCC_APB2ENR, 23,
+               CLK_SET_RATE_PARENT
+       },
+       {
+               CLK_USART1, "usart1",
+               uart_parents1, ARRAY_SIZE(uart_parents1),
+               STM32F7_RCC_DCKCFGR2, 0, 3,
+               STM32F4_RCC_APB2ENR, 4,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_USART2, "usart2",
+               uart_parents2, ARRAY_SIZE(uart_parents2),
+               STM32F7_RCC_DCKCFGR2, 2, 3,
+               STM32F4_RCC_APB1ENR, 17,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_USART3, "usart3",
+               uart_parents2, ARRAY_SIZE(uart_parents2),
+               STM32F7_RCC_DCKCFGR2, 4, 3,
+               STM32F4_RCC_APB1ENR, 18,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_UART4, "uart4",
+               uart_parents2, ARRAY_SIZE(uart_parents2),
+               STM32F7_RCC_DCKCFGR2, 6, 3,
+               STM32F4_RCC_APB1ENR, 19,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_UART5, "uart5",
+               uart_parents2, ARRAY_SIZE(uart_parents2),
+               STM32F7_RCC_DCKCFGR2, 8, 3,
+               STM32F4_RCC_APB1ENR, 20,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_USART6, "usart6",
+               uart_parents1, ARRAY_SIZE(uart_parents1),
+               STM32F7_RCC_DCKCFGR2, 10, 3,
+               STM32F4_RCC_APB2ENR, 5,
+               CLK_SET_RATE_PARENT,
+       },
+
+       {
+               CLK_UART7, "uart7",
+               uart_parents2, ARRAY_SIZE(uart_parents2),
+               STM32F7_RCC_DCKCFGR2, 12, 3,
+               STM32F4_RCC_APB1ENR, 30,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_UART8, "uart8",
+               uart_parents2, ARRAY_SIZE(uart_parents2),
+               STM32F7_RCC_DCKCFGR2, 14, 3,
+               STM32F4_RCC_APB1ENR, 31,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_I2C1, "i2c1",
+               i2c_parents, ARRAY_SIZE(i2c_parents),
+               STM32F7_RCC_DCKCFGR2, 16, 3,
+               STM32F4_RCC_APB1ENR, 21,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_I2C2, "i2c2",
+               i2c_parents, ARRAY_SIZE(i2c_parents),
+               STM32F7_RCC_DCKCFGR2, 18, 3,
+               STM32F4_RCC_APB1ENR, 22,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_I2C3, "i2c3",
+               i2c_parents, ARRAY_SIZE(i2c_parents),
+               STM32F7_RCC_DCKCFGR2, 20, 3,
+               STM32F4_RCC_APB1ENR, 23,
+               CLK_SET_RATE_PARENT,
+       },
+       {
+               CLK_I2C4, "i2c4",
+               i2c_parents, ARRAY_SIZE(i2c_parents),
+               STM32F7_RCC_DCKCFGR2, 22, 3,
+               STM32F4_RCC_APB1ENR, 24,
+               CLK_SET_RATE_PARENT,
+       },
+
+       {
+               CLK_LPTIMER, "lptim1",
+               lptim_parent, ARRAY_SIZE(lptim_parent),
+               STM32F7_RCC_DCKCFGR2, 24, 3,
+               STM32F4_RCC_APB1ENR, 9,
+               CLK_SET_RATE_PARENT
+       },
 };
 
 static const struct stm32f4_clk_data stm32f429_clk_data = {
+       .end_primary    = END_PRIMARY_CLK,
        .gates_data     = stm32f429_gates,
        .gates_map      = stm32f42xx_gate_map,
        .gates_num      = ARRAY_SIZE(stm32f429_gates),
+       .pll_data       = stm32f429_pll,
+       .aux_clk        = stm32f429_aux_clk,
+       .aux_clk_num    = ARRAY_SIZE(stm32f429_aux_clk),
 };
 
 static const struct stm32f4_clk_data stm32f469_clk_data = {
+       .end_primary    = END_PRIMARY_CLK,
        .gates_data     = stm32f469_gates,
        .gates_map      = stm32f46xx_gate_map,
        .gates_num      = ARRAY_SIZE(stm32f469_gates),
+       .pll_data       = stm32f469_pll,
+       .aux_clk        = stm32f469_aux_clk,
+       .aux_clk_num    = ARRAY_SIZE(stm32f469_aux_clk),
+};
+
+static const struct stm32f4_clk_data stm32f746_clk_data = {
+       .end_primary    = END_PRIMARY_CLK_F7,
+       .gates_data     = stm32f746_gates,
+       .gates_map      = stm32f746_gate_map,
+       .gates_num      = ARRAY_SIZE(stm32f746_gates),
+       .pll_data       = stm32f469_pll,
+       .aux_clk        = stm32f746_aux_clk,
+       .aux_clk_num    = ARRAY_SIZE(stm32f746_aux_clk),
 };
 
 static const struct of_device_id stm32f4_of_match[] = {
@@ -638,15 +1321,84 @@ static const struct of_device_id stm32f4_of_match[] = {
                .compatible = "st,stm32f469-rcc",
                .data = &stm32f469_clk_data
        },
+       {
+               .compatible = "st,stm32f746-rcc",
+               .data = &stm32f746_clk_data
+       },
        {}
 };
 
+static struct clk_hw *stm32_register_aux_clk(const char *name,
+               const char * const *parent_names, int num_parents,
+               int offset_mux, u8 shift, u8 mask,
+               int offset_gate, u8 bit_idx,
+               unsigned long flags, spinlock_t *lock)
+{
+       struct clk_hw *hw;
+       struct clk_gate *gate = NULL;
+       struct clk_mux *mux = NULL;
+       struct clk_hw *mux_hw = NULL, *gate_hw = NULL;
+       const struct clk_ops *mux_ops = NULL, *gate_ops = NULL;
+
+       if (offset_gate != NO_GATE) {
+               gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+               if (!gate) {
+                       hw = ERR_PTR(-ENOMEM);
+                       goto fail;
+               }
+
+               gate->reg = base + offset_gate;
+               gate->bit_idx = bit_idx;
+               gate->flags = 0;
+               gate->lock = lock;
+               gate_hw = &gate->hw;
+               gate_ops = &clk_gate_ops;
+       }
+
+       if (offset_mux != NO_MUX) {
+               mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+               if (!mux) {
+                       hw = ERR_PTR(-ENOMEM);
+                       goto fail;
+               }
+
+               mux->reg = base + offset_mux;
+               mux->shift = shift;
+               mux->mask = mask;
+               mux->flags = 0;
+               mux_hw = &mux->hw;
+               mux_ops = &clk_mux_ops;
+       }
+
+       if (mux_hw == NULL && gate_hw == NULL) {
+               hw = ERR_PTR(-EINVAL);
+               goto fail;
+       }
+
+       hw = clk_hw_register_composite(NULL, name, parent_names, num_parents,
+                       mux_hw, mux_ops,
+                       NULL, NULL,
+                       gate_hw, gate_ops,
+                       flags);
+
+fail:
+       if (IS_ERR(hw)) {
+               kfree(gate);
+               kfree(mux);
+       }
+
+       return hw;
+}
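This helper assembles a composite clock from an optional mux and an optional gate (NO_MUX and NO_GATE skip the respective half), so one function serves every entry in the aux tables above. A usage sketch mirroring the gate-only "lcd-tft" entry:

	hw = stm32_register_aux_clk("lcd-tft", lcd_parent, ARRAY_SIZE(lcd_parent),
				    NO_MUX, 0, 0,		/* no source mux */
				    STM32F4_RCC_APB2ENR, 26,	/* gate bit: LTDC enable */
				    CLK_SET_RATE_PARENT, &stm32f4_clk_lock);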
+
 static void __init stm32f4_rcc_init(struct device_node *np)
 {
-       const char *hse_clk;
+       const char *hse_clk, *i2s_in_clk;
        int n;
        const struct of_device_id *match;
        const struct stm32f4_clk_data *data;
+       unsigned long pllcfgr;
+       const char *pllsrc;
+       unsigned long pllm;
 
        base = of_iomap(np, 0);
        if (!base) {
@@ -666,7 +1418,9 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 
        data = match->data;
 
-       clks = kmalloc_array(data->gates_num + END_PRIMARY_CLK,
+       stm32fx_end_primary_clk = data->end_primary;
+
+       clks = kmalloc_array(data->gates_num + stm32fx_end_primary_clk,
                        sizeof(*clks), GFP_KERNEL);
        if (!clks)
                goto fail;
@@ -675,12 +1429,54 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 
        hse_clk = of_clk_get_parent_name(np, 0);
 
-       clk_register_fixed_rate_with_accuracy(NULL, "hsi", NULL, 0,
-                       16000000, 160000);
-       stm32f4_rcc_register_pll(hse_clk, "hsi");
+       i2s_in_clk = of_clk_get_parent_name(np, 1);
+
+       i2s_parents[1] = i2s_in_clk;
+       sai_parents[2] = i2s_in_clk;
+
+       clks[CLK_HSI] = clk_hw_register_fixed_rate_with_accuracy(NULL, "hsi",
+                       NULL, 0, 16000000, 160000);
+
+       pllcfgr = readl(base + STM32F4_RCC_PLLCFGR);
+       pllsrc = pllcfgr & BIT(22) ? hse_clk : "hsi";
+       pllm = pllcfgr & 0x3f;
+
+       clk_hw_register_fixed_factor(NULL, "vco_in", pllsrc,
+                                              0, 1, pllm);
+
+       stm32f4_rcc_register_pll("vco_in", &data->pll_data[0],
+                       &stm32f4_clk_lock);
+
+       clks[PLL_VCO_I2S] = stm32f4_rcc_register_pll("vco_in",
+                       &data->pll_data[1], &stm32f4_clk_lock);
+
+       clks[PLL_VCO_SAI] = stm32f4_rcc_register_pll("vco_in",
+                       &data->pll_data[2], &stm32f4_clk_lock);
+
+       for (n = 0; n < MAX_POST_DIV; n++) {
+               const struct stm32f4_pll_post_div_data *post_div;
+               struct clk_hw *hw;
+
+               post_div = &post_div_data[n];
+
+               hw = clk_register_pll_div(post_div->name,
+                               post_div->parent,
+                               post_div->flag,
+                               base + post_div->offset,
+                               post_div->shift,
+                               post_div->width,
+                               post_div->flag_div,
+                               post_div->div_table,
+                               clks[post_div->pll_num],
+                               &stm32f4_clk_lock);
+
+               if (post_div->idx != NO_IDX)
+                       clks[post_div->idx] = hw;
+       }
 
        sys_parents[1] = hse_clk;
-       clk_register_mux_table(
+
+       clks[CLK_SYSCLK] = clk_hw_register_mux_table(
            NULL, "sys", sys_parents, ARRAY_SIZE(sys_parents), 0,
            base + STM32F4_RCC_CFGR, 0, 3, 0, NULL, &stm32f4_clk_lock);
 
@@ -762,6 +1558,33 @@ static void __init stm32f4_rcc_init(struct device_node *np)
                goto fail;
        }
 
+       for (n = 0; n < data->aux_clk_num; n++) {
+               const struct stm32_aux_clk *aux_clk;
+               struct clk_hw *hw;
+
+               aux_clk = &data->aux_clk[n];
+
+               hw = stm32_register_aux_clk(aux_clk->name,
+                               aux_clk->parent_names, aux_clk->num_parents,
+                               aux_clk->offset_mux, aux_clk->shift,
+                               aux_clk->mask, aux_clk->offset_gate,
+                               aux_clk->bit_idx, aux_clk->flags,
+                               &stm32f4_clk_lock);
+
+               if (IS_ERR(hw)) {
+                       pr_warn("Unable to register %s clk\n", aux_clk->name);
+                       continue;
+               }
+
+               if (aux_clk->idx != NO_IDX)
+                       clks[aux_clk->idx] = hw;
+       }
+
+       if (of_device_is_compatible(np, "st,stm32f746-rcc"))
+               clk_hw_register_fixed_factor(NULL, "hsi_div488", "hsi", 0,
+                               1, 488);
+
        of_clk_add_hw_provider(np, stm32f4_rcc_lookup_clk, NULL);
        return;
 fail:
@@ -770,3 +1593,4 @@ fail:
 }
 CLK_OF_DECLARE_DRIVER(stm32f42xx_rcc, "st,stm32f42xx-rcc", stm32f4_rcc_init);
 CLK_OF_DECLARE_DRIVER(stm32f46xx_rcc, "st,stm32f469-rcc", stm32f4_rcc_init);
+CLK_OF_DECLARE_DRIVER(stm32f746_rcc, "st,stm32f746-rcc", stm32f4_rcc_init);
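+
+/*
+ * A minimal, hypothetical device tree node this init routine would bind
+ * against (register address and clock phandles are assumptions, not taken
+ * from a real board file):
+ *
+ *     rcc: rcc@40023800 {
+ *             compatible = "st,stm32f746-rcc", "st,stm32-rcc";
+ *             reg = <0x40023800 0x400>;
+ *             #clock-cells = <2>;
+ *             clocks = <&clk_hse>, <&clk_i2s_ckin>;
+ *     };
+ *
+ * of_clk_get_parent_name(np, 0) above then resolves the HSE input and
+ * index 1 the external I2S clock.
+ */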
diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c
new file mode 100644 (file)
index 0000000..56741f3
--- /dev/null
@@ -0,0 +1,791 @@
+/*
+ * Driver for IDT Versaclock 5
+ *
+ * Copyright (C) 2017 Marek Vasut <marek.vasut@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Possible optimizations:
+ * - Use spread spectrum
+ * - Use integer divider in FOD if applicable
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/rational.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+/* VersaClock5 registers */
+#define VC5_OTP_CONTROL                                0x00
+
+/* Factory-reserved register block */
+#define VC5_RSVD_DEVICE_ID                     0x01
+#define VC5_RSVD_ADC_GAIN_7_0                  0x02
+#define VC5_RSVD_ADC_GAIN_15_8                 0x03
+#define VC5_RSVD_ADC_OFFSET_7_0                        0x04
+#define VC5_RSVD_ADC_OFFSET_15_8               0x05
+#define VC5_RSVD_TEMPY                         0x06
+#define VC5_RSVD_OFFSET_TBIN                   0x07
+#define VC5_RSVD_GAIN                          0x08
+#define VC5_RSVD_TEST_NP                       0x09
+#define VC5_RSVD_UNUSED                                0x0a
+#define VC5_RSVD_BANDGAP_TRIM_UP               0x0b
+#define VC5_RSVD_BANDGAP_TRIM_DN               0x0c
+#define VC5_RSVD_CLK_R_12_CLK_AMP_4            0x0d
+#define VC5_RSVD_CLK_R_34_CLK_AMP_4            0x0e
+#define VC5_RSVD_CLK_AMP_123                   0x0f
+
+/* Configuration register block */
+#define VC5_PRIM_SRC_SHDN                      0x10
+#define VC5_PRIM_SRC_SHDN_EN_XTAL              BIT(7)
+#define VC5_PRIM_SRC_SHDN_EN_CLKIN             BIT(6)
+#define VC5_PRIM_SRC_SHDN_SP                   BIT(1)
+#define VC5_PRIM_SRC_SHDN_EN_GBL_SHDN          BIT(0)
+
+#define VC5_VCO_BAND                           0x11
+#define VC5_XTAL_X1_LOAD_CAP                   0x12
+#define VC5_XTAL_X2_LOAD_CAP                   0x13
+#define VC5_REF_DIVIDER                                0x15
+#define VC5_REF_DIVIDER_SEL_PREDIV2            BIT(7)
+#define VC5_REF_DIVIDER_REF_DIV(n)             ((n) & 0x3f)
+
+#define VC5_VCO_CTRL_AND_PREDIV                        0x16
+#define VC5_VCO_CTRL_AND_PREDIV_BYPASS_PREDIV  BIT(7)
+
+#define VC5_FEEDBACK_INT_DIV                   0x17
+#define VC5_FEEDBACK_INT_DIV_BITS              0x18
+#define VC5_FEEDBACK_FRAC_DIV(n)               (0x19 + (n))
+#define VC5_RC_CONTROL0                                0x1e
+#define VC5_RC_CONTROL1                                0x1f
+/* Register 0x20 is factory reserved */
+
+/* Output divider control for divider 1,2,3,4 */
+#define VC5_OUT_DIV_CONTROL(idx)       (0x21 + ((idx) * 0x10))
+#define VC5_OUT_DIV_CONTROL_RESET      BIT(7)
+#define VC5_OUT_DIV_CONTROL_SELB_NORM  BIT(3)
+#define VC5_OUT_DIV_CONTROL_SEL_EXT    BIT(2)
+#define VC5_OUT_DIV_CONTROL_INT_MODE   BIT(1)
+#define VC5_OUT_DIV_CONTROL_EN_FOD     BIT(0)
+
+#define VC5_OUT_DIV_FRAC(idx, n)       (0x22 + ((idx) * 0x10) + (n))
+#define VC5_OUT_DIV_FRAC4_OD_SCEE      BIT(1)
+
+#define VC5_OUT_DIV_STEP_SPREAD(idx, n)        (0x26 + ((idx) * 0x10) + (n))
+#define VC5_OUT_DIV_SPREAD_MOD(idx, n) (0x29 + ((idx) * 0x10) + (n))
+#define VC5_OUT_DIV_SKEW_INT(idx, n)   (0x2b + ((idx) * 0x10) + (n))
+#define VC5_OUT_DIV_INT(idx, n)                (0x2d + ((idx) * 0x10) + (n))
+#define VC5_OUT_DIV_SKEW_FRAC(idx)     (0x2f + ((idx) * 0x10))
+/* Registers 0x30, 0x40, 0x50 are factory reserved */
+
+/* Clock control register for clock 1,2 */
+#define VC5_CLK_OUTPUT_CFG(idx, n)     (0x60 + ((idx) * 0x2) + (n))
+#define VC5_CLK_OUTPUT_CFG1_EN_CLKBUF  BIT(0)
+
+#define VC5_CLK_OE_SHDN                                0x68
+#define VC5_CLK_OS_SHDN                                0x69
+
+#define VC5_GLOBAL_REGISTER                    0x76
+#define VC5_GLOBAL_REGISTER_GLOBAL_RESET       BIT(5)
+
+/* PLL/VCO runs between 2.5 GHz and 3.0 GHz */
+#define VC5_PLL_VCO_MIN                                2500000000UL
+#define VC5_PLL_VCO_MAX                                3000000000UL
+
+/* VC5 Input mux settings */
+#define VC5_MUX_IN_XIN         BIT(0)
+#define VC5_MUX_IN_CLKIN       BIT(1)
+
+/* Supported IDT VC5 models. */
+enum vc5_model {
+       IDT_VC5_5P49V5923,
+       IDT_VC5_5P49V5933,
+};
+
+struct vc5_driver_data;
+
+struct vc5_hw_data {
+       struct clk_hw           hw;
+       struct vc5_driver_data  *vc5;
+       u32                     div_int;
+       u32                     div_frc;
+       unsigned int            num;
+};
+
+struct vc5_driver_data {
+       struct i2c_client       *client;
+       struct regmap           *regmap;
+       enum vc5_model          model;
+
+       struct clk              *pin_xin;
+       struct clk              *pin_clkin;
+       unsigned char           clk_mux_ins;
+       struct clk_hw           clk_mux;
+       struct vc5_hw_data      clk_pll;
+       struct vc5_hw_data      clk_fod[2];
+       struct vc5_hw_data      clk_out[3];
+};
+
+static const char * const vc5_mux_names[] = {
+       "mux"
+};
+
+static const char * const vc5_pll_names[] = {
+       "pll"
+};
+
+static const char * const vc5_fod_names[] = {
+       "fod0", "fod1", "fod2", "fod3",
+};
+
+static const char * const vc5_clk_out_names[] = {
+       "out0_sel_i2cb", "out1", "out2", "out3", "out4",
+};
+
+/*
+ * VersaClock5 i2c regmap
+ */
+static bool vc5_regmap_is_writeable(struct device *dev, unsigned int reg)
+{
+       /* Factory reserved regs, make them read-only */
+       if (reg <= 0xf)
+               return false;
+
+       /* More factory reserved regs, also read-only */
+       if (reg == 0x14 || reg == 0x1c || reg == 0x1d)
+               return false;
+
+       return true;
+}
+
+static const struct regmap_config vc5_regmap_config = {
+       .reg_bits = 8,
+       .val_bits = 8,
+       .cache_type = REGCACHE_RBTREE,
+       .max_register = 0x76,
+       .writeable_reg = vc5_regmap_is_writeable,
+};
+
+/*
+ * VersaClock5 input multiplexer between XTAL and CLKIN divider
+ */
+static unsigned char vc5_mux_get_parent(struct clk_hw *hw)
+{
+       struct vc5_driver_data *vc5 =
+               container_of(hw, struct vc5_driver_data, clk_mux);
+       const u8 mask = VC5_PRIM_SRC_SHDN_EN_XTAL | VC5_PRIM_SRC_SHDN_EN_CLKIN;
+       unsigned int src;
+
+       regmap_read(vc5->regmap, VC5_PRIM_SRC_SHDN, &src);
+       src &= mask;
+
+       if (src == VC5_PRIM_SRC_SHDN_EN_XTAL)
+               return 0;
+
+       if (src == VC5_PRIM_SRC_SHDN_EN_CLKIN)
+               return 1;
+
+       dev_warn(&vc5->client->dev,
+                "Invalid clock input configuration (%02x)\n", src);
+       return 0;
+}
+
+static int vc5_mux_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct vc5_driver_data *vc5 =
+               container_of(hw, struct vc5_driver_data, clk_mux);
+       const u8 mask = VC5_PRIM_SRC_SHDN_EN_XTAL | VC5_PRIM_SRC_SHDN_EN_CLKIN;
+       u8 src;
+
+       if ((index > 1) || !vc5->clk_mux_ins)
+               return -EINVAL;
+
+       if (vc5->clk_mux_ins == (VC5_MUX_IN_CLKIN | VC5_MUX_IN_XIN)) {
+               if (index == 0)
+                       src = VC5_PRIM_SRC_SHDN_EN_XTAL;
+               if (index == 1)
+                       src = VC5_PRIM_SRC_SHDN_EN_CLKIN;
+       } else {
+               if (index != 0)
+                       return -EINVAL;
+
+               if (vc5->clk_mux_ins == VC5_MUX_IN_XIN)
+                       src = VC5_PRIM_SRC_SHDN_EN_XTAL;
+               if (vc5->clk_mux_ins == VC5_MUX_IN_CLKIN)
+                       src = VC5_PRIM_SRC_SHDN_EN_CLKIN;
+       }
+
+       return regmap_update_bits(vc5->regmap, VC5_PRIM_SRC_SHDN, mask, src);
+}
+
+static unsigned long vc5_mux_recalc_rate(struct clk_hw *hw,
+                                        unsigned long parent_rate)
+{
+       struct vc5_driver_data *vc5 =
+               container_of(hw, struct vc5_driver_data, clk_mux);
+       unsigned int prediv, div;
+
+       regmap_read(vc5->regmap, VC5_VCO_CTRL_AND_PREDIV, &prediv);
+
+       /* If bypass_prediv is set, the PLL is fed directly from Ref_in. */
+       if (prediv & VC5_VCO_CTRL_AND_PREDIV_BYPASS_PREDIV)
+               return parent_rate;
+
+       regmap_read(vc5->regmap, VC5_REF_DIVIDER, &div);
+
+       /* If Sel_prediv2 is set, the PLL is fed from prediv2 (Ref_in / 2). */
+       if (div & VC5_REF_DIVIDER_SEL_PREDIV2)
+               return parent_rate / 2;
+       else
+               return parent_rate / VC5_REF_DIVIDER_REF_DIV(div);
+}
+
+static long vc5_mux_round_rate(struct clk_hw *hw, unsigned long rate,
+                              unsigned long *parent_rate)
+{
+       unsigned long idiv;
+
+       /* PLL cannot operate with input clock above 50 MHz. */
+       if (rate > 50000000)
+               return -EINVAL;
+
+       /* CLKIN within range of PLL input, feed directly to PLL. */
+       if (*parent_rate <= 50000000)
+               return *parent_rate;
+
+       idiv = DIV_ROUND_UP(*parent_rate, rate);
+       if (idiv > 127)
+               return -EINVAL;
+
+       return *parent_rate / idiv;
+}
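+
+/*
+ * Worked example (illustrative numbers): with a 125 MHz CLKIN and a
+ * requested 40 MHz, idiv = DIV_ROUND_UP(125, 40) = 4, so the mux rounds
+ * to 125 MHz / 4 = 31.25 MHz, safely below the 50 MHz PLL input limit.
+ */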
+
+static int vc5_mux_set_rate(struct clk_hw *hw, unsigned long rate,
+                           unsigned long parent_rate)
+{
+       struct vc5_driver_data *vc5 =
+               container_of(hw, struct vc5_driver_data, clk_mux);
+       unsigned long idiv;
+       u8 div;
+
+       /* CLKIN within range of PLL input, feed directly to PLL. */
+       if (parent_rate <= 50000000) {
+               regmap_update_bits(vc5->regmap, VC5_VCO_CTRL_AND_PREDIV,
+                                  VC5_VCO_CTRL_AND_PREDIV_BYPASS_PREDIV,
+                                  VC5_VCO_CTRL_AND_PREDIV_BYPASS_PREDIV);
+               regmap_update_bits(vc5->regmap, VC5_REF_DIVIDER, 0xff, 0x00);
+               return 0;
+       }
+
+       idiv = DIV_ROUND_UP(parent_rate, rate);
+
+       /* We have a dedicated div-by-2 predivider. */
+       if (idiv == 2)
+               div = VC5_REF_DIVIDER_SEL_PREDIV2;
+       else
+               div = VC5_REF_DIVIDER_REF_DIV(idiv);
+
+       regmap_update_bits(vc5->regmap, VC5_REF_DIVIDER, 0xff, div);
+       regmap_update_bits(vc5->regmap, VC5_VCO_CTRL_AND_PREDIV,
+                          VC5_VCO_CTRL_AND_PREDIV_BYPASS_PREDIV, 0);
+
+       return 0;
+}
+
+static const struct clk_ops vc5_mux_ops = {
+       .set_parent     = vc5_mux_set_parent,
+       .get_parent     = vc5_mux_get_parent,
+       .recalc_rate    = vc5_mux_recalc_rate,
+       .round_rate     = vc5_mux_round_rate,
+       .set_rate       = vc5_mux_set_rate,
+};
+
+/*
+ * VersaClock5 PLL/VCO
+ */
+static unsigned long vc5_pll_recalc_rate(struct clk_hw *hw,
+                                        unsigned long parent_rate)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       struct vc5_driver_data *vc5 = hwdata->vc5;
+       u32 div_int, div_frc;
+       u8 fb[5];
+
+       regmap_bulk_read(vc5->regmap, VC5_FEEDBACK_INT_DIV, fb, 5);
+
+       div_int = (fb[0] << 4) | (fb[1] >> 4);
+       div_frc = (fb[2] << 16) | (fb[3] << 8) | fb[4];
+
+       /* The PLL divider has 12 integer bits and 24 fractional bits */
+       return (parent_rate * div_int) + ((parent_rate * div_frc) >> 24);
+}
+
+static long vc5_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+                              unsigned long *parent_rate)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       u32 div_int;
+       u64 div_frc;
+
+       if (rate < VC5_PLL_VCO_MIN)
+               rate = VC5_PLL_VCO_MIN;
+       if (rate > VC5_PLL_VCO_MAX)
+               rate = VC5_PLL_VCO_MAX;
+
+       /* Determine integer part, which is 12 bit wide */
+       div_int = rate / *parent_rate;
+       if (div_int > 0xfff)
+               rate = *parent_rate * 0xfff;
+
+       /* Determine best fractional part, which is 24 bit wide */
+       div_frc = rate % *parent_rate;
+       div_frc *= BIT(24) - 1;
+       do_div(div_frc, *parent_rate);
+
+       hwdata->div_int = div_int;
+       hwdata->div_frc = (u32)div_frc;
+
+       return (*parent_rate * div_int) + ((*parent_rate * div_frc) >> 24);
+}
+
+static int vc5_pll_set_rate(struct clk_hw *hw, unsigned long rate,
+                           unsigned long parent_rate)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       struct vc5_driver_data *vc5 = hwdata->vc5;
+       u8 fb[5];
+
+       fb[0] = hwdata->div_int >> 4;
+       fb[1] = hwdata->div_int << 4;
+       fb[2] = hwdata->div_frc >> 16;
+       fb[3] = hwdata->div_frc >> 8;
+       fb[4] = hwdata->div_frc;
+
+       return regmap_bulk_write(vc5->regmap, VC5_FEEDBACK_INT_DIV, fb, 5);
+}
+
+static const struct clk_ops vc5_pll_ops = {
+       .recalc_rate    = vc5_pll_recalc_rate,
+       .round_rate     = vc5_pll_round_rate,
+       .set_rate       = vc5_pll_set_rate,
+};
+
+static unsigned long vc5_fod_recalc_rate(struct clk_hw *hw,
+                                        unsigned long parent_rate)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       struct vc5_driver_data *vc5 = hwdata->vc5;
+       /* VCO frequency is divided by two before entering FOD */
+       u32 f_in = parent_rate / 2;
+       u32 div_int, div_frc;
+       u8 od_int[2];
+       u8 od_frc[4];
+
+       regmap_bulk_read(vc5->regmap, VC5_OUT_DIV_INT(hwdata->num, 0),
+                        od_int, 2);
+       regmap_bulk_read(vc5->regmap, VC5_OUT_DIV_FRAC(hwdata->num, 0),
+                        od_frc, 4);
+
+       div_int = (od_int[0] << 4) | (od_int[1] >> 4);
+       div_frc = (od_frc[0] << 22) | (od_frc[1] << 14) |
+                 (od_frc[2] << 6) | (od_frc[3] >> 2);
+
+       /* The output divider has 12 integer bits and 30 fractional bits */
+       return div64_u64((u64)f_in << 24ULL, ((u64)div_int << 24ULL) + div_frc);
+}
+
+static long vc5_fod_round_rate(struct clk_hw *hw, unsigned long rate,
+                              unsigned long *parent_rate)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       /* VCO frequency is divided by two before entering FOD */
+       u32 f_in = *parent_rate / 2;
+       u32 div_int;
+       u64 div_frc;
+
+       /* Determine integer part, which is 12 bit wide */
+       div_int = f_in / rate;
+       /*
+        * WARNING: The clock chip does not output a signal if the integer part
+        *          of the divider is 0xfff and fractional part is non-zero.
+        *          Clamp the divider at 0xffe to keep the code simple.
+        */
+       if (div_int > 0xffe) {
+               div_int = 0xffe;
+               rate = f_in / div_int;
+       }
+
+       /* Determine best fractional part, which is 30 bit wide */
+       div_frc = f_in % rate;
+       div_frc <<= 24;
+       do_div(div_frc, rate);
+
+       hwdata->div_int = div_int;
+       hwdata->div_frc = (u32)div_frc;
+
+       return div64_u64((u64)f_in << 24ULL, ((u64)div_int << 24ULL) + div_frc);
+}
+
+static int vc5_fod_set_rate(struct clk_hw *hw, unsigned long rate,
+                           unsigned long parent_rate)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       struct vc5_driver_data *vc5 = hwdata->vc5;
+       u8 data[14] = {
+               hwdata->div_frc >> 22, hwdata->div_frc >> 14,
+               hwdata->div_frc >> 6, hwdata->div_frc << 2,
+               0, 0, 0, 0, 0,
+               0, 0,
+               hwdata->div_int >> 4, hwdata->div_int << 4,
+               0
+       };
+
+       regmap_bulk_write(vc5->regmap, VC5_OUT_DIV_FRAC(hwdata->num, 0),
+                         data, 14);
+
+       /*
+        * Toggle magic bit in undocumented register for unknown reason.
+        * This is what the IDT timing commander tool does and the chip
+        * datasheet somewhat implies this is needed, but the register
+        * and the bit are not documented.
+        */
+       regmap_update_bits(vc5->regmap, VC5_GLOBAL_REGISTER,
+                          VC5_GLOBAL_REGISTER_GLOBAL_RESET, 0);
+       regmap_update_bits(vc5->regmap, VC5_GLOBAL_REGISTER,
+                          VC5_GLOBAL_REGISTER_GLOBAL_RESET,
+                          VC5_GLOBAL_REGISTER_GLOBAL_RESET);
+       return 0;
+}
+
+static const struct clk_ops vc5_fod_ops = {
+       .recalc_rate    = vc5_fod_recalc_rate,
+       .round_rate     = vc5_fod_round_rate,
+       .set_rate       = vc5_fod_set_rate,
+};
+
+static int vc5_clk_out_prepare(struct clk_hw *hw)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       struct vc5_driver_data *vc5 = hwdata->vc5;
+
+       /* Enable the clock buffer */
+       regmap_update_bits(vc5->regmap, VC5_CLK_OUTPUT_CFG(hwdata->num, 1),
+                          VC5_CLK_OUTPUT_CFG1_EN_CLKBUF,
+                          VC5_CLK_OUTPUT_CFG1_EN_CLKBUF);
+       return 0;
+}
+
+static void vc5_clk_out_unprepare(struct clk_hw *hw)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       struct vc5_driver_data *vc5 = hwdata->vc5;
+
+       /* Disable the clock buffer */
+       regmap_update_bits(vc5->regmap, VC5_CLK_OUTPUT_CFG(hwdata->num, 1),
+                          VC5_CLK_OUTPUT_CFG1_EN_CLKBUF, 0);
+}
+
+static unsigned char vc5_clk_out_get_parent(struct clk_hw *hw)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       struct vc5_driver_data *vc5 = hwdata->vc5;
+       const u8 mask = VC5_OUT_DIV_CONTROL_SELB_NORM |
+                       VC5_OUT_DIV_CONTROL_SEL_EXT |
+                       VC5_OUT_DIV_CONTROL_EN_FOD;
+       const u8 fodclkmask = VC5_OUT_DIV_CONTROL_SELB_NORM |
+                             VC5_OUT_DIV_CONTROL_EN_FOD;
+       const u8 extclk = VC5_OUT_DIV_CONTROL_SELB_NORM |
+                         VC5_OUT_DIV_CONTROL_SEL_EXT;
+       unsigned int src;
+
+       regmap_read(vc5->regmap, VC5_OUT_DIV_CONTROL(hwdata->num), &src);
+       src &= mask;
+
+       if ((src & fodclkmask) == VC5_OUT_DIV_CONTROL_EN_FOD)
+               return 0;
+
+       if (src == extclk)
+               return 1;
+
+       dev_warn(&vc5->client->dev,
+                "Invalid clock output configuration (%02x)\n", src);
+       return 0;
+}
+
+static int vc5_clk_out_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw);
+       struct vc5_driver_data *vc5 = hwdata->vc5;
+       const u8 mask = VC5_OUT_DIV_CONTROL_RESET |
+                       VC5_OUT_DIV_CONTROL_SELB_NORM |
+                       VC5_OUT_DIV_CONTROL_SEL_EXT |
+                       VC5_OUT_DIV_CONTROL_EN_FOD;
+       const u8 extclk = VC5_OUT_DIV_CONTROL_SELB_NORM |
+                         VC5_OUT_DIV_CONTROL_SEL_EXT;
+       u8 src = VC5_OUT_DIV_CONTROL_RESET;
+
+       if (index == 0)
+               src |= VC5_OUT_DIV_CONTROL_EN_FOD;
+       else
+               src |= extclk;
+
+       return regmap_update_bits(vc5->regmap, VC5_OUT_DIV_CONTROL(hwdata->num),
+                                 mask, src);
+}
+
+static const struct clk_ops vc5_clk_out_ops = {
+       .prepare        = vc5_clk_out_prepare,
+       .unprepare      = vc5_clk_out_unprepare,
+       .set_parent     = vc5_clk_out_set_parent,
+       .get_parent     = vc5_clk_out_get_parent,
+};
+
+static struct clk_hw *vc5_of_clk_get(struct of_phandle_args *clkspec,
+                                    void *data)
+{
+       struct vc5_driver_data *vc5 = data;
+       unsigned int idx = clkspec->args[0];
+
+       if (idx > 2)
+               return ERR_PTR(-EINVAL);
+
+       return &vc5->clk_out[idx].hw;
+}
+
+static int vc5_map_index_to_output(const enum vc5_model model,
+                                  const unsigned int n)
+{
+       switch (model) {
+       case IDT_VC5_5P49V5933:
+               return (n == 0) ? 0 : 3;
+       case IDT_VC5_5P49V5923:
+       default:
+               return n;
+       }
+}
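+
+/*
+ * Example: on the 5P49V5933 the two internal FODs drive output indices
+ * 0 and 3, while on the 5P49V5923 the mapping is the identity.
+ */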
+
+static const struct of_device_id clk_vc5_of_match[];
+
+static int vc5_probe(struct i2c_client *client,
+                    const struct i2c_device_id *id)
+{
+       const struct of_device_id *of_id =
+               of_match_device(clk_vc5_of_match, &client->dev);
+       struct vc5_driver_data *vc5;
+       struct clk_init_data init;
+       const char *parent_names[2];
+       unsigned int n, idx;
+       int ret;
+
+       vc5 = devm_kzalloc(&client->dev, sizeof(*vc5), GFP_KERNEL);
+       if (vc5 == NULL)
+               return -ENOMEM;
+
+       i2c_set_clientdata(client, vc5);
+       vc5->client = client;
+       vc5->model = (enum vc5_model)of_id->data;
+
+       vc5->pin_xin = devm_clk_get(&client->dev, "xin");
+       if (PTR_ERR(vc5->pin_xin) == -EPROBE_DEFER)
+               return -EPROBE_DEFER;
+
+       vc5->pin_clkin = devm_clk_get(&client->dev, "clkin");
+       if (PTR_ERR(vc5->pin_clkin) == -EPROBE_DEFER)
+               return -EPROBE_DEFER;
+
+       vc5->regmap = devm_regmap_init_i2c(client, &vc5_regmap_config);
+       if (IS_ERR(vc5->regmap)) {
+               dev_err(&client->dev, "failed to allocate register map\n");
+               return PTR_ERR(vc5->regmap);
+       }
+
+       /* Register clock input mux */
+       memset(&init, 0, sizeof(init));
+
+       if (!IS_ERR(vc5->pin_xin)) {
+               vc5->clk_mux_ins |= VC5_MUX_IN_XIN;
+               parent_names[init.num_parents++] = __clk_get_name(vc5->pin_xin);
+       } else if (vc5->model == IDT_VC5_5P49V5933) {
+               /* IDT VC5 5P49V5933 has a built-in oscillator. */
+               vc5->pin_xin = clk_register_fixed_rate(&client->dev,
+                                                      "internal-xtal", NULL,
+                                                      0, 25000000);
+               if (IS_ERR(vc5->pin_xin))
+                       return PTR_ERR(vc5->pin_xin);
+               vc5->clk_mux_ins |= VC5_MUX_IN_XIN;
+               parent_names[init.num_parents++] = __clk_get_name(vc5->pin_xin);
+       }
+
+       if (!IS_ERR(vc5->pin_clkin)) {
+               vc5->clk_mux_ins |= VC5_MUX_IN_CLKIN;
+               parent_names[init.num_parents++] =
+                       __clk_get_name(vc5->pin_clkin);
+       }
+
+       if (!init.num_parents) {
+               dev_err(&client->dev, "no input clock specified!\n");
+               return -EINVAL;
+       }
+
+       init.name = vc5_mux_names[0];
+       init.ops = &vc5_mux_ops;
+       init.flags = 0;
+       init.parent_names = parent_names;
+       vc5->clk_mux.init = &init;
+       ret = devm_clk_hw_register(&client->dev, &vc5->clk_mux);
+       if (ret) {
+               dev_err(&client->dev, "unable to register %s\n", init.name);
+               goto err_clk;
+       }
+
+       /* Register PLL */
+       memset(&init, 0, sizeof(init));
+       init.name = vc5_pll_names[0];
+       init.ops = &vc5_pll_ops;
+       init.flags = CLK_SET_RATE_PARENT;
+       init.parent_names = vc5_mux_names;
+       init.num_parents = 1;
+       vc5->clk_pll.num = 0;
+       vc5->clk_pll.vc5 = vc5;
+       vc5->clk_pll.hw.init = &init;
+       ret = devm_clk_hw_register(&client->dev, &vc5->clk_pll.hw);
+       if (ret) {
+               dev_err(&client->dev, "unable to register %s\n", init.name);
+               goto err_clk;
+       }
+
+       /* Register FODs */
+       for (n = 0; n < 2; n++) {
+               idx = vc5_map_index_to_output(vc5->model, n);
+               memset(&init, 0, sizeof(init));
+               init.name = vc5_fod_names[idx];
+               init.ops = &vc5_fod_ops;
+               init.flags = CLK_SET_RATE_PARENT;
+               init.parent_names = vc5_pll_names;
+               init.num_parents = 1;
+               vc5->clk_fod[n].num = idx;
+               vc5->clk_fod[n].vc5 = vc5;
+               vc5->clk_fod[n].hw.init = &init;
+               ret = devm_clk_hw_register(&client->dev, &vc5->clk_fod[n].hw);
+               if (ret) {
+                       dev_err(&client->dev, "unable to register %s\n",
+                               init.name);
+                       goto err_clk;
+               }
+       }
+
+       /* Register MUX-connected OUT0_I2C_SELB output */
+       memset(&init, 0, sizeof(init));
+       init.name = vc5_clk_out_names[0];
+       init.ops = &vc5_clk_out_ops;
+       init.flags = CLK_SET_RATE_PARENT;
+       init.parent_names = vc5_mux_names;
+       init.num_parents = 1;
+       vc5->clk_out[0].num = idx;
+       vc5->clk_out[0].vc5 = vc5;
+       vc5->clk_out[0].hw.init = &init;
+       ret = devm_clk_hw_register(&client->dev, &vc5->clk_out[0].hw);
+       if (ret) {
+               dev_err(&client->dev, "unable to register %s\n",
+                       init.name);
+               goto err_clk;
+       }
+
+       /* Register FOD-connected OUTx outputs */
+       for (n = 1; n < 3; n++) {
+               idx = vc5_map_index_to_output(vc5->model, n - 1);
+               parent_names[0] = vc5_fod_names[idx];
+               if (n == 1)
+                       parent_names[1] = vc5_mux_names[0];
+               else
+                       parent_names[1] = vc5_clk_out_names[n - 1];
+
+               memset(&init, 0, sizeof(init));
+               init.name = vc5_clk_out_names[idx + 1];
+               init.ops = &vc5_clk_out_ops;
+               init.flags = CLK_SET_RATE_PARENT;
+               init.parent_names = parent_names;
+               init.num_parents = 2;
+               vc5->clk_out[n].num = idx;
+               vc5->clk_out[n].vc5 = vc5;
+               vc5->clk_out[n].hw.init = &init;
+               ret = devm_clk_hw_register(&client->dev,
+                                          &vc5->clk_out[n].hw);
+               if (ret) {
+                       dev_err(&client->dev, "unable to register %s\n",
+                               init.name);
+                       goto err_clk;
+               }
+       }
+
+       ret = of_clk_add_hw_provider(client->dev.of_node, vc5_of_clk_get, vc5);
+       if (ret) {
+               dev_err(&client->dev, "unable to add clk provider\n");
+               goto err_clk;
+       }
+
+       return 0;
+
+err_clk:
+       if (vc5->model == IDT_VC5_5P49V5933)
+               clk_unregister_fixed_rate(vc5->pin_xin);
+       return ret;
+}
+
+static int vc5_remove(struct i2c_client *client)
+{
+       struct vc5_driver_data *vc5 = i2c_get_clientdata(client);
+
+       of_clk_del_provider(client->dev.of_node);
+
+       if (vc5->model == IDT_VC5_5P49V5933)
+               clk_unregister_fixed_rate(vc5->pin_xin);
+
+       return 0;
+}
+
+static const struct i2c_device_id vc5_id[] = {
+       { "5p49v5923", .driver_data = IDT_VC5_5P49V5923 },
+       { "5p49v5933", .driver_data = IDT_VC5_5P49V5933 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, vc5_id);
+
+static const struct of_device_id clk_vc5_of_match[] = {
+       { .compatible = "idt,5p49v5923", .data = (void *)IDT_VC5_5P49V5923 },
+       { .compatible = "idt,5p49v5933", .data = (void *)IDT_VC5_5P49V5933 },
+       { },
+};
+MODULE_DEVICE_TABLE(of, clk_vc5_of_match);
+
+static struct i2c_driver vc5_driver = {
+       .driver = {
+               .name = "vc5",
+               .of_match_table = clk_vc5_of_match,
+       },
+       .probe          = vc5_probe,
+       .remove         = vc5_remove,
+       .id_table       = vc5_id,
+};
+module_i2c_driver(vc5_driver);
+
+MODULE_AUTHOR("Marek Vasut <marek.vasut@gmail.com>");
+MODULE_DESCRIPTION("IDT VersaClock 5 driver");
+MODULE_LICENSE("GPL");
index 0621fbfb4bebfd548b21f62e7320f1bab2294b48..a47960aacfa52c30749787ea3cb1a3d35b0576d0 100644 (file)
@@ -97,7 +97,8 @@ static int wm831x_fll_prepare(struct clk_hw *hw)
        if (ret != 0)
                dev_crit(wm831x->dev, "Failed to enable FLL: %d\n", ret);
 
-       usleep_range(2000, 2000);
+       /* wait 2-3 ms for the new frequency to take effect */
+       usleep_range(2000, 3000);
 
        return ret;
 }
index cbed6602172be2f302db8d30698095a81a7f0463..7098bfd32b1b23ad6cc8dfd18786a874cd67bf9b 100644 (file)
@@ -14,6 +14,13 @@ config COMMON_CLK_HI3519
        help
          Build the clock driver for hi3519.
 
+config COMMON_CLK_HI3660
+       bool "Hi3660 Clock Driver"
+       depends on ARCH_HISI || COMPILE_TEST
+       default ARCH_HISI
+       help
+         Build the clock driver for hi3660.
+
 config COMMON_CLK_HI3798CV200
        tristate "Hi3798CV200 Clock Driver"
        depends on ARCH_HISI || COMPILE_TEST
index 4eec5e511e4c22c48f3e7f8e038c60f294cb1d75..1e4c3ddbad840ed88f760c7c252dfbf33bbb0cd8 100644 (file)
@@ -9,6 +9,7 @@ obj-$(CONFIG_ARCH_HIP04)        += clk-hip04.o
 obj-$(CONFIG_ARCH_HIX5HD2)     += clk-hix5hd2.o
 obj-$(CONFIG_COMMON_CLK_HI3516CV300)   += crg-hi3516cv300.o
 obj-$(CONFIG_COMMON_CLK_HI3519)        += clk-hi3519.o
+obj-$(CONFIG_COMMON_CLK_HI3660) += clk-hi3660.o
 obj-$(CONFIG_COMMON_CLK_HI3798CV200)   += crg-hi3798cv200.o
 obj-$(CONFIG_COMMON_CLK_HI6220)        += clk-hi6220.o
 obj-$(CONFIG_RESET_HISI)       += reset.o
diff --git a/drivers/clk/hisilicon/clk-hi3660.c b/drivers/clk/hisilicon/clk-hi3660.c
new file mode 100644 (file)
index 0000000..96a9697
--- /dev/null
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 2016-2017 Linaro Ltd.
+ * Copyright (c) 2016-2017 HiSilicon Technologies Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <dt-bindings/clock/hi3660-clock.h>
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include "clk.h"
+
+static const struct hisi_fixed_rate_clock hi3660_fixed_rate_clks[] = {
+       { HI3660_CLKIN_SYS, "clkin_sys", NULL, 0, 19200000, },
+       { HI3660_CLKIN_REF, "clkin_ref", NULL, 0, 32764, },
+       { HI3660_CLK_FLL_SRC, "clk_fll_src", NULL, 0, 128000000, },
+       { HI3660_CLK_PPLL0, "clk_ppll0", NULL, 0, 1600000000, },
+       { HI3660_CLK_PPLL1, "clk_ppll1", NULL, 0, 1866000000, },
+       { HI3660_CLK_PPLL2, "clk_ppll2", NULL, 0, 960000000, },
+       { HI3660_CLK_PPLL3, "clk_ppll3", NULL, 0, 1290000000, },
+       { HI3660_CLK_SCPLL, "clk_scpll", NULL, 0, 245760000, },
+       { HI3660_PCLK, "pclk", NULL, 0, 20000000, },
+       { HI3660_CLK_UART0_DBG, "clk_uart0_dbg", NULL, 0, 19200000, },
+       { HI3660_CLK_UART6, "clk_uart6", NULL, 0, 19200000, },
+       { HI3660_OSC32K, "osc32k", NULL, 0, 32764, },
+       { HI3660_OSC19M, "osc19m", NULL, 0, 19200000, },
+       { HI3660_CLK_480M, "clk_480m", NULL, 0, 480000000, },
+       { HI3660_CLK_INV, "clk_inv", NULL, 0, 10000000, },
+};
+
+/* crgctrl */
+static const struct hisi_fixed_factor_clock hi3660_crg_fixed_factor_clks[] = {
+       { HI3660_FACTOR_UART3, "clk_factor_uart3", "iomcu_peri0", 1, 8, 0, },
+       { HI3660_CLK_FACTOR_MMC, "clk_factor_mmc", "clkin_sys", 1, 6, 0, },
+       { HI3660_CLK_GATE_I2C0, "clk_gate_i2c0", "clk_i2c0_iomcu", 1, 4, 0, },
+       { HI3660_CLK_GATE_I2C1, "clk_gate_i2c1", "clk_i2c1_iomcu", 1, 4, 0, },
+       { HI3660_CLK_GATE_I2C2, "clk_gate_i2c2", "clk_i2c2_iomcu", 1, 4, 0, },
+       { HI3660_CLK_GATE_I2C6, "clk_gate_i2c6", "clk_i2c6_iomcu", 1, 4, 0, },
+       { HI3660_CLK_DIV_SYSBUS, "clk_div_sysbus", "clk_mux_sysbus", 1, 7, 0, },
+       { HI3660_CLK_DIV_320M, "clk_div_320m", "clk_320m_pll_gt", 1, 5, 0, },
+       { HI3660_CLK_DIV_A53, "clk_div_a53hpm", "clk_a53hpm_andgt", 1, 2, 0, },
+       { HI3660_CLK_GATE_SPI0, "clk_gate_spi0", "clk_ppll0", 1, 8, 0, },
+       { HI3660_CLK_GATE_SPI2, "clk_gate_spi2", "clk_ppll0", 1, 8, 0, },
+       { HI3660_PCIEPHY_REF, "clk_pciephy_ref", "clk_div_pciephy", 1, 1, 0, },
+       { HI3660_CLK_ABB_USB, "clk_abb_usb", "clk_gate_usb_tcxo_en", 1, 1, 0 },
+};
+
+static const struct hisi_gate_clock hi3660_crgctrl_gate_sep_clks[] = {
+       { HI3660_HCLK_GATE_SDIO0, "hclk_gate_sdio0", "clk_div_sysbus",
+         CLK_SET_RATE_PARENT, 0x0, 21, 0, },
+       { HI3660_HCLK_GATE_SD, "hclk_gate_sd", "clk_div_sysbus",
+         CLK_SET_RATE_PARENT, 0x0, 30, 0, },
+       { HI3660_CLK_GATE_AOMM, "clk_gate_aomm", "clk_div_aomm",
+         CLK_SET_RATE_PARENT, 0x0, 31, 0, },
+       { HI3660_PCLK_GPIO0, "pclk_gpio0", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 0, 0, },
+       { HI3660_PCLK_GPIO1, "pclk_gpio1", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 1, 0, },
+       { HI3660_PCLK_GPIO2, "pclk_gpio2", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 2, 0, },
+       { HI3660_PCLK_GPIO3, "pclk_gpio3", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 3, 0, },
+       { HI3660_PCLK_GPIO4, "pclk_gpio4", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 4, 0, },
+       { HI3660_PCLK_GPIO5, "pclk_gpio5", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 5, 0, },
+       { HI3660_PCLK_GPIO6, "pclk_gpio6", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 6, 0, },
+       { HI3660_PCLK_GPIO7, "pclk_gpio7", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 7, 0, },
+       { HI3660_PCLK_GPIO8, "pclk_gpio8", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 8, 0, },
+       { HI3660_PCLK_GPIO9, "pclk_gpio9", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 9, 0, },
+       { HI3660_PCLK_GPIO10, "pclk_gpio10", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 10, 0, },
+       { HI3660_PCLK_GPIO11, "pclk_gpio11", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 11, 0, },
+       { HI3660_PCLK_GPIO12, "pclk_gpio12", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 12, 0, },
+       { HI3660_PCLK_GPIO13, "pclk_gpio13", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 13, 0, },
+       { HI3660_PCLK_GPIO14, "pclk_gpio14", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 14, 0, },
+       { HI3660_PCLK_GPIO15, "pclk_gpio15", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 15, 0, },
+       { HI3660_PCLK_GPIO16, "pclk_gpio16", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 16, 0, },
+       { HI3660_PCLK_GPIO17, "pclk_gpio17", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 17, 0, },
+       { HI3660_PCLK_GPIO18, "pclk_gpio18", "clk_div_ioperi",
+         CLK_SET_RATE_PARENT, 0x10, 18, 0, },
+       { HI3660_PCLK_GPIO19, "pclk_gpio19", "clk_div_ioperi",
+         CLK_SET_RATE_PARENT, 0x10, 19, 0, },
+       { HI3660_PCLK_GPIO20, "pclk_gpio20", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 20, 0, },
+       { HI3660_PCLK_GPIO21, "pclk_gpio21", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x10, 21, 0, },
+       { HI3660_CLK_GATE_SPI3, "clk_gate_spi3", "clk_div_ioperi",
+         CLK_SET_RATE_PARENT, 0x10, 30, 0, },
+       { HI3660_CLK_GATE_I2C7, "clk_gate_i2c7", "clk_mux_i2c",
+         CLK_SET_RATE_PARENT, 0x10, 31, 0, },
+       { HI3660_CLK_GATE_I2C3, "clk_gate_i2c3", "clk_mux_i2c",
+         CLK_SET_RATE_PARENT, 0x20, 7, 0, },
+       { HI3660_CLK_GATE_SPI1, "clk_gate_spi1", "clk_mux_spi",
+         CLK_SET_RATE_PARENT, 0x20, 9, 0, },
+       { HI3660_CLK_GATE_UART1, "clk_gate_uart1", "clk_mux_uarth",
+         CLK_SET_RATE_PARENT, 0x20, 11, 0, },
+       { HI3660_CLK_GATE_UART2, "clk_gate_uart2", "clk_mux_uart1",
+         CLK_SET_RATE_PARENT, 0x20, 12, 0, },
+       { HI3660_CLK_GATE_UART4, "clk_gate_uart4", "clk_mux_uarth",
+         CLK_SET_RATE_PARENT, 0x20, 14, 0, },
+       { HI3660_CLK_GATE_UART5, "clk_gate_uart5", "clk_mux_uart1",
+         CLK_SET_RATE_PARENT, 0x20, 15, 0, },
+       { HI3660_CLK_GATE_I2C4, "clk_gate_i2c4", "clk_mux_i2c",
+         CLK_SET_RATE_PARENT, 0x20, 27, 0, },
+       { HI3660_CLK_GATE_DMAC, "clk_gate_dmac", "clk_div_sysbus",
+         CLK_SET_RATE_PARENT, 0x30, 1, 0, },
+       { HI3660_PCLK_GATE_DSS, "pclk_gate_dss", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x30, 12, 0, },
+       { HI3660_ACLK_GATE_DSS, "aclk_gate_dss", "clk_gate_vivobus",
+         CLK_SET_RATE_PARENT, 0x30, 13, 0, },
+       { HI3660_CLK_GATE_LDI1, "clk_gate_ldi1", "clk_div_ldi1",
+         CLK_SET_RATE_PARENT, 0x30, 14, 0, },
+       { HI3660_CLK_GATE_LDI0, "clk_gate_ldi0", "clk_div_ldi0",
+         CLK_SET_RATE_PARENT, 0x30, 15, 0, },
+       { HI3660_CLK_GATE_VIVOBUS, "clk_gate_vivobus", "clk_div_vivobus",
+         CLK_SET_RATE_PARENT, 0x30, 16, 0, },
+       { HI3660_CLK_GATE_EDC0, "clk_gate_edc0", "clk_div_edc0",
+         CLK_SET_RATE_PARENT, 0x30, 17, 0, },
+       { HI3660_CLK_GATE_TXDPHY0_CFG, "clk_gate_txdphy0_cfg", "clkin_sys",
+         CLK_SET_RATE_PARENT, 0x30, 28, 0, },
+       { HI3660_CLK_GATE_TXDPHY0_REF, "clk_gate_txdphy0_ref", "clkin_sys",
+         CLK_SET_RATE_PARENT, 0x30, 29, 0, },
+       { HI3660_CLK_GATE_TXDPHY1_CFG, "clk_gate_txdphy1_cfg", "clkin_sys",
+         CLK_SET_RATE_PARENT, 0x30, 30, 0, },
+       { HI3660_CLK_GATE_TXDPHY1_REF, "clk_gate_txdphy1_ref", "clkin_sys",
+         CLK_SET_RATE_PARENT, 0x30, 31, 0, },
+       { HI3660_ACLK_GATE_USB3OTG, "aclk_gate_usb3otg", "clk_div_mmc0bus",
+         CLK_SET_RATE_PARENT, 0x40, 1, 0, },
+       { HI3660_CLK_GATE_SPI4, "clk_gate_spi4", "clk_mux_spi",
+         CLK_SET_RATE_PARENT, 0x40, 4, 0, },
+       { HI3660_CLK_GATE_SD, "clk_gate_sd", "clk_mux_sd_sys",
+         CLK_SET_RATE_PARENT, 0x40, 17, 0, },
+       { HI3660_CLK_GATE_SDIO0, "clk_gate_sdio0", "clk_mux_sdio_sys",
+         CLK_SET_RATE_PARENT, 0x40, 19, 0, },
+       { HI3660_CLK_GATE_UFS_SUBSYS, "clk_gate_ufs_subsys", "clk_div_sysbus",
+         CLK_SET_RATE_PARENT, 0x50, 21, 0, },
+       { HI3660_PCLK_GATE_DSI0, "pclk_gate_dsi0", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x50, 28, 0, },
+       { HI3660_PCLK_GATE_DSI1, "pclk_gate_dsi1", "clk_div_cfgbus",
+         CLK_SET_RATE_PARENT, 0x50, 29, 0, },
+       { HI3660_ACLK_GATE_PCIE, "aclk_gate_pcie", "clk_div_mmc1bus",
+         CLK_SET_RATE_PARENT, 0x420, 5, 0, },
+       { HI3660_PCLK_GATE_PCIE_SYS, "pclk_gate_pcie_sys", "clk_div_mmc1bus",
+         CLK_SET_RATE_PARENT, 0x420, 7, 0, },
+       { HI3660_CLK_GATE_PCIEAUX, "clk_gate_pcieaux", "clkin_sys",
+         CLK_SET_RATE_PARENT, 0x420, 8, 0, },
+       { HI3660_PCLK_GATE_PCIE_PHY, "pclk_gate_pcie_phy", "clk_div_mmc1bus",
+         CLK_SET_RATE_PARENT, 0x420, 9, 0, },
+};
+
+static const struct hisi_gate_clock hi3660_crgctrl_gate_clks[] = {
+       { HI3660_CLK_ANDGT_LDI0, "clk_andgt_ldi0", "clk_mux_ldi0",
+         CLK_SET_RATE_PARENT, 0xf0, 6, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_LDI1, "clk_andgt_ldi1", "clk_mux_ldi1",
+         CLK_SET_RATE_PARENT, 0xf0, 7, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_EDC0, "clk_andgt_edc0", "clk_mux_edc0",
+         CLK_SET_RATE_PARENT, 0xf0, 8, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_GATE_UFSPHY_GT, "clk_gate_ufsphy_gt", "clk_div_ufsperi",
+         CLK_SET_RATE_PARENT, 0xf4, 1, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_MMC, "clk_andgt_mmc", "clk_mux_mmc_pll",
+         CLK_SET_RATE_PARENT, 0xf4, 2, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_SD, "clk_andgt_sd", "clk_mux_sd_pll",
+         CLK_SET_RATE_PARENT, 0xf4, 3, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_A53HPM_ANDGT, "clk_a53hpm_andgt", "clk_mux_a53hpm",
+         CLK_SET_RATE_PARENT, 0xf4, 7, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_SDIO, "clk_andgt_sdio", "clk_mux_sdio_pll",
+         CLK_SET_RATE_PARENT, 0xf4, 8, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_UART0, "clk_andgt_uart0", "clk_div_320m",
+         CLK_SET_RATE_PARENT, 0xf4, 9, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_UART1, "clk_andgt_uart1", "clk_div_320m",
+         CLK_SET_RATE_PARENT, 0xf4, 10, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_UARTH, "clk_andgt_uarth", "clk_div_320m",
+         CLK_SET_RATE_PARENT, 0xf4, 11, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_ANDGT_SPI, "clk_andgt_spi", "clk_div_320m",
+         CLK_SET_RATE_PARENT, 0xf4, 13, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_VIVOBUS_ANDGT, "clk_vivobus_andgt", "clk_mux_vivobus",
+         CLK_SET_RATE_PARENT, 0xf8, 1, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_AOMM_ANDGT, "clk_aomm_andgt", "clk_ppll2",
+         CLK_SET_RATE_PARENT, 0xf8, 3, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_320M_PLL_GT, "clk_320m_pll_gt", "clk_mux_320m",
+         CLK_SET_RATE_PARENT, 0xf8, 10, 0, },
+       { HI3660_AUTODIV_EMMC0BUS, "autodiv_emmc0bus", "autodiv_sysbus",
+         CLK_SET_RATE_PARENT, 0x404, 1, CLK_GATE_HIWORD_MASK, },
+       { HI3660_AUTODIV_SYSBUS, "autodiv_sysbus", "clk_div_sysbus",
+         CLK_SET_RATE_PARENT, 0x404, 5, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_GATE_UFSPHY_CFG, "clk_gate_ufsphy_cfg",
+         "clk_div_ufsphy_cfg", CLK_SET_RATE_PARENT, 0x420, 12, 0, },
+       { HI3660_CLK_GATE_UFSIO_REF, "clk_gate_ufsio_ref",
+         "clk_gate_ufs_tcxo_en", CLK_SET_RATE_PARENT, 0x420, 14, 0, },
+};
+
+static const char *const
+clk_mux_sdio_sys_p[] = {"clk_factor_mmc", "clk_div_sdio",};
+static const char *const
+clk_mux_sd_sys_p[] = {"clk_factor_mmc", "clk_div_sd",};
+static const char *const
+clk_mux_pll_p[] = {"clk_ppll0", "clk_ppll1", "clk_ppll2", "clk_ppll2",};
+static const char *const
+clk_mux_pll0123_p[] = {"clk_ppll0", "clk_ppll1", "clk_ppll2", "clk_ppll3",};
+static const char *const
+clk_mux_edc0_p[] = {"clk_inv", "clk_ppll0", "clk_ppll1", "clk_inv",
+                   "clk_ppll2", "clk_inv", "clk_inv", "clk_inv",
+                   "clk_ppll3", "clk_inv", "clk_inv", "clk_inv",
+                   "clk_inv", "clk_inv", "clk_inv", "clk_inv",};
+static const char *const
+clk_mux_ldi0_p[] = {"clk_inv", "clk_ppll0", "clk_ppll2", "clk_inv",
+                   "clk_ppll1", "clk_inv", "clk_inv", "clk_inv",
+                   "clk_ppll3", "clk_inv", "clk_inv", "clk_inv",
+                   "clk_inv", "clk_inv", "clk_inv", "clk_inv",};
+static const char *const
+clk_mux_uart0_p[] = {"clkin_sys", "clk_div_uart0",};
+static const char *const
+clk_mux_uart1_p[] = {"clkin_sys", "clk_div_uart1",};
+static const char *const
+clk_mux_uarth_p[] = {"clkin_sys", "clk_div_uarth",};
+static const char *const
+clk_mux_pll02p[] = {"clk_ppll0", "clk_ppll2",};
+static const char *const
+clk_mux_ioperi_p[] = {"clk_div_320m", "clk_div_a53hpm",};
+static const char *const
+clk_mux_spi_p[] = {"clkin_sys", "clk_div_spi",};
+static const char *const
+clk_mux_i2c_p[] = {"clkin_sys", "clk_div_i2c",};
+
+static const struct hisi_mux_clock hi3660_crgctrl_mux_clks[] = {
+       { HI3660_CLK_MUX_SYSBUS, "clk_mux_sysbus", clk_mux_sdio_sys_p,
+         ARRAY_SIZE(clk_mux_sdio_sys_p), CLK_SET_RATE_PARENT, 0xac, 0, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_UART0, "clk_mux_uart0", clk_mux_uart0_p,
+         ARRAY_SIZE(clk_mux_uart0_p), CLK_SET_RATE_PARENT, 0xac, 2, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_UART1, "clk_mux_uart1", clk_mux_uart1_p,
+         ARRAY_SIZE(clk_mux_uart1_p), CLK_SET_RATE_PARENT, 0xac, 3, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_UARTH, "clk_mux_uarth", clk_mux_uarth_p,
+         ARRAY_SIZE(clk_mux_uarth_p), CLK_SET_RATE_PARENT, 0xac, 4, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_SPI, "clk_mux_spi", clk_mux_spi_p,
+         ARRAY_SIZE(clk_mux_spi_p), CLK_SET_RATE_PARENT, 0xac, 8, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_I2C, "clk_mux_i2c", clk_mux_i2c_p,
+         ARRAY_SIZE(clk_mux_i2c_p), CLK_SET_RATE_PARENT, 0xac, 13, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_MMC_PLL, "clk_mux_mmc_pll", clk_mux_pll02p,
+         ARRAY_SIZE(clk_mux_pll02p), CLK_SET_RATE_PARENT, 0xb4, 0, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_LDI1, "clk_mux_ldi1", clk_mux_ldi0_p,
+         ARRAY_SIZE(clk_mux_ldi0_p), CLK_SET_RATE_PARENT, 0xb4, 8, 4,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_LDI0, "clk_mux_ldi0", clk_mux_ldi0_p,
+         ARRAY_SIZE(clk_mux_ldi0_p), CLK_SET_RATE_PARENT, 0xb4, 12, 4,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_SD_PLL, "clk_mux_sd_pll", clk_mux_pll_p,
+         ARRAY_SIZE(clk_mux_pll_p), CLK_SET_RATE_PARENT, 0xb8, 4, 2,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_SD_SYS, "clk_mux_sd_sys", clk_mux_sd_sys_p,
+         ARRAY_SIZE(clk_mux_sd_sys_p), CLK_SET_RATE_PARENT, 0xb8, 6, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_EDC0, "clk_mux_edc0", clk_mux_edc0_p,
+         ARRAY_SIZE(clk_mux_edc0_p), CLK_SET_RATE_PARENT, 0xbc, 6, 4,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_SDIO_SYS, "clk_mux_sdio_sys", clk_mux_sdio_sys_p,
+         ARRAY_SIZE(clk_mux_sdio_sys_p), CLK_SET_RATE_PARENT, 0xc0, 6, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_SDIO_PLL, "clk_mux_sdio_pll", clk_mux_pll_p,
+         ARRAY_SIZE(clk_mux_pll_p), CLK_SET_RATE_PARENT, 0xc0, 4, 2,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_VIVOBUS, "clk_mux_vivobus", clk_mux_pll0123_p,
+         ARRAY_SIZE(clk_mux_pll0123_p), CLK_SET_RATE_PARENT, 0xd0, 12, 2,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_A53HPM, "clk_mux_a53hpm", clk_mux_pll02p,
+         ARRAY_SIZE(clk_mux_pll02p), CLK_SET_RATE_PARENT, 0xd4, 9, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_320M, "clk_mux_320m", clk_mux_pll02p,
+         ARRAY_SIZE(clk_mux_pll02p), CLK_SET_RATE_PARENT, 0x100, 0, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_MUX_IOPERI, "clk_mux_ioperi", clk_mux_ioperi_p,
+         ARRAY_SIZE(clk_mux_ioperi_p), CLK_SET_RATE_PARENT, 0x108, 10, 1,
+         CLK_MUX_HIWORD_MASK, },
+};
+
+static const struct hisi_divider_clock hi3660_crgctrl_divider_clks[] = {
+       { HI3660_CLK_DIV_UART0, "clk_div_uart0", "clk_andgt_uart0",
+         CLK_SET_RATE_PARENT, 0xb0, 4, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_UART1, "clk_div_uart1", "clk_andgt_uart1",
+         CLK_SET_RATE_PARENT, 0xb0, 8, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_UARTH, "clk_div_uarth", "clk_andgt_uarth",
+         CLK_SET_RATE_PARENT, 0xb0, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_MMC, "clk_div_mmc", "clk_andgt_mmc",
+         CLK_SET_RATE_PARENT, 0xb4, 3, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_SD, "clk_div_sd", "clk_andgt_sd",
+         CLK_SET_RATE_PARENT, 0xb8, 0, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_EDC0, "clk_div_edc0", "clk_andgt_edc0",
+         CLK_SET_RATE_PARENT, 0xbc, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_LDI0, "clk_div_ldi0", "clk_andgt_ldi0",
+         CLK_SET_RATE_PARENT, 0xbc, 10, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_SDIO, "clk_div_sdio", "clk_andgt_sdio",
+         CLK_SET_RATE_PARENT, 0xc0, 0, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_LDI1, "clk_div_ldi1", "clk_andgt_ldi1",
+         CLK_SET_RATE_PARENT, 0xc0, 8, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_SPI, "clk_div_spi", "clk_andgt_spi",
+         CLK_SET_RATE_PARENT, 0xc4, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_VIVOBUS, "clk_div_vivobus", "clk_vivobus_andgt",
+         CLK_SET_RATE_PARENT, 0xd0, 7, 5, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_I2C, "clk_div_i2c", "clk_div_320m",
+         CLK_SET_RATE_PARENT, 0xe8, 4, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_UFSPHY, "clk_div_ufsphy_cfg", "clk_gate_ufsphy_gt",
+         CLK_SET_RATE_PARENT, 0xe8, 9, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_CFGBUS, "clk_div_cfgbus", "clk_div_sysbus",
+         CLK_SET_RATE_PARENT, 0xec, 0, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_MMC0BUS, "clk_div_mmc0bus", "autodiv_emmc0bus",
+         CLK_SET_RATE_PARENT, 0xec, 2, 1, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_MMC1BUS, "clk_div_mmc1bus", "clk_div_sysbus",
+         CLK_SET_RATE_PARENT, 0xec, 3, 1, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_UFSPERI, "clk_div_ufsperi", "clk_gate_ufs_subsys",
+         CLK_SET_RATE_PARENT, 0xec, 14, 1, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_AOMM, "clk_div_aomm", "clk_aomm_andgt",
+         CLK_SET_RATE_PARENT, 0x100, 7, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_IOPERI, "clk_div_ioperi", "clk_mux_ioperi",
+         CLK_SET_RATE_PARENT, 0x108, 11, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+};
+
+/* clk_pmuctrl */
+/* PMU register offsets need to be shifted left by 2 bits */
+static const struct hisi_gate_clock hi3660_pmu_gate_clks[] = {
+       { HI3660_GATE_ABB_192, "clk_gate_abb_192", "clkin_sys",
+         CLK_SET_RATE_PARENT, (0x10a << 2), 3, 0, },
+};
+
+/* clk_pctrl */
+static const struct hisi_gate_clock hi3660_pctrl_gate_clks[] = {
+       { HI3660_GATE_UFS_TCXO_EN, "clk_gate_ufs_tcxo_en",
+         "clk_gate_abb_192", CLK_SET_RATE_PARENT, 0x10, 0,
+         CLK_GATE_HIWORD_MASK, },
+       { HI3660_GATE_USB_TCXO_EN, "clk_gate_usb_tcxo_en", "clk_gate_abb_192",
+         CLK_SET_RATE_PARENT, 0x10, 1, CLK_GATE_HIWORD_MASK, },
+};
+
+/* clk_sctrl */
+static const struct hisi_gate_clock hi3660_sctrl_gate_sep_clks[] = {
+       { HI3660_PCLK_AO_GPIO0, "pclk_ao_gpio0", "clk_div_aobus",
+         CLK_SET_RATE_PARENT, 0x160, 11, 0, },
+       { HI3660_PCLK_AO_GPIO1, "pclk_ao_gpio1", "clk_div_aobus",
+         CLK_SET_RATE_PARENT, 0x160, 12, 0, },
+       { HI3660_PCLK_AO_GPIO2, "pclk_ao_gpio2", "clk_div_aobus",
+         CLK_SET_RATE_PARENT, 0x160, 13, 0, },
+       { HI3660_PCLK_AO_GPIO3, "pclk_ao_gpio3", "clk_div_aobus",
+         CLK_SET_RATE_PARENT, 0x160, 14, 0, },
+       { HI3660_PCLK_AO_GPIO4, "pclk_ao_gpio4", "clk_div_aobus",
+         CLK_SET_RATE_PARENT, 0x160, 21, 0, },
+       { HI3660_PCLK_AO_GPIO5, "pclk_ao_gpio5", "clk_div_aobus",
+         CLK_SET_RATE_PARENT, 0x160, 22, 0, },
+       { HI3660_PCLK_AO_GPIO6, "pclk_ao_gpio6", "clk_div_aobus",
+         CLK_SET_RATE_PARENT, 0x160, 25, 0, },
+       { HI3660_PCLK_GATE_MMBUF, "pclk_gate_mmbuf", "pclk_div_mmbuf",
+         CLK_SET_RATE_PARENT, 0x170, 23, 0, },
+       { HI3660_CLK_GATE_DSS_AXI_MM, "clk_gate_dss_axi_mm", "aclk_mux_mmbuf",
+         CLK_SET_RATE_PARENT, 0x170, 24, 0, },
+};
+
+static const struct hisi_gate_clock hi3660_sctrl_gate_clks[] = {
+       { HI3660_PCLK_MMBUF_ANDGT, "pclk_mmbuf_andgt", "clk_sw_mmbuf",
+         CLK_SET_RATE_PARENT, 0x258, 7, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_MMBUF_PLL_ANDGT, "clk_mmbuf_pll_andgt", "clk_ppll0",
+         CLK_SET_RATE_PARENT, 0x260, 11, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_FLL_MMBUF_ANDGT, "clk_fll_mmbuf_andgt", "clk_fll_src",
+         CLK_SET_RATE_PARENT, 0x260, 12, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_SYS_MMBUF_ANDGT, "clk_sys_mmbuf_andgt", "clkin_sys",
+         CLK_SET_RATE_PARENT, 0x260, 13, CLK_GATE_HIWORD_MASK, },
+       { HI3660_CLK_GATE_PCIEPHY_GT, "clk_gate_pciephy_gt", "clk_ppll0",
+         CLK_SET_RATE_PARENT, 0x268, 11, CLK_GATE_HIWORD_MASK, },
+};
+
+static const char *const
+aclk_mux_mmbuf_p[] = {"aclk_div_mmbuf", "clk_gate_aomm",};
+static const char *const
+clk_sw_mmbuf_p[] = {"clk_sys_mmbuf_andgt", "clk_fll_mmbuf_andgt",
+                   "aclk_mux_mmbuf", "aclk_mux_mmbuf"};
+
+static const struct hisi_mux_clock hi3660_sctrl_mux_clks[] = {
+       { HI3660_ACLK_MUX_MMBUF, "aclk_mux_mmbuf", aclk_mux_mmbuf_p,
+         ARRAY_SIZE(aclk_mux_mmbuf_p), CLK_SET_RATE_PARENT, 0x250, 12, 1,
+         CLK_MUX_HIWORD_MASK, },
+       { HI3660_CLK_SW_MMBUF, "clk_sw_mmbuf", clk_sw_mmbuf_p,
+         ARRAY_SIZE(clk_sw_mmbuf_p), CLK_SET_RATE_PARENT, 0x258, 8, 2,
+         CLK_MUX_HIWORD_MASK, },
+};
+
+static const struct hisi_divider_clock hi3660_sctrl_divider_clks[] = {
+       { HI3660_CLK_DIV_AOBUS, "clk_div_aobus", "clk_ppll0",
+         CLK_SET_RATE_PARENT, 0x254, 0, 6, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_PCLK_DIV_MMBUF, "pclk_div_mmbuf", "pclk_mmbuf_andgt",
+         CLK_SET_RATE_PARENT, 0x258, 10, 2, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_ACLK_DIV_MMBUF, "aclk_div_mmbuf", "clk_mmbuf_pll_andgt",
+         CLK_SET_RATE_PARENT, 0x258, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+       { HI3660_CLK_DIV_PCIEPHY, "clk_div_pciephy", "clk_gate_pciephy_gt",
+         CLK_SET_RATE_PARENT, 0x268, 12, 4, CLK_DIVIDER_HIWORD_MASK, 0, },
+};
+
+/* clk_iomcu */
+static const struct hisi_gate_clock hi3660_iomcu_gate_sep_clks[] = {
+       { HI3660_CLK_I2C0_IOMCU, "clk_i2c0_iomcu", "clk_fll_src",
+         CLK_SET_RATE_PARENT, 0x10, 3, 0, },
+       { HI3660_CLK_I2C1_IOMCU, "clk_i2c1_iomcu", "clk_fll_src",
+         CLK_SET_RATE_PARENT, 0x10, 4, 0, },
+       { HI3660_CLK_I2C2_IOMCU, "clk_i2c2_iomcu", "clk_fll_src",
+         CLK_SET_RATE_PARENT, 0x10, 5, 0, },
+       { HI3660_CLK_I2C6_IOMCU, "clk_i2c6_iomcu", "clk_fll_src",
+         CLK_SET_RATE_PARENT, 0x10, 27, 0, },
+       { HI3660_CLK_IOMCU_PERI0, "iomcu_peri0", "clk_ppll0",
+         CLK_SET_RATE_PARENT, 0x90, 0, 0, },
+};
+
+static void hi3660_clk_iomcu_init(struct device_node *np)
+{
+       struct hisi_clock_data *clk_data;
+       int nr = ARRAY_SIZE(hi3660_iomcu_gate_sep_clks);
+
+       clk_data = hisi_clk_init(np, nr);
+       if (!clk_data)
+               return;
+
+       hisi_clk_register_gate_sep(hi3660_iomcu_gate_sep_clks,
+                                  ARRAY_SIZE(hi3660_iomcu_gate_sep_clks),
+                                  clk_data);
+}
+
+static void hi3660_clk_pmuctrl_init(struct device_node *np)
+{
+       struct hisi_clock_data *clk_data;
+       int nr = ARRAY_SIZE(hi3660_pmu_gate_clks);
+
+       clk_data = hisi_clk_init(np, nr);
+       if (!clk_data)
+               return;
+
+       hisi_clk_register_gate(hi3660_pmu_gate_clks,
+                              ARRAY_SIZE(hi3660_pmu_gate_clks), clk_data);
+}
+
+static void hi3660_clk_pctrl_init(struct device_node *np)
+{
+       struct hisi_clock_data *clk_data;
+       int nr = ARRAY_SIZE(hi3660_pctrl_gate_clks);
+
+       clk_data = hisi_clk_init(np, nr);
+       if (!clk_data)
+               return;
+       hisi_clk_register_gate(hi3660_pctrl_gate_clks,
+                              ARRAY_SIZE(hi3660_pctrl_gate_clks), clk_data);
+}
+
+static void hi3660_clk_sctrl_init(struct device_node *np)
+{
+       struct hisi_clock_data *clk_data;
+       int nr = ARRAY_SIZE(hi3660_sctrl_gate_clks) +
+                ARRAY_SIZE(hi3660_sctrl_gate_sep_clks) +
+                ARRAY_SIZE(hi3660_sctrl_mux_clks) +
+                ARRAY_SIZE(hi3660_sctrl_divider_clks);
+
+       clk_data = hisi_clk_init(np, nr);
+       if (!clk_data)
+               return;
+       hisi_clk_register_gate(hi3660_sctrl_gate_clks,
+                              ARRAY_SIZE(hi3660_sctrl_gate_clks), clk_data);
+       hisi_clk_register_gate_sep(hi3660_sctrl_gate_sep_clks,
+                                  ARRAY_SIZE(hi3660_sctrl_gate_sep_clks),
+                                  clk_data);
+       hisi_clk_register_mux(hi3660_sctrl_mux_clks,
+                             ARRAY_SIZE(hi3660_sctrl_mux_clks), clk_data);
+       hisi_clk_register_divider(hi3660_sctrl_divider_clks,
+                                 ARRAY_SIZE(hi3660_sctrl_divider_clks),
+                                 clk_data);
+}
+
+static void hi3660_clk_crgctrl_init(struct device_node *np)
+{
+       struct hisi_clock_data *clk_data;
+       int nr = ARRAY_SIZE(hi3660_fixed_rate_clks) +
+                ARRAY_SIZE(hi3660_crgctrl_gate_sep_clks) +
+                ARRAY_SIZE(hi3660_crgctrl_gate_clks) +
+                ARRAY_SIZE(hi3660_crgctrl_mux_clks) +
+                ARRAY_SIZE(hi3660_crg_fixed_factor_clks) +
+                ARRAY_SIZE(hi3660_crgctrl_divider_clks);
+
+       clk_data = hisi_clk_init(np, nr);
+       if (!clk_data)
+               return;
+
+       hisi_clk_register_fixed_rate(hi3660_fixed_rate_clks,
+                                    ARRAY_SIZE(hi3660_fixed_rate_clks),
+                                    clk_data);
+       hisi_clk_register_gate_sep(hi3660_crgctrl_gate_sep_clks,
+                                  ARRAY_SIZE(hi3660_crgctrl_gate_sep_clks),
+                                  clk_data);
+       hisi_clk_register_gate(hi3660_crgctrl_gate_clks,
+                              ARRAY_SIZE(hi3660_crgctrl_gate_clks),
+                              clk_data);
+       hisi_clk_register_mux(hi3660_crgctrl_mux_clks,
+                             ARRAY_SIZE(hi3660_crgctrl_mux_clks),
+                             clk_data);
+       hisi_clk_register_fixed_factor(hi3660_crg_fixed_factor_clks,
+                                      ARRAY_SIZE(hi3660_crg_fixed_factor_clks),
+                                      clk_data);
+       hisi_clk_register_divider(hi3660_crgctrl_divider_clks,
+                                 ARRAY_SIZE(hi3660_crgctrl_divider_clks),
+                                 clk_data);
+}
+
+static const struct of_device_id hi3660_clk_match_table[] = {
+       { .compatible = "hisilicon,hi3660-crgctrl",
+         .data = hi3660_clk_crgctrl_init },
+       { .compatible = "hisilicon,hi3660-pctrl",
+         .data = hi3660_clk_pctrl_init },
+       { .compatible = "hisilicon,hi3660-pmuctrl",
+         .data = hi3660_clk_pmuctrl_init },
+       { .compatible = "hisilicon,hi3660-sctrl",
+         .data = hi3660_clk_sctrl_init },
+       { .compatible = "hisilicon,hi3660-iomcu",
+         .data = hi3660_clk_iomcu_init },
+       { }
+};
+
+static int hi3660_clk_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *np = pdev->dev.of_node;
+       void (*init_func)(struct device_node *np);
+
+       init_func = of_device_get_match_data(dev);
+       if (!init_func)
+               return -ENODEV;
+
+       init_func(np);
+
+       return 0;
+}
+
+static struct platform_driver hi3660_clk_driver = {
+       .probe          = hi3660_clk_probe,
+       .driver         = {
+               .name   = "hi3660-clk",
+               .of_match_table = hi3660_clk_match_table,
+       },
+};
+
+static int __init hi3660_clk_init(void)
+{
+       return platform_driver_register(&hi3660_clk_driver);
+}
+core_initcall(hi3660_clk_init);
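Each compatible string in the match table carries its init routine in .data, so one probe function serves all five controller blocks; of_device_get_match_data() hands the probe the function pointer for whichever node matched. A toy standalone model of that dispatch, with illustrative names:

#include <stdio.h>
#include <string.h>

struct match {
        const char *compatible;
        void (*init)(const char *name);
};

static void crgctrl_init(const char *n) { printf("init %s\n", n); }
static void sctrl_init(const char *n)   { printf("init %s\n", n); }

static const struct match table[] = {
        { "hisilicon,hi3660-crgctrl", crgctrl_init },
        { "hisilicon,hi3660-sctrl",   sctrl_init },
        { NULL, NULL }
};

int main(void)
{
        const char *node = "hisilicon,hi3660-sctrl";
        const struct match *m;

        for (m = table; m->compatible; m++)
                if (!strcmp(m->compatible, node))
                        m->init(node);  /* the kernel resolves .data instead */
        return 0;
}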
index a47812f56a17bb97d5a98521b67f1ace784ad9a4..7908bc3c9ec73c0bbc498197d774bef5075797cf 100644 (file)
@@ -120,6 +120,7 @@ struct clk *hisi_register_clkgate_sep(struct device *dev, const char *name,
        sclk->bit_idx = bit_idx;
        sclk->flags = clk_gate_flags;
        sclk->hw.init = &init;
+       sclk->lock = lock;
 
        clk = clk_register(dev, &sclk->hw);
        if (IS_ERR(clk))
index 42ffc1c92bab0ca95454eb4ebfff28c78aa19196..c07df719b8a35d16ed88014dcaca37c6d957d158 100644 (file)
@@ -592,15 +592,20 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 
        imx6q_mmdc_ch1_mask_handshake(base);
 
-       /*
-        * The LDB_DI0/1_SEL muxes are registered read-only due to a hardware
-        * bug. Set the muxes to the requested values before registering the
-        * ldb_di_sel clocks.
-        */
-       init_ldb_clks(np, base);
+       if (clk_on_imx6qp()) {
+               clk[IMX6QDL_CLK_LDB_DI0_SEL]      = imx_clk_mux_flags("ldb_di0_sel", base + 0x2c, 9,  3, ldb_di_sels,      ARRAY_SIZE(ldb_di_sels), CLK_SET_RATE_PARENT);
+               clk[IMX6QDL_CLK_LDB_DI1_SEL]      = imx_clk_mux_flags("ldb_di1_sel", base + 0x2c, 12, 3, ldb_di_sels,      ARRAY_SIZE(ldb_di_sels), CLK_SET_RATE_PARENT);
+       } else {
+               /*
+                * The LDB_DI0/1_SEL muxes are registered read-only due to a hardware
+                * bug. Set the muxes to the requested values before registering the
+                * ldb_di_sel clocks.
+                */
+               init_ldb_clks(np, base);
 
-       clk[IMX6QDL_CLK_LDB_DI0_SEL]      = imx_clk_mux_ldb("ldb_di0_sel", base + 0x2c, 9,  3, ldb_di_sels,      ARRAY_SIZE(ldb_di_sels));
-       clk[IMX6QDL_CLK_LDB_DI1_SEL]      = imx_clk_mux_ldb("ldb_di1_sel", base + 0x2c, 12, 3, ldb_di_sels,      ARRAY_SIZE(ldb_di_sels));
+               clk[IMX6QDL_CLK_LDB_DI0_SEL]      = imx_clk_mux_ldb("ldb_di0_sel", base + 0x2c, 9,  3, ldb_di_sels,      ARRAY_SIZE(ldb_di_sels));
+               clk[IMX6QDL_CLK_LDB_DI1_SEL]      = imx_clk_mux_ldb("ldb_di1_sel", base + 0x2c, 12, 3, ldb_di_sels,      ARRAY_SIZE(ldb_di_sels));
+       }
        clk[IMX6QDL_CLK_IPU1_DI0_PRE_SEL] = imx_clk_mux_flags("ipu1_di0_pre_sel", base + 0x34, 6,  3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
        clk[IMX6QDL_CLK_IPU1_DI1_PRE_SEL] = imx_clk_mux_flags("ipu1_di1_pre_sel", base + 0x34, 15, 3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
        clk[IMX6QDL_CLK_IPU2_DI0_PRE_SEL] = imx_clk_mux_flags("ipu2_di0_pre_sel", base + 0x38, 6,  3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
index e7c7353a86fc62f319a39c5a42cdc06214f4744c..ae1d31be906e4d5a169500f91b9fdca5b29b6ac2 100644 (file)
@@ -803,6 +803,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
        clks[IMX7D_DRAM_PHYM_ROOT_CLK] = imx_clk_gate4("dram_phym_root_clk", "dram_phym_cg", base + 0x4130, 0);
        clks[IMX7D_DRAM_PHYM_ALT_ROOT_CLK] = imx_clk_gate4("dram_phym_alt_root_clk", "dram_phym_alt_post_div", base + 0x4130, 0);
        clks[IMX7D_DRAM_ALT_ROOT_CLK] = imx_clk_gate4("dram_alt_root_clk", "dram_alt_post_div", base + 0x4130, 0);
+       clks[IMX7D_OCOTP_CLK] = imx_clk_gate4("ocotp_clk", "ipg_root_clk", base + 0x4230, 0);
        clks[IMX7D_USB_HSIC_ROOT_CLK] = imx_clk_gate4("usb_hsic_root_clk", "usb_hsic_post_div", base + 0x4420, 0);
        clks[IMX7D_SDMA_CORE_CLK] = imx_clk_gate4("sdma_root_clk", "ahb_root_clk", base + 0x4480, 0);
        clks[IMX7D_PCIE_CTRL_ROOT_CLK] = imx_clk_gate4("pcie_ctrl_root_clk", "pcie_ctrl_post_div", base + 0x4600, 0);
index ed3a2df536ea92e7960026f06db778585b75f2fb..f1099167ba3138732cf277e19912dfcf4f263f6c 100644 (file)
@@ -21,6 +21,9 @@
 #define PLL_NUM_OFFSET         0x10
 #define PLL_DENOM_OFFSET       0x20
 
+#define PLL_VF610_NUM_OFFSET   0x20
+#define PLL_VF610_DENOM_OFFSET 0x30
+
 #define BM_PLL_POWER           (0x1 << 12)
 #define BM_PLL_LOCK            (0x1 << 31)
 #define IMX7_ENET_PLL_POWER    (0x1 << 5)
@@ -300,6 +303,99 @@ static const struct clk_ops clk_pllv3_av_ops = {
        .set_rate       = clk_pllv3_av_set_rate,
 };
 
+struct clk_pllv3_vf610_mf {
+       u32 mfi;        /* integer part, can be 20 or 22 */
+       u32 mfn;        /* numerator, 30-bit value, must be less than mfd */
+       u32 mfd;        /* denominator, 30-bit value */
+};
+
+static unsigned long clk_pllv3_vf610_mf_to_rate(unsigned long parent_rate,
+               struct clk_pllv3_vf610_mf mf)
+{
+       u64 temp64;
+
+       temp64 = parent_rate;
+       temp64 *= mf.mfn;
+       do_div(temp64, mf.mfd);
+
+       return (parent_rate * mf.mfi) + temp64;
+}
+
+static struct clk_pllv3_vf610_mf clk_pllv3_vf610_rate_to_mf(
+               unsigned long parent_rate, unsigned long rate)
+{
+       struct clk_pllv3_vf610_mf mf;
+       u64 temp64;
+
+       mf.mfi = (rate >= 22 * parent_rate) ? 22 : 20;
+       mf.mfd = 0x3fffffff;    /* use max supported value for best accuracy */
+
+       if (rate <= parent_rate * mf.mfi)
+               mf.mfn = 0;
+       else if (rate >= parent_rate * (mf.mfi + 1))
+               mf.mfn = mf.mfd - 1;
+       else {
+               /* rate = parent_rate * (mfi + mfn/mfd) */
+               temp64 = rate - parent_rate * mf.mfi;
+               temp64 *= mf.mfd;
+               do_div(temp64, parent_rate);
+               mf.mfn = temp64;
+       }
+
+       return mf;
+}
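A quick numeric check of the two helpers above, assuming a 24 MHz reference (typical for Vybrid parts, but an assumption here rather than something the patch states). For a 500 MHz target, 22 x 24 MHz = 528 MHz overshoots, so mfi = 20 and the fraction supplies the remaining 20 MHz; the sketch covers only the middle branch and omits the band-edge clamping for brevity:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t parent = 24000000, rate = 500000000;
        uint32_t mfi = (rate >= 22 * parent) ? 22 : 20; /* 528 MHz > 500, so 20 */
        uint32_t mfd = 0x3fffffff;      /* largest 30-bit denominator */
        uint64_t mfn = (rate - parent * mfi) * mfd / parent;
        uint64_t back = parent * mfi + parent * mfn / mfd;

        /* rate = parent * (mfi + mfn/mfd); rounding error below 1 Hz */
        printf("mfi=%u mfn=%llu -> %llu Hz\n",
               mfi, (unsigned long long)mfn, (unsigned long long)back);
        return 0;
}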
+
+static unsigned long clk_pllv3_vf610_recalc_rate(struct clk_hw *hw,
+                                             unsigned long parent_rate)
+{
+       struct clk_pllv3 *pll = to_clk_pllv3(hw);
+       struct clk_pllv3_vf610_mf mf;
+
+       mf.mfn = readl_relaxed(pll->base + PLL_VF610_NUM_OFFSET);
+       mf.mfd = readl_relaxed(pll->base + PLL_VF610_DENOM_OFFSET);
+       mf.mfi = (readl_relaxed(pll->base) & pll->div_mask) ? 22 : 20;
+
+       return clk_pllv3_vf610_mf_to_rate(parent_rate, mf);
+}
+
+static long clk_pllv3_vf610_round_rate(struct clk_hw *hw, unsigned long rate,
+                                   unsigned long *prate)
+{
+       struct clk_pllv3_vf610_mf mf = clk_pllv3_vf610_rate_to_mf(*prate, rate);
+
+       return clk_pllv3_vf610_mf_to_rate(*prate, mf);
+}
+
+static int clk_pllv3_vf610_set_rate(struct clk_hw *hw, unsigned long rate,
+               unsigned long parent_rate)
+{
+       struct clk_pllv3 *pll = to_clk_pllv3(hw);
+       struct clk_pllv3_vf610_mf mf =
+                       clk_pllv3_vf610_rate_to_mf(parent_rate, rate);
+       u32 val;
+
+       val = readl_relaxed(pll->base);
+       if (mf.mfi == 20)
+               val &= ~pll->div_mask;  /* clear bit for mfi=20 */
+       else
+               val |= pll->div_mask;   /* set bit for mfi=22 */
+       writel_relaxed(val, pll->base);
+
+       writel_relaxed(mf.mfn, pll->base + PLL_VF610_NUM_OFFSET);
+       writel_relaxed(mf.mfd, pll->base + PLL_VF610_DENOM_OFFSET);
+
+       return clk_pllv3_wait_lock(pll);
+}
+
+static const struct clk_ops clk_pllv3_vf610_ops = {
+       .prepare        = clk_pllv3_prepare,
+       .unprepare      = clk_pllv3_unprepare,
+       .is_prepared    = clk_pllv3_is_prepared,
+       .recalc_rate    = clk_pllv3_vf610_recalc_rate,
+       .round_rate     = clk_pllv3_vf610_round_rate,
+       .set_rate       = clk_pllv3_vf610_set_rate,
+};
+
 static unsigned long clk_pllv3_enet_recalc_rate(struct clk_hw *hw,
                                                unsigned long parent_rate)
 {
@@ -334,6 +430,9 @@ struct clk *imx_clk_pllv3(enum imx_pllv3_type type, const char *name,
        case IMX_PLLV3_SYS:
                ops = &clk_pllv3_sys_ops;
                break;
+       case IMX_PLLV3_SYS_VF610:
+               ops = &clk_pllv3_vf610_ops;
+               break;
        case IMX_PLLV3_USB_VF610:
                pll->div_shift = 1;
        case IMX_PLLV3_USB:
index 0476353ab423f38155a4ed1c83e6b29af479c011..59b1863deb88863fcf9e1b113ea14303dfcb5f04 100644 (file)
@@ -219,8 +219,8 @@ static void __init vf610_clocks_init(struct device_node *ccm_node)
        clk[VF610_CLK_PLL6_BYPASS_SRC] = imx_clk_mux("pll6_bypass_src", PLL6_CTRL, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
        clk[VF610_CLK_PLL7_BYPASS_SRC] = imx_clk_mux("pll7_bypass_src", PLL7_CTRL, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
 
-       clk[VF610_CLK_PLL1] = imx_clk_pllv3(IMX_PLLV3_GENERIC, "pll1", "pll1_bypass_src", PLL1_CTRL, 0x1);
-       clk[VF610_CLK_PLL2] = imx_clk_pllv3(IMX_PLLV3_GENERIC, "pll2", "pll2_bypass_src", PLL2_CTRL, 0x1);
+       clk[VF610_CLK_PLL1] = imx_clk_pllv3(IMX_PLLV3_SYS_VF610, "pll1", "pll1_bypass_src", PLL1_CTRL, 0x1);
+       clk[VF610_CLK_PLL2] = imx_clk_pllv3(IMX_PLLV3_SYS_VF610, "pll2", "pll2_bypass_src", PLL2_CTRL, 0x1);
        clk[VF610_CLK_PLL3] = imx_clk_pllv3(IMX_PLLV3_USB_VF610,     "pll3", "pll3_bypass_src", PLL3_CTRL, 0x2);
        clk[VF610_CLK_PLL4] = imx_clk_pllv3(IMX_PLLV3_AV,      "pll4", "pll4_bypass_src", PLL4_CTRL, 0x7f);
        clk[VF610_CLK_PLL5] = imx_clk_pllv3(IMX_PLLV3_ENET,    "pll5", "pll5_bypass_src", PLL5_CTRL, 0x3);
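The retype matters because the generic pllv3 recalc can only report parent x20 or parent x22 (whichever the div_mask bit selects), while the vf610 ops added above expose the fractional MFI + MFN/MFD path, making intermediate rates reachable. A hypothetical before/after at an assumed 24 MHz parent:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t parent = 24000000;
        uint32_t mfn = 894784852, mfd = 0x3fffffff;     /* ~5/6 fraction */

        /* IMX_PLLV3_GENERIC: only the two integer multiples exist */
        printf("generic: %llu or %llu Hz\n",
               (unsigned long long)(parent * 20),
               (unsigned long long)(parent * 22));

        /* IMX_PLLV3_SYS_VF610: mfi + mfn/mfd reaches rates in between */
        printf("vf610:   %llu Hz\n",
               (unsigned long long)(parent * 20 + parent * mfn / mfd));
        return 0;
}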
index 4afad3b96a61cdf1a7e4ddca6529884ba78e3b96..e1f5e425db732b9fd5230bb34d26fa42311cb634 100644 (file)
@@ -34,6 +34,7 @@ enum imx_pllv3_type {
        IMX_PLLV3_AV,
        IMX_PLLV3_ENET,
        IMX_PLLV3_ENET_IMX7,
+       IMX_PLLV3_SYS_VF610,
 };
 
 struct clk *imx_clk_pllv3(enum imx_pllv3_type type, const char *name,
index 0bd631a41f6a9287d3b1ed668bd604df595748a6..a01ef7806aedc2e118c02c548d5d521313e319fb 100644 (file)
@@ -8,52 +8,53 @@ config COMMON_CLK_MEDIATEK
 
 config COMMON_CLK_MT2701
        bool "Clock driver for Mediatek MT2701"
+       depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST
        select COMMON_CLK_MEDIATEK
-       default ARCH_MEDIATEK
+       default ARCH_MEDIATEK && ARM
        ---help---
          This driver supports Mediatek MT2701 basic clocks.
 
 config COMMON_CLK_MT2701_MMSYS
        bool "Clock driver for Mediatek MT2701 mmsys"
-       select COMMON_CLK_MT2701
+       depends on COMMON_CLK_MT2701
        ---help---
          This driver supports Mediatek MT2701 mmsys clocks.
 
 config COMMON_CLK_MT2701_IMGSYS
        bool "Clock driver for Mediatek MT2701 imgsys"
-       select COMMON_CLK_MT2701
+       depends on COMMON_CLK_MT2701
        ---help---
          This driver supports Mediatek MT2701 imgsys clocks.
 
 config COMMON_CLK_MT2701_VDECSYS
        bool "Clock driver for Mediatek MT2701 vdecsys"
-       select COMMON_CLK_MT2701
+       depends on COMMON_CLK_MT2701
        ---help---
          This driver supports Mediatek MT2701 vdecsys clocks.
 
 config COMMON_CLK_MT2701_HIFSYS
        bool "Clock driver for Mediatek MT2701 hifsys"
-       select COMMON_CLK_MT2701
+       depends on COMMON_CLK_MT2701
        ---help---
          This driver supports Mediatek MT2701 hifsys clocks.
 
 config COMMON_CLK_MT2701_ETHSYS
        bool "Clock driver for Mediatek MT2701 ethsys"
-       select COMMON_CLK_MT2701
+       depends on COMMON_CLK_MT2701
        ---help---
          This driver supports Mediatek MT2701 ethsys clocks.
 
 config COMMON_CLK_MT2701_BDPSYS
        bool "Clock driver for Mediatek MT2701 bdpsys"
-       select COMMON_CLK_MT2701
+       depends on COMMON_CLK_MT2701
        ---help---
          This driver supports Mediatek MT2701 bdpsys clocks.
 
 config COMMON_CLK_MT8135
        bool "Clock driver for Mediatek MT8135"
-       depends on ARCH_MEDIATEK || COMPILE_TEST
+       depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST
        select COMMON_CLK_MEDIATEK
-       default ARCH_MEDIATEK
+       default ARCH_MEDIATEK && ARM
        ---help---
          This driver supports Mediatek MT8135 clocks.
 
index 3f1be46cbb33749aa1adbf9f073cfa957348f10c..888494d4fb8acb108ad40f271aa4a5d9dd709d3a 100644 (file)
@@ -607,7 +607,6 @@ static int meson8b_clkc_probe(struct platform_device *pdev)
        /* Populate the base address for the MPEG clks */
        meson8b_mpeg_clk_sel.reg = clk_base + (u32)meson8b_mpeg_clk_sel.reg;
        meson8b_mpeg_clk_div.reg = clk_base + (u32)meson8b_mpeg_clk_div.reg;
-       meson8b_clk81.reg = clk_base + (u32)meson8b_clk81.reg;
 
        /* Populate base address for gates */
        for (i = 0; i < ARRAY_SIZE(meson8b_clk_gates); i++)
index d9ae97fb43c45ffd9c528e2b1e094d060ce0473f..d71c7fd5da168d24f9a6f63deb4ca0ff00755e63 100644 (file)
@@ -9,7 +9,7 @@ obj-$(CONFIG_ARMADA_39X_CLK)    += armada-39x.o
 obj-$(CONFIG_ARMADA_37XX_CLK)  += armada-37xx-xtal.o
 obj-$(CONFIG_ARMADA_37XX_CLK)  += armada-37xx-tbg.o
 obj-$(CONFIG_ARMADA_37XX_CLK)  += armada-37xx-periph.o
-obj-$(CONFIG_ARMADA_XP_CLK)    += armada-xp.o
+obj-$(CONFIG_ARMADA_XP_CLK)    += armada-xp.o mv98dx3236.o
 obj-$(CONFIG_ARMADA_AP806_SYSCON) += ap806-system-controller.o
 obj-$(CONFIG_ARMADA_CP110_SYSCON) += cp110-system-controller.o
 obj-$(CONFIG_DOVE_CLK)         += dove.o dove-divider.o
index 8181b919f062c3b40ad51b6657840357b574baac..f17702107ac5e89e1c8467fb7ed68cd180b43010 100644 (file)
@@ -55,21 +55,39 @@ static int ap806_syscon_clk_probe(struct platform_device *pdev)
 
        freq_mode = reg & AP806_SAR_CLKFREQ_MODE_MASK;
        switch (freq_mode) {
-       case 0x0 ... 0x5:
+       case 0x0:
+       case 0x1:
                cpuclk_freq = 2000;
                break;
-       case 0x6 ... 0xB:
+       case 0x6:
+       case 0x7:
                cpuclk_freq = 1800;
                break;
-       case 0xC ... 0x11:
+       case 0x4:
+       case 0xB:
+       case 0xD:
                cpuclk_freq = 1600;
                break;
-       case 0x12 ... 0x16:
+       case 0x1a:
                cpuclk_freq = 1400;
                break;
-       case 0x17 ... 0x19:
+       case 0x14:
+       case 0x17:
                cpuclk_freq = 1300;
                break;
+       case 0x19:
+               cpuclk_freq = 1200;
+               break;
+       case 0x13:
+       case 0x1d:
+               cpuclk_freq = 1000;
+               break;
+       case 0x1c:
+               cpuclk_freq = 800;
+               break;
+       case 0x1b:
+               cpuclk_freq = 600;
+               break;
        default:
                dev_err(&pdev->dev, "invalid SAR value\n");
                return -EINVAL;
index b3094315a3c0faa89926dcf7442d5034f39b4f15..0ec44ae9a2a2676ea383925d3841cffa0dd93a67 100644 (file)
@@ -52,6 +52,12 @@ static u32 __init axp_get_tclk_freq(void __iomem *sar)
        return 250000000;
 }
 
+/* The MV98DX3236 TCLK frequency is fixed at 200 MHz */
+static u32 __init mv98dx3236_get_tclk_freq(void __iomem *sar)
+{
+       return 200000000;
+}
+
 static const u32 axp_cpu_freqs[] __initconst = {
        1000000000,
        1066000000,
@@ -89,6 +95,12 @@ static u32 __init axp_get_cpu_freq(void __iomem *sar)
        return cpu_freq;
 }
 
+/* The MV98DX3236 CPU clock frequency is fixed at 800 MHz */
+static u32 __init mv98dx3236_get_cpu_freq(void __iomem *sar)
+{
+       return 800000000;
+}
+
 static const int axp_nbclk_ratios[32][2] __initconst = {
        {0, 1}, {1, 2}, {2, 2}, {2, 2},
        {1, 2}, {1, 2}, {1, 1}, {2, 3},
@@ -158,6 +170,11 @@ static const struct coreclk_soc_desc axp_coreclks = {
        .num_ratios = ARRAY_SIZE(axp_coreclk_ratios),
 };
 
+static const struct coreclk_soc_desc mv98dx3236_coreclks = {
+       .get_tclk_freq = mv98dx3236_get_tclk_freq,
+       .get_cpu_freq = mv98dx3236_get_cpu_freq,
+};
+
 /*
  * Clock Gating Control
  */
@@ -195,6 +212,15 @@ static const struct clk_gating_soc_desc axp_gating_desc[] __initconst = {
        { }
 };
 
+static const struct clk_gating_soc_desc mv98dx3236_gating_desc[] __initconst = {
+       { "ge1", NULL, 3, 0 },
+       { "ge0", NULL, 4, 0 },
+       { "pex00", NULL, 5, 0 },
+       { "sdio", NULL, 17, 0 },
+       { "xor0", NULL, 22, 0 },
+       { }
+};
+
 static void __init axp_clk_init(struct device_node *np)
 {
        struct device_node *cgnp =
index d1e5863d337525ba5138c1505bca994e1d2b3268..8491979f40965e2dbfafa20c23c4b4c54f9b40fc 100644 (file)
@@ -71,6 +71,10 @@ static const struct clk_corediv_desc mvebu_corediv_desc[] = {
        { .mask = 0x3f, .offset = 8, .fieldbit = 1 }, /* NAND clock */
 };
 
+static const struct clk_corediv_desc mv98dx3236_corediv_desc[] = {
+       { .mask = 0x0f, .offset = 6, .fieldbit = 26 }, /* NAND clock */
+};
+
 #define to_corediv_clk(p) container_of(p, struct clk_corediv, hw)
 
 static int clk_corediv_is_enabled(struct clk_hw *hwclk)
@@ -232,6 +236,18 @@ static const struct clk_corediv_soc_desc armada375_corediv_soc = {
        .ratio_offset = 0x4,
 };
 
+static const struct clk_corediv_soc_desc mv98dx3236_corediv_soc = {
+       .descs = mv98dx3236_corediv_desc,
+       .ndescs = ARRAY_SIZE(mv98dx3236_corediv_desc),
+       .ops = {
+               .recalc_rate = clk_corediv_recalc_rate,
+               .round_rate = clk_corediv_round_rate,
+               .set_rate = clk_corediv_set_rate,
+       },
+       .ratio_reload = BIT(10),
+       .ratio_offset = 0x8,
+};
+
 static void __init
 mvebu_corediv_clk_init(struct device_node *node,
                       const struct clk_corediv_soc_desc *soc_desc)
@@ -313,3 +329,10 @@ static void __init armada380_corediv_clk_init(struct device_node *node)
 }
 CLK_OF_DECLARE(armada380_corediv_clk, "marvell,armada-380-corediv-clock",
               armada380_corediv_clk_init);
+
+static void __init mv98dx3236_corediv_clk_init(struct device_node *node)
+{
+       return mvebu_corediv_clk_init(node, &mv98dx3236_corediv_soc);
+}
+CLK_OF_DECLARE(mv98dx3236_corediv_clk, "marvell,mv98dx3236-corediv-clock",
+              mv98dx3236_corediv_clk_init);
index 5837eb8a212fbdcd8446ff9da77f393cc05c128a..044892b6534d93ed3427ea405f78117349136cf7 100644 (file)
@@ -245,3 +245,11 @@ cpuclk_out:
 
 CLK_OF_DECLARE(armada_xp_cpu_clock, "marvell,armada-xp-cpu-clock",
                                         of_cpu_clk_setup);
+
+static void __init of_mv98dx3236_cpu_clk_setup(struct device_node *node)
+{
+       of_clk_add_provider(node, of_clk_src_simple_get, NULL);
+}
+
+CLK_OF_DECLARE(mv98dx3236_cpu_clock, "marvell,mv98dx3236-cpu-clock",
+                                        of_mv98dx3236_cpu_clk_setup);
index 32e5b43c086f3c24c35550540d3ea3cca12675d9..6b11d7b3e0e090a9369ca264f08038c34cf8d0cd 100644 (file)
@@ -64,8 +64,11 @@ enum {
 #define CP110_GATE_NAND                        2
 #define CP110_GATE_PPV2                        3
 #define CP110_GATE_SDIO                        4
+#define CP110_GATE_MG                  5
+#define CP110_GATE_MG_CORE             6
 #define CP110_GATE_XOR1                        7
 #define CP110_GATE_XOR0                        8
+#define CP110_GATE_GOP_DP              9
 #define CP110_GATE_PCIE_X1_0           11
 #define CP110_GATE_PCIE_X1_1           12
 #define CP110_GATE_PCIE_X4             13
@@ -73,7 +76,7 @@ enum {
 #define CP110_GATE_SATA                        15
 #define CP110_GATE_SATA_USB            16
 #define CP110_GATE_MAIN                        17
-#define CP110_GATE_SDMMC               18
+#define CP110_GATE_SDMMC_GOP           18
 #define CP110_GATE_SLOW_IO             21
 #define CP110_GATE_USB3H0              22
 #define CP110_GATE_USB3H1              23
@@ -296,6 +299,11 @@ static int cp110_syscon_clk_probe(struct platform_device *pdev)
                                                      "gate-clock-output-names",
                                                      CP110_GATE_MAIN, &parent);
                        break;
+               case CP110_GATE_MG:
+                       of_property_read_string_index(np,
+                                                     "gate-clock-output-names",
+                                                     CP110_GATE_MG_CORE, &parent);
+                       break;
                case CP110_GATE_NAND:
                        parent = nand_name;
                        break;
@@ -303,9 +311,10 @@ static int cp110_syscon_clk_probe(struct platform_device *pdev)
                        parent = ppv2_name;
                        break;
                case CP110_GATE_SDIO:
+               case CP110_GATE_GOP_DP:
                        of_property_read_string_index(np,
                                                      "gate-clock-output-names",
-                                                     CP110_GATE_SDMMC, &parent);
+                                                     CP110_GATE_SDMMC_GOP, &parent);
                        break;
                case CP110_GATE_XOR1:
                case CP110_GATE_XOR0:
diff --git a/drivers/clk/mvebu/mv98dx3236.c b/drivers/clk/mvebu/mv98dx3236.c
new file mode 100644 (file)
index 0000000..6e203af
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Marvell MV98DX3236 SoC clocks
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+ * Andrew Lunn <andrew@lunn.ch>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/clk-provider.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include "common.h"
+
+/*
+ * For the 98DX4251, the Sample At Reset field selects the CPU, DDR and
+ * Main PLL clocks together:
+ *
+ * SAR1[20:18]   : CPU frequency    DDR frequency   MPLL frequency
+ *              0  =  400 MHz      400 MHz         800 MHz
+ *              2  =  667 MHz      667 MHz         2000 MHz
+ *              3  =  800 MHz      800 MHz         1600 MHz
+ *              others reserved.
+ *
+ * For the 98DX3236, the Sample At Reset field selects the CPU, DDR and
+ * Main PLL clocks together:
+ *
+ * SAR1[20:18]   : CPU frequency    DDR frequency   MPLL frequency
+ *              1  =  667 MHz      667 MHz         2000 MHz
+ *              2  =  400 MHz      400 MHz         400 MHz
+ *              3  =  800 MHz      800 MHz         800 MHz
+ *              5  =  800 MHz      400 MHz         800 MHz
+ *              others reserved.
+ */
+
+#define SAR1_MV98DX3236_CPU_DDR_MPLL_FREQ_OPT          18
+#define SAR1_MV98DX3236_CPU_DDR_MPLL_FREQ_OPT_MASK     0x7
+
+static u32 __init mv98dx3236_get_tclk_freq(void __iomem *sar)
+{
+       /* Tclk = 200MHz, no SaR dependency */
+       return 200000000;
+}
+
+static const u32 mv98dx3236_cpu_frequencies[] __initconst = {
+       0,
+       667000000,
+       400000000,
+       800000000,
+       0,
+       800000000,
+       0, 0,
+};
+
+static const u32 mv98dx4251_cpu_frequencies[] __initconst = {
+       400000000,
+       0,
+       667000000,
+       800000000,
+       0, 0, 0, 0,
+};
+
+static u32 __init mv98dx3236_get_cpu_freq(void __iomem *sar)
+{
+       u32 cpu_freq = 0;
+       u8 cpu_freq_select = 0;
+
+       cpu_freq_select = ((readl(sar) >> SAR1_MV98DX3236_CPU_DDR_MPLL_FREQ_OPT) &
+                          SAR1_MV98DX3236_CPU_DDR_MPLL_FREQ_OPT_MASK);
+
+       if (of_machine_is_compatible("marvell,armadaxp-98dx4251"))
+               cpu_freq = mv98dx4251_cpu_frequencies[cpu_freq_select];
+       else if (of_machine_is_compatible("marvell,armadaxp-98dx3236"))
+               cpu_freq = mv98dx3236_cpu_frequencies[cpu_freq_select];
+
+       if (!cpu_freq)
+               pr_err("CPU freq select unsupported %d\n", cpu_freq_select);
+
+       return cpu_freq;
+}
+
+enum {
+       MV98DX3236_CPU_TO_DDR,
+       MV98DX3236_CPU_TO_MPLL
+};
+
+static const struct coreclk_ratio mv98dx3236_core_ratios[] __initconst = {
+       { .id = MV98DX3236_CPU_TO_DDR, .name = "ddrclk" },
+       { .id = MV98DX3236_CPU_TO_MPLL, .name = "mpll" },
+};
+
+static const int __initconst mv98dx3236_cpu_mpll_ratios[8][2] = {
+       {0, 1}, {3, 1}, {1, 1}, {1, 1},
+       {0, 1}, {1, 1}, {0, 1}, {0, 1},
+};
+
+static const int __initconst mv98dx3236_cpu_ddr_ratios[8][2] = {
+       {0, 1}, {1, 1}, {1, 1}, {1, 1},
+       {0, 1}, {1, 2}, {0, 1}, {0, 1},
+};
+
+static const int __initconst mv98dx4251_cpu_mpll_ratios[8][2] = {
+       {2, 1}, {0, 1}, {3, 1}, {2, 1},
+       {0, 1}, {0, 1}, {0, 1}, {0, 1},
+};
+
+static const int __initconst mv98dx4251_cpu_ddr_ratios[8][2] = {
+       {1, 1}, {0, 1}, {1, 1}, {1, 1},
+       {0, 1}, {0, 1}, {0, 1}, {0, 1},
+};
+
+static void __init mv98dx3236_get_clk_ratio(
+       void __iomem *sar, int id, int *mult, int *div)
+{
+       u32 opt = ((readl(sar) >> SAR1_MV98DX3236_CPU_DDR_MPLL_FREQ_OPT) &
+               SAR1_MV98DX3236_CPU_DDR_MPLL_FREQ_OPT_MASK);
+
+       switch (id) {
+       case MV98DX3236_CPU_TO_DDR:
+               if (of_machine_is_compatible("marvell,armadaxp-98dx4251")) {
+                       *mult = mv98dx4251_cpu_ddr_ratios[opt][0];
+                       *div = mv98dx4251_cpu_ddr_ratios[opt][1];
+               } else if (of_machine_is_compatible("marvell,armadaxp-98dx3236")) {
+                       *mult = mv98dx3236_cpu_ddr_ratios[opt][0];
+                       *div = mv98dx3236_cpu_ddr_ratios[opt][1];
+               }
+               break;
+       case MV98DX3236_CPU_TO_MPLL:
+               if (of_machine_is_compatible("marvell,armadaxp-98dx4251")) {
+                       *mult = mv98dx4251_cpu_mpll_ratios[opt][0];
+                       *div = mv98dx4251_cpu_mpll_ratios[opt][1];
+               } else if (of_machine_is_compatible("marvell,armadaxp-98dx3236")) {
+                       *mult = mv98dx3236_cpu_mpll_ratios[opt][0];
+                       *div = mv98dx3236_cpu_mpll_ratios[opt][1];
+               }
+               break;
+       }
+}
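The mult/div pairs are ratios of the derived clock to the CPU clock chosen by the same SAR field. A standalone cross-check of option 5 on the 98DX3236 against the table in the header comment (CPU 800 MHz, DDR 400 MHz, MPLL 800 MHz):

#include <stdio.h>

static const unsigned long cpu_freq[8] =
        { 0, 667000000, 400000000, 800000000, 0, 800000000, 0, 0 };
static const int ddr_ratio[8][2] =
        { {0, 1}, {1, 1}, {1, 1}, {1, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1} };
static const int mpll_ratio[8][2] =
        { {0, 1}, {3, 1}, {1, 1}, {1, 1}, {0, 1}, {1, 1}, {0, 1}, {0, 1} };

int main(void)
{
        unsigned int opt = 5;   /* SAR1[20:18] field value */
        unsigned long cpu = cpu_freq[opt];

        printf("cpu  %lu Hz\n", cpu);
        printf("ddr  %lu Hz\n", cpu * ddr_ratio[opt][0] / ddr_ratio[opt][1]);
        printf("mpll %lu Hz\n", cpu * mpll_ratio[opt][0] / mpll_ratio[opt][1]);
        return 0;       /* 800 MHz, 400 MHz and 800 MHz, as documented */
}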
+
+static const struct coreclk_soc_desc mv98dx3236_core_clocks = {
+       .get_tclk_freq = mv98dx3236_get_tclk_freq,
+       .get_cpu_freq = mv98dx3236_get_cpu_freq,
+       .get_clk_ratio = mv98dx3236_get_clk_ratio,
+       .ratios = mv98dx3236_core_ratios,
+       .num_ratios = ARRAY_SIZE(mv98dx3236_core_ratios),
+};
+
+/*
+ * Clock Gating Control
+ */
+
+static const struct clk_gating_soc_desc mv98dx3236_gating_desc[] __initconst = {
+       { "ge1", NULL, 3, 0 },
+       { "ge0", NULL, 4, 0 },
+       { "pex00", NULL, 5, 0 },
+       { "sdio", NULL, 17, 0 },
+       { "usb0", NULL, 18, 0 },
+       { "xor0", NULL, 22, 0 },
+       { }
+};
+
+static void __init mv98dx3236_clk_init(struct device_node *np)
+{
+       struct device_node *cgnp =
+               of_find_compatible_node(NULL, NULL, "marvell,mv98dx3236-gating-clock");
+
+       mvebu_coreclk_setup(np, &mv98dx3236_core_clocks);
+
+       if (cgnp)
+               mvebu_clk_gating_setup(cgnp, mv98dx3236_gating_desc);
+}
+CLK_OF_DECLARE(mv98dx3236_clk, "marvell,mv98dx3236-core-clock", mv98dx3236_clk_init);
index 07e2cc6ed7813fb8c59b64b01090efdff626827b..3487c267833e40afbed8e31ecc93ed859785fa90 100644 (file)
@@ -462,8 +462,79 @@ static const struct rpm_smd_clk_desc rpm_clk_msm8916 = {
        .num_clks = ARRAY_SIZE(msm8916_clks),
 };
 
+/* msm8974 */
+DEFINE_CLK_SMD_RPM(msm8974, pnoc_clk, pnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 0);
+DEFINE_CLK_SMD_RPM(msm8974, snoc_clk, snoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 1);
+DEFINE_CLK_SMD_RPM(msm8974, cnoc_clk, cnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 2);
+DEFINE_CLK_SMD_RPM(msm8974, mmssnoc_ahb_clk, mmssnoc_ahb_a_clk, QCOM_SMD_RPM_BUS_CLK, 3);
+DEFINE_CLK_SMD_RPM(msm8974, bimc_clk, bimc_a_clk, QCOM_SMD_RPM_MEM_CLK, 0);
+DEFINE_CLK_SMD_RPM(msm8974, gfx3d_clk_src, gfx3d_a_clk_src, QCOM_SMD_RPM_MEM_CLK, 1);
+DEFINE_CLK_SMD_RPM(msm8974, ocmemgx_clk, ocmemgx_a_clk, QCOM_SMD_RPM_MEM_CLK, 2);
+DEFINE_CLK_SMD_RPM_QDSS(msm8974, qdss_clk, qdss_a_clk, QCOM_SMD_RPM_MISC_CLK, 1);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8974, cxo_d0, cxo_d0_a, 1);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8974, cxo_d1, cxo_d1_a, 2);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8974, cxo_a0, cxo_a0_a, 4);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8974, cxo_a1, cxo_a1_a, 5);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8974, cxo_a2, cxo_a2_a, 6);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8974, diff_clk, diff_a_clk, 7);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8974, div_clk1, div_a_clk1, 11);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8974, div_clk2, div_a_clk2, 12);
+DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8974, cxo_d0_pin, cxo_d0_a_pin, 1);
+DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8974, cxo_d1_pin, cxo_d1_a_pin, 2);
+DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8974, cxo_a0_pin, cxo_a0_a_pin, 4);
+DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8974, cxo_a1_pin, cxo_a1_a_pin, 5);
+DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8974, cxo_a2_pin, cxo_a2_a_pin, 6);
+
+static struct clk_smd_rpm *msm8974_clks[] = {
+       [RPM_SMD_PNOC_CLK]              = &msm8974_pnoc_clk,
+       [RPM_SMD_PNOC_A_CLK]            = &msm8974_pnoc_a_clk,
+       [RPM_SMD_SNOC_CLK]              = &msm8974_snoc_clk,
+       [RPM_SMD_SNOC_A_CLK]            = &msm8974_snoc_a_clk,
+       [RPM_SMD_CNOC_CLK]              = &msm8974_cnoc_clk,
+       [RPM_SMD_CNOC_A_CLK]            = &msm8974_cnoc_a_clk,
+       [RPM_SMD_MMSSNOC_AHB_CLK]       = &msm8974_mmssnoc_ahb_clk,
+       [RPM_SMD_MMSSNOC_AHB_A_CLK]     = &msm8974_mmssnoc_ahb_a_clk,
+       [RPM_SMD_BIMC_CLK]              = &msm8974_bimc_clk,
+       [RPM_SMD_BIMC_A_CLK]            = &msm8974_bimc_a_clk,
+       [RPM_SMD_OCMEMGX_CLK]           = &msm8974_ocmemgx_clk,
+       [RPM_SMD_OCMEMGX_A_CLK]         = &msm8974_ocmemgx_a_clk,
+       [RPM_SMD_QDSS_CLK]              = &msm8974_qdss_clk,
+       [RPM_SMD_QDSS_A_CLK]            = &msm8974_qdss_a_clk,
+       [RPM_SMD_CXO_D0]                = &msm8974_cxo_d0,
+       [RPM_SMD_CXO_D0_A]              = &msm8974_cxo_d0_a,
+       [RPM_SMD_CXO_D1]                = &msm8974_cxo_d1,
+       [RPM_SMD_CXO_D1_A]              = &msm8974_cxo_d1_a,
+       [RPM_SMD_CXO_A0]                = &msm8974_cxo_a0,
+       [RPM_SMD_CXO_A0_A]              = &msm8974_cxo_a0_a,
+       [RPM_SMD_CXO_A1]                = &msm8974_cxo_a1,
+       [RPM_SMD_CXO_A1_A]              = &msm8974_cxo_a1_a,
+       [RPM_SMD_CXO_A2]                = &msm8974_cxo_a2,
+       [RPM_SMD_CXO_A2_A]              = &msm8974_cxo_a2_a,
+       [RPM_SMD_DIFF_CLK]              = &msm8974_diff_clk,
+       [RPM_SMD_DIFF_A_CLK]            = &msm8974_diff_a_clk,
+       [RPM_SMD_DIV_CLK1]              = &msm8974_div_clk1,
+       [RPM_SMD_DIV_A_CLK1]            = &msm8974_div_a_clk1,
+       [RPM_SMD_DIV_CLK2]              = &msm8974_div_clk2,
+       [RPM_SMD_DIV_A_CLK2]            = &msm8974_div_a_clk2,
+       [RPM_SMD_CXO_D0_PIN]            = &msm8974_cxo_d0_pin,
+       [RPM_SMD_CXO_D0_A_PIN]          = &msm8974_cxo_d0_a_pin,
+       [RPM_SMD_CXO_D1_PIN]            = &msm8974_cxo_d1_pin,
+       [RPM_SMD_CXO_D1_A_PIN]          = &msm8974_cxo_d1_a_pin,
+       [RPM_SMD_CXO_A0_PIN]            = &msm8974_cxo_a0_pin,
+       [RPM_SMD_CXO_A0_A_PIN]          = &msm8974_cxo_a0_a_pin,
+       [RPM_SMD_CXO_A1_PIN]            = &msm8974_cxo_a1_pin,
+       [RPM_SMD_CXO_A1_A_PIN]          = &msm8974_cxo_a1_a_pin,
+       [RPM_SMD_CXO_A2_PIN]            = &msm8974_cxo_a2_pin,
+       [RPM_SMD_CXO_A2_A_PIN]          = &msm8974_cxo_a2_a_pin,
+};
+
+static const struct rpm_smd_clk_desc rpm_clk_msm8974 = {
+       .clks = msm8974_clks,
+       .num_clks = ARRAY_SIZE(msm8974_clks),
+};
 
 static const struct of_device_id rpm_smd_clk_match_table[] = {
        { .compatible = "qcom,rpmcc-msm8916", .data = &rpm_clk_msm8916 },
+       { .compatible = "qcom,rpmcc-msm8974", .data = &rpm_clk_msm8974 },
        { }
 };
 MODULE_DEVICE_TABLE(of, rpm_smd_clk_match_table);
index cfab7b400381ad467f39d9628930745f6345cce4..03f9d316f969126c201a42227180103b7d97903d 100644 (file)
@@ -145,7 +145,6 @@ static int _qcom_cc_register_board_clk(struct device *dev, const char *path,
        clocks_node = of_find_node_by_path("/clocks");
        if (clocks_node)
                node = of_find_node_by_name(clocks_node, path);
-       of_node_put(clocks_node);
 
        if (!node) {
                fixed = devm_kzalloc(dev, sizeof(*fixed), GFP_KERNEL);
index 33d09138f5e5b3b3c553a5e7d4f4787feb0b5507..46cb256b4aa23919e47fcf79e60b8e3103d680c0 100644 (file)
@@ -20,6 +20,9 @@
 #include <linux/clk-provider.h>
 #include <linux/regmap.h>
 #include <linux/reset-controller.h>
+#include <linux/math64.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
 
 #include <dt-bindings/clock/qcom,gcc-ipq4019.h>
 
 #include "clk-rcg.h"
 #include "clk-branch.h"
 #include "reset.h"
+#include "clk-regmap-divider.h"
+
+#define to_clk_regmap_div(_hw) container_of(to_clk_regmap(_hw),\
+                                       struct clk_regmap_div, clkr)
+
+#define to_clk_fepll(_hw) container_of(to_clk_regmap_div(_hw),\
+                                               struct clk_fepll, cdiv)
 
 enum {
        P_XO,
@@ -40,6 +50,41 @@ enum {
        P_DDRPLLAPSS,
 };
 
+/*
+ * struct clk_fepll_vco - FEPLL VCO feedback-divider register layout
+ * @fdbkdiv_shift: lowest bit for FDBKDIV
+ * @fdbkdiv_width: number of bits in FDBKDIV
+ * @refclkdiv_shift: lowest bit for REFCLKDIV
+ * @refclkdiv_width: number of bits in REFCLKDIV
+ * @reg: PLL_DIV register address
+ */
+struct clk_fepll_vco {
+       u32 fdbkdiv_shift;
+       u32 fdbkdiv_width;
+       u32 refclkdiv_shift;
+       u32 refclkdiv_width;
+       u32 reg;
+};
+
+/*
+ * struct clk_fepll - clock divider for the FEPLL clocks
+ * @fixed_div: fixed divider value if divider is fixed
+ * @parent_map: map from software's parent index to hardware's src_sel field
+ * @cdiv: divider values for PLL_DIV
+ * @pll_vco: vco feedback divider
+ * @div_table: maps register divider values to actual divider values
+ *             when the divider is not fixed
+ * @freq_tbl: frequency table
+ */
+struct clk_fepll {
+       u32 fixed_div;
+       const u8 *parent_map;
+       struct clk_regmap_div cdiv;
+       const struct clk_fepll_vco *pll_vco;
+       const struct clk_div_table *div_table;
+       const struct freq_tbl *freq_tbl;
+};
+
 static struct parent_map gcc_xo_200_500_map[] = {
        { P_XO, 0 },
        { P_FEPLL200, 1 },
@@ -80,7 +125,7 @@ static struct parent_map gcc_xo_sdcc1_500_map[] = {
 
 static const char * const gcc_xo_sdcc1_500[] = {
        "xo",
-       "ddrpll",
+       "ddrpllsdcc",
        "fepll500",
 };
 
@@ -121,6 +166,12 @@ static struct parent_map gcc_xo_ddr_500_200_map[] = {
        {  P_DDRPLLAPSS, 1 },
 };
 
+/*
+ * Index of the safe parent clock used while the APSS frequency changes.
+ * fepll500 serves as the safe clock, so this is its index in the
+ * gcc_xo_ddr_500_200 parent list.
+ */
+static const int gcc_ipq4019_cpu_safe_parent = 2;
 static const char * const gcc_xo_ddr_500_200[] = {
        "xo",
        "fepll200",
@@ -505,7 +556,7 @@ static const struct freq_tbl ftbl_gcc_sdcc1_apps_clk[] = {
        F(25000000,  P_FEPLL500,                1,  1, 20),
        F(50000000,  P_FEPLL500,                1,  1, 10),
        F(100000000, P_FEPLL500,                1,  1, 5),
-       F(193000000, P_DDRPLL,          1,  0, 0),
+       F(192000000, P_DDRPLL,                  1,  0, 0),
        { }
 };
 
@@ -524,10 +575,20 @@ static struct clk_rcg2  sdcc1_apps_clk_src = {
 };
 
 static const struct freq_tbl ftbl_gcc_apps_clk[] = {
-       F(48000000, P_XO,          1, 0, 0),
+       F(48000000,  P_XO,         1, 0, 0),
        F(200000000, P_FEPLL200,   1, 0, 0),
+       F(384000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(413000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(448000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(488000000, P_DDRPLLAPSS, 1, 0, 0),
        F(500000000, P_FEPLL500,   1, 0, 0),
-       F(626000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(512000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(537000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(565000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(597000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(632000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(672000000, P_DDRPLLAPSS, 1, 0, 0),
+       F(716000000, P_DDRPLLAPSS, 1, 0, 0),
        { }
 };
 
@@ -541,6 +602,7 @@ static struct clk_rcg2 apps_clk_src = {
                .parent_names = gcc_xo_ddr_500_200,
                .num_parents = 4,
                .ops = &clk_rcg2_ops,
+               .flags = CLK_SET_RATE_PARENT,
        },
 };
 
@@ -1154,6 +1216,364 @@ static struct clk_branch gcc_wcss5g_rtc_clk = {
        },
 };
 
+/* Calculates the VCO rate for FEPLL. */
+static u64 clk_fepll_vco_calc_rate(struct clk_fepll *pll_div,
+                                  unsigned long parent_rate)
+{
+       const struct clk_fepll_vco *pll_vco = pll_div->pll_vco;
+       u32 fdbkdiv, refclkdiv, cdiv;
+       u64 vco;
+
+       regmap_read(pll_div->cdiv.clkr.regmap, pll_vco->reg, &cdiv);
+       refclkdiv = (cdiv >> pll_vco->refclkdiv_shift) &
+                   (BIT(pll_vco->refclkdiv_width) - 1);
+       fdbkdiv = (cdiv >> pll_vco->fdbkdiv_shift) &
+                 (BIT(pll_vco->fdbkdiv_width) - 1);
+
+       vco = parent_rate / refclkdiv;
+       vco *= 2;
+       vco *= fdbkdiv;
+
+       return vco;
+}
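So the VCO runs at (parent / refclkdiv) * 2 * fdbkdiv. The fixed post-dividers registered further down — 8, 20 and 32 for fepll500, fepll200 and fepll125 — are mutually consistent only if that VCO sits at 4 GHz; with the 48 MHz XO typically feeding the IPQ4019 (an assumption, as are the register values below), refclkdiv = 3 and fdbkdiv = 125 give exactly that:

#include <stdio.h>
#include <stdint.h>

static uint64_t fepll_vco(uint64_t parent, uint32_t refclkdiv,
                          uint32_t fdbkdiv)
{
        return parent / refclkdiv * 2 * fdbkdiv;
}

int main(void)
{
        uint64_t vco = fepll_vco(48000000, 3, 125);     /* 4 GHz */

        /* the fixed_div values of gcc_fepll{500,200,125}_clk below */
        printf("fepll500: %llu Hz\n", (unsigned long long)(vco / 8));
        printf("fepll200: %llu Hz\n", (unsigned long long)(vco / 20));
        printf("fepll125: %llu Hz\n", (unsigned long long)(vco / 32));
        return 0;
}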
+
+static const struct clk_fepll_vco gcc_apss_ddrpll_vco = {
+       .fdbkdiv_shift = 16,
+       .fdbkdiv_width = 8,
+       .refclkdiv_shift = 24,
+       .refclkdiv_width = 5,
+       .reg = 0x2e020,
+};
+
+static const struct clk_fepll_vco gcc_fepll_vco = {
+       .fdbkdiv_shift = 16,
+       .fdbkdiv_width = 8,
+       .refclkdiv_shift = 24,
+       .refclkdiv_width = 5,
+       .reg = 0x2f020,
+};
+
+/*
+ * Round rate function for APSS CPU PLL Clock divider.
+ * It looks up the frequency table and returns the next higher frequency
+ * supported in hardware.
+ */
+static long clk_cpu_div_round_rate(struct clk_hw *hw, unsigned long rate,
+                                  unsigned long *p_rate)
+{
+       struct clk_fepll *pll = to_clk_fepll(hw);
+       struct clk_hw *p_hw;
+       const struct freq_tbl *f;
+
+       f = qcom_find_freq(pll->freq_tbl, rate);
+       if (!f)
+               return -EINVAL;
+
+       p_hw = clk_hw_get_parent_by_index(hw, f->src);
+       *p_rate = clk_hw_get_rate(p_hw);
+
+       return f->freq;
+}
+
+/*
+ * Clock set rate function for APSS CPU PLL Clock divider.
+ * It looks up the frequency table and updates the PLL divider to corresponding
+ * divider value.
+ */
+static int clk_cpu_div_set_rate(struct clk_hw *hw, unsigned long rate,
+                               unsigned long parent_rate)
+{
+       struct clk_fepll *pll = to_clk_fepll(hw);
+       const struct freq_tbl *f;
+       u32 mask;
+       int ret;
+
+       f = qcom_find_freq(pll->freq_tbl, rate);
+       if (!f)
+               return -EINVAL;
+
+       mask = (BIT(pll->cdiv.width) - 1) << pll->cdiv.shift;
+       ret = regmap_update_bits(pll->cdiv.clkr.regmap,
+                                pll->cdiv.reg, mask,
+                                f->pre_div << pll->cdiv.shift);
+       /*
+        * There is no status bit that can be polled for a completed CPU
+        * divider update, so wait briefly instead.
+        */
+       udelay(1);
+
+       return ret;
+}
+
+/*
+ * Clock frequency calculation function for APSS CPU PLL Clock divider.
+ * This clock divider is nonlinear, so this function computes the actual
+ * divider and returns the output frequency by dividing the VCO frequency
+ * by that divider value.
+ */
+static unsigned long
+clk_cpu_div_recalc_rate(struct clk_hw *hw,
+                       unsigned long parent_rate)
+{
+       struct clk_fepll *pll = to_clk_fepll(hw);
+       u32 cdiv, pre_div;
+       u64 rate;
+
+       regmap_read(pll->cdiv.clkr.regmap, pll->cdiv.reg, &cdiv);
+       cdiv = (cdiv >> pll->cdiv.shift) & (BIT(pll->cdiv.width) - 1);
+
+       /*
+        * Some dividers have value in 0.5 fraction so multiply both VCO
+        * frequency(parent_rate) and pre_div with 2 to make integer
+        * calculation.
+        */
+       if (cdiv > 10)
+               pre_div = (cdiv + 1) * 2;
+       else
+               pre_div = cdiv + 12;
+
+       rate = clk_fepll_vco_calc_rate(pll, parent_rate) * 2;
+       do_div(rate, pre_div);
+
+       return rate;
+}
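The decode yields half-step divisors 6.0 through 11.0 for cdiv 0-10 (hence the doubling of the VCO rate) and integer divisors of cdiv + 1 from 12 upward. Working backwards from the 896 MHz entry in the table below (cdiv 0, divide-by-6) puts the DDR PLL VCO at 5376 MHz — an inferred figure, not one the patch states — which also explains the 193 -> 192 MHz correction to ftbl_gcc_sdcc1_apps_clk earlier in this patch, since the fixed /28 sdcc tap of the same VCO is 192 MHz:

#include <stdio.h>
#include <stdint.h>

/* same decode as clk_cpu_div_recalc_rate() above */
static uint64_t cpu_div_rate(uint64_t vco, uint32_t cdiv)
{
        uint32_t pre_div = (cdiv > 10) ? (cdiv + 1) * 2 : cdiv + 12;

        return vco * 2 / pre_div;       /* the x2 absorbs the 0.5 steps */
}

int main(void)
{
        uint64_t vco = 5376000000ULL;   /* inferred, see note above */

        printf("cdiv 0x0 -> %llu Hz\n",         /* 896 MHz */
               (unsigned long long)cpu_div_rate(vco, 0x0));
        printf("cdiv 0xd -> %llu Hz\n",         /* 384 MHz */
               (unsigned long long)cpu_div_rate(vco, 0xd));
        printf("sdcc /28 -> %llu Hz\n",         /* 192 MHz */
               (unsigned long long)(vco / 28));
        return 0;
}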
+
+static const struct clk_ops clk_regmap_cpu_div_ops = {
+       .round_rate = clk_cpu_div_round_rate,
+       .set_rate = clk_cpu_div_set_rate,
+       .recalc_rate = clk_cpu_div_recalc_rate,
+};
+
+static const struct freq_tbl ftbl_apss_ddr_pll[] = {
+       { 384000000, P_XO, 0xd, 0, 0 },
+       { 413000000, P_XO, 0xc, 0, 0 },
+       { 448000000, P_XO, 0xb, 0, 0 },
+       { 488000000, P_XO, 0xa, 0, 0 },
+       { 512000000, P_XO, 0x9, 0, 0 },
+       { 537000000, P_XO, 0x8, 0, 0 },
+       { 565000000, P_XO, 0x7, 0, 0 },
+       { 597000000, P_XO, 0x6, 0, 0 },
+       { 632000000, P_XO, 0x5, 0, 0 },
+       { 672000000, P_XO, 0x4, 0, 0 },
+       { 716000000, P_XO, 0x3, 0, 0 },
+       { 768000000, P_XO, 0x2, 0, 0 },
+       { 823000000, P_XO, 0x1, 0, 0 },
+       { 896000000, P_XO, 0x0, 0, 0 },
+       { }
+};
+
+static struct clk_fepll gcc_apss_cpu_plldiv_clk = {
+       .cdiv.reg = 0x2e020,
+       .cdiv.shift = 4,
+       .cdiv.width = 4,
+       .cdiv.clkr = {
+               .enable_reg = 0x2e000,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "ddrpllapss",
+                       .parent_names = (const char *[]){
+                               "xo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_regmap_cpu_div_ops,
+               },
+       },
+       .freq_tbl = ftbl_apss_ddr_pll,
+       .pll_vco = &gcc_apss_ddrpll_vco,
+};
+
+/*
+ * Calculates the rate for a PLL divider output.
+ * If the divider is not fixed, the actual divider value is looked up in
+ * the divider table; the clock rate is then the VCO rate divided by
+ * that divider value.
+ */
+static unsigned long
+clk_regmap_clk_div_recalc_rate(struct clk_hw *hw,
+                              unsigned long parent_rate)
+{
+       struct clk_fepll *pll = to_clk_fepll(hw);
+       u32 cdiv, pre_div = 1;
+       u64 rate;
+       const struct clk_div_table *clkt;
+
+       if (pll->fixed_div) {
+               pre_div = pll->fixed_div;
+       } else {
+               regmap_read(pll->cdiv.clkr.regmap, pll->cdiv.reg, &cdiv);
+               cdiv = (cdiv >> pll->cdiv.shift) & (BIT(pll->cdiv.width) - 1);
+
+               for (clkt = pll->div_table; clkt->div; clkt++) {
+                       if (clkt->val == cdiv)
+                               pre_div = clkt->div;
+               }
+       }
+
+       rate = clk_fepll_vco_calc_rate(pll, parent_rate);
+       do_div(rate, pre_div);
+
+       return rate;
+}
+
+static const struct clk_ops clk_fepll_div_ops = {
+       .recalc_rate = clk_regmap_clk_div_recalc_rate,
+};
+
+static struct clk_fepll gcc_apss_sdcc_clk = {
+       .fixed_div = 28,
+       .cdiv.clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "ddrpllsdcc",
+                       .parent_names = (const char *[]){
+                               "xo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_fepll_div_ops,
+               },
+       },
+       .pll_vco = &gcc_apss_ddrpll_vco,
+};
+
+static struct clk_fepll gcc_fepll125_clk = {
+       .fixed_div = 32,
+       .cdiv.clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "fepll125",
+                       .parent_names = (const char *[]){
+                               "xo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_fepll_div_ops,
+               },
+       },
+       .pll_vco = &gcc_fepll_vco,
+};
+
+static struct clk_fepll gcc_fepll125dly_clk = {
+       .fixed_div = 32,
+       .cdiv.clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "fepll125dly",
+                       .parent_names = (const char *[]){
+                               "xo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_fepll_div_ops,
+               },
+       },
+       .pll_vco = &gcc_fepll_vco,
+};
+
+static struct clk_fepll gcc_fepll200_clk = {
+       .fixed_div = 20,
+       .cdiv.clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "fepll200",
+                       .parent_names = (const char *[]){
+                               "xo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_fepll_div_ops,
+               },
+       },
+       .pll_vco = &gcc_fepll_vco,
+};
+
+static struct clk_fepll gcc_fepll500_clk = {
+       .fixed_div = 8,
+       .cdiv.clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "fepll500",
+                       .parent_names = (const char *[]){
+                               "xo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_fepll_div_ops,
+               },
+       },
+       .pll_vco = &gcc_fepll_vco,
+};
+
+static const struct clk_div_table fepllwcss_clk_div_table[] = {
+       { 0, 15 },
+       { 1, 16 },
+       { 2, 18 },
+       { 3, 20 },
+       { },
+};
+
+static struct clk_fepll gcc_fepllwcss2g_clk = {
+       .cdiv.reg = 0x2f020,
+       .cdiv.shift = 8,
+       .cdiv.width = 2,
+       .cdiv.clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "fepllwcss2g",
+                       .parent_names = (const char *[]){
+                               "xo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_fepll_div_ops,
+               },
+       },
+       .div_table = fepllwcss_clk_div_table,
+       .pll_vco = &gcc_fepll_vco,
+};
+
+static struct clk_fepll gcc_fepllwcss5g_clk = {
+       .cdiv.reg = 0x2f020,
+       .cdiv.shift = 12,
+       .cdiv.width = 2,
+       .cdiv.clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "fepllwcss5g",
+                       .parent_names = (const char *[]){
+                               "xo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_fepll_div_ops,
+               },
+       },
+       .div_table = fepllwcss_clk_div_table,
+       .pll_vco = &gcc_fepll_vco,
+};
+
+static const struct freq_tbl ftbl_gcc_pcnoc_ahb_clk[] = {
+       F(48000000,  P_XO,       1, 0, 0),
+       F(100000000, P_FEPLL200, 2, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_pcnoc_ahb_clk_src = {
+       .cmd_rcgr = 0x21024,
+       .hid_width = 5,
+       .parent_map = gcc_xo_200_500_map,
+       .freq_tbl = ftbl_gcc_pcnoc_ahb_clk,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_pcnoc_ahb_clk_src",
+               .parent_names = gcc_xo_200_500,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_branch pcnoc_clk_src = {
+       .halt_reg = 0x21030,
+       .clkr = {
+               .enable_reg = 0x21030,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "pcnoc_clk_src",
+                       .parent_names = (const char *[]){
+                               "gcc_pcnoc_ahb_clk_src",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+                       .flags = CLK_SET_RATE_PARENT |
+                               CLK_IS_CRITICAL,
+               },
+       },
+};
+
 static struct clk_regmap *gcc_ipq4019_clocks[] = {
        [AUDIO_CLK_SRC] = &audio_clk_src.clkr,
        [BLSP1_QUP1_I2C_APPS_CLK_SRC] = &blsp1_qup1_i2c_apps_clk_src.clkr,
@@ -1214,6 +1634,16 @@ static struct clk_regmap *gcc_ipq4019_clocks[] = {
        [GCC_WCSS5G_CLK] = &gcc_wcss5g_clk.clkr,
        [GCC_WCSS5G_REF_CLK] = &gcc_wcss5g_ref_clk.clkr,
        [GCC_WCSS5G_RTC_CLK] = &gcc_wcss5g_rtc_clk.clkr,
+       [GCC_SDCC_PLLDIV_CLK] = &gcc_apss_sdcc_clk.cdiv.clkr,
+       [GCC_FEPLL125_CLK] = &gcc_fepll125_clk.cdiv.clkr,
+       [GCC_FEPLL125DLY_CLK] = &gcc_fepll125dly_clk.cdiv.clkr,
+       [GCC_FEPLL200_CLK] = &gcc_fepll200_clk.cdiv.clkr,
+       [GCC_FEPLL500_CLK] = &gcc_fepll500_clk.cdiv.clkr,
+       [GCC_FEPLL_WCSS2G_CLK] = &gcc_fepllwcss2g_clk.cdiv.clkr,
+       [GCC_FEPLL_WCSS5G_CLK] = &gcc_fepllwcss5g_clk.cdiv.clkr,
+       [GCC_APSS_CPU_PLLDIV_CLK] = &gcc_apss_cpu_plldiv_clk.cdiv.clkr,
+       [GCC_PCNOC_AHB_CLK_SRC] = &gcc_pcnoc_ahb_clk_src.clkr,
+       [GCC_PCNOC_AHB_CLK] = &pcnoc_clk_src.clkr,
 };
 
 static const struct qcom_reset_map gcc_ipq4019_resets[] = {
@@ -1294,7 +1724,7 @@ static const struct regmap_config gcc_ipq4019_regmap_config = {
        .reg_bits       = 32,
        .reg_stride     = 4,
        .val_bits       = 32,
-       .max_register   = 0x2dfff,
+       .max_register   = 0x2ffff,
        .fast_io        = true,
 };
 
@@ -1312,23 +1742,44 @@ static const struct of_device_id gcc_ipq4019_match_table[] = {
 };
 MODULE_DEVICE_TABLE(of, gcc_ipq4019_match_table);
 
+static int
+gcc_ipq4019_cpu_clk_notifier_fn(struct notifier_block *nb,
+                               unsigned long action, void *data)
+{
+       int err = 0;
+
+       if (action == PRE_RATE_CHANGE)
+               err = clk_rcg2_ops.set_parent(&apps_clk_src.clkr.hw,
+                                             gcc_ipq4019_cpu_safe_parent);
+
+       return notifier_from_errno(err);
+}
+
+static struct notifier_block gcc_ipq4019_cpu_clk_notifier = {
+       .notifier_call = gcc_ipq4019_cpu_clk_notifier_fn,
+};
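gcc_ipq4019_cpu_safe_parent is index 2, i.e. fepll500 in the gcc_xo_ddr_500_200 list, and is applied on PRE_RATE_CHANGE so the CPU never runs from the DDR PLL while it relocks. The switch back to the PLL happens through the normal set-rate path once the new rate is in place; the toy model below makes that second step explicit purely for illustration:

#include <stdio.h>

enum parent { XO, FEPLL200, FEPLL500, DDRPLLAPSS };

static enum parent mux = DDRPLLAPSS;

/* PRE_RATE_CHANGE: park the CPU on the safe 500 MHz clock */
static void pre_rate_change(void)
{
        mux = FEPLL500;
}

/* in the driver this reparenting is done by the set-rate path itself */
static void rate_set_done(void)
{
        mux = DDRPLLAPSS;
}

int main(void)
{
        pre_rate_change();
        printf("during relock: parent %d (fepll500)\n", mux);
        rate_set_done();
        printf("after relock:  parent %d (ddrpllapss)\n", mux);
        return 0;
}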
+
 static int gcc_ipq4019_probe(struct platform_device *pdev)
 {
-       struct device *dev = &pdev->dev;
+       int err;
 
-       clk_register_fixed_rate(dev, "fepll125", "xo", 0, 200000000);
-       clk_register_fixed_rate(dev, "fepll125dly", "xo", 0, 200000000);
-       clk_register_fixed_rate(dev, "fepllwcss2g", "xo", 0, 200000000);
-       clk_register_fixed_rate(dev, "fepllwcss5g", "xo", 0, 200000000);
-       clk_register_fixed_rate(dev, "fepll200", "xo", 0, 200000000);
-       clk_register_fixed_rate(dev, "fepll500", "xo", 0, 200000000);
-       clk_register_fixed_rate(dev, "ddrpllapss", "xo", 0, 666000000);
+       err = qcom_cc_probe(pdev, &gcc_ipq4019_desc);
+       if (err)
+               return err;
 
-       return qcom_cc_probe(pdev, &gcc_ipq4019_desc);
+       return clk_notifier_register(apps_clk_src.clkr.hw.clk,
+                                    &gcc_ipq4019_cpu_clk_notifier);
+}
+
+static int gcc_ipq4019_remove(struct platform_device *pdev)
+{
+       return clk_notifier_unregister(apps_clk_src.clkr.hw.clk,
+                                      &gcc_ipq4019_cpu_clk_notifier);
 }
 
 static struct platform_driver gcc_ipq4019_driver = {
        .probe          = gcc_ipq4019_probe,
+       .remove         = gcc_ipq4019_remove,
        .driver         = {
                .name   = "qcom,gcc-ipq4019",
                .of_match_table = gcc_ipq4019_match_table,
index 581a17f67379b5465fbf1c85a8cc12e490418791..b99dd406e907112f82f29a795fa6712804502f16 100644 (file)
@@ -1563,6 +1563,34 @@ static struct clk_branch rpm_msg_ram_h_clk = {
        },
 };
 
+static struct clk_branch ebi2_clk = {
+       .hwcg_reg = 0x2664,
+       .hwcg_bit = 6,
+       .halt_reg = 0x2fcc,
+       .halt_bit = 24,
+       .clkr = {
+               .enable_reg = 0x2664,
+               .enable_mask = BIT(6) | BIT(4),
+               .hw.init = &(struct clk_init_data){
+                       .name = "ebi2_clk",
+                       .ops = &clk_branch_ops,
+               },
+       },
+};
+
+static struct clk_branch ebi2_aon_clk = {
+       .halt_reg = 0x2fcc,
+       .halt_bit = 23,
+       .clkr = {
+               .enable_reg = 0x2664,
+               .enable_mask = BIT(8),
+               .hw.init = &(struct clk_init_data){
+                       .name = "ebi2_aon_clk",
+                       .ops = &clk_branch_ops,
+               },
+       },
+};
+
 static struct clk_hw *gcc_mdm9615_hws[] = {
        &cxo.hw,
 };
@@ -1637,6 +1665,8 @@ static struct clk_regmap *gcc_mdm9615_clks[] = {
        [PMIC_ARB1_H_CLK] = &pmic_arb1_h_clk.clkr,
        [PMIC_SSBI2_CLK] = &pmic_ssbi2_clk.clkr,
        [RPM_MSG_RAM_H_CLK] = &rpm_msg_ram_h_clk.clkr,
+       [EBI2_CLK] = &ebi2_clk.clkr,
+       [EBI2_AON_CLK] = &ebi2_aon_clk.clkr,
 };
 
 static const struct qcom_reset_map gcc_mdm9615_resets[] = {
index 8afd8304a0702e7e9cc85b1050a477a722501003..7983288d9141cbdc4ce59d476d10fdc39f3ea87d 100644 (file)
@@ -1888,6 +1888,23 @@ static struct clk_branch gcc_sdcc1_apps_clk = {
        },
 };
 
+static struct clk_branch gcc_sdcc1_ahb_clk = {
+       .halt_reg = 0x04c8,
+       .clkr = {
+               .enable_reg = 0x04c8,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data)
+               {
+                       .name = "gcc_sdcc1_ahb_clk",
+                       .parent_names = (const char *[]){
+                               "periph_noc_clk_src",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
 static struct clk_branch gcc_sdcc2_apps_clk = {
        .halt_reg = 0x0504,
        .clkr = {
@@ -2231,6 +2248,7 @@ static struct clk_regmap *gcc_msm8994_clocks[] = {
        [GCC_SDCC2_APPS_CLK] = &gcc_sdcc2_apps_clk.clkr,
        [GCC_SDCC3_APPS_CLK] = &gcc_sdcc3_apps_clk.clkr,
        [GCC_SDCC4_APPS_CLK] = &gcc_sdcc4_apps_clk.clkr,
+       [GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr,
        [GCC_SYS_NOC_UFS_AXI_CLK] = &gcc_sys_noc_ufs_axi_clk.clkr,
        [GCC_SYS_NOC_USB3_AXI_CLK] = &gcc_sys_noc_usb3_axi_clk.clkr,
        [GCC_TSIF_REF_CLK] = &gcc_tsif_ref_clk.clkr,
index 4b1fc1730d295f0b843cb5cd50de223371dbbb4c..8abc200d4fd36d7b6e87036c9ff6ec9aa908b4c8 100644 (file)
@@ -3448,6 +3448,7 @@ static const struct qcom_reset_map gcc_msm8996_resets[] = {
        [GCC_MSMPU_BCR] = { 0x8d000 },
        [GCC_MSS_Q6_BCR] = { 0x8e000 },
        [GCC_QREFS_VBG_CAL_BCR] = { 0x88020 },
+       [GCC_MSS_RESTART] = { 0x8f008 },
 };
 
 static const struct regmap_config gcc_msm8996_regmap_config = {
index 288186cce0ae420d499cf995636320bb0a9203d5..a4f3580587b7e121d25bcffefc1d50074127e103 100644 (file)
@@ -63,11 +63,26 @@ static int gdsc_hwctrl(struct gdsc *sc, bool en)
        return regmap_update_bits(sc->regmap, sc->gdscr, HW_CONTROL_MASK, val);
 }
 
+static int gdsc_poll_status(struct gdsc *sc, unsigned int reg, bool en)
+{
+       ktime_t start;
+
+       start = ktime_get();
+       do {
+               if (gdsc_is_enabled(sc, reg) == en)
+                       return 0;
+       } while (ktime_us_delta(ktime_get(), start) < TIMEOUT_US);
+
+       if (gdsc_is_enabled(sc, reg) == en)
+               return 0;
+
+       return -ETIMEDOUT;
+}
+
 static int gdsc_toggle_logic(struct gdsc *sc, bool en)
 {
        int ret;
        u32 val = en ? 0 : SW_COLLAPSE_MASK;
-       ktime_t start;
        unsigned int status_reg = sc->gdscr;
 
        ret = regmap_update_bits(sc->regmap, sc->gdscr, SW_COLLAPSE_MASK, val);
@@ -100,16 +115,7 @@ static int gdsc_toggle_logic(struct gdsc *sc, bool en)
                udelay(1);
        }
 
-       start = ktime_get();
-       do {
-               if (gdsc_is_enabled(sc, status_reg) == en)
-                       return 0;
-       } while (ktime_us_delta(ktime_get(), start) < TIMEOUT_US);
-
-       if (gdsc_is_enabled(sc, status_reg) == en)
-               return 0;
-
-       return -ETIMEDOUT;
+       return gdsc_poll_status(sc, status_reg, en);
 }
 
 static inline int gdsc_deassert_reset(struct gdsc *sc)
@@ -188,8 +194,20 @@ static int gdsc_enable(struct generic_pm_domain *domain)
        udelay(1);
 
        /* Turn on HW trigger mode if supported */
-       if (sc->flags & HW_CTRL)
-               return gdsc_hwctrl(sc, true);
+       if (sc->flags & HW_CTRL) {
+               ret = gdsc_hwctrl(sc, true);
+               if (ret)
+                       return ret;
+               /*
+                * Wait for the GDSC to go through a power down and
+                * up cycle.  If firmware polls the gdsc status bits
+                * during that window, it could read an 'on' status
+                * before the GDSC has finished the power cycle.
+                * Waiting 1us before returning ensures the firmware
+                * can't immediately poll the status bits.
+                */
+               udelay(1);
+       }
 
        return 0;
 }
@@ -204,9 +222,23 @@ static int gdsc_disable(struct generic_pm_domain *domain)
 
        /* Turn off HW trigger mode if supported */
        if (sc->flags & HW_CTRL) {
+               unsigned int reg;
+
                ret = gdsc_hwctrl(sc, false);
                if (ret < 0)
                        return ret;
+               /*
+                * Wait for the GDSC to go through a power down and
+                * up cycle.  If we poll the gdsc status bits before
+                * the power cycle has completed, we could wrongly
+                * read an 'on' status.
+                */
+               udelay(1);
+
+               reg = sc->gds_hw_ctrl ? sc->gds_hw_ctrl : sc->gdscr;
+               ret = gdsc_poll_status(sc, reg, true);
+               if (ret)
+                       return ret;
        }
 
        if (sc->pwrsts & PWRSTS_OFF)
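
gdsc_poll_status() above factors out a bounded poll so the disable path can reuse it after handing control back from hardware. Note the final re-check after the deadline loop: a thread scheduled out past TIMEOUT_US would otherwise report a spurious timeout even though the status bit had settled. The same pattern in isolation, with check_status() standing in for gdsc_is_enabled():

#include <linux/errno.h>
#include <linux/ktime.h>
#include <linux/types.h>

static int poll_until(bool (*check_status)(void *ctx), void *ctx,
		      unsigned int timeout_us)
{
	ktime_t start = ktime_get();

	do {
		if (check_status(ctx))
			return 0;
	} while (ktime_us_delta(ktime_get(), start) < timeout_us);

	/* One last look in case we were preempted past the deadline. */
	return check_status(ctx) ? 0 : -ETIMEDOUT;
}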
index b533f99550e1b97dd55944a85cbf5d64153309f5..4067216bf31fbce9d3b62454b5565a80a92aa5a4 100644 (file)
@@ -91,6 +91,12 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable)
                value |= bitmask;
        cpg_mstp_write(group, value, group->smstpcr);
 
+       if (!group->mstpsr) {
+               /* dummy read to ensure write has completed */
+               cpg_mstp_read(group, group->smstpcr);
+               barrier_data(group->smstpcr);
+       }
+
        spin_unlock_irqrestore(&group->lock, flags);
 
        if (!enable || !group->mstpsr)
@@ -141,9 +147,9 @@ static const struct clk_ops cpg_mstp_clock_ops = {
        .is_enabled = cpg_mstp_clock_is_enabled,
 };
 
-static struct clk * __init
-cpg_mstp_clock_register(const char *name, const char *parent_name,
-                       unsigned int index, struct mstp_clock_group *group)
+static struct clk * __init cpg_mstp_clock_register(const char *name,
+       const char *parent_name, unsigned int index,
+       struct mstp_clock_group *group)
 {
        struct clk_init_data init;
        struct mstp_clock *clock;
@@ -158,6 +164,11 @@ cpg_mstp_clock_register(const char *name, const char *parent_name,
        init.name = name;
        init.ops = &cpg_mstp_clock_ops;
        init.flags = CLK_IS_BASIC | CLK_SET_RATE_PARENT;
+       /* INTC-SYS is the module clock of the GIC, and must not be disabled */
+       if (!strcmp(name, "intc-sys")) {
+               pr_debug("MSTP %s setting CLK_IS_CRITICAL\n", name);
+               init.flags |= CLK_IS_CRITICAL;
+       }
        init.parent_names = &parent_name;
        init.num_parents = 1;
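
CLK_IS_CRITICAL, applied above to the GIC's intc-sys module clock, tells the framework to take its own enable reference at registration so the clock is never gated, even with no consumers. It is just another clk_init_data flag and works on any registration path; a sketch using a fixed-factor clock purely for illustration:

#include <linux/clk-provider.h>

static struct clk *register_critical_fixed(const char *name,
					   const char *parent)
{
	/* The framework enables this clock itself and keeps it on. */
	return clk_register_fixed_factor(NULL, name, parent,
					 CLK_IS_CRITICAL, 1, 1);
}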
 
index 50698a7d90745447576a4ad3afd9e0893911201b..bfffdb00df97254771b3c518a0fd824ffaf647b8 100644 (file)
@@ -221,6 +221,7 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
        DEF_MOD("can-if0",               916,   R8A7795_CLK_S3D4),
        DEF_MOD("i2c6",                  918,   R8A7795_CLK_S3D2),
        DEF_MOD("i2c5",                  919,   R8A7795_CLK_S3D2),
+       DEF_MOD("i2c-dvfs",              926,   R8A7795_CLK_CP),
        DEF_MOD("i2c4",                  927,   R8A7795_CLK_S3D2),
        DEF_MOD("i2c3",                  928,   R8A7795_CLK_S3D2),
        DEF_MOD("i2c2",                  929,   R8A7795_CLK_S3D2),
index 7d298c57a3e060b6ea9a7ff8af36602706293e79..11e084a56b0d9005746e788843d27fc1ae21f466 100644 (file)
@@ -103,7 +103,9 @@ static const struct cpg_core_clk r8a7796_core_clks[] __initconst = {
        DEF_FIXED("cl",         R8A7796_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
        DEF_FIXED("cp",         R8A7796_CLK_CP,    CLK_EXTAL,      2, 1),
 
+       DEF_DIV6P1("canfd",     R8A7796_CLK_CANFD, CLK_PLL1_DIV4, 0x244),
        DEF_DIV6P1("csi0",      R8A7796_CLK_CSI0,  CLK_PLL1_DIV4, 0x00c),
+       DEF_DIV6P1("mso",       R8A7796_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
 
        DEF_DIV6_RO("osc",      R8A7796_CLK_OSC,   CLK_EXTAL, CPG_RCKCR,  8),
        DEF_DIV6_RO("r_int",    CLK_RINT,          CLK_EXTAL, CPG_RCKCR, 32),
@@ -117,6 +119,10 @@ static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = {
        DEF_MOD("scif3",                 204,   R8A7796_CLK_S3D4),
        DEF_MOD("scif1",                 206,   R8A7796_CLK_S3D4),
        DEF_MOD("scif0",                 207,   R8A7796_CLK_S3D4),
+       DEF_MOD("msiof3",                208,   R8A7796_CLK_MSO),
+       DEF_MOD("msiof2",                209,   R8A7796_CLK_MSO),
+       DEF_MOD("msiof1",                210,   R8A7796_CLK_MSO),
+       DEF_MOD("msiof0",                211,   R8A7796_CLK_MSO),
        DEF_MOD("sys-dmac2",             217,   R8A7796_CLK_S0D3),
        DEF_MOD("sys-dmac1",             218,   R8A7796_CLK_S0D3),
        DEF_MOD("sys-dmac0",             219,   R8A7796_CLK_S0D3),
@@ -181,8 +187,12 @@ static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = {
        DEF_MOD("gpio2",                 910,   R8A7796_CLK_S3D4),
        DEF_MOD("gpio1",                 911,   R8A7796_CLK_S3D4),
        DEF_MOD("gpio0",                 912,   R8A7796_CLK_S3D4),
+       DEF_MOD("can-fd",                914,   R8A7796_CLK_S3D2),
+       DEF_MOD("can-if1",               915,   R8A7796_CLK_S3D4),
+       DEF_MOD("can-if0",               916,   R8A7796_CLK_S3D4),
        DEF_MOD("i2c6",                  918,   R8A7796_CLK_S0D6),
        DEF_MOD("i2c5",                  919,   R8A7796_CLK_S0D6),
+       DEF_MOD("i2c-dvfs",              926,   R8A7796_CLK_CP),
        DEF_MOD("i2c4",                  927,   R8A7796_CLK_S0D6),
        DEF_MOD("i2c3",                  928,   R8A7796_CLK_S0D6),
        DEF_MOD("i2c2",                  929,   R8A7796_CLK_S3D2),
index 8359ce75db7aa4fb6e4bfb7f0f1ede36d90e1349..eadcbd43ff88319ba68d99c0daa27b6908315b87 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clk/renesas.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/mod_devicetable.h>
@@ -25,6 +26,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_clock.h>
 #include <linux/pm_domain.h>
+#include <linux/reset-controller.h>
 #include <linux/slab.h>
 
 #include <dt-bindings/clock/renesas-cpg-mssr.h>
@@ -43,7 +45,7 @@
  * Module Standby and Software Reset register offsets.
  *
  * If the registers exist, these are valid for SH-Mobile, R-Mobile,
- * R-Car Gen 2, and R-Car Gen 3.
+ * R-Car Gen2, R-Car Gen3, and RZ/G1.
  * These are NOT valid for R-Car Gen1 and RZ/A1!
  */
 
@@ -96,18 +98,22 @@ static const u16 srcr[] = {
 /**
  * Clock Pulse Generator / Module Standby and Software Reset Private Data
  *
+ * @rcdev: Optional reset controller entity
  * @dev: CPG/MSSR device
  * @base: CPG/MSSR register block base address
- * @mstp_lock: protects writes to SMSTPCR
+ * @rmw_lock: protects RMW register accesses
  * @clks: Array containing all Core and Module Clocks
  * @num_core_clks: Number of Core Clocks in clks[]
  * @num_mod_clks: Number of Module Clocks in clks[]
  * @last_dt_core_clk: ID of the last Core Clock exported to DT
  */
 struct cpg_mssr_priv {
+#ifdef CONFIG_RESET_CONTROLLER
+       struct reset_controller_dev rcdev;
+#endif
        struct device *dev;
        void __iomem *base;
-       spinlock_t mstp_lock;
+       spinlock_t rmw_lock;
 
        struct clk **clks;
        unsigned int num_core_clks;
@@ -144,7 +150,7 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable)
 
        dev_dbg(dev, "MSTP %u%02u/%pC %s\n", reg, bit, hw->clk,
                enable ? "ON" : "OFF");
-       spin_lock_irqsave(&priv->mstp_lock, flags);
+       spin_lock_irqsave(&priv->rmw_lock, flags);
 
        value = readl(priv->base + SMSTPCR(reg));
        if (enable)
@@ -153,7 +159,7 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable)
                value |= bitmask;
        writel(value, priv->base + SMSTPCR(reg));
 
-       spin_unlock_irqrestore(&priv->mstp_lock, flags);
+       spin_unlock_irqrestore(&priv->rmw_lock, flags);
 
        if (!enable)
                return 0;
@@ -346,17 +352,10 @@ static void __init cpg_mssr_register_mod_clk(const struct mssr_mod_clk *mod,
        init.flags = CLK_IS_BASIC | CLK_SET_RATE_PARENT;
        for (i = 0; i < info->num_crit_mod_clks; i++)
                if (id == info->crit_mod_clks[i]) {
-#ifdef CLK_ENABLE_HAND_OFF
-                       dev_dbg(dev, "MSTP %s setting CLK_ENABLE_HAND_OFF\n",
+                       dev_dbg(dev, "MSTP %s setting CLK_IS_CRITICAL\n",
                                mod->name);
-                       init.flags |= CLK_ENABLE_HAND_OFF;
+                       init.flags |= CLK_IS_CRITICAL;
                        break;
-#else
-                       dev_dbg(dev, "Ignoring MSTP %s to prevent disabling\n",
-                               mod->name);
-                       kfree(clock);
-                       return;
-#endif
                }
 
        parent_name = __clk_get_name(parent);
@@ -501,6 +500,122 @@ static int __init cpg_mssr_add_clk_domain(struct device *dev,
        return 0;
 }
 
+#ifdef CONFIG_RESET_CONTROLLER
+
+#define rcdev_to_priv(x)       container_of(x, struct cpg_mssr_priv, rcdev)
+
+static int cpg_mssr_reset(struct reset_controller_dev *rcdev,
+                         unsigned long id)
+{
+       struct cpg_mssr_priv *priv = rcdev_to_priv(rcdev);
+       unsigned int reg = id / 32;
+       unsigned int bit = id % 32;
+       u32 bitmask = BIT(bit);
+       unsigned long flags;
+       u32 value;
+
+       dev_dbg(priv->dev, "reset %u%02u\n", reg, bit);
+
+       /* Reset module */
+       spin_lock_irqsave(&priv->rmw_lock, flags);
+       value = readl(priv->base + SRCR(reg));
+       value |= bitmask;
+       writel(value, priv->base + SRCR(reg));
+       spin_unlock_irqrestore(&priv->rmw_lock, flags);
+
+       /* Wait for at least one cycle of the RCLK clock (@ ca. 32 kHz) */
+       udelay(35);
+
+       /* Release module from reset state */
+       writel(bitmask, priv->base + SRSTCLR(reg));
+
+       return 0;
+}
+
+static int cpg_mssr_assert(struct reset_controller_dev *rcdev, unsigned long id)
+{
+       struct cpg_mssr_priv *priv = rcdev_to_priv(rcdev);
+       unsigned int reg = id / 32;
+       unsigned int bit = id % 32;
+       u32 bitmask = BIT(bit);
+       unsigned long flags;
+       u32 value;
+
+       dev_dbg(priv->dev, "assert %u%02u\n", reg, bit);
+
+       spin_lock_irqsave(&priv->rmw_lock, flags);
+       value = readl(priv->base + SRCR(reg));
+       value |= bitmask;
+       writel(value, priv->base + SRCR(reg));
+       spin_unlock_irqrestore(&priv->rmw_lock, flags);
+       return 0;
+}
+
+static int cpg_mssr_deassert(struct reset_controller_dev *rcdev,
+                            unsigned long id)
+{
+       struct cpg_mssr_priv *priv = rcdev_to_priv(rcdev);
+       unsigned int reg = id / 32;
+       unsigned int bit = id % 32;
+       u32 bitmask = BIT(bit);
+
+       dev_dbg(priv->dev, "deassert %u%02u\n", reg, bit);
+
+       writel(bitmask, priv->base + SRSTCLR(reg));
+       return 0;
+}
+
+static int cpg_mssr_status(struct reset_controller_dev *rcdev,
+                          unsigned long id)
+{
+       struct cpg_mssr_priv *priv = rcdev_to_priv(rcdev);
+       unsigned int reg = id / 32;
+       unsigned int bit = id % 32;
+       u32 bitmask = BIT(bit);
+
+       return !!(readl(priv->base + SRCR(reg)) & bitmask);
+}
+
+static const struct reset_control_ops cpg_mssr_reset_ops = {
+       .reset = cpg_mssr_reset,
+       .assert = cpg_mssr_assert,
+       .deassert = cpg_mssr_deassert,
+       .status = cpg_mssr_status,
+};
+
+static int cpg_mssr_reset_xlate(struct reset_controller_dev *rcdev,
+                               const struct of_phandle_args *reset_spec)
+{
+       struct cpg_mssr_priv *priv = rcdev_to_priv(rcdev);
+       unsigned int unpacked = reset_spec->args[0];
+       unsigned int idx = MOD_CLK_PACK(unpacked);
+
+       if (unpacked % 100 > 31 || idx >= rcdev->nr_resets) {
+               dev_err(priv->dev, "Invalid reset index %u\n", unpacked);
+               return -EINVAL;
+       }
+
+       return idx;
+}
+
+static int cpg_mssr_reset_controller_register(struct cpg_mssr_priv *priv)
+{
+       priv->rcdev.ops = &cpg_mssr_reset_ops;
+       priv->rcdev.of_node = priv->dev->of_node;
+       priv->rcdev.of_reset_n_cells = 1;
+       priv->rcdev.of_xlate = cpg_mssr_reset_xlate;
+       priv->rcdev.nr_resets = priv->num_mod_clks;
+       return devm_reset_controller_register(priv->dev, &priv->rcdev);
+}
+
+#else /* !CONFIG_RESET_CONTROLLER */
+static inline int cpg_mssr_reset_controller_register(struct cpg_mssr_priv *priv)
+{
+       return 0;
+}
+#endif /* !CONFIG_RESET_CONTROLLER */
+
+
 static const struct of_device_id cpg_mssr_match[] = {
 #ifdef CONFIG_ARCH_R8A7743
        {
@@ -557,7 +672,7 @@ static int __init cpg_mssr_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        priv->dev = dev;
-       spin_lock_init(&priv->mstp_lock);
+       spin_lock_init(&priv->rmw_lock);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        priv->base = devm_ioremap_resource(dev, res);
@@ -598,6 +713,10 @@ static int __init cpg_mssr_probe(struct platform_device *pdev)
        if (error)
                return error;
 
+       error = cpg_mssr_reset_controller_register(priv);
+       if (error)
+               return error;
+
        return 0;
 }
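
cpg_mssr_reset_xlate() above converts those same three-digit specifiers into a dense index of reg * 32 + bit, rejecting values whose bit field exceeds 31; MOD_CLK_PACK() performs the packing in the driver. The equivalent arithmetic, checked standalone:

#include <stdio.h>

static int pack(unsigned int v)
{
	if (v % 100 > 31)	/* mirrors the xlate validity check */
		return -1;
	return (v / 100) * 32 + (v % 100);
}

int main(void)
{
	/* 926 -> SRCR9/SMSTPCR9 bit 26 -> packed index 314 */
	printf("926 packs to %d\n", pack(926));
	printf("999 is rejected: %d\n", pack(999));
	return 0;
}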
 
index 16e098c36f903f542abf80bc05107fab8538eeec..141971488f409c91f0bafc63b71c52ab98019cef 100644 (file)
@@ -8,6 +8,7 @@ obj-y   += clk-pll.o
 obj-y  += clk-cpu.o
 obj-y  += clk-inverter.o
 obj-y  += clk-mmc-phase.o
+obj-y  += clk-muxgrf.o
 obj-y  += clk-ddr.o
 obj-$(CONFIG_RESET_CONTROLLER) += softrst.o
 
@@ -16,5 +17,6 @@ obj-y += clk-rk3036.o
 obj-y  += clk-rk3188.o
 obj-y  += clk-rk3228.o
 obj-y  += clk-rk3288.o
+obj-y  += clk-rk3328.o
 obj-y  += clk-rk3368.o
 obj-y  += clk-rk3399.o
diff --git a/drivers/clk/rockchip/clk-muxgrf.c b/drivers/clk/rockchip/clk-muxgrf.c
new file mode 100644 (file)
index 0000000..4f29118
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/regmap.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include "clk.h"
+
+struct rockchip_muxgrf_clock {
+       struct clk_hw           hw;
+       struct regmap           *regmap;
+       u32                     reg;
+       u32                     shift;
+       u32                     width;
+       int                     flags;
+};
+
+#define to_muxgrf_clock(_hw) container_of(_hw, struct rockchip_muxgrf_clock, hw)
+
+static u8 rockchip_muxgrf_get_parent(struct clk_hw *hw)
+{
+       struct rockchip_muxgrf_clock *mux = to_muxgrf_clock(hw);
+       unsigned int mask = GENMASK(mux->width - 1, 0);
+       unsigned int val;
+
+       regmap_read(mux->regmap, mux->reg, &val);
+
+       val >>= mux->shift;
+       val &= mask;
+
+       return val;
+}
+
+static int rockchip_muxgrf_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct rockchip_muxgrf_clock *mux = to_muxgrf_clock(hw);
+       unsigned int mask = GENMASK(mux->width + mux->shift - 1, mux->shift);
+       unsigned int val;
+
+       val = index;
+       val <<= mux->shift;
+
+       if (mux->flags & CLK_MUX_HIWORD_MASK)
+               return regmap_write(mux->regmap, mux->reg, val | (mask << 16));
+       else
+               return regmap_update_bits(mux->regmap, mux->reg, mask, val);
+}
+
+static const struct clk_ops rockchip_muxgrf_clk_ops = {
+       .get_parent = rockchip_muxgrf_get_parent,
+       .set_parent = rockchip_muxgrf_set_parent,
+       .determine_rate = __clk_mux_determine_rate,
+};
+
+struct clk *rockchip_clk_register_muxgrf(const char *name,
+                               const char *const *parent_names, u8 num_parents,
+                               int flags, struct regmap *regmap, int reg,
+                               int shift, int width, int mux_flags)
+{
+       struct rockchip_muxgrf_clock *muxgrf_clock;
+       struct clk_init_data init;
+       struct clk *clk;
+
+       if (IS_ERR(regmap)) {
+               pr_err("%s: regmap not available\n", __func__);
+               return ERR_PTR(-ENOTSUPP);
+       }
+
+       muxgrf_clock = kmalloc(sizeof(*muxgrf_clock), GFP_KERNEL);
+       if (!muxgrf_clock)
+               return ERR_PTR(-ENOMEM);
+
+       init.name = name;
+       init.flags = flags;
+       init.num_parents = num_parents;
+       init.parent_names = parent_names;
+       init.ops = &rockchip_muxgrf_clk_ops;
+
+       muxgrf_clock->hw.init = &init;
+       muxgrf_clock->regmap = regmap;
+       muxgrf_clock->reg = reg;
+       muxgrf_clock->shift = shift;
+       muxgrf_clock->width = width;
+       muxgrf_clock->flags = mux_flags;
+
+       clk = clk_register(NULL, &muxgrf_clock->hw);
+       if (IS_ERR(clk))
+               kfree(muxgrf_clock);
+
+       return clk;
+}
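
rockchip_muxgrf_set_parent() above exploits the GRF register convention in which the upper 16 bits are a write-enable mask for the lower 16: writing val | (mask << 16) updates only the selected field, with no read-modify-write and hence no lock. The arithmetic for the one-bit mux at GRF_SOC_CON0[7] that rk3288 adds below:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int shift = 7, width = 1;	/* GRF_SOC_CON0[7] */
	uint32_t mask = ((1U << width) - 1) << shift;
	uint32_t index = 1;			/* select the second parent */
	uint32_t write_val = (index << shift) | (mask << 16);

	/* prints 0x00800080: only bit 7 is latched by the hardware */
	printf("write 0x%08x\n", write_val);
	return 0;
}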
index 6ed605776abd1cbf5002cca73248e6b300e5f33c..eec51893a7e66532ef9680a3406017ea1b0cf9f4 100644 (file)
@@ -29,6 +29,7 @@
 #define PLL_MODE_SLOW          0x0
 #define PLL_MODE_NORM          0x1
 #define PLL_MODE_DEEP          0x2
+#define PLL_RK3328_MODE_MASK   0x1
 
 struct rockchip_clk_pll {
        struct clk_hw           hw;
@@ -848,7 +849,8 @@ struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx,
        struct clk *pll_clk, *mux_clk;
        char pll_name[20];
 
-       if (num_parents != 2) {
+       if ((pll_type != pll_rk3328 && num_parents != 2) ||
+           (pll_type == pll_rk3328 && num_parents != 1)) {
                pr_err("%s: needs two parent clocks\n", __func__);
                return ERR_PTR(-EINVAL);
        }
@@ -865,13 +867,17 @@ struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx,
        pll_mux = &pll->pll_mux;
        pll_mux->reg = ctx->reg_base + mode_offset;
        pll_mux->shift = mode_shift;
-       pll_mux->mask = PLL_MODE_MASK;
+       if (pll_type == pll_rk3328)
+               pll_mux->mask = PLL_RK3328_MODE_MASK;
+       else
+               pll_mux->mask = PLL_MODE_MASK;
        pll_mux->flags = 0;
        pll_mux->lock = &ctx->lock;
        pll_mux->hw.init = &init;
 
        if (pll_type == pll_rk3036 ||
            pll_type == pll_rk3066 ||
+           pll_type == pll_rk3328 ||
            pll_type == pll_rk3399)
                pll_mux->flags |= CLK_MUX_HIWORD_MASK;
 
@@ -884,7 +890,10 @@ struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx,
        init.flags = CLK_SET_RATE_PARENT;
        init.ops = pll->pll_mux_ops;
        init.parent_names = pll_parents;
-       init.num_parents = ARRAY_SIZE(pll_parents);
+       if (pll_type == pll_rk3328)
+               init.num_parents = 2;
+       else
+               init.num_parents = ARRAY_SIZE(pll_parents);
 
        mux_clk = clk_register(NULL, &pll_mux->hw);
        if (IS_ERR(mux_clk))
@@ -918,6 +927,7 @@ struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx,
 
        switch (pll_type) {
        case pll_rk3036:
+       case pll_rk3328:
                if (!pll->rate_table || IS_ERR(ctx->grf))
                        init.ops = &rockchip_rk3036_pll_clk_norate_ops;
                else
index 062ef4960244a54ba1f8c85a45d4bc3449111f39..00ad0e5f8d6661ee767be492cb35a407ea83b873 100644 (file)
@@ -507,8 +507,8 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = {
        GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS),
        GATE(PCLK_EFUSE, "pclk_efuse", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 2, GFLAGS),
        GATE(PCLK_TZPC, "pclk_tzpc", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 3, GFLAGS),
-       GATE(0, "pclk_ddrupctl", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 7, GFLAGS),
-       GATE(0, "pclk_ddrpubl", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS),
+       GATE(PCLK_DDRUPCTL, "pclk_ddrupctl", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 7, GFLAGS),
+       GATE(PCLK_PUBL, "pclk_ddrpubl", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS),
        GATE(0, "pclk_dbg", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 1, GFLAGS),
        GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS),
        GATE(PCLK_PMU, "pclk_pmu", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 5, GFLAGS),
index 39af05a589b3c59a25902e83bf660ce7d409b7ca..68ba7d4105e76ee3d202c6ccd940ddf15937ebd5 100644 (file)
@@ -198,6 +198,7 @@ PNAME(mux_hsadcout_p)       = { "hsadc_src", "ext_hsadc" };
 PNAME(mux_edp_24m_p)   = { "ext_edp_24m", "xin24m" };
 PNAME(mux_tspout_p)    = { "cpll", "gpll", "npll", "xin27m" };
 
+PNAME(mux_aclk_vcodec_pre_p)   = { "aclk_vepu", "aclk_vdpu" };
 PNAME(mux_usbphy480m_p)                = { "sclk_otgphy1_480m", "sclk_otgphy2_480m",
                                    "sclk_otgphy0_480m" };
 PNAME(mux_hsicphy480m_p)       = { "cpll", "gpll", "usbphy480m_src" };
@@ -398,14 +399,12 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
        COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_usb480m_p, 0,
                        RK3288_CLKSEL_CON(32), 14, 2, MFLAGS, 8, 5, DFLAGS,
                        RK3288_CLKGATE_CON(3), 11, GFLAGS),
-       /*
-        * We use aclk_vdpu by default GRF_SOC_CON0[7] setting in system,
-        * so we ignore the mux and make clocks nodes as following,
-        */
-       GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vdpu", 0,
+       MUXGRF(0, "aclk_vcodec_pre", mux_aclk_vcodec_pre_p, 0,
+                       RK3288_GRF_SOC_CON(0), 7, 1, MFLAGS),
+       GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vcodec_pre", 0,
                RK3288_CLKGATE_CON(9), 0, GFLAGS),
 
-       FACTOR_GATE(0, "hclk_vcodec_pre", "aclk_vdpu", 0, 1, 4,
+       FACTOR_GATE(0, "hclk_vcodec_pre", "aclk_vcodec_pre", 0, 1, 4,
                RK3288_CLKGATE_CON(3), 10, GFLAGS),
 
        GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_vcodec_pre", 0,
@@ -469,7 +468,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
        COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0,
                        RK3288_CLKSEL_CON(26), 8, 1, MFLAGS,
                        RK3288_CLKGATE_CON(3), 7, GFLAGS),
-       COMPOSITE_NOGATE(0, "sclk_vip_out", mux_vip_out_p, 0,
+       COMPOSITE_NOGATE(SCLK_VIP_OUT, "sclk_vip_out", mux_vip_out_p, 0,
                        RK3288_CLKSEL_CON(26), 15, 1, MFLAGS, 9, 5, DFLAGS),
 
        DIV(0, "pclk_pd_alive", "gpll", 0,
@@ -690,7 +689,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
        /* aclk_peri gates */
        GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 2, GFLAGS),
        GATE(ACLK_DMAC2, "aclk_dmac2", "aclk_peri", 0, RK3288_CLKGATE_CON(6), 3, GFLAGS),
-       GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 11, GFLAGS),
+       GATE(0, "aclk_peri_niu", "aclk_peri", 0, RK3288_CLKGATE_CON(7), 11, GFLAGS),
        GATE(ACLK_MMU, "aclk_mmu", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(8), 12, GFLAGS),
        GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 0, GFLAGS),
        GATE(HCLK_GPS, "hclk_gps", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 2, GFLAGS),
@@ -753,12 +752,12 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
        GATE(PCLK_GPIO5, "pclk_gpio5", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 5, GFLAGS),
        GATE(PCLK_GPIO6, "pclk_gpio6", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 6, GFLAGS),
        GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(14), 11, GFLAGS),
-       GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(14), 12, GFLAGS),
+       GATE(0, "pclk_alive_niu", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 12, GFLAGS),
 
        /* pclk_pd_pmu gates */
        GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 0, GFLAGS),
        GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 1, GFLAGS),
-       GATE(0, "pclk_pmu_niu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 2, GFLAGS),
+       GATE(0, "pclk_pmu_niu", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 2, GFLAGS),
        GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 3, GFLAGS),
        GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 4, GFLAGS),
 
@@ -767,7 +766,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
        GATE(HCLK_VOP0, "hclk_vop0", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 6, GFLAGS),
        GATE(HCLK_VOP1, "hclk_vop1", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 8, GFLAGS),
        GATE(HCLK_VIO_AHB_ARBI, "hclk_vio_ahb_arbi", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 9, GFLAGS),
-       GATE(HCLK_VIO_NIU, "hclk_vio_niu", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 10, GFLAGS),
+       GATE(HCLK_VIO_NIU, "hclk_vio_niu", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 10, GFLAGS),
        GATE(HCLK_VIP, "hclk_vip", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 15, GFLAGS),
        GATE(HCLK_IEP, "hclk_iep", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 3, GFLAGS),
        GATE(HCLK_ISP, "hclk_isp", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 1, GFLAGS),
@@ -783,17 +782,17 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
        /* aclk_vio0 gates */
        GATE(ACLK_VOP0, "aclk_vop0", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 5, GFLAGS),
        GATE(ACLK_IEP, "aclk_iep", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 2, GFLAGS),
-       GATE(ACLK_VIO0_NIU, "aclk_vio0_niu", "aclk_vio0", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 11, GFLAGS),
+       GATE(ACLK_VIO0_NIU, "aclk_vio0_niu", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 11, GFLAGS),
        GATE(ACLK_VIP, "aclk_vip", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 14, GFLAGS),
 
        /* aclk_vio1 gates */
        GATE(ACLK_VOP1, "aclk_vop1", "aclk_vio1", 0, RK3288_CLKGATE_CON(15), 7, GFLAGS),
        GATE(ACLK_ISP, "aclk_isp", "aclk_vio1", 0, RK3288_CLKGATE_CON(16), 2, GFLAGS),
-       GATE(ACLK_VIO1_NIU, "aclk_vio1_niu", "aclk_vio1", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 12, GFLAGS),
+       GATE(ACLK_VIO1_NIU, "aclk_vio1_niu", "aclk_vio1", 0, RK3288_CLKGATE_CON(15), 12, GFLAGS),
 
        /* aclk_rga_pre gates */
        GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3288_CLKGATE_CON(15), 0, GFLAGS),
-       GATE(ACLK_RGA_NIU, "aclk_rga_niu", "aclk_rga_pre", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 13, GFLAGS),
+       GATE(ACLK_RGA_NIU, "aclk_rga_niu", "aclk_rga_pre", 0, RK3288_CLKGATE_CON(15), 13, GFLAGS),
 
        /*
         * Other ungrouped clocks.
@@ -801,15 +800,22 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
 
        GATE(0, "pclk_vip_in", "ext_vip", 0, RK3288_CLKGATE_CON(16), 0, GFLAGS),
        INVERTER(0, "pclk_vip", "pclk_vip_in", RK3288_CLKSEL_CON(29), 4, IFLAGS),
-       GATE(0, "pclk_isp_in", "ext_isp", 0, RK3288_CLKGATE_CON(16), 3, GFLAGS),
+       GATE(PCLK_ISP_IN, "pclk_isp_in", "ext_isp", 0, RK3288_CLKGATE_CON(16), 3, GFLAGS),
        INVERTER(0, "pclk_isp", "pclk_isp_in", RK3288_CLKSEL_CON(29), 3, IFLAGS),
 };
 
 static const char *const rk3288_critical_clocks[] __initconst = {
        "aclk_cpu",
        "aclk_peri",
+       "aclk_peri_niu",
+       "aclk_vio0_niu",
+       "aclk_vio1_niu",
+       "aclk_rga_niu",
        "hclk_peri",
+       "hclk_vio_niu",
+       "pclk_alive_niu",
        "pclk_pd_pmu",
+       "pclk_pmu_niu",
 };
 
 static void __iomem *rk3288_cru_base;
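
The rk3288 hunks above stop hiding the NIU (interconnect) gates behind CLK_IGNORE_UNUSED and instead list them as critical clocks. rockchip_clk_protect_critical() consumes such a list by looking each name up and taking a permanent enable reference; the call site sketched here is illustrative:

#include <linux/init.h>
#include <linux/kernel.h>
#include "clk.h"	/* rockchip clk driver internals */

static void __init example_protect(void)
{
	rockchip_clk_protect_critical(rk3288_critical_clocks,
				      ARRAY_SIZE(rk3288_critical_clocks));
}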
diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c
new file mode 100644 (file)
index 0000000..1e384e1
--- /dev/null
@@ -0,0 +1,895 @@
+/*
+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd.
+ * Author: Elaine <zhangqing@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/syscore_ops.h>
+#include <dt-bindings/clock/rk3328-cru.h>
+#include "clk.h"
+
+#define RK3328_GRF_SOC_STATUS0         0x480
+#define RK3328_GRF_MAC_CON1            0x904
+#define RK3328_GRF_MAC_CON2            0x908
+
+enum rk3328_plls {
+       apll, dpll, cpll, gpll, npll,
+};
+
+static struct rockchip_pll_rate_table rk3328_pll_rates[] = {
+       /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */
+       RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0),
+       RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0),
+       RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0),
+       RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0),
+       RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0),
+       RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0),
+       RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0),
+       RK3036_PLL_RATE(900000000, 4, 300, 2, 1, 1, 0),
+       RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0),
+       RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0),
+       RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0),
+       RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0),
+       RK3036_PLL_RATE(800000000, 6, 400, 2, 1, 1, 0),
+       RK3036_PLL_RATE(700000000, 6, 350, 2, 1, 1, 0),
+       RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0),
+       RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0),
+       RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0),
+       RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0),
+       RK3036_PLL_RATE(500000000, 6, 250, 2, 1, 1, 0),
+       RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0),
+       RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0),
+       RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0),
+       RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0),
+       { /* sentinel */ },
+};
+
+static struct rockchip_pll_rate_table rk3328_pll_frac_rates[] = {
+       /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */
+       RK3036_PLL_RATE(1016064000, 3, 127, 1, 1, 0, 134217),
+       /* vco = 1016064000 */
+       RK3036_PLL_RATE(983040000, 24, 983, 1, 1, 0, 671088),
+       /* vco = 983040000 */
+       RK3036_PLL_RATE(491520000, 24, 983, 2, 1, 0, 671088),
+       /* vco = 983040000 */
+       RK3036_PLL_RATE(61440000, 6, 215, 7, 2, 0, 671088),
+       /* vco = 860156000 */
+       RK3036_PLL_RATE(56448000, 12, 451, 4, 4, 0, 9797894),
+       /* vco = 903168000 */
+       RK3036_PLL_RATE(40960000, 12, 409, 4, 5, 0, 10066329),
+       /* vco = 819200000 */
+       { /* sentinel */ },
+};
+
+#define RK3328_DIV_ACLKM_MASK          0x7
+#define RK3328_DIV_ACLKM_SHIFT         4
+#define RK3328_DIV_PCLK_DBG_MASK       0xf
+#define RK3328_DIV_PCLK_DBG_SHIFT      0
+
+#define RK3328_CLKSEL1(_aclk_core, _pclk_dbg)                          \
+{                                                                      \
+       .reg = RK3328_CLKSEL_CON(1),                                    \
+       .val = HIWORD_UPDATE(_aclk_core, RK3328_DIV_ACLKM_MASK,         \
+                            RK3328_DIV_ACLKM_SHIFT) |                  \
+              HIWORD_UPDATE(_pclk_dbg, RK3328_DIV_PCLK_DBG_MASK,       \
+                            RK3328_DIV_PCLK_DBG_SHIFT),                \
+}
+
+#define RK3328_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg)              \
+{                                                                      \
+       .prate = _prate,                                                \
+       .divs = {                                                       \
+               RK3328_CLKSEL1(_aclk_core, _pclk_dbg),                  \
+       },                                                              \
+}
+
+static struct rockchip_cpuclk_rate_table rk3328_cpuclk_rates[] __initdata = {
+       RK3328_CPUCLK_RATE(1800000000, 1, 7),
+       RK3328_CPUCLK_RATE(1704000000, 1, 7),
+       RK3328_CPUCLK_RATE(1608000000, 1, 7),
+       RK3328_CPUCLK_RATE(1512000000, 1, 7),
+       RK3328_CPUCLK_RATE(1488000000, 1, 5),
+       RK3328_CPUCLK_RATE(1416000000, 1, 5),
+       RK3328_CPUCLK_RATE(1392000000, 1, 5),
+       RK3328_CPUCLK_RATE(1296000000, 1, 5),
+       RK3328_CPUCLK_RATE(1200000000, 1, 5),
+       RK3328_CPUCLK_RATE(1104000000, 1, 5),
+       RK3328_CPUCLK_RATE(1008000000, 1, 5),
+       RK3328_CPUCLK_RATE(912000000, 1, 5),
+       RK3328_CPUCLK_RATE(816000000, 1, 3),
+       RK3328_CPUCLK_RATE(696000000, 1, 3),
+       RK3328_CPUCLK_RATE(600000000, 1, 3),
+       RK3328_CPUCLK_RATE(408000000, 1, 1),
+       RK3328_CPUCLK_RATE(312000000, 1, 1),
+       RK3328_CPUCLK_RATE(216000000,  1, 1),
+       RK3328_CPUCLK_RATE(96000000, 1, 1),
+};
+
+static const struct rockchip_cpuclk_reg_data rk3328_cpuclk_data = {
+       .core_reg = RK3328_CLKSEL_CON(0),
+       .div_core_shift = 0,
+       .div_core_mask = 0x1f,
+       .mux_core_alt = 1,
+       .mux_core_main = 3,
+       .mux_core_shift = 6,
+       .mux_core_mask = 0x3,
+};
+
+PNAME(mux_pll_p)               = { "xin24m" };
+
+PNAME(mux_2plls_p)             = { "cpll", "gpll" };
+PNAME(mux_gpll_cpll_p)         = { "gpll", "cpll" };
+PNAME(mux_cpll_gpll_apll_p)    = { "cpll", "gpll", "apll" };
+PNAME(mux_2plls_xin24m_p)      = { "cpll", "gpll", "xin24m" };
+PNAME(mux_2plls_hdmiphy_p)     = { "cpll", "gpll",
+                                   "dummy_hdmiphy" };
+PNAME(mux_4plls_p)             = { "cpll", "gpll",
+                                   "dummy_hdmiphy",
+                                   "usb480m" };
+PNAME(mux_2plls_u480m_p)       = { "cpll", "gpll",
+                                   "usb480m" };
+PNAME(mux_2plls_24m_u480m_p)   = { "cpll", "gpll",
+                                    "xin24m", "usb480m" };
+
+PNAME(mux_ddrphy_p)            = { "dpll", "apll", "cpll" };
+PNAME(mux_armclk_p)            = { "apll_core",
+                                   "gpll_core",
+                                   "dpll_core",
+                                   "npll_core" };
+PNAME(mux_hdmiphy_p)           = { "hdmi_phy", "xin24m" };
+PNAME(mux_usb480m_p)           = { "usb480m_phy",
+                                   "xin24m" };
+
+PNAME(mux_i2s0_p)              = { "clk_i2s0_div",
+                                   "clk_i2s0_frac",
+                                   "xin12m",
+                                   "xin12m" };
+PNAME(mux_i2s1_p)              = { "clk_i2s1_div",
+                                   "clk_i2s1_frac",
+                                   "clkin_i2s1",
+                                   "xin12m" };
+PNAME(mux_i2s2_p)              = { "clk_i2s2_div",
+                                   "clk_i2s2_frac",
+                                   "clkin_i2s2",
+                                   "xin12m" };
+PNAME(mux_i2s1out_p)           = { "clk_i2s1", "xin12m" };
+PNAME(mux_i2s2out_p)           = { "clk_i2s2", "xin12m" };
+PNAME(mux_spdif_p)             = { "clk_spdif_div",
+                                   "clk_spdif_frac",
+                                   "xin12m",
+                                   "xin12m" };
+PNAME(mux_uart0_p)             = { "clk_uart0_div",
+                                   "clk_uart0_frac",
+                                   "xin24m" };
+PNAME(mux_uart1_p)             = { "clk_uart1_div",
+                                   "clk_uart1_frac",
+                                   "xin24m" };
+PNAME(mux_uart2_p)             = { "clk_uart2_div",
+                                   "clk_uart2_frac",
+                                   "xin24m" };
+
+PNAME(mux_sclk_cif_p)          = { "clk_cif_src",
+                                   "xin24m" };
+PNAME(mux_dclk_lcdc_p)         = { "hdmiphy",
+                                   "dclk_lcdc_src" };
+PNAME(mux_aclk_peri_pre_p)     = { "cpll_peri",
+                                   "gpll_peri",
+                                   "hdmiphy_peri" };
+PNAME(mux_ref_usb3otg_src_p)   = { "xin24m",
+                                   "clk_usb3otg_ref" };
+PNAME(mux_xin24m_32k_p)                = { "xin24m",
+                                   "clk_rtc32k" };
+PNAME(mux_mac2io_src_p)                = { "clk_mac2io_src",
+                                   "gmac_clkin" };
+PNAME(mux_mac2phy_src_p)       = { "clk_mac2phy_src",
+                                   "phy_50m_out" };
+
+static struct rockchip_pll_clock rk3328_pll_clks[] __initdata = {
+       [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p,
+                    0, RK3328_PLL_CON(0),
+                    RK3328_MODE_CON, 0, 4, 0, rk3328_pll_frac_rates),
+       [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p,
+                    0, RK3328_PLL_CON(8),
+                    RK3328_MODE_CON, 4, 3, 0, NULL),
+       [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p,
+                    0, RK3328_PLL_CON(16),
+                    RK3328_MODE_CON, 8, 2, 0, rk3328_pll_rates),
+       [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p,
+                    0, RK3328_PLL_CON(24),
+                    RK3328_MODE_CON, 12, 1, 0, rk3328_pll_frac_rates),
+       [npll] = PLL(pll_rk3328, PLL_NPLL, "npll", mux_pll_p,
+                    0, RK3328_PLL_CON(40),
+                    RK3328_MODE_CON, 1, 0, 0, rk3328_pll_rates),
+};
+
+#define MFLAGS CLK_MUX_HIWORD_MASK
+#define DFLAGS CLK_DIVIDER_HIWORD_MASK
+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE)
+
+static struct rockchip_clk_branch rk3328_i2s0_fracmux __initdata =
+       MUX(0, "i2s0_pre", mux_i2s0_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(6), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3328_i2s1_fracmux __initdata =
+       MUX(0, "i2s1_pre", mux_i2s1_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(8), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3328_i2s2_fracmux __initdata =
+       MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(10), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3328_spdif_fracmux __initdata =
+       MUX(SCLK_SPDIF, "sclk_spdif", mux_spdif_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(12), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3328_uart0_fracmux __initdata =
+       MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(14), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3328_uart1_fracmux __initdata =
+       MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(16), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3328_uart2_fracmux __initdata =
+       MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(18), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
+       /*
+        * Clock-Architecture Diagram 1
+        */
+
+       DIV(0, "clk_24m", "xin24m", CLK_IGNORE_UNUSED,
+                       RK3328_CLKSEL_CON(2), 8, 5, DFLAGS),
+       COMPOSITE(SCLK_RTC32K, "clk_rtc32k", mux_2plls_xin24m_p, 0,
+                       RK3328_CLKSEL_CON(38), 14, 2, MFLAGS, 0, 14, DFLAGS,
+                       RK3328_CLKGATE_CON(0), 11, GFLAGS),
+
+       /* PD_MISC */
+       MUX(HDMIPHY, "hdmiphy", mux_hdmiphy_p, CLK_SET_RATE_PARENT,
+                       RK3328_MISC_CON, 13, 1, MFLAGS),
+       MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT,
+                       RK3328_MISC_CON, 15, 1, MFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 2
+        */
+
+       /* PD_CORE */
+       GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(0), 0, GFLAGS),
+       GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(0), 2, GFLAGS),
+       GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(0), 1, GFLAGS),
+       GATE(0, "npll_core", "npll", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(0), 12, GFLAGS),
+       COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED,
+                       RK3328_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
+                       RK3328_CLKGATE_CON(7), 0, GFLAGS),
+       COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED,
+                       RK3328_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
+                       RK3328_CLKGATE_CON(7), 1, GFLAGS),
+       GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(13), 0, GFLAGS),
+       GATE(0, "aclk_gic400", "aclk_core", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(13), 1, GFLAGS),
+
+       GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(7), 2, GFLAGS),
+
+       /* PD_GPU */
+       COMPOSITE(0, "aclk_gpu_pre", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(6), 6, GFLAGS),
+       GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_pre", CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(14), 0, GFLAGS),
+       GATE(0, "aclk_gpu_niu", "aclk_gpu_pre", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(14), 1, GFLAGS),
+
+       /* PD_DDR */
+       COMPOSITE(0, "clk_ddr", mux_ddrphy_p, CLK_IGNORE_UNUSED,
+                       RK3328_CLKSEL_CON(3), 8, 2, MFLAGS, 0, 3, DFLAGS | CLK_DIVIDER_POWER_OF_TWO,
+                       RK3328_CLKGATE_CON(0), 4, GFLAGS),
+       GATE(0, "clk_ddrmsch", "clk_ddr", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(18), 6, GFLAGS),
+       GATE(0, "clk_ddrupctl", "clk_ddr", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(18), 5, GFLAGS),
+       GATE(0, "aclk_ddrupctl", "clk_ddr", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(18), 4, GFLAGS),
+       GATE(0, "clk_ddrmon", "xin24m", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(0), 6, GFLAGS),
+
+       COMPOSITE(PCLK_DDR, "pclk_ddr", mux_2plls_hdmiphy_p, 0,
+                       RK3328_CLKSEL_CON(4), 13, 2, MFLAGS, 8, 3, DFLAGS,
+                       RK3328_CLKGATE_CON(7), 4, GFLAGS),
+       GATE(0, "pclk_ddrupctl", "pclk_ddr", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(18), 1, GFLAGS),
+       GATE(0, "pclk_ddr_msch", "pclk_ddr", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(18), 2, GFLAGS),
+       GATE(0, "pclk_ddr_mon", "pclk_ddr", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(18), 3, GFLAGS),
+       GATE(0, "pclk_ddrstdby", "pclk_ddr", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(18), 7, GFLAGS),
+       GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(18), 9, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 3
+        */
+
+       /* PD_BUS */
+       COMPOSITE(ACLK_BUS_PRE, "aclk_bus_pre", mux_2plls_hdmiphy_p, 0,
+                       RK3328_CLKSEL_CON(0), 13, 2, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(8), 0, GFLAGS),
+       COMPOSITE_NOMUX(HCLK_BUS_PRE, "hclk_bus_pre", "aclk_bus_pre", 0,
+                       RK3328_CLKSEL_CON(1), 8, 2, DFLAGS,
+                       RK3328_CLKGATE_CON(8), 1, GFLAGS),
+       COMPOSITE_NOMUX(PCLK_BUS_PRE, "pclk_bus_pre", "aclk_bus_pre", 0,
+                       RK3328_CLKSEL_CON(1), 12, 3, DFLAGS,
+                       RK3328_CLKGATE_CON(8), 2, GFLAGS),
+       GATE(0, "pclk_bus", "pclk_bus_pre", 0,
+                       RK3328_CLKGATE_CON(8), 3, GFLAGS),
+       GATE(0, "pclk_phy_pre", "pclk_bus_pre", 0,
+                       RK3328_CLKGATE_CON(8), 4, GFLAGS),
+
+       COMPOSITE(SCLK_TSP, "clk_tsp", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(21), 15, 1, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 5, GFLAGS),
+       GATE(0, "clk_hsadc_tsp", "ext_gpio3a2", 0,
+                       RK3328_CLKGATE_CON(17), 13, GFLAGS),
+
+       /* PD_I2S */
+       COMPOSITE(0, "clk_i2s0_div", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(1), 1, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(7), 0,
+                       RK3328_CLKGATE_CON(1), 2, GFLAGS,
+                       &rk3328_i2s0_fracmux),
+       GATE(SCLK_I2S0, "clk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(1), 3, GFLAGS),
+
+       COMPOSITE(0, "clk_i2s1_div", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(8), 15, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(1), 4, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(9), 0,
+                       RK3328_CLKGATE_CON(1), 5, GFLAGS,
+                       &rk3328_i2s1_fracmux),
+       GATE(SCLK_I2S1, "clk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(0), 6, GFLAGS),
+       COMPOSITE_NODIV(SCLK_I2S1_OUT, "i2s1_out", mux_i2s1out_p, 0,
+                       RK3328_CLKSEL_CON(8), 12, 1, MFLAGS,
+                       RK3328_CLKGATE_CON(1), 7, GFLAGS),
+
+       COMPOSITE(0, "clk_i2s2_div", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(1), 8, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(11), 0,
+                       RK3328_CLKGATE_CON(1), 9, GFLAGS,
+                       &rk3328_i2s2_fracmux),
+       GATE(SCLK_I2S2, "clk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(1), 10, GFLAGS),
+       COMPOSITE_NODIV(SCLK_I2S2_OUT, "i2s2_out", mux_i2s2out_p, 0,
+                       RK3328_CLKSEL_CON(10), 12, 1, MFLAGS,
+                       RK3328_CLKGATE_CON(1), 11, GFLAGS),
+
+       COMPOSITE(0, "clk_spdif_div", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(12), 15, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(1), 12, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_spdif_frac", "clk_spdif_div", CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(13), 0,
+                       RK3328_CLKGATE_CON(1), 13, GFLAGS,
+                       &rk3328_spdif_fracmux),
+
+       /* PD_UART */
+       COMPOSITE(0, "clk_uart0_div", mux_2plls_u480m_p, 0,
+                       RK3328_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(1), 14, GFLAGS),
+       COMPOSITE(0, "clk_uart1_div", mux_2plls_u480m_p, 0,
+                       RK3328_CLKSEL_CON(16), 12, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 0, GFLAGS),
+       COMPOSITE(0, "clk_uart2_div", mux_2plls_u480m_p, 0,
+                       RK3328_CLKSEL_CON(18), 12, 2, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 2, GFLAGS),
+       COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_div", CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(15), 0,
+                       RK3328_CLKGATE_CON(1), 15, GFLAGS,
+                       &rk3328_uart0_fracmux),
+       COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_div", CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(17), 0,
+                       RK3328_CLKGATE_CON(2), 1, GFLAGS,
+                       &rk3328_uart1_fracmux),
+       COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_div", CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(19), 0,
+                       RK3328_CLKGATE_CON(2), 3, GFLAGS,
+                       &rk3328_uart2_fracmux),
+
+       /*
+        * Clock-Architecture Diagram 4
+        */
+
+       COMPOSITE(SCLK_I2C0, "clk_i2c0", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(34), 7, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 9, GFLAGS),
+       COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(34), 15, 1, MFLAGS, 8, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 10, GFLAGS),
+       COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(35), 7, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 11, GFLAGS),
+       COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(35), 15, 1, MFLAGS, 8, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 12, GFLAGS),
+       COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 4, GFLAGS),
+       COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "clk_24m", 0,
+                       RK3328_CLKSEL_CON(22), 0, 10, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 6, GFLAGS),
+       COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "clk_24m", 0,
+                       RK3328_CLKSEL_CON(23), 0, 10, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 14, GFLAGS),
+       COMPOSITE(SCLK_SPI, "clk_spi", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(24), 7, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 7, GFLAGS),
+       COMPOSITE(SCLK_PWM, "clk_pwm", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(24), 15, 1, MFLAGS, 8, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 8, GFLAGS),
+       COMPOSITE(SCLK_OTP, "clk_otp", mux_2plls_xin24m_p, 0,
+                       RK3328_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 6, DFLAGS,
+                       RK3328_CLKGATE_CON(3), 8, GFLAGS),
+       COMPOSITE(SCLK_EFUSE, "clk_efuse", mux_2plls_xin24m_p, 0,
+                       RK3328_CLKSEL_CON(5), 14, 2, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 13, GFLAGS),
+       COMPOSITE(SCLK_PDM, "clk_pdm", mux_cpll_gpll_apll_p, CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(2), 15, GFLAGS),
+
+       GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0,
+                       RK3328_CLKGATE_CON(8), 5, GFLAGS),
+       GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0,
+                       RK3328_CLKGATE_CON(8), 6, GFLAGS),
+       GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0,
+                       RK3328_CLKGATE_CON(8), 7, GFLAGS),
+       GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0,
+                       RK3328_CLKGATE_CON(8), 8, GFLAGS),
+       GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0,
+                       RK3328_CLKGATE_CON(8), 9, GFLAGS),
+       GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0,
+                       RK3328_CLKGATE_CON(8), 10, GFLAGS),
+
+       COMPOSITE(SCLK_WIFI, "clk_wifi", mux_2plls_u480m_p, 0,
+                       RK3328_CLKSEL_CON(52), 6, 2, MFLAGS, 0, 6, DFLAGS,
+                       RK3328_CLKGATE_CON(0), 10, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 5
+        */
+
+       /* PD_VIDEO */
+       COMPOSITE(ACLK_RKVDEC_PRE, "aclk_rkvdec_pre", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(48), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(6), 0, GFLAGS),
+       FACTOR_GATE(HCLK_RKVDEC_PRE, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, 1, 4,
+                       RK3328_CLKGATE_CON(11), 0, GFLAGS),
+       GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(24), 0, GFLAGS),
+       GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(24), 1, GFLAGS),
+       GATE(0, "aclk_rkvdec_niu", "aclk_rkvdec_pre", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(24), 2, GFLAGS),
+       GATE(0, "hclk_rkvdec_niu", "hclk_rkvdec_pre", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(24), 3, GFLAGS),
+
+       COMPOSITE(SCLK_VDEC_CABAC, "sclk_vdec_cabac", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(48), 14, 2, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(6), 1, GFLAGS),
+
+       COMPOSITE(SCLK_VDEC_CORE, "sclk_vdec_core", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(49), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(6), 2, GFLAGS),
+
+       COMPOSITE(ACLK_VPU_PRE, "aclk_vpu_pre", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(50), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(6), 5, GFLAGS),
+       FACTOR_GATE(HCLK_VPU_PRE, "hclk_vpu_pre", "aclk_vpu_pre", 0, 1, 4,
+                       RK3328_CLKGATE_CON(11), 8, GFLAGS),
+       GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(23), 0, GFLAGS),
+       GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(23), 1, GFLAGS),
+       GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(23), 2, GFLAGS),
+       GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(23), 3, GFLAGS),
+
+       COMPOSITE(ACLK_RKVENC, "aclk_rkvenc", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(51), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(6), 3, GFLAGS),
+       FACTOR_GATE(HCLK_RKVENC, "hclk_rkvenc", "aclk_rkvenc", 0, 1, 4,
+                       RK3328_CLKGATE_CON(11), 4, GFLAGS),
+       GATE(0, "aclk_rkvenc_niu", "aclk_rkvenc", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(25), 0, GFLAGS),
+       GATE(0, "hclk_rkvenc_niu", "hclk_rkvenc", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(25), 1, GFLAGS),
+       GATE(ACLK_H265, "aclk_h265", "aclk_rkvenc", 0,
+                       RK3328_CLKGATE_CON(25), 0, GFLAGS),
+       GATE(PCLK_H265, "pclk_h265", "hclk_rkvenc", 0,
+                       RK3328_CLKGATE_CON(25), 1, GFLAGS),
+       GATE(ACLK_H264, "aclk_h264", "aclk_rkvenc", 0,
+                       RK3328_CLKGATE_CON(25), 0, GFLAGS),
+       GATE(HCLK_H264, "hclk_h264", "hclk_rkvenc", 0,
+                       RK3328_CLKGATE_CON(25), 1, GFLAGS),
+       GATE(ACLK_AXISRAM, "aclk_axisram", "aclk_rkvenc", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(25), 0, GFLAGS),
+
+       COMPOSITE(SCLK_VENC_CORE, "sclk_venc_core", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(51), 14, 2, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(6), 4, GFLAGS),
+
+       COMPOSITE(SCLK_VENC_DSP, "sclk_venc_dsp", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(52), 14, 2, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(6), 7, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 6
+        */
+
+       /* PD_VIO */
+       COMPOSITE(ACLK_VIO_PRE, "aclk_vio_pre", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(37), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(5), 2, GFLAGS),
+       DIV(HCLK_VIO_PRE, "hclk_vio_pre", "aclk_vio_pre", 0,
+                       RK3328_CLKSEL_CON(37), 8, 5, DFLAGS),
+
+       COMPOSITE(ACLK_RGA_PRE, "aclk_rga_pre", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(36), 14, 2, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(5), 0, GFLAGS),
+       COMPOSITE(SCLK_RGA, "clk_rga", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(36), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(5), 1, GFLAGS),
+       COMPOSITE(ACLK_VOP_PRE, "aclk_vop_pre", mux_4plls_p, 0,
+                       RK3328_CLKSEL_CON(39), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(5), 5, GFLAGS),
+       GATE(0, "clk_hdmi_sfc", "xin24m", 0,
+                       RK3328_CLKGATE_CON(5), 4, GFLAGS),
+
+       COMPOSITE_NODIV(0, "clk_cif_src", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(42), 7, 1, MFLAGS,
+                       RK3328_CLKGATE_CON(5), 3, GFLAGS),
+       COMPOSITE_NOGATE(SCLK_CIF_OUT, "clk_cif_out", mux_sclk_cif_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(42), 5, 1, MFLAGS, 0, 5, DFLAGS),
+
+       COMPOSITE(DCLK_LCDC_SRC, "dclk_lcdc_src", mux_gpll_cpll_p, 0,
+                       RK3328_CLKSEL_CON(40), 0, 1, MFLAGS, 8, 8, DFLAGS,
+                       RK3328_CLKGATE_CON(5), 6, GFLAGS),
+       DIV(DCLK_HDMIPHY, "dclk_hdmiphy", "dclk_lcdc_src", 0,
+                       RK3328_CLKSEL_CON(40), 3, 3, DFLAGS),
+       MUX(DCLK_LCDC, "dclk_lcdc", mux_dclk_lcdc_p, 0,
+                       RK3328_CLKSEL_CON(40), 1, 1, MFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 7
+        */
+
+       /* PD_PERI */
+       GATE(0, "gpll_peri", "gpll", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(4), 0, GFLAGS),
+       GATE(0, "cpll_peri", "cpll", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(4), 1, GFLAGS),
+       GATE(0, "hdmiphy_peri", "hdmiphy", CLK_IGNORE_UNUSED,
+                       RK3328_CLKGATE_CON(4), 2, GFLAGS),
+       COMPOSITE_NOGATE(ACLK_PERI_PRE, "aclk_peri_pre", mux_aclk_peri_pre_p, 0,
+                       RK3328_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS),
+       COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_pre", CLK_IGNORE_UNUSED,
+                       RK3328_CLKSEL_CON(29), 0, 2, DFLAGS,
+                       RK3328_CLKGATE_CON(10), 2, GFLAGS),
+       COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_pre", CLK_IGNORE_UNUSED,
+                       RK3328_CLKSEL_CON(29), 4, 3, DFLAGS,
+                       RK3328_CLKGATE_CON(10), 1, GFLAGS),
+       GATE(ACLK_PERI, "aclk_peri", "aclk_peri_pre", CLK_IGNORE_UNUSED | CLK_SET_RATE_PARENT,
+                       RK3328_CLKGATE_CON(10), 0, GFLAGS),
+
+       COMPOSITE(SCLK_SDMMC, "clk_sdmmc", mux_2plls_24m_u480m_p, 0,
+                       RK3328_CLKSEL_CON(30), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3328_CLKGATE_CON(4), 3, GFLAGS),
+
+       COMPOSITE(SCLK_SDIO, "clk_sdio", mux_2plls_24m_u480m_p, 0,
+                       RK3328_CLKSEL_CON(31), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3328_CLKGATE_CON(4), 4, GFLAGS),
+
+       COMPOSITE(SCLK_EMMC, "clk_emmc", mux_2plls_24m_u480m_p, 0,
+                       RK3328_CLKSEL_CON(32), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3328_CLKGATE_CON(4), 5, GFLAGS),
+
+       COMPOSITE(SCLK_SDMMC_EXT, "clk_sdmmc_ext", mux_2plls_24m_u480m_p, 0,
+                       RK3328_CLKSEL_CON(43), 8, 2, MFLAGS, 0, 8, DFLAGS,
+                       RK3328_CLKGATE_CON(4), 10, GFLAGS),
+
+       COMPOSITE(SCLK_REF_USB3OTG_SRC, "clk_ref_usb3otg_src", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(45), 7, 1, MFLAGS, 0, 7, DFLAGS,
+                       RK3328_CLKGATE_CON(4), 9, GFLAGS),
+
+       MUX(SCLK_REF_USB3OTG, "clk_ref_usb3otg", mux_ref_usb3otg_src_p, CLK_SET_RATE_PARENT,
+                       RK3328_CLKSEL_CON(45), 8, 1, MFLAGS),
+
+       GATE(SCLK_USB3OTG_REF, "clk_usb3otg_ref", "xin24m", 0,
+                       RK3328_CLKGATE_CON(4), 7, GFLAGS),
+
+       COMPOSITE(SCLK_USB3OTG_SUSPEND, "clk_usb3otg_suspend", mux_xin24m_32k_p, 0,
+                       RK3328_CLKSEL_CON(33), 15, 1, MFLAGS, 0, 10, DFLAGS,
+                       RK3328_CLKGATE_CON(4), 8, GFLAGS),
+
+       /*
+        * Clock-Architecture Diagram 8
+        */
+
+       /* PD_GMAC */
+       COMPOSITE(ACLK_GMAC, "aclk_gmac", mux_2plls_hdmiphy_p, 0,
+                       RK3328_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(3), 2, GFLAGS),
+       COMPOSITE_NOMUX(PCLK_GMAC, "pclk_gmac", "aclk_gmac", 0,
+                       RK3328_CLKSEL_CON(25), 8, 3, DFLAGS,
+                       RK3328_CLKGATE_CON(9), 0, GFLAGS),
+
+       COMPOSITE(SCLK_MAC2IO_SRC, "clk_mac2io_src", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(27), 7, 1, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(3), 1, GFLAGS),
+       GATE(SCLK_MAC2IO_REF, "clk_mac2io_ref", "clk_mac2io", 0,
+                       RK3328_CLKGATE_CON(9), 7, GFLAGS),
+       GATE(SCLK_MAC2IO_RX, "clk_mac2io_rx", "clk_mac2io", 0,
+                       RK3328_CLKGATE_CON(9), 4, GFLAGS),
+       GATE(SCLK_MAC2IO_TX, "clk_mac2io_tx", "clk_mac2io", 0,
+                       RK3328_CLKGATE_CON(9), 5, GFLAGS),
+       GATE(SCLK_MAC2IO_REFOUT, "clk_mac2io_refout", "clk_mac2io", 0,
+                       RK3328_CLKGATE_CON(9), 6, GFLAGS),
+       COMPOSITE(SCLK_MAC2IO_OUT, "clk_mac2io_out", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(27), 15, 1, MFLAGS, 8, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(3), 5, GFLAGS),
+
+       COMPOSITE(SCLK_MAC2PHY_SRC, "clk_mac2phy_src", mux_2plls_p, 0,
+                       RK3328_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 5, DFLAGS,
+                       RK3328_CLKGATE_CON(3), 0, GFLAGS),
+       GATE(SCLK_MAC2PHY_REF, "clk_mac2phy_ref", "clk_mac2phy", 0,
+                       RK3328_CLKGATE_CON(9), 3, GFLAGS),
+       GATE(SCLK_MAC2PHY_RXTX, "clk_mac2phy_rxtx", "clk_mac2phy", 0,
+                       RK3328_CLKGATE_CON(9), 1, GFLAGS),
+       COMPOSITE_NOMUX(SCLK_MAC2PHY_OUT, "clk_mac2phy_out", "clk_mac2phy", 0,
+                       RK3328_CLKSEL_CON(26), 8, 2, DFLAGS,
+                       RK3328_CLKGATE_CON(9), 2, GFLAGS),
+
+       FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
+       /*
+        * Clock-Architecture Diagram 9
+        */
+
+       /* PD_VOP */
+       GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3328_CLKGATE_CON(21), 10, GFLAGS),
+       GATE(0, "aclk_rga_niu", "aclk_rga_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(22), 3, GFLAGS),
+       GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, RK3328_CLKGATE_CON(21), 2, GFLAGS),
+       GATE(0, "aclk_vop_niu", "aclk_vop_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(21), 4, GFLAGS),
+
+       GATE(ACLK_IEP, "aclk_iep", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 6, GFLAGS),
+       GATE(ACLK_CIF, "aclk_cif", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 8, GFLAGS),
+       GATE(ACLK_HDCP, "aclk_hdcp", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 15, GFLAGS),
+       GATE(0, "aclk_vio_niu", "aclk_vio_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(22), 2, GFLAGS),
+
+       GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 3, GFLAGS),
+       GATE(0, "hclk_vop_niu", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 5, GFLAGS),
+       GATE(HCLK_IEP, "hclk_iep", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 7, GFLAGS),
+       GATE(HCLK_CIF, "hclk_cif", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 9, GFLAGS),
+       GATE(HCLK_RGA, "hclk_rga", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 11, GFLAGS),
+       GATE(0, "hclk_ahb1tom", "hclk_vio_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(21), 12, GFLAGS),
+       GATE(0, "pclk_vio_h2p", "hclk_vio_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(21), 13, GFLAGS),
+       GATE(0, "hclk_vio_h2p", "hclk_vio_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(21), 14, GFLAGS),
+       GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 0, GFLAGS),
+       GATE(HCLK_VIO, "hclk_vio", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 1, GFLAGS),
+       GATE(PCLK_HDMI, "pclk_hdmi", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 4, GFLAGS),
+       GATE(PCLK_HDCP, "pclk_hdcp", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 5, GFLAGS),
+
+       /* PD_PERI */
+       GATE(0, "aclk_peri_noc", "aclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 11, GFLAGS),
+       GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_peri", 0, RK3328_CLKGATE_CON(19), 4, GFLAGS),
+
+       GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 0, GFLAGS),
+       GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 1, GFLAGS),
+       GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 2, GFLAGS),
+       GATE(HCLK_SDMMC_EXT, "hclk_sdmmc_ext", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 15, GFLAGS),
+       GATE(HCLK_HOST0, "hclk_host0", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 6, GFLAGS),
+       GATE(HCLK_HOST0_ARB, "hclk_host0_arb", "hclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 7, GFLAGS),
+       GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 8, GFLAGS),
+       GATE(HCLK_OTG_PMU, "hclk_otg_pmu", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 9, GFLAGS),
+       GATE(0, "hclk_peri_niu", "hclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 12, GFLAGS),
+       GATE(0, "pclk_peri_niu", "hclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 13, GFLAGS),
+
+       /* PD_GMAC */
+       GATE(ACLK_MAC2PHY, "aclk_mac2phy", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 0, GFLAGS),
+       GATE(ACLK_MAC2IO, "aclk_mac2io", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 2, GFLAGS),
+       GATE(0, "aclk_gmac_niu", "aclk_gmac", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(26), 4, GFLAGS),
+       GATE(PCLK_MAC2PHY, "pclk_mac2phy", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 1, GFLAGS),
+       GATE(PCLK_MAC2IO, "pclk_mac2io", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 3, GFLAGS),
+       GATE(0, "pclk_gmac_niu", "pclk_gmac", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(26), 5, GFLAGS),
+
+       /* PD_BUS */
+       GATE(0, "aclk_bus_niu", "aclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 12, GFLAGS),
+       GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 11, GFLAGS),
+       GATE(ACLK_TSP, "aclk_tsp", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(17), 12, GFLAGS),
+       GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 0, GFLAGS),
+       GATE(ACLK_DMAC, "aclk_dmac_bus", "aclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 1, GFLAGS),
+
+       GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 2, GFLAGS),
+       GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 3, GFLAGS),
+       GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 4, GFLAGS),
+       GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 5, GFLAGS),
+       GATE(HCLK_SPDIF_8CH, "hclk_spdif_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 6, GFLAGS),
+       GATE(HCLK_TSP, "hclk_tsp", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(17), 11, GFLAGS),
+       GATE(HCLK_CRYPTO_MST, "hclk_crypto_mst", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 7, GFLAGS),
+       GATE(HCLK_CRYPTO_SLV, "hclk_crypto_slv", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 8, GFLAGS),
+       GATE(0, "hclk_bus_niu", "hclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 13, GFLAGS),
+       GATE(HCLK_PDM, "hclk_pdm", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(28), 0, GFLAGS),
+
+       GATE(0, "pclk_bus_niu", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 14, GFLAGS),
+       GATE(0, "pclk_efuse", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 9, GFLAGS),
+       GATE(0, "pclk_otp", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(28), 4, GFLAGS),
+       GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3328_CLKGATE_CON(15), 10, GFLAGS),
+       GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 0, GFLAGS),
+       GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 1, GFLAGS),
+       GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 2, GFLAGS),
+       GATE(PCLK_TIMER, "pclk_timer0", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 3, GFLAGS),
+       GATE(0, "pclk_stimer", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 4, GFLAGS),
+       GATE(PCLK_SPI, "pclk_spi", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 5, GFLAGS),
+       GATE(PCLK_PWM, "pclk_rk_pwm", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 6, GFLAGS),
+       GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 7, GFLAGS),
+       GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 8, GFLAGS),
+       GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 9, GFLAGS),
+       GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 10, GFLAGS),
+       GATE(PCLK_UART0, "pclk_uart0", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 11, GFLAGS),
+       GATE(PCLK_UART1, "pclk_uart1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 12, GFLAGS),
+       GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 13, GFLAGS),
+       GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 14, GFLAGS),
+       GATE(PCLK_DCF, "pclk_dcf", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 15, GFLAGS),
+       GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 0, GFLAGS),
+       GATE(0, "pclk_cru", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 4, GFLAGS),
+       GATE(0, "pclk_sgrf", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 6, GFLAGS),
+       GATE(0, "pclk_sim", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 10, GFLAGS),
+       GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus", 0, RK3328_CLKGATE_CON(17), 15, GFLAGS),
+       GATE(0, "pclk_pmu", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(28), 3, GFLAGS),
+
+       GATE(PCLK_USB3PHY_OTG, "pclk_usb3phy_otg", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(28), 1, GFLAGS),
+       GATE(PCLK_USB3PHY_PIPE, "pclk_usb3phy_pipe", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(28), 2, GFLAGS),
+       GATE(PCLK_USB3_GRF, "pclk_usb3_grf", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 2, GFLAGS),
+       GATE(PCLK_USB2_GRF, "pclk_usb2_grf", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 14, GFLAGS),
+       GATE(0, "pclk_ddrphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 13, GFLAGS),
+       GATE(0, "pclk_acodecphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 5, GFLAGS),
+       GATE(PCLK_HDMIPHY, "pclk_hdmiphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 7, GFLAGS),
+       GATE(0, "pclk_vdacphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 8, GFLAGS),
+       GATE(0, "pclk_phy_niu", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 15, GFLAGS),
+
+       /* PD_MMC */
+       MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc",
+           RK3328_SDMMC_CON0, 1),
+       MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc",
+           RK3328_SDMMC_CON1, 1),
+
+       MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio",
+           RK3328_SDIO_CON0, 1),
+       MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio",
+           RK3328_SDIO_CON1, 1),
+
+       MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc",
+           RK3328_EMMC_CON0, 1),
+       MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc",
+           RK3328_EMMC_CON1, 1),
+
+       MMC(SCLK_SDMMC_EXT_DRV, "sdmmc_ext_drv", "sclk_sdmmc_ext",
+           RK3328_SDMMC_EXT_CON0, 1),
+       MMC(SCLK_SDMMC_EXT_SAMPLE, "sdmmc_ext_sample", "sclk_sdmmc_ext",
+           RK3328_SDMMC_EXT_CON1, 1),
+};
+
+static const char *const rk3328_critical_clocks[] __initconst = {
+       "aclk_bus",
+       "pclk_bus",
+       "hclk_bus",
+       "aclk_peri",
+       "hclk_peri",
+       "pclk_peri",
+       "pclk_dbg",
+       "aclk_core_niu",
+       "aclk_gic400",
+       "aclk_intmem",
+       "hclk_rom",
+       "pclk_grf",
+       "pclk_cru",
+       "pclk_sgrf",
+       "pclk_timer0",
+       "clk_timer0",
+       "pclk_ddr_msch",
+       "pclk_ddr_mon",
+       "pclk_ddr_grf",
+       "clk_ddrupctl",
+       "clk_ddrmsch",
+       "hclk_ahb1tom",
+       "clk_jtag",
+       "pclk_ddrphy",
+       "pclk_pmu",
+       "hclk_otg_pmu",
+       "aclk_rga_niu",
+       "pclk_vio_h2p",
+       "hclk_vio_h2p",
+};
+
+static void __init rk3328_clk_init(struct device_node *np)
+{
+       struct rockchip_clk_provider *ctx;
+       void __iomem *reg_base;
+
+       reg_base = of_iomap(np, 0);
+       if (!reg_base) {
+               pr_err("%s: could not map cru region\n", __func__);
+               return;
+       }
+
+       ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
+       if (IS_ERR(ctx)) {
+               pr_err("%s: rockchip clk init failed\n", __func__);
+               iounmap(reg_base);
+               return;
+       }
+
+       rockchip_clk_register_plls(ctx, rk3328_pll_clks,
+                                  ARRAY_SIZE(rk3328_pll_clks),
+                                  RK3328_GRF_SOC_STATUS0);
+       rockchip_clk_register_branches(ctx, rk3328_clk_branches,
+                                      ARRAY_SIZE(rk3328_clk_branches));
+       rockchip_clk_protect_critical(rk3328_critical_clocks,
+                                     ARRAY_SIZE(rk3328_critical_clocks));
+
+       rockchip_clk_register_armclk(ctx, ARMCLK, "armclk",
+                                    mux_armclk_p, ARRAY_SIZE(mux_armclk_p),
+                                    &rk3328_cpuclk_data, rk3328_cpuclk_rates,
+                                    ARRAY_SIZE(rk3328_cpuclk_rates));
+
+       rockchip_register_softrst(np, 11, reg_base + RK3328_SOFTRST_CON(0),
+                                 ROCKCHIP_SOFTRST_HIWORD_MASK);
+
+       rockchip_register_restart_notifier(ctx, RK3328_GLB_SRST_FST, NULL);
+
+       rockchip_clk_of_add_provider(np, ctx);
+}
+CLK_OF_DECLARE(rk3328_cru, "rockchip,rk3328-cru", rk3328_clk_init);
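
CLK_OF_DECLARE() hooks rk3328_clk_init() into of_clk_init(), so the whole CRU is brought up at early boot from the "rockchip,rk3328-cru" device-tree node, long before platform drivers probe; that is why everything here is __init/__initdata and failures can only be logged. The rk3328_critical_clocks[] list then pins bus, NIU and interconnect clocks that have no Linux consumer, keeping the late clk_disable_unused() pass from gating them and hanging the SoC. A minimal sketch of what such a protect-critical helper amounts to, assuming only the generic __clk_lookup()/clk_prepare_enable() provider API rather than the exact Rockchip implementation:

#include <linux/clk.h>
#include <linux/clk-provider.h>

static void __init protect_critical_clocks(const char *const clocks[],
                                           int nclocks)
{
        int i;

        for (i = 0; i < nclocks; i++) {
                struct clk *clk = __clk_lookup(clocks[i]);

                /* Holding an enable count keeps clk_disable_unused()
                 * from gating the clock at late_initcall time. */
                if (clk)
                        clk_prepare_enable(clk);
        }
}
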
index 3490887b05799390522e7fd867698fe12bfa72cf..73121b1446348d06e4c5a19030976855dac4c523 100644
@@ -1132,7 +1132,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
                        RK3399_CLKGATE_CON(11), 8, GFLAGS),
 
        COMPOSITE(PCLK_EDP, "pclk_edp", mux_pll_src_cpll_gpll_p, 0,
-                       RK3399_CLKSEL_CON(44), 15, 1, MFLAGS, 8, 5, DFLAGS,
+                       RK3399_CLKSEL_CON(44), 15, 1, MFLAGS, 8, 6, DFLAGS,
                        RK3399_CLKGATE_CON(11), 11, GFLAGS),
        GATE(PCLK_EDP_NOC, "pclk_edp_noc", "pclk_edp", CLK_IGNORE_UNUSED,
                        RK3399_CLKGATE_CON(32), 12, GFLAGS),
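
The one-line RK3399 fix above widens the pclk_edp divider field from 5 to 6 bits. With the default one-based divider encoding, a w-bit field holds N and divides by N + 1, so 5 bits cap the divider at /32 and misdecode any hardware setting that uses the sixth bit; 6 bits restore the full /64 range the register field presumably provides. The arithmetic, as an illustrative helper:

/* One-based divider encoding: a w-bit field holding N divides by N + 1. */
static unsigned int divider_max(unsigned int width)
{
        return 1u << width;     /* width 5 -> /32, width 6 -> /64 */
}
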
index b886be30f34f7136a143d0d76bf5b5cde4da86f2..fe1d393cf678fb9079127cb77d237aa4146615ac 100644
@@ -344,7 +344,6 @@ struct rockchip_clk_provider * __init rockchip_clk_init(struct device_node *np,
        ctx->clk_data.clks = clk_table;
        ctx->clk_data.clk_num = nr_clks;
        ctx->cru_node = np;
-       ctx->grf = ERR_PTR(-EPROBE_DEFER);
        spin_lock_init(&ctx->lock);
 
        ctx->grf = syscon_regmap_lookup_by_phandle(ctx->cru_node,
@@ -417,6 +416,13 @@ void __init rockchip_clk_register_branches(
                                list->mux_shift, list->mux_width,
                                list->mux_flags, &ctx->lock);
                        break;
+               case branch_muxgrf:
+                       clk = rockchip_clk_register_muxgrf(list->name,
+                               list->parent_names, list->num_parents,
+                               flags, ctx->grf, list->muxdiv_offset,
+                               list->mux_shift, list->mux_width,
+                               list->mux_flags);
+                       break;
                case branch_divider:
                        if (list->div_table)
                                clk = clk_register_divider_table(NULL,
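
Two small changes to the shared Rockchip clk code: the first hunk drops a placeholder ctx->grf = ERR_PTR(-EPROBE_DEFER) assignment that the syscon_regmap_lookup_by_phandle() call a few lines below overwrote unconditionally, and the second teaches rockchip_clk_register_branches() the new branch_muxgrf type. A muxgrf branch is an ordinary parent mux whose select bits live in the GRF (general register files) syscon rather than in the CRU, so it must be accessed through the regmap instead of the CRU MMIO base. A sketch of how such a mux can be read and written, with illustrative names and assuming the usual Rockchip hiword-mask convention (the upper 16 bits of a write select which of the lower 16 bits may change):

#include <linux/bits.h>
#include <linux/regmap.h>

static u8 grf_mux_get_parent(struct regmap *grf, unsigned int reg,
                             int shift, int width)
{
        unsigned int val;

        regmap_read(grf, reg, &val);
        return (val >> shift) & (BIT(width) - 1);
}

static int grf_mux_set_parent(struct regmap *grf, unsigned int reg,
                              int shift, int width, u8 index)
{
        unsigned int mask = (BIT(width) - 1) << shift;

        /* hiword mask: new value in the low half, write-enable bits
         * for exactly that field in the high half */
        return regmap_write(grf, reg, (index << shift) | (mask << 16));
}
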
index d67eecc4ade9bbafbca358eb42c2202f5f4db4af..7c15473ea72b233070238201100bc10c8c2de240 100644
@@ -91,6 +91,24 @@ struct clk;
 #define RK3288_EMMC_CON0               0x218
 #define RK3288_EMMC_CON1               0x21c
 
+#define RK3328_PLL_CON(x)              RK2928_PLL_CON(x)
+#define RK3328_CLKSEL_CON(x)           ((x) * 0x4 + 0x100)
+#define RK3328_CLKGATE_CON(x)          ((x) * 0x4 + 0x200)
+#define RK3328_GRFCLKSEL_CON(x)                ((x) * 0x4 + 0x100)
+#define RK3328_GLB_SRST_FST            0x9c
+#define RK3328_GLB_SRST_SND            0x98
+#define RK3328_SOFTRST_CON(x)          ((x) * 0x4 + 0x300)
+#define RK3328_MODE_CON                        0x80
+#define RK3328_MISC_CON                        0x84
+#define RK3328_SDMMC_CON0              0x380
+#define RK3328_SDMMC_CON1              0x384
+#define RK3328_SDIO_CON0               0x388
+#define RK3328_SDIO_CON1               0x38c
+#define RK3328_EMMC_CON0               0x390
+#define RK3328_EMMC_CON1               0x394
+#define RK3328_SDMMC_EXT_CON0          0x398
+#define RK3328_SDMMC_EXT_CON1          0x39C
+
 #define RK3368_PLL_CON(x)              RK2928_PLL_CON(x)
 #define RK3368_CLKSEL_CON(x)           ((x) * 0x4 + 0x100)
 #define RK3368_CLKGATE_CON(x)          ((x) * 0x4 + 0x200)
@@ -130,6 +148,7 @@ struct clk;
 enum rockchip_pll_type {
        pll_rk3036,
        pll_rk3066,
+       pll_rk3328,
        pll_rk3399,
 };
 
@@ -317,11 +336,17 @@ struct clk *rockchip_clk_register_inverter(const char *name,
                                void __iomem *reg, int shift, int flags,
                                spinlock_t *lock);
 
+struct clk *rockchip_clk_register_muxgrf(const char *name,
+                               const char *const *parent_names, u8 num_parents,
+                               int flags, struct regmap *grf, int reg,
+                               int shift, int width, int mux_flags);
+
 #define PNAME(x) static const char *const x[] __initconst
 
 enum rockchip_clk_branch_type {
        branch_composite,
        branch_mux,
+       branch_muxgrf,
        branch_divider,
        branch_fraction_divider,
        branch_gate,
@@ -551,6 +576,21 @@ struct rockchip_clk_branch {
                .gate_offset    = -1,                           \
        }
 
+#define MUXGRF(_id, cname, pnames, f, o, s, w, mf)             \
+       {                                                       \
+               .id             = _id,                          \
+               .branch_type    = branch_muxgrf,                \
+               .name           = cname,                        \
+               .parent_names   = pnames,                       \
+               .num_parents    = ARRAY_SIZE(pnames),           \
+               .flags          = f,                            \
+               .muxdiv_offset  = o,                            \
+               .mux_shift      = s,                            \
+               .mux_width      = w,                            \
+               .mux_flags      = mf,                           \
+               .gate_offset    = -1,                           \
+       }
+
 #define DIV(_id, cname, pname, f, o, s, w, df)                 \
        {                                                       \
                .id             = _id,                          \
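
The RK3328 offsets added above follow the established scheme in this header: CLKSEL_CON registers start at 0x100 and CLKGATE_CON at 0x200, each with a 4-byte stride, so a branch-table index maps directly to a CRU register address; MUXGRF() mirrors MUX() except that it selects branch_muxgrf and leaves gate_offset at -1, since GRF muxes have no gate to manage. A worked example of the offset arithmetic, recomputed here rather than quoted from the TRM:

/* clk_spi from the rk3328 branch table earlier in this commit:
 *   RK3328_CLKSEL_CON(24) = 24 * 0x4 + 0x100 = 0x160
 *       (mux at bit 7, 1 bit wide; divider at bits 0-6)
 *   RK3328_CLKGATE_CON(2) =  2 * 0x4 + 0x200 = 0x208  (gate bit 7)
 */
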
index 57f4dc6dc4474cccc23a934450d07302a5a742bc..7afc21dc374efc4bdeed813b6a2c3db9f23be0ce 100644
@@ -5,7 +5,6 @@
 obj-$(CONFIG_COMMON_CLK)       += clk.o clk-pll.o clk-cpu.o
 obj-$(CONFIG_SOC_EXYNOS3250)   += clk-exynos3250.o
 obj-$(CONFIG_ARCH_EXYNOS4)     += clk-exynos4.o
-obj-$(CONFIG_SOC_EXYNOS4415)   += clk-exynos4415.o
 obj-$(CONFIG_SOC_EXYNOS5250)   += clk-exynos5250.o
 obj-$(CONFIG_SOC_EXYNOS5260)   += clk-exynos5260.o
 obj-$(CONFIG_SOC_EXYNOS5410)   += clk-exynos5410.o
index 17e68a724945608382924ce18b64f3df9d172db8..cb7df358a27d7838b8752c758951ed5e5e4205ee 100644
@@ -44,7 +44,7 @@ static unsigned long reg_save[][2] = {
        { ASS_CLK_GATE, 0 },
 };
 
-static int exynos_audss_clk_suspend(void)
+static int exynos_audss_clk_suspend(struct device *dev)
 {
        int i;
 
@@ -54,18 +54,15 @@ static int exynos_audss_clk_suspend(void)
        return 0;
 }
 
-static void exynos_audss_clk_resume(void)
+static int exynos_audss_clk_resume(struct device *dev)
 {
        int i;
 
        for (i = 0; i < ARRAY_SIZE(reg_save); i++)
                writel(reg_save[i][1], reg_base + reg_save[i][0]);
-}
 
-static struct syscore_ops exynos_audss_clk_syscore_ops = {
-       .suspend        = exynos_audss_clk_suspend,
-       .resume         = exynos_audss_clk_resume,
-};
+       return 0;
+}
 #endif /* CONFIG_PM_SLEEP */
 
 struct exynos_audss_clk_drvdata {
@@ -251,9 +248,6 @@ static int exynos_audss_clk_probe(struct platform_device *pdev)
                goto unregister;
        }
 
-#ifdef CONFIG_PM_SLEEP
-       register_syscore_ops(&exynos_audss_clk_syscore_ops);
-#endif
        return 0;
 
 unregister:
@@ -267,10 +261,6 @@ unregister:
 
 static int exynos_audss_clk_remove(struct platform_device *pdev)
 {
-#ifdef CONFIG_PM_SLEEP
-       unregister_syscore_ops(&exynos_audss_clk_syscore_ops);
-#endif
-
        of_clk_del_provider(pdev->dev.of_node);
 
        exynos_audss_clk_teardown();
@@ -281,10 +271,16 @@ static int exynos_audss_clk_remove(struct platform_device *pdev)
        return 0;
 }
 
+static const struct dev_pm_ops exynos_audss_clk_pm_ops = {
+       SET_LATE_SYSTEM_SLEEP_PM_OPS(exynos_audss_clk_suspend,
+                                    exynos_audss_clk_resume)
+};
+
 static struct platform_driver exynos_audss_clk_driver = {
        .driver = {
                .name = "exynos-audss-clk",
                .of_match_table = exynos_audss_clk_of_match,
+               .pm = &exynos_audss_clk_pm_ops,
        },
        .probe = exynos_audss_clk_probe,
        .remove = exynos_audss_clk_remove,
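
The exynos-audss change above replaces open-coded syscore_ops with dev_pm_ops: the callbacks gain the struct device * argument and an int return, the #ifdef'd register/unregister calls in probe and remove disappear, and SET_LATE_SYSTEM_SLEEP_PM_OPS() wires the pair in only when CONFIG_PM_SLEEP is set, running them late in suspend and early in resume so the register save happens after the clock's consumers have quiesced. The resulting pattern, reduced to a minimal sketch with illustrative names:

#include <linux/platform_device.h>
#include <linux/pm.h>

static int foo_clk_suspend(struct device *dev)
{
        /* save clock-controller registers */
        return 0;
}

static int foo_clk_resume(struct device *dev)
{
        /* restore clock-controller registers */
        return 0;
}

/* Expands to nothing without CONFIG_PM_SLEEP, so no #ifdef is needed. */
static const struct dev_pm_ops foo_clk_pm_ops = {
        SET_LATE_SYSTEM_SLEEP_PM_OPS(foo_clk_suspend, foo_clk_resume)
};

static struct platform_driver foo_clk_driver = {
        .driver = {
                .name = "foo-clk",
                .pm = &foo_clk_pm_ops,
        },
};
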
diff --git a/drivers/clk/samsung/clk-exynos4415.c b/drivers/clk/samsung/clk-exynos4415.c
deleted file mode 100644
index 6c90631..0000000
+++ /dev/null
@@ -1,1022 +0,0 @@
-/*
- * Copyright (c) 2014 Samsung Electronics Co., Ltd.
- * Author: Chanwoo Choi <cw00.choi@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Common Clock Framework support for Exynos4415 SoC.
- */
-
-#include <linux/clk-provider.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/platform_device.h>
-#include <linux/syscore_ops.h>
-
-#include <dt-bindings/clock/exynos4415.h>
-
-#include "clk.h"
-#include "clk-pll.h"
-
-#define SRC_LEFTBUS            0x4200
-#define DIV_LEFTBUS            0x4500
-#define GATE_IP_LEFTBUS                0x4800
-#define GATE_IP_IMAGE          0x4930
-#define SRC_RIGHTBUS           0x8200
-#define DIV_RIGHTBUS           0x8500
-#define GATE_IP_RIGHTBUS       0x8800
-#define GATE_IP_PERIR          0x8960
-#define EPLL_LOCK              0xc010
-#define G3D_PLL_LOCK           0xc020
-#define DISP_PLL_LOCK          0xc030
-#define ISP_PLL_LOCK           0xc040
-#define EPLL_CON0              0xc110
-#define EPLL_CON1              0xc114
-#define EPLL_CON2              0xc118
-#define G3D_PLL_CON0           0xc120
-#define G3D_PLL_CON1           0xc124
-#define G3D_PLL_CON2           0xc128
-#define ISP_PLL_CON0           0xc130
-#define ISP_PLL_CON1           0xc134
-#define ISP_PLL_CON2           0xc138
-#define DISP_PLL_CON0          0xc140
-#define DISP_PLL_CON1          0xc144
-#define DISP_PLL_CON2          0xc148
-#define SRC_TOP0               0xc210
-#define SRC_TOP1               0xc214
-#define SRC_CAM                        0xc220
-#define SRC_TV                 0xc224
-#define SRC_MFC                        0xc228
-#define SRC_G3D                        0xc22c
-#define SRC_LCD                        0xc234
-#define SRC_ISP                        0xc238
-#define SRC_MAUDIO             0xc23c
-#define SRC_FSYS               0xc240
-#define SRC_PERIL0             0xc250
-#define SRC_PERIL1             0xc254
-#define SRC_CAM1               0xc258
-#define SRC_TOP_ISP0           0xc25c
-#define SRC_TOP_ISP1           0xc260
-#define SRC_MASK_TOP           0xc310
-#define SRC_MASK_CAM           0xc320
-#define SRC_MASK_TV            0xc324
-#define SRC_MASK_LCD           0xc334
-#define SRC_MASK_ISP           0xc338
-#define SRC_MASK_MAUDIO                0xc33c
-#define SRC_MASK_FSYS          0xc340
-#define SRC_MASK_PERIL0                0xc350
-#define SRC_MASK_PERIL1                0xc354
-#define DIV_TOP                        0xc510
-#define DIV_CAM                        0xc520
-#define DIV_TV                 0xc524
-#define DIV_MFC                        0xc528
-#define DIV_G3D                        0xc52c
-#define DIV_LCD                        0xc534
-#define DIV_ISP                        0xc538
-#define DIV_MAUDIO             0xc53c
-#define DIV_FSYS0              0xc540
-#define DIV_FSYS1              0xc544
-#define DIV_FSYS2              0xc548
-#define DIV_PERIL0             0xc550
-#define DIV_PERIL1             0xc554
-#define DIV_PERIL2             0xc558
-#define DIV_PERIL3             0xc55c
-#define DIV_PERIL4             0xc560
-#define DIV_PERIL5             0xc564
-#define DIV_CAM1               0xc568
-#define DIV_TOP_ISP1           0xc56c
-#define DIV_TOP_ISP0           0xc570
-#define CLKDIV2_RATIO          0xc580
-#define GATE_SCLK_CAM          0xc820
-#define GATE_SCLK_TV           0xc824
-#define GATE_SCLK_MFC          0xc828
-#define GATE_SCLK_G3D          0xc82c
-#define GATE_SCLK_LCD          0xc834
-#define GATE_SCLK_MAUDIO       0xc83c
-#define GATE_SCLK_FSYS         0xc840
-#define GATE_SCLK_PERIL                0xc850
-#define GATE_IP_CAM            0xc920
-#define GATE_IP_TV             0xc924
-#define GATE_IP_MFC            0xc928
-#define GATE_IP_G3D            0xc92c
-#define GATE_IP_LCD            0xc934
-#define GATE_IP_FSYS           0xc940
-#define GATE_IP_PERIL          0xc950
-#define GATE_BLOCK             0xc970
-#define APLL_LOCK              0x14000
-#define APLL_CON0              0x14100
-#define SRC_CPU                        0x14200
-#define DIV_CPU0               0x14500
-#define DIV_CPU1               0x14504
-
-static const unsigned long exynos4415_cmu_clk_regs[] __initconst = {
-       SRC_LEFTBUS,
-       DIV_LEFTBUS,
-       GATE_IP_LEFTBUS,
-       GATE_IP_IMAGE,
-       SRC_RIGHTBUS,
-       DIV_RIGHTBUS,
-       GATE_IP_RIGHTBUS,
-       GATE_IP_PERIR,
-       EPLL_LOCK,
-       G3D_PLL_LOCK,
-       DISP_PLL_LOCK,
-       ISP_PLL_LOCK,
-       EPLL_CON0,
-       EPLL_CON1,
-       EPLL_CON2,
-       G3D_PLL_CON0,
-       G3D_PLL_CON1,
-       G3D_PLL_CON2,
-       ISP_PLL_CON0,
-       ISP_PLL_CON1,
-       ISP_PLL_CON2,
-       DISP_PLL_CON0,
-       DISP_PLL_CON1,
-       DISP_PLL_CON2,
-       SRC_TOP0,
-       SRC_TOP1,
-       SRC_CAM,
-       SRC_TV,
-       SRC_MFC,
-       SRC_G3D,
-       SRC_LCD,
-       SRC_ISP,
-       SRC_MAUDIO,
-       SRC_FSYS,
-       SRC_PERIL0,
-       SRC_PERIL1,
-       SRC_CAM1,
-       SRC_TOP_ISP0,
-       SRC_TOP_ISP1,
-       SRC_MASK_TOP,
-       SRC_MASK_CAM,
-       SRC_MASK_TV,
-       SRC_MASK_LCD,
-       SRC_MASK_ISP,
-       SRC_MASK_MAUDIO,
-       SRC_MASK_FSYS,
-       SRC_MASK_PERIL0,
-       SRC_MASK_PERIL1,
-       DIV_TOP,
-       DIV_CAM,
-       DIV_TV,
-       DIV_MFC,
-       DIV_G3D,
-       DIV_LCD,
-       DIV_ISP,
-       DIV_MAUDIO,
-       DIV_FSYS0,
-       DIV_FSYS1,
-       DIV_FSYS2,
-       DIV_PERIL0,
-       DIV_PERIL1,
-       DIV_PERIL2,
-       DIV_PERIL3,
-       DIV_PERIL4,
-       DIV_PERIL5,
-       DIV_CAM1,
-       DIV_TOP_ISP1,
-       DIV_TOP_ISP0,
-       CLKDIV2_RATIO,
-       GATE_SCLK_CAM,
-       GATE_SCLK_TV,
-       GATE_SCLK_MFC,
-       GATE_SCLK_G3D,
-       GATE_SCLK_LCD,
-       GATE_SCLK_MAUDIO,
-       GATE_SCLK_FSYS,
-       GATE_SCLK_PERIL,
-       GATE_IP_CAM,
-       GATE_IP_TV,
-       GATE_IP_MFC,
-       GATE_IP_G3D,
-       GATE_IP_LCD,
-       GATE_IP_FSYS,
-       GATE_IP_PERIL,
-       GATE_BLOCK,
-       APLL_LOCK,
-       APLL_CON0,
-       SRC_CPU,
-       DIV_CPU0,
-       DIV_CPU1,
-};
-
-/* list of all parent clock list */
-PNAME(mout_g3d_pllsrc_p)       = { "fin_pll", };
-
-PNAME(mout_apll_p)             = { "fin_pll", "fout_apll", };
-PNAME(mout_g3d_pll_p)          = { "fin_pll", "fout_g3d_pll", };
-PNAME(mout_isp_pll_p)          = { "fin_pll", "fout_isp_pll", };
-PNAME(mout_disp_pll_p)         = { "fin_pll", "fout_disp_pll", };
-
-PNAME(mout_mpll_user_p)                = { "fin_pll", "div_mpll_pre", };
-PNAME(mout_epll_p)             = { "fin_pll", "fout_epll", };
-PNAME(mout_core_p)             = { "mout_apll", "mout_mpll_user_c", };
-PNAME(mout_hpm_p)              = { "mout_apll", "mout_mpll_user_c", };
-
-PNAME(mout_ebi_p)              = { "div_aclk_200", "div_aclk_160", };
-PNAME(mout_ebi_1_p)            = { "mout_ebi", "mout_g3d_pll", };
-
-PNAME(mout_gdl_p)              = { "mout_mpll_user_l", };
-PNAME(mout_gdr_p)              = { "mout_mpll_user_r", };
-
-PNAME(mout_aclk_266_p)         = { "mout_mpll_user_t", "mout_g3d_pll", };
-
-PNAME(group_epll_g3dpll_p)     = { "mout_epll", "mout_g3d_pll" };
-PNAME(group_sclk_p)            = { "xxti", "xusbxti",
-                                   "none", "mout_isp_pll",
-                                   "none", "none", "div_mpll_pre",
-                                   "mout_epll", "mout_g3d_pll", };
-PNAME(group_spdif_p)           = { "mout_audio0", "mout_audio1",
-                                   "mout_audio2", "spdif_extclk", };
-PNAME(group_sclk_audio2_p)     = { "audiocdclk2", "none",
-                                   "none", "mout_isp_pll",
-                                   "mout_disp_pll", "xusbxti",
-                                   "div_mpll_pre", "mout_epll",
-                                   "mout_g3d_pll", };
-PNAME(group_sclk_audio1_p)     = { "audiocdclk1", "none",
-                                   "none", "mout_isp_pll",
-                                   "mout_disp_pll", "xusbxti",
-                                   "div_mpll_pre", "mout_epll",
-                                   "mout_g3d_pll", };
-PNAME(group_sclk_audio0_p)     = { "audiocdclk0", "none",
-                                   "none", "mout_isp_pll",
-                                   "mout_disp_pll", "xusbxti",
-                                   "div_mpll_pre", "mout_epll",
-                                   "mout_g3d_pll", };
-PNAME(group_fimc_lclk_p)       = { "xxti", "xusbxti",
-                                   "none", "mout_isp_pll",
-                                   "none", "mout_disp_pll",
-                                   "mout_mpll_user_t", "mout_epll",
-                                   "mout_g3d_pll", };
-PNAME(group_sclk_fimd0_p)      = { "xxti", "xusbxti",
-                                   "m_bitclkhsdiv4_4l", "mout_isp_pll",
-                                   "mout_disp_pll", "sclk_hdmiphy",
-                                   "div_mpll_pre", "mout_epll",
-                                   "mout_g3d_pll", };
-PNAME(mout_hdmi_p)             = { "sclk_pixel", "sclk_hdmiphy" };
-PNAME(mout_mfc_p)              = { "mout_mfc_0", "mout_mfc_1" };
-PNAME(mout_g3d_p)              = { "mout_g3d_0", "mout_g3d_1" };
-PNAME(mout_jpeg_p)             = { "mout_jpeg_0", "mout_jpeg_1" };
-PNAME(mout_jpeg1_p)            = { "mout_epll", "mout_g3d_pll" };
-PNAME(group_aclk_isp0_300_p)   = { "mout_isp_pll", "div_mpll_pre" };
-PNAME(group_aclk_isp0_400_user_p) = { "fin_pll", "div_aclk_400_mcuisp" };
-PNAME(group_aclk_isp0_300_user_p) = { "fin_pll", "mout_aclk_isp0_300" };
-PNAME(group_aclk_isp1_300_user_p) = { "fin_pll", "mout_aclk_isp1_300" };
-PNAME(group_mout_mpll_user_t_p)        = { "mout_mpll_user_t" };
-
-static const struct samsung_fixed_factor_clock exynos4415_fixed_factor_clks[] __initconst = {
-       /* HACK: fin_pll hardcoded to xusbxti until detection is implemented. */
-       FFACTOR(CLK_FIN_PLL, "fin_pll", "xusbxti", 1, 1, 0),
-};
-
-static const struct samsung_fixed_rate_clock exynos4415_fixed_rate_clks[] __initconst = {
-       FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 27000000),
-};
-
-static const struct samsung_mux_clock exynos4415_mux_clks[] __initconst = {
-       /*
-        * NOTE: Following table is sorted by register address in ascending
-        * order and then bitfield shift in descending order, as it is done
-        * in the User's Manual. When adding new entries, please make sure
-        * that the order is preserved, to avoid merge conflicts and make
-        * further work with defined data easier.
-        */
-
-       /* SRC_LEFTBUS */
-       MUX(CLK_MOUT_MPLL_USER_L, "mout_mpll_user_l", mout_mpll_user_p,
-               SRC_LEFTBUS, 4, 1),
-       MUX(CLK_MOUT_GDL, "mout_gdl", mout_gdl_p, SRC_LEFTBUS, 0, 1),
-
-       /* SRC_RIGHTBUS */
-       MUX(CLK_MOUT_MPLL_USER_R, "mout_mpll_user_r", mout_mpll_user_p,
-               SRC_RIGHTBUS, 4, 1),
-       MUX(CLK_MOUT_GDR, "mout_gdr", mout_gdr_p, SRC_RIGHTBUS, 0, 1),
-
-       /* SRC_TOP0 */
-       MUX(CLK_MOUT_EBI, "mout_ebi", mout_ebi_p, SRC_TOP0, 28, 1),
-       MUX(CLK_MOUT_ACLK_200, "mout_aclk_200", group_mout_mpll_user_t_p,
-               SRC_TOP0, 24, 1),
-       MUX(CLK_MOUT_ACLK_160, "mout_aclk_160", group_mout_mpll_user_t_p,
-               SRC_TOP0, 20, 1),
-       MUX(CLK_MOUT_ACLK_100, "mout_aclk_100", group_mout_mpll_user_t_p,
-               SRC_TOP0, 16, 1),
-       MUX(CLK_MOUT_ACLK_266, "mout_aclk_266", mout_aclk_266_p,
-               SRC_TOP0, 12, 1),
-       MUX(CLK_MOUT_G3D_PLL, "mout_g3d_pll", mout_g3d_pll_p,
-               SRC_TOP0, 8, 1),
-       MUX(CLK_MOUT_EPLL, "mout_epll", mout_epll_p, SRC_TOP0, 4, 1),
-       MUX(CLK_MOUT_EBI_1, "mout_ebi_1", mout_ebi_1_p, SRC_TOP0, 0, 1),
-
-       /* SRC_TOP1 */
-       MUX(CLK_MOUT_ISP_PLL, "mout_isp_pll", mout_isp_pll_p,
-               SRC_TOP1, 28, 1),
-       MUX(CLK_MOUT_DISP_PLL, "mout_disp_pll", mout_disp_pll_p,
-               SRC_TOP1, 16, 1),
-       MUX(CLK_MOUT_MPLL_USER_T, "mout_mpll_user_t", mout_mpll_user_p,
-               SRC_TOP1, 12, 1),
-       MUX(CLK_MOUT_ACLK_400_MCUISP, "mout_aclk_400_mcuisp",
-               group_mout_mpll_user_t_p, SRC_TOP1, 8, 1),
-       MUX(CLK_MOUT_G3D_PLLSRC, "mout_g3d_pllsrc", mout_g3d_pllsrc_p,
-               SRC_TOP1, 0, 1),
-
-       /* SRC_CAM */
-       MUX(CLK_MOUT_CSIS1, "mout_csis1", group_fimc_lclk_p, SRC_CAM, 28, 4),
-       MUX(CLK_MOUT_CSIS0, "mout_csis0", group_fimc_lclk_p, SRC_CAM, 24, 4),
-       MUX(CLK_MOUT_CAM1, "mout_cam1", group_fimc_lclk_p, SRC_CAM, 20, 4),
-       MUX(CLK_MOUT_FIMC3_LCLK, "mout_fimc3_lclk", group_fimc_lclk_p, SRC_CAM,
-               12, 4),
-       MUX(CLK_MOUT_FIMC2_LCLK, "mout_fimc2_lclk", group_fimc_lclk_p, SRC_CAM,
-               8, 4),
-       MUX(CLK_MOUT_FIMC1_LCLK, "mout_fimc1_lclk", group_fimc_lclk_p, SRC_CAM,
-               4, 4),
-       MUX(CLK_MOUT_FIMC0_LCLK, "mout_fimc0_lclk", group_fimc_lclk_p, SRC_CAM,
-               0, 4),
-
-       /* SRC_TV */
-       MUX(CLK_MOUT_HDMI, "mout_hdmi", mout_hdmi_p, SRC_TV, 0, 1),
-
-       /* SRC_MFC */
-       MUX(CLK_MOUT_MFC, "mout_mfc", mout_mfc_p, SRC_MFC, 8, 1),
-       MUX(CLK_MOUT_MFC_1, "mout_mfc_1", group_epll_g3dpll_p, SRC_MFC, 4, 1),
-       MUX(CLK_MOUT_MFC_0, "mout_mfc_0", group_mout_mpll_user_t_p, SRC_MFC, 0,
-               1),
-
-       /* SRC_G3D */
-       MUX(CLK_MOUT_G3D, "mout_g3d", mout_g3d_p, SRC_G3D, 8, 1),
-       MUX(CLK_MOUT_G3D_1, "mout_g3d_1", group_epll_g3dpll_p, SRC_G3D, 4, 1),
-       MUX(CLK_MOUT_G3D_0, "mout_g3d_0", group_mout_mpll_user_t_p, SRC_G3D, 0,
-               1),
-
-       /* SRC_LCD */
-       MUX(CLK_MOUT_MIPI0, "mout_mipi0", group_fimc_lclk_p, SRC_LCD, 12, 4),
-       MUX(CLK_MOUT_FIMD0, "mout_fimd0", group_sclk_fimd0_p, SRC_LCD, 0, 4),
-
-       /* SRC_ISP */
-       MUX(CLK_MOUT_TSADC_ISP, "mout_tsadc_isp", group_fimc_lclk_p, SRC_ISP,
-               16, 4),
-       MUX(CLK_MOUT_UART_ISP, "mout_uart_isp", group_fimc_lclk_p, SRC_ISP,
-               12, 4),
-       MUX(CLK_MOUT_SPI1_ISP, "mout_spi1_isp", group_fimc_lclk_p, SRC_ISP,
-               8, 4),
-       MUX(CLK_MOUT_SPI0_ISP, "mout_spi0_isp", group_fimc_lclk_p, SRC_ISP,
-               4, 4),
-       MUX(CLK_MOUT_PWM_ISP, "mout_pwm_isp", group_fimc_lclk_p, SRC_ISP,
-               0, 4),
-
-       /* SRC_MAUDIO */
-       MUX(CLK_MOUT_AUDIO0, "mout_audio0", group_sclk_audio0_p, SRC_MAUDIO,
-               0, 4),
-
-       /* SRC_FSYS */
-       MUX(CLK_MOUT_TSADC, "mout_tsadc", group_sclk_p, SRC_FSYS, 28, 4),
-       MUX(CLK_MOUT_MMC2, "mout_mmc2", group_sclk_p, SRC_FSYS, 8, 4),
-       MUX(CLK_MOUT_MMC1, "mout_mmc1", group_sclk_p, SRC_FSYS, 4, 4),
-       MUX(CLK_MOUT_MMC0, "mout_mmc0", group_sclk_p, SRC_FSYS, 0, 4),
-
-       /* SRC_PERIL0 */
-       MUX(CLK_MOUT_UART3, "mout_uart3", group_sclk_p, SRC_PERIL0, 12, 4),
-       MUX(CLK_MOUT_UART2, "mout_uart2", group_sclk_p, SRC_PERIL0, 8, 4),
-       MUX(CLK_MOUT_UART1, "mout_uart1", group_sclk_p, SRC_PERIL0, 4, 4),
-       MUX(CLK_MOUT_UART0, "mout_uart0", group_sclk_p, SRC_PERIL0, 0, 4),
-
-       /* SRC_PERIL1 */
-       MUX(CLK_MOUT_SPI2, "mout_spi2", group_sclk_p, SRC_PERIL1, 24, 4),
-       MUX(CLK_MOUT_SPI1, "mout_spi1", group_sclk_p, SRC_PERIL1, 20, 4),
-       MUX(CLK_MOUT_SPI0, "mout_spi0", group_sclk_p, SRC_PERIL1, 16, 4),
-       MUX(CLK_MOUT_SPDIF, "mout_spdif", group_spdif_p, SRC_PERIL1, 8, 4),
-       MUX(CLK_MOUT_AUDIO2, "mout_audio2", group_sclk_audio2_p, SRC_PERIL1,
-               4, 4),
-       MUX(CLK_MOUT_AUDIO1, "mout_audio1", group_sclk_audio1_p, SRC_PERIL1,
-               0, 4),
-
-       /* SRC_CPU */
-       MUX(CLK_MOUT_MPLL_USER_C, "mout_mpll_user_c", mout_mpll_user_p,
-               SRC_CPU, 24, 1),
-       MUX(CLK_MOUT_HPM, "mout_hpm", mout_hpm_p, SRC_CPU, 20, 1),
-       MUX_F(CLK_MOUT_CORE, "mout_core", mout_core_p, SRC_CPU, 16, 1, 0,
-               CLK_MUX_READ_ONLY),
-       MUX_F(CLK_MOUT_APLL, "mout_apll", mout_apll_p, SRC_CPU, 0, 1,
-               CLK_SET_RATE_PARENT, 0),
-
-       /* SRC_CAM1 */
-       MUX(CLK_MOUT_PXLASYNC_CSIS1_FIMC, "mout_pxlasync_csis1",
-               group_fimc_lclk_p, SRC_CAM1, 20, 1),
-       MUX(CLK_MOUT_PXLASYNC_CSIS0_FIMC, "mout_pxlasync_csis0",
-               group_fimc_lclk_p, SRC_CAM1, 16, 1),
-       MUX(CLK_MOUT_JPEG, "mout_jpeg", mout_jpeg_p, SRC_CAM1, 8, 1),
-       MUX(CLK_MOUT_JPEG1, "mout_jpeg_1", mout_jpeg1_p, SRC_CAM1, 4, 1),
-       MUX(CLK_MOUT_JPEG0, "mout_jpeg_0", group_mout_mpll_user_t_p, SRC_CAM1,
-               0, 1),
-
-       /* SRC_TOP_ISP0 */
-       MUX(CLK_MOUT_ACLK_ISP0_300, "mout_aclk_isp0_300",
-               group_aclk_isp0_300_p, SRC_TOP_ISP0, 8, 1),
-       MUX(CLK_MOUT_ACLK_ISP0_400, "mout_aclk_isp0_400_user",
-               group_aclk_isp0_400_user_p, SRC_TOP_ISP0, 4, 1),
-       MUX(CLK_MOUT_ACLK_ISP0_300_USER, "mout_aclk_isp0_300_user",
-               group_aclk_isp0_300_user_p, SRC_TOP_ISP0, 0, 1),
-
-       /* SRC_TOP_ISP1 */
-       MUX(CLK_MOUT_ACLK_ISP1_300, "mout_aclk_isp1_300",
-               group_aclk_isp0_300_p, SRC_TOP_ISP1, 4, 1),
-       MUX(CLK_MOUT_ACLK_ISP1_300_USER, "mout_aclk_isp1_300_user",
-               group_aclk_isp1_300_user_p, SRC_TOP_ISP1, 0, 1),
-};
-
-static const struct samsung_div_clock exynos4415_div_clks[] __initconst = {
-       /*
-        * NOTE: Following table is sorted by register address in ascending
-        * order and then bitfield shift in descending order, as it is done
-        * in the User's Manual. When adding new entries, please make sure
-        * that the order is preserved, to avoid merge conflicts and make
-        * further work with defined data easier.
-        */
-
-       /* DIV_LEFTBUS */
-       DIV(CLK_DIV_GPL, "div_gpl", "div_gdl", DIV_LEFTBUS, 4, 3),
-       DIV(CLK_DIV_GDL, "div_gdl", "mout_gdl", DIV_LEFTBUS, 0, 4),
-
-       /* DIV_RIGHTBUS */
-       DIV(CLK_DIV_GPR, "div_gpr", "div_gdr", DIV_RIGHTBUS, 4, 3),
-       DIV(CLK_DIV_GDR, "div_gdr", "mout_gdr", DIV_RIGHTBUS, 0, 4),
-
-       /* DIV_TOP */
-       DIV(CLK_DIV_ACLK_400_MCUISP, "div_aclk_400_mcuisp",
-               "mout_aclk_400_mcuisp", DIV_TOP, 24, 3),
-       DIV(CLK_DIV_EBI, "div_ebi", "mout_ebi_1", DIV_TOP, 16, 3),
-       DIV(CLK_DIV_ACLK_200, "div_aclk_200", "mout_aclk_200", DIV_TOP, 12, 3),
-       DIV(CLK_DIV_ACLK_160, "div_aclk_160", "mout_aclk_160", DIV_TOP, 8, 3),
-       DIV(CLK_DIV_ACLK_100, "div_aclk_100", "mout_aclk_100", DIV_TOP, 4, 4),
-       DIV(CLK_DIV_ACLK_266, "div_aclk_266", "mout_aclk_266", DIV_TOP, 0, 3),
-
-       /* DIV_CAM */
-       DIV(CLK_DIV_CSIS1, "div_csis1", "mout_csis1", DIV_CAM, 28, 4),
-       DIV(CLK_DIV_CSIS0, "div_csis0", "mout_csis0", DIV_CAM, 24, 4),
-       DIV(CLK_DIV_CAM1, "div_cam1", "mout_cam1", DIV_CAM, 20, 4),
-       DIV(CLK_DIV_FIMC3_LCLK, "div_fimc3_lclk", "mout_fimc3_lclk", DIV_CAM,
-               12, 4),
-       DIV(CLK_DIV_FIMC2_LCLK, "div_fimc2_lclk", "mout_fimc2_lclk", DIV_CAM,
-               8, 4),
-       DIV(CLK_DIV_FIMC1_LCLK, "div_fimc1_lclk", "mout_fimc1_lclk", DIV_CAM,
-               4, 4),
-       DIV(CLK_DIV_FIMC0_LCLK, "div_fimc0_lclk", "mout_fimc0_lclk", DIV_CAM,
-               0, 4),
-
-       /* DIV_TV */
-       DIV(CLK_DIV_TV_BLK, "div_tv_blk", "mout_g3d_pll", DIV_TV, 0, 4),
-
-       /* DIV_MFC */
-       DIV(CLK_DIV_MFC, "div_mfc", "mout_mfc", DIV_MFC, 0, 4),
-
-       /* DIV_G3D */
-       DIV(CLK_DIV_G3D, "div_g3d", "mout_g3d", DIV_G3D, 0, 4),
-
-       /* DIV_LCD */
-       DIV_F(CLK_DIV_MIPI0_PRE, "div_mipi0_pre", "div_mipi0", DIV_LCD, 20, 4,
-               CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_MIPI0, "div_mipi0", "mout_mipi0", DIV_LCD, 16, 4),
-       DIV(CLK_DIV_FIMD0, "div_fimd0", "mout_fimd0", DIV_LCD, 0, 4),
-
-       /* DIV_ISP */
-       DIV(CLK_DIV_UART_ISP, "div_uart_isp", "mout_uart_isp", DIV_ISP, 28, 4),
-       DIV_F(CLK_DIV_SPI1_ISP_PRE, "div_spi1_isp_pre", "div_spi1_isp",
-               DIV_ISP, 20, 8, CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_SPI1_ISP, "div_spi1_isp", "mout_spi1_isp", DIV_ISP, 16, 4),
-       DIV_F(CLK_DIV_SPI0_ISP_PRE, "div_spi0_isp_pre", "div_spi0_isp",
-               DIV_ISP, 8, 8, CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_SPI0_ISP, "div_spi0_isp", "mout_spi0_isp", DIV_ISP, 4, 4),
-       DIV(CLK_DIV_PWM_ISP, "div_pwm_isp", "mout_pwm_isp", DIV_ISP, 0, 4),
-
-       /* DIV_MAUDIO */
-       DIV(CLK_DIV_PCM0, "div_pcm0", "div_audio0", DIV_MAUDIO, 4, 8),
-       DIV(CLK_DIV_AUDIO0, "div_audio0", "mout_audio0", DIV_MAUDIO, 0, 4),
-
-       /* DIV_FSYS0 */
-       DIV_F(CLK_DIV_TSADC_PRE, "div_tsadc_pre", "div_tsadc", DIV_FSYS0, 8, 8,
-               CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_TSADC, "div_tsadc", "mout_tsadc", DIV_FSYS0, 0, 4),
-
-       /* DIV_FSYS1 */
-       DIV_F(CLK_DIV_MMC1_PRE, "div_mmc1_pre", "div_mmc1", DIV_FSYS1, 24, 8,
-               CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_MMC1, "div_mmc1", "mout_mmc1", DIV_FSYS1, 16, 4),
-       DIV_F(CLK_DIV_MMC0_PRE, "div_mmc0_pre", "div_mmc0", DIV_FSYS1, 8, 8,
-               CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_MMC0, "div_mmc0", "mout_mmc0", DIV_FSYS1, 0, 4),
-
-       /* DIV_FSYS2 */
-       DIV_F(CLK_DIV_MMC2_PRE, "div_mmc2_pre", "div_mmc2", DIV_FSYS2, 8, 8,
-               CLK_SET_RATE_PARENT, 0),
-       DIV_F(CLK_DIV_MMC2_PRE, "div_mmc2", "mout_mmc2", DIV_FSYS2, 0, 4,
-               CLK_SET_RATE_PARENT, 0),
-
-       /* DIV_PERIL0 */
-       DIV(CLK_DIV_UART3, "div_uart3", "mout_uart3", DIV_PERIL0, 12, 4),
-       DIV(CLK_DIV_UART2, "div_uart2", "mout_uart2", DIV_PERIL0, 8, 4),
-       DIV(CLK_DIV_UART1, "div_uart1", "mout_uart1", DIV_PERIL0, 4, 4),
-       DIV(CLK_DIV_UART0, "div_uart0", "mout_uart0", DIV_PERIL0, 0, 4),
-
-       /* DIV_PERIL1 */
-       DIV_F(CLK_DIV_SPI1_PRE, "div_spi1_pre", "div_spi1", DIV_PERIL1, 24, 8,
-               CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_SPI1, "div_spi1", "mout_spi1", DIV_PERIL1, 16, 4),
-       DIV_F(CLK_DIV_SPI0_PRE, "div_spi0_pre", "div_spi0", DIV_PERIL1, 8, 8,
-               CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_SPI0, "div_spi0", "mout_spi0", DIV_PERIL1, 0, 4),
-
-       /* DIV_PERIL2 */
-       DIV_F(CLK_DIV_SPI2_PRE, "div_spi2_pre", "div_spi2", DIV_PERIL2, 8, 8,
-               CLK_SET_RATE_PARENT, 0),
-       DIV(CLK_DIV_SPI2, "div_spi2", "mout_spi2", DIV_PERIL2, 0, 4),
-
-       /* DIV_PERIL4 */
-       DIV(CLK_DIV_PCM2, "div_pcm2", "div_audio2", DIV_PERIL4, 20, 8),
-       DIV(CLK_DIV_AUDIO2, "div_audio2", "mout_audio2", DIV_PERIL4, 16, 4),
-       DIV(CLK_DIV_PCM1, "div_pcm1", "div_audio1", DIV_PERIL4, 20, 8),
-       DIV(CLK_DIV_AUDIO1, "div_audio1", "mout_audio1", DIV_PERIL4, 0, 4),
-
-       /* DIV_PERIL5 */
-       DIV(CLK_DIV_I2S1, "div_i2s1", "div_audio1", DIV_PERIL5, 0, 6),
-
-       /* DIV_CAM1 */
-       DIV(CLK_DIV_PXLASYNC_CSIS1_FIMC, "div_pxlasync_csis1_fimc",
-               "mout_pxlasync_csis1", DIV_CAM1, 24, 4),
-       DIV(CLK_DIV_PXLASYNC_CSIS0_FIMC, "div_pxlasync_csis0_fimc",
-               "mout_pxlasync_csis0", DIV_CAM1, 20, 4),
-       DIV(CLK_DIV_JPEG, "div_jpeg", "mout_jpeg", DIV_CAM1, 0, 4),
-
-       /* DIV_CPU0 */
-       DIV(CLK_DIV_CORE2, "div_core2", "div_core", DIV_CPU0, 28, 3),
-       DIV_F(CLK_DIV_APLL, "div_apll", "mout_apll", DIV_CPU0, 24, 3,
-                       CLK_GET_RATE_NOCACHE, CLK_DIVIDER_READ_ONLY),
-       DIV(CLK_DIV_PCLK_DBG, "div_pclk_dbg", "div_core2", DIV_CPU0, 20, 3),
-       DIV(CLK_DIV_ATB, "div_atb", "div_core2", DIV_CPU0, 16, 3),
-       DIV(CLK_DIV_PERIPH, "div_periph", "div_core2", DIV_CPU0, 12, 3),
-       DIV(CLK_DIV_COREM1, "div_corem1", "div_core2", DIV_CPU0, 8, 3),
-       DIV(CLK_DIV_COREM0, "div_corem0", "div_core2", DIV_CPU0, 4, 3),
-       DIV_F(CLK_DIV_CORE, "div_core", "mout_core", DIV_CPU0, 0, 3,
-               CLK_GET_RATE_NOCACHE, CLK_DIVIDER_READ_ONLY),
-
-       /* DIV_CPU1 */
-       DIV(CLK_DIV_HPM, "div_hpm", "div_copy", DIV_CPU1, 4, 3),
-       DIV(CLK_DIV_COPY, "div_copy", "mout_hpm", DIV_CPU1, 0, 3),
-};
-
-static const struct samsung_gate_clock exynos4415_gate_clks[] __initconst = {
-       /*
-        * NOTE: Following table is sorted by register address in ascending
-        * order and then bitfield shift in descending order, as it is done
-        * in the User's Manual. When adding new entries, please make sure
-        * that the order is preserved, to avoid merge conflicts and make
-        * further work with defined data easier.
-        */
-
-       /* GATE_IP_LEFTBUS */
-       GATE(CLK_ASYNC_G3D, "async_g3d", "div_aclk_100", GATE_IP_LEFTBUS, 6,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_ASYNC_MFCL, "async_mfcl", "div_aclk_100", GATE_IP_LEFTBUS, 4,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_ASYNC_TVX, "async_tvx", "div_aclk_100", GATE_IP_LEFTBUS, 3,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PPMULEFT, "ppmuleft", "div_aclk_100", GATE_IP_LEFTBUS, 1,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_GPIO_LEFT, "gpio_left", "div_aclk_100", GATE_IP_LEFTBUS, 0,
-               CLK_IGNORE_UNUSED, 0),
-
-       /* GATE_IP_IMAGE */
-       GATE(CLK_PPMUIMAGE, "ppmuimage", "div_aclk_100", GATE_IP_IMAGE,
-               9, 0, 0),
-       GATE(CLK_QEMDMA2, "qe_mdma2", "div_aclk_100", GATE_IP_IMAGE,
-               8, 0, 0),
-       GATE(CLK_QEROTATOR, "qe_rotator", "div_aclk_100", GATE_IP_IMAGE,
-               7, 0, 0),
-       GATE(CLK_SMMUMDMA2, "smmu_mdam2", "div_aclk_100", GATE_IP_IMAGE,
-               5, 0, 0),
-       GATE(CLK_SMMUROTATOR, "smmu_rotator", "div_aclk_100", GATE_IP_IMAGE,
-               4, 0, 0),
-       GATE(CLK_MDMA2, "mdma2", "div_aclk_100", GATE_IP_IMAGE, 2, 0, 0),
-       GATE(CLK_ROTATOR, "rotator", "div_aclk_100", GATE_IP_IMAGE, 1, 0, 0),
-
-       /* GATE_IP_RIGHTBUS */
-       GATE(CLK_ASYNC_ISPMX, "async_ispmx", "div_aclk_100",
-               GATE_IP_RIGHTBUS, 9, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_ASYNC_MAUDIOX, "async_maudiox", "div_aclk_100",
-               GATE_IP_RIGHTBUS, 7, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_ASYNC_MFCR, "async_mfcr", "div_aclk_100",
-               GATE_IP_RIGHTBUS, 6, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_ASYNC_FSYSD, "async_fsysd", "div_aclk_100",
-               GATE_IP_RIGHTBUS, 5, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_ASYNC_LCD0X, "async_lcd0x", "div_aclk_100",
-               GATE_IP_RIGHTBUS, 3, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_ASYNC_CAMX, "async_camx", "div_aclk_100",
-               GATE_IP_RIGHTBUS, 2, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PPMURIGHT, "ppmuright", "div_aclk_100",
-               GATE_IP_RIGHTBUS, 1, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_GPIO_RIGHT, "gpio_right", "div_aclk_100",
-               GATE_IP_RIGHTBUS, 0, CLK_IGNORE_UNUSED, 0),
-
-       /* GATE_IP_PERIR */
-       GATE(CLK_ANTIRBK_APBIF, "antirbk_apbif", "div_aclk_100",
-               GATE_IP_PERIR, 24, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_EFUSE_WRITER_APBIF, "efuse_writer_apbif", "div_aclk_100",
-               GATE_IP_PERIR, 23, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_MONOCNT, "monocnt", "div_aclk_100", GATE_IP_PERIR, 22,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_TZPC6, "tzpc6", "div_aclk_100", GATE_IP_PERIR, 21,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PROVISIONKEY1, "provisionkey1", "div_aclk_100",
-               GATE_IP_PERIR, 20, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PROVISIONKEY0, "provisionkey0", "div_aclk_100",
-               GATE_IP_PERIR, 19, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_CMU_ISPPART, "cmu_isppart", "div_aclk_100", GATE_IP_PERIR, 18,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_TMU_APBIF, "tmu_apbif", "div_aclk_100",
-               GATE_IP_PERIR, 17, 0, 0),
-       GATE(CLK_KEYIF, "keyif", "div_aclk_100", GATE_IP_PERIR, 16, 0, 0),
-       GATE(CLK_RTC, "rtc", "div_aclk_100", GATE_IP_PERIR, 15, 0, 0),
-       GATE(CLK_WDT, "wdt", "div_aclk_100", GATE_IP_PERIR, 14, 0, 0),
-       GATE(CLK_MCT, "mct", "div_aclk_100", GATE_IP_PERIR, 13, 0, 0),
-       GATE(CLK_SECKEY, "seckey", "div_aclk_100", GATE_IP_PERIR, 12,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_HDMI_CEC, "hdmi_cec", "div_aclk_100", GATE_IP_PERIR, 11,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_TZPC5, "tzpc5", "div_aclk_100", GATE_IP_PERIR, 10,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_TZPC4, "tzpc4", "div_aclk_100", GATE_IP_PERIR, 9,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_TZPC3, "tzpc3", "div_aclk_100", GATE_IP_PERIR, 8,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_TZPC2, "tzpc2", "div_aclk_100", GATE_IP_PERIR, 7,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_TZPC1, "tzpc1", "div_aclk_100", GATE_IP_PERIR, 6,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_TZPC0, "tzpc0", "div_aclk_100", GATE_IP_PERIR, 5,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_CMU_COREPART, "cmu_corepart", "div_aclk_100", GATE_IP_PERIR, 4,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_CMU_TOPPART, "cmu_toppart", "div_aclk_100", GATE_IP_PERIR, 3,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PMU_APBIF, "pmu_apbif", "div_aclk_100", GATE_IP_PERIR, 2,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_SYSREG, "sysreg", "div_aclk_100", GATE_IP_PERIR, 1,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_CHIP_ID, "chip_id", "div_aclk_100", GATE_IP_PERIR, 0,
-               CLK_IGNORE_UNUSED, 0),
-
-       /* GATE_SCLK_CAM - non-completed */
-       GATE(CLK_SCLK_PXLAYSNC_CSIS1_FIMC, "sclk_pxlasync_csis1_fimc",
-               "div_pxlasync_csis1_fimc", GATE_SCLK_CAM, 11,
-               CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_PXLAYSNC_CSIS0_FIMC, "sclk_pxlasync_csis0_fimc",
-               "div_pxlasync_csis0_fimc", GATE_SCLK_CAM,
-               10, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_JPEG, "sclk_jpeg", "div_jpeg",
-               GATE_SCLK_CAM, 8, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_CSIS1, "sclk_csis1", "div_csis1",
-               GATE_SCLK_CAM, 7, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_CSIS0, "sclk_csis0", "div_csis0",
-               GATE_SCLK_CAM, 6, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_CAM1, "sclk_cam1", "div_cam1",
-               GATE_SCLK_CAM, 5, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_FIMC3_LCLK, "sclk_fimc3_lclk", "div_fimc3_lclk",
-               GATE_SCLK_CAM, 3, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_FIMC2_LCLK, "sclk_fimc2_lclk", "div_fimc2_lclk",
-               GATE_SCLK_CAM, 2, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_FIMC1_LCLK, "sclk_fimc1_lclk", "div_fimc1_lclk",
-               GATE_SCLK_CAM, 1, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_FIMC0_LCLK, "sclk_fimc0_lclk", "div_fimc0_lclk",
-               GATE_SCLK_CAM, 0, CLK_SET_RATE_PARENT, 0),
-
-       /* GATE_SCLK_TV */
-       GATE(CLK_SCLK_PIXEL, "sclk_pixel", "div_tv_blk",
-               GATE_SCLK_TV, 3, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_HDMI, "sclk_hdmi", "mout_hdmi",
-               GATE_SCLK_TV, 2, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_MIXER, "sclk_mixer", "div_tv_blk",
-               GATE_SCLK_TV, 0, CLK_SET_RATE_PARENT, 0),
-
-       /* GATE_SCLK_MFC */
-       GATE(CLK_SCLK_MFC, "sclk_mfc", "div_mfc",
-               GATE_SCLK_MFC, 0, CLK_SET_RATE_PARENT, 0),
-
-       /* GATE_SCLK_G3D */
-       GATE(CLK_SCLK_G3D, "sclk_g3d", "div_g3d",
-               GATE_SCLK_G3D, 0, CLK_SET_RATE_PARENT, 0),
-
-       /* GATE_SCLK_LCD */
-       GATE(CLK_SCLK_MIPIDPHY4L, "sclk_mipidphy4l", "div_mipi0",
-               GATE_SCLK_LCD, 4, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_MIPI0, "sclk_mipi0", "div_mipi0_pre",
-               GATE_SCLK_LCD, 3, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_MDNIE0, "sclk_mdnie0", "div_fimd0",
-               GATE_SCLK_LCD, 1, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_FIMD0, "sclk_fimd0", "div_fimd0",
-               GATE_SCLK_LCD, 0, CLK_SET_RATE_PARENT, 0),
-
-       /* GATE_SCLK_MAUDIO */
-       GATE(CLK_SCLK_PCM0, "sclk_pcm0", "div_pcm0",
-               GATE_SCLK_MAUDIO, 1, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_AUDIO0, "sclk_audio0", "div_audio0",
-               GATE_SCLK_MAUDIO, 0, CLK_SET_RATE_PARENT, 0),
-
-       /* GATE_SCLK_FSYS */
-       GATE(CLK_SCLK_TSADC, "sclk_tsadc", "div_tsadc_pre",
-               GATE_SCLK_FSYS, 9, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_EBI, "sclk_ebi", "div_ebi",
-               GATE_SCLK_FSYS, 6, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_MMC2, "sclk_mmc2", "div_mmc2_pre",
-               GATE_SCLK_FSYS, 2, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_MMC1, "sclk_mmc1", "div_mmc1_pre",
-               GATE_SCLK_FSYS, 1, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_MMC0, "sclk_mmc0", "div_mmc0_pre",
-               GATE_SCLK_FSYS, 0, CLK_SET_RATE_PARENT, 0),
-
-       /* GATE_SCLK_PERIL */
-       GATE(CLK_SCLK_I2S, "sclk_i2s1", "div_i2s1",
-               GATE_SCLK_PERIL, 18, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_PCM2, "sclk_pcm2", "div_pcm2",
-               GATE_SCLK_PERIL, 16, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_PCM1, "sclk_pcm1", "div_pcm1",
-               GATE_SCLK_PERIL, 15, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_AUDIO2, "sclk_audio2", "div_audio2",
-               GATE_SCLK_PERIL, 14, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_AUDIO1, "sclk_audio1", "div_audio1",
-               GATE_SCLK_PERIL, 13, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_SPDIF, "sclk_spdif", "mout_spdif",
-               GATE_SCLK_PERIL, 10, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_SPI2, "sclk_spi2", "div_spi2_pre",
-               GATE_SCLK_PERIL, 8, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_SPI1, "sclk_spi1", "div_spi1_pre",
-               GATE_SCLK_PERIL, 7, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_SPI0, "sclk_spi0", "div_spi0_pre",
-               GATE_SCLK_PERIL, 6, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_UART3, "sclk_uart3", "div_uart3",
-               GATE_SCLK_PERIL, 3, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_UART2, "sclk_uart2", "div_uart2",
-               GATE_SCLK_PERIL, 2, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_UART1, "sclk_uart1", "div_uart1",
-               GATE_SCLK_PERIL, 1, CLK_SET_RATE_PARENT, 0),
-       GATE(CLK_SCLK_UART0, "sclk_uart0", "div_uart0",
-               GATE_SCLK_PERIL, 0, CLK_SET_RATE_PARENT, 0),
-
-       /* GATE_IP_CAM */
-       GATE(CLK_SMMUFIMC_LITE2, "smmufimc_lite2", "div_aclk_160", GATE_IP_CAM,
-               22, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_FIMC_LITE2, "fimc_lite2", "div_aclk_160", GATE_IP_CAM,
-               20, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PIXELASYNCM1, "pixelasyncm1", "div_aclk_160", GATE_IP_CAM,
-               18, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PIXELASYNCM0, "pixelasyncm0", "div_aclk_160", GATE_IP_CAM,
-               17, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PPMUCAMIF, "ppmucamif", "div_aclk_160", GATE_IP_CAM,
-               16, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_SMMUJPEG, "smmujpeg", "div_aclk_160", GATE_IP_CAM, 11, 0, 0),
-       GATE(CLK_SMMUFIMC3, "smmufimc3", "div_aclk_160", GATE_IP_CAM, 10, 0, 0),
-       GATE(CLK_SMMUFIMC2, "smmufimc2", "div_aclk_160", GATE_IP_CAM, 9, 0, 0),
-       GATE(CLK_SMMUFIMC1, "smmufimc1", "div_aclk_160", GATE_IP_CAM, 8, 0, 0),
-       GATE(CLK_SMMUFIMC0, "smmufimc0", "div_aclk_160", GATE_IP_CAM, 7, 0, 0),
-       GATE(CLK_JPEG, "jpeg", "div_aclk_160", GATE_IP_CAM, 6, 0, 0),
-       GATE(CLK_CSIS1, "csis1", "div_aclk_160", GATE_IP_CAM, 5, 0, 0),
-       GATE(CLK_CSIS0, "csis0", "div_aclk_160", GATE_IP_CAM, 4, 0, 0),
-       GATE(CLK_FIMC3, "fimc3", "div_aclk_160", GATE_IP_CAM, 3, 0, 0),
-       GATE(CLK_FIMC2, "fimc2", "div_aclk_160", GATE_IP_CAM, 2, 0, 0),
-       GATE(CLK_FIMC1, "fimc1", "div_aclk_160", GATE_IP_CAM, 1, 0, 0),
-       GATE(CLK_FIMC0, "fimc0", "div_aclk_160", GATE_IP_CAM, 0, 0, 0),
-
-       /* GATE_IP_TV */
-       GATE(CLK_PPMUTV, "ppmutv", "div_aclk_100", GATE_IP_TV, 5, 0, 0),
-       GATE(CLK_SMMUTV, "smmutv", "div_aclk_100", GATE_IP_TV, 4, 0, 0),
-       GATE(CLK_HDMI, "hdmi", "div_aclk_100", GATE_IP_TV, 3, 0, 0),
-       GATE(CLK_MIXER, "mixer", "div_aclk_100", GATE_IP_TV, 1, 0, 0),
-       GATE(CLK_VP, "vp", "div_aclk_100", GATE_IP_TV, 0, 0, 0),
-
-       /* GATE_IP_MFC */
-       GATE(CLK_PPMUMFC_R, "ppmumfc_r", "div_aclk_200", GATE_IP_MFC, 4,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_PPMUMFC_L, "ppmumfc_l", "div_aclk_200", GATE_IP_MFC, 3,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_SMMUMFC_R, "smmumfc_r", "div_aclk_200", GATE_IP_MFC, 2, 0, 0),
-       GATE(CLK_SMMUMFC_L, "smmumfc_l", "div_aclk_200", GATE_IP_MFC, 1, 0, 0),
-       GATE(CLK_MFC, "mfc", "div_aclk_200", GATE_IP_MFC, 0, 0, 0),
-
-       /* GATE_IP_G3D */
-       GATE(CLK_PPMUG3D, "ppmug3d", "div_aclk_200", GATE_IP_G3D, 1,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_G3D, "g3d", "div_aclk_200", GATE_IP_G3D, 0, 0, 0),
-
-       /* GATE_IP_LCD */
-       GATE(CLK_PPMULCD0, "ppmulcd0", "div_aclk_160", GATE_IP_LCD, 5,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_SMMUFIMD0, "smmufimd0", "div_aclk_160", GATE_IP_LCD, 4, 0, 0),
-       GATE(CLK_DSIM0, "dsim0", "div_aclk_160", GATE_IP_LCD, 3, 0, 0),
-       GATE(CLK_SMIES, "smies", "div_aclk_160", GATE_IP_LCD, 2, 0, 0),
-       GATE(CLK_MIE0, "mie0", "div_aclk_160", GATE_IP_LCD, 1, 0, 0),
-       GATE(CLK_FIMD0, "fimd0", "div_aclk_160", GATE_IP_LCD, 0, 0, 0),
-
-       /* GATE_IP_FSYS */
-       GATE(CLK_TSADC, "tsadc", "div_aclk_200", GATE_IP_FSYS, 20, 0, 0),
-       GATE(CLK_PPMUFILE, "ppmufile", "div_aclk_200", GATE_IP_FSYS, 17,
-               CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_NFCON, "nfcon", "div_aclk_200", GATE_IP_FSYS, 16, 0, 0),
-       GATE(CLK_USBDEVICE, "usbdevice", "div_aclk_200", GATE_IP_FSYS, 13,
-               0, 0),
-       GATE(CLK_USBHOST, "usbhost", "div_aclk_200", GATE_IP_FSYS, 12, 0, 0),
-       GATE(CLK_SROMC, "sromc", "div_aclk_200", GATE_IP_FSYS, 11, 0, 0),
-       GATE(CLK_SDMMC2, "sdmmc2", "div_aclk_200", GATE_IP_FSYS, 7, 0, 0),
-       GATE(CLK_SDMMC1, "sdmmc1", "div_aclk_200", GATE_IP_FSYS, 6, 0, 0),
-       GATE(CLK_SDMMC0, "sdmmc0", "div_aclk_200", GATE_IP_FSYS, 5, 0, 0),
-       GATE(CLK_PDMA1, "pdma1", "div_aclk_200", GATE_IP_FSYS, 1, 0, 0),
-       GATE(CLK_PDMA0, "pdma0", "div_aclk_200", GATE_IP_FSYS, 0, 0, 0),
-
-       /* GATE_IP_PERIL */
-       GATE(CLK_SPDIF, "spdif", "div_aclk_100", GATE_IP_PERIL, 26, 0, 0),
-       GATE(CLK_PWM, "pwm", "div_aclk_100", GATE_IP_PERIL, 24, 0, 0),
-       GATE(CLK_PCM2, "pcm2", "div_aclk_100", GATE_IP_PERIL, 23, 0, 0),
-       GATE(CLK_PCM1, "pcm1", "div_aclk_100", GATE_IP_PERIL, 22, 0, 0),
-       GATE(CLK_I2S1, "i2s1", "div_aclk_100", GATE_IP_PERIL, 20, 0, 0),
-       GATE(CLK_SPI2, "spi2", "div_aclk_100", GATE_IP_PERIL, 18, 0, 0),
-       GATE(CLK_SPI1, "spi1", "div_aclk_100", GATE_IP_PERIL, 17, 0, 0),
-       GATE(CLK_SPI0, "spi0", "div_aclk_100", GATE_IP_PERIL, 16, 0, 0),
-       GATE(CLK_I2CHDMI, "i2chdmi", "div_aclk_100", GATE_IP_PERIL, 14, 0, 0),
-       GATE(CLK_I2C7, "i2c7", "div_aclk_100", GATE_IP_PERIL, 13, 0, 0),
-       GATE(CLK_I2C6, "i2c6", "div_aclk_100", GATE_IP_PERIL, 12, 0, 0),
-       GATE(CLK_I2C5, "i2c5", "div_aclk_100", GATE_IP_PERIL, 11, 0, 0),
-       GATE(CLK_I2C4, "i2c4", "div_aclk_100", GATE_IP_PERIL, 10, 0, 0),
-       GATE(CLK_I2C3, "i2c3", "div_aclk_100", GATE_IP_PERIL, 9, 0, 0),
-       GATE(CLK_I2C2, "i2c2", "div_aclk_100", GATE_IP_PERIL, 8, 0, 0),
-       GATE(CLK_I2C1, "i2c1", "div_aclk_100", GATE_IP_PERIL, 7, 0, 0),
-       GATE(CLK_I2C0, "i2c0", "div_aclk_100", GATE_IP_PERIL, 6, 0, 0),
-       GATE(CLK_UART3, "uart3", "div_aclk_100", GATE_IP_PERIL, 3, 0, 0),
-       GATE(CLK_UART2, "uart2", "div_aclk_100", GATE_IP_PERIL, 2, 0, 0),
-       GATE(CLK_UART1, "uart1", "div_aclk_100", GATE_IP_PERIL, 1, 0, 0),
-       GATE(CLK_UART0, "uart0", "div_aclk_100", GATE_IP_PERIL, 0, 0, 0),
-};
-
-/*
- * APLL & MPLL & BPLL & ISP_PLL & DISP_PLL & G3D_PLL
- */
-static const struct samsung_pll_rate_table exynos4415_pll_rates[] __initconst = {
-       PLL_35XX_RATE(1600000000, 400, 3,  1),
-       PLL_35XX_RATE(1500000000, 250, 2,  1),
-       PLL_35XX_RATE(1400000000, 175, 3,  0),
-       PLL_35XX_RATE(1300000000, 325, 3,  1),
-       PLL_35XX_RATE(1200000000, 400, 4,  1),
-       PLL_35XX_RATE(1100000000, 275, 3,  1),
-       PLL_35XX_RATE(1066000000, 533, 6,  1),
-       PLL_35XX_RATE(1000000000, 250, 3,  1),
-       PLL_35XX_RATE(960000000,  320, 4,  1),
-       PLL_35XX_RATE(900000000,  300, 4,  1),
-       PLL_35XX_RATE(850000000,  425, 6,  1),
-       PLL_35XX_RATE(800000000,  200, 3,  1),
-       PLL_35XX_RATE(700000000,  175, 3,  1),
-       PLL_35XX_RATE(667000000,  667, 12, 1),
-       PLL_35XX_RATE(600000000,  400, 4,  2),
-       PLL_35XX_RATE(550000000,  275, 3,  2),
-       PLL_35XX_RATE(533000000,  533, 6,  2),
-       PLL_35XX_RATE(520000000,  260, 3,  2),
-       PLL_35XX_RATE(500000000,  250, 3,  2),
-       PLL_35XX_RATE(440000000,  220, 3,  2),
-       PLL_35XX_RATE(400000000,  200, 3,  2),
-       PLL_35XX_RATE(350000000,  175, 3,  2),
-       PLL_35XX_RATE(300000000,  300, 3,  3),
-       PLL_35XX_RATE(266000000,  266, 3,  3),
-       PLL_35XX_RATE(200000000,  200, 3,  3),
-       PLL_35XX_RATE(160000000,  160, 3,  3),
-       PLL_35XX_RATE(100000000,  200, 3,  4),
-       { /* sentinel */ }
-};
-
-/* EPLL */
-static const struct samsung_pll_rate_table exynos4415_epll_rates[] __initconst = {
-       PLL_36XX_RATE(800000000, 200, 3, 1,     0),
-       PLL_36XX_RATE(288000000,  96, 2, 2,     0),
-       PLL_36XX_RATE(192000000, 128, 2, 3,     0),
-       PLL_36XX_RATE(144000000,  96, 2, 3,     0),
-       PLL_36XX_RATE(96000000,  128, 2, 4,     0),
-       PLL_36XX_RATE(84000000,  112, 2, 4,     0),
-       PLL_36XX_RATE(80750011,  107, 2, 4, 43691),
-       PLL_36XX_RATE(73728004,   98, 2, 4, 19923),
-       PLL_36XX_RATE(67987602,  271, 3, 5, 62285),
-       PLL_36XX_RATE(65911004,  175, 2, 5, 49982),
-       PLL_36XX_RATE(50000000,  200, 3, 5,     0),
-       PLL_36XX_RATE(49152003,  131, 2, 5,  4719),
-       PLL_36XX_RATE(48000000,  128, 2, 5,     0),
-       PLL_36XX_RATE(45250000,  181, 3, 5,     0),
-       { /* sentinel */ }
-};
-
-static const struct samsung_pll_clock exynos4415_plls[] __initconst = {
-       PLL(pll_35xx, CLK_FOUT_APLL, "fout_apll", "fin_pll",
-               APLL_LOCK, APLL_CON0, exynos4415_pll_rates),
-       PLL(pll_36xx, CLK_FOUT_EPLL, "fout_epll", "fin_pll",
-               EPLL_LOCK, EPLL_CON0, exynos4415_epll_rates),
-       PLL(pll_35xx, CLK_FOUT_G3D_PLL, "fout_g3d_pll", "mout_g3d_pllsrc",
-               G3D_PLL_LOCK, G3D_PLL_CON0, exynos4415_pll_rates),
-       PLL(pll_35xx, CLK_FOUT_ISP_PLL, "fout_isp_pll", "fin_pll",
-               ISP_PLL_LOCK, ISP_PLL_CON0, exynos4415_pll_rates),
-       PLL(pll_35xx, CLK_FOUT_DISP_PLL, "fout_disp_pll",
-               "fin_pll", DISP_PLL_LOCK, DISP_PLL_CON0, exynos4415_pll_rates),
-};
-
-static const struct samsung_cmu_info cmu_info __initconst = {
-       .pll_clks               = exynos4415_plls,
-       .nr_pll_clks            = ARRAY_SIZE(exynos4415_plls),
-       .mux_clks               = exynos4415_mux_clks,
-       .nr_mux_clks            = ARRAY_SIZE(exynos4415_mux_clks),
-       .div_clks               = exynos4415_div_clks,
-       .nr_div_clks            = ARRAY_SIZE(exynos4415_div_clks),
-       .gate_clks              = exynos4415_gate_clks,
-       .nr_gate_clks           = ARRAY_SIZE(exynos4415_gate_clks),
-       .fixed_clks             = exynos4415_fixed_rate_clks,
-       .nr_fixed_clks          = ARRAY_SIZE(exynos4415_fixed_rate_clks),
-       .fixed_factor_clks      = exynos4415_fixed_factor_clks,
-       .nr_fixed_factor_clks   = ARRAY_SIZE(exynos4415_fixed_factor_clks),
-       .nr_clk_ids             = CLK_NR_CLKS,
-       .clk_regs               = exynos4415_cmu_clk_regs,
-       .nr_clk_regs            = ARRAY_SIZE(exynos4415_cmu_clk_regs),
-};
-
-static void __init exynos4415_cmu_init(struct device_node *np)
-{
-       samsung_cmu_register_one(np, &cmu_info);
-}
-CLK_OF_DECLARE(exynos4415_cmu, "samsung,exynos4415-cmu", exynos4415_cmu_init);
-
-/*
- * CMU DMC
- */
-
-#define MPLL_LOCK              0x008
-#define MPLL_CON0              0x108
-#define MPLL_CON1              0x10c
-#define MPLL_CON2              0x110
-#define BPLL_LOCK              0x118
-#define BPLL_CON0              0x218
-#define BPLL_CON1              0x21c
-#define BPLL_CON2              0x220
-#define SRC_DMC                        0x300
-#define DIV_DMC1               0x504
-
-static const unsigned long exynos4415_cmu_dmc_clk_regs[] __initconst = {
-       MPLL_LOCK,
-       MPLL_CON0,
-       MPLL_CON1,
-       MPLL_CON2,
-       BPLL_LOCK,
-       BPLL_CON0,
-       BPLL_CON1,
-       BPLL_CON2,
-       SRC_DMC,
-       DIV_DMC1,
-};
-
-PNAME(mout_mpll_p)             = { "fin_pll", "fout_mpll", };
-PNAME(mout_bpll_p)             = { "fin_pll", "fout_bpll", };
-PNAME(mbpll_p)                 = { "mout_mpll", "mout_bpll", };
-
-static const struct samsung_mux_clock exynos4415_dmc_mux_clks[] __initconst = {
-       MUX(CLK_DMC_MOUT_MPLL, "mout_mpll", mout_mpll_p, SRC_DMC, 12, 1),
-       MUX(CLK_DMC_MOUT_BPLL, "mout_bpll", mout_bpll_p, SRC_DMC, 10, 1),
-       MUX(CLK_DMC_MOUT_DPHY, "mout_dphy", mbpll_p, SRC_DMC, 8, 1),
-       MUX(CLK_DMC_MOUT_DMC_BUS, "mout_dmc_bus", mbpll_p, SRC_DMC, 4, 1),
-};
-
-static const struct samsung_div_clock exynos4415_dmc_div_clks[] __initconst = {
-       DIV(CLK_DMC_DIV_DMC, "div_dmc", "div_dmc_pre", DIV_DMC1, 27, 3),
-       DIV(CLK_DMC_DIV_DPHY, "div_dphy", "mout_dphy", DIV_DMC1, 23, 3),
-       DIV(CLK_DMC_DIV_DMC_PRE, "div_dmc_pre", "mout_dmc_bus",
-               DIV_DMC1, 19, 2),
-       DIV(CLK_DMC_DIV_DMCP, "div_dmcp", "div_dmcd", DIV_DMC1, 15, 3),
-       DIV(CLK_DMC_DIV_DMCD, "div_dmcd", "div_dmc", DIV_DMC1, 11, 3),
-       DIV(CLK_DMC_DIV_MPLL_PRE, "div_mpll_pre", "mout_mpll", DIV_DMC1, 8, 2),
-};
-
-static const struct samsung_pll_clock exynos4415_dmc_plls[] __initconst = {
-       PLL(pll_35xx, CLK_DMC_FOUT_MPLL, "fout_mpll", "fin_pll",
-               MPLL_LOCK, MPLL_CON0, exynos4415_pll_rates),
-       PLL(pll_35xx, CLK_DMC_FOUT_BPLL, "fout_bpll", "fin_pll",
-               BPLL_LOCK, BPLL_CON0, exynos4415_pll_rates),
-};
-
-static const struct samsung_cmu_info cmu_dmc_info __initconst = {
-       .pll_clks               = exynos4415_dmc_plls,
-       .nr_pll_clks            = ARRAY_SIZE(exynos4415_dmc_plls),
-       .mux_clks               = exynos4415_dmc_mux_clks,
-       .nr_mux_clks            = ARRAY_SIZE(exynos4415_dmc_mux_clks),
-       .div_clks               = exynos4415_dmc_div_clks,
-       .nr_div_clks            = ARRAY_SIZE(exynos4415_dmc_div_clks),
-       .nr_clk_ids             = NR_CLKS_DMC,
-       .clk_regs               = exynos4415_cmu_dmc_clk_regs,
-       .nr_clk_regs            = ARRAY_SIZE(exynos4415_cmu_dmc_clk_regs),
-};
-
-static void __init exynos4415_cmu_dmc_init(struct device_node *np)
-{
-       samsung_cmu_register_one(np, &cmu_dmc_info);
-}
-CLK_OF_DECLARE(exynos4415_cmu_dmc, "samsung,exynos4415-cmu-dmc",
-               exynos4415_cmu_dmc_init);
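For reference, the PLL_35XX_RATE(rate, m, p, s) entries in the tables above encode Fout = Fin * MDIV / (PDIV * 2^SDIV) with a 24 MHz fin_pll, and the PLL_36XX_RATE entries add a 16-bit fractional K term on top of MDIV. A quick standalone sanity check of the 35xx arithmetic (hypothetical helper, not driver code):

#include <stdio.h>
#include <stdint.h>

/* Recompute a pll_35xx rate from its (mdiv, pdiv, sdiv) triplet. */
static uint64_t pll35xx_rate(uint64_t fin, unsigned int mdiv,
			     unsigned int pdiv, unsigned int sdiv)
{
	return fin * mdiv / (pdiv << sdiv);
}

int main(void)
{
	/* PLL_35XX_RATE(1600000000, 400, 3, 1):
	 * 24 MHz * 400 / (3 << 1) = 1600 MHz
	 */
	printf("%llu\n",
	       (unsigned long long)pll35xx_rate(24000000, 400, 3, 1));
	return 0;
}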
index f096bd7df40c13d6af2ee8f000b7a38f41a47d25..11343a5970933d14c490006727c9fc39a86e1a5e 100644
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
- * Common Clock Framework support for Exynos5443 SoC.
+ * Common Clock Framework support for Exynos5433 SoC.
  */
 
 #include <linux/clk-provider.h>
@@ -549,10 +549,10 @@ static const struct samsung_gate_clock top_gate_clks[] __initconst = {
                        29, CLK_IGNORE_UNUSED, 0),
        GATE(CLK_ACLK_BUS0_400, "aclk_bus0_400", "div_aclk_bus0_400",
                        ENABLE_ACLK_TOP, 26,
-                       CLK_IGNORE_UNUSED | CLK_SET_RATE_PARENT, 0),
+                       CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, 0),
        GATE(CLK_ACLK_BUS1_400, "aclk_bus1_400", "div_aclk_bus1_400",
                        ENABLE_ACLK_TOP, 25,
-                       CLK_IGNORE_UNUSED | CLK_SET_RATE_PARENT, 0),
+                       CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, 0),
        GATE(CLK_ACLK_IMEM_200, "aclk_imem_200", "div_aclk_imem_266",
                        ENABLE_ACLK_TOP, 24,
                        CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, 0),
@@ -616,7 +616,7 @@ static const struct samsung_gate_clock top_gate_clks[] __initconst = {
 
        /* ENABLE_SCLK_TOP_MSCL */
        GATE(CLK_SCLK_JPEG_MSCL, "sclk_jpeg_mscl", "div_sclk_jpeg",
-                       ENABLE_SCLK_TOP_MSCL, 0, 0, 0),
+                       ENABLE_SCLK_TOP_MSCL, 0, CLK_SET_RATE_PARENT, 0),
 
        /* ENABLE_SCLK_TOP_CAM1 */
        GATE(CLK_SCLK_ISP_SENSOR2, "sclk_isp_sensor2", "div_sclk_isp_sensor2_b",
@@ -698,7 +698,7 @@ static const struct samsung_gate_clock top_gate_clks[] __initconst = {
  * ATLAS_PLL & APOLLO_PLL & MEM0_PLL & MEM1_PLL & BUS_PLL & MFC_PLL
  * & MPHY_PLL & G3D_PLL & DISP_PLL & ISP_PLL
  */
-static const struct samsung_pll_rate_table exynos5443_pll_rates[] __initconst = {
+static const struct samsung_pll_rate_table exynos5433_pll_rates[] __initconst = {
        PLL_35XX_RATE(2500000000U, 625, 6,  0),
        PLL_35XX_RATE(2400000000U, 500, 5,  0),
        PLL_35XX_RATE(2300000000U, 575, 6,  0),
@@ -739,7 +739,9 @@ static const struct samsung_pll_rate_table exynos5443_pll_rates[] __initconst =
        PLL_35XX_RATE(350000000U,  350, 6,  2),
        PLL_35XX_RATE(333000000U,  222, 4,  2),
        PLL_35XX_RATE(300000000U,  500, 5,  3),
+       PLL_35XX_RATE(278000000U,  556, 6,  3),
        PLL_35XX_RATE(266000000U,  532, 6,  3),
+       PLL_35XX_RATE(250000000U,  500, 6,  3),
        PLL_35XX_RATE(200000000U,  400, 6,  3),
        PLL_35XX_RATE(166000000U,  332, 6,  3),
        PLL_35XX_RATE(160000000U,  320, 6,  3),
@@ -749,7 +751,7 @@ static const struct samsung_pll_rate_table exynos5443_pll_rates[] __initconst =
 };
 
 /* AUD_PLL */
-static const struct samsung_pll_rate_table exynos5443_aud_pll_rates[] __initconst = {
+static const struct samsung_pll_rate_table exynos5433_aud_pll_rates[] __initconst = {
        PLL_36XX_RATE(400000000U, 200, 3, 2,      0),
        PLL_36XX_RATE(393216000U, 197, 3, 2, -25690),
        PLL_36XX_RATE(384000000U, 128, 2, 2,      0),
@@ -764,9 +766,9 @@ static const struct samsung_pll_rate_table exynos5443_aud_pll_rates[] __initcons
 
 static const struct samsung_pll_clock top_pll_clks[] __initconst = {
        PLL(pll_35xx, CLK_FOUT_ISP_PLL, "fout_isp_pll", "oscclk",
-               ISP_PLL_LOCK, ISP_PLL_CON0, exynos5443_pll_rates),
+               ISP_PLL_LOCK, ISP_PLL_CON0, exynos5433_pll_rates),
        PLL(pll_36xx, CLK_FOUT_AUD_PLL, "fout_aud_pll", "oscclk",
-               AUD_PLL_LOCK, AUD_PLL_CON0, exynos5443_aud_pll_rates),
+               AUD_PLL_LOCK, AUD_PLL_CON0, exynos5433_aud_pll_rates),
 };
 
 static const struct samsung_cmu_info top_cmu_info __initconst = {
@@ -820,7 +822,7 @@ PNAME(mout_mphy_pll_p)              = { "oscclk", "fout_mphy_pll", };
 
 static const struct samsung_pll_clock cpif_pll_clks[] __initconst = {
        PLL(pll_35xx, CLK_FOUT_MPHY_PLL, "fout_mphy_pll", "oscclk",
-               MPHY_PLL_LOCK, MPHY_PLL_CON0, exynos5443_pll_rates),
+               MPHY_PLL_LOCK, MPHY_PLL_CON0, exynos5433_pll_rates),
 };
 
 static const struct samsung_mux_clock cpif_mux_clks[] __initconst = {
@@ -1011,13 +1013,13 @@ static const unsigned long mif_clk_regs[] __initconst = {
 
 static const struct samsung_pll_clock mif_pll_clks[] __initconst = {
        PLL(pll_35xx, CLK_FOUT_MEM0_PLL, "fout_mem0_pll", "oscclk",
-               MEM0_PLL_LOCK, MEM0_PLL_CON0, exynos5443_pll_rates),
+               MEM0_PLL_LOCK, MEM0_PLL_CON0, exynos5433_pll_rates),
        PLL(pll_35xx, CLK_FOUT_MEM1_PLL, "fout_mem1_pll", "oscclk",
-               MEM1_PLL_LOCK, MEM1_PLL_CON0, exynos5443_pll_rates),
+               MEM1_PLL_LOCK, MEM1_PLL_CON0, exynos5433_pll_rates),
        PLL(pll_35xx, CLK_FOUT_BUS_PLL, "fout_bus_pll", "oscclk",
-               BUS_PLL_LOCK, BUS_PLL_CON0, exynos5443_pll_rates),
+               BUS_PLL_LOCK, BUS_PLL_CON0, exynos5433_pll_rates),
        PLL(pll_35xx, CLK_FOUT_MFC_PLL, "fout_mfc_pll", "oscclk",
-               MFC_PLL_LOCK, MFC_PLL_CON0, exynos5443_pll_rates),
+               MFC_PLL_LOCK, MFC_PLL_CON0, exynos5433_pll_rates),
 };
 
 /* list of all parent clock list */
@@ -1382,7 +1384,7 @@ static const struct samsung_gate_clock mif_gate_clks[] __initconst = {
        /* ENABLE_ACLK_MIF3 */
        GATE(CLK_ACLK_BUS2_400, "aclk_bus2_400", "div_aclk_bus2_400",
                        ENABLE_ACLK_MIF3, 4,
-                       CLK_IGNORE_UNUSED | CLK_SET_RATE_PARENT, 0),
+                       CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, 0),
        GATE(CLK_ACLK_DISP_333, "aclk_disp_333", "div_aclk_disp_333",
                        ENABLE_ACLK_MIF3, 1,
                        CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, 0),
@@ -2539,7 +2541,7 @@ PNAME(mout_sclk_decon_tv_vclk_b_disp_p)   = { "mout_sclk_decon_tv_vclk_a_disp",
 
 static const struct samsung_pll_clock disp_pll_clks[] __initconst = {
        PLL(pll_35xx, CLK_FOUT_DISP_PLL, "fout_disp_pll", "oscclk",
-               DISP_PLL_LOCK, DISP_PLL_CON0, exynos5443_pll_rates),
+               DISP_PLL_LOCK, DISP_PLL_CON0, exynos5433_pll_rates),
 };
 
 static const struct samsung_fixed_factor_clock disp_fixed_factor_clks[] __initconst = {
@@ -2559,8 +2561,10 @@ static const struct samsung_fixed_rate_clock disp_fixed_clks[] __initconst = {
        FRATE(0, "phyclk_mipidphy1_bitclkdiv8_phy", NULL, 0, 188000000),
        FRATE(0, "phyclk_mipidphy1_rxclkesc0_phy", NULL, 0, 100000000),
        /* PHY clocks from MIPI_DPHY0 */
-       FRATE(0, "phyclk_mipidphy0_bitclkdiv8_phy", NULL, 0, 188000000),
-       FRATE(0, "phyclk_mipidphy0_rxclkesc0_phy", NULL, 0, 100000000),
+       FRATE(CLK_PHYCLK_MIPIDPHY0_BITCLKDIV8_PHY, "phyclk_mipidphy0_bitclkdiv8_phy",
+                       NULL, 0, 188000000),
+       FRATE(CLK_PHYCLK_MIPIDPHY0_RXCLKESC0_PHY, "phyclk_mipidphy0_rxclkesc0_phy",
+                       NULL, 0, 100000000),
        /* PHY clocks from HDMI_PHY */
        FRATE(CLK_PHYCLK_HDMIPHY_TMDS_CLKO_PHY, "phyclk_hdmiphy_tmds_clko_phy",
                        NULL, 0, 300000000),
@@ -3224,7 +3228,7 @@ PNAME(mout_g3d_pll_p)             = { "oscclk", "fout_g3d_pll", };
 
 static const struct samsung_pll_clock g3d_pll_clks[] __initconst = {
        PLL(pll_35xx, CLK_FOUT_G3D_PLL, "fout_g3d_pll", "oscclk",
-               G3D_PLL_LOCK, G3D_PLL_CON0, exynos5443_pll_rates),
+               G3D_PLL_LOCK, G3D_PLL_CON0, exynos5433_pll_rates),
 };
 
 static const struct samsung_mux_clock g3d_mux_clks[] __initconst = {
@@ -3514,7 +3518,7 @@ PNAME(mout_apollo_p)                      = { "mout_apollo_pll",
 
 static const struct samsung_pll_clock apollo_pll_clks[] __initconst = {
        PLL(pll_35xx, CLK_FOUT_APOLLO_PLL, "fout_apollo_pll", "oscclk",
-               APOLLO_PLL_LOCK, APOLLO_PLL_CON0, exynos5443_pll_rates),
+               APOLLO_PLL_LOCK, APOLLO_PLL_CON0, exynos5433_pll_rates),
 };
 
 static const struct samsung_mux_clock apollo_mux_clks[] __initconst = {
@@ -3737,7 +3741,7 @@ PNAME(mout_atlas_p)                       = { "mout_atlas_pll",
 
 static const struct samsung_pll_clock atlas_pll_clks[] __initconst = {
        PLL(pll_35xx, CLK_FOUT_ATLAS_PLL, "fout_atlas_pll", "oscclk",
-               ATLAS_PLL_LOCK, ATLAS_PLL_CON0, exynos5443_pll_rates),
+               ATLAS_PLL_LOCK, ATLAS_PLL_CON0, exynos5433_pll_rates),
 };
 
 static const struct samsung_mux_clock atlas_mux_clks[] __initconst = {
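The recurring change in this file swaps CLK_IGNORE_UNUSED for CLK_IS_CRITICAL on bus clocks. The distinction matters: CLK_IGNORE_UNUSED only exempts a clock from the late-init "disable unused" sweep, so it can still be gated once a consumer enables and then disables it, whereas CLK_IS_CRITICAL makes the clk core take an enable reference at registration so the clock is never turned off. The relevant flags, as defined in include/linux/clk-provider.h around this time:

#define CLK_SET_RATE_PARENT	BIT(2) /* propagate rate change up one level */
#define CLK_IGNORE_UNUSED	BIT(3) /* do not gate even if unused */
#define CLK_IS_CRITICAL		BIT(11) /* do not gate, ever */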
index 9617825daabb8310788dde2e401c1d969889acac..52290894857a546bda106c573d35dc54c193fbf8 100644
@@ -136,11 +136,39 @@ static const struct clk_ops samsung_pll3000_clk_ops = {
 #define PLL35XX_MDIV_MASK       (0x3FF)
 #define PLL35XX_PDIV_MASK       (0x3F)
 #define PLL35XX_SDIV_MASK       (0x7)
-#define PLL35XX_LOCK_STAT_MASK (0x1)
 #define PLL35XX_MDIV_SHIFT      (16)
 #define PLL35XX_PDIV_SHIFT      (8)
 #define PLL35XX_SDIV_SHIFT      (0)
 #define PLL35XX_LOCK_STAT_SHIFT        (29)
+#define PLL35XX_ENABLE_SHIFT   (31)
+
+static int samsung_pll35xx_enable(struct clk_hw *hw)
+{
+       struct samsung_clk_pll *pll = to_clk_pll(hw);
+       u32 tmp;
+
+       tmp = readl_relaxed(pll->con_reg);
+       tmp |= BIT(PLL35XX_ENABLE_SHIFT);
+       writel_relaxed(tmp, pll->con_reg);
+
+       /* wait_lock_time: busy-wait until the lock status bit is set */
+       do {
+               cpu_relax();
+               tmp = readl_relaxed(pll->con_reg);
+       } while (!(tmp & BIT(PLL35XX_LOCK_STAT_SHIFT)));
+
+       return 0;
+}
+
+static void samsung_pll35xx_disable(struct clk_hw *hw)
+{
+       struct samsung_clk_pll *pll = to_clk_pll(hw);
+       u32 tmp;
+
+       tmp = readl_relaxed(pll->con_reg);
+       tmp &= ~BIT(PLL35XX_ENABLE_SHIFT);
+       writel_relaxed(tmp, pll->con_reg);
+}
 
 static unsigned long samsung_pll35xx_recalc_rate(struct clk_hw *hw,
                                unsigned long parent_rate)
@@ -210,12 +238,13 @@ static int samsung_pll35xx_set_rate(struct clk_hw *hw, unsigned long drate,
                        (rate->sdiv << PLL35XX_SDIV_SHIFT);
        writel_relaxed(tmp, pll->con_reg);
 
-       /* wait_lock_time */
-       do {
-               cpu_relax();
-               tmp = readl_relaxed(pll->con_reg);
-       } while (!(tmp & (PLL35XX_LOCK_STAT_MASK
-                               << PLL35XX_LOCK_STAT_SHIFT)));
+       /*
+        * wait_lock_time, but only if the PLL is enabled; the lock bit
+        * never sets while the PLL is off, so waiting would spin forever.
+        */
+       if (tmp & BIT(PLL35XX_ENABLE_SHIFT)) {
+               do {
+                       cpu_relax();
+                       tmp = readl_relaxed(pll->con_reg);
+               } while (!(tmp & BIT(PLL35XX_LOCK_STAT_SHIFT)));
+       }
        return 0;
 }
 
@@ -223,6 +252,8 @@ static const struct clk_ops samsung_pll35xx_clk_ops = {
        .recalc_rate = samsung_pll35xx_recalc_rate,
        .round_rate = samsung_pll_round_rate,
        .set_rate = samsung_pll35xx_set_rate,
+       .enable = samsung_pll35xx_enable,
+       .disable = samsung_pll35xx_disable,
 };
 
 static const struct clk_ops samsung_pll35xx_clk_min_ops = {
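Note that the new enable path spins on the lock bit without a timeout, just like the set_rate path: if the PLL ever failed to lock, .enable would hang. A bounded variant could lean on the iopoll helpers; a sketch under that assumption, not what the patch does (.enable is called under the clk core's enable spinlock, hence the atomic, udelay-based variant):

#include <linux/iopoll.h>

/* Hypothetical bounded lock wait: same sequence as samsung_pll35xx_enable(),
 * but gives up after 10 ms instead of spinning forever.
 */
static int samsung_pll35xx_enable_bounded(struct samsung_clk_pll *pll)
{
	u32 tmp;

	tmp = readl_relaxed(pll->con_reg);
	tmp |= BIT(PLL35XX_ENABLE_SHIFT);
	writel_relaxed(tmp, pll->con_reg);

	return readl_relaxed_poll_timeout_atomic(pll->con_reg, tmp,
				tmp & BIT(PLL35XX_LOCK_STAT_SHIFT),
				1, 10000);
}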
index d7a1e772d95a97d67c8b6da5104af27358a7145a..e0650c33863bbc221f981374a26f2503c2c6dc1f 100644
@@ -76,7 +76,7 @@ static struct syscore_ops s3c2410_clk_syscore_ops = {
        .resume = s3c2410_clk_resume,
 };
 
-static void s3c2410_clk_sleep_init(void)
+static void __init s3c2410_clk_sleep_init(void)
 {
        s3c2410_save = samsung_clk_alloc_reg_dump(s3c2410_clk_regs,
                                                ARRAY_SIZE(s3c2410_clk_regs));
@@ -90,7 +90,7 @@ static void s3c2410_clk_sleep_init(void)
        return;
 }
 #else
-static void s3c2410_clk_sleep_init(void) {}
+static void __init s3c2410_clk_sleep_init(void) {}
 #endif
 
 PNAME(fclk_p) = { "mpll", "div_slow" };
index ec873ee15d377b6f720e319cb6327744b4489d99..b8340a49921b71c4423b2805a326332046ffe7e3 100644
@@ -69,7 +69,7 @@ static struct syscore_ops s3c2412_clk_syscore_ops = {
        .resume = s3c2412_clk_resume,
 };
 
-static void s3c2412_clk_sleep_init(void)
+static void __init s3c2412_clk_sleep_init(void)
 {
        s3c2412_save = samsung_clk_alloc_reg_dump(s3c2412_clk_regs,
                                                ARRAY_SIZE(s3c2412_clk_regs));
@@ -83,7 +83,7 @@ static void s3c2412_clk_sleep_init(void)
        return;
 }
 #else
-static void s3c2412_clk_sleep_init(void) {}
+static void __init s3c2412_clk_sleep_init(void) {}
 #endif
 
 static struct clk_div_table divxti_d[] = {
index 5e24a17e10e676e2e76ae5efec06af0d9eb0e847..abb935c4291699c957c242c5192512349e31524b 100644
@@ -89,7 +89,7 @@ static struct syscore_ops s3c2443_clk_syscore_ops = {
        .resume = s3c2443_clk_resume,
 };
 
-static void s3c2443_clk_sleep_init(void)
+static void __init s3c2443_clk_sleep_init(void)
 {
        s3c2443_save = samsung_clk_alloc_reg_dump(s3c2443_clk_regs,
                                                ARRAY_SIZE(s3c2443_clk_regs));
@@ -103,7 +103,7 @@ static void s3c2443_clk_sleep_init(void)
        return;
 }
 #else
-static void s3c2443_clk_sleep_init(void) {}
+static void __init s3c2443_clk_sleep_init(void) {}
 #endif
 
 PNAME(epllref_p) = { "mpllref", "mpllref", "xti", "ext" };
index a48bd5f173301e9c0d1c21a94f0e27e52eeb6a9d..7306867a0ab80c21228a47626468f5bbde68a1f3 100644
@@ -121,7 +121,7 @@ static struct syscore_ops s3c64xx_clk_syscore_ops = {
        .resume = s3c64xx_clk_resume,
 };
 
-static void s3c64xx_clk_sleep_init(void)
+static void __init s3c64xx_clk_sleep_init(void)
 {
        s3c64xx_save_common = samsung_clk_alloc_reg_dump(s3c64xx_clk_regs,
                                                ARRAY_SIZE(s3c64xx_clk_regs));
@@ -145,7 +145,7 @@ err_warn:
                __func__);
 }
 #else
-static void s3c64xx_clk_sleep_init(void) {}
+static void __init s3c64xx_clk_sleep_init(void) {}
 #endif
 
 /* List of parent clocks common for all S3C64xx SoCs. */
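The same one-line change lands in all four s3c clock drivers above: the sleep-init helpers run only from each SoC's __init clock setup, so annotating them __init moves them into .init.text, which the kernel frees once boot completes. The annotation has to be consistent along the call chain, since calling an __init function after boot would jump into freed memory. A minimal illustration of the pattern:

#include <linux/init.h>

/* Lives in .init.text; only callable while the kernel is booting. */
static void __init example_sleep_init(void)
{
	/* allocate register dumps, register syscore ops, ... */
}

static void __init example_clk_init(void)
{
	example_sleep_init();	/* fine: __init calling __init */
}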
index 8454c6e3dd65f229a5a51fbd3966d380e2926cb7..695bbf9ef428f98f5348d100a6af2525f04afb59 100644
@@ -64,6 +64,17 @@ config SUN50I_A64_CCU
        select SUNXI_CCU_PHASE
        default ARM64 && ARCH_SUNXI
 
+config SUN5I_CCU
+       bool "Support for the Allwinner sun5i family CCM"
+       select SUNXI_CCU_DIV
+       select SUNXI_CCU_MULT
+       select SUNXI_CCU_NK
+       select SUNXI_CCU_NKM
+       select SUNXI_CCU_NM
+       select SUNXI_CCU_MP
+       select SUNXI_CCU_PHASE
+       default MACH_SUN5I
+
 config SUN6I_A31_CCU
        bool "Support for the Allwinner A31/A31s CCU"
        select SUNXI_CCU_DIV
@@ -109,4 +120,25 @@ config SUN8I_H3_CCU
        select SUNXI_CCU_PHASE
        default MACH_SUN8I
 
+config SUN8I_V3S_CCU
+       bool "Support for the Allwinner V3s CCU"
+       select SUNXI_CCU_DIV
+       select SUNXI_CCU_NK
+       select SUNXI_CCU_NKM
+       select SUNXI_CCU_NKMP
+       select SUNXI_CCU_NM
+       select SUNXI_CCU_MP
+       select SUNXI_CCU_PHASE
+       default MACH_SUN8I
+
+config SUN9I_A80_CCU
+       bool "Support for the Allwinner A80 CCU"
+       select SUNXI_CCU_DIV
+       select SUNXI_CCU_GATE
+       select SUNXI_CCU_NKMP
+       select SUNXI_CCU_NM
+       select SUNXI_CCU_MP
+       select SUNXI_CCU_PHASE
+       default MACH_SUN9I
+
 endif
index 24fbc6e5deb81e1c62de1fcf06652f19493a0f26..6feaac0c5600f883f18f7f3adc99d03abcdb2d75 100644
@@ -19,7 +19,12 @@ obj-$(CONFIG_SUNXI_CCU_MP)   += ccu_mp.o
 
 # SoC support
 obj-$(CONFIG_SUN50I_A64_CCU)   += ccu-sun50i-a64.o
+obj-$(CONFIG_SUN5I_CCU)                += ccu-sun5i.o
 obj-$(CONFIG_SUN6I_A31_CCU)    += ccu-sun6i-a31.o
 obj-$(CONFIG_SUN8I_A23_CCU)    += ccu-sun8i-a23.o
 obj-$(CONFIG_SUN8I_A33_CCU)    += ccu-sun8i-a33.o
 obj-$(CONFIG_SUN8I_H3_CCU)     += ccu-sun8i-h3.o
+obj-$(CONFIG_SUN8I_V3S_CCU)    += ccu-sun8i-v3s.o
+obj-$(CONFIG_SUN9I_A80_CCU)    += ccu-sun9i-a80.o
+obj-$(CONFIG_SUN9I_A80_CCU)    += ccu-sun9i-a80-de.o
+obj-$(CONFIG_SUN9I_A80_CCU)    += ccu-sun9i-a80-usb.o
diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c
new file mode 100644
index 0000000..06edaa5
--- /dev/null
@@ -0,0 +1,1022 @@
+/*
+ * Copyright (c) 2016 Maxime Ripard. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_mult.h"
+#include "ccu_nk.h"
+#include "ccu_nkm.h"
+#include "ccu_nkmp.h"
+#include "ccu_nm.h"
+#include "ccu_phase.h"
+
+#include "ccu-sun5i.h"
+
+static struct ccu_nkmp pll_core_clk = {
+       .enable         = BIT(31),
+       .n              = _SUNXI_CCU_MULT_OFFSET(8, 5, 0),
+       .k              = _SUNXI_CCU_MULT(4, 2),
+       .m              = _SUNXI_CCU_DIV(0, 2),
+       .p              = _SUNXI_CCU_DIV(16, 2),
+       .common         = {
+               .reg            = 0x000,
+               .hw.init        = CLK_HW_INIT("pll-core",
+                                             "hosc",
+                                             &ccu_nkmp_ops,
+                                             0),
+       },
+};
+
+/*
+ * The Audio PLL is supposed to have 4 outputs: 3 fixed factors from
+ * the base (2x, 4x and 8x), and one variable divider (the one true
+ * pll audio).
+ *
+ * We don't have any need for the variable divider for now, so we just
+ * hardcode it to match the clock names.
+ */
+#define SUN5I_PLL_AUDIO_REG    0x008
+
+static struct ccu_nm pll_audio_base_clk = {
+       .enable         = BIT(31),
+       .n              = _SUNXI_CCU_MULT_OFFSET(8, 7, 0),
+
+       /*
+        * The datasheet is wrong here: this divider doesn't have
+        * any offset.
+        */
+       .m              = _SUNXI_CCU_DIV_OFFSET(0, 5, 0),
+       .common         = {
+               .reg            = 0x008,
+               .hw.init        = CLK_HW_INIT("pll-audio-base",
+                                             "hosc",
+                                             &ccu_nm_ops,
+                                             0),
+       },
+};
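With the post-divider pinned at 4 (see the fixed-factor clocks at the bottom of the file) and both N and M decoded without an offset, the effective audio rate works out to 24 MHz * N / M / 4. Purely as an illustration, N = 86 and M = 21 would give 24 MHz * 86 / 84, roughly 24.571 MHz, close to the common 24.576 MHz audio target (illustrative values, not the factory settings).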
+
+static struct ccu_mult pll_video0_clk = {
+       .enable         = BIT(31),
+       .mult           = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(0, 7, 0, 9, 127),
+       .frac           = _SUNXI_CCU_FRAC(BIT(15), BIT(14),
+                                         270000000, 297000000),
+       .common         = {
+               .reg            = 0x010,
+               .features       = (CCU_FEATURE_FRACTIONAL |
+                                  CCU_FEATURE_ALL_PREDIV),
+               .prediv         = 8,
+               .hw.init        = CLK_HW_INIT("pll-video0",
+                                             "hosc",
+                                             &ccu_mult_ops,
+                                             0),
+       },
+};
+
+static struct ccu_nkmp pll_ve_clk = {
+       .enable         = BIT(31),
+       .n              = _SUNXI_CCU_MULT_OFFSET(8, 5, 0),
+       .k              = _SUNXI_CCU_MULT(4, 2),
+       .m              = _SUNXI_CCU_DIV(0, 2),
+       .p              = _SUNXI_CCU_DIV(16, 2),
+       .common         = {
+               .reg            = 0x018,
+               .hw.init        = CLK_HW_INIT("pll-ve",
+                                             "hosc",
+                                             &ccu_nkmp_ops,
+                                             0),
+       },
+};
+
+static struct ccu_nk pll_ddr_base_clk = {
+       .enable         = BIT(31),
+       .n              = _SUNXI_CCU_MULT_OFFSET(8, 5, 0),
+       .k              = _SUNXI_CCU_MULT(4, 2),
+       .common         = {
+               .reg            = 0x020,
+               .hw.init        = CLK_HW_INIT("pll-ddr-base",
+                                             "hosc",
+                                             &ccu_nk_ops,
+                                             0),
+       },
+};
+
+static SUNXI_CCU_M(pll_ddr_clk, "pll-ddr", "pll-ddr-base", 0x020, 0, 2,
+                  CLK_IS_CRITICAL);
+
+static struct ccu_div pll_ddr_other_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(16, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+       .common         = {
+               .reg            = 0x020,
+               .hw.init        = CLK_HW_INIT("pll-ddr-other", "pll-ddr-base",
+                                             &ccu_div_ops,
+                                             0),
+       },
+};
+
+static struct ccu_nk pll_periph_clk = {
+       .enable         = BIT(31),
+       .n              = _SUNXI_CCU_MULT_OFFSET(8, 5, 0),
+       .k              = _SUNXI_CCU_MULT(4, 2),
+       .fixed_post_div = 2,
+       .common         = {
+               .reg            = 0x028,
+               .features       = CCU_FEATURE_FIXED_POSTDIV,
+               .hw.init        = CLK_HW_INIT("pll-periph",
+                                             "hosc",
+                                             &ccu_nk_ops,
+                                             0),
+       },
+};
+
+static struct ccu_mult pll_video1_clk = {
+       .enable         = BIT(31),
+       .mult           = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(0, 7, 0, 9, 127),
+       .frac           = _SUNXI_CCU_FRAC(BIT(15), BIT(14),
+                                 270000000, 297000000),
+       .common         = {
+               .reg            = 0x030,
+               .features       = (CCU_FEATURE_FRACTIONAL |
+                                  CCU_FEATURE_ALL_PREDIV),
+               .prediv         = 8,
+               .hw.init        = CLK_HW_INIT("pll-video1",
+                                             "hosc",
+                                             &ccu_mult_ops,
+                                             0),
+       },
+};
+
+static SUNXI_CCU_GATE(hosc_clk,        "hosc", "osc24M", 0x050, BIT(0), 0);
+
+#define SUN5I_AHB_REG  0x054
+static const char * const cpu_parents[] = { "osc32k", "hosc",
+                                           "pll-core", "pll-periph" };
+static const struct ccu_mux_fixed_prediv cpu_predivs[] = {
+       { .index = 3, .div = 3, },
+};
+static struct ccu_mux cpu_clk = {
+       .mux            = {
+               .shift          = 16,
+               .width          = 2,
+               .fixed_predivs  = cpu_predivs,
+               .n_predivs      = ARRAY_SIZE(cpu_predivs),
+       },
+       .common         = {
+               .reg            = 0x054,
+               .features       = CCU_FEATURE_FIXED_PREDIV,
+               .hw.init        = CLK_HW_INIT_PARENTS("cpu",
+                                                     cpu_parents,
+                                                     &ccu_mux_ops,
+                                                     CLK_IS_CRITICAL),
+       }
+};
+
+static SUNXI_CCU_M(axi_clk, "axi", "cpu", 0x054, 0, 2, 0);
+
+static const char * const ahb_parents[] = { "axi", "cpu", "pll-periph" };
+static const struct ccu_mux_fixed_prediv ahb_predivs[] = {
+       { .index = 2, .div = 2, },
+};
+static struct ccu_div ahb_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(4, 2, CLK_DIVIDER_POWER_OF_TWO),
+       .mux            = {
+               .shift          = 6,
+               .width          = 2,
+               .fixed_predivs  = ahb_predivs,
+               .n_predivs      = ARRAY_SIZE(ahb_predivs),
+       },
+
+       .common         = {
+               .reg            = 0x054,
+               .hw.init        = CLK_HW_INIT_PARENTS("ahb",
+                                                     ahb_parents,
+                                                     &ccu_div_ops,
+                                                     0),
+       },
+};
+
+static struct clk_div_table apb0_div_table[] = {
+       { .val = 0, .div = 2 },
+       { .val = 1, .div = 2 },
+       { .val = 2, .div = 4 },
+       { .val = 3, .div = 8 },
+       { /* Sentinel */ },
+};
+static SUNXI_CCU_DIV_TABLE(apb0_clk, "apb0", "ahb",
+                          0x054, 8, 2, apb0_div_table, 0);
+
+static const char * const apb1_parents[] = { "hosc", "pll-periph", "osc32k" };
+static SUNXI_CCU_MP_WITH_MUX(apb1_clk, "apb1", apb1_parents, 0x058,
+                            0, 5,      /* M */
+                            16, 2,     /* P */
+                            24, 2,     /* mux */
+                            0);
+
+static SUNXI_CCU_GATE(axi_dram_clk,    "axi-dram",     "axi",
+                     0x05c, BIT(0), 0);
+
+static SUNXI_CCU_GATE(ahb_otg_clk,     "ahb-otg",      "ahb",
+                     0x060, BIT(0), 0);
+static SUNXI_CCU_GATE(ahb_ehci_clk,    "ahb-ehci",     "ahb",
+                     0x060, BIT(1), 0);
+static SUNXI_CCU_GATE(ahb_ohci_clk,    "ahb-ohci",     "ahb",
+                     0x060, BIT(2), 0);
+static SUNXI_CCU_GATE(ahb_ss_clk,      "ahb-ss",       "ahb",
+                     0x060, BIT(5), 0);
+static SUNXI_CCU_GATE(ahb_dma_clk,     "ahb-dma",      "ahb",
+                     0x060, BIT(6), 0);
+static SUNXI_CCU_GATE(ahb_bist_clk,    "ahb-bist",     "ahb",
+                     0x060, BIT(7), 0);
+static SUNXI_CCU_GATE(ahb_mmc0_clk,    "ahb-mmc0",     "ahb",
+                     0x060, BIT(8), 0);
+static SUNXI_CCU_GATE(ahb_mmc1_clk,    "ahb-mmc1",     "ahb",
+                     0x060, BIT(9), 0);
+static SUNXI_CCU_GATE(ahb_mmc2_clk,    "ahb-mmc2",     "ahb",
+                     0x060, BIT(10), 0);
+static SUNXI_CCU_GATE(ahb_nand_clk,    "ahb-nand",     "ahb",
+                     0x060, BIT(13), 0);
+static SUNXI_CCU_GATE(ahb_sdram_clk,   "ahb-sdram",    "ahb",
+                     0x060, BIT(14), CLK_IS_CRITICAL);
+static SUNXI_CCU_GATE(ahb_emac_clk,    "ahb-emac",     "ahb",
+                     0x060, BIT(17), 0);
+static SUNXI_CCU_GATE(ahb_ts_clk,      "ahb-ts",       "ahb",
+                     0x060, BIT(18), 0);
+static SUNXI_CCU_GATE(ahb_spi0_clk,    "ahb-spi0",     "ahb",
+                     0x060, BIT(20), 0);
+static SUNXI_CCU_GATE(ahb_spi1_clk,    "ahb-spi1",     "ahb",
+                     0x060, BIT(21), 0);
+static SUNXI_CCU_GATE(ahb_spi2_clk,    "ahb-spi2",     "ahb",
+                     0x060, BIT(22), 0);
+static SUNXI_CCU_GATE(ahb_gps_clk,     "ahb-gps",      "ahb",
+                     0x060, BIT(26), 0);
+static SUNXI_CCU_GATE(ahb_hstimer_clk, "ahb-hstimer",  "ahb",
+                     0x060, BIT(28), 0);
+
+static SUNXI_CCU_GATE(ahb_ve_clk,      "ahb-ve",       "ahb",
+                     0x064, BIT(0), 0);
+static SUNXI_CCU_GATE(ahb_tve_clk,     "ahb-tve",      "ahb",
+                     0x064, BIT(2), 0);
+static SUNXI_CCU_GATE(ahb_lcd_clk,     "ahb-lcd",      "ahb",
+                     0x064, BIT(4), 0);
+static SUNXI_CCU_GATE(ahb_csi_clk,     "ahb-csi",      "ahb",
+                     0x064, BIT(8), 0);
+static SUNXI_CCU_GATE(ahb_hdmi_clk,    "ahb-hdmi",     "ahb",
+                     0x064, BIT(11), 0);
+static SUNXI_CCU_GATE(ahb_de_be_clk,   "ahb-de-be",    "ahb",
+                     0x064, BIT(12), 0);
+static SUNXI_CCU_GATE(ahb_de_fe_clk,   "ahb-de-fe",    "ahb",
+                     0x064, BIT(14), 0);
+static SUNXI_CCU_GATE(ahb_iep_clk,     "ahb-iep",      "ahb",
+                     0x064, BIT(19), 0);
+static SUNXI_CCU_GATE(ahb_gpu_clk,     "ahb-gpu",      "ahb",
+                     0x064, BIT(20), 0);
+
+static SUNXI_CCU_GATE(apb0_codec_clk,  "apb0-codec",   "apb0",
+                     0x068, BIT(0), 0);
+static SUNXI_CCU_GATE(apb0_spdif_clk,  "apb0-spdif",   "apb0",
+                     0x068, BIT(1), 0);
+static SUNXI_CCU_GATE(apb0_i2s_clk,    "apb0-i2s",     "apb0",
+                     0x068, BIT(3), 0);
+static SUNXI_CCU_GATE(apb0_pio_clk,    "apb0-pio",     "apb0",
+                     0x068, BIT(5), 0);
+static SUNXI_CCU_GATE(apb0_ir_clk,     "apb0-ir",      "apb0",
+                     0x068, BIT(6), 0);
+static SUNXI_CCU_GATE(apb0_keypad_clk, "apb0-keypad",  "apb0",
+                     0x068, BIT(10), 0);
+
+static SUNXI_CCU_GATE(apb1_i2c0_clk,   "apb1-i2c0",    "apb1",
+                     0x06c, BIT(0), 0);
+static SUNXI_CCU_GATE(apb1_i2c1_clk,   "apb1-i2c1",    "apb1",
+                     0x06c, BIT(1), 0);
+static SUNXI_CCU_GATE(apb1_i2c2_clk,   "apb1-i2c2",    "apb1",
+                     0x06c, BIT(2), 0);
+static SUNXI_CCU_GATE(apb1_uart0_clk,  "apb1-uart0",   "apb1",
+                     0x06c, BIT(16), 0);
+static SUNXI_CCU_GATE(apb1_uart1_clk,  "apb1-uart1",   "apb1",
+                     0x06c, BIT(17), 0);
+static SUNXI_CCU_GATE(apb1_uart2_clk,  "apb1-uart2",   "apb1",
+                     0x06c, BIT(18), 0);
+static SUNXI_CCU_GATE(apb1_uart3_clk,  "apb1-uart3",   "apb1",
+                     0x06c, BIT(19), 0);
+
+static const char * const mod0_default_parents[] = { "hosc", "pll-periph",
+                                                    "pll-ddr-other" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand_clk, "nand", mod0_default_parents, 0x080,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc0_clk, "mmc0", mod0_default_parents, 0x088,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc1_clk, "mmc1", mod0_default_parents, 0x08c,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc2_clk, "mmc2", mod0_default_parents, 0x090,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(ts_clk, "ts", mod0_default_parents, 0x098,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(ss_clk, "ss", mod0_default_parents, 0x09c,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", mod0_default_parents, 0x0a0,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi2_clk, "spi2", mod0_default_parents, 0x0a8,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(ir_clk, "ir", mod0_default_parents, 0x0b0,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
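All of the mod0-style clocks above share this M+P register layout, and the generic ccu_mp code decodes it as rate = (parent >> P) / (M + 1): a linear divider plus a power-of-two post-divider. A standalone sketch of that arithmetic, with a purely illustrative 600 MHz pll-periph figure:

#include <stdio.h>

/* mod0 divider as decoded by ccu_mp: linear M (plus one), power-of-two P. */
static unsigned long mp_rate(unsigned long parent, unsigned int m,
			     unsigned int p)
{
	return (parent >> p) / (m + 1);
}

int main(void)
{
	/* e.g. an mmc clock from a 600 MHz parent with M = 5, P = 0 -> 100 MHz */
	printf("%lu\n", mp_rate(600000000UL, 5, 0));
	return 0;
}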
+
+static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x",
+                                           "pll-audio-2x", "pll-audio" };
+static SUNXI_CCU_MUX_WITH_GATE(i2s_clk, "i2s", i2s_parents,
+                              0x0b8, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
+
+static const char * const spdif_parents[] = { "pll-audio-8x", "pll-audio-4x",
+                                           "pll-audio-2x", "pll-audio" };
+static SUNXI_CCU_MUX_WITH_GATE(spdif_clk, "spdif", spdif_parents,
+                              0x0c0, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
+
+static const char * const keypad_parents[] = { "hosc", "losc" };
+static const u8 keypad_table[] = { 0, 2 };
+static struct ccu_mp keypad_clk = {
+       .enable         = BIT(31),
+       .m              = _SUNXI_CCU_DIV(8, 5),
+       .p              = _SUNXI_CCU_DIV(20, 2),
+       .mux            = _SUNXI_CCU_MUX_TABLE(24, 2, keypad_table),
+
+       .common         = {
+               .reg            = 0x0c4,
+               .hw.init        = CLK_HW_INIT_PARENTS("keypad",
+                                                     keypad_parents,
+                                                     &ccu_mp_ops,
+                                                     0),
+       },
+};
+
+static SUNXI_CCU_GATE(usb_ohci_clk,    "usb-ohci",     "pll-periph",
+                     0x0cc, BIT(6), 0);
+static SUNXI_CCU_GATE(usb_phy0_clk,    "usb-phy0",     "pll-periph",
+                     0x0cc, BIT(8), 0);
+static SUNXI_CCU_GATE(usb_phy1_clk,    "usb-phy1",     "pll-periph",
+                     0x0cc, BIT(9), 0);
+
+static const char * const gps_parents[] = { "hosc", "pll-periph",
+                                           "pll-video1", "pll-ve" };
+static SUNXI_CCU_M_WITH_MUX_GATE(gps_clk, "gps", gps_parents,
+                                0x0d0, 0, 3, 24, 2, BIT(31), 0);
+
+static SUNXI_CCU_GATE(dram_ve_clk,     "dram-ve",      "pll-ddr",
+                     0x100, BIT(0), 0);
+static SUNXI_CCU_GATE(dram_csi_clk,    "dram-csi",     "pll-ddr",
+                     0x100, BIT(1), 0);
+static SUNXI_CCU_GATE(dram_ts_clk,     "dram-ts",      "pll-ddr",
+                     0x100, BIT(3), 0);
+static SUNXI_CCU_GATE(dram_tve_clk,    "dram-tve",     "pll-ddr",
+                     0x100, BIT(5), 0);
+static SUNXI_CCU_GATE(dram_de_fe_clk,  "dram-de-fe",   "pll-ddr",
+                     0x100, BIT(25), 0);
+static SUNXI_CCU_GATE(dram_de_be_clk,  "dram-de-be",   "pll-ddr",
+                     0x100, BIT(26), 0);
+static SUNXI_CCU_GATE(dram_ace_clk,    "dram-ace",     "pll-ddr",
+                     0x100, BIT(29), 0);
+static SUNXI_CCU_GATE(dram_iep_clk,    "dram-iep",     "pll-ddr",
+                     0x100, BIT(31), 0);
+
+static const char * const de_parents[] = { "pll-video0", "pll-video1",
+                                          "pll-ddr-other" };
+static SUNXI_CCU_M_WITH_MUX_GATE(de_be_clk, "de-be", de_parents,
+                                0x104, 0, 4, 24, 2, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(de_fe_clk, "de-fe", de_parents,
+                                0x10c, 0, 4, 24, 2, BIT(31), 0);
+
+static const char * const tcon_parents[] = { "pll-video0", "pll-video1",
+                                            "pll-video0-2x", "pll-video1-2x" };
+static SUNXI_CCU_MUX_WITH_GATE(tcon_ch0_clk, "tcon-ch0-sclk", tcon_parents,
+                              0x118, 24, 2, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(tcon_ch1_sclk2_clk, "tcon-ch1-sclk2",
+                                tcon_parents,
+                                0x12c, 0, 4, 24, 2, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_GATE(tcon_ch1_sclk1_clk, "tcon-ch1-sclk1", "tcon-ch1-sclk2",
+                            0x12c, 11, 1, BIT(15), CLK_SET_RATE_PARENT);
+
+static const char * const csi_parents[] = { "hosc", "pll-video0", "pll-video1",
+                                           "pll-video0-2x", "pll-video1-2x" };
+static const u8 csi_table[] = { 0, 1, 2, 5, 6 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi_clk, "csi",
+                                      csi_parents, csi_table,
+                                      0x134, 0, 5, 24, 2, BIT(31), 0);
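The keypad and csi muxes use selector tables rather than plain indices: the table maps each parent's position in the parent array to its register encoding, letting the driver skip reserved selector values (keypad: "hosc" -> 0, "losc" -> 2; csi: the 2x video parents sit at encodings 5 and 6). A sketch of the reverse lookup such a table implies:

#include <stdint.h>

/* Map a register selector value back to a parent index via the mux table;
 * returns -1 for reserved/unmapped encodings.
 */
static int mux_table_to_parent(const uint8_t *table, int n_parents,
			       uint8_t regval)
{
	int i;

	for (i = 0; i < n_parents; i++)
		if (table[i] == regval)
			return i;
	return -1;
}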
+
+static SUNXI_CCU_GATE(ve_clk,          "ve",           "pll-ve",
+                     0x13c, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(codec_clk,       "codec",        "pll-audio",
+                     0x140, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(avs_clk,         "avs",          "hosc",
+                     0x144, BIT(31), 0);
+
+static const char * const hdmi_parents[] = { "pll-video0", "pll-video0-2x" };
+static const u8 hdmi_table[] = { 0, 2 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(hdmi_clk, "hdmi",
+                                      hdmi_parents, hdmi_table,
+                                      0x150, 0, 4, 24, 2, BIT(31),
+                                      CLK_SET_RATE_PARENT);
+
+static const char * const gpu_parents[] = { "pll-video0", "pll-ve",
+                                           "pll-ddr-other", "pll-video1",
+                                           "pll-video1-2x" };
+static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents,
+                                0x154, 0, 4, 24, 3, BIT(31), 0);
+
+static const char * const mbus_parents[] = { "hosc", "pll-periph", "pll-ddr" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(mbus_clk, "mbus", mbus_parents,
+                                 0x15c, 0, 4, 16, 2, 24, 2, BIT(31), CLK_IS_CRITICAL);
+
+static SUNXI_CCU_GATE(iep_clk,         "iep",          "de-be",
+                     0x160, BIT(31), 0);
+
+static struct ccu_common *sun5i_a10s_ccu_clks[] = {
+       &hosc_clk.common,
+       &pll_core_clk.common,
+       &pll_audio_base_clk.common,
+       &pll_video0_clk.common,
+       &pll_ve_clk.common,
+       &pll_ddr_base_clk.common,
+       &pll_ddr_clk.common,
+       &pll_ddr_other_clk.common,
+       &pll_periph_clk.common,
+       &pll_video1_clk.common,
+       &cpu_clk.common,
+       &axi_clk.common,
+       &ahb_clk.common,
+       &apb0_clk.common,
+       &apb1_clk.common,
+       &axi_dram_clk.common,
+       &ahb_otg_clk.common,
+       &ahb_ehci_clk.common,
+       &ahb_ohci_clk.common,
+       &ahb_ss_clk.common,
+       &ahb_dma_clk.common,
+       &ahb_bist_clk.common,
+       &ahb_mmc0_clk.common,
+       &ahb_mmc1_clk.common,
+       &ahb_mmc2_clk.common,
+       &ahb_nand_clk.common,
+       &ahb_sdram_clk.common,
+       &ahb_emac_clk.common,
+       &ahb_ts_clk.common,
+       &ahb_spi0_clk.common,
+       &ahb_spi1_clk.common,
+       &ahb_spi2_clk.common,
+       &ahb_gps_clk.common,
+       &ahb_hstimer_clk.common,
+       &ahb_ve_clk.common,
+       &ahb_tve_clk.common,
+       &ahb_lcd_clk.common,
+       &ahb_csi_clk.common,
+       &ahb_hdmi_clk.common,
+       &ahb_de_be_clk.common,
+       &ahb_de_fe_clk.common,
+       &ahb_iep_clk.common,
+       &ahb_gpu_clk.common,
+       &apb0_codec_clk.common,
+       &apb0_spdif_clk.common,
+       &apb0_i2s_clk.common,
+       &apb0_pio_clk.common,
+       &apb0_ir_clk.common,
+       &apb0_keypad_clk.common,
+       &apb1_i2c0_clk.common,
+       &apb1_i2c1_clk.common,
+       &apb1_i2c2_clk.common,
+       &apb1_uart0_clk.common,
+       &apb1_uart1_clk.common,
+       &apb1_uart2_clk.common,
+       &apb1_uart3_clk.common,
+       &nand_clk.common,
+       &mmc0_clk.common,
+       &mmc1_clk.common,
+       &mmc2_clk.common,
+       &ts_clk.common,
+       &ss_clk.common,
+       &spi0_clk.common,
+       &spi1_clk.common,
+       &spi2_clk.common,
+       &ir_clk.common,
+       &i2s_clk.common,
+       &spdif_clk.common,
+       &keypad_clk.common,
+       &usb_ohci_clk.common,
+       &usb_phy0_clk.common,
+       &usb_phy1_clk.common,
+       &gps_clk.common,
+       &dram_ve_clk.common,
+       &dram_csi_clk.common,
+       &dram_ts_clk.common,
+       &dram_tve_clk.common,
+       &dram_de_fe_clk.common,
+       &dram_de_be_clk.common,
+       &dram_ace_clk.common,
+       &dram_iep_clk.common,
+       &de_be_clk.common,
+       &de_fe_clk.common,
+       &tcon_ch0_clk.common,
+       &tcon_ch1_sclk2_clk.common,
+       &tcon_ch1_sclk1_clk.common,
+       &csi_clk.common,
+       &ve_clk.common,
+       &codec_clk.common,
+       &avs_clk.common,
+       &hdmi_clk.common,
+       &gpu_clk.common,
+       &mbus_clk.common,
+       &iep_clk.common,
+};
+
+/* We hardcode the divider to 4 for now */
+static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
+                       "pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_2x_clk, "pll-audio-2x",
+                       "pll-audio-base", 2, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_4x_clk, "pll-audio-4x",
+                       "pll-audio-base", 1, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_8x_clk, "pll-audio-8x",
+                       "pll-audio-base", 1, 2, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_video0_2x_clk, "pll-video0-2x",
+                       "pll-video0", 1, 2, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_video1_2x_clk, "pll-video1-2x",
+                       "pll-video1", 1, 2, CLK_SET_RATE_PARENT);
+
+static struct clk_hw_onecell_data sun5i_a10s_hw_clks = {
+       .hws    = {
+               [CLK_HOSC]              = &hosc_clk.common.hw,
+               [CLK_PLL_CORE]          = &pll_core_clk.common.hw,
+               [CLK_PLL_AUDIO_BASE]    = &pll_audio_base_clk.common.hw,
+               [CLK_PLL_AUDIO]         = &pll_audio_clk.hw,
+               [CLK_PLL_AUDIO_2X]      = &pll_audio_2x_clk.hw,
+               [CLK_PLL_AUDIO_4X]      = &pll_audio_4x_clk.hw,
+               [CLK_PLL_AUDIO_8X]      = &pll_audio_8x_clk.hw,
+               [CLK_PLL_VIDEO0]        = &pll_video0_clk.common.hw,
+               [CLK_PLL_VIDEO0_2X]     = &pll_video0_2x_clk.hw,
+               [CLK_PLL_VE]            = &pll_ve_clk.common.hw,
+               [CLK_PLL_DDR_BASE]      = &pll_ddr_base_clk.common.hw,
+               [CLK_PLL_DDR]           = &pll_ddr_clk.common.hw,
+               [CLK_PLL_DDR_OTHER]     = &pll_ddr_other_clk.common.hw,
+               [CLK_PLL_PERIPH]        = &pll_periph_clk.common.hw,
+               [CLK_PLL_VIDEO1]        = &pll_video1_clk.common.hw,
+               [CLK_PLL_VIDEO1_2X]     = &pll_video1_2x_clk.hw,
+               [CLK_CPU]               = &cpu_clk.common.hw,
+               [CLK_AXI]               = &axi_clk.common.hw,
+               [CLK_AHB]               = &ahb_clk.common.hw,
+               [CLK_APB0]              = &apb0_clk.common.hw,
+               [CLK_APB1]              = &apb1_clk.common.hw,
+               [CLK_DRAM_AXI]          = &axi_dram_clk.common.hw,
+               [CLK_AHB_OTG]           = &ahb_otg_clk.common.hw,
+               [CLK_AHB_EHCI]          = &ahb_ehci_clk.common.hw,
+               [CLK_AHB_OHCI]          = &ahb_ohci_clk.common.hw,
+               [CLK_AHB_SS]            = &ahb_ss_clk.common.hw,
+               [CLK_AHB_DMA]           = &ahb_dma_clk.common.hw,
+               [CLK_AHB_BIST]          = &ahb_bist_clk.common.hw,
+               [CLK_AHB_MMC0]          = &ahb_mmc0_clk.common.hw,
+               [CLK_AHB_MMC1]          = &ahb_mmc1_clk.common.hw,
+               [CLK_AHB_MMC2]          = &ahb_mmc2_clk.common.hw,
+               [CLK_AHB_NAND]          = &ahb_nand_clk.common.hw,
+               [CLK_AHB_SDRAM]         = &ahb_sdram_clk.common.hw,
+               [CLK_AHB_EMAC]          = &ahb_emac_clk.common.hw,
+               [CLK_AHB_TS]            = &ahb_ts_clk.common.hw,
+               [CLK_AHB_SPI0]          = &ahb_spi0_clk.common.hw,
+               [CLK_AHB_SPI1]          = &ahb_spi1_clk.common.hw,
+               [CLK_AHB_SPI2]          = &ahb_spi2_clk.common.hw,
+               [CLK_AHB_GPS]           = &ahb_gps_clk.common.hw,
+               [CLK_AHB_HSTIMER]       = &ahb_hstimer_clk.common.hw,
+               [CLK_AHB_VE]            = &ahb_ve_clk.common.hw,
+               [CLK_AHB_TVE]           = &ahb_tve_clk.common.hw,
+               [CLK_AHB_LCD]           = &ahb_lcd_clk.common.hw,
+               [CLK_AHB_CSI]           = &ahb_csi_clk.common.hw,
+               [CLK_AHB_HDMI]          = &ahb_hdmi_clk.common.hw,
+               [CLK_AHB_DE_BE]         = &ahb_de_be_clk.common.hw,
+               [CLK_AHB_DE_FE]         = &ahb_de_fe_clk.common.hw,
+               [CLK_AHB_IEP]           = &ahb_iep_clk.common.hw,
+               [CLK_AHB_GPU]           = &ahb_gpu_clk.common.hw,
+               [CLK_APB0_CODEC]        = &apb0_codec_clk.common.hw,
+               [CLK_APB0_I2S]          = &apb0_i2s_clk.common.hw,
+               [CLK_APB0_PIO]          = &apb0_pio_clk.common.hw,
+               [CLK_APB0_IR]           = &apb0_ir_clk.common.hw,
+               [CLK_APB0_KEYPAD]       = &apb0_keypad_clk.common.hw,
+               [CLK_APB1_I2C0]         = &apb1_i2c0_clk.common.hw,
+               [CLK_APB1_I2C1]         = &apb1_i2c1_clk.common.hw,
+               [CLK_APB1_I2C2]         = &apb1_i2c2_clk.common.hw,
+               [CLK_APB1_UART0]        = &apb1_uart0_clk.common.hw,
+               [CLK_APB1_UART1]        = &apb1_uart1_clk.common.hw,
+               [CLK_APB1_UART2]        = &apb1_uart2_clk.common.hw,
+               [CLK_APB1_UART3]        = &apb1_uart3_clk.common.hw,
+               [CLK_NAND]              = &nand_clk.common.hw,
+               [CLK_MMC0]              = &mmc0_clk.common.hw,
+               [CLK_MMC1]              = &mmc1_clk.common.hw,
+               [CLK_MMC2]              = &mmc2_clk.common.hw,
+               [CLK_TS]                = &ts_clk.common.hw,
+               [CLK_SS]                = &ss_clk.common.hw,
+               [CLK_SPI0]              = &spi0_clk.common.hw,
+               [CLK_SPI1]              = &spi1_clk.common.hw,
+               [CLK_SPI2]              = &spi2_clk.common.hw,
+               [CLK_IR]                = &ir_clk.common.hw,
+               [CLK_I2S]               = &i2s_clk.common.hw,
+               [CLK_KEYPAD]            = &keypad_clk.common.hw,
+               [CLK_USB_OHCI]          = &usb_ohci_clk.common.hw,
+               [CLK_USB_PHY0]          = &usb_phy0_clk.common.hw,
+               [CLK_USB_PHY1]          = &usb_phy1_clk.common.hw,
+               [CLK_GPS]               = &gps_clk.common.hw,
+               [CLK_DRAM_VE]           = &dram_ve_clk.common.hw,
+               [CLK_DRAM_CSI]          = &dram_csi_clk.common.hw,
+               [CLK_DRAM_TS]           = &dram_ts_clk.common.hw,
+               [CLK_DRAM_TVE]          = &dram_tve_clk.common.hw,
+               [CLK_DRAM_DE_FE]        = &dram_de_fe_clk.common.hw,
+               [CLK_DRAM_DE_BE]        = &dram_de_be_clk.common.hw,
+               [CLK_DRAM_ACE]          = &dram_ace_clk.common.hw,
+               [CLK_DRAM_IEP]          = &dram_iep_clk.common.hw,
+               [CLK_DE_BE]             = &de_be_clk.common.hw,
+               [CLK_DE_FE]             = &de_fe_clk.common.hw,
+               [CLK_TCON_CH0]          = &tcon_ch0_clk.common.hw,
+               [CLK_TCON_CH1_SCLK]     = &tcon_ch1_sclk2_clk.common.hw,
+               [CLK_TCON_CH1]          = &tcon_ch1_sclk1_clk.common.hw,
+               [CLK_CSI]               = &csi_clk.common.hw,
+               [CLK_VE]                = &ve_clk.common.hw,
+               [CLK_CODEC]             = &codec_clk.common.hw,
+               [CLK_AVS]               = &avs_clk.common.hw,
+               [CLK_HDMI]              = &hdmi_clk.common.hw,
+               [CLK_GPU]               = &gpu_clk.common.hw,
+               [CLK_MBUS]              = &mbus_clk.common.hw,
+               [CLK_IEP]               = &iep_clk.common.hw,
+       },
+       .num    = CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun5i_a10s_ccu_resets[] = {
+       [RST_USB_PHY0]          =  { 0x0cc, BIT(0) },
+       [RST_USB_PHY1]          =  { 0x0cc, BIT(1) },
+
+       [RST_GPS]               =  { 0x0d0, BIT(30) },
+
+       [RST_DE_BE]             =  { 0x104, BIT(30) },
+
+       [RST_DE_FE]             =  { 0x10c, BIT(30) },
+
+       [RST_TVE]               =  { 0x118, BIT(29) },
+       [RST_LCD]               =  { 0x118, BIT(30) },
+
+       [RST_CSI]               =  { 0x134, BIT(30) },
+
+       [RST_VE]                =  { 0x13c, BIT(0) },
+
+       [RST_GPU]               =  { 0x154, BIT(30) },
+
+       [RST_IEP]               =  { 0x160, BIT(30) },
+};
+
+static const struct sunxi_ccu_desc sun5i_a10s_ccu_desc = {
+       .ccu_clks       = sun5i_a10s_ccu_clks,
+       .num_ccu_clks   = ARRAY_SIZE(sun5i_a10s_ccu_clks),
+
+       .hw_clks        = &sun5i_a10s_hw_clks,
+
+       .resets         = sun5i_a10s_ccu_resets,
+       .num_resets     = ARRAY_SIZE(sun5i_a10s_ccu_resets),
+};
+
+/*
+ * The A13 is the A10s minus the TS, GPS, HDMI, I2S and the keypad
+ */
+static struct clk_hw_onecell_data sun5i_a13_hw_clks = {
+       .hws    = {
+               [CLK_HOSC]              = &hosc_clk.common.hw,
+               [CLK_PLL_CORE]          = &pll_core_clk.common.hw,
+               [CLK_PLL_AUDIO_BASE]    = &pll_audio_base_clk.common.hw,
+               [CLK_PLL_AUDIO]         = &pll_audio_clk.hw,
+               [CLK_PLL_AUDIO_2X]      = &pll_audio_2x_clk.hw,
+               [CLK_PLL_AUDIO_4X]      = &pll_audio_4x_clk.hw,
+               [CLK_PLL_AUDIO_8X]      = &pll_audio_8x_clk.hw,
+               [CLK_PLL_VIDEO0]        = &pll_video0_clk.common.hw,
+               [CLK_PLL_VIDEO0_2X]     = &pll_video0_2x_clk.hw,
+               [CLK_PLL_VE]            = &pll_ve_clk.common.hw,
+               [CLK_PLL_DDR_BASE]      = &pll_ddr_base_clk.common.hw,
+               [CLK_PLL_DDR]           = &pll_ddr_clk.common.hw,
+               [CLK_PLL_DDR_OTHER]     = &pll_ddr_other_clk.common.hw,
+               [CLK_PLL_PERIPH]        = &pll_periph_clk.common.hw,
+               [CLK_PLL_VIDEO1]        = &pll_video1_clk.common.hw,
+               [CLK_PLL_VIDEO1_2X]     = &pll_video1_2x_clk.hw,
+               [CLK_CPU]               = &cpu_clk.common.hw,
+               [CLK_AXI]               = &axi_clk.common.hw,
+               [CLK_AHB]               = &ahb_clk.common.hw,
+               [CLK_APB0]              = &apb0_clk.common.hw,
+               [CLK_APB1]              = &apb1_clk.common.hw,
+               [CLK_DRAM_AXI]          = &axi_dram_clk.common.hw,
+               [CLK_AHB_OTG]           = &ahb_otg_clk.common.hw,
+               [CLK_AHB_EHCI]          = &ahb_ehci_clk.common.hw,
+               [CLK_AHB_OHCI]          = &ahb_ohci_clk.common.hw,
+               [CLK_AHB_SS]            = &ahb_ss_clk.common.hw,
+               [CLK_AHB_DMA]           = &ahb_dma_clk.common.hw,
+               [CLK_AHB_BIST]          = &ahb_bist_clk.common.hw,
+               [CLK_AHB_MMC0]          = &ahb_mmc0_clk.common.hw,
+               [CLK_AHB_MMC1]          = &ahb_mmc1_clk.common.hw,
+               [CLK_AHB_MMC2]          = &ahb_mmc2_clk.common.hw,
+               [CLK_AHB_NAND]          = &ahb_nand_clk.common.hw,
+               [CLK_AHB_SDRAM]         = &ahb_sdram_clk.common.hw,
+               [CLK_AHB_EMAC]          = &ahb_emac_clk.common.hw,
+               [CLK_AHB_SPI0]          = &ahb_spi0_clk.common.hw,
+               [CLK_AHB_SPI1]          = &ahb_spi1_clk.common.hw,
+               [CLK_AHB_SPI2]          = &ahb_spi2_clk.common.hw,
+               [CLK_AHB_HSTIMER]       = &ahb_hstimer_clk.common.hw,
+               [CLK_AHB_VE]            = &ahb_ve_clk.common.hw,
+               [CLK_AHB_TVE]           = &ahb_tve_clk.common.hw,
+               [CLK_AHB_LCD]           = &ahb_lcd_clk.common.hw,
+               [CLK_AHB_CSI]           = &ahb_csi_clk.common.hw,
+               [CLK_AHB_DE_BE]         = &ahb_de_be_clk.common.hw,
+               [CLK_AHB_DE_FE]         = &ahb_de_fe_clk.common.hw,
+               [CLK_AHB_IEP]           = &ahb_iep_clk.common.hw,
+               [CLK_AHB_GPU]           = &ahb_gpu_clk.common.hw,
+               [CLK_APB0_CODEC]        = &apb0_codec_clk.common.hw,
+               [CLK_APB0_PIO]          = &apb0_pio_clk.common.hw,
+               [CLK_APB0_IR]           = &apb0_ir_clk.common.hw,
+               [CLK_APB1_I2C0]         = &apb1_i2c0_clk.common.hw,
+               [CLK_APB1_I2C1]         = &apb1_i2c1_clk.common.hw,
+               [CLK_APB1_I2C2]         = &apb1_i2c2_clk.common.hw,
+               [CLK_APB1_UART0]        = &apb1_uart0_clk.common.hw,
+               [CLK_APB1_UART1]        = &apb1_uart1_clk.common.hw,
+               [CLK_APB1_UART2]        = &apb1_uart2_clk.common.hw,
+               [CLK_APB1_UART3]        = &apb1_uart3_clk.common.hw,
+               [CLK_NAND]              = &nand_clk.common.hw,
+               [CLK_MMC0]              = &mmc0_clk.common.hw,
+               [CLK_MMC1]              = &mmc1_clk.common.hw,
+               [CLK_MMC2]              = &mmc2_clk.common.hw,
+               [CLK_SS]                = &ss_clk.common.hw,
+               [CLK_SPI0]              = &spi0_clk.common.hw,
+               [CLK_SPI1]              = &spi1_clk.common.hw,
+               [CLK_SPI2]              = &spi2_clk.common.hw,
+               [CLK_IR]                = &ir_clk.common.hw,
+               [CLK_USB_OHCI]          = &usb_ohci_clk.common.hw,
+               [CLK_USB_PHY0]          = &usb_phy0_clk.common.hw,
+               [CLK_USB_PHY1]          = &usb_phy1_clk.common.hw,
+               [CLK_DRAM_VE]           = &dram_ve_clk.common.hw,
+               [CLK_DRAM_CSI]          = &dram_csi_clk.common.hw,
+               [CLK_DRAM_TVE]          = &dram_tve_clk.common.hw,
+               [CLK_DRAM_DE_FE]        = &dram_de_fe_clk.common.hw,
+               [CLK_DRAM_DE_BE]        = &dram_de_be_clk.common.hw,
+               [CLK_DRAM_ACE]          = &dram_ace_clk.common.hw,
+               [CLK_DRAM_IEP]          = &dram_iep_clk.common.hw,
+               [CLK_DE_BE]             = &de_be_clk.common.hw,
+               [CLK_DE_FE]             = &de_fe_clk.common.hw,
+               [CLK_TCON_CH0]          = &tcon_ch0_clk.common.hw,
+               [CLK_TCON_CH1_SCLK]     = &tcon_ch1_sclk2_clk.common.hw,
+               [CLK_TCON_CH1]          = &tcon_ch1_sclk1_clk.common.hw,
+               [CLK_CSI]               = &csi_clk.common.hw,
+               [CLK_VE]                = &ve_clk.common.hw,
+               [CLK_CODEC]             = &codec_clk.common.hw,
+               [CLK_AVS]               = &avs_clk.common.hw,
+               [CLK_GPU]               = &gpu_clk.common.hw,
+               [CLK_MBUS]              = &mbus_clk.common.hw,
+               [CLK_IEP]               = &iep_clk.common.hw,
+       },
+       .num    = CLK_NUMBER,
+};
+
+static const struct sunxi_ccu_desc sun5i_a13_ccu_desc = {
+       .ccu_clks       = sun5i_a10s_ccu_clks,
+       .num_ccu_clks   = ARRAY_SIZE(sun5i_a10s_ccu_clks),
+
+       .hw_clks        = &sun5i_a13_hw_clks,
+
+       .resets         = sun5i_a10s_ccu_resets,
+       .num_resets     = ARRAY_SIZE(sun5i_a10s_ccu_resets),
+};
+
+/*
+ * The GR8 is the A10s CCU minus the HDMI and keypad, plus SPDIF
+ */
+static struct clk_hw_onecell_data sun5i_gr8_hw_clks = {
+       .hws    = {
+               [CLK_HOSC]              = &hosc_clk.common.hw,
+               [CLK_PLL_CORE]          = &pll_core_clk.common.hw,
+               [CLK_PLL_AUDIO_BASE]    = &pll_audio_base_clk.common.hw,
+               [CLK_PLL_AUDIO]         = &pll_audio_clk.hw,
+               [CLK_PLL_AUDIO_2X]      = &pll_audio_2x_clk.hw,
+               [CLK_PLL_AUDIO_4X]      = &pll_audio_4x_clk.hw,
+               [CLK_PLL_AUDIO_8X]      = &pll_audio_8x_clk.hw,
+               [CLK_PLL_VIDEO0]        = &pll_video0_clk.common.hw,
+               [CLK_PLL_VIDEO0_2X]     = &pll_video0_2x_clk.hw,
+               [CLK_PLL_VE]            = &pll_ve_clk.common.hw,
+               [CLK_PLL_DDR_BASE]      = &pll_ddr_base_clk.common.hw,
+               [CLK_PLL_DDR]           = &pll_ddr_clk.common.hw,
+               [CLK_PLL_DDR_OTHER]     = &pll_ddr_other_clk.common.hw,
+               [CLK_PLL_PERIPH]        = &pll_periph_clk.common.hw,
+               [CLK_PLL_VIDEO1]        = &pll_video1_clk.common.hw,
+               [CLK_PLL_VIDEO1_2X]     = &pll_video1_2x_clk.hw,
+               [CLK_CPU]               = &cpu_clk.common.hw,
+               [CLK_AXI]               = &axi_clk.common.hw,
+               [CLK_AHB]               = &ahb_clk.common.hw,
+               [CLK_APB0]              = &apb0_clk.common.hw,
+               [CLK_APB1]              = &apb1_clk.common.hw,
+               [CLK_DRAM_AXI]          = &axi_dram_clk.common.hw,
+               [CLK_AHB_OTG]           = &ahb_otg_clk.common.hw,
+               [CLK_AHB_EHCI]          = &ahb_ehci_clk.common.hw,
+               [CLK_AHB_OHCI]          = &ahb_ohci_clk.common.hw,
+               [CLK_AHB_SS]            = &ahb_ss_clk.common.hw,
+               [CLK_AHB_DMA]           = &ahb_dma_clk.common.hw,
+               [CLK_AHB_BIST]          = &ahb_bist_clk.common.hw,
+               [CLK_AHB_MMC0]          = &ahb_mmc0_clk.common.hw,
+               [CLK_AHB_MMC1]          = &ahb_mmc1_clk.common.hw,
+               [CLK_AHB_MMC2]          = &ahb_mmc2_clk.common.hw,
+               [CLK_AHB_NAND]          = &ahb_nand_clk.common.hw,
+               [CLK_AHB_SDRAM]         = &ahb_sdram_clk.common.hw,
+               [CLK_AHB_EMAC]          = &ahb_emac_clk.common.hw,
+               [CLK_AHB_TS]            = &ahb_ts_clk.common.hw,
+               [CLK_AHB_SPI0]          = &ahb_spi0_clk.common.hw,
+               [CLK_AHB_SPI1]          = &ahb_spi1_clk.common.hw,
+               [CLK_AHB_SPI2]          = &ahb_spi2_clk.common.hw,
+               [CLK_AHB_GPS]           = &ahb_gps_clk.common.hw,
+               [CLK_AHB_HSTIMER]       = &ahb_hstimer_clk.common.hw,
+               [CLK_AHB_VE]            = &ahb_ve_clk.common.hw,
+               [CLK_AHB_TVE]           = &ahb_tve_clk.common.hw,
+               [CLK_AHB_LCD]           = &ahb_lcd_clk.common.hw,
+               [CLK_AHB_CSI]           = &ahb_csi_clk.common.hw,
+               [CLK_AHB_DE_BE]         = &ahb_de_be_clk.common.hw,
+               [CLK_AHB_DE_FE]         = &ahb_de_fe_clk.common.hw,
+               [CLK_AHB_IEP]           = &ahb_iep_clk.common.hw,
+               [CLK_AHB_GPU]           = &ahb_gpu_clk.common.hw,
+               [CLK_APB0_CODEC]        = &apb0_codec_clk.common.hw,
+               [CLK_APB0_SPDIF]        = &apb0_spdif_clk.common.hw,
+               [CLK_APB0_I2S]          = &apb0_i2s_clk.common.hw,
+               [CLK_APB0_PIO]          = &apb0_pio_clk.common.hw,
+               [CLK_APB0_IR]           = &apb0_ir_clk.common.hw,
+               [CLK_APB1_I2C0]         = &apb1_i2c0_clk.common.hw,
+               [CLK_APB1_I2C1]         = &apb1_i2c1_clk.common.hw,
+               [CLK_APB1_I2C2]         = &apb1_i2c2_clk.common.hw,
+               [CLK_APB1_UART0]        = &apb1_uart0_clk.common.hw,
+               [CLK_APB1_UART1]        = &apb1_uart1_clk.common.hw,
+               [CLK_APB1_UART2]        = &apb1_uart2_clk.common.hw,
+               [CLK_APB1_UART3]        = &apb1_uart3_clk.common.hw,
+               [CLK_NAND]              = &nand_clk.common.hw,
+               [CLK_MMC0]              = &mmc0_clk.common.hw,
+               [CLK_MMC1]              = &mmc1_clk.common.hw,
+               [CLK_MMC2]              = &mmc2_clk.common.hw,
+               [CLK_TS]                = &ts_clk.common.hw,
+               [CLK_SS]                = &ss_clk.common.hw,
+               [CLK_SPI0]              = &spi0_clk.common.hw,
+               [CLK_SPI1]              = &spi1_clk.common.hw,
+               [CLK_SPI2]              = &spi2_clk.common.hw,
+               [CLK_IR]                = &ir_clk.common.hw,
+               [CLK_I2S]               = &i2s_clk.common.hw,
+               [CLK_SPDIF]             = &spdif_clk.common.hw,
+               [CLK_USB_OHCI]          = &usb_ohci_clk.common.hw,
+               [CLK_USB_PHY0]          = &usb_phy0_clk.common.hw,
+               [CLK_USB_PHY1]          = &usb_phy1_clk.common.hw,
+               [CLK_GPS]               = &gps_clk.common.hw,
+               [CLK_DRAM_VE]           = &dram_ve_clk.common.hw,
+               [CLK_DRAM_CSI]          = &dram_csi_clk.common.hw,
+               [CLK_DRAM_TS]           = &dram_ts_clk.common.hw,
+               [CLK_DRAM_TVE]          = &dram_tve_clk.common.hw,
+               [CLK_DRAM_DE_FE]        = &dram_de_fe_clk.common.hw,
+               [CLK_DRAM_DE_BE]        = &dram_de_be_clk.common.hw,
+               [CLK_DRAM_ACE]          = &dram_ace_clk.common.hw,
+               [CLK_DRAM_IEP]          = &dram_iep_clk.common.hw,
+               [CLK_DE_BE]             = &de_be_clk.common.hw,
+               [CLK_DE_FE]             = &de_fe_clk.common.hw,
+               [CLK_TCON_CH0]          = &tcon_ch0_clk.common.hw,
+               [CLK_TCON_CH1_SCLK]     = &tcon_ch1_sclk2_clk.common.hw,
+               [CLK_TCON_CH1]          = &tcon_ch1_sclk1_clk.common.hw,
+               [CLK_CSI]               = &csi_clk.common.hw,
+               [CLK_VE]                = &ve_clk.common.hw,
+               [CLK_CODEC]             = &codec_clk.common.hw,
+               [CLK_AVS]               = &avs_clk.common.hw,
+               [CLK_GPU]               = &gpu_clk.common.hw,
+               [CLK_MBUS]              = &mbus_clk.common.hw,
+               [CLK_IEP]               = &iep_clk.common.hw,
+       },
+       .num    = CLK_NUMBER,
+};
+
+static const struct sunxi_ccu_desc sun5i_gr8_ccu_desc = {
+       .ccu_clks       = sun5i_a10s_ccu_clks,
+       .num_ccu_clks   = ARRAY_SIZE(sun5i_a10s_ccu_clks),
+
+       .hw_clks        = &sun5i_gr8_hw_clks,
+
+       .resets         = sun5i_a10s_ccu_resets,
+       .num_resets     = ARRAY_SIZE(sun5i_a10s_ccu_resets),
+};
+
+static void __init sun5i_ccu_init(struct device_node *node,
+                                 const struct sunxi_ccu_desc *desc)
+{
+       void __iomem *reg;
+       u32 val;
+
+       reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+       if (IS_ERR(reg)) {
+               pr_err("%s: Could not map the clock registers\n",
+                      of_node_full_name(node));
+               return;
+       }
+
+       /* Force the PLL-Audio-1x divider to 4 */
+       val = readl(reg + SUN5I_PLL_AUDIO_REG);
+       val &= ~GENMASK(19, 16);
+       writel(val | (3 << 16), reg + SUN5I_PLL_AUDIO_REG);
+
+       /*
+        * Use the peripheral PLL as the AHB parent instead of CPU/AXI,
+        * whose rates change with cpufreq.
+        *
+        * This matters especially for the HS timer, whose parent
+        * clock is AHB.
+        */
+       val = readl(reg + SUN5I_AHB_REG);
+       val &= ~GENMASK(7, 6);
+       writel(val | (2 << 6), reg + SUN5I_AHB_REG);
+
+       sunxi_ccu_probe(node, reg, desc);
+}
+
+static void __init sun5i_a10s_ccu_setup(struct device_node *node)
+{
+       sun5i_ccu_init(node, &sun5i_a10s_ccu_desc);
+}
+CLK_OF_DECLARE(sun5i_a10s_ccu, "allwinner,sun5i-a10s-ccu",
+              sun5i_a10s_ccu_setup);
+
+static void __init sun5i_a13_ccu_setup(struct device_node *node)
+{
+       sun5i_ccu_init(node, &sun5i_a13_ccu_desc);
+}
+CLK_OF_DECLARE(sun5i_a13_ccu, "allwinner,sun5i-a13-ccu",
+              sun5i_a13_ccu_setup);
+
+static void __init sun5i_gr8_ccu_setup(struct device_node *node)
+{
+       sun5i_ccu_init(node, &sun5i_gr8_ccu_desc);
+}
+CLK_OF_DECLARE(sun5i_gr8_ccu, "nextthing,gr8-ccu",
+              sun5i_gr8_ccu_setup);
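Both register tweaks in sun5i_ccu_init() above use the same read-modify-write idiom: clear the field with a GENMASK() mask, then OR in the new value. A minimal sketch of that idiom as a stand-alone helper; the helper and function names are hypothetical, and the divider-minus-one field encoding is an assumption inferred from the "divider to 4" comment paired with the write of 3:

#include <linux/bitops.h>
#include <linux/io.h>

/* Hypothetical helper: replace one register field, leave the rest alone. */
static void ccu_field_update(void __iomem *reg, u32 mask, u32 val)
{
	u32 tmp = readl(reg);

	tmp &= ~mask;
	writel(tmp | val, reg);
}

/* Equivalent to the PLL-Audio fixup above: write 3 into bits 19:16,
 * which (assuming a divider-minus-one encoding) forces a /4 post-divider.
 */
static void __init example_force_audio_div(void __iomem *base)
{
	ccu_field_update(base + SUN5I_PLL_AUDIO_REG, GENMASK(19, 16), 3 << 16);
}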
diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.h b/drivers/clk/sunxi-ng/ccu-sun5i.h
new file mode 100644 (file)
index 0000000..8144487
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN5I_H_
+#define _CCU_SUN5I_H_
+
+#include <dt-bindings/clock/sun5i-ccu.h>
+#include <dt-bindings/reset/sun5i-ccu.h>
+
+/* The HOSC is exported */
+#define CLK_PLL_CORE           2
+#define CLK_PLL_AUDIO_BASE     3
+#define CLK_PLL_AUDIO          4
+#define CLK_PLL_AUDIO_2X       5
+#define CLK_PLL_AUDIO_4X       6
+#define CLK_PLL_AUDIO_8X       7
+#define CLK_PLL_VIDEO0         8
+#define CLK_PLL_VIDEO0_2X      9
+#define CLK_PLL_VE             10
+#define CLK_PLL_DDR_BASE       11
+#define CLK_PLL_DDR            12
+#define CLK_PLL_DDR_OTHER      13
+#define CLK_PLL_PERIPH         14
+#define CLK_PLL_VIDEO1         15
+#define CLK_PLL_VIDEO1_2X      16
+
+/* The CPU clock is exported */
+
+#define CLK_AXI                        18
+#define CLK_AHB                        19
+#define CLK_APB0               20
+#define CLK_APB1               21
+#define CLK_DRAM_AXI           22
+
+/* AHB gates are exported */
+/* APB0 gates are exported */
+/* APB1 gates are exported */
+/* Module clocks are exported */
+/* USB clocks are exported */
+/* GPS clock is exported */
+/* DRAM gates are exported */
+/* More display modules clocks are exported */
+
+#define CLK_TCON_CH1_SCLK      91
+
+/* The rest of the module clocks are exported */
+
+#define CLK_MBUS               99
+
+/* And finally the IEP clock */
+
+#define CLK_NUMBER             (CLK_IEP + 1)
+
+#endif /* _CCU_SUN5I_H_ */
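CLK_NUMBER (CLK_IEP + 1) sizes the clk_hw_onecell_data .hws[] arrays above, so every index exported through the dt-bindings header stays addressable even though only the driver-private indices get #defines here. sunxi_ccu_probe() registers that table with the OF clock framework internally; a stand-alone provider doing the same thing would look roughly like this sketch (function name hypothetical):

#include <linux/clk-provider.h>
#include <linux/of.h>

static int __init example_add_provider(struct device_node *node,
				       struct clk_hw_onecell_data *hw_clks)
{
	/*
	 * Consumers reference clocks by index, e.g.
	 * clocks = <&ccu CLK_AHB_MMC0>; of_clk_hw_onecell_get()
	 * resolves that cell into the matching .hws[] entry.
	 */
	return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, hw_clks);
}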
index fc75a335a7ce12480f12f41a95091417466b6a92..4c9a920ff4ab7c351d59333d131d039f5e36f40a 100644 (file)
@@ -468,8 +468,8 @@ static SUNXI_CCU_MUX_WITH_GATE(daudio0_clk, "daudio0", daudio_parents,
 static SUNXI_CCU_MUX_WITH_GATE(daudio1_clk, "daudio1", daudio_parents,
                               0x0b4, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
 
-static SUNXI_CCU_M_WITH_GATE(spdif_clk, "spdif", "pll-audio",
-                            0x0c0, 0, 4, BIT(31), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_MUX_WITH_GATE(spdif_clk, "spdif", daudio_parents,
+                              0x0c0, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
 
 static SUNXI_CCU_GATE(usb_phy0_clk,    "usb-phy0",     "osc24M",
                      0x0cc, BIT(8), 0);
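The spdif change above replaces a plain divider fed from pll-audio with a mux over the daudio parents while keeping CLK_SET_RATE_PARENT, so a rate request on "spdif" may propagate up to the selected audio PLL instead of being rejected at the mux. A consumer-side sketch of what that enables (clock handle acquisition elided; 24.576 MHz is just 512 x 48 kHz, a common S/PDIF master rate):

#include <linux/clk.h>

static int example_spdif_set_rate(struct clk *spdif)
{
	/*
	 * With CLK_SET_RATE_PARENT the clk framework is allowed to
	 * retune the selected pll-audio parent to hit this rate.
	 */
	return clk_set_rate(spdif, 24576000);
}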
index 9bd1f78a05471955890e41da51e1b81811c00d64..a7b3c08ed0e232c0cf41ae419f4980bf1614590e 100644 (file)
@@ -170,7 +170,7 @@ static SUNXI_CCU_N_WITH_GATE_LOCK(pll_ddr1_clk, "pll-ddr1",
 static const char * const cpux_parents[] = { "osc32k", "osc24M",
                                             "pll-cpux" , "pll-cpux" };
 static SUNXI_CCU_MUX(cpux_clk, "cpux", cpux_parents,
-                    0x050, 16, 2, CLK_IS_CRITICAL);
+                    0x050, 16, 2, CLK_IS_CRITICAL | CLK_SET_RATE_PARENT);
 
 static SUNXI_CCU_M(axi_clk, "axi", "cpux", 0x050, 0, 2, 0);
 
@@ -440,7 +440,7 @@ static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve",
                             0x13c, 16, 3, BIT(31), CLK_SET_RATE_PARENT);
 
 static SUNXI_CCU_GATE(ac_dig_clk,      "ac-dig",       "pll-audio",
-                     0x140, BIT(31), 0);
+                     0x140, BIT(31), CLK_SET_RATE_PARENT);
 static SUNXI_CCU_GATE(ac_dig_4x_clk,   "ac-dig-4x",    "pll-audio-4x",
                      0x140, BIT(30), 0);
 static SUNXI_CCU_GATE(avs_clk,         "avs",          "osc24M",
@@ -468,7 +468,7 @@ static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(drc_clk, "drc",
                                       0x180, 0, 4, 24, 3, BIT(31), 0);
 
 static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu",
-                            0x1a0, 0, 3, BIT(31), 0);
+                            0x1a0, 0, 3, BIT(31), CLK_SET_RATE_PARENT);
 
 static const char * const ats_parents[] = { "osc24M", "pll-periph" };
 static SUNXI_CCU_M_WITH_MUX_GATE(ats_clk, "ats", ats_parents,
@@ -752,6 +752,13 @@ static const struct sunxi_ccu_desc sun8i_a33_ccu_desc = {
        .num_resets     = ARRAY_SIZE(sun8i_a33_ccu_resets),
 };
 
+static struct ccu_mux_nb sun8i_a33_cpu_nb = {
+       .common         = &cpux_clk.common,
+       .cm             = &cpux_clk.mux,
+       .delay_us       = 1, /* > 8 clock cycles at 24 MHz */
+       .bypass_index   = 1, /* index of 24 MHz oscillator */
+};
+
 static void __init sun8i_a33_ccu_setup(struct device_node *node)
 {
        void __iomem *reg;
@@ -775,6 +782,9 @@ static void __init sun8i_a33_ccu_setup(struct device_node *node)
        writel(val, reg + SUN8I_A33_PLL_MIPI_REG);
 
        sunxi_ccu_probe(node, reg, &sun8i_a33_ccu_desc);
+
+       ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk,
+                                 &sun8i_a33_cpu_nb);
 }
 CLK_OF_DECLARE(sun8i_a33_ccu, "allwinner,sun8i-a33-ccu",
               sun8i_a33_ccu_setup);
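The new ccu_mux_nb above makes the CPU mux dodge PLL retuning: before pll-cpux changes rate, the mux is parked on the 24 MHz oscillator (bypass_index 1), and it is switched back after a settling delay of delay_us. The reparenting itself uses sunxi-ng internals, but the event flow rides the generic clk notifier API; a rough sketch of that flow, with the callback body reduced to comments:

#include <linux/clk.h>
#include <linux/notifier.h>

static int example_cpu_rate_cb(struct notifier_block *nb,
			       unsigned long event, void *data)
{
	if (event == PRE_RATE_CHANGE) {
		/* park the CPU mux on the stable 24 MHz oscillator */
	} else if (event == POST_RATE_CHANGE) {
		/* wait > 8 cycles (~1 us), reparent back to pll-cpux */
	}

	return NOTIFY_OK;
}

static struct notifier_block example_cpu_nb = {
	.notifier_call	= example_cpu_rate_cb,
};

/* Registered against the clk whose rate actually changes, as above:
 *	clk_notifier_register(pll_cpux, &example_cpu_nb);
 */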
index 21c427d86f289b2a588cc41190525c408e2be3bf..a26c8a19fe93a2b8f1f768b414cb678f364d5542 100644 (file)
@@ -803,6 +803,13 @@ static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = {
        .num_resets     = ARRAY_SIZE(sun8i_h3_ccu_resets),
 };
 
+static struct ccu_mux_nb sun8i_h3_cpu_nb = {
+       .common         = &cpux_clk.common,
+       .cm             = &cpux_clk.mux,
+       .delay_us       = 1, /* > 8 clock cycles at 24 MHz */
+       .bypass_index   = 1, /* index of 24 MHz oscillator */
+};
+
 static void __init sun8i_h3_ccu_setup(struct device_node *node)
 {
        void __iomem *reg;
@@ -821,6 +828,9 @@ static void __init sun8i_h3_ccu_setup(struct device_node *node)
        writel(val | (3 << 16), reg + SUN8I_H3_PLL_AUDIO_REG);
 
        sunxi_ccu_probe(node, reg, &sun8i_h3_ccu_desc);
+
+       ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk,
+                                 &sun8i_h3_cpu_nb);
 }
 CLK_OF_DECLARE(sun8i_h3_ccu, "allwinner,sun8i-h3-ccu",
               sun8i_h3_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
new file mode 100644 (file)
index 0000000..e58706b
--- /dev/null
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * Based on ccu-sun8i-h3.c, which is:
+ * Copyright (c) 2016 Maxime Ripard. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_mult.h"
+#include "ccu_nk.h"
+#include "ccu_nkm.h"
+#include "ccu_nkmp.h"
+#include "ccu_nm.h"
+#include "ccu_phase.h"
+
+#include "ccu-sun8i-v3s.h"
+
+static SUNXI_CCU_NKMP_WITH_GATE_LOCK(pll_cpu_clk, "pll-cpu",
+                                    "osc24M", 0x000,
+                                    8, 5,      /* N */
+                                    4, 2,      /* K */
+                                    0, 2,      /* M */
+                                    16, 2,     /* P */
+                                    BIT(31),   /* gate */
+                                    BIT(28),   /* lock */
+                                    0);
+
+/*
+ * The Audio PLL is supposed to have 4 outputs: 3 fixed factors from
+ * the base (2x, 4x and 8x), and one variable divider (the one true
+ * pll audio).
+ *
+ * We don't have any need for the variable divider for now, so we just
+ * hardcode it to match the clock names.
+ */
+#define SUN8I_V3S_PLL_AUDIO_REG        0x008
+
+static SUNXI_CCU_NM_WITH_GATE_LOCK(pll_audio_base_clk, "pll-audio-base",
+                                  "osc24M", 0x008,
+                                  8, 7,        /* N */
+                                  0, 5,        /* M */
+                                  BIT(31),     /* gate */
+                                  BIT(28),     /* lock */
+                                  0);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_video_clk, "pll-video",
+                                       "osc24M", 0x0010,
+                                       8, 7,           /* N */
+                                       0, 4,           /* M */
+                                       BIT(24),        /* frac enable */
+                                       BIT(25),        /* frac select */
+                                       270000000,      /* frac rate 0 */
+                                       297000000,      /* frac rate 1 */
+                                       BIT(31),        /* gate */
+                                       BIT(28),        /* lock */
+                                       0);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve",
+                                       "osc24M", 0x0018,
+                                       8, 7,           /* N */
+                                       0, 4,           /* M */
+                                       BIT(24),        /* frac enable */
+                                       BIT(25),        /* frac select */
+                                       270000000,      /* frac rate 0 */
+                                       297000000,      /* frac rate 1 */
+                                       BIT(31),        /* gate */
+                                       BIT(28),        /* lock */
+                                       0);
+
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr_clk, "pll-ddr",
+                                   "osc24M", 0x020,
+                                   8, 5,       /* N */
+                                   4, 2,       /* K */
+                                   0, 2,       /* M */
+                                   BIT(31),    /* gate */
+                                   BIT(28),    /* lock */
+                                   0);
+
+static SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph0_clk, "pll-periph0",
+                                          "osc24M", 0x028,
+                                          8, 5,        /* N */
+                                          4, 2,        /* K */
+                                          BIT(31),     /* gate */
+                                          BIT(28),     /* lock */
+                                          2,           /* post-div */
+                                          0);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_isp_clk, "pll-isp",
+                                       "osc24M", 0x002c,
+                                       8, 7,           /* N */
+                                       0, 4,           /* M */
+                                       BIT(24),        /* frac enable */
+                                       BIT(25),        /* frac select */
+                                       270000000,      /* frac rate 0 */
+                                       297000000,      /* frac rate 1 */
+                                       BIT(31),        /* gate */
+                                       BIT(28),        /* lock */
+                                       0);
+
+static SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph1_clk, "pll-periph1",
+                                          "osc24M", 0x044,
+                                          8, 5,        /* N */
+                                          4, 2,        /* K */
+                                          BIT(31),     /* gate */
+                                          BIT(28),     /* lock */
+                                          2,           /* post-div */
+                                          0);
+
+static const char * const cpu_parents[] = { "osc32k", "osc24M",
+                                            "pll-cpu", "pll-cpu" };
+static SUNXI_CCU_MUX(cpu_clk, "cpu", cpu_parents,
+                    0x050, 16, 2, CLK_IS_CRITICAL);
+
+static SUNXI_CCU_M(axi_clk, "axi", "cpu", 0x050, 0, 2, 0);
+
+static const char * const ahb1_parents[] = { "osc32k", "osc24M",
+                                            "axi", "pll-periph0" };
+static struct ccu_div ahb1_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(4, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+       .mux            = {
+               .shift  = 12,
+               .width  = 2,
+
+               .variable_prediv        = {
+                       .index  = 3,
+                       .shift  = 6,
+                       .width  = 2,
+               },
+       },
+
+       .common         = {
+               .reg            = 0x054,
+               .features       = CCU_FEATURE_VARIABLE_PREDIV,
+               .hw.init        = CLK_HW_INIT_PARENTS("ahb1",
+                                                     ahb1_parents,
+                                                     &ccu_div_ops,
+                                                     0),
+       },
+};
+
+static struct clk_div_table apb1_div_table[] = {
+       { .val = 0, .div = 2 },
+       { .val = 1, .div = 2 },
+       { .val = 2, .div = 4 },
+       { .val = 3, .div = 8 },
+       { /* Sentinel */ },
+};
+static SUNXI_CCU_DIV_TABLE(apb1_clk, "apb1", "ahb1",
+                          0x054, 8, 2, apb1_div_table, 0);
+
+static const char * const apb2_parents[] = { "osc32k", "osc24M",
+                                            "pll-periph0", "pll-periph0" };
+static SUNXI_CCU_MP_WITH_MUX(apb2_clk, "apb2", apb2_parents, 0x058,
+                            0, 5,      /* M */
+                            16, 2,     /* P */
+                            24, 2,     /* mux */
+                            0);
+
+static const char * const ahb2_parents[] = { "ahb1", "pll-periph0" };
+static const struct ccu_mux_fixed_prediv ahb2_fixed_predivs[] = {
+       { .index = 1, .div = 2 },
+};
+static struct ccu_mux ahb2_clk = {
+       .mux            = {
+               .shift  = 0,
+               .width  = 1,
+               .fixed_predivs  = ahb2_fixed_predivs,
+               .n_predivs      = ARRAY_SIZE(ahb2_fixed_predivs),
+       },
+
+       .common         = {
+               .reg            = 0x05c,
+               .features       = CCU_FEATURE_FIXED_PREDIV,
+               .hw.init        = CLK_HW_INIT_PARENTS("ahb2",
+                                                     ahb2_parents,
+                                                     &ccu_mux_ops,
+                                                     0),
+       },
+};
+
+static SUNXI_CCU_GATE(bus_ce_clk,      "bus-ce",       "ahb1",
+                     0x060, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_dma_clk,     "bus-dma",      "ahb1",
+                     0x060, BIT(6), 0);
+static SUNXI_CCU_GATE(bus_mmc0_clk,    "bus-mmc0",     "ahb1",
+                     0x060, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_mmc1_clk,    "bus-mmc1",     "ahb1",
+                     0x060, BIT(9), 0);
+static SUNXI_CCU_GATE(bus_mmc2_clk,    "bus-mmc2",     "ahb1",
+                     0x060, BIT(10), 0);
+static SUNXI_CCU_GATE(bus_dram_clk,    "bus-dram",     "ahb1",
+                     0x060, BIT(14), 0);
+static SUNXI_CCU_GATE(bus_emac_clk,    "bus-emac",     "ahb2",
+                     0x060, BIT(17), 0);
+static SUNXI_CCU_GATE(bus_hstimer_clk, "bus-hstimer",  "ahb1",
+                     0x060, BIT(19), 0);
+static SUNXI_CCU_GATE(bus_spi0_clk,    "bus-spi0",     "ahb1",
+                     0x060, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_otg_clk,     "bus-otg",      "ahb1",
+                     0x060, BIT(24), 0);
+static SUNXI_CCU_GATE(bus_ehci0_clk,   "bus-ehci0",    "ahb1",
+                     0x060, BIT(26), 0);
+static SUNXI_CCU_GATE(bus_ohci0_clk,   "bus-ohci0",    "ahb1",
+                     0x060, BIT(29), 0);
+
+static SUNXI_CCU_GATE(bus_ve_clk,      "bus-ve",       "ahb1",
+                     0x064, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_tcon0_clk,   "bus-tcon0",    "ahb1",
+                     0x064, BIT(4), 0);
+static SUNXI_CCU_GATE(bus_csi_clk,     "bus-csi",      "ahb1",
+                     0x064, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_de_clk,      "bus-de",       "ahb1",
+                     0x064, BIT(12), 0);
+
+static SUNXI_CCU_GATE(bus_codec_clk,   "bus-codec",    "apb1",
+                     0x068, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_pio_clk,     "bus-pio",      "apb1",
+                     0x068, BIT(5), 0);
+
+static SUNXI_CCU_GATE(bus_i2c0_clk,    "bus-i2c0",     "apb2",
+                     0x06c, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_i2c1_clk,    "bus-i2c1",     "apb2",
+                     0x06c, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_uart0_clk,   "bus-uart0",    "apb2",
+                     0x06c, BIT(16), 0);
+static SUNXI_CCU_GATE(bus_uart1_clk,   "bus-uart1",    "apb2",
+                     0x06c, BIT(17), 0);
+static SUNXI_CCU_GATE(bus_uart2_clk,   "bus-uart2",    "apb2",
+                     0x06c, BIT(18), 0);
+
+static SUNXI_CCU_GATE(bus_ephy_clk,    "bus-ephy",     "ahb1",
+                     0x070, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_dbg_clk,     "bus-dbg",      "ahb1",
+                     0x070, BIT(7), 0);
+
+static const char * const mod0_default_parents[] = { "osc24M", "pll-periph0",
+                                                    "pll-periph1" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc0_clk, "mmc0", mod0_default_parents, 0x088,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_PHASE(mmc0_sample_clk, "mmc0_sample", "mmc0",
+                      0x088, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc0_output_clk, "mmc0_output", "mmc0",
+                      0x088, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc1_clk, "mmc1", mod0_default_parents, 0x08c,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_PHASE(mmc1_sample_clk, "mmc1_sample", "mmc1",
+                      0x08c, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc1_output_clk, "mmc1_output", "mmc1",
+                      0x08c, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc2_clk, "mmc2", mod0_default_parents, 0x090,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_PHASE(mmc2_sample_clk, "mmc2_sample", "mmc2",
+                      0x090, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc2_output_clk, "mmc2_output", "mmc2",
+                      0x090, 8, 3, 0);
+
+static const char * const ce_parents[] = { "osc24M", "pll-periph0", };
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(ce_clk, "ce", ce_parents, 0x09c,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", mod0_default_parents, 0x0a0,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 2,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_GATE(usb_phy0_clk,    "usb-phy0",     "osc24M",
+                     0x0cc, BIT(8), 0);
+static SUNXI_CCU_GATE(usb_ohci0_clk,   "usb-ohci0",    "osc24M",
+                     0x0cc, BIT(16), 0);
+
+static const char * const dram_parents[] = { "pll-ddr", "pll-periph0-2x" };
+static SUNXI_CCU_M_WITH_MUX(dram_clk, "dram", dram_parents,
+                           0x0f4, 0, 4, 20, 2, CLK_IS_CRITICAL);
+
+static SUNXI_CCU_GATE(dram_ve_clk,     "dram-ve",      "dram",
+                     0x100, BIT(0), 0);
+static SUNXI_CCU_GATE(dram_csi_clk,    "dram-csi",     "dram",
+                     0x100, BIT(1), 0);
+static SUNXI_CCU_GATE(dram_ehci_clk,   "dram-ehci",    "dram",
+                     0x100, BIT(17), 0);
+static SUNXI_CCU_GATE(dram_ohci_clk,   "dram-ohci",    "dram",
+                     0x100, BIT(18), 0);
+
+static const char * const de_parents[] = { "pll-video", "pll-periph0" };
+static SUNXI_CCU_M_WITH_MUX_GATE(de_clk, "de", de_parents,
+                                0x104, 0, 4, 24, 2, BIT(31), 0);
+
+static const char * const tcon_parents[] = { "pll-video" };
+static SUNXI_CCU_M_WITH_MUX_GATE(tcon_clk, "tcon", tcon_parents,
+                                0x118, 0, 4, 24, 3, BIT(31), 0);
+
+static SUNXI_CCU_GATE(csi_misc_clk,    "csi-misc",     "osc24M",
+                     0x130, BIT(31), 0);
+
+static const char * const csi_mclk_parents[] = { "osc24M", "pll-video",
+                                                "pll-periph0", "pll-periph1" };
+static SUNXI_CCU_M_WITH_MUX_GATE(csi0_mclk_clk, "csi0-mclk", csi_mclk_parents,
+                                0x130, 0, 5, 8, 3, BIT(15), 0);
+
+static const char * const csi1_sclk_parents[] = { "pll-video", "pll-isp" };
+static SUNXI_CCU_M_WITH_MUX_GATE(csi1_sclk_clk, "csi-sclk", csi1_sclk_parents,
+                                0x134, 16, 4, 24, 3, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(csi1_mclk_clk, "csi-mclk", csi_mclk_parents,
+                                0x134, 0, 5, 8, 3, BIT(15), 0);
+
+static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve",
+                            0x13c, 16, 3, BIT(31), 0);
+
+static SUNXI_CCU_GATE(ac_dig_clk,      "ac-dig",       "pll-audio",
+                     0x140, BIT(31), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_GATE(avs_clk,         "avs",          "osc24M",
+                     0x144, BIT(31), 0);
+
+static const char * const mbus_parents[] = { "osc24M", "pll-periph0-2x",
+                                            "pll-ddr" };
+static SUNXI_CCU_M_WITH_MUX_GATE(mbus_clk, "mbus", mbus_parents,
+                                0x15c, 0, 3, 24, 2, BIT(31), CLK_IS_CRITICAL);
+
+static const char * const mipi_csi_parents[] = { "pll-video", "pll-periph0",
+                                                "pll-isp" };
+static SUNXI_CCU_M_WITH_MUX_GATE(mipi_csi_clk, "mipi-csi", mipi_csi_parents,
+                                0x16c, 0, 3, 24, 2, BIT(31), 0);
+
+static struct ccu_common *sun8i_v3s_ccu_clks[] = {
+       &pll_cpu_clk.common,
+       &pll_audio_base_clk.common,
+       &pll_video_clk.common,
+       &pll_ve_clk.common,
+       &pll_ddr_clk.common,
+       &pll_periph0_clk.common,
+       &pll_isp_clk.common,
+       &pll_periph1_clk.common,
+       &cpu_clk.common,
+       &axi_clk.common,
+       &ahb1_clk.common,
+       &apb1_clk.common,
+       &apb2_clk.common,
+       &ahb2_clk.common,
+       &bus_ce_clk.common,
+       &bus_dma_clk.common,
+       &bus_mmc0_clk.common,
+       &bus_mmc1_clk.common,
+       &bus_mmc2_clk.common,
+       &bus_dram_clk.common,
+       &bus_emac_clk.common,
+       &bus_hstimer_clk.common,
+       &bus_spi0_clk.common,
+       &bus_otg_clk.common,
+       &bus_ehci0_clk.common,
+       &bus_ohci0_clk.common,
+       &bus_ve_clk.common,
+       &bus_tcon0_clk.common,
+       &bus_csi_clk.common,
+       &bus_de_clk.common,
+       &bus_codec_clk.common,
+       &bus_pio_clk.common,
+       &bus_i2c0_clk.common,
+       &bus_i2c1_clk.common,
+       &bus_uart0_clk.common,
+       &bus_uart1_clk.common,
+       &bus_uart2_clk.common,
+       &bus_ephy_clk.common,
+       &bus_dbg_clk.common,
+       &mmc0_clk.common,
+       &mmc0_sample_clk.common,
+       &mmc0_output_clk.common,
+       &mmc1_clk.common,
+       &mmc1_sample_clk.common,
+       &mmc1_output_clk.common,
+       &mmc2_clk.common,
+       &mmc2_sample_clk.common,
+       &mmc2_output_clk.common,
+       &ce_clk.common,
+       &spi0_clk.common,
+       &usb_phy0_clk.common,
+       &usb_ohci0_clk.common,
+       &dram_clk.common,
+       &dram_ve_clk.common,
+       &dram_csi_clk.common,
+       &dram_ohci_clk.common,
+       &dram_ehci_clk.common,
+       &de_clk.common,
+       &tcon_clk.common,
+       &csi_misc_clk.common,
+       &csi0_mclk_clk.common,
+       &csi1_sclk_clk.common,
+       &csi1_mclk_clk.common,
+       &ve_clk.common,
+       &ac_dig_clk.common,
+       &avs_clk.common,
+       &mbus_clk.common,
+       &mipi_csi_clk.common,
+};
+
+/* We hardcode the divider to 4 for now */
+static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
+                       "pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_2x_clk, "pll-audio-2x",
+                       "pll-audio-base", 2, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_4x_clk, "pll-audio-4x",
+                       "pll-audio-base", 1, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_8x_clk, "pll-audio-8x",
+                       "pll-audio-base", 1, 2, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_periph0_2x_clk, "pll-periph0-2x",
+                       "pll-periph0", 1, 2, 0);
+
+static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
+       .hws    = {
+               [CLK_PLL_CPU]           = &pll_cpu_clk.common.hw,
+               [CLK_PLL_AUDIO_BASE]    = &pll_audio_base_clk.common.hw,
+               [CLK_PLL_AUDIO]         = &pll_audio_clk.hw,
+               [CLK_PLL_AUDIO_2X]      = &pll_audio_2x_clk.hw,
+               [CLK_PLL_AUDIO_4X]      = &pll_audio_4x_clk.hw,
+               [CLK_PLL_AUDIO_8X]      = &pll_audio_8x_clk.hw,
+               [CLK_PLL_VIDEO]         = &pll_video_clk.common.hw,
+               [CLK_PLL_VE]            = &pll_ve_clk.common.hw,
+               [CLK_PLL_DDR]           = &pll_ddr_clk.common.hw,
+               [CLK_PLL_PERIPH0]       = &pll_periph0_clk.common.hw,
+               [CLK_PLL_PERIPH0_2X]    = &pll_periph0_2x_clk.hw,
+               [CLK_PLL_ISP]           = &pll_isp_clk.common.hw,
+               [CLK_PLL_PERIPH1]       = &pll_periph1_clk.common.hw,
+               [CLK_CPU]               = &cpu_clk.common.hw,
+               [CLK_AXI]               = &axi_clk.common.hw,
+               [CLK_AHB1]              = &ahb1_clk.common.hw,
+               [CLK_APB1]              = &apb1_clk.common.hw,
+               [CLK_APB2]              = &apb2_clk.common.hw,
+               [CLK_AHB2]              = &ahb2_clk.common.hw,
+               [CLK_BUS_CE]            = &bus_ce_clk.common.hw,
+               [CLK_BUS_DMA]           = &bus_dma_clk.common.hw,
+               [CLK_BUS_MMC0]          = &bus_mmc0_clk.common.hw,
+               [CLK_BUS_MMC1]          = &bus_mmc1_clk.common.hw,
+               [CLK_BUS_MMC2]          = &bus_mmc2_clk.common.hw,
+               [CLK_BUS_DRAM]          = &bus_dram_clk.common.hw,
+               [CLK_BUS_EMAC]          = &bus_emac_clk.common.hw,
+               [CLK_BUS_HSTIMER]       = &bus_hstimer_clk.common.hw,
+               [CLK_BUS_SPI0]          = &bus_spi0_clk.common.hw,
+               [CLK_BUS_OTG]           = &bus_otg_clk.common.hw,
+               [CLK_BUS_EHCI0]         = &bus_ehci0_clk.common.hw,
+               [CLK_BUS_OHCI0]         = &bus_ohci0_clk.common.hw,
+               [CLK_BUS_VE]            = &bus_ve_clk.common.hw,
+               [CLK_BUS_TCON0]         = &bus_tcon0_clk.common.hw,
+               [CLK_BUS_CSI]           = &bus_csi_clk.common.hw,
+               [CLK_BUS_DE]            = &bus_de_clk.common.hw,
+               [CLK_BUS_CODEC]         = &bus_codec_clk.common.hw,
+               [CLK_BUS_PIO]           = &bus_pio_clk.common.hw,
+               [CLK_BUS_I2C0]          = &bus_i2c0_clk.common.hw,
+               [CLK_BUS_I2C1]          = &bus_i2c1_clk.common.hw,
+               [CLK_BUS_UART0]         = &bus_uart0_clk.common.hw,
+               [CLK_BUS_UART1]         = &bus_uart1_clk.common.hw,
+               [CLK_BUS_UART2]         = &bus_uart2_clk.common.hw,
+               [CLK_BUS_EPHY]          = &bus_ephy_clk.common.hw,
+               [CLK_BUS_DBG]           = &bus_dbg_clk.common.hw,
+               [CLK_MMC0]              = &mmc0_clk.common.hw,
+               [CLK_MMC0_SAMPLE]       = &mmc0_sample_clk.common.hw,
+               [CLK_MMC0_OUTPUT]       = &mmc0_output_clk.common.hw,
+               [CLK_MMC1]              = &mmc1_clk.common.hw,
+               [CLK_MMC1_SAMPLE]       = &mmc1_sample_clk.common.hw,
+               [CLK_MMC1_OUTPUT]       = &mmc1_output_clk.common.hw,
+               [CLK_MMC2]              = &mmc2_clk.common.hw,
+               [CLK_MMC2_SAMPLE]       = &mmc2_sample_clk.common.hw,
+               [CLK_MMC2_OUTPUT]       = &mmc2_output_clk.common.hw,
+               [CLK_CE]                = &ce_clk.common.hw,
+               [CLK_SPI0]              = &spi0_clk.common.hw,
+               [CLK_USB_PHY0]          = &usb_phy0_clk.common.hw,
+               [CLK_USB_OHCI0]         = &usb_ohci0_clk.common.hw,
+               [CLK_DRAM]              = &dram_clk.common.hw,
+               [CLK_DRAM_VE]           = &dram_ve_clk.common.hw,
+               [CLK_DRAM_CSI]          = &dram_csi_clk.common.hw,
+               [CLK_DRAM_EHCI]         = &dram_ehci_clk.common.hw,
+               [CLK_DRAM_OHCI]         = &dram_ohci_clk.common.hw,
+               [CLK_DE]                = &de_clk.common.hw,
+               [CLK_TCON0]             = &tcon_clk.common.hw,
+               [CLK_CSI_MISC]          = &csi_misc_clk.common.hw,
+               [CLK_CSI0_MCLK]         = &csi0_mclk_clk.common.hw,
+               [CLK_CSI1_SCLK]         = &csi1_sclk_clk.common.hw,
+               [CLK_CSI1_MCLK]         = &csi1_mclk_clk.common.hw,
+               [CLK_VE]                = &ve_clk.common.hw,
+               [CLK_AC_DIG]            = &ac_dig_clk.common.hw,
+               [CLK_AVS]               = &avs_clk.common.hw,
+               [CLK_MBUS]              = &mbus_clk.common.hw,
+               [CLK_MIPI_CSI]          = &mipi_csi_clk.common.hw,
+       },
+       .num    = CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
+       [RST_USB_PHY0]          =  { 0x0cc, BIT(0) },
+
+       [RST_MBUS]              =  { 0x0fc, BIT(31) },
+
+       [RST_BUS_CE]            =  { 0x2c0, BIT(5) },
+       [RST_BUS_DMA]           =  { 0x2c0, BIT(6) },
+       [RST_BUS_MMC0]          =  { 0x2c0, BIT(8) },
+       [RST_BUS_MMC1]          =  { 0x2c0, BIT(9) },
+       [RST_BUS_MMC2]          =  { 0x2c0, BIT(10) },
+       [RST_BUS_DRAM]          =  { 0x2c0, BIT(14) },
+       [RST_BUS_EMAC]          =  { 0x2c0, BIT(17) },
+       [RST_BUS_HSTIMER]       =  { 0x2c0, BIT(19) },
+       [RST_BUS_SPI0]          =  { 0x2c0, BIT(20) },
+       [RST_BUS_OTG]           =  { 0x2c0, BIT(23) },
+       [RST_BUS_EHCI0]         =  { 0x2c0, BIT(26) },
+       [RST_BUS_OHCI0]         =  { 0x2c0, BIT(29) },
+
+       [RST_BUS_VE]            =  { 0x2c4, BIT(0) },
+       [RST_BUS_TCON0]         =  { 0x2c4, BIT(3) },
+       [RST_BUS_CSI]           =  { 0x2c4, BIT(8) },
+       [RST_BUS_DE]            =  { 0x2c4, BIT(12) },
+       [RST_BUS_DBG]           =  { 0x2c4, BIT(31) },
+
+       [RST_BUS_EPHY]          =  { 0x2c8, BIT(2) },
+
+       [RST_BUS_CODEC]         =  { 0x2d0, BIT(0) },
+
+       [RST_BUS_I2C0]          =  { 0x2d8, BIT(0) },
+       [RST_BUS_I2C1]          =  { 0x2d8, BIT(1) },
+       [RST_BUS_UART0]         =  { 0x2d8, BIT(16) },
+       [RST_BUS_UART1]         =  { 0x2d8, BIT(17) },
+       [RST_BUS_UART2]         =  { 0x2d8, BIT(18) },
+};
+
+static const struct sunxi_ccu_desc sun8i_v3s_ccu_desc = {
+       .ccu_clks       = sun8i_v3s_ccu_clks,
+       .num_ccu_clks   = ARRAY_SIZE(sun8i_v3s_ccu_clks),
+
+       .hw_clks        = &sun8i_v3s_hw_clks,
+
+       .resets         = sun8i_v3s_ccu_resets,
+       .num_resets     = ARRAY_SIZE(sun8i_v3s_ccu_resets),
+};
+
+static void __init sun8i_v3s_ccu_setup(struct device_node *node)
+{
+       void __iomem *reg;
+       u32 val;
+
+       reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+       if (IS_ERR(reg)) {
+               pr_err("%s: Could not map the clock registers\n",
+                      of_node_full_name(node));
+               return;
+       }
+
+       /* Force the PLL-Audio-1x divider to 4 */
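+       /* The divider field at [19:16] holds the divider minus one, so 3 => /4. */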
+       val = readl(reg + SUN8I_V3S_PLL_AUDIO_REG);
+       val &= ~GENMASK(19, 16);
+       writel(val | (3 << 16), reg + SUN8I_V3S_PLL_AUDIO_REG);
+
+       sunxi_ccu_probe(node, reg, &sun8i_v3s_ccu_desc);
+}
+CLK_OF_DECLARE(sun8i_v3s_ccu, "allwinner,sun8i-v3s-ccu",
+              sun8i_v3s_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
new file mode 100644 (file)
index 0000000..4a4d36f
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * Based on ccu-sun8i-h3.h, which is:
+ * Copyright (c) 2016 Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN8I_V3S_H_
+#define _CCU_SUN8I_V3S_H_
+
+#include <dt-bindings/clock/sun8i-v3s-ccu.h>
+#include <dt-bindings/reset/sun8i-v3s-ccu.h>
+
+#define CLK_PLL_CPU            0
+#define CLK_PLL_AUDIO_BASE     1
+#define CLK_PLL_AUDIO          2
+#define CLK_PLL_AUDIO_2X       3
+#define CLK_PLL_AUDIO_4X       4
+#define CLK_PLL_AUDIO_8X       5
+#define CLK_PLL_VIDEO          6
+#define CLK_PLL_VE             7
+#define CLK_PLL_DDR            8
+#define CLK_PLL_PERIPH0                9
+#define CLK_PLL_PERIPH0_2X     10
+#define CLK_PLL_ISP            11
+#define CLK_PLL_PERIPH1                12
+/* Reserve one number for the unimplemented and unused PLL_DDR1 */
+
+/* The CPU clock is exported */
+
+#define CLK_AXI                        15
+#define CLK_AHB1               16
+#define CLK_APB1               17
+#define CLK_APB2               18
+#define CLK_AHB2               19
+
+/* All the bus gates are exported */
+
+/* The first bunch of module clocks are exported */
+
+#define CLK_DRAM               58
+
+/* All the DRAM gates are exported */
+
+/* Some more module clocks are exported */
+
+#define CLK_MBUS               72
+
+/* And the GPU module clock is exported */
+
+#define CLK_NUMBER             (CLK_MIPI_CSI + 1)
+
+#endif /* _CCU_SUN8I_V3S_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80-de.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80-de.c
new file mode 100644 (file)
index 0000000..6d11658
--- /dev/null
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2016 Chen-Yu Tsai. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include "ccu_common.h"
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_reset.h"
+
+#include "ccu-sun9i-a80-de.h"
+
+static SUNXI_CCU_GATE(fe0_clk,         "fe0",          "fe0-div",
+                     0x00, BIT(0), 0);
+static SUNXI_CCU_GATE(fe1_clk,         "fe1",          "fe1-div",
+                     0x00, BIT(1), 0);
+static SUNXI_CCU_GATE(fe2_clk,         "fe2",          "fe2-div",
+                     0x00, BIT(2), 0);
+static SUNXI_CCU_GATE(iep_deu0_clk,    "iep-deu0",     "de",
+                     0x00, BIT(4), 0);
+static SUNXI_CCU_GATE(iep_deu1_clk,    "iep-deu1",     "de",
+                     0x00, BIT(5), 0);
+static SUNXI_CCU_GATE(be0_clk,         "be0",          "be0-div",
+                     0x00, BIT(8), 0);
+static SUNXI_CCU_GATE(be1_clk,         "be1",          "be1-div",
+                     0x00, BIT(9), 0);
+static SUNXI_CCU_GATE(be2_clk,         "be2",          "be2-div",
+                     0x00, BIT(10), 0);
+static SUNXI_CCU_GATE(iep_drc0_clk,    "iep-drc0",     "de",
+                     0x00, BIT(12), 0);
+static SUNXI_CCU_GATE(iep_drc1_clk,    "iep-drc1",     "de",
+                     0x00, BIT(13), 0);
+static SUNXI_CCU_GATE(merge_clk,       "merge",        "de",
+                     0x00, BIT(20), 0);
+
+static SUNXI_CCU_GATE(dram_fe0_clk,    "dram-fe0",     "sdram",
+                     0x04, BIT(0), 0);
+static SUNXI_CCU_GATE(dram_fe1_clk,    "dram-fe1",     "sdram",
+                     0x04, BIT(1), 0);
+static SUNXI_CCU_GATE(dram_fe2_clk,    "dram-fe2",     "sdram",
+                     0x04, BIT(2), 0);
+static SUNXI_CCU_GATE(dram_deu0_clk,   "dram-deu0",    "sdram",
+                     0x04, BIT(4), 0);
+static SUNXI_CCU_GATE(dram_deu1_clk,   "dram-deu1",    "sdram",
+                     0x04, BIT(5), 0);
+static SUNXI_CCU_GATE(dram_be0_clk,    "dram-be0",     "sdram",
+                     0x04, BIT(8), 0);
+static SUNXI_CCU_GATE(dram_be1_clk,    "dram-be1",     "sdram",
+                     0x04, BIT(9), 0);
+static SUNXI_CCU_GATE(dram_be2_clk,    "dram-be2",     "sdram",
+                     0x04, BIT(10), 0);
+static SUNXI_CCU_GATE(dram_drc0_clk,   "dram-drc0",    "sdram",
+                     0x04, BIT(12), 0);
+static SUNXI_CCU_GATE(dram_drc1_clk,   "dram-drc1",    "sdram",
+                     0x04, BIT(13), 0);
+
+static SUNXI_CCU_GATE(bus_fe0_clk,     "bus-fe0",      "bus-de",
+                     0x08, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_fe1_clk,     "bus-fe1",      "bus-de",
+                     0x08, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_fe2_clk,     "bus-fe2",      "bus-de",
+                     0x08, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_deu0_clk,    "bus-deu0",     "bus-de",
+                     0x08, BIT(4), 0);
+static SUNXI_CCU_GATE(bus_deu1_clk,    "bus-deu1",     "bus-de",
+                     0x08, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_be0_clk,     "bus-be0",      "bus-de",
+                     0x08, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_be1_clk,     "bus-be1",      "bus-de",
+                     0x08, BIT(9), 0);
+static SUNXI_CCU_GATE(bus_be2_clk,     "bus-be2",      "bus-de",
+                     0x08, BIT(10), 0);
+static SUNXI_CCU_GATE(bus_drc0_clk,    "bus-drc0",     "bus-de",
+                     0x08, BIT(12), 0);
+static SUNXI_CCU_GATE(bus_drc1_clk,    "bus-drc1",     "bus-de",
+                     0x08, BIT(13), 0);
+
+static SUNXI_CCU_M(fe0_div_clk, "fe0-div", "de", 0x20, 0, 4, 0);
+static SUNXI_CCU_M(fe1_div_clk, "fe1-div", "de", 0x20, 4, 4, 0);
+static SUNXI_CCU_M(fe2_div_clk, "fe2-div", "de", 0x20, 8, 4, 0);
+static SUNXI_CCU_M(be0_div_clk, "be0-div", "de", 0x20, 16, 4, 0);
+static SUNXI_CCU_M(be1_div_clk, "be1-div", "de", 0x20, 20, 4, 0);
+static SUNXI_CCU_M(be2_div_clk, "be2-div", "de", 0x20, 24, 4, 0);
+
+static struct ccu_common *sun9i_a80_de_clks[] = {
+       &fe0_clk.common,
+       &fe1_clk.common,
+       &fe2_clk.common,
+       &iep_deu0_clk.common,
+       &iep_deu1_clk.common,
+       &be0_clk.common,
+       &be1_clk.common,
+       &be2_clk.common,
+       &iep_drc0_clk.common,
+       &iep_drc1_clk.common,
+       &merge_clk.common,
+
+       &dram_fe0_clk.common,
+       &dram_fe1_clk.common,
+       &dram_fe2_clk.common,
+       &dram_deu0_clk.common,
+       &dram_deu1_clk.common,
+       &dram_be0_clk.common,
+       &dram_be1_clk.common,
+       &dram_be2_clk.common,
+       &dram_drc0_clk.common,
+       &dram_drc1_clk.common,
+
+       &bus_fe0_clk.common,
+       &bus_fe1_clk.common,
+       &bus_fe2_clk.common,
+       &bus_deu0_clk.common,
+       &bus_deu1_clk.common,
+       &bus_be0_clk.common,
+       &bus_be1_clk.common,
+       &bus_be2_clk.common,
+       &bus_drc0_clk.common,
+       &bus_drc1_clk.common,
+
+       &fe0_div_clk.common,
+       &fe1_div_clk.common,
+       &fe2_div_clk.common,
+       &be0_div_clk.common,
+       &be1_div_clk.common,
+       &be2_div_clk.common,
+};
+
+static struct clk_hw_onecell_data sun9i_a80_de_hw_clks = {
+       .hws    = {
+               [CLK_FE0]       = &fe0_clk.common.hw,
+               [CLK_FE1]       = &fe1_clk.common.hw,
+               [CLK_FE2]       = &fe2_clk.common.hw,
+               [CLK_IEP_DEU0]  = &iep_deu0_clk.common.hw,
+               [CLK_IEP_DEU1]  = &iep_deu1_clk.common.hw,
+               [CLK_BE0]       = &be0_clk.common.hw,
+               [CLK_BE1]       = &be1_clk.common.hw,
+               [CLK_BE2]       = &be2_clk.common.hw,
+               [CLK_IEP_DRC0]  = &iep_drc0_clk.common.hw,
+               [CLK_IEP_DRC1]  = &iep_drc1_clk.common.hw,
+               [CLK_MERGE]     = &merge_clk.common.hw,
+
+               [CLK_DRAM_FE0]  = &dram_fe0_clk.common.hw,
+               [CLK_DRAM_FE1]  = &dram_fe1_clk.common.hw,
+               [CLK_DRAM_FE2]  = &dram_fe2_clk.common.hw,
+               [CLK_DRAM_DEU0] = &dram_deu0_clk.common.hw,
+               [CLK_DRAM_DEU1] = &dram_deu1_clk.common.hw,
+               [CLK_DRAM_BE0]  = &dram_be0_clk.common.hw,
+               [CLK_DRAM_BE1]  = &dram_be1_clk.common.hw,
+               [CLK_DRAM_BE2]  = &dram_be2_clk.common.hw,
+               [CLK_DRAM_DRC0] = &dram_drc0_clk.common.hw,
+               [CLK_DRAM_DRC1] = &dram_drc1_clk.common.hw,
+
+               [CLK_BUS_FE0]   = &bus_fe0_clk.common.hw,
+               [CLK_BUS_FE1]   = &bus_fe1_clk.common.hw,
+               [CLK_BUS_FE2]   = &bus_fe2_clk.common.hw,
+               [CLK_BUS_DEU0]  = &bus_deu0_clk.common.hw,
+               [CLK_BUS_DEU1]  = &bus_deu1_clk.common.hw,
+               [CLK_BUS_BE0]   = &bus_be0_clk.common.hw,
+               [CLK_BUS_BE1]   = &bus_be1_clk.common.hw,
+               [CLK_BUS_BE2]   = &bus_be2_clk.common.hw,
+               [CLK_BUS_DRC0]  = &bus_drc0_clk.common.hw,
+               [CLK_BUS_DRC1]  = &bus_drc1_clk.common.hw,
+
+               [CLK_FE0_DIV]   = &fe0_div_clk.common.hw,
+               [CLK_FE1_DIV]   = &fe1_div_clk.common.hw,
+               [CLK_FE2_DIV]   = &fe2_div_clk.common.hw,
+               [CLK_BE0_DIV]   = &be0_div_clk.common.hw,
+               [CLK_BE1_DIV]   = &be1_div_clk.common.hw,
+               [CLK_BE2_DIV]   = &be2_div_clk.common.hw,
+       },
+       .num    = CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun9i_a80_de_resets[] = {
+       [RST_FE0]       = { 0x0c, BIT(0) },
+       [RST_FE1]       = { 0x0c, BIT(1) },
+       [RST_FE2]       = { 0x0c, BIT(2) },
+       [RST_DEU0]      = { 0x0c, BIT(4) },
+       [RST_DEU1]      = { 0x0c, BIT(5) },
+       [RST_BE0]       = { 0x0c, BIT(8) },
+       [RST_BE1]       = { 0x0c, BIT(9) },
+       [RST_BE2]       = { 0x0c, BIT(10) },
+       [RST_DRC0]      = { 0x0c, BIT(12) },
+       [RST_DRC1]      = { 0x0c, BIT(13) },
+       [RST_MERGE]     = { 0x0c, BIT(20) },
+};
+
+static const struct sunxi_ccu_desc sun9i_a80_de_clk_desc = {
+       .ccu_clks       = sun9i_a80_de_clks,
+       .num_ccu_clks   = ARRAY_SIZE(sun9i_a80_de_clks),
+
+       .hw_clks        = &sun9i_a80_de_hw_clks,
+
+       .resets         = sun9i_a80_de_resets,
+       .num_resets     = ARRAY_SIZE(sun9i_a80_de_resets),
+};
+
+static int sun9i_a80_de_clk_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+       struct clk *bus_clk;
+       struct reset_control *rstc;
+       void __iomem *reg;
+       int ret;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       reg = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(reg))
+               return PTR_ERR(reg);
+
+       bus_clk = devm_clk_get(&pdev->dev, "bus");
+       if (IS_ERR(bus_clk)) {
+               ret = PTR_ERR(bus_clk);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(&pdev->dev, "Couldn't get bus clk: %d\n", ret);
+               return ret;
+       }
+
+       rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+       if (IS_ERR(rstc)) {
+               ret = PTR_ERR(rstc);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(&pdev->dev,
+                               "Couldn't get reset control: %d\n", ret);
+               return ret;
+       }
+
+       /* The bus clock needs to be enabled for us to access the registers */
+       ret = clk_prepare_enable(bus_clk);
+       if (ret) {
+               dev_err(&pdev->dev, "Couldn't enable bus clk: %d\n", ret);
+               return ret;
+       }
+
+       /* The reset control needs to be de-asserted for the controls to work */
+       ret = reset_control_deassert(rstc);
+       if (ret) {
+               dev_err(&pdev->dev,
+                       "Couldn't deassert reset control: %d\n", ret);
+               goto err_disable_clk;
+       }
+
+       ret = sunxi_ccu_probe(pdev->dev.of_node, reg,
+                             &sun9i_a80_de_clk_desc);
+       if (ret)
+               goto err_assert_reset;
+
+       return 0;
+
+err_assert_reset:
+       reset_control_assert(rstc);
+err_disable_clk:
+       clk_disable_unprepare(bus_clk);
+       return ret;
+}
+
+static const struct of_device_id sun9i_a80_de_clk_ids[] = {
+       { .compatible = "allwinner,sun9i-a80-de-clks" },
+       { }
+};
+
+static struct platform_driver sun9i_a80_de_clk_driver = {
+       .probe  = sun9i_a80_de_clk_probe,
+       .driver = {
+               .name   = "sun9i-a80-de-clks",
+               .of_match_table = sun9i_a80_de_clk_ids,
+       },
+};
+builtin_platform_driver(sun9i_a80_de_clk_driver);
diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80-de.h b/drivers/clk/sunxi-ng/ccu-sun9i-a80-de.h
new file mode 100644 (file)
index 0000000..a476904
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2016 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai <wens@csie.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN9I_A80_DE_H_
+#define _CCU_SUN9I_A80_DE_H_
+
+#include <dt-bindings/clock/sun9i-a80-de.h>
+#include <dt-bindings/reset/sun9i-a80-de.h>
+
+/* Intermediary clock dividers are not exported */
+#define CLK_FE0_DIV    31
+#define CLK_FE1_DIV    32
+#define CLK_FE2_DIV    33
+#define CLK_BE0_DIV    34
+#define CLK_BE1_DIV    35
+#define CLK_BE2_DIV    36
+
+#define CLK_NUMBER     (CLK_BE2_DIV + 1)
+
+#endif /* _CCU_SUN9I_A80_DE_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.c
new file mode 100644 (file)
index 0000000..1d76f24
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2016 Chen-Yu Tsai. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+#include "ccu_common.h"
+#include "ccu_gate.h"
+#include "ccu_reset.h"
+
+#include "ccu-sun9i-a80-usb.h"
+
+static SUNXI_CCU_GATE(bus_hci0_clk, "bus-hci0", "bus-usb", 0x0, BIT(1), 0);
+static SUNXI_CCU_GATE(usb_ohci0_clk, "usb-ohci0", "osc24M", 0x0, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_hci1_clk, "bus-hci1", "bus-usb", 0x0, BIT(3), 0);
+static SUNXI_CCU_GATE(bus_hci2_clk, "bus-hci2", "bus-usb", 0x0, BIT(5), 0);
+static SUNXI_CCU_GATE(usb_ohci2_clk, "usb-ohci2", "osc24M", 0x0, BIT(6), 0);
+
+static SUNXI_CCU_GATE(usb0_phy_clk, "usb0-phy", "osc24M", 0x4, BIT(1), 0);
+static SUNXI_CCU_GATE(usb1_hsic_clk, "usb1-hsic", "osc24M", 0x4, BIT(2), 0);
+static SUNXI_CCU_GATE(usb1_phy_clk, "usb1-phy", "osc24M", 0x4, BIT(3), 0);
+static SUNXI_CCU_GATE(usb2_hsic_clk, "usb2-hsic", "osc24M", 0x4, BIT(4), 0);
+static SUNXI_CCU_GATE(usb2_phy_clk, "usb2-phy", "osc24M", 0x4, BIT(5), 0);
+static SUNXI_CCU_GATE(usb_hsic_clk, "usb-hsic", "osc24M", 0x4, BIT(10), 0);
+
+static struct ccu_common *sun9i_a80_usb_clks[] = {
+       &bus_hci0_clk.common,
+       &usb_ohci0_clk.common,
+       &bus_hci1_clk.common,
+       &bus_hci2_clk.common,
+       &usb_ohci2_clk.common,
+
+       &usb0_phy_clk.common,
+       &usb1_hsic_clk.common,
+       &usb1_phy_clk.common,
+       &usb2_hsic_clk.common,
+       &usb2_phy_clk.common,
+       &usb_hsic_clk.common,
+};
+
+static struct clk_hw_onecell_data sun9i_a80_usb_hw_clks = {
+       .hws    = {
+               [CLK_BUS_HCI0]  = &bus_hci0_clk.common.hw,
+               [CLK_USB_OHCI0] = &usb_ohci0_clk.common.hw,
+               [CLK_BUS_HCI1]  = &bus_hci1_clk.common.hw,
+               [CLK_BUS_HCI2]  = &bus_hci2_clk.common.hw,
+               [CLK_USB_OHCI2] = &usb_ohci2_clk.common.hw,
+
+               [CLK_USB0_PHY]  = &usb0_phy_clk.common.hw,
+               [CLK_USB1_HSIC] = &usb1_hsic_clk.common.hw,
+               [CLK_USB1_PHY]  = &usb1_phy_clk.common.hw,
+               [CLK_USB2_HSIC] = &usb2_hsic_clk.common.hw,
+               [CLK_USB2_PHY]  = &usb2_phy_clk.common.hw,
+               [CLK_USB_HSIC]  = &usb_hsic_clk.common.hw,
+       },
+       .num    = CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun9i_a80_usb_resets[] = {
+       [RST_USB0_HCI]          = { 0x0, BIT(17) },
+       [RST_USB1_HCI]          = { 0x0, BIT(18) },
+       [RST_USB2_HCI]          = { 0x0, BIT(19) },
+
+       [RST_USB0_PHY]          = { 0x4, BIT(17) },
+       [RST_USB1_HSIC]         = { 0x4, BIT(18) },
+       [RST_USB1_PHY]          = { 0x4, BIT(19) },
+       [RST_USB2_HSIC]         = { 0x4, BIT(20) },
+       [RST_USB2_PHY]          = { 0x4, BIT(21) },
+};
+
+static const struct sunxi_ccu_desc sun9i_a80_usb_clk_desc = {
+       .ccu_clks       = sun9i_a80_usb_clks,
+       .num_ccu_clks   = ARRAY_SIZE(sun9i_a80_usb_clks),
+
+       .hw_clks        = &sun9i_a80_usb_hw_clks,
+
+       .resets         = sun9i_a80_usb_resets,
+       .num_resets     = ARRAY_SIZE(sun9i_a80_usb_resets),
+};
+
+static int sun9i_a80_usb_clk_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+       struct clk *bus_clk;
+       void __iomem *reg;
+       int ret;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       reg = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(reg))
+               return PTR_ERR(reg);
+
+       bus_clk = devm_clk_get(&pdev->dev, "bus");
+       if (IS_ERR(bus_clk)) {
+               ret = PTR_ERR(bus_clk);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(&pdev->dev, "Couldn't get bus clk: %d\n", ret);
+               return ret;
+       }
+
+       /* The bus clock needs to be enabled for us to access the registers */
+       ret = clk_prepare_enable(bus_clk);
+       if (ret) {
+               dev_err(&pdev->dev, "Couldn't enable bus clk: %d\n", ret);
+               return ret;
+       }
+
+       ret = sunxi_ccu_probe(pdev->dev.of_node, reg,
+                             &sun9i_a80_usb_clk_desc);
+       if (ret)
+               goto err_disable_clk;
+
+       return 0;
+
+err_disable_clk:
+       clk_disable_unprepare(bus_clk);
+       return ret;
+}
+
+static const struct of_device_id sun9i_a80_usb_clk_ids[] = {
+       { .compatible = "allwinner,sun9i-a80-usb-clks" },
+       { }
+};
+
+static struct platform_driver sun9i_a80_usb_clk_driver = {
+       .probe  = sun9i_a80_usb_clk_probe,
+       .driver = {
+               .name   = "sun9i-a80-usb-clks",
+               .of_match_table = sun9i_a80_usb_clk_ids,
+       },
+};
+builtin_platform_driver(sun9i_a80_usb_clk_driver);
diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.h b/drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.h
new file mode 100644 (file)
index 0000000..a184280
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2016 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai <wens@csie.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN9I_A80_USB_H_
+#define _CCU_SUN9I_A80_USB_H_
+
+#include <dt-bindings/clock/sun9i-a80-usb.h>
+#include <dt-bindings/reset/sun9i-a80-usb.h>
+
+#define CLK_NUMBER     (CLK_USB_HSIC + 1)
+
+#endif /* _CCU_SUN9I_A80_USB_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
new file mode 100644 (file)
index 0000000..e13e313
--- /dev/null
@@ -0,0 +1,1223 @@
+/*
+ * Copyright (c) 2016 Chen-Yu Tsai. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_nkmp.h"
+#include "ccu_nm.h"
+#include "ccu_phase.h"
+
+#include "ccu-sun9i-a80.h"
+
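+/*
+ * All PLL lock indicators live in this single status register; each
+ * PLL below points at it through .lock_reg and tests its own .lock bit.
+ */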
+#define CCU_SUN9I_LOCK_REG     0x09c
+
+static struct clk_div_table pll_cpux_p_div_table[] = {
+       { .val = 0, .div = 1 },
+       { .val = 1, .div = 4 },
+       { /* Sentinel */ },
+};
+
+/*
+ * The CPU PLLs are actually NP clocks, but P is /1 or /4, so here we
+ * use the NM clocks with a divider table for M.
+ */
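+/*
+ * A sketch of the resulting rate, assuming the N field below is
+ * applied directly (offset 0): N = 42 with P = /1 gives
+ * 24 MHz * 42 = 1.008 GHz, while the same N with P = /4 gives 252 MHz.
+ */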
+static struct ccu_nm pll_c0cpux_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(0),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV_TABLE(16, 1, pll_cpux_p_div_table),
+       .common         = {
+               .reg            = 0x000,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-c0cpux", "osc24M",
+                                             &ccu_nm_ops, CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nm pll_c1cpux_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(1),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV_TABLE(16, 1, pll_cpux_p_div_table),
+       .common         = {
+               .reg            = 0x004,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-c1cpux", "osc24M",
+                                             &ccu_nm_ops, CLK_SET_RATE_UNGATE),
+       },
+};
+
+/*
+ * The Audio PLL has d1 and d2 dividers in addition to the usual N and
+ * M factors. Since we only need two frequencies from this PLL,
+ * 22.5792 MHz and 24.576 MHz, ignore those dividers for now and
+ * enforce their default values: d1 = 0, d2 = 1.
+ */
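+/* Those two rates are 512 * 44.1 kHz and 512 * 48 kHz respectively. */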
+#define SUN9I_A80_PLL_AUDIO_REG        0x008
+
+static struct ccu_nm pll_audio_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(2),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV_OFFSET(0, 6, 0),
+       .common         = {
+               .reg            = 0x008,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-audio", "osc24M",
+                                             &ccu_nm_ops, CLK_SET_RATE_UNGATE),
+       },
+};
+
+/* Some PLLs are input * N / div1 / div2. Model them as NKMP with no K */
+static struct ccu_nkmp pll_periph0_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(3),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .p              = _SUNXI_CCU_DIV(18, 1), /* output divider */
+       .common         = {
+               .reg            = 0x00c,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-periph0", "osc24M",
+                                             &ccu_nkmp_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nkmp pll_ve_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(4),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .p              = _SUNXI_CCU_DIV(18, 1), /* output divider */
+       .common         = {
+               .reg            = 0x010,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-ve", "osc24M",
+                                             &ccu_nkmp_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nkmp pll_ddr_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(5),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .p              = _SUNXI_CCU_DIV(18, 1), /* output divider */
+       .common         = {
+               .reg            = 0x014,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-ddr", "osc24M",
+                                             &ccu_nkmp_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nm pll_video0_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(6),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .common         = {
+               .reg            = 0x018,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-video0", "osc24M",
+                                             &ccu_nm_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nkmp pll_video1_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(7),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .p              = _SUNXI_CCU_DIV(0, 2), /* external divider p */
+       .common         = {
+               .reg            = 0x01c,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-video1", "osc24M",
+                                             &ccu_nkmp_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nkmp pll_gpu_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(8),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .p              = _SUNXI_CCU_DIV(18, 1), /* output divider */
+       .common         = {
+               .reg            = 0x020,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-gpu", "osc24M",
+                                             &ccu_nkmp_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nkmp pll_de_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(9),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .p              = _SUNXI_CCU_DIV(18, 1), /* output divider */
+       .common         = {
+               .reg            = 0x024,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-de", "osc24M",
+                                             &ccu_nkmp_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nkmp pll_isp_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(10),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .p              = _SUNXI_CCU_DIV(18, 1), /* output divider */
+       .common         = {
+               .reg            = 0x028,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-isp", "osc24M",
+                                             &ccu_nkmp_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static struct ccu_nkmp pll_periph1_clk = {
+       .enable         = BIT(31),
+       .lock           = BIT(11),
+       .n              = _SUNXI_CCU_MULT_OFFSET_MIN_MAX(8, 8, 0, 12, 0),
+       .m              = _SUNXI_CCU_DIV(16, 1), /* input divider */
+       .p              = _SUNXI_CCU_DIV(18, 1), /* output divider */
+       .common         = {
+               .reg            = 0x02c,
+               .lock_reg       = CCU_SUN9I_LOCK_REG,
+               .features       = CCU_FEATURE_LOCK_REG,
+               .hw.init        = CLK_HW_INIT("pll-periph1", "osc24M",
+                                             &ccu_nkmp_ops,
+                                             CLK_SET_RATE_UNGATE),
+       },
+};
+
+static const char * const c0cpux_parents[] = { "osc24M", "pll-c0cpux" };
+static SUNXI_CCU_MUX(c0cpux_clk, "c0cpux", c0cpux_parents,
+                    0x50, 0, 1, CLK_SET_RATE_PARENT | CLK_IS_CRITICAL);
+
+static const char * const c1cpux_parents[] = { "osc24M", "pll-c1cpux" };
+static SUNXI_CCU_MUX(c1cpux_clk, "c1cpux", c1cpux_parents,
+                    0x50, 8, 1, CLK_SET_RATE_PARENT | CLK_IS_CRITICAL);
+
+static struct clk_div_table axi_div_table[] = {
+       { .val = 0, .div = 1 },
+       { .val = 1, .div = 2 },
+       { .val = 2, .div = 3 },
+       { .val = 3, .div = 4 },
+       { .val = 4, .div = 4 },
+       { .val = 5, .div = 4 },
+       { .val = 6, .div = 4 },
+       { .val = 7, .div = 4 },
+       { /* Sentinel */ },
+};
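+/* Register values 4-7 are redundant encodings of divide-by-4. */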
+
+static SUNXI_CCU_M(atb0_clk, "atb0", "c0cpux", 0x054, 8, 2, 0);
+
+static SUNXI_CCU_DIV_TABLE(axi0_clk, "axi0", "c0cpux",
+                          0x054, 0, 3, axi_div_table, 0);
+
+static SUNXI_CCU_M(atb1_clk, "atb1", "c1cpux", 0x058, 8, 2, 0);
+
+static SUNXI_CCU_DIV_TABLE(axi1_clk, "axi1", "c1cpux",
+                          0x058, 0, 3, axi_div_table, 0);
+
+static const char * const gtbus_parents[] = { "osc24M", "pll-periph0",
+                                             "pll-periph1", "pll-periph1" };
+static SUNXI_CCU_M_WITH_MUX(gtbus_clk, "gtbus", gtbus_parents,
+                           0x05c, 0, 2, 24, 2, CLK_IS_CRITICAL);
+
+static const char * const ahb_parents[] = { "gtbus", "pll-periph0",
+                                           "pll-periph1", "pll-periph1" };
+static struct ccu_div ahb0_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
+       .mux            = _SUNXI_CCU_MUX(24, 2),
+       .common         = {
+               .reg            = 0x060,
+               .hw.init        = CLK_HW_INIT_PARENTS("ahb0",
+                                                     ahb_parents,
+                                                     &ccu_div_ops,
+                                                     0),
+       },
+};
+
+static struct ccu_div ahb1_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
+       .mux            = _SUNXI_CCU_MUX(24, 2),
+       .common         = {
+               .reg            = 0x064,
+               .hw.init        = CLK_HW_INIT_PARENTS("ahb1",
+                                                     ahb_parents,
+                                                     &ccu_div_ops,
+                                                     0),
+       },
+};
+
+static struct ccu_div ahb2_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
+       .mux            = _SUNXI_CCU_MUX(24, 2),
+       .common         = {
+               .reg            = 0x068,
+               .hw.init        = CLK_HW_INIT_PARENTS("ahb2",
+                                                     ahb_parents,
+                                                     &ccu_div_ops,
+                                                     0),
+       },
+};
+
+static const char * const apb_parents[] = { "osc24M", "pll-periph0" };
+
+static struct ccu_div apb0_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
+       .mux            = _SUNXI_CCU_MUX(24, 1),
+       .common         = {
+               .reg            = 0x070,
+               .hw.init        = CLK_HW_INIT_PARENTS("apb0",
+                                                     apb_parents,
+                                                     &ccu_div_ops,
+                                                     0),
+       },
+};
+
+static struct ccu_div apb1_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
+       .mux            = _SUNXI_CCU_MUX(24, 1),
+       .common         = {
+               .reg            = 0x074,
+               .hw.init        = CLK_HW_INIT_PARENTS("apb1",
+                                                     apb_parents,
+                                                     &ccu_div_ops,
+                                                     0),
+       },
+};
+
+static struct ccu_div cci400_clk = {
+       .div            = _SUNXI_CCU_DIV_FLAGS(0, 2, CLK_DIVIDER_POWER_OF_TWO),
+       .mux            = _SUNXI_CCU_MUX(24, 2),
+       .common         = {
+               .reg            = 0x078,
+               .hw.init        = CLK_HW_INIT_PARENTS("cci400",
+                                                     ahb_parents,
+                                                     &ccu_div_ops,
+                                                     CLK_IS_CRITICAL),
+       },
+};
+
+static SUNXI_CCU_M_WITH_MUX_GATE(ats_clk, "ats", apb_parents,
+                                0x080, 0, 3, 24, 2, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(trace_clk, "trace", apb_parents,
+                                0x084, 0, 3, 24, 2, BIT(31), 0);
+
+static const char * const out_parents[] = { "osc24M", "osc32k", "osc24M" };
+static const struct ccu_mux_fixed_prediv out_prediv = {
+       .index = 0, .div = 750
+};
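+/*
+ * Mux index 0 routes osc24M through the fixed /750 predivider,
+ * i.e. 24 MHz / 750 = 32 kHz.
+ */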
+
+static struct ccu_mp out_a_clk = {
+       .enable         = BIT(31),
+       .m              = _SUNXI_CCU_DIV(8, 5),
+       .p              = _SUNXI_CCU_DIV(20, 2),
+       .mux            = {
+               .shift          = 24,
+               .width          = 4,
+               .fixed_predivs  = &out_prediv,
+               .n_predivs      = 1,
+       },
+       .common         = {
+               .reg            = 0x180,
+               .features       = CCU_FEATURE_FIXED_PREDIV,
+               .hw.init        = CLK_HW_INIT_PARENTS("out-a",
+                                                     out_parents,
+                                                     &ccu_mp_ops,
+                                                     0),
+       },
+};
+
+static struct ccu_mp out_b_clk = {
+       .enable         = BIT(31),
+       .m              = _SUNXI_CCU_DIV(8, 5),
+       .p              = _SUNXI_CCU_DIV(20, 2),
+       .mux            = {
+               .shift          = 24,
+               .width          = 4,
+               .fixed_predivs  = &out_prediv,
+               .n_predivs      = 1,
+       },
+       .common         = {
+               .reg            = 0x184,
+               .features       = CCU_FEATURE_FIXED_PREDIV,
+               .hw.init        = CLK_HW_INIT_PARENTS("out-b",
+                                                     out_parents,
+                                                     &ccu_mp_ops,
+                                                     0),
+       },
+};
+
+static const char * const mod0_default_parents[] = { "osc24M", "pll-periph0" };
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand0_0_clk, "nand0-0", mod0_default_parents,
+                                 0x400,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand0_1_clk, "nand0-1", mod0_default_parents,
+                                 0x404,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand1_0_clk, "nand1-0", mod0_default_parents,
+                                 0x408,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand1_1_clk, "nand1-1", mod0_default_parents,
+                                 0x40c,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc0_clk, "mmc0", mod0_default_parents,
+                                 0x410,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_PHASE(mmc0_sample_clk, "mmc0-sample", "mmc0",
+                      0x410, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc0_output_clk, "mmc0-output", "mmc0",
+                      0x410, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc1_clk, "mmc1", mod0_default_parents,
+                                 0x414,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_PHASE(mmc1_sample_clk, "mmc1-sample", "mmc1",
+                      0x414, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc1_output_clk, "mmc1-output", "mmc1",
+                      0x414, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc2_clk, "mmc2", mod0_default_parents,
+                                 0x418,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_PHASE(mmc2_sample_clk, "mmc2-sample", "mmc2",
+                      0x418, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc2_output_clk, "mmc2-output", "mmc2",
+                      0x418, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc3_clk, "mmc3", mod0_default_parents,
+                                 0x41c,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_PHASE(mmc3_sample_clk, "mmc3-sample", "mmc3",
+                      0x41c, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc3_output_clk, "mmc3-output", "mmc3",
+                      0x41c, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(ts_clk, "ts", mod0_default_parents,
+                                 0x428,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static const char * const ss_parents[] = { "osc24M", "pll-periph0",
+                                          "pll-periph1" };
+static const u8 ss_table[] = { 0, 1, 13 };
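+/* The table maps each parent above to its register mux value: 0, 1 and 13. */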
+static struct ccu_mp ss_clk = {
+       .enable         = BIT(31),
+       .m              = _SUNXI_CCU_DIV(0, 4),
+       .p              = _SUNXI_CCU_DIV(16, 2),
+       .mux            = _SUNXI_CCU_MUX_TABLE(24, 4, ss_table),
+       .common         = {
+               .reg            = 0x42c,
+               .hw.init        = CLK_HW_INIT_PARENTS("ss",
+                                                     ss_parents,
+                                                     &ccu_mp_ops,
+                                                     0),
+       },
+};
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", mod0_default_parents,
+                                 0x430,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents,
+                                 0x434,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi2_clk, "spi2", mod0_default_parents,
+                                 0x438,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi3_clk, "spi3", mod0_default_parents,
+                                 0x43c,
+                                 0, 4,         /* M */
+                                 16, 2,        /* P */
+                                 24, 4,        /* mux */
+                                 BIT(31),      /* gate */
+                                 0);
+
+static SUNXI_CCU_M_WITH_GATE(i2s0_clk, "i2s0", "pll-audio",
+                            0x440, 0, 4, BIT(31), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_M_WITH_GATE(i2s1_clk, "i2s1", "pll-audio",
+                            0x444, 0, 4, BIT(31), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_M_WITH_GATE(spdif_clk, "spdif", "pll-audio",
+                            0x44c, 0, 4, BIT(31), CLK_SET_RATE_PARENT);
+
+static const char * const sdram_parents[] = { "pll-periph0", "pll-ddr" };
+static const u8 sdram_table[] = { 0, 3 };
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(sdram_clk, "sdram",
+                                      sdram_parents, sdram_table,
+                                      0x484,
+                                      8, 4,    /* M */
+                                      12, 4,   /* mux */
+                                      0,       /* no gate */
+                                      CLK_IS_CRITICAL);
+
+static SUNXI_CCU_M_WITH_GATE(de_clk, "de", "pll-de", 0x490,
+                            0, 4, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(edp_clk, "edp", "osc24M", 0x494, BIT(31), 0);
+
+static const char * const mp_parents[] = { "pll-video1", "pll-gpu", "pll-de" };
+static const u8 mp_table[] = { 9, 10, 11 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(mp_clk, "mp", mp_parents, mp_table,
+                                      0x498,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      0);
+
+static const char * const display_parents[] = { "pll-video0", "pll-video1" };
+static const u8 display_table[] = { 8, 9 };
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(lcd0_clk, "lcd0",
+                                      display_parents, display_table,
+                                      0x49c,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      CLK_SET_RATE_NO_REPARENT |
+                                      CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(lcd1_clk, "lcd1",
+                                      display_parents, display_table,
+                                      0x4a0,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      CLK_SET_RATE_NO_REPARENT |
+                                      CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(mipi_dsi0_clk, "mipi-dsi0",
+                                      display_parents, display_table,
+                                      0x4a8,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      CLK_SET_RATE_PARENT);
+
+static const char * const mipi_dsi1_parents[] = { "osc24M", "pll-video1" };
+static const u8 mipi_dsi1_table[] = { 0, 9 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(mipi_dsi1_clk, "mipi-dsi1",
+                                      mipi_dsi1_parents, mipi_dsi1_table,
+                                      0x4ac,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(hdmi_clk, "hdmi",
+                                      display_parents, display_table,
+                                      0x4b0,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      CLK_SET_RATE_NO_REPARENT |
+                                      CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(hdmi_slow_clk, "hdmi-slow", "osc24M", 0x4b4, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_GATE(mipi_csi_clk, "mipi-csi", "osc24M", 0x4bc,
+                            0, 4, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_GATE(csi_isp_clk, "csi-isp", "pll-isp", 0x4c0,
+                            0, 4, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(csi_misc_clk, "csi-misc", "osc24M", 0x4c0, BIT(16), 0);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi0_mclk_clk, "csi0-mclk",
+                                      mipi_dsi1_parents, mipi_dsi1_table,
+                                      0x4c4,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi1_mclk_clk, "csi1-mclk",
+                                      mipi_dsi1_parents, mipi_dsi1_table,
+                                      0x4c8,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      CLK_SET_RATE_PARENT);
+
+static const char * const fd_parents[] = { "pll-periph0", "pll-isp" };
+static const u8 fd_table[] = { 1, 12 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(fd_clk, "fd", fd_parents, fd_table,
+                                      0x4cc,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      0);
+static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve", 0x4d0,
+                            16, 3, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(avs_clk, "avs", "osc24M", 0x4d4, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_GATE(gpu_core_clk, "gpu-core", "pll-gpu", 0x4f0,
+                            0, 3, BIT(31), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_M_WITH_GATE(gpu_memory_clk, "gpu-memory", "pll-gpu", 0x4f4,
+                            0, 3, BIT(31), CLK_SET_RATE_PARENT);
+
+static const char * const gpu_axi_parents[] = { "pll-periph0", "pll-gpu" };
+static const u8 gpu_axi_table[] = { 1, 10 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(gpu_axi_clk, "gpu-axi",
+                                      gpu_axi_parents, gpu_axi_table,
+                                      0x4f8,
+                                      0, 4,    /* M */
+                                      24, 4,   /* mux */
+                                      BIT(31), /* gate */
+                                      CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_GATE(sata_clk, "sata", "pll-periph0", 0x500,
+                            0, 4, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_GATE(ac97_clk, "ac97", "pll-audio",
+                            0x504, 0, 4, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(mipi_hsi_clk, "mipi-hsi",
+                                mod0_default_parents, 0x508,
+                                0, 4,          /* M */
+                                24, 4,         /* mux */
+                                BIT(31),       /* gate */
+                                0);
+
+static const char * const gpadc_parents[] = { "osc24M", "pll-audio", "osc32k" };
+static const u8 gpadc_table[] = { 0, 4, 7 };
+static struct ccu_mp gpadc_clk = {
+       .enable         = BIT(31),
+       .m              = _SUNXI_CCU_DIV(0, 4),
+       .p              = _SUNXI_CCU_DIV(16, 2),
+       .mux            = _SUNXI_CCU_MUX_TABLE(24, 4, gpadc_table),
+       .common         = {
+               .reg            = 0x50c,
+               .hw.init        = CLK_HW_INIT_PARENTS("gpadc",
+                                                     gpadc_parents,
+                                                     &ccu_mp_ops,
+                                                     0),
+       },
+};
+
+static const char * const cir_tx_parents[] = { "osc24M", "osc32k" };
+static const u8 cir_tx_table[] = { 0, 7 };
+static struct ccu_mp cir_tx_clk = {
+       .enable         = BIT(31),
+       .m              = _SUNXI_CCU_DIV(0, 4),
+       .p              = _SUNXI_CCU_DIV(16, 2),
+       .mux            = _SUNXI_CCU_MUX_TABLE(24, 4, cir_tx_table),
+       .common         = {
+               .reg            = 0x510,
+               .hw.init        = CLK_HW_INIT_PARENTS("cir-tx",
+                                                     cir_tx_parents,
+                                                     &ccu_mp_ops,
+                                                     0),
+       },
+};
+
+/* AHB0 bus gates */
+static SUNXI_CCU_GATE(bus_fd_clk,      "bus-fd",       "ahb0",
+                     0x580, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_ve_clk,      "bus-ve",       "ahb0",
+                     0x580, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_gpu_ctrl_clk,        "bus-gpu-ctrl", "ahb0",
+                     0x580, BIT(3), 0);
+static SUNXI_CCU_GATE(bus_ss_clk,      "bus-ss",       "ahb0",
+                     0x580, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_mmc_clk,     "bus-mmc",      "ahb0",
+                     0x580, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_nand0_clk,   "bus-nand0",    "ahb0",
+                     0x580, BIT(12), 0);
+static SUNXI_CCU_GATE(bus_nand1_clk,   "bus-nand1",    "ahb0",
+                     0x580, BIT(13), 0);
+static SUNXI_CCU_GATE(bus_sdram_clk,   "bus-sdram",    "ahb0",
+                     0x580, BIT(14), 0);
+static SUNXI_CCU_GATE(bus_mipi_hsi_clk,        "bus-mipi-hsi", "ahb0",
+                     0x580, BIT(15), 0);
+static SUNXI_CCU_GATE(bus_sata_clk,    "bus-sata",     "ahb0",
+                     0x580, BIT(16), 0);
+static SUNXI_CCU_GATE(bus_ts_clk,      "bus-ts",       "ahb0",
+                     0x580, BIT(18), 0);
+static SUNXI_CCU_GATE(bus_spi0_clk,    "bus-spi0",     "ahb0",
+                     0x580, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_spi1_clk,    "bus-spi1",     "ahb0",
+                     0x580, BIT(21), 0);
+static SUNXI_CCU_GATE(bus_spi2_clk,    "bus-spi2",     "ahb0",
+                     0x580, BIT(22), 0);
+static SUNXI_CCU_GATE(bus_spi3_clk,    "bus-spi3",     "ahb0",
+                     0x580, BIT(23), 0);
+
+/* AHB1 bus gates */
+static SUNXI_CCU_GATE(bus_otg_clk,     "bus-otg",      "ahb1",
+                     0x584, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_usb_clk,     "bus-usb",      "ahb1",
+                     0x584, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_gmac_clk,    "bus-gmac",     "ahb1",
+                     0x584, BIT(17), 0);
+static SUNXI_CCU_GATE(bus_msgbox_clk,  "bus-msgbox",   "ahb1",
+                     0x584, BIT(21), 0);
+static SUNXI_CCU_GATE(bus_spinlock_clk,        "bus-spinlock", "ahb1",
+                     0x584, BIT(22), 0);
+static SUNXI_CCU_GATE(bus_hstimer_clk, "bus-hstimer",  "ahb1",
+                     0x584, BIT(23), 0);
+static SUNXI_CCU_GATE(bus_dma_clk,     "bus-dma",      "ahb1",
+                     0x584, BIT(24), 0);
+
+/* AHB2 bus gates */
+static SUNXI_CCU_GATE(bus_lcd0_clk,    "bus-lcd0",     "ahb2",
+                     0x588, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_lcd1_clk,    "bus-lcd1",     "ahb2",
+                     0x588, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_edp_clk,     "bus-edp",      "ahb2",
+                     0x588, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_csi_clk,     "bus-csi",      "ahb2",
+                     0x588, BIT(4), 0);
+static SUNXI_CCU_GATE(bus_hdmi_clk,    "bus-hdmi",     "ahb2",
+                     0x588, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_de_clk,      "bus-de",       "ahb2",
+                     0x588, BIT(7), 0);
+static SUNXI_CCU_GATE(bus_mp_clk,      "bus-mp",       "ahb2",
+                     0x588, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_mipi_dsi_clk,        "bus-mipi-dsi", "ahb2",
+                     0x588, BIT(11), 0);
+
+/* APB0 bus gates */
+static SUNXI_CCU_GATE(bus_spdif_clk,   "bus-spdif",    "apb0",
+                     0x590, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_pio_clk,     "bus-pio",      "apb0",
+                     0x590, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_ac97_clk,    "bus-ac97",     "apb0",
+                     0x590, BIT(11), 0);
+static SUNXI_CCU_GATE(bus_i2s0_clk,    "bus-i2s0",     "apb0",
+                     0x590, BIT(12), 0);
+static SUNXI_CCU_GATE(bus_i2s1_clk,    "bus-i2s1",     "apb0",
+                     0x590, BIT(13), 0);
+static SUNXI_CCU_GATE(bus_lradc_clk,   "bus-lradc",    "apb0",
+                     0x590, BIT(15), 0);
+static SUNXI_CCU_GATE(bus_gpadc_clk,   "bus-gpadc",    "apb0",
+                     0x590, BIT(17), 0);
+static SUNXI_CCU_GATE(bus_twd_clk,     "bus-twd",      "apb0",
+                     0x590, BIT(18), 0);
+static SUNXI_CCU_GATE(bus_cir_tx_clk,  "bus-cir-tx",   "apb0",
+                     0x590, BIT(19), 0);
+
+/* APB1 bus gates */
+static SUNXI_CCU_GATE(bus_i2c0_clk,    "bus-i2c0",     "apb1",
+                     0x594, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_i2c1_clk,    "bus-i2c1",     "apb1",
+                     0x594, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_i2c2_clk,    "bus-i2c2",     "apb1",
+                     0x594, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_i2c3_clk,    "bus-i2c3",     "apb1",
+                     0x594, BIT(3), 0);
+static SUNXI_CCU_GATE(bus_i2c4_clk,    "bus-i2c4",     "apb1",
+                     0x594, BIT(4), 0);
+static SUNXI_CCU_GATE(bus_uart0_clk,   "bus-uart0",    "apb1",
+                     0x594, BIT(16), 0);
+static SUNXI_CCU_GATE(bus_uart1_clk,   "bus-uart1",    "apb1",
+                     0x594, BIT(17), 0);
+static SUNXI_CCU_GATE(bus_uart2_clk,   "bus-uart2",    "apb1",
+                     0x594, BIT(18), 0);
+static SUNXI_CCU_GATE(bus_uart3_clk,   "bus-uart3",    "apb1",
+                     0x594, BIT(19), 0);
+static SUNXI_CCU_GATE(bus_uart4_clk,   "bus-uart4",    "apb1",
+                     0x594, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_uart5_clk,   "bus-uart5",    "apb1",
+                     0x594, BIT(21), 0);
+
+static struct ccu_common *sun9i_a80_ccu_clks[] = {
+       &pll_c0cpux_clk.common,
+       &pll_c1cpux_clk.common,
+       &pll_audio_clk.common,
+       &pll_periph0_clk.common,
+       &pll_ve_clk.common,
+       &pll_ddr_clk.common,
+       &pll_video0_clk.common,
+       &pll_video1_clk.common,
+       &pll_gpu_clk.common,
+       &pll_de_clk.common,
+       &pll_isp_clk.common,
+       &pll_periph1_clk.common,
+       &c0cpux_clk.common,
+       &c1cpux_clk.common,
+       &atb0_clk.common,
+       &axi0_clk.common,
+       &atb1_clk.common,
+       &axi1_clk.common,
+       &gtbus_clk.common,
+       &ahb0_clk.common,
+       &ahb1_clk.common,
+       &ahb2_clk.common,
+       &apb0_clk.common,
+       &apb1_clk.common,
+       &cci400_clk.common,
+       &ats_clk.common,
+       &trace_clk.common,
+
+       &out_a_clk.common,
+       &out_b_clk.common,
+
+       /* module clocks */
+       &nand0_0_clk.common,
+       &nand0_1_clk.common,
+       &nand1_0_clk.common,
+       &nand1_1_clk.common,
+       &mmc0_clk.common,
+       &mmc0_sample_clk.common,
+       &mmc0_output_clk.common,
+       &mmc1_clk.common,
+       &mmc1_sample_clk.common,
+       &mmc1_output_clk.common,
+       &mmc2_clk.common,
+       &mmc2_sample_clk.common,
+       &mmc2_output_clk.common,
+       &mmc3_clk.common,
+       &mmc3_sample_clk.common,
+       &mmc3_output_clk.common,
+       &ts_clk.common,
+       &ss_clk.common,
+       &spi0_clk.common,
+       &spi1_clk.common,
+       &spi2_clk.common,
+       &spi3_clk.common,
+       &i2s0_clk.common,
+       &i2s1_clk.common,
+       &spdif_clk.common,
+       &sdram_clk.common,
+       &de_clk.common,
+       &edp_clk.common,
+       &mp_clk.common,
+       &lcd0_clk.common,
+       &lcd1_clk.common,
+       &mipi_dsi0_clk.common,
+       &mipi_dsi1_clk.common,
+       &hdmi_clk.common,
+       &hdmi_slow_clk.common,
+       &mipi_csi_clk.common,
+       &csi_isp_clk.common,
+       &csi_misc_clk.common,
+       &csi0_mclk_clk.common,
+       &csi1_mclk_clk.common,
+       &fd_clk.common,
+       &ve_clk.common,
+       &avs_clk.common,
+       &gpu_core_clk.common,
+       &gpu_memory_clk.common,
+       &gpu_axi_clk.common,
+       &sata_clk.common,
+       &ac97_clk.common,
+       &mipi_hsi_clk.common,
+       &gpadc_clk.common,
+       &cir_tx_clk.common,
+
+       /* AHB0 bus gates */
+       &bus_fd_clk.common,
+       &bus_ve_clk.common,
+       &bus_gpu_ctrl_clk.common,
+       &bus_ss_clk.common,
+       &bus_mmc_clk.common,
+       &bus_nand0_clk.common,
+       &bus_nand1_clk.common,
+       &bus_sdram_clk.common,
+       &bus_mipi_hsi_clk.common,
+       &bus_sata_clk.common,
+       &bus_ts_clk.common,
+       &bus_spi0_clk.common,
+       &bus_spi1_clk.common,
+       &bus_spi2_clk.common,
+       &bus_spi3_clk.common,
+
+       /* AHB1 bus gates */
+       &bus_otg_clk.common,
+       &bus_usb_clk.common,
+       &bus_gmac_clk.common,
+       &bus_msgbox_clk.common,
+       &bus_spinlock_clk.common,
+       &bus_hstimer_clk.common,
+       &bus_dma_clk.common,
+
+       /* AHB2 bus gates */
+       &bus_lcd0_clk.common,
+       &bus_lcd1_clk.common,
+       &bus_edp_clk.common,
+       &bus_csi_clk.common,
+       &bus_hdmi_clk.common,
+       &bus_de_clk.common,
+       &bus_mp_clk.common,
+       &bus_mipi_dsi_clk.common,
+
+       /* APB0 bus gates */
+       &bus_spdif_clk.common,
+       &bus_pio_clk.common,
+       &bus_ac97_clk.common,
+       &bus_i2s0_clk.common,
+       &bus_i2s1_clk.common,
+       &bus_lradc_clk.common,
+       &bus_gpadc_clk.common,
+       &bus_twd_clk.common,
+       &bus_cir_tx_clk.common,
+
+       /* APB1 bus gates */
+       &bus_i2c0_clk.common,
+       &bus_i2c1_clk.common,
+       &bus_i2c2_clk.common,
+       &bus_i2c3_clk.common,
+       &bus_i2c4_clk.common,
+       &bus_uart0_clk.common,
+       &bus_uart1_clk.common,
+       &bus_uart2_clk.common,
+       &bus_uart3_clk.common,
+       &bus_uart4_clk.common,
+       &bus_uart5_clk.common,
+};
+
+static struct clk_hw_onecell_data sun9i_a80_hw_clks = {
+       .hws    = {
+               [CLK_PLL_C0CPUX]        = &pll_c0cpux_clk.common.hw,
+               [CLK_PLL_C1CPUX]        = &pll_c1cpux_clk.common.hw,
+               [CLK_PLL_AUDIO]         = &pll_audio_clk.common.hw,
+               [CLK_PLL_PERIPH0]       = &pll_periph0_clk.common.hw,
+               [CLK_PLL_VE]            = &pll_ve_clk.common.hw,
+               [CLK_PLL_DDR]           = &pll_ddr_clk.common.hw,
+               [CLK_PLL_VIDEO0]        = &pll_video0_clk.common.hw,
+               [CLK_PLL_VIDEO1]        = &pll_video1_clk.common.hw,
+               [CLK_PLL_GPU]           = &pll_gpu_clk.common.hw,
+               [CLK_PLL_DE]            = &pll_de_clk.common.hw,
+               [CLK_PLL_ISP]           = &pll_isp_clk.common.hw,
+               [CLK_PLL_PERIPH1]       = &pll_periph1_clk.common.hw,
+               [CLK_C0CPUX]            = &c0cpux_clk.common.hw,
+               [CLK_C1CPUX]            = &c1cpux_clk.common.hw,
+               [CLK_ATB0]              = &atb0_clk.common.hw,
+               [CLK_AXI0]              = &axi0_clk.common.hw,
+               [CLK_ATB1]              = &atb1_clk.common.hw,
+               [CLK_AXI1]              = &axi1_clk.common.hw,
+               [CLK_GTBUS]             = &gtbus_clk.common.hw,
+               [CLK_AHB0]              = &ahb0_clk.common.hw,
+               [CLK_AHB1]              = &ahb1_clk.common.hw,
+               [CLK_AHB2]              = &ahb2_clk.common.hw,
+               [CLK_APB0]              = &apb0_clk.common.hw,
+               [CLK_APB1]              = &apb1_clk.common.hw,
+               [CLK_CCI400]            = &cci400_clk.common.hw,
+               [CLK_ATS]               = &ats_clk.common.hw,
+               [CLK_TRACE]             = &trace_clk.common.hw,
+
+               [CLK_OUT_A]             = &out_a_clk.common.hw,
+               [CLK_OUT_B]             = &out_b_clk.common.hw,
+
+               [CLK_NAND0_0]           = &nand0_0_clk.common.hw,
+               [CLK_NAND0_1]           = &nand0_1_clk.common.hw,
+               [CLK_NAND1_0]           = &nand1_0_clk.common.hw,
+               [CLK_NAND1_1]           = &nand1_1_clk.common.hw,
+               [CLK_MMC0]              = &mmc0_clk.common.hw,
+               [CLK_MMC0_SAMPLE]       = &mmc0_sample_clk.common.hw,
+               [CLK_MMC0_OUTPUT]       = &mmc0_output_clk.common.hw,
+               [CLK_MMC1]              = &mmc1_clk.common.hw,
+               [CLK_MMC1_SAMPLE]       = &mmc1_sample_clk.common.hw,
+               [CLK_MMC1_OUTPUT]       = &mmc1_output_clk.common.hw,
+               [CLK_MMC2]              = &mmc2_clk.common.hw,
+               [CLK_MMC2_SAMPLE]       = &mmc2_sample_clk.common.hw,
+               [CLK_MMC2_OUTPUT]       = &mmc2_output_clk.common.hw,
+               [CLK_MMC3]              = &mmc3_clk.common.hw,
+               [CLK_MMC3_SAMPLE]       = &mmc3_sample_clk.common.hw,
+               [CLK_MMC3_OUTPUT]       = &mmc3_output_clk.common.hw,
+               [CLK_TS]                = &ts_clk.common.hw,
+               [CLK_SS]                = &ss_clk.common.hw,
+               [CLK_SPI0]              = &spi0_clk.common.hw,
+               [CLK_SPI1]              = &spi1_clk.common.hw,
+               [CLK_SPI2]              = &spi2_clk.common.hw,
+               [CLK_SPI3]              = &spi3_clk.common.hw,
+               [CLK_I2S0]              = &i2s0_clk.common.hw,
+               [CLK_I2S1]              = &i2s1_clk.common.hw,
+               [CLK_SPDIF]             = &spdif_clk.common.hw,
+               [CLK_SDRAM]             = &sdram_clk.common.hw,
+               [CLK_DE]                = &de_clk.common.hw,
+               [CLK_EDP]               = &edp_clk.common.hw,
+               [CLK_MP]                = &mp_clk.common.hw,
+               [CLK_LCD0]              = &lcd0_clk.common.hw,
+               [CLK_LCD1]              = &lcd1_clk.common.hw,
+               [CLK_MIPI_DSI0]         = &mipi_dsi0_clk.common.hw,
+               [CLK_MIPI_DSI1]         = &mipi_dsi1_clk.common.hw,
+               [CLK_HDMI]              = &hdmi_clk.common.hw,
+               [CLK_HDMI_SLOW]         = &hdmi_slow_clk.common.hw,
+               [CLK_MIPI_CSI]          = &mipi_csi_clk.common.hw,
+               [CLK_CSI_ISP]           = &csi_isp_clk.common.hw,
+               [CLK_CSI_MISC]          = &csi_misc_clk.common.hw,
+               [CLK_CSI0_MCLK]         = &csi0_mclk_clk.common.hw,
+               [CLK_CSI1_MCLK]         = &csi1_mclk_clk.common.hw,
+               [CLK_FD]                = &fd_clk.common.hw,
+               [CLK_VE]                = &ve_clk.common.hw,
+               [CLK_AVS]               = &avs_clk.common.hw,
+               [CLK_GPU_CORE]          = &gpu_core_clk.common.hw,
+               [CLK_GPU_MEMORY]        = &gpu_memory_clk.common.hw,
+               [CLK_GPU_AXI]           = &gpu_axi_clk.common.hw,
+               [CLK_SATA]              = &sata_clk.common.hw,
+               [CLK_AC97]              = &ac97_clk.common.hw,
+               [CLK_MIPI_HSI]          = &mipi_hsi_clk.common.hw,
+               [CLK_GPADC]             = &gpadc_clk.common.hw,
+               [CLK_CIR_TX]            = &cir_tx_clk.common.hw,
+
+               [CLK_BUS_FD]            = &bus_fd_clk.common.hw,
+               [CLK_BUS_VE]            = &bus_ve_clk.common.hw,
+               [CLK_BUS_GPU_CTRL]      = &bus_gpu_ctrl_clk.common.hw,
+               [CLK_BUS_SS]            = &bus_ss_clk.common.hw,
+               [CLK_BUS_MMC]           = &bus_mmc_clk.common.hw,
+               [CLK_BUS_NAND0]         = &bus_nand0_clk.common.hw,
+               [CLK_BUS_NAND1]         = &bus_nand1_clk.common.hw,
+               [CLK_BUS_SDRAM]         = &bus_sdram_clk.common.hw,
+               [CLK_BUS_MIPI_HSI]      = &bus_mipi_hsi_clk.common.hw,
+               [CLK_BUS_SATA]          = &bus_sata_clk.common.hw,
+               [CLK_BUS_TS]            = &bus_ts_clk.common.hw,
+               [CLK_BUS_SPI0]          = &bus_spi0_clk.common.hw,
+               [CLK_BUS_SPI1]          = &bus_spi1_clk.common.hw,
+               [CLK_BUS_SPI2]          = &bus_spi2_clk.common.hw,
+               [CLK_BUS_SPI3]          = &bus_spi3_clk.common.hw,
+
+               [CLK_BUS_OTG]           = &bus_otg_clk.common.hw,
+               [CLK_BUS_USB]           = &bus_usb_clk.common.hw,
+               [CLK_BUS_GMAC]          = &bus_gmac_clk.common.hw,
+               [CLK_BUS_MSGBOX]        = &bus_msgbox_clk.common.hw,
+               [CLK_BUS_SPINLOCK]      = &bus_spinlock_clk.common.hw,
+               [CLK_BUS_HSTIMER]       = &bus_hstimer_clk.common.hw,
+               [CLK_BUS_DMA]           = &bus_dma_clk.common.hw,
+
+               [CLK_BUS_LCD0]          = &bus_lcd0_clk.common.hw,
+               [CLK_BUS_LCD1]          = &bus_lcd1_clk.common.hw,
+               [CLK_BUS_EDP]           = &bus_edp_clk.common.hw,
+               [CLK_BUS_CSI]           = &bus_csi_clk.common.hw,
+               [CLK_BUS_HDMI]          = &bus_hdmi_clk.common.hw,
+               [CLK_BUS_DE]            = &bus_de_clk.common.hw,
+               [CLK_BUS_MP]            = &bus_mp_clk.common.hw,
+               [CLK_BUS_MIPI_DSI]      = &bus_mipi_dsi_clk.common.hw,
+
+               [CLK_BUS_SPDIF]         = &bus_spdif_clk.common.hw,
+               [CLK_BUS_PIO]           = &bus_pio_clk.common.hw,
+               [CLK_BUS_AC97]          = &bus_ac97_clk.common.hw,
+               [CLK_BUS_I2S0]          = &bus_i2s0_clk.common.hw,
+               [CLK_BUS_I2S1]          = &bus_i2s1_clk.common.hw,
+               [CLK_BUS_LRADC]         = &bus_lradc_clk.common.hw,
+               [CLK_BUS_GPADC]         = &bus_gpadc_clk.common.hw,
+               [CLK_BUS_TWD]           = &bus_twd_clk.common.hw,
+               [CLK_BUS_CIR_TX]        = &bus_cir_tx_clk.common.hw,
+
+               [CLK_BUS_I2C0]          = &bus_i2c0_clk.common.hw,
+               [CLK_BUS_I2C1]          = &bus_i2c1_clk.common.hw,
+               [CLK_BUS_I2C2]          = &bus_i2c2_clk.common.hw,
+               [CLK_BUS_I2C3]          = &bus_i2c3_clk.common.hw,
+               [CLK_BUS_I2C4]          = &bus_i2c4_clk.common.hw,
+               [CLK_BUS_UART0]         = &bus_uart0_clk.common.hw,
+               [CLK_BUS_UART1]         = &bus_uart1_clk.common.hw,
+               [CLK_BUS_UART2]         = &bus_uart2_clk.common.hw,
+               [CLK_BUS_UART3]         = &bus_uart3_clk.common.hw,
+               [CLK_BUS_UART4]         = &bus_uart4_clk.common.hw,
+               [CLK_BUS_UART5]         = &bus_uart5_clk.common.hw,
+       },
+       .num    = CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun9i_a80_ccu_resets[] = {
+       /* AHB0 reset controls */
+       [RST_BUS_FD]            = { 0x5a0, BIT(0) },
+       [RST_BUS_VE]            = { 0x5a0, BIT(1) },
+       [RST_BUS_GPU_CTRL]      = { 0x5a0, BIT(3) },
+       [RST_BUS_SS]            = { 0x5a0, BIT(5) },
+       [RST_BUS_MMC]           = { 0x5a0, BIT(8) },
+       [RST_BUS_NAND0]         = { 0x5a0, BIT(12) },
+       [RST_BUS_NAND1]         = { 0x5a0, BIT(13) },
+       [RST_BUS_SDRAM]         = { 0x5a0, BIT(14) },
+       [RST_BUS_SATA]          = { 0x5a0, BIT(16) },
+       [RST_BUS_TS]            = { 0x5a0, BIT(18) },
+       [RST_BUS_SPI0]          = { 0x5a0, BIT(20) },
+       [RST_BUS_SPI1]          = { 0x5a0, BIT(21) },
+       [RST_BUS_SPI2]          = { 0x5a0, BIT(22) },
+       [RST_BUS_SPI3]          = { 0x5a0, BIT(23) },
+
+       /* AHB1 reset controls */
+       [RST_BUS_OTG]           = { 0x5a4, BIT(0) },
+       [RST_BUS_OTG_PHY]       = { 0x5a4, BIT(1) },
+       [RST_BUS_MIPI_HSI]      = { 0x5a4, BIT(9) },
+       [RST_BUS_GMAC]          = { 0x5a4, BIT(17) },
+       [RST_BUS_MSGBOX]        = { 0x5a4, BIT(21) },
+       [RST_BUS_SPINLOCK]      = { 0x5a4, BIT(22) },
+       [RST_BUS_HSTIMER]       = { 0x5a4, BIT(23) },
+       [RST_BUS_DMA]           = { 0x5a4, BIT(24) },
+
+       /* AHB2 reset controls */
+       [RST_BUS_LCD0]          = { 0x5a8, BIT(0) },
+       [RST_BUS_LCD1]          = { 0x5a8, BIT(1) },
+       [RST_BUS_EDP]           = { 0x5a8, BIT(2) },
+       [RST_BUS_LVDS]          = { 0x5a8, BIT(3) },
+       [RST_BUS_CSI]           = { 0x5a8, BIT(4) },
+       [RST_BUS_HDMI0]         = { 0x5a8, BIT(5) },
+       [RST_BUS_HDMI1]         = { 0x5a8, BIT(6) },
+       [RST_BUS_DE]            = { 0x5a8, BIT(7) },
+       [RST_BUS_MP]            = { 0x5a8, BIT(8) },
+       [RST_BUS_GPU]           = { 0x5a8, BIT(9) },
+       [RST_BUS_MIPI_DSI]      = { 0x5a8, BIT(11) },
+
+       /* APB0 reset controls */
+       [RST_BUS_SPDIF]         = { 0x5b0, BIT(1) },
+       [RST_BUS_AC97]          = { 0x5b0, BIT(11) },
+       [RST_BUS_I2S0]          = { 0x5b0, BIT(12) },
+       [RST_BUS_I2S1]          = { 0x5b0, BIT(13) },
+       [RST_BUS_LRADC]         = { 0x5b0, BIT(15) },
+       [RST_BUS_GPADC]         = { 0x5b0, BIT(17) },
+       [RST_BUS_CIR_TX]        = { 0x5b0, BIT(19) },
+
+       /* APB1 reset controls */
+       [RST_BUS_I2C0]          = { 0x5b4, BIT(0) },
+       [RST_BUS_I2C1]          = { 0x5b4, BIT(1) },
+       [RST_BUS_I2C2]          = { 0x5b4, BIT(2) },
+       [RST_BUS_I2C3]          = { 0x5b4, BIT(3) },
+       [RST_BUS_I2C4]          = { 0x5b4, BIT(4) },
+       [RST_BUS_UART0]         = { 0x5b4, BIT(16) },
+       [RST_BUS_UART1]         = { 0x5b4, BIT(17) },
+       [RST_BUS_UART2]         = { 0x5b4, BIT(18) },
+       [RST_BUS_UART3]         = { 0x5b4, BIT(19) },
+       [RST_BUS_UART4]         = { 0x5b4, BIT(20) },
+       [RST_BUS_UART5]         = { 0x5b4, BIT(21) },
+};
+
+static const struct sunxi_ccu_desc sun9i_a80_ccu_desc = {
+       .ccu_clks       = sun9i_a80_ccu_clks,
+       .num_ccu_clks   = ARRAY_SIZE(sun9i_a80_ccu_clks),
+
+       .hw_clks        = &sun9i_a80_hw_clks,
+
+       .resets         = sun9i_a80_ccu_resets,
+       .num_resets     = ARRAY_SIZE(sun9i_a80_ccu_resets),
+};
+
+static int sun9i_a80_ccu_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+       void __iomem *reg;
+       u32 val;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       reg = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(reg))
+               return PTR_ERR(reg);
+
+       /* Enforce d1 = 0, d2 = 0 for Audio PLL */
+       val = readl(reg + SUN9I_A80_PLL_AUDIO_REG);
+       val &= ~(BIT(16) | BIT(18));
+       writel(val, reg + SUN9I_A80_PLL_AUDIO_REG);
+
+       return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun9i_a80_ccu_desc);
+}
+
+static const struct of_device_id sun9i_a80_ccu_ids[] = {
+       { .compatible = "allwinner,sun9i-a80-ccu" },
+       { }
+};
+
+static struct platform_driver sun9i_a80_ccu_driver = {
+       .probe  = sun9i_a80_ccu_probe,
+       .driver = {
+               .name   = "sun9i-a80-ccu",
+               .of_match_table = sun9i_a80_ccu_ids,
+       },
+};
+builtin_platform_driver(sun9i_a80_ccu_driver);
diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.h b/drivers/clk/sunxi-ng/ccu-sun9i-a80.h
new file mode 100644 (file)
index 0000000..3156623
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2016 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai <wens@csie.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN9I_A80_H_
+#define _CCU_SUN9I_A80_H_
+
+#include <dt-bindings/clock/sun9i-a80-ccu.h>
+#include <dt-bindings/reset/sun9i-a80-ccu.h>
+
+#define CLK_PLL_C0CPUX         0
+#define CLK_PLL_C1CPUX         1
+
+/* pll-audio and pll-periph0 are exported to the PRCM block */
+
+#define CLK_PLL_VE             4
+#define CLK_PLL_DDR            5
+#define CLK_PLL_VIDEO0         6
+#define CLK_PLL_VIDEO1         7
+#define CLK_PLL_GPU            8
+#define CLK_PLL_DE             9
+#define CLK_PLL_ISP            10
+#define CLK_PLL_PERIPH1                11
+
+/* The CPUX clocks are exported */
+
+#define CLK_ATB0               14
+#define CLK_AXI0               15
+#define CLK_ATB1               16
+#define CLK_AXI1               17
+#define CLK_GTBUS              18
+#define CLK_AHB0               19
+#define CLK_AHB1               20
+#define CLK_AHB2               21
+#define CLK_APB0               22
+#define CLK_APB1               23
+#define CLK_CCI400             24
+#define CLK_ATS                        25
+#define CLK_TRACE              26
+
+/* module clocks and bus gates exported */
+
+#define CLK_NUMBER             (CLK_BUS_UART5 + 1)
+
+#endif /* _CCU_SUN9I_A80_H_ */
index 51d4bac97ab301f9d0213ff40327325622fd2109..8a47bafd78905bce849235d791f1469d448afcc9 100644 (file)
@@ -25,13 +25,18 @@ static DEFINE_SPINLOCK(ccu_lock);
 
 void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock)
 {
+       void __iomem *addr;
        u32 reg;
 
        if (!lock)
                return;
 
-       WARN_ON(readl_relaxed_poll_timeout(common->base + common->reg, reg,
-                                          reg & lock, 100, 70000));
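+       /* some CCUs report the PLL lock bit in a separate status register */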
+       if (common->features & CCU_FEATURE_LOCK_REG)
+               addr = common->base + common->lock_reg;
+       else
+               addr = common->base + common->reg;
+
+       WARN_ON(readl_relaxed_poll_timeout(addr, reg, reg & lock, 100, 70000));
 }
 
 int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
@@ -70,6 +75,11 @@ int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
                goto err_clk_unreg;
 
        reset = kzalloc(sizeof(*reset), GFP_KERNEL);
+       if (!reset) {
+               ret = -ENOMEM;
+               goto err_alloc_reset;
+       }
+
        reset->rcdev.of_node = node;
        reset->rcdev.ops = &ccu_reset_ops;
        reset->rcdev.owner = THIS_MODULE;
@@ -85,6 +95,16 @@ int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
        return 0;
 
 err_of_clk_unreg:
+       kfree(reset);
+err_alloc_reset:
+       of_clk_del_provider(node);
 err_clk_unreg:
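+       /* unregister the clocks that were registered before the failure */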
+       while (--i >= 0) {
+               struct clk_hw *hw = desc->hw_clks->hws[i];
+
+               if (!hw)
+                       continue;
+               clk_hw_unregister(hw);
+       }
        return ret;
 }
index b3d9abfbd721d6f1def3e366044e8bd13fa1fe94..73d81dc58fc5ad91f8a293530aa89e2b22fcbdb3 100644 (file)
@@ -21,6 +21,8 @@
 #define CCU_FEATURE_VARIABLE_PREDIV    BIT(1)
 #define CCU_FEATURE_FIXED_PREDIV       BIT(2)
 #define CCU_FEATURE_FIXED_POSTDIV      BIT(3)
+#define CCU_FEATURE_ALL_PREDIV         BIT(4)
+#define CCU_FEATURE_LOCK_REG           BIT(5)
 
 struct device_node;
 
@@ -56,6 +58,8 @@ struct device_node;
 struct ccu_common {
        void __iomem    *base;
        u16             reg;
+       u16             lock_reg;
+       u32             prediv;
 
        unsigned long   features;
        spinlock_t      *lock;
index 8659b4cb6c2099cba086b29713e2312a7703b87d..4057e6021aa9f74a1cd91acef616d48fe1fc1b1a 100644 (file)
@@ -77,6 +77,18 @@ static int ccu_div_determine_rate(struct clk_hw *hw,
 {
        struct ccu_div *cd = hw_to_ccu_div(hw);
 
+       if (clk_hw_get_num_parents(hw) == 1) {
+               req->rate = divider_round_rate(hw, req->rate,
+                                              &req->best_parent_rate,
+                                              cd->div.table,
+                                              cd->div.width,
+                                              cd->div.flags);
+
+               req->best_parent_hw = clk_hw_get_parent(hw);
+
+               return 0;
+       }
+
        return ccu_mux_helper_determine_rate(&cd->common, &cd->mux,
                                             req, ccu_div_round_rate, cd);
 }
index 06540f7cf41cba1deea274ef30a0a06003160c37..08d07445120439430abb5b57427470ee419d7965 100644 (file)
@@ -41,6 +41,7 @@ struct ccu_div_internal {
        u8                      width;
 
        u32                     max;
+       u32                     offset;
 
        u32                     flags;
 
@@ -58,20 +59,27 @@ struct ccu_div_internal {
 #define _SUNXI_CCU_DIV_TABLE(_shift, _width, _table)                   \
        _SUNXI_CCU_DIV_TABLE_FLAGS(_shift, _width, _table, 0)
 
-#define _SUNXI_CCU_DIV_MAX_FLAGS(_shift, _width, _max, _flags) \
+#define _SUNXI_CCU_DIV_OFFSET_MAX_FLAGS(_shift, _width, _off, _max, _flags) \
        {                                                               \
                .shift  = _shift,                                       \
                .width  = _width,                                       \
                .flags  = _flags,                                       \
                .max    = _max,                                         \
+               .offset = _off,                                         \
        }
 
+#define _SUNXI_CCU_DIV_MAX_FLAGS(_shift, _width, _max, _flags)         \
+       _SUNXI_CCU_DIV_OFFSET_MAX_FLAGS(_shift, _width, 1, _max, _flags)
+
 #define _SUNXI_CCU_DIV_FLAGS(_shift, _width, _flags)                   \
        _SUNXI_CCU_DIV_MAX_FLAGS(_shift, _width, 0, _flags)
 
 #define _SUNXI_CCU_DIV_MAX(_shift, _width, _max)                       \
        _SUNXI_CCU_DIV_MAX_FLAGS(_shift, _width, _max, 0)
 
+#define _SUNXI_CCU_DIV_OFFSET(_shift, _width, _offset)                 \
+       _SUNXI_CCU_DIV_OFFSET_MAX_FLAGS(_shift, _width, _offset, 0, 0)
+
 #define _SUNXI_CCU_DIV(_shift, _width)                                 \
        _SUNXI_CCU_DIV_FLAGS(_shift, _width, 0)
 
index ebb1b31568a55b625af3ad9e0de01fa716015fa0..22c2ca7a2a221c1f25456e6e2548d381fbc23adf 100644 (file)
@@ -89,11 +89,14 @@ static unsigned long ccu_mp_recalc_rate(struct clk_hw *hw,
 
        m = reg >> cmp->m.shift;
        m &= (1 << cmp->m.width) - 1;
+       m += cmp->m.offset;
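+       /* a zero divider would be invalid, so clamp it to one */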
+       if (!m)
+               m++;
 
        p = reg >> cmp->p.shift;
        p &= (1 << cmp->p.width) - 1;
 
-       return (parent_rate >> p) / (m + 1);
+       return (parent_rate >> p) / m;
 }
 
 static int ccu_mp_determine_rate(struct clk_hw *hw,
@@ -124,9 +127,10 @@ static int ccu_mp_set_rate(struct clk_hw *hw, unsigned long rate,
        reg = readl(cmp->common.base + cmp->common.reg);
        reg &= ~GENMASK(cmp->m.width + cmp->m.shift - 1, cmp->m.shift);
        reg &= ~GENMASK(cmp->p.width + cmp->p.shift - 1, cmp->p.shift);
+       reg |= (m - cmp->m.offset) << cmp->m.shift;
+       reg |= ilog2(p) << cmp->p.shift;
 
-       writel(reg | (ilog2(p) << cmp->p.shift) | ((m - 1) << cmp->m.shift),
-              cmp->common.base + cmp->common.reg);
+       writel(reg, cmp->common.base + cmp->common.reg);
 
        spin_unlock_irqrestore(cmp->common.lock, flags);
 
index 678b6cb49f01b7b176ca84f425ee5a91d6486728..8724c01171b1758f00d1151a55e86ffea593c344 100644 (file)
@@ -40,8 +40,13 @@ static unsigned long ccu_mult_round_rate(struct ccu_mux_internal *mux,
        struct ccu_mult *cm = data;
        struct _ccu_mult _cm;
 
-       _cm.min = 1;
-       _cm.max = 1 << cm->mult.width;
+       _cm.min = cm->mult.min;
+
+       if (cm->mult.max)
+               _cm.max = cm->mult.max;
+       else
+               _cm.max = (1 << cm->mult.width) + cm->mult.offset - 1;
+
        ccu_mult_find_best(parent_rate, rate, &_cm);
 
        return parent_rate * _cm.mult;
@@ -75,6 +80,9 @@ static unsigned long ccu_mult_recalc_rate(struct clk_hw *hw,
        unsigned long val;
        u32 reg;
 
+       if (ccu_frac_helper_is_enabled(&cm->common, &cm->frac))
+               return ccu_frac_helper_read_rate(&cm->common, &cm->frac);
+
        reg = readl(cm->common.base + cm->common.reg);
        val = reg >> cm->mult.shift;
        val &= (1 << cm->mult.width) - 1;
@@ -82,7 +90,7 @@ static unsigned long ccu_mult_recalc_rate(struct clk_hw *hw,
        ccu_mux_helper_adjust_parent_for_prediv(&cm->common, &cm->mux, -1,
                                                &parent_rate);
 
-       return parent_rate * (val + 1);
+       return parent_rate * (val + cm->mult.offset);
 }
 
 static int ccu_mult_determine_rate(struct clk_hw *hw,
@@ -102,20 +110,30 @@ static int ccu_mult_set_rate(struct clk_hw *hw, unsigned long rate,
        unsigned long flags;
        u32 reg;
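+       /* use the fractional mode when it can produce the requested rate */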
 
+       if (ccu_frac_helper_has_rate(&cm->common, &cm->frac, rate))
+               return ccu_frac_helper_set_rate(&cm->common, &cm->frac, rate);
+       else
+               ccu_frac_helper_disable(&cm->common, &cm->frac);
+
        ccu_mux_helper_adjust_parent_for_prediv(&cm->common, &cm->mux, -1,
                                                &parent_rate);
 
        _cm.min = cm->mult.min;
-       _cm.max = 1 << cm->mult.width;
+
+       if (cm->mult.max)
+               _cm.max = cm->mult.max;
+       else
+               _cm.max = (1 << cm->mult.width) + cm->mult.offset - 1;
+
        ccu_mult_find_best(parent_rate, rate, &_cm);
 
        spin_lock_irqsave(cm->common.lock, flags);
 
        reg = readl(cm->common.base + cm->common.reg);
        reg &= ~GENMASK(cm->mult.width + cm->mult.shift - 1, cm->mult.shift);
+       reg |= ((_cm.mult - cm->mult.offset) << cm->mult.shift);
 
-       writel(reg | ((_cm.mult - 1) << cm->mult.shift),
-              cm->common.base + cm->common.reg);
+       writel(reg, cm->common.base + cm->common.reg);
 
        spin_unlock_irqrestore(cm->common.lock, flags);
 
index c1a2134bdc711367d239b237f5dae01f1e6af07b..524acddfcb2eaf480b992b4f9761ad9b46fb8d70 100644 (file)
@@ -2,27 +2,39 @@
 #define _CCU_MULT_H_
 
 #include "ccu_common.h"
+#include "ccu_frac.h"
 #include "ccu_mux.h"
 
 struct ccu_mult_internal {
+       u8      offset;
        u8      shift;
        u8      width;
        u8      min;
+       u8      max;
 };
 
-#define _SUNXI_CCU_MULT_MIN(_shift, _width, _min)      \
-       {                                               \
-               .shift  = _shift,                       \
-               .width  = _width,                       \
-               .min    = _min,                         \
+#define _SUNXI_CCU_MULT_OFFSET_MIN_MAX(_shift, _width, _offset, _min, _max) \
+       {                                                               \
+               .min    = _min,                                         \
+               .max    = _max,                                         \
+               .offset = _offset,                                      \
+               .shift  = _shift,                                       \
+               .width  = _width,                                       \
        }
 
+#define _SUNXI_CCU_MULT_MIN(_shift, _width, _min)      \
+       _SUNXI_CCU_MULT_OFFSET_MIN_MAX(_shift, _width, 1, _min, 0)
+
+#define _SUNXI_CCU_MULT_OFFSET(_shift, _width, _offset)        \
+       _SUNXI_CCU_MULT_OFFSET_MIN_MAX(_shift, _width, _offset, 1, 0)
+
 #define _SUNXI_CCU_MULT(_shift, _width)                \
-       _SUNXI_CCU_MULT_MIN(_shift, _width, 1)
+       _SUNXI_CCU_MULT_OFFSET_MIN_MAX(_shift, _width, 1, 1, 0)
 
 struct ccu_mult {
        u32                     enable;
 
+       struct ccu_frac_internal        frac;
        struct ccu_mult_internal        mult;
        struct ccu_mux_internal mux;
        struct ccu_common       common;
index a43ad52a957dcbd104ae29a67ecdf4e057038bf1..c6bb1f5232326f982e496d00767c38d85bfde9ea 100644 (file)
@@ -25,9 +25,15 @@ void ccu_mux_helper_adjust_parent_for_prediv(struct ccu_common *common,
        int i;
 
        if (!((common->features & CCU_FEATURE_FIXED_PREDIV) ||
-             (common->features & CCU_FEATURE_VARIABLE_PREDIV)))
+             (common->features & CCU_FEATURE_VARIABLE_PREDIV) ||
+             (common->features & CCU_FEATURE_ALL_PREDIV)))
                return;
 
+       if (common->features & CCU_FEATURE_ALL_PREDIV) {
+               *parent_rate = *parent_rate / common->prediv;
+               return;
+       }
+
        reg = readl(common->base + common->reg);
        if (parent_index < 0) {
                parent_index = reg >> cm->shift;
@@ -64,19 +70,46 @@ int ccu_mux_helper_determine_rate(struct ccu_common *common,
        struct clk_hw *best_parent, *hw = &common->hw;
        unsigned int i;
 
+       if (clk_hw_get_flags(hw) & CLK_SET_RATE_NO_REPARENT) {
+               unsigned long adj_parent_rate;
+
+               best_parent = clk_hw_get_parent(hw);
+               best_parent_rate = clk_hw_get_rate(best_parent);
+
+               adj_parent_rate = best_parent_rate;
+               ccu_mux_helper_adjust_parent_for_prediv(common, cm, -1,
+                                                       &adj_parent_rate);
+
+               best_rate = round(cm, adj_parent_rate, req->rate, data);
+
+               goto out;
+       }
+
        for (i = 0; i < clk_hw_get_num_parents(hw); i++) {
-               unsigned long tmp_rate, parent_rate;
+               unsigned long tmp_rate, parent_rate, adj_parent_rate;
                struct clk_hw *parent;
 
                parent = clk_hw_get_parent_by_index(hw, i);
                if (!parent)
                        continue;
 
-               parent_rate = clk_hw_get_rate(parent);
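+               /* with CLK_SET_RATE_PARENT the parent may be re-clocked, so ask it */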
+               if (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) {
+                       struct clk_rate_request parent_req = *req;
+                       int ret = __clk_determine_rate(parent, &parent_req);
+
+                       if (ret)
+                               continue;
+
+                       parent_rate = parent_req.rate;
+               } else {
+                       parent_rate = clk_hw_get_rate(parent);
+               }
+
+               adj_parent_rate = parent_rate;
                ccu_mux_helper_adjust_parent_for_prediv(common, cm, i,
-                                                       &parent_rate);
+                                                       &adj_parent_rate);
 
-               tmp_rate = round(cm, clk_hw_get_rate(parent), req->rate, data);
+               tmp_rate = round(cm, adj_parent_rate, req->rate, data);
                if (tmp_rate == req->rate) {
                        best_parent = parent;
                        best_parent_rate = parent_rate;
index eaf0fdf78d2ba278299c8eb93c4b40eb20ce4201..b9e9b8a9d1b458cb44376ef40f55fbb2e0ab334e 100644 (file)
@@ -76,12 +76,17 @@ static unsigned long ccu_nk_recalc_rate(struct clk_hw *hw,
 
        n = reg >> nk->n.shift;
        n &= (1 << nk->n.width) - 1;
+       n += nk->n.offset;
+       if (!n)
+               n++;
 
        k = reg >> nk->k.shift;
        k &= (1 << nk->k.width) - 1;
+       k += nk->k.offset;
+       if (!k)
+               k++;
 
-       rate = parent_rate * (n + 1) * (k + 1);
-
+       rate = parent_rate * n * k;
        if (nk->common.features & CCU_FEATURE_FIXED_POSTDIV)
                rate /= nk->fixed_post_div;
 
@@ -98,9 +103,9 @@ static long ccu_nk_round_rate(struct clk_hw *hw, unsigned long rate,
                rate *= nk->fixed_post_div;
 
        _nk.min_n = nk->n.min;
-       _nk.max_n = 1 << nk->n.width;
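+       /* a zero .max means no explicit cap: the field width sets the limit */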
+       _nk.max_n = nk->n.max ?: 1 << nk->n.width;
        _nk.min_k = nk->k.min;
-       _nk.max_k = 1 << nk->k.width;
+       _nk.max_k = nk->k.max ?: 1 << nk->k.width;
 
        ccu_nk_find_best(*parent_rate, rate, &_nk);
        rate = *parent_rate * _nk.n * _nk.k;
@@ -123,9 +128,9 @@ static int ccu_nk_set_rate(struct clk_hw *hw, unsigned long rate,
                rate = rate * nk->fixed_post_div;
 
        _nk.min_n = nk->n.min;
-       _nk.max_n = 1 << nk->n.width;
+       _nk.max_n = nk->n.max ?: 1 << nk->n.width;
        _nk.min_k = nk->k.min;
-       _nk.max_k = 1 << nk->k.width;
+       _nk.max_k = nk->k.max ?: 1 << nk->k.width;
 
        ccu_nk_find_best(parent_rate, rate, &_nk);
 
@@ -135,8 +140,9 @@ static int ccu_nk_set_rate(struct clk_hw *hw, unsigned long rate,
        reg &= ~GENMASK(nk->n.width + nk->n.shift - 1, nk->n.shift);
        reg &= ~GENMASK(nk->k.width + nk->k.shift - 1, nk->k.shift);
 
-       writel(reg | ((_nk.k - 1) << nk->k.shift) | ((_nk.n - 1) << nk->n.shift),
-              nk->common.base + nk->common.reg);
+       reg |= (_nk.k - nk->k.offset) << nk->k.shift;
+       reg |= (_nk.n - nk->n.offset) << nk->n.shift;
+       writel(reg, nk->common.base + nk->common.reg);
 
        spin_unlock_irqrestore(nk->common.lock, flags);
 
index 9b840a47a94d8774aef65388058dfc7f0d589c5d..71f81e95a061e9cebfae12a6ccefc023538e3265 100644 (file)
@@ -82,14 +82,23 @@ static unsigned long ccu_nkm_recalc_rate(struct clk_hw *hw,
 
        n = reg >> nkm->n.shift;
        n &= (1 << nkm->n.width) - 1;
+       n += nkm->n.offset;
+       if (!n)
+               n++;
 
        k = reg >> nkm->k.shift;
        k &= (1 << nkm->k.width) - 1;
+       k += nkm->k.offset;
+       if (!k)
+               k++;
 
        m = reg >> nkm->m.shift;
        m &= (1 << nkm->m.width) - 1;
+       m += nkm->m.offset;
+       if (!m)
+               m++;
 
-       return parent_rate * (n + 1) * (k + 1) / (m + 1);
+       return parent_rate * n * k / m;
 }
 
 static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
@@ -101,9 +110,9 @@ static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
        struct _ccu_nkm _nkm;
 
        _nkm.min_n = nkm->n.min;
-       _nkm.max_n = 1 << nkm->n.width;
+       _nkm.max_n = nkm->n.max ?: 1 << nkm->n.width;
        _nkm.min_k = nkm->k.min;
-       _nkm.max_k = 1 << nkm->k.width;
+       _nkm.max_k = nkm->k.max ?: 1 << nkm->k.width;
        _nkm.min_m = 1;
        _nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
 
@@ -130,9 +139,9 @@ static int ccu_nkm_set_rate(struct clk_hw *hw, unsigned long rate,
        u32 reg;
 
        _nkm.min_n = nkm->n.min;
-       _nkm.max_n = 1 << nkm->n.width;
+       _nkm.max_n = nkm->n.max ?: 1 << nkm->n.width;
        _nkm.min_k = nkm->k.min;
-       _nkm.max_k = 1 << nkm->k.width;
+       _nkm.max_k = nkm->k.max ?: 1 << nkm->k.width;
        _nkm.min_m = 1;
        _nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
 
@@ -145,10 +154,9 @@ static int ccu_nkm_set_rate(struct clk_hw *hw, unsigned long rate,
        reg &= ~GENMASK(nkm->k.width + nkm->k.shift - 1, nkm->k.shift);
        reg &= ~GENMASK(nkm->m.width + nkm->m.shift - 1, nkm->m.shift);
 
-       reg |= (_nkm.n - 1) << nkm->n.shift;
-       reg |= (_nkm.k - 1) << nkm->k.shift;
-       reg |= (_nkm.m - 1) << nkm->m.shift;
-
+       reg |= (_nkm.n - nkm->n.offset) << nkm->n.shift;
+       reg |= (_nkm.k - nkm->k.offset) << nkm->k.shift;
+       reg |= (_nkm.m - nkm->m.offset) << nkm->m.shift;
        writel(reg, nkm->common.base + nkm->common.reg);
 
        spin_unlock_irqrestore(nkm->common.lock, flags);
index 684c42da3ebbf91ac46ab1e06f7ea378b454760d..a2b40a0001577d2579aa563341d1d890797f4b7f 100644 (file)
@@ -88,17 +88,26 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw,
 
        n = reg >> nkmp->n.shift;
        n &= (1 << nkmp->n.width) - 1;
+       n += nkmp->n.offset;
+       if (!n)
+               n++;
 
        k = reg >> nkmp->k.shift;
        k &= (1 << nkmp->k.width) - 1;
+       k += nkmp->k.offset;
+       if (!k)
+               k++;
 
        m = reg >> nkmp->m.shift;
        m &= (1 << nkmp->m.width) - 1;
+       m += nkmp->m.offset;
+       if (!m)
+               m++;
 
        p = reg >> nkmp->p.shift;
        p &= (1 << nkmp->p.width) - 1;
 
-       return (parent_rate * (n + 1) * (k + 1) >> p) / (m + 1);
+       return (parent_rate * n * k >> p) / m;
 }
 
 static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
@@ -108,9 +117,9 @@ static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
        struct _ccu_nkmp _nkmp;
 
        _nkmp.min_n = nkmp->n.min;
-       _nkmp.max_n = 1 << nkmp->n.width;
+       _nkmp.max_n = nkmp->n.max ?: 1 << nkmp->n.width;
        _nkmp.min_k = nkmp->k.min;
-       _nkmp.max_k = 1 << nkmp->k.width;
+       _nkmp.max_k = nkmp->k.max ?: 1 << nkmp->k.width;
        _nkmp.min_m = 1;
        _nkmp.max_m = nkmp->m.max ?: 1 << nkmp->m.width;
        _nkmp.min_p = 1;
@@ -130,9 +139,9 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
        u32 reg;
 
        _nkmp.min_n = 1;
-       _nkmp.max_n = 1 << nkmp->n.width;
+       _nkmp.max_n = nkmp->n.max ?: 1 << nkmp->n.width;
        _nkmp.min_k = 1;
-       _nkmp.max_k = 1 << nkmp->k.width;
+       _nkmp.max_k = nkmp->k.max ?: 1 << nkmp->k.width;
        _nkmp.min_m = 1;
        _nkmp.max_m = nkmp->m.max ?: 1 << nkmp->m.width;
        _nkmp.min_p = 1;
@@ -148,9 +157,9 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
        reg &= ~GENMASK(nkmp->m.width + nkmp->m.shift - 1, nkmp->m.shift);
        reg &= ~GENMASK(nkmp->p.width + nkmp->p.shift - 1, nkmp->p.shift);
 
-       reg |= (_nkmp.n - 1) << nkmp->n.shift;
-       reg |= (_nkmp.k - 1) << nkmp->k.shift;
-       reg |= (_nkmp.m - 1) << nkmp->m.shift;
+       reg |= (_nkmp.n - nkmp->n.offset) << nkmp->n.shift;
+       reg |= (_nkmp.k - nkmp->k.offset) << nkmp->k.shift;
+       reg |= (_nkmp.m - nkmp->m.offset) << nkmp->m.shift;
        reg |= ilog2(_nkmp.p) << nkmp->p.shift;
 
        writel(reg, nkmp->common.base + nkmp->common.reg);
index c9f3b6c982f02e4fd2cacea8ddd516b7344cc239..af71b1909cd9f6f3816ce19dc81464425c37ff17 100644 (file)
@@ -80,11 +80,17 @@ static unsigned long ccu_nm_recalc_rate(struct clk_hw *hw,
 
        n = reg >> nm->n.shift;
        n &= (1 << nm->n.width) - 1;
+       n += nm->n.offset;
+       if (!n)
+               n++;
 
        m = reg >> nm->m.shift;
        m &= (1 << nm->m.width) - 1;
+       m += nm->m.offset;
+       if (!m)
+               m++;
 
-       return parent_rate * (n + 1) / (m + 1);
+       return parent_rate * n / m;
 }
 
 static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate,
@@ -94,7 +100,7 @@ static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate,
        struct _ccu_nm _nm;
 
        _nm.min_n = nm->n.min;
-       _nm.max_n = 1 << nm->n.width;
+       _nm.max_n = nm->n.max ?: 1 << nm->n.width;
        _nm.min_m = 1;
        _nm.max_m = nm->m.max ?: 1 << nm->m.width;
 
@@ -117,7 +123,7 @@ static int ccu_nm_set_rate(struct clk_hw *hw, unsigned long rate,
                ccu_frac_helper_disable(&nm->common, &nm->frac);
 
        _nm.min_n = 1;
-       _nm.max_n = 1 << nm->n.width;
+       _nm.max_n = nm->n.max ?: 1 << nm->n.width;
        _nm.min_m = 1;
        _nm.max_m = nm->m.max ?: 1 << nm->m.width;
 
@@ -129,8 +135,9 @@ static int ccu_nm_set_rate(struct clk_hw *hw, unsigned long rate,
        reg &= ~GENMASK(nm->n.width + nm->n.shift - 1, nm->n.shift);
        reg &= ~GENMASK(nm->m.width + nm->m.shift - 1, nm->m.shift);
 
-       writel(reg | ((_nm.m - 1) << nm->m.shift) | ((_nm.n - 1) << nm->n.shift),
-              nm->common.base + nm->common.reg);
+       reg |= (_nm.n - nm->n.offset) << nm->n.shift;
+       reg |= (_nm.m - nm->m.offset) << nm->m.shift;
+       writel(reg, nm->common.base + nm->common.reg);
 
        spin_unlock_irqrestore(nm->common.lock, flags);
 
index 1ba30d1e14f27e484e567ecda3cf3fb97092bb65..7ddacae5d0b1718c48d400774d12069426c922e4 100644 (file)
@@ -1,3 +1,7 @@
 config TEGRA_CLK_EMC
        def_bool y
        depends on TEGRA124_EMC
+
+config CLK_TEGRA_BPMP
+       def_bool y
+       depends on TEGRA_BPMP
index 33fd0938d79e5d9918ffc61a3911fc3e34e729a0..4be8af28ee61061e02922801beca10a026b181e2 100644 (file)
@@ -22,3 +22,4 @@ obj-$(CONFIG_ARCH_TEGRA_124_SOC)      += clk-tegra124-dfll-fcpu.o
 obj-$(CONFIG_ARCH_TEGRA_132_SOC)       += clk-tegra124.o
 obj-y                                  += cvb.o
 obj-$(CONFIG_ARCH_TEGRA_210_SOC)       += clk-tegra210.o
+obj-$(CONFIG_CLK_TEGRA_BPMP)           += clk-bpmp.o
diff --git a/drivers/clk/tegra/clk-bpmp.c b/drivers/clk/tegra/clk-bpmp.c
new file mode 100644 (file)
index 0000000..638ace6
--- /dev/null
@@ -0,0 +1,620 @@
+/*
+ * Copyright (C) 2016 NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/seq_buf.h>
+#include <linux/slab.h>
+
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+
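+/* set to 1 to dump information about each clock exposed by the BPMP */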
+#define TEGRA_BPMP_DUMP_CLOCK_INFO     0
+
+#define TEGRA_BPMP_CLK_HAS_MUX         BIT(0)
+#define TEGRA_BPMP_CLK_HAS_SET_RATE    BIT(1)
+#define TEGRA_BPMP_CLK_IS_ROOT         BIT(2)
+
+struct tegra_bpmp_clk_info {
+       unsigned int id;
+       char name[MRQ_CLK_NAME_MAXLEN];
+       unsigned int parents[MRQ_CLK_MAX_PARENTS];
+       unsigned int num_parents;
+       unsigned long flags;
+};
+
+struct tegra_bpmp_clk {
+       struct clk_hw hw;
+
+       struct tegra_bpmp *bpmp;
+       unsigned int id;
+
+       unsigned int num_parents;
+       unsigned int *parents;
+};
+
+static inline struct tegra_bpmp_clk *to_tegra_bpmp_clk(struct clk_hw *hw)
+{
+       return container_of(hw, struct tegra_bpmp_clk, hw);
+}
+
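+/*
+ * Generic description of one MRQ_CLK sub-command: tx carries the
+ * sub-command-specific request payload, rx receives the response.
+ */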
+struct tegra_bpmp_clk_message {
+       unsigned int cmd;
+       unsigned int id;
+
+       struct {
+               const void *data;
+               size_t size;
+       } tx;
+
+       struct {
+               void *data;
+               size_t size;
+       } rx;
+};
+
+static int tegra_bpmp_clk_transfer(struct tegra_bpmp *bpmp,
+                                  const struct tegra_bpmp_clk_message *clk)
+{
+       struct mrq_clk_request request;
+       struct tegra_bpmp_message msg;
+       void *req = &request;
+
+       memset(&request, 0, sizeof(request));
+       request.cmd_and_id = (clk->cmd << 24) | clk->id;
+
+       /*
+        * The mrq_clk_request structure has an anonymous union at offset 4
+        * that contains all possible sub-command structures. Copy the data
+        * to that union. Ideally we'd be able to refer to it by name, but
+        * doing so would require changing the ABI header and increase the
+        * maintenance burden.
+        */
+       memcpy(req + 4, clk->tx.data, clk->tx.size);
+
+       memset(&msg, 0, sizeof(msg));
+       msg.mrq = MRQ_CLK;
+       msg.tx.data = &request;
+       msg.tx.size = sizeof(request);
+       msg.rx.data = clk->rx.data;
+       msg.rx.size = clk->rx.size;
+
+       return tegra_bpmp_transfer(bpmp, &msg);
+}
+
+static int tegra_bpmp_clk_prepare(struct clk_hw *hw)
+{
+       struct tegra_bpmp_clk *clk = to_tegra_bpmp_clk(hw);
+       struct tegra_bpmp_clk_message msg;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_ENABLE;
+       msg.id = clk->id;
+
+       return tegra_bpmp_clk_transfer(clk->bpmp, &msg);
+}
+
+static void tegra_bpmp_clk_unprepare(struct clk_hw *hw)
+{
+       struct tegra_bpmp_clk *clk = to_tegra_bpmp_clk(hw);
+       struct tegra_bpmp_clk_message msg;
+       int err;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_DISABLE;
+       msg.id = clk->id;
+
+       err = tegra_bpmp_clk_transfer(clk->bpmp, &msg);
+       if (err < 0)
+               dev_err(clk->bpmp->dev, "failed to disable clock %s: %d\n",
+                       clk_hw_get_name(hw), err);
+}
+
+static int tegra_bpmp_clk_is_prepared(struct clk_hw *hw)
+{
+       struct tegra_bpmp_clk *clk = to_tegra_bpmp_clk(hw);
+       struct cmd_clk_is_enabled_response response;
+       struct tegra_bpmp_clk_message msg;
+       int err;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_IS_ENABLED;
+       msg.id = clk->id;
+       msg.rx.data = &response;
+       msg.rx.size = sizeof(response);
+
+       err = tegra_bpmp_clk_transfer(clk->bpmp, &msg);
+       if (err < 0)
+               return err;
+
+       return response.state;
+}
+
+static unsigned long tegra_bpmp_clk_recalc_rate(struct clk_hw *hw,
+                                               unsigned long parent_rate)
+{
+       struct tegra_bpmp_clk *clk = to_tegra_bpmp_clk(hw);
+       struct cmd_clk_get_rate_response response;
+       struct cmd_clk_get_rate_request request;
+       struct tegra_bpmp_clk_message msg;
+       int err;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_GET_RATE;
+       msg.id = clk->id;
+       msg.tx.data = &request;
+       msg.tx.size = sizeof(request);
+       msg.rx.data = &response;
+       msg.rx.size = sizeof(response);
+
+       err = tegra_bpmp_clk_transfer(clk->bpmp, &msg);
+       if (err < 0)
+               return err;
+
+       return response.rate;
+}
+
+static long tegra_bpmp_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+                                     unsigned long *parent_rate)
+{
+       struct tegra_bpmp_clk *clk = to_tegra_bpmp_clk(hw);
+       struct cmd_clk_round_rate_response response;
+       struct cmd_clk_round_rate_request request;
+       struct tegra_bpmp_clk_message msg;
+       int err;
+
+       memset(&request, 0, sizeof(request));
+       request.rate = rate;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_ROUND_RATE;
+       msg.id = clk->id;
+       msg.tx.data = &request;
+       msg.tx.size = sizeof(request);
+       msg.rx.data = &response;
+       msg.rx.size = sizeof(response);
+
+       err = tegra_bpmp_clk_transfer(clk->bpmp, &msg);
+       if (err < 0)
+               return err;
+
+       return response.rate;
+}
+
+static int tegra_bpmp_clk_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct tegra_bpmp_clk *clk = to_tegra_bpmp_clk(hw);
+       struct cmd_clk_set_parent_response response;
+       struct cmd_clk_set_parent_request request;
+       struct tegra_bpmp_clk_message msg;
+       int err;
+
+       memset(&request, 0, sizeof(request));
+       request.parent_id = clk->parents[index];
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_SET_PARENT;
+       msg.id = clk->id;
+       msg.tx.data = &request;
+       msg.tx.size = sizeof(request);
+       msg.rx.data = &response;
+       msg.rx.size = sizeof(response);
+
+       err = tegra_bpmp_clk_transfer(clk->bpmp, &msg);
+       if (err < 0)
+               return err;
+
+       /* XXX check parent ID in response */
+
+       return 0;
+}
+
+static u8 tegra_bpmp_clk_get_parent(struct clk_hw *hw)
+{
+       struct tegra_bpmp_clk *clk = to_tegra_bpmp_clk(hw);
+       struct cmd_clk_get_parent_response response;
+       struct tegra_bpmp_clk_message msg;
+       unsigned int i;
+       int err;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_GET_PARENT;
+       msg.id = clk->id;
+       msg.rx.data = &response;
+       msg.rx.size = sizeof(response);
+
+       err = tegra_bpmp_clk_transfer(clk->bpmp, &msg);
+       if (err < 0) {
+               dev_err(clk->bpmp->dev, "failed to get parent for %s: %d\n",
+                       clk_hw_get_name(hw), err);
+               return U8_MAX;
+       }
+
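+       /* map the firmware's parent ID back to an index in our parent list */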
+       for (i = 0; i < clk->num_parents; i++)
+               if (clk->parents[i] == response.parent_id)
+                       return i;
+
+       return U8_MAX;
+}
+
+static int tegra_bpmp_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+                                  unsigned long parent_rate)
+{
+       struct tegra_bpmp_clk *clk = to_tegra_bpmp_clk(hw);
+       struct cmd_clk_set_rate_response response;
+       struct cmd_clk_set_rate_request request;
+       struct tegra_bpmp_clk_message msg;
+
+       memset(&request, 0, sizeof(request));
+       request.rate = rate;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_SET_RATE;
+       msg.id = clk->id;
+       msg.tx.data = &request;
+       msg.tx.size = sizeof(request);
+       msg.rx.data = &response;
+       msg.rx.size = sizeof(response);
+
+       return tegra_bpmp_clk_transfer(clk->bpmp, &msg);
+}
+
+static const struct clk_ops tegra_bpmp_clk_gate_ops = {
+       .prepare = tegra_bpmp_clk_prepare,
+       .unprepare = tegra_bpmp_clk_unprepare,
+       .is_prepared = tegra_bpmp_clk_is_prepared,
+       .recalc_rate = tegra_bpmp_clk_recalc_rate,
+};
+
+static const struct clk_ops tegra_bpmp_clk_mux_ops = {
+       .prepare = tegra_bpmp_clk_prepare,
+       .unprepare = tegra_bpmp_clk_unprepare,
+       .is_prepared = tegra_bpmp_clk_is_prepared,
+       .recalc_rate = tegra_bpmp_clk_recalc_rate,
+       .set_parent = tegra_bpmp_clk_set_parent,
+       .get_parent = tegra_bpmp_clk_get_parent,
+};
+
+static const struct clk_ops tegra_bpmp_clk_rate_ops = {
+       .prepare = tegra_bpmp_clk_prepare,
+       .unprepare = tegra_bpmp_clk_unprepare,
+       .is_prepared = tegra_bpmp_clk_is_prepared,
+       .recalc_rate = tegra_bpmp_clk_recalc_rate,
+       .round_rate = tegra_bpmp_clk_round_rate,
+       .set_rate = tegra_bpmp_clk_set_rate,
+};
+
+static const struct clk_ops tegra_bpmp_clk_mux_rate_ops = {
+       .prepare = tegra_bpmp_clk_prepare,
+       .unprepare = tegra_bpmp_clk_unprepare,
+       .is_prepared = tegra_bpmp_clk_is_prepared,
+       .recalc_rate = tegra_bpmp_clk_recalc_rate,
+       .round_rate = tegra_bpmp_clk_round_rate,
+       .set_parent = tegra_bpmp_clk_set_parent,
+       .get_parent = tegra_bpmp_clk_get_parent,
+       .set_rate = tegra_bpmp_clk_set_rate,
+};
+
+static int tegra_bpmp_clk_get_max_id(struct tegra_bpmp *bpmp)
+{
+       struct cmd_clk_get_max_clk_id_response response;
+       struct tegra_bpmp_clk_message msg;
+       int err;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_GET_MAX_CLK_ID;
+       msg.rx.data = &response;
+       msg.rx.size = sizeof(response);
+
+       err = tegra_bpmp_clk_transfer(bpmp, &msg);
+       if (err < 0)
+               return err;
+
+       if (response.max_id > INT_MAX)
+               return -E2BIG;
+
+       return response.max_id;
+}
+
+static int tegra_bpmp_clk_get_info(struct tegra_bpmp *bpmp, unsigned int id,
+                                  struct tegra_bpmp_clk_info *info)
+{
+       struct cmd_clk_get_all_info_response response;
+       struct tegra_bpmp_clk_message msg;
+       unsigned int i;
+       int err;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.cmd = CMD_CLK_GET_ALL_INFO;
+       msg.id = id;
+       msg.rx.data = &response;
+       msg.rx.size = sizeof(response);
+
+       err = tegra_bpmp_clk_transfer(bpmp, &msg);
+       if (err < 0)
+               return err;
+
+       strlcpy(info->name, response.name, MRQ_CLK_NAME_MAXLEN);
+       info->num_parents = response.num_parents;
+
+       for (i = 0; i < info->num_parents; i++)
+               info->parents[i] = response.parents[i];
+
+       info->flags = response.flags;
+
+       return 0;
+}
+
+static void tegra_bpmp_clk_info_dump(struct tegra_bpmp *bpmp,
+                                    const char *level,
+                                    const struct tegra_bpmp_clk_info *info)
+{
+       const char *prefix = "";
+       struct seq_buf buf;
+       unsigned int i;
+       char flags[64];
+
+       seq_buf_init(&buf, flags, sizeof(flags));
+
+       if (info->flags)
+               seq_buf_printf(&buf, "(");
+
+       if (info->flags & TEGRA_BPMP_CLK_HAS_MUX) {
+               seq_buf_printf(&buf, "%smux", prefix);
+               prefix = ", ";
+       }
+
+       if ((info->flags & TEGRA_BPMP_CLK_HAS_SET_RATE) == 0) {
+               seq_buf_printf(&buf, "%sfixed", prefix);
+               prefix = ", ";
+       }
+
+       if (info->flags & TEGRA_BPMP_CLK_IS_ROOT) {
+               seq_buf_printf(&buf, "%sroot", prefix);
+               prefix = ", ";
+       }
+
+       if (info->flags)
+               seq_buf_printf(&buf, ")");
+
+       dev_printk(level, bpmp->dev, "%03u: %s\n", info->id, info->name);
+       dev_printk(level, bpmp->dev, "  flags: %lx %s\n", info->flags, flags);
+       dev_printk(level, bpmp->dev, "  parents: %u\n", info->num_parents);
+
+       for (i = 0; i < info->num_parents; i++)
+               dev_printk(level, bpmp->dev, "    %03u\n", info->parents[i]);
+}
+
+static int tegra_bpmp_probe_clocks(struct tegra_bpmp *bpmp,
+                                  struct tegra_bpmp_clk_info **clocksp)
+{
+       struct tegra_bpmp_clk_info *clocks;
+       unsigned int max_id, id, count = 0;
+       unsigned int holes = 0;
+       int err;
+
+       err = tegra_bpmp_clk_get_max_id(bpmp);
+       if (err < 0)
+               return err;
+
+       max_id = err;
+
+       dev_dbg(bpmp->dev, "maximum clock ID: %u\n", max_id);
+
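+       /*
+        * Clock IDs can be sparse, so allocate room for every possible ID
+        * and compact the list as it is built.
+        */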
+       clocks = kcalloc(max_id + 1, sizeof(*clocks), GFP_KERNEL);
+       if (!clocks)
+               return -ENOMEM;
+
+       for (id = 0; id <= max_id; id++) {
+               struct tegra_bpmp_clk_info *info = &clocks[count];
+
+               err = tegra_bpmp_clk_get_info(bpmp, id, info);
+               if (err < 0) {
+                       dev_err(bpmp->dev, "failed to query clock %u: %d\n",
+                               id, err);
+                       continue;
+               }
+
+               if (info->num_parents >= U8_MAX) {
+                       dev_err(bpmp->dev,
+                               "clock %u has too many parents (%u, max: %u)\n",
+                               id, info->num_parents, U8_MAX);
+                       continue;
+               }
+
+               /* clock not exposed by BPMP */
+               if (info->name[0] == '\0') {
+                       holes++;
+                       continue;
+               }
+
+               info->id = id;
+               count++;
+
+               if (TEGRA_BPMP_DUMP_CLOCK_INFO)
+                       tegra_bpmp_clk_info_dump(bpmp, KERN_DEBUG, info);
+       }
+
+       dev_dbg(bpmp->dev, "holes: %u\n", holes);
+       *clocksp = clocks;
+
+       return count;
+}
+
+static const struct tegra_bpmp_clk_info *
+tegra_bpmp_clk_find(const struct tegra_bpmp_clk_info *clocks,
+                   unsigned int num_clocks, unsigned int id)
+{
+       unsigned int i;
+
+       for (i = 0; i < num_clocks; i++)
+               if (clocks[i].id == id)
+                       return &clocks[i];
+
+       return NULL;
+}
+
+static struct tegra_bpmp_clk *
+tegra_bpmp_clk_register(struct tegra_bpmp *bpmp,
+                       const struct tegra_bpmp_clk_info *info,
+                       const struct tegra_bpmp_clk_info *clocks,
+                       unsigned int num_clocks)
+{
+       struct tegra_bpmp_clk *clk;
+       struct clk_init_data init;
+       const char **parents;
+       unsigned int i;
+       int err;
+
+       clk = devm_kzalloc(bpmp->dev, sizeof(*clk), GFP_KERNEL);
+       if (!clk)
+               return ERR_PTR(-ENOMEM);
+
+       clk->id = info->id;
+       clk->bpmp = bpmp;
+
+       clk->parents = devm_kcalloc(bpmp->dev, info->num_parents,
+                                   sizeof(*clk->parents), GFP_KERNEL);
+       if (!clk->parents)
+               return ERR_PTR(-ENOMEM);
+
+       clk->num_parents = info->num_parents;
+
+       /* hardware clock initialization */
+       memset(&init, 0, sizeof(init));
+       init.name = info->name;
+       clk->hw.init = &init;
+
+       if (info->flags & TEGRA_BPMP_CLK_HAS_MUX) {
+               if (info->flags & TEGRA_BPMP_CLK_HAS_SET_RATE)
+                       init.ops = &tegra_bpmp_clk_mux_rate_ops;
+               else
+                       init.ops = &tegra_bpmp_clk_mux_ops;
+       } else {
+               if (info->flags & TEGRA_BPMP_CLK_HAS_SET_RATE)
+                       init.ops = &tegra_bpmp_clk_rate_ops;
+               else
+                       init.ops = &tegra_bpmp_clk_gate_ops;
+       }
+
+       init.num_parents = info->num_parents;
+
+       parents = kcalloc(info->num_parents, sizeof(*parents), GFP_KERNEL);
+       if (!parents)
+               return ERR_PTR(-ENOMEM);
+
+       for (i = 0; i < info->num_parents; i++) {
+               const struct tegra_bpmp_clk_info *parent;
+
+               /* keep a private copy of the ID to parent index map */
+               clk->parents[i] = info->parents[i];
+
+               parent = tegra_bpmp_clk_find(clocks, num_clocks,
+                                            info->parents[i]);
+               if (!parent) {
+                       dev_err(bpmp->dev, "no parent %u found for %u\n",
+                               info->parents[i], info->id);
+                       continue;
+               }
+
+               parents[i] = parent->name;
+       }
+
+       init.parent_names = parents;
+
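+       /*
+        * The clk core duplicates the init data during registration, so the
+        * temporary parent name array can be freed again below.
+        */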
+       err = devm_clk_hw_register(bpmp->dev, &clk->hw);
+
+       kfree(parents);
+
+       if (err < 0)
+               return ERR_PTR(err);
+
+       return clk;
+}
+
+static int tegra_bpmp_register_clocks(struct tegra_bpmp *bpmp,
+                                     struct tegra_bpmp_clk_info *infos,
+                                     unsigned int count)
+{
+       struct tegra_bpmp_clk *clk;
+       unsigned int i;
+
+       bpmp->num_clocks = count;
+
+       bpmp->clocks = devm_kcalloc(bpmp->dev, count, sizeof(clk), GFP_KERNEL);
+       if (!bpmp->clocks)
+               return -ENOMEM;
+
+       for (i = 0; i < count; i++) {
+               struct tegra_bpmp_clk_info *info = &infos[i];
+
+               clk = tegra_bpmp_clk_register(bpmp, info, infos, count);
+               if (IS_ERR(clk)) {
+                       dev_err(bpmp->dev,
+                               "failed to register clock %u (%s): %ld\n",
+                               info->id, info->name, PTR_ERR(clk));
+                       continue;
+               }
+
+               bpmp->clocks[i] = clk;
+       }
+
+       return 0;
+}
+
+static void tegra_bpmp_unregister_clocks(struct tegra_bpmp *bpmp)
+{
+       unsigned int i;
+
+       for (i = 0; i < bpmp->num_clocks; i++)
+               clk_hw_unregister(&bpmp->clocks[i]->hw);
+}
+
+static struct clk_hw *tegra_bpmp_clk_of_xlate(struct of_phandle_args *clkspec,
+                                             void *data)
+{
+       unsigned int id = clkspec->args[0], i;
+       struct tegra_bpmp *bpmp = data;
+
+       for (i = 0; i < bpmp->num_clocks; i++)
+               if (bpmp->clocks[i]->id == id)
+                       return &bpmp->clocks[i]->hw;
+
+       return NULL;
+}
+
+int tegra_bpmp_init_clocks(struct tegra_bpmp *bpmp)
+{
+       struct tegra_bpmp_clk_info *clocks;
+       unsigned int count;
+       int err;
+
+       err = tegra_bpmp_probe_clocks(bpmp, &clocks);
+       if (err < 0)
+               return err;
+
+       count = err;
+
+       dev_dbg(bpmp->dev, "%u clocks probed\n", count);
+
+       err = tegra_bpmp_register_clocks(bpmp, clocks, count);
+       if (err < 0)
+               goto free;
+
+       err = of_clk_add_hw_provider(bpmp->dev->of_node,
+                                    tegra_bpmp_clk_of_xlate,
+                                    bpmp);
+       if (err < 0) {
+               tegra_bpmp_unregister_clocks(bpmp);
+               goto free;
+       }
+
+free:
+       kfree(clocks);
+       return err;
+}
index b4e5de16e561e05a3d2e9ff41d38beb52f48555f..6bb87784a0d6ef82c411bf7ba566412a4059dd20 100644 (file)
@@ -140,6 +140,35 @@ static bool _is_valid_div(struct clk_divider *divider, unsigned int div)
        return true;
 }
 
+static int _div_round_up(const struct clk_div_table *table,
+                        unsigned long parent_rate, unsigned long rate)
+{
+       const struct clk_div_table *clkt;
+       int up = INT_MAX;
+       int div = DIV_ROUND_UP_ULL((u64)parent_rate, rate);
+
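+       /*
+        * Return an exact table match or, failing that, the closest table
+        * divider above the ideal value.
+        */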
+       for (clkt = table; clkt->div; clkt++) {
+               if (clkt->div == div)
+                       return clkt->div;
+               else if (clkt->div < div)
+                       continue;
+
+               if ((clkt->div - div) < (up - div))
+                       up = clkt->div;
+       }
+
+       return up;
+}
+
+static int _div_round(const struct clk_div_table *table,
+                     unsigned long parent_rate, unsigned long rate)
+{
+       if (!table)
+               return DIV_ROUND_UP(parent_rate, rate);
+
+       return _div_round_up(table, parent_rate, rate);
+}
+
 static int ti_clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
                                  unsigned long *best_parent_rate)
 {
@@ -155,7 +184,7 @@ static int ti_clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 
        if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) {
                parent_rate = *best_parent_rate;
-               bestdiv = DIV_ROUND_UP(parent_rate, rate);
+               bestdiv = _div_round(divider->table, parent_rate, rate);
                bestdiv = bestdiv == 0 ? 1 : bestdiv;
                bestdiv = bestdiv > maxdiv ? maxdiv : bestdiv;
                return bestdiv;
index 0007218ce6a08d8f5874ec7aa1f312d06c87be1d..2cf386347f0c8ba26e0436d5fb6e240ff06289f3 100644 (file)
@@ -90,11 +90,8 @@ static int uniphier_clk_probe(struct platform_device *pdev)
 
                dev_dbg(dev, "register %s (index=%d)\n", p->name, p->idx);
                hw = uniphier_clk_register(dev, regmap, p);
-               if (IS_ERR(hw)) {
-                       dev_err(dev, "failed to register %s (error %ld)\n",
-                               p->name, PTR_ERR(hw));
-                       return PTR_ERR(hw);
-               }
+               if (WARN(IS_ERR(hw), "failed to register %s", p->name))
+                       continue;
 
                if (p->idx >= 0)
                        hw_data->hws[p->idx] = hw;
index 9bff26e0cbb042917fb423c144e4a16353420468..ec11f55594ad0d9ded3eb97ca2d3b2cd1f4e96f6 100644 (file)
@@ -14,7 +14,6 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/regmap.h>
 
index d049316c1c0f20442065f92ce7c2156236edcb73..c8027d909429a352990bc848089e24099e9e9b68 100644 (file)
        UNIPHIER_CLK_FACTOR("sd-200m", -1, "spll", 1, 10),              \
        UNIPHIER_CLK_FACTOR("sd-133m", -1, "spll", 1, 15)
 
+#define UNIPHIER_SLD3_SYS_CLK_NAND(idx)                                        \
+       UNIPHIER_CLK_GATE("nand", (idx), NULL, 0x2104, 2)
+
+#define UNIPHIER_LD11_SYS_CLK_NAND(idx)                                        \
+       UNIPHIER_CLK_GATE("nand", (idx), NULL, 0x210c, 0)
+
+#define UNIPHIER_LD11_SYS_CLK_EMMC(idx)                                        \
+       UNIPHIER_CLK_GATE("emmc", (idx), NULL, 0x210c, 2)
+
 #define UNIPHIER_SLD3_SYS_CLK_STDMAC(idx)                              \
        UNIPHIER_CLK_GATE("stdmac", (idx), NULL, 0x2104, 10)
 
@@ -48,6 +57,7 @@ const struct uniphier_clk_data uniphier_sld3_sys_clk_data[] = {
        UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 5625, 512),   /* 270 MHz */
        UNIPHIER_CLK_FACTOR("uart", 0, "a2pll", 1, 16),
        UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 16),
+       UNIPHIER_SLD3_SYS_CLK_NAND(2),
        UNIPHIER_SLD3_SYS_CLK_SD,
        UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
        UNIPHIER_SLD3_SYS_CLK_STDMAC(8),
@@ -61,6 +71,7 @@ const struct uniphier_clk_data uniphier_ld4_sys_clk_data[] = {
        UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 5625, 512),   /* 270 MHz */
        UNIPHIER_CLK_FACTOR("uart", 0, "a2pll", 1, 16),
        UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 16),
+       UNIPHIER_SLD3_SYS_CLK_NAND(2),
        UNIPHIER_SLD3_SYS_CLK_SD,
        UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
        UNIPHIER_SLD3_SYS_CLK_STDMAC(8),                /* Ether, HSC, MIO */
@@ -74,6 +85,7 @@ const struct uniphier_clk_data uniphier_pro4_sys_clk_data[] = {
        UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 270, 25),     /* 270 MHz */
        UNIPHIER_CLK_FACTOR("uart", 0, "a2pll", 1, 8),
        UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 32),
+       UNIPHIER_SLD3_SYS_CLK_NAND(2),
        UNIPHIER_SLD3_SYS_CLK_SD,
        UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
        UNIPHIER_SLD3_SYS_CLK_STDMAC(8),                /* HSC, MIO, RLE */
@@ -89,6 +101,7 @@ const struct uniphier_clk_data uniphier_sld8_sys_clk_data[] = {
        UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 270, 25),     /* 270 MHz */
        UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 20),
        UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 16),
+       UNIPHIER_SLD3_SYS_CLK_NAND(2),
        UNIPHIER_SLD3_SYS_CLK_SD,
        UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
        UNIPHIER_SLD3_SYS_CLK_STDMAC(8),                /* Ether, HSC, MIO */
@@ -101,6 +114,7 @@ const struct uniphier_clk_data uniphier_pro5_sys_clk_data[] = {
        UNIPHIER_CLK_FACTOR("dapll2", -1, "ref", 144, 125),     /* 2949.12 MHz */
        UNIPHIER_CLK_FACTOR("uart", 0, "dapll2", 1, 40),
        UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 48),
+       UNIPHIER_SLD3_SYS_CLK_NAND(2),
        UNIPHIER_PRO5_SYS_CLK_SD,
        UNIPHIER_SLD3_SYS_CLK_STDMAC(8),                        /* HSC */
        UNIPHIER_PRO4_SYS_CLK_GIO(12),                          /* PCIe, USB3 */
@@ -113,6 +127,7 @@ const struct uniphier_clk_data uniphier_pxs2_sys_clk_data[] = {
        UNIPHIER_CLK_FACTOR("spll", -1, "ref", 96, 1),          /* 2400 MHz */
        UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 27),
        UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 48),
+       UNIPHIER_SLD3_SYS_CLK_NAND(2),
        UNIPHIER_PRO5_SYS_CLK_SD,
        UNIPHIER_SLD3_SYS_CLK_STDMAC(8),                        /* HSC, RLE */
        /* GIO is always clock-enabled: no function for 0x2104 bit6 */
@@ -131,6 +146,9 @@ const struct uniphier_clk_data uniphier_ld11_sys_clk_data[] = {
        UNIPHIER_CLK_FACTOR("vspll", -1, "ref", 80, 1),         /* 2000 MHz */
        UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 34),
        UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 40),
+       UNIPHIER_LD11_SYS_CLK_NAND(2),
+       UNIPHIER_LD11_SYS_CLK_EMMC(4),
+       /* Index 5 reserved for eMMC PHY */
        UNIPHIER_LD11_SYS_CLK_STDMAC(8),                        /* HSC, MIO */
        UNIPHIER_CLK_FACTOR("usb2", -1, "ref", 24, 25),
        /* CPU gears */
@@ -156,6 +174,9 @@ const struct uniphier_clk_data uniphier_ld20_sys_clk_data[] = {
        UNIPHIER_CLK_FACTOR("vppll", -1, "ref", 504, 5),        /* 2520 MHz */
        UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 34),
        UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 40),
+       UNIPHIER_LD11_SYS_CLK_NAND(2),
+       UNIPHIER_LD11_SYS_CLK_EMMC(4),
+       /* Index 5 reserved for eMMC PHY */
        UNIPHIER_LD20_SYS_CLK_SD,
        UNIPHIER_LD11_SYS_CLK_STDMAC(8),                        /* HSC */
        /* GIO is always clock-enabled: no function for 0x210c bit5 */
index a07c31e6f26d2f926add57cc63520dceef610c3d..2257d12ba988c54b7969c3aee1f99473d908d98b 100644 (file)
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/abx500/ab8500.h>
 #include <linux/mfd/abx500/ab8500-sysctrl.h>
 #include <linux/clkdev.h>
 #include <linux/clk-provider.h>
-#include <linux/mfd/dbx500-prcmu.h>
+#include <dt-bindings/clock/ste-ab8500.h>
 #include "clk.h"
 
+#define AB8500_NUM_CLKS 6
+
+static struct clk *ab8500_clks[AB8500_NUM_CLKS];
+static struct clk_onecell_data ab8500_clk_data;
+
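+/*
+ * A single onecell provider exposes these clocks to DT consumers,
+ * indexed by the dt-bindings constants.
+ */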
 /* Clock definitions for ab8500 */
 static int ab8500_reg_clks(struct device *dev)
 {
        int ret;
        struct clk *clk;
-
+       struct device_node *np = dev->of_node;
        const char *intclk_parents[] = {"ab8500_sysclk", "ulpclk"};
        u16 intclk_reg_sel[] = {0 , AB8500_SYSULPCLKCTRL1};
        u8 intclk_reg_mask[] = {0 , AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_MASK};
@@ -32,55 +38,52 @@ static int ab8500_reg_clks(struct device *dev)
                (1 << AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_SHIFT)
        };
 
-       dev_info(dev, "register clocks for ab850x\n");
-
        /* Enable SWAT */
        ret = ab8500_sysctrl_set(AB8500_SWATCTRL, AB8500_SWATCTRL_SWATENABLE);
        if (ret)
                return ret;
 
-       /* ab8500_sysclk */
-       clk = clk_reg_prcmu_gate("ab8500_sysclk", NULL, PRCMU_SYSCLK, 0);
-       clk_register_clkdev(clk, "sysclk", "ab8500-usb.0");
-       clk_register_clkdev(clk, "sysclk", "ab-iddet.0");
-       clk_register_clkdev(clk, "sysclk", "snd-soc-mop500.0");
-       clk_register_clkdev(clk, "sysclk", "shrm_bus");
-
        /* ab8500_sysclk2 */
        clk = clk_reg_sysctrl_gate(dev , "ab8500_sysclk2", "ab8500_sysclk",
                AB8500_SYSULPCLKCTRL1, AB8500_SYSULPCLKCTRL1_SYSCLKBUF2REQ,
                AB8500_SYSULPCLKCTRL1_SYSCLKBUF2REQ, 0, 0);
-       clk_register_clkdev(clk, "sysclk", "0-0070");
+       ab8500_clks[AB8500_SYSCLK_BUF2] = clk;
 
        /* ab8500_sysclk3 */
        clk = clk_reg_sysctrl_gate(dev , "ab8500_sysclk3", "ab8500_sysclk",
                AB8500_SYSULPCLKCTRL1, AB8500_SYSULPCLKCTRL1_SYSCLKBUF3REQ,
                AB8500_SYSULPCLKCTRL1_SYSCLKBUF3REQ, 0, 0);
-       clk_register_clkdev(clk, "sysclk", "cg1960_core.0");
+       ab8500_clks[AB8500_SYSCLK_BUF3] = clk;
 
        /* ab8500_sysclk4 */
        clk = clk_reg_sysctrl_gate(dev , "ab8500_sysclk4", "ab8500_sysclk",
                AB8500_SYSULPCLKCTRL1, AB8500_SYSULPCLKCTRL1_SYSCLKBUF4REQ,
                AB8500_SYSULPCLKCTRL1_SYSCLKBUF4REQ, 0, 0);
+       ab8500_clks[AB8500_SYSCLK_BUF4] = clk;
 
        /* ab_ulpclk */
        clk = clk_reg_sysctrl_gate_fixed_rate(dev, "ulpclk", NULL,
                AB8500_SYSULPCLKCTRL1, AB8500_SYSULPCLKCTRL1_ULPCLKREQ,
                AB8500_SYSULPCLKCTRL1_ULPCLKREQ,
                38400000, 9000, 0);
-       clk_register_clkdev(clk, "ulpclk", "snd-soc-mop500.0");
+       ab8500_clks[AB8500_SYSCLK_ULP] = clk;
 
        /* ab8500_intclk */
        clk = clk_reg_sysctrl_set_parent(dev , "intclk", intclk_parents, 2,
                intclk_reg_sel, intclk_reg_mask, intclk_reg_bits, 0);
-       clk_register_clkdev(clk, "intclk", "snd-soc-mop500.0");
-       clk_register_clkdev(clk, NULL, "ab8500-pwm.1");
+       ab8500_clks[AB8500_SYSCLK_INT] = clk;
 
        /* ab8500_audioclk */
        clk = clk_reg_sysctrl_gate(dev , "audioclk", "intclk",
                AB8500_SYSULPCLKCTRL1, AB8500_SYSULPCLKCTRL1_AUDIOCLKENA,
                AB8500_SYSULPCLKCTRL1_AUDIOCLKENA, 0, 0);
-       clk_register_clkdev(clk, "audioclk", "ab8500-codec.0");
+       ab8500_clks[AB8500_SYSCLK_AUDIO] = clk;
+
+       ab8500_clk_data.clks = ab8500_clks;
+       ab8500_clk_data.clk_num = ARRAY_SIZE(ab8500_clks);
+       of_clk_add_provider(np, of_clk_src_onecell_get, &ab8500_clk_data);
+
+       dev_info(dev, "registered clocks for ab850x\n");
 
        return 0;
 }
@@ -116,9 +119,15 @@ static int abx500_clk_probe(struct platform_device *pdev)
        return ret;
 }
 
+static const struct of_device_id abx500_clk_match[] = {
+       { .compatible = "stericsson,ab8500-clk", },
+       {}
+};
+
 static struct platform_driver abx500_clk_driver = {
        .driver = {
                .name = "abx500-clk",
+               .of_match_table = abx500_clk_match,
        },
        .probe  = abx500_clk_probe,
 };
@@ -127,7 +136,6 @@ static int __init abx500_clk_init(void)
 {
        return platform_driver_register(&abx500_clk_driver);
 }
-
 arch_initcall(abx500_clk_init);
 
 MODULE_AUTHOR("Ulf Hansson <ulf.hansson@linaro.org");
index e960d686d9db81ebf282cc37e09d2fde5b0317c2..d5888591e1a9afdf6c4dc5a9ea42c515548c90b5 100644 (file)
@@ -206,6 +206,9 @@ static void u8500_clk_init(struct device_node *np)
        clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, 0);
        prcmu_clk[PRCMU_TIMCLK] = clk;
 
+       clk = clk_reg_prcmu_gate("ab8500_sysclk", NULL, PRCMU_SYSCLK, 0);
+       prcmu_clk[PRCMU_SYSCLK] = clk;
+
        clk = clk_reg_prcmu_opp_volt_scalable("sdmmcclk", NULL, PRCMU_SDMMCCLK,
                                        100000000, CLK_SET_RATE_GATE);
        prcmu_clk[PRCMU_SDMMCCLK] = clk;
index 04781389d0fb58fc663d8f3d5163e164d8882798..1367afb03858b843e26d265495282237c4887dab 100644 (file)
@@ -1,2 +1,3 @@
 clk-x86-lpss-objs              := clk-lpt.o
 obj-$(CONFIG_X86_INTEL_LPSS)   += clk-x86-lpss.o
+obj-$(CONFIG_PMC_ATOM)         += clk-pmc-atom.o
diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c
new file mode 100644 (file)
index 0000000..2b60577
--- /dev/null
@@ -0,0 +1,371 @@
+/*
+ * Intel Atom platform clocks driver for BayTrail and CherryTrail SoCs
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Irina Tirdea <irina.tirdea@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/err.h>
+#include <linux/platform_data/x86/clk-pmc-atom.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define PLT_CLK_NAME_BASE      "pmc_plt_clk"
+
+#define PMC_CLK_CTL_OFFSET             0x60
+#define PMC_CLK_CTL_SIZE               4
+#define PMC_CLK_NUM                    6
+#define PMC_CLK_CTL_GATED_ON_D3                0x0
+#define PMC_CLK_CTL_FORCE_ON           0x1
+#define PMC_CLK_CTL_FORCE_OFF          0x2
+#define PMC_CLK_CTL_RESERVED           0x3
+#define PMC_MASK_CLK_CTL               GENMASK(1, 0)
+#define PMC_MASK_CLK_FREQ              BIT(2)
+#define PMC_CLK_FREQ_XTAL              (0 << 2)        /* 25 MHz */
+#define PMC_CLK_FREQ_PLL               (1 << 2)        /* 19.2 MHz */
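+
+/*
+ * Each platform clock owns one 32-bit control register located at
+ * PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE: bits [1:0] select the
+ * gating mode and bit 2 selects the parent (XTAL or PLL).
+ */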
+
+struct clk_plt_fixed {
+       struct clk_hw *clk;
+       struct clk_lookup *lookup;
+};
+
+struct clk_plt {
+       struct clk_hw hw;
+       void __iomem *reg;
+       struct clk_lookup *lookup;
+       /* protect access to PMC registers */
+       spinlock_t lock;
+};
+
+#define to_clk_plt(_hw) container_of(_hw, struct clk_plt, hw)
+
+struct clk_plt_data {
+       struct clk_plt_fixed **parents;
+       u8 nparents;
+       struct clk_plt *clks[PMC_CLK_NUM];
+};
+
+/* Return an index in parent table */
+static inline int plt_reg_to_parent(int reg)
+{
+       switch (reg & PMC_MASK_CLK_FREQ) {
+       default:
+       case PMC_CLK_FREQ_XTAL:
+               return 0;
+       case PMC_CLK_FREQ_PLL:
+               return 1;
+       }
+}
+
+/* Return clk index of parent */
+static inline int plt_parent_to_reg(int index)
+{
+       switch (index) {
+       default:
+       case 0:
+               return PMC_CLK_FREQ_XTAL;
+       case 1:
+               return PMC_CLK_FREQ_PLL;
+       }
+}
+
+/* Abstract status in simpler enabled/disabled value */
+static inline int plt_reg_to_enabled(int reg)
+{
+       switch (reg & PMC_MASK_CLK_CTL) {
+       case PMC_CLK_CTL_GATED_ON_D3:
+       case PMC_CLK_CTL_FORCE_ON:
+               return 1;       /* enabled */
+       case PMC_CLK_CTL_FORCE_OFF:
+       case PMC_CLK_CTL_RESERVED:
+       default:
+               return 0;       /* disabled */
+       }
+}
+
+static void plt_clk_reg_update(struct clk_plt *clk, u32 mask, u32 val)
+{
+       u32 tmp;
+       unsigned long flags;
+
+       spin_lock_irqsave(&clk->lock, flags);
+
+       tmp = readl(clk->reg);
+       tmp = (tmp & ~mask) | (val & mask);
+       writel(tmp, clk->reg);
+
+       spin_unlock_irqrestore(&clk->lock, flags);
+}
+
+static int plt_clk_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct clk_plt *clk = to_clk_plt(hw);
+
+       plt_clk_reg_update(clk, PMC_MASK_CLK_FREQ, plt_parent_to_reg(index));
+
+       return 0;
+}
+
+static u8 plt_clk_get_parent(struct clk_hw *hw)
+{
+       struct clk_plt *clk = to_clk_plt(hw);
+       u32 value;
+
+       value = readl(clk->reg);
+
+       return plt_reg_to_parent(value);
+}
+
+static int plt_clk_enable(struct clk_hw *hw)
+{
+       struct clk_plt *clk = to_clk_plt(hw);
+
+       plt_clk_reg_update(clk, PMC_MASK_CLK_CTL, PMC_CLK_CTL_FORCE_ON);
+
+       return 0;
+}
+
+static void plt_clk_disable(struct clk_hw *hw)
+{
+       struct clk_plt *clk = to_clk_plt(hw);
+
+       plt_clk_reg_update(clk, PMC_MASK_CLK_CTL, PMC_CLK_CTL_FORCE_OFF);
+}
+
+static int plt_clk_is_enabled(struct clk_hw *hw)
+{
+       struct clk_plt *clk = to_clk_plt(hw);
+       u32 value;
+
+       value = readl(clk->reg);
+
+       return plt_reg_to_enabled(value);
+}
+
+static const struct clk_ops plt_clk_ops = {
+       .enable = plt_clk_enable,
+       .disable = plt_clk_disable,
+       .is_enabled = plt_clk_is_enabled,
+       .get_parent = plt_clk_get_parent,
+       .set_parent = plt_clk_set_parent,
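+       /* let the core pick the best parent via the generic mux helper */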
+       .determine_rate = __clk_mux_determine_rate,
+};
+
+static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id,
+                                       void __iomem *base,
+                                       const char **parent_names,
+                                       int num_parents)
+{
+       struct clk_plt *pclk;
+       struct clk_init_data init;
+       int ret;
+
+       pclk = devm_kzalloc(&pdev->dev, sizeof(*pclk), GFP_KERNEL);
+       if (!pclk)
+               return ERR_PTR(-ENOMEM);
+
+       init.name = kasprintf(GFP_KERNEL, "%s_%d", PLT_CLK_NAME_BASE, id);
+       if (!init.name)
+               return ERR_PTR(-ENOMEM);
+       init.ops = &plt_clk_ops;
+       init.flags = 0;
+       init.parent_names = parent_names;
+       init.num_parents = num_parents;
+
+       pclk->hw.init = &init;
+       pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE;
+       spin_lock_init(&pclk->lock);
+
+       ret = devm_clk_hw_register(&pdev->dev, &pclk->hw);
+       if (ret) {
+               pclk = ERR_PTR(ret);
+               goto err_free_init;
+       }
+
+       pclk->lookup = clkdev_hw_create(&pclk->hw, init.name, NULL);
+       if (!pclk->lookup) {
+               pclk = ERR_PTR(-ENOMEM);
+               goto err_free_init;
+       }
+
+err_free_init:
+       kfree(init.name);
+       return pclk;
+}
+
+static void plt_clk_unregister(struct clk_plt *pclk)
+{
+       clkdev_drop(pclk->lookup);
+}
+
+static struct clk_plt_fixed *plt_clk_register_fixed_rate(struct platform_device *pdev,
+                                                const char *name,
+                                                const char *parent_name,
+                                                unsigned long fixed_rate)
+{
+       struct clk_plt_fixed *pclk;
+
+       pclk = devm_kzalloc(&pdev->dev, sizeof(*pclk), GFP_KERNEL);
+       if (!pclk)
+               return ERR_PTR(-ENOMEM);
+
+       pclk->clk = clk_hw_register_fixed_rate(&pdev->dev, name, parent_name,
+                                              0, fixed_rate);
+       if (IS_ERR(pclk->clk))
+               return ERR_CAST(pclk->clk);
+
+       pclk->lookup = clkdev_hw_create(pclk->clk, name, NULL);
+       if (!pclk->lookup) {
+               clk_hw_unregister_fixed_rate(pclk->clk);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       return pclk;
+}
+
+static void plt_clk_unregister_fixed_rate(struct clk_plt_fixed *pclk)
+{
+       clkdev_drop(pclk->lookup);
+       clk_hw_unregister_fixed_rate(pclk->clk);
+}
+
+static void plt_clk_unregister_fixed_rate_loop(struct clk_plt_data *data,
+                                              unsigned int i)
+{
+       while (i--)
+               plt_clk_unregister_fixed_rate(data->parents[i]);
+}
+
+static void plt_clk_free_parent_names_loop(const char **parent_names,
+                                          unsigned int i)
+{
+       while (i--)
+               kfree_const(parent_names[i]);
+       kfree(parent_names);
+}
+
+static void plt_clk_unregister_loop(struct clk_plt_data *data,
+                                   unsigned int i)
+{
+       while (i--)
+               plt_clk_unregister(data->clks[i]);
+}
+
+static const char **plt_clk_register_parents(struct platform_device *pdev,
+                                            struct clk_plt_data *data,
+                                            const struct pmc_clk *clks)
+{
+       const char **parent_names;
+       unsigned int i;
+       int err;
+       int nparents = 0;
+
+       data->nparents = 0;
+       while (clks[nparents].name)
+               nparents++;
+
+       data->parents = devm_kcalloc(&pdev->dev, nparents,
+                                    sizeof(*data->parents), GFP_KERNEL);
+       if (!data->parents)
+               return ERR_PTR(-ENOMEM);
+
+       parent_names = kcalloc(nparents, sizeof(*parent_names),
+                              GFP_KERNEL);
+       if (!parent_names)
+               return ERR_PTR(-ENOMEM);
+
+       for (i = 0; i < nparents; i++) {
+               data->parents[i] =
+                       plt_clk_register_fixed_rate(pdev, clks[i].name,
+                                                   clks[i].parent_name,
+                                                   clks[i].freq);
+               if (IS_ERR(data->parents[i])) {
+                       err = PTR_ERR(data->parents[i]);
+                       goto err_unreg;
+               }
+               parent_names[i] = kstrdup_const(clks[i].name, GFP_KERNEL);
+       }
+
+       data->nparents = nparents;
+       return parent_names;
+
+err_unreg:
+       plt_clk_unregister_fixed_rate_loop(data, i);
+       plt_clk_free_parent_names_loop(parent_names, i);
+       return ERR_PTR(err);
+}
+
+static void plt_clk_unregister_parents(struct clk_plt_data *data)
+{
+       plt_clk_unregister_fixed_rate_loop(data, data->nparents);
+}
+
+static int plt_clk_probe(struct platform_device *pdev)
+{
+       const struct pmc_clk_data *pmc_data;
+       const char **parent_names;
+       struct clk_plt_data *data;
+       unsigned int i;
+       int err;
+
+       pmc_data = dev_get_platdata(&pdev->dev);
+       if (!pmc_data || !pmc_data->clks)
+               return -EINVAL;
+
+       data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       parent_names = plt_clk_register_parents(pdev, data, pmc_data->clks);
+       if (IS_ERR(parent_names))
+               return PTR_ERR(parent_names);
+
+       for (i = 0; i < PMC_CLK_NUM; i++) {
+               data->clks[i] = plt_clk_register(pdev, i, pmc_data->base,
+                                                parent_names, data->nparents);
+               if (IS_ERR(data->clks[i])) {
+                       err = PTR_ERR(data->clks[i]);
+                       goto err_unreg_clk_plt;
+               }
+       }
+
+       plt_clk_free_parent_names_loop(parent_names, data->nparents);
+
+       platform_set_drvdata(pdev, data);
+       return 0;
+
+err_unreg_clk_plt:
+       plt_clk_unregister_loop(data, i);
+       plt_clk_unregister_parents(data);
+       plt_clk_free_parent_names_loop(parent_names, data->nparents);
+       return err;
+}
+
+static int plt_clk_remove(struct platform_device *pdev)
+{
+       struct clk_plt_data *data;
+
+       data = platform_get_drvdata(pdev);
+
+       plt_clk_unregister_loop(data, PMC_CLK_NUM);
+       plt_clk_unregister_parents(data);
+       return 0;
+}
+
+static struct platform_driver plt_clk_driver = {
+       .driver = {
+               .name = "clk-pmc-atom",
+       },
+       .probe = plt_clk_probe,
+       .remove = plt_clk_remove,
+};
+builtin_platform_driver(plt_clk_driver);
index 707d62956e9b50ac7a9b9a85cb5997e48f92a86f..2f7c668643fe2ca9aaa693d91eba55f579e42c67 100644 (file)
@@ -610,9 +610,12 @@ static int __init top_clocks_init(struct device_node *np)
                }
        }
 
-       if (of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &top_hw_onecell_data))
-               panic("could not register clk provider\n");
-       pr_info("top clk init over, nr:%d\n", TOP_NR_CLKS);
+       ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
+                                    &top_hw_onecell_data);
+       if (ret) {
+               pr_err("failed to register top clk provider: %d\n", ret);
+               return ret;
+       }
 
        return 0;
 }
@@ -776,9 +779,12 @@ static int __init lsp0_clocks_init(struct device_node *np)
                }
        }
 
-       if (of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &lsp0_hw_onecell_data))
-               panic("could not register clk provider\n");
-       pr_info("lsp0-clk init over:%d\n", LSP0_NR_CLKS);
+       ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
+                                    &lsp0_hw_onecell_data);
+       if (ret) {
+               pr_err("failed to register lsp0 clk provider: %d\n", ret);
+               return ret;
+       }
 
        return 0;
 }
@@ -881,9 +887,142 @@ static int __init lsp1_clocks_init(struct device_node *np)
                }
        }
 
-       if (of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &lsp1_hw_onecell_data))
-               panic("could not register clk provider\n");
-       pr_info("lsp1-clk init over, nr:%d\n", LSP1_NR_CLKS);
+       ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
+                                    &lsp1_hw_onecell_data);
+       if (ret) {
+               pr_err("failed to register lsp1 clk provider: %d\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+PNAME(audio_wclk_common_p) = {
+       "audio_99m",
+       "audio_24m",
+};
+
+PNAME(audio_timer_p) = {
+       "audio_24m",
+       "audio_32k",
+};
+
+static struct zx_clk_mux audio_mux_clk[] = {
+       MUX(0, "i2s0_wclk_mux", audio_wclk_common_p, AUDIO_I2S0_CLK, 0, 1),
+       MUX(0, "i2s1_wclk_mux", audio_wclk_common_p, AUDIO_I2S1_CLK, 0, 1),
+       MUX(0, "i2s2_wclk_mux", audio_wclk_common_p, AUDIO_I2S2_CLK, 0, 1),
+       MUX(0, "i2s3_wclk_mux", audio_wclk_common_p, AUDIO_I2S3_CLK, 0, 1),
+       MUX(0, "i2c0_wclk_mux", audio_wclk_common_p, AUDIO_I2C0_CLK, 0, 1),
+       MUX(0, "spdif0_wclk_mux", audio_wclk_common_p, AUDIO_SPDIF0_CLK, 0, 1),
+       MUX(0, "spdif1_wclk_mux", audio_wclk_common_p, AUDIO_SPDIF1_CLK, 0, 1),
+       MUX(0, "timer_wclk_mux", audio_timer_p, AUDIO_TIMER_CLK, 0, 1),
+};
+
+static struct clk_zx_audio_divider audio_adiv_clk[] = {
+       AUDIO_DIV(0, "i2s0_wclk_div", "i2s0_wclk_mux", AUDIO_I2S0_DIV_CFG1),
+       AUDIO_DIV(0, "i2s1_wclk_div", "i2s1_wclk_mux", AUDIO_I2S1_DIV_CFG1),
+       AUDIO_DIV(0, "i2s2_wclk_div", "i2s2_wclk_mux", AUDIO_I2S2_DIV_CFG1),
+       AUDIO_DIV(0, "i2s3_wclk_div", "i2s3_wclk_mux", AUDIO_I2S3_DIV_CFG1),
+       AUDIO_DIV(0, "spdif0_wclk_div", "spdif0_wclk_mux", AUDIO_SPDIF0_DIV_CFG1),
+       AUDIO_DIV(0, "spdif1_wclk_div", "spdif1_wclk_mux", AUDIO_SPDIF1_DIV_CFG1),
+};
+
+static struct zx_clk_div audio_div_clk[] = {
+       DIV_T(0, "tdm_wclk_div", "audio_16m384", AUDIO_TDM_CLK, 8, 4, 0, common_div_table),
+};
+
+static struct zx_clk_gate audio_gate_clk[] = {
+       GATE(AUDIO_I2S0_WCLK, "i2s0_wclk", "i2s0_wclk_div", AUDIO_I2S0_CLK, 9, CLK_SET_RATE_PARENT, 0),
+       GATE(AUDIO_I2S1_WCLK, "i2s1_wclk", "i2s1_wclk_div", AUDIO_I2S1_CLK, 9, CLK_SET_RATE_PARENT, 0),
+       GATE(AUDIO_I2S2_WCLK, "i2s2_wclk", "i2s2_wclk_div", AUDIO_I2S2_CLK, 9, CLK_SET_RATE_PARENT, 0),
+       GATE(AUDIO_I2S3_WCLK, "i2s3_wclk", "i2s3_wclk_div", AUDIO_I2S3_CLK, 9, CLK_SET_RATE_PARENT, 0),
+       GATE(AUDIO_I2S0_PCLK, "i2s0_pclk", "clk49m5", AUDIO_I2S0_CLK, 8, 0, 0),
+       GATE(AUDIO_I2S1_PCLK, "i2s1_pclk", "clk49m5", AUDIO_I2S1_CLK, 8, 0, 0),
+       GATE(AUDIO_I2S2_PCLK, "i2s2_pclk", "clk49m5", AUDIO_I2S2_CLK, 8, 0, 0),
+       GATE(AUDIO_I2S3_PCLK, "i2s3_pclk", "clk49m5", AUDIO_I2S3_CLK, 8, 0, 0),
+       GATE(AUDIO_I2C0_WCLK, "i2c0_wclk", "i2c0_wclk_mux", AUDIO_I2C0_CLK, 9, CLK_SET_RATE_PARENT, 0),
+       GATE(AUDIO_SPDIF0_WCLK, "spdif0_wclk", "spdif0_wclk_div", AUDIO_SPDIF0_CLK, 9, CLK_SET_RATE_PARENT, 0),
+       GATE(AUDIO_SPDIF1_WCLK, "spdif1_wclk", "spdif1_wclk_div", AUDIO_SPDIF1_CLK, 9, CLK_SET_RATE_PARENT, 0),
+       GATE(AUDIO_TDM_WCLK, "tdm_wclk", "tdm_wclk_div", AUDIO_TDM_CLK, 17, CLK_SET_RATE_PARENT, 0),
+       GATE(AUDIO_TS_PCLK, "tempsensor_pclk", "clk49m5", AUDIO_TS_CLK, 1, 0, 0),
+};
+
+static struct clk_hw_onecell_data audio_hw_onecell_data = {
+       .num = AUDIO_NR_CLKS,
+       .hws = {
+               [AUDIO_NR_CLKS - 1] = NULL,
+       },
+};
+
+static int __init audio_clocks_init(struct device_node *np)
+{
+       void __iomem *reg_base;
+       int i, ret;
+
+       reg_base = of_iomap(np, 0);
+       if (!reg_base) {
+               pr_err("%s: Unable to map audio clk base\n", __func__);
+               return -ENXIO;
+       }
+
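+       /*
+        * The static tables store register offsets; rebase them onto the
+        * mapped region before registering the clocks.
+        */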
+       for (i = 0; i < ARRAY_SIZE(audio_mux_clk); i++) {
+               if (audio_mux_clk[i].id)
+                       audio_hw_onecell_data.hws[audio_mux_clk[i].id] =
+                                       &audio_mux_clk[i].mux.hw;
+
+               audio_mux_clk[i].mux.reg += (uintptr_t)reg_base;
+               ret = clk_hw_register(NULL, &audio_mux_clk[i].mux.hw);
+               if (ret) {
+                       pr_warn("audio clk %s init error!\n",
+                               audio_mux_clk[i].mux.hw.init->name);
+               }
+       }
+
+       for (i = 0; i < ARRAY_SIZE(audio_adiv_clk); i++) {
+               if (audio_adiv_clk[i].id)
+                       audio_hw_onecell_data.hws[audio_adiv_clk[i].id] =
+                                       &audio_adiv_clk[i].hw;
+
+               audio_adiv_clk[i].reg_base += (uintptr_t)reg_base;
+               ret = clk_hw_register(NULL, &audio_adiv_clk[i].hw);
+               if (ret) {
+                       pr_warn("audio clk %s init error!\n",
+                               audio_adiv_clk[i].hw.init->name);
+               }
+       }
+
+       for (i = 0; i < ARRAY_SIZE(audio_div_clk); i++) {
+               if (audio_div_clk[i].id)
+                       audio_hw_onecell_data.hws[audio_div_clk[i].id] =
+                                       &audio_div_clk[i].div.hw;
+
+               audio_div_clk[i].div.reg += (uintptr_t)reg_base;
+               ret = clk_hw_register(NULL, &audio_div_clk[i].div.hw);
+               if (ret) {
+                       pr_warn("audio clk %s init error!\n",
+                               audio_div_clk[i].div.hw.init->name);
+               }
+       }
+
+       for (i = 0; i < ARRAY_SIZE(audio_gate_clk); i++) {
+               if (audio_gate_clk[i].id)
+                       audio_hw_onecell_data.hws[audio_gate_clk[i].id] =
+                                       &audio_gate_clk[i].gate.hw;
+
+               audio_gate_clk[i].gate.reg += (uintptr_t)reg_base;
+               ret = clk_hw_register(NULL, &audio_gate_clk[i].gate.hw);
+               if (ret) {
+                       pr_warn("audio clk %s init error!\n",
+                               audio_gate_clk[i].gate.hw.init->name);
+               }
+       }
+
+       ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
+                                    &audio_hw_onecell_data);
+       if (ret) {
+               pr_err("failed to register audio clk provider: %d\n", ret);
+               return ret;
+       }
 
        return 0;
 }
@@ -892,6 +1031,7 @@ static const struct of_device_id zx_clkc_match_table[] = {
        { .compatible = "zte,zx296718-topcrm", .data = &top_clocks_init },
        { .compatible = "zte,zx296718-lsp0crm", .data = &lsp0_clocks_init },
        { .compatible = "zte,zx296718-lsp1crm", .data = &lsp1_clocks_init },
+       { .compatible = "zte,zx296718-audiocrm", .data = &audio_clocks_init },
        { }
 };
 
index c4c1251bc1e72a1f5d5b135a27333052b5da3f23..878d879b23ff183b838aa7944d0f8a6b338e389d 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/clk-provider.h>
 #include <linux/err.h>
+#include <linux/gcd.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/slab.h>
@@ -310,3 +311,129 @@ struct clk *clk_register_zx_audio(const char *name,
 
        return clk;
 }
+
+#define CLK_AUDIO_DIV_FRAC     BIT(0)
+#define CLK_AUDIO_DIV_INT      BIT(1)
+#define CLK_AUDIO_DIV_UNCOMMON BIT(1)
+
+#define CLK_AUDIO_DIV_FRAC_NSHIFT      16
+#define CLK_AUDIO_DIV_INT_FRAC_RE      BIT(16)
+#define CLK_AUDIO_DIV_INT_FRAC_MAX     (0xffff)
+#define CLK_AUDIO_DIV_INT_FRAC_MIN     (0x2)
+#define CLK_AUDIO_DIV_INT_INT_SHIFT    24
+#define CLK_AUDIO_DIV_INT_INT_WIDTH    4
+
+struct zx_clk_audio_div_table {
+       unsigned long rate;
+       unsigned int int_reg;
+       unsigned int frac_reg;
+};
+
+#define to_clk_zx_audio_div(_hw) container_of(_hw, struct clk_zx_audio_divider, hw)
+
+static unsigned long audio_calc_rate(struct clk_zx_audio_divider *audio_div,
+                                    u32 reg_frac, u32 reg_int,
+                                    unsigned long parent_rate)
+{
+       unsigned long rate, m, n;
+
+       m = reg_frac & 0xffff;
+       n = (reg_frac >> 16) & 0xffff;
+
+       m = (reg_int & 0xffff) * n + m;
+       rate = (parent_rate * n) / m;
+
+       return rate;
+}
+
+static void audio_calc_reg(struct clk_zx_audio_divider *audio_div,
+                          struct zx_clk_audio_div_table *div_table,
+                          unsigned long rate, unsigned long parent_rate)
+{
+       unsigned int reg_int, reg_frac;
+       unsigned long m, n, div;
+
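+       /*
+        * The divider factors as reg_int + m/n, so the resulting rate is
+        * parent_rate * n / (reg_int * n + m).
+        */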
+       reg_int = parent_rate / rate;
+
+       if (reg_int > CLK_AUDIO_DIV_INT_FRAC_MAX)
+               reg_int = CLK_AUDIO_DIV_INT_FRAC_MAX;
+       else if (reg_int < CLK_AUDIO_DIV_INT_FRAC_MIN)
+               reg_int = 0;
+       m = parent_rate - rate * reg_int;
+       n = rate;
+
+       div = gcd(m, n);
+       m = m / div;
+       n = n / div;
+
+       if ((m >> 16) || (n >> 16)) {
+               if (m > n) {
+                       n = n * 0xffff / m;
+                       m = 0xffff;
+               } else {
+                       m = m * 0xffff / n;
+                       n = 0xffff;
+               }
+       }
+       reg_frac = m | (n << 16);
+
+       div_table->rate = parent_rate * n / (reg_int * n + m);
+       div_table->int_reg = reg_int;
+       div_table->frac_reg = reg_frac;
+}
+
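+/*
+ * The fractional configuration word lives at offset 0x0 and the integer
+ * word at offset 0x4; bit 16 of the latter is pulsed in set_rate to
+ * latch a new divider setting.
+ */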
+static unsigned long zx_audio_div_recalc_rate(struct clk_hw *hw,
+                                              unsigned long parent_rate)
+{
+       struct clk_zx_audio_divider *zx_audio_div = to_clk_zx_audio_div(hw);
+       u32 reg_frac, reg_int;
+
+       reg_frac = readl_relaxed(zx_audio_div->reg_base);
+       reg_int = readl_relaxed(zx_audio_div->reg_base + 0x4);
+
+       return audio_calc_rate(zx_audio_div, reg_frac, reg_int, parent_rate);
+}
+
+static long zx_audio_div_round_rate(struct clk_hw *hw, unsigned long rate,
+                                    unsigned long *prate)
+{
+       struct clk_zx_audio_divider *zx_audio_div = to_clk_zx_audio_div(hw);
+       struct zx_clk_audio_div_table divt;
+
+       audio_calc_reg(zx_audio_div, &divt, rate, *prate);
+
+       return audio_calc_rate(zx_audio_div, divt.frac_reg, divt.int_reg, *prate);
+}
+
+static int zx_audio_div_set_rate(struct clk_hw *hw, unsigned long rate,
+                                 unsigned long parent_rate)
+{
+       struct clk_zx_audio_divider *zx_audio_div = to_clk_zx_audio_div(hw);
+       struct zx_clk_audio_div_table divt;
+       unsigned int val;
+
+       audio_calc_reg(zx_audio_div, &divt, rate, parent_rate);
+       if (divt.rate != rate)
+               pr_debug("the real rate is:%ld", divt.rate);
+
+       writel_relaxed(divt.frac_reg, zx_audio_div->reg_base);
+
+       val = readl_relaxed(zx_audio_div->reg_base + 0x4);
+       val &= ~0xffff;
+       val |= divt.int_reg | CLK_AUDIO_DIV_INT_FRAC_RE;
+       writel_relaxed(val, zx_audio_div->reg_base + 0x4);
+
+       mdelay(1);
+
+       val = readl_relaxed(zx_audio_div->reg_base + 0x4);
+       val &= ~CLK_AUDIO_DIV_INT_FRAC_RE;
+       writel_relaxed(val, zx_audio_div->reg_base + 0x4);
+
+       return 0;
+}
+
+const struct clk_ops zx_audio_div_ops = {
+       .recalc_rate = zx_audio_div_recalc_rate,
+       .round_rate = zx_audio_div_round_rate,
+       .set_rate = zx_audio_div_set_rate,
+};
index 0df3474b2cf3e7c0f13558fd338db627ee75cc81..84a55a3e2bd440660d91511c69af9d7e75355320 100644 (file)
@@ -153,6 +153,25 @@ struct zx_clk_div {
        .id = _id,                                                      \
 }
 
+struct clk_zx_audio_divider {
+       struct clk_hw                           hw;
+       void __iomem                            *reg_base;
+       unsigned int                            rate_count;
+       spinlock_t                              *lock;
+       u16                                     id;
+};
+
+#define AUDIO_DIV(_id, _name, _parent, _reg)                           \
+{                                                                      \
+       .reg_base       = (void __iomem *) _reg,                        \
+       .lock           = &clk_lock,                                    \
+       .hw.init        = CLK_HW_INIT(_name,                            \
+                                     _parent,                          \
+                                     &zx_audio_div_ops,                \
+                                     0),                               \
+       .id = _id,                                                      \
+}
+
 struct clk *clk_register_zx_pll(const char *name, const char *parent_name,
        unsigned long flags, void __iomem *reg_base,
        const struct zx_pll_config *lookup_table, int count, spinlock_t *lock);
@@ -167,4 +186,6 @@ struct clk *clk_register_zx_audio(const char *name,
                                  unsigned long flags, void __iomem *reg_base);
 
 extern const struct clk_ops zx_pll_ops;
+extern const struct clk_ops zx_audio_div_ops;
+
 #endif
index 2cac445b02fde0f626946d2dd0021322bcef07c4..0b49dbc423e2449545d6a61b6a212454eef64f05 100644 (file)
@@ -62,19 +62,32 @@ config CRYPTO_DEV_GEODE
          will be called geode-aes.
 
 config ZCRYPT
-       tristate "Support for PCI-attached cryptographic adapters"
+       tristate "Support for s390 cryptographic adapters"
        depends on S390
        select HW_RANDOM
        help
-         Select this option if you want to use a PCI-attached cryptographic
-         adapter like:
-         + PCI Cryptographic Accelerator (PCICA)
-         + PCI Cryptographic Coprocessor (PCICC)
+         Select this option if you want to enable support for
+         s390 cryptographic adapters like:
          + PCI-X Cryptographic Coprocessor (PCIXCC)
-         + Crypto Express2 Coprocessor (CEX2C)
-         + Crypto Express2 Accelerator (CEX2A)
-         + Crypto Express3 Coprocessor (CEX3C)
-         + Crypto Express3 Accelerator (CEX3A)
+         + Crypto Express 2,3,4 or 5 Coprocessor (CEXxC)
+         + Crypto Express 2,3,4 or 5 Accelerator (CEXxA)
+         + Crypto Express 4 or 5 EP11 Coprocessor (CEXxP)
+
+config PKEY
+       tristate "Kernel API for protected key handling"
+       depends on S390
+       depends on ZCRYPT
+       help
+         With this option enabled, the pkey kernel module provides an API
+         for creation and handling of protected keys. Other parts of the
+         kernel or userspace applications may use these functions.
+
+         Select this option if you want to enable the kernel and userspace
+         API for protected key handling.
+
+         Please note that the creation of protected keys from secure keys
+         requires at least one CEX card in coprocessor mode to be
+         available at runtime.
 
 config CRYPTO_SHA1_S390
        tristate "SHA1 digest algorithm"
@@ -124,6 +137,7 @@ config CRYPTO_AES_S390
        depends on S390
        select CRYPTO_ALGAPI
        select CRYPTO_BLKCIPHER
+       select PKEY
        help
          This is the s390 hardware accelerated implementation of the
          AES cipher algorithms (FIPS-197).
index 579f8263c47940007a6df759105b4f68be19603b..fef39f9f41ee200c5ed7138edae76beaefdff3cb 100644 (file)
@@ -269,7 +269,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask)
                /*
                 * If the corresponding bit is set, then it means the state
                 * handle was initialized by us, and thus it needs to be
-                * deintialized as well
+                * deinitialized as well
                 */
                if ((1 << sh_idx) & state_handle_mask) {
                        /*
old mode 100755 (executable)
new mode 100644 (file)
index 18e9875f627711b6970bece1a6270901dc6a1497..b75c77254fdb56dee9e66da6a254688a5529f2d4 100644 (file)
@@ -419,8 +419,7 @@ static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
        return -1;
 }
 
-static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
-               struct vm_fault *vmf)
+static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 {
        struct device *dev = &dax_dev->dev;
        struct dax_region *dax_region;
@@ -428,7 +427,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
        phys_addr_t phys;
        pfn_t pfn;
 
-       if (check_vma(dax_dev, vma, __func__))
+       if (check_vma(dax_dev, vmf->vma, __func__))
                return VM_FAULT_SIGBUS;
 
        dax_region = dax_dev->region;
@@ -446,7 +445,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
 
        pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-       rc = vm_insert_mixed(vma, vmf->address, pfn);
+       rc = vm_insert_mixed(vmf->vma, vmf->address, pfn);
 
        if (rc == -ENOMEM)
                return VM_FAULT_OOM;
@@ -456,22 +455,6 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
        return VM_FAULT_NOPAGE;
 }
 
-static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       int rc;
-       struct file *filp = vma->vm_file;
-       struct dax_dev *dax_dev = filp->private_data;
-
-       dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
-                       current->comm, (vmf->flags & FAULT_FLAG_WRITE)
-                       ? "write" : "read", vma->vm_start, vma->vm_end);
-       rcu_read_lock();
-       rc = __dax_dev_fault(dax_dev, vma, vmf);
-       rcu_read_unlock();
-
-       return rc;
-}
-
 static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 {
        unsigned long pmd_addr = vmf->address & PMD_MASK;
@@ -510,7 +493,53 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
                        vmf->flags & FAULT_FLAG_WRITE);
 }
 
-static int dax_dev_pmd_fault(struct vm_fault *vmf)
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
+{
+       unsigned long pud_addr = vmf->address & PUD_MASK;
+       struct device *dev = &dax_dev->dev;
+       struct dax_region *dax_region;
+       phys_addr_t phys;
+       pgoff_t pgoff;
+       pfn_t pfn;
+
+       if (check_vma(dax_dev, vmf->vma, __func__))
+               return VM_FAULT_SIGBUS;
+
+       dax_region = dax_dev->region;
+       if (dax_region->align > PUD_SIZE) {
+               dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+               return VM_FAULT_SIGBUS;
+       }
+
+       /* dax pud mappings require pfn_t_devmap() */
+       if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
+               dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+               return VM_FAULT_SIGBUS;
+       }
+
+       pgoff = linear_page_index(vmf->vma, pud_addr);
+       phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE);
+       if (phys == -1) {
+               dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+                               pgoff);
+               return VM_FAULT_SIGBUS;
+       }
+
+       pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+
+       return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, pfn,
+                       vmf->flags & FAULT_FLAG_WRITE);
+}
+#else
+static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
+{
+       return VM_FAULT_FALLBACK;
+}
+#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+
+static int dax_dev_huge_fault(struct vm_fault *vmf,
+               enum page_entry_size pe_size)
 {
        int rc;
        struct file *filp = vmf->vma->vm_file;
@@ -522,15 +551,32 @@ static int dax_dev_pmd_fault(struct vm_fault *vmf)
                        vmf->vma->vm_start, vmf->vma->vm_end);
 
        rcu_read_lock();
-       rc = __dax_dev_pmd_fault(dax_dev, vmf);
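+       /* dispatch to the PTE, PMD or PUD handler based on the entry size */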
+       switch (pe_size) {
+       case PE_SIZE_PTE:
+               rc = __dax_dev_pte_fault(dax_dev, vmf);
+               break;
+       case PE_SIZE_PMD:
+               rc = __dax_dev_pmd_fault(dax_dev, vmf);
+               break;
+       case PE_SIZE_PUD:
+               rc = __dax_dev_pud_fault(dax_dev, vmf);
+               break;
+       default:
+               return VM_FAULT_FALLBACK;
+       }
        rcu_read_unlock();
 
        return rc;
 }
 
+static int dax_dev_fault(struct vm_fault *vmf)
+{
+       return dax_dev_huge_fault(vmf, PE_SIZE_PTE);
+}
+
 static const struct vm_operations_struct dax_dev_vm_ops = {
        .fault = dax_dev_fault,
-       .pmd_fault = dax_dev_pmd_fault,
+       .huge_fault = dax_dev_huge_fault,
 };
 
 static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
index 551a271353d2a49f98221a03af5c235405d50fe5..dea04871b50d1011916307168750a82d05d535f5 100644 (file)
@@ -1228,7 +1228,7 @@ static int __init devfreq_init(void)
 subsys_initcall(devfreq_init);
 
 /*
- * The followings are helper functions for devfreq user device drivers with
+ * The following are helper functions for devfreq user device drivers with
  * OPP framework.
  */
 
index 3e882aa107e837848a45575fd6b980ba2dfa7fa2..eaa355e7d9e460f1896dfdb5a56c5224dbbea389 100644 (file)
@@ -537,7 +537,7 @@ static void rt8973a_init_dev_type(struct rt8973a_muic_info *info)
                regmap_update_bits(info->regmap, reg, mask, val);
        }
 
-       /* Check whether RT8973A is auto swithcing mode or not */
+       /* Check whether RT8973A is auto switching mode or not */
        ret = regmap_read(info->regmap, RT8973A_REG_CONTROL1, &data);
        if (ret) {
                dev_err(info->dev,
index aee149bdf4c0339169d939eed797ef9e6ab521bf..a301fcf46e8821ba7d605582703df532f27f24b5 100644 (file)
@@ -1307,8 +1307,7 @@ static void iso_resource_work(struct work_struct *work)
         */
        if (r->todo == ISO_RES_REALLOC && !success &&
            !client->in_shutdown &&
-           idr_find(&client->resource_idr, r->resource.handle)) {
-               idr_remove(&client->resource_idr, r->resource.handle);
+           idr_remove(&client->resource_idr, r->resource.handle)) {
                client_put(client);
                free = true;
        }
index f9e3aee6a2113031a8df272eacdf024c504ba27e..7c2eed76011e0f1a02b39e4c586027d3ddb5e8f5 100644 (file)
@@ -1068,7 +1068,7 @@ static void fw_device_init(struct work_struct *work)
 
        /*
         * Transition the device to running state.  If it got pulled
-        * out from under us while we did the intialization work, we
+        * out from under us while we did the initialization work, we
         * have to shut down the device again here.  Normally, though,
         * have to shut down the device again here.  Normally, though,
         * fw_node_event will be responsible for shutting it down when
         * necessary.  We have to use the atomic cmpxchg here to avoid
@@ -1231,7 +1231,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
                        break;
 
                /*
-                * Do minimal intialization of the device here, the
+                * Do minimal initialization of the device here, the
                 * rest will happen in fw_device_init().
                 *
                 * Attention:  A lot of things, even fw_device_get(),
index c02db01f6583e620d885542f37a0da6f1780152e..0218cea6be4d23ff500e17e9372085198e893efa 100644 (file)
@@ -70,10 +70,10 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
        struct amdgpu_bo_list *list;
 
        mutex_lock(&fpriv->bo_list_lock);
-       list = idr_find(&fpriv->bo_list_handles, id);
+       list = idr_remove(&fpriv->bo_list_handles, id);
        if (list) {
+               /* Another user may have a reference to this list still */
                mutex_lock(&list->lock);
-               idr_remove(&fpriv->bo_list_handles, id);
                mutex_unlock(&list->lock);
                amdgpu_bo_list_free(list);
        }
index 400c66ba4c6bdc49ecdf30a3cb9deb9a79dffa71..cf05006713531ce920c62ca105929907b7457fcd 100644 (file)
@@ -135,15 +135,11 @@ static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
        struct amdgpu_ctx *ctx;
 
        mutex_lock(&mgr->lock);
-       ctx = idr_find(&mgr->ctx_handles, id);
-       if (ctx) {
-               idr_remove(&mgr->ctx_handles, id);
+       ctx = idr_remove(&mgr->ctx_handles, id);
+       if (ctx)
                kref_put(&ctx->refcount, amdgpu_ctx_do_release);
-               mutex_unlock(&mgr->lock);
-               return 0;
-       }
        mutex_unlock(&mgr->lock);
-       return -EINVAL;
+       return ctx ? 0 : -EINVAL;
 }
 
 static int amdgpu_ctx_query(struct amdgpu_device *adev,
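
The firewire and amdgpu hunks above lean on idr_remove() now returning the pointer that was stored at the given id (or NULL if the id was unallocated), so the old idr_find()-then-idr_remove() sequence collapses into a single lookup under the lock. Roughly, with a caller-defined object type:

        struct my_obj *obj;                     /* hypothetical object type */

        mutex_lock(&mgr->lock);
        obj = idr_remove(&mgr->handles, id);    /* returns the old entry, or NULL */
        mutex_unlock(&mgr->lock);
        if (!obj)
                return -EINVAL;                 /* id was never allocated */
        my_obj_put(obj);                        /* drop the reference the idr held */
        return 0;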
index ef7c8de7060e2dab64b7f7739b8c8286ea741711..ca5f2aa7232da7e5da0a6f3b19da1af4b00cb0a4 100644 (file)
@@ -262,7 +262,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
         * and because the mmu_notifier_unregister function also drop
         * mm_count we need to take an extra count here.
         */
-       atomic_inc(&p->mm->mm_count);
+       mmgrab(p->mm);
        mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
        mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
 }
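
mmgrab() here is a drop-in replacement for the open-coded atomic_inc(&p->mm->mm_count): it pins the mm_struct allocation without pinning the address space. Its counterpart mmget_not_zero(), used in the i915 userptr hunk further down, wraps atomic_inc_not_zero(&mm->mm_users) for code that needs the address space itself to stay alive. A sketch of how the two pair up:

        mmgrab(mm);                     /* mm_count: the struct stays allocated */
        /* ... later, possibly from another context ... */
        if (mmget_not_zero(mm)) {       /* mm_users: address space still live? */
                down_read(&mm->mmap_sem);
                /* safe to walk VMAs or call get_user_pages_remote() here */
                up_read(&mm->mmap_sem);
                mmput(mm);              /* drop the mm_users reference */
        }
        mmdrop(mm);                     /* drop the mm_count reference */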
index 4a4d3797a6d31e4fbac2283d857a0f21c965698d..181a2c3c636236c4cb5d8616c049f91924d0e970 100644 (file)
 #define HW_ASSISTED_I2C_STATUS_FAILURE     2
 #define HW_ASSISTED_I2C_STATUS_SUCCESS     1
 
-#pragma pack(1)                                       // BIOS data must use byte aligment
+#pragma pack(1)                                       // BIOS data must use byte alignment
 
 // Define offset to location of ROM header.
 #define OFFSET_TO_POINTER_TO_ATOM_ROM_HEADER         0x00000048L
@@ -4361,7 +4361,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT
 // GPIO use to control PCIE_VDDC in certain SLT board
 #define PCIE_VDDC_CONTROL_GPIO_PINID        56
 
-//from SMU7.x, if ucGPIO_ID=PP_AC_DC_SWITCH_GPIO_PINID in GPIO_LUTTable, AC/DC swithing feature is enable
+//from SMU7.x, if ucGPIO_ID=PP_AC_DC_SWITCH_GPIO_PINID in GPIO_LUTTable, AC/DC switching feature is enabled
 #define PP_AC_DC_SWITCH_GPIO_PINID          60
 //from SMU7.x, if ucGPIO_ID=VDDC_REGULATOR_VRHOT_GPIO_PINID in GPIO_LUTable, VRHot feature is enable
 #define VDDC_VRHOT_GPIO_PINID               61
@@ -9180,7 +9180,7 @@ typedef struct  _ATOM_POWERPLAY_INFO_V3
 
 /*********************************************************************************/
 
-#pragma pack() // BIOS data must use byte aligment
+#pragma pack() // BIOS data must use byte alignment
 
 #pragma pack(1)
 
@@ -9211,7 +9211,7 @@ typedef struct _ATOM_SERVICE_INFO
 
 
 
-#pragma pack() // BIOS data must use byte aligment
+#pragma pack() // BIOS data must use byte alignment
 
 //
 // AMD ACPI Table
index 26129972f68681c91bb6780768bc01d25174e30f..80ed65985af8e55e92b67c21efd19e3d0f6b6394 100644 (file)
@@ -89,7 +89,7 @@ enum phm_platform_caps {
        PHM_PlatformCaps_EnableSideportControl,                 /* indicates Sideport can be controlled */
        PHM_PlatformCaps_VideoPlaybackEEUNotification,          /* indicates EEU notification of video start/stop is required */
        PHM_PlatformCaps_TurnOffPll_ASPML1,                     /* PCIE Turn Off PLL in ASPM L1 */
-       PHM_PlatformCaps_EnableHTLinkControl,                   /* indicates HT Link can be controlled by ACPI or CLMC overrided/automated mode. */
+       PHM_PlatformCaps_EnableHTLinkControl,                   /* indicates HT Link can be controlled by ACPI or CLMC overridden/automated mode. */
        PHM_PlatformCaps_PerformanceStateOnly,                  /* indicates only performance power state to be used on current system. */
        PHM_PlatformCaps_ExclusiveModeAlwaysHigh,               /* In Exclusive (3D) mode always stay in High state. */
        PHM_PlatformCaps_DisableMGClockGating,                  /* to disable Medium Grain Clock Gating or not */
index 560d416deab2d8d34575baaefe2c32ae14f31135..1597458d884e26d7a77e75baae3f8dd2343e4cf1 100644 (file)
 #include <drm/armada_drm.h>
 #include "armada_ioctlP.h"
 
-static int armada_gem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int armada_gem_vm_fault(struct vm_fault *vmf)
 {
-       struct armada_gem_object *obj = drm_to_armada_gem(vma->vm_private_data);
+       struct drm_gem_object *gobj = vmf->vma->vm_private_data;
+       struct armada_gem_object *obj = drm_to_armada_gem(gobj);
        unsigned long pfn = obj->phys_addr >> PAGE_SHIFT;
        int ret;
 
-       pfn += (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-       ret = vm_insert_pfn(vma, vmf->address, pfn);
+       pfn += (vmf->address - vmf->vma->vm_start) >> PAGE_SHIFT;
+       ret = vm_insert_pfn(vmf->vma, vmf->address, pfn);
 
        switch (ret) {
        case 0:
index 93381454bdf7b92d6718f65b02c1a76bb674509b..dc4419ada12c3b1aa907280f5dc09dc35fdd8848 100644 (file)
@@ -220,8 +220,8 @@ drm_connector_detect(struct drm_connector *connector, bool force)
  *    - drm_mode_validate_basic() performs basic sanity checks
  *    - drm_mode_validate_size() filters out modes larger than @maxX and @maxY
  *      (if specified)
- *    - drm_mode_validate_flag() checks the modes againt basic connector
- *      capabilites (interlace_allowed,doublescan_allowed,stereo_allowed)
+ *    - drm_mode_validate_flag() checks the modes against basic connector
+ *      capabilities (interlace_allowed,doublescan_allowed,stereo_allowed)
  *    - the optional &drm_connector_helper_funcs.mode_valid helper can perform
  *      driver and/or hardware specific checks
  *
index bd311c77c25447fdc0d167f47dbdd02a2312072f..1170b3209a1269aff7c1cfa3692b0fe73b413bb8 100644 (file)
@@ -96,8 +96,9 @@ static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma)
  * map, get the page, increment the use count and return it.
  */
 #if IS_ENABLED(CONFIG_AGP)
-static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_file *priv = vma->vm_file->private_data;
        struct drm_device *dev = priv->minor->dev;
        struct drm_local_map *map = NULL;
@@ -168,7 +169,7 @@ vm_fault_error:
        return VM_FAULT_SIGBUS; /* Disallow mremap */
 }
 #else
-static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_fault(struct vm_fault *vmf)
 {
        return VM_FAULT_SIGBUS;
 }
@@ -184,8 +185,9 @@ static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
  * Get the mapping, find the real physical page to map, get the page, and
  * return it.
  */
-static int drm_do_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_shm_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_local_map *map = vma->vm_private_data;
        unsigned long offset;
        unsigned long i;
@@ -280,14 +282,14 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
 /**
  * \c fault method for DMA virtual memory.
  *
- * \param vma virtual memory area.
  * \param address access address.
  * \return pointer to the page structure.
  *
  * Determine the page number from the page offset and get it from drm_device_dma::pagelist.
  */
-static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_dma_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_file *priv = vma->vm_file->private_data;
        struct drm_device *dev = priv->minor->dev;
        struct drm_device_dma *dma = dev->dma;
@@ -315,14 +317,14 @@ static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 /**
  * \c fault method for scatter-gather virtual memory.
  *
- * \param vma virtual memory area.
  * \param address access address.
  * \return pointer to the page structure.
  *
  * Determine the map offset from the page offset and get it from drm_sg_mem::pagelist.
  */
-static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_sg_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_local_map *map = vma->vm_private_data;
        struct drm_file *priv = vma->vm_file->private_data;
        struct drm_device *dev = priv->minor->dev;
@@ -347,26 +349,6 @@ static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return 0;
 }
 
-static int drm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       return drm_do_vm_fault(vma, vmf);
-}
-
-static int drm_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       return drm_do_vm_shm_fault(vma, vmf);
-}
-
-static int drm_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       return drm_do_vm_dma_fault(vma, vmf);
-}
-
-static int drm_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       return drm_do_vm_sg_fault(vma, vmf);
-}
-
 /** AGP virtual memory operations */
 static const struct vm_operations_struct drm_vm_ops = {
        .fault = drm_vm_fault,
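
Nearly all of the DRM churn in this region (armada, drm_vm, etnaviv, exynos, gma500, i915, msm, omap, qxl, radeon, tegra, ttm, udl, vgem, virtio, plus cs-char and intel_th below) follows one interface change: vm_operations_struct ->fault lost its vm_area_struct argument, because the VMA is already reachable as vmf->vma. Converting a handler is mechanical; a sketch with a hypothetical lookup helper:

        static int my_fault(struct vm_fault *vmf)
        {
                struct vm_area_struct *vma = vmf->vma;  /* recover the old argument */
                struct page *page;

                /* my_lookup_page() stands in for the driver's own lookup */
                page = my_lookup_page(vma->vm_private_data, vmf->pgoff);
                if (!page)
                        return VM_FAULT_SIGBUS;
                get_page(page);
                vmf->page = page;       /* core mm maps it into the page tables */
                return 0;
        }

The same change makes the four trivial drm_vm_*_fault() forwarding wrappers above redundant, which is why they are deleted outright.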
index c255eda40526f8082d721541d4e6f2e74079fcb4..e41f38667c1c858a6f28d69d692e72160850c48a 100644 (file)
@@ -73,7 +73,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
                struct drm_file *file);
 
 int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int etnaviv_gem_fault(struct vm_fault *vmf);
 int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset);
 struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj);
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj);
index aa6e35ddc87f5b5596a5f318ad7d2c5d7098449c..e78f1406885d102af012979e00a1a7a35ee61017 100644 (file)
@@ -175,8 +175,9 @@ int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma)
        return obj->ops->mmap(obj, vma);
 }
 
-int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int etnaviv_gem_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_gem_object *obj = vma->vm_private_data;
        struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
        struct page **pages, *page;
index 57b81460fec88743816e1f6ccaefcc2c109728af..4c28f7ffcc4dd1a0593e5c37bb878f6481f1ed70 100644 (file)
@@ -447,8 +447,9 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
        return ret;
 }
 
-int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int exynos_drm_gem_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_gem_object *obj = vma->vm_private_data;
        struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
        unsigned long pfn;
index df7c543d6558bdd821b99bc97e2ad01ff5acf471..85457255fcd1aa093ddfb9a8c4e09b5d63720c7f 100644 (file)
@@ -116,7 +116,7 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
                                   uint64_t *offset);
 
 /* page fault handler and mmap fault address(virtual) to physical memory. */
-int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int exynos_drm_gem_fault(struct vm_fault *vmf);
 
 /* set vm_flags and we can change the vm attribute to other one at here. */
 int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
index da42d2e1d3976b46b267d0b6f00fc398402181c3..ffe6b4ffa1a8a75dabb1a6fcb0017d0ccb6c0f99 100644 (file)
@@ -111,8 +111,9 @@ static int psbfb_pan(struct fb_var_screeninfo *var, struct fb_info *info)
         return 0;
 }
 
-static int psbfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int psbfb_vm_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct psb_framebuffer *psbfb = vma->vm_private_data;
        struct drm_device *dev = psbfb->base.dev;
        struct drm_psb_private *dev_priv = dev->dev_private;
index 527c629176603a2369a186b4128785c1c51723ad..7da061aab7299c2b2a38e14fb784c8f233f62468 100644 (file)
@@ -164,8 +164,9 @@ int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
  *     vma->vm_private_data points to the GEM object that is backing this
  *     mapping.
  */
-int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int psb_gem_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_gem_object *obj;
        struct gtt_range *r;
        int ret;
index 05d7aaf47eea7ce0dda783b027133473f38375d8..83e22fd4cfc0111da5a3b5de8a9d3ce05ba7c7f7 100644 (file)
@@ -752,7 +752,7 @@ extern int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
                        struct drm_mode_create_dumb *args);
 extern int psb_gem_dumb_map_gtt(struct drm_file *file, struct drm_device *dev,
                        uint32_t handle, uint64_t *offset);
-extern int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int psb_gem_fault(struct vm_fault *vmf);
 
 /* psb_device.c */
 extern const struct psb_ops psb_chip_ops;
index bcc81912b5e5afa23ec9833e5991e835e202e540..0a4b42d313912c3c5b56a449cfac33e63afeb16e 100644 (file)
@@ -3352,7 +3352,7 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
                                        unsigned int flags);
 int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
-int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int i915_gem_fault(struct vm_fault *vmf);
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
                         unsigned int flags,
                         long timeout,
index 88f3628b4e29bd92707430932d1900034ac35645..6908123162d17cd998c1e7f0bf54a27064e67588 100644 (file)
@@ -1772,7 +1772,6 @@ compute_partial_view(struct drm_i915_gem_object *obj,
 
 /**
  * i915_gem_fault - fault a page into the GTT
- * @area: CPU VMA in question
  * @vmf: fault info
  *
  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
@@ -1789,9 +1788,10 @@ compute_partial_view(struct drm_i915_gem_object *obj,
  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
  */
-int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
+int i915_gem_fault(struct vm_fault *vmf)
 {
 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
+       struct vm_area_struct *area = vmf->vma;
        struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
index b42c81b42487aabe43961770772219f6bf0e31f8..7032c542a9b1d007c4bc69e328db040b88efc15d 100644 (file)
@@ -60,7 +60,7 @@ render_state_get_rodata(const struct intel_engine_cs *engine)
  * this is sufficient as the null state generator makes the final batch
  * with two passes to build command and state separately. At this point
  * the size of both are known and it compacts them by relocating the state
- * right after the commands taking care of aligment so we should sufficient
+ * right after the commands taking care of alignment so we should have sufficient
  * space below them for adding new commands.
  */
 #define OUT_BATCH(batch, i, val)                               \
index 6a8fa085b74edd91e696c167fff1bd8747042d6f..0115989e324a20c58ef4827719abd43239e78646 100644 (file)
@@ -334,7 +334,7 @@ i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
                mm->i915 = to_i915(obj->base.dev);
 
                mm->mm = current->mm;
-               atomic_inc(&current->mm->mm_count);
+               mmgrab(current->mm);
 
                mm->mn = NULL;
 
@@ -507,7 +507,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
                        flags |= FOLL_WRITE;
 
                ret = -EFAULT;
-               if (atomic_inc_not_zero(&mm->mm_users)) {
+               if (mmget_not_zero(mm)) {
                        down_read(&mm->mmap_sem);
                        while (pinned < npages) {
                                ret = get_user_pages_remote
index d5ce829b31995fa4b63adb3d20517d0662d68a46..45cf363d25ad41021e207db7017c40c6a0c53946 100644 (file)
@@ -266,7 +266,7 @@ do {                                                                        \
 do {                                                                   \
        if (MGA_VERBOSE) {                                              \
                DRM_INFO("BEGIN_DMA(%d)\n", (n));                       \
-               DRM_INFO("   space=0x%x req=0x%Zx\n",                   \
+               DRM_INFO("   space=0x%x req=0x%zx\n",                   \
                         dev_priv->prim.space, (n) * DMA_BLOCK_SIZE);   \
        }                                                               \
        prim = dev_priv->prim.start;                                    \
@@ -313,7 +313,7 @@ do {                                                                        \
 #define DMA_WRITE(offset, val)                                         \
 do {                                                                   \
        if (MGA_VERBOSE)                                                \
-               DRM_INFO("   DMA_WRITE( 0x%08x ) at 0x%04Zx\n",         \
+               DRM_INFO("   DMA_WRITE( 0x%08x ) at 0x%04zx\n",         \
                         (u32)(val), write + (offset) * sizeof(u32));   \
        *(volatile u32 *)(prim + write + (offset) * sizeof(u32)) = val; \
 } while (0)
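
The mga hunks swap the old glibc-style %Zx length modifier for C99's %zx; users were being converted tree-wide ahead of vsprintf dropping %Z support, and gcc's format checking only understands z for size_t. For reference (variable names hypothetical):

        size_t req = n * DMA_BLOCK_SIZE;        /* any size_t-typed expression */
        printk(KERN_INFO "space=0x%x req=0x%zx\n", space, req); /* %zx, never %Zx */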
index cdd7b2f8e977644114db727a3309496ef3ecada1..c3b14876edaa0b2d11e08a9edf7873c7752a674e 100644 (file)
@@ -206,7 +206,7 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev);
 int msm_gem_mmap_obj(struct drm_gem_object *obj,
                        struct vm_area_struct *vma);
 int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int msm_gem_fault(struct vm_fault *vmf);
 uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj);
 int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id,
                uint64_t *iova);
index e140b05af134ae4b92c81991b871d129ea182de0..59811f29607de60f2a22d53f9ec969e62bb39d98 100644 (file)
@@ -191,8 +191,9 @@ int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
        return msm_gem_mmap_obj(vma->vm_private_data, vma);
 }
 
-int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int msm_gem_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_gem_object *obj = vma->vm_private_data;
        struct drm_device *dev = obj->dev;
        struct msm_drm_private *priv = dev->dev_private;
index 36d93ce84a294f867e94a63a87d2e5f799da589e..65977982f15f88114f86ee4b7fc2b4bfe5faeb53 100644 (file)
@@ -188,7 +188,7 @@ int omap_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 int omap_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 int omap_gem_mmap_obj(struct drm_gem_object *obj,
                struct vm_area_struct *vma);
-int omap_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int omap_gem_fault(struct vm_fault *vmf);
 int omap_gem_op_start(struct drm_gem_object *obj, enum omap_gem_op op);
 int omap_gem_op_finish(struct drm_gem_object *obj, enum omap_gem_op op);
 int omap_gem_op_sync(struct drm_gem_object *obj, enum omap_gem_op op);
index 74a9968df421dfdc888443b0c2fd387f8f53dd1d..5d5a9f517c30e0a6a0f17fb0d93fe19eae95bd28 100644 (file)
@@ -518,7 +518,6 @@ static int fault_2d(struct drm_gem_object *obj,
 
 /**
  * omap_gem_fault              -       pagefault handler for GEM objects
- * @vma: the VMA of the GEM object
  * @vmf: fault detail
  *
  * Invoked when a fault occurs on an mmap of a GEM managed area. GEM
@@ -529,8 +528,9 @@ static int fault_2d(struct drm_gem_object *obj,
  * vma->vm_private_data points to the GEM object that is backing this
  * mapping.
  */
-int omap_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int omap_gem_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_gem_object *obj = vma->vm_private_data;
        struct omap_gem_object *omap_obj = to_omap_bo(obj);
        struct drm_device *dev = obj->dev;
index 4e1a40389964fc6416206cf51d2b88c9ed2be6b1..7d1cab57c89ea86a5eb612399d4e871652ad84c0 100644 (file)
@@ -105,15 +105,15 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev)
 static struct vm_operations_struct qxl_ttm_vm_ops;
 static const struct vm_operations_struct *ttm_vm_ops;
 
-static int qxl_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int qxl_ttm_fault(struct vm_fault *vmf)
 {
        struct ttm_buffer_object *bo;
        int r;
 
-       bo = (struct ttm_buffer_object *)vma->vm_private_data;
+       bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
        if (bo == NULL)
                return VM_FAULT_NOPAGE;
-       r = ttm_vm_ops->fault(vma, vmf);
+       r = ttm_vm_ops->fault(vmf);
        return r;
 }
 
index ab89eed9ddd9c47e688c090884c8f2561ff2dc11..4b86e8b450090664db17009d005f106902259f2a 100644 (file)
 #define HW_ASSISTED_I2C_STATUS_FAILURE          2
 #define HW_ASSISTED_I2C_STATUS_SUCCESS          1
 
-#pragma pack(1)                                       /* BIOS data must use byte aligment */
+#pragma pack(1)                                       /* BIOS data must use byte alignment */
 
 /*  Define offset to location of ROM header. */
 
@@ -3883,7 +3883,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT
 }ATOM_GPIO_PIN_ASSIGNMENT;
 
 //ucGPIO_ID pre-define id for multiple usage
-//from SMU7.x, if ucGPIO_ID=PP_AC_DC_SWITCH_GPIO_PINID in GPIO_LUTTable, AC/DC swithing feature is enable
+//from SMU7.x, if ucGPIO_ID=PP_AC_DC_SWITCH_GPIO_PINID in GPIO_LUTTable, AC/DC switching feature is enabled
 #define PP_AC_DC_SWITCH_GPIO_PINID          60
 //from SMU7.x, if ucGPIO_ID=VDDC_REGULATOR_VRHOT_GPIO_PINID in GPIO_LUTable, VRHot feature is enable
 #define VDDC_VRHOT_GPIO_PINID               61
@@ -7909,7 +7909,7 @@ typedef struct  _ATOM_POWERPLAY_INFO_V3
 
 /*********************************************************************************/
 
-#pragma pack() // BIOS data must use byte aligment
+#pragma pack() // BIOS data must use byte alignment
 
 //
 // AMD ACPI Table
index 7a10b38529702f68cc82c3557fce6fe3e13b00ec..684f1703aa5c7189bc55ca1633aeafd78a97c55a 100644 (file)
@@ -979,19 +979,19 @@ void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
 static struct vm_operations_struct radeon_ttm_vm_ops;
 static const struct vm_operations_struct *ttm_vm_ops = NULL;
 
-static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int radeon_ttm_fault(struct vm_fault *vmf)
 {
        struct ttm_buffer_object *bo;
        struct radeon_device *rdev;
        int r;
 
-       bo = (struct ttm_buffer_object *)vma->vm_private_data;  
+       bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
        if (bo == NULL) {
                return VM_FAULT_NOPAGE;
        }
        rdev = radeon_get_rdev(bo->bdev);
        down_read(&rdev->pm.mclk_lock);
-       r = ttm_vm_ops->fault(vma, vmf);
+       r = ttm_vm_ops->fault(vmf);
        up_read(&rdev->pm.mclk_lock);
        return r;
 }
index b523a5d4a38c6c57dc79ef5a34c31fa459b4589a..17e62ecb5d4d59d009ef019adb07b0335f60a87d 100644 (file)
@@ -441,8 +441,9 @@ int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
        return 0;
 }
 
-static int tegra_bo_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int tegra_bo_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_gem_object *gem = vma->vm_private_data;
        struct tegra_bo *bo = to_tegra_bo(gem);
        struct page *page;
index 88169141bef546e193ac4ed7f6f5ae7f82cba3ab..35ffb3754febc5e2acccfffe0d8c8cc9dc8a8909 100644 (file)
@@ -43,7 +43,6 @@
 #define TTM_BO_VM_NUM_PREFAULT 16
 
 static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
-                               struct vm_area_struct *vma,
                                struct vm_fault *vmf)
 {
        int ret = 0;
@@ -67,7 +66,7 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
                        goto out_unlock;
 
                ttm_bo_reference(bo);
-               up_read(&vma->vm_mm->mmap_sem);
+               up_read(&vmf->vma->vm_mm->mmap_sem);
                (void) dma_fence_wait(bo->moving, true);
                ttm_bo_unreserve(bo);
                ttm_bo_unref(&bo);
@@ -92,8 +91,9 @@ out_unlock:
        return ret;
 }
 
-static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ttm_bo_vm_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
            vma->vm_private_data;
        struct ttm_bo_device *bdev = bo->bdev;
@@ -124,7 +124,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
                        if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
                                ttm_bo_reference(bo);
-                               up_read(&vma->vm_mm->mmap_sem);
+                               up_read(&vmf->vma->vm_mm->mmap_sem);
                                (void) ttm_bo_wait_unreserved(bo);
                                ttm_bo_unref(&bo);
                        }
@@ -168,7 +168,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         * Wait for buffer data in transit, due to a pipelined
         * move.
         */
-       ret = ttm_bo_vm_fault_idle(bo, vma, vmf);
+       ret = ttm_bo_vm_fault_idle(bo, vmf);
        if (unlikely(ret != 0)) {
                retval = ret;
 
index 6c4286e57362ad5af12790c3c9d7742d5d12417c..2a75ab80527a954990c6ee2305883a97fa2ea41b 100644 (file)
@@ -134,7 +134,7 @@ void udl_gem_put_pages(struct udl_gem_object *obj);
 int udl_gem_vmap(struct udl_gem_object *obj);
 void udl_gem_vunmap(struct udl_gem_object *obj);
 int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int udl_gem_fault(struct vm_fault *vmf);
 
 int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
                      int width, int height);
index 3c0c4bd3f7506f56c165d457ba68841e9f987a57..775c50e4f02cdc7d6b6ef1ddbb35a8d67e63badd 100644 (file)
@@ -100,8 +100,9 @@ int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
        return ret;
 }
 
-int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int udl_gem_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct udl_gem_object *obj = to_udl_bo(vma->vm_private_data);
        struct page *page;
        unsigned int page_offset;
index 477e07f0ecb674e906994595e60a948e79896c30..7ccbb03e98de5db2f04319d831963aacfc9498db 100644 (file)
@@ -50,8 +50,9 @@ static void vgem_gem_free_object(struct drm_gem_object *obj)
        kfree(vgem_obj);
 }
 
-static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int vgem_gem_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct drm_vgem_gem_object *obj = vma->vm_private_data;
        /* We don't use vmf->pgoff since that has the fake offset */
        unsigned long vaddr = vmf->address;
index 9cc7079f7acad025b89beae96c88c47eb45ec9b0..70ec8ca8d9b19cb6fb1a078a2ceaeec309d57523 100644 (file)
@@ -114,18 +114,17 @@ static void virtio_gpu_ttm_global_fini(struct virtio_gpu_device *vgdev)
 static struct vm_operations_struct virtio_gpu_ttm_vm_ops;
 static const struct vm_operations_struct *ttm_vm_ops;
 
-static int virtio_gpu_ttm_fault(struct vm_area_struct *vma,
-                               struct vm_fault *vmf)
+static int virtio_gpu_ttm_fault(struct vm_fault *vmf)
 {
        struct ttm_buffer_object *bo;
        struct virtio_gpu_device *vgdev;
        int r;
 
-       bo = (struct ttm_buffer_object *)vma->vm_private_data;
+       bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
        if (bo == NULL)
                return VM_FAULT_NOPAGE;
        vgdev = virtio_gpu_get_vgdev(bo->bdev);
-       r = ttm_vm_ops->fault(vma, vmf);
+       r = ttm_vm_ops->fault(vmf);
        return r;
 }
 #endif
index 0dd1167b2c9b5f87b3e7ca3c828e38b171c043fb..9c113f62472d8495db054e0aac99014fc6b570d4 100644 (file)
@@ -487,7 +487,7 @@ static __u8 *kye_consumer_control_fixup(struct hid_device *hdev, __u8 *rdesc,
                unsigned int *rsize, int offset, const char *device_name) {
        /*
         * the fixup that need to be done:
-        *   - change Usage Maximum in the Comsumer Control
+        *   - change Usage Maximum in the Consumer Control
         *     (report ID 3) to a reasonable value
         */
        if (*rsize >= offset + 31 &&
index 3deef6cc7d7c230dfa3527b5a53b9089239c9de6..7175e6bedf2185876b622d4467bc23e61d75ca21 100644 (file)
@@ -1098,9 +1098,9 @@ static void cs_hsi_stop(struct cs_hsi_iface *hi)
        kfree(hi);
 }
 
-static int cs_char_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int cs_char_vma_fault(struct vm_fault *vmf)
 {
-       struct cs_char *csdata = vma->vm_private_data;
+       struct cs_char *csdata = vmf->vma->vm_private_data;
        struct page *page;
 
        page = virt_to_page(csdata->mmap_base);
index 6dca2fd3d3036d9bf81bff7f794315375ec6ef67..6d1208b2b6d2e1bfff864a7e5facd0ae801af1ac 100644 (file)
@@ -861,7 +861,7 @@ static ssize_t fan1_pulses_store(struct device *dev,
  * (i.e. closed or open-loop).
  *
  * Following documentation about hwmon's sysfs interface, a pwm1_enable node
- * should accept followings:
+ * should accept the following:
  *
  *  0 : no fan speed control (i.e. fan at full speed)
  *  1 : manual fan speed control enabled (use pwm[1-*]) (open-loop)
index e8d55a153a654881d4339712872a87e5b407c372..e88afe1a435cc7fc9be0cb15cf7eee8bfcf8ff6e 100644 (file)
@@ -1188,9 +1188,9 @@ static void msc_mmap_close(struct vm_area_struct *vma)
        mutex_unlock(&msc->buf_mutex);
 }
 
-static int msc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int msc_mmap_fault(struct vm_fault *vmf)
 {
-       struct msc_iter *iter = vma->vm_file->private_data;
+       struct msc_iter *iter = vmf->vma->vm_file->private_data;
        struct msc *msc = iter->msc;
 
        vmf->page = msc_buffer_get_page(msc, vmf->pgoff);
@@ -1198,7 +1198,7 @@ static int msc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
 
        get_page(vmf->page);
-       vmf->page->mapping = vma->vm_file->f_mapping;
+       vmf->page->mapping = vmf->vma->vm_file->f_mapping;
        vmf->page->index = vmf->pgoff;
 
        return 0;
index 0cdc8443deab611d6574ba07b77ddec95ee5cebd..8adc0f1d7ad0ba72240fcae0ff2feb8e7c517456 100644 (file)
@@ -128,6 +128,7 @@ config I2C_I801
            DNV (SOC)
            Broxton (SOC)
            Lewisburg (PCH)
+           Gemini Lake (SOC)
 
          This driver can also be built as a module.  If so, the module
          will be called i2c-i801.
@@ -886,6 +887,16 @@ config I2C_ST
          This driver can also be built as module. If so, the module
          will be called i2c-st.
 
+config I2C_STM32F4
+       tristate "STMicroelectronics STM32F4 I2C support"
+       depends on ARCH_STM32 || COMPILE_TEST
+       help
+         Enable this option to add support for STM32 I2C controller embedded
+         in STM32F4 SoCs.
+
+         This driver can also be built as module. If so, the module
+         will be called i2c-stm32f4.
+
 config I2C_STU300
        tristate "ST Microelectronics DDC I2C interface"
        depends on MACH_U300
@@ -919,6 +930,17 @@ config I2C_TEGRA
          If you say yes to this option, support will be included for the
          I2C controller embedded in NVIDIA Tegra SOCs
 
+config I2C_TEGRA_BPMP
+       tristate "NVIDIA Tegra BPMP I2C controller"
+       depends on TEGRA_BPMP
+       help
+         If you say yes to this option, support will be included for the I2C
+         controller embedded in NVIDIA Tegra SoCs accessed via the BPMP.
+
+         This I2C driver is a 'virtual' I2C driver. The real driver is part
+         of the BPMP firmware, and this driver merely communicates with that
+         real driver.
+
 config I2C_UNIPHIER
        tristate "UniPhier FIFO-less I2C controller"
        depends on ARCH_UNIPHIER || COMPILE_TEST
index 1c1bac87a9db8cb9a4ee35792a52571262051bc8..30b60855fbcd1e353d4dbd687d480a52f4811761 100644 (file)
@@ -85,9 +85,11 @@ obj-$(CONFIG_I2C_SH_MOBILE)  += i2c-sh_mobile.o
 obj-$(CONFIG_I2C_SIMTEC)       += i2c-simtec.o
 obj-$(CONFIG_I2C_SIRF)         += i2c-sirf.o
 obj-$(CONFIG_I2C_ST)           += i2c-st.o
+obj-$(CONFIG_I2C_STM32F4)      += i2c-stm32f4.o
 obj-$(CONFIG_I2C_STU300)       += i2c-stu300.o
 obj-$(CONFIG_I2C_SUN6I_P2WI)   += i2c-sun6i-p2wi.o
 obj-$(CONFIG_I2C_TEGRA)                += i2c-tegra.o
+obj-$(CONFIG_I2C_TEGRA_BPMP)   += i2c-tegra-bpmp.o
 obj-$(CONFIG_I2C_UNIPHIER)     += i2c-uniphier.o
 obj-$(CONFIG_I2C_UNIPHIER_F)   += i2c-uniphier-f.o
 obj-$(CONFIG_I2C_VERSATILE)    += i2c-versatile.o
index 0b86c6173e073806a4d0021a6f98cacfa082566a..fabbb9e49161293c0f17ad8d8998ac016612a34b 100644 (file)
@@ -820,7 +820,7 @@ static u32 at91_twi_func(struct i2c_adapter *adapter)
                | I2C_FUNC_SMBUS_READ_BLOCK_DATA;
 }
 
-static struct i2c_algorithm at91_twi_algorithm = {
+static const struct i2c_algorithm at91_twi_algorithm = {
        .master_xfer    = at91_twi_xfer,
        .functionality  = at91_twi_func,
 };
@@ -1180,6 +1180,7 @@ static int at91_twi_suspend_noirq(struct device *dev)
 
 static int at91_twi_resume_noirq(struct device *dev)
 {
+       struct at91_twi_dev *twi_dev = dev_get_drvdata(dev);
        int ret;
 
        if (!pm_runtime_status_suspended(dev)) {
@@ -1191,6 +1192,8 @@ static int at91_twi_resume_noirq(struct device *dev)
        pm_runtime_mark_last_busy(dev);
        pm_request_autosuspend(dev);
 
+       at91_init_twi_bus(twi_dev);
+
        return 0;
 }
 
index c3436f627028477f7e21b47e079fd5ab06ec188a..cd07a69e2e9355540442785f95e90823b05c9d10 100644 (file)
@@ -195,7 +195,9 @@ static irqreturn_t bcm2835_i2c_isr(int this_irq, void *data)
        }
 
        if (val & BCM2835_I2C_S_DONE) {
-               if (i2c_dev->curr_msg->flags & I2C_M_RD) {
+               if (!i2c_dev->curr_msg) {
+                       dev_err(i2c_dev->dev, "Got unexpected interrupt (from firmware?)\n");
+               } else if (i2c_dev->curr_msg->flags & I2C_M_RD) {
                        bcm2835_drain_rxfifo(i2c_dev);
                        val = bcm2835_i2c_readl(i2c_dev, BCM2835_I2C_S);
                }
index 29d00c4f7824c2b3a9deaa41749567a07eb2c123..9fe942b8c6104713e349436add398b1acabb8eb5 100644 (file)
@@ -563,7 +563,7 @@ static u32 bfin_twi_functionality(struct i2c_adapter *adap)
               I2C_FUNC_I2C | I2C_FUNC_SMBUS_I2C_BLOCK;
 }
 
-static struct i2c_algorithm bfin_twi_algorithm = {
+static const struct i2c_algorithm bfin_twi_algorithm = {
        .master_xfer   = bfin_twi_master_xfer,
        .smbus_xfer    = bfin_twi_smbus_xfer,
        .functionality = bfin_twi_functionality,
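
The at91 and bfin hunks above, and a run of sibling hunks below (i2c-dw, eg20t, em-i2c, imx-lpi2c, imx, nforce2, osif, sh_mobile, st, plus omap's dev_pm_ops), are pure constification: the ops tables are never written after initialization, so declaring them const lets them live in rodata. No cast is needed at registration time, since struct i2c_adapter::algo is already a const struct i2c_algorithm pointer. A sketch with hypothetical handlers:

        /* sketch: ops table constified so it lands in rodata */
        static const struct i2c_algorithm my_i2c_algo = {
                .master_xfer   = my_i2c_xfer,
                .functionality = my_i2c_func,
        };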
index 9b36a7b3befda300bd9a6371f91a7253d173c501..eb76b76f4754edfed8ba4641d854de97c593969e 100644 (file)
@@ -154,8 +154,10 @@ static int ec_i2c_parse_response(const u8 *buf, struct i2c_msg i2c_msgs[],
        resp = (const struct ec_response_i2c_passthru *)buf;
        if (resp->i2c_status & EC_I2C_STATUS_TIMEOUT)
                return -ETIMEDOUT;
+       else if (resp->i2c_status & EC_I2C_STATUS_NAK)
+               return -ENXIO;
        else if (resp->i2c_status & EC_I2C_STATUS_ERROR)
-               return -EREMOTEIO;
+               return -EIO;
 
        /* Other side could send us back fewer messages, but not more */
        if (resp->num_msgs > *num)
@@ -222,10 +224,8 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
        }
 
        result = ec_i2c_parse_response(msg->data, i2c_msgs, &num);
-       if (result < 0) {
-               dev_err(dev, "Error parsing EC i2c message %d\n", result);
+       if (result < 0)
                goto exit;
-       }
 
        /* Indicate success by saying how many messages were sent */
        result = num;
index e9db857c62268c61099cb34a91c8f7ccf3e5284a..7a3faa551cf8511d62ab2f5c82c0fc109980b4f5 100644 (file)
@@ -820,7 +820,7 @@ static u32 i2c_dw_func(struct i2c_adapter *adap)
        return dev->functionality;
 }
 
-static struct i2c_algorithm i2c_dw_algo = {
+static const struct i2c_algorithm i2c_dw_algo = {
        .master_xfer    = i2c_dw_xfer,
        .functionality  = i2c_dw_func,
 };
index 5ce71ce7b6c43b7ce8fd7e7586127650c5e2e508..bdeab0174fec2099b4572bbe9dea6bd9d32514db 100644 (file)
@@ -715,7 +715,7 @@ static u32 pch_i2c_func(struct i2c_adapter *adap)
        return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_10BIT_ADDR;
 }
 
-static struct i2c_algorithm pch_algorithm = {
+static const struct i2c_algorithm pch_algorithm = {
        .master_xfer = pch_i2c_xfer,
        .functionality = pch_i2c_func
 };
index 96bb4e7490128c9e72e21df107cba70752e3f807..3129127088542728e2f2e73993a956f2d589800a 100644 (file)
@@ -347,7 +347,7 @@ static int em_i2c_unreg_slave(struct i2c_client *slave)
        return 0;
 }
 
-static struct i2c_algorithm em_i2c_algo = {
+static const struct i2c_algorithm em_i2c_algo = {
        .master_xfer = em_i2c_xfer,
        .functionality = em_i2c_func,
        .reg_slave      = em_i2c_reg_slave,
index bea607149972d4c3e7f412ff0cd28a1d0df9c6e7..cbd93ce0661f225dd0492baef8e91a255079ac21 100644 (file)
 /* I2C_TRANS_STATUS register bits */
 #define HSI2C_MASTER_BUSY                      (1u << 17)
 #define HSI2C_SLAVE_BUSY                       (1u << 16)
+
+/* I2C_TRANS_STATUS register bits for Exynos5 variant */
 #define HSI2C_TIMEOUT_AUTO                     (1u << 4)
 #define HSI2C_NO_DEV                           (1u << 3)
 #define HSI2C_NO_DEV_ACK                       (1u << 2)
 #define HSI2C_TRANS_ABORT                      (1u << 1)
 #define HSI2C_TRANS_DONE                       (1u << 0)
 
+/* I2C_TRANS_STATUS register bits for Exynos7 variant */
+#define HSI2C_MASTER_ST_MASK                   0xf
+#define HSI2C_MASTER_ST_IDLE                   0x0
+#define HSI2C_MASTER_ST_START                  0x1
+#define HSI2C_MASTER_ST_RESTART                        0x2
+#define HSI2C_MASTER_ST_STOP                   0x3
+#define HSI2C_MASTER_ST_MASTER_ID              0x4
+#define HSI2C_MASTER_ST_ADDR0                  0x5
+#define HSI2C_MASTER_ST_ADDR1                  0x6
+#define HSI2C_MASTER_ST_ADDR2                  0x7
+#define HSI2C_MASTER_ST_ADDR_SR                        0x8
+#define HSI2C_MASTER_ST_READ                   0x9
+#define HSI2C_MASTER_ST_WRITE                  0xa
+#define HSI2C_MASTER_ST_NO_ACK                 0xb
+#define HSI2C_MASTER_ST_LOSE                   0xc
+#define HSI2C_MASTER_ST_WAIT                   0xd
+#define HSI2C_MASTER_ST_WAIT_CMD               0xe
+
 /* I2C_ADDR register bits */
 #define HSI2C_SLV_ADDR_SLV(x)                  ((x & 0x3ff) << 0)
 #define HSI2C_SLV_ADDR_MAS(x)                  ((x & 0x3ff) << 10)
@@ -437,6 +457,7 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
 
        int_status = readl(i2c->regs + HSI2C_INT_STATUS);
        writel(int_status, i2c->regs + HSI2C_INT_STATUS);
+       trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
 
        /* handle interrupt related to the transfer status */
        if (i2c->variant->hw == HSI2C_EXYNOS7) {
@@ -460,8 +481,12 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
                        i2c->state = -ETIMEDOUT;
                        goto stop;
                }
+
+               if ((trans_status & HSI2C_MASTER_ST_MASK) == HSI2C_MASTER_ST_LOSE) {
+                       i2c->state = -EAGAIN;
+                       goto stop;
+               }
        } else if (int_status & HSI2C_INT_I2C) {
-               trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
                if (trans_status & HSI2C_NO_DEV_ACK) {
                        dev_dbg(i2c->dev, "No ACK from device\n");
                        i2c->state = -ENXIO;
@@ -502,8 +527,13 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
                fifo_level = HSI2C_TX_FIFO_LVL(fifo_status);
 
                len = i2c->variant->fifo_depth - fifo_level;
-               if (len > (i2c->msg->len - i2c->msg_ptr))
+               if (len > (i2c->msg->len - i2c->msg_ptr)) {
+                       u32 int_en = readl(i2c->regs + HSI2C_INT_ENABLE);
+
+                       int_en &= ~HSI2C_INT_TX_ALMOSTEMPTY_EN;
+                       writel(int_en, i2c->regs + HSI2C_INT_ENABLE);
                        len = i2c->msg->len - i2c->msg_ptr;
+               }
 
                while (len > 0) {
                        byte = i2c->msg->buf[i2c->msg_ptr++];
index e242db43774bb68fd91ce4a45a3ac73217ce2d7d..6484fa6dbb84a35df7d90df5b595ac5db74ea099 100644 (file)
@@ -65,6 +65,7 @@
  * Lewisburg (PCH)             0xa1a3  32      hard    yes     yes     yes
  * Lewisburg Supersku (PCH)    0xa223  32      hard    yes     yes     yes
  * Kaby Lake PCH-H (PCH)       0xa2a3  32      hard    yes     yes     yes
+ * Gemini Lake (SOC)           0x31d4  32      hard    yes     yes     yes
  *
  * Features supported by this driver:
  * Software PEC                                no
 #define PCI_DEVICE_ID_INTEL_BRASWELL_SMBUS             0x2292
 #define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS             0x2330
 #define PCI_DEVICE_ID_INTEL_COLETOCREEK_SMBUS          0x23b0
+#define PCI_DEVICE_ID_INTEL_GEMINILAKE_SMBUS           0x31d4
 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS                0x3b30
 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS              0x5ad4
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_SMBUS            0x8c22
@@ -1012,6 +1014,7 @@ static const struct pci_device_id i801_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS1) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS2) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COLETOCREEK_SMBUS) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GEMINILAKE_SMBUS) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WILDCATPOINT_SMBUS) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_SMBUS) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS) },
index 3310f2e0dbd3b4e8ef2fb0386530c11deba4192f..e86801a631206f9d54f6faba57a6050992d5e027 100644 (file)
@@ -538,7 +538,7 @@ static u32 lpi2c_imx_func(struct i2c_adapter *adapter)
                I2C_FUNC_SMBUS_READ_BLOCK_DATA;
 }
 
-static struct i2c_algorithm lpi2c_imx_algo = {
+static const struct i2c_algorithm lpi2c_imx_algo = {
        .master_xfer    = lpi2c_imx_xfer,
        .functionality  = lpi2c_imx_func,
 };
index 47fc1f1acff7db60a6cf909a43f6d443c04ca91e..95ed17183e73e904e06b13c383bee28410797172 100644 (file)
@@ -1037,7 +1037,7 @@ static u32 i2c_imx_func(struct i2c_adapter *adapter)
                | I2C_FUNC_SMBUS_READ_BLOCK_DATA;
 }
 
-static struct i2c_algorithm i2c_imx_algo = {
+static const struct i2c_algorithm i2c_imx_algo = {
        .master_xfer    = i2c_imx_xfer,
        .functionality  = i2c_imx_func,
 };
index b4dec0841bc27396b723acf218351b0bf5ab2ed0..a50bd6891e27b220e3ddaa93f23dc194b2dd542b 100644 (file)
@@ -977,11 +977,32 @@ mv64xxx_i2c_remove(struct platform_device *dev)
        return 0;
 }
 
+#ifdef CONFIG_PM
+static int mv64xxx_i2c_resume(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct mv64xxx_i2c_data *drv_data = platform_get_drvdata(pdev);
+
+       mv64xxx_i2c_hw_init(drv_data);
+
+       return 0;
+}
+
+static const struct dev_pm_ops mv64xxx_i2c_pm = {
+       .resume = mv64xxx_i2c_resume,
+};
+
+#define mv64xxx_i2c_pm_ops (&mv64xxx_i2c_pm)
+#else
+#define mv64xxx_i2c_pm_ops NULL
+#endif
+
 static struct platform_driver mv64xxx_i2c_driver = {
        .probe  = mv64xxx_i2c_probe,
        .remove = mv64xxx_i2c_remove,
        .driver = {
                .name   = MV64XXX_I2C_CTLR_NAME,
+               .pm     = mv64xxx_i2c_pm_ops,
                .of_match_table = mv64xxx_i2c_of_match_table,
        },
 };
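
For comparison, the same resume hook could be wired up with the kernel's helper macro, which defines a const struct dev_pm_ops and compiles the sleep ops away when sleep support is off; note the macro guards on CONFIG_PM_SLEEP, whereas the patch's explicit #ifdef guards on the broader CONFIG_PM. A sketch of the alternative, not what was merged:

        /* sketch: SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) */
        static SIMPLE_DEV_PM_OPS(mv64xxx_i2c_pm, NULL, mv64xxx_i2c_resume);
        /* then unconditionally: .driver = { .pm = &mv64xxx_i2c_pm, ... } */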
index 374b35e7e450ce55ea97f7438864100c90f68b66..3241bb9d6c186985ae11d38c1e7511b6d4aaea1d 100644 (file)
@@ -296,7 +296,7 @@ static u32 nforce2_func(struct i2c_adapter *adapter)
                I2C_FUNC_SMBUS_BLOCK_DATA : 0);
 }
 
-static struct i2c_algorithm smbus_algorithm = {
+static const struct i2c_algorithm smbus_algorithm = {
        .smbus_xfer     = nforce2_access,
        .functionality  = nforce2_func,
 };
index e160f838c25461f111f89871eac87beebca61a09..aa3c8f4771c1b60538f046526be52a27ce1baca2 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/i2c-smbus.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
-#include <linux/pci.h>
 
 /* Controller command patterns */
 #define SW_TWSI_V              BIT_ULL(63)     /* Valid bit */
@@ -118,9 +117,6 @@ struct octeon_i2c {
        void (*hlc_int_disable)(struct octeon_i2c *);
        atomic_t int_enable_cnt;
        atomic_t hlc_int_enable_cnt;
-#if IS_ENABLED(CONFIG_I2C_THUNDERX)
-       struct msix_entry i2c_msix;
-#endif
        struct i2c_smbus_alert_setup alert_data;
        struct i2c_client *ara;
 };
index c7da0c42baeefa8689693a9b3d237ccaa7dd5db7..1ebb5e947e0b6625fcf0cda7a71f51e79ac29178 100644 (file)
@@ -1504,7 +1504,7 @@ static int omap_i2c_runtime_resume(struct device *dev)
        return 0;
 }
 
-static struct dev_pm_ops omap_i2c_pm_ops = {
+static const struct dev_pm_ops omap_i2c_pm_ops = {
        SET_RUNTIME_PM_OPS(omap_i2c_runtime_suspend,
                           omap_i2c_runtime_resume, NULL)
 };
index 6263ea82d6ac3b10b97905647c1dc6034b78c100..8f11d347b3ec482815e37d3170fa6abef4537c31 100644 (file)
@@ -80,6 +80,7 @@
 #define ICIER_TEIE     0x40
 #define ICIER_RIE      0x20
 #define ICIER_NAKIE    0x10
+#define ICIER_SPIE     0x08
 
 #define ICSR2_NACKF    0x10
 
@@ -216,11 +217,10 @@ static irqreturn_t riic_tend_isr(int irq, void *data)
                return IRQ_NONE;
        }
 
-       if (riic->is_last || riic->err)
+       if (riic->is_last || riic->err) {
+               riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER);
                writeb(ICCR2_SP, riic->base + RIIC_ICCR2);
-
-       writeb(0, riic->base + RIIC_ICIER);
-       complete(&riic->msg_done);
+       }
 
        return IRQ_HANDLED;
 }
@@ -240,13 +240,13 @@ static irqreturn_t riic_rdrf_isr(int irq, void *data)
 
        if (riic->bytes_left == 1) {
                /* STOP must come before we set ACKBT! */
-               if (riic->is_last)
+               if (riic->is_last) {
+                       riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER);
                        writeb(ICCR2_SP, riic->base + RIIC_ICCR2);
+               }
 
                riic_clear_set_bit(riic, 0, ICMR3_ACKBT, RIIC_ICMR3);
 
-               writeb(0, riic->base + RIIC_ICIER);
-               complete(&riic->msg_done);
        } else {
                riic_clear_set_bit(riic, ICMR3_ACKBT, 0, RIIC_ICMR3);
        }
@@ -259,6 +259,21 @@ static irqreturn_t riic_rdrf_isr(int irq, void *data)
        return IRQ_HANDLED;
 }
 
+static irqreturn_t riic_stop_isr(int irq, void *data)
+{
+       struct riic_dev *riic = data;
+
+       /* read back registers to confirm writes have fully propagated */
+       writeb(0, riic->base + RIIC_ICSR2);
+       readb(riic->base + RIIC_ICSR2);
+       writeb(0, riic->base + RIIC_ICIER);
+       readb(riic->base + RIIC_ICIER);
+
+       complete(&riic->msg_done);
+
+       return IRQ_HANDLED;
+}
+
 static u32 riic_func(struct i2c_adapter *adap)
 {
        return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
@@ -326,6 +341,7 @@ static struct riic_irq_desc riic_irqs[] = {
        { .res_num = 0, .isr = riic_tend_isr, .name = "riic-tend" },
        { .res_num = 1, .isr = riic_rdrf_isr, .name = "riic-rdrf" },
        { .res_num = 2, .isr = riic_tdre_isr, .name = "riic-tdre" },
+       { .res_num = 3, .isr = riic_stop_isr, .name = "riic-stop" },
        { .res_num = 5, .isr = riic_tend_isr, .name = "riic-nack" },
 };
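
The net effect of the riic changes above: the TEND and RDRF handlers no longer complete the transfer themselves; they issue the STOP condition and enable its interrupt (ICIER_SPIE), and only the new riic-stop handler, which also reads the registers back to confirm the writes have propagated, signals msg_done. The waiting side of such a scheme looks roughly like this (timeout and return plumbing hypothetical):

        init_completion(&riic->msg_done);
        /* program ICCR2/ICIER and start the transfer; the ISRs take over */
        if (!wait_for_completion_timeout(&riic->msg_done, 5 * HZ))
                return -ETIMEDOUT;      /* the riic-stop IRQ never fired */
        return riic->err ? riic->err : num_msgs;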
 
index 89d8b41b66680f8ab4326ded52168452675d54be..9c0f52b7ff7ec4a04660c4fe2b00d471463531a9 100644 (file)
@@ -117,7 +117,7 @@ static u32 osif_func(struct i2c_adapter *adapter)
        return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
-static struct i2c_algorithm osif_algorithm = {
+static const struct i2c_algorithm osif_algorithm = {
        .master_xfer    = osif_xfer,
        .functionality  = osif_func,
 };
index 3d9ebe6e57166eb1ef240b9cfdaa7b680e2105b7..3d75593487454cfb9ca6b837d43a756a5fbc79c7 100644 (file)
@@ -781,7 +781,7 @@ static u32 sh_mobile_i2c_func(struct i2c_adapter *adapter)
        return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_PROTOCOL_MANGLING;
 }
 
-static struct i2c_algorithm sh_mobile_i2c_algorithm = {
+static const struct i2c_algorithm sh_mobile_i2c_algorithm = {
        .functionality  = sh_mobile_i2c_func,
        .master_xfer    = sh_mobile_i2c_xfer,
 };
index 1371547ce1a3a811dae12cdea2bd0592ec4342f7..1eb9fa82dcfd9261cb2aa67a5fb07c2cce3724de 100644 (file)
@@ -776,7 +776,7 @@ static u32 st_i2c_func(struct i2c_adapter *adap)
        return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
-static struct i2c_algorithm st_i2c_algo = {
+static const struct i2c_algorithm st_i2c_algo = {
        .master_xfer = st_i2c_xfer,
        .functionality = st_i2c_func,
 };
diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c
new file mode 100644 (file)
index 0000000..f9dd7e8
--- /dev/null
@@ -0,0 +1,897 @@
+/*
+ * Driver for STMicroelectronics STM32 I2C controller
+ *
+ * This I2C controller is described in the STM32F429/439 SoC reference manual.
+ * Please see below a link to the documentation:
+ * http://www.st.com/resource/en/reference_manual/DM00031020.pdf
+ *
+ * Copyright (C) M'boumba Cedric Madianga 2016
+ * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *
+ * This driver is based on i2c-st.c
+ *
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+/* STM32F4 I2C offset registers */
+#define STM32F4_I2C_CR1                        0x00
+#define STM32F4_I2C_CR2                        0x04
+#define STM32F4_I2C_DR                 0x10
+#define STM32F4_I2C_SR1                        0x14
+#define STM32F4_I2C_SR2                        0x18
+#define STM32F4_I2C_CCR                        0x1C
+#define STM32F4_I2C_TRISE              0x20
+#define STM32F4_I2C_FLTR               0x24
+
+/* STM32F4 I2C control 1 */
+#define STM32F4_I2C_CR1_POS            BIT(11)
+#define STM32F4_I2C_CR1_ACK            BIT(10)
+#define STM32F4_I2C_CR1_STOP           BIT(9)
+#define STM32F4_I2C_CR1_START          BIT(8)
+#define STM32F4_I2C_CR1_PE             BIT(0)
+
+/* STM32F4 I2C control 2 */
+#define STM32F4_I2C_CR2_FREQ_MASK      GENMASK(5, 0)
+#define STM32F4_I2C_CR2_FREQ(n)                ((n) & STM32F4_I2C_CR2_FREQ_MASK)
+#define STM32F4_I2C_CR2_ITBUFEN                BIT(10)
+#define STM32F4_I2C_CR2_ITEVTEN                BIT(9)
+#define STM32F4_I2C_CR2_ITERREN                BIT(8)
+#define STM32F4_I2C_CR2_IRQ_MASK       (STM32F4_I2C_CR2_ITBUFEN | \
+                                        STM32F4_I2C_CR2_ITEVTEN | \
+                                        STM32F4_I2C_CR2_ITERREN)
+
+/* STM32F4 I2C Status 1 */
+#define STM32F4_I2C_SR1_AF             BIT(10)
+#define STM32F4_I2C_SR1_ARLO           BIT(9)
+#define STM32F4_I2C_SR1_BERR           BIT(8)
+#define STM32F4_I2C_SR1_TXE            BIT(7)
+#define STM32F4_I2C_SR1_RXNE           BIT(6)
+#define STM32F4_I2C_SR1_BTF            BIT(2)
+#define STM32F4_I2C_SR1_ADDR           BIT(1)
+#define STM32F4_I2C_SR1_SB             BIT(0)
+#define STM32F4_I2C_SR1_ITEVTEN_MASK   (STM32F4_I2C_SR1_BTF | \
+                                        STM32F4_I2C_SR1_ADDR | \
+                                        STM32F4_I2C_SR1_SB)
+#define STM32F4_I2C_SR1_ITBUFEN_MASK   (STM32F4_I2C_SR1_TXE | \
+                                        STM32F4_I2C_SR1_RXNE)
+#define STM32F4_I2C_SR1_ITERREN_MASK   (STM32F4_I2C_SR1_AF | \
+                                        STM32F4_I2C_SR1_ARLO | \
+                                        STM32F4_I2C_SR1_BERR)
+
+/* STM32F4 I2C Status 2 */
+#define STM32F4_I2C_SR2_BUSY           BIT(1)
+
+/* STM32F4 I2C Control Clock */
+#define STM32F4_I2C_CCR_CCR_MASK       GENMASK(11, 0)
+#define STM32F4_I2C_CCR_CCR(n)         ((n) & STM32F4_I2C_CCR_CCR_MASK)
+#define STM32F4_I2C_CCR_FS             BIT(15)
+#define STM32F4_I2C_CCR_DUTY           BIT(14)
+
+/* STM32F4 I2C Trise */
+#define STM32F4_I2C_TRISE_VALUE_MASK   GENMASK(5, 0)
+#define STM32F4_I2C_TRISE_VALUE(n)     ((n) & STM32F4_I2C_TRISE_VALUE_MASK)
+
+#define STM32F4_I2C_MIN_STANDARD_FREQ  2U
+#define STM32F4_I2C_MIN_FAST_FREQ      6U
+#define STM32F4_I2C_MAX_FREQ           46U
+#define HZ_TO_MHZ                      1000000
+
+enum stm32f4_i2c_speed {
+       STM32F4_I2C_SPEED_STANDARD, /* 100 kHz */
+       STM32F4_I2C_SPEED_FAST, /* 400 kHz */
+       STM32F4_I2C_SPEED_END,
+};
+
+/**
+ * struct stm32f4_i2c_msg - client specific data
+ * @addr: 8-bit slave addr, including r/w bit
+ * @count: number of bytes to be transferred
+ * @buf: data buffer
+ * @result: result of the transfer
+ * @stop: last I2C msg to be sent, i.e. STOP to be generated
+ */
+struct stm32f4_i2c_msg {
+       u8 addr;
+       u32 count;
+       u8 *buf;
+       int result;
+       bool stop;
+};
+
+/**
+ * struct stm32f4_i2c_dev - private data of the controller
+ * @adap: I2C adapter for this controller
+ * @dev: device for this controller
+ * @base: virtual memory area
+ * @complete: completion of I2C message
+ * @clk: hw i2c clock
+ * @speed: I2C clock frequency of the controller. Standard or Fast are supported
+ * @parent_rate: I2C clock parent rate in MHz
+ * @msg: I2C transfer information
+ */
+struct stm32f4_i2c_dev {
+       struct i2c_adapter adap;
+       struct device *dev;
+       void __iomem *base;
+       struct completion complete;
+       struct clk *clk;
+       int speed;
+       int parent_rate;
+       struct stm32f4_i2c_msg msg;
+};
+
+static inline void stm32f4_i2c_set_bits(void __iomem *reg, u32 mask)
+{
+       writel_relaxed(readl_relaxed(reg) | mask, reg);
+}
+
+static inline void stm32f4_i2c_clr_bits(void __iomem *reg, u32 mask)
+{
+       writel_relaxed(readl_relaxed(reg) & ~mask, reg);
+}
+
+static void stm32f4_i2c_disable_irq(struct stm32f4_i2c_dev *i2c_dev)
+{
+       void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR2;
+
+       stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR2_IRQ_MASK);
+}
+
+static int stm32f4_i2c_set_periph_clk_freq(struct stm32f4_i2c_dev *i2c_dev)
+{
+       u32 freq;
+       u32 cr2 = 0;
+
+       i2c_dev->parent_rate = clk_get_rate(i2c_dev->clk);
+       freq = DIV_ROUND_UP(i2c_dev->parent_rate, HZ_TO_MHZ);
+
+       if (i2c_dev->speed == STM32F4_I2C_SPEED_STANDARD) {
+               /*
+                * To reach 100 kHz, the parent clk frequency should be between
+                * a minimum value of 2 MHz and a maximum value of 46 MHz due
+                * to hardware limitation
+                */
+               if (freq < STM32F4_I2C_MIN_STANDARD_FREQ ||
+                   freq > STM32F4_I2C_MAX_FREQ) {
+                       dev_err(i2c_dev->dev,
+                               "bad parent clk freq for standard mode\n");
+                       return -EINVAL;
+               }
+       } else {
+               /*
+                * To be as close as possible to 400 kHz, the parent clk
+                * frequency should be between a minimum value of 6 MHz and a
+                * maximum value of 46 MHz due to hardware limitation
+                */
+               if (freq < STM32F4_I2C_MIN_FAST_FREQ ||
+                   freq > STM32F4_I2C_MAX_FREQ) {
+                       dev_err(i2c_dev->dev,
+                               "bad parent clk freq for fast mode\n");
+                       return -EINVAL;
+               }
+       }
+
+       cr2 |= STM32F4_I2C_CR2_FREQ(freq);
+       writel_relaxed(cr2, i2c_dev->base + STM32F4_I2C_CR2);
+
+       return 0;
+}
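+
+/*
+ * For example, a 16 MHz parent clock gives freq = DIV_ROUND_UP(16000000,
+ * 1000000) = 16, so the FREQ[5:0] bits in CR2 are programmed with 16.
+ */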
+
+static void stm32f4_i2c_set_rise_time(struct stm32f4_i2c_dev *i2c_dev)
+{
+       u32 freq = DIV_ROUND_UP(i2c_dev->parent_rate, HZ_TO_MHZ);
+       u32 trise;
+
+       /*
+        * These bits must be programmed with the maximum SCL rise time given in
+        * the I2C bus specification, incremented by 1.
+        *
+        * In standard mode, the maximum allowed SCL rise time is 1000 ns.
+        * If, in the I2C_CR2 register, the value of the FREQ[5:0] bits is
+        * 0x08, the period is 125 ns, so the TRISE[5:0] bits must be
+        * programmed with 0x9 (1000 ns / 125 ns + 1).
+        * So, for I2C standard mode TRISE = FREQ[5:0] + 1
+        *
+        * In fast mode, the maximum allowed SCL rise time is 300 ns.
+        * If, in the I2C_CR2 register, the value of the FREQ[5:0] bits is
+        * 0x08, the period is 125 ns, so the TRISE[5:0] bits must be
+        * programmed with 0x3 (300 ns / 125 ns + 1).
+        * So, for I2C fast mode TRISE = FREQ[5:0] * 300 / 1000 + 1
+        *
+        * Function stm32f4_i2c_set_periph_clk_freq made sure that parent rate
+        * is not higher than 46 MHz. As a result trise is at most 6 bits wide
+        * and so fits into the TRISE bits [5:0].
+        */
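+       /*
+        * For instance, at the maximum allowed FREQ of 46, standard mode
+        * gives trise = 46 + 1 = 47 and fast mode gives
+        * trise = 46 * 3 / 10 + 1 = 14, both of which fit in TRISE[5:0].
+        */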
+       if (i2c_dev->speed == STM32F4_I2C_SPEED_STANDARD)
+               trise = freq + 1;
+       else
+               trise = freq * 3 / 10 + 1;
+
+       writel_relaxed(STM32F4_I2C_TRISE_VALUE(trise),
+                      i2c_dev->base + STM32F4_I2C_TRISE);
+}
+
+static void stm32f4_i2c_set_speed_mode(struct stm32f4_i2c_dev *i2c_dev)
+{
+       u32 val;
+       u32 ccr = 0;
+
+       if (i2c_dev->speed == STM32F4_I2C_SPEED_STANDARD) {
+               /*
+                * In standard mode:
+                * t_scl_high = t_scl_low = CCR * I2C parent clk period
+                * So to reach 100 kHz, we have:
+                * CCR = I2C parent rate / 100 kHz >> 1
+                *
+                * For example with parent rate = 2 MHz:
+                * CCR = 2000000 / (100000 << 1) = 10
+                * t_scl_high = t_scl_low = 10 * (1 / 2000000) = 5000 ns
+                * t_scl_high + t_scl_low = 10000 ns so 100 kHz is reached
+                *
+                * Function stm32f4_i2c_set_periph_clk_freq made sure that
+                * parent rate is not higher than 46 MHz. As a result val
+                * is at most 8 bits wide and so fits into the CCR bits [11:0].
+                */
+               val = i2c_dev->parent_rate / (100000 << 1);
+       } else {
+               /*
+                * In fast mode, we compute CCR with duty = 0 as with low
+                * frequencies we are not able to reach 400 kHz.
+                * In that case:
+                * t_scl_high = CCR * I2C parent clk period
+                * t_scl_low = 2 * CCR * I2C parent clk period
+                * So, CCR = I2C parent rate / (400 kHz * 3)
+                *
+                * For example with parent rate = 6 MHz:
+                * CCR = 6000000 / (400000 * 3) = 5
+                * t_scl_high = 5 * (1 / 6000000) = 833 ns > 600 ns
+                * t_scl_low = 2 * 5 * (1 / 6000000) = 1667 ns > 1300 ns
+                * t_scl_high + t_scl_low = 2500 ns so 400 kHz is reached
+                *
+                * Function stm32f4_i2c_set_periph_clk_freq made sure that
+                * parent rate is not higher than 46 MHz. As a result val
+                * is at most 6 bits wide and so fits into the CCR bits [11:0].
+                */
+               val = DIV_ROUND_UP(i2c_dev->parent_rate, 400000 * 3);
+
+               /* Select Fast mode */
+               ccr |= STM32F4_I2C_CCR_FS;
+       }
+
+       ccr |= STM32F4_I2C_CCR_CCR(val);
+       writel_relaxed(ccr, i2c_dev->base + STM32F4_I2C_CCR);
+}
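+
+/*
+ * At the upper clock bound of 46 MHz, standard mode gives
+ * CCR = 46000000 / (100000 << 1) = 230 (8 bits) and fast mode gives
+ * CCR = DIV_ROUND_UP(46000000, 400000 * 3) = 39 (6 bits), matching the
+ * width claims in the comments above.
+ */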
+
+/**
+ * stm32f4_i2c_hw_config() - Prepare I2C block
+ * @i2c_dev: Controller's private data
+ */
+static int stm32f4_i2c_hw_config(struct stm32f4_i2c_dev *i2c_dev)
+{
+       int ret;
+
+       ret = stm32f4_i2c_set_periph_clk_freq(i2c_dev);
+       if (ret)
+               return ret;
+
+       stm32f4_i2c_set_rise_time(i2c_dev);
+
+       stm32f4_i2c_set_speed_mode(i2c_dev);
+
+       /* Enable I2C */
+       writel_relaxed(STM32F4_I2C_CR1_PE, i2c_dev->base + STM32F4_I2C_CR1);
+
+       return 0;
+}
+
+static int stm32f4_i2c_wait_free_bus(struct stm32f4_i2c_dev *i2c_dev)
+{
+       u32 status;
+       int ret;
+
+       ret = readl_relaxed_poll_timeout(i2c_dev->base + STM32F4_I2C_SR2,
+                                        status,
+                                        !(status & STM32F4_I2C_SR2_BUSY),
+                                        10, 1000);
+       if (ret) {
+               dev_dbg(i2c_dev->dev, "bus not free\n");
+               ret = -EBUSY;
+       }
+
+       return ret;
+}
+
+/**
+ * stm32f4_i2c_write_byte() - Write a byte in the data register
+ * @i2c_dev: Controller's private data
+ * @byte: Data to write in the register
+ */
+static void stm32f4_i2c_write_byte(struct stm32f4_i2c_dev *i2c_dev, u8 byte)
+{
+       writel_relaxed(byte, i2c_dev->base + STM32F4_I2C_DR);
+}
+
+/**
+ * stm32f4_i2c_write_msg() - Fill the data register in write mode
+ * @i2c_dev: Controller's private data
+ *
+ * This function fills the data register with the next byte of the I2C
+ * transfer buffer
+ */
+static void stm32f4_i2c_write_msg(struct stm32f4_i2c_dev *i2c_dev)
+{
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+
+       stm32f4_i2c_write_byte(i2c_dev, *msg->buf++);
+       msg->count--;
+}
+
+static void stm32f4_i2c_read_msg(struct stm32f4_i2c_dev *i2c_dev)
+{
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+       u32 rbuf;
+
+       rbuf = readl_relaxed(i2c_dev->base + STM32F4_I2C_DR);
+       *msg->buf++ = rbuf;
+       msg->count--;
+}
+
+static void stm32f4_i2c_terminate_xfer(struct stm32f4_i2c_dev *i2c_dev)
+{
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+       void __iomem *reg;
+
+       stm32f4_i2c_disable_irq(i2c_dev);
+
+       reg = i2c_dev->base + STM32F4_I2C_CR1;
+       if (msg->stop)
+               stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_STOP);
+       else
+               stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_START);
+
+       complete(&i2c_dev->complete);
+}
+
+/**
+ * stm32f4_i2c_handle_write() - Handle TX empty interrupt in case of write
+ * @i2c_dev: Controller's private data
+ */
+static void stm32f4_i2c_handle_write(struct stm32f4_i2c_dev *i2c_dev)
+{
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+       void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR2;
+
+       if (msg->count) {
+               stm32f4_i2c_write_msg(i2c_dev);
+               if (!msg->count) {
+                       /*
+                        * Disable buffer interrupts for RX not empty and TX
+                        * empty events
+                        */
+                       stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR2_ITBUFEN);
+               }
+       } else {
+               stm32f4_i2c_terminate_xfer(i2c_dev);
+       }
+}
+
+/**
+ * stm32f4_i2c_handle_read() - Handle RX not empty interrupt in case of read
+ * @i2c_dev: Controller's private data
+ *
+ * This function is called when new data is received in the data register
+ */
+static void stm32f4_i2c_handle_read(struct stm32f4_i2c_dev *i2c_dev)
+{
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+       void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR2;
+
+       switch (msg->count) {
+       case 1:
+               stm32f4_i2c_disable_irq(i2c_dev);
+               stm32f4_i2c_read_msg(i2c_dev);
+               complete(&i2c_dev->complete);
+               break;
+       /*
+        * For 2-byte reception, 3-byte reception and for data N-2, N-1 and N
+        * of N-byte reception with N > 3, we do not have to read the data
+        * register when the RX not empty event occurs, as we have to wait for
+        * the byte transfer finished event before reading data.
+        * So, here we just disable the buffer interrupt to avoid being
+        * preempted again by the RX not empty event.
+        */
+       case 2:
+       case 3:
+               stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR2_ITBUFEN);
+               break;
+       /*
+        * For N-byte reception with N > 3 we directly read the data
+        * register up to data N-2.
+        */
+       default:
+               stm32f4_i2c_read_msg(i2c_dev);
+       }
+}
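+
+/*
+ * Walk-through for a 5-byte read: the RX not empty handler above reads
+ * bytes 1 and 2 (count 5, then 4) and disables the buffer interrupt at
+ * count == 3; stm32f4_i2c_handle_rx_done() below then clears ACK and
+ * reads byte 3 on BTF, and finally sets STOP/START before reading
+ * bytes 4 and 5.
+ */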
+
+/**
+ * stm32f4_i2c_handle_rx_done() - Handle byte transfer finished interrupt
+ * in case of read
+ * @i2c_dev: Controller's private data
+ *
+ * This function is called when a new data is received in the shift register
+ * but data register has not been read yet.
+ */
+static void stm32f4_i2c_handle_rx_done(struct stm32f4_i2c_dev *i2c_dev)
+{
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+       void __iomem *reg;
+       u32 mask;
+       int i;
+
+       switch (msg->count) {
+       case 2:
+               /*
+                * In order to correctly send the Stop or Repeated Start
+                * condition on the I2C bus, the STOP/START bit has to be set
+                * before reading the last two bytes (data N-1 and N).
+                * After that, we can read the last two bytes, disable the
+                * remaining interrupts and notify the end of the xfer to the
+                * client.
+                */
+               reg = i2c_dev->base + STM32F4_I2C_CR1;
+               if (msg->stop)
+                       stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_STOP);
+               else
+                       stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_START);
+
+               for (i = 2; i > 0; i--)
+                       stm32f4_i2c_read_msg(i2c_dev);
+
+               reg = i2c_dev->base + STM32F4_I2C_CR2;
+               mask = STM32F4_I2C_CR2_ITEVTEN | STM32F4_I2C_CR2_ITERREN;
+               stm32f4_i2c_clr_bits(reg, mask);
+
+               complete(&i2c_dev->complete);
+               break;
+       case 3:
+               /*
+                * In order to correctly generate the NACK pulse after the last
+                * received data byte, we have to enable NACK before reading N-2
+                * data
+                */
+               reg = i2c_dev->base + STM32F4_I2C_CR1;
+               stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR1_ACK);
+               stm32f4_i2c_read_msg(i2c_dev);
+               break;
+       default:
+               stm32f4_i2c_read_msg(i2c_dev);
+       }
+}
+
+/**
+ * stm32f4_i2c_handle_rx_addr() - Handle address matched interrupt in case of
+ * master receiver
+ * @i2c_dev: Controller's private data
+ */
+static void stm32f4_i2c_handle_rx_addr(struct stm32f4_i2c_dev *i2c_dev)
+{
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+       u32 cr1;
+
+       switch (msg->count) {
+       case 0:
+               stm32f4_i2c_terminate_xfer(i2c_dev);
+
+               /* Clear ADDR flag */
+               readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+               break;
+       case 1:
+               /*
+                * Single byte reception:
+                * Enable NACK and reset POS (Acknowledge position).
+                * Then, clear ADDR flag and set STOP or RepSTART.
+                * In that way, the NACK and STOP or RepStart pulses will be
+                * sent as soon as the byte is received in the shift register.
+                */
+               cr1 = readl_relaxed(i2c_dev->base + STM32F4_I2C_CR1);
+               cr1 &= ~(STM32F4_I2C_CR1_ACK | STM32F4_I2C_CR1_POS);
+               writel_relaxed(cr1, i2c_dev->base + STM32F4_I2C_CR1);
+
+               readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+
+               if (msg->stop)
+                       cr1 |= STM32F4_I2C_CR1_STOP;
+               else
+                       cr1 |= STM32F4_I2C_CR1_START;
+               writel_relaxed(cr1, i2c_dev->base + STM32F4_I2C_CR1);
+               break;
+       case 2:
+               /*
+                * 2-byte reception:
+                * Enable NACK, set POS (NACK position) and clear ADDR flag.
+                * In that way, NACK will be sent for the next byte which will
+                * be received in the shift register instead of the current
+                * one.
+                */
+               cr1 = readl_relaxed(i2c_dev->base + STM32F4_I2C_CR1);
+               cr1 &= ~STM32F4_I2C_CR1_ACK;
+               cr1 |= STM32F4_I2C_CR1_POS;
+               writel_relaxed(cr1, i2c_dev->base + STM32F4_I2C_CR1);
+
+               readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+               break;
+
+       default:
+               /*
+                * N-byte reception:
+                * Enable ACK, reset POS (ACK position) and clear ADDR flag.
+                * In that way, ACK will be sent as soon as the current byte
+                * is received in the shift register.
+                */
+               cr1 = readl_relaxed(i2c_dev->base + STM32F4_I2C_CR1);
+               cr1 |= STM32F4_I2C_CR1_ACK;
+               cr1 &= ~STM32F4_I2C_CR1_POS;
+               writel_relaxed(cr1, i2c_dev->base + STM32F4_I2C_CR1);
+
+               readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+               break;
+       }
+}
+
+/**
+ * stm32f4_i2c_isr_event() - Interrupt routine for I2C bus event
+ * @irq: interrupt number
+ * @data: Controller's private data
+ */
+static irqreturn_t stm32f4_i2c_isr_event(int irq, void *data)
+{
+       struct stm32f4_i2c_dev *i2c_dev = data;
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+       u32 possible_status = STM32F4_I2C_SR1_ITEVTEN_MASK;
+       u32 status, ien, event, cr2;
+
+       cr2 = readl_relaxed(i2c_dev->base + STM32F4_I2C_CR2);
+       ien = cr2 & STM32F4_I2C_CR2_IRQ_MASK;
+
+       /* Update possible_status if buffer interrupt is enabled */
+       if (ien & STM32F4_I2C_CR2_ITBUFEN)
+               possible_status |= STM32F4_I2C_SR1_ITBUFEN_MASK;
+
+       status = readl_relaxed(i2c_dev->base + STM32F4_I2C_SR1);
+       event = status & possible_status;
+       if (!event) {
+               dev_dbg(i2c_dev->dev,
+                       "spurious evt irq (status=0x%08x, ien=0x%08x)\n",
+                       status, ien);
+               return IRQ_NONE;
+       }
+
+       /* Start condition generated */
+       if (event & STM32F4_I2C_SR1_SB)
+               stm32f4_i2c_write_byte(i2c_dev, msg->addr);
+
+       /* I2C Address sent */
+       if (event & STM32F4_I2C_SR1_ADDR) {
+               if (msg->addr & I2C_M_RD)
+                       stm32f4_i2c_handle_rx_addr(i2c_dev);
+               else
+                       readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+
+               /*
+                * Enable buffer interrupts for RX not empty and TX empty
+                * events
+                */
+               cr2 |= STM32F4_I2C_CR2_ITBUFEN;
+               writel_relaxed(cr2, i2c_dev->base + STM32F4_I2C_CR2);
+       }
+
+       /* TX empty */
+       if ((event & STM32F4_I2C_SR1_TXE) && !(msg->addr & I2C_M_RD))
+               stm32f4_i2c_handle_write(i2c_dev);
+
+       /* RX not empty */
+       if ((event & STM32F4_I2C_SR1_RXNE) && (msg->addr & I2C_M_RD))
+               stm32f4_i2c_handle_read(i2c_dev);
+
+       /*
+        * The BTF (Byte Transfer finished) event occurs when:
+        * - in reception: a new byte is received in the shift register
+        * but the previous byte has not been read yet from the data register
+        * - in transmission: a new byte should be sent but the data register
+        * has not been written yet
+        */
+       if (event & STM32F4_I2C_SR1_BTF) {
+               if (msg->addr & I2C_M_RD)
+                       stm32f4_i2c_handle_rx_done(i2c_dev);
+               else
+                       stm32f4_i2c_handle_write(i2c_dev);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * stm32f4_i2c_isr_error() - Interrupt routine for I2C bus error
+ * @irq: interrupt number
+ * @data: Controller's private data
+ */
+static irqreturn_t stm32f4_i2c_isr_error(int irq, void *data)
+{
+       struct stm32f4_i2c_dev *i2c_dev = data;
+       struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+       void __iomem *reg;
+       u32 status;
+
+       status = readl_relaxed(i2c_dev->base + STM32F4_I2C_SR1);
+
+       /* Arbitration lost */
+       if (status & STM32F4_I2C_SR1_ARLO) {
+               status &= ~STM32F4_I2C_SR1_ARLO;
+               writel_relaxed(status, i2c_dev->base + STM32F4_I2C_SR1);
+               msg->result = -EAGAIN;
+       }
+
+       /*
+        * Acknowledge failure:
+        * In master transmitter mode a Stop must be generated by software
+        */
+       if (status & STM32F4_I2C_SR1_AF) {
+               if (!(msg->addr & I2C_M_RD)) {
+                       reg = i2c_dev->base + STM32F4_I2C_CR1;
+                       stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_STOP);
+               }
+               status &= ~STM32F4_I2C_SR1_AF;
+               writel_relaxed(status, i2c_dev->base + STM32F4_I2C_SR1);
+               msg->result = -EIO;
+       }
+
+       /* Bus error */
+       if (status & STM32F4_I2C_SR1_BERR) {
+               status &= ~STM32F4_I2C_SR1_BERR;
+               writel_relaxed(status, i2c_dev->base + STM32F4_I2C_SR1);
+               msg->result = -EIO;
+       }
+
+       stm32f4_i2c_disable_irq(i2c_dev);
+       complete(&i2c_dev->complete);
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * stm32f4_i2c_xfer_msg() - Transfer a single I2C message
+ * @i2c_dev: Controller's private data
+ * @msg: I2C message to transfer
+ * @is_first: first message of the sequence
+ * @is_last: last message of the sequence
+ */
+static int stm32f4_i2c_xfer_msg(struct stm32f4_i2c_dev *i2c_dev,
+                               struct i2c_msg *msg, bool is_first,
+                               bool is_last)
+{
+       struct stm32f4_i2c_msg *f4_msg = &i2c_dev->msg;
+       void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR1;
+       unsigned long timeout;
+       u32 mask;
+       int ret;
+
+       f4_msg->addr = i2c_8bit_addr_from_msg(msg);
+       f4_msg->buf = msg->buf;
+       f4_msg->count = msg->len;
+       f4_msg->result = 0;
+       f4_msg->stop = is_last;
+
+       reinit_completion(&i2c_dev->complete);
+
+       /* Enable events and errors interrupts */
+       mask = STM32F4_I2C_CR2_ITEVTEN | STM32F4_I2C_CR2_ITERREN;
+       stm32f4_i2c_set_bits(i2c_dev->base + STM32F4_I2C_CR2, mask);
+
+       if (is_first) {
+               ret = stm32f4_i2c_wait_free_bus(i2c_dev);
+               if (ret)
+                       return ret;
+
+               /* START generation */
+               stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_START);
+       }
+
+       timeout = wait_for_completion_timeout(&i2c_dev->complete,
+                                             i2c_dev->adap.timeout);
+       ret = f4_msg->result;
+
+       if (!timeout)
+               ret = -ETIMEDOUT;
+
+       return ret;
+}
+
+/**
+ * stm32f4_i2c_xfer() - Transfer combined I2C message
+ * @i2c_adap: Adapter pointer to the controller
+ * @msgs: Array of I2C messages to be transferred
+ * @num: Number of messages to be executed
+ */
+static int stm32f4_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg msgs[],
+                           int num)
+{
+       struct stm32f4_i2c_dev *i2c_dev = i2c_get_adapdata(i2c_adap);
+       int ret, i;
+
+       ret = clk_enable(i2c_dev->clk);
+       if (ret) {
+               dev_err(i2c_dev->dev, "Failed to enable clock\n");
+               return ret;
+       }
+
+       for (i = 0; i < num && !ret; i++)
+               ret = stm32f4_i2c_xfer_msg(i2c_dev, &msgs[i], i == 0,
+                                          i == num - 1);
+
+       clk_disable(i2c_dev->clk);
+
+       return (ret < 0) ? ret : num;
+}
+
+static u32 stm32f4_i2c_func(struct i2c_adapter *adap)
+{
+       return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm stm32f4_i2c_algo = {
+       .master_xfer = stm32f4_i2c_xfer,
+       .functionality = stm32f4_i2c_func,
+};
+
+static int stm32f4_i2c_probe(struct platform_device *pdev)
+{
+       struct device_node *np = pdev->dev.of_node;
+       struct stm32f4_i2c_dev *i2c_dev;
+       struct resource *res;
+       u32 irq_event, irq_error, clk_rate;
+       struct i2c_adapter *adap;
+       struct reset_control *rst;
+       int ret;
+
+       i2c_dev = devm_kzalloc(&pdev->dev, sizeof(*i2c_dev), GFP_KERNEL);
+       if (!i2c_dev)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       i2c_dev->base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(i2c_dev->base))
+               return PTR_ERR(i2c_dev->base);
+
+       irq_event = irq_of_parse_and_map(np, 0);
+       if (!irq_event) {
+               dev_err(&pdev->dev, "IRQ event missing or invalid\n");
+               return -EINVAL;
+       }
+
+       irq_error = irq_of_parse_and_map(np, 1);
+       if (!irq_error) {
+               dev_err(&pdev->dev, "IRQ error missing or invalid\n");
+               return -EINVAL;
+       }
+
+       i2c_dev->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(i2c_dev->clk)) {
+               dev_err(&pdev->dev, "Error: Missing controller clock\n");
+               return PTR_ERR(i2c_dev->clk);
+       }
+       ret = clk_prepare_enable(i2c_dev->clk);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to prepare_enable clock\n");
+               return ret;
+       }
+
+       rst = devm_reset_control_get(&pdev->dev, NULL);
+       if (IS_ERR(rst)) {
+               dev_err(&pdev->dev, "Error: Missing controller reset\n");
+               ret = PTR_ERR(rst);
+               goto clk_free;
+       }
+       reset_control_assert(rst);
+       udelay(2);
+       reset_control_deassert(rst);
+
+       i2c_dev->speed = STM32F4_I2C_SPEED_STANDARD;
+       ret = of_property_read_u32(np, "clock-frequency", &clk_rate);
+       if (!ret && clk_rate >= 400000)
+               i2c_dev->speed = STM32F4_I2C_SPEED_FAST;
+
+       i2c_dev->dev = &pdev->dev;
+
+       ret = devm_request_irq(&pdev->dev, irq_event, stm32f4_i2c_isr_event, 0,
+                              pdev->name, i2c_dev);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to request irq event %i\n",
+                       irq_event);
+               goto clk_free;
+       }
+
+       ret = devm_request_irq(&pdev->dev, irq_error, stm32f4_i2c_isr_error, 0,
+                              pdev->name, i2c_dev);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to request irq error %i\n",
+                       irq_error);
+               goto clk_free;
+       }
+
+       ret = stm32f4_i2c_hw_config(i2c_dev);
+       if (ret)
+               goto clk_free;
+
+       adap = &i2c_dev->adap;
+       i2c_set_adapdata(adap, i2c_dev);
+       snprintf(adap->name, sizeof(adap->name), "STM32 I2C(%pa)", &res->start);
+       adap->owner = THIS_MODULE;
+       adap->timeout = 2 * HZ;
+       adap->retries = 0;
+       adap->algo = &stm32f4_i2c_algo;
+       adap->dev.parent = &pdev->dev;
+       adap->dev.of_node = pdev->dev.of_node;
+
+       init_completion(&i2c_dev->complete);
+
+       ret = i2c_add_adapter(adap);
+       if (ret)
+               goto clk_free;
+
+       platform_set_drvdata(pdev, i2c_dev);
+
+       clk_disable(i2c_dev->clk);
+
+       dev_info(i2c_dev->dev, "STM32F4 I2C driver registered\n");
+
+       return 0;
+
+clk_free:
+       clk_disable_unprepare(i2c_dev->clk);
+       return ret;
+}
+
+static int stm32f4_i2c_remove(struct platform_device *pdev)
+{
+       struct stm32f4_i2c_dev *i2c_dev = platform_get_drvdata(pdev);
+
+       i2c_del_adapter(&i2c_dev->adap);
+
+       clk_unprepare(i2c_dev->clk);
+
+       return 0;
+}
+
+static const struct of_device_id stm32f4_i2c_match[] = {
+       { .compatible = "st,stm32f4-i2c", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, stm32f4_i2c_match);
+
+static struct platform_driver stm32f4_i2c_driver = {
+       .driver = {
+               .name = "stm32f4-i2c",
+               .of_match_table = stm32f4_i2c_match,
+       },
+       .probe = stm32f4_i2c_probe,
+       .remove = stm32f4_i2c_remove,
+};
+
+module_platform_driver(stm32f4_i2c_driver);
+
+MODULE_AUTHOR("M'boumba Cedric Madianga <cedric.madianga@gmail.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32F4 I2C driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/i2c/busses/i2c-tegra-bpmp.c b/drivers/i2c/busses/i2c-tegra-bpmp.c
new file mode 100644 (file)
index 0000000..9eed69d
--- /dev/null
@@ -0,0 +1,346 @@
+/*
+ * drivers/i2c/busses/i2c-tegra-bpmp.c
+ *
+ * Copyright (c) 2016 NVIDIA Corporation.  All rights reserved.
+ *
+ * Author: Shardar Shariff Md <smohammed@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <soc/tegra/bpmp-abi.h>
+#include <soc/tegra/bpmp.h>
+
+/*
+ * Serialized I2C message header size is 6 bytes and includes address, flags
+ * and length
+ */
+#define SERIALI2C_HDR_SIZE 6
+
+struct tegra_bpmp_i2c {
+       struct i2c_adapter adapter;
+       struct device *dev;
+
+       struct tegra_bpmp *bpmp;
+       unsigned int bus;
+};
+
+/*
+ * Linux flags are translated to BPMP defined I2C flags that are used in BPMP
+ * firmware I2C driver to avoid any issues in the future if Linux I2C flags
+ * are changed.
+ */
+static int tegra_bpmp_xlate_flags(u16 flags, u16 *out)
+{
+       if (flags & I2C_M_TEN) {
+               *out |= SERIALI2C_TEN;
+               flags &= ~I2C_M_TEN;
+       }
+
+       if (flags & I2C_M_RD) {
+               *out |= SERIALI2C_RD;
+               flags &= ~I2C_M_RD;
+       }
+
+       if (flags & I2C_M_STOP) {
+               *out |= SERIALI2C_STOP;
+               flags &= ~I2C_M_STOP;
+       }
+
+       if (flags & I2C_M_NOSTART) {
+               *out |= SERIALI2C_NOSTART;
+               flags &= ~I2C_M_NOSTART;
+       }
+
+       if (flags & I2C_M_REV_DIR_ADDR) {
+               *out |= SERIALI2C_REV_DIR_ADDR;
+               flags &= ~I2C_M_REV_DIR_ADDR;
+       }
+
+       if (flags & I2C_M_IGNORE_NAK) {
+               *out |= SERIALI2C_IGNORE_NAK;
+               flags &= ~I2C_M_IGNORE_NAK;
+       }
+
+       if (flags & I2C_M_NO_RD_ACK) {
+               *out |= SERIALI2C_NO_RD_ACK;
+               flags &= ~I2C_M_NO_RD_ACK;
+       }
+
+       if (flags & I2C_M_RECV_LEN) {
+               *out |= SERIALI2C_RECV_LEN;
+               flags &= ~I2C_M_RECV_LEN;
+       }
+
+       return (flags != 0) ? -EINVAL : 0;
+}
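+
+/*
+ * For example, a message with I2C_M_RD | I2C_M_TEN translates to
+ * SERIALI2C_RD | SERIALI2C_TEN, while any flag without a SERIALI2C_*
+ * counterpart makes the translation fail with -EINVAL.
+ */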
+
+/*
+ * The serialized I2C format is simply the following:
+ * [addr little-endian][flags little-endian][len little-endian][data if write]
+ * [addr little-endian][flags little-endian][len little-endian][data if write]
+ *  ...
+ *
+ * The flags are translated from Linux kernel representation to seriali2c
+ * representation. Any undefined flag being set causes an error.
+ *
+ * The data is there only for writes. Reads have the data transferred in the
+ * other direction, and thus data is not present.
+ *
+ * See the tegra_bpmp_i2c_deserialize() documentation for the data format
+ * in the other direction.
+ */
+static int tegra_bpmp_serialize_i2c_msg(struct tegra_bpmp_i2c *i2c,
+                                       struct mrq_i2c_request *request,
+                                       struct i2c_msg *msgs,
+                                       unsigned int num)
+{
+       char *buf = request->xfer.data_buf;
+       unsigned int i, j, pos = 0;
+       int err;
+
+       for (i = 0; i < num; i++) {
+               struct i2c_msg *msg = &msgs[i];
+               u16 flags = 0;
+
+               err = tegra_bpmp_xlate_flags(msg->flags, &flags);
+               if (err < 0)
+                       return err;
+
+               buf[pos++] = msg->addr & 0xff;
+               buf[pos++] = (msg->addr & 0xff00) >> 8;
+               buf[pos++] = flags & 0xff;
+               buf[pos++] = (flags & 0xff00) >> 8;
+               buf[pos++] = msg->len & 0xff;
+               buf[pos++] = (msg->len & 0xff00) >> 8;
+
+               if ((flags & SERIALI2C_RD) == 0) {
+                       for (j = 0; j < msg->len; j++)
+                               buf[pos++] = msg->buf[j];
+               }
+       }
+
+       request->xfer.data_size = pos;
+
+       return 0;
+}
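+
+/*
+ * For example, a single 2-byte write of { 0x01, 0x02 } to 7-bit address
+ * 0x50 with no special flags serializes to the 8 bytes
+ * 50 00 00 00 02 00 01 02 (address, flags and length as 16-bit
+ * little-endian values, followed by the data).
+ */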
+
+/*
+ * The data in the BPMP -> CPU direction is composed of sequential blocks for
+ * those messages that have I2C_M_RD. So, for example, if you have:
+ *
+ * - !I2C_M_RD, len == 5, data == a0 01 02 03 04
+ * - !I2C_M_RD, len == 1, data == a0
+ * - I2C_M_RD, len == 2, data == [uninitialized buffer 1]
+ * - !I2C_M_RD, len == 1, data == a2
+ * - I2C_M_RD, len == 2, data == [uninitialized buffer 2]
+ *
+ * ...then the data in the BPMP -> CPU direction would be 4 bytes total, and
+ * would contain 2 bytes that will go to uninitialized buffer 1, and 2 bytes
+ * that will go to uninitialized buffer 2.
+ */
+static int tegra_bpmp_i2c_deserialize(struct tegra_bpmp_i2c *i2c,
+                                     struct mrq_i2c_response *response,
+                                     struct i2c_msg *msgs,
+                                     unsigned int num)
+{
+       size_t size = response->xfer.data_size, len = 0, pos = 0;
+       char *buf = response->xfer.data_buf;
+       unsigned int i;
+
+       for (i = 0; i < num; i++)
+               if (msgs[i].flags & I2C_M_RD)
+                       len += msgs[i].len;
+
+       if (len != size)
+               return -EINVAL;
+
+       for (i = 0; i < num; i++) {
+               if (msgs[i].flags & I2C_M_RD) {
+                       memcpy(msgs[i].buf, buf + pos, msgs[i].len);
+                       pos += msgs[i].len;
+               }
+       }
+
+       return 0;
+}
+
+static int tegra_bpmp_i2c_msg_len_check(struct i2c_msg *msgs, unsigned int num)
+{
+       size_t tx_len = 0, rx_len = 0;
+       unsigned int i;
+
+       for (i = 0; i < num; i++)
+               if (!(msgs[i].flags & I2C_M_RD))
+                       tx_len += SERIALI2C_HDR_SIZE + msgs[i].len;
+
+       if (tx_len > TEGRA_I2C_IPC_MAX_IN_BUF_SIZE)
+               return -EINVAL;
+
+       for (i = 0; i < num; i++)
+               if ((msgs[i].flags & I2C_M_RD))
+                       rx_len += msgs[i].len;
+
+       if (rx_len > TEGRA_I2C_IPC_MAX_OUT_BUF_SIZE)
+               return -EINVAL;
+
+       return 0;
+}
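+
+/*
+ * For example, with SERIALI2C_HDR_SIZE of 6, a single 10-byte write
+ * counts 16 bytes against the request buffer, while a 10-byte read
+ * counts 10 bytes against the response buffer.
+ */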
+
+static int tegra_bpmp_i2c_msg_xfer(struct tegra_bpmp_i2c *i2c,
+                                  struct mrq_i2c_request *request,
+                                  struct mrq_i2c_response *response)
+{
+       struct tegra_bpmp_message msg;
+       int err;
+
+       request->cmd = CMD_I2C_XFER;
+       request->xfer.bus_id = i2c->bus;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.mrq = MRQ_I2C;
+       msg.tx.data = request;
+       msg.tx.size = sizeof(*request);
+       msg.rx.data = response;
+       msg.rx.size = sizeof(*response);
+
+       if (irqs_disabled())
+               err = tegra_bpmp_transfer_atomic(i2c->bpmp, &msg);
+       else
+               err = tegra_bpmp_transfer(i2c->bpmp, &msg);
+
+       return err;
+}
+
+static int tegra_bpmp_i2c_xfer(struct i2c_adapter *adapter,
+                              struct i2c_msg *msgs, int num)
+{
+       struct tegra_bpmp_i2c *i2c = i2c_get_adapdata(adapter);
+       struct mrq_i2c_response response;
+       struct mrq_i2c_request request;
+       int err;
+
+       err = tegra_bpmp_i2c_msg_len_check(msgs, num);
+       if (err < 0) {
+               dev_err(i2c->dev, "unsupported message length\n");
+               return err;
+       }
+
+       memset(&request, 0, sizeof(request));
+       memset(&response, 0, sizeof(response));
+
+       err = tegra_bpmp_serialize_i2c_msg(i2c, &request, msgs, num);
+       if (err < 0) {
+               dev_err(i2c->dev, "failed to serialize message: %d\n", err);
+               return err;
+       }
+
+       err = tegra_bpmp_i2c_msg_xfer(i2c, &request, &response);
+       if (err < 0) {
+               dev_err(i2c->dev, "failed to transfer message: %d\n", err);
+               return err;
+       }
+
+       err = tegra_bpmp_i2c_deserialize(i2c, &response, msgs, num);
+       if (err < 0) {
+               dev_err(i2c->dev, "failed to deserialize message: %d\n", err);
+               return err;
+       }
+
+       return num;
+}
+
+static u32 tegra_bpmp_i2c_func(struct i2c_adapter *adapter)
+{
+       return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_10BIT_ADDR |
+              I2C_FUNC_PROTOCOL_MANGLING | I2C_FUNC_NOSTART;
+}
+
+static const struct i2c_algorithm tegra_bpmp_i2c_algo = {
+       .master_xfer = tegra_bpmp_i2c_xfer,
+       .functionality = tegra_bpmp_i2c_func,
+};
+
+static int tegra_bpmp_i2c_probe(struct platform_device *pdev)
+{
+       struct tegra_bpmp_i2c *i2c;
+       u32 value;
+       int err;
+
+       i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
+       if (!i2c)
+               return -ENOMEM;
+
+       i2c->dev = &pdev->dev;
+
+       i2c->bpmp = dev_get_drvdata(pdev->dev.parent);
+       if (!i2c->bpmp)
+               return -ENODEV;
+
+       err = of_property_read_u32(pdev->dev.of_node, "nvidia,bpmp-bus-id",
+                                  &value);
+       if (err < 0)
+               return err;
+
+       i2c->bus = value;
+
+       i2c_set_adapdata(&i2c->adapter, i2c);
+       i2c->adapter.owner = THIS_MODULE;
+       strlcpy(i2c->adapter.name, "Tegra BPMP I2C adapter",
+               sizeof(i2c->adapter.name));
+       i2c->adapter.algo = &tegra_bpmp_i2c_algo;
+       i2c->adapter.dev.parent = &pdev->dev;
+       i2c->adapter.dev.of_node = pdev->dev.of_node;
+
+       platform_set_drvdata(pdev, i2c);
+
+       return i2c_add_adapter(&i2c->adapter);
+}
+
+static int tegra_bpmp_i2c_remove(struct platform_device *pdev)
+{
+       struct tegra_bpmp_i2c *i2c = platform_get_drvdata(pdev);
+
+       i2c_del_adapter(&i2c->adapter);
+
+       return 0;
+}
+
+static const struct of_device_id tegra_bpmp_i2c_of_match[] = {
+       { .compatible = "nvidia,tegra186-bpmp-i2c", },
+       { }
+};
+MODULE_DEVICE_TABLE(of, tegra_bpmp_i2c_of_match);
+
+static struct platform_driver tegra_bpmp_i2c_driver = {
+       .driver = {
+               .name = "tegra-bpmp-i2c",
+               .of_match_table = tegra_bpmp_i2c_of_match,
+       },
+       .probe = tegra_bpmp_i2c_probe,
+       .remove = tegra_bpmp_i2c_remove,
+};
+module_platform_driver(tegra_bpmp_i2c_driver);
+
+MODULE_DESCRIPTION("NVIDIA Tegra BPMP I2C bus contoller driver");
+MODULE_AUTHOR("Shardar Shariff Md <smohammed@nvidia.com>");
+MODULE_AUTHOR("Juha-Matti Tilli");
+MODULE_LICENSE("GPL v2");
index bba5b429f69c92e21a1b4934319ad78fc388ef0a..1d4c2beacf2e7044011110593fbeafa2f839bb37 100644 (file)
@@ -188,11 +188,11 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev,
        i2c->hlc_int_enable = thunder_i2c_hlc_int_enable;
        i2c->hlc_int_disable = thunder_i2c_hlc_int_disable;
 
-       ret = pci_enable_msix(pdev, &i2c->i2c_msix, 1);
-       if (ret)
+       ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
+       if (ret < 0)
                goto error;
 
-       ret = devm_request_irq(dev, i2c->i2c_msix.vector, octeon_i2c_isr, 0,
+       ret = devm_request_irq(dev, pci_irq_vector(pdev, 0), octeon_i2c_isr, 0,
                               DRV_NAME, i2c);
        if (ret)
                goto error;
index 0ab1e55558bcd7a520afd8dcf07ae3d8cb77e1c0..dbe7e44c9321279bbcd7b3f8d2006dbe1365e0b7 100644 (file)
@@ -372,7 +372,7 @@ static u32 xgene_slimpro_i2c_func(struct i2c_adapter *adapter)
                I2C_FUNC_SMBUS_I2C_BLOCK;
 }
 
-static struct i2c_algorithm xgene_slimpro_i2c_algorithm = {
+static const struct i2c_algorithm xgene_slimpro_i2c_algorithm = {
        .smbus_xfer = xgene_slimpro_i2c_xfer,
        .functionality = xgene_slimpro_i2c_func,
 };
index 84a8b2eccffb2d0d35991ff278409c09ca424e72..66b464d52c9c12623d61786041f75b8ab6b22d52 100644 (file)
@@ -334,7 +334,7 @@ static u32 xlp9xx_i2c_functionality(struct i2c_adapter *adapter)
                I2C_FUNC_10BIT_ADDR;
 }
 
-static struct i2c_algorithm xlp9xx_i2c_algo = {
+static const struct i2c_algorithm xlp9xx_i2c_algo = {
        .master_xfer = xlp9xx_i2c_xfer,
        .functionality = xlp9xx_i2c_functionality,
 };
index ad17d88d857361663ae98d8faff52a0d9cedbeab..484bfa15d58ee445aee64097c36f7f271463f0c8 100644 (file)
@@ -335,7 +335,7 @@ static u32 xlr_func(struct i2c_adapter *adap)
        return (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK) | I2C_FUNC_I2C;
 }
 
-static struct i2c_algorithm xlr_i2c_algo = {
+static const struct i2c_algorithm xlr_i2c_algo = {
        .master_xfer    = xlr_i2c_xfer,
        .functionality  = xlr_func,
 };
index bfb6ba7cac00f4fc22587b9241775197920e4cb7..d2402bbf6729e55143a9d097aaf455b65c27b1ba 100644 (file)
@@ -3705,6 +3705,39 @@ int i2c_slave_unregister(struct i2c_client *client)
        return ret;
 }
 EXPORT_SYMBOL_GPL(i2c_slave_unregister);
+
+/**
+ * i2c_detect_slave_mode - detect operation mode
+ * @dev: The device owning the bus
+ *
+ * This checks the device's child nodes for an I2C slave by checking the
+ * address used in the reg property. If the address matches the
+ * I2C_OWN_SLAVE_ADDRESS flag, the device is configured to act as an I2C
+ * slave and will be listening at that address.
+ *
+ * Returns true if an I2C own slave address is detected, otherwise returns
+ * false.
+ */
+bool i2c_detect_slave_mode(struct device *dev)
+{
+       if (IS_BUILTIN(CONFIG_OF) && dev->of_node) {
+               struct device_node *child;
+               u32 reg;
+
+               for_each_child_of_node(dev->of_node, child) {
+                       if (of_property_read_u32(child, "reg", &reg))
+                               continue;
+                       if (reg & I2C_OWN_SLAVE_ADDRESS) {
+                               of_node_put(child);
+                               return true;
+                       }
+               }
+       } else if (IS_BUILTIN(CONFIG_ACPI) && ACPI_HANDLE(dev)) {
+               dev_dbg(dev, "ACPI slave is not supported yet\n");
+       }
+       return false;
+}
+EXPORT_SYMBOL_GPL(i2c_detect_slave_mode);
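+
+/*
+ * Example devicetree fragment (illustrative, using the documented
+ * 24c02-style slave backend) that would make this function return true:
+ *
+ *	slave@64 {
+ *		compatible = "linux,slave-24c02";
+ *		reg = <(I2C_OWN_SLAVE_ADDRESS | 0x64)>;
+ *	};
+ */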
+
 #endif
 
 MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
index b7ca249ec9c38b884ed48fbb8387dcc6729f1cc6..e53f2abd135070e78aa166738df08013ef784604 100644 (file)
@@ -40,7 +40,6 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
-#include <linux/version.h>
 #include <linux/i2c/mlxcpld.h>
 
 #define CPLD_MUX_MAX_NCHANS    8
index 4ea7e691afc75bb9d6aa7166cd02c9cde7cad9a7..77840f7845a1ba3117f66d815696b4fdce68f2b3 100644 (file)
@@ -90,6 +90,7 @@ static const struct of_device_id pca9541_of_match[] = {
        { .compatible = "nxp,pca9541" },
        {}
 };
+MODULE_DEVICE_TABLE(of, pca9541_of_match);
 #endif
 
 /*
index dd18b9ccb1f40b4f6ddf5904e1f6d529931801df..dfc1c0e37c4022b66da0facee93af030c70c4e73 100644 (file)
 #include <linux/i2c.h>
 #include <linux/i2c-mux.h>
 #include <linux/i2c/pca954x.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/of_irq.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 
 #define PCA954X_MAX_NCHANS 8
 
+#define PCA954X_IRQ_OFFSET 4
+
 enum pca_type {
        pca_9540,
        pca_9542,
@@ -63,6 +69,7 @@ enum pca_type {
 struct chip_desc {
        u8 nchans;
        u8 enable;      /* used for muxes only */
+       u8 has_irq;
        enum muxtype {
                pca954x_ismux = 0,
                pca954x_isswi
@@ -75,6 +82,10 @@ struct pca954x {
        u8 last_chan;           /* last register value */
        u8 deselect;
        struct i2c_client *client;
+
+       struct irq_domain *irq;
+       unsigned int irq_mask;
+       spinlock_t lock;
 };
 
 /* Provide specs for the PCA954x types we know about */
@@ -84,17 +95,26 @@ static const struct chip_desc chips[] = {
                .enable = 0x4,
                .muxtype = pca954x_ismux,
        },
+       [pca_9542] = {
+               .nchans = 2,
+               .enable = 0x4,
+               .has_irq = 1,
+               .muxtype = pca954x_ismux,
+       },
        [pca_9543] = {
                .nchans = 2,
+               .has_irq = 1,
                .muxtype = pca954x_isswi,
        },
        [pca_9544] = {
                .nchans = 4,
                .enable = 0x4,
+               .has_irq = 1,
                .muxtype = pca954x_ismux,
        },
        [pca_9545] = {
                .nchans = 4,
+               .has_irq = 1,
                .muxtype = pca954x_isswi,
        },
        [pca_9547] = {
@@ -110,7 +130,7 @@ static const struct chip_desc chips[] = {
 
 static const struct i2c_device_id pca954x_id[] = {
        { "pca9540", pca_9540 },
-       { "pca9542", pca_9540 },
+       { "pca9542", pca_9542 },
        { "pca9543", pca_9543 },
        { "pca9544", pca_9544 },
        { "pca9545", pca_9545 },
@@ -124,7 +144,7 @@ MODULE_DEVICE_TABLE(i2c, pca954x_id);
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id pca954x_acpi_ids[] = {
        { .id = "PCA9540", .driver_data = pca_9540 },
-       { .id = "PCA9542", .driver_data = pca_9540 },
+       { .id = "PCA9542", .driver_data = pca_9542 },
        { .id = "PCA9543", .driver_data = pca_9543 },
        { .id = "PCA9544", .driver_data = pca_9544 },
        { .id = "PCA9545", .driver_data = pca_9545 },
@@ -148,6 +168,7 @@ static const struct of_device_id pca954x_of_match[] = {
        { .compatible = "nxp,pca9548", .data = &chips[pca_9548] },
        {}
 };
+MODULE_DEVICE_TABLE(of, pca954x_of_match);
 #endif
 
 /* Write to mux register. Don't use i2c_transfer()/i2c_smbus_xfer()
@@ -217,6 +238,114 @@ static int pca954x_deselect_mux(struct i2c_mux_core *muxc, u32 chan)
        return pca954x_reg_write(muxc->parent, client, data->last_chan);
 }
 
+static irqreturn_t pca954x_irq_handler(int irq, void *dev_id)
+{
+       struct pca954x *data = dev_id;
+       unsigned int child_irq;
+       int ret, i, handled = 0;
+
+       ret = i2c_smbus_read_byte(data->client);
+       if (ret < 0)
+               return IRQ_NONE;
+
+       for (i = 0; i < data->chip->nchans; i++) {
+               if (ret & BIT(PCA954X_IRQ_OFFSET + i)) {
+                       child_irq = irq_linear_revmap(data->irq, i);
+                       handle_nested_irq(child_irq);
+                       handled++;
+               }
+       }
+       return handled ? IRQ_HANDLED : IRQ_NONE;
+}
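+
+/*
+ * The status byte read in pca954x_irq_handler() reports pending
+ * downstream interrupts starting at bit PCA954X_IRQ_OFFSET; a value of
+ * 0x30, for example, means channels 0 and 1 (bits 4 and 5) have
+ * interrupts pending.
+ */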
+
+static void pca954x_irq_mask(struct irq_data *idata)
+{
+       struct pca954x *data = irq_data_get_irq_chip_data(idata);
+       unsigned int pos = idata->hwirq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&data->lock, flags);
+
+       data->irq_mask &= ~BIT(pos);
+       if (!data->irq_mask)
+               disable_irq(data->client->irq);
+
+       spin_unlock_irqrestore(&data->lock, flags);
+}
+
+static void pca954x_irq_unmask(struct irq_data *idata)
+{
+       struct pca954x *data = irq_data_get_irq_chip_data(idata);
+       unsigned int pos = idata->hwirq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&data->lock, flags);
+
+       if (!data->irq_mask)
+               enable_irq(data->client->irq);
+       data->irq_mask |= BIT(pos);
+
+       spin_unlock_irqrestore(&data->lock, flags);
+}
+
+static int pca954x_irq_set_type(struct irq_data *idata, unsigned int type)
+{
+       if ((type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_LEVEL_LOW)
+               return -EINVAL;
+       return 0;
+}
+
+static struct irq_chip pca954x_irq_chip = {
+       .name = "i2c-mux-pca954x",
+       .irq_mask = pca954x_irq_mask,
+       .irq_unmask = pca954x_irq_unmask,
+       .irq_set_type = pca954x_irq_set_type,
+};
+
+static int pca954x_irq_setup(struct i2c_mux_core *muxc)
+{
+       struct pca954x *data = i2c_mux_priv(muxc);
+       struct i2c_client *client = data->client;
+       int c, err, irq;
+
+       if (!data->chip->has_irq || client->irq <= 0)
+               return 0;
+
+       spin_lock_init(&data->lock);
+
+       data->irq = irq_domain_add_linear(client->dev.of_node,
+                                         data->chip->nchans,
+                                         &irq_domain_simple_ops, data);
+       if (!data->irq)
+               return -ENODEV;
+
+       for (c = 0; c < data->chip->nchans; c++) {
+               irq = irq_create_mapping(data->irq, c);
+               irq_set_chip_data(irq, data);
+               irq_set_chip_and_handler(irq, &pca954x_irq_chip,
+                       handle_simple_irq);
+       }
+
+       err = devm_request_threaded_irq(&client->dev, data->client->irq, NULL,
+                                       pca954x_irq_handler,
+                                       IRQF_ONESHOT | IRQF_SHARED,
+                                       "pca954x", data);
+       if (err)
+               goto err_req_irq;
+
+       disable_irq(data->client->irq);
+
+       return 0;
+err_req_irq:
+       for (c = 0; c < data->chip->nchans; c++) {
+               irq = irq_find_mapping(data->irq, c);
+               irq_dispose_mapping(irq);
+       }
+       irq_domain_remove(data->irq);
+
+       return err;
+}
+
 /*
  * I2C init/probing/exit functions
  */
@@ -281,6 +410,10 @@ static int pca954x_probe(struct i2c_client *client,
        idle_disconnect_dt = of_node &&
                of_property_read_bool(of_node, "i2c-mux-idle-disconnect");
 
+       ret = pca954x_irq_setup(muxc);
+       if (ret)
+               goto fail_del_adapters;
+
        /* Now create an adapter for each channel */
        for (num = 0; num < data->chip->nchans; num++) {
                bool idle_disconnect_pd = false;
@@ -306,7 +439,7 @@ static int pca954x_probe(struct i2c_client *client,
                        dev_err(&client->dev,
                                "failed to register multiplexed adapter"
                                " %d as bus %d\n", num, force);
-                       goto virt_reg_failed;
+                       goto fail_del_adapters;
                }
        }
 
@@ -317,7 +450,7 @@ static int pca954x_probe(struct i2c_client *client,
 
        return 0;
 
-virt_reg_failed:
+fail_del_adapters:
        i2c_mux_del_adapters(muxc);
        return ret;
 }
@@ -325,6 +458,16 @@ virt_reg_failed:
 static int pca954x_remove(struct i2c_client *client)
 {
        struct i2c_mux_core *muxc = i2c_get_clientdata(client);
+       struct pca954x *data = i2c_mux_priv(muxc);
+       int c, irq;
+
+       if (data->irq) {
+               for (c = 0; c < data->chip->nchans; c++) {
+                       irq = irq_find_mapping(data->irq, c);
+                       irq_dispose_mapping(irq);
+               }
+               irq_domain_remove(data->irq);
+       }
 
        i2c_mux_del_adapters(muxc);
        return 0;
index b6940992a6ff95c699eddb8c7e2fb484a2f16ed3..968038482d2f1ab502252118a2ff61d780d1a0a0 100644 (file)
@@ -447,7 +447,7 @@ void ide_acpi_get_timing(ide_hwif_t *hwif)
        memcpy(&hwif->acpidata->gtm, out_obj->buffer.pointer,
               sizeof(struct GTM_buffer));
 
-       DEBPRINT("_GTM info: ptr: 0x%p, len: 0x%x, exp.len: 0x%Zx\n",
+       DEBPRINT("_GTM info: ptr: 0x%p, len: 0x%x, exp.len: 0x%zx\n",
                 out_obj->buffer.pointer, out_obj->buffer.length,
                 sizeof(struct GTM_buffer));
 
index 3c1b7974d66de7657d357943646293de7c10efff..d8a552b47718ef6a3c37b1da65819d9c0e67b261 100644 (file)
@@ -1136,7 +1136,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
        ssize_t ret = 0;
        int rc;
 
-       ide_debug_log(IDE_DBG_FUNC, "count %Zd", count);
+       ide_debug_log(IDE_DBG_FUNC, "count %zd", count);
 
        if (tape->chrdev_dir != IDETAPE_DIR_READ) {
                if (test_bit(ilog2(IDE_AFLAG_DETECT_BS), &drive->atapi_flags))
@@ -1195,7 +1195,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
        if (tape->write_prot)
                return -EACCES;
 
-       ide_debug_log(IDE_DBG_FUNC, "count %Zd", count);
+       ide_debug_log(IDE_DBG_FUNC, "count %zd", count);
 
        /* Initialize write operation */
        rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
index 46427ea01753b4c84f9670939347f06cc65bbfbe..157f2d1fb7e1908f527c2127e707199f9364593d 100644 (file)
@@ -300,7 +300,7 @@ static const struct ide_port_ops palm_bk3710_ports_ops = {
        .cable_detect           = palm_bk3710_cable_detect,
 };
 
-static struct ide_port_info palm_bk3710_port_info = {
+static struct ide_port_info palm_bk3710_port_info __initdata = {
        .init_dma               = palm_bk3710_init_dma,
        .port_ops               = &palm_bk3710_ports_ops,
        .dma_ops                = &sff_dma_ops,
index edaae9f9853c73b2f990ccbd82ebcee3868010fd..e426ac877d19fb65ab9fb3edd5b106d1c6d02c9a 100644 (file)
@@ -13,6 +13,7 @@ ib_core-y :=                  packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
                                multicast.o mad.o smi.o agent.o mad_rmpp.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
+ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
 
 ib_cm-y :=                     cm.o
 
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
new file mode 100644 (file)
index 0000000..126ac5f
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "core_priv.h"
+
+/**
+ * ib_device_register_rdmacg - register with rdma cgroup.
+ * @device: device to register to participate in resource
+ *          accounting by rdma cgroup.
+ *
+ * Register with the rdma cgroup. Should be called before
+ * exposing rdma device to user space applications to avoid
+ * resource accounting leak.
+ * Returns 0 on success, otherwise a failure code.
+ */
+int ib_device_register_rdmacg(struct ib_device *device)
+{
+       device->cg_device.name = device->name;
+       return rdmacg_register_device(&device->cg_device);
+}
+
+/**
+ * ib_device_unregister_rdmacg - unregister with rdma cgroup.
+ * @device: device to unregister.
+ *
+ * Unregister with the rdma cgroup. Should be called after
+ * all the resources are deallocated, and after a stage when any
+ * other resource allocation by user application cannot be done
+ * for this device to avoid any leak in accounting.
+ */
+void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+       rdmacg_unregister_device(&device->cg_device);
+}
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+                        struct ib_device *device,
+                        enum rdmacg_resource_type resource_index)
+{
+       return rdmacg_try_charge(&cg_obj->cg, &device->cg_device,
+                                resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_try_charge);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+                       struct ib_device *device,
+                       enum rdmacg_resource_type resource_index)
+{
+       rdmacg_uncharge(cg_obj->cg, &device->cg_device,
+                       resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_uncharge);
index 912ab4cd6eae3ff881374375ad8ac267ddcd0bc4..cb7d372e4bdf877206d8da9141d46a590395bcd9 100644 (file)
@@ -35,6 +35,7 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/cgroup_rdma.h>
 
 #include <rdma/ib_verbs.h>
 
@@ -124,6 +125,35 @@ int ib_cache_setup_one(struct ib_device *device);
 void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);
 
+#ifdef CONFIG_CGROUP_RDMA
+int ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_unregister_rdmacg(struct ib_device *device);
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+                        struct ib_device *device,
+                        enum rdmacg_resource_type resource_index);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+                       struct ib_device *device,
+                       enum rdmacg_resource_type resource_index);
+#else
+static inline int ib_device_register_rdmacg(struct ib_device *device)
+{ return 0; }
+
+static inline void ib_device_unregister_rdmacg(struct ib_device *device)
+{ }
+
+static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+                                      struct ib_device *device,
+                                      enum rdmacg_resource_type resource_index)
+{ return 0; }
+
+static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+                                     struct ib_device *device,
+                                     enum rdmacg_resource_type resource_index)
+{ }
+#endif
+
 static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
                                         struct net_device *upper)
 {
index f2e48655a906bbe16d0923905288e8fbf6af6e35..593d2ce6ec7cec115b58006a5cb13c49bec72d83 100644 (file)
@@ -333,6 +333,15 @@ int ib_register_device(struct ib_device *device,
        int ret;
        struct ib_client *client;
        struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+       struct device *parent = device->dev.parent;
+
+       WARN_ON_ONCE(!parent);
+       if (!device->dev.dma_ops)
+               device->dev.dma_ops = parent->dma_ops;
+       if (!device->dev.dma_mask)
+               device->dev.dma_mask = parent->dma_mask;
+       if (!device->dev.coherent_dma_mask)
+               device->dev.coherent_dma_mask = parent->coherent_dma_mask;
 
        mutex_lock(&device_mutex);
 
@@ -360,10 +369,18 @@ int ib_register_device(struct ib_device *device,
                goto out;
        }
 
+       ret = ib_device_register_rdmacg(device);
+       if (ret) {
+               pr_warn("Couldn't register device with rdma cgroup\n");
+               ib_cache_cleanup_one(device);
+               goto out;
+       }
+
        memset(&device->attrs, 0, sizeof(device->attrs));
        ret = device->query_device(device, &device->attrs, &uhw);
        if (ret) {
                pr_warn("Couldn't query the device attributes\n");
+               ib_device_unregister_rdmacg(device);
                ib_cache_cleanup_one(device);
                goto out;
        }
@@ -372,6 +389,7 @@ int ib_register_device(struct ib_device *device,
        if (ret) {
                pr_warn("Couldn't register device %s with driver model\n",
                        device->name);
+               ib_device_unregister_rdmacg(device);
                ib_cache_cleanup_one(device);
                goto out;
        }
@@ -421,6 +439,7 @@ void ib_unregister_device(struct ib_device *device)
 
        mutex_unlock(&device_mutex);
 
+       ib_device_unregister_rdmacg(device);
        ib_device_unregister_sysfs(device);
        ib_cache_cleanup_one(device);
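
Two things happen in device.c: the first hunk makes an ib_device inherit dma_ops and the DMA masks from its parent when the driver left them unset (preparing for the removal of ib_device.dma_device later in this series), and the remaining hunks register the device with the rdma cgroup controller after cache setup, with every later failure path (query_device, sysfs) unregistering it again. Teardown therefore always runs in reverse order of setup; the commit open-codes the unwind at each failure site, and the conventional goto ladder below is an equivalent sketch with illustrative step names:

/* Reverse-order unwind, sketched as a goto ladder. */
int register_everything(struct ib_device *dev)
{
        int ret;

        ret = setup_cache(dev);                 /* step 1 */
        if (ret)
                return ret;
        ret = register_cgroup(dev);             /* step 2 */
        if (ret)
                goto err_cache;
        ret = register_sysfs(dev);              /* step 3 */
        if (ret)
                goto err_cgroup;
        return 0;

err_cgroup:
        unregister_cgroup(dev);                 /* undo step 2 */
err_cache:
        cleanup_cache(dev);                     /* undo step 1 */
        return ret;
}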
 
index c1fb545e8d7848218a2a584647e4643eef0fc551..daadf3130c9f5adb985d9b4816675ba298904e03 100644 (file)
@@ -1258,7 +1258,7 @@ int ib_device_register_sysfs(struct ib_device *device,
        int ret;
        int i;
 
-       device->dev.parent = device->dma_device;
+       WARN_ON_ONCE(!device->dev.parent);
        ret = dev_set_name(class_dev, "%s", device->name);
        if (ret)
                return ret;
index e0a995b85a2d982c1e2c42deb1fd481563b3ce7e..cc0d51fb06e39e9d14a12e064b34a2ebf7f13b5d 100644 (file)
@@ -1290,7 +1290,7 @@ static void ib_ucm_add_one(struct ib_device *device)
                goto err;
 
        ucm_dev->dev.class = &cm_class;
-       ucm_dev->dev.parent = device->dma_device;
+       ucm_dev->dev.parent = device->dev.parent;
        ucm_dev->dev.devt = ucm_dev->cdev.dev;
        ucm_dev->dev.release = ib_ucm_release_dev;
        dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
index 249b403b43a440ba885b374988094e3fbe9e5498..aca7ff7abedc33a4e0928b6c2bb2c0904e71bff5 100644 (file)
@@ -1188,7 +1188,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
        if (cdev_add(&port->cdev, base, 1))
                goto err_cdev;
 
-       port->dev = device_create(umad_class, device->dma_device,
+       port->dev = device_create(umad_class, device->dev.parent,
                                  port->cdev.dev, port,
                                  "umad%d", port->dev_num);
        if (IS_ERR(port->dev))
@@ -1207,7 +1207,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
        if (cdev_add(&port->sm_cdev, base, 1))
                goto err_sm_cdev;
 
-       port->sm_dev = device_create(umad_class, device->dma_device,
+       port->sm_dev = device_create(umad_class, device->dev.parent,
                                     port->sm_cdev.dev, port,
                                     "issm%d", port->dev_num);
        if (IS_ERR(port->sm_dev))
index b4b395a054acd9b3e668a147df26b1d699c521d5..7b7a76e1279adffd20325fb3f7bc5e2a19946b03 100644 (file)
@@ -316,6 +316,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        struct ib_udata                   udata;
        struct ib_ucontext               *ucontext;
        struct file                      *filp;
+       struct ib_rdmacg_object          cg_obj;
        int ret;
 
        if (out_len < sizeof resp)
@@ -335,13 +336,18 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
                   (unsigned long) cmd.response + sizeof resp,
                   in_len - sizeof cmd, out_len - sizeof resp);
 
+       ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+       if (ret)
+               goto err;
+
        ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
        if (IS_ERR(ucontext)) {
                ret = PTR_ERR(ucontext);
-               goto err;
+               goto err_alloc;
        }
 
        ucontext->device = ib_dev;
+       ucontext->cg_obj = cg_obj;
        INIT_LIST_HEAD(&ucontext->pd_list);
        INIT_LIST_HEAD(&ucontext->mr_list);
        INIT_LIST_HEAD(&ucontext->mw_list);
@@ -407,6 +413,9 @@ err_free:
        put_pid(ucontext->tgid);
        ib_dev->dealloc_ucontext(ucontext);
 
+err_alloc:
+       ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+
 err:
        mutex_unlock(&file->mutex);
        return ret;
@@ -561,6 +570,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
                return -ENOMEM;
 
        init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
+       ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
+       if (ret) {
+               kfree(uobj);
+               return ret;
+       }
+
        down_write(&uobj->mutex);
 
        pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
@@ -605,6 +621,7 @@ err_idr:
        ib_dealloc_pd(pd);
 
 err:
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
        put_uobj_write(uobj);
        return ret;
 }
@@ -637,6 +654,8 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
        if (ret)
                goto err_put;
 
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
        uobj->live = 0;
        put_uobj_write(uobj);
 
@@ -1006,6 +1025,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
                        goto err_put;
                }
        }
+       ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
+       if (ret)
+               goto err_charge;
 
        mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
                                     cmd.access_flags, &udata);
@@ -1054,6 +1077,9 @@ err_unreg:
        ib_dereg_mr(mr);
 
 err_put:
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
        put_pd_read(pd);
 
 err_free:
@@ -1178,6 +1204,8 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
        idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 
        mutex_lock(&file->mutex);
@@ -1226,6 +1254,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
 
+       ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
+       if (ret)
+               goto err_charge;
+
        mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
        if (IS_ERR(mw)) {
                ret = PTR_ERR(mw);
@@ -1271,6 +1304,9 @@ err_unalloc:
        uverbs_dealloc_mw(mw);
 
 err_put:
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
        put_pd_read(pd);
 
 err_free:
@@ -1306,6 +1342,8 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
        idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
 
        mutex_lock(&file->mutex);
@@ -1405,6 +1443,11 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
        if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
                attr.flags = cmd->flags;
 
+       ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
+       if (ret)
+               goto err_charge;
+
        cq = ib_dev->create_cq(ib_dev, &attr,
                                             file->ucontext, uhw);
        if (IS_ERR(cq)) {
@@ -1452,6 +1495,10 @@ err_free:
        ib_destroy_cq(cq);
 
 err_file:
+       ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev,
+                          RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
        if (ev_file)
                ib_uverbs_release_ucq(file, ev_file, obj);
 
@@ -1732,6 +1779,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
        idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
 
        mutex_lock(&file->mutex);
@@ -1905,6 +1954,11 @@ static int create_qp(struct ib_uverbs_file *file,
                        goto err_put;
                }
 
+       ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
+       if (ret)
+               goto err_put;
+
        if (cmd->qp_type == IB_QPT_XRC_TGT)
                qp = ib_create_qp(pd, &attr);
        else
@@ -1912,7 +1966,7 @@ static int create_qp(struct ib_uverbs_file *file,
 
        if (IS_ERR(qp)) {
                ret = PTR_ERR(qp);
-               goto err_put;
+               goto err_create;
        }
 
        if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1993,6 +2047,10 @@ err_cb:
 err_destroy:
        ib_destroy_qp(qp);
 
+err_create:
+       ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
+                          RDMACG_RESOURCE_HCA_OBJECT);
+
 err_put:
        if (xrcd)
                put_xrcd_read(xrcd_uobj);
@@ -2519,6 +2577,8 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
        if (obj->uxrcd)
                atomic_dec(&obj->uxrcd->refcnt);
 
@@ -2970,11 +3030,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
        memset(&attr.dmac, 0, sizeof(attr.dmac));
        memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
 
+       ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
+       if (ret)
+               goto err_charge;
+
        ah = pd->device->create_ah(pd, &attr, &udata);
 
        if (IS_ERR(ah)) {
                ret = PTR_ERR(ah);
-               goto err_put;
+               goto err_create;
        }
 
        ah->device  = pd->device;
@@ -3013,7 +3078,10 @@ err_copy:
 err_destroy:
        ib_destroy_ah(ah);
 
-err_put:
+err_create:
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
        put_pd_read(pd);
 
 err:
@@ -3047,6 +3115,8 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
        idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 
        mutex_lock(&file->mutex);
@@ -3861,10 +3931,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
                err = -EINVAL;
                goto err_free;
        }
+
+       err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
+       if (err)
+               goto err_free;
+
        flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
        if (IS_ERR(flow_id)) {
                err = PTR_ERR(flow_id);
-               goto err_free;
+               goto err_create;
        }
        flow_id->uobject = uobj;
        uobj->object = flow_id;
@@ -3897,6 +3973,8 @@ err_copy:
        idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
 destroy_flow:
        ib_destroy_flow(flow_id);
+err_create:
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
 err_free:
        kfree(flow_attr);
 err_put:
@@ -3936,8 +4014,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
        flow_id = uobj->object;
 
        ret = ib_destroy_flow(flow_id);
-       if (!ret)
+       if (!ret) {
+               ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                uobj->live = 0;
+       }
 
        put_uobj_write(uobj);
 
@@ -4005,6 +4086,11 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
        obj->uevent.events_reported = 0;
        INIT_LIST_HEAD(&obj->uevent.event_list);
 
+       ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
+       if (ret)
+               goto err_put_cq;
+
        srq = pd->device->create_srq(pd, &attr, udata);
        if (IS_ERR(srq)) {
                ret = PTR_ERR(srq);
@@ -4069,6 +4155,8 @@ err_destroy:
        ib_destroy_srq(srq);
 
 err_put:
+       ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev,
+                          RDMACG_RESOURCE_HCA_OBJECT);
        put_pd_read(pd);
 
 err_put_cq:
@@ -4255,6 +4343,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
+       ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
        if (srq_type == IB_SRQT_XRC) {
                us = container_of(obj, struct ib_usrq_object, uevent);
                atomic_dec(&us->uxrcd->refcnt);
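
Every verb in this file now follows the same three-point shape: charge before calling into the driver, uncharge on the verb's error path, and uncharge again when the object is destroyed. The destroy side is the subtle one: as in ib_uverbs_ex_destroy_flow() above, the charge is only released once the driver confirms destruction. A sketch of that destroy-side rule (driver_destroy() is illustrative):

/* Release the cgroup charge only after the object is really gone;
 * if the driver fails, the object - and its charge - both live on. */
ret = driver_destroy(obj);
if (!ret) {
        ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
        uobj->live = 0;
}
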
index b3f95d453fba73073c42bcbdebe96cce98eabd72..35c788a32e26d4ea6c88fc5bea4a21217f78f103 100644 (file)
@@ -51,6 +51,7 @@
 #include <rdma/ib.h>
 
 #include "uverbs.h"
+#include "core_priv.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -237,6 +238,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
                idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
                ib_destroy_ah(ah);
+               ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                kfree(uobj);
        }
 
@@ -246,6 +249,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
                idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
                uverbs_dealloc_mw(mw);
+               ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                kfree(uobj);
        }
 
@@ -254,6 +259,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
                idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
                ib_destroy_flow(flow_id);
+               ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                kfree(uobj);
        }
 
@@ -266,6 +273,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                if (qp == qp->real_qp)
                        ib_uverbs_detach_umcast(qp, uqp);
                ib_destroy_qp(qp);
+               ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                ib_uverbs_release_uevent(file, &uqp->uevent);
                kfree(uqp);
        }
@@ -298,6 +307,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
                idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
                ib_destroy_srq(srq);
+               ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                ib_uverbs_release_uevent(file, uevent);
                kfree(uevent);
        }
@@ -310,6 +321,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
                idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
                ib_destroy_cq(cq);
+               ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                ib_uverbs_release_ucq(file, ev_file, ucq);
                kfree(ucq);
        }
@@ -319,6 +332,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
                idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
                ib_dereg_mr(mr);
+               ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                kfree(uobj);
        }
 
@@ -339,11 +354,16 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
                idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
                ib_dealloc_pd(pd);
+               ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+                                  RDMACG_RESOURCE_HCA_OBJECT);
                kfree(uobj);
        }
 
        put_pid(context->tgid);
 
+       ib_rdmacg_uncharge(&context->cg_obj, context->device,
+                          RDMACG_RESOURCE_HCA_HANDLE);
+
        return context->device->dealloc_ucontext(context);
 }
 
@@ -1174,7 +1194,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
        if (cdev_add(&uverbs_dev->cdev, base, 1))
                goto err_cdev;
 
-       uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
+       uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
                                        uverbs_dev->cdev.dev, uverbs_dev,
                                        "uverbs%d", uverbs_dev->devnum);
        if (IS_ERR(uverbs_dev->dev))
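
ib_uverbs_cleanup_ucontext() releases charges in dependency order: each per-object list (AHs, MWs, flows, QPs, SRQs, CQs, MRs, PDs) is drained and its HCA_OBJECT charges returned first, and only then is the context's own HCA_HANDLE charge dropped. One loop from that walk, sketched with the idr and locking details elided:

/* Sketch of one cleanup loop: destroy the HW object, return its
 * charge, then free the user-object wrapper. */
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
        struct ib_ah *ah = uobj->object;

        ib_destroy_ah(ah);
        ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
                           RDMACG_RESOURCE_HCA_OBJECT);
        kfree(uobj);
}
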
index bd452a92b386dfe4bcde2dd64c3fd97b0a0f3081..5d355401179b8ae5107e411421c673db86164696 100644 (file)
@@ -436,7 +436,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
        bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
 
        ibdev->num_comp_vectors = 1;
-       ibdev->dma_device = &rdev->en_dev->pdev->dev;
+       ibdev->dev.parent = &rdev->en_dev->pdev->dev;
        ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
 
        /* User space */
index 48649f93258a41e8ecf8b4214d4d518a28a07318..318ec5267bdfe1277181ae69980f5da19b311fce 100644 (file)
@@ -1393,7 +1393,7 @@ int iwch_register_device(struct iwch_dev *dev)
        memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
        dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
        dev->ibdev.num_comp_vectors = 1;
-       dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
+       dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
        dev->ibdev.query_device = iwch_query_device;
        dev->ibdev.query_port = iwch_query_port;
        dev->ibdev.query_pkey = iwch_query_pkey;
index bdf7de571d838d824dd5fc57d8e357a35ddfe84a..df64417ab6f24a874f049f779c1cd2a917e92c77 100644 (file)
@@ -572,7 +572,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
        memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
        dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
        dev->ibdev.num_comp_vectors =  dev->rdev.lldi.nciq;
-       dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
+       dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
        dev->ibdev.query_device = c4iw_query_device;
        dev->ibdev.query_port = c4iw_query_port;
        dev->ibdev.query_pkey = c4iw_query_pkey;
diff --git a/drivers/infiniband/hw/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c
deleted file mode 100644 (file)
index 7e8dab8..0000000
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-
-#include "verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64)0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int hfi1_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr,
-                              size_t size, enum dma_data_direction direction)
-{
-       if (WARN_ON(!valid_dma_direction(direction)))
-               return BAD_DMA_ADDRESS;
-
-       return (u64)cpu_addr;
-}
-
-static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
-                                 enum dma_data_direction direction)
-{
-       /* This is a stub, nothing to be done here */
-}
-
-static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page,
-                            unsigned long offset, size_t size,
-                           enum dma_data_direction direction)
-{
-       u64 addr;
-
-       if (WARN_ON(!valid_dma_direction(direction)))
-               return BAD_DMA_ADDRESS;
-
-       if (offset + size > PAGE_SIZE)
-               return BAD_DMA_ADDRESS;
-
-       addr = (u64)page_address(page);
-       if (addr)
-               addr += offset;
-
-       return addr;
-}
-
-static void hfi1_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
-                               enum dma_data_direction direction)
-{
-       /* This is a stub, nothing to be done here */
-}
-
-static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                      int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       u64 addr;
-       int i;
-       int ret = nents;
-
-       if (WARN_ON(!valid_dma_direction(direction)))
-               return BAD_DMA_ADDRESS;
-
-       for_each_sg(sgl, sg, nents, i) {
-               addr = (u64)page_address(sg_page(sg));
-               if (!addr) {
-                       ret = 0;
-                       break;
-               }
-               sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-               sg->dma_length = sg->length;
-#endif
-       }
-       return ret;
-}
-
-static void hfi1_unmap_sg(struct ib_device *dev,
-                         struct scatterlist *sg, int nents,
-                        enum dma_data_direction direction)
-{
-       /* This is a stub, nothing to be done here */
-}
-
-static void hfi1_sync_single_for_cpu(struct ib_device *dev, u64 addr,
-                                    size_t size, enum dma_data_direction dir)
-{
-}
-
-static void hfi1_sync_single_for_device(struct ib_device *dev, u64 addr,
-                                       size_t size,
-                                       enum dma_data_direction dir)
-{
-}
-
-static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                    u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p)
-               addr = page_address(p);
-       if (dma_handle)
-               *dma_handle = (u64)addr;
-       return addr;
-}
-
-static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
-                                  void *cpu_addr, u64 dma_handle)
-{
-       free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {
-       .mapping_error = hfi1_mapping_error,
-       .map_single = hfi1_dma_map_single,
-       .unmap_single = hfi1_dma_unmap_single,
-       .map_page = hfi1_dma_map_page,
-       .unmap_page = hfi1_dma_unmap_page,
-       .map_sg = hfi1_map_sg,
-       .unmap_sg = hfi1_unmap_sg,
-       .sync_single_for_cpu = hfi1_sync_single_for_cpu,
-       .sync_single_for_device = hfi1_sync_single_for_device,
-       .alloc_coherent = hfi1_dma_alloc_coherent,
-       .free_coherent = hfi1_dma_free_coherent
-};
index bd786b7bd30b1256f523a89357e52d12f2a4266c..3b19c16a9e45783c907359af11f68bb91c08997c 100644 (file)
@@ -92,7 +92,7 @@ static unsigned int poll_next(struct file *, struct poll_table_struct *);
 static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
 static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
 static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
-static int vma_fault(struct vm_area_struct *, struct vm_fault *);
+static int vma_fault(struct vm_fault *);
 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
                            unsigned long arg);
 
@@ -185,7 +185,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
        if (fd) {
                fd->rec_cpu_num = -1; /* no cpu affinity by default */
                fd->mm = current->mm;
-               atomic_inc(&fd->mm->mm_count);
+               mmgrab(fd->mm);
                fp->private_data = fd;
        } else {
                fp->private_data = NULL;
@@ -695,7 +695,7 @@ done:
  * Local (non-chip) user memory is not mapped right away but as it is
  * accessed by the user-level code.
  */
-static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int vma_fault(struct vm_fault *vmf)
 {
        struct page *page;
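
Two unrelated API catch-ups ride along here: mmgrab() replaces the open-coded atomic_inc(&mm->mm_count) with the new helper of identical semantics, and vma_fault() tracks the 4.11-era mm change in which ->fault handlers lost their vm_area_struct argument — the VMA is now reached through the fault record. A sketch of the new-style handler (the body is illustrative; like hfi1's, it refuses to demand-fault):

/* 4.11+ fault prototype: the VMA now lives in vmf->vma. */
static int my_vma_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;

        pr_debug("fault at %#lx in [%#lx, %#lx)\n",
                 vmf->address, vma->vm_start, vma->vm_end);
        return VM_FAULT_SIGBUS; /* mappings here are populated up front */
}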
 
index 6e595afca24cfb7ba8cf3c9a20842a7d51fb3c07..09cda3c35e8246f078b593488097c3709304b432 100644 (file)
@@ -4406,7 +4406,7 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
        switch (in_mad->base_version) {
        case OPA_MGMT_BASE_VERSION:
                if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
-                       dev_err(ibdev->dma_device, "invalid in_mad_size\n");
+                       dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
                        return IB_MAD_RESULT_FAILURE;
                }
                return hfi1_process_opa_mad(ibdev, mad_flags, port,
index 33f00f0719c561acec89667bed0566fe8432bb71..222315fadab11e00f36335ed8618fd72b73bd851 100644 (file)
@@ -1703,7 +1703,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
        strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
        ibdev->owner = THIS_MODULE;
        ibdev->phys_port_cnt = dd->num_pports;
-       ibdev->dma_device = &dd->pcidev->dev;
+       ibdev->dev.parent = &dd->pcidev->dev;
        ibdev->modify_device = modify_device;
        ibdev->alloc_hw_stats = alloc_hw_stats;
        ibdev->get_hw_stats = get_hw_stats;
index 6843409fba298abf1d593d0990c49054333cf43d..c3b41f95e70a5f1c39e89d91e48653fc05e2b20a 100644 (file)
@@ -439,7 +439,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 
        ib_dev->owner                   = THIS_MODULE;
        ib_dev->node_type               = RDMA_NODE_IB_CA;
-       ib_dev->dma_device              = dev;
+       ib_dev->dev.parent              = dev;
 
        ib_dev->phys_port_cnt           = hr_dev->caps.num_ports;
        ib_dev->local_dma_lkey          = hr_dev->caps.reserved_lkey;
index f036f32f15d3caa3c67102f5375b65a57df91355..3f44f2f91f03d84d51084079662374910409e94b 100644 (file)
@@ -101,7 +101,7 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
                        event.event = IB_EVENT_QP_ACCESS_ERR;
                        break;
                default:
-                       dev_dbg(ibqp->device->dma_device, "roce_ib: Unexpected event type %d on QP %06lx\n",
+                       dev_dbg(ibqp->device->dev.parent, "roce_ib: Unexpected event type %d on QP %06lx\n",
                                type, hr_qp->qpn);
                        return;
                }
index 5f695bf232a8f0d7bf332f65441db6a195d9092e..9b2849979756ba05ebf2c1f25073c1740d4b1520 100644 (file)
@@ -2758,7 +2758,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
            (1ull << IB_USER_VERBS_CMD_POST_SEND);
        iwibdev->ibdev.phys_port_cnt = 1;
        iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
-       iwibdev->ibdev.dma_device = &pcidev->dev;
        iwibdev->ibdev.dev.parent = &pcidev->dev;
        iwibdev->ibdev.query_port = i40iw_query_port;
        iwibdev->ibdev.modify_port = i40iw_modify_port;
index 211cbbe9ccd1e7a97540df1fed2d838f03139e90..88608906ce2503987e4c100506d698a3da89a65c 100644 (file)
@@ -2628,7 +2628,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
                                                1 : ibdev->num_ports;
        ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
-       ibdev->ib_dev.dma_device        = &dev->persist->pdev->dev;
+       ibdev->ib_dev.dev.parent        = &dev->persist->pdev->dev;
        ibdev->ib_dev.get_netdev        = mlx4_ib_get_netdev;
        ibdev->ib_dev.add_gid           = mlx4_ib_add_gid;
        ibdev->ib_dev.del_gid           = mlx4_ib_del_gid;
index 7f3d976d81edce2789a5b5710a12f2a8a0b3b568..64fed44b43a6b6900621d56fbe02398d57f13f95 100644 (file)
@@ -55,7 +55,7 @@
 #define pr_fmt(fmt)    "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
 
 #define mlx4_ib_warn(ibdev, format, arg...) \
-       dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
+       dev_warn((ibdev)->dev.parent, MLX4_IB_DRV_NAME ": " format, ## arg)
 
 enum {
        MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
index 5d73989d977135d9d560add27e074417ceaca258..433bcdbdd680534bb757a64080118446059ed814 100644 (file)
@@ -292,10 +292,10 @@ mlx4_alloc_priv_pages(struct ib_device *device,
        if (!mr->pages)
                return -ENOMEM;
 
-       mr->page_map = dma_map_single(device->dma_device, mr->pages,
+       mr->page_map = dma_map_single(device->dev.parent, mr->pages,
                                      mr->page_map_size, DMA_TO_DEVICE);
 
-       if (dma_mapping_error(device->dma_device, mr->page_map)) {
+       if (dma_mapping_error(device->dev.parent, mr->page_map)) {
                ret = -ENOMEM;
                goto err;
        }
@@ -313,7 +313,7 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
        if (mr->pages) {
                struct ib_device *device = mr->ibmr.device;
 
-               dma_unmap_single(device->dma_device, mr->page_map,
+               dma_unmap_single(device->dev.parent, mr->page_map,
                                 mr->page_map_size, DMA_TO_DEVICE);
                free_page((unsigned long)mr->pages);
                mr->pages = NULL;
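
With ib_device.dma_device on its way out, DMA mapping in the MR paths goes through the parent device captured in dev.parent at registration time. Map and unmap must name the same struct device, size, and direction; a sketch of the pairing (buf and len are illustrative):

/* Map/unmap pairing on the ib_device's parent. */
struct device *dma_dev = ibdev->dev.parent;
dma_addr_t map;

map = dma_map_single(dma_dev, buf, len, DMA_TO_DEVICE);
if (dma_mapping_error(dma_dev, map))
        return -ENOMEM;
/* ... hand 'map' to the hardware ... */
dma_unmap_single(dma_dev, map, len, DMA_TO_DEVICE);
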
index 6a8498c052a5c3a164ab47a1525f88835634ab17..5b3355268725b8fd07917d996a612db4584ff7c1 100644 (file)
@@ -3363,7 +3363,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        dev->ib_dev.phys_port_cnt     = dev->num_ports;
        dev->ib_dev.num_comp_vectors    =
                dev->mdev->priv.eq_table.num_comp_vectors;
-       dev->ib_dev.dma_device  = &mdev->pdev->dev;
+       dev->ib_dev.dev.parent          = &mdev->pdev->dev;
 
        dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
        dev->ib_dev.uverbs_cmd_mask     =
index 3c1f483d003f76d3330fcaa95091828dcb80898d..b8f9382a8b7dd72daf02b37804b912999dc500a1 100644 (file)
@@ -966,7 +966,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
                       int page_shift, int flags)
 {
        struct mlx5_ib_dev *dev = mr->dev;
-       struct device *ddev = dev->ib_dev.dma_device;
+       struct device *ddev = dev->ib_dev.dev.parent;
        struct mlx5_ib_ucontext *uctx = NULL;
        int size;
        void *xlt;
@@ -1411,9 +1411,9 @@ mlx5_alloc_priv_descs(struct ib_device *device,
 
        mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
 
-       mr->desc_map = dma_map_single(device->dma_device, mr->descs,
+       mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
                                      size, DMA_TO_DEVICE);
-       if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+       if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
                ret = -ENOMEM;
                goto err;
        }
@@ -1432,7 +1432,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
                struct ib_device *device = mr->ibmr.device;
                int size = mr->max_descs * mr->desc_size;
 
-               dma_unmap_single(device->dma_device, mr->desc_map,
+               dma_unmap_single(device->dev.parent, mr->desc_map,
                                 size, DMA_TO_DEVICE);
                kfree(mr->descs_alloc);
                mr->descs = NULL;
index ce163184e7422450044f271348b1fba67871d001..22d0e6ee5af6aaed90c754ddb142ccee9d188867 100644 (file)
@@ -1224,7 +1224,7 @@ int mthca_register_device(struct mthca_dev *dev)
        dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.num_comp_vectors     = 1;
-       dev->ib_dev.dma_device           = &dev->pdev->dev;
+       dev->ib_dev.dev.parent           = &dev->pdev->dev;
        dev->ib_dev.query_device         = mthca_query_device;
        dev->ib_dev.query_port           = mthca_query_port;
        dev->ib_dev.modify_device        = mthca_modify_device;
index d3eae2f3e9f504957305e4bda59f837327bc69f7..ccf0a4cffe9c1b359deceed34b939181b161b110 100644 (file)
@@ -3731,7 +3731,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
 
        nesibdev->ibdev.phys_port_cnt = 1;
        nesibdev->ibdev.num_comp_vectors = 1;
-       nesibdev->ibdev.dma_device = &nesdev->pcidev->dev;
        nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
        nesibdev->ibdev.query_device = nes_query_device;
        nesibdev->ibdev.query_port = nes_query_port;
index 3e43bdc81e7a5b49574c5b5460e0f7a38636e199..57c9a2ad0260bfd26366cacdfcfedca48308f80a 100644 (file)
@@ -199,7 +199,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
        dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
        dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
        dev->ibdev.mmap = ocrdma_mmap;
-       dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
+       dev->ibdev.dev.parent = &dev->nic_info.pdev->dev;
 
        dev->ibdev.process_mad = ocrdma_process_mad;
        dev->ibdev.get_port_immutable = ocrdma_port_immutable;
index 3ac8aa5ef37de2c5242125077eef78035d565901..b9b47e5cc8b3bde5a053107c0ba6a4997754c1ee 100644 (file)
@@ -170,7 +170,7 @@ static int qedr_register_device(struct qedr_dev *dev)
        dev->ibdev.get_port_immutable = qedr_port_immutable;
        dev->ibdev.get_netdev = qedr_get_netdev;
 
-       dev->ibdev.dma_device = &dev->pdev->dev;
+       dev->ibdev.dev.parent = &dev->pdev->dev;
 
        dev->ibdev.get_link_layer = qedr_link_layer;
        dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c
deleted file mode 100644 (file)
index 59fe092..0000000
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2006, 2009, 2010 QLogic, Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-
-#include "qib_verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64) 0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int qib_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 qib_dma_map_single(struct ib_device *dev, void *cpu_addr,
-                             size_t size, enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-       return (u64) cpu_addr;
-}
-
-static void qib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
-                                enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static u64 qib_dma_map_page(struct ib_device *dev, struct page *page,
-                           unsigned long offset, size_t size,
-                           enum dma_data_direction direction)
-{
-       u64 addr;
-
-       BUG_ON(!valid_dma_direction(direction));
-
-       if (offset + size > PAGE_SIZE) {
-               addr = BAD_DMA_ADDRESS;
-               goto done;
-       }
-
-       addr = (u64) page_address(page);
-       if (addr)
-               addr += offset;
-       /* TODO: handle highmem pages */
-
-done:
-       return addr;
-}
-
-static void qib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
-                              enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                     int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       u64 addr;
-       int i;
-       int ret = nents;
-
-       BUG_ON(!valid_dma_direction(direction));
-
-       for_each_sg(sgl, sg, nents, i) {
-               addr = (u64) page_address(sg_page(sg));
-               /* TODO: handle highmem pages */
-               if (!addr) {
-                       ret = 0;
-                       break;
-               }
-               sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-               sg->dma_length = sg->length;
-#endif
-       }
-       return ret;
-}
-
-static void qib_unmap_sg(struct ib_device *dev,
-                        struct scatterlist *sg, int nents,
-                        enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
-                                   size_t size, enum dma_data_direction dir)
-{
-}
-
-static void qib_sync_single_for_device(struct ib_device *dev, u64 addr,
-                                      size_t size,
-                                      enum dma_data_direction dir)
-{
-}
-
-static void *qib_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                   u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p)
-               addr = page_address(p);
-       if (dma_handle)
-               *dma_handle = (u64) addr;
-       return addr;
-}
-
-static void qib_dma_free_coherent(struct ib_device *dev, size_t size,
-                                 void *cpu_addr, u64 dma_handle)
-{
-       free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops qib_dma_mapping_ops = {
-       .mapping_error = qib_mapping_error,
-       .map_single = qib_dma_map_single,
-       .unmap_single = qib_dma_unmap_single,
-       .map_page = qib_dma_map_page,
-       .unmap_page = qib_dma_unmap_page,
-       .map_sg = qib_map_sg,
-       .unmap_sg = qib_unmap_sg,
-       .sync_single_for_cpu = qib_sync_single_for_cpu,
-       .sync_single_for_device = qib_sync_single_for_device,
-       .alloc_coherent = qib_dma_alloc_coherent,
-       .free_coherent = qib_dma_free_coherent
-};
index 2d1eacf1dfed601c0b5f3f04a87d5ca8ebc532e0..9396c1807cc3ec5d023f1bb1ce60a2d359da0a2f 100644 (file)
@@ -893,7 +893,7 @@ bail:
 /*
  * qib_file_vma_fault - handle a VMA page fault.
  */
-static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int qib_file_vma_fault(struct vm_fault *vmf)
 {
        struct page *page;
 
index 92399d3ffd15efca7f5bc10958485d9937445a20..06de1cbcf67dcf5774e5c3ab886eeffa1bd4d708 100644 (file)
@@ -707,7 +707,7 @@ static void qib_6120_clear_freeze(struct qib_devdata *dd)
        /* disable error interrupts, to avoid confusion */
        qib_write_kreg(dd, kr_errmask, 0ULL);
 
-       /* also disable interrupts; errormask is sometimes overwriten */
+       /* also disable interrupts; errormask is sometimes overwritten */
        qib_6120_set_intr_state(dd, 0);
 
        qib_cancel_sends(dd->pport);
index e55e31a691951968dc01bb6bbfa6b524a5b10f73..55a18384c22d646b74439d7171b43e73da1391f9 100644 (file)
@@ -1259,7 +1259,7 @@ static void qib_7220_clear_freeze(struct qib_devdata *dd)
        /* disable error interrupts, to avoid confusion */
        qib_write_kreg(dd, kr_errmask, 0ULL);
 
-       /* also disable interrupts; errormask is sometimes overwriten */
+       /* also disable interrupts; errormask is sometimes overwritten */
        qib_7220_set_intr_state(dd, 0);
 
        qib_cancel_sends(dd->pport);
index 9cc97bd4277590bf4bc03e58817c4c67adcbb3be..12c4208fd7013b78c5e1bf736d82461c05c814c3 100644 (file)
@@ -2053,7 +2053,7 @@ static void qib_7322_clear_freeze(struct qib_devdata *dd)
                        qib_write_kreg_port(dd->pport + pidx, krp_errmask,
                                            0ULL);
 
-       /* also disable interrupts; errormask is sometimes overwriten */
+       /* also disable interrupts; errormask is sometimes overwritten */
        qib_7322_set_intr_state(dd, 0);
 
        /* clear the freeze, and be sure chip saw it */
index 2c3c93572c17262c21dffe891a18ff9f9e5b5223..8fdf79f8d4e4f8bbffba250c43a2545af8406e67 100644 (file)
@@ -158,10 +158,7 @@ int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
        unsigned n, m;
        size_t off;
 
-       /*
-        * We use RKEY == zero for kernel virtual addresses
-        * (see qib_get_dma_mr and qib_dma.c).
-        */
+       /* We use RKEY == zero for kernel virtual addresses */
        rcu_read_lock();
        if (rkey == 0) {
                struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
index 6b56f1c01a0789f9335691f57535947df4afa2a9..83f8b5f243819b04c40213ed7f6e5ae7b15648b8 100644 (file)
@@ -1550,7 +1550,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
        ibdev->owner = THIS_MODULE;
        ibdev->node_guid = ppd->guid;
        ibdev->phys_port_cnt = dd->num_pports;
-       ibdev->dma_device = &dd->pcidev->dev;
+       ibdev->dev.parent = &dd->pcidev->dev;
        ibdev->modify_device = qib_modify_device;
        ibdev->process_mad = qib_process_mad;
 
index 4f5a45db08e1889e7f4b072b7a92dfa5a5bacb6a..c0c1e8b027b1a6de79e07910ba177c775438fa3c 100644 (file)
@@ -382,7 +382,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
        us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
        us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
        us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
-       us_ibdev->ib_dev.dma_device = &dev->dev;
+       us_ibdev->ib_dev.dev.parent = &dev->dev;
        us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
        strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
 
index e03d2f6c1f90ed4f7f9782027d73ba9c44ab4f3e..100bea5c42ffb74375552131ebb1fbd5cbdc3659 100644 (file)
@@ -173,7 +173,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
        dev->flags = 0;
        dev->ib_dev.owner = THIS_MODULE;
        dev->ib_dev.num_comp_vectors = 1;
-       dev->ib_dev.dma_device = &dev->pdev->dev;
+       dev->ib_dev.dev.parent = &dev->pdev->dev;
        dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
        dev->ib_dev.uverbs_cmd_mask =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
index 1da8d01a68550b3ec361581321995e82e212f208..fdd001ce13d871be4176ede4d3c78f8f51b46977 100644 (file)
@@ -1,5 +1,6 @@
 config INFINIBAND_RDMAVT
        tristate "RDMA verbs transport library"
        depends on 64BIT
+       select DMA_VIRT_OPS
        ---help---
        This is a common software verbs provider for RDMA networks.
index c33a4f84413cf8ce18cbbc188621778c882ba852..78b276a90401bf04b9a29523dc9dd1ff56a7da08 100644 (file)
@@ -7,7 +7,7 @@
 #
 obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
 
-rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o \
+rdmavt-y := vt.o ah.o cq.o mad.o mcast.o mmap.o mr.o pd.o qp.o \
        rc.o srq.o trace.o
 
 CFLAGS_trace.o = -I$(src)
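
The Kconfig and Makefile changes are the point of the dma.c deletions in this series: software-only providers such as rdmavt and rxe never touch an IOMMU, so instead of each carrying a private table of identity "DMA" ops they now select DMA_VIRT_OPS and let the generic dma_virt_ops hand back kernel virtual addresses. What such an identity map amounts to, sketched (simplified from the deleted per-driver copies, not the dma_virt_ops source):

/* Identity "virtual DMA": the bus address is just the kernel
 * virtual address, because the CPU does the copying. */
static dma_addr_t virt_map_page(struct device *dev, struct page *page,
                                unsigned long offset, size_t size,
                                enum dma_data_direction dir,
                                unsigned long attrs)
{
        return (uintptr_t)page_address(page) + offset;
}
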
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
deleted file mode 100644 (file)
index f2cefb0..0000000
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-#include <rdma/ib_verbs.h>
-
-#include "dma.h"
-
-#define BAD_DMA_ADDRESS ((u64)0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int rvt_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 rvt_dma_map_single(struct ib_device *dev, void *cpu_addr,
-                             size_t size, enum dma_data_direction direction)
-{
-       if (WARN_ON(!valid_dma_direction(direction)))
-               return BAD_DMA_ADDRESS;
-
-       return (u64)cpu_addr;
-}
-
-static void rvt_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
-                                enum dma_data_direction direction)
-{
-       /* This is a stub, nothing to be done here */
-}
-
-static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
-                           unsigned long offset, size_t size,
-                           enum dma_data_direction direction)
-{
-       u64 addr;
-
-       if (WARN_ON(!valid_dma_direction(direction)))
-               return BAD_DMA_ADDRESS;
-
-       addr = (u64)page_address(page);
-       if (addr)
-               addr += offset;
-
-       return addr;
-}
-
-static void rvt_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
-                              enum dma_data_direction direction)
-{
-       /* This is a stub, nothing to be done here */
-}
-
-static int rvt_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                     int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       u64 addr;
-       int i;
-       int ret = nents;
-
-       if (WARN_ON(!valid_dma_direction(direction)))
-               return 0;
-
-       for_each_sg(sgl, sg, nents, i) {
-               addr = (u64)page_address(sg_page(sg));
-               if (!addr) {
-                       ret = 0;
-                       break;
-               }
-               sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-               sg->dma_length = sg->length;
-#endif
-       }
-       return ret;
-}
-
-static void rvt_unmap_sg(struct ib_device *dev,
-                        struct scatterlist *sg, int nents,
-                        enum dma_data_direction direction)
-{
-       /* This is a stub, nothing to be done here */
-}
-
-static int rvt_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl,
-                           int nents, enum dma_data_direction direction,
-                           unsigned long attrs)
-{
-       return rvt_map_sg(dev, sgl, nents, direction);
-}
-
-static void rvt_unmap_sg_attrs(struct ib_device *dev,
-                              struct scatterlist *sg, int nents,
-                              enum dma_data_direction direction,
-                              unsigned long attrs)
-{
-       return rvt_unmap_sg(dev, sg, nents, direction);
-}
-
-static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr,
-                                   size_t size, enum dma_data_direction dir)
-{
-}
-
-static void rvt_sync_single_for_device(struct ib_device *dev, u64 addr,
-                                      size_t size,
-                                      enum dma_data_direction dir)
-{
-}
-
-static void *rvt_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                   u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p)
-               addr = page_address(p);
-       if (dma_handle)
-               *dma_handle = (u64)addr;
-       return addr;
-}
-
-static void rvt_dma_free_coherent(struct ib_device *dev, size_t size,
-                                 void *cpu_addr, u64 dma_handle)
-{
-       free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = {
-       .mapping_error = rvt_mapping_error,
-       .map_single = rvt_dma_map_single,
-       .unmap_single = rvt_dma_unmap_single,
-       .map_page = rvt_dma_map_page,
-       .unmap_page = rvt_dma_unmap_page,
-       .map_sg = rvt_map_sg,
-       .unmap_sg = rvt_unmap_sg,
-       .map_sg_attrs = rvt_map_sg_attrs,
-       .unmap_sg_attrs = rvt_unmap_sg_attrs,
-       .sync_single_for_cpu = rvt_sync_single_for_cpu,
-       .sync_single_for_device = rvt_sync_single_for_device,
-       .alloc_coherent = rvt_dma_alloc_coherent,
-       .free_coherent = rvt_dma_free_coherent
-};
diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h
deleted file mode 100644 (file)
index 979f07e..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef DEF_RDMAVTDMA_H
-#define DEF_RDMAVTDMA_H
-
-/*
- * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-extern struct ib_dma_mapping_ops rvt_default_dma_mapping_ops;
-
-#endif          /* DEF_RDMAVTDMA_H */
index f6e99778d7ca72194dd5c5a019501d8211743ba0..bba241faca61da6bff26a3a0ee87ea148a20564a 100644 (file)
@@ -74,9 +74,9 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
                    u16 *out_mad_pkey_index)
 {
        /*
-        * MAD processing is quite different between hfi1 and qib. Therfore this
-        * is expected to be provided by the driver. Other drivers in the future
-        * may chose to implement this but it should not be made into a
+        * MAD processing is quite different between hfi1 and qib. Therefore
+        * this is expected to be provided by the driver. Other drivers in the
+        * future may choose to implement this but it should not be made into a
         * requirement.
         */
        if (ibport_num_to_idx(ibdev, port_num) < 0)
index c80a69b1ffcb060ebf111149e182b8a6daec5ce7..ae30b6838d7958fa7a044535d24eb850e3607030 100644 (file)
@@ -320,8 +320,8 @@ static void __rvt_free_mr(struct rvt_mr *mr)
  * @acc: access flags
  *
  * Return: the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see dma.c).
+ * Note that all DMA addresses should be created via the functions in
+ * struct dma_virt_ops.
  */
 struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
 {
@@ -799,7 +799,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 
        /*
         * We use LKEY == zero for kernel virtual addresses
-        * (see rvt_get_dma_mr and dma.c).
+        * (see rvt_get_dma_mr() and dma_virt_ops).
         */
        rcu_read_lock();
        if (sge->lkey == 0) {
@@ -897,7 +897,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 
        /*
         * We use RKEY == zero for kernel virtual addresses
-        * (see rvt_get_dma_mr and dma.c).
+        * (see rvt_get_dma_mr() and dma_virt_ops).
         */
        rcu_read_lock();
        if (rkey == 0) {
index 1165639a914bf52518eee2b3f48b7a061bd9608c..0d7c6bb551d924ea76a05c512bc966c40ceb56dc 100644 (file)
@@ -47,6 +47,7 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/dma-mapping.h>
 #include "vt.h"
 #include "trace.h"
 
@@ -778,8 +779,7 @@ int rvt_register_device(struct rvt_dev_info *rdi)
        }
 
        /* DMA Operations */
-       rdi->ibdev.dma_ops =
-               rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops;
+       rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
 
        /* Protection Domain */
        spin_lock_init(&rdi->n_pds_lock);
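
The "? :" in the new line is GCC's binary conditional ("Elvis") operator: "a ? : b" evaluates to a when a is non-NULL, evaluating a only once, and to b otherwise, so a driver-supplied dma_ops is kept and only a missing one is defaulted to dma_virt_ops. A minimal user-space sketch of the same defaulting idiom (struct and variable names here are illustrative, not the kernel's):

#include <stdio.h>

struct ops { const char *name; };

static struct ops default_ops = { "dma_virt_ops" };

int main(void)
{
        struct ops *dma_ops = NULL;

        /* GNU C extension: "a ? : b" is "a ? a : b" with a evaluated once */
        dma_ops = dma_ops ? : &default_ops;
        printf("using %s\n", dma_ops->name);    /* prints: using dma_virt_ops */
        return 0;
}
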
index 6b01eaa4461b6ee5d9a0d1b12c9c77f38a41b52f..f363505312be870b2c5af26d14e8d7c8c3ca0b97 100644 (file)
@@ -50,7 +50,6 @@
 
 #include <rdma/rdma_vt.h>
 #include <linux/pci.h>
-#include "dma.h"
 #include "pd.h"
 #include "qp.h"
 #include "ah.h"
index 1e4e628fe7b0600deb5014c538c0db9c5f180bcd..7d1ac27ed2516dad367354f36b15903cb3197ade 100644 (file)
@@ -2,6 +2,7 @@ config RDMA_RXE
        tristate "Software RDMA over Ethernet (RoCE) driver"
        depends on INET && PCI && INFINIBAND
        depends on NET_UDP_TUNNEL
+       select DMA_VIRT_OPS
        ---help---
        This driver implements the InfiniBand RDMA transport over
        the Linux network stack. It enables a system with a
index 3b3fb9d1c470ac7574896e89ea9b8533cc6f6d12..ec35ff022a4298820bc2e6480066255b57fe915e 100644 (file)
@@ -14,7 +14,6 @@ rdma_rxe-y := \
        rxe_qp.o \
        rxe_cq.o \
        rxe_mr.o \
-       rxe_dma.o \
        rxe_opcode.o \
        rxe_mmap.o \
        rxe_icrc.o \
diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c
deleted file mode 100644 (file)
index a0f8af5..0000000
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
- * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *     - Redistributions of source code must retain the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer.
- *
- *     - Redistributions in binary form must reproduce the above
- *       copyright notice, this list of conditions and the following
- *       disclaimer in the documentation and/or other materials
- *       provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "rxe.h"
-#include "rxe_loc.h"
-
-#define DMA_BAD_ADDER ((u64)0)
-
-static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == DMA_BAD_ADDER;
-}
-
-static u64 rxe_dma_map_single(struct ib_device *dev,
-                             void *cpu_addr, size_t size,
-                             enum dma_data_direction direction)
-{
-       WARN_ON(!valid_dma_direction(direction));
-       return (uintptr_t)cpu_addr;
-}
-
-static void rxe_dma_unmap_single(struct ib_device *dev,
-                                u64 addr, size_t size,
-                                enum dma_data_direction direction)
-{
-       WARN_ON(!valid_dma_direction(direction));
-}
-
-static u64 rxe_dma_map_page(struct ib_device *dev,
-                           struct page *page,
-                           unsigned long offset,
-                           size_t size, enum dma_data_direction direction)
-{
-       u64 addr;
-
-       WARN_ON(!valid_dma_direction(direction));
-
-       if (offset + size > PAGE_SIZE) {
-               addr = DMA_BAD_ADDER;
-               goto done;
-       }
-
-       addr = (uintptr_t)page_address(page);
-       if (addr)
-               addr += offset;
-
-done:
-       return addr;
-}
-
-static void rxe_dma_unmap_page(struct ib_device *dev,
-                              u64 addr, size_t size,
-                              enum dma_data_direction direction)
-{
-       WARN_ON(!valid_dma_direction(direction));
-}
-
-static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                     int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       u64 addr;
-       int i;
-       int ret = nents;
-
-       WARN_ON(!valid_dma_direction(direction));
-
-       for_each_sg(sgl, sg, nents, i) {
-               addr = (uintptr_t)page_address(sg_page(sg));
-               if (!addr) {
-                       ret = 0;
-                       break;
-               }
-               sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-               sg->dma_length = sg->length;
-#endif
-       }
-
-       return ret;
-}
-
-static void rxe_unmap_sg(struct ib_device *dev,
-                        struct scatterlist *sg, int nents,
-                        enum dma_data_direction direction)
-{
-       WARN_ON(!valid_dma_direction(direction));
-}
-
-static int rxe_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl,
-                           int nents, enum dma_data_direction direction,
-                           unsigned long attrs)
-{
-       return rxe_map_sg(dev, sgl, nents, direction);
-}
-
-static void rxe_unmap_sg_attrs(struct ib_device *dev,
-                              struct scatterlist *sg, int nents,
-                              enum dma_data_direction direction,
-                              unsigned long attrs)
-{
-       rxe_unmap_sg(dev, sg, nents, direction);
-}
-
-static void rxe_sync_single_for_cpu(struct ib_device *dev,
-                                   u64 addr,
-                                   size_t size, enum dma_data_direction dir)
-{
-}
-
-static void rxe_sync_single_for_device(struct ib_device *dev,
-                                      u64 addr,
-                                      size_t size, enum dma_data_direction dir)
-{
-}
-
-static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                   u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p)
-               addr = page_address(p);
-
-       if (dma_handle)
-               *dma_handle = (uintptr_t)addr;
-
-       return addr;
-}
-
-static void rxe_dma_free_coherent(struct ib_device *dev, size_t size,
-                                 void *cpu_addr, u64 dma_handle)
-{
-       free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops rxe_dma_mapping_ops = {
-       .mapping_error          = rxe_mapping_error,
-       .map_single             = rxe_dma_map_single,
-       .unmap_single           = rxe_dma_unmap_single,
-       .map_page               = rxe_dma_map_page,
-       .unmap_page             = rxe_dma_unmap_page,
-       .map_sg                 = rxe_map_sg,
-       .unmap_sg               = rxe_unmap_sg,
-       .map_sg_attrs           = rxe_map_sg_attrs,
-       .unmap_sg_attrs         = rxe_unmap_sg_attrs,
-       .sync_single_for_cpu    = rxe_sync_single_for_cpu,
-       .sync_single_for_device = rxe_sync_single_for_device,
-       .alloc_coherent         = rxe_dma_alloc_coherent,
-       .free_coherent          = rxe_dma_free_coherent
-};
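
The ops table deleted above mapped "DMA" addresses one-to-one onto kernel virtual addresses, which is all a software-only RDMA device needs; the generic dma_virt_ops that replaces it centralizes exactly that identity mapping. A hedged user-space sketch of the idea (names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* For a software-only device there is no IOMMU or bus translation, so a
 * "mapped" address can simply be the CPU address itself, as the deleted
 * rxe_dma_map_single() above did. */
static uint64_t virt_map_single(void *cpu_addr, size_t size)
{
        (void)size;             /* nothing to program, no bounce buffer */
        return (uintptr_t)cpu_addr;
}

int main(void)
{
        int buf[4];
        uint64_t handle = virt_map_single(buf, sizeof(buf));

        printf("cpu %p -> dma 0x%llx\n", (void *)buf,
               (unsigned long long)handle);
        return 0;
}
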
index 272337e5e9483cc355fcceadc5becf04ff8d4445..183a9d379b41c087ce60361e985c6be2c0baa87b 100644 (file)
@@ -237,8 +237,6 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
                      struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
                      struct ib_udata *udata);
 
-extern struct ib_dma_mapping_ops rxe_dma_mapping_ops;
-
 void rxe_release(struct kref *kref);
 
 void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify);
index d2e2eff7a515dd31ac5bd12cde06a80c5f806fcb..5113e502f6f969013cce0ac0f524bfba385f43f8 100644 (file)
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <linux/dma-mapping.h>
 #include "rxe.h"
 #include "rxe_loc.h"
 #include "rxe_queue.h"
@@ -169,7 +170,7 @@ static int rxe_query_pkey(struct ib_device *device,
        struct rxe_port *port;
 
        if (unlikely(port_num != 1)) {
-               dev_warn(device->dma_device, "invalid port_num = %d\n",
+               dev_warn(device->dev.parent, "invalid port_num = %d\n",
                         port_num);
                goto err1;
        }
@@ -177,7 +178,7 @@ static int rxe_query_pkey(struct ib_device *device,
        port = &rxe->port;
 
        if (unlikely(index >= port->attr.pkey_tbl_len)) {
-               dev_warn(device->dma_device, "invalid index = %d\n",
+               dev_warn(device->dev.parent, "invalid index = %d\n",
                         index);
                goto err1;
        }
@@ -1234,10 +1235,10 @@ int rxe_register_device(struct rxe_dev *rxe)
        dev->node_type = RDMA_NODE_IB_CA;
        dev->phys_port_cnt = 1;
        dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
-       dev->dma_device = rxe_dma_device(rxe);
+       dev->dev.parent = rxe_dma_device(rxe);
        dev->local_dma_lkey = 0;
        dev->node_guid = rxe_node_guid(rxe);
-       dev->dma_ops = &rxe_dma_mapping_ops;
+       dev->dev.dma_ops = &dma_virt_ops;
 
        dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
        dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
index 7b6d40ff1acf8e3a80269a713705e20655def9da..bac455a1942d9cef723efd9060bfcf3288842e5b 100644 (file)
@@ -65,7 +65,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
        ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
                             sizeof(drvinfo->fw_version));
 
-       strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
+       strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent),
                sizeof(drvinfo->bus_info));
 
        strlcpy(drvinfo->version, ipoib_driver_version,
index 259c59f673945bd4cbcf36a1debe9ce01bed6081..d1d3fb7a6127c5d585009267d8379c401b2ea4ff 100644 (file)
@@ -2020,7 +2020,7 @@ static struct net_device *ipoib_add_port(const char *format,
        if (!priv)
                goto alloc_mem_failed;
 
-       SET_NETDEV_DEV(priv->dev, hca->dma_device);
+       SET_NETDEV_DEV(priv->dev, hca->dev.parent);
        priv->dev->dev_id = port - 1;
 
        result = ib_query_port(hca, port, &attr);
index 30a6985909e0d95c446eb51abe17cf570dd18f73..5a887efb4bdf1b6405d3fc4342918084ae420b57 100644 (file)
@@ -652,7 +652,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
                }
 
                if (iscsi_host_add(shost,
-                                  ib_conn->device->ib_device->dma_device)) {
+                                  ib_conn->device->ib_device->dev.parent)) {
                        mutex_unlock(&iser_conn->state_mutex);
                        goto free_host;
                }
index 3c7fa972a38cbc37437695820d3e337b123c8f05..cee46266f434850e6a9533615d812b4c610d5c13 100644 (file)
@@ -2933,7 +2933,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
        sprintf(target->target_name, "SRP.T10:%016llX",
                be64_to_cpu(target->id_ext));
 
-       if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
+       if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
                return -ENODEV;
 
        memcpy(ids.port_id, &target->id_ext, 8);
@@ -3546,7 +3546,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
        host->port = port;
 
        host->dev.class = &srp_class;
-       host->dev.parent = device->dev->dma_device;
+       host->dev.parent = device->dev->dev.parent;
        dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
 
        if (device_register(&host->dev))
index bc5a2d86ae7ea1de425dfcc204fa71ed576c17a8..7e314c2f207162f51b8c1ab78c6f0367b9025321 100644 (file)
@@ -2479,8 +2479,7 @@ static void srpt_add_one(struct ib_device *device)
        struct ib_srq_init_attr srq_attr;
        int i;
 
-       pr_debug("device = %p, device->dma_ops = %p\n", device,
-                device->dma_ops);
+       pr_debug("device = %p\n", device);
 
        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev)
index 44deca88c57972d2a65f3fca51ca852116f48810..beaf61ce775b756db1dec2658355972bf8f3823e 100644 (file)
@@ -202,7 +202,7 @@ static int cyttsp4_si_get_cydata(struct cyttsp4 *cd)
        int rc;
 
        si->si_ofs.cydata_size = si->si_ofs.test_ofs - si->si_ofs.cydata_ofs;
-       dev_dbg(cd->dev, "%s: cydata size: %Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: cydata size: %zd\n", __func__,
                        si->si_ofs.cydata_size);
 
        p = krealloc(si->si_ptrs.cydata, si->si_ofs.cydata_size, GFP_KERNEL);
@@ -430,13 +430,13 @@ static int cyttsp4_si_get_opcfg_data(struct cyttsp4 *cd)
        for (abs = 0; abs < CY_TCH_NUM_ABS; abs++) {
                dev_dbg(cd->dev, "%s: tch_rec_%s\n", __func__,
                        cyttsp4_tch_abs_string[abs]);
-               dev_dbg(cd->dev, "%s:     ofs =%2Zd\n", __func__,
+               dev_dbg(cd->dev, "%s:     ofs =%2zd\n", __func__,
                        si->si_ofs.tch_abs[abs].ofs);
-               dev_dbg(cd->dev, "%s:     siz =%2Zd\n", __func__,
+               dev_dbg(cd->dev, "%s:     siz =%2zd\n", __func__,
                        si->si_ofs.tch_abs[abs].size);
-               dev_dbg(cd->dev, "%s:     max =%2Zd\n", __func__,
+               dev_dbg(cd->dev, "%s:     max =%2zd\n", __func__,
                        si->si_ofs.tch_abs[abs].max);
-               dev_dbg(cd->dev, "%s:     bofs=%2Zd\n", __func__,
+               dev_dbg(cd->dev, "%s:     bofs=%2zd\n", __func__,
                        si->si_ofs.tch_abs[abs].bofs);
        }
 
@@ -586,62 +586,62 @@ static int cyttsp4_si_get_op_data_ptrs(struct cyttsp4 *cd)
 static void cyttsp4_si_put_log_data(struct cyttsp4 *cd)
 {
        struct cyttsp4_sysinfo *si = &cd->sysinfo;
-       dev_dbg(cd->dev, "%s: cydata_ofs =%4Zd siz=%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: cydata_ofs =%4zd siz=%4zd\n", __func__,
                si->si_ofs.cydata_ofs, si->si_ofs.cydata_size);
-       dev_dbg(cd->dev, "%s: test_ofs   =%4Zd siz=%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: test_ofs   =%4zd siz=%4zd\n", __func__,
                si->si_ofs.test_ofs, si->si_ofs.test_size);
-       dev_dbg(cd->dev, "%s: pcfg_ofs   =%4Zd siz=%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: pcfg_ofs   =%4zd siz=%4zd\n", __func__,
                si->si_ofs.pcfg_ofs, si->si_ofs.pcfg_size);
-       dev_dbg(cd->dev, "%s: opcfg_ofs  =%4Zd siz=%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: opcfg_ofs  =%4zd siz=%4zd\n", __func__,
                si->si_ofs.opcfg_ofs, si->si_ofs.opcfg_size);
-       dev_dbg(cd->dev, "%s: ddata_ofs  =%4Zd siz=%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: ddata_ofs  =%4zd siz=%4zd\n", __func__,
                si->si_ofs.ddata_ofs, si->si_ofs.ddata_size);
-       dev_dbg(cd->dev, "%s: mdata_ofs  =%4Zd siz=%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: mdata_ofs  =%4zd siz=%4zd\n", __func__,
                si->si_ofs.mdata_ofs, si->si_ofs.mdata_size);
 
-       dev_dbg(cd->dev, "%s: cmd_ofs       =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: cmd_ofs       =%4zd\n", __func__,
                si->si_ofs.cmd_ofs);
-       dev_dbg(cd->dev, "%s: rep_ofs       =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: rep_ofs       =%4zd\n", __func__,
                si->si_ofs.rep_ofs);
-       dev_dbg(cd->dev, "%s: rep_sz        =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: rep_sz        =%4zd\n", __func__,
                si->si_ofs.rep_sz);
-       dev_dbg(cd->dev, "%s: num_btns      =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: num_btns      =%4zd\n", __func__,
                si->si_ofs.num_btns);
-       dev_dbg(cd->dev, "%s: num_btn_regs  =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: num_btn_regs  =%4zd\n", __func__,
                si->si_ofs.num_btn_regs);
-       dev_dbg(cd->dev, "%s: tt_stat_ofs   =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: tt_stat_ofs   =%4zd\n", __func__,
                si->si_ofs.tt_stat_ofs);
-       dev_dbg(cd->dev, "%s: tch_rec_size  =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: tch_rec_size  =%4zd\n", __func__,
                si->si_ofs.tch_rec_size);
-       dev_dbg(cd->dev, "%s: max_tchs      =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: max_tchs      =%4zd\n", __func__,
                si->si_ofs.max_tchs);
-       dev_dbg(cd->dev, "%s: mode_size     =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: mode_size     =%4zd\n", __func__,
                si->si_ofs.mode_size);
-       dev_dbg(cd->dev, "%s: data_size     =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: data_size     =%4zd\n", __func__,
                si->si_ofs.data_size);
-       dev_dbg(cd->dev, "%s: map_sz        =%4Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: map_sz        =%4zd\n", __func__,
                si->si_ofs.map_sz);
 
-       dev_dbg(cd->dev, "%s: btn_rec_size   =%2Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: btn_rec_size   =%2zd\n", __func__,
                si->si_ofs.btn_rec_size);
-       dev_dbg(cd->dev, "%s: btn_diff_ofs   =%2Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: btn_diff_ofs   =%2zd\n", __func__,
                si->si_ofs.btn_diff_ofs);
-       dev_dbg(cd->dev, "%s: btn_diff_size  =%2Zd\n", __func__,
+       dev_dbg(cd->dev, "%s: btn_diff_size  =%2zd\n", __func__,
                si->si_ofs.btn_diff_size);
 
-       dev_dbg(cd->dev, "%s: max_x    = 0x%04ZX (%Zd)\n", __func__,
+       dev_dbg(cd->dev, "%s: max_x    = 0x%04zX (%zd)\n", __func__,
                si->si_ofs.max_x, si->si_ofs.max_x);
-       dev_dbg(cd->dev, "%s: x_origin = %Zd (%s)\n", __func__,
+       dev_dbg(cd->dev, "%s: x_origin = %zd (%s)\n", __func__,
                si->si_ofs.x_origin,
                si->si_ofs.x_origin == CY_NORMAL_ORIGIN ?
                "left corner" : "right corner");
-       dev_dbg(cd->dev, "%s: max_y    = 0x%04ZX (%Zd)\n", __func__,
+       dev_dbg(cd->dev, "%s: max_y    = 0x%04zX (%zd)\n", __func__,
                si->si_ofs.max_y, si->si_ofs.max_y);
-       dev_dbg(cd->dev, "%s: y_origin = %Zd (%s)\n", __func__,
+       dev_dbg(cd->dev, "%s: y_origin = %zd (%s)\n", __func__,
                si->si_ofs.y_origin,
                si->si_ofs.y_origin == CY_NORMAL_ORIGIN ?
                "upper corner" : "lower corner");
-       dev_dbg(cd->dev, "%s: max_p    = 0x%04ZX (%Zd)\n", __func__,
+       dev_dbg(cd->dev, "%s: max_p    = 0x%04zX (%zd)\n", __func__,
                si->si_ofs.max_p, si->si_ofs.max_p);
 
        dev_dbg(cd->dev, "%s: xy_mode=%p xy_data=%p\n", __func__,
@@ -1000,7 +1000,7 @@ static int cyttsp4_xy_worker(struct cyttsp4 *cd)
                dev_dbg(dev, "%s: Large area detected\n", __func__);
 
        if (num_cur_tch > si->si_ofs.max_tchs) {
-               dev_err(dev, "%s: too many tch; set to max tch (n=%d c=%Zd)\n",
+               dev_err(dev, "%s: too many tch; set to max tch (n=%d c=%zd)\n",
                                __func__, num_cur_tch, si->si_ofs.max_tchs);
                num_cur_tch = si->si_ofs.max_tchs;
        }
index 1b5b8c5361c506f2b835642eff2e3e57fbdf1370..98940d1392cb0cd19d648b6a25f2a5ba36c052d4 100644 (file)
@@ -117,7 +117,7 @@ const struct iommu_ops amd_iommu_ops;
 static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
 int amd_iommu_max_glx_val = -1;
 
-static struct dma_map_ops amd_iommu_dma_ops;
+static const struct dma_map_ops amd_iommu_dma_ops;
 
 /*
  * This struct contains device specific data for the IOMMU
@@ -519,7 +519,7 @@ static void iommu_uninit_device(struct device *dev)
        iommu_group_remove_device(dev);
 
        /* Remove dma-ops */
-       dev->archdata.dma_ops = NULL;
+       dev->dma_ops = NULL;
 
        /*
         * We keep dev_data around for unplugged devices and reuse it when the
@@ -2168,7 +2168,7 @@ static int amd_iommu_add_device(struct device *dev)
                                dev_name(dev));
 
                iommu_ignore_device(dev);
-               dev->archdata.dma_ops = &nommu_dma_ops;
+               dev->dma_ops = &nommu_dma_ops;
                goto out;
        }
        init_iommu_group(dev);
@@ -2185,7 +2185,7 @@ static int amd_iommu_add_device(struct device *dev)
        if (domain->type == IOMMU_DOMAIN_IDENTITY)
                dev_data->passthrough = true;
        else
-               dev->archdata.dma_ops = &amd_iommu_dma_ops;
+               dev->dma_ops = &amd_iommu_dma_ops;
 
 out:
        iommu_completion_wait(iommu);
@@ -2672,7 +2672,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
                        return NULL;
 
                page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
-                                                get_order(size));
+                                                get_order(size), flag);
                if (!page)
                        return NULL;
        }
@@ -2732,7 +2732,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
        return check_device(dev);
 }
 
-static struct dma_map_ops amd_iommu_dma_ops = {
+static const struct dma_map_ops amd_iommu_dma_ops = {
        .alloc          = alloc_coherent,
        .free           = free_coherent,
        .map_page       = map_page,
index 04cdac7ab3e34bbcc3a4cba19fbb57d44129549b..6130278c5d71bd08e925cc35007610bc32736938 100644 (file)
@@ -1507,7 +1507,7 @@ static ssize_t amd_iommu_show_cap(struct device *dev,
                                  struct device_attribute *attr,
                                  char *buf)
 {
-       struct amd_iommu *iommu = dev_get_drvdata(dev);
+       struct amd_iommu *iommu = dev_to_amd_iommu(dev);
        return sprintf(buf, "%x\n", iommu->cap);
 }
 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
@@ -1516,7 +1516,7 @@ static ssize_t amd_iommu_show_features(struct device *dev,
                                       struct device_attribute *attr,
                                       char *buf)
 {
-       struct amd_iommu *iommu = dev_get_drvdata(dev);
+       struct amd_iommu *iommu = dev_to_amd_iommu(dev);
        return sprintf(buf, "%llx\n", iommu->features);
 }
 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
index af00f381a7b1a34e060039db569b274d04a532d1..003f3ceb2661c3cf1112cf39ecb7e1afb3488772 100644 (file)
@@ -569,6 +569,11 @@ struct amd_iommu {
        volatile u64 __aligned(8) cmd_sem;
 };
 
+static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
+{
+       return container_of(dev, struct amd_iommu, iommu.dev);
+}
+
 #define ACPIHID_UID_LEN 256
 #define ACPIHID_HID_LEN 9
 
index f5e02f8e737113123991607219ad23a12b2d1c54..238ad3447712d263ef9d67a109c2d86c03693a87 100644 (file)
@@ -3829,7 +3829,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
        if (gfpflags_allow_blocking(flags)) {
                unsigned int count = size >> PAGE_SHIFT;
 
-               page = dma_alloc_from_contiguous(dev, count, order);
+               page = dma_alloc_from_contiguous(dev, count, order, flags);
                if (page && iommu_no_mapping(dev) &&
                    page_to_phys(page) + size > dev->coherent_dma_mask) {
                        dma_release_from_contiguous(dev, page, count);
@@ -4730,11 +4730,16 @@ static int intel_iommu_cpu_dead(unsigned int cpu)
        return 0;
 }
 
+static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
+{
+       return container_of(dev, struct intel_iommu, iommu.dev);
+}
+
 static ssize_t intel_iommu_show_version(struct device *dev,
                                        struct device_attribute *attr,
                                        char *buf)
 {
-       struct intel_iommu *iommu = dev_get_drvdata(dev);
+       struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        u32 ver = readl(iommu->reg + DMAR_VER_REG);
        return sprintf(buf, "%d:%d\n",
                       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
@@ -4745,7 +4750,7 @@ static ssize_t intel_iommu_show_address(struct device *dev,
                                        struct device_attribute *attr,
                                        char *buf)
 {
-       struct intel_iommu *iommu = dev_get_drvdata(dev);
+       struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%llx\n", iommu->reg_phys);
 }
 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
@@ -4754,7 +4759,7 @@ static ssize_t intel_iommu_show_cap(struct device *dev,
                                    struct device_attribute *attr,
                                    char *buf)
 {
-       struct intel_iommu *iommu = dev_get_drvdata(dev);
+       struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%llx\n", iommu->cap);
 }
 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
@@ -4763,7 +4768,7 @@ static ssize_t intel_iommu_show_ecap(struct device *dev,
                                    struct device_attribute *attr,
                                    char *buf)
 {
-       struct intel_iommu *iommu = dev_get_drvdata(dev);
+       struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%llx\n", iommu->ecap);
 }
 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
@@ -4772,7 +4777,7 @@ static ssize_t intel_iommu_show_ndoms(struct device *dev,
                                      struct device_attribute *attr,
                                      char *buf)
 {
-       struct intel_iommu *iommu = dev_get_drvdata(dev);
+       struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
 }
 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
@@ -4781,7 +4786,7 @@ static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
                                           struct device_attribute *attr,
                                           char *buf)
 {
-       struct intel_iommu *iommu = dev_get_drvdata(dev);
+       struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
                                                  cap_ndoms(iommu->cap)));
 }
index cb72e0011310d1fbd04cb1560a861c8e5ec2fd55..51f2b228723f2c00d09512f8c73dc94343a6f58d 100644 (file)
@@ -579,7 +579,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
                if (!svm->mm)
                        goto bad_req;
                /* If the mm is already defunct, don't handle faults. */
-               if (!atomic_inc_not_zero(&svm->mm->mm_users))
+               if (!mmget_not_zero(svm->mm))
                        goto bad_req;
                down_read(&svm->mm->mmap_sem);
                vma = find_extend_vma(svm->mm, address);
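
mmget_not_zero() wraps the same atomic_inc_not_zero(&mm->mm_users) that the old line open-coded: take a reference to the mm only if its user count has not already hit zero. The inc-not-zero refcount pattern, sketched with C11 atomics (the function name is illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Take a reference only while the object is still alive (count > 0).
 * The CAS loop guarantees a zero count is never resurrected. */
static bool get_not_zero(atomic_int *users)
{
        int old = atomic_load(users);

        while (old != 0)
                if (atomic_compare_exchange_weak(users, &old, old + 1))
                        return true;
        return false;
}

int main(void)
{
        atomic_int mm_users = 1;

        printf("got ref: %d\n", get_not_zero(&mm_users));  /* 1: count 1 -> 2 */
        atomic_store(&mm_users, 0);                        /* mm went defunct */
        printf("got ref: %d\n", get_not_zero(&mm_users));  /* 0: stays 0 */
        return 0;
}
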
index 576b7b4a32787017e44ed3f3ebf6f40dcc6a965c..8bc2791bc39c19f15ba91be7bd1f4007fdf43e94 100644 (file)
@@ -2049,7 +2049,7 @@ static int diva_dbg_cmp_key(const char *ref, const char *key) {
 /*
   In case trace filter starts with "C" character then
   all following characters are interpreted as command.
-  Followings commands are available:
+  Following commands are available:
   - single, trace single call at time, independent from CPN/CiPN
 */
 static int diva_mnt_cmp_nmbr(const char *nmbr) {
index 8d338ba366d0a65e4ea682138f373df6be6b586f..77dec28ba874c7f220944afc07e6ebcb67fa00a0 100644 (file)
@@ -1625,7 +1625,7 @@ mISDNipac_init(struct ipac_hw *ipac, void *hw)
                ipac->hscx[i].bch.hw = hw;
                ipac->hscx[i].ip = ipac;
                /* default values for IOM time slots
-                * can be overwriten by card */
+                * can be overwritten by card */
                ipac->hscx[i].slot = (i == 0) ? 0x2f : 0x03;
        }
 
index 0222b1a35a2dd3c1ba0d4b2199016134066fa5ca..9b85295aa6578f5ac5c86803e0923f29404b2e13 100644 (file)
  *
  * The CMX has special functions for conferences with one, two and more
  * members. It will allow different types of data flow. Receive and transmit
- * data to/form upper layer may be swithed on/off individually without losing
+ * data to/from upper layer may be switched on/off individually without losing
  * features of CMX, Tones and DTMF.
  *
  * Echo Cancellation: Sometimes we like to cancel echo from the interface.
index 67d76f21fecd9a6e48d73861a19ef8c41914e0dd..28955b94d2b26f47d7c54217d84c2a8a11af692a 100644 (file)
@@ -328,13 +328,15 @@ static void dm_softirq_done(struct request *rq)
        int rw;
 
        if (!clone) {
-               rq_end_stats(tio->md, rq);
+               struct mapped_device *md = tio->md;
+
+               rq_end_stats(md, rq);
                rw = rq_data_dir(rq);
                if (!rq->q->mq_ops)
                        blk_end_request_all(rq, tio->error);
                else
                        blk_mq_end_request(rq, tio->error);
-               rq_completed(tio->md, rw, false);
+               rq_completed(md, rw, false);
                return;
        }
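
The local variable is not cosmetic: blk_mq_end_request() (and blk_end_request_all()) can free the request, and tio lives inside it, so the later rq_completed(tio->md, ...) was a potential use-after-free. Caching the pointer first is the fix. A minimal sketch of the bug shape (types and names are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct md  { int stats; };
struct tio { struct md *md; };

static void end_request(struct tio *tio)
{
        free(tio);              /* after this, any tio-> access is a UAF */
}

static void softirq_done(struct tio *tio)
{
        struct md *md = tio->md;        /* cache BEFORE the object can die */

        end_request(tio);
        md->stats++;                    /* safe: uses the cached pointer */
}

int main(void)
{
        struct md m = { 0 };
        struct tio *t = malloc(sizeof(*t));

        t->md = &m;
        softirq_done(t);
        printf("stats=%d\n", m.stats);
        return 0;
}
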
 
index 685aa2d77e2526935f8f2f416ad8d8681b6c7b14..b0536cfd8e174b83a53d49391552b0c7ec64aef8 100644 (file)
@@ -214,7 +214,7 @@ static void faulty_make_request(struct mddev *mddev, struct bio *bio)
                }
        }
        if (failit) {
-               struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev);
+               struct bio *b = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
 
                b->bi_bdev = conf->rdev->bdev;
                b->bi_private = bio;
index f1c7bbac31a580bb6f708b614696f17404badc0d..3e38e0207a3eb44339ad6431dc3557ae27d05612 100644 (file)
@@ -53,18 +53,26 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
        return conf->disks + lo;
 }
 
+/*
+ * In linear_congested() conf->raid_disks is used as a copy of
+ * mddev->raid_disks to iterate conf->disks[]. Because conf->raid_disks
+ * and conf->disks[] are both created in linear_conf(), they are always
+ * consistent with each other, while mddev->raid_disks may not be.
+ */
 static int linear_congested(struct mddev *mddev, int bits)
 {
        struct linear_conf *conf;
        int i, ret = 0;
 
-       conf = mddev->private;
+       rcu_read_lock();
+       conf = rcu_dereference(mddev->private);
 
-       for (i = 0; i < mddev->raid_disks && !ret ; i++) {
+       for (i = 0; i < conf->raid_disks && !ret ; i++) {
                struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
                ret |= bdi_congested(q->backing_dev_info, bits);
        }
 
+       rcu_read_unlock();
        return ret;
 }
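
The reader above now pairs rcu_read_lock()/rcu_dereference() with the rcu_assign_pointer() the writer does in linear_add(), so a congested query racing with an array grow sees either the old conf or the fully initialized new one. The publish/subscribe half of that contract can be sketched in user space with C11 release/acquire atomics (names are illustrative, and real RCU additionally defers freeing the old object):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct conf { int raid_disks; };

static _Atomic(struct conf *) private_conf;

/* Writer: fully initialize, then publish with release semantics,
 * which is what rcu_assign_pointer() guarantees. */
static void publish(int disks)
{
        struct conf *c = malloc(sizeof(*c));

        c->raid_disks = disks;
        atomic_store_explicit(&private_conf, c, memory_order_release);
}

/* Reader: an acquire load (what rcu_dereference() guarantees) ensures
 * the pointed-to fields are seen fully initialized. */
static int congested(void)
{
        struct conf *c = atomic_load_explicit(&private_conf,
                                              memory_order_acquire);
        return c ? c->raid_disks : 0;
}

int main(void)
{
        publish(2);
        printf("raid_disks=%d\n", congested());
        return 0;
}
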
 
@@ -144,6 +152,19 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
                        conf->disks[i-1].end_sector +
                        conf->disks[i].rdev->sectors;
 
+       /*
+        * conf->raid_disks is a copy of mddev->raid_disks. The reason to
+        * keep a copy in struct linear_conf is that mddev->raid_disks may
+        * not be consistent with the number of pointers in conf->disks[]
+        * when it is updated in linear_add() while the old conf->disks[]
+        * array is still being iterated in linear_congested(). Here
+        * conf->raid_disks is always consistent with the number of
+        * pointers in conf->disks[], and mddev->private is updated with
+        * rcu_assign_pointer() in linear_add(), so such a race is avoided.
+        */
+       conf->raid_disks = raid_disks;
+
        return conf;
 
 out:
@@ -196,15 +217,24 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
        if (!newconf)
                return -ENOMEM;
 
+       /* newconf->raid_disks already keeps a copy of the increased
+        * value of mddev->raid_disks; WARN_ONCE() is just used to make
+        * sure of this. It is possible that oldconf is still referenced
+        * in linear_congested(), therefore kfree_rcu() is used to defer
+        * freeing oldconf until no one references it anymore.
+        */
        mddev_suspend(mddev);
-       oldconf = mddev->private;
+       oldconf = rcu_dereference_protected(mddev->private,
+                       lockdep_is_held(&mddev->reconfig_mutex));
        mddev->raid_disks++;
-       mddev->private = newconf;
+       WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
+               "copied raid_disks doesn't match mddev->raid_disks");
+       rcu_assign_pointer(mddev->private, newconf);
        md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
        set_capacity(mddev->gendisk, mddev->array_sectors);
        mddev_resume(mddev);
        revalidate_disk(mddev->gendisk);
-       kfree(oldconf);
+       kfree_rcu(oldconf, rcu);
        return 0;
 }
 
@@ -262,6 +292,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
                                trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
                                                      split, disk_devt(mddev->gendisk),
                                                      bio_sector);
+                       mddev_check_writesame(mddev, split);
                        generic_make_request(split);
                }
        } while (split != bio);
index b685ddd7d7f76c25553ce88abd049fd9ead545e6..8d392e6098b3295ddbebac59e418a27ae21712e5 100644 (file)
@@ -10,6 +10,7 @@ struct linear_conf
 {
        struct rcu_head         rcu;
        sector_t                array_sectors;
+       int                     raid_disks; /* a copy of mddev->raid_disks */
        struct dev_info         disks[0];
 };
 #endif
index ba485dcf1064dd463bdb93edd85157b247d0fcb5..985374f20e2e3f4d78ac1d7d77213b2e1ad7e80c 100644 (file)
@@ -190,16 +190,6 @@ struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 }
 EXPORT_SYMBOL_GPL(bio_alloc_mddev);
 
-struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
-                           struct mddev *mddev)
-{
-       if (!mddev || !mddev->bio_set)
-               return bio_clone(bio, gfp_mask);
-
-       return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
-}
-EXPORT_SYMBOL_GPL(bio_clone_mddev);
-
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
@@ -5228,8 +5218,11 @@ int md_run(struct mddev *mddev)
                sysfs_notify_dirent_safe(rdev->sysfs_state);
        }
 
-       if (mddev->bio_set == NULL)
+       if (mddev->bio_set == NULL) {
                mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+               if (!mddev->bio_set)
+                       return -ENOMEM;
+       }
 
        spin_lock(&pers_lock);
        pers = find_pers(mddev->level, mddev->clevel);
@@ -8980,7 +8973,14 @@ static __exit void md_exit(void)
 
        for_each_mddev(mddev, tmp) {
                export_array(mddev);
+               mddev->ctime = 0;
                mddev->hold_active = 0;
+               /*
+                * for_each_mddev() will call mddev_put() at the end of each
+                * iteration.  As the mddev is now fully clear, this will
+                * schedule the mddev for destruction by a workqueue, and the
+                * destroy_workqueue() below will wait for that to complete.
+                */
        }
        destroy_workqueue(md_misc_wq);
        destroy_workqueue(md_wq);
index 2a514036a83dc0da07c0966b7fe247c18356bbbf..b8859cbf84b618b39ed3d92a2887e8764c403919 100644 (file)
@@ -673,8 +673,6 @@ extern void md_rdev_clear(struct md_rdev *rdev);
 
 extern void mddev_suspend(struct mddev *mddev);
 extern void mddev_resume(struct mddev *mddev);
-extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
-                                  struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                   struct mddev *mddev);
 
@@ -710,4 +708,11 @@ static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
 {
        mddev->flags &= ~unsupported_flags;
 }
+
+static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
+{
+       if (bio_op(bio) == REQ_OP_WRITE_SAME &&
+           !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
+               mddev->queue->limits.max_write_same_sectors = 0;
+}
 #endif /* _MD_MD_H */
index d457afa672d57a172965aa000913fa6ff6625878..79a12b59250bbca870be857eb7cf350c0c9b53ad 100644 (file)
@@ -138,6 +138,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
        mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
        mp_bh->bio.bi_end_io = multipath_end_request;
        mp_bh->bio.bi_private = mp_bh;
+       mddev_check_writesame(mddev, &mp_bh->bio);
        generic_make_request(&mp_bh->bio);
        return;
 }
index d6585239bff22809edbcaf3881dc2f2ae0a2f41e..93347ca7c7a617e097ccafcbedbecdfa396d4968 100644 (file)
@@ -503,6 +503,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                                trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
                                                      split, disk_devt(mddev->gendisk),
                                                      bio_sector);
+                       mddev_check_writesame(mddev, split);
                        generic_make_request(split);
                }
        } while (split != bio);
index 830ff2b203463ef075d53a6c7a2ae22e0ec2c7d9..7453d94eeed700c8ac30da1b8d7857b4788fdbd5 100644 (file)
@@ -71,9 +71,8 @@
  */
 static int max_queued_requests = 1024;
 
-static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
-                         sector_t bi_sector);
-static void lower_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
+static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
 
 #define raid1_log(md, fmt, args...)                            \
        do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
@@ -100,7 +99,6 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
 #define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
 #define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
-#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
 
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -205,6 +203,7 @@ static void free_r1bio(struct r1bio *r1_bio)
 static void put_buf(struct r1bio *r1_bio)
 {
        struct r1conf *conf = r1_bio->mddev->private;
+       sector_t sect = r1_bio->sector;
        int i;
 
        for (i = 0; i < conf->raid_disks * 2; i++) {
@@ -215,7 +214,7 @@ static void put_buf(struct r1bio *r1_bio)
 
        mempool_free(r1_bio, conf->r1buf_pool);
 
-       lower_barrier(conf);
+       lower_barrier(conf, sect);
 }
 
 static void reschedule_retry(struct r1bio *r1_bio)
@@ -223,10 +222,12 @@ static void reschedule_retry(struct r1bio *r1_bio)
        unsigned long flags;
        struct mddev *mddev = r1_bio->mddev;
        struct r1conf *conf = mddev->private;
+       int idx;
 
+       idx = sector_to_idx(r1_bio->sector);
        spin_lock_irqsave(&conf->device_lock, flags);
        list_add(&r1_bio->retry_list, &conf->retry_list);
-       conf->nr_queued ++;
+       atomic_inc(&conf->nr_queued[idx]);
        spin_unlock_irqrestore(&conf->device_lock, flags);
 
        wake_up(&conf->wait_barrier);
@@ -243,7 +244,6 @@ static void call_bio_endio(struct r1bio *r1_bio)
        struct bio *bio = r1_bio->master_bio;
        int done;
        struct r1conf *conf = r1_bio->mddev->private;
-       sector_t start_next_window = r1_bio->start_next_window;
        sector_t bi_sector = bio->bi_iter.bi_sector;
 
        if (bio->bi_phys_segments) {
@@ -269,7 +269,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
                 */
-               allow_barrier(conf, start_next_window, bi_sector);
+               allow_barrier(conf, bi_sector);
        }
 }
 
@@ -517,6 +517,25 @@ static void raid1_end_write_request(struct bio *bio)
                bio_put(to_put);
 }
 
+static sector_t align_to_barrier_unit_end(sector_t start_sector,
+                                         sector_t sectors)
+{
+       sector_t len;
+
+       WARN_ON(sectors == 0);
+       /*
+        * len is the number of sectors from start_sector to end of the
+        * barrier unit which start_sector belongs to.
+        */
+       len = round_up(start_sector + 1, BARRIER_UNIT_SECTOR_SIZE) -
+             start_sector;
+
+       if (len > sectors)
+               len = sectors;
+
+       return len;
+}
+
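
align_to_barrier_unit_end() clips a request so it never straddles a barrier-unit boundary, which is what allows each barrier bucket to be accounted independently. A quick user-space check of the arithmetic, assuming for the sake of the sketch a barrier unit of 1 << 17 sectors (64 MB of 512-byte sectors):

#include <stdio.h>

typedef unsigned long long sector_t;

#define BARRIER_UNIT_SECTOR_BITS  17    /* assumed value for this sketch */
#define BARRIER_UNIT_SECTOR_SIZE  (1ULL << BARRIER_UNIT_SECTOR_BITS)

#define round_up(x, y)  ((((x) + (y) - 1) / (y)) * (y))

static sector_t align_to_barrier_unit_end(sector_t start, sector_t sectors)
{
        /* sectors left from start to the end of its barrier unit */
        sector_t len = round_up(start + 1, BARRIER_UNIT_SECTOR_SIZE) - start;

        return len < sectors ? len : sectors;
}

int main(void)
{
        /* 8 sectors before a unit boundary: a 32-sector I/O is clipped to 8 */
        sector_t start = BARRIER_UNIT_SECTOR_SIZE - 8;

        printf("%llu\n", align_to_barrier_unit_end(start, 32));  /* 8 */
        printf("%llu\n", align_to_barrier_unit_end(0, 32));      /* 32 */
        return 0;
}
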
 /*
  * This routine returns the disk from which the requested read should
  * be done. There is a per-array 'next expected sequential IO' sector
@@ -813,168 +832,228 @@ static void flush_pending_writes(struct r1conf *conf)
  */
 static void raise_barrier(struct r1conf *conf, sector_t sector_nr)
 {
+       int idx = sector_to_idx(sector_nr);
+
        spin_lock_irq(&conf->resync_lock);
 
        /* Wait until no block IO is waiting */
-       wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+       wait_event_lock_irq(conf->wait_barrier,
+                           !atomic_read(&conf->nr_waiting[idx]),
                            conf->resync_lock);
 
        /* block any new IO from starting */
-       conf->barrier++;
-       conf->next_resync = sector_nr;
+       atomic_inc(&conf->barrier[idx]);
+       /*
+        * In raise_barrier() we firstly increase conf->barrier[idx] then
+        * check conf->nr_pending[idx]. In _wait_barrier() we firstly
+        * increase conf->nr_pending[idx] then check conf->barrier[idx].
+        * A memory barrier here to make sure conf->nr_pending[idx] won't
+        * be fetched before conf->barrier[idx] is increased. Otherwise
+        * there will be a race between raise_barrier() and _wait_barrier().
+        */
+       smp_mb__after_atomic();
 
        /* For these conditions we must wait:
         * A: while the array is in frozen state
-        * B: while barrier >= RESYNC_DEPTH, meaning resync reach
-        *    the max count which allowed.
-        * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
-        *    next resync will reach to the window which normal bios are
-        *    handling.
-        * D: while there are any active requests in the current window.
+        * B: while conf->nr_pending[idx] is not 0, meaning regular I/O
+        *    exists in the corresponding I/O barrier bucket.
+        * C: while conf->barrier[idx] >= RESYNC_DEPTH, meaning the max
+        *    resync count allowed on the current I/O barrier bucket is
+        *    reached.
         */
        wait_event_lock_irq(conf->wait_barrier,
                            !conf->array_frozen &&
-                           conf->barrier < RESYNC_DEPTH &&
-                           conf->current_window_requests == 0 &&
-                           (conf->start_next_window >=
-                            conf->next_resync + RESYNC_SECTORS),
+                            !atomic_read(&conf->nr_pending[idx]) &&
+                            atomic_read(&conf->barrier[idx]) < RESYNC_DEPTH,
                            conf->resync_lock);
 
-       conf->nr_pending++;
+       atomic_inc(&conf->nr_pending[idx]);
        spin_unlock_irq(&conf->resync_lock);
 }
 
-static void lower_barrier(struct r1conf *conf)
+static void lower_barrier(struct r1conf *conf, sector_t sector_nr)
 {
-       unsigned long flags;
-       BUG_ON(conf->barrier <= 0);
-       spin_lock_irqsave(&conf->resync_lock, flags);
-       conf->barrier--;
-       conf->nr_pending--;
-       spin_unlock_irqrestore(&conf->resync_lock, flags);
+       int idx = sector_to_idx(sector_nr);
+
+       BUG_ON(atomic_read(&conf->barrier[idx]) <= 0);
+
+       atomic_dec(&conf->barrier[idx]);
+       atomic_dec(&conf->nr_pending[idx]);
        wake_up(&conf->wait_barrier);
 }
 
-static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
+static void _wait_barrier(struct r1conf *conf, int idx)
 {
-       bool wait = false;
+       /*
+        * We need to increase conf->nr_pending[idx] very early here,
+        * then raise_barrier() can be blocked when it waits for
+        * conf->nr_pending[idx] to be 0. Then we can avoid holding
+        * conf->resync_lock when there is no barrier raised in same
+        * barrier unit bucket. Also if the array is frozen, I/O
+        * should be blocked until array is unfrozen.
+        */
+       atomic_inc(&conf->nr_pending[idx]);
+       /*
+        * In _wait_barrier() we firstly increase conf->nr_pending[idx], then
+        * check conf->barrier[idx]. In raise_barrier() we firstly increase
+        * conf->barrier[idx], then check conf->nr_pending[idx]. A memory
+        * barrier is necessary here to make sure conf->barrier[idx] won't be
+        * fetched before conf->nr_pending[idx] is increased. Otherwise there
+        * will be a race between _wait_barrier() and raise_barrier().
+        */
+       smp_mb__after_atomic();
 
-       if (conf->array_frozen || !bio)
-               wait = true;
-       else if (conf->barrier && bio_data_dir(bio) == WRITE) {
-               if ((conf->mddev->curr_resync_completed
-                    >= bio_end_sector(bio)) ||
-                   (conf->start_next_window + NEXT_NORMALIO_DISTANCE
-                    <= bio->bi_iter.bi_sector))
-                       wait = false;
-               else
-                       wait = true;
-       }
+       /*
+        * Don't worry about checking two atomic_t variables at the same
+        * time here. If the array becomes frozen (conf->array_frozen is 1)
+        * while we check conf->barrier[idx], and conf->barrier[idx] is
+        * 0, it is still safe to return and let the I/O continue. Because
+        * the array is frozen, all I/O returned here will eventually
+        * complete or be queued, so no race will happen. See the code
+        * comment in freeze_array().
+        */
+       if (!READ_ONCE(conf->array_frozen) &&
+           !atomic_read(&conf->barrier[idx]))
+               return;
 
-       return wait;
+       /*
+        * After holding conf->resync_lock, conf->nr_pending[idx]
+        * should be decreased before waiting for barrier to drop.
+        * Otherwise, we may encounter a race condition because
+        * raise_barrier() might be waiting for conf->nr_pending[idx]
+        * to be 0 at same time.
+        */
+       spin_lock_irq(&conf->resync_lock);
+       atomic_inc(&conf->nr_waiting[idx]);
+       atomic_dec(&conf->nr_pending[idx]);
+       /*
+        * In case freeze_array() is waiting for
+        * get_unqueued_pending() == extra
+        */
+       wake_up(&conf->wait_barrier);
+       /* Wait for the barrier in the same barrier unit bucket to drop. */
+       wait_event_lock_irq(conf->wait_barrier,
+                           !conf->array_frozen &&
+                            !atomic_read(&conf->barrier[idx]),
+                           conf->resync_lock);
+       atomic_inc(&conf->nr_pending[idx]);
+       atomic_dec(&conf->nr_waiting[idx]);
+       spin_unlock_irq(&conf->resync_lock);
 }
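
The two mirrored comments above describe a store-buffering pattern: each side increments its own counter and then reads the other's, and without a full memory barrier between the increment and the read both sides could observe a stale zero and proceed at once. A compact C11 illustration of the ordering (a sketch of the idea, not the kernel code; smp_mb__after_atomic() plays the role of the fence):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int barrier_cnt, nr_pending;

/* raise_barrier() side: publish "barrier raised", then look for I/O. */
static int resync_side(void)
{
        atomic_fetch_add_explicit(&barrier_cnt, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);  /* smp_mb__after_atomic() */
        return atomic_load_explicit(&nr_pending, memory_order_relaxed);
}

/* _wait_barrier() side: publish "I/O pending", then look for a barrier. */
static int io_side(void)
{
        atomic_fetch_add_explicit(&nr_pending, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);  /* smp_mb__after_atomic() */
        return atomic_load_explicit(&barrier_cnt, memory_order_relaxed);
}

int main(void)
{
        /* With the fences in place, two racing threads cannot BOTH read
         * zero: at least one side must observe the other's increment and
         * take the slow path. Without them, both loads could be satisfied
         * before either store becomes visible. */
        printf("%d %d\n", resync_side(), io_side());
        return 0;
}
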
 
-static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
+static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr)
 {
-       sector_t sector = 0;
+       int idx = sector_to_idx(sector_nr);
 
-       spin_lock_irq(&conf->resync_lock);
-       if (need_to_wait_for_sync(conf, bio)) {
-               conf->nr_waiting++;
-               /* Wait for the barrier to drop.
-                * However if there are already pending
-                * requests (preventing the barrier from
-                * rising completely), and the
-                * per-process bio queue isn't empty,
-                * then don't wait, as we need to empty
-                * that queue to allow conf->start_next_window
-                * to increase.
-                */
-               raid1_log(conf->mddev, "wait barrier");
-               wait_event_lock_irq(conf->wait_barrier,
-                                   !conf->array_frozen &&
-                                   (!conf->barrier ||
-                                    ((conf->start_next_window <
-                                      conf->next_resync + RESYNC_SECTORS) &&
-                                     current->bio_list &&
-                                     !bio_list_empty(current->bio_list))),
-                                   conf->resync_lock);
-               conf->nr_waiting--;
-       }
-
-       if (bio && bio_data_dir(bio) == WRITE) {
-               if (bio->bi_iter.bi_sector >= conf->next_resync) {
-                       if (conf->start_next_window == MaxSector)
-                               conf->start_next_window =
-                                       conf->next_resync +
-                                       NEXT_NORMALIO_DISTANCE;
-
-                       if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
-                           <= bio->bi_iter.bi_sector)
-                               conf->next_window_requests++;
-                       else
-                               conf->current_window_requests++;
-                       sector = conf->start_next_window;
-               }
-       }
+       /*
+        * Very similar to _wait_barrier(). The difference is, for read
+        * I/O we don't need to wait for sync I/O, but if the whole array
+        * is frozen, the read I/O still has to wait until the array is
+        * unfrozen. Since there is no ordering requirement with
+        * conf->barrier[idx] here, memory barrier is unnecessary as well.
+        */
+       atomic_inc(&conf->nr_pending[idx]);
 
-       conf->nr_pending++;
+       if (!READ_ONCE(conf->array_frozen))
+               return;
+
+       spin_lock_irq(&conf->resync_lock);
+       atomic_inc(&conf->nr_waiting[idx]);
+       atomic_dec(&conf->nr_pending[idx]);
+       /*
+        * In case freeze_array() is waiting for
+        * get_unqueued_pending() == extra
+        */
+       wake_up(&conf->wait_barrier);
+       /* Wait for array to be unfrozen */
+       wait_event_lock_irq(conf->wait_barrier,
+                           !conf->array_frozen,
+                           conf->resync_lock);
+       atomic_inc(&conf->nr_pending[idx]);
+       atomic_dec(&conf->nr_waiting[idx]);
        spin_unlock_irq(&conf->resync_lock);
-       return sector;
 }
 
-static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
-                         sector_t bi_sector)
+static void wait_barrier(struct r1conf *conf, sector_t sector_nr)
 {
-       unsigned long flags;
+       int idx = sector_to_idx(sector_nr);
 
-       spin_lock_irqsave(&conf->resync_lock, flags);
-       conf->nr_pending--;
-       if (start_next_window) {
-               if (start_next_window == conf->start_next_window) {
-                       if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
-                           <= bi_sector)
-                               conf->next_window_requests--;
-                       else
-                               conf->current_window_requests--;
-               } else
-                       conf->current_window_requests--;
-
-               if (!conf->current_window_requests) {
-                       if (conf->next_window_requests) {
-                               conf->current_window_requests =
-                                       conf->next_window_requests;
-                               conf->next_window_requests = 0;
-                               conf->start_next_window +=
-                                       NEXT_NORMALIO_DISTANCE;
-                       } else
-                               conf->start_next_window = MaxSector;
-               }
-       }
-       spin_unlock_irqrestore(&conf->resync_lock, flags);
+       _wait_barrier(conf, idx);
+}
+
+static void wait_all_barriers(struct r1conf *conf)
+{
+       int idx;
+
+       for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
+               _wait_barrier(conf, idx);
+}
+
+static void _allow_barrier(struct r1conf *conf, int idx)
+{
+       atomic_dec(&conf->nr_pending[idx]);
        wake_up(&conf->wait_barrier);
 }
 
+static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
+{
+       int idx = sector_to_idx(sector_nr);
+
+       _allow_barrier(conf, idx);
+}
+
+static void allow_all_barriers(struct r1conf *conf)
+{
+       int idx;
+
+       for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
+               _allow_barrier(conf, idx);
+}
+
+/* conf->resync_lock should be held */
+static int get_unqueued_pending(struct r1conf *conf)
+{
+       int idx, ret;
+
+       for (ret = 0, idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
+               ret += atomic_read(&conf->nr_pending[idx]) -
+                       atomic_read(&conf->nr_queued[idx]);
+
+       return ret;
+}
+
 static void freeze_array(struct r1conf *conf, int extra)
 {
-       /* stop syncio and normal IO and wait for everything to
+       /* Stop sync I/O and normal I/O and wait for everything to
         * go quite.
-        * We wait until nr_pending match nr_queued+extra
-        * This is called in the context of one normal IO request
-        * that has failed. Thus any sync request that might be pending
-        * will be blocked by nr_pending, and we need to wait for
-        * pending IO requests to complete or be queued for re-try.
-        * Thus the number queued (nr_queued) plus this request (extra)
-        * must match the number of pending IOs (nr_pending) before
-        * we continue.
+        * This is called in two situations:
+        * 1) management command handlers (reshape, remove disk, quiesce).
+        * 2) one normal I/O request failed.
+
+        *
+        * After array_frozen is set to 1, new sync IO will be blocked at
+        * raise_barrier(), and new normal I/O will be blocked at
+        * _wait_barrier() or wait_read_barrier(). The in-flight I/Os will
+        * either complete or be queued. When everything goes quiet, only
+        * queued I/Os are left.
+        *
+        * Every in-flight I/O contributes to a conf->nr_pending[idx], where
+        * idx is the barrier bucket index that this I/O request hits. When
+        * all sync and normal I/O are queued, the sum of all
+        * conf->nr_pending[] will match the sum of all conf->nr_queued[].
+        * But normal I/O failure is an exception: in handle_read_error() we
+        * may call freeze_array() before trying to fix the read error. In
+        * this case, the failed read I/O is not queued, so
+        * get_unqueued_pending() == 1.
+        *
+        * Therefore before this function returns, we need to wait until
+        * get_unqueued_pending(conf) becomes equal to extra. In the normal
+        * I/O error context extra is 1; in all other situations extra is 0.
        spin_lock_irq(&conf->resync_lock);
        conf->array_frozen = 1;
        raid1_log(conf->mddev, "wait freeze");
-       wait_event_lock_irq_cmd(conf->wait_barrier,
-                               conf->nr_pending == conf->nr_queued+extra,
-                               conf->resync_lock,
-                               flush_pending_writes(conf));
+       wait_event_lock_irq_cmd(
+               conf->wait_barrier,
+               get_unqueued_pending(conf) == extra,
+               conf->resync_lock,
+               flush_pending_writes(conf));
        spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(struct r1conf *conf)
@@ -982,8 +1061,8 @@ static void unfreeze_array(struct r1conf *conf)
        /* reverse the effect of the freeze */
        spin_lock_irq(&conf->resync_lock);
        conf->array_frozen = 0;
-       wake_up(&conf->wait_barrier);
        spin_unlock_irq(&conf->resync_lock);
+       wake_up(&conf->wait_barrier);
 }
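
Taken together, the freeze/unfreeze pair brackets management operations. A hedged sketch of the intended usage for the management-command case (extra == 0; illustrative only, not part of the patch):

	freeze_array(conf, 0);		/* waits until get_unqueued_pending() == 0 */
	/* ... reshape / remove disk / quiesce ... */
	unfreeze_array(conf);		/* clear array_frozen, wake all waiters */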
 
 /* duplicate the data pages for behind I/O
@@ -1070,11 +1149,28 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
        kfree(plug);
 }
 
-static void raid1_read_request(struct mddev *mddev, struct bio *bio,
-                                struct r1bio *r1_bio)
+static inline struct r1bio *
+alloc_r1bio(struct mddev *mddev, struct bio *bio, sector_t sectors_handled)
+{
+       struct r1conf *conf = mddev->private;
+       struct r1bio *r1_bio;
+
+       r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+       r1_bio->master_bio = bio;
+       r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+       r1_bio->state = 0;
+       r1_bio->mddev = mddev;
+       r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+
+       return r1_bio;
+}
+
+static void raid1_read_request(struct mddev *mddev, struct bio *bio)
 {
        struct r1conf *conf = mddev->private;
        struct raid1_info *mirror;
+       struct r1bio *r1_bio;
        struct bio *read_bio;
        struct bitmap *bitmap = mddev->bitmap;
        const int op = bio_op(bio);
@@ -1083,8 +1179,29 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
        int max_sectors;
        int rdisk;
 
-       wait_barrier(conf, bio);
+       /*
+        * Still need the barrier for READ in case the whole
+        * array is frozen.
+        */
+       wait_read_barrier(conf, bio->bi_iter.bi_sector);
+
+       r1_bio = alloc_r1bio(mddev, bio, 0);
 
+       /*
+        * We might need to issue multiple reads to different
+        * devices if there are bad blocks around, so we keep
+        * track of the number of reads in bio->bi_phys_segments.
+        * If this is 0, there is only one r1_bio and no locking
+        * will be needed when requests complete.  If it is
+        * non-zero, then it is the number of not-completed requests.
+        */
+       bio->bi_phys_segments = 0;
+       bio_clear_flag(bio, BIO_SEG_VALID);
+
+       /*
+        * make_request() can abort the operation when read-ahead is being
+        * used and no empty request is available.
+        */
 read_again:
        rdisk = read_balance(conf, r1_bio, &max_sectors);
 
@@ -1106,9 +1223,8 @@ read_again:
                           atomic_read(&bitmap->behind_writes) == 0);
        }
        r1_bio->read_disk = rdisk;
-       r1_bio->start_next_window = 0;
 
-       read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+       read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
        bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
                 max_sectors);
 
@@ -1151,22 +1267,16 @@ read_again:
                 */
                reschedule_retry(r1_bio);
 
-               r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-               r1_bio->master_bio = bio;
-               r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-               r1_bio->state = 0;
-               r1_bio->mddev = mddev;
-               r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+               r1_bio = alloc_r1bio(mddev, bio, sectors_handled);
                goto read_again;
        } else
                generic_make_request(read_bio);
 }
 
-static void raid1_write_request(struct mddev *mddev, struct bio *bio,
-                               struct r1bio *r1_bio)
+static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 {
        struct r1conf *conf = mddev->private;
+       struct r1bio *r1_bio;
        int i, disks;
        struct bitmap *bitmap = mddev->bitmap;
        unsigned long flags;
@@ -1176,7 +1286,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
        int first_clone;
        int sectors_handled;
        int max_sectors;
-       sector_t start_next_window;
 
        /*
         * Register the new request and wait if the reconstruction
@@ -1212,7 +1321,19 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                }
                finish_wait(&conf->wait_barrier, &w);
        }
-       start_next_window = wait_barrier(conf, bio);
+       wait_barrier(conf, bio->bi_iter.bi_sector);
+
+       r1_bio = alloc_r1bio(mddev, bio, 0);
+
+       /* We might need to issue multiple writes to different
+        * devices if there are bad blocks around, so we keep
+        * track of the number of writes in bio->bi_phys_segments.
+        * If this is 0, there is only one r1_bio and no locking
+        * will be needed when requests complete.  If it is
+        * non-zero, then it is the number of not-completed requests.
+        */
+       bio->bi_phys_segments = 0;
+       bio_clear_flag(bio, BIO_SEG_VALID);
 
        if (conf->pending_count >= max_queued_requests) {
                md_wakeup_thread(mddev->thread);
@@ -1233,7 +1354,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
        disks = conf->raid_disks * 2;
  retry_write:
-       r1_bio->start_next_window = start_next_window;
        blocked_rdev = NULL;
        rcu_read_lock();
        max_sectors = r1_bio->sectors;
@@ -1300,25 +1420,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
        if (unlikely(blocked_rdev)) {
                /* Wait for this device to become unblocked */
                int j;
-               sector_t old = start_next_window;
 
                for (j = 0; j < i; j++)
                        if (r1_bio->bios[j])
                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
                r1_bio->state = 0;
-               allow_barrier(conf, start_next_window, bio->bi_iter.bi_sector);
+               allow_barrier(conf, bio->bi_iter.bi_sector);
                raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
                md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               start_next_window = wait_barrier(conf, bio);
-               /*
-                * We must make sure the multi r1bios of bio have
-                * the same value of bi_phys_segments
-                */
-               if (bio->bi_phys_segments && old &&
-                   old != start_next_window)
-                       /* Wait for the former r1bio(s) to complete */
-                       wait_event(conf->wait_barrier,
-                                  bio->bi_phys_segments == 1);
+               wait_barrier(conf, bio->bi_iter.bi_sector);
                goto retry_write;
        }
 
@@ -1341,13 +1451,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
        first_clone = 1;
        for (i = 0; i < disks; i++) {
-               struct bio *mbio;
+               struct bio *mbio = NULL;
+               sector_t offset;
                if (!r1_bio->bios[i])
                        continue;
 
-               mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-               bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
-                        max_sectors);
+               offset = r1_bio->sector - bio->bi_iter.bi_sector;
 
                if (first_clone) {
                        /* do behind I/O ?
@@ -1357,8 +1466,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                        if (bitmap &&
                            (atomic_read(&bitmap->behind_writes)
                             < mddev->bitmap_info.max_write_behind) &&
-                           !waitqueue_active(&bitmap->behind_wait))
+                           !waitqueue_active(&bitmap->behind_wait)) {
+                               mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
+                                                               mddev->bio_set,
+                                                               offset << 9,
+                                                               max_sectors << 9);
                                alloc_behind_pages(mbio, r1_bio);
+                       }
 
                        bitmap_startwrite(bitmap, r1_bio->sector,
                                          r1_bio->sectors,
@@ -1366,6 +1480,19 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                                                   &r1_bio->state));
                        first_clone = 0;
                }
+
+               if (!mbio) {
+                       if (r1_bio->behind_bvecs)
+                               mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
+                                                               mddev->bio_set,
+                                                               offset << 9,
+                                                               max_sectors << 9);
+                       else {
+                               mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+                               bio_trim(mbio, offset, max_sectors);
+                       }
+               }
+
                if (r1_bio->behind_bvecs) {
                        struct bio_vec *bvec;
                        int j;
@@ -1385,8 +1512,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                                   conf->mirrors[i].rdev->data_offset);
                mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
                mbio->bi_end_io = raid1_end_write_request;
-               mbio->bi_opf = bio_op(bio) |
-                       (bio->bi_opf & (REQ_SYNC | REQ_PREFLUSH | REQ_FUA));
+               mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA));
                if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) &&
                    !test_bit(WriteMostly, &conf->mirrors[i].rdev->flags) &&
                    conf->raid_disks - mddev->degraded > 1)
@@ -1427,12 +1553,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                /* We need another r1_bio.  It has already been counted
                 * in bio->bi_phys_segments
                 */
-               r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-               r1_bio->master_bio = bio;
-               r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-               r1_bio->state = 0;
-               r1_bio->mddev = mddev;
-               r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+               r1_bio = alloc_r1bio(mddev, bio, sectors_handled);
                goto retry_write;
        }
 
@@ -1444,36 +1565,30 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
 static void raid1_make_request(struct mddev *mddev, struct bio *bio)
 {
-       struct r1conf *conf = mddev->private;
-       struct r1bio *r1_bio;
+       struct bio *split;
+       sector_t sectors;
 
-       /*
-        * make_request() can abort the operation when read-ahead is being
-        * used and no empty request is available.
-        *
-        */
-       r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-       r1_bio->master_bio = bio;
-       r1_bio->sectors = bio_sectors(bio);
-       r1_bio->state = 0;
-       r1_bio->mddev = mddev;
-       r1_bio->sector = bio->bi_iter.bi_sector;
+       if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
+               md_flush_request(mddev, bio);
+               return;
+       }
 
-       /*
-        * We might need to issue multiple reads to different devices if there
-        * are bad blocks around, so we keep track of the number of reads in
-        * bio->bi_phys_segments.  If this is 0, there is only one r1_bio and
-        * no locking will be needed when requests complete.  If it is
-        * non-zero, then it is the number of not-completed requests.
-        */
-       bio->bi_phys_segments = 0;
-       bio_clear_flag(bio, BIO_SEG_VALID);
+       /* if the bio crosses a barrier unit boundary, split it */
+       do {
+               sectors = align_to_barrier_unit_end(
+                               bio->bi_iter.bi_sector, bio_sectors(bio));
+               if (sectors < bio_sectors(bio)) {
+                       split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
+                       bio_chain(split, bio);
+               } else {
+                       split = bio;
+               }
 
-       if (bio_data_dir(bio) == READ)
-               raid1_read_request(mddev, bio, r1_bio);
-       else
-               raid1_write_request(mddev, bio, r1_bio);
+               if (bio_data_dir(split) == READ)
+                       raid1_read_request(mddev, split);
+               else
+                       raid1_write_request(mddev, split);
+       } while (split != bio);
 }
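
align_to_barrier_unit_end() is not shown in this hunk; the following standalone sketch illustrates the clamping behaviour it presumably implements (round the length down so [sector, sector+len) never crosses a 64MB barrier unit):

	#include <stdio.h>
	#include <stdint.h>

	#define BARRIER_UNIT_SECTOR_SIZE (1 << 17)	/* 64MB in 512B sectors */

	/* presumed behaviour: clamp 'sectors' at the next barrier unit boundary */
	static uint64_t align_to_barrier_unit_end(uint64_t start, uint64_t sectors)
	{
		uint64_t to_end = BARRIER_UNIT_SECTOR_SIZE -
				  (start & (BARRIER_UNIT_SECTOR_SIZE - 1));
		return sectors < to_end ? sectors : to_end;
	}

	int main(void)
	{
		/* a 2048-sector bio starting 256 sectors before a boundary
		 * is split into 256 + 1792 sectors */
		uint64_t start = (uint64_t)BARRIER_UNIT_SECTOR_SIZE - 256;
		printf("first piece: %llu sectors\n",
		       (unsigned long long)align_to_barrier_unit_end(start, 2048));
		return 0;
	}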
 
 static void raid1_status(struct seq_file *seq, struct mddev *mddev)
@@ -1564,19 +1679,11 @@ static void print_conf(struct r1conf *conf)
 
 static void close_sync(struct r1conf *conf)
 {
-       wait_barrier(conf, NULL);
-       allow_barrier(conf, 0, 0);
+       wait_all_barriers(conf);
+       allow_all_barriers(conf);
 
        mempool_destroy(conf->r1buf_pool);
        conf->r1buf_pool = NULL;
-
-       spin_lock_irq(&conf->resync_lock);
-       conf->next_resync = MaxSector - 2 * NEXT_NORMALIO_DISTANCE;
-       conf->start_next_window = MaxSector;
-       conf->current_window_requests +=
-               conf->next_window_requests;
-       conf->next_window_requests = 0;
-       spin_unlock_irq(&conf->resync_lock);
 }
 
 static int raid1_spare_active(struct mddev *mddev)
@@ -2273,7 +2380,8 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
 
                        wbio->bi_vcnt = vcnt;
                } else {
-                       wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+                       wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
+                                             mddev->bio_set);
                }
 
                bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
@@ -2323,8 +2431,9 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
 
 static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 {
-       int m;
+       int m, idx;
        bool fail = false;
+
        for (m = 0; m < conf->raid_disks * 2 ; m++)
                if (r1_bio->bios[m] == IO_MADE_GOOD) {
                        struct md_rdev *rdev = conf->mirrors[m].rdev;
@@ -2350,8 +2459,14 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
        if (fail) {
                spin_lock_irq(&conf->device_lock);
                list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
-               conf->nr_queued++;
+               idx = sector_to_idx(r1_bio->sector);
+               atomic_inc(&conf->nr_queued[idx]);
                spin_unlock_irq(&conf->device_lock);
+               /*
+                * In case freeze_array() is waiting for the condition
+                * get_unqueued_pending() == extra to become true.
+                */
+               wake_up(&conf->wait_barrier);
                md_wakeup_thread(conf->mddev->thread);
        } else {
                if (test_bit(R1BIO_WriteError, &r1_bio->state))
@@ -2411,7 +2526,8 @@ read_more:
                const unsigned long do_sync
                        = r1_bio->master_bio->bi_opf & REQ_SYNC;
                r1_bio->read_disk = disk;
-               bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+               bio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
+                                    mddev->bio_set);
                bio_trim(bio, r1_bio->sector - bio->bi_iter.bi_sector,
                         max_sectors);
                r1_bio->bios[r1_bio->read_disk] = bio;
@@ -2445,15 +2561,8 @@ read_more:
                        generic_make_request(bio);
                        bio = NULL;
 
-                       r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-                       r1_bio->master_bio = mbio;
-                       r1_bio->sectors = bio_sectors(mbio) - sectors_handled;
-                       r1_bio->state = 0;
+                       r1_bio = alloc_r1bio(mddev, mbio, sectors_handled);
                        set_bit(R1BIO_ReadError, &r1_bio->state);
-                       r1_bio->mddev = mddev;
-                       r1_bio->sector = mbio->bi_iter.bi_sector +
-                               sectors_handled;
 
                        goto read_more;
                } else {
@@ -2472,6 +2581,7 @@ static void raid1d(struct md_thread *thread)
        struct r1conf *conf = mddev->private;
        struct list_head *head = &conf->retry_list;
        struct blk_plug plug;
+       int idx;
 
        md_check_recovery(mddev);
 
@@ -2479,17 +2589,15 @@ static void raid1d(struct md_thread *thread)
            !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
                LIST_HEAD(tmp);
                spin_lock_irqsave(&conf->device_lock, flags);
-               if (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
-                       while (!list_empty(&conf->bio_end_io_list)) {
-                               list_move(conf->bio_end_io_list.prev, &tmp);
-                               conf->nr_queued--;
-                       }
-               }
+               if (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+                       list_splice_init(&conf->bio_end_io_list, &tmp);
                spin_unlock_irqrestore(&conf->device_lock, flags);
                while (!list_empty(&tmp)) {
                        r1_bio = list_first_entry(&tmp, struct r1bio,
                                                  retry_list);
                        list_del(&r1_bio->retry_list);
+                       idx = sector_to_idx(r1_bio->sector);
+                       atomic_dec(&conf->nr_queued[idx]);
                        if (mddev->degraded)
                                set_bit(R1BIO_Degraded, &r1_bio->state);
                        if (test_bit(R1BIO_WriteError, &r1_bio->state))
@@ -2510,7 +2618,8 @@ static void raid1d(struct md_thread *thread)
                }
                r1_bio = list_entry(head->prev, struct r1bio, retry_list);
                list_del(head->prev);
-               conf->nr_queued--;
+               idx = sector_to_idx(r1_bio->sector);
+               atomic_dec(&conf->nr_queued[idx]);
                spin_unlock_irqrestore(&conf->device_lock, flags);
 
                mddev = r1_bio->mddev;
@@ -2549,7 +2658,6 @@ static int init_resync(struct r1conf *conf)
                                          conf->poolinfo);
        if (!conf->r1buf_pool)
                return -ENOMEM;
-       conf->next_resync = 0;
        return 0;
 }
 
@@ -2578,6 +2686,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
        int still_degraded = 0;
        int good_sectors = RESYNC_SECTORS;
        int min_bad = 0; /* number of sectors that are bad in all devices */
+       int idx = sector_to_idx(sector_nr);
 
        if (!conf->r1buf_pool)
                if (init_resync(conf))
@@ -2627,7 +2736,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
         * If there is non-resync activity waiting for a turn, then let it
         * through before starting on this new sync request.
         */
-       if (conf->nr_waiting)
+       if (atomic_read(&conf->nr_waiting[idx]))
                schedule_timeout_uninterruptible(1);
 
        /* we are incrementing sector_nr below. To be safe, we check against
@@ -2654,6 +2763,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
        r1_bio->sector = sector_nr;
        r1_bio->state = 0;
        set_bit(R1BIO_IsSync, &r1_bio->state);
+       /* make sure good_sectors won't go across a barrier unit boundary */
+       good_sectors = align_to_barrier_unit_end(sector_nr, good_sectors);
 
        for (i = 0; i < conf->raid_disks * 2; i++) {
                struct md_rdev *rdev;
@@ -2884,6 +2995,26 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        if (!conf)
                goto abort;
 
+       conf->nr_pending = kcalloc(BARRIER_BUCKETS_NR,
+                                  sizeof(atomic_t), GFP_KERNEL);
+       if (!conf->nr_pending)
+               goto abort;
+
+       conf->nr_waiting = kcalloc(BARRIER_BUCKETS_NR,
+                                  sizeof(atomic_t), GFP_KERNEL);
+       if (!conf->nr_waiting)
+               goto abort;
+
+       conf->nr_queued = kcalloc(BARRIER_BUCKETS_NR,
+                                 sizeof(atomic_t), GFP_KERNEL);
+       if (!conf->nr_queued)
+               goto abort;
+
+       conf->barrier = kcalloc(BARRIER_BUCKETS_NR,
+                               sizeof(atomic_t), GFP_KERNEL);
+       if (!conf->barrier)
+               goto abort;
+
        conf->mirrors = kzalloc(sizeof(struct raid1_info)
                                * mddev->raid_disks * 2,
                                 GFP_KERNEL);
@@ -2939,9 +3070,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        conf->pending_count = 0;
        conf->recovery_disabled = mddev->recovery_disabled - 1;
 
-       conf->start_next_window = MaxSector;
-       conf->current_window_requests = conf->next_window_requests = 0;
-
        err = -EIO;
        for (i = 0; i < conf->raid_disks * 2; i++) {
 
@@ -2984,6 +3112,10 @@ static struct r1conf *setup_conf(struct mddev *mddev)
                kfree(conf->mirrors);
                safe_put_page(conf->tmppage);
                kfree(conf->poolinfo);
+               kfree(conf->nr_pending);
+               kfree(conf->nr_waiting);
+               kfree(conf->nr_queued);
+               kfree(conf->barrier);
                kfree(conf);
        }
        return ERR_PTR(err);
@@ -3085,6 +3217,10 @@ static void raid1_free(struct mddev *mddev, void *priv)
        kfree(conf->mirrors);
        safe_put_page(conf->tmppage);
        kfree(conf->poolinfo);
+       kfree(conf->nr_pending);
+       kfree(conf->nr_waiting);
+       kfree(conf->nr_queued);
+       kfree(conf->barrier);
        kfree(conf);
 }
 
index c52ef424a24b2313949971143a162c959e8f068d..dd22a37d0d8332e12785b9c270445aba09cce576 100644 (file)
@@ -1,6 +1,30 @@
 #ifndef _RAID1_H
 #define _RAID1_H
 
+/*
+ * each barrier unit size is 64MB for now
+ * note: it must be larger than RESYNC_DEPTH
+ */
+#define BARRIER_UNIT_SECTOR_BITS       17
+#define BARRIER_UNIT_SECTOR_SIZE       (1<<17)
+/*
+ * In struct r1conf, the following members are related to I/O barrier
+ * buckets,
+ *     atomic_t        *nr_pending;
+ *     atomic_t        *nr_waiting;
+ *     atomic_t        *nr_queued;
+ *     atomic_t        *barrier;
+ * Each of them points to an array of atomic_t variables; each array is
+ * designed to have BARRIER_BUCKETS_NR elements and to occupy a single
+ * memory page. The data width of an atomic_t variable is 4 bytes, i.e.
+ * 1<<(ilog2(sizeof(atomic_t))), so BARRIER_BUCKETS_NR_BITS is defined
+ * as (PAGE_SHIFT - ilog2(sizeof(atomic_t))) to make sure an array of
+ * atomic_t variables with BARRIER_BUCKETS_NR elements exactly occupies
+ * a single memory page.
+ */
+#define BARRIER_BUCKETS_NR_BITS                (PAGE_SHIFT - ilog2(sizeof(atomic_t)))
+#define BARRIER_BUCKETS_NR             (1<<BARRIER_BUCKETS_NR_BITS)
+
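As a sanity check on the arithmetic, a userspace sketch (assuming 4kB pages and the kernel's post-4.7 hash_64() multiplier; both are assumptions, not part of this patch):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT		 12	/* assumed 4kB pages */
	#define BARRIER_UNIT_SECTOR_BITS 17	/* 64MB in 512B sectors */
	#define BARRIER_BUCKETS_NR_BITS	 (PAGE_SHIFT - 2)  /* ilog2(sizeof(atomic_t)) == 2 */
	#define BARRIER_BUCKETS_NR	 (1 << BARRIER_BUCKETS_NR_BITS)

	/* userspace approximation of the kernel's hash_64() */
	static unsigned int hash64(uint64_t val, unsigned int bits)
	{
		return (unsigned int)((val * 0x61C8864680B583EBull) >> (64 - bits));
	}

	static int sector_to_idx(uint64_t sector)
	{
		return hash64(sector >> BARRIER_UNIT_SECTOR_BITS,
			      BARRIER_BUCKETS_NR_BITS);
	}

	int main(void)
	{
		/* 1024 buckets * 4 bytes == one 4kB page, as the comment claims */
		printf("buckets=%d bytes=%d\n", BARRIER_BUCKETS_NR,
		       BARRIER_BUCKETS_NR * 4);
		printf("sector 0     -> bucket %d\n", sector_to_idx(0));
		printf("sector 128MB -> bucket %d\n",
		       sector_to_idx(2ull << BARRIER_UNIT_SECTOR_BITS));
		return 0;
	}
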
 struct raid1_info {
        struct md_rdev  *rdev;
        sector_t        head_position;
@@ -35,25 +59,6 @@ struct r1conf {
                                                 */
        int                     raid_disks;
 
-       /* During resync, read_balancing is only allowed on the part
-        * of the array that has been resynced.  'next_resync' tells us
-        * where that is.
-        */
-       sector_t                next_resync;
-
-       /* When raid1 starts resync, we divide array into four partitions
-        * |---------|--------------|---------------------|-------------|
-        *        next_resync   start_next_window       end_window
-        * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
-        * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
-        * current_window_requests means the count of normalIO between
-        *   start_next_window and end_window.
-        * next_window_requests means the count of normalIO after end_window.
-        * */
-       sector_t                start_next_window;
-       int                     current_window_requests;
-       int                     next_window_requests;
-
        spinlock_t              device_lock;
 
        /* list of 'struct r1bio' that need to be processed by raid1d,
@@ -79,10 +84,10 @@ struct r1conf {
         */
        wait_queue_head_t       wait_barrier;
        spinlock_t              resync_lock;
-       int                     nr_pending;
-       int                     nr_waiting;
-       int                     nr_queued;
-       int                     barrier;
+       atomic_t                *nr_pending;
+       atomic_t                *nr_waiting;
+       atomic_t                *nr_queued;
+       atomic_t                *barrier;
        int                     array_frozen;
 
        /* Set to 1 if a full sync is needed, (fresh device added).
@@ -135,7 +140,6 @@ struct r1bio {
                                                 * in this BehindIO request
                                                 */
        sector_t                sector;
-       sector_t                start_next_window;
        int                     sectors;
        unsigned long           state;
        struct mddev            *mddev;
@@ -185,4 +189,10 @@ enum r1bio_state {
        R1BIO_WriteError,
        R1BIO_FailFast,
 };
+
+static inline int sector_to_idx(sector_t sector)
+{
+       return hash_long(sector >> BARRIER_UNIT_SECTOR_BITS,
+                        BARRIER_BUCKETS_NR_BITS);
+}
 #endif
index 6bc5c2a85160e2654050716ef9270c1de3e903a3..063c43d83b72c2f0f753edb7b08f8dd608fa15ad 100644 (file)
@@ -1132,7 +1132,7 @@ read_again:
        }
        slot = r10_bio->read_slot;
 
-       read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+       read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
        bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
                 max_sectors);
 
@@ -1406,7 +1406,7 @@ retry_write:
                int d = r10_bio->devs[i].devnum;
                if (r10_bio->devs[i].bio) {
                        struct md_rdev *rdev = conf->mirrors[d].rdev;
-                       mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+                       mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
                        bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
                                 max_sectors);
                        r10_bio->devs[i].bio = mbio;
@@ -1457,7 +1457,7 @@ retry_write:
                                smp_mb();
                                rdev = conf->mirrors[d].rdev;
                        }
-                       mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+                       mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
                        bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
                                 max_sectors);
                        r10_bio->devs[i].repl_bio = mbio;
@@ -2565,7 +2565,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
                if (sectors > sect_to_write)
                        sectors = sect_to_write;
                /* Write at 'sector' for 'sectors' */
-               wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+               wbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
                bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
                wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
                wbio->bi_iter.bi_sector = wsector +
@@ -2641,8 +2641,7 @@ read_more:
                           mdname(mddev),
                           bdevname(rdev->bdev, b),
                           (unsigned long long)r10_bio->sector);
-       bio = bio_clone_mddev(r10_bio->master_bio,
-                             GFP_NOIO, mddev);
+       bio = bio_clone_fast(r10_bio->master_bio, GFP_NOIO, mddev->bio_set);
        bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
        r10_bio->devs[slot].bio = bio;
        r10_bio->devs[slot].rdev = rdev;
index 302dea3296ba5ccd07740365314f45d74df49ec2..3f307be01b10cc70eb7b08bc31b9a2a3717372b8 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/crc32c.h>
 #include <linux/random.h>
 #include <linux/kthread.h>
+#include <linux/types.h>
 #include "md.h"
 #include "raid5.h"
 #include "bitmap.h"
@@ -164,8 +165,59 @@ struct r5l_log {
        struct work_struct deferred_io_work;
        /* to disable write back in degraded mode */
        struct work_struct disable_writeback_work;
+
+       /* for chunk_aligned_read in writeback mode, details below */
+       spinlock_t tree_lock;
+       struct radix_tree_root big_stripe_tree;
 };
 
+/*
+ * Enable chunk_aligned_read() with write back cache.
+ *
+ * Each chunk may contain more than one stripe (for example, a 256kB
+ * chunk contains 64 4kB pages, so this chunk contains 64 stripes). For
+ * chunk_aligned_read, these stripes are grouped into one "big_stripe".
+ * For each big_stripe, we count how many stripes of this big_stripe
+ * are in the write back cache. These counts are tracked in a radix tree
+ * (big_stripe_tree). We use the radix_tree item pointer as the counter.
+ * r5c_tree_index() is used to calculate keys for the radix tree.
+ *
+ * chunk_aligned_read() calls r5c_big_stripe_cached() to look up the
+ * big_stripe of each chunk in the tree. If this big_stripe is in the
+ * tree, chunk_aligned_read() aborts. This lookup is protected by
+ * rcu_read_lock().
+ *
+ * It is necessary to remember whether a stripe is counted in
+ * big_stripe_tree. Instead of adding a new flag, we reuse two existing
+ * flags: STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE. If either
+ * of these two flags is set, the stripe is counted in big_stripe_tree.
+ * This requires moving set_bit(STRIPE_R5C_PARTIAL_STRIPE) to
+ * r5c_try_caching_write(), and moving the clear_bit of
+ * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE to
+ * r5c_finish_stripe_write_out().
+ */
+
+/*
+ * The radix tree requires the lowest 2 bits of the data pointer to be
+ * 2'b00, so it is necessary to left shift the counter by 2 bits before
+ * using it as the data pointer of the tree.
+ */
+#define R5C_RADIX_COUNT_SHIFT 2
+
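A minimal userspace sketch of this counter-in-pointer encoding (illustrative only; the helper names here are made up, not from the patch):

	#include <stdio.h>
	#include <stdint.h>

	#define R5C_RADIX_COUNT_SHIFT 2

	static void *count_to_item(uintptr_t count)
	{
		return (void *)(count << R5C_RADIX_COUNT_SHIFT);
	}

	static uintptr_t item_to_count(void *item)
	{
		return (uintptr_t)item >> R5C_RADIX_COUNT_SHIFT;
	}

	int main(void)
	{
		void *item = count_to_item(1);			/* first stripe */
		item = count_to_item(item_to_count(item) + 1);	/* second stripe */
		printf("count=%lu low-bits=%lu\n",
		       (unsigned long)item_to_count(item),
		       (unsigned long)((uintptr_t)item & 3));	/* always 0 */
		return 0;
	}
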
+/*
+ * calculate key for big_stripe_tree
+ *
+ * sect: align_bi->bi_iter.bi_sector or sh->sector
+ */
+static inline sector_t r5c_tree_index(struct r5conf *conf,
+                                     sector_t sect)
+{
+       sector_t offset;
+
+       offset = sector_div(sect, conf->chunk_sectors);
+       return sect;
+}
+
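Note the sector_div() idiom above: it divides 'sect' in place and returns the remainder, so the function returns the chunk number, not the in-chunk offset. A userspace stand-in (assumption: plain 64-bit division is adequate outside the kernel):

	#include <stdio.h>
	#include <stdint.h>

	/* stand-in for the kernel's sector_div(): divide in place, return remainder */
	static uint32_t sector_div(uint64_t *sect, uint32_t div)
	{
		uint32_t rem = (uint32_t)(*sect % div);
		*sect /= div;
		return rem;
	}

	int main(void)
	{
		uint64_t sect = 1000000;	/* example bi_sector */
		uint32_t chunk_sectors = 512;	/* 256kB chunk in 512B sectors */
		uint32_t offset = sector_div(&sect, chunk_sectors);
		printf("chunk (tree key) = %llu, offset = %u\n",
		       (unsigned long long)sect, offset);
		return 0;
	}
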
 /*
  * an IO range starts from a meta data block and ends at the next meta data
  * block. The io unit's the meta data block tracks data/parity followed it. io
@@ -337,17 +389,30 @@ void r5c_check_cached_full_stripe(struct r5conf *conf)
 /*
  * Total log space (in sectors) needed to flush all data in cache
  *
- * Currently, writing-out phase automatically includes all pending writes
- * to the same sector. So the reclaim of each stripe takes up to
- * (conf->raid_disks + 1) pages of log space.
+ * To avoid deadlock due to log space, it is necessary to reserve log
+ * space to flush critical stripes (stripes that occupy log space near
+ * last_checkpoint). This function helps check how much log space is
+ * required to flush all cached stripes.
  *
- * To totally avoid deadlock due to log space, the code reserves
- * (conf->raid_disks + 1) pages for each stripe in cache, which is not
- * necessary in most cases.
+ * To reduce log space requirements, two mechanisms are used to give cache
+ * flush higher priorities:
+ *    1. In handle_stripe_dirtying() and schedule_reconstruction(),
+ *       stripes ALREADY in journal can be flushed w/o pending writes;
+ *    2. In r5l_write_stripe() and r5c_cache_data(), stripes NOT in journal
+ *       can be delayed (r5l_add_no_space_stripe).
  *
- * To improve this, we will need writing-out phase to be able to NOT include
- * pending writes, which will reduce the requirement to
- * (conf->max_degraded + 1) pages per stripe in cache.
+ * In cache flush, the stripe goes through 1 and then 2. For a stripe that
+ * already passed 1, flushing it requires at most (conf->max_degraded + 1)
+ * pages of journal space. For stripes that have not passed 1, flushing
+ * them requires (conf->raid_disks + 1) pages of journal space. There are at
+ * most (conf->group_cnt + 1) stripes that have passed 1. So total journal space
+ * required to flush all cached stripes (in pages) is:
+ *
+ *     (stripe_in_journal_count - group_cnt - 1) * (max_degraded + 1) +
+ *     (group_cnt + 1) * (raid_disks + 1)
+ * or
+ *     (stripe_in_journal_count) * (max_degraded + 1) +
+ *     (group_cnt + 1) * (raid_disks - max_degraded)
  */
 static sector_t r5c_log_required_to_flush_cache(struct r5conf *conf)
 {
@@ -356,8 +421,9 @@ static sector_t r5c_log_required_to_flush_cache(struct r5conf *conf)
        if (!r5c_is_writeback(log))
                return 0;
 
-       return BLOCK_SECTORS * (conf->raid_disks + 1) *
-               atomic_read(&log->stripe_in_journal_count);
+       return BLOCK_SECTORS *
+               ((conf->max_degraded + 1) * atomic_read(&log->stripe_in_journal_count) +
+                (conf->raid_disks - conf->max_degraded) * (conf->group_cnt + 1));
 }
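
A quick worked example of the new bound (hypothetical values, not from the patch): with raid_disks = 8, max_degraded = 2, group_cnt = 4 and 100 stripes in the journal, the reservation is 100 * 3 + 6 * 5 = 330 pages, versus 100 * 9 = 900 pages under the old formula:

	#include <stdio.h>

	int main(void)
	{
		int raid_disks = 8, max_degraded = 2, group_cnt = 4;
		long in_journal = 100;	/* stripe_in_journal_count */

		long new_pages = in_journal * (max_degraded + 1) +
				 (long)(raid_disks - max_degraded) * (group_cnt + 1);
		long old_pages = in_journal * (raid_disks + 1);
		printf("new=%ld pages, old=%ld pages\n", new_pages, old_pages);
		return 0;
	}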
 
 /*
@@ -412,16 +478,6 @@ void r5c_make_stripe_write_out(struct stripe_head *sh)
 
        if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                atomic_inc(&conf->preread_active_stripes);
-
-       if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) {
-               BUG_ON(atomic_read(&conf->r5c_cached_partial_stripes) == 0);
-               atomic_dec(&conf->r5c_cached_partial_stripes);
-       }
-
-       if (test_and_clear_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) {
-               BUG_ON(atomic_read(&conf->r5c_cached_full_stripes) == 0);
-               atomic_dec(&conf->r5c_cached_full_stripes);
-       }
 }
 
 static void r5c_handle_data_cached(struct stripe_head *sh)
@@ -1271,6 +1327,10 @@ static void r5c_flush_stripe(struct r5conf *conf, struct stripe_head *sh)
        atomic_inc(&conf->active_stripes);
        r5c_make_stripe_write_out(sh);
 
+       if (test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state))
+               atomic_inc(&conf->r5c_flushing_partial_stripes);
+       else
+               atomic_inc(&conf->r5c_flushing_full_stripes);
        raid5_release_stripe(sh);
 }
 
@@ -1313,12 +1373,16 @@ static void r5c_do_reclaim(struct r5conf *conf)
        unsigned long flags;
        int total_cached;
        int stripes_to_flush;
+       int flushing_partial, flushing_full;
 
        if (!r5c_is_writeback(log))
                return;
 
+       flushing_partial = atomic_read(&conf->r5c_flushing_partial_stripes);
+       flushing_full = atomic_read(&conf->r5c_flushing_full_stripes);
        total_cached = atomic_read(&conf->r5c_cached_partial_stripes) +
-               atomic_read(&conf->r5c_cached_full_stripes);
+               atomic_read(&conf->r5c_cached_full_stripes) -
+               flushing_full - flushing_partial;
 
        if (total_cached > conf->min_nr_stripes * 3 / 4 ||
            atomic_read(&conf->empty_inactive_list_nr) > 0)
@@ -1328,7 +1392,7 @@ static void r5c_do_reclaim(struct r5conf *conf)
                 */
                stripes_to_flush = R5C_RECLAIM_STRIPE_GROUP;
        else if (total_cached > conf->min_nr_stripes * 1 / 2 ||
-                atomic_read(&conf->r5c_cached_full_stripes) >
+                atomic_read(&conf->r5c_cached_full_stripes) - flushing_full >
                 R5C_FULL_STRIPE_FLUSH_BATCH)
                /*
                 * if stripe cache pressure is moderate, or if there are many full
@@ -1362,9 +1426,9 @@ static void r5c_do_reclaim(struct r5conf *conf)
                            !test_bit(STRIPE_HANDLE, &sh->state) &&
                            atomic_read(&sh->count) == 0) {
                                r5c_flush_stripe(conf, sh);
+                               if (count++ >= R5C_RECLAIM_STRIPE_GROUP)
+                                       break;
                        }
-                       if (count++ >= R5C_RECLAIM_STRIPE_GROUP)
-                               break;
                }
                spin_unlock(&conf->device_lock);
                spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags);
@@ -2320,6 +2384,10 @@ int r5c_try_caching_write(struct r5conf *conf,
        int i;
        struct r5dev *dev;
        int to_cache = 0;
+       void **pslot;
+       sector_t tree_index;
+       int ret;
+       uintptr_t refcount;
 
        BUG_ON(!r5c_is_writeback(log));
 
@@ -2364,6 +2432,44 @@ int r5c_try_caching_write(struct r5conf *conf,
                }
        }
 
+       /* if the stripe is not counted in big_stripe_tree, add it now */
+       if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
+           !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) {
+               tree_index = r5c_tree_index(conf, sh->sector);
+               spin_lock(&log->tree_lock);
+               pslot = radix_tree_lookup_slot(&log->big_stripe_tree,
+                                              tree_index);
+               if (pslot) {
+                       refcount = (uintptr_t)radix_tree_deref_slot_protected(
+                               pslot, &log->tree_lock) >>
+                               R5C_RADIX_COUNT_SHIFT;
+                       radix_tree_replace_slot(
+                               &log->big_stripe_tree, pslot,
+                               (void *)((refcount + 1) << R5C_RADIX_COUNT_SHIFT));
+               } else {
+                       /*
+                        * this radix_tree_insert can fail safely, so no
+                        * need to call radix_tree_preload()
+                        */
+                       ret = radix_tree_insert(
+                               &log->big_stripe_tree, tree_index,
+                               (void *)(1 << R5C_RADIX_COUNT_SHIFT));
+                       if (ret) {
+                               spin_unlock(&log->tree_lock);
+                               r5c_make_stripe_write_out(sh);
+                               return -EAGAIN;
+                       }
+               }
+               spin_unlock(&log->tree_lock);
+
+               /*
+                * set STRIPE_R5C_PARTIAL_STRIPE; this indicates that the
+                * stripe is counted in the radix tree
+                */
+               set_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state);
+               atomic_inc(&conf->r5c_cached_partial_stripes);
+       }
+
        for (i = disks; i--; ) {
                dev = &sh->dev[i];
                if (dev->towrite) {
@@ -2438,17 +2544,20 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
                                 struct stripe_head *sh,
                                 struct stripe_head_state *s)
 {
+       struct r5l_log *log = conf->log;
        int i;
        int do_wakeup = 0;
+       sector_t tree_index;
+       void **pslot;
+       uintptr_t refcount;
 
-       if (!conf->log ||
-           !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
+       if (!log || !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
                return;
 
        WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
        clear_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags);
 
-       if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
+       if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
                return;
 
        for (i = sh->disks; i--; ) {
@@ -2470,12 +2579,45 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
        if (do_wakeup)
                wake_up(&conf->wait_for_overlap);
 
-       spin_lock_irq(&conf->log->stripe_in_journal_lock);
+       spin_lock_irq(&log->stripe_in_journal_lock);
        list_del_init(&sh->r5c);
-       spin_unlock_irq(&conf->log->stripe_in_journal_lock);
+       spin_unlock_irq(&log->stripe_in_journal_lock);
        sh->log_start = MaxSector;
-       atomic_dec(&conf->log->stripe_in_journal_count);
-       r5c_update_log_state(conf->log);
+
+       atomic_dec(&log->stripe_in_journal_count);
+       r5c_update_log_state(log);
+
+       /* stop counting this stripe in big_stripe_tree */
+       if (test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) ||
+           test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) {
+               tree_index = r5c_tree_index(conf, sh->sector);
+               spin_lock(&log->tree_lock);
+               pslot = radix_tree_lookup_slot(&log->big_stripe_tree,
+                                              tree_index);
+               BUG_ON(pslot == NULL);
+               refcount = (uintptr_t)radix_tree_deref_slot_protected(
+                       pslot, &log->tree_lock) >>
+                       R5C_RADIX_COUNT_SHIFT;
+               if (refcount == 1)
+                       radix_tree_delete(&log->big_stripe_tree, tree_index);
+               else
+                       radix_tree_replace_slot(
+                               &log->big_stripe_tree, pslot,
+                               (void *)((refcount - 1) << R5C_RADIX_COUNT_SHIFT));
+               spin_unlock(&log->tree_lock);
+       }
+
+       if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) {
+               BUG_ON(atomic_read(&conf->r5c_cached_partial_stripes) == 0);
+               atomic_dec(&conf->r5c_flushing_partial_stripes);
+               atomic_dec(&conf->r5c_cached_partial_stripes);
+       }
+
+       if (test_and_clear_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) {
+               BUG_ON(atomic_read(&conf->r5c_cached_full_stripes) == 0);
+               atomic_dec(&conf->r5c_flushing_full_stripes);
+               atomic_dec(&conf->r5c_cached_full_stripes);
+       }
 }
 
 int
@@ -2535,6 +2677,22 @@ r5c_cache_data(struct r5l_log *log, struct stripe_head *sh,
        return 0;
 }
 
+/* check whether this big stripe is in the write back cache. */
+bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect)
+{
+       struct r5l_log *log = conf->log;
+       sector_t tree_index;
+       void *slot;
+
+       if (!log)
+               return false;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       tree_index = r5c_tree_index(conf, sect);
+       slot = radix_tree_lookup(&log->big_stripe_tree, tree_index);
+       return slot != NULL;
+}
+
 static int r5l_load_log(struct r5l_log *log)
 {
        struct md_rdev *rdev = log->rdev;
@@ -2681,6 +2839,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
        if (!log->meta_pool)
                goto out_mempool;
 
+       spin_lock_init(&log->tree_lock);
+       INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
+
        log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
                                                 log->rdev->mddev, "reclaim");
        if (!log->reclaim_thread)
index 6214e699342c87d7cdcb83e385530dff808fa918..2ce23b01dbb21da6ae17664df085c37c0a63e157 100644 (file)
@@ -281,13 +281,13 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
                                                atomic_dec(&conf->r5c_cached_partial_stripes);
                                        list_add_tail(&sh->lru, &conf->r5c_full_stripe_list);
                                        r5c_check_cached_full_stripe(conf);
-                               } else {
-                                       /* partial stripe */
-                                       if (!test_and_set_bit(STRIPE_R5C_PARTIAL_STRIPE,
-                                                             &sh->state))
-                                               atomic_inc(&conf->r5c_cached_partial_stripes);
+                               } else
+                                       /*
+                                        * STRIPE_R5C_PARTIAL_STRIPE is set in
+                                        * r5c_try_caching_write(). No need to
+                                        * set it again.
+                                        */
                                        list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list);
-                               }
                        }
                }
        }
@@ -353,17 +353,15 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 static int release_stripe_list(struct r5conf *conf,
                               struct list_head *temp_inactive_list)
 {
-       struct stripe_head *sh;
+       struct stripe_head *sh, *t;
        int count = 0;
        struct llist_node *head;
 
        head = llist_del_all(&conf->released_stripes);
        head = llist_reverse_order(head);
-       while (head) {
+       llist_for_each_entry_safe(sh, t, head, release_list) {
                int hash;
 
-               sh = llist_entry(head, struct stripe_head, release_list);
-               head = llist_next(head);
                /* sh could be re-added after STRIPE_ON_RELEASE_LIST is cleared */
                smp_mb();
                clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
@@ -863,6 +861,43 @@ static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
        return 1;
 }
 
+static void flush_deferred_bios(struct r5conf *conf)
+{
+       struct bio_list tmp;
+       struct bio *bio;
+
+       if (!conf->batch_bio_dispatch || !conf->group_cnt)
+               return;
+
+       bio_list_init(&tmp);
+       spin_lock(&conf->pending_bios_lock);
+       bio_list_merge(&tmp, &conf->pending_bios);
+       bio_list_init(&conf->pending_bios);
+       spin_unlock(&conf->pending_bios_lock);
+
+       while ((bio = bio_list_pop(&tmp)))
+               generic_make_request(bio);
+}
+
+static void defer_bio_issue(struct r5conf *conf, struct bio *bio)
+{
+       /*
+        * changing group_cnt will drain all bios, so this is safe
+        *
+        * A read generally means a read-modify-write, which usually means a
+        * random write, so we don't delay it
+        */
+       if (!conf->batch_bio_dispatch || !conf->group_cnt ||
+           bio_op(bio) == REQ_OP_READ) {
+               generic_make_request(bio);
+               return;
+       }
+       spin_lock(&conf->pending_bios_lock);
+       bio_list_add(&conf->pending_bios, bio);
+       spin_unlock(&conf->pending_bios_lock);
+       md_wakeup_thread(conf->mddev->thread);
+}
+
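The two helpers above form a simple producer/consumer pair; a hedged sketch of the flow (illustrative only; the submission side appears in the request hunks just below, the drain in the raid5d() hunk further down):

	/* submission side: queue instead of issuing directly */
	defer_bio_issue(conf, bi);	/* bio_list_add + md_wakeup_thread() */

	/* raid5d() main loop: drain the whole batch in one pass */
	flush_deferred_bios(conf);	/* bio_list_merge + generic_make_request() */
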
 static void
 raid5_end_read_request(struct bio *bi);
 static void
@@ -1043,7 +1078,7 @@ again:
                                trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
                                                      bi, disk_devt(conf->mddev->gendisk),
                                                      sh->dev[i].sector);
-                       generic_make_request(bi);
+                       defer_bio_issue(conf, bi);
                }
                if (rrdev) {
                        if (s->syncing || s->expanding || s->expanded
@@ -1088,7 +1123,7 @@ again:
                                trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
                                                      rbi, disk_devt(conf->mddev->gendisk),
                                                      sh->dev[i].sector);
-                       generic_make_request(rbi);
+                       defer_bio_issue(conf, rbi);
                }
                if (!rdev && !rrdev) {
                        if (op_is_write(op))
@@ -2914,12 +2949,36 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
  *      like to flush data in journal to RAID disks first, so complex rmw
  *      is handled in the write patch (handle_stripe_dirtying).
  *
+ *   2. when journal space is critical (R5C_LOG_CRITICAL=1)
+ *
+ *      It is important to be able to flush all stripes in raid5-cache.
+ *      Therefore, we need to reserve some space on the journal device
+ *      for these flushes. If the flush operation includes pending writes
+ *      to the stripe, we need to reserve (conf->raid_disks + 1) pages per
+ *      stripe for the flush out. If we exclude these pending writes from
+ *      the flush operation, we only need (conf->max_degraded + 1) pages
+ *      per stripe.
+ *      Therefore, excluding pending writes in these cases enables more
+ *      efficient use of the journal device.
+ *
+ *      Note: To make sure the stripe makes progress, we only delay
+ *      towrite for stripes with data already in journal (injournal > 0).
+ *      When LOG_CRITICAL, stripes with injournal == 0 will be sent to
+ *      the no_space_stripes list.
+ *
  */
-static inline bool delay_towrite(struct r5dev *dev,
-                                  struct stripe_head_state *s)
+static inline bool delay_towrite(struct r5conf *conf,
+                                struct r5dev *dev,
+                                struct stripe_head_state *s)
 {
-       return !test_bit(R5_OVERWRITE, &dev->flags) &&
-               !test_bit(R5_Insync, &dev->flags) && s->injournal;
+       /* case 1 above */
+       if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+           !test_bit(R5_Insync, &dev->flags) && s->injournal)
+               return true;
+       /* case 2 above */
+       if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
+           s->injournal > 0)
+               return true;
+       return false;
 }
 
 static void
@@ -2942,7 +3001,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
 
-                       if (dev->towrite && !delay_towrite(dev, s)) {
+                       if (dev->towrite && !delay_towrite(conf, dev, s)) {
                                set_bit(R5_LOCKED, &dev->flags);
                                set_bit(R5_Wantdrain, &dev->flags);
                                if (!expand)
@@ -3694,7 +3753,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
        } else for (i = disks; i--; ) {
                /* would I have to read this buffer for read_modify_write */
                struct r5dev *dev = &sh->dev[i];
-               if (((dev->towrite && !delay_towrite(dev, s)) ||
+               if (((dev->towrite && !delay_towrite(conf, dev, s)) ||
                     i == sh->pd_idx || i == sh->qd_idx ||
                     test_bit(R5_InJournal, &dev->flags)) &&
                    !test_bit(R5_LOCKED, &dev->flags) &&
@@ -3718,8 +3777,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
                }
        }
 
-       pr_debug("for sector %llu, rmw=%d rcw=%d\n",
-               (unsigned long long)sh->sector, rmw, rcw);
+       pr_debug("for sector %llu state 0x%lx, rmw=%d rcw=%d\n",
+                (unsigned long long)sh->sector, sh->state, rmw, rcw);
        set_bit(STRIPE_HANDLE, &sh->state);
        if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
                /* prefer read-modify-write, but need to get some data */
@@ -3759,7 +3818,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
-                       if (((dev->towrite && !delay_towrite(dev, s)) ||
+                       if (((dev->towrite && !delay_towrite(conf, dev, s)) ||
                             i == sh->pd_idx || i == sh->qd_idx ||
                             test_bit(R5_InJournal, &dev->flags)) &&
                            !test_bit(R5_LOCKED, &dev->flags) &&
@@ -4995,9 +5054,9 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
                return 0;
        }
        /*
-        * use bio_clone_mddev to make a copy of the bio
+        * use bio_clone_fast to make a copy of the bio
         */
-       align_bi = bio_clone_mddev(raid_bio, GFP_NOIO, mddev);
+       align_bi = bio_clone_fast(raid_bio, GFP_NOIO, mddev->bio_set);
        if (!align_bi)
                return 0;
        /*
@@ -5025,6 +5084,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
                      rdev->recovery_offset >= end_sector)))
                        rdev = NULL;
        }
+
+       if (r5c_big_stripe_cached(conf, align_bi->bi_iter.bi_sector)) {
+               rcu_read_unlock();
+               bio_put(align_bi);
+               return 0;
+       }
+
        if (rdev) {
                sector_t first_bad;
                int bad_sectors;
@@ -5381,7 +5447,6 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
         * data on failed drives.
         */
        if (rw == READ && mddev->degraded == 0 &&
-           !r5c_is_writeback(conf->log) &&
            mddev->reshape_position == MaxSector) {
                bi = chunk_aligned_read(mddev, bi);
                if (!bi)
@@ -6126,6 +6191,8 @@ static void raid5d(struct md_thread *thread)
                mutex_unlock(&conf->cache_size_mutex);
        }
 
+       flush_deferred_bios(conf);
+
        r5l_flush_stripe_to_raid(conf->log);
 
        async_tx_issue_pending_all();
@@ -6711,6 +6778,18 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        atomic_set(&conf->active_stripes, 0);
        atomic_set(&conf->preread_active_stripes, 0);
        atomic_set(&conf->active_aligned_reads, 0);
+       bio_list_init(&conf->pending_bios);
+       spin_lock_init(&conf->pending_bios_lock);
+       conf->batch_bio_dispatch = true;
+       rdev_for_each(rdev, mddev) {
+               if (test_bit(Journal, &rdev->flags))
+                       continue;
+               if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+                       conf->batch_bio_dispatch = false;
+                       break;
+               }
+       }
+
        conf->bypass_threshold = BYPASS_THRESHOLD;
        conf->recovery_disabled = mddev->recovery_disabled - 1;
 
@@ -6757,6 +6836,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        INIT_LIST_HEAD(&conf->r5c_full_stripe_list);
        atomic_set(&conf->r5c_cached_partial_stripes, 0);
        INIT_LIST_HEAD(&conf->r5c_partial_stripe_list);
+       atomic_set(&conf->r5c_flushing_full_stripes, 0);
+       atomic_set(&conf->r5c_flushing_partial_stripes, 0);
 
        conf->level = mddev->new_level;
        conf->chunk_sectors = mddev->new_chunk_sectors;
index 1440fa26e29629c4f9acc098f0fa9035f5ff1d1a..4bb27b97bf6bc48f6362461592edab19f1024140 100644 (file)
@@ -663,6 +663,8 @@ struct r5conf {
        struct list_head        r5c_full_stripe_list;
        atomic_t                r5c_cached_partial_stripes;
        struct list_head        r5c_partial_stripe_list;
+       atomic_t                r5c_flushing_full_stripes;
+       atomic_t                r5c_flushing_partial_stripes;
 
        atomic_t                empty_inactive_list_nr;
        struct llist_head       released_stripes;
@@ -684,6 +686,10 @@ struct r5conf {
        int                     group_cnt;
        int                     worker_cnt_per_group;
        struct r5l_log          *log;
+
+       struct bio_list         pending_bios;
+       spinlock_t              pending_bios_lock;
+       bool                    batch_bio_dispatch;
 };
 
 
@@ -788,4 +794,5 @@ extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
 extern void r5c_check_cached_full_stripe(struct r5conf *conf);
 extern struct md_sysfs_entry r5c_journal_mode;
 extern void r5c_update_on_rdev_error(struct mddev *mddev);
+extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
 #endif
index bbe94873d44d314a120acdf821dd160d6e06eae7..8ed6bcc3a56e5ffd1299f19bba1db6cda5627fa5 100644 (file)
@@ -136,7 +136,7 @@ extern void dvb_ringbuffer_flush_spinlock_wakeup(struct dvb_ringbuffer *rbuf);
 }
 
 /**
- * dvb_ringbuffer_read_user - Reads a buffer into an user pointer
+ * dvb_ringbuffer_read_user - Reads a buffer into a user pointer
  *
  * @rbuf: pointer to struct dvb_ringbuffer
  * @buf: pointer to the buffer where the data will be stored
@@ -193,7 +193,7 @@ extern ssize_t dvb_ringbuffer_write(struct dvb_ringbuffer *rbuf, const u8 *buf,
                                    size_t len);
 
 /**
- * dvb_ringbuffer_write_user - Writes a buffer received via an user pointer
+ * dvb_ringbuffer_write_user - Writes a buffer received via a user pointer
  *
  * @rbuf: pointer to struct dvb_ringbuffer
  * @buf: pointer to the buffer where the data will be read
index 9076bf21cc8a41fa7791df1654a332abb4eaa6a1..7a681d8202c7ee9e9eed6dd2dbb4bf118d8f9508 100644 (file)
@@ -1317,9 +1317,9 @@ struct drx_version_list {
                DRX_MPEG_STR_WIDTH_8
        };
 
-/* CTRL CFG MPEG ouput */
+/* CTRL CFG MPEG output */
 /**
-* \struct struct drx_cfg_mpeg_output * \brief Configuartion parameters for MPEG output control.
+* \struct struct drx_cfg_mpeg_output * \brief Configuration parameters for MPEG output control.
 *
 * Used by DRX_CFG_MPEG_OUTPUT, in combination with DRX_CTRL_SET_CFG and
 * DRX_CTRL_GET_CFG.
index f1c3e3b09b65db6761d3ffa4d48a570e3570fc2a..daeaf965dd56b483d3bde8193ed4d27e406a9edb 100644 (file)
@@ -601,7 +601,7 @@ static struct drxj_data drxj_data_g = {
        0,                      /* hi_cfg_wake_up_key    */
        0,                      /* hi_cfg_ctrl         */
        0,                      /* HICfgTimeout      */
-       /* UIO configuartion */
+       /* UIO configuration */
        DRX_UIO_MODE_DISABLE,   /* uio_sma_rx_mode      */
        DRX_UIO_MODE_DISABLE,   /* uio_sma_tx_mode      */
        DRX_UIO_MODE_DISABLE,   /* uioASELMode       */
@@ -619,7 +619,7 @@ static struct drxj_data drxj_data_g = {
 /*   false,                  * flagHDevSet       */
 /*   (u16) 0xFFF,          * rdsLastCount      */
 
-       /* ATV configuartion */
+       /* ATV configuration */
        0UL,                    /* flags cfg changes */
        /* shadow of ATV_TOP_EQU0__A */
        {-5,
@@ -3352,7 +3352,7 @@ rw_error:
 /*----------------------------------------------------------------------------*/
 
 /*----------------------------------------------------------------------------*/
-/* miscellaneous configuartions - begin                           */
+/* miscellaneous configurations - begin                           */
 /*----------------------------------------------------------------------------*/
 
 /**
@@ -3515,7 +3515,7 @@ rw_error:
 }
 
 /*----------------------------------------------------------------------------*/
-/* miscellaneous configuartions - end                             */
+/* miscellaneous configurations - end                             */
 /*----------------------------------------------------------------------------*/
 
 /*----------------------------------------------------------------------------*/
@@ -10952,7 +10952,7 @@ rw_error:
 
 static void drxj_reset_mode(struct drxj_data *ext_attr)
 {
-       /* Initialize default AFE configuartion for QAM */
+       /* Initialize default AFE configuration for QAM */
        if (ext_attr->has_lna) {
                /* IF AGC off, PGA active */
 #ifndef DRXJ_VSB_ONLY
@@ -10996,7 +10996,7 @@ static void drxj_reset_mode(struct drxj_data *ext_attr)
        ext_attr->qam_pre_saw_cfg.reference = 0x07;
        ext_attr->qam_pre_saw_cfg.use_pre_saw = true;
 #endif
-       /* Initialize default AFE configuartion for VSB */
+       /* Initialize default AFE configuration for VSB */
        ext_attr->vsb_rf_agc_cfg.standard = DRX_STANDARD_8VSB;
        ext_attr->vsb_rf_agc_cfg.ctrl_mode = DRX_AGC_CTRL_AUTO;
        ext_attr->vsb_rf_agc_cfg.min_output_level = 0;
@@ -11072,9 +11072,9 @@ ctrl_power_mode(struct drx_demod_instance *demod, enum drx_power_mode *mode)
        }
 
        if ((*mode == DRX_POWER_UP)) {
-               /* Restore analog & pin configuartion */
+               /* Restore analog & pin configuration */
 
-               /* Initialize default AFE configuartion for VSB */
+               /* Initialize default AFE configuration for VSB */
                drxj_reset_mode(ext_attr);
        } else {
                /* Power down to requested mode */
index 55ad535197d282f33d269a2e655175a519712316..6c5b8f78f9f63bf7d4d938c49c300be7ab14ca0e 100644 (file)
@@ -447,7 +447,7 @@ struct drxj_cfg_atv_output {
                u16 hi_cfg_ctrl;          /**< HI Configure() parameter 5                       */
                u16 hi_cfg_transmit;      /**< HI Configure() parameter 6                       */
 
-               /* UIO configuartion */
+               /* UIO configuration */
                enum drxuio_mode uio_sma_rx_mode;/**< current mode of SmaRx pin                        */
                enum drxuio_mode uio_sma_tx_mode;/**< current mode of SmaTx pin                        */
                enum drxuio_mode uio_gpio_mode; /**< current mode of ASEL pin                         */
@@ -459,7 +459,7 @@ struct drxj_cfg_atv_output {
                /* IQM RC frequecy shift */
                u32 iqm_rc_rate_ofs;       /**< frequency shifter setting after setchannel      */
 
-               /* ATV configuartion */
+               /* ATV configuration */
                u32 atv_cfg_changed_flags; /**< flag: flags cfg changes */
                s16 atv_top_equ0[DRXJ_COEF_IDX_MAX];         /**< shadow of ATV_TOP_EQU0__A */
                s16 atv_top_equ1[DRXJ_COEF_IDX_MAX];         /**< shadow of ATV_TOP_EQU1__A */
index 15d2cac588b14320b54676ef18e9aa3e0493512b..7e1bbbaad625a192142bbc3db3b8ef894ea83cd0 100644 (file)
@@ -1626,7 +1626,7 @@ static int ctrl_power_mode(struct drxk_state *state, enum drx_power_mode *mode)
        }
 
        if (*mode == DRX_POWER_UP) {
-               /* Restore analog & pin configuartion */
+               /* Restore analog & pin configuration */
        } else {
                /* Power down to requested mode */
                /* Backup some register settings */
index ef35c2b30ea3b5f9ebf7a92fd1424f5367634f0a..4bf5a551ba403f0f4e7f985ccb7e9734b734df0b 100644 (file)
@@ -309,7 +309,7 @@ static int helene_write_regs(struct helene_priv *priv,
 
        if (len + 1 > sizeof(buf)) {
                dev_warn(&priv->i2c->dev,
-                               "wr reg=%04x: len=%d vs %Zu is too big!\n",
+                               "wr reg=%04x: len=%d vs %zu is too big!\n",
                                reg, len + 1, sizeof(buf));
                return -E2BIG;
        }
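
All of the %Zu/%Zd to %zu/%zd conversions scattered through this merge are one and the same fix: %Z was a pre-C99, glibc-only length modifier, dropped from the kernel's vsnprintf() in this cycle, while %z is the standard spelling for size_t/ssize_t. Illustrative use:

	char buf[64];

	/* C99 length modifier for size_t; the old %Zu form is rejected. */
	printk(KERN_INFO "buf holds %zu bytes\n", sizeof(buf));
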
index 4b67d7e0116d615116d7e2ac736c09391ef6e793..62aa007670152d0f049c468523ec4671719b40d6 100644 (file)
@@ -133,7 +133,7 @@ static int or51132_load_firmware (struct dvb_frontend* fe, const struct firmware
        u32 firmwareAsize, firmwareBsize;
        int i,ret;
 
-       dprintk("Firmware is %Zd bytes\n",fw->size);
+       dprintk("Firmware is %zd bytes\n",fw->size);
 
        /* Get size of firmware A and B */
        firmwareAsize = le32_to_cpu(*((__le32*)fw->data));
index 92ab34c3e0be71e03290bf064d7097b422eb63cc..143b39b5f6c9c2a47a1b710db075c5852f76bcaa 100644 (file)
@@ -499,7 +499,7 @@ static int tda10048_firmware_upload(struct dvb_frontend *fe)
                        __func__);
                return -EIO;
        } else {
-               printk(KERN_INFO "%s: firmware read %Zu bytes.\n",
+               printk(KERN_INFO "%s: firmware read %zu bytes.\n",
                        __func__,
                        fw->size);
                ret = 0;
index 843d4998435ef964b3ee806aebde007648459c19..4ade89d33d6204fa7ba792c5ac0728a19b69b715 100644 (file)
@@ -83,7 +83,7 @@
 #define ADV7183_LETTERBOX_3        0x9D /* Letterbox 3 */
 #define ADV7183_CRC_EN             0xB2 /* CRC enable */
 #define ADV7183_ADC_SWITCH_1       0xC3 /* ADC switch 1 */
-#define ADV7183_ADC_SWITCH_2       0xC4 /* ADC swithc 2 */
+#define ADV7183_ADC_SWITCH_2       0xC4 /* ADC switch 2 */
 #define ADV7183_LETTERBOX_CTRL_1   0xDC /* Letterbox control 1 */
 #define ADV7183_LETTERBOX_CTRL_2   0xDD /* Letterbox control 2 */
 #define ADV7183_SD_OFFSET_CB       0xE1 /* SD offset Cb */
index 4ba5eade7ce29bc5863be57de7147e8f22c116d8..ef4906406ebf9aa67f527c9ac81707a1a786c373 100644 (file)
@@ -422,7 +422,7 @@ int saa7164_downloadfirmware(struct saa7164_dev *dev)
                        return -ENOMEM;
                }
 
-               printk(KERN_INFO "%s() firmware read %Zu bytes.\n",
+               printk(KERN_INFO "%s() firmware read %zu bytes.\n",
                        __func__, fw->size);
 
                if (fw->size != fwlength) {
index 5615fefbf7af0eda5c117554ed69a9088c98ac85..c0373aede81ab19981d429929b40a40f8c521ed7 100644 (file)
@@ -358,7 +358,7 @@ struct fimc_pix_limit {
  * @pix_limit: pixel size constraints for the scaler
  * @min_inp_pixsize: minimum input pixel size
  * @min_out_pixsize: minimum output pixel size
- * @hor_offs_align: horizontal pixel offset aligment
+ * @hor_offs_align: horizontal pixel offset alignment
  * @min_vsize_align: minimum vertical pixel size alignment
  */
 struct fimc_variant {
index 0345b274eccc0c88aa94f7539401340acf18dfcf..91947cf1950ea0b1db11eb4b30ed3d5b2431276f 100644 (file)
@@ -1144,7 +1144,7 @@ static int xc_load_fw_and_init_tuner(struct dvb_frontend *fe, int force)
                        pr_err("xc5000: Upload failed. rc %d\n", ret);
                        return ret;
                }
-               dprintk(1, "firmware read %Zu bytes.\n", fw->size);
+               dprintk(1, "firmware read %zu bytes.\n", fw->size);
 
                if (fw->size != desired_fw->size) {
                        pr_err("xc5000: Firmware file with incorrect size\n");
index 81d7fd4f7776ef629ad0ea93d619ce9bc0c4c653..85ab3fa48f9a881d40705e657dc4c292ed4d856f 100644 (file)
@@ -2414,7 +2414,7 @@ static int stk9090m_frontend_attach(struct dvb_usb_adapter *adap)
                deb_info("%s: Upload failed. (file not found?)\n", __func__);
                return -ENODEV;
        } else {
-               deb_info("%s: firmware read %Zu bytes.\n", __func__, state->frontend_firmware->size);
+               deb_info("%s: firmware read %zu bytes.\n", __func__, state->frontend_firmware->size);
        }
        stk9090m_config.microcode_B_fe_size = state->frontend_firmware->size;
        stk9090m_config.microcode_B_fe_buffer = state->frontend_firmware->data;
@@ -2480,7 +2480,7 @@ static int nim9090md_frontend_attach(struct dvb_usb_adapter *adap)
                deb_info("%s: Upload failed. (file not found?)\n", __func__);
                return -EIO;
        } else {
-               deb_info("%s: firmware read %Zu bytes.\n", __func__, state->frontend_firmware->size);
+               deb_info("%s: firmware read %zu bytes.\n", __func__, state->frontend_firmware->size);
        }
        nim9090md_config[0].microcode_B_fe_size = state->frontend_firmware->size;
        nim9090md_config[0].microcode_B_fe_buffer = state->frontend_firmware->data;
index 42667710af92279dba54b777b9aa4827237f9340..46fb76349000ad743eaa616784e788e6ad4f71a6 100644 (file)
@@ -570,9 +570,9 @@ static void setfreq(struct gspca_dev *gspca_dev, s32 val)
 /* this function is called at probe and resume time */
 static int sd_init(struct gspca_dev *gspca_dev)
 {
-       /* some of this registers are not really neded, because
-        * they are overriden by setbrigthness, setcontrast, etc,
-        * but wont hurt anyway, and can help someone with similar webcam
+       /* some of this registers are not really needed, because
+        * they are overridden by setbrigthness, setcontrast, etc.,
+        * but won't hurt anyway, and can help someone with similar webcam
         * to see the initial parameters.*/
        struct sd *sd = (struct sd *) gspca_dev;
        const struct additional_sensor_data *sensor;
index 4afd4655d562883a58c4e1bc2b7fa0dbe58f0b58..39c15bb2b20ceb3ca5ca30f0062d0ced276058f9 100644 (file)
@@ -438,7 +438,7 @@ int tm6000_ir_init(struct tm6000_core *dev)
 
        /* input setup */
        rc->allowed_protocols = RC_BIT_RC5 | RC_BIT_NEC;
-       /* Neded, in order to support NEC remotes with 24 or 32 bits */
+       /* Needed, in order to support NEC remotes with 24 or 32 bits */
        rc->scancode_mask = 0xffff;
        rc->priv = ir;
        rc->change_protocol = tm6000_ir_change_protocol;
index 05b5c6652cfac56718af6a0e49fb0682b9c8ac28..e48b7c032c951eb9fc9f382ec615677b4ed8897a 100644 (file)
@@ -245,7 +245,7 @@ static const struct analog_demod_ops tuner_analog_ops = {
  * @tuner_callback:    an optional function to be called when switching
  *                     to analog mode
  *
- * This function applys the tuner config to tuner specified
+ * This function applies the tuner config to tuner specified
  * by tun_setup structure. It contains several per-tuner initialization "magic"
  */
 static void set_type(struct i2c_client *c, unsigned int type,
@@ -463,7 +463,7 @@ attach_failed:
  * @sd:                subdev descriptor
  * @tun_setup: type to be associated to a given tuner i2c address
  *
- * This function applys the tuner config to tuner specified
+ * This function applies the tuner config to tuner specified
  * by tun_setup structure.
  * If tuner I2C address is UNSET, then it will only set the device
  * if the tuner supports the mode specified in the call.
index ba63ca57ed7ebe0bf47cb6826b66e22518ca02ab..36bd904946bd34c0dec9d0295020117a31322933 100644 (file)
@@ -434,8 +434,9 @@ static void videobuf_vm_close(struct vm_area_struct *vma)
  * now ...).  Bounce buffers don't work very well for the data rates
  * video capture has.
  */
-static int videobuf_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int videobuf_vm_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct page *page;
 
        dprintk(3, "fault: fault @ %08lx [vma %08lx-%08lx]\n",
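
This hunk, and the similar cxl_mmap_fault() and gru_fault() hunks below, track the mm API change in which ->fault() handlers take a single struct vm_fault argument and recover the VMA from vmf->vma. A sketch of a converted handler, with my_dev and my_dev_lookup_page as hypothetical stand-ins:

	static int my_vm_fault(struct vm_fault *vmf)
	{
		/* The VMA now travels inside struct vm_fault. */
		struct vm_area_struct *vma = vmf->vma;
		struct my_dev *dev = vma->vm_private_data;
		struct page *page;

		page = my_dev_lookup_page(dev, vmf->address); /* hypothetical */
		if (!page)
			return VM_FAULT_SIGBUS;

		get_page(page);
		vmf->page = page;
		return 0;
	}
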
index 3907387b6d1564c5ffc7a7fbcc17a6402d1aaf68..062bf6ca262580a61f0cc57f09261312b5015d69 100644 (file)
@@ -121,8 +121,9 @@ void cxl_context_set_mapping(struct cxl_context *ctx,
        mutex_unlock(&ctx->mapping_lock);
 }
 
-static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int cxl_mmap_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct cxl_context *ctx = vma->vm_file->private_data;
        u64 area, offset;
 
index 051b14766ef964984da6ff487db81f5a782cb3ba..764ff5df0dbc343c07b6263e20067ea1f2a0860c 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/log2.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
-#include <linux/of.h>
+#include <linux/property.h>
 #include <linux/acpi.h>
 #include <linux/i2c.h>
 #include <linux/nvmem-provider.h>
@@ -562,26 +562,26 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
        return 0;
 }
 
-#ifdef CONFIG_OF
-static void at24_get_ofdata(struct i2c_client *client,
-                           struct at24_platform_data *chip)
+static void at24_get_pdata(struct device *dev, struct at24_platform_data *chip)
 {
-       const __be32 *val;
-       struct device_node *node = client->dev.of_node;
-
-       if (node) {
-               if (of_get_property(node, "read-only", NULL))
-                       chip->flags |= AT24_FLAG_READONLY;
-               val = of_get_property(node, "pagesize", NULL);
-               if (val)
-                       chip->page_size = be32_to_cpup(val);
+       int err;
+       u32 val;
+
+       if (device_property_present(dev, "read-only"))
+               chip->flags |= AT24_FLAG_READONLY;
+
+       err = device_property_read_u32(dev, "pagesize", &val);
+       if (!err) {
+               chip->page_size = val;
+       } else {
+               /*
+                * This is slow, but we can't know all eeproms, so we better
+                * play safe. Specifying custom eeprom-types via platform_data
+                * is recommended anyhow.
+                */
+               chip->page_size = 1;
        }
 }
-#else
-static void at24_get_ofdata(struct i2c_client *client,
-                           struct at24_platform_data *chip)
-{ }
-#endif /* CONFIG_OF */
 
 static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
@@ -613,15 +613,8 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
                chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
                magic >>= AT24_SIZE_BYTELEN;
                chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
-               /*
-                * This is slow, but we can't know all eeproms, so we better
-                * play safe. Specifying custom eeprom-types via platform_data
-                * is recommended anyhow.
-                */
-               chip.page_size = 1;
 
-               /* update chipdata if OF is present */
-               at24_get_ofdata(client, &chip);
+               at24_get_pdata(&client->dev, &chip);
 
                chip.setup = NULL;
                chip.context = NULL;
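
The conversion from of_get_property() to the device_property_*() helpers is what lets the driver drop its CONFIG_OF guards: the unified property API reads from devicetree or ACPI alike, so one probe path serves both firmware interfaces. The general pattern, sketched with a hypothetical flag name:

	u32 pagesize;

	if (device_property_present(dev, "read-only"))
		flags |= MY_READONLY;		/* hypothetical flag */

	/* Returns 0 on success; fall back to a safe default otherwise. */
	if (device_property_read_u32(dev, "pagesize", &pagesize))
		pagesize = 1;
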
index be37890abb93d3897c7e000184c65567a31ed2a9..77b16ca668460157cf15cfa8f767e56d590b5e73 100644 (file)
@@ -143,7 +143,7 @@ static void mbus_release_dev(struct device *d)
 }
 
 struct mbus_device *
-mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
+mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
                     struct mbus_hw_ops *hw_ops, int index,
                     void __iomem *mmio_va)
 {
@@ -158,7 +158,7 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
        mbdev->dev.parent = pdev;
        mbdev->id.device = id;
        mbdev->id.vendor = MBUS_DEV_ANY_ID;
-       mbdev->dev.archdata.dma_ops = dma_ops;
+       mbdev->dev.dma_ops = dma_ops;
        mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask;
        dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64));
        mbdev->dev.release = mbus_release_dev;
index ff6e01c258101f8527f75be845d1984323a72717..a444db5f61fe5bf3aed0b25f610751711aa4d930 100644 (file)
@@ -138,7 +138,7 @@ static void scif_release_dev(struct device *d)
 }
 
 struct scif_hw_dev *
-scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
+scif_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
                     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
                     struct mic_mw *mmio, struct mic_mw *aper, void *dp,
                     void __iomem *rdp, struct dma_chan **chan, int num_chan,
@@ -154,7 +154,7 @@ scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
        sdev->dev.parent = pdev;
        sdev->id.device = id;
        sdev->id.vendor = SCIF_DEV_ANY_ID;
-       sdev->dev.archdata.dma_ops = dma_ops;
+       sdev->dev.dma_ops = dma_ops;
        sdev->dev.release = scif_release_dev;
        sdev->hw_ops = hw_ops;
        sdev->dnode = dnode;
index 94f29ac608b6a87de4165dc79bbf3ed80e0371e7..ff59568219adbf637c227ae2be657864ef8e3853 100644 (file)
@@ -113,7 +113,7 @@ int scif_register_driver(struct scif_driver *driver);
 void scif_unregister_driver(struct scif_driver *driver);
 struct scif_hw_dev *
 scif_register_device(struct device *pdev, int id,
-                    struct dma_map_ops *dma_ops,
+                    const struct dma_map_ops *dma_ops,
                     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
                     struct mic_mw *mmio, struct mic_mw *aper,
                     void *dp, void __iomem *rdp,
index 303da222f5b6d69ba39971eb21d4177aadabbc27..fd7f2a6049f86218ded7d3033773e03514ae8424 100644 (file)
@@ -154,7 +154,7 @@ vop_register_device(struct device *pdev, int id,
        vdev->dev.parent = pdev;
        vdev->id.device = id;
        vdev->id.vendor = VOP_DEV_ANY_ID;
-       vdev->dev.archdata.dma_ops = (struct dma_map_ops *)dma_ops;
+       vdev->dev.dma_ops = dma_ops;
        vdev->dev.dma_mask = &vdev->dev.coherent_dma_mask;
        dma_set_mask(&vdev->dev, DMA_BIT_MASK(64));
        vdev->dev.release = vop_release_dev;
index 9599d732aff3559ab28acca6189113e13ee46afa..c327985c9523be27067f2710db322e5e0afc03cc 100644 (file)
@@ -245,7 +245,7 @@ static void __mic_dma_unmap_sg(struct device *dev,
        dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
 }
 
-static struct dma_map_ops __mic_dma_ops = {
+static const struct dma_map_ops __mic_dma_ops = {
        .alloc = __mic_dma_alloc,
        .free = __mic_dma_free,
        .map_page = __mic_dma_map_page,
@@ -344,7 +344,7 @@ mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
        mic_unmap_single(mdev, dma_addr, size);
 }
 
-static struct dma_map_ops mic_dma_ops = {
+static const struct dma_map_ops mic_dma_ops = {
        .map_page = mic_dma_map_page,
        .unmap_page = mic_dma_unmap_page,
 };
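
These hunks follow the treewide constification of struct dma_map_ops together with the move of the pointer from dev->archdata into struct device itself. The declaration and hand-off shape, with hypothetical callbacks:

	static const struct dma_map_ops my_dma_ops = {
		.map_page   = my_map_page,	/* hypothetical callbacks */
		.unmap_page = my_unmap_page,
	};

	/* The ops table can now live in rodata and be shared read-only. */
	dev->dma_ops = &my_dma_ops;
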
index af2e077da4b89402d3b256de648e0cbaadd17f4e..3641f1334cf0889934f74aa960f7879f5c8d53f8 100644 (file)
@@ -926,8 +926,9 @@ again:
  *
  *     Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries.
  */
-int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int gru_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct gru_thread_state *gts;
        unsigned long paddr, vaddr;
        unsigned long expires;
index 5c3ce24596753afad29180aa88e6dd7a78d209c8..b5e308b50ed18036fa71aaadbf9e6c22c6481533 100644 (file)
@@ -665,7 +665,7 @@ extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
                int cbr_au_count, char *cbmap);
 extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
                int dsr_au_count, char *dsmap);
-extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
+extern int gru_fault(struct vm_fault *vmf);
 extern struct gru_mm_struct *gru_register_mmu_notifier(void);
 extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
 
index f866a4baecb5407cb952e2ff6e55f4b671ed63a4..f35f0c8606b9ad6f094c6dfb3683d37ffc45304b 100644 (file)
@@ -303,7 +303,7 @@ int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
 
        vmci_dg_size = VMCI_DG_SIZE(dg);
        if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
-               pr_devel("Datagram too large (bytes=%Zu)\n", vmci_dg_size);
+               pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
                return VMCI_ERROR_INVALID_ARGS;
        }
 
index f84a4275ca294af9fb2ca8b6ac15bcb9b20bf69f..498c0854305f5a5ac0fa6f5d2b481c479e5714ff 100644 (file)
@@ -2928,7 +2928,7 @@ int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
 EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
 
 /*
- * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the comsumer.
+ * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
  * @qpair:      Pointer to the queue pair struct.
  * @consumer_tail:      Reference used for storing consumer tail index.
  * @producer_head:      Reference used for storing the producer head index.
index 2b7fc37648032d76b9f428e71c8ff3ab0c8fb7e2..00750c9d35145ddc291904f7a0c9a4a261f3f48c 100644 (file)
@@ -170,7 +170,7 @@ int dml_hw_init(struct mmci_host *host, struct device_node *np)
        writel_relaxed(producer_id | (consumer_id << CONSUMER_PIPE_ID_SHFT),
                       base + DML_PIPE_ID);
 
-       /* Make sure dml intialization is finished */
+       /* Make sure dml initialization is finished */
        mb();
 
        return 0;
index 82bd00af5cc3841ba6cd44ef51024b5792ba09b5..268aae45b5149de12ff77e59df776790724dede8 100644 (file)
@@ -75,18 +75,18 @@ static char module_name[] = "lart";
 
 /* blob */
 #define NUM_BLOB_BLOCKS                FLASH_NUMBLOCKS_16m_PARAM
-#define BLOB_START                     0x00000000
-#define BLOB_LEN                       (NUM_BLOB_BLOCKS * FLASH_BLOCKSIZE_PARAM)
+#define PART_BLOB_START                0x00000000
+#define PART_BLOB_LEN          (NUM_BLOB_BLOCKS * FLASH_BLOCKSIZE_PARAM)
 
 /* kernel */
 #define NUM_KERNEL_BLOCKS      7
-#define KERNEL_START           (BLOB_START + BLOB_LEN)
-#define KERNEL_LEN                     (NUM_KERNEL_BLOCKS * FLASH_BLOCKSIZE_MAIN)
+#define PART_KERNEL_START      (PART_BLOB_START + PART_BLOB_LEN)
+#define PART_KERNEL_LEN                (NUM_KERNEL_BLOCKS * FLASH_BLOCKSIZE_MAIN)
 
 /* initial ramdisk */
 #define NUM_INITRD_BLOCKS      24
-#define INITRD_START           (KERNEL_START + KERNEL_LEN)
-#define INITRD_LEN                     (NUM_INITRD_BLOCKS * FLASH_BLOCKSIZE_MAIN)
+#define PART_INITRD_START      (PART_KERNEL_START + PART_KERNEL_LEN)
+#define PART_INITRD_LEN                (NUM_INITRD_BLOCKS * FLASH_BLOCKSIZE_MAIN)
 
 /*
  * See section 4.0 in "3 Volt Fast Boot Block Flash Memory" Intel Datasheet
@@ -587,20 +587,20 @@ static struct mtd_partition lart_partitions[] = {
        /* blob */
        {
                .name   = "blob",
-               .offset = BLOB_START,
-               .size   = BLOB_LEN,
+               .offset = PART_BLOB_START,
+               .size   = PART_BLOB_LEN,
        },
        /* kernel */
        {
                .name   = "kernel",
-               .offset = KERNEL_START,         /* MTDPART_OFS_APPEND */
-               .size   = KERNEL_LEN,
+               .offset = PART_KERNEL_START,    /* MTDPART_OFS_APPEND */
+               .size   = PART_KERNEL_LEN,
        },
        /* initial ramdisk / file system */
        {
                .name   = "file system",
-               .offset = INITRD_START,         /* MTDPART_OFS_APPEND */
-               .size   = INITRD_LEN,           /* MTDPART_SIZ_FULL */
+               .offset = PART_INITRD_START,    /* MTDPART_OFS_APPEND */
+               .size   = PART_INITRD_LEN,      /* MTDPART_SIZ_FULL */
        }
 };
 #define NUM_PARTITIONS ARRAY_SIZE(lart_partitions)
index 6ea963e3b89a1ab1c78ccb4d63c99a16e1bfbe0b..62ee439d58829574d732e84b8afba738871a43e5 100644 (file)
@@ -123,7 +123,7 @@ static int __init arcnet_init(void)
                arc_proto_map[count] = arc_proto_default;
 
        if (BUGLVL(D_DURING))
-               pr_info("struct sizes: %Zd %Zd %Zd %Zd %Zd\n",
+               pr_info("struct sizes: %zd %zd %zd %zd %zd\n",
                        sizeof(struct arc_hardware),
                        sizeof(struct arc_rfc1201),
                        sizeof(struct arc_rfc1051),
index a8173130373056b3a245b241f834312ded0f8fed..a9ac58c351a05cb1b568361e73e8df34e98b2b5c 100644 (file)
@@ -1206,7 +1206,7 @@ static void bfin_mac_rx(struct bfin_mac_local *lp)
        /* reserve 2 bytes for RXDWA padding */
        skb_reserve(new_skb, NET_IP_ALIGN);
        /* Invalidate the data cache of skb->data range when it is write back
-        * cache. It will prevent overwritting the new data from DMA
+        * cache. It will prevent overwriting the new data from DMA
         */
        blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
                                         (unsigned long)new_skb->end);
index 0ee6e208aa07eca9d7111666d89e05e042e94fa4..50d88d3e03b68af9bb09f11b2aec09e7d2977c80 100644 (file)
@@ -817,7 +817,7 @@ static void bcm_enet_adjust_phy_link(struct net_device *dev)
                        rx_pause_en = 1;
                        tx_pause_en = 1;
                } else if (!priv->pause_auto) {
-                       /* pause setting overrided by user */
+                       /* pause setting overridden by user */
                        rx_pause_en = priv->pause_rx;
                        tx_pause_en = priv->pause_tx;
                } else {
index 05356efdbf93ee59690a403f34ee3a4765f76890..b209b7f6093e29da98be58312d9c375c04125ab3 100644 (file)
@@ -6957,7 +6957,7 @@ int bnx2x_link_update(struct link_params *params, struct link_vars *vars)
                         * hence its link is expected to be down
                         * - SECOND_PHY means that first phy should not be able
                         * to link up by itself (using configuration)
-                        * - DEFAULT should be overriden during initialiazation
+                        * - DEFAULT should be overridden during initialization
                         */
                                DP(NETIF_MSG_LINK, "Invalid link indication"
                                           "mpc=0x%x. DISABLING LINK !!!\n",
index 016d481c6476eddf9fa1586ff8b32df94bae1cc6..30606b11b128e9d169f421cacbe6b80b962ed821 100644 (file)
@@ -1622,7 +1622,7 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
                }
        }
 
-       netdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%Zu]\n",
+       netdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%zu]\n",
                   bp->dev->mtu, bp->rx_buffer_size);
 }
 
index acc231293e4d342c07b98f529e83956e4d727177..f6e739da7bb77a55f09e3121a279286034a0e7e1 100644 (file)
@@ -1416,7 +1416,7 @@ static unsigned int xdigit2int(unsigned char c)
  * <pattern data>[/<pattern mask>][@<anchor>]
  *
  * Up to 2 filter patterns can be specified.  If 2 are supplied the first one
- * must be anchored at 0.  An omited mask is taken as a mask of 1s, an omitted
+ * must be anchored at 0.  An omitted mask is taken as a mask of 1s, an omitted
  * anchor is taken as 0.
  */
 static ssize_t mps_trc_write(struct file *file, const char __user *buf,
index cbbf8648307a1a4f14d77b39d469484b0913d99d..78460c52b7c445112cd777385f0e81058f26e918 100644 (file)
@@ -847,9 +847,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
                wr32(hw, reg_idx, reg);
                i40e_flush(hw);
        }
-       /* reset some of the state varibles keeping
-        * track of the resources
-        */
+       /* reset some of the state variables keeping track of the resources */
        vf->num_queue_pairs = 0;
        vf->vf_states = 0;
        clear_bit(I40E_VF_STAT_INIT, &vf->vf_states);
index 2788a5409023ef41ad9ba62620acbc398b96d490..68812d783f33e02cb34ec0a8bcc25f9eba72d354 100644 (file)
@@ -294,7 +294,7 @@ s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data)
        u32 i, i2ccmd = 0;
        u16 phy_data_swapped;
 
-       /* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/
+       /* Prevent overwriting SFP I2C EEPROM which is at A0 address.*/
        if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) {
                hw_dbg("PHY I2C Address %d is out of range.\n",
                          hw->phy.addr);
index 30535e6b68f08c9fc4df2988b0316de5c91e2fcf..c8ac46049f3430ae07712f151c5043391f0059ad 100644 (file)
@@ -1449,7 +1449,7 @@ do { \
  *  @atr_input: input bitstream to compute the hash on
  *  @input_mask: mask for the input bitstream
  *
- *  This function serves two main purposes.  First it applys the input_mask
+ *  This function serves two main purposes.  First it applies the input_mask
  *  to the atr_input resulting in a cleaned up atr_input data stream.
  *  Secondly it computes the hash and stores it in the bkt_hash field at
  *  the end of the input byte stream.  This way it will be available for
index d7ac22d7f94029dc6f48d8746049f2014453b6a1..bd8de6b9be718f967ca6967a06c00be21d2e3b6c 100644 (file)
@@ -441,30 +441,40 @@ static int
 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
                           struct mlxsw_sp_prefix_usage *req_prefix_usage)
 {
-       struct mlxsw_sp_lpm_tree *lpm_tree;
+       struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree;
+       struct mlxsw_sp_lpm_tree *new_tree;
+       int err;
 
-       if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
-                                    &vr->lpm_tree->prefix_usage))
+       if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
                return 0;
 
-       lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
+       new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
                                         vr->proto, false);
-       if (IS_ERR(lpm_tree)) {
+       if (IS_ERR(new_tree)) {
                /* We failed to get a tree according to the required
                 * prefix usage. However, the current tree might be still good
                 * for us if our requirement is subset of the prefixes used
                 * in the tree.
                 */
                if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
-                                                &vr->lpm_tree->prefix_usage))
+                                                &lpm_tree->prefix_usage))
                        return 0;
-               return PTR_ERR(lpm_tree);
+               return PTR_ERR(new_tree);
        }
 
-       mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
-       mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
+       /* Prevent packet loss by overwriting existing binding */
+       vr->lpm_tree = new_tree;
+       err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
+       if (err)
+               goto err_tree_bind;
+       mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+
+       return 0;
+
+err_tree_bind:
        vr->lpm_tree = lpm_tree;
-       return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
+       mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
+       return err;
 }
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
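
The rewrite above is a make-before-break pattern: the replacement LPM tree is bound while the old one is still referenced, and only on success is the old tree released, so there is never a window in which the VR has no tree bound and drops traffic. Reduced to its skeleton, with hypothetical helpers:

	old = ctx->tree;
	ctx->tree = new;		/* publish the replacement first */
	err = bind_tree(ctx);		/* hypothetical bind step */
	if (err) {
		ctx->tree = old;	/* roll back, old binding still valid */
		put_tree(new);
		return err;
	}
	put_tree(old);			/* safe: traffic already uses new */
	return 0;
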
index ee38c18c2d2dcc3c3e468573194de7b87a4d52ac..ee1c78abab0bf323c6aed4b253376dfe22e6314e 100644 (file)
@@ -1251,10 +1251,10 @@ struct ksz_port_info {
  * @tx_size:           Transmit data size.  Used for TX optimization.
  *                     The maximum is defined by MAX_TX_HELD_SIZE.
  * @perm_addr:         Permanent MAC address.
- * @override_addr:     Overrided MAC address.
+ * @override_addr:     Overridden MAC address.
  * @address:           Additional MAC address entries.
  * @addr_list_size:    Additional MAC address list size.
- * @mac_override:      Indication of MAC address overrided.
+ * @mac_override:      Indication of MAC address overridden.
  * @promiscuous:       Counter to keep track of promiscuous mode set.
  * @all_multi:         Counter to keep track of all multicast mode set.
  * @multi_list:                Multicast address entries.
@@ -4042,7 +4042,7 @@ static int empty_addr(u8 *addr)
  * @hw:        The hardware instance.
  *
  * This routine programs the MAC address of the hardware when the address is
- * overrided.
+ * overridden.
  */
 static void hw_set_addr(struct ksz_hw *hw)
 {
@@ -7043,7 +7043,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id)
        if (macaddr[0] != ':')
                get_mac_addr(hw_priv, macaddr, MAIN_PORT);
 
-       /* Read MAC address and initialize override address if not overrided. */
+       /* Read MAC address and initialize override address if not overridden. */
        hw_read_addr(hw);
 
        /* Multiple device interfaces mode requires a second MAC address. */
index 61a9cd5be49734ce02f7a437763d267949707175..00c17fa6545bd5752a427e3660b062dc26ba57db 100644 (file)
@@ -688,7 +688,9 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
 #define OOO_LB_TC 9
 
 int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate);
-void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate);
+void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
+                                        struct qed_ptt *p_ptt,
+                                        u32 min_pf_rate);
 
 void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
index d6c5a8165b5f42a9ec82a7a9f12f4aacf3f56bbd..e2a081ceaf520c429b90e1fcc1e2b6cb7d3b10aa 100644 (file)
@@ -3198,7 +3198,8 @@ int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate)
 }
 
 /* API to configure WFQ from mcp link change */
-void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate)
+void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
+                                        struct qed_ptt *p_ptt, u32 min_pf_rate)
 {
        int i;
 
@@ -3212,8 +3213,7 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate)
        for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
-               __qed_configure_vp_wfq_on_link_change(p_hwfn,
-                                                     p_hwfn->p_dpc_ptt,
+               __qed_configure_vp_wfq_on_link_change(p_hwfn, p_ptt,
                                                      min_pf_rate);
        }
 }
index 314022df34694758d524554caccb3e2d2f9ec3ba..87fde205149fdbf3181befd79ca62508b2daa388 100644 (file)
@@ -679,7 +679,8 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
 
        /* Min bandwidth configuration */
        __qed_configure_pf_min_bandwidth(p_hwfn, p_ptt, p_link, min_bw);
-       qed_configure_vp_wfq_on_link_change(p_hwfn->cdev, p_link->min_pf_rate);
+       qed_configure_vp_wfq_on_link_change(p_hwfn->cdev, p_ptt,
+                                           p_link->min_pf_rate);
 
        p_link->an = !!(status & LINK_STATUS_AUTO_NEGOTIATE_ENABLED);
        p_link->an_complete = !!(status &
index 29ed785f1dc22699962ea15904118c3fee314207..253c2bbe1e4e1a705e52054b4d3faa199fd2ca93 100644 (file)
@@ -3014,8 +3014,7 @@ cleanup:
                ack_vfs[vfid / 32] |= BIT((vfid % 32));
                p_hwfn->pf_iov_info->pending_flr[rel_vf_id / 64] &=
                    ~(1ULL << (rel_vf_id % 64));
-               p_hwfn->pf_iov_info->pending_events[rel_vf_id / 64] &=
-                   ~(1ULL << (rel_vf_id % 64));
+               p_vf->vf_mbx.b_pending_msg = false;
        }
 
        return rc;
@@ -3128,11 +3127,20 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
        mbx = &p_vf->vf_mbx;
 
        /* qed_iov_process_mbx_request */
-       DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-                  "VF[%02x]: Processing mailbox message\n", p_vf->abs_vf_id);
+       if (!mbx->b_pending_msg) {
+               DP_NOTICE(p_hwfn,
+                         "VF[%02x]: Trying to process mailbox message when none is pending\n",
+                         p_vf->abs_vf_id);
+               return;
+       }
+       mbx->b_pending_msg = false;
 
        mbx->first_tlv = mbx->req_virt->first_tlv;
 
+       DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+                  "VF[%02x]: Processing mailbox message [type %04x]\n",
+                  p_vf->abs_vf_id, mbx->first_tlv.tl.type);
+
        /* check if tlv type is known */
        if (qed_iov_tlv_supported(mbx->first_tlv.tl.type) &&
            !p_vf->b_malicious) {
@@ -3219,20 +3227,19 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
        }
 }
 
-static void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid)
+void qed_iov_pf_get_pending_events(struct qed_hwfn *p_hwfn, u64 *events)
 {
-       u64 add_bit = 1ULL << (vfid % 64);
+       int i;
 
-       p_hwfn->pf_iov_info->pending_events[vfid / 64] |= add_bit;
-}
+       memset(events, 0, sizeof(u64) * QED_VF_ARRAY_LENGTH);
 
-static void qed_iov_pf_get_and_clear_pending_events(struct qed_hwfn *p_hwfn,
-                                                   u64 *events)
-{
-       u64 *p_pending_events = p_hwfn->pf_iov_info->pending_events;
+       qed_for_each_vf(p_hwfn, i) {
+               struct qed_vf_info *p_vf;
 
-       memcpy(events, p_pending_events, sizeof(u64) * QED_VF_ARRAY_LENGTH);
-       memset(p_pending_events, 0, sizeof(u64) * QED_VF_ARRAY_LENGTH);
+               p_vf = &p_hwfn->pf_iov_info->vfs_array[i];
+               if (p_vf->vf_mbx.b_pending_msg)
+                       events[i / 64] |= 1ULL << (i % 64);
+       }
 }
 
 static struct qed_vf_info *qed_sriov_get_vf_from_absid(struct qed_hwfn *p_hwfn,
@@ -3266,7 +3273,7 @@ static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn,
        p_vf->vf_mbx.pending_req = (((u64)vf_msg->hi) << 32) | vf_msg->lo;
 
        /* Mark the event and schedule the workqueue */
-       qed_iov_pf_add_pending_events(p_hwfn, p_vf->relative_vf_id);
+       p_vf->vf_mbx.b_pending_msg = true;
        qed_schedule_iov(p_hwfn, QED_IOV_WQ_MSG_FLAG);
 
        return 0;
@@ -4030,7 +4037,7 @@ static void qed_handle_vf_msg(struct qed_hwfn *hwfn)
                return;
        }
 
-       qed_iov_pf_get_and_clear_pending_events(hwfn, events);
+       qed_iov_pf_get_pending_events(hwfn, events);
 
        DP_VERBOSE(hwfn, QED_MSG_IOV,
                   "Event mask of VF events: 0x%llx 0x%llx 0x%llx\n",
index fc08cc2da6a7886d9e27a1f8e1743e4dcf73a16f..a89605821522d528411f711bbb0755c0ae003e5a 100644 (file)
@@ -140,6 +140,9 @@ struct qed_iov_vf_mbx {
        /* Address in VF where a pending message is located */
        dma_addr_t pending_req;
 
+       /* Message from VF awaits handling */
+       bool b_pending_msg;
+
        u8 *offset;
 
        /* saved VF request header */
@@ -232,7 +235,6 @@ struct qed_vf_info {
  */
 struct qed_pf_iov {
        struct qed_vf_info vfs_array[MAX_NUM_VFS];
-       u64 pending_events[QED_VF_ARRAY_LENGTH];
        u64 pending_flr[QED_VF_ARRAY_LENGTH];
 
        /* Allocate message address continuosuly and split to each VF */
index 99b187bfdd55e6e305cb7ffe5fd8eb78ffec2209..718bf58a7da66284e121f3aab8e79de7a6221c67 100644 (file)
@@ -178,7 +178,7 @@ const u32 qlcnic_83xx_reg_tbl[] = {
        0x3540,         /* Device state, DRV_REG1 */
        0x3544,         /* Driver state, DRV_REG2 */
        0x3548,         /* Driver scratch, DRV_REG3 */
-       0x354C,         /* Device partiton info, DRV_REG4 */
+       0x354C,         /* Device partition info, DRV_REG4 */
        0x3524,         /* Driver IDC ver, DRV_REG5 */
        0x3550,         /* FW_VER_MAJOR */
        0x3554,         /* FW_VER_MINOR */
index 47ced8a898ca305885136acc4c54dd8b95786a11..91fb54fd03d9b44e98202e188eb29bf4db5b8972 100644 (file)
 
 /***********************************/
 /* MC_CMD_GET_LICENSED_V3_FEATURE_STATES
- * Query the state of an one or more licensed features. (Note that the actual
+ * Query the state of one or more licensed features. (Note that the actual
  * state may be invalidated by the MC_CMD_LICENSING_V3 OP_UPDATE_LICENSE
  * operation or a reboot of the MC.) Used for V3 licensing (Medford)
  */
index 19a458716f1ae11cb6e27100834771d0ff5fc6f6..1b6f6171d0788e74b939622d284880f5c083742e 100644 (file)
@@ -176,7 +176,7 @@ struct sis900_private {
 
        u32 msg_enable;
 
-       unsigned int cur_rx, dirty_rx; /* producer/comsumer pointers for Tx/Rx ring */
+       unsigned int cur_rx, dirty_rx; /* producer/consumer pointers for Tx/Rx ring */
        unsigned int cur_tx, dirty_tx;
 
        /* The saved address of a sent/receive-in-place packet buffer */
index 45301cb98bc1c279a760e2b19cd1ed32b4659a9f..7074b40ebd7f8e8cb0bcf0def7085fd91b8b45d6 100644 (file)
@@ -881,12 +881,14 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
                info = &geneve->info;
        }
 
+       rcu_read_lock();
 #if IS_ENABLED(CONFIG_IPV6)
        if (info->mode & IP_TUNNEL_INFO_IPV6)
                err = geneve6_xmit_skb(skb, dev, geneve, info);
        else
 #endif
                err = geneve_xmit_skb(skb, dev, geneve, info);
+       rcu_read_unlock();
 
        if (likely(!err))
                return NETDEV_TX_OK;
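
The geneve and vxlan hunks in this merge add the same guard: ndo_start_xmit() is not guaranteed to run inside an RCU read-side section, yet the transmit helpers rcu_dereference() the tunnel sockets, so the driver takes the lock itself around the whole lookup-and-use span. Generic shape of the pattern, with hypothetical names:

	rcu_read_lock();
	sock = rcu_dereference(tun->sock);	/* hypothetical field */
	if (sock)
		err = do_xmit(sock, skb);	/* hypothetical send */
	rcu_read_unlock();

The vxlan variant below has early error exits, which is why it funnels them through out_unlock/tx_error labels: every path out of the function must drop the read lock exactly once.
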
index bda0c64134508cf5dd786d3f698bd45884dcce57..89698741682f41e2ea70868715ffcb4f4784e258 100644 (file)
@@ -1330,7 +1330,7 @@ static int __init gtp_init(void)
        if (err < 0)
                goto unreg_genl_family;
 
-       pr_info("GTP module loaded (pdp ctx size %Zd bytes)\n",
+       pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
                sizeof(struct pdp_ctx));
        return 0;
 
index 6e98ede997d3f08d4ac3fa967382b55e4dfc287d..0dd510604118bc8c26c5ec9a84410edbe16a4d8d 100644 (file)
@@ -346,7 +346,7 @@ static int ax88772_reset(struct usbnet *dev)
        if (ret < 0)
                goto out;
 
-       asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0);
+       ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0);
        if (ret < 0)
                goto out;
 
index 3e37724d30ae7efa2153f53fab3b21dc6cac5af7..8aefb282c862c3bb4d474d236d70b15c44a2fa1b 100644 (file)
@@ -343,7 +343,7 @@ static const struct driver_info kalmia_info = {
 static const struct usb_device_id products[] = {
        /* The unswitched USB ID, to get the module auto loaded: */
        { USB_DEVICE(0x04e8, 0x689a) },
-       /* The stick swithed into modem (by e.g. usb_modeswitch): */
+       /* The stick switched into modem (by e.g. usb_modeswitch): */
        { USB_DEVICE(0x04e8, 0x6889),
                .driver_info = (unsigned long) &kalmia_info, },
        { /* EMPTY == end of list */} };
index 4f4f71b2966ba50760f8309efdc919106d9f8276..c5b21138b7eb1723579528532deb1b7ea9c2aa72 100644 (file)
@@ -383,7 +383,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags)
 
        /* REVISIT:  peripheral "alignment" request is ignored ... */
        dev_dbg(&intf->dev,
-               "hard mtu %u (%u from dev), rx buflen %Zu, align %d\n",
+               "hard mtu %u (%u from dev), rx buflen %zu, align %d\n",
                dev->hard_mtu, tmp, dev->rx_urb_size,
                1 << le32_to_cpu(u.init_c->packet_alignment));
 
index d9440bc022f2c40d965f6a6dd804f7ba74944cbc..ac69f28d92d2360ddf3fc4ace031046db8d7d39d 100644 (file)
@@ -379,7 +379,7 @@ static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen)
        u32 expected_length;
 
        if (datalen < sizeof(struct lsi_umts_single)) {
-               netdev_err(dev->net, "%s: Data length %d, exp >= %Zu\n",
+               netdev_err(dev->net, "%s: Data length %d, exp >= %zu\n",
                           __func__, datalen, sizeof(struct lsi_umts_single));
                return -1;
        }
index b7911994112aebecc691117e15ab32a7c238fdf9..e375560cc74e5ffc09553ddab5c6b657fe1cb6f0 100644 (file)
@@ -2105,6 +2105,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
                                     vxlan->cfg.port_max, true);
 
+       rcu_read_lock();
        if (dst->sa.sa_family == AF_INET) {
                struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
                struct rtable *rt;
@@ -2127,7 +2128,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                                    dst_port, vni, &rt->dst,
                                                    rt->rt_flags);
                        if (err)
-                               return;
+                               goto out_unlock;
                } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
                        df = htons(IP_DF);
                }
@@ -2166,7 +2167,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                                    dst_port, vni, ndst,
                                                    rt6i_flags);
                        if (err)
-                               return;
+                               goto out_unlock;
                }
 
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
@@ -2183,6 +2184,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                     label, src_port, dst_port, !udp_sum);
 #endif
        }
+out_unlock:
+       rcu_read_unlock();
        return;
 
 drop:
@@ -2191,6 +2194,7 @@ drop:
        return;
 
 tx_error:
+       rcu_read_unlock();
        if (err == -ELOOP)
                dev->stats.collisions++;
        else if (err == -ENETUNREACH)
index e74664b84925e9997072bc033c6938c8b0231a09..502c346aa790bf4f79218d816420f9997b69df3e 100644 (file)
@@ -237,7 +237,7 @@ void __i2400mu_bm_notif_cb(struct urb *urb)
  *
  * @i2400m: device descriptor
  * @urb: urb to use
- * @completion: completion varible to complete when done
+ * @completion: completion variable to complete when done
  *
  * Data is always read to i2400m->bm_ack_buf
  */
index 815efe9fd208fab95984b04bd84b088e6407576f..5214dd7a3936311e1bb93473199b602fbb9954aa 100644 (file)
@@ -59,13 +59,13 @@ static const struct ani_ofdm_level_entry ofdm_level_table[] = {
 /*
  * MRC (Maximal Ratio Combining) has always been used with multi-antenna ofdm.
  * With OFDM for single stream you just add up all antenna inputs, you're
- * only interested in what you get after FFT. Signal aligment is also not
+ * only interested in what you get after FFT. Signal alignment is also not
  * required for OFDM because any phase difference adds up in the frequency
  * domain.
  *
  * MRC requires extra work for use with CCK. You need to align the antenna
  * signals from the different antenna before you can add the signals together.
- * You need aligment of signals as CCK is in time domain, so addition can cancel
+ * You need alignment of signals as CCK is in time domain, so addition can cancel
  * your signal completely if phase is 180 degrees (think of adding sine waves).
  * You also need to remove noise before the addition and this is where ANI
  * MRC CCK comes into play. One of the antenna inputs may be stronger but
index 10098b7586f3c95de1d2a5a5abc4396f5b13e319..944b83cfc51978d06ccc82a9f23ce64472af3fc8 100644 (file)
@@ -4874,7 +4874,7 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
                kfree(af_params);
        } else {
                brcmf_dbg(TRACE, "Unhandled, fc=%04x!!\n", mgmt->frame_control);
-               brcmf_dbg_hex_dump(true, buf, len, "payload, len=%Zu\n", len);
+               brcmf_dbg_hex_dump(true, buf, len, "payload, len=%zu\n", len);
        }
 
 exit:
index 356aba9d3d538a3029431e44b99dd6e4ed89529a..f922859acf40a5beecbb30d524ddd24b1372a1b7 100644 (file)
@@ -1238,7 +1238,7 @@ static int ipw2100_get_hw_features(struct ipw2100_priv *priv)
 }
 
 /*
- * Start firmware execution after power on and intialization
+ * Start firmware execution after power on and initialization
  * The sequence is:
  *  1. Release ARC
  *  2. Wait for f/w initialization completes;
@@ -1277,7 +1277,7 @@ static int ipw2100_start_adapter(struct ipw2100_priv *priv)
        /* Release ARC - clear reset bit */
        write_register(priv->net_dev, IPW_REG_RESET_REG, 0);
 
-       /* wait for f/w intialization complete */
+       /* wait for f/w initialization complete */
        IPW_DEBUG_FW("Waiting for f/w initialization to complete...\n");
        i = 5000;
        do {
@@ -5652,7 +5652,7 @@ static void shim__set_security(struct net_device *dev,
 
 /* As a temporary work around to enable WPA until we figure out why
  * wpa_supplicant toggles the security capability of the driver, which
- * forces a disassocation with force_update...
+ * forces a disassociation with force_update...
  *
  *     if (force_update || !(priv->status & STATUS_ASSOCIATED))*/
        if (!(priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING)))
index ef9af8a29cad5172c24871a7cb2746a57ccf8cbb..5ef3c5cc47c5f75e4aee7c727f4ae08e7225c98f 100644 (file)
@@ -3974,7 +3974,7 @@ static void ipw_send_disassociate(struct ipw_priv *priv, int quiet)
                return;
        }
 
-       IPW_DEBUG_ASSOC("Disassocation attempt from %pM "
+       IPW_DEBUG_ASSOC("Disassociation attempt from %pM "
                        "on channel %d.\n",
                        priv->assoc_request.bssid,
                        priv->assoc_request.channel);
@@ -5196,7 +5196,7 @@ static void ipw_rx_queue_restock(struct ipw_priv *priv)
  * Move all used packet from rx_used to rx_free, allocating a new SKB for each.
  * Also restock the Rx queue via ipw_rx_queue_restock.
  *
- * This is called as a scheduled work item (except for during intialization)
+ * This is called as a scheduled work item (except for during initialization)
  */
 static void ipw_rx_queue_replenish(void *data)
 {
index a91d170a614b63181d6ca2c45a7038da2cb4fb4c..2781f5728d0768169276d380d625695f7513e917 100644 (file)
@@ -4855,39 +4855,39 @@ il4965_ucode_callback(const struct firmware *ucode_raw, void *context)
         */
 
        D_INFO("f/w package hdr ucode version raw = 0x%x\n", il->ucode_ver);
-       D_INFO("f/w package hdr runtime inst size = %Zd\n", pieces.inst_size);
-       D_INFO("f/w package hdr runtime data size = %Zd\n", pieces.data_size);
-       D_INFO("f/w package hdr init inst size = %Zd\n", pieces.init_size);
-       D_INFO("f/w package hdr init data size = %Zd\n", pieces.init_data_size);
-       D_INFO("f/w package hdr boot inst size = %Zd\n", pieces.boot_size);
+       D_INFO("f/w package hdr runtime inst size = %zd\n", pieces.inst_size);
+       D_INFO("f/w package hdr runtime data size = %zd\n", pieces.data_size);
+       D_INFO("f/w package hdr init inst size = %zd\n", pieces.init_size);
+       D_INFO("f/w package hdr init data size = %zd\n", pieces.init_data_size);
+       D_INFO("f/w package hdr boot inst size = %zd\n", pieces.boot_size);
 
        /* Verify that uCode images will fit in card's SRAM */
        if (pieces.inst_size > il->hw_params.max_inst_size) {
-               IL_ERR("uCode instr len %Zd too large to fit in\n",
+               IL_ERR("uCode instr len %zd too large to fit in\n",
                       pieces.inst_size);
                goto try_again;
        }
 
        if (pieces.data_size > il->hw_params.max_data_size) {
-               IL_ERR("uCode data len %Zd too large to fit in\n",
+               IL_ERR("uCode data len %zd too large to fit in\n",
                       pieces.data_size);
                goto try_again;
        }
 
        if (pieces.init_size > il->hw_params.max_inst_size) {
-               IL_ERR("uCode init instr len %Zd too large to fit in\n",
+               IL_ERR("uCode init instr len %zd too large to fit in\n",
                       pieces.init_size);
                goto try_again;
        }
 
        if (pieces.init_data_size > il->hw_params.max_data_size) {
-               IL_ERR("uCode init data len %Zd too large to fit in\n",
+               IL_ERR("uCode init data len %zd too large to fit in\n",
                       pieces.init_data_size);
                goto try_again;
        }
 
        if (pieces.boot_size > il->hw_params.max_bsm_size) {
-               IL_ERR("uCode boot instr len %Zd too large to fit in\n",
+               IL_ERR("uCode boot instr len %zd too large to fit in\n",
                       pieces.boot_size);
                goto try_again;
        }
@@ -4938,7 +4938,7 @@ il4965_ucode_callback(const struct firmware *ucode_raw, void *context)
        /* Copy images into buffers for card's bus-master reads ... */
 
        /* Runtime instructions (first block of data in file) */
-       D_INFO("Copying (but not loading) uCode instr len %Zd\n",
+       D_INFO("Copying (but not loading) uCode instr len %zd\n",
               pieces.inst_size);
        memcpy(il->ucode_code.v_addr, pieces.inst, pieces.inst_size);
 
@@ -4949,28 +4949,28 @@ il4965_ucode_callback(const struct firmware *ucode_raw, void *context)
         * Runtime data
         * NOTE:  Copy into backup buffer will be done in il_up()
         */
-       D_INFO("Copying (but not loading) uCode data len %Zd\n",
+       D_INFO("Copying (but not loading) uCode data len %zd\n",
               pieces.data_size);
        memcpy(il->ucode_data.v_addr, pieces.data, pieces.data_size);
        memcpy(il->ucode_data_backup.v_addr, pieces.data, pieces.data_size);
 
        /* Initialization instructions */
        if (pieces.init_size) {
-               D_INFO("Copying (but not loading) init instr len %Zd\n",
+               D_INFO("Copying (but not loading) init instr len %zd\n",
                       pieces.init_size);
                memcpy(il->ucode_init.v_addr, pieces.init, pieces.init_size);
        }
 
        /* Initialization data */
        if (pieces.init_data_size) {
-               D_INFO("Copying (but not loading) init data len %Zd\n",
+               D_INFO("Copying (but not loading) init data len %zd\n",
                       pieces.init_data_size);
                memcpy(il->ucode_init_data.v_addr, pieces.init_data,
                       pieces.init_data_size);
        }
 
        /* Bootstrap instructions */
-       D_INFO("Copying (but not loading) boot instr len %Zd\n",
+       D_INFO("Copying (but not loading) boot instr len %zd\n",
               pieces.boot_size);
        memcpy(il->ucode_boot.v_addr, pieces.boot, pieces.boot_size);
 
index 0e0293d42b5d3464586e12cae7a7741d54d16ed0..be466a074c1df8ad351c2b1ce9142a13955fae47 100644 (file)
@@ -1141,21 +1141,21 @@ static int validate_sec_sizes(struct iwl_drv *drv,
                              struct iwl_firmware_pieces *pieces,
                              const struct iwl_cfg *cfg)
 {
-       IWL_DEBUG_INFO(drv, "f/w package hdr runtime inst size = %Zd\n",
+       IWL_DEBUG_INFO(drv, "f/w package hdr runtime inst size = %zd\n",
                get_sec_size(pieces, IWL_UCODE_REGULAR,
                             IWL_UCODE_SECTION_INST));
-       IWL_DEBUG_INFO(drv, "f/w package hdr runtime data size = %Zd\n",
+       IWL_DEBUG_INFO(drv, "f/w package hdr runtime data size = %zd\n",
                get_sec_size(pieces, IWL_UCODE_REGULAR,
                             IWL_UCODE_SECTION_DATA));
-       IWL_DEBUG_INFO(drv, "f/w package hdr init inst size = %Zd\n",
+       IWL_DEBUG_INFO(drv, "f/w package hdr init inst size = %zd\n",
                get_sec_size(pieces, IWL_UCODE_INIT, IWL_UCODE_SECTION_INST));
-       IWL_DEBUG_INFO(drv, "f/w package hdr init data size = %Zd\n",
+       IWL_DEBUG_INFO(drv, "f/w package hdr init data size = %zd\n",
                get_sec_size(pieces, IWL_UCODE_INIT, IWL_UCODE_SECTION_DATA));
 
        /* Verify that uCode images will fit in card's SRAM. */
        if (get_sec_size(pieces, IWL_UCODE_REGULAR, IWL_UCODE_SECTION_INST) >
            cfg->max_inst_size) {
-               IWL_ERR(drv, "uCode instr len %Zd too large to fit in\n",
+               IWL_ERR(drv, "uCode instr len %zd too large to fit in\n",
                        get_sec_size(pieces, IWL_UCODE_REGULAR,
                                     IWL_UCODE_SECTION_INST));
                return -1;
@@ -1163,7 +1163,7 @@ static int validate_sec_sizes(struct iwl_drv *drv,
 
        if (get_sec_size(pieces, IWL_UCODE_REGULAR, IWL_UCODE_SECTION_DATA) >
            cfg->max_data_size) {
-               IWL_ERR(drv, "uCode data len %Zd too large to fit in\n",
+               IWL_ERR(drv, "uCode data len %zd too large to fit in\n",
                        get_sec_size(pieces, IWL_UCODE_REGULAR,
                                     IWL_UCODE_SECTION_DATA));
                return -1;
@@ -1171,7 +1171,7 @@ static int validate_sec_sizes(struct iwl_drv *drv,
 
        if (get_sec_size(pieces, IWL_UCODE_INIT, IWL_UCODE_SECTION_INST) >
             cfg->max_inst_size) {
-               IWL_ERR(drv, "uCode init instr len %Zd too large to fit in\n",
+               IWL_ERR(drv, "uCode init instr len %zd too large to fit in\n",
                        get_sec_size(pieces, IWL_UCODE_INIT,
                                     IWL_UCODE_SECTION_INST));
                return -1;
@@ -1179,7 +1179,7 @@ static int validate_sec_sizes(struct iwl_drv *drv,
 
        if (get_sec_size(pieces, IWL_UCODE_INIT, IWL_UCODE_SECTION_DATA) >
            cfg->max_data_size) {
-               IWL_ERR(drv, "uCode init data len %Zd too large to fit in\n",
+               IWL_ERR(drv, "uCode init data len %zd too large to fit in\n",
                        get_sec_size(pieces, IWL_UCODE_REGULAR,
                                     IWL_UCODE_SECTION_DATA));
                return -1;
index abdd0cf710bf6df2440595dfbe879debf208be9a..fac28bd8fbee49bac450a7af32b7c6c71b4e95b6 100644 (file)
@@ -346,9 +346,7 @@ void mwifiex_parse_tx_status_event(struct mwifiex_private *priv,
                return;
 
        spin_lock_irqsave(&priv->ack_status_lock, flags);
-       ack_skb = idr_find(&priv->ack_status_frames, tx_status->tx_token_id);
-       if (ack_skb)
-               idr_remove(&priv->ack_status_frames, tx_status->tx_token_id);
+       ack_skb = idr_remove(&priv->ack_status_frames, tx_status->tx_token_id);
        spin_unlock_irqrestore(&priv->ack_status_lock, flags);
 
        if (ack_skb) {
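The hunk above relies on idr_remove() returning the pointer that was stored under the given ID, so the former find-then-remove pair collapses into one call under the lock. A minimal sketch of the pattern as this patch uses it (ack_skb_take is a hypothetical helper):

#include <linux/idr.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>

/* Hypothetical helper: fetch-and-remove in a single step; yields
 * NULL when nothing was allocated under @token. */
static struct sk_buff *ack_skb_take(struct idr *frames,
                                    spinlock_t *lock, int token)
{
        struct sk_buff *skb;
        unsigned long flags;

        spin_lock_irqsave(lock, flags);
        skb = idr_remove(frames, token);
        spin_unlock_irqrestore(lock, flags);

        return skb;
}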
index 28c2f6fae3e644d0787f8a1f9117e7e867f04e2b..e4ff3b9738505f2077452cf65790cd68bee3652e 100644 (file)
@@ -673,8 +673,8 @@ void mwifiex_update_ralist_tx_pause(struct mwifiex_private *priv, u8 *mac,
        spin_unlock_irqrestore(&priv->wmm.ra_list_spinlock, flags);
 }
 
-/* This function update non-tdls peer ralist tx_pause while
- * tdls channel swithing
+/* This function updates non-tdls peer ralist tx_pause during
+ * tdls channel switching
  */
 void mwifiex_update_ralist_tx_pause_in_tdls_cs(struct mwifiex_private *priv,
                                               u8 *mac, u8 tx_pause)
index 1922e78ad6bdd7a7cd7390907db7ebb48d70b11e..89a0a28b8b202bc0c978ecb536a8bbc20258e8bb 100644 (file)
@@ -455,7 +455,7 @@ static u32 _rtl92s_fill_h2c_cmd(struct sk_buff *skb, u32 h2cbufferlen,
        u8 i = 0;
 
        do {
-               /* 8 - Byte aligment */
+               /* 8 - Byte alignment */
                len = H2C_TX_CMD_HDR_LEN + N_BYTE_ALIGMENT(pcmd_len[i], 8);
 
                /* Buffer length is not enough */
@@ -504,7 +504,7 @@ static u32 _rtl92s_get_h2c_cmdlen(u32 h2cbufferlen, u32 cmd_num, u32 *pcmd_len)
        u8 i = 0;
 
        do {
-               /* 8 - Byte aligment */
+               /* 8 - Byte alignment */
                len = H2C_TX_CMD_HDR_LEN + N_BYTE_ALIGMENT(pcmd_len[i], 8);
 
                /* Buffer length is not enough */
index ef5d394f185bfb46cc24c9170eecf4f6cee4dab8..cc8deecea8cbeeebbd7bc9216d7698a175ba8cae 100644 (file)
@@ -516,7 +516,7 @@ err:
 
 /**
  * rsi_disconnect() - This function performs the reverse of the probe function,
- *                   it deintialize the driver structure.
+ *                   it deinitializes the driver structure.
  * @pfunction: Pointer to the USB interface structure.
  *
  * Return: None.
index 5bdf7a03e3ddb578023dda6969113199701fe0a0..d1aa3eee0e81f8cc7612eddabc4cf630dbbd1e79 100644 (file)
@@ -178,7 +178,7 @@ static struct wlcore_conf wl18xx_conf = {
        .sg = {
                .params = {
                        [WL18XX_CONF_SG_PARAM_0] = 0,
-                       /* Configuartion Parameters */
+                       /* Configuration Parameters */
                        [WL18XX_CONF_SG_ANTENNA_CONFIGURATION] = 0,
                        [WL18XX_CONF_SG_ZIGBEE_COEX] = 0,
                        [WL18XX_CONF_SG_TIME_SYNC] = 0,
index d0b7734030ef70ddc8c2bc9b124021fca187834b..58898b99d3f74ada971d9f614733b61d13dc69bd 100644 (file)
@@ -544,7 +544,7 @@ static int wl12xx_init_sta_role(struct wl1271 *wl, struct wl12xx_vif *wlvif)
        return 0;
 }
 
-/* vif-specific intialization */
+/* vif-specific initialization */
 static int wl12xx_init_ap_role(struct wl1271 *wl, struct wl12xx_vif *wlvif)
 {
        int ret;
index d9c55830b2b26a1fb67b41562b3b16c0b09f8830..a966c6a85ea869384d295fe6468cbd3e56d02b6f 100644 (file)
@@ -487,7 +487,7 @@ static int pn533_send_cmd_async(struct pn533 *dev, u8 cmd_code,
 /*
  * pn533_send_cmd_direct_async
  *
- * The function sends a piority cmd directly to the chip omiting the cmd
+ * The function sends a priority cmd directly to the chip, omitting the cmd
  * queue. It's intended to be used by chaining mechanism of received responses
  * where the host has to request every single chunk of data before scheduling
  * next cmd from the queue.
index 44a1a257e0b598738765ab7001e8ec862a00cdc0..25ec4e58522058f70a302ad02811abccf5cb4e1a 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/ptrace.h>
 #include <linux/nvme_ioctl.h>
 #include <linux/t10-pi.h>
+#include <linux/pm_qos.h>
 #include <scsi/sg.h>
 #include <asm/unaligned.h>
 
@@ -56,6 +57,11 @@ EXPORT_SYMBOL_GPL(nvme_max_retries);
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
+static unsigned long default_ps_max_latency_us = 25000;
+module_param(default_ps_max_latency_us, ulong, 0644);
+MODULE_PARM_DESC(default_ps_max_latency_us,
+                "max power saving latency for new devices; use PM QOS to change per device");
+
 static LIST_HEAD(nvme_ctrl_list);
 static DEFINE_SPINLOCK(dev_list_lock);
 
@@ -560,7 +566,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 
        /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
        c.identify.opcode = nvme_admin_identify;
-       c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL);
+       c.identify.cns = NVME_ID_CNS_CTRL;
 
        *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
        if (!*id)
@@ -578,7 +584,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
        struct nvme_command c = { };
 
        c.identify.opcode = nvme_admin_identify;
-       c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST);
+       c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
        c.identify.nsid = cpu_to_le32(nsid);
        return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
 }
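Dropping the cpu_to_le32() conversions around cns is only correct if the field is now a single byte in the command layout: one-byte fields are endian-neutral, while wider wire fields still need explicit byte-swapping. A self-contained illustration under that assumption (example_cmd and fill_example are hypothetical, not the driver's real structures):

#include <linux/types.h>
#include <asm/byteorder.h>

struct example_cmd {
        __u8    cns;    /* one byte: assign directly, no swap */
        __u8    rsvd;
        __le16  ctrlid; /* convert with cpu_to_le16() */
        __le32  nsid;   /* convert with cpu_to_le32() */
};

static void fill_example(struct example_cmd *c, u8 cns, u32 nsid)
{
        c->cns = cns;                   /* endian-neutral */
        c->nsid = cpu_to_le32(nsid);    /* multi-byte: always swap */
}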
@@ -590,8 +596,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
        int error;
 
        /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
-       c.identify.opcode = nvme_admin_identify,
-       c.identify.nsid = cpu_to_le32(nsid),
+       c.identify.opcode = nvme_admin_identify;
+       c.identify.nsid = cpu_to_le32(nsid);
+       c.identify.cns = NVME_ID_CNS_NS;
 
        *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
        if (!*id)
@@ -1251,6 +1258,176 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
        blk_queue_write_cache(q, vwc, vwc);
 }
 
+static void nvme_configure_apst(struct nvme_ctrl *ctrl)
+{
+       /*
+        * APST (Autonomous Power State Transition) lets us program a
+        * table of power state transitions that the controller will
+        * perform automatically.  We configure it with a simple
+        * heuristic: we are willing to spend at most 2% of the time
+        * transitioning between power states.  Therefore, when running
+        * in any given state, we will enter the next lower-power
+        * non-operational state after waiting 50 * (enlat + exlat)
+        * microseconds, as long as that state's total latency is under
+        * the requested maximum latency.
+        *
+        * We will not autonomously enter any non-operational state for
+        * which the total latency exceeds ps_max_latency_us.  Users
+        * can set ps_max_latency_us to zero to turn off APST.
+        */
+
+       unsigned apste;
+       struct nvme_feat_auto_pst *table;
+       int ret;
+
+       /*
+        * If APST isn't supported or if we haven't been initialized yet,
+        * then don't do anything.
+        */
+       if (!ctrl->apsta)
+               return;
+
+       if (ctrl->npss > 31) {
+               dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
+               return;
+       }
+
+       table = kzalloc(sizeof(*table), GFP_KERNEL);
+       if (!table)
+               return;
+
+       if (ctrl->ps_max_latency_us == 0) {
+               /* Turn off APST. */
+               apste = 0;
+       } else {
+               __le64 target = cpu_to_le64(0);
+               int state;
+
+               /*
+                * Walk through all states from lowest- to highest-power.
+                * According to the spec, lower-numbered states use more
+                * power.  NPSS, despite the name, is the index of the
+                * lowest-power state, not the number of states.
+                */
+               for (state = (int)ctrl->npss; state >= 0; state--) {
+                       u64 total_latency_us, transition_ms;
+
+                       if (target)
+                               table->entries[state] = target;
+
+                       /*
+                        * Is this state a useful non-operational state for
+                        * higher-power states to autonomously transition to?
+                        */
+                       if (!(ctrl->psd[state].flags &
+                             NVME_PS_FLAGS_NON_OP_STATE))
+                               continue;
+
+                       total_latency_us =
+                               (u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
+                               le32_to_cpu(ctrl->psd[state].exit_lat);
+                       if (total_latency_us > ctrl->ps_max_latency_us)
+                               continue;
+
+                       /*
+                        * This state is good.  Use it as the APST idle
+                        * target for higher power states.
+                        */
+                       transition_ms = total_latency_us + 19;
+                       do_div(transition_ms, 20);
+                       if (transition_ms > (1 << 24) - 1)
+                               transition_ms = (1 << 24) - 1;
+
+                       target = cpu_to_le64((state << 3) |
+                                            (transition_ms << 8));
+               }
+
+               apste = 1;
+       }
+
+       ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
+                               table, sizeof(*table), NULL);
+       if (ret)
+               dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
+
+       kfree(table);
+}
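A hedged worked example of the entry arithmetic in nvme_configure_apst() above: the round-up division by 20 turns a latency in microseconds into a dwell time of 50 times that latency (the 2% budget), and the resulting entry packs the target state into bits 3..7 and the transition time in milliseconds into bits 8..31. apst_entry_example is a hypothetical standalone restatement, not driver code:

#include <linux/kernel.h>
#include <asm/div64.h>

/* Hypothetical sketch: for enlat + exlat = 10000 us this computes
 * (10000 + 19) / 20 = 500 ms, and state 4 then encodes as
 * (4 << 3) | (500 << 8) == 0x1f420. */
static u64 apst_entry_example(unsigned int state, u64 total_latency_us)
{
        u64 transition_ms = total_latency_us + 19;

        do_div(transition_ms, 20);
        if (transition_ms > (1 << 24) - 1)
                transition_ms = (1 << 24) - 1;

        return ((u64)state << 3) | (transition_ms << 8);
}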
+
+static void nvme_set_latency_tolerance(struct device *dev, s32 val)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       u64 latency;
+
+       switch (val) {
+       case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT:
+       case PM_QOS_LATENCY_ANY:
+               latency = U64_MAX;
+               break;
+
+       default:
+               latency = val;
+       }
+
+       if (ctrl->ps_max_latency_us != latency) {
+               ctrl->ps_max_latency_us = latency;
+               nvme_configure_apst(ctrl);
+       }
+}
+
+struct nvme_core_quirk_entry {
+       /*
+        * NVMe model and firmware strings are padded with spaces.  For
+        * simplicity, strings in the quirk table are padded with NULLs
+        * instead.
+        */
+       u16 vid;
+       const char *mn;
+       const char *fr;
+       unsigned long quirks;
+};
+
+static const struct nvme_core_quirk_entry core_quirks[] = {
+       /*
+        * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes
+        * the controller to go out to lunch.  It dies when the watchdog
+        * timer reads CSTS and gets 0xffffffff.
+        */
+       {
+               .vid = 0x144d,
+               .fr = "BXW75D0Q",
+               .quirks = NVME_QUIRK_NO_APST,
+       },
+};
+
+/* match is null-terminated but idstr is space-padded. */
+static bool string_matches(const char *idstr, const char *match, size_t len)
+{
+       size_t matchlen;
+
+       if (!match)
+               return true;
+
+       matchlen = strlen(match);
+       WARN_ON_ONCE(matchlen > len);
+
+       if (memcmp(idstr, match, matchlen))
+               return false;
+
+       for (; matchlen < len; matchlen++)
+               if (idstr[matchlen] != ' ')
+                       return false;
+
+       return true;
+}
+
+static bool quirk_matches(const struct nvme_id_ctrl *id,
+                         const struct nvme_core_quirk_entry *q)
+{
+       return q->vid == le16_to_cpu(id->vid) &&
+               string_matches(id->mn, q->mn, sizeof(id->mn)) &&
+               string_matches(id->fr, q->fr, sizeof(id->fr));
+}
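A hedged usage sketch for the two matchers above: Identify strings are fixed-width and space-padded, so a NUL-terminated quirk string matches when it is a prefix and every remaining byte is a space. The 8-byte idstr below is hypothetical:

/* "ABC" matches an 8-byte field holding "ABC" plus five spaces;
 * against "ABCD    " the same quirk string would fail at byte 3. */
static bool example_match(void)
{
        static const char idstr[8] = {
                'A', 'B', 'C', ' ', ' ', ' ', ' ', ' '
        };

        return string_matches(idstr, "ABC", sizeof(idstr));
}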
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
@@ -1262,6 +1439,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        u64 cap;
        int ret, page_shift;
        u32 max_hw_sectors;
+       u8 prev_apsta;
 
        ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
        if (ret) {
@@ -1285,6 +1463,24 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                return -EIO;
        }
 
+       if (!ctrl->identified) {
+               /*
+                * Check for quirks.  Quirk can depend on firmware version,
+                * so, in principle, the set of quirks present can change
+                * across a reset.  As a possible future enhancement, we
+                * could re-scan for quirks every time we reinitialize
+                * the device, but we'd have to make sure that the driver
+                * behaves intelligently if the quirks change.
+                */
+
+               int i;
+
+               for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
+                       if (quirk_matches(id, &core_quirks[i]))
+                               ctrl->quirks |= core_quirks[i].quirks;
+               }
+       }
+
        ctrl->oacs = le16_to_cpu(id->oacs);
        ctrl->vid = le16_to_cpu(id->vid);
        ctrl->oncs = le16_to_cpup(&id->oncs);
@@ -1305,6 +1501,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        ctrl->sgls = le32_to_cpu(id->sgls);
        ctrl->kas = le16_to_cpu(id->kas);
 
+       ctrl->npss = id->npss;
+       prev_apsta = ctrl->apsta;
+       ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta;
+       memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
+
        if (ctrl->ops->is_fabrics) {
                ctrl->icdoff = le16_to_cpu(id->icdoff);
                ctrl->ioccsz = le32_to_cpu(id->ioccsz);
@@ -1328,6 +1529,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        }
 
        kfree(id);
+
+       if (ctrl->apsta && !prev_apsta)
+               dev_pm_qos_expose_latency_tolerance(ctrl->device);
+       else if (!ctrl->apsta && prev_apsta)
+               dev_pm_qos_hide_latency_tolerance(ctrl->device);
+
+       nvme_configure_apst(ctrl);
+
+       ctrl->identified = true;
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_init_identify);
@@ -1577,6 +1788,29 @@ static ssize_t nvme_sysfs_show_transport(struct device *dev,
 }
 static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
 
+static ssize_t nvme_sysfs_show_state(struct device *dev,
+                                    struct device_attribute *attr,
+                                    char *buf)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       static const char *const state_name[] = {
+               [NVME_CTRL_NEW]         = "new",
+               [NVME_CTRL_LIVE]        = "live",
+               [NVME_CTRL_RESETTING]   = "resetting",
+               [NVME_CTRL_RECONNECTING] = "reconnecting",
+               [NVME_CTRL_DELETING]    = "deleting",
+               [NVME_CTRL_DEAD]        = "dead",
+       };
+
+       if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
+           state_name[ctrl->state])
+               return sprintf(buf, "%s\n", state_name[ctrl->state]);
+
+       return sprintf(buf, "unknown state\n");
+}
+
+static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
+
 static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
                                         struct device_attribute *attr,
                                         char *buf)
@@ -1609,6 +1843,7 @@ static struct attribute *nvme_dev_attrs[] = {
        &dev_attr_transport.attr,
        &dev_attr_subsysnqn.attr,
        &dev_attr_address.attr,
+       &dev_attr_state.attr,
        NULL
 };
 
@@ -2065,6 +2300,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        list_add_tail(&ctrl->node, &nvme_ctrl_list);
        spin_unlock(&dev_list_lock);
 
+       /*
+        * Initialize latency tolerance controls.  The sysfs files won't
+        * be visible to userspace unless the device actually supports APST.
+        */
+       ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance;
+       dev_pm_qos_update_user_latency_tolerance(ctrl->device,
+               min(default_ps_max_latency_us, (unsigned long)S32_MAX));
+
        return 0;
 out_release_instance:
        nvme_release_instance(ctrl);
@@ -2090,9 +2333,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
                 * Revalidating a dead namespace sets capacity to 0. This will
                 * end buffered writers dirtying pages that can't be synced.
                 */
-               if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
-                       revalidate_disk(ns->disk);
-
+               if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+                       continue;
+               revalidate_disk(ns->disk);
                blk_set_queue_dying(ns->queue);
                blk_mq_abort_requeue_list(ns->queue);
                blk_mq_start_stopped_hw_queues(ns->queue, true);
index 916d1360805964cbe6a095576ba2049af52384f8..5b7386f69f4de5571112bcc504134c8d99744793 100644 (file)
@@ -480,11 +480,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
  * being implemented to the common NVMe fabrics library. Part of
  * the overall init sequence of starting up a fabrics driver.
  */
-void nvmf_register_transport(struct nvmf_transport_ops *ops)
+int nvmf_register_transport(struct nvmf_transport_ops *ops)
 {
+       if (!ops->create_ctrl)
+               return -EINVAL;
+
        mutex_lock(&nvmf_transports_mutex);
        list_add_tail(&ops->entry, &nvmf_transports);
        mutex_unlock(&nvmf_transports_mutex);
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(nvmf_register_transport);
 
index 924145c979f136e167c72448b5998df15479a670..156018182ce43bbf70fe34fa1ff71b6df2e4456b 100644 (file)
@@ -128,7 +128,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
 int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
 int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
-void nvmf_register_transport(struct nvmf_transport_ops *ops);
+int nvmf_register_transport(struct nvmf_transport_ops *ops);
 void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
index fb51a8de9b29a770c93a34dfca7b3264e8dd0a6a..9690beb15e69ab47bb04345da5f142ec56141035 100644 (file)
@@ -2353,18 +2353,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
        /* sanity checks */
 
-       /* FC-NVME supports 64-byte SQE only */
-       if (ctrl->ctrl.ioccsz != 4) {
-               dev_err(ctrl->ctrl.device, "ioccsz %d is not supported!\n",
-                               ctrl->ctrl.ioccsz);
-               goto out_remove_admin_queue;
-       }
-       /* FC-NVME supports 16-byte CQE only */
-       if (ctrl->ctrl.iorcsz != 1) {
-               dev_err(ctrl->ctrl.device, "iorcsz %d is not supported!\n",
-                               ctrl->ctrl.iorcsz);
-               goto out_remove_admin_queue;
-       }
        /* FC-NVME does not have other data in the capsule */
        if (ctrl->ctrl.icdoff) {
                dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
@@ -2562,8 +2550,7 @@ static int __init nvme_fc_init_module(void)
        if (!nvme_fc_wq)
                return -ENOMEM;
 
-       nvmf_register_transport(&nvme_fc_transport);
-       return 0;
+       return nvmf_register_transport(&nvme_fc_transport);
 }
 
 static void __exit nvme_fc_exit_module(void)
index 14cfc6f7facb240a96630a637ccc60bd45911302..a3da1e90b99dbf1bb04177379c65567c992c2dfd 100644 (file)
@@ -78,6 +78,11 @@ enum nvme_quirks {
         * readiness, which is done by reading the NVME_CSTS_RDY bit.
         */
        NVME_QUIRK_DELAY_BEFORE_CHK_RDY         = (1 << 3),
+
+       /*
+        * APST should not be used.
+        */
+       NVME_QUIRK_NO_APST                      = (1 << 4),
 };
 
 /*
@@ -112,6 +117,7 @@ enum nvme_ctrl_state {
 
 struct nvme_ctrl {
        enum nvme_ctrl_state state;
+       bool identified;
        spinlock_t lock;
        const struct nvme_ctrl_ops *ops;
        struct request_queue *admin_q;
@@ -147,13 +153,19 @@ struct nvme_ctrl {
        u32 vs;
        u32 sgls;
        u16 kas;
+       u8 npss;
+       u8 apsta;
        unsigned int kato;
        bool subsystem;
        unsigned long quirks;
+       struct nvme_id_power_state psd[32];
        struct work_struct scan_work;
        struct work_struct async_event_work;
        struct delayed_work ka_work;
 
+       /* Power saving configuration */
+       u64 ps_max_latency_us;
+
        /* Fabrics only */
        u16 sqsize;
        u32 ioccsz;
index ddc51adb594d0ba3df2e800b9247e2b3cb161847..57a1af52b06e6674a0a3c84564cb31257db37c6e 100644 (file)
@@ -613,10 +613,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        spin_lock_irq(&nvmeq->q_lock);
        if (unlikely(nvmeq->cq_vector < 0)) {
-               if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
-                       ret = BLK_MQ_RQ_QUEUE_BUSY;
-               else
-                       ret = BLK_MQ_RQ_QUEUE_ERROR;
+               ret = BLK_MQ_RQ_QUEUE_ERROR;
                spin_unlock_irq(&nvmeq->q_lock);
                goto out_cleanup_iod;
        }
@@ -1739,7 +1736,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
        if (dev->ctrl.admin_q)
                blk_put_queue(dev->ctrl.admin_q);
        kfree(dev->queues);
-       kfree(dev->ctrl.opal_dev);
+       free_opal_dev(dev->ctrl.opal_dev);
        kfree(dev);
 }
 
@@ -1789,14 +1786,17 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
-       if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) {
-               dev->ctrl.opal_dev =
-                       init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+       if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
+               if (!dev->ctrl.opal_dev)
+                       dev->ctrl.opal_dev =
+                               init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+               else if (was_suspend)
+                       opal_unlock_from_suspend(dev->ctrl.opal_dev);
+       } else {
+               free_opal_dev(dev->ctrl.opal_dev);
+               dev->ctrl.opal_dev = NULL;
        }
 
-       if (was_suspend)
-               opal_unlock_from_suspend(dev->ctrl.opal_dev);
-
        result = nvme_setup_io_queues(dev);
        if (result)
                goto out;
@@ -2001,8 +2001,10 @@ static void nvme_remove(struct pci_dev *pdev)
 
        pci_set_drvdata(pdev, NULL);
 
-       if (!pci_device_is_present(pdev))
+       if (!pci_device_is_present(pdev)) {
                nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+               nvme_dev_disable(dev, false);
+       }
 
        flush_work(&dev->reset_work);
        nvme_uninit_ctrl(&dev->ctrl);
@@ -2121,6 +2123,7 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
+       { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
        { 0, }
 };
 MODULE_DEVICE_TABLE(pci, nvme_id_table);
index a75e95d42b3febf5edba65c4ba7ed5181aa08c20..779f516e7a4ec405ff919f9b248d21ff0f748b8b 100644 (file)
 
 #define NVME_RDMA_MAX_INLINE_SEGMENTS  1
 
-static const char *const nvme_rdma_cm_status_strs[] = {
-       [NVME_RDMA_CM_INVALID_LEN]      = "invalid length",
-       [NVME_RDMA_CM_INVALID_RECFMT]   = "invalid record format",
-       [NVME_RDMA_CM_INVALID_QID]      = "invalid queue ID",
-       [NVME_RDMA_CM_INVALID_HSQSIZE]  = "invalid host SQ size",
-       [NVME_RDMA_CM_INVALID_HRQSIZE]  = "invalid host RQ size",
-       [NVME_RDMA_CM_NO_RSC]           = "resource not found",
-       [NVME_RDMA_CM_INVALID_IRD]      = "invalid IRD",
-       [NVME_RDMA_CM_INVALID_ORD]      = "Invalid ORD",
-};
-
-static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
-{
-       size_t index = status;
-
-       if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
-           nvme_rdma_cm_status_strs[index])
-               return nvme_rdma_cm_status_strs[index];
-       else
-               return "unrecognized reason";
-};
-
 /*
  * We handle AEN commands ourselves and don't even let the
  * block layer know about them.
@@ -155,6 +133,10 @@ struct nvme_rdma_ctrl {
                struct sockaddr addr;
                struct sockaddr_in addr_in;
        };
+       union {
+               struct sockaddr src_addr;
+               struct sockaddr_in src_addr_in;
+       };
 
        struct nvme_ctrl        ctrl;
 };
@@ -567,6 +549,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
                int idx, size_t queue_size)
 {
        struct nvme_rdma_queue *queue;
+       struct sockaddr *src_addr = NULL;
        int ret;
 
        queue = &ctrl->queues[idx];
@@ -589,7 +572,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
        }
 
        queue->cm_error = -ETIMEDOUT;
-       ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr,
+       if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
+               src_addr = &ctrl->src_addr;
+
+       ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
                        NVME_RDMA_CONNECT_TIMEOUT_MS);
        if (ret) {
                dev_info(ctrl->ctrl.device,
@@ -1065,7 +1051,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
         * sequencer is not allocated in our driver's tagset and it's
         * triggered to be freed by blk_cleanup_queue(). So we need to
         * always mark it as signaled to ensure that the "wr_cqe", which is
-        * embeded in request's payload, is not freed when __ib_process_cq()
+        * embedded in request's payload, is not freed when __ib_process_cq()
         * calls wr_cqe->done().
         */
        if ((++queue->sig_count % 32) == 0 || flush)
@@ -1265,7 +1251,7 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
 
        dev = nvme_rdma_find_get_device(queue->cm_id);
        if (!dev) {
-               dev_err(queue->cm_id->device->dma_device,
+               dev_err(queue->cm_id->device->dev.parent,
                        "no client data found!\n");
                return -ECONNREFUSED;
        }
@@ -1905,6 +1891,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                goto out_free_ctrl;
        }
 
+       if (opts->mask & NVMF_OPT_HOST_TRADDR) {
+               ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
+                               opts->host_traddr);
+               if (ret) {
+                       pr_err("malformed src IP address passed: %s\n",
+                              opts->host_traddr);
+                       goto out_free_ctrl;
+               }
+       }
+
        if (opts->mask & NVMF_OPT_TRSVCID) {
                u16 port;
 
@@ -2016,7 +2012,8 @@ out_free_ctrl:
 static struct nvmf_transport_ops nvme_rdma_transport = {
        .name           = "rdma",
        .required_opts  = NVMF_OPT_TRADDR,
-       .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY,
+       .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
+                         NVMF_OPT_HOST_TRADDR,
        .create_ctrl    = nvme_rdma_create_ctrl,
 };
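The NVMF_OPT_HOST_TRADDR plumbing above feeds rdma_resolve_addr(), whose second argument is an optional source address: NULL lets the RDMA CM choose the route, while an explicit sockaddr pins the connection to a local interface. A minimal sketch under that reading (resolve_example and the 2000 ms timeout are hypothetical):

#include <rdma/rdma_cm.h>

/* Hypothetical helper: @src may be NULL (kernel-chosen source) or a
 * local address parsed from the user-supplied host_traddr. */
static int resolve_example(struct rdma_cm_id *cm_id,
                           struct sockaddr *src, struct sockaddr *dst)
{
        return rdma_resolve_addr(cm_id, src, dst, 2000);
}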
 
@@ -2063,8 +2060,7 @@ static int __init nvme_rdma_init_module(void)
                return ret;
        }
 
-       nvmf_register_transport(&nvme_rdma_transport);
-       return 0;
+       return nvmf_register_transport(&nvme_rdma_transport);
 }
 
 static void __exit nvme_rdma_cleanup_module(void)
index 95ae52390478fe62fdb59605ee2c7a6d0583a919..94e524fea5687b8de8ebf68d6469676cae4fa08d 100644 (file)
@@ -41,7 +41,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
        ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
        if (!ns) {
                status = NVME_SC_INVALID_NS;
-               pr_err("nvmet : Counld not find namespace id : %d\n",
+               pr_err("nvmet : Could not find namespace id : %d\n",
                                le32_to_cpu(req->cmd->get_log_page.nsid));
                goto out;
        }
@@ -509,7 +509,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req)
                break;
        case nvme_admin_identify:
                req->data_len = 4096;
-               switch (le32_to_cpu(cmd->identify.cns)) {
+               switch (cmd->identify.cns) {
                case NVME_ID_CNS_NS:
                        req->execute = nvmet_execute_identify_ns;
                        return 0;
index fc5ba2f9e15f47fe8bd13795bdb9d6caaa532b93..5267ce20c12d48b062d84bf7d43ad73585694214 100644 (file)
@@ -17,6 +17,7 @@
 #include "nvmet.h"
 
 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
+static DEFINE_IDA(cntlid_ida);
 
 /*
  * This read/write semaphore is used to synchronize access to configuration
@@ -749,7 +750,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
        if (!ctrl->sqs)
                goto out_free_cqs;
 
-       ret = ida_simple_get(&subsys->cntlid_ida,
+       ret = ida_simple_get(&cntlid_ida,
                             NVME_CNTLID_MIN, NVME_CNTLID_MAX,
                             GFP_KERNEL);
        if (ret < 0) {
@@ -819,7 +820,7 @@ static void nvmet_ctrl_free(struct kref *ref)
        flush_work(&ctrl->async_event_work);
        cancel_work_sync(&ctrl->fatal_err_work);
 
-       ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
+       ida_simple_remove(&cntlid_ida, ctrl->cntlid);
        nvmet_subsys_put(subsys);
 
        kfree(ctrl->sqs);
@@ -918,9 +919,6 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
        mutex_init(&subsys->lock);
        INIT_LIST_HEAD(&subsys->namespaces);
        INIT_LIST_HEAD(&subsys->ctrls);
-
-       ida_init(&subsys->cntlid_ida);
-
        INIT_LIST_HEAD(&subsys->hosts);
 
        return subsys;
@@ -933,7 +931,6 @@ static void nvmet_subsys_free(struct kref *ref)
 
        WARN_ON_ONCE(!list_empty(&subsys->namespaces));
 
-       ida_destroy(&subsys->cntlid_ida);
        kfree(subsys->subsysnqn);
        kfree(subsys);
 }
@@ -976,6 +973,7 @@ static void __exit nvmet_exit(void)
 {
        nvmet_exit_configfs();
        nvmet_exit_discovery();
+       ida_destroy(&cntlid_ida);
 
        BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
        BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
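The cntlid allocator above moves from a per-subsystem struct ida to one static instance, so controller IDs become unique across all subsystems and the pool is destroyed once at module exit. A minimal sketch of that lifecycle (the example_* names and the 1..0xffff range are hypothetical):

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDA(example_cntlid_ida);

static int example_alloc_cntlid(void)
{
        return ida_simple_get(&example_cntlid_ida, 1, 0xffff, GFP_KERNEL);
}

static void example_free_cntlid(int cntlid)
{
        ida_simple_remove(&example_cntlid_ida, cntlid);
}

static void example_exit(void)
{
        ida_destroy(&example_cntlid_ida);
}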
index 12f39eea569f2fb33cec45884c188d0ad8ae2493..af8aabf0533504971bef38fbdd02c3943b805a5f 100644 (file)
@@ -186,14 +186,14 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
                }
        case nvme_admin_identify:
                req->data_len = 4096;
-               switch (le32_to_cpu(cmd->identify.cns)) {
+               switch (cmd->identify.cns) {
                case NVME_ID_CNS_CTRL:
                        req->execute =
                                nvmet_execute_identify_disc_ctrl;
                        return 0;
                default:
                        pr_err("nvmet: unsupported identify cns %d\n",
-                               le32_to_cpu(cmd->identify.cns));
+                               cmd->identify.cns);
                        return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                }
        default:
index f4088198cd0d0a15b8b3da63f698612f568f2bb3..8bd022af3df6741ed1b08f10bb7b6dce40fe6925 100644 (file)
@@ -153,8 +153,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
                goto out;
        }
 
-       pr_info("creating controller %d for NQN %s.\n",
-                       ctrl->cntlid, ctrl->hostnqn);
+       pr_info("creating controller %d for subsystem %s for NQN %s.\n",
+               ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
        req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
 
 out:
@@ -220,7 +220,7 @@ int nvmet_parse_connect_cmd(struct nvmet_req *req)
 
        req->ns = NULL;
 
-       if (req->cmd->common.opcode != nvme_fabrics_command) {
+       if (cmd->common.opcode != nvme_fabrics_command) {
                pr_err("invalid command 0x%x on unconnected queue.\n",
                        cmd->fabrics.opcode);
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
index ba57f9852bde33b0ff3d0655d4c08313632a3a8f..8f483ee7868c56bdc55e174f226f368609b32159 100644 (file)
@@ -1817,16 +1817,14 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
                /* data no longer needed */
                nvmet_fc_free_tgt_pgs(fod);
 
-               if (fcpreq->fcp_error || abort)
-                       nvmet_req_complete(&fod->req, fcpreq->fcp_error);
-
+               nvmet_req_complete(&fod->req, fcpreq->fcp_error);
                return;
        }
 
        switch (fcpreq->op) {
 
        case NVMET_FCOP_WRITEDATA:
-               if (abort || fcpreq->fcp_error ||
+               if (fcpreq->fcp_error ||
                    fcpreq->transferred_length != fcpreq->transfer_length) {
                        nvmet_req_complete(&fod->req,
                                        NVME_SC_FC_TRANSPORT_ERROR);
@@ -1849,7 +1847,7 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
 
        case NVMET_FCOP_READDATA:
        case NVMET_FCOP_READDATA_RSP:
-               if (abort || fcpreq->fcp_error ||
+               if (fcpreq->fcp_error ||
                    fcpreq->transferred_length != fcpreq->transfer_length) {
                        /* data no longer needed */
                        nvmet_fc_free_tgt_pgs(fod);
index f3862e38f5748d8e21b4a55574cac30c7cf2054a..d1f06e7768ff1d7ff6ee787ff6d94eb01576252f 100644 (file)
@@ -724,8 +724,7 @@ static int __init nvme_loop_init_module(void)
        ret = nvmet_register_transport(&nvme_loop_ops);
        if (ret)
                return ret;
-       nvmf_register_transport(&nvme_loop_transport);
-       return 0;
+       return nvmf_register_transport(&nvme_loop_transport);
 }
 
 static void __exit nvme_loop_cleanup_module(void)
index cc7ad06b43a78a029dd76fac575c3f6ee57c9e92..1370eee0a3c0f6295722d22e0c103a2f6cece47b 100644 (file)
@@ -142,7 +142,6 @@ struct nvmet_subsys {
        unsigned int            max_nsid;
 
        struct list_head        ctrls;
-       struct ida              cntlid_ida;
 
        struct list_head        hosts;
        bool                    allow_any_host;
index 60990220bd831074bc3c8fdbd044ee3aed37a2db..9aa1da3778b3ac1d2262bfe9b845b65b9cd942d9 100644 (file)
@@ -1041,6 +1041,9 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id,
 {
        struct nvme_rdma_cm_rej rej;
 
+       pr_debug("rejecting connect request: status %d (%s)\n",
+                status, nvme_rdma_cm_msg(status));
+
        rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
        rej.sts = cpu_to_le16(status);
 
@@ -1091,7 +1094,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
        queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
        if (queue->idx < 0) {
                ret = NVME_RDMA_CM_NO_RSC;
-               goto out_free_queue;
+               goto out_destroy_sq;
        }
 
        ret = nvmet_rdma_alloc_rsps(queue);
@@ -1135,7 +1138,6 @@ out_destroy_sq:
 out_free_queue:
        kfree(queue);
 out_reject:
-       pr_debug("rejecting connect request with status code %d\n", ret);
        nvmet_rdma_cm_reject(cm_id, ret);
        return NULL;
 }
@@ -1188,7 +1190,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
        ndev = nvmet_rdma_find_get_device(cm_id);
        if (!ndev) {
-               pr_err("no client data!\n");
                nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC);
                return -ECONNREFUSED;
        }
index 553ef8a5d588685ff02f98bde7110dfb421accaa..aeb073b5fe1606475a615054c58e02060627cec9 100644 (file)
@@ -1011,7 +1011,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
        DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
 }
 
-static struct dma_map_ops ccio_ops = {
+static const struct dma_map_ops ccio_ops = {
        .dma_supported =        ccio_dma_supported,
        .alloc =                ccio_alloc,
        .free =                 ccio_free,
index 151b86b6d2e2e2069e512f8b7456d1c984d60775..33385e57443382ba090cd4a1e41b5bdf4af58936 100644 (file)
@@ -1069,7 +1069,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
 
 }
 
-static struct dma_map_ops sba_ops = {
+static const struct dma_map_ops sba_ops = {
        .dma_supported =        sba_dma_supported,
        .alloc =                sba_alloc,
        .free =                 sba_free,
index c0e7d21c88c25da720b7156b1143374a4d574325..75071605d22fc6688647811999d82748b9be67da 100644 (file)
@@ -307,7 +307,7 @@ size_t parport_ieee1284_read_byte (struct parport *port,
                if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
                end_of_data:
                        DPRINTK (KERN_DEBUG
-                                "%s: No more byte data (%Zd bytes)\n",
+                                "%s: No more byte data (%zd bytes)\n",
                                 port->name, count);
 
                        /* Go to reverse idle phase. */
index 78530d1714dc7422a597314e098a88513208c169..3e56e7deab8e8deabfe53f271417b999c27612e1 100644 (file)
@@ -902,7 +902,7 @@ static size_t parport_pc_ecp_write_block_pio(struct parport *port,
  *     ******************************************
  */
 
-/* GCC is not inlining extern inline function later overwriten to non-inline,
+/* GCC does not inline an extern inline function later overwritten to non-inline,
    so we use outlined_ variants here.  */
 static const struct parport_operations parport_pc_ops = {
        .write_data     = parport_pc_write_data,
index 18ef1a93c10ac191cec6b26760533a644f0c1934..e27ad2a3bd33f8de025aff5e320f858356860e73 100644 (file)
@@ -282,7 +282,7 @@ static struct device *to_vmd_dev(struct device *dev)
        return &vmd->dev->dev;
 }
 
-static struct dma_map_ops *vmd_dma_ops(struct device *dev)
+static const struct dma_map_ops *vmd_dma_ops(struct device *dev)
 {
        return get_dma_ops(to_vmd_dev(dev));
 }
index ca77d235867fe63aa46fbc2b989ec41f79e4c7be..f754453fe754e985361cb49cee0bddf54d752443 100644 (file)
@@ -3630,7 +3630,7 @@ static int __init pci_apply_final_quirks(void)
 fs_initcall_sync(pci_apply_final_quirks);
 
 /*
- * Followings are device-specific reset methods which can be used to
+ * Following are device-specific reset methods which can be used to
  * reset a single function if other methods (e.g. FLR, PM D0->D3) are
  * not available.
  */
index 8968dd7aebed2133b0103b39d7b752916dbfaab0..e8c4e4f934a6d388bef3e77b89dc19d1e3a6db66 100644 (file)
@@ -70,7 +70,7 @@ config PINCTRL_CYGNUS_MUX
 
          The Broadcom Cygnus IOMUX driver supports group based IOMUX
          configuration, with the exception that certain individual pins
-         can be overrided to GPIO function
+         can be overridden to the GPIO function
 
 config PINCTRL_NSP_GPIO
        bool "Broadcom NSP GPIO (with PINCONF) driver"
index 49a594855f98f43d151e44f5d07449cba1a5b095..4bc88eb52712c7a8231ac64b94bbfacd5607f1f5 100644 (file)
@@ -92,9 +92,8 @@ config ASUS_LAPTOP
          If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
 config DELL_SMBIOS
-       tristate "Dell SMBIOS Support"
-       depends on DCDBAS
-       default n
+       tristate
+       select DCDBAS
        ---help---
        This module provides common functions for kernel modules using
        Dell SMBIOS.
@@ -103,16 +102,15 @@ config DELL_SMBIOS
 
 config DELL_LAPTOP
        tristate "Dell Laptop Extras"
-       depends on DELL_SMBIOS
        depends on DMI
        depends on BACKLIGHT_CLASS_DEVICE
        depends on ACPI_VIDEO || ACPI_VIDEO = n
        depends on RFKILL || RFKILL = n
        depends on SERIO_I8042
+       select DELL_SMBIOS
        select POWER_SUPPLY
        select LEDS_CLASS
        select NEW_LEDS
-       default n
        ---help---
        This driver adds support for rfkill and backlight control to Dell
        laptops (except for some models covered by the Compal driver).
@@ -123,7 +121,7 @@ config DELL_WMI
        depends on DMI
        depends on INPUT
        depends on ACPI_VIDEO || ACPI_VIDEO = n
-       depends on DELL_SMBIOS
+       select DELL_SMBIOS
        select INPUT_SPARSEKMAP
        ---help---
          Say Y here if you want to support WMI-based hotkeys on Dell laptops.
@@ -1069,4 +1067,30 @@ config MLX_CPLD_PLATFORM
          This driver handles hot-plug events for the power suppliers, power
          cables and fans on the wide range Mellanox IB and Ethernet systems.
 
+config INTEL_TURBO_MAX_3
+       bool "Intel Turbo Boost Max Technology 3.0 enumeration driver"
+       depends on X86_64 && SCHED_MC_PRIO
+       ---help---
+         This driver reads the maximum performance ratio of each CPU and sets
+         up the scheduler priority metrics, so that the scheduler can prefer
+         CPUs with higher performance when scheduling tasks.
+         This driver is only required when the system is not using Hardware
+         P-States (HWP). In HWP mode, priority can be read from ACPI tables.
+
+config SILEAD_DMI
+       bool "Tablets with Silead touchscreens"
+       depends on ACPI && DMI && I2C=y && INPUT
+       ---help---
+         Certain ACPI based tablets with Silead touchscreens do not have
+         enough data in ACPI tables for the touchscreen driver to handle
+         the touchscreen properly, as OEMs expected the data to be baked
+         into the tablet model specific version of the driver shipped
+         with the OS-image for the device. This option supplies the missing
+         information. Enable this for x86 tablets with Silead touchscreens.
+
 endif # X86_PLATFORM_DEVICES
+
+config PMC_ATOM
+       def_bool y
+       depends on PCI
+       select COMMON_CLK
index b2f52a7690af2239fae006668a7f7bbdfc00cd10..299d0f9e40f7b309feaf534764a5e852f061aa14 100644 (file)
@@ -65,6 +65,7 @@ obj-$(CONFIG_INTEL_SMARTCONNECT)      += intel-smartconnect.o
 obj-$(CONFIG_PVPANIC)           += pvpanic.o
 obj-$(CONFIG_ALIENWARE_WMI)    += alienware-wmi.o
 obj-$(CONFIG_INTEL_PMC_IPC)    += intel_pmc_ipc.o
+obj-$(CONFIG_SILEAD_DMI)       += silead_dmi.o
 obj-$(CONFIG_SURFACE_PRO3_BUTTON)      += surfacepro3_button.o
 obj-$(CONFIG_SURFACE_3_BUTTON) += surface3_button.o
 obj-$(CONFIG_INTEL_PUNIT_IPC)  += intel_punit_ipc.o
@@ -73,5 +74,7 @@ obj-$(CONFIG_INTEL_TELEMETRY) += intel_telemetry_core.o \
                                   intel_telemetry_pltdrv.o \
                                   intel_telemetry_debugfs.o
 obj-$(CONFIG_INTEL_PMC_CORE)    += intel_pmc_core.o
+obj-$(CONFIG_PMC_ATOM)         += pmc_atom.o
 obj-$(CONFIG_MLX_PLATFORM)     += mlx-platform.o
 obj-$(CONFIG_MLX_CPLD_PLATFORM)        += mlxcpld-hotplug.o
+obj-$(CONFIG_INTEL_TURBO_MAX_3) += intel_turbo_max_3.o
index a66192f692e31afbad13294039121e81f0727f26..dac0fbe87460ebc5b816746830e626ba3876fe83 100644 (file)
@@ -128,6 +128,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst = {
        {KE_KEY, KEY_TOUCHPAD_OFF, {KEY_TOUCHPAD_OFF} },
        {KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} },
        {KE_KEY, 0x85, {KEY_TOUCHPAD_TOGGLE} },
+       {KE_KEY, 0x86, {KEY_WLAN} },
        {KE_END, 0}
 };
 
@@ -150,15 +151,30 @@ struct event_return_value {
 #define ACER_WMID3_GDS_BLUETOOTH       (1<<11) /* BT */
 #define ACER_WMID3_GDS_TOUCHPAD                (1<<1)  /* Touchpad */
 
-struct lm_input_params {
+/* Hotkey Customized Setting and Acer Application Status.
+ * Set Device Default Value and Report Acer Application Status.
+ * When Acer Application starts, it will run this method to inform
+ * BIOS/EC that Acer Application is on.
+ * App Status
+ *     Bit[0]: Launch Manager Status
+ *     Bit[1]: ePM Status
+ *     Bit[2]: Device Control Status
+ *     Bit[3]: Acer Power Button Utility Status
+ *     Bit[4]: RF Button Status
+ *     Bit[5]: ODD PM Status
+ *     Bit[6]: Device Default Value Control
+ *     Bit[7]: Hall Sensor Application Status
+ */
+struct func_input_params {
        u8 function_num;        /* Function Number */
        u16 commun_devices;     /* Communication type devices default status */
        u16 devices;            /* Other type devices default status */
-       u8 lm_status;           /* Launch Manager Status */
-       u16 reserved;
+       u8 app_status;          /* Acer Device Status. LM, ePM, RF Button... */
+       u8 app_mask;            /* Bit mask to app_status */
+       u8 reserved;
 } __attribute__((packed));
 
-struct lm_return_value {
+struct func_return_value {
        u8 error_code;          /* Error Code */
        u8 ec_return_value;     /* EC Return Value */
        u16 reserved;
@@ -1769,13 +1785,13 @@ static void acer_wmi_notify(u32 value, void *context)
 }
 
 static acpi_status __init
-wmid3_set_lm_mode(struct lm_input_params *params,
-                 struct lm_return_value *return_value)
+wmid3_set_function_mode(struct func_input_params *params,
+                       struct func_return_value *return_value)
 {
        acpi_status status;
        union acpi_object *obj;
 
-       struct acpi_buffer input = { sizeof(struct lm_input_params), params };
+       struct acpi_buffer input = { sizeof(struct func_input_params), params };
        struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
 
        status = wmi_evaluate_method(WMID_GUID3, 0, 0x1, &input, &output);
@@ -1796,7 +1812,7 @@ wmid3_set_lm_mode(struct lm_input_params *params,
                return AE_ERROR;
        }
 
-       *return_value = *((struct lm_return_value *)obj->buffer.pointer);
+       *return_value = *((struct func_return_value *)obj->buffer.pointer);
        kfree(obj);
 
        return status;
@@ -1804,16 +1820,17 @@ wmid3_set_lm_mode(struct lm_input_params *params,
 
 static int __init acer_wmi_enable_ec_raw(void)
 {
-       struct lm_return_value return_value;
+       struct func_return_value return_value;
        acpi_status status;
-       struct lm_input_params params = {
+       struct func_input_params params = {
                .function_num = 0x1,
                .commun_devices = 0xFFFF,
                .devices = 0xFFFF,
-               .lm_status = 0x00,            /* Launch Manager Deactive */
+               .app_status = 0x00,             /* Launch Manager Inactive */
+               .app_mask = 0x01,
        };
 
-       status = wmid3_set_lm_mode(&params, &return_value);
+       status = wmid3_set_function_mode(&params, &return_value);
 
        if (return_value.error_code || return_value.ec_return_value)
                pr_warn("Enabling EC raw mode failed: 0x%x - 0x%x\n",
@@ -1827,16 +1844,17 @@ static int __init acer_wmi_enable_ec_raw(void)
 
 static int __init acer_wmi_enable_lm(void)
 {
-       struct lm_return_value return_value;
+       struct func_return_value return_value;
        acpi_status status;
-       struct lm_input_params params = {
+       struct func_input_params params = {
                .function_num = 0x1,
                .commun_devices = 0xFFFF,
                .devices = 0xFFFF,
-               .lm_status = 0x01,            /* Launch Manager Active */
+               .app_status = 0x01,            /* Launch Manager Active */
+               .app_mask = 0x01,
        };
 
-       status = wmid3_set_lm_mode(&params, &return_value);
+       status = wmid3_set_function_mode(&params, &return_value);
 
        if (return_value.error_code || return_value.ec_return_value)
                pr_warn("Enabling Launch Manager failed: 0x%x - 0x%x\n",
@@ -1846,11 +1864,46 @@ static int __init acer_wmi_enable_lm(void)
        return status;
 }
 
+static int __init acer_wmi_enable_rf_button(void)
+{
+       struct func_return_value return_value;
+       acpi_status status;
+       struct func_input_params params = {
+               .function_num = 0x1,
+               .commun_devices = 0xFFFF,
+               .devices = 0xFFFF,
+               .app_status = 0x10,            /* RF Button Active */
+               .app_mask = 0x10,
+       };
+
+       status = wmid3_set_function_mode(&params, &return_value);
+
+       if (return_value.error_code || return_value.ec_return_value)
+               pr_warn("Enabling RF Button failed: 0x%x - 0x%x\n",
+                       return_value.error_code,
+                       return_value.ec_return_value);
+
+       return status;
+}
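A hedged reading of the app_status/app_mask pairing used above, based on the field comments earlier in this file: app_mask selects which status bits a call may change, so enabling the RF Button writes bit 4 under a bit-4 mask without disturbing the Launch Manager bit. The masked update below is an assumed firmware behavior; apply_app_mask is hypothetical:

/* Hypothetical illustration: only bits set in @mask are taken from
 * @new_status (assumed BIOS/EC semantics, not confirmed by this patch). */
static u8 apply_app_mask(u8 cur_status, u8 new_status, u8 mask)
{
        return (cur_status & ~mask) | (new_status & mask);
}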
+
+#define ACER_WMID_ACCEL_HID    "BST0001"
+
 static acpi_status __init acer_wmi_get_handle_cb(acpi_handle ah, u32 level,
                                                void *ctx, void **retval)
 {
+       struct acpi_device *dev;
+
+       if (!strcmp(ctx, "SENR")) {
+               if (acpi_bus_get_device(ah, &dev))
+                       return AE_OK;
+               if (strcmp(ACER_WMID_ACCEL_HID, acpi_device_hid(dev)))
+                       return AE_OK;
+       } else
+               return AE_OK;
+
        *(acpi_handle *)retval = ah;
-       return AE_OK;
+
+       return AE_CTRL_TERMINATE;
 }
 
 static int __init acer_wmi_get_handle(const char *name, const char *prop,
@@ -1877,7 +1930,7 @@ static int __init acer_wmi_accel_setup(void)
 {
        int err;
 
-       err = acer_wmi_get_handle("SENR", "BST0001", &gsensor_handle);
+       err = acer_wmi_get_handle("SENR", ACER_WMID_ACCEL_HID, &gsensor_handle);
        if (err)
                return err;
 
@@ -2216,6 +2269,9 @@ static int __init acer_wmi_init(void)
                interface->capability &= ~ACER_CAP_BRIGHTNESS;
 
        if (wmi_has_guid(WMID_GUID3)) {
+               if (ACPI_FAILURE(acer_wmi_enable_rf_button()))
+                       pr_warn("Cannot enable RF Button Driver\n");
+
                if (ec_raw_mode) {
                        if (ACPI_FAILURE(acer_wmi_enable_ec_raw())) {
                                pr_err("Cannot enable EC raw mode\n");
@@ -2233,10 +2289,11 @@ static int __init acer_wmi_init(void)
                err = acer_wmi_input_setup();
                if (err)
                        return err;
+               err = acer_wmi_accel_setup();
+               if (err)
+                       return err;
        }
 
-       acer_wmi_accel_setup();
-
        err = platform_driver_register(&acer_platform_driver);
        if (err) {
                pr_err("Unable to register platform driver\n");
index 005629447b0ce624b24ed02835e3d9314065a50a..d6b34923fb4edd061ab18a57d2c8d281136f84b7 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/dmi.h>
-#include <linux/acpi.h>
 #include <linux/leds.h>
 
 #define LEGACY_CONTROL_GUID            "A90597CE-A997-11DA-B012-B622A1EF5492"
index 9f31bc1a47d01b9c88072f60444d53917a63af84..f3796164329efb9ea00aed180a8c1116b813c979 100644 (file)
 #include <linux/pci_ids.h>
 #include <linux/leds.h>
 
-#define ASUS_WIRELESS_LED_STATUS 0x2
-#define ASUS_WIRELESS_LED_OFF 0x4
-#define ASUS_WIRELESS_LED_ON 0x5
+struct hswc_params {
+       u8 on;
+       u8 off;
+       u8 status;
+};
 
 struct asus_wireless_data {
        struct input_dev *idev;
        struct acpi_device *adev;
+       const struct hswc_params *hswc_params;
        struct workqueue_struct *wq;
        struct work_struct led_work;
        struct led_classdev led;
        int led_state;
 };
 
+static const struct hswc_params atk4001_id_params = {
+       .on = 0x0,
+       .off = 0x1,
+       .status = 0x2,
+};
+
+static const struct hswc_params atk4002_id_params = {
+       .on = 0x5,
+       .off = 0x4,
+       .status = 0x2,
+};
+
+static const struct acpi_device_id device_ids[] = {
+       {"ATK4001", (kernel_ulong_t)&atk4001_id_params},
+       {"ATK4002", (kernel_ulong_t)&atk4002_id_params},
+       {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, device_ids);
+
 static u64 asus_wireless_method(acpi_handle handle, const char *method,
                                int param)
 {
@@ -61,8 +83,8 @@ static enum led_brightness led_state_get(struct led_classdev *led)
 
        data = container_of(led, struct asus_wireless_data, led);
        s = asus_wireless_method(acpi_device_handle(data->adev), "HSWC",
-                                ASUS_WIRELESS_LED_STATUS);
-       if (s == ASUS_WIRELESS_LED_ON)
+                                data->hswc_params->status);
+       if (s == data->hswc_params->on)
                return LED_FULL;
        return LED_OFF;
 }
@@ -76,14 +98,13 @@ static void led_state_update(struct work_struct *work)
                             data->led_state);
 }
 
-static void led_state_set(struct led_classdev *led,
-                                 enum led_brightness value)
+static void led_state_set(struct led_classdev *led, enum led_brightness value)
 {
        struct asus_wireless_data *data;
 
        data = container_of(led, struct asus_wireless_data, led);
-       data->led_state = value == LED_OFF ? ASUS_WIRELESS_LED_OFF :
-                                            ASUS_WIRELESS_LED_ON;
+       data->led_state = value == LED_OFF ? data->hswc_params->off :
+                                            data->hswc_params->on;
        queue_work(data->wq, &data->led_work);
 }
 
@@ -104,12 +125,14 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event)
 static int asus_wireless_add(struct acpi_device *adev)
 {
        struct asus_wireless_data *data;
+       const struct acpi_device_id *id;
        int err;
 
        data = devm_kzalloc(&adev->dev, sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
        adev->driver_data = data;
+       data->adev = adev;
 
        data->idev = devm_input_allocate_device(&adev->dev);
        if (!data->idev)
@@ -124,7 +147,16 @@ static int asus_wireless_add(struct acpi_device *adev)
        if (err)
                return err;
 
-       data->adev = adev;
+       for (id = device_ids; id->id[0]; id++) {
+               if (!strcmp((char *) id->id, acpi_device_hid(adev))) {
+                       data->hswc_params =
+                               (const struct hswc_params *)id->driver_data;
+                       break;
+               }
+       }
+       if (!data->hswc_params)
+               return 0;
+
        data->wq = create_singlethread_workqueue("asus_wireless_workqueue");
        if (!data->wq)
                return -ENOMEM;
@@ -137,6 +169,7 @@ static int asus_wireless_add(struct acpi_device *adev)
        err = devm_led_classdev_register(&adev->dev, &data->led);
        if (err)
                destroy_workqueue(data->wq);
+
        return err;
 }
 
@@ -149,13 +182,6 @@ static int asus_wireless_remove(struct acpi_device *adev)
        return 0;
 }
 
-static const struct acpi_device_id device_ids[] = {
-       {"ATK4001", 0},
-       {"ATK4002", 0},
-       {"", 0},
-};
-MODULE_DEVICE_TABLE(acpi, device_ids);
-
 static struct acpi_driver asus_wireless_driver = {
        .name = "Asus Wireless Radio Control Driver",
        .class = "hotkey",
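[Editor's note] The asus-wireless change above moves the LED command values into per-HID parameter blocks and stashes a pointer to each in the driver_data field of the matching acpi_device_id entry, resolved at probe time by comparing HIDs. A minimal sketch of that pattern, with a hypothetical HID and values (not part of the patch):

	struct params { u8 on, off, status; };

	static const struct params example_params = {
		.on = 0x0, .off = 0x1, .status = 0x2,
	};

	static const struct acpi_device_id example_ids[] = {
		/* driver_data is a kernel_ulong_t, wide enough for a pointer */
		{ "XMPL0001", (kernel_ulong_t)&example_params },
		{ "", 0 },
	};

	static const struct params *lookup_params(struct acpi_device *adev)
	{
		const struct acpi_device_id *id;

		for (id = example_ids; id->id[0]; id++)
			if (!strcmp((char *)id->id, acpi_device_hid(adev)))
				return (const struct params *)id->driver_data;
		return NULL;	/* unknown HID: probe degrades gracefully */
	}

The open-coded loop mirrors what the patch itself does; the ACPI core's acpi_match_device() performs the same kind of lookup.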
index 14392a01ab360aceaf80c8424e2b4854ac5ea264..f57dd282a0021a5645eab7fb56b6c7a10307a4d1 100644 (file)
@@ -105,6 +105,12 @@ static const struct dmi_system_id dell_device_table[] __initconst = {
                        DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /*Laptop*/
                },
        },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /*Notebook*/
+               },
+       },
        {
                .ident = "Dell Computer Corporation",
                .matches = {
index 82d67715ce76d9ff0442ecdcdbd2dbb324380203..2b218b1d13e55dc985a2ca27e44b6a6ddf905141 100644 (file)
@@ -202,6 +202,7 @@ static int radio_led_set(struct led_classdev *cdev,
 
 static struct led_classdev radio_led = {
  .name = "fujitsu::radio_led",
+ .default_trigger = "rfkill-any",
  .brightness_get = radio_led_get,
  .brightness_set_blocking = radio_led_set
 };
@@ -270,15 +271,20 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
 static int logolamp_set(struct led_classdev *cdev,
                               enum led_brightness brightness)
 {
-       if (brightness >= LED_FULL) {
-               call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON);
-               return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_ON);
-       } else if (brightness >= LED_HALF) {
-               call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON);
-               return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_OFF);
-       } else {
-               return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_OFF);
-       }
+       int poweron = FUNC_LED_ON, always = FUNC_LED_ON;
+       int ret;
+
+       if (brightness < LED_HALF)
+               poweron = FUNC_LED_OFF;
+
+       if (brightness < LED_FULL)
+               always = FUNC_LED_OFF;
+
+       ret = call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, poweron);
+       if (ret < 0)
+               return ret;
+
+       return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, always);
 }
 
 static int kblamps_set(struct led_classdev *cdev,
@@ -313,17 +319,17 @@ static int eco_led_set(struct led_classdev *cdev,
 
 static enum led_brightness logolamp_get(struct led_classdev *cdev)
 {
-       enum led_brightness brightness = LED_OFF;
-       int poweron, always;
-
-       poweron = call_fext_func(FUNC_LEDS, 0x2, LOGOLAMP_POWERON, 0x0);
-       if (poweron == FUNC_LED_ON) {
-               brightness = LED_HALF;
-               always = call_fext_func(FUNC_LEDS, 0x2, LOGOLAMP_ALWAYS, 0x0);
-               if (always == FUNC_LED_ON)
-                       brightness = LED_FULL;
-       }
-       return brightness;
+       int ret;
+
+       ret = call_fext_func(FUNC_LEDS, 0x2, LOGOLAMP_ALWAYS, 0x0);
+       if (ret == FUNC_LED_ON)
+               return LED_FULL;
+
+       ret = call_fext_func(FUNC_LEDS, 0x2, LOGOLAMP_POWERON, 0x0);
+       if (ret == FUNC_LED_ON)
+               return LED_HALF;
+
+       return LED_OFF;
 }
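[Editor's note] The rewritten logolamp handlers map one brightness value onto two firmware flags: LOGOLAMP_POWERON gates the lamp at all, LOGOLAMP_ALWAYS keeps it lit beyond power-on. The resulting encoding, for reference:

	/*
	 * brightness             POWERON   ALWAYS    logolamp_get()
	 * < LED_HALF             OFF       OFF       LED_OFF
	 * >= LED_HALF, < FULL    ON        OFF       LED_HALF
	 * >= LED_FULL            ON        ON        LED_FULL
	 */

logolamp_get() checks ALWAYS first so that LED_FULL is decided with a single firmware call.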
 
 static enum led_brightness kblamps_get(struct led_classdev *cdev)
@@ -1029,107 +1035,117 @@ static int acpi_fujitsu_hotkey_remove(struct acpi_device *device)
        return 0;
 }
 
+static void acpi_fujitsu_hotkey_press(int keycode)
+{
+       struct input_dev *input = fujitsu_hotkey->input;
+       int status;
+
+       status = kfifo_in_locked(&fujitsu_hotkey->fifo,
+                                (unsigned char *)&keycode, sizeof(keycode),
+                                &fujitsu_hotkey->fifo_lock);
+       if (status != sizeof(keycode)) {
+               vdbg_printk(FUJLAPTOP_DBG_WARN,
+                           "Could not push keycode [0x%x]\n", keycode);
+               return;
+       }
+       input_report_key(input, keycode, 1);
+       input_sync(input);
+       vdbg_printk(FUJLAPTOP_DBG_TRACE,
+                   "Push keycode into ringbuffer [%d]\n", keycode);
+}
+
+static void acpi_fujitsu_hotkey_release(void)
+{
+       struct input_dev *input = fujitsu_hotkey->input;
+       int keycode, status;
+
+       while (true) {
+               status = kfifo_out_locked(&fujitsu_hotkey->fifo,
+                                         (unsigned char *)&keycode,
+                                         sizeof(keycode),
+                                         &fujitsu_hotkey->fifo_lock);
+               if (status != sizeof(keycode))
+                       return;
+               input_report_key(input, keycode, 0);
+               input_sync(input);
+               vdbg_printk(FUJLAPTOP_DBG_TRACE,
+                           "Pop keycode from ringbuffer [%d]\n", keycode);
+       }
+}
+
 static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 {
        struct input_dev *input;
-       int keycode, keycode_r;
+       int keycode;
        unsigned int irb = 1;
-       int i, status;
+       int i;
 
        input = fujitsu_hotkey->input;
 
+       if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
+               keycode = KEY_UNKNOWN;
+               vdbg_printk(FUJLAPTOP_DBG_WARN,
+                           "Unsupported event [0x%x]\n", event);
+               input_report_key(input, keycode, 1);
+               input_sync(input);
+               input_report_key(input, keycode, 0);
+               input_sync(input);
+               return;
+       }
+
        if (fujitsu_hotkey->rfkill_supported)
                fujitsu_hotkey->rfkill_state =
                        call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
 
-       switch (event) {
-       case ACPI_FUJITSU_NOTIFY_CODE1:
-               i = 0;
-               while ((irb =
-                       call_fext_func(FUNC_BUTTONS, 0x1, 0x0, 0x0)) != 0
-                               && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
-                       switch (irb & 0x4ff) {
-                       case KEY1_CODE:
-                               keycode = fujitsu->keycode1;
-                               break;
-                       case KEY2_CODE:
-                               keycode = fujitsu->keycode2;
-                               break;
-                       case KEY3_CODE:
-                               keycode = fujitsu->keycode3;
-                               break;
-                       case KEY4_CODE:
-                               keycode = fujitsu->keycode4;
-                               break;
-                       case KEY5_CODE:
-                               keycode = fujitsu->keycode5;
-                               break;
-                       case 0:
-                               keycode = 0;
-                               break;
-                       default:
-                               vdbg_printk(FUJLAPTOP_DBG_WARN,
-                                           "Unknown GIRB result [%x]\n", irb);
-                               keycode = -1;
-                               break;
-                       }
-                       if (keycode > 0) {
-                               vdbg_printk(FUJLAPTOP_DBG_TRACE,
-                                       "Push keycode into ringbuffer [%d]\n",
-                                       keycode);
-                               status = kfifo_in_locked(&fujitsu_hotkey->fifo,
-                                                  (unsigned char *)&keycode,
-                                                  sizeof(keycode),
-                                                  &fujitsu_hotkey->fifo_lock);
-                               if (status != sizeof(keycode)) {
-                                       vdbg_printk(FUJLAPTOP_DBG_WARN,
-                                           "Could not push keycode [0x%x]\n",
-                                           keycode);
-                               } else {
-                                       input_report_key(input, keycode, 1);
-                                       input_sync(input);
-                               }
-                       } else if (keycode == 0) {
-                               while ((status =
-                                       kfifo_out_locked(
-                                        &fujitsu_hotkey->fifo,
-                                        (unsigned char *) &keycode_r,
-                                        sizeof(keycode_r),
-                                        &fujitsu_hotkey->fifo_lock))
-                                        == sizeof(keycode_r)) {
-                                       input_report_key(input, keycode_r, 0);
-                                       input_sync(input);
-                                       vdbg_printk(FUJLAPTOP_DBG_TRACE,
-                                         "Pop keycode from ringbuffer [%d]\n",
-                                         keycode_r);
-                               }
-                       }
+       i = 0;
+       while ((irb =
+               call_fext_func(FUNC_BUTTONS, 0x1, 0x0, 0x0)) != 0
+                       && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
+               switch (irb & 0x4ff) {
+               case KEY1_CODE:
+                       keycode = fujitsu->keycode1;
+                       break;
+               case KEY2_CODE:
+                       keycode = fujitsu->keycode2;
+                       break;
+               case KEY3_CODE:
+                       keycode = fujitsu->keycode3;
+                       break;
+               case KEY4_CODE:
+                       keycode = fujitsu->keycode4;
+                       break;
+               case KEY5_CODE:
+                       keycode = fujitsu->keycode5;
+                       break;
+               case 0:
+                       keycode = 0;
+                       break;
+               default:
+                       vdbg_printk(FUJLAPTOP_DBG_WARN,
+                                   "Unknown GIRB result [%x]\n", irb);
+                       keycode = -1;
+                       break;
                }
 
-               /* On some models (first seen on the Skylake-based Lifebook
-                * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
-                * handled in software; its state is queried using FUNC_RFKILL
-                */
-               if ((fujitsu_hotkey->rfkill_supported & BIT(26)) &&
-                   (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) {
-                       keycode = KEY_TOUCHPAD_TOGGLE;
-                       input_report_key(input, keycode, 1);
-                       input_sync(input);
-                       input_report_key(input, keycode, 0);
-                       input_sync(input);
-               }
+               if (keycode > 0)
+                       acpi_fujitsu_hotkey_press(keycode);
+               else if (keycode == 0)
+                       acpi_fujitsu_hotkey_release();
+       }
 
-               break;
-       default:
-               keycode = KEY_UNKNOWN;
-               vdbg_printk(FUJLAPTOP_DBG_WARN,
-                           "Unsupported event [0x%x]\n", event);
+       /* On some models (first seen on the Skylake-based Lifebook
+        * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
+        * handled in software; its state is queried using FUNC_RFKILL
+        */
+       if ((fujitsu_hotkey->rfkill_supported & BIT(26)) &&
+           (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) {
+               keycode = KEY_TOUCHPAD_TOGGLE;
                input_report_key(input, keycode, 1);
                input_sync(input);
                input_report_key(input, keycode, 0);
                input_sync(input);
-               break;
        }
+
 }
 
 /* Initialization */
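[Editor's note] The hotkey refactor splits the handling in two: acpi_fujitsu_hotkey_press() queues the keycode into a kfifo and reports key-down, while acpi_fujitsu_hotkey_release() drains the fifo and reports key-up for everything queued, since the firmware only signals "a key was released" (code 0), not which one. A standalone sketch of that buffering pattern (fifo size is illustrative; INIT_KFIFO() is assumed to run before first use):

	#include <linux/input.h>
	#include <linux/kfifo.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_fifo_lock);
	static DECLARE_KFIFO(demo_fifo, unsigned char, 64);	/* power of two */

	static void demo_press(struct input_dev *input, int keycode)
	{
		/* remember the keycode so the matching release can be reported */
		if (kfifo_in_locked(&demo_fifo, (unsigned char *)&keycode,
				    sizeof(keycode),
				    &demo_fifo_lock) != sizeof(keycode))
			return;			/* fifo full: drop the event */
		input_report_key(input, keycode, 1);
		input_sync(input);
	}

	static void demo_release(struct input_dev *input)
	{
		int keycode;

		/* release everything still queued */
		while (kfifo_out_locked(&demo_fifo, (unsigned char *)&keycode,
					sizeof(keycode),
					&demo_fifo_lock) == sizeof(keycode)) {
			input_report_key(input, keycode, 0);
			input_sync(input);
		}
	}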
index 09356684c32f6a78f03dbc5ef1dd7addc828bf08..493d8910a74e2eca10812c3202a74cdb21ed37cd 100644 (file)
@@ -251,6 +251,7 @@ static const struct dmi_system_id lis3lv02d_dmi_ids[] = {
        AXIS_DMI_MATCH("HPB64xx", "HP EliteBook 84", xy_swap),
        AXIS_DMI_MATCH("HPB65xx", "HP ProBook 65", x_inverted),
        AXIS_DMI_MATCH("HPZBook15", "HP ZBook 15", x_inverted),
+       AXIS_DMI_MATCH("HPZBook17", "HP ZBook 17", xy_swap_yz_inverted),
        { NULL, }
 /* Laptop models without axis info (yet):
  * "NC6910" "HP Compaq 6910"
index cb3ab2b212b1768b8355aced7d7956f9e1ed2185..bcf438f38781019a2db04847e77aa4bfce023eee 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Intel HID event driver for Windows 8
+ *  Intel HID event & 5 button array driver
  *
  *  Copyright (C) 2015 Alex Hung <alex.hung@canonical.com>
  *  Copyright (C) 2015 Andrew Lutomirski <luto@kernel.org>
@@ -57,8 +57,24 @@ static const struct key_entry intel_hid_keymap[] = {
        { KE_END },
 };
 
+/* 5 button array notification values. */
+static const struct key_entry intel_array_keymap[] = {
+       { KE_KEY,    0xC2, { KEY_LEFTMETA } },                /* Press */
+       { KE_IGNORE, 0xC3, { KEY_LEFTMETA } },                /* Release */
+       { KE_KEY,    0xC4, { KEY_VOLUMEUP } },                /* Press */
+       { KE_IGNORE, 0xC5, { KEY_VOLUMEUP } },                /* Release */
+       { KE_KEY,    0xC6, { KEY_VOLUMEDOWN } },              /* Press */
+       { KE_IGNORE, 0xC7, { KEY_VOLUMEDOWN } },              /* Release */
+       { KE_SW,     0xC8, { .sw = { SW_ROTATE_LOCK, 1 } } }, /* Press */
+       { KE_SW,     0xC9, { .sw = { SW_ROTATE_LOCK, 0 } } }, /* Release */
+       { KE_KEY,    0xCE, { KEY_POWER } },                   /* Press */
+       { KE_IGNORE, 0xCF, { KEY_POWER } },                   /* Release */
+       { KE_END },
+};
+
 struct intel_hid_priv {
        struct input_dev *input_dev;
+       struct input_dev *array;
 };
 
 static int intel_hid_set_enable(struct device *device, int enable)
@@ -78,15 +94,43 @@ static int intel_hid_set_enable(struct device *device, int enable)
        return 0;
 }
 
+static void intel_button_array_enable(struct device *device, bool enable)
+{
+       struct intel_hid_priv *priv = dev_get_drvdata(device);
+       acpi_handle handle = ACPI_HANDLE(device);
+       unsigned long long button_cap;
+       acpi_status status;
+
+       if (!priv->array)
+               return;
+
+       /* Query supported platform features */
+       status = acpi_evaluate_integer(handle, "BTNC", NULL, &button_cap);
+       if (ACPI_FAILURE(status)) {
+               dev_warn(device, "failed to get button capability\n");
+               return;
+       }
+
+       /* Enable/disable features - the power button is always enabled */
+       status = acpi_execute_simple_method(handle, "BTNE",
+                                           enable ? button_cap : 1);
+       if (ACPI_FAILURE(status))
+               dev_warn(device, "failed to set button capability\n");
+}
+
 static int intel_hid_pl_suspend_handler(struct device *device)
 {
        intel_hid_set_enable(device, 0);
+       intel_button_array_enable(device, false);
+
        return 0;
 }
 
 static int intel_hid_pl_resume_handler(struct device *device)
 {
        intel_hid_set_enable(device, 1);
+       intel_button_array_enable(device, true);
+
        return 0;
 }
 
@@ -126,6 +170,27 @@ err_free_device:
        return ret;
 }
 
+static int intel_button_array_input_setup(struct platform_device *device)
+{
+       struct intel_hid_priv *priv = dev_get_drvdata(&device->dev);
+       int ret;
+
+       /* Setup input device for 5 button array */
+       priv->array = devm_input_allocate_device(&device->dev);
+       if (!priv->array)
+               return -ENOMEM;
+
+       ret = sparse_keymap_setup(priv->array, intel_array_keymap, NULL);
+       if (ret)
+               return ret;
+
+       priv->array->dev.parent = &device->dev;
+       priv->array->name = "Intel HID 5 button array";
+       priv->array->id.bustype = BUS_HOST;
+
+       return input_register_device(priv->array);
+}
+
 static void intel_hid_input_destroy(struct platform_device *device)
 {
        struct intel_hid_priv *priv = dev_get_drvdata(&device->dev);
@@ -140,10 +205,11 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
        unsigned long long ev_index;
        acpi_status status;
 
-       /* The platform spec only defines one event code: 0xC0. */
+       /* 0xC0 is for HID events; other values are for the 5 button array */
        if (event != 0xc0) {
-               dev_warn(&device->dev, "received unknown event (0x%x)\n",
-                        event);
+               if (!priv->array ||
+                   !sparse_keymap_report_event(priv->array, event, 1, true))
+                       dev_info(&device->dev, "unknown event 0x%x\n", event);
                return;
        }
 
@@ -161,8 +227,8 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
 static int intel_hid_probe(struct platform_device *device)
 {
        acpi_handle handle = ACPI_HANDLE(&device->dev);
+       unsigned long long event_cap, mode;
        struct intel_hid_priv *priv;
-       unsigned long long mode;
        acpi_status status;
        int err;
 
@@ -193,6 +259,15 @@ static int intel_hid_probe(struct platform_device *device)
                return err;
        }
 
+       /* Setup 5 button array */
+       status = acpi_evaluate_integer(handle, "HEBC", NULL, &event_cap);
+       if (ACPI_SUCCESS(status) && (event_cap & 0x20000)) {
+               dev_info(&device->dev, "platform supports 5 button array\n");
+               err = intel_button_array_input_setup(device);
+               if (err)
+                       pr_err("Failed to setup Intel 5 button array hotkeys\n");
+       }
+
        status = acpi_install_notify_handler(handle,
                                             ACPI_DEVICE_NOTIFY,
                                             notify_handler,
@@ -206,6 +281,16 @@ static int intel_hid_probe(struct platform_device *device)
        if (err)
                goto err_remove_notify;
 
+       if (priv->array) {
+               intel_button_array_enable(&device->dev, true);
+
+               /* Call button load method to enable HID power button */
+               status = acpi_evaluate_object(handle, "BTNL", NULL, NULL);
+               if (ACPI_FAILURE(status))
+                       dev_warn(&device->dev,
+                                "failed to enable HID power button\n");
+       }
+
        return 0;
 
 err_remove_notify:
@@ -224,6 +309,7 @@ static int intel_hid_remove(struct platform_device *device)
        acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler);
        intel_hid_input_destroy(device);
        intel_hid_set_enable(&device->dev, 0);
+       intel_button_array_enable(&device->dev, false);
 
        /*
         * Even if we failed to shut off the event stream, we can still
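[Editor's note] The 5 button array piggybacks on the sparse-keymap helpers: KE_KEY entries translate a press notify value into a key event, KE_IGNORE silently swallows the matching release codes, and KE_SW entries drive switch state (here SW_ROTATE_LOCK). With autorelease enabled, a single press code yields both key-down and key-up. A sketch of the dispatch, with a hypothetical two-entry map (sparse_keymap_setup() on the device is assumed):

	#include <linux/input/sparse-keymap.h>

	static const struct key_entry demo_keymap[] = {
		{ KE_KEY,    0xC4, { KEY_VOLUMEUP } },	/* press */
		{ KE_IGNORE, 0xC5, { KEY_VOLUMEUP } },	/* release: swallowed */
		{ KE_END },
	};

	static void demo_notify(struct input_dev *array, struct device *dev,
				u32 event)
	{
		/* value 1 = pressed; true = synthesize the release too */
		if (!sparse_keymap_report_event(array, event, 1, true))
			dev_info(dev, "unknown event 0x%x\n", event);
	}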
index 361770568ad03a6e7a3bc7e6d579ccacae1725b7..871cfa682519c2256126edae36dffefc9537ed8b 100644 (file)
@@ -1,7 +1,10 @@
 /*
- * Power button driver for Medfield.
+ * Power button driver for Intel MID platforms.
  *
- * Copyright (C) 2010 Intel Corp
+ * Copyright (C) 2010,2017 Intel Corp
+ *
+ * Author: Hong Liu <hong.liu@intel.com>
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  */
 
-#include <linux/module.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/platform_device.h>
 #include <linux/input.h>
+#include <linux/interrupt.h>
 #include <linux/mfd/intel_msic.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
+#include <linux/slab.h>
+
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/intel_scu_ipc.h>
 
 #define DRIVER_NAME "msic_power_btn"
 
  */
 #define MSIC_PWRBTNM    (1 << 0)
 
-static irqreturn_t mfld_pb_isr(int irq, void *dev_id)
+/* Intel Tangier */
+#define BCOVE_PB_LEVEL         (1 << 4)        /* 1 - release, 0 - press */
+
+/* Basin Cove PMIC */
+#define BCOVE_PBIRQ            0x02
+#define BCOVE_IRQLVL1MSK       0x0c
+#define BCOVE_PBIRQMASK                0x0d
+#define BCOVE_PBSTATUS         0x27
+
+struct mid_pb_ddata {
+       struct device *dev;
+       int irq;
+       struct input_dev *input;
+       unsigned short mirqlvl1_addr;
+       unsigned short pbstat_addr;
+       u8 pbstat_mask;
+       int (*setup)(struct mid_pb_ddata *ddata);
+};
+
+static int mid_pbstat(struct mid_pb_ddata *ddata, int *value)
 {
-       struct input_dev *input = dev_id;
+       struct input_dev *input = ddata->input;
        int ret;
        u8 pbstat;
 
-       ret = intel_msic_reg_read(INTEL_MSIC_PBSTATUS, &pbstat);
+       ret = intel_scu_ipc_ioread8(ddata->pbstat_addr, &pbstat);
+       if (ret)
+               return ret;
+
        dev_dbg(input->dev.parent, "PB_INT status= %d\n", pbstat);
 
+       *value = !(pbstat & ddata->pbstat_mask);
+       return 0;
+}
+
+static int mid_irq_ack(struct mid_pb_ddata *ddata)
+{
+       return intel_scu_ipc_update_register(ddata->mirqlvl1_addr, 0, MSIC_PWRBTNM);
+}
+
+static int mrfld_setup(struct mid_pb_ddata *ddata)
+{
+       /* Unmask the PBIRQ and MPBIRQ on Tangier */
+       intel_scu_ipc_update_register(BCOVE_PBIRQ, 0, MSIC_PWRBTNM);
+       intel_scu_ipc_update_register(BCOVE_PBIRQMASK, 0, MSIC_PWRBTNM);
+
+       return 0;
+}
+
+static irqreturn_t mid_pb_isr(int irq, void *dev_id)
+{
+       struct mid_pb_ddata *ddata = dev_id;
+       struct input_dev *input = ddata->input;
+       int value = 0;
+       int ret;
+
+       ret = mid_pbstat(ddata, &value);
        if (ret < 0) {
-               dev_err(input->dev.parent, "Read error %d while reading"
-                              " MSIC_PB_STATUS\n", ret);
+               dev_err(input->dev.parent,
+                       "Read error %d while reading MSIC_PB_STATUS\n", ret);
        } else {
-               input_event(input, EV_KEY, KEY_POWER,
-                              !(pbstat & MSIC_PB_LEVEL));
+               input_event(input, EV_KEY, KEY_POWER, value);
                input_sync(input);
        }
 
+       mid_irq_ack(ddata);
        return IRQ_HANDLED;
 }
 
-static int mfld_pb_probe(struct platform_device *pdev)
+static struct mid_pb_ddata mfld_ddata = {
+       .mirqlvl1_addr  = INTEL_MSIC_IRQLVL1MSK,
+       .pbstat_addr    = INTEL_MSIC_PBSTATUS,
+       .pbstat_mask    = MSIC_PB_LEVEL,
+};
+
+static struct mid_pb_ddata mrfld_ddata = {
+       .mirqlvl1_addr  = BCOVE_IRQLVL1MSK,
+       .pbstat_addr    = BCOVE_PBSTATUS,
+       .pbstat_mask    = BCOVE_PB_LEVEL,
+       .setup  = mrfld_setup,
+};
+
+#define ICPU(model, ddata)     \
+       { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
+
+static const struct x86_cpu_id mid_pb_cpu_ids[] = {
+       ICPU(INTEL_FAM6_ATOM_PENWELL,           mfld_ddata),
+       ICPU(INTEL_FAM6_ATOM_MERRIFIELD,        mrfld_ddata),
+       {}
+};
+
+static int mid_pb_probe(struct platform_device *pdev)
 {
+       const struct x86_cpu_id *id;
+       struct mid_pb_ddata *ddata;
        struct input_dev *input;
        int irq = platform_get_irq(pdev, 0);
        int error;
 
+       id = x86_match_cpu(mid_pb_cpu_ids);
+       if (!id)
+               return -ENODEV;
+
        if (irq < 0)
                return -EINVAL;
 
-       input = input_allocate_device();
+       input = devm_input_allocate_device(&pdev->dev);
        if (!input)
                return -ENOMEM;
 
@@ -77,25 +156,36 @@ static int mfld_pb_probe(struct platform_device *pdev)
 
        input_set_capability(input, EV_KEY, KEY_POWER);
 
-       error = request_threaded_irq(irq, NULL, mfld_pb_isr, IRQF_ONESHOT,
-                                    DRIVER_NAME, input);
-       if (error) {
-               dev_err(&pdev->dev, "Unable to request irq %d for mfld power"
-                               "button\n", irq);
-               goto err_free_input;
+       ddata = (struct mid_pb_ddata *)id->driver_data;
+       if (!ddata)
+               return -ENODATA;
+
+       ddata->dev = &pdev->dev;
+       ddata->irq = irq;
+       ddata->input = input;
+
+       if (ddata->setup) {
+               error = ddata->setup(ddata);
+               if (error)
+                       return error;
        }
 
-       device_init_wakeup(&pdev->dev, true);
-       dev_pm_set_wake_irq(&pdev->dev, irq);
+       error = devm_request_threaded_irq(&pdev->dev, irq, NULL, mid_pb_isr,
+                                         IRQF_ONESHOT, DRIVER_NAME, ddata);
+       if (error) {
+               dev_err(&pdev->dev,
+                       "Unable to request irq %d for MID power button\n", irq);
+               return error;
+       }
 
        error = input_register_device(input);
        if (error) {
-               dev_err(&pdev->dev, "Unable to register input dev, error "
-                               "%d\n", error);
-               goto err_free_irq;
+               dev_err(&pdev->dev,
+                       "Unable to register input dev, error %d\n", error);
+               return error;
        }
 
-       platform_set_drvdata(pdev, input);
+       platform_set_drvdata(pdev, ddata);
 
        /*
         * SCU firmware might send power button interrupts to IA core before
@@ -107,46 +197,39 @@ static int mfld_pb_probe(struct platform_device *pdev)
         * initialization. The race happens rarely. So we needn't worry
         * about it.
         */
-       error = intel_msic_reg_update(INTEL_MSIC_IRQLVL1MSK, 0, MSIC_PWRBTNM);
+       error = mid_irq_ack(ddata);
        if (error) {
-               dev_err(&pdev->dev, "Unable to clear power button interrupt, "
-                               "error: %d\n", error);
-               goto err_free_irq;
+               dev_err(&pdev->dev,
+                       "Unable to clear power button interrupt, error: %d\n",
+                       error);
+               return error;
        }
 
-       return 0;
+       device_init_wakeup(&pdev->dev, true);
+       dev_pm_set_wake_irq(&pdev->dev, irq);
 
-err_free_irq:
-       free_irq(irq, input);
-err_free_input:
-       input_free_device(input);
-       return error;
+       return 0;
 }
 
-static int mfld_pb_remove(struct platform_device *pdev)
+static int mid_pb_remove(struct platform_device *pdev)
 {
-       struct input_dev *input = platform_get_drvdata(pdev);
-       int irq = platform_get_irq(pdev, 0);
-
        dev_pm_clear_wake_irq(&pdev->dev);
        device_init_wakeup(&pdev->dev, false);
-       free_irq(irq, input);
-       input_unregister_device(input);
 
        return 0;
 }
 
-static struct platform_driver mfld_pb_driver = {
+static struct platform_driver mid_pb_driver = {
        .driver = {
                .name = DRIVER_NAME,
        },
-       .probe  = mfld_pb_probe,
-       .remove = mfld_pb_remove,
+       .probe  = mid_pb_probe,
+       .remove = mid_pb_remove,
 };
 
-module_platform_driver(mfld_pb_driver);
+module_platform_driver(mid_pb_driver);
 
 MODULE_AUTHOR("Hong Liu <hong.liu@intel.com>");
-MODULE_DESCRIPTION("Intel Medfield Power Button Driver");
+MODULE_DESCRIPTION("Intel MID Power Button Driver");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:" DRIVER_NAME);
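[Editor's note] With the rename from mfld_pb to mid_pb the driver becomes multi-platform: each SoC gets a mid_pb_ddata describing its register layout plus an optional setup() hook, selected at probe through x86_match_cpu(). The dispatch pattern in isolation (struct contents and register value are hypothetical):

	#include <asm/cpu_device_id.h>
	#include <asm/intel-family.h>

	struct soc_data {
		unsigned short status_reg;	/* illustrative field */
	};

	static struct soc_data penwell_data = { .status_reg = 0x3f };

	#define ICPU(model, data) \
		{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&data }

	static const struct x86_cpu_id demo_ids[] = {
		ICPU(INTEL_FAM6_ATOM_PENWELL, penwell_data),
		{}
	};

	static struct soc_data *match_soc(void)
	{
		const struct x86_cpu_id *id = x86_match_cpu(demo_ids);

		return id ? (struct soc_data *)id->driver_data : NULL;
	}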
index 0df3c9d375096e77234aba0048fdd59366b12696..008a76903cbfb62221ac4fd4bb8aaa7b46c56fb1 100644 (file)
@@ -549,9 +549,9 @@ static int mid_thermal_remove(struct platform_device *pdev)
 
 static const struct platform_device_id therm_id_table[] = {
        { DRIVER_NAME, 1 },
-       { "msic_thermal", 1 },
        { }
 };
+MODULE_DEVICE_TABLE(platform, therm_id_table);
 
 static struct platform_driver mid_thermal_driver = {
        .driver = {
index b130b8c9b9d7b429e9dbb0b17926285337ab1d78..914bcd2edbde6b172ab47dfbec1ddbb2f93880e1 100644 (file)
@@ -188,8 +188,7 @@ static int pmc_core_check_read_lock_bit(void)
        u32 value;
 
        value = pmc_core_reg_read(pmcdev, SPT_PMC_PM_CFG_OFFSET);
-       return test_bit(SPT_PMC_READ_DISABLE_BIT,
-                       (unsigned long *)&value);
+       return value & BIT(SPT_PMC_READ_DISABLE_BIT);
 }
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -238,8 +237,7 @@ static int pmc_core_mtpmc_link_status(void)
        u32 value;
 
        value = pmc_core_reg_read(pmcdev, SPT_PMC_PM_STS_OFFSET);
-       return test_bit(SPT_PMC_MSG_FULL_STS_BIT,
-                       (unsigned long *)&value);
+       return value & BIT(SPT_PMC_MSG_FULL_STS_BIT);
 }
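[Editor's note] The two hunks above drop test_bit() in favor of a plain mask test. test_bit() operates on unsigned long bitmaps, so pointing it at a stack u32 needs a cast and, on 64-bit targets, reads beyond the variable; `value & BIT(n)` tests the same bit without that hazard. An equivalent helper, for illustration:

	#include <linux/bitops.h>

	static bool u32_bit_is_set(u32 value, unsigned int n)
	{
		/* same truth value as the old test_bit(), no bitmap cast */
		return value & BIT(n);
	}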
 
 static int pmc_core_send_msg(u32 *addr_xram)
index 0bf51d574fa9a47ada5e6e0b3c98d6cb4d710581..0651d47b8eeb7e59aa371ff61388a1df79b25ce0 100644 (file)
 #include <linux/notifier.h>
 #include <linux/suspend.h>
 #include <linux/acpi.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+
 #include <asm/intel_pmc_ipc.h>
+
 #include <linux/platform_data/itco_wdt.h>
 
 /*
 #define IPC_WRITE_BUFFER       0x80
 #define IPC_READ_BUFFER                0x90
 
+/* PMC Global Control Registers */
+#define GCR_TELEM_DEEP_S0IX_OFFSET     0x1078
+#define GCR_TELEM_SHLW_S0IX_OFFSET     0x1080
+
+/* Convert S0ix residency from 19.2 MHz clock ticks to microseconds */
+#define S0IX_RESIDENCY_IN_USECS(d, s)          \
+({                                             \
+       u64 result = 10ull * ((d) + (s));       \
+       do_div(result, 192);                    \
+       result;                                 \
+})
+
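[Editor's note] For reference, the macro works out to usecs = ticks / 19.2, computed as 10 * ticks / 192 so that do_div() can perform the 64-bit division on 32-bit kernels. Worked check:

	usecs = 10 * (deep + shlw) / 192 = (deep + shlw) / 19.2

	e.g. deep + shlw = 19,200,000 ticks
	     10 * 19,200,000 / 192 = 1,000,000 us = 1 s at 19.2 MHz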
 /*
  * 16-byte buffer for sending data associated with IPC command.
  */
@@ -68,7 +83,7 @@
 #define PLAT_RESOURCE_IPC_INDEX                0
 #define PLAT_RESOURCE_IPC_SIZE         0x1000
 #define PLAT_RESOURCE_GCR_OFFSET       0x1008
-#define PLAT_RESOURCE_GCR_SIZE         0x4
+#define PLAT_RESOURCE_GCR_SIZE         0x1000
 #define PLAT_RESOURCE_BIOS_DATA_INDEX  1
 #define PLAT_RESOURCE_BIOS_IFACE_INDEX 2
 #define PLAT_RESOURCE_TELEM_SSRAM_INDEX        3
 #define TCO_PMC_OFFSET                 0x8
 #define TCO_PMC_SIZE                   0x4
 
-static const int iTCO_version = 3;
-
 static struct intel_pmc_ipc_dev {
        struct device *dev;
        void __iomem *ipc_base;
@@ -115,6 +128,7 @@ static struct intel_pmc_ipc_dev {
        /* gcr */
        resource_size_t gcr_base;
        int gcr_size;
+       bool has_gcr_regs;
 
        /* punit */
        struct platform_device *punit_dev;
@@ -180,6 +194,11 @@ static inline u32 ipc_data_readl(u32 offset)
        return readl(ipcdev.ipc_base + IPC_READ_BUFFER + offset);
 }
 
+static inline u64 gcr_data_readq(u32 offset)
+{
+       return readq(ipcdev.ipc_base + offset);
+}
+
 static int intel_pmc_ipc_check_status(void)
 {
        int status;
@@ -389,6 +408,7 @@ static void ipc_pci_remove(struct pci_dev *pdev)
 static const struct pci_device_id ipc_pci_ids[] = {
        {PCI_VDEVICE(INTEL, 0x0a94), 0},
        {PCI_VDEVICE(INTEL, 0x1a94), 0},
+       {PCI_VDEVICE(INTEL, 0x5a94), 0},
        { 0,}
 };
 MODULE_DEVICE_TABLE(pci, ipc_pci_ids);
@@ -712,7 +732,8 @@ static int ipc_plat_get_res(struct platform_device *pdev)
                dev_err(&pdev->dev, "Failed to get ipc resource\n");
                return -ENXIO;
        }
-       size = PLAT_RESOURCE_IPC_SIZE;
+       size = PLAT_RESOURCE_IPC_SIZE + PLAT_RESOURCE_GCR_SIZE;
+
        if (!request_mem_region(res->start, size, pdev->name)) {
                dev_err(&pdev->dev, "Failed to request ipc resource\n");
                return -EBUSY;
@@ -748,6 +769,28 @@ static int ipc_plat_get_res(struct platform_device *pdev)
        return 0;
 }
 
+/**
+ * intel_pmc_s0ix_counter_read() - Read S0ix residency.
+ * @data: Out param that contains current S0ix residency count.
+ *
+ * Return: an error code or 0 on success.
+ */
+int intel_pmc_s0ix_counter_read(u64 *data)
+{
+       u64 deep, shlw;
+
+       if (!ipcdev.has_gcr_regs)
+               return -EACCES;
+
+       deep = gcr_data_readq(GCR_TELEM_DEEP_S0IX_OFFSET);
+       shlw = gcr_data_readq(GCR_TELEM_SHLW_S0IX_OFFSET);
+
+       *data = S0IX_RESIDENCY_IN_USECS(deep, shlw);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(intel_pmc_s0ix_counter_read);
+
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id ipc_acpi_ids[] = {
        { "INT34D2", 0},
@@ -797,6 +840,8 @@ static int ipc_plat_probe(struct platform_device *pdev)
                goto err_sys;
        }
 
+       ipcdev.has_gcr_regs = true;
+
        return 0;
 err_sys:
        free_irq(ipcdev.irq, &ipcdev);
@@ -808,8 +853,11 @@ err_device:
        iounmap(ipcdev.ipc_base);
        res = platform_get_resource(pdev, IORESOURCE_MEM,
                                    PLAT_RESOURCE_IPC_INDEX);
-       if (res)
-               release_mem_region(res->start, PLAT_RESOURCE_IPC_SIZE);
+       if (res) {
+               release_mem_region(res->start,
+                                  PLAT_RESOURCE_IPC_SIZE +
+                                  PLAT_RESOURCE_GCR_SIZE);
+       }
        return ret;
 }
 
@@ -825,8 +873,11 @@ static int ipc_plat_remove(struct platform_device *pdev)
        iounmap(ipcdev.ipc_base);
        res = platform_get_resource(pdev, IORESOURCE_MEM,
                                    PLAT_RESOURCE_IPC_INDEX);
-       if (res)
-               release_mem_region(res->start, PLAT_RESOURCE_IPC_SIZE);
+       if (res) {
+               release_mem_region(res->start,
+                                  PLAT_RESOURCE_IPC_SIZE +
+                                  PLAT_RESOURCE_GCR_SIZE);
+       }
        ipcdev.dev = NULL;
        return 0;
 }
diff --git a/drivers/platform/x86/intel_turbo_max_3.c b/drivers/platform/x86/intel_turbo_max_3.c
new file mode 100644 (file)
index 0000000..4f60d8e
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Intel Turbo Boost Max Technology 3.0 legacy (non HWP) enumeration driver
+ * Copyright (c) 2017, Intel Corporation.
+ * All rights reserved.
+ *
+ * Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/topology.h>
+#include <linux/workqueue.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpufeature.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+
+#define MSR_OC_MAILBOX                 0x150
+#define MSR_OC_MAILBOX_CMD_OFFSET      32
+#define MSR_OC_MAILBOX_RSP_OFFSET      32
+#define MSR_OC_MAILBOX_BUSY_BIT                63
+#define OC_MAILBOX_FC_CONTROL_CMD      0x1C
+
+/*
+ * Typical latency to get a mailbox response is ~3 us; it takes an
+ * additional ~3 us to read back the mailbox after issuing the write on a
+ * Broadwell 3.4 GHz system. So most of the time the first mailbox read
+ * should have the response, but retry twice to cover boundary cases.
+ */
+#define OC_MAILBOX_RETRY_COUNT         2
+
+static int get_oc_core_priority(unsigned int cpu)
+{
+       u64 value, cmd = OC_MAILBOX_FC_CONTROL_CMD;
+       int ret, i;
+
+       /* Issue favored core read command */
+       value = cmd << MSR_OC_MAILBOX_CMD_OFFSET;
+       /* Set the busy bit to indicate OS is trying to issue command */
+       value |=  BIT_ULL(MSR_OC_MAILBOX_BUSY_BIT);
+       ret = wrmsrl_safe(MSR_OC_MAILBOX, value);
+       if (ret) {
+               pr_debug("cpu %d OC mailbox write failed\n", cpu);
+               return ret;
+       }
+
+       for (i = 0; i < OC_MAILBOX_RETRY_COUNT; ++i) {
+               ret = rdmsrl_safe(MSR_OC_MAILBOX, &value);
+               if (ret) {
+                       pr_debug("cpu %d OC mailbox read failed\n", cpu);
+                       break;
+               }
+
+               if (value & BIT_ULL(MSR_OC_MAILBOX_BUSY_BIT)) {
+                       pr_debug("cpu %d OC mailbox still processing\n", cpu);
+                       ret = -EBUSY;
+                       continue;
+               }
+
+               if ((value >> MSR_OC_MAILBOX_RSP_OFFSET) & 0xff) {
+                       pr_debug("cpu %d OC mailbox cmd failed\n", cpu);
+                       ret = -ENXIO;
+                       break;
+               }
+
+               ret = value & 0xff;
+               pr_debug("cpu %d max_ratio %d\n", cpu, ret);
+               break;
+       }
+
+       return ret;
+}
+
+/*
+ * The work item is needed to avoid CPU hotplug locking issues. The
+ * function itmt_legacy_cpu_online() is called from the CPU online
+ * callback, so it can't call sched_set_itmt_support() from there as that
+ * function will acquire hotplug locks in its path.
+ */
+static void itmt_legacy_work_fn(struct work_struct *work)
+{
+       sched_set_itmt_support();
+}
+
+static DECLARE_WORK(sched_itmt_work, itmt_legacy_work_fn);
+
+static int itmt_legacy_cpu_online(unsigned int cpu)
+{
+       static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
+       int priority;
+
+       priority = get_oc_core_priority(cpu);
+       if (priority < 0)
+               return 0;
+
+       sched_set_itmt_core_prio(priority, cpu);
+
+       /* Enable ITMT feature when a core with different priority is found */
+       if (max_highest_perf <= min_highest_perf) {
+               if (priority > max_highest_perf)
+                       max_highest_perf = priority;
+
+               if (priority < min_highest_perf)
+                       min_highest_perf = priority;
+
+               if (max_highest_perf > min_highest_perf)
+                       schedule_work(&sched_itmt_work);
+       }
+
+       return 0;
+}
+
+#define ICPU(model)     { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
+
+static const struct x86_cpu_id itmt_legacy_cpu_ids[] = {
+       ICPU(INTEL_FAM6_BROADWELL_X),
+       {}
+};
+
+static int __init itmt_legacy_init(void)
+{
+       const struct x86_cpu_id *id;
+       int ret;
+
+       id = x86_match_cpu(itmt_legacy_cpu_ids);
+       if (!id)
+               return -ENODEV;
+
+       if (boot_cpu_has(X86_FEATURE_HWP))
+               return -ENODEV;
+
+       ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+                               "platform/x86/turbo_max_3:online",
+                               itmt_legacy_cpu_online, NULL);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+late_initcall(itmt_legacy_init)
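[Editor's note] itmt_legacy_cpu_online() above only turns scheduler ITMT support on once core asymmetry is actually observed: it tracks the lowest and highest priority reported so far and triggers the enable work the first time they differ. The detection logic, condensed into a standalone sketch (the driver defers sched_set_itmt_support() to a work item to avoid taking hotplug locks from the online callback):

	#include <linux/kernel.h>

	static u32 max_seen;			/* highest priority so far */
	static u32 min_seen = U32_MAX;		/* lowest priority so far */
	static bool itmt_enabled;

	static void note_core_priority(u32 prio)
	{
		if (itmt_enabled)
			return;			/* nothing left to decide */

		if (prio > max_seen)
			max_seen = prio;
		if (prio < min_seen)
			min_seen = prio;

		/* ITMT only helps when cores have different priorities */
		if (max_seen > min_seen)
			itmt_enabled = true;	/* driver: schedule_work() here */
	}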
index 25f15df5c2d7b3c37b82e099f301831c403caa7d..8f98c211b440c0a7658d54980daa5fc46a195da3 100644 (file)
 /* LPC bus IO offsets */
 #define MLXPLAT_CPLD_LPC_I2C_BASE_ADRR         0x2000
 #define MLXPLAT_CPLD_LPC_REG_BASE_ADRR         0x2500
+#define MLXPLAT_CPLD_LPC_REG_AGGR_ADRR         0x253a
+#define MLXPLAT_CPLD_LPC_REG_PSU_ADRR          0x2558
+#define MLXPLAT_CPLD_LPC_REG_PWR_ADRR          0x2564
+#define MLXPLAT_CPLD_LPC_REG_FAN_ADRR          0x2588
 #define MLXPLAT_CPLD_LPC_IO_RANGE              0x100
 #define MLXPLAT_CPLD_LPC_I2C_CH1_OFF           0xdb
 #define MLXPLAT_CPLD_LPC_I2C_CH2_OFF           0xda
                                  MLXPLAT_CPLD_LPC_I2C_CH2_OFF) | \
                                  MLXPLAT_CPLD_LPC_PIO_OFFSET)
 
+/* Masks for aggregation, psu, pwr and fan event in CPLD related registers. */
+#define MLXPLAT_CPLD_AGGR_PSU_MASK_DEF 0x08
+#define MLXPLAT_CPLD_AGGR_PWR_MASK_DEF 0x08
+#define MLXPLAT_CPLD_AGGR_FAN_MASK_DEF 0x40
+#define MLXPLAT_CPLD_AGGR_MASK_DEF     (MLXPLAT_CPLD_AGGR_PSU_MASK_DEF | \
+                                        MLXPLAT_CPLD_AGGR_FAN_MASK_DEF)
+#define MLXPLAT_CPLD_AGGR_MASK_MSN21XX 0x04
+#define MLXPLAT_CPLD_PSU_MASK          GENMASK(1, 0)
+#define MLXPLAT_CPLD_PWR_MASK          GENMASK(1, 0)
+#define MLXPLAT_CPLD_FAN_MASK          GENMASK(3, 0)
+
 /* Start channel numbers */
 #define MLXPLAT_CPLD_CH1                       2
 #define MLXPLAT_CPLD_CH2                       10
@@ -123,7 +138,7 @@ static struct i2c_mux_reg_platform_data mlxplat_mux_data[] = {
 };
 
 /* Platform hotplug devices */
-static struct mlxcpld_hotplug_device mlxplat_mlxcpld_hotplug_psu[] = {
+static struct mlxcpld_hotplug_device mlxplat_mlxcpld_psu[] = {
        {
                .brdinfo = { I2C_BOARD_INFO("24c02", 0x51) },
                .bus = 10,
@@ -134,7 +149,7 @@ static struct mlxcpld_hotplug_device mlxplat_mlxcpld_hotplug_psu[] = {
        },
 };
 
-static struct mlxcpld_hotplug_device mlxplat_mlxcpld_hotplug_pwr[] = {
+static struct mlxcpld_hotplug_device mlxplat_mlxcpld_pwr[] = {
        {
                .brdinfo = { I2C_BOARD_INFO("dps460", 0x59) },
                .bus = 10,
@@ -145,7 +160,7 @@ static struct mlxcpld_hotplug_device mlxplat_mlxcpld_hotplug_pwr[] = {
        },
 };
 
-static struct mlxcpld_hotplug_device mlxplat_mlxcpld_hotplug_fan[] = {
+static struct mlxcpld_hotplug_device mlxplat_mlxcpld_fan[] = {
        {
                .brdinfo = { I2C_BOARD_INFO("24c32", 0x50) },
                .bus = 11,
@@ -166,38 +181,38 @@ static struct mlxcpld_hotplug_device mlxplat_mlxcpld_hotplug_fan[] = {
 
 /* Platform hotplug default data */
 static
-struct mlxcpld_hotplug_platform_data mlxplat_mlxcpld_hotplug_default_data = {
-       .top_aggr_offset = (MLXPLAT_CPLD_LPC_REG_BASE_ADRR | 0x3a),
-       .top_aggr_mask = 0x48,
-       .top_aggr_psu_mask = 0x08,
-       .psu_reg_offset = (MLXPLAT_CPLD_LPC_REG_BASE_ADRR | 0x58),
-       .psu_mask = 0x03,
-       .psu_count = ARRAY_SIZE(mlxplat_mlxcpld_hotplug_psu),
-       .psu = mlxplat_mlxcpld_hotplug_psu,
-       .top_aggr_pwr_mask = 0x08,
-       .pwr_reg_offset = (MLXPLAT_CPLD_LPC_REG_BASE_ADRR | 0x64),
-       .pwr_mask = 0x03,
-       .pwr_count = ARRAY_SIZE(mlxplat_mlxcpld_hotplug_pwr),
-       .pwr = mlxplat_mlxcpld_hotplug_pwr,
-       .top_aggr_fan_mask = 0x40,
-       .fan_reg_offset = (MLXPLAT_CPLD_LPC_REG_BASE_ADRR | 0x88),
-       .fan_mask = 0x0f,
-       .fan_count = ARRAY_SIZE(mlxplat_mlxcpld_hotplug_fan),
-       .fan = mlxplat_mlxcpld_hotplug_fan,
+struct mlxcpld_hotplug_platform_data mlxplat_mlxcpld_default_data = {
+       .top_aggr_offset = MLXPLAT_CPLD_LPC_REG_AGGR_ADRR,
+       .top_aggr_mask = MLXPLAT_CPLD_AGGR_MASK_DEF,
+       .top_aggr_psu_mask = MLXPLAT_CPLD_AGGR_PSU_MASK_DEF,
+       .psu_reg_offset = MLXPLAT_CPLD_LPC_REG_PSU_ADRR,
+       .psu_mask = MLXPLAT_CPLD_PSU_MASK,
+       .psu_count = ARRAY_SIZE(mlxplat_mlxcpld_psu),
+       .psu = mlxplat_mlxcpld_psu,
+       .top_aggr_pwr_mask = MLXPLAT_CPLD_AGGR_PWR_MASK_DEF,
+       .pwr_reg_offset = MLXPLAT_CPLD_LPC_REG_PWR_ADRR,
+       .pwr_mask = MLXPLAT_CPLD_PWR_MASK,
+       .pwr_count = ARRAY_SIZE(mlxplat_mlxcpld_pwr),
+       .pwr = mlxplat_mlxcpld_pwr,
+       .top_aggr_fan_mask = MLXPLAT_CPLD_AGGR_FAN_MASK_DEF,
+       .fan_reg_offset = MLXPLAT_CPLD_LPC_REG_FAN_ADRR,
+       .fan_mask = MLXPLAT_CPLD_FAN_MASK,
+       .fan_count = ARRAY_SIZE(mlxplat_mlxcpld_fan),
+       .fan = mlxplat_mlxcpld_fan,
 };
 
 /* Platform hotplug MSN21xx system family data */
 static
-struct mlxcpld_hotplug_platform_data mlxplat_mlxcpld_hotplug_msn21xx_data = {
-       .top_aggr_offset = (MLXPLAT_CPLD_LPC_REG_BASE_ADRR | 0x3a),
-       .top_aggr_mask = 0x04,
-       .top_aggr_pwr_mask = 0x04,
-       .pwr_reg_offset = (MLXPLAT_CPLD_LPC_REG_BASE_ADRR | 0x64),
-       .pwr_mask = 0x03,
-       .pwr_count = ARRAY_SIZE(mlxplat_mlxcpld_hotplug_pwr),
+struct mlxcpld_hotplug_platform_data mlxplat_mlxcpld_msn21xx_data = {
+       .top_aggr_offset = MLXPLAT_CPLD_LPC_REG_AGGR_ADRR,
+       .top_aggr_mask = MLXPLAT_CPLD_AGGR_MASK_MSN21XX,
+       .top_aggr_pwr_mask = MLXPLAT_CPLD_AGGR_MASK_MSN21XX,
+       .pwr_reg_offset = MLXPLAT_CPLD_LPC_REG_PWR_ADRR,
+       .pwr_mask = MLXPLAT_CPLD_PWR_MASK,
+       .pwr_count = ARRAY_SIZE(mlxplat_mlxcpld_pwr),
 };
 
-static struct resource mlxplat_mlxcpld_hotplug_resources[] = {
+static struct resource mlxplat_mlxcpld_resources[] = {
        [0] = DEFINE_RES_IRQ_NAMED(17, "mlxcpld-hotplug"),
 };
 
@@ -213,7 +228,7 @@ static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi)
                mlxplat_mux_data[i].n_values =
                                ARRAY_SIZE(mlxplat_default_channels[i]);
        }
-       mlxplat_hotplug = &mlxplat_mlxcpld_hotplug_default_data;
+       mlxplat_hotplug = &mlxplat_mlxcpld_default_data;
 
        return 1;
 };
@@ -227,7 +242,7 @@ static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi)
                mlxplat_mux_data[i].n_values =
                                ARRAY_SIZE(mlxplat_msn21xx_channels);
        }
-       mlxplat_hotplug = &mlxplat_mlxcpld_hotplug_msn21xx_data;
+       mlxplat_hotplug = &mlxplat_mlxcpld_msn21xx_data;
 
        return 1;
 };
@@ -314,9 +329,10 @@ static int __init mlxplat_init(void)
        }
 
        priv->pdev_hotplug = platform_device_register_resndata(
-                               &mlxplat_dev->dev, "mlxcpld-hotplug", -1,
-                               mlxplat_mlxcpld_hotplug_resources,
-                               ARRAY_SIZE(mlxplat_mlxcpld_hotplug_resources),
+                               &mlxplat_dev->dev, "mlxcpld-hotplug",
+                               PLATFORM_DEVID_NONE,
+                               mlxplat_mlxcpld_resources,
+                               ARRAY_SIZE(mlxplat_mlxcpld_resources),
                                mlxplat_hotplug, sizeof(*mlxplat_hotplug));
        if (IS_ERR(priv->pdev_hotplug)) {
                err = PTR_ERR(priv->pdev_hotplug);
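[Editor's note] Besides the renames, the mlx-platform hunks swap magic numbers for named constants: register addresses become explicit #defines instead of `BASE | offset` arithmetic, and status masks use GENMASK(), which builds a contiguous mask from high and low bit positions. A compile-time sanity check of the masks above (illustrative only):

	#include <linux/bitops.h>
	#include <linux/bug.h>

	static void demo_masks(void)
	{
		BUILD_BUG_ON(GENMASK(1, 0) != 0x03);	/* PSU / PWR: bits 0-1 */
		BUILD_BUG_ON(GENMASK(3, 0) != 0x0f);	/* FAN: bits 0-3 */
	}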
diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c
new file mode 100644 (file)
index 0000000..77bac85
--- /dev/null
@@ -0,0 +1,532 @@
+/*
+ * Intel Atom SOC Power Management Controller Driver
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/platform_data/x86/clk-pmc-atom.h>
+#include <linux/platform_data/x86/pmc_atom.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+
+struct pmc_bit_map {
+       const char *name;
+       u32 bit_mask;
+};
+
+struct pmc_reg_map {
+       const struct pmc_bit_map *d3_sts_0;
+       const struct pmc_bit_map *d3_sts_1;
+       const struct pmc_bit_map *func_dis;
+       const struct pmc_bit_map *func_dis_2;
+       const struct pmc_bit_map *pss;
+};
+
+struct pmc_data {
+       const struct pmc_reg_map *map;
+       const struct pmc_clk *clks;
+};
+
+struct pmc_dev {
+       u32 base_addr;
+       void __iomem *regmap;
+       const struct pmc_reg_map *map;
+#ifdef CONFIG_DEBUG_FS
+       struct dentry *dbgfs_dir;
+#endif /* CONFIG_DEBUG_FS */
+       bool init;
+};
+
+static struct pmc_dev pmc_device;
+static u32 acpi_base_addr;
+
+static const struct pmc_clk byt_clks[] = {
+       {
+               .name = "xtal",
+               .freq = 25000000,
+               .parent_name = NULL,
+       },
+       {
+               .name = "pll",
+               .freq = 19200000,
+               .parent_name = "xtal",
+       },
+       {},
+};
+
+static const struct pmc_clk cht_clks[] = {
+       {
+               .name = "xtal",
+               .freq = 19200000,
+               .parent_name = NULL,
+       },
+       {},
+};
+
+static const struct pmc_bit_map d3_sts_0_map[] = {
+       {"LPSS1_F0_DMA",        BIT_LPSS1_F0_DMA},
+       {"LPSS1_F1_PWM1",       BIT_LPSS1_F1_PWM1},
+       {"LPSS1_F2_PWM2",       BIT_LPSS1_F2_PWM2},
+       {"LPSS1_F3_HSUART1",    BIT_LPSS1_F3_HSUART1},
+       {"LPSS1_F4_HSUART2",    BIT_LPSS1_F4_HSUART2},
+       {"LPSS1_F5_SPI",        BIT_LPSS1_F5_SPI},
+       {"LPSS1_F6_Reserved",   BIT_LPSS1_F6_XXX},
+       {"LPSS1_F7_Reserved",   BIT_LPSS1_F7_XXX},
+       {"SCC_EMMC",            BIT_SCC_EMMC},
+       {"SCC_SDIO",            BIT_SCC_SDIO},
+       {"SCC_SDCARD",          BIT_SCC_SDCARD},
+       {"SCC_MIPI",            BIT_SCC_MIPI},
+       {"HDA",                 BIT_HDA},
+       {"LPE",                 BIT_LPE},
+       {"OTG",                 BIT_OTG},
+       {"USH",                 BIT_USH},
+       {"GBE",                 BIT_GBE},
+       {"SATA",                BIT_SATA},
+       {"USB_EHCI",            BIT_USB_EHCI},
+       {"SEC",                 BIT_SEC},
+       {"PCIE_PORT0",          BIT_PCIE_PORT0},
+       {"PCIE_PORT1",          BIT_PCIE_PORT1},
+       {"PCIE_PORT2",          BIT_PCIE_PORT2},
+       {"PCIE_PORT3",          BIT_PCIE_PORT3},
+       {"LPSS2_F0_DMA",        BIT_LPSS2_F0_DMA},
+       {"LPSS2_F1_I2C1",       BIT_LPSS2_F1_I2C1},
+       {"LPSS2_F2_I2C2",       BIT_LPSS2_F2_I2C2},
+       {"LPSS2_F3_I2C3",       BIT_LPSS2_F3_I2C3},
+       {"LPSS2_F3_I2C4",       BIT_LPSS2_F4_I2C4},
+       {"LPSS2_F5_I2C5",       BIT_LPSS2_F5_I2C5},
+       {"LPSS2_F6_I2C6",       BIT_LPSS2_F6_I2C6},
+       {"LPSS2_F7_I2C7",       BIT_LPSS2_F7_I2C7},
+       {},
+};
+
+static struct pmc_bit_map byt_d3_sts_1_map[] = {
+       {"SMB",                 BIT_SMB},
+       {"OTG_SS_PHY",          BIT_OTG_SS_PHY},
+       {"USH_SS_PHY",          BIT_USH_SS_PHY},
+       {"DFX",                 BIT_DFX},
+       {},
+};
+
+static struct pmc_bit_map cht_d3_sts_1_map[] = {
+       {"SMB",                 BIT_SMB},
+       {"GMM",                 BIT_STS_GMM},
+       {"ISH",                 BIT_STS_ISH},
+       {},
+};
+
+static struct pmc_bit_map cht_func_dis_2_map[] = {
+       {"SMB",                 BIT_SMB},
+       {"GMM",                 BIT_FD_GMM},
+       {"ISH",                 BIT_FD_ISH},
+       {},
+};
+
+static const struct pmc_bit_map byt_pss_map[] = {
+       {"GBE",                 PMC_PSS_BIT_GBE},
+       {"SATA",                PMC_PSS_BIT_SATA},
+       {"HDA",                 PMC_PSS_BIT_HDA},
+       {"SEC",                 PMC_PSS_BIT_SEC},
+       {"PCIE",                PMC_PSS_BIT_PCIE},
+       {"LPSS",                PMC_PSS_BIT_LPSS},
+       {"LPE",                 PMC_PSS_BIT_LPE},
+       {"DFX",                 PMC_PSS_BIT_DFX},
+       {"USH_CTRL",            PMC_PSS_BIT_USH_CTRL},
+       {"USH_SUS",             PMC_PSS_BIT_USH_SUS},
+       {"USH_VCCS",            PMC_PSS_BIT_USH_VCCS},
+       {"USH_VCCA",            PMC_PSS_BIT_USH_VCCA},
+       {"OTG_CTRL",            PMC_PSS_BIT_OTG_CTRL},
+       {"OTG_VCCS",            PMC_PSS_BIT_OTG_VCCS},
+       {"OTG_VCCA_CLK",        PMC_PSS_BIT_OTG_VCCA_CLK},
+       {"OTG_VCCA",            PMC_PSS_BIT_OTG_VCCA},
+       {"USB",                 PMC_PSS_BIT_USB},
+       {"USB_SUS",             PMC_PSS_BIT_USB_SUS},
+       {},
+};
+
+static const struct pmc_bit_map cht_pss_map[] = {
+       {"SATA",                PMC_PSS_BIT_SATA},
+       {"HDA",                 PMC_PSS_BIT_HDA},
+       {"SEC",                 PMC_PSS_BIT_SEC},
+       {"PCIE",                PMC_PSS_BIT_PCIE},
+       {"LPSS",                PMC_PSS_BIT_LPSS},
+       {"LPE",                 PMC_PSS_BIT_LPE},
+       {"UFS",                 PMC_PSS_BIT_CHT_UFS},
+       {"UXD",                 PMC_PSS_BIT_CHT_UXD},
+       {"UXD_FD",              PMC_PSS_BIT_CHT_UXD_FD},
+       {"UX_ENG",              PMC_PSS_BIT_CHT_UX_ENG},
+       {"USB_SUS",             PMC_PSS_BIT_CHT_USB_SUS},
+       {"GMM",                 PMC_PSS_BIT_CHT_GMM},
+       {"ISH",                 PMC_PSS_BIT_CHT_ISH},
+       {"DFX_MASTER",          PMC_PSS_BIT_CHT_DFX_MASTER},
+       {"DFX_CLUSTER1",        PMC_PSS_BIT_CHT_DFX_CLUSTER1},
+       {"DFX_CLUSTER2",        PMC_PSS_BIT_CHT_DFX_CLUSTER2},
+       {"DFX_CLUSTER3",        PMC_PSS_BIT_CHT_DFX_CLUSTER3},
+       {"DFX_CLUSTER4",        PMC_PSS_BIT_CHT_DFX_CLUSTER4},
+       {"DFX_CLUSTER5",        PMC_PSS_BIT_CHT_DFX_CLUSTER5},
+       {},
+};
+
+static const struct pmc_reg_map byt_reg_map = {
+       .d3_sts_0       = d3_sts_0_map,
+       .d3_sts_1       = byt_d3_sts_1_map,
+       .func_dis       = d3_sts_0_map,
+       .func_dis_2     = byt_d3_sts_1_map,
+       .pss            = byt_pss_map,
+};
+
+static const struct pmc_reg_map cht_reg_map = {
+       .d3_sts_0       = d3_sts_0_map,
+       .d3_sts_1       = cht_d3_sts_1_map,
+       .func_dis       = d3_sts_0_map,
+       .func_dis_2     = cht_func_dis_2_map,
+       .pss            = cht_pss_map,
+};
+
+static const struct pmc_data byt_data = {
+       .map = &byt_reg_map,
+       .clks = byt_clks,
+};
+
+static const struct pmc_data cht_data = {
+       .map = &cht_reg_map,
+       .clks = cht_clks,
+};
+
+static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset)
+{
+       return readl(pmc->regmap + reg_offset);
+}
+
+static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val)
+{
+       writel(val, pmc->regmap + reg_offset);
+}
+
+int pmc_atom_read(int offset, u32 *value)
+{
+       struct pmc_dev *pmc = &pmc_device;
+
+       if (!pmc->init)
+               return -ENODEV;
+
+       *value = pmc_reg_read(pmc, offset);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pmc_atom_read);
+
+int pmc_atom_write(int offset, u32 value)
+{
+       struct pmc_dev *pmc = &pmc_device;
+
+       if (!pmc->init)
+               return -ENODEV;
+
+       pmc_reg_write(pmc, offset, value);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pmc_atom_write);
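[Editor's note] pmc_atom_read()/pmc_atom_write() above are the file's exported accessors; both return -ENODEV until the PMC has probed, so callers must check the result rather than assume the registers are reachable. A hypothetical consumer (offset and bits are illustrative):

	#include <linux/platform_data/x86/pmc_atom.h>

	static int demo_set_bits(int offset, u32 bits)
	{
		u32 value;
		int ret;

		ret = pmc_atom_read(offset, &value);	/* -ENODEV before init */
		if (ret)
			return ret;

		return pmc_atom_write(offset, value | bits);
	}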
+
+static void pmc_power_off(void)
+{
+       u16     pm1_cnt_port;
+       u32     pm1_cnt_value;
+
+       pr_info("Preparing to enter system sleep state S5\n");
+
+       pm1_cnt_port = acpi_base_addr + PM1_CNT;
+
+       pm1_cnt_value = inl(pm1_cnt_port);
+       pm1_cnt_value &= SLEEP_TYPE_MASK;
+       pm1_cnt_value |= SLEEP_TYPE_S5;
+       pm1_cnt_value |= SLEEP_ENABLE;
+
+       outl(pm1_cnt_value, pm1_cnt_port);
+}
+
+static void pmc_hw_reg_setup(struct pmc_dev *pmc)
+{
+       /*
+        * Disable PMC S0IX_WAKE_EN events coming from:
+        * - LPC clock run
+        * - GPIO_SUS ORed dedicated IRQs
+        * - GPIO_SCORE ORed dedicated IRQs
+        * - GPIO_SUS shared IRQ
+        * - GPIO_SCORE shared IRQ
+        */
+       pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static void pmc_dev_state_print(struct seq_file *s, int reg_index,
+                               u32 sts, const struct pmc_bit_map *sts_map,
+                               u32 fd, const struct pmc_bit_map *fd_map)
+{
+       int offset = PMC_REG_BIT_WIDTH * reg_index;
+       int index;
+
+       for (index = 0; sts_map[index].name; index++) {
+               seq_printf(s, "Dev: %-2d - %-32s\tState: %s [%s]\n",
+                       offset + index, sts_map[index].name,
+                       fd_map[index].bit_mask & fd ?  "Disabled" : "Enabled ",
+                       sts_map[index].bit_mask & sts ?  "D3" : "D0");
+       }
+}
+
+static int pmc_dev_state_show(struct seq_file *s, void *unused)
+{
+       struct pmc_dev *pmc = s->private;
+       const struct pmc_reg_map *m = pmc->map;
+       u32 func_dis, func_dis_2;
+       u32 d3_sts_0, d3_sts_1;
+
+       func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS);
+       func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2);
+       d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0);
+       d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1);
+
+       /* Low part */
+       pmc_dev_state_print(s, 0, d3_sts_0, m->d3_sts_0, func_dis, m->func_dis);
+
+       /* High part */
+       pmc_dev_state_print(s, 1, d3_sts_1, m->d3_sts_1, func_dis_2, m->func_dis_2);
+
+       return 0;
+}
+
+static int pmc_dev_state_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, pmc_dev_state_show, inode->i_private);
+}
+
+static const struct file_operations pmc_dev_state_ops = {
+       .open           = pmc_dev_state_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static int pmc_pss_state_show(struct seq_file *s, void *unused)
+{
+       struct pmc_dev *pmc = s->private;
+       const struct pmc_bit_map *map = pmc->map->pss;
+       u32 pss = pmc_reg_read(pmc, PMC_PSS);
+       int index;
+
+       for (index = 0; map[index].name; index++) {
+               seq_printf(s, "Island: %-2d - %-32s\tState: %s\n",
+                       index, map[index].name,
+                       map[index].bit_mask & pss ? "Off" : "On");
+       }
+       return 0;
+}
+
+static int pmc_pss_state_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, pmc_pss_state_show, inode->i_private);
+}
+
+static const struct file_operations pmc_pss_state_ops = {
+       .open           = pmc_pss_state_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static int pmc_sleep_tmr_show(struct seq_file *s, void *unused)
+{
+       struct pmc_dev *pmc = s->private;
+       u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr;
+
+       s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT;
+       s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT;
+       s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT;
+       s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT;
+       s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT;
+
+       seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr);
+       seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr);
+       seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr);
+       seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr);
+       seq_printf(s, "S0   Residency:\t%lldus\n", s0_tmr);
+       return 0;
+}
+
+static int pmc_sleep_tmr_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, pmc_sleep_tmr_show, inode->i_private);
+}
+
+static const struct file_operations pmc_sleep_tmr_ops = {
+       .open           = pmc_sleep_tmr_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static void pmc_dbgfs_unregister(struct pmc_dev *pmc)
+{
+       debugfs_remove_recursive(pmc->dbgfs_dir);
+}
+
+static int pmc_dbgfs_register(struct pmc_dev *pmc)
+{
+       struct dentry *dir, *f;
+
+       dir = debugfs_create_dir("pmc_atom", NULL);
+       if (!dir)
+               return -ENOMEM;
+
+       pmc->dbgfs_dir = dir;
+
+       f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO,
+                               dir, pmc, &pmc_dev_state_ops);
+       if (!f)
+               goto err;
+
+       f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO,
+                               dir, pmc, &pmc_pss_state_ops);
+       if (!f)
+               goto err;
+
+       f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO,
+                               dir, pmc, &pmc_sleep_tmr_ops);
+       if (!f)
+               goto err;
+
+       return 0;
+err:
+       pmc_dbgfs_unregister(pmc);
+       return -ENODEV;
+}
+#else
+static int pmc_dbgfs_register(struct pmc_dev *pmc)
+{
+       return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
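/*
 * With debugfs enabled, the three files created above show up under
 * debugfs (usually mounted at /sys/kernel/debug) as pmc_atom/dev_state,
 * pmc_atom/pss_state and pmc_atom/sleep_state.
 */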
+
+static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap,
+                         const struct pmc_data *pmc_data)
+{
+       struct platform_device *clkdev;
+       struct pmc_clk_data *clk_data;
+
+       clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
+       if (!clk_data)
+               return -ENOMEM;
+
+       clk_data->base = pmc_regmap; /* offset is added by client */
+       clk_data->clks = pmc_data->clks;
+
+       clkdev = platform_device_register_data(&pdev->dev, "clk-pmc-atom",
+                                              PLATFORM_DEVID_NONE,
+                                              clk_data, sizeof(*clk_data));
+       if (IS_ERR(clkdev)) {
+               kfree(clk_data);
+               return PTR_ERR(clkdev);
+       }
+
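+       /*
+        * platform_device_register_data() copies clk_data into the new
+        * platform device, so the local allocation is freed in both the
+        * error and the success path.
+        */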
+       kfree(clk_data);
+
+       return 0;
+}
+
+static int pmc_setup_dev(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       struct pmc_dev *pmc = &pmc_device;
+       const struct pmc_data *data = (struct pmc_data *)ent->driver_data;
+       const struct pmc_reg_map *map = data->map;
+       int ret;
+
+       /* Obtain ACPI base address */
+       pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr);
+       acpi_base_addr &= ACPI_BASE_ADDR_MASK;
+
+       /* Install power off function */
+       if (acpi_base_addr != 0 && pm_power_off == NULL)
+               pm_power_off = pmc_power_off;
+
+       pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr);
+       pmc->base_addr &= PMC_BASE_ADDR_MASK;
+
+       pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN);
+       if (!pmc->regmap) {
+               dev_err(&pdev->dev, "error: ioremap failed\n");
+               return -ENOMEM;
+       }
+
+       pmc->map = map;
+
+       /* PMC hardware registers setup */
+       pmc_hw_reg_setup(pmc);
+
+       ret = pmc_dbgfs_register(pmc);
+       if (ret)
+               dev_warn(&pdev->dev, "debugfs register failed\n");
+
+       /* Register platform clocks - PMC_PLT_CLK [0..5] */
+       ret = pmc_setup_clks(pdev, pmc->regmap, data);
+       if (ret)
+               dev_warn(&pdev->dev, "platform clocks register failed: %d\n",
+                        ret);
+
+       pmc->init = true;
+       return ret;
+}
+
+/*
+ * Data for PCI driver interface
+ *
+ * It is used by the pci_match_id() call below.
+ */
+static const struct pci_device_id pmc_pci_ids[] = {
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_VLV_PMC), (kernel_ulong_t)&byt_data },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_CHT_PMC), (kernel_ulong_t)&cht_data },
+       { 0, },
+};
+
+static int __init pmc_atom_init(void)
+{
+       struct pci_dev *pdev = NULL;
+       const struct pci_device_id *ent;
+
+       /* We look for our device - the PCU PMC.
+        * We assume that there is at most one such device.
+        *
+        * We can't use the plain pci_driver mechanism,
+        * as the device is really a multi-function device:
+        * the main driver that binds to the pci_device is lpc_ich,
+        * so we have to find & bind to the device this way.
+        */
+       for_each_pci_dev(pdev) {
+               ent = pci_match_id(pmc_pci_ids, pdev);
+               if (ent)
+                       return pmc_setup_dev(pdev, ent);
+       }
+       /* Device not found. */
+       return -ENODEV;
+}
+
+device_initcall(pmc_atom_init);
+
+/*
+MODULE_AUTHOR("Aubrey Li <aubrey.li@linux.intel.com>");
+MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface");
+MODULE_LICENSE("GPL v2");
+*/
diff --git a/drivers/platform/x86/silead_dmi.c b/drivers/platform/x86/silead_dmi.c
new file mode 100644 (file)
index 0000000..02e11fd
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Silead touchscreen driver DMI based configuration code
+ *
+ * Copyright (c) 2017 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Red Hat authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/dmi.h>
+#include <linux/i2c.h>
+#include <linux/notifier.h>
+#include <linux/property.h>
+#include <linux/string.h>
+
+struct silead_ts_dmi_data {
+       const char *acpi_name;
+       struct property_entry *properties;
+};
+
+static struct property_entry cube_iwork8_air_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1660),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 900),
+       PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+       PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-cube-iwork8-air.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+       { }
+};
+
+static const struct silead_ts_dmi_data cube_iwork8_air_data = {
+       .acpi_name      = "MSSL1680:00",
+       .properties     = cube_iwork8_air_props,
+};
+
+static struct property_entry jumper_ezpad_mini3_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1700),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1150),
+       PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+       PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-jumper-ezpad-mini3.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+       { }
+};
+
+static const struct silead_ts_dmi_data jumper_ezpad_mini3_data = {
+       .acpi_name      = "MSSL1680:00",
+       .properties     = jumper_ezpad_mini3_props,
+};
+
+static const struct dmi_system_id silead_ts_dmi_table[] = {
+       {
+               /* CUBE iwork8 Air */
+               .driver_data = (void *)&cube_iwork8_air_data,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "cube"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "i1-TF"),
+                       DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
+               },
+       },
+       {
+               /* Jumper EZpad mini3 */
+               .driver_data = (void *)&jumper_ezpad_mini3_data,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+                       /* jumperx.T87.KFBNEEA02 with the version-nr dropped */
+                       DMI_MATCH(DMI_BIOS_VERSION, "jumperx.T87.KFBNEEA"),
+               },
+       },
+       { },
+};
+
+static void silead_ts_dmi_add_props(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       const struct dmi_system_id *dmi_id;
+       const struct silead_ts_dmi_data *ts_data;
+       int error;
+
+       dmi_id = dmi_first_match(silead_ts_dmi_table);
+       if (!dmi_id)
+               return;
+
+       ts_data = dmi_id->driver_data;
+       if (has_acpi_companion(dev) &&
+           !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) {
+               error = device_add_properties(dev, ts_data->properties);
+               if (error)
+                       dev_err(dev, "failed to add properties: %d\n", error);
+       }
+}
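/*
 * A minimal sketch (assuming a consumer driver that is not part of this
 * patch): once device_add_properties() has attached the entries, the
 * touchscreen driver's probe can read them back through the generic
 * device property API:
 */
static int example_read_touchscreen_width(struct device *dev)
{
	u32 width;

	/* returns 0 on success, a negative errno otherwise */
	return device_property_read_u32(dev, "touchscreen-size-x", &width);
}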
+
+static int silead_ts_dmi_notifier_call(struct notifier_block *nb,
+                                      unsigned long action, void *data)
+{
+       struct device *dev = data;
+
+       switch (action) {
+       case BUS_NOTIFY_ADD_DEVICE:
+               silead_ts_dmi_add_props(dev);
+               break;
+
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+static struct notifier_block silead_ts_dmi_notifier = {
+       .notifier_call = silead_ts_dmi_notifier_call,
+};
+
+static int __init silead_ts_dmi_init(void)
+{
+       int error;
+
+       error = bus_register_notifier(&i2c_bus_type, &silead_ts_dmi_notifier);
+       if (error)
+               pr_err("%s: failed to register i2c bus notifier: %d\n",
+                       __func__, error);
+
+       return error;
+}
+
+/*
+ * We are registering our notifier after the i2c core is initialized and the
+ * i2c bus itself is ready (which happens at postcore initcall level), but
+ * before ACPI starts enumerating devices (at subsys initcall level).
+ */
+arch_initcall(silead_ts_dmi_init);
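/*
 * For reference, the initcall levels relied on above run in this order
 * (earlier first): postcore_initcall() - i2c core and bus type ready;
 * arch_initcall() - this registration; subsys_initcall() - ACPI device
 * enumeration.
 */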
index cacb43fb1df7fcbd5f643788fa960a4cdb6e4e36..1d18b32628ecbf501aff7193fc857b0126a56f58 100644 (file)
@@ -163,6 +163,7 @@ enum tpacpi_hkey_event_t {
        TP_HKEY_EV_HOTKEY_BASE          = 0x1001, /* first hotkey (FN+F1) */
        TP_HKEY_EV_BRGHT_UP             = 0x1010, /* Brightness up */
        TP_HKEY_EV_BRGHT_DOWN           = 0x1011, /* Brightness down */
+       TP_HKEY_EV_KBD_LIGHT            = 0x1012, /* Thinklight/kbd backlight */
        TP_HKEY_EV_VOL_UP               = 0x1015, /* Volume up or unmute */
        TP_HKEY_EV_VOL_DOWN             = 0x1016, /* Volume down or unmute */
        TP_HKEY_EV_VOL_MUTE             = 0x1017, /* Mixer output mute */
@@ -372,11 +373,9 @@ enum led_status_t {
        TPACPI_LED_BLINK,
 };
 
-/* Special LED class that can defer work */
+/* tpacpi LED class */
 struct tpacpi_led_classdev {
        struct led_classdev led_classdev;
-       struct work_struct work;
-       enum led_status_t new_state;
        int led;
 };
 
@@ -1959,7 +1958,7 @@ enum {    /* Positions of some of the keys in hotkey masks */
        TP_ACPI_HKEY_HIBERNATE_MASK     = 1 << TP_ACPI_HOTKEYSCAN_FNF12,
        TP_ACPI_HKEY_BRGHTUP_MASK       = 1 << TP_ACPI_HOTKEYSCAN_FNHOME,
        TP_ACPI_HKEY_BRGHTDWN_MASK      = 1 << TP_ACPI_HOTKEYSCAN_FNEND,
-       TP_ACPI_HKEY_THNKLGHT_MASK      = 1 << TP_ACPI_HOTKEYSCAN_FNPAGEUP,
+       TP_ACPI_HKEY_KBD_LIGHT_MASK     = 1 << TP_ACPI_HOTKEYSCAN_FNPAGEUP,
        TP_ACPI_HKEY_ZOOM_MASK          = 1 << TP_ACPI_HOTKEYSCAN_FNSPACE,
        TP_ACPI_HKEY_VOLUP_MASK         = 1 << TP_ACPI_HOTKEYSCAN_VOLUMEUP,
        TP_ACPI_HKEY_VOLDWN_MASK        = 1 << TP_ACPI_HOTKEYSCAN_VOLUMEDOWN,
@@ -2344,7 +2343,7 @@ static void hotkey_read_nvram(struct tp_nvram_state *n, const u32 m)
                n->display_toggle = !!(d & TP_NVRAM_MASK_HKT_DISPLAY);
                n->hibernate_toggle = !!(d & TP_NVRAM_MASK_HKT_HIBERNATE);
        }
-       if (m & TP_ACPI_HKEY_THNKLGHT_MASK) {
+       if (m & TP_ACPI_HKEY_KBD_LIGHT_MASK) {
                d = nvram_read_byte(TP_NVRAM_ADDR_THINKLIGHT);
                n->thinklight_toggle = !!(d & TP_NVRAM_MASK_THINKLIGHT);
        }
@@ -5084,18 +5083,27 @@ static struct ibm_struct video_driver_data = {
  * Keyboard backlight subdriver
  */
 
+static enum led_brightness kbdlight_brightness;
+static DEFINE_MUTEX(kbdlight_mutex);
+
 static int kbdlight_set_level(int level)
 {
+       int ret = 0;
+
        if (!hkey_handle)
                return -ENXIO;
 
+       mutex_lock(&kbdlight_mutex);
+
        if (!acpi_evalf(hkey_handle, NULL, "MLCS", "dd", level))
-               return -EIO;
+               ret = -EIO;
+       else
+               kbdlight_brightness = level;
 
-       return 0;
-}
+       mutex_unlock(&kbdlight_mutex);
 
-static int kbdlight_set_level_and_update(int level);
+       return ret;
+}
 
 static int kbdlight_get_level(void)
 {
@@ -5158,24 +5166,10 @@ static bool kbdlight_is_supported(void)
        return status & BIT(9);
 }
 
-static void kbdlight_set_worker(struct work_struct *work)
-{
-       struct tpacpi_led_classdev *data =
-                       container_of(work, struct tpacpi_led_classdev, work);
-
-       if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
-               kbdlight_set_level_and_update(data->new_state);
-}
-
-static void kbdlight_sysfs_set(struct led_classdev *led_cdev,
+static int kbdlight_sysfs_set(struct led_classdev *led_cdev,
                        enum led_brightness brightness)
 {
-       struct tpacpi_led_classdev *data =
-                       container_of(led_cdev,
-                                    struct tpacpi_led_classdev,
-                                    led_classdev);
-       data->new_state = brightness;
-       queue_work(tpacpi_wq, &data->work);
+       return kbdlight_set_level(brightness);
 }
 
 static enum led_brightness kbdlight_sysfs_get(struct led_classdev *led_cdev)
@@ -5193,7 +5187,8 @@ static struct tpacpi_led_classdev tpacpi_led_kbdlight = {
        .led_classdev = {
                .name           = "tpacpi::kbd_backlight",
                .max_brightness = 2,
-               .brightness_set = &kbdlight_sysfs_set,
+               .flags          = LED_BRIGHT_HW_CHANGED,
+               .brightness_set_blocking = &kbdlight_sysfs_set,
                .brightness_get = &kbdlight_sysfs_get,
        }
 };
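/*
 * A minimal sketch of the brightness_set_blocking contract this patch
 * switches to: unlike brightness_set, the callback may sleep and returns
 * an error code, with the LED core deferring the call to its own
 * workqueue when needed - which is what makes the driver-private
 * work_struct machinery removed above unnecessary:
 */
static int example_brightness_set(struct led_classdev *led_cdev,
				  enum led_brightness value)
{
	/* possibly sleeping hardware access goes here */
	return 0;
}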
@@ -5205,7 +5200,6 @@ static int __init kbdlight_init(struct ibm_init_struct *iibm)
        vdbg_printk(TPACPI_DBG_INIT, "initializing kbdlight subdriver\n");
 
        TPACPI_ACPIHANDLE_INIT(hkey);
-       INIT_WORK(&tpacpi_led_kbdlight.work, kbdlight_set_worker);
 
        if (!kbdlight_is_supported()) {
                tp_features.kbdlight = 0;
@@ -5213,6 +5207,7 @@ static int __init kbdlight_init(struct ibm_init_struct *iibm)
                return 1;
        }
 
+       kbdlight_brightness = kbdlight_sysfs_get(NULL);
        tp_features.kbdlight = 1;
 
        rc = led_classdev_register(&tpacpi_pdev->dev,
@@ -5222,6 +5217,8 @@ static int __init kbdlight_init(struct ibm_init_struct *iibm)
                return rc;
        }
 
+       tpacpi_hotkey_driver_mask_set(hotkey_driver_mask |
+                                     TP_ACPI_HKEY_KBD_LIGHT_MASK);
        return 0;
 }
 
@@ -5229,7 +5226,6 @@ static void kbdlight_exit(void)
 {
        if (tp_features.kbdlight)
                led_classdev_unregister(&tpacpi_led_kbdlight.led_classdev);
-       flush_workqueue(tpacpi_wq);
 }
 
 static int kbdlight_set_level_and_update(int level)
@@ -5358,25 +5354,11 @@ static int light_set_status(int status)
        return -ENXIO;
 }
 
-static void light_set_status_worker(struct work_struct *work)
-{
-       struct tpacpi_led_classdev *data =
-                       container_of(work, struct tpacpi_led_classdev, work);
-
-       if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
-               light_set_status((data->new_state != TPACPI_LED_OFF));
-}
-
-static void light_sysfs_set(struct led_classdev *led_cdev,
+static int light_sysfs_set(struct led_classdev *led_cdev,
                        enum led_brightness brightness)
 {
-       struct tpacpi_led_classdev *data =
-               container_of(led_cdev,
-                            struct tpacpi_led_classdev,
-                            led_classdev);
-       data->new_state = (brightness != LED_OFF) ?
-                               TPACPI_LED_ON : TPACPI_LED_OFF;
-       queue_work(tpacpi_wq, &data->work);
+       return light_set_status((brightness != LED_OFF) ?
+                               TPACPI_LED_ON : TPACPI_LED_OFF);
 }
 
 static enum led_brightness light_sysfs_get(struct led_classdev *led_cdev)
@@ -5387,7 +5369,7 @@ static enum led_brightness light_sysfs_get(struct led_classdev *led_cdev)
 static struct tpacpi_led_classdev tpacpi_led_thinklight = {
        .led_classdev = {
                .name           = "tpacpi::thinklight",
-               .brightness_set = &light_sysfs_set,
+               .brightness_set_blocking = &light_sysfs_set,
                .brightness_get = &light_sysfs_get,
        }
 };
@@ -5403,7 +5385,6 @@ static int __init light_init(struct ibm_init_struct *iibm)
                TPACPI_ACPIHANDLE_INIT(lght);
        }
        TPACPI_ACPIHANDLE_INIT(cmos);
-       INIT_WORK(&tpacpi_led_thinklight.work, light_set_status_worker);
 
        /* light not supported on 570, 600e/x, 770e, 770x, G4x, R30, R31 */
        tp_features.light = (cmos_handle || lght_handle) && !ledb_handle;
@@ -5437,7 +5418,6 @@ static int __init light_init(struct ibm_init_struct *iibm)
 static void light_exit(void)
 {
        led_classdev_unregister(&tpacpi_led_thinklight.led_classdev);
-       flush_workqueue(tpacpi_wq);
 }
 
 static int light_read(struct seq_file *m)
@@ -5704,29 +5684,21 @@ static int led_set_status(const unsigned int led,
        return rc;
 }
 
-static void led_set_status_worker(struct work_struct *work)
-{
-       struct tpacpi_led_classdev *data =
-               container_of(work, struct tpacpi_led_classdev, work);
-
-       if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
-               led_set_status(data->led, data->new_state);
-}
-
-static void led_sysfs_set(struct led_classdev *led_cdev,
+static int led_sysfs_set(struct led_classdev *led_cdev,
                        enum led_brightness brightness)
 {
        struct tpacpi_led_classdev *data = container_of(led_cdev,
                             struct tpacpi_led_classdev, led_classdev);
+       enum led_status_t new_state;
 
        if (brightness == LED_OFF)
-               data->new_state = TPACPI_LED_OFF;
+               new_state = TPACPI_LED_OFF;
        else if (tpacpi_led_state_cache[data->led] != TPACPI_LED_BLINK)
-               data->new_state = TPACPI_LED_ON;
+               new_state = TPACPI_LED_ON;
        else
-               data->new_state = TPACPI_LED_BLINK;
+               new_state = TPACPI_LED_BLINK;
 
-       queue_work(tpacpi_wq, &data->work);
+       return led_set_status(data->led, new_state);
 }
 
 static int led_sysfs_blink_set(struct led_classdev *led_cdev,
@@ -5743,10 +5715,7 @@ static int led_sysfs_blink_set(struct led_classdev *led_cdev,
        } else if ((*delay_on != 500) || (*delay_off != 500))
                return -EINVAL;
 
-       data->new_state = TPACPI_LED_BLINK;
-       queue_work(tpacpi_wq, &data->work);
-
-       return 0;
+       return led_set_status(data->led, TPACPI_LED_BLINK);
 }
 
 static enum led_brightness led_sysfs_get(struct led_classdev *led_cdev)
@@ -5775,7 +5744,6 @@ static void led_exit(void)
                        led_classdev_unregister(&tpacpi_leds[i].led_classdev);
        }
 
-       flush_workqueue(tpacpi_wq);
        kfree(tpacpi_leds);
 }
 
@@ -5789,7 +5757,7 @@ static int __init tpacpi_init_led(unsigned int led)
        if (!tpacpi_led_names[led])
                return 0;
 
-       tpacpi_leds[led].led_classdev.brightness_set = &led_sysfs_set;
+       tpacpi_leds[led].led_classdev.brightness_set_blocking = &led_sysfs_set;
        tpacpi_leds[led].led_classdev.blink_set = &led_sysfs_blink_set;
        if (led_supported == TPACPI_LED_570)
                tpacpi_leds[led].led_classdev.brightness_get =
@@ -5797,8 +5765,6 @@ static int __init tpacpi_init_led(unsigned int led)
 
        tpacpi_leds[led].led_classdev.name = tpacpi_led_names[led];
 
-       INIT_WORK(&tpacpi_leds[led].work, led_set_status_worker);
-
        rc = led_classdev_register(&tpacpi_pdev->dev,
                                &tpacpi_leds[led].led_classdev);
        if (rc < 0)
@@ -9169,6 +9135,24 @@ static void tpacpi_driver_event(const unsigned int hkey_event)
                        volume_alsa_notify_change();
                }
        }
+       if (tp_features.kbdlight && hkey_event == TP_HKEY_EV_KBD_LIGHT) {
+               enum led_brightness brightness;
+
+               mutex_lock(&kbdlight_mutex);
+
+               /*
+                * Check that the brightness actually changed, since setting the
+                * brightness through kbdlight_set_level() also triggers this event.
+                */
+               brightness = kbdlight_sysfs_get(NULL);
+               if (kbdlight_brightness != brightness) {
+                       kbdlight_brightness = brightness;
+                       led_classdev_notify_brightness_hw_changed(
+                               &tpacpi_led_kbdlight.led_classdev, brightness);
+               }
+
+               mutex_unlock(&kbdlight_mutex);
+       }
 }
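/*
 * led_classdev_notify_brightness_hw_changed() pairs with the
 * LED_BRIGHT_HW_CHANGED flag set on the kbd_backlight classdev earlier
 * in this patch; it reports brightness changes initiated by the hardware
 * or firmware itself (here, the Fn hotkey) to userspace.
 */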
 
 static void hotkey_driver_event(const unsigned int scancode)
index fa0f19b975a6a59cd5532e7091052ec194dd14b2..974fd684bab2c74400f8904fb557a5ac21ac9f26 100644 (file)
@@ -195,7 +195,7 @@ static void sr_stop_vddautocomp(struct omap_sr *sr)
 }
 
 /*
- * This function handles the intializations which have to be done
+ * This function handles the initializations which have to be done
  * only when both sr device and class driver register has
  * completed. This will be attempted to be called from both sr class
  * driver register and sr device initialization APIs. Only one call
@@ -671,7 +671,7 @@ int sr_register_class(struct omap_sr_class_data *class_data)
        sr_class = class_data;
 
        /*
-        * Call into late init to do intializations that require
+        * Call into late init to do initializations that require
         * both sr driver and sr class driver to be initialized.
         */
        list_for_each_entry(sr_info, &sr_list, node)
@@ -899,7 +899,7 @@ static int __init omap_sr_probe(struct platform_device *pdev)
        list_add(&sr_info->node, &sr_list);
 
        /*
-        * Call into late init to do intializations that require
+        * Call into late init to do initializations that require
         * both sr driver and sr class driver to be initialized.
         */
        if (sr_class) {
index 9013a585507e8a80b4580c084445ec7bbde6e04c..50b617af81bd5a2d034798bfa27f2cfa8da97bde 100644 (file)
@@ -889,17 +889,16 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
                        goto err_req;
                }
 
-               down_read(&current->mm->mmap_sem);
-               pinned = get_user_pages(
+               pinned = get_user_pages_unlocked(
                                (unsigned long)xfer->loc_addr & PAGE_MASK,
                                nr_pages,
-                               dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0,
-                               page_list, NULL);
-               up_read(&current->mm->mmap_sem);
+                               page_list,
+                               dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0);
 
                if (pinned != nr_pages) {
                        if (pinned < 0) {
-                               rmcd_error("get_user_pages err=%ld", pinned);
+                               rmcd_error("get_user_pages_unlocked err=%ld",
+                                          pinned);
                                nr_pages = 0;
                        } else
                                rmcd_error("pinned %ld out of %ld pages",
index 5dc673dc948785a79da8d070954323b9357385c1..ee1b0e9dde79a9ec4720c516a697fd5de1e72fcf 100644 (file)
@@ -1434,9 +1434,10 @@ config RTC_DRV_SUN4V
          based RTC on SUN4V systems.
 
 config RTC_DRV_SUN6I
-       tristate "Allwinner A31 RTC"
-       default MACH_SUN6I || MACH_SUN8I || COMPILE_TEST
-       depends on ARCH_SUNXI
+       bool "Allwinner A31 RTC"
+       default MACH_SUN6I || MACH_SUN8I
+       depends on COMMON_CLK
+       depends on ARCH_SUNXI || COMPILE_TEST
        help
          If you say Y here you will get support for the RTC found in
          some Allwinner SoCs like the A31 or the A64.
@@ -1719,6 +1720,17 @@ config RTC_DRV_R7301
           This driver can also be built as a module. If so, the module
           will be called rtc-r7301.
 
+config RTC_DRV_STM32
+       tristate "STM32 RTC"
+       select REGMAP_MMIO
+       depends on ARCH_STM32 || COMPILE_TEST
+       help
+          If you say yes here you get support for the STM32 On-Chip
+          Real Time Clock.
+
+          This driver can also be built as a module. If so, the module
+          will be called "rtc-stm32".
+
 comment "HID Sensor RTC drivers"
 
 config RTC_DRV_HID_SENSOR_TIME
index f13ab1c5c222c269e711786e74f5a99a258150a5..f07297b1460a06f2ed33c2b6b2ff11196f6955b0 100644 (file)
@@ -145,6 +145,7 @@ obj-$(CONFIG_RTC_DRV_SNVS)  += rtc-snvs.o
 obj-$(CONFIG_RTC_DRV_SPEAR)    += rtc-spear.o
 obj-$(CONFIG_RTC_DRV_STARFIRE) += rtc-starfire.o
 obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o
+obj-$(CONFIG_RTC_DRV_STM32)    += rtc-stm32.o
 obj-$(CONFIG_RTC_DRV_STMP)     += rtc-stmp3xxx.o
 obj-$(CONFIG_RTC_DRV_ST_LPC)   += rtc-st-lpc.o
 obj-$(CONFIG_RTC_DRV_SUN4V)    += rtc-sun4v.o
index 9a3f2a6f512e014b60b86974bdd6875e95e643ad..21f355c37eab53cd4de0668e7250d1d578344e71 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
 #define RTC_STATUS_ALARM1          BIT(0)
 #define RTC_STATUS_ALARM2          BIT(1)
 #define RTC_IRQ1_CONF      0x4
-#define RTC_IRQ1_AL_EN             BIT(0)
-#define RTC_IRQ1_FREQ_EN           BIT(1)
-#define RTC_IRQ1_FREQ_1HZ          BIT(2)
+#define RTC_IRQ2_CONF      0x8
+#define RTC_IRQ_AL_EN              BIT(0)
+#define RTC_IRQ_FREQ_EN                    BIT(1)
+#define RTC_IRQ_FREQ_1HZ           BIT(2)
+
 #define RTC_TIME           0xC
 #define RTC_ALARM1         0x10
-
-#define SOC_RTC_INTERRUPT   0x8
-#define SOC_RTC_ALARM1         BIT(0)
-#define SOC_RTC_ALARM2         BIT(1)
-#define SOC_RTC_ALARM1_MASK    BIT(2)
-#define SOC_RTC_ALARM2_MASK    BIT(3)
+#define RTC_ALARM2         0x14
+
+/* Armada38x SoC registers  */
+#define RTC_38X_BRIDGE_TIMING_CTL   0x0
+#define RTC_38X_PERIOD_OFFS            0
+#define RTC_38X_PERIOD_MASK            (0x3FF << RTC_38X_PERIOD_OFFS)
+#define RTC_38X_READ_DELAY_OFFS                26
+#define RTC_38X_READ_DELAY_MASK                (0x1F << RTC_38X_READ_DELAY_OFFS)
+
+/* Armada 7K/8K registers  */
+#define RTC_8K_BRIDGE_TIMING_CTL0    0x0
+#define RTC_8K_WRCLK_PERIOD_OFFS       0
+#define RTC_8K_WRCLK_PERIOD_MASK       (0xFFFF << RTC_8K_WRCLK_PERIOD_OFFS)
+#define RTC_8K_WRCLK_SETUP_OFFS                16
+#define RTC_8K_WRCLK_SETUP_MASK                (0xFFFF << RTC_8K_WRCLK_SETUP_OFFS)
+#define RTC_8K_BRIDGE_TIMING_CTL1   0x4
+#define RTC_8K_READ_DELAY_OFFS         0
+#define RTC_8K_READ_DELAY_MASK         (0xFFFF << RTC_8K_READ_DELAY_OFFS)
+
+#define RTC_8K_ISR                 0x10
+#define RTC_8K_IMR                 0x14
+#define RTC_8K_ALARM2                  BIT(0)
+
+#define SOC_RTC_INTERRUPT          0x8
+#define SOC_RTC_ALARM1                 BIT(0)
+#define SOC_RTC_ALARM2                 BIT(1)
+#define SOC_RTC_ALARM1_MASK            BIT(2)
+#define SOC_RTC_ALARM2_MASK            BIT(3)
+
+#define SAMPLE_NR 100
+
+struct value_to_freq {
+       u32 value;
+       u8 freq;
+};
 
 struct armada38x_rtc {
        struct rtc_device   *rtc_dev;
@@ -41,38 +73,153 @@ struct armada38x_rtc {
        void __iomem        *regs_soc;
        spinlock_t          lock;
        int                 irq;
+       struct value_to_freq *val_to_freq;
+       struct armada38x_rtc_data *data;
+};
+
+#define ALARM1 0
+#define ALARM2 1
+
+#define ALARM_REG(base, alarm)  ((base) + (alarm) * sizeof(u32))
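+/*
+ * With ALARM1 = 0 and ALARM2 = 1 this maps each alarm-1 register onto
+ * its alarm-2 neighbour, e.g. ALARM_REG(RTC_ALARM1, ALARM2) =
+ * 0x10 + 4 = 0x14 = RTC_ALARM2, and ALARM_REG(RTC_IRQ1_CONF, ALARM2) =
+ * 0x4 + 4 = 0x8 = RTC_IRQ2_CONF.
+ */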
+
+struct armada38x_rtc_data {
+       /* Initialize the RTC-MBUS bridge timing */
+       void (*update_mbus_timing)(struct armada38x_rtc *rtc);
+       u32 (*read_rtc_reg)(struct armada38x_rtc *rtc, u8 rtc_reg);
+       void (*clear_isr)(struct armada38x_rtc *rtc);
+       void (*unmask_interrupt)(struct armada38x_rtc *rtc);
+       u32 alarm;
 };
 
 /*
  * According to the datasheet, the OS should wait 5us after every
  * register write to the RTC hard macro so that the required update
  * can occur without holding off the system bus
+ * According to errata RES-3124064, a write to any RTC register
+ * may fail. As a workaround, before writing to an RTC
+ * register, issue a dummy write of 0x0 twice to the RTC Status
+ * register.
  */
+
 static void rtc_delayed_write(u32 val, struct armada38x_rtc *rtc, int offset)
 {
+       writel(0, rtc->regs + RTC_STATUS);
+       writel(0, rtc->regs + RTC_STATUS);
        writel(val, rtc->regs + offset);
        udelay(5);
 }
 
+/* Update RTC-MBUS bridge timing parameters */
+static void rtc_update_38x_mbus_timing_params(struct armada38x_rtc *rtc)
+{
+       u32 reg;
+
+       reg = readl(rtc->regs_soc + RTC_38X_BRIDGE_TIMING_CTL);
+       reg &= ~RTC_38X_PERIOD_MASK;
+       reg |= 0x3FF << RTC_38X_PERIOD_OFFS; /* Maximum value */
+       reg &= ~RTC_38X_READ_DELAY_MASK;
+       reg |= 0x1F << RTC_38X_READ_DELAY_OFFS; /* Maximum value */
+       writel(reg, rtc->regs_soc + RTC_38X_BRIDGE_TIMING_CTL);
+}
+
+static void rtc_update_8k_mbus_timing_params(struct armada38x_rtc *rtc)
+{
+       u32 reg;
+
+       reg = readl(rtc->regs_soc + RTC_8K_BRIDGE_TIMING_CTL0);
+       reg &= ~RTC_8K_WRCLK_PERIOD_MASK;
+       reg |= 0x3FF << RTC_8K_WRCLK_PERIOD_OFFS;
+       reg &= ~RTC_8K_WRCLK_SETUP_MASK;
+       reg |= 0x29 << RTC_8K_WRCLK_SETUP_OFFS;
+       writel(reg, rtc->regs_soc + RTC_8K_BRIDGE_TIMING_CTL0);
+
+       reg = readl(rtc->regs_soc + RTC_8K_BRIDGE_TIMING_CTL1);
+       reg &= ~RTC_8K_READ_DELAY_MASK;
+       reg |= 0x3F << RTC_8K_READ_DELAY_OFFS;
+       writel(reg, rtc->regs_soc + RTC_8K_BRIDGE_TIMING_CTL1);
+}
+
+static u32 read_rtc_register(struct armada38x_rtc *rtc, u8 rtc_reg)
+{
+       return readl(rtc->regs + rtc_reg);
+}
+
+static u32 read_rtc_register_38x_wa(struct armada38x_rtc *rtc, u8 rtc_reg)
+{
+       int i, index_max = 0, max = 0;
+
+       for (i = 0; i < SAMPLE_NR; i++) {
+               rtc->val_to_freq[i].value = readl(rtc->regs + rtc_reg);
+               rtc->val_to_freq[i].freq = 0;
+       }
+
+       for (i = 0; i < SAMPLE_NR; i++) {
+               int j = 0;
+               u32 value = rtc->val_to_freq[i].value;
+
+               while (rtc->val_to_freq[j].freq) {
+                       if (rtc->val_to_freq[j].value == value) {
+                               rtc->val_to_freq[j].freq++;
+                               break;
+                       }
+                       j++;
+               }
+
+               if (!rtc->val_to_freq[j].freq) {
+                       rtc->val_to_freq[j].value = value;
+                       rtc->val_to_freq[j].freq = 1;
+               }
+
+               if (rtc->val_to_freq[j].freq > max) {
+                       index_max = j;
+                       max = rtc->val_to_freq[j].freq;
+               }
+
+               /*
+                * If a value already occurs in more than half of the samples,
+                * it is the most frequent one and we can stop the search now.
+                */
+               if (max > SAMPLE_NR / 2)
+                       break;
+       }
+
+       return rtc->val_to_freq[index_max].value;
+}
+
+static void armada38x_clear_isr(struct armada38x_rtc *rtc)
+{
+       u32 val = readl(rtc->regs_soc + SOC_RTC_INTERRUPT);
+
+       writel(val & ~SOC_RTC_ALARM1, rtc->regs_soc + SOC_RTC_INTERRUPT);
+}
+
+static void armada38x_unmask_interrupt(struct armada38x_rtc *rtc)
+{
+       u32 val = readl(rtc->regs_soc + SOC_RTC_INTERRUPT);
+
+       writel(val | SOC_RTC_ALARM1_MASK, rtc->regs_soc + SOC_RTC_INTERRUPT);
+}
+
+static void armada8k_clear_isr(struct armada38x_rtc *rtc)
+{
+       writel(RTC_8K_ALARM2, rtc->regs_soc + RTC_8K_ISR);
+}
+
+static void armada8k_unmask_interrupt(struct armada38x_rtc *rtc)
+{
+       writel(RTC_8K_ALARM2, rtc->regs_soc + RTC_8K_IMR);
+}
+
 static int armada38x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        struct armada38x_rtc *rtc = dev_get_drvdata(dev);
-       unsigned long time, time_check, flags;
+       unsigned long time, flags;
 
        spin_lock_irqsave(&rtc->lock, flags);
-       time = readl(rtc->regs + RTC_TIME);
-       /*
-        * WA for failing time set attempts. As stated in HW ERRATA if
-        * more than one second between two time reads is detected
-        * then read once again.
-        */
-       time_check = readl(rtc->regs + RTC_TIME);
-       if ((time_check - time) > 1)
-               time_check = readl(rtc->regs + RTC_TIME);
-
+       time = rtc->data->read_rtc_reg(rtc, RTC_TIME);
        spin_unlock_irqrestore(&rtc->lock, flags);
 
-       rtc_time_to_tm(time_check, tm);
+       rtc_time_to_tm(time, tm);
 
        return 0;
 }
@@ -87,16 +234,9 @@ static int armada38x_rtc_set_time(struct device *dev, struct rtc_time *tm)
 
        if (ret)
                goto out;
-       /*
-        * According to errata FE-3124064, Write to RTC TIME register
-        * may fail. As a workaround, after writing to RTC TIME
-        * register, issue a dummy write of 0x0 twice to RTC Status
-        * register.
-        */
+
        spin_lock_irqsave(&rtc->lock, flags);
        rtc_delayed_write(time, rtc, RTC_TIME);
-       rtc_delayed_write(0, rtc, RTC_STATUS);
-       rtc_delayed_write(0, rtc, RTC_STATUS);
        spin_unlock_irqrestore(&rtc->lock, flags);
 
 out:
@@ -107,12 +247,14 @@ static int armada38x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct armada38x_rtc *rtc = dev_get_drvdata(dev);
        unsigned long time, flags;
+       u32 reg = ALARM_REG(RTC_ALARM1, rtc->data->alarm);
+       u32 reg_irq = ALARM_REG(RTC_IRQ1_CONF, rtc->data->alarm);
        u32 val;
 
        spin_lock_irqsave(&rtc->lock, flags);
 
-       time = readl(rtc->regs + RTC_ALARM1);
-       val = readl(rtc->regs + RTC_IRQ1_CONF) & RTC_IRQ1_AL_EN;
+       time = rtc->data->read_rtc_reg(rtc, reg);
+       val = rtc->data->read_rtc_reg(rtc, reg_irq) & RTC_IRQ_AL_EN;
 
        spin_unlock_irqrestore(&rtc->lock, flags);
 
@@ -125,9 +267,10 @@ static int armada38x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 static int armada38x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct armada38x_rtc *rtc = dev_get_drvdata(dev);
+       u32 reg = ALARM_REG(RTC_ALARM1, rtc->data->alarm);
+       u32 reg_irq = ALARM_REG(RTC_IRQ1_CONF, rtc->data->alarm);
        unsigned long time, flags;
        int ret = 0;
-       u32 val;
 
        ret = rtc_tm_to_time(&alrm->time, &time);
 
@@ -136,13 +279,11 @@ static int armada38x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
        spin_lock_irqsave(&rtc->lock, flags);
 
-       rtc_delayed_write(time, rtc, RTC_ALARM1);
+       rtc_delayed_write(time, rtc, reg);
 
        if (alrm->enabled) {
-                       rtc_delayed_write(RTC_IRQ1_AL_EN, rtc, RTC_IRQ1_CONF);
-                       val = readl(rtc->regs_soc + SOC_RTC_INTERRUPT);
-                       writel(val | SOC_RTC_ALARM1_MASK,
-                              rtc->regs_soc + SOC_RTC_INTERRUPT);
+               rtc_delayed_write(RTC_IRQ_AL_EN, rtc, reg_irq);
+               rtc->data->unmask_interrupt(rtc);
        }
 
        spin_unlock_irqrestore(&rtc->lock, flags);
@@ -155,14 +296,15 @@ static int armada38x_rtc_alarm_irq_enable(struct device *dev,
                                         unsigned int enabled)
 {
        struct armada38x_rtc *rtc = dev_get_drvdata(dev);
+       u32 reg_irq = ALARM_REG(RTC_IRQ1_CONF, rtc->data->alarm);
        unsigned long flags;
 
        spin_lock_irqsave(&rtc->lock, flags);
 
        if (enabled)
-               rtc_delayed_write(RTC_IRQ1_AL_EN, rtc, RTC_IRQ1_CONF);
+               rtc_delayed_write(RTC_IRQ_AL_EN, rtc, reg_irq);
        else
-               rtc_delayed_write(0, rtc, RTC_IRQ1_CONF);
+               rtc_delayed_write(0, rtc, reg_irq);
 
        spin_unlock_irqrestore(&rtc->lock, flags);
 
@@ -174,24 +316,23 @@ static irqreturn_t armada38x_rtc_alarm_irq(int irq, void *data)
        struct armada38x_rtc *rtc = data;
        u32 val;
        int event = RTC_IRQF | RTC_AF;
+       u32 reg_irq = ALARM_REG(RTC_IRQ1_CONF, rtc->data->alarm);
 
        dev_dbg(&rtc->rtc_dev->dev, "%s:irq(%d)\n", __func__, irq);
 
        spin_lock(&rtc->lock);
 
-       val = readl(rtc->regs_soc + SOC_RTC_INTERRUPT);
-
-       writel(val & ~SOC_RTC_ALARM1, rtc->regs_soc + SOC_RTC_INTERRUPT);
-       val = readl(rtc->regs + RTC_IRQ1_CONF);
-       /* disable all the interrupts for alarm 1 */
-       rtc_delayed_write(0, rtc, RTC_IRQ1_CONF);
+       rtc->data->clear_isr(rtc);
+       val = rtc->data->read_rtc_reg(rtc, reg_irq);
+       /* disable all the interrupts for the alarm */
+       rtc_delayed_write(0, rtc, reg_irq);
        /* Ack the event */
-       rtc_delayed_write(RTC_STATUS_ALARM1, rtc, RTC_STATUS);
+       rtc_delayed_write(1 << rtc->data->alarm, rtc, RTC_STATUS);
 
        spin_unlock(&rtc->lock);
 
-       if (val & RTC_IRQ1_FREQ_EN) {
-               if (val & RTC_IRQ1_FREQ_1HZ)
+       if (val & RTC_IRQ_FREQ_EN) {
+               if (val & RTC_IRQ_FREQ_1HZ)
                        event |= RTC_UF;
                else
                        event |= RTC_PF;
@@ -202,7 +343,7 @@ static irqreturn_t armada38x_rtc_alarm_irq(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-static struct rtc_class_ops armada38x_rtc_ops = {
+static const struct rtc_class_ops armada38x_rtc_ops = {
        .read_time = armada38x_rtc_read_time,
        .set_time = armada38x_rtc_set_time,
        .read_alarm = armada38x_rtc_read_alarm,
@@ -210,17 +351,65 @@ static struct rtc_class_ops armada38x_rtc_ops = {
        .alarm_irq_enable = armada38x_rtc_alarm_irq_enable,
 };
 
+static const struct rtc_class_ops armada38x_rtc_ops_noirq = {
+       .read_time = armada38x_rtc_read_time,
+       .set_time = armada38x_rtc_set_time,
+       .read_alarm = armada38x_rtc_read_alarm,
+};
+
+static const struct armada38x_rtc_data armada38x_data = {
+       .update_mbus_timing = rtc_update_38x_mbus_timing_params,
+       .read_rtc_reg = read_rtc_register_38x_wa,
+       .clear_isr = armada38x_clear_isr,
+       .unmask_interrupt = armada38x_unmask_interrupt,
+       .alarm = ALARM1,
+};
+
+static const struct armada38x_rtc_data armada8k_data = {
+       .update_mbus_timing = rtc_update_8k_mbus_timing_params,
+       .read_rtc_reg = read_rtc_register,
+       .clear_isr = armada8k_clear_isr,
+       .unmask_interrupt = armada8k_unmask_interrupt,
+       .alarm = ALARM2,
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id armada38x_rtc_of_match_table[] = {
+       {
+               .compatible = "marvell,armada-380-rtc",
+               .data = &armada38x_data,
+       },
+       {
+               .compatible = "marvell,armada-8k-rtc",
+               .data = &armada8k_data,
+       },
+       {}
+};
+MODULE_DEVICE_TABLE(of, armada38x_rtc_of_match_table);
+#endif
+
 static __init int armada38x_rtc_probe(struct platform_device *pdev)
 {
+       const struct rtc_class_ops *ops;
        struct resource *res;
        struct armada38x_rtc *rtc;
+       const struct of_device_id *match;
        int ret;
 
+       match = of_match_device(armada38x_rtc_of_match_table, &pdev->dev);
+       if (!match)
+               return -ENODEV;
+
        rtc = devm_kzalloc(&pdev->dev, sizeof(struct armada38x_rtc),
                            GFP_KERNEL);
        if (!rtc)
                return -ENOMEM;
 
+       rtc->val_to_freq = devm_kcalloc(&pdev->dev, SAMPLE_NR,
+                               sizeof(struct value_to_freq), GFP_KERNEL);
+       if (!rtc->val_to_freq)
+               return -ENOMEM;
+
        spin_lock_init(&rtc->lock);
 
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rtc");
@@ -242,19 +431,27 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev)
                                0, pdev->name, rtc) < 0) {
                dev_warn(&pdev->dev, "Interrupt not available.\n");
                rtc->irq = -1;
+       }
+       platform_set_drvdata(pdev, rtc);
+
+       if (rtc->irq != -1) {
+               device_init_wakeup(&pdev->dev, 1);
+               ops = &armada38x_rtc_ops;
+       } else {
                /*
                 * If there is no interrupt available then we can't
                 * use the alarm
                 */
-               armada38x_rtc_ops.set_alarm = NULL;
-               armada38x_rtc_ops.alarm_irq_enable = NULL;
+               ops = &armada38x_rtc_ops_noirq;
        }
-       platform_set_drvdata(pdev, rtc);
-       if (rtc->irq != -1)
-               device_init_wakeup(&pdev->dev, 1);
+       rtc->data = (struct armada38x_rtc_data *)match->data;
+
+
+       /* Update RTC-MBUS bridge timing parameters */
+       rtc->data->update_mbus_timing(rtc);
 
        rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, pdev->name,
-                                       &armada38x_rtc_ops, THIS_MODULE);
+                                               ops, THIS_MODULE);
        if (IS_ERR(rtc->rtc_dev)) {
                ret = PTR_ERR(rtc->rtc_dev);
                dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret);
@@ -280,6 +477,9 @@ static int armada38x_rtc_resume(struct device *dev)
        if (device_may_wakeup(dev)) {
                struct armada38x_rtc *rtc = dev_get_drvdata(dev);
 
+               /* Update RTC-MBUS bridge timing parameters */
+               rtc->data->update_mbus_timing(rtc);
+
                return disable_irq_wake(rtc->irq);
        }
 
@@ -290,14 +490,6 @@ static int armada38x_rtc_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(armada38x_rtc_pm_ops,
                         armada38x_rtc_suspend, armada38x_rtc_resume);
 
-#ifdef CONFIG_OF
-static const struct of_device_id armada38x_rtc_of_match_table[] = {
-       { .compatible = "marvell,armada-380-rtc", },
-       {}
-};
-MODULE_DEVICE_TABLE(of, armada38x_rtc_of_match_table);
-#endif
-
 static struct platform_driver armada38x_rtc_driver = {
        .driver         = {
                .name   = "armada38x-rtc",
index 84d6e026784daad1d793e6d10ff7da5c0fa239fe..2ba44ccb9c3a3fe00e3e9bf2c740845001011b79 100644 (file)
@@ -56,7 +56,7 @@ static int au1xtoy_rtc_set_time(struct device *dev, struct rtc_time *tm)
        return 0;
 }
 
-static struct rtc_class_ops au1xtoy_rtc_ops = {
+static const struct rtc_class_ops au1xtoy_rtc_ops = {
        .read_time      = au1xtoy_rtc_read_time,
        .set_time       = au1xtoy_rtc_set_time,
 };
index 535a5f9338d026ec8f433fb4960dc7227728ddf6..15344b7c07c599330366def14638236e093dbe6b 100644 (file)
@@ -333,7 +333,7 @@ static int bfin_rtc_proc(struct device *dev, struct seq_file *seq)
 #undef yesno
 }
 
-static struct rtc_class_ops bfin_rtc_ops = {
+static const struct rtc_class_ops bfin_rtc_ops = {
        .read_time     = bfin_rtc_read_time,
        .set_time      = bfin_rtc_set_time,
        .read_alarm    = bfin_rtc_read_alarm,
index 397742446007aa2479969969c60097ad3edb6889..2b223935001fb57e92abdbbd360b02db70ada47c 100644 (file)
@@ -34,6 +34,7 @@
 #define BQ32K_CALIBRATION      0x07    /* CAL_CFG1, calibration and control */
 #define BQ32K_TCH2             0x08    /* Trickle charge enable */
 #define BQ32K_CFG2             0x09    /* Trickle charger control */
+#define BQ32K_TCFE             BIT(6)  /* Trickle charge FET bypass */
 
 struct bq32k_regs {
        uint8_t         seconds;
@@ -188,6 +189,65 @@ static int trickle_charger_of_init(struct device *dev, struct device_node *node)
        return 0;
 }
 
+static ssize_t bq32k_sysfs_show_tricklecharge_bypass(struct device *dev,
+                                              struct device_attribute *attr,
+                                              char *buf)
+{
+       int reg, error;
+
+       error = bq32k_read(dev, &reg, BQ32K_CFG2, 1);
+       if (error)
+               return error;
+
+       return sprintf(buf, "%d\n", (reg & BQ32K_TCFE) ? 1 : 0);
+}
+
+static ssize_t bq32k_sysfs_store_tricklecharge_bypass(struct device *dev,
+                                               struct device_attribute *attr,
+                                               const char *buf, size_t count)
+{
+       int reg, enable, error;
+
+       if (kstrtoint(buf, 0, &enable))
+               return -EINVAL;
+
+       error = bq32k_read(dev, &reg, BQ32K_CFG2, 1);
+       if (error)
+               return error;
+
+       if (enable) {
+               reg |= BQ32K_TCFE;
+               error = bq32k_write(dev, &reg, BQ32K_CFG2, 1);
+               if (error)
+                       return error;
+
+               dev_info(dev, "Enabled trickle charge FET bypass.\n");
+       } else {
+               reg &= ~BQ32K_TCFE;
+               error = bq32k_write(dev, &reg, BQ32K_CFG2, 1);
+               if (error)
+                       return error;
+
+               dev_info(dev, "Disabled trickle charge FET bypass.\n");
+       }
+
+       return count;
+}
+
+static DEVICE_ATTR(trickle_charge_bypass, 0644,
+                  bq32k_sysfs_show_tricklecharge_bypass,
+                  bq32k_sysfs_store_tricklecharge_bypass);
+
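+/*
+ * From userspace, writing 0 to the trickle_charge_bypass attribute
+ * clears the TCFE bit and any non-zero value sets it; reading it back
+ * reports the current state as 0 or 1.
+ */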
+static int bq32k_sysfs_register(struct device *dev)
+{
+       return device_create_file(dev, &dev_attr_trickle_charge_bypass);
+}
+
+static void bq32k_sysfs_unregister(struct device *dev)
+{
+       device_remove_file(dev, &dev_attr_trickle_charge_bypass);
+}
+
 static int bq32k_probe(struct i2c_client *client,
                                const struct i2c_device_id *id)
 {
@@ -224,11 +284,26 @@ static int bq32k_probe(struct i2c_client *client,
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);
 
+       error = bq32k_sysfs_register(&client->dev);
+       if (error) {
+               dev_err(&client->dev,
+                       "Unable to create sysfs entries for rtc bq32000\n");
+               return error;
+       }
+
+
        i2c_set_clientdata(client, rtc);
 
        return 0;
 }
 
+static int bq32k_remove(struct i2c_client *client)
+{
+       bq32k_sysfs_unregister(&client->dev);
+
+       return 0;
+}
+
 static const struct i2c_device_id bq32k_id[] = {
        { "bq32000", 0 },
        { }
@@ -240,6 +315,7 @@ static struct i2c_driver bq32k_driver = {
                .name   = "bq32k",
        },
        .probe          = bq32k_probe,
+       .remove         = bq32k_remove,
        .id_table       = bq32k_id,
 };
 
index 94067f8eeb103c1ff424ab17a69bf3833f02fd18..f225cd873ff6c29be6636b82a6506d77c6088b76 100644 (file)
@@ -116,7 +116,7 @@ static int dm355evm_rtc_set_time(struct device *dev, struct rtc_time *tm)
        return 0;
 }
 
-static struct rtc_class_ops dm355evm_rtc_ops = {
+static const struct rtc_class_ops dm355evm_rtc_ops = {
        .read_time      = dm355evm_rtc_read_time,
        .set_time       = dm355evm_rtc_set_time,
 };
index b1f20d8c358fd5c2a223563c46e302d221e575e9..9bb39a06b994ad5990b47870fe53fdaf7436dd7c 100644 (file)
 #include <linux/slab.h>
 #include <linux/regmap.h>
 
-#define DS3232_REG_SECONDS     0x00
-#define DS3232_REG_MINUTES     0x01
-#define DS3232_REG_HOURS       0x02
-#define DS3232_REG_AMPM                0x02
-#define DS3232_REG_DAY         0x03
-#define DS3232_REG_DATE                0x04
-#define DS3232_REG_MONTH       0x05
-#define DS3232_REG_CENTURY     0x05
-#define DS3232_REG_YEAR                0x06
-#define DS3232_REG_ALARM1         0x07 /* Alarm 1 BASE */
-#define DS3232_REG_ALARM2         0x0B /* Alarm 2 BASE */
-#define DS3232_REG_CR          0x0E    /* Control register */
-#      define DS3232_REG_CR_nEOSC        0x80
-#       define DS3232_REG_CR_INTCN        0x04
-#       define DS3232_REG_CR_A2IE        0x02
-#       define DS3232_REG_CR_A1IE        0x01
-
-#define DS3232_REG_SR  0x0F    /* control/status register */
-#      define DS3232_REG_SR_OSF   0x80
-#       define DS3232_REG_SR_BSY   0x04
-#       define DS3232_REG_SR_A2F   0x02
-#       define DS3232_REG_SR_A1F   0x01
+#define DS3232_REG_SECONDS      0x00
+#define DS3232_REG_MINUTES      0x01
+#define DS3232_REG_HOURS        0x02
+#define DS3232_REG_AMPM         0x02
+#define DS3232_REG_DAY          0x03
+#define DS3232_REG_DATE         0x04
+#define DS3232_REG_MONTH        0x05
+#define DS3232_REG_CENTURY      0x05
+#define DS3232_REG_YEAR         0x06
+#define DS3232_REG_ALARM1       0x07       /* Alarm 1 BASE */
+#define DS3232_REG_ALARM2       0x0B       /* Alarm 2 BASE */
+#define DS3232_REG_CR           0x0E       /* Control register */
+#       define DS3232_REG_CR_nEOSC   0x80
+#       define DS3232_REG_CR_INTCN   0x04
+#       define DS3232_REG_CR_A2IE    0x02
+#       define DS3232_REG_CR_A1IE    0x01
+
+#define DS3232_REG_SR           0x0F       /* control/status register */
+#       define DS3232_REG_SR_OSF     0x80
+#       define DS3232_REG_SR_BSY     0x04
+#       define DS3232_REG_SR_A2F     0x02
+#       define DS3232_REG_SR_A1F     0x01
 
 struct ds3232 {
        struct device *dev;
@@ -363,6 +363,9 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq,
        if (ret)
                return ret;
 
+       if (ds3232->irq > 0)
+               device_init_wakeup(dev, 1);
+
        ds3232->rtc = devm_rtc_device_register(dev, name, &ds3232_rtc_ops,
                                                THIS_MODULE);
        if (IS_ERR(ds3232->rtc))
@@ -374,10 +377,10 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq,
                                                IRQF_SHARED | IRQF_ONESHOT,
                                                name, dev);
                if (ret) {
+                       device_set_wakeup_capable(dev, 0);
                        ds3232->irq = 0;
                        dev_err(dev, "unable to request IRQ\n");
-               } else
-                       device_init_wakeup(dev, 1);
+               }
        }
 
        return 0;
@@ -420,6 +423,7 @@ static int ds3232_i2c_probe(struct i2c_client *client,
        static const struct regmap_config config = {
                .reg_bits = 8,
                .val_bits = 8,
+               .max_register = 0x13,
        };
 
        regmap = devm_regmap_init_i2c(client, &config);
@@ -479,6 +483,7 @@ static int ds3234_probe(struct spi_device *spi)
        static const struct regmap_config config = {
                .reg_bits = 8,
                .val_bits = 8,
+               .max_register = 0x13,
                .write_flag_mask = 0x80,
        };
        struct regmap *regmap;
index 688debc143483ff33aef70c08c85656a093b0fc9..ccf0dbadb62d16d7c512fffb9fc37efb7fcac7b8 100644 (file)
@@ -159,9 +159,16 @@ static int gemini_rtc_remove(struct platform_device *pdev)
        return 0;
 }
 
+static const struct of_device_id gemini_rtc_dt_match[] = {
+       { .compatible = "cortina,gemini-rtc" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, gemini_rtc_dt_match);
+
 static struct platform_driver gemini_rtc_driver = {
        .driver         = {
                .name   = DRV_NAME,
+               .of_match_table = gemini_rtc_dt_match,
        },
        .probe          = gemini_rtc_probe,
        .remove         = gemini_rtc_remove,
index 67b56b80dc7097049dbf20f0ea8d0e908a231b5f..6b54f6c24c5fb0e3e3cd1972dbaa41a3a5a2ca78 100644 (file)
  * @pdev: pointer to platform dev
  * @rtc: pointer to rtc struct
  * @ioaddr: IO registers pointer
- * @irq: dryice normal interrupt
  * @clk: input reference clock
  * @dsr: copy of the DSR register
  * @irq_lock: interrupt enable register (DIER) lock
@@ -120,7 +119,6 @@ struct imxdi_dev {
        struct platform_device *pdev;
        struct rtc_device *rtc;
        void __iomem *ioaddr;
-       int irq;
        struct clk *clk;
        u32 dsr;
        spinlock_t irq_lock;
@@ -668,7 +666,7 @@ static int dryice_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
        return 0;
 }
 
-static struct rtc_class_ops dryice_rtc_ops = {
+static const struct rtc_class_ops dryice_rtc_ops = {
        .read_time              = dryice_rtc_read_time,
        .set_mmss               = dryice_rtc_set_mmss,
        .alarm_irq_enable       = dryice_rtc_alarm_irq_enable,
@@ -677,9 +675,9 @@ static struct rtc_class_ops dryice_rtc_ops = {
 };
 
 /*
- * dryice "normal" interrupt handler
+ * interrupt handler for dryice "normal" and security violation interrupt
  */
-static irqreturn_t dryice_norm_irq(int irq, void *dev_id)
+static irqreturn_t dryice_irq(int irq, void *dev_id)
 {
        struct imxdi_dev *imxdi = dev_id;
        u32 dsr, dier;
@@ -765,6 +763,7 @@ static int __init dryice_rtc_probe(struct platform_device *pdev)
 {
        struct resource *res;
        struct imxdi_dev *imxdi;
+       int norm_irq, sec_irq;
        int rc;
 
        imxdi = devm_kzalloc(&pdev->dev, sizeof(*imxdi), GFP_KERNEL);
@@ -780,9 +779,16 @@ static int __init dryice_rtc_probe(struct platform_device *pdev)
 
        spin_lock_init(&imxdi->irq_lock);
 
-       imxdi->irq = platform_get_irq(pdev, 0);
-       if (imxdi->irq < 0)
-               return imxdi->irq;
+       norm_irq = platform_get_irq(pdev, 0);
+       if (norm_irq < 0)
+               return norm_irq;
+
+       /* The 2nd irq is the security violation irq.
+        * Make it optional so we don't break the device tree ABI.
+        */
+       sec_irq = platform_get_irq(pdev, 1);
+       if (sec_irq <= 0)
+               sec_irq = IRQ_NOTCONNECTED;
 
        init_waitqueue_head(&imxdi->write_wait);
 
@@ -808,13 +814,20 @@ static int __init dryice_rtc_probe(struct platform_device *pdev)
        if (rc != 0)
                goto err;
 
-       rc = devm_request_irq(&pdev->dev, imxdi->irq, dryice_norm_irq,
-                       IRQF_SHARED, pdev->name, imxdi);
+       rc = devm_request_irq(&pdev->dev, norm_irq, dryice_irq,
+                             IRQF_SHARED, pdev->name, imxdi);
        if (rc) {
                dev_warn(&pdev->dev, "interrupt not available.\n");
                goto err;
        }
 
+       rc = devm_request_irq(&pdev->dev, sec_irq, dryice_irq,
+                             IRQF_SHARED, pdev->name, imxdi);
+       if (rc) {
+               dev_warn(&pdev->dev, "security violation interrupt not available.\n");
+               /* this is not an error, see above */
+       }
+
        platform_set_drvdata(pdev, imxdi);
        imxdi->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
                                  &dryice_rtc_ops, THIS_MODULE);
index 22a9ec4f2b836c53585880ec6cd65b222ed522df..e04ca54f21e20791fbf393de0154f4168b3c6026 100644 (file)
@@ -138,7 +138,7 @@ err:
        return ret;
 }
 
-static struct rtc_class_ops  ls1x_rtc_ops = {
+static const struct rtc_class_ops  ls1x_rtc_ops = {
        .read_time      = ls1x_rtc_read_time,
        .set_time       = ls1x_rtc_set_time,
 };
index 0eeb5714c00fa610f7e438ac084ee16e09a89ae3..02af045305dd3ce156a7f60b14cc153e548ce60c 100644 (file)
 #include <linux/module.h>
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/rtc-m48t86.h>
 #include <linux/bcd.h>
+#include <linux/io.h>
 
-#define M48T86_REG_SEC         0x00
-#define M48T86_REG_SECALRM     0x01
-#define M48T86_REG_MIN         0x02
-#define M48T86_REG_MINALRM     0x03
-#define M48T86_REG_HOUR                0x04
-#define M48T86_REG_HOURALRM    0x05
-#define M48T86_REG_DOW         0x06 /* 1 = sunday */
-#define M48T86_REG_DOM         0x07
-#define M48T86_REG_MONTH       0x08 /* 1 - 12 */
-#define M48T86_REG_YEAR                0x09 /* 0 - 99 */
-#define M48T86_REG_A           0x0A
-#define M48T86_REG_B           0x0B
-#define M48T86_REG_C           0x0C
-#define M48T86_REG_D           0x0D
-
-#define M48T86_REG_B_H24       (1 << 1)
-#define M48T86_REG_B_DM                (1 << 2)
-#define M48T86_REG_B_SET       (1 << 7)
-#define M48T86_REG_D_VRT       (1 << 7)
+#define M48T86_SEC             0x00
+#define M48T86_SECALRM         0x01
+#define M48T86_MIN             0x02
+#define M48T86_MINALRM         0x03
+#define M48T86_HOUR            0x04
+#define M48T86_HOURALRM                0x05
+#define M48T86_DOW             0x06 /* 1 = sunday */
+#define M48T86_DOM             0x07
+#define M48T86_MONTH           0x08 /* 1 - 12 */
+#define M48T86_YEAR            0x09 /* 0 - 99 */
+#define M48T86_A               0x0a
+#define M48T86_B               0x0b
+#define M48T86_B_SET           BIT(7)
+#define M48T86_B_DM            BIT(2)
+#define M48T86_B_H24           BIT(1)
+#define M48T86_C               0x0c
+#define M48T86_D               0x0d
+#define M48T86_D_VRT           BIT(7)
+#define M48T86_NVRAM(x)                (0x0e + (x))
+#define M48T86_NVRAM_LEN       114
+
+struct m48t86_rtc_info {
+       void __iomem *index_reg;
+       void __iomem *data_reg;
+       struct rtc_device *rtc;
+};
+
+static unsigned char m48t86_readb(struct device *dev, unsigned long addr)
+{
+       struct m48t86_rtc_info *info = dev_get_drvdata(dev);
+       unsigned char value;
+
+       writeb(addr, info->index_reg);
+       value = readb(info->data_reg);
+
+       return value;
+}
+
+static void m48t86_writeb(struct device *dev,
+                         unsigned char value, unsigned long addr)
+{
+       struct m48t86_rtc_info *info = dev_get_drvdata(dev);
+
+       writeb(addr, info->index_reg);
+       writeb(value, info->data_reg);
+}
 
 static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        unsigned char reg;
-       struct platform_device *pdev = to_platform_device(dev);
-       struct m48t86_ops *ops = dev_get_platdata(&pdev->dev);
 
-       reg = ops->readbyte(M48T86_REG_B);
+       reg = m48t86_readb(dev, M48T86_B);
 
-       if (reg & M48T86_REG_B_DM) {
+       if (reg & M48T86_B_DM) {
                /* data (binary) mode */
-               tm->tm_sec      = ops->readbyte(M48T86_REG_SEC);
-               tm->tm_min      = ops->readbyte(M48T86_REG_MIN);
-               tm->tm_hour     = ops->readbyte(M48T86_REG_HOUR) & 0x3F;
-               tm->tm_mday     = ops->readbyte(M48T86_REG_DOM);
+               tm->tm_sec      = m48t86_readb(dev, M48T86_SEC);
+               tm->tm_min      = m48t86_readb(dev, M48T86_MIN);
+               tm->tm_hour     = m48t86_readb(dev, M48T86_HOUR) & 0x3f;
+               tm->tm_mday     = m48t86_readb(dev, M48T86_DOM);
                /* tm_mon is 0-11 */
-               tm->tm_mon      = ops->readbyte(M48T86_REG_MONTH) - 1;
-               tm->tm_year     = ops->readbyte(M48T86_REG_YEAR) + 100;
-               tm->tm_wday     = ops->readbyte(M48T86_REG_DOW);
+               tm->tm_mon      = m48t86_readb(dev, M48T86_MONTH) - 1;
+               tm->tm_year     = m48t86_readb(dev, M48T86_YEAR) + 100;
+               tm->tm_wday     = m48t86_readb(dev, M48T86_DOW);
        } else {
                /* bcd mode */
-               tm->tm_sec      = bcd2bin(ops->readbyte(M48T86_REG_SEC));
-               tm->tm_min      = bcd2bin(ops->readbyte(M48T86_REG_MIN));
-               tm->tm_hour     = bcd2bin(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
-               tm->tm_mday     = bcd2bin(ops->readbyte(M48T86_REG_DOM));
+               tm->tm_sec      = bcd2bin(m48t86_readb(dev, M48T86_SEC));
+               tm->tm_min      = bcd2bin(m48t86_readb(dev, M48T86_MIN));
+               tm->tm_hour     = bcd2bin(m48t86_readb(dev, M48T86_HOUR) &
+                                         0x3f);
+               tm->tm_mday     = bcd2bin(m48t86_readb(dev, M48T86_DOM));
                /* tm_mon is 0-11 */
-               tm->tm_mon      = bcd2bin(ops->readbyte(M48T86_REG_MONTH)) - 1;
-               tm->tm_year     = bcd2bin(ops->readbyte(M48T86_REG_YEAR)) + 100;
-               tm->tm_wday     = bcd2bin(ops->readbyte(M48T86_REG_DOW));
+               tm->tm_mon      = bcd2bin(m48t86_readb(dev, M48T86_MONTH)) - 1;
+               tm->tm_year     = bcd2bin(m48t86_readb(dev, M48T86_YEAR)) + 100;
+               tm->tm_wday     = bcd2bin(m48t86_readb(dev, M48T86_DOW));
        }
 
        /* correct the hour if the clock is in 12h mode */
-       if (!(reg & M48T86_REG_B_H24))
-               if (ops->readbyte(M48T86_REG_HOUR) & 0x80)
+       if (!(reg & M48T86_B_H24))
+               if (m48t86_readb(dev, M48T86_HOUR) & 0x80)
                        tm->tm_hour += 12;
 
        return rtc_valid_tm(tm);
@@ -80,38 +106,36 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
 static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
        unsigned char reg;
-       struct platform_device *pdev = to_platform_device(dev);
-       struct m48t86_ops *ops = dev_get_platdata(&pdev->dev);
 
-       reg = ops->readbyte(M48T86_REG_B);
+       reg = m48t86_readb(dev, M48T86_B);
 
        /* update flag and 24h mode */
-       reg |= M48T86_REG_B_SET | M48T86_REG_B_H24;
-       ops->writebyte(reg, M48T86_REG_B);
+       reg |= M48T86_B_SET | M48T86_B_H24;
+       m48t86_writeb(dev, reg, M48T86_B);
 
-       if (reg & M48T86_REG_B_DM) {
+       if (reg & M48T86_B_DM) {
                /* data (binary) mode */
-               ops->writebyte(tm->tm_sec, M48T86_REG_SEC);
-               ops->writebyte(tm->tm_min, M48T86_REG_MIN);
-               ops->writebyte(tm->tm_hour, M48T86_REG_HOUR);
-               ops->writebyte(tm->tm_mday, M48T86_REG_DOM);
-               ops->writebyte(tm->tm_mon + 1, M48T86_REG_MONTH);
-               ops->writebyte(tm->tm_year % 100, M48T86_REG_YEAR);
-               ops->writebyte(tm->tm_wday, M48T86_REG_DOW);
+               m48t86_writeb(dev, tm->tm_sec, M48T86_SEC);
+               m48t86_writeb(dev, tm->tm_min, M48T86_MIN);
+               m48t86_writeb(dev, tm->tm_hour, M48T86_HOUR);
+               m48t86_writeb(dev, tm->tm_mday, M48T86_DOM);
+               m48t86_writeb(dev, tm->tm_mon + 1, M48T86_MONTH);
+               m48t86_writeb(dev, tm->tm_year % 100, M48T86_YEAR);
+               m48t86_writeb(dev, tm->tm_wday, M48T86_DOW);
        } else {
                /* bcd mode */
-               ops->writebyte(bin2bcd(tm->tm_sec), M48T86_REG_SEC);
-               ops->writebyte(bin2bcd(tm->tm_min), M48T86_REG_MIN);
-               ops->writebyte(bin2bcd(tm->tm_hour), M48T86_REG_HOUR);
-               ops->writebyte(bin2bcd(tm->tm_mday), M48T86_REG_DOM);
-               ops->writebyte(bin2bcd(tm->tm_mon + 1), M48T86_REG_MONTH);
-               ops->writebyte(bin2bcd(tm->tm_year % 100), M48T86_REG_YEAR);
-               ops->writebyte(bin2bcd(tm->tm_wday), M48T86_REG_DOW);
+               m48t86_writeb(dev, bin2bcd(tm->tm_sec), M48T86_SEC);
+               m48t86_writeb(dev, bin2bcd(tm->tm_min), M48T86_MIN);
+               m48t86_writeb(dev, bin2bcd(tm->tm_hour), M48T86_HOUR);
+               m48t86_writeb(dev, bin2bcd(tm->tm_mday), M48T86_DOM);
+               m48t86_writeb(dev, bin2bcd(tm->tm_mon + 1), M48T86_MONTH);
+               m48t86_writeb(dev, bin2bcd(tm->tm_year % 100), M48T86_YEAR);
+               m48t86_writeb(dev, bin2bcd(tm->tm_wday), M48T86_DOW);
        }
 
        /* update ended */
-       reg &= ~M48T86_REG_B_SET;
-       ops->writebyte(reg, M48T86_REG_B);
+       reg &= ~M48T86_B_SET;
+       m48t86_writeb(dev, reg, M48T86_B);
 
        return 0;
 }
@@ -119,18 +143,16 @@ static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
 static int m48t86_rtc_proc(struct device *dev, struct seq_file *seq)
 {
        unsigned char reg;
-       struct platform_device *pdev = to_platform_device(dev);
-       struct m48t86_ops *ops = dev_get_platdata(&pdev->dev);
 
-       reg = ops->readbyte(M48T86_REG_B);
+       reg = m48t86_readb(dev, M48T86_B);
 
        seq_printf(seq, "mode\t\t: %s\n",
-                (reg & M48T86_REG_B_DM) ? "binary" : "bcd");
+                  (reg & M48T86_B_DM) ? "binary" : "bcd");
 
-       reg = ops->readbyte(M48T86_REG_D);
+       reg = m48t86_readb(dev, M48T86_D);
 
        seq_printf(seq, "battery\t\t: %s\n",
-                (reg & M48T86_REG_D_VRT) ? "ok" : "exhausted");
+                  (reg & M48T86_D_VRT) ? "ok" : "exhausted");
 
        return 0;
 }
@@ -141,25 +163,116 @@ static const struct rtc_class_ops m48t86_rtc_ops = {
        .proc           = m48t86_rtc_proc,
 };
 
-static int m48t86_rtc_probe(struct platform_device *dev)
+static ssize_t m48t86_nvram_read(struct file *filp, struct kobject *kobj,
+                                struct bin_attribute *attr,
+                                char *buf, loff_t off, size_t count)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       unsigned int i;
+
+       for (i = 0; i < count; i++)
+               buf[i] = m48t86_readb(dev, M48T86_NVRAM(off + i));
+
+       return count;
+}
+
+static ssize_t m48t86_nvram_write(struct file *filp, struct kobject *kobj,
+                                 struct bin_attribute *attr,
+                                 char *buf, loff_t off, size_t count)
 {
+       struct device *dev = kobj_to_dev(kobj);
+       unsigned int i;
+
+       for (i = 0; i < count; i++)
+               m48t86_writeb(dev, buf[i], M48T86_NVRAM(off + i));
+
+       return count;
+}
+
+static BIN_ATTR(nvram, 0644, m48t86_nvram_read, m48t86_nvram_write,
+               M48T86_NVRAM_LEN);
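The bin attribute above exposes the 114-byte NVRAM to userspace via sysfs. A minimal userspace sketch, assuming a hypothetical sysfs path that depends on how the platform device is named:

    /* Userspace example: dump the first byte of the m48t86 NVRAM. */
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            unsigned char buf[114];
            FILE *f = fopen("/sys/devices/platform/rtc-m48t86/nvram", "rb");

            if (!f)
                    return EXIT_FAILURE;
            if (fread(buf, 1, sizeof(buf), f) != sizeof(buf)) {
                    fclose(f);
                    return EXIT_FAILURE;
            }
            fclose(f);
            printf("first byte: 0x%02x\n", buf[0]);
            return EXIT_SUCCESS;
    }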
+
+/*
+ * The RTC is an optional feature at purchase time on some Technologic Systems
+ * boards. Verify that it actually exists by checking if the last two bytes
+ * of the NVRAM can be changed.
+ *
+ * This is based on the method used in their rtc7800.c example.
+ */
+static bool m48t86_verify_chip(struct platform_device *pdev)
+{
+       unsigned int offset0 = M48T86_NVRAM(M48T86_NVRAM_LEN - 2);
+       unsigned int offset1 = M48T86_NVRAM(M48T86_NVRAM_LEN - 1);
+       unsigned char tmp0, tmp1;
+
+       tmp0 = m48t86_readb(&pdev->dev, offset0);
+       tmp1 = m48t86_readb(&pdev->dev, offset1);
+
+       m48t86_writeb(&pdev->dev, 0x00, offset0);
+       m48t86_writeb(&pdev->dev, 0x55, offset1);
+       if (m48t86_readb(&pdev->dev, offset1) == 0x55) {
+               m48t86_writeb(&pdev->dev, 0xaa, offset1);
+               if (m48t86_readb(&pdev->dev, offset1) == 0xaa &&
+                   m48t86_readb(&pdev->dev, offset0) == 0x00) {
+                       m48t86_writeb(&pdev->dev, tmp0, offset0);
+                       m48t86_writeb(&pdev->dev, tmp1, offset1);
+
+                       return true;
+               }
+       }
+       return false;
+}
+
+static int m48t86_rtc_probe(struct platform_device *pdev)
+{
+       struct m48t86_rtc_info *info;
+       struct resource *res;
        unsigned char reg;
-       struct m48t86_ops *ops = dev_get_platdata(&dev->dev);
-       struct rtc_device *rtc;
 
-       rtc = devm_rtc_device_register(&dev->dev, "m48t86",
-                               &m48t86_rtc_ops, THIS_MODULE);
+       info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+       info->index_reg = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(info->index_reg))
+               return PTR_ERR(info->index_reg);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (!res)
+               return -ENODEV;
+       info->data_reg = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(info->data_reg))
+               return PTR_ERR(info->data_reg);
 
-       if (IS_ERR(rtc))
-               return PTR_ERR(rtc);
+       dev_set_drvdata(&pdev->dev, info);
+
+       if (!m48t86_verify_chip(pdev)) {
+               dev_info(&pdev->dev, "RTC not present\n");
+               return -ENODEV;
+       }
 
-       platform_set_drvdata(dev, rtc);
+       info->rtc = devm_rtc_device_register(&pdev->dev, "m48t86",
+                                            &m48t86_rtc_ops, THIS_MODULE);
+       if (IS_ERR(info->rtc))
+               return PTR_ERR(info->rtc);
 
        /* read battery status */
-       reg = ops->readbyte(M48T86_REG_D);
-       dev_info(&dev->dev, "battery %s\n",
-               (reg & M48T86_REG_D_VRT) ? "ok" : "exhausted");
+       reg = m48t86_readb(&pdev->dev, M48T86_D);
+       dev_info(&pdev->dev, "battery %s\n",
+                (reg & M48T86_D_VRT) ? "ok" : "exhausted");
 
+       if (device_create_bin_file(&pdev->dev, &bin_attr_nvram))
+               dev_err(&pdev->dev, "failed to create nvram sysfs entry\n");
+
+       return 0;
+}
+
+static int m48t86_rtc_remove(struct platform_device *pdev)
+{
+       device_remove_bin_file(&pdev->dev, &bin_attr_nvram);
        return 0;
 }
 
@@ -168,6 +281,7 @@ static struct platform_driver m48t86_rtc_platform_driver = {
                .name   = "rtc-m48t86",
        },
        .probe          = m48t86_rtc_probe,
+       .remove         = m48t86_rtc_remove,
 };
 
 module_platform_driver(m48t86_rtc_platform_driver);
index ce75e421ba001fce02c7f7afce57c310af012001..77f21331ae21c5f098cb7d3f307c294f1a62e889 100644 (file)
 #define MCP795_REG_DAY         0x04
 #define MCP795_REG_MONTH       0x06
 #define MCP795_REG_CONTROL     0x08
+#define MCP795_REG_ALM0_SECONDS        0x0C
+#define MCP795_REG_ALM0_DAY    0x0F
 
 #define MCP795_ST_BIT          BIT(7)
 #define MCP795_24_BIT          BIT(6)
 #define MCP795_LP_BIT          BIT(5)
 #define MCP795_EXTOSC_BIT      BIT(3)
 #define MCP795_OSCON_BIT       BIT(5)
+#define MCP795_ALM0_BIT                BIT(4)
+#define MCP795_ALM1_BIT                BIT(5)
+#define MCP795_ALM0IF_BIT      BIT(3)
+#define MCP795_ALM0C0_BIT      BIT(4)
+#define MCP795_ALM0C1_BIT      BIT(5)
+#define MCP795_ALM0C2_BIT      BIT(6)
+
+#define SEC_PER_DAY            (24 * 60 * 60)
 
 static int mcp795_rtcc_read(struct device *dev, u8 addr, u8 *buf, u8 count)
 {
@@ -150,6 +160,30 @@ static int mcp795_start_oscillator(struct device *dev, bool *extosc)
                        dev, MCP795_REG_SECONDS, MCP795_ST_BIT, MCP795_ST_BIT);
 }
 
+/* Enable or disable Alarm 0 in RTC */
+static int mcp795_update_alarm(struct device *dev, bool enable)
+{
+       int ret;
+
+       dev_dbg(dev, "%s alarm\n", enable ? "Enable" : "Disable");
+
+       if (enable) {
+               /* clear ALM0IF (Alarm 0 Interrupt Flag) bit */
+               ret = mcp795_rtcc_set_bits(dev, MCP795_REG_ALM0_DAY,
+                                       MCP795_ALM0IF_BIT, 0);
+               if (ret)
+                       return ret;
+               /* enable alarm 0 */
+               ret = mcp795_rtcc_set_bits(dev, MCP795_REG_CONTROL,
+                                       MCP795_ALM0_BIT, MCP795_ALM0_BIT);
+       } else {
+               /* disable alarm 0 and alarm 1 */
+               ret = mcp795_rtcc_set_bits(dev, MCP795_REG_CONTROL,
+                                       MCP795_ALM0_BIT | MCP795_ALM1_BIT, 0);
+       }
+       return ret;
+}
+
 static int mcp795_set_time(struct device *dev, struct rtc_time *tim)
 {
        int ret;
@@ -170,6 +204,7 @@ static int mcp795_set_time(struct device *dev, struct rtc_time *tim)
        data[0] = (data[0] & 0x80) | bin2bcd(tim->tm_sec);
        data[1] = (data[1] & 0x80) | bin2bcd(tim->tm_min);
        data[2] = bin2bcd(tim->tm_hour);
+       data[3] = (data[3] & 0xF8) | bin2bcd(tim->tm_wday + 1);
        data[4] = bin2bcd(tim->tm_mday);
        data[5] = (data[5] & MCP795_LP_BIT) | bin2bcd(tim->tm_mon + 1);
 
@@ -198,9 +233,9 @@ static int mcp795_set_time(struct device *dev, struct rtc_time *tim)
        if (ret)
                return ret;
 
-       dev_dbg(dev, "Set mcp795: %04d-%02d-%02d %02d:%02d:%02d\n",
+       dev_dbg(dev, "Set mcp795: %04d-%02d-%02d(%d) %02d:%02d:%02d\n",
                        tim->tm_year + 1900, tim->tm_mon, tim->tm_mday,
-                       tim->tm_hour, tim->tm_min, tim->tm_sec);
+                       tim->tm_wday, tim->tm_hour, tim->tm_min, tim->tm_sec);
 
        return 0;
 }
@@ -218,20 +253,139 @@ static int mcp795_read_time(struct device *dev, struct rtc_time *tim)
        tim->tm_sec     = bcd2bin(data[0] & 0x7F);
        tim->tm_min     = bcd2bin(data[1] & 0x7F);
        tim->tm_hour    = bcd2bin(data[2] & 0x3F);
+       tim->tm_wday    = bcd2bin(data[3] & 0x07) - 1;
        tim->tm_mday    = bcd2bin(data[4] & 0x3F);
        tim->tm_mon     = bcd2bin(data[5] & 0x1F) - 1;
        tim->tm_year    = bcd2bin(data[6]) + 100; /* Assume we are in 20xx */
 
-       dev_dbg(dev, "Read from mcp795: %04d-%02d-%02d %02d:%02d:%02d\n",
-                               tim->tm_year + 1900, tim->tm_mon, tim->tm_mday,
-                               tim->tm_hour, tim->tm_min, tim->tm_sec);
+       dev_dbg(dev, "Read from mcp795: %04d-%02d-%02d(%d) %02d:%02d:%02d\n",
+                       tim->tm_year + 1900, tim->tm_mon, tim->tm_mday,
+                       tim->tm_wday, tim->tm_hour, tim->tm_min, tim->tm_sec);
 
        return rtc_valid_tm(tim);
 }
 
+static int mcp795_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+       struct rtc_time now_tm;
+       time64_t now;
+       time64_t later;
+       u8 tmp[6];
+       int ret;
+
+       /* Read current time from RTC hardware */
+       ret = mcp795_read_time(dev, &now_tm);
+       if (ret)
+               return ret;
+       /* Get the number of seconds since 1970 */
+       now = rtc_tm_to_time64(&now_tm);
+       later = rtc_tm_to_time64(&alm->time);
+       if (later <= now)
+               return -EINVAL;
+       /* make sure alarm fires within the next one year */
+       if ((later - now) >=
+               (SEC_PER_DAY * (365 + is_leap_year(alm->time.tm_year))))
+               return -EDOM;
+       /* disable alarm */
+       ret = mcp795_update_alarm(dev, false);
+       if (ret)
+               return ret;
+       /* Read registers, so we can leave configuration bits untouched */
+       ret = mcp795_rtcc_read(dev, MCP795_REG_ALM0_SECONDS, tmp, sizeof(tmp));
+       if (ret)
+               return ret;
+
+       alm->time.tm_year       = -1;
+       alm->time.tm_isdst      = -1;
+       alm->time.tm_yday       = -1;
+
+       tmp[0] = (tmp[0] & 0x80) | bin2bcd(alm->time.tm_sec);
+       tmp[1] = (tmp[1] & 0x80) | bin2bcd(alm->time.tm_min);
+       tmp[2] = (tmp[2] & 0xE0) | bin2bcd(alm->time.tm_hour);
+       tmp[3] = (tmp[3] & 0x80) | bin2bcd(alm->time.tm_wday + 1);
+       /* set alarm match: seconds, minutes, hour, day, date and month */
+       tmp[3] |= (MCP795_ALM0C2_BIT | MCP795_ALM0C1_BIT | MCP795_ALM0C0_BIT);
+       tmp[4] = (tmp[4] & 0xC0) | bin2bcd(alm->time.tm_mday);
+       tmp[5] = (tmp[5] & 0xE0) | bin2bcd(alm->time.tm_mon + 1);
+
+       ret = mcp795_rtcc_write(dev, MCP795_REG_ALM0_SECONDS, tmp, sizeof(tmp));
+       if (ret)
+               return ret;
+
+       /* enable alarm if requested */
+       if (alm->enabled) {
+               ret = mcp795_update_alarm(dev, true);
+               if (ret)
+                       return ret;
+               dev_dbg(dev, "Alarm IRQ armed\n");
+       }
+       dev_dbg(dev, "Set alarm: %02d-%02d(%d) %02d:%02d:%02d\n",
+                       alm->time.tm_mon, alm->time.tm_mday, alm->time.tm_wday,
+                       alm->time.tm_hour, alm->time.tm_min, alm->time.tm_sec);
+       return 0;
+}
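The one-year bound in the function above exists because the ALM0 match covers month, date, weekday and time but not the year, so an alarm more than a year away would alias onto an earlier match. A standalone sketch of the window arithmetic, reusing the SEC_PER_DAY definition from this file:

    #include <stdbool.h>
    #include <stdint.h>

    #define SEC_PER_DAY (24 * 60 * 60)

    /* Does the alarm fall inside the chip's unambiguous match window? */
    static bool alarm_in_window(int64_t now, int64_t later, bool leap_year)
    {
            if (later <= now)
                    return false;   /* alarm is in the past */
            return (later - now) < (int64_t)SEC_PER_DAY * (365 + leap_year);
    }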
+
+static int mcp795_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+       u8 data[6];
+       int ret;
+
+       ret = mcp795_rtcc_read(
+                       dev, MCP795_REG_ALM0_SECONDS, data, sizeof(data));
+       if (ret)
+               return ret;
+
+       alm->time.tm_sec        = bcd2bin(data[0] & 0x7F);
+       alm->time.tm_min        = bcd2bin(data[1] & 0x7F);
+       alm->time.tm_hour       = bcd2bin(data[2] & 0x1F);
+       alm->time.tm_wday       = bcd2bin(data[3] & 0x07) - 1;
+       alm->time.tm_mday       = bcd2bin(data[4] & 0x3F);
+       alm->time.tm_mon        = bcd2bin(data[5] & 0x1F) - 1;
+       alm->time.tm_year       = -1;
+       alm->time.tm_isdst      = -1;
+       alm->time.tm_yday       = -1;
+
+       dev_dbg(dev, "Read alarm: %02d-%02d(%d) %02d:%02d:%02d\n",
+                       alm->time.tm_mon, alm->time.tm_mday, alm->time.tm_wday,
+                       alm->time.tm_hour, alm->time.tm_min, alm->time.tm_sec);
+       return 0;
+}
+
+static int mcp795_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+       return mcp795_update_alarm(dev, !!enabled);
+}
+
+static irqreturn_t mcp795_irq(int irq, void *data)
+{
+       struct spi_device *spi = data;
+       struct rtc_device *rtc = spi_get_drvdata(spi);
+       struct mutex *lock = &rtc->ops_lock;
+       int ret;
+
+       mutex_lock(lock);
+
+       /* Disable alarm.
+        * There is no need to clear ALM0IF (Alarm 0 Interrupt Flag) bit,
+        * because it is done every time when alarm is enabled.
+        */
+       ret = mcp795_update_alarm(&spi->dev, false);
+       if (ret)
+               dev_err(&spi->dev,
+                       "Failed to disable alarm in IRQ (ret=%d)\n", ret);
+       rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF);
+
+       mutex_unlock(lock);
+
+       return IRQ_HANDLED;
+}
+
 static const struct rtc_class_ops mcp795_rtc_ops = {
                .read_time = mcp795_read_time,
-               .set_time = mcp795_set_time
+               .set_time = mcp795_set_time,
+               .read_alarm = mcp795_read_alarm,
+               .set_alarm = mcp795_set_alarm,
+               .alarm_irq_enable = mcp795_alarm_irq_enable
 };
 
 static int mcp795_probe(struct spi_device *spi)
@@ -259,6 +413,23 @@ static int mcp795_probe(struct spi_device *spi)
 
        spi_set_drvdata(spi, rtc);
 
+       if (spi->irq > 0) {
+               dev_dbg(&spi->dev, "Alarm support enabled\n");
+
+               /* Clear any pending alarm (ALM0IF bit) before requesting
+                * the interrupt.
+                */
+               mcp795_rtcc_set_bits(&spi->dev, MCP795_REG_ALM0_DAY,
+                                       MCP795_ALM0IF_BIT, 0);
+               ret = devm_request_threaded_irq(&spi->dev, spi->irq, NULL,
+                               mcp795_irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+                               dev_name(&rtc->dev), spi);
+               if (ret)
+                       dev_err(&spi->dev, "Failed to request IRQ: %d: %d\n",
+                                               spi->irq, ret);
+               else
+                       device_init_wakeup(&spi->dev, true);
+       }
        return 0;
 }
 
index 359876a88ac860c06304bc9738f42f1ce406c044..77319122642ab51c491655ef5ffc94a9fc8c7745 100644 (file)
@@ -353,7 +353,7 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 }
 
 /* RTC layer */
-static struct rtc_class_ops mxc_rtc_ops = {
+static const struct rtc_class_ops mxc_rtc_ops = {
        .release                = mxc_rtc_release,
        .read_time              = mxc_rtc_read_time,
        .set_mmss64             = mxc_rtc_set_mmss,
index 2bfdf638b67390661f10e1405eea44d544eda6ad..f33447c5db85e395ac540f43c1bf7ec69f48efcf 100644 (file)
@@ -52,9 +52,20 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
        unsigned char buf[10];
        int ret;
+       int i;
 
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL1, buf,
-                               sizeof(buf));
+       for (i = 0; i <= PCF2127_REG_CTRL3; i++) {
+               ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1 + i,
+                                 (unsigned int *)(buf + i));
+               if (ret) {
+                       dev_err(dev, "%s: read error\n", __func__);
+                       return ret;
+               }
+       }
+
+       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_SC,
+                              (buf + PCF2127_REG_SC),
+                              ARRAY_SIZE(buf) - PCF2127_REG_SC);
        if (ret) {
                dev_err(dev, "%s: read error\n", __func__);
                return ret;
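Note that the cast in the loop above stores a full unsigned int through a pointer into the u8 buffer. A safer variant (a sketch only, assuming the same regmap handle) reads each control register into a properly typed temporary before narrowing:

    unsigned int tmp;

    for (i = 0; i <= PCF2127_REG_CTRL3; i++) {
            ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1 + i, &tmp);
            if (ret) {
                    dev_err(dev, "%s: read error\n", __func__);
                    return ret;
            }
            buf[i] = tmp;   /* narrow to one byte per register */
    }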
index 7163b91bb773bd3a8ef0a3be75d9936ffc116f9b..d08da371912cd868e496cbfd417d59067d04ce3a 100644 (file)
@@ -63,7 +63,6 @@ struct rx8010_data {
        struct i2c_client *client;
        struct rtc_device *rtc;
        u8 ctrlreg;
-       spinlock_t flags_lock;
 };
 
 static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id)
@@ -72,12 +71,12 @@ static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id)
        struct rx8010_data *rx8010 = i2c_get_clientdata(client);
        int flagreg;
 
-       spin_lock(&rx8010->flags_lock);
+       mutex_lock(&rx8010->rtc->ops_lock);
 
        flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG);
 
        if (flagreg <= 0) {
-               spin_unlock(&rx8010->flags_lock);
+               mutex_unlock(&rx8010->rtc->ops_lock);
                return IRQ_NONE;
        }
 
@@ -101,7 +100,7 @@ static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id)
 
        i2c_smbus_write_byte_data(client, RX8010_FLAG, flagreg);
 
-       spin_unlock(&rx8010->flags_lock);
+       mutex_unlock(&rx8010->rtc->ops_lock);
        return IRQ_HANDLED;
 }
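Taking the rtc core's ops_lock mutex is only legal here because the handler may sleep, which means it must run in process context. A sketch of the matching registration (the trigger flags are assumptions, not taken from this hunk):

    err = devm_request_threaded_irq(&client->dev, client->irq,
                                    NULL, rx8010_irq_1_handler,
                                    IRQF_TRIGGER_LOW | IRQF_ONESHOT,
                                    "rx8010", client);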
 
@@ -143,7 +142,6 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt)
        u8 date[7];
        int ctrl, flagreg;
        int ret;
-       unsigned long irqflags;
 
        if ((dt->tm_year < 100) || (dt->tm_year > 199))
                return -EINVAL;
@@ -181,11 +179,8 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt)
        if (ret < 0)
                return ret;
 
-       spin_lock_irqsave(&rx8010->flags_lock, irqflags);
-
        flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG);
        if (flagreg < 0) {
-               spin_unlock_irqrestore(&rx8010->flags_lock, irqflags);
                return flagreg;
        }
 
@@ -193,8 +188,6 @@ static int rx8010_set_time(struct device *dev, struct rtc_time *dt)
                ret = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG,
                                                flagreg & ~RX8010_FLAG_VLF);
 
-       spin_unlock_irqrestore(&rx8010->flags_lock, irqflags);
-
        return 0;
 }
 
@@ -288,12 +281,9 @@ static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t)
        u8 alarmvals[3];
        int extreg, flagreg;
        int err;
-       unsigned long irqflags;
 
-       spin_lock_irqsave(&rx8010->flags_lock, irqflags);
        flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG);
        if (flagreg < 0) {
-               spin_unlock_irqrestore(&rx8010->flags_lock, irqflags);
                return flagreg;
        }
 
@@ -302,14 +292,12 @@ static int rx8010_set_alarm(struct device *dev, struct rtc_wkalrm *t)
                err = i2c_smbus_write_byte_data(rx8010->client, RX8010_CTRL,
                                                rx8010->ctrlreg);
                if (err < 0) {
-                       spin_unlock_irqrestore(&rx8010->flags_lock, irqflags);
                        return err;
                }
        }
 
        flagreg &= ~RX8010_FLAG_AF;
        err = i2c_smbus_write_byte_data(rx8010->client, RX8010_FLAG, flagreg);
-       spin_unlock_irqrestore(&rx8010->flags_lock, irqflags);
        if (err < 0)
                return err;
 
@@ -404,7 +392,6 @@ static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
        struct rx8010_data *rx8010 = dev_get_drvdata(dev);
        int ret, tmp;
        int flagreg;
-       unsigned long irqflags;
 
        switch (cmd) {
        case RTC_VL_READ:
@@ -419,16 +406,13 @@ static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
                return 0;
 
        case RTC_VL_CLR:
-               spin_lock_irqsave(&rx8010->flags_lock, irqflags);
                flagreg = i2c_smbus_read_byte_data(rx8010->client, RX8010_FLAG);
                if (flagreg < 0) {
-                       spin_unlock_irqrestore(&rx8010->flags_lock, irqflags);
                        return flagreg;
                }
 
                flagreg &= ~RX8010_FLAG_VLF;
                ret = i2c_smbus_write_byte_data(client, RX8010_FLAG, flagreg);
-               spin_unlock_irqrestore(&rx8010->flags_lock, irqflags);
                if (ret < 0)
                        return ret;
 
@@ -466,8 +450,6 @@ static int rx8010_probe(struct i2c_client *client,
        rx8010->client = client;
        i2c_set_clientdata(client, rx8010);
 
-       spin_lock_init(&rx8010->flags_lock);
-
        err = rx8010_init_client(client);
        if (err)
                return err;
index 17b6235d67a588e9dff65f1c4689c923d939d8ff..c626e43a9cbb48f6c254517dc858dd1451c550d3 100644 (file)
@@ -535,7 +535,7 @@ static int sh_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
        return 0;
 }
 
-static struct rtc_class_ops sh_rtc_ops = {
+static const struct rtc_class_ops sh_rtc_ops = {
        .read_time      = sh_rtc_read_time,
        .set_time       = sh_rtc_set_time,
        .read_alarm     = sh_rtc_read_alarm,
index 0f11c2a228e35a02033e1b64dcddc6f5599927cd..d51b07d620f7bd6fadbbf66f666870d8e50516f0 100644 (file)
@@ -184,6 +184,7 @@ static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        rtc_tm_to_time(alrm_tm, &time);
 
        regmap_update_bits(data->regmap, data->offset + SNVS_LPCR, SNVS_LPCR_LPTA_EN, 0);
+       rtc_write_sync_lp(data);
        regmap_write(data->regmap, data->offset + SNVS_LPTAR, time);
 
        /* Clear alarm interrupt status bit */
diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c
new file mode 100644 (file)
index 0000000..bd57eb1
--- /dev/null
@@ -0,0 +1,725 @@
+/*
+ * Copyright (C) Amelie Delaunay 2016
+ * Author:  Amelie Delaunay <amelie.delaunay@st.com>
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/bcd.h>
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/ioport.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+
+#define DRIVER_NAME "stm32_rtc"
+
+/* STM32 RTC registers */
+#define STM32_RTC_TR           0x00
+#define STM32_RTC_DR           0x04
+#define STM32_RTC_CR           0x08
+#define STM32_RTC_ISR          0x0C
+#define STM32_RTC_PRER         0x10
+#define STM32_RTC_ALRMAR       0x1C
+#define STM32_RTC_WPR          0x24
+
+/* STM32_RTC_TR bit fields  */
+#define STM32_RTC_TR_SEC_SHIFT         0
+#define STM32_RTC_TR_SEC               GENMASK(6, 0)
+#define STM32_RTC_TR_MIN_SHIFT         8
+#define STM32_RTC_TR_MIN               GENMASK(14, 8)
+#define STM32_RTC_TR_HOUR_SHIFT                16
+#define STM32_RTC_TR_HOUR              GENMASK(21, 16)
+
+/* STM32_RTC_DR bit fields */
+#define STM32_RTC_DR_DATE_SHIFT                0
+#define STM32_RTC_DR_DATE              GENMASK(5, 0)
+#define STM32_RTC_DR_MONTH_SHIFT       8
+#define STM32_RTC_DR_MONTH             GENMASK(12, 8)
+#define STM32_RTC_DR_WDAY_SHIFT                13
+#define STM32_RTC_DR_WDAY              GENMASK(15, 13)
+#define STM32_RTC_DR_YEAR_SHIFT                16
+#define STM32_RTC_DR_YEAR              GENMASK(23, 16)
+
+/* STM32_RTC_CR bit fields */
+#define STM32_RTC_CR_FMT               BIT(6)
+#define STM32_RTC_CR_ALRAE             BIT(8)
+#define STM32_RTC_CR_ALRAIE            BIT(12)
+
+/* STM32_RTC_ISR bit fields */
+#define STM32_RTC_ISR_ALRAWF           BIT(0)
+#define STM32_RTC_ISR_INITS            BIT(4)
+#define STM32_RTC_ISR_RSF              BIT(5)
+#define STM32_RTC_ISR_INITF            BIT(6)
+#define STM32_RTC_ISR_INIT             BIT(7)
+#define STM32_RTC_ISR_ALRAF            BIT(8)
+
+/* STM32_RTC_PRER bit fields */
+#define STM32_RTC_PRER_PRED_S_SHIFT    0
+#define STM32_RTC_PRER_PRED_S          GENMASK(14, 0)
+#define STM32_RTC_PRER_PRED_A_SHIFT    16
+#define STM32_RTC_PRER_PRED_A          GENMASK(22, 16)
+
+/* STM32_RTC_ALRMAR and STM32_RTC_ALRMBR bit fields */
+#define STM32_RTC_ALRMXR_SEC_SHIFT     0
+#define STM32_RTC_ALRMXR_SEC           GENMASK(6, 0)
+#define STM32_RTC_ALRMXR_SEC_MASK      BIT(7)
+#define STM32_RTC_ALRMXR_MIN_SHIFT     8
+#define STM32_RTC_ALRMXR_MIN           GENMASK(14, 8)
+#define STM32_RTC_ALRMXR_MIN_MASK      BIT(15)
+#define STM32_RTC_ALRMXR_HOUR_SHIFT    16
+#define STM32_RTC_ALRMXR_HOUR          GENMASK(21, 16)
+#define STM32_RTC_ALRMXR_PM            BIT(22)
+#define STM32_RTC_ALRMXR_HOUR_MASK     BIT(23)
+#define STM32_RTC_ALRMXR_DATE_SHIFT    24
+#define STM32_RTC_ALRMXR_DATE          GENMASK(29, 24)
+#define STM32_RTC_ALRMXR_WDSEL         BIT(30)
+#define STM32_RTC_ALRMXR_WDAY_SHIFT    24
+#define STM32_RTC_ALRMXR_WDAY          GENMASK(27, 24)
+#define STM32_RTC_ALRMXR_DATE_MASK     BIT(31)
+
+/* STM32_RTC_WPR key constants */
+#define RTC_WPR_1ST_KEY                        0xCA
+#define RTC_WPR_2ND_KEY                        0x53
+#define RTC_WPR_WRONG_KEY              0xFF
+
+/*
+ * RTC registers are protected against parasitic write access.
+ * PWR_CR_DBP bit must be set to enable write access to RTC registers.
+ */
+/* STM32_PWR_CR */
+#define PWR_CR                         0x00
+/* STM32_PWR_CR bit field */
+#define PWR_CR_DBP                     BIT(8)
+
+struct stm32_rtc {
+       struct rtc_device *rtc_dev;
+       void __iomem *base;
+       struct regmap *dbp;
+       struct clk *ck_rtc;
+       int irq_alarm;
+};
+
+static void stm32_rtc_wpr_unlock(struct stm32_rtc *rtc)
+{
+       writel_relaxed(RTC_WPR_1ST_KEY, rtc->base + STM32_RTC_WPR);
+       writel_relaxed(RTC_WPR_2ND_KEY, rtc->base + STM32_RTC_WPR);
+}
+
+static void stm32_rtc_wpr_lock(struct stm32_rtc *rtc)
+{
+       writel_relaxed(RTC_WPR_WRONG_KEY, rtc->base + STM32_RTC_WPR);
+}
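Every write to a protected RTC register is bracketed by the key sequence above; a minimal usage sketch:

    stm32_rtc_wpr_unlock(rtc);
    /* ... modify STM32_RTC_CR, STM32_RTC_ALRMAR, ... */
    stm32_rtc_wpr_lock(rtc);    /* writing any wrong key re-arms protection */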
+
+static int stm32_rtc_enter_init_mode(struct stm32_rtc *rtc)
+{
+       unsigned int isr = readl_relaxed(rtc->base + STM32_RTC_ISR);
+
+       if (!(isr & STM32_RTC_ISR_INITF)) {
+               isr |= STM32_RTC_ISR_INIT;
+               writel_relaxed(isr, rtc->base + STM32_RTC_ISR);
+
+               /*
+                * It takes around 2 ck_rtc clock cycles to enter
+                * initialization phase mode (and have the INITF flag set).
+                * As the slowest ck_rtc frequency may be 32 kHz and the
+                * highest should be 1 MHz, we poll every 10 us with a
+                * timeout of 100 ms.
+                */
+               return readl_relaxed_poll_timeout_atomic(
+                                       rtc->base + STM32_RTC_ISR,
+                                       isr, (isr & STM32_RTC_ISR_INITF),
+                                       10, 100000);
+       }
+
+       return 0;
+}
+
+static void stm32_rtc_exit_init_mode(struct stm32_rtc *rtc)
+{
+       unsigned int isr = readl_relaxed(rtc->base + STM32_RTC_ISR);
+
+       isr &= ~STM32_RTC_ISR_INIT;
+       writel_relaxed(isr, rtc->base + STM32_RTC_ISR);
+}
+
+static int stm32_rtc_wait_sync(struct stm32_rtc *rtc)
+{
+       unsigned int isr = readl_relaxed(rtc->base + STM32_RTC_ISR);
+
+       isr &= ~STM32_RTC_ISR_RSF;
+       writel_relaxed(isr, rtc->base + STM32_RTC_ISR);
+
+       /*
+        * Wait for RSF to be set to ensure the calendar registers are
+        * synchronised, it takes around 2 ck_rtc clock cycles
+        */
+       return readl_relaxed_poll_timeout_atomic(rtc->base + STM32_RTC_ISR,
+                                                isr,
+                                                (isr & STM32_RTC_ISR_RSF),
+                                                10, 100000);
+}
+
+static irqreturn_t stm32_rtc_alarm_irq(int irq, void *dev_id)
+{
+       struct stm32_rtc *rtc = (struct stm32_rtc *)dev_id;
+       unsigned int isr, cr;
+
+       mutex_lock(&rtc->rtc_dev->ops_lock);
+
+       isr = readl_relaxed(rtc->base + STM32_RTC_ISR);
+       cr = readl_relaxed(rtc->base + STM32_RTC_CR);
+
+       if ((isr & STM32_RTC_ISR_ALRAF) &&
+           (cr & STM32_RTC_CR_ALRAIE)) {
+               /* Alarm A flag - Alarm interrupt */
+               dev_dbg(&rtc->rtc_dev->dev, "Alarm occurred\n");
+
+               /* Pass event to the kernel */
+               rtc_update_irq(rtc->rtc_dev, 1, RTC_IRQF | RTC_AF);
+
+               /* Clear event flag, otherwise new events won't be received */
+               writel_relaxed(isr & ~STM32_RTC_ISR_ALRAF,
+                              rtc->base + STM32_RTC_ISR);
+       }
+
+       mutex_unlock(&rtc->rtc_dev->ops_lock);
+
+       return IRQ_HANDLED;
+}
+
+/* Convert rtc_time structure from bin to bcd format */
+static void tm2bcd(struct rtc_time *tm)
+{
+       tm->tm_sec = bin2bcd(tm->tm_sec);
+       tm->tm_min = bin2bcd(tm->tm_min);
+       tm->tm_hour = bin2bcd(tm->tm_hour);
+
+       tm->tm_mday = bin2bcd(tm->tm_mday);
+       tm->tm_mon = bin2bcd(tm->tm_mon + 1);
+       tm->tm_year = bin2bcd(tm->tm_year - 100);
+       /*
+        * Number of days since Sunday
+        * - on kernel side, 0=Sunday...6=Saturday
+        * - on rtc side, 0=invalid,1=Monday...7=Sunday
+        */
+       tm->tm_wday = (!tm->tm_wday) ? 7 : tm->tm_wday;
+}
+
+/* Convert rtc_time structure from bcd to bin format */
+static void bcd2tm(struct rtc_time *tm)
+{
+       tm->tm_sec = bcd2bin(tm->tm_sec);
+       tm->tm_min = bcd2bin(tm->tm_min);
+       tm->tm_hour = bcd2bin(tm->tm_hour);
+
+       tm->tm_mday = bcd2bin(tm->tm_mday);
+       tm->tm_mon = bcd2bin(tm->tm_mon) - 1;
+       tm->tm_year = bcd2bin(tm->tm_year) + 100;
+       /*
+        * Number of days since Sunday
+        * - on kernel side, 0=Sunday...6=Saturday
+        * - on rtc side, 0=invalid,1=Monday...7=Sunday
+        */
+       tm->tm_wday %= 7;
+}
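A standalone round trip of the weekday convention used by these two helpers (0=Sunday on the kernel side, 1=Monday..7=Sunday on the RTC side), just to make the mapping concrete:

    #include <stdio.h>

    int main(void)
    {
            int kernel_wday = 0;                            /* Sunday */
            int rtc_wday = kernel_wday ? kernel_wday : 7;   /* -> 7 */
            int back = rtc_wday % 7;                        /* -> 0 */

            printf("kernel %d -> rtc %d -> kernel %d\n",
                   kernel_wday, rtc_wday, back);
            return 0;
    }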
+
+static int stm32_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       struct stm32_rtc *rtc = dev_get_drvdata(dev);
+       unsigned int tr, dr;
+
+       /* Time and Date in BCD format */
+       tr = readl_relaxed(rtc->base + STM32_RTC_TR);
+       dr = readl_relaxed(rtc->base + STM32_RTC_DR);
+
+       tm->tm_sec = (tr & STM32_RTC_TR_SEC) >> STM32_RTC_TR_SEC_SHIFT;
+       tm->tm_min = (tr & STM32_RTC_TR_MIN) >> STM32_RTC_TR_MIN_SHIFT;
+       tm->tm_hour = (tr & STM32_RTC_TR_HOUR) >> STM32_RTC_TR_HOUR_SHIFT;
+
+       tm->tm_mday = (dr & STM32_RTC_DR_DATE) >> STM32_RTC_DR_DATE_SHIFT;
+       tm->tm_mon = (dr & STM32_RTC_DR_MONTH) >> STM32_RTC_DR_MONTH_SHIFT;
+       tm->tm_year = (dr & STM32_RTC_DR_YEAR) >> STM32_RTC_DR_YEAR_SHIFT;
+       tm->tm_wday = (dr & STM32_RTC_DR_WDAY) >> STM32_RTC_DR_WDAY_SHIFT;
+
+       /* We don't report tm_yday and tm_isdst */
+
+       bcd2tm(tm);
+
+       return 0;
+}
+
+static int stm32_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct stm32_rtc *rtc = dev_get_drvdata(dev);
+       unsigned int tr, dr;
+       int ret = 0;
+
+       tm2bcd(tm);
+
+       /* Time in BCD format */
+       tr = ((tm->tm_sec << STM32_RTC_TR_SEC_SHIFT) & STM32_RTC_TR_SEC) |
+            ((tm->tm_min << STM32_RTC_TR_MIN_SHIFT) & STM32_RTC_TR_MIN) |
+            ((tm->tm_hour << STM32_RTC_TR_HOUR_SHIFT) & STM32_RTC_TR_HOUR);
+
+       /* Date in BCD format */
+       dr = ((tm->tm_mday << STM32_RTC_DR_DATE_SHIFT) & STM32_RTC_DR_DATE) |
+            ((tm->tm_mon << STM32_RTC_DR_MONTH_SHIFT) & STM32_RTC_DR_MONTH) |
+            ((tm->tm_year << STM32_RTC_DR_YEAR_SHIFT) & STM32_RTC_DR_YEAR) |
+            ((tm->tm_wday << STM32_RTC_DR_WDAY_SHIFT) & STM32_RTC_DR_WDAY);
+
+       stm32_rtc_wpr_unlock(rtc);
+
+       ret = stm32_rtc_enter_init_mode(rtc);
+       if (ret) {
+               dev_err(dev, "Can't enter in init mode. Set time aborted.\n");
+               goto end;
+       }
+
+       writel_relaxed(tr, rtc->base + STM32_RTC_TR);
+       writel_relaxed(dr, rtc->base + STM32_RTC_DR);
+
+       stm32_rtc_exit_init_mode(rtc);
+
+       ret = stm32_rtc_wait_sync(rtc);
+end:
+       stm32_rtc_wpr_lock(rtc);
+
+       return ret;
+}
+
+static int stm32_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+       struct stm32_rtc *rtc = dev_get_drvdata(dev);
+       struct rtc_time *tm = &alrm->time;
+       unsigned int alrmar, cr, isr;
+
+       alrmar = readl_relaxed(rtc->base + STM32_RTC_ALRMAR);
+       cr = readl_relaxed(rtc->base + STM32_RTC_CR);
+       isr = readl_relaxed(rtc->base + STM32_RTC_ISR);
+
+       if (alrmar & STM32_RTC_ALRMXR_DATE_MASK) {
+               /*
+                * Date/day doesn't matter in the alarm comparison, so the
+                * alarm triggers every day
+                */
+               tm->tm_mday = -1;
+               tm->tm_wday = -1;
+       } else {
+               if (alrmar & STM32_RTC_ALRMXR_WDSEL) {
+                       /* Alarm is set to a day of week */
+                       tm->tm_mday = -1;
+                       tm->tm_wday = (alrmar & STM32_RTC_ALRMXR_WDAY) >>
+                                     STM32_RTC_ALRMXR_WDAY_SHIFT;
+                       tm->tm_wday %= 7;
+               } else {
+                       /* Alarm is set to a day of month */
+                       tm->tm_wday = -1;
+                       tm->tm_mday = (alrmar & STM32_RTC_ALRMXR_DATE) >>
+                                      STM32_RTC_ALRMXR_DATE_SHIFT;
+               }
+       }
+
+       if (alrmar & STM32_RTC_ALRMXR_HOUR_MASK) {
+               /* Hours don't matter in Alarm comparison */
+               tm->tm_hour = -1;
+       } else {
+               tm->tm_hour = (alrmar & STM32_RTC_ALRMXR_HOUR) >>
+                              STM32_RTC_ALRMXR_HOUR_SHIFT;
+               if (alrmar & STM32_RTC_ALRMXR_PM)
+                       tm->tm_hour += 12;
+       }
+
+       if (alrmar & STM32_RTC_ALRMXR_MIN_MASK) {
+               /* Minutes don't matter in Alarm comparison */
+               tm->tm_min = -1;
+       } else {
+               tm->tm_min = (alrmar & STM32_RTC_ALRMXR_MIN) >>
+                             STM32_RTC_ALRMXR_MIN_SHIFT;
+       }
+
+       if (alrmar & STM32_RTC_ALRMXR_SEC_MASK) {
+               /* Seconds don't matter in Alarm comparison */
+               tm->tm_sec = -1;
+       } else {
+               tm->tm_sec = (alrmar & STM32_RTC_ALRMXR_SEC) >>
+                             STM32_RTC_ALRMXR_SEC_SHIFT;
+       }
+
+       bcd2tm(tm);
+
+       alrm->enabled = (cr & STM32_RTC_CR_ALRAE) ? 1 : 0;
+       alrm->pending = (isr & STM32_RTC_ISR_ALRAF) ? 1 : 0;
+
+       return 0;
+}
+
+static int stm32_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+       struct stm32_rtc *rtc = dev_get_drvdata(dev);
+       unsigned int isr, cr;
+
+       cr = readl_relaxed(rtc->base + STM32_RTC_CR);
+
+       stm32_rtc_wpr_unlock(rtc);
+
+       /* We expose Alarm A to the kernel */
+       if (enabled)
+               cr |= (STM32_RTC_CR_ALRAIE | STM32_RTC_CR_ALRAE);
+       else
+               cr &= ~(STM32_RTC_CR_ALRAIE | STM32_RTC_CR_ALRAE);
+       writel_relaxed(cr, rtc->base + STM32_RTC_CR);
+
+       /* Clear event flag, otherwise new events won't be received */
+       isr = readl_relaxed(rtc->base + STM32_RTC_ISR);
+       isr &= ~STM32_RTC_ISR_ALRAF;
+       writel_relaxed(isr, rtc->base + STM32_RTC_ISR);
+
+       stm32_rtc_wpr_lock(rtc);
+
+       return 0;
+}
+
+static int stm32_rtc_valid_alrm(struct stm32_rtc *rtc, struct rtc_time *tm)
+{
+       int cur_day, cur_mon, cur_year, cur_hour, cur_min, cur_sec;
+       unsigned int dr = readl_relaxed(rtc->base + STM32_RTC_DR);
+       unsigned int tr = readl_relaxed(rtc->base + STM32_RTC_TR);
+
+       cur_day = (dr & STM32_RTC_DR_DATE) >> STM32_RTC_DR_DATE_SHIFT;
+       cur_mon = (dr & STM32_RTC_DR_MONTH) >> STM32_RTC_DR_MONTH_SHIFT;
+       cur_year = (dr & STM32_RTC_DR_YEAR) >> STM32_RTC_DR_YEAR_SHIFT;
+       cur_sec = (tr & STM32_RTC_TR_SEC) >> STM32_RTC_TR_SEC_SHIFT;
+       cur_min = (tr & STM32_RTC_TR_MIN) >> STM32_RTC_TR_MIN_SHIFT;
+       cur_hour = (tr & STM32_RTC_TR_HOUR) >> STM32_RTC_TR_HOUR_SHIFT;
+
+       /*
+        * Assuming current date is M-D-Y H:M:S.
+        * RTC alarm can't be set on a specific month and year.
+        * So the valid alarm range is:
+        *      M-D-Y H:M:S < alarm <= (M+1)-D-Y H:M:S
+        * with a specific case for December...
+        */
+       if ((((tm->tm_year > cur_year) &&
+             (tm->tm_mon == 0x1) && (cur_mon == 0x12)) ||
+            ((tm->tm_year == cur_year) &&
+             (tm->tm_mon <= cur_mon + 1))) &&
+           ((tm->tm_mday > cur_day) ||
+            ((tm->tm_mday == cur_day) &&
+            ((tm->tm_hour > cur_hour) ||
+             ((tm->tm_hour == cur_hour) && (tm->tm_min > cur_min)) ||
+             ((tm->tm_hour == cur_hour) && (tm->tm_min == cur_min) &&
+              (tm->tm_sec >= cur_sec))))))
+               return 0;
+
+       return -EINVAL;
+}
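A worked example of the December special case in the comparison above (all values are BCD, matching the register contents):

    /* cur:  year 0x17, mon 0x12, day 0x20  (2017-12-20)
     * alrm: year 0x18, mon 0x01, day 0x05  -> accepted
     *       (tm_year > cur_year, tm_mon == 0x1, cur_mon == 0x12)
     * alrm: year 0x18, mon 0x02, day 0x01  -> -EINVAL (past M+1)
     */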
+
+static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+       struct stm32_rtc *rtc = dev_get_drvdata(dev);
+       struct rtc_time *tm = &alrm->time;
+       unsigned int cr, isr, alrmar;
+       int ret = 0;
+
+       tm2bcd(tm);
+
+       /*
+        * The RTC alarm can't be set on an arbitrary date: only dates up
+        * to the same day of the month, one month ahead, are representable.
+        */
+       if (stm32_rtc_valid_alrm(rtc, tm) < 0) {
+               dev_err(dev, "Alarm can be set only on upcoming month.\n");
+               return -EINVAL;
+       }
+
+       alrmar = 0;
+       /* tm_year and tm_mon are not used because not supported by RTC */
+       alrmar |= (tm->tm_mday << STM32_RTC_ALRMXR_DATE_SHIFT) &
+                 STM32_RTC_ALRMXR_DATE;
+       /* 24-hour format */
+       alrmar &= ~STM32_RTC_ALRMXR_PM;
+       alrmar |= (tm->tm_hour << STM32_RTC_ALRMXR_HOUR_SHIFT) &
+                 STM32_RTC_ALRMXR_HOUR;
+       alrmar |= (tm->tm_min << STM32_RTC_ALRMXR_MIN_SHIFT) &
+                 STM32_RTC_ALRMXR_MIN;
+       alrmar |= (tm->tm_sec << STM32_RTC_ALRMXR_SEC_SHIFT) &
+                 STM32_RTC_ALRMXR_SEC;
+
+       stm32_rtc_wpr_unlock(rtc);
+
+       /* Disable Alarm */
+       cr = readl_relaxed(rtc->base + STM32_RTC_CR);
+       cr &= ~STM32_RTC_CR_ALRAE;
+       writel_relaxed(cr, rtc->base + STM32_RTC_CR);
+
+       /*
+        * Poll Alarm write flag to be sure that Alarm update is allowed: it
+        * takes around 2 ck_rtc clock cycles
+        */
+       ret = readl_relaxed_poll_timeout_atomic(rtc->base + STM32_RTC_ISR,
+                                               isr,
+                                               (isr & STM32_RTC_ISR_ALRAWF),
+                                               10, 100000);
+
+       if (ret) {
+               dev_err(dev, "Alarm update not allowed\n");
+               goto end;
+       }
+
+       /* Write to Alarm register */
+       writel_relaxed(alrmar, rtc->base + STM32_RTC_ALRMAR);
+
+       if (alrm->enabled)
+               stm32_rtc_alarm_irq_enable(dev, 1);
+       else
+               stm32_rtc_alarm_irq_enable(dev, 0);
+
+end:
+       stm32_rtc_wpr_lock(rtc);
+
+       return ret;
+}
+
+static const struct rtc_class_ops stm32_rtc_ops = {
+       .read_time      = stm32_rtc_read_time,
+       .set_time       = stm32_rtc_set_time,
+       .read_alarm     = stm32_rtc_read_alarm,
+       .set_alarm      = stm32_rtc_set_alarm,
+       .alarm_irq_enable = stm32_rtc_alarm_irq_enable,
+};
+
+static const struct of_device_id stm32_rtc_of_match[] = {
+       { .compatible = "st,stm32-rtc" },
+       {}
+};
+MODULE_DEVICE_TABLE(of, stm32_rtc_of_match);
+
+static int stm32_rtc_init(struct platform_device *pdev,
+                         struct stm32_rtc *rtc)
+{
+       unsigned int prer, pred_a, pred_s, pred_a_max, pred_s_max, cr;
+       unsigned int rate;
+       int ret = 0;
+
+       rate = clk_get_rate(rtc->ck_rtc);
+
+       /* Find prediv_a and prediv_s to obtain the 1Hz calendar clock */
+       pred_a_max = STM32_RTC_PRER_PRED_A >> STM32_RTC_PRER_PRED_A_SHIFT;
+       pred_s_max = STM32_RTC_PRER_PRED_S >> STM32_RTC_PRER_PRED_S_SHIFT;
+
+       for (pred_a = pred_a_max; pred_a + 1 > 0; pred_a--) {
+               pred_s = (rate / (pred_a + 1)) - 1;
+
+               if (((pred_s + 1) * (pred_a + 1)) == rate)
+                       break;
+       }
+
+       /*
+        * Can't find an exact 1 Hz divider, so give priority to RTC power
+        * consumption by choosing the highest possible value for prediv_a
+        */
+       if ((pred_s > pred_s_max) || (pred_a > pred_a_max)) {
+               pred_a = pred_a_max;
+               pred_s = (rate / (pred_a + 1)) - 1;
+
+               dev_warn(&pdev->dev, "ck_rtc is %s\n",
+                        (rate < ((pred_a + 1) * (pred_s + 1))) ?
+                        "fast" : "slow");
+       }
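A standalone sketch of the search above with the usual 32768 Hz LSE crystal (the rate is an assumption): the loop settles on pred_a = 127 and pred_s = 255, since (127 + 1) * (255 + 1) == 32768 yields the 1 Hz calendar clock.

    #include <stdio.h>

    int main(void)
    {
            unsigned int rate = 32768;      /* assumed LSE rate */
            unsigned int pred_a, pred_s;

            for (pred_a = 127; pred_a + 1 > 0; pred_a--) {
                    pred_s = rate / (pred_a + 1) - 1;
                    if ((pred_s + 1) * (pred_a + 1) == rate)
                            break;
            }
            printf("pred_a=%u pred_s=%u\n", pred_a, pred_s); /* 127 255 */
            return 0;
    }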
+
+       stm32_rtc_wpr_unlock(rtc);
+
+       ret = stm32_rtc_enter_init_mode(rtc);
+       if (ret) {
+               dev_err(&pdev->dev,
+                       "Can't enter in init mode. Prescaler config failed.\n");
+               goto end;
+       }
+
+       prer = (pred_s << STM32_RTC_PRER_PRED_S_SHIFT) & STM32_RTC_PRER_PRED_S;
+       writel_relaxed(prer, rtc->base + STM32_RTC_PRER);
+       prer |= (pred_a << STM32_RTC_PRER_PRED_A_SHIFT) & STM32_RTC_PRER_PRED_A;
+       writel_relaxed(prer, rtc->base + STM32_RTC_PRER);
+
+       /* Force 24h time format */
+       cr = readl_relaxed(rtc->base + STM32_RTC_CR);
+       cr &= ~STM32_RTC_CR_FMT;
+       writel_relaxed(cr, rtc->base + STM32_RTC_CR);
+
+       stm32_rtc_exit_init_mode(rtc);
+
+       ret = stm32_rtc_wait_sync(rtc);
+end:
+       stm32_rtc_wpr_lock(rtc);
+
+       return ret;
+}
+
+static int stm32_rtc_probe(struct platform_device *pdev)
+{
+       struct stm32_rtc *rtc;
+       struct resource *res;
+       int ret;
+
+       rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+       if (!rtc)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       rtc->base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(rtc->base))
+               return PTR_ERR(rtc->base);
+
+       rtc->dbp = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+                                                  "st,syscfg");
+       if (IS_ERR(rtc->dbp)) {
+               dev_err(&pdev->dev, "no st,syscfg\n");
+               return PTR_ERR(rtc->dbp);
+       }
+
+       rtc->ck_rtc = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(rtc->ck_rtc)) {
+               dev_err(&pdev->dev, "no ck_rtc clock");
+               return PTR_ERR(rtc->ck_rtc);
+       }
+
+       ret = clk_prepare_enable(rtc->ck_rtc);
+       if (ret)
+               return ret;
+
+       regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, PWR_CR_DBP);
+
+       /*
+        * After a system reset, RTC_ISR.INITS flag can be read to check if
+        * the calendar has been initialized or not. INITS flag is reset by a
+        * power-on reset (no vbat, no power-supply). It is not reset if
+        * ck_rtc parent clock has changed (so RTC prescalers need to be
+        * changed). That's why we cannot rely on this flag to know if RTC
+        * init has to be done.
+        */
+       ret = stm32_rtc_init(pdev, rtc);
+       if (ret)
+               goto err;
+
+       rtc->irq_alarm = platform_get_irq(pdev, 0);
+       if (rtc->irq_alarm <= 0) {
+               dev_err(&pdev->dev, "no alarm irq\n");
+               ret = rtc->irq_alarm;
+               goto err;
+       }
+
+       platform_set_drvdata(pdev, rtc);
+
+       ret = device_init_wakeup(&pdev->dev, true);
+       if (ret)
+               dev_warn(&pdev->dev,
+                        "alarm won't be able to wake up the system");
+
+       rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, pdev->name,
+                       &stm32_rtc_ops, THIS_MODULE);
+       if (IS_ERR(rtc->rtc_dev)) {
+               ret = PTR_ERR(rtc->rtc_dev);
+               dev_err(&pdev->dev, "rtc device registration failed, err=%d\n",
+                       ret);
+               goto err;
+       }
+
+       /* Handle RTC alarm interrupts */
+       ret = devm_request_threaded_irq(&pdev->dev, rtc->irq_alarm, NULL,
+                                       stm32_rtc_alarm_irq,
+                                       IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+                                       pdev->name, rtc);
+       if (ret) {
+               dev_err(&pdev->dev, "IRQ%d (alarm interrupt) already claimed\n",
+                       rtc->irq_alarm);
+               goto err;
+       }
+
+       /*
+        * If INITS flag is reset (calendar year field set to 0x00), calendar
+        * must be initialized
+        */
+       if (!(readl_relaxed(rtc->base + STM32_RTC_ISR) & STM32_RTC_ISR_INITS))
+               dev_warn(&pdev->dev, "Date/Time must be initialized\n");
+
+       return 0;
+err:
+       clk_disable_unprepare(rtc->ck_rtc);
+
+       regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, 0);
+
+       device_init_wakeup(&pdev->dev, false);
+
+       return ret;
+}
+
+static int stm32_rtc_remove(struct platform_device *pdev)
+{
+       struct stm32_rtc *rtc = platform_get_drvdata(pdev);
+       unsigned int cr;
+
+       /* Disable interrupts */
+       stm32_rtc_wpr_unlock(rtc);
+       cr = readl_relaxed(rtc->base + STM32_RTC_CR);
+       cr &= ~STM32_RTC_CR_ALRAIE;
+       writel_relaxed(cr, rtc->base + STM32_RTC_CR);
+       stm32_rtc_wpr_lock(rtc);
+
+       clk_disable_unprepare(rtc->ck_rtc);
+
+       /* Enable backup domain write protection */
+       regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, 0);
+
+       device_init_wakeup(&pdev->dev, false);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int stm32_rtc_suspend(struct device *dev)
+{
+       struct stm32_rtc *rtc = dev_get_drvdata(dev);
+
+       if (device_may_wakeup(dev))
+               return enable_irq_wake(rtc->irq_alarm);
+
+       return 0;
+}
+
+static int stm32_rtc_resume(struct device *dev)
+{
+       struct stm32_rtc *rtc = dev_get_drvdata(dev);
+       int ret = 0;
+
+       ret = stm32_rtc_wait_sync(rtc);
+       if (ret < 0)
+               return ret;
+
+       if (device_may_wakeup(dev))
+               return disable_irq_wake(rtc->irq_alarm);
+
+       return ret;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(stm32_rtc_pm_ops,
+                        stm32_rtc_suspend, stm32_rtc_resume);
+
+static struct platform_driver stm32_rtc_driver = {
+       .probe          = stm32_rtc_probe,
+       .remove         = stm32_rtc_remove,
+       .driver         = {
+               .name   = DRIVER_NAME,
+               .pm     = &stm32_rtc_pm_ops,
+               .of_match_table = stm32_rtc_of_match,
+       },
+};
+
+module_platform_driver(stm32_rtc_driver);
+
+MODULE_ALIAS("platform:" DRIVER_NAME);
+MODULE_AUTHOR("Amelie Delaunay <amelie.delaunay@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 Real Time Clock driver");
+MODULE_LICENSE("GPL v2");
index c169a2cd47273e11fd62cb85755cb0badbb31a94..39cbc1238b92bbb5766d558b53435ae9ffee0ddd 100644 (file)
@@ -20,6 +20,8 @@
  * more details.
  */
 
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/fs.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 
 /* Control register */
 #define SUN6I_LOSC_CTRL                                0x0000
+#define SUN6I_LOSC_CTRL_KEY                    (0x16aa << 16)
 #define SUN6I_LOSC_CTRL_ALM_DHMS_ACC           BIT(9)
 #define SUN6I_LOSC_CTRL_RTC_HMS_ACC            BIT(8)
 #define SUN6I_LOSC_CTRL_RTC_YMD_ACC            BIT(7)
+#define SUN6I_LOSC_CTRL_EXT_OSC                        BIT(0)
 #define SUN6I_LOSC_CTRL_ACC_MASK               GENMASK(9, 7)
 
+#define SUN6I_LOSC_CLK_PRESCAL                 0x0008
+
 /* RTC */
 #define SUN6I_RTC_YMD                          0x0010
 #define SUN6I_RTC_HMS                          0x0014
@@ -114,13 +121,142 @@ struct sun6i_rtc_dev {
        void __iomem *base;
        int irq;
        unsigned long alarm;
+
+       struct clk_hw hw;
+       struct clk_hw *int_osc;
+       struct clk *losc;
+
+       spinlock_t lock;
+};
+
+static struct sun6i_rtc_dev *sun6i_rtc;
+
+static unsigned long sun6i_rtc_osc_recalc_rate(struct clk_hw *hw,
+                                              unsigned long parent_rate)
+{
+       struct sun6i_rtc_dev *rtc = container_of(hw, struct sun6i_rtc_dev, hw);
+       u32 val;
+
+       val = readl(rtc->base + SUN6I_LOSC_CTRL);
+       if (val & SUN6I_LOSC_CTRL_EXT_OSC)
+               return parent_rate;
+
+       val = readl(rtc->base + SUN6I_LOSC_CLK_PRESCAL);
+       val &= GENMASK(4, 0);
+
+       return parent_rate / (val + 1);
+}
+
+static u8 sun6i_rtc_osc_get_parent(struct clk_hw *hw)
+{
+       struct sun6i_rtc_dev *rtc = container_of(hw, struct sun6i_rtc_dev, hw);
+
+       return readl(rtc->base + SUN6I_LOSC_CTRL) & SUN6I_LOSC_CTRL_EXT_OSC;
+}
+
+static int sun6i_rtc_osc_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct sun6i_rtc_dev *rtc = container_of(hw, struct sun6i_rtc_dev, hw);
+       unsigned long flags;
+       u32 val;
+
+       if (index > 1)
+               return -EINVAL;
+
+       spin_lock_irqsave(&rtc->lock, flags);
+       val = readl(rtc->base + SUN6I_LOSC_CTRL);
+       val &= ~SUN6I_LOSC_CTRL_EXT_OSC;
+       val |= SUN6I_LOSC_CTRL_KEY;
+       val |= index ? SUN6I_LOSC_CTRL_EXT_OSC : 0;
+       writel(val, rtc->base + SUN6I_LOSC_CTRL);
+       spin_unlock_irqrestore(&rtc->lock, flags);
+
+       return 0;
+}
+
+static const struct clk_ops sun6i_rtc_osc_ops = {
+       .recalc_rate    = sun6i_rtc_osc_recalc_rate,
+
+       .get_parent     = sun6i_rtc_osc_get_parent,
+       .set_parent     = sun6i_rtc_osc_set_parent,
 };
 
+static void __init sun6i_rtc_clk_init(struct device_node *node)
+{
+       struct clk_hw_onecell_data *clk_data;
+       struct sun6i_rtc_dev *rtc;
+       struct clk_init_data init = {
+               .ops            = &sun6i_rtc_osc_ops,
+       };
+       const char *parents[2];
+
+       rtc = kzalloc(sizeof(*rtc), GFP_KERNEL);
+       if (!rtc)
+               return;
+       spin_lock_init(&rtc->lock);
+
+       clk_data = kzalloc(sizeof(*clk_data) + sizeof(*clk_data->hws),
+                          GFP_KERNEL);
+       if (!clk_data) {
+               kfree(rtc);
+               return;
+       }
+
+       rtc->base = of_io_request_and_map(node, 0, of_node_full_name(node));
+       if (IS_ERR(rtc->base)) {
+               pr_crit("Can't map RTC registers\n");
+               return;
+       }
+
+       /* Switch to the external, more precise, oscillator */
+       writel(SUN6I_LOSC_CTRL_KEY | SUN6I_LOSC_CTRL_EXT_OSC,
+              rtc->base + SUN6I_LOSC_CTRL);
+
+       /* Yes, I know, this is ugly. */
+       sun6i_rtc = rtc;
+
+       /* Deal with old DTs */
+       if (!of_get_property(node, "clocks", NULL))
+               return;
+
+       rtc->int_osc = clk_hw_register_fixed_rate_with_accuracy(NULL,
+                                                               "rtc-int-osc",
+                                                               NULL, 0,
+                                                               667000,
+                                                               300000000);
+       if (IS_ERR(rtc->int_osc)) {
+               pr_crit("Couldn't register the internal oscillator\n");
+               return;
+       }
+
+       parents[0] = clk_hw_get_name(rtc->int_osc);
+       parents[1] = of_clk_get_parent_name(node, 0);
+
+       rtc->hw.init = &init;
+
+       init.parent_names = parents;
+       init.num_parents = of_clk_get_parent_count(node) + 1;
+       of_property_read_string(node, "clock-output-names", &init.name);
+
+       rtc->losc = clk_register(NULL, &rtc->hw);
+       if (IS_ERR(rtc->losc)) {
+               pr_crit("Couldn't register the LOSC clock\n");
+               return;
+       }
+
+       clk_data->num = 1;
+       clk_data->hws[0] = &rtc->hw;
+       of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data);
+}
+CLK_OF_DECLARE_DRIVER(sun6i_rtc_clk, "allwinner,sun6i-a31-rtc",
+                     sun6i_rtc_clk_init);
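The provider registered above exposes the LOSC through a one-cell clock handle: a mux that selects between the internal RC oscillator (optionally divided by the 5-bit prescaler) and the external 32768 Hz crystal. As a minimal sketch of how a consumer driver might pick the clock up through the common clock framework (the helper and device are hypothetical; only the clk API calls are real):

#include <linux/clk.h>
#include <linux/device.h>

/* hypothetical consumer helper; `dev` is assumed to have a "clocks"
 * phandle pointing at the RTC node in its device tree entry */
static int consumer_enable_losc(struct device *dev)
{
	struct clk *losc = devm_clk_get(dev, NULL);
	int ret;

	if (IS_ERR(losc))
		return PTR_ERR(losc);

	ret = clk_prepare_enable(losc);
	if (ret)
		return ret;

	/* reports 32768 Hz when the mux selects the external oscillator */
	dev_info(dev, "LOSC rate: %lu Hz\n", clk_get_rate(losc));
	return 0;
}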
+
 static irqreturn_t sun6i_rtc_alarmirq(int irq, void *id)
 {
        struct sun6i_rtc_dev *chip = (struct sun6i_rtc_dev *) id;
+       irqreturn_t ret = IRQ_NONE;
        u32 val;
 
+       spin_lock(&chip->lock);
        val = readl(chip->base + SUN6I_ALRM_IRQ_STA);
 
        if (val & SUN6I_ALRM_IRQ_STA_CNT_IRQ_PEND) {
@@ -129,10 +265,11 @@ static irqreturn_t sun6i_rtc_alarmirq(int irq, void *id)
 
                rtc_update_irq(chip->rtc, 1, RTC_AF | RTC_IRQF);
 
-               return IRQ_HANDLED;
+               ret = IRQ_HANDLED;
        }
+       spin_unlock(&chip->lock);
 
-       return IRQ_NONE;
+       return ret;
 }
 
 static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip)
@@ -140,6 +277,7 @@ static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip)
        u32 alrm_val = 0;
        u32 alrm_irq_val = 0;
        u32 alrm_wake_val = 0;
+       unsigned long flags;
 
        if (to) {
                alrm_val = SUN6I_ALRM_EN_CNT_EN;
@@ -150,9 +288,11 @@ static void sun6i_rtc_setaie(int to, struct sun6i_rtc_dev *chip)
                       chip->base + SUN6I_ALRM_IRQ_STA);
        }
 
+       spin_lock_irqsave(&chip->lock, flags);
        writel(alrm_val, chip->base + SUN6I_ALRM_EN);
        writel(alrm_irq_val, chip->base + SUN6I_ALRM_IRQ_EN);
        writel(alrm_wake_val, chip->base + SUN6I_ALARM_CONFIG);
+       spin_unlock_irqrestore(&chip->lock, flags);
 }
 
 static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
@@ -191,11 +331,15 @@ static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 static int sun6i_rtc_getalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
 {
        struct sun6i_rtc_dev *chip = dev_get_drvdata(dev);
+       unsigned long flags;
        u32 alrm_st;
        u32 alrm_en;
 
+       spin_lock_irqsave(&chip->lock, flags);
        alrm_en = readl(chip->base + SUN6I_ALRM_IRQ_EN);
        alrm_st = readl(chip->base + SUN6I_ALRM_IRQ_STA);
+       spin_unlock_irqrestore(&chip->lock, flags);
+
        wkalrm->enabled = !!(alrm_en & SUN6I_ALRM_EN_CNT_EN);
        wkalrm->pending = !!(alrm_st & SUN6I_ALRM_EN_CNT_EN);
        rtc_time_to_tm(chip->alarm, &wkalrm->time);
@@ -349,22 +493,15 @@ static const struct rtc_class_ops sun6i_rtc_ops = {
 
 static int sun6i_rtc_probe(struct platform_device *pdev)
 {
-       struct sun6i_rtc_dev *chip;
-       struct resource *res;
+       struct sun6i_rtc_dev *chip = sun6i_rtc;
        int ret;
 
-       chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
        if (!chip)
-               return -ENOMEM;
+               return -ENODEV;
 
        platform_set_drvdata(pdev, chip);
        chip->dev = &pdev->dev;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       chip->base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(chip->base))
-               return PTR_ERR(chip->base);
-
        chip->irq = platform_get_irq(pdev, 0);
        if (chip->irq < 0) {
                dev_err(&pdev->dev, "No IRQ resource\n");
@@ -404,8 +541,10 @@ static int sun6i_rtc_probe(struct platform_device *pdev)
        /* disable alarm wakeup */
        writel(0, chip->base + SUN6I_ALARM_CONFIG);
 
-       chip->rtc = rtc_device_register("rtc-sun6i", &pdev->dev,
-                                       &sun6i_rtc_ops, THIS_MODULE);
+       clk_prepare_enable(chip->losc);
+
+       chip->rtc = devm_rtc_device_register(&pdev->dev, "rtc-sun6i",
+                                            &sun6i_rtc_ops, THIS_MODULE);
        if (IS_ERR(chip->rtc)) {
                dev_err(&pdev->dev, "unable to register device\n");
                return PTR_ERR(chip->rtc);
@@ -416,15 +555,6 @@ static int sun6i_rtc_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int sun6i_rtc_remove(struct platform_device *pdev)
-{
-       struct sun6i_rtc_dev *chip = platform_get_drvdata(pdev);
-
-       rtc_device_unregister(chip->rtc);
-
-       return 0;
-}
-
 static const struct of_device_id sun6i_rtc_dt_ids[] = {
        { .compatible = "allwinner,sun6i-a31-rtc" },
        { /* sentinel */ },
@@ -433,15 +563,9 @@ MODULE_DEVICE_TABLE(of, sun6i_rtc_dt_ids);
 
 static struct platform_driver sun6i_rtc_driver = {
        .probe          = sun6i_rtc_probe,
-       .remove         = sun6i_rtc_remove,
        .driver         = {
                .name           = "sun6i-rtc",
                .of_match_table = sun6i_rtc_dt_ids,
        },
 };
-
-module_platform_driver(sun6i_rtc_driver);
-
-MODULE_DESCRIPTION("sun6i RTC driver");
-MODULE_AUTHOR("Chen-Yu Tsai <wens@csie.org>");
-MODULE_LICENSE("GPL");
+builtin_platform_driver(sun6i_rtc_driver);
index 3853ba963bb5d801502b8d50d992c83aeb886eb0..d30d57b048d36ccb5c9ab3285e196d4e168207fc 100644 (file)
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
-#include <linux/kernel.h>
+
+#include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/rtc.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
+#include <linux/rtc.h>
+#include <linux/slab.h>
 
 /* set to 1 = busy every eight 32kHz clocks during copy of sec+msec to AHB */
 #define TEGRA_RTC_REG_BUSY                     0x004
@@ -59,6 +61,7 @@ struct tegra_rtc_info {
        struct platform_device  *pdev;
        struct rtc_device       *rtc_dev;
        void __iomem            *rtc_base; /* NULL if not initialized. */
+       struct clk              *clk;
        int                     tegra_rtc_irq; /* alarm and periodic irq */
        spinlock_t              tegra_rtc_lock;
 };
@@ -326,6 +329,14 @@ static int __init tegra_rtc_probe(struct platform_device *pdev)
        if (info->tegra_rtc_irq <= 0)
                return -EBUSY;
 
+       info->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(info->clk))
+               return PTR_ERR(info->clk);
+
+       ret = clk_prepare_enable(info->clk);
+       if (ret < 0)
+               return ret;
+
        /* set context info. */
        info->pdev = pdev;
        spin_lock_init(&info->tegra_rtc_lock);
@@ -346,7 +357,7 @@ static int __init tegra_rtc_probe(struct platform_device *pdev)
                ret = PTR_ERR(info->rtc_dev);
                dev_err(&pdev->dev, "Unable to register device (err=%d).\n",
                        ret);
-               return ret;
+               goto disable_clk;
        }
 
        ret = devm_request_irq(&pdev->dev, info->tegra_rtc_irq,
@@ -356,11 +367,24 @@ static int __init tegra_rtc_probe(struct platform_device *pdev)
                dev_err(&pdev->dev,
                        "Unable to request interrupt for device (err=%d).\n",
                        ret);
-               return ret;
+               goto disable_clk;
        }
 
        dev_notice(&pdev->dev, "Tegra internal Real Time Clock\n");
 
+       return 0;
+
+disable_clk:
+       clk_disable_unprepare(info->clk);
+       return ret;
+}
+
+static int tegra_rtc_remove(struct platform_device *pdev)
+{
+       struct tegra_rtc_info *info = platform_get_drvdata(pdev);
+
+       clk_disable_unprepare(info->clk);
+
        return 0;
 }
 
@@ -413,6 +437,7 @@ static void tegra_rtc_shutdown(struct platform_device *pdev)
 
 MODULE_ALIAS("platform:tegra_rtc");
 static struct platform_driver tegra_rtc_driver = {
+       .remove         = tegra_rtc_remove,
        .shutdown       = tegra_rtc_shutdown,
        .driver         = {
                .name   = "tegra_rtc",
index 5a3d53caa485f733c3f38af872b18e8037b751ba..d0244d7979fcb8913bfaf6093334b6f3f5938bf4 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/types.h>
 #include <linux/rtc.h>
 #include <linux/bcd.h>
+#include <linux/math64.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/mfd/tps65910.h>
@@ -33,7 +34,21 @@ struct tps65910_rtc {
 /* Total number of RTC registers needed to set time */
 #define NUM_TIME_REGS  (TPS65910_YEARS - TPS65910_SECONDS + 1)
 
-static int tps65910_rtc_alarm_irq_enable(struct device *dev, unsigned enabled)
+/* Total number of RTC registers needed to set compensation registers */
+#define NUM_COMP_REGS  (TPS65910_RTC_COMP_MSB - TPS65910_RTC_COMP_LSB + 1)
+
+/* Min and max values supported with 'offset' interface (swapped sign) */
+#define MIN_OFFSET     (-277761)
+#define MAX_OFFSET     (277778)
+
+/* Number of ticks per hour */
+#define TICKS_PER_HOUR (32768 * 3600)
+
+/* Multiplier for ppb conversions */
+#define PPB_MULT       (1000000000LL)
+
+static int tps65910_rtc_alarm_irq_enable(struct device *dev,
+                                        unsigned int enabled)
 {
        struct tps65910 *tps = dev_get_drvdata(dev->parent);
        u8 val = 0;
@@ -187,6 +202,133 @@ static int tps65910_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
        return ret;
 }
 
+static int tps65910_rtc_set_calibration(struct device *dev, int calibration)
+{
+       unsigned char comp_data[NUM_COMP_REGS];
+       struct tps65910 *tps = dev_get_drvdata(dev->parent);
+       s16 value;
+       int ret;
+
+       /*
+        * The TPS65910 uses a 16-bit two's complement value to compensate for
+        * RTC crystal inaccuracies. Once every hour, when the seconds counter
+        * increments from 0 to 1, the compensation value is added to the
+        * internal RTC counter value.
+        *
+        * The compensation value 0x7FFF is prohibited.
+        *
+        * Valid range for the compensation value: [-32768 .. 32766]
+        */
+       if ((calibration < -32768) || (calibration > 32766)) {
+               dev_err(dev, "RTC calibration value out of range: %d\n",
+                       calibration);
+               return -EINVAL;
+       }
+
+       value = (s16)calibration;
+
+       comp_data[0] = (u16)value & 0xFF;
+       comp_data[1] = ((u16)value >> 8) & 0xFF;
+
+       /* Update all the compensation registers in one shot */
+       ret = regmap_bulk_write(tps->regmap, TPS65910_RTC_COMP_LSB,
+               comp_data, NUM_COMP_REGS);
+       if (ret < 0) {
+               dev_err(dev, "rtc_set_calibration error: %d\n", ret);
+               return ret;
+       }
+
+       /* Enable automatic compensation */
+       ret = regmap_update_bits(tps->regmap, TPS65910_RTC_CTRL,
+               TPS65910_RTC_CTRL_AUTO_COMP, TPS65910_RTC_CTRL_AUTO_COMP);
+       if (ret < 0)
+               dev_err(dev, "auto_comp enable failed with error: %d\n", ret);
+
+       return ret;
+}
+
+static int tps65910_rtc_get_calibration(struct device *dev, int *calibration)
+{
+       unsigned char comp_data[NUM_COMP_REGS];
+       struct tps65910 *tps = dev_get_drvdata(dev->parent);
+       unsigned int ctrl;
+       u16 value;
+       int ret;
+
+       ret = regmap_read(tps->regmap, TPS65910_RTC_CTRL, &ctrl);
+       if (ret < 0)
+               return ret;
+
+       /* If automatic compensation is not enabled report back zero */
+       if (!(ctrl & TPS65910_RTC_CTRL_AUTO_COMP)) {
+               *calibration = 0;
+               return 0;
+       }
+
+       ret = regmap_bulk_read(tps->regmap, TPS65910_RTC_COMP_LSB, comp_data,
+               NUM_COMP_REGS);
+       if (ret < 0) {
+               dev_err(dev, "rtc_get_calibration error: %d\n", ret);
+               return ret;
+       }
+
+       value = (u16)comp_data[0] | ((u16)comp_data[1] << 8);
+
+       *calibration = (s16)value;
+
+       return 0;
+}
+
+static int tps65910_read_offset(struct device *dev, long *offset)
+{
+       int calibration;
+       s64 tmp;
+       int ret;
+
+       ret = tps65910_rtc_get_calibration(dev, &calibration);
+       if (ret < 0)
+               return ret;
+
+       /* Convert from RTC calibration register format to ppb format */
+       tmp = calibration * (s64)PPB_MULT;
+       if (tmp < 0)
+               tmp -= TICKS_PER_HOUR / 2LL;
+       else
+               tmp += TICKS_PER_HOUR / 2LL;
+       tmp = div_s64(tmp, TICKS_PER_HOUR);
+
+       /* The offset value works in the opposite direction, so swap the sign */
+       *offset = (long)-tmp;
+
+       return 0;
+}
+
+static int tps65910_set_offset(struct device *dev, long offset)
+{
+       int calibration;
+       s64 tmp;
+       int ret;
+
+       /* Make sure offset value is within supported range */
+       if (offset < MIN_OFFSET || offset > MAX_OFFSET)
+               return -ERANGE;
+
+       /* Convert from ppb format to RTC calibration register format */
+       tmp = offset * (s64)TICKS_PER_HOUR;
+       if (tmp < 0)
+               tmp -= PPB_MULT / 2LL;
+       else
+               tmp += PPB_MULT / 2LL;
+       tmp = div_s64(tmp, PPB_MULT);
+
+       /* The offset value works in the opposite direction, so swap the sign */
+       calibration = (int)-tmp;
+
+       ret = tps65910_rtc_set_calibration(dev, calibration);
+
+       return ret;
+}
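As a sanity check of the conversion above, the following small standalone program (userspace, not part of the driver) applies the same rounding as tps65910_set_offset() and confirms that the advertised offset limits land exactly on the valid calibration register range:

#include <stdint.h>
#include <stdio.h>

#define TICKS_PER_HOUR (32768LL * 3600)	/* 32.768 kHz crystal over one hour */
#define PPB_MULT       1000000000LL

/* same math as tps65910_set_offset(): ppb offset -> calibration value */
static int offset_to_calibration(long offset)
{
	int64_t tmp = (int64_t)offset * TICKS_PER_HOUR;

	tmp += (tmp < 0) ? -(PPB_MULT / 2) : (PPB_MULT / 2);
	return (int)-(tmp / PPB_MULT);	/* rounded, sign swapped */
}

int main(void)
{
	printf("%d\n", offset_to_calibration(277778));	/* MAX_OFFSET -> -32768 */
	printf("%d\n", offset_to_calibration(-277761));	/* MIN_OFFSET -> 32766 */
	return 0;
}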
+
 static irqreturn_t tps65910_rtc_interrupt(int irq, void *rtc)
 {
        struct device *dev = rtc;
@@ -219,6 +361,8 @@ static const struct rtc_class_ops tps65910_rtc_ops = {
        .read_alarm     = tps65910_rtc_read_alarm,
        .set_alarm      = tps65910_rtc_set_alarm,
        .alarm_irq_enable = tps65910_rtc_alarm_irq_enable,
+       .read_offset    = tps65910_read_offset,
+       .set_offset     = tps65910_set_offset,
 };
 
 static int tps65910_rtc_probe(struct platform_device *pdev)
index 0f1713727d4c7911a018a8fb1554f289d1d86e18..0b38217f8147b33f2d94c4388d3a7e45c603f4cb 100644 (file)
@@ -4864,7 +4864,7 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
                        break;
                case 3: /* tsa_intrg */
                        len += sprintf(page + len, PRINTK_HEADER
-                                     " tsb->tsa.intrg.: not supportet yet\n");
+                                     " tsb->tsa.intrg.: not supported yet\n");
                        break;
                }
 
index 8225da6190148a44e5a729d9d56a186ca0da6dfe..4182f60124da1a9cf114313ee69cf52ce5ac6039 100644 (file)
@@ -165,13 +165,15 @@ int tpi(struct tpi_info *addr)
 int chsc(void *chsc_area)
 {
        typedef struct { char _[4096]; } addr_type;
-       int cc;
+       int cc = -EIO;
 
        asm volatile(
                "       .insn   rre,0xb25f0000,%2,0\n"
-               "       ipm     %0\n"
+               "0:     ipm     %0\n"
                "       srl     %0,28\n"
-               : "=d" (cc), "=m" (*(addr_type *) chsc_area)
+               "1:\n"
+               EX_TABLE(0b, 1b)
+               : "+d" (cc), "=m" (*(addr_type *) chsc_area)
                : "d" (chsc_area), "m" (*(addr_type *) chsc_area)
                : "cc");
        trace_s390_cio_chsc(chsc_area, cc);
index 0a7fb83f35e5b364381c22594cd31f89552ce676..be36f1010d75594390c550f8547c5e86311d9acb 100644 (file)
@@ -10,3 +10,7 @@ zcrypt-objs += zcrypt_msgtype6.o zcrypt_msgtype50.o
 obj-$(CONFIG_ZCRYPT) += zcrypt.o
 # adapter drivers depend on ap.o and zcrypt.o
 obj-$(CONFIG_ZCRYPT) += zcrypt_pcixcc.o zcrypt_cex2a.o zcrypt_cex4.o
+
+# pkey kernel module
+pkey-objs := pkey_api.o
+obj-$(CONFIG_PKEY) += pkey.o
index 56db76c05775dc68f32387c90aabf11da9690f3e..9be4596d8a089c7ab8fa35703ebd8c8252608e63 100644 (file)
@@ -1107,16 +1107,6 @@ static void ap_config_timeout(unsigned long ptr)
        queue_work(system_long_wq, &ap_scan_work);
 }
 
-static void ap_reset_domain(void)
-{
-       int i;
-
-       if (ap_domain_index == -1 || !ap_test_config_domain(ap_domain_index))
-               return;
-       for (i = 0; i < AP_DEVICES; i++)
-               ap_rapq(AP_MKQID(i, ap_domain_index));
-}
-
 static void ap_reset_all(void)
 {
        int i, j;
index 1cd9128593e414ce4a404dffc9944006e82dd46e..cfa161ccc74e92112e48a041cf7f24b99c9204e8 100644 (file)
@@ -58,9 +58,9 @@ static ssize_t ap_functions_show(struct device *dev,
 
 static DEVICE_ATTR(ap_functions, 0444, ap_functions_show, NULL);
 
-static ssize_t ap_request_count_show(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
+static ssize_t ap_req_count_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
 {
        struct ap_card *ac = to_ap_card(dev);
        unsigned int req_cnt;
@@ -72,7 +72,23 @@ static ssize_t ap_request_count_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", req_cnt);
 }
 
-static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL);
+static ssize_t ap_req_count_store(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *buf, size_t count)
+{
+       struct ap_card *ac = to_ap_card(dev);
+       struct ap_queue *aq;
+
+       spin_lock_bh(&ap_list_lock);
+       for_each_ap_queue(aq, ac)
+               aq->total_request_count = 0;
+       spin_unlock_bh(&ap_list_lock);
+       atomic_set(&ac->total_request_count, 0);
+
+       return count;
+}
+
+static DEVICE_ATTR(request_count, 0644, ap_req_count_show, ap_req_count_store);
 
 static ssize_t ap_requestq_count_show(struct device *dev,
                                      struct device_attribute *attr, char *buf)
index 7be67fa9f224d7e240fbbbeca52993e19ab0be77..480c58a637694e3c8c1f885b7eefdeb309e88102 100644 (file)
@@ -459,9 +459,9 @@ EXPORT_SYMBOL(ap_queue_resume);
 /*
  * AP queue related attributes.
  */
-static ssize_t ap_request_count_show(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
+static ssize_t ap_req_count_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
 {
        struct ap_queue *aq = to_ap_queue(dev);
        unsigned int req_cnt;
@@ -472,7 +472,20 @@ static ssize_t ap_request_count_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", req_cnt);
 }
 
-static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL);
+static ssize_t ap_req_count_store(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *buf, size_t count)
+{
+       struct ap_queue *aq = to_ap_queue(dev);
+
+       spin_lock_bh(&aq->lock);
+       aq->total_request_count = 0;
+       spin_unlock_bh(&aq->lock);
+
+       return count;
+}
+
+static DEVICE_ATTR(request_count, 0644, ap_req_count_show, ap_req_count_store);
 
 static ssize_t ap_requestq_count_show(struct device *dev,
                                      struct device_attribute *attr, char *buf)
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
new file mode 100644 (file)
index 0000000..40f1136
--- /dev/null
@@ -0,0 +1,1148 @@
+/*
+ *  pkey device driver
+ *
+ *  Copyright IBM Corp. 2017
+ *  Author(s): Harald Freudenberger
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ */
+
+#define KMSG_COMPONENT "pkey"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kallsyms.h>
+#include <linux/debugfs.h>
+#include <asm/zcrypt.h>
+#include <asm/cpacf.h>
+#include <asm/pkey.h>
+
+#include "zcrypt_api.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("s390 protected key interface");
+
+/* Size of parameter block used for all cca requests/replies */
+#define PARMBSIZE 512
+
+/* Size of vardata block used for some of the cca requests/replies */
+#define VARDATASIZE 4096
+
+/*
+ * debug feature data and functions
+ */
+
+static debug_info_t *debug_info;
+
+#define DEBUG_DBG(...) debug_sprintf_event(debug_info, 6, ##__VA_ARGS__)
+#define DEBUG_INFO(...) debug_sprintf_event(debug_info, 5, ##__VA_ARGS__)
+#define DEBUG_WARN(...) debug_sprintf_event(debug_info, 4, ##__VA_ARGS__)
+#define DEBUG_ERR(...) debug_sprintf_event(debug_info, 3, ##__VA_ARGS__)
+
+static void __init pkey_debug_init(void)
+{
+       debug_info = debug_register("pkey", 1, 1, 4 * sizeof(long));
+       debug_register_view(debug_info, &debug_sprintf_view);
+       debug_set_level(debug_info, 3);
+}
+
+static void __exit pkey_debug_exit(void)
+{
+       debug_unregister(debug_info);
+}
+
+/* inside view of a secure key token (only type 0x01 version 0x04) */
+struct secaeskeytoken {
+       u8  type;     /* 0x01 for internal key token */
+       u8  res0[3];
+       u8  version;  /* should be 0x04 */
+       u8  res1[1];
+       u8  flag;     /* key flags */
+       u8  res2[1];
+       u64 mkvp;     /* master key verification pattern */
+       u8  key[32];  /* key value (encrypted) */
+       u8  cv[8];    /* control vector */
+       u16 bitsize;  /* key bit size */
+       u16 keysize;  /* key byte size */
+       u8  tvv[4];   /* token validation value */
+} __packed;
+
+/*
+ * Simple check if the token is a valid CCA secure AES key
+ * token. If keybitsize is given, the bitsize of the key is
+ * also checked. Returns 0 on success or errno value on failure.
+ */
+static int check_secaeskeytoken(u8 *token, int keybitsize)
+{
+       struct secaeskeytoken *t = (struct secaeskeytoken *) token;
+
+       if (t->type != 0x01) {
+               DEBUG_ERR(
+                       "check_secaeskeytoken secure token check failed, type mismatch 0x%02x != 0x01\n",
+                       (int) t->type);
+               return -EINVAL;
+       }
+       if (t->version != 0x04) {
+               DEBUG_ERR(
+                       "check_secaeskeytoken secure token check failed, version mismatch 0x%02x != 0x04\n",
+                       (int) t->version);
+               return -EINVAL;
+       }
+       if (keybitsize > 0 && t->bitsize != keybitsize) {
+               DEBUG_ERR(
+                       "check_secaeskeytoken secure token check failed, bitsize mismatch %d != %d\n",
+                       (int) t->bitsize, keybitsize);
+               return -EINVAL;
+       }
+
+       return 0;
+}
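A quick size check of the token layout above: 1 + 3 + 1 + 1 + 1 + 1 + 8 + 32 + 8 + 2 + 2 + 4 = 64 bytes, which is consistent with the SECKEYBLOBSIZE-sized key blobs used in the requests below (the constant itself is defined in the pkey header, not in this hunk).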
+
+/*
+ * Allocate consecutive memory for request CPRB, request param
+ * block, reply CPRB and reply param block and fill in values
+ * for the common fields. Returns 0 on success or errno value
+ * on failure.
+ */
+static int alloc_and_prep_cprbmem(size_t paramblen,
+                                 u8 **pcprbmem,
+                                 struct CPRBX **preqCPRB,
+                                 struct CPRBX **prepCPRB)
+{
+       u8 *cprbmem;
+       size_t cprbplusparamblen = sizeof(struct CPRBX) + paramblen;
+       struct CPRBX *preqcblk, *prepcblk;
+
+       /*
+        * allocate consecutive memory for request CPRB, request param
+        * block, reply CPRB and reply param block
+        */
+       cprbmem = kzalloc(2 * cprbplusparamblen, GFP_KERNEL);
+       if (!cprbmem)
+               return -ENOMEM;
+
+       preqcblk = (struct CPRBX *) cprbmem;
+       prepcblk = (struct CPRBX *) (cprbmem + cprbplusparamblen);
+
+       /* fill request cprb struct */
+       preqcblk->cprb_len = sizeof(struct CPRBX);
+       preqcblk->cprb_ver_id = 0x02;
+       memcpy(preqcblk->func_id, "T2", 2);
+       preqcblk->rpl_msgbl = cprbplusparamblen;
+       if (paramblen) {
+               preqcblk->req_parmb =
+                       ((u8 *) preqcblk) + sizeof(struct CPRBX);
+               preqcblk->rpl_parmb =
+                       ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       }
+
+       *pcprbmem = cprbmem;
+       *preqCPRB = preqcblk;
+       *prepCPRB = prepcblk;
+
+       return 0;
+}
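For orientation, the single allocation prepared above is laid out as follows (a sketch; sizes as in the code):

    | request CPRBX | request parm block (paramblen) | reply CPRBX | reply parm block (paramblen) |

with req_parmb and rpl_parmb pointing directly behind the respective CPRBX headers, and rpl_msgbl sized to hold one CPRB plus one parameter block.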
+
+/*
+ * Free the cprb memory allocated with the function above.
+ * If the scrub value is not zero, the memory is filled
+ * with zeros before freeing (useful if there was some
+ * clear key material in there).
+ */
+static void free_cprbmem(void *mem, size_t paramblen, int scrub)
+{
+       if (scrub)
+               memzero_explicit(mem, 2 * (sizeof(struct CPRBX) + paramblen));
+       kfree(mem);
+}
+
+/*
+ * Helper function to prepare the xcrb struct
+ */
+static inline void prep_xcrb(struct ica_xcRB *pxcrb,
+                            u16 cardnr,
+                            struct CPRBX *preqcblk,
+                            struct CPRBX *prepcblk)
+{
+       memset(pxcrb, 0, sizeof(*pxcrb));
+       pxcrb->agent_ID = 0x4341; /* 'CA' */
+       pxcrb->user_defined = (cardnr == 0xFFFF ? AUTOSELECT : cardnr);
+       pxcrb->request_control_blk_length =
+               preqcblk->cprb_len + preqcblk->req_parml;
+       pxcrb->request_control_blk_addr = (void *) preqcblk;
+       pxcrb->reply_control_blk_length = preqcblk->rpl_msgbl;
+       pxcrb->reply_control_blk_addr = (void *) prepcblk;
+}
+
+/*
+ * Helper function which calls zcrypt_send_cprb with
+ * memory management segment adjusted to kernel space
+ * so that the copy_from_user calls within this
+ * function in fact copy from kernel space.
+ */
+static inline int _zcrypt_send_cprb(struct ica_xcRB *xcrb)
+{
+       int rc;
+       mm_segment_t old_fs = get_fs();
+
+       set_fs(KERNEL_DS);
+       rc = zcrypt_send_cprb(xcrb);
+       set_fs(old_fs);
+
+       return rc;
+}
+
+/*
+ * Generate (random) AES secure key.
+ */
+int pkey_genseckey(u16 cardnr, u16 domain,
+                  u32 keytype, struct pkey_seckey *seckey)
+{
+       int i, rc, keysize;
+       int seckeysize;
+       u8 *mem;
+       struct CPRBX *preqcblk, *prepcblk;
+       struct ica_xcRB xcrb;
+       struct kgreqparm {
+               u8  subfunc_code[2];
+               u16 rule_array_len;
+               struct lv1 {
+                       u16 len;
+                       char  key_form[8];
+                       char  key_length[8];
+                       char  key_type1[8];
+                       char  key_type2[8];
+               } lv1;
+               struct lv2 {
+                       u16 len;
+                       struct keyid {
+                               u16 len;
+                               u16 attr;
+                               u8  data[SECKEYBLOBSIZE];
+                       } keyid[6];
+               } lv2;
+       } *preqparm;
+       struct kgrepparm {
+               u8  subfunc_code[2];
+               u16 rule_array_len;
+               struct lv3 {
+                       u16 len;
+                       u16 keyblocklen;
+                       struct {
+                               u16 toklen;
+                               u16 tokattr;
+                               u8  tok[0];
+                               /* ... some more data ... */
+                       } keyblock;
+               } lv3;
+       } *prepparm;
+
+       /* get already prepared memory for 2 cprbs with param block each */
+       rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+       if (rc)
+               return rc;
+
+       /* fill request cprb struct */
+       preqcblk->domain = domain;
+
+       /* fill request cprb param block with KG request */
+       preqparm = (struct kgreqparm *) preqcblk->req_parmb;
+       memcpy(preqparm->subfunc_code, "KG", 2);
+       preqparm->rule_array_len = sizeof(preqparm->rule_array_len);
+       preqparm->lv1.len = sizeof(struct lv1);
+       memcpy(preqparm->lv1.key_form,   "OP      ", 8);
+       switch (keytype) {
+       case PKEY_KEYTYPE_AES_128:
+               keysize = 16;
+               memcpy(preqparm->lv1.key_length, "KEYLN16 ", 8);
+               break;
+       case PKEY_KEYTYPE_AES_192:
+               keysize = 24;
+               memcpy(preqparm->lv1.key_length, "KEYLN24 ", 8);
+               break;
+       case PKEY_KEYTYPE_AES_256:
+               keysize = 32;
+               memcpy(preqparm->lv1.key_length, "KEYLN32 ", 8);
+               break;
+       default:
+               DEBUG_ERR(
+                       "pkey_genseckey unknown/unsupported keytype %d\n",
+                       keytype);
+               rc = -EINVAL;
+               goto out;
+       }
+       memcpy(preqparm->lv1.key_type1,  "AESDATA ", 8);
+       preqparm->lv2.len = sizeof(struct lv2);
+       for (i = 0; i < 6; i++) {
+               preqparm->lv2.keyid[i].len = sizeof(struct keyid);
+               preqparm->lv2.keyid[i].attr = (i == 2 ? 0x30 : 0x10);
+       }
+       preqcblk->req_parml = sizeof(struct kgreqparm);
+
+       /* fill xcrb struct */
+       prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
+
+       /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
+       rc = _zcrypt_send_cprb(&xcrb);
+       if (rc) {
+               DEBUG_ERR(
+                       "pkey_genseckey zcrypt_send_cprb (cardnr=%d domain=%d) failed with errno %d\n",
+                       (int) cardnr, (int) domain, rc);
+               goto out;
+       }
+
+       /* check response returncode and reasoncode */
+       if (prepcblk->ccp_rtcode != 0) {
+               DEBUG_ERR(
+                       "pkey_genseckey secure key generate failure, card response %d/%d\n",
+                       (int) prepcblk->ccp_rtcode,
+                       (int) prepcblk->ccp_rscode);
+               rc = -EIO;
+               goto out;
+       }
+
+       /* process response cprb param block */
+       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepparm = (struct kgrepparm *) prepcblk->rpl_parmb;
+
+       /* check length of the returned secure key token */
+       seckeysize = prepparm->lv3.keyblock.toklen
+               - sizeof(prepparm->lv3.keyblock.toklen)
+               - sizeof(prepparm->lv3.keyblock.tokattr);
+       if (seckeysize != SECKEYBLOBSIZE) {
+               DEBUG_ERR(
+                       "pkey_genseckey secure token size mismatch %d != %d bytes\n",
+                       seckeysize, SECKEYBLOBSIZE);
+               rc = -EIO;
+               goto out;
+       }
+
+       /* check secure key token */
+       rc = check_secaeskeytoken(prepparm->lv3.keyblock.tok, 8*keysize);
+       if (rc) {
+               rc = -EIO;
+               goto out;
+       }
+
+       /* copy the generated secure key token */
+       memcpy(seckey->seckey, prepparm->lv3.keyblock.tok, SECKEYBLOBSIZE);
+
+out:
+       free_cprbmem(mem, PARMBSIZE, 0);
+       return rc;
+}
+EXPORT_SYMBOL(pkey_genseckey);
+
+/*
+ * Generate an AES secure key with given key value.
+ */
+int pkey_clr2seckey(u16 cardnr, u16 domain, u32 keytype,
+                   const struct pkey_clrkey *clrkey,
+                   struct pkey_seckey *seckey)
+{
+       int rc, keysize, seckeysize;
+       u8 *mem;
+       struct CPRBX *preqcblk, *prepcblk;
+       struct ica_xcRB xcrb;
+       struct cmreqparm {
+               u8  subfunc_code[2];
+               u16 rule_array_len;
+               char  rule_array[8];
+               struct lv1 {
+                       u16 len;
+                       u8  clrkey[0];
+               } lv1;
+               struct lv2 {
+                       u16 len;
+                       struct keyid {
+                               u16 len;
+                               u16 attr;
+                               u8  data[SECKEYBLOBSIZE];
+                       } keyid;
+               } lv2;
+       } *preqparm;
+       struct lv2 *plv2;
+       struct cmrepparm {
+               u8  subfunc_code[2];
+               u16 rule_array_len;
+               struct lv3 {
+                       u16 len;
+                       u16 keyblocklen;
+                       struct {
+                               u16 toklen;
+                               u16 tokattr;
+                               u8  tok[0];
+                               /* ... some more data ... */
+                       } keyblock;
+               } lv3;
+       } *prepparm;
+
+       /* get already prepared memory for 2 cprbs with param block each */
+       rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+       if (rc)
+               return rc;
+
+       /* fill request cprb struct */
+       preqcblk->domain = domain;
+
+       /* fill request cprb param block with CM request */
+       preqparm = (struct cmreqparm *) preqcblk->req_parmb;
+       memcpy(preqparm->subfunc_code, "CM", 2);
+       memcpy(preqparm->rule_array, "AES     ", 8);
+       preqparm->rule_array_len =
+               sizeof(preqparm->rule_array_len) + sizeof(preqparm->rule_array);
+       switch (keytype) {
+       case PKEY_KEYTYPE_AES_128:
+               keysize = 16;
+               break;
+       case PKEY_KEYTYPE_AES_192:
+               keysize = 24;
+               break;
+       case PKEY_KEYTYPE_AES_256:
+               keysize = 32;
+               break;
+       default:
+               DEBUG_ERR(
+                       "pkey_clr2seckey unknown/unsupported keytype %d\n",
+                       keytype);
+               rc = -EINVAL;
+               goto out;
+       }
+       preqparm->lv1.len = sizeof(struct lv1) + keysize;
+       memcpy(preqparm->lv1.clrkey, clrkey->clrkey, keysize);
+       plv2 = (struct lv2 *) (((u8 *) &preqparm->lv2) + keysize);
+       plv2->len = sizeof(struct lv2);
+       plv2->keyid.len = sizeof(struct keyid);
+       plv2->keyid.attr = 0x30;
+       preqcblk->req_parml = sizeof(struct cmreqparm) + keysize;
+
+       /* fill xcrb struct */
+       prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
+
+       /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
+       rc = _zcrypt_send_cprb(&xcrb);
+       if (rc) {
+               DEBUG_ERR(
+                       "pkey_clr2seckey zcrypt_send_cprb (cardnr=%d domain=%d) failed with errno %d\n",
+                       (int) cardnr, (int) domain, rc);
+               goto out;
+       }
+
+       /* check response returncode and reasoncode */
+       if (prepcblk->ccp_rtcode != 0) {
+               DEBUG_ERR(
+                       "pkey_clr2seckey clear key import failure, card response %d/%d\n",
+                       (int) prepcblk->ccp_rtcode,
+                       (int) prepcblk->ccp_rscode);
+               rc = -EIO;
+               goto out;
+       }
+
+       /* process response cprb param block */
+       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepparm = (struct cmrepparm *) prepcblk->rpl_parmb;
+
+       /* check length of the returned secure key token */
+       seckeysize = prepparm->lv3.keyblock.toklen
+               - sizeof(prepparm->lv3.keyblock.toklen)
+               - sizeof(prepparm->lv3.keyblock.tokattr);
+       if (seckeysize != SECKEYBLOBSIZE) {
+               DEBUG_ERR(
+                       "pkey_clr2seckey secure token size mismatch %d != %d bytes\n",
+                       seckeysize, SECKEYBLOBSIZE);
+               rc = -EIO;
+               goto out;
+       }
+
+       /* check secure key token */
+       rc = check_secaeskeytoken(prepparm->lv3.keyblock.tok, 8*keysize);
+       if (rc) {
+               rc = -EIO;
+               goto out;
+       }
+
+       /* copy the generated secure key token */
+       memcpy(seckey->seckey, prepparm->lv3.keyblock.tok, SECKEYBLOBSIZE);
+
+out:
+       free_cprbmem(mem, PARMBSIZE, 1);
+       return rc;
+}
+EXPORT_SYMBOL(pkey_clr2seckey);
+
+/*
+ * Derive a protected key from the secure key blob.
+ */
+int pkey_sec2protkey(u16 cardnr, u16 domain,
+                    const struct pkey_seckey *seckey,
+                    struct pkey_protkey *protkey)
+{
+       int rc;
+       u8 *mem;
+       struct CPRBX *preqcblk, *prepcblk;
+       struct ica_xcRB xcrb;
+       struct uskreqparm {
+               u8  subfunc_code[2];
+               u16 rule_array_len;
+               struct lv1 {
+                       u16 len;
+                       u16 attr_len;
+                       u16 attr_flags;
+               } lv1;
+               struct lv2 {
+                       u16 len;
+                       u16 attr_len;
+                       u16 attr_flags;
+                       u8  token[0];         /* cca secure key token */
+               } lv2 __packed;
+       } *preqparm;
+       struct uskrepparm {
+               u8  subfunc_code[2];
+               u16 rule_array_len;
+               struct lv3 {
+                       u16 len;
+                       u16 attr_len;
+                       u16 attr_flags;
+                       struct cpacfkeyblock {
+                               u8  version;  /* version of this struct */
+                               u8  flags[2];
+                               u8  algo;
+                               u8  form;
+                               u8  pad1[3];
+                               u16 keylen;
+                               u8  key[64];  /* the key (keylen bytes) */
+                               u16 keyattrlen;
+                               u8  keyattr[32];
+                               u8  pad2[1];
+                               u8  vptype;
+                               u8  vp[32];  /* verification pattern */
+                       } keyblock;
+               } lv3 __packed;
+       } *prepparm;
+
+       /* get already prepared memory for 2 cprbs with param block each */
+       rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk);
+       if (rc)
+               return rc;
+
+       /* fill request cprb struct */
+       preqcblk->domain = domain;
+
+       /* fill request cprb param block with USK request */
+       preqparm = (struct uskreqparm *) preqcblk->req_parmb;
+       memcpy(preqparm->subfunc_code, "US", 2);
+       preqparm->rule_array_len = sizeof(preqparm->rule_array_len);
+       preqparm->lv1.len = sizeof(struct lv1);
+       preqparm->lv1.attr_len = sizeof(struct lv1) - sizeof(preqparm->lv1.len);
+       preqparm->lv1.attr_flags = 0x0001;
+       preqparm->lv2.len = sizeof(struct lv2) + SECKEYBLOBSIZE;
+       preqparm->lv2.attr_len = sizeof(struct lv2)
+               - sizeof(preqparm->lv2.len) + SECKEYBLOBSIZE;
+       preqparm->lv2.attr_flags = 0x0000;
+       memcpy(preqparm->lv2.token, seckey->seckey, SECKEYBLOBSIZE);
+       preqcblk->req_parml = sizeof(struct uskreqparm) + SECKEYBLOBSIZE;
+
+       /* fill xcrb struct */
+       prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
+
+       /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
+       rc = _zcrypt_send_cprb(&xcrb);
+       if (rc) {
+               DEBUG_ERR(
+                       "pkey_sec2protkey zcrypt_send_cprb (cardnr=%d domain=%d) failed with errno %d\n",
+                       (int) cardnr, (int) domain, rc);
+               goto out;
+       }
+
+       /* check response returncode and reasoncode */
+       if (prepcblk->ccp_rtcode != 0) {
+               DEBUG_ERR(
+                       "pkey_sec2protkey unwrap secure key failure, card response %d/%d\n",
+                       (int) prepcblk->ccp_rtcode,
+                       (int) prepcblk->ccp_rscode);
+               rc = -EIO;
+               goto out;
+       }
+
+       /* process response cprb param block */
+       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepparm = (struct uskrepparm *) prepcblk->rpl_parmb;
+
+       /* check the returned keyblock */
+       if (prepparm->lv3.keyblock.version != 0x01) {
+               DEBUG_ERR(
+                       "pkey_sec2protkey reply param keyblock version mismatch 0x%02x != 0x01\n",
+                       (int) prepparm->lv3.keyblock.version);
+               rc = -EIO;
+               goto out;
+       }
+
+       /* copy the translated protected key */
+       switch (prepparm->lv3.keyblock.keylen) {
+       case 16+32:
+               protkey->type = PKEY_KEYTYPE_AES_128;
+               break;
+       case 24+32:
+               protkey->type = PKEY_KEYTYPE_AES_192;
+               break;
+       case 32+32:
+               protkey->type = PKEY_KEYTYPE_AES_256;
+               break;
+       default:
+               DEBUG_ERR("pkey_sec2protkey unknown/unsupported keytype %d\n",
+                         prepparm->lv3.keyblock.keylen);
+               rc = -EIO;
+               goto out;
+       }
+       protkey->len = prepparm->lv3.keyblock.keylen;
+       memcpy(protkey->protkey, prepparm->lv3.keyblock.key, protkey->len);
+
+out:
+       free_cprbmem(mem, PARMBSIZE, 0);
+       return rc;
+}
+EXPORT_SYMBOL(pkey_sec2protkey);
+
+/*
+ * Create a protected key from a clear key value.
+ */
+int pkey_clr2protkey(u32 keytype,
+                    const struct pkey_clrkey *clrkey,
+                    struct pkey_protkey *protkey)
+{
+       long fc;
+       int keysize;
+       u8 paramblock[64];
+
+       switch (keytype) {
+       case PKEY_KEYTYPE_AES_128:
+               keysize = 16;
+               fc = CPACF_PCKMO_ENC_AES_128_KEY;
+               break;
+       case PKEY_KEYTYPE_AES_192:
+               keysize = 24;
+               fc = CPACF_PCKMO_ENC_AES_192_KEY;
+               break;
+       case PKEY_KEYTYPE_AES_256:
+               keysize = 32;
+               fc = CPACF_PCKMO_ENC_AES_256_KEY;
+               break;
+       default:
+               DEBUG_ERR("pkey_clr2protkey unknown/unsupported keytype %d\n",
+                         keytype);
+               return -EINVAL;
+       }
+
+       /* prepare param block */
+       memset(paramblock, 0, sizeof(paramblock));
+       memcpy(paramblock, clrkey->clrkey, keysize);
+
+       /* call the pckmo instruction */
+       cpacf_pckmo(fc, paramblock);
+
+       /* copy created protected key */
+       protkey->type = keytype;
+       protkey->len = keysize + 32;
+       memcpy(protkey->protkey, paramblock, keysize + 32);
+
+       return 0;
+}
+EXPORT_SYMBOL(pkey_clr2protkey);
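Judging purely from the copies in the code above, the PCKMO parameter block is transformed in place, roughly:

    before: | clear key (keysize bytes) | zeroes |
    after:  | wrapped key (keysize bytes) | 32-byte verification pattern |

which is why the resulting protected key length is always the key size plus 32 bytes.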
+
+/*
+ * query cryptographic facility from adapter
+ */
+static int query_crypto_facility(u16 cardnr, u16 domain,
+                                const char *keyword,
+                                u8 *rarray, size_t *rarraylen,
+                                u8 *varray, size_t *varraylen)
+{
+       int rc;
+       u16 len;
+       u8 *mem, *ptr;
+       struct CPRBX *preqcblk, *prepcblk;
+       struct ica_xcRB xcrb;
+       struct fqreqparm {
+               u8  subfunc_code[2];
+               u16 rule_array_len;
+               char  rule_array[8];
+               struct lv1 {
+                       u16 len;
+                       u8  data[VARDATASIZE];
+               } lv1;
+               u16 dummylen;
+       } *preqparm;
+       size_t parmbsize = sizeof(struct fqreqparm);
+       struct fqrepparm {
+               u8  subfunc_code[2];
+               u8  lvdata[0];
+       } *prepparm;
+
+       /* get already prepared memory for 2 cprbs with param block each */
+       rc = alloc_and_prep_cprbmem(parmbsize, &mem, &preqcblk, &prepcblk);
+       if (rc)
+               return rc;
+
+       /* fill request cprb struct */
+       preqcblk->domain = domain;
+
+       /* fill request cprb param block with FQ request */
+       preqparm = (struct fqreqparm *) preqcblk->req_parmb;
+       memcpy(preqparm->subfunc_code, "FQ", 2);
+       strncpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array));
+       preqparm->rule_array_len =
+               sizeof(preqparm->rule_array_len) + sizeof(preqparm->rule_array);
+       preqparm->lv1.len = sizeof(preqparm->lv1);
+       preqparm->dummylen = sizeof(preqparm->dummylen);
+       preqcblk->req_parml = parmbsize;
+
+       /* fill xcrb struct */
+       prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk);
+
+       /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */
+       rc = _zcrypt_send_cprb(&xcrb);
+       if (rc) {
+               DEBUG_ERR(
+                       "query_crypto_facility zcrypt_send_cprb (cardnr=%d domain=%d) failed with errno %d\n",
+                       (int) cardnr, (int) domain, rc);
+               goto out;
+       }
+
+       /* check response returncode and reasoncode */
+       if (prepcblk->ccp_rtcode != 0) {
+               DEBUG_ERR(
+                       "query_crypto_facility unwrap secure key failure, card response %d/%d\n",
+                       (int) prepcblk->ccp_rtcode,
+                       (int) prepcblk->ccp_rscode);
+               rc = -EIO;
+               goto out;
+       }
+
+       /* process response cprb param block */
+       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepparm = (struct fqrepparm *) prepcblk->rpl_parmb;
+       ptr = prepparm->lvdata;
+
+       /* check and possibly copy reply rule array */
+       len = *((u16 *) ptr);
+       if (len > sizeof(u16)) {
+               ptr += sizeof(u16);
+               len -= sizeof(u16);
+               if (rarray && rarraylen && *rarraylen > 0) {
+                       *rarraylen = (len > *rarraylen ? *rarraylen : len);
+                       memcpy(rarray, ptr, *rarraylen);
+               }
+               ptr += len;
+       }
+       /* check and possibly copy reply var array */
+       len = *((u16 *) ptr);
+       if (len > sizeof(u16)) {
+               ptr += sizeof(u16);
+               len -= sizeof(u16);
+               if (varray && varraylen && *varraylen > 0) {
+                       *varraylen = (len > *varraylen ? *varraylen : len);
+                       memcpy(varray, ptr, *varraylen);
+               }
+               ptr += len;
+       }
+
+out:
+       free_cprbmem(mem, parmbsize, 0);
+       return rc;
+}
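The reply parameter block walked above is length-value encoded; schematically:

    | subfunc_code "FQ" | u16 len | rule array (len - 2 bytes) | u16 len | var array (len - 2 bytes) |

Each u16 length field counts itself, which is why the parser only copies when len > sizeof(u16) and subtracts sizeof(u16) from len first.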
+
+/*
+ * Fetch just the mkvp value via query_crypto_facility from adapter.
+ */
+static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp)
+{
+       int rc, found = 0;
+       size_t rlen, vlen;
+       u8 *rarray, *varray, *pg;
+
+       pg = (u8 *) __get_free_page(GFP_KERNEL);
+       if (!pg)
+               return -ENOMEM;
+       rarray = pg;
+       varray = pg + PAGE_SIZE/2;
+       rlen = vlen = PAGE_SIZE/2;
+
+       rc = query_crypto_facility(cardnr, domain, "STATICSA",
+                                  rarray, &rlen, varray, &vlen);
+       if (rc == 0 && rlen > 8*8 && vlen > 184+8) {
+               if (rarray[64] == '2') {
+                       /* current master key state is valid */
+                       *mkvp = *((u64 *)(varray + 184));
+                       found = 1;
+               }
+       }
+
+       free_page((unsigned long) pg);
+
+       return found ? 0 : -ENOENT;
+}
+
+/* struct to hold cached mkvp info for each card/domain */
+struct mkvp_info {
+       struct list_head list;
+       u16 cardnr;
+       u16 domain;
+       u64 mkvp;
+};
+
+/* a list with mkvp_info entries */
+static LIST_HEAD(mkvp_list);
+static DEFINE_SPINLOCK(mkvp_list_lock);
+
+static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp)
+{
+       int rc = -ENOENT;
+       struct mkvp_info *ptr;
+
+       spin_lock_bh(&mkvp_list_lock);
+       list_for_each_entry(ptr, &mkvp_list, list) {
+               if (ptr->cardnr == cardnr &&
+                   ptr->domain == domain) {
+                       *mkvp = ptr->mkvp;
+                       rc = 0;
+                       break;
+               }
+       }
+       spin_unlock_bh(&mkvp_list_lock);
+
+       return rc;
+}
+
+static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp)
+{
+       int found = 0;
+       struct mkvp_info *ptr;
+
+       spin_lock_bh(&mkvp_list_lock);
+       list_for_each_entry(ptr, &mkvp_list, list) {
+               if (ptr->cardnr == cardnr &&
+                   ptr->domain == domain) {
+                       ptr->mkvp = mkvp;
+                       found = 1;
+                       break;
+               }
+       }
+       if (!found) {
+               ptr = kmalloc(sizeof(*ptr), GFP_ATOMIC);
+               if (!ptr) {
+                       spin_unlock_bh(&mkvp_list_lock);
+                       return;
+               }
+               ptr->cardnr = cardnr;
+               ptr->domain = domain;
+               ptr->mkvp = mkvp;
+               list_add(&ptr->list, &mkvp_list);
+       }
+       spin_unlock_bh(&mkvp_list_lock);
+}
+
+static void mkvp_cache_scrub(u16 cardnr, u16 domain)
+{
+       struct mkvp_info *ptr;
+
+       spin_lock_bh(&mkvp_list_lock);
+       list_for_each_entry(ptr, &mkvp_list, list) {
+               if (ptr->cardnr == cardnr &&
+                   ptr->domain == domain) {
+                       list_del(&ptr->list);
+                       kfree(ptr);
+                       break;
+               }
+       }
+       spin_unlock_bh(&mkvp_list_lock);
+}
+
+static void __exit mkvp_cache_free(void)
+{
+       struct mkvp_info *ptr, *pnext;
+
+       spin_lock_bh(&mkvp_list_lock);
+       list_for_each_entry_safe(ptr, pnext, &mkvp_list, list) {
+               list_del(&ptr->list);
+               kfree(ptr);
+       }
+       spin_unlock_bh(&mkvp_list_lock);
+}
+
+/*
+ * Search for a matching crypto card based on the Master Key
+ * Verification Pattern provided inside a secure key.
+ */
+int pkey_findcard(const struct pkey_seckey *seckey,
+                 u16 *pcardnr, u16 *pdomain, int verify)
+{
+       struct secaeskeytoken *t = (struct secaeskeytoken *) seckey;
+       struct zcrypt_device_matrix *device_matrix;
+       u16 card, dom;
+       u64 mkvp;
+       int i, rc;
+
+       /* mkvp must not be zero */
+       if (t->mkvp == 0)
+               return -EINVAL;
+
+       /* fetch status of all crypto cards */
+       device_matrix = kmalloc(sizeof(struct zcrypt_device_matrix),
+                               GFP_KERNEL);
+       if (!device_matrix)
+               return -ENOMEM;
+       zcrypt_device_status_mask(device_matrix);
+
+       /* walk through all crypto cards */
+       for (i = 0; i < MAX_ZDEV_ENTRIES; i++) {
+               card = AP_QID_CARD(device_matrix->device[i].qid);
+               dom = AP_QID_QUEUE(device_matrix->device[i].qid);
+               if (device_matrix->device[i].online &&
+                   device_matrix->device[i].functions & 0x04) {
+                       /* an enabled CCA Coprocessor card */
+                       /* try cached mkvp */
+                       if (mkvp_cache_fetch(card, dom, &mkvp) == 0 &&
+                           t->mkvp == mkvp) {
+                               if (!verify)
+                                       break;
+                               /* verify: fetch mkvp from adapter */
+                               if (fetch_mkvp(card, dom, &mkvp) == 0) {
+                                       mkvp_cache_update(card, dom, mkvp);
+                                       if (t->mkvp == mkvp)
+                                               break;
+                               }
+                       }
+               } else {
+                       /* Card is offline and/or not a CCA card. */
+                       /* del mkvp entry from cache if it exists */
+                       mkvp_cache_scrub(card, dom);
+               }
+       }
+       if (i >= MAX_ZDEV_ENTRIES) {
+               /* nothing found, so this time without cache */
+               for (i = 0; i < MAX_ZDEV_ENTRIES; i++) {
+                       if (!(device_matrix->device[i].online &&
+                             device_matrix->device[i].functions & 0x04))
+                               continue;
+                       card = AP_QID_CARD(device_matrix->device[i].qid);
+                       dom = AP_QID_QUEUE(device_matrix->device[i].qid);
+                       /* fetch a fresh mkvp from the adapter */
+                       if (fetch_mkvp(card, dom, &mkvp) == 0) {
+                               mkvp_cache_update(card, dom, mkvp);
+                               if (t->mkvp == mkvp)
+                                       break;
+                       }
+               }
+       }
+       if (i < MAX_ZDEV_ENTRIES) {
+               if (pcardnr)
+                       *pcardnr = card;
+               if (pdomain)
+                       *pdomain = dom;
+               rc = 0;
+       } else
+               rc = -ENODEV;
+
+       kfree(device_matrix);
+       return rc;
+}
+EXPORT_SYMBOL(pkey_findcard);
+
+/*
+ * Find card and transform secure key into protected key.
+ */
+int pkey_skey2pkey(const struct pkey_seckey *seckey,
+                  struct pkey_protkey *protkey)
+{
+       u16 cardnr, domain;
+       int rc, verify;
+
+       /*
+        * The pkey_sec2protkey call may fail when a card has been
+        * addressed whose master key was changed after the last fetch
+        * of the mkvp into the cache. So first try without verify, then
+        * with verify enabled (thus refreshing the mkvp for each card).
+        */
+       for (verify = 0; verify < 2; verify++) {
+               rc = pkey_findcard(seckey, &cardnr, &domain, verify);
+               if (rc)
+                       continue;
+               rc = pkey_sec2protkey(cardnr, domain, seckey, protkey);
+               if (rc == 0)
+                       break;
+       }
+
+       if (rc)
+               DEBUG_DBG("pkey_skey2pkey failed rc=%d\n", rc);
+
+       return rc;
+}
+EXPORT_SYMBOL(pkey_skey2pkey);
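Because pkey_skey2pkey() is exported, in-kernel code can perform the same secure-key-to-protected-key conversion directly. A minimal sketch of such a caller, assuming the usual kernel headers; the wrapper itself is hypothetical, only the exported function and the two pkey structs come from this patch:

/* Hypothetical in-kernel user of the exported interface above. */
static int demo_seckey_to_protkey(const struct pkey_seckey *seckey,
                                  struct pkey_protkey *protkey)
{
        int rc;

        rc = pkey_skey2pkey(seckey, protkey);
        if (rc)
                pr_warn("pkey: secure->protected key conversion failed, rc=%d\n",
                        rc);
        return rc;
}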
+
+/*
+ * File io functions
+ */
+
+static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
+                               unsigned long arg)
+{
+       int rc;
+
+       switch (cmd) {
+       case PKEY_GENSECK: {
+               struct pkey_genseck __user *ugs = (void __user *) arg;
+               struct pkey_genseck kgs;
+
+               if (copy_from_user(&kgs, ugs, sizeof(kgs)))
+                       return -EFAULT;
+               rc = pkey_genseckey(kgs.cardnr, kgs.domain,
+                                   kgs.keytype, &kgs.seckey);
+               DEBUG_DBG("pkey_ioctl pkey_genseckey()=%d\n", rc);
+               if (rc)
+                       break;
+               if (copy_to_user(ugs, &kgs, sizeof(kgs)))
+                       return -EFAULT;
+               break;
+       }
+       case PKEY_CLR2SECK: {
+               struct pkey_clr2seck __user *ucs = (void __user *) arg;
+               struct pkey_clr2seck kcs;
+
+               if (copy_from_user(&kcs, ucs, sizeof(kcs)))
+                       return -EFAULT;
+               rc = pkey_clr2seckey(kcs.cardnr, kcs.domain, kcs.keytype,
+                                    &kcs.clrkey, &kcs.seckey);
+               DEBUG_DBG("pkey_ioctl pkey_clr2seckey()=%d\n", rc);
+               if (rc)
+                       break;
+               if (copy_to_user(ucs, &kcs, sizeof(kcs)))
+                       return -EFAULT;
+               memzero_explicit(&kcs, sizeof(kcs));
+               break;
+       }
+       case PKEY_SEC2PROTK: {
+               struct pkey_sec2protk __user *usp = (void __user *) arg;
+               struct pkey_sec2protk ksp;
+
+               if (copy_from_user(&ksp, usp, sizeof(ksp)))
+                       return -EFAULT;
+               rc = pkey_sec2protkey(ksp.cardnr, ksp.domain,
+                                     &ksp.seckey, &ksp.protkey);
+               DEBUG_DBG("pkey_ioctl pkey_sec2protkey()=%d\n", rc);
+               if (rc)
+                       break;
+               if (copy_to_user(usp, &ksp, sizeof(ksp)))
+                       return -EFAULT;
+               break;
+       }
+       case PKEY_CLR2PROTK: {
+               struct pkey_clr2protk __user *ucp = (void __user *) arg;
+               struct pkey_clr2protk kcp;
+
+               if (copy_from_user(&kcp, ucp, sizeof(kcp)))
+                       return -EFAULT;
+               rc = pkey_clr2protkey(kcp.keytype,
+                                     &kcp.clrkey, &kcp.protkey);
+               DEBUG_DBG("pkey_ioctl pkey_clr2protkey()=%d\n", rc);
+               if (rc)
+                       break;
+               if (copy_to_user(ucp, &kcp, sizeof(kcp)))
+                       return -EFAULT;
+               memzero_explicit(&kcp, sizeof(kcp));
+               break;
+       }
+       case PKEY_FINDCARD: {
+               struct pkey_findcard __user *ufc = (void __user *) arg;
+               struct pkey_findcard kfc;
+
+               if (copy_from_user(&kfc, ufc, sizeof(kfc)))
+                       return -EFAULT;
+               rc = pkey_findcard(&kfc.seckey,
+                                  &kfc.cardnr, &kfc.domain, 1);
+               DEBUG_DBG("pkey_ioctl pkey_findcard()=%d\n", rc);
+               if (rc)
+                       break;
+               if (copy_to_user(ufc, &kfc, sizeof(kfc)))
+                       return -EFAULT;
+               break;
+       }
+       case PKEY_SKEY2PKEY: {
+               struct pkey_skey2pkey __user *usp = (void __user *) arg;
+               struct pkey_skey2pkey ksp;
+
+               if (copy_from_user(&ksp, usp, sizeof(ksp)))
+                       return -EFAULT;
+               rc = pkey_skey2pkey(&ksp.seckey, &ksp.protkey);
+               DEBUG_DBG("pkey_ioctl pkey_skey2pkey()=%d\n", rc);
+               if (rc)
+                       break;
+               if (copy_to_user(usp, &ksp, sizeof(ksp)))
+                       return -EFAULT;
+               break;
+       }
+       default:
+               /* unknown/unsupported ioctl cmd */
+               return -ENOTTY;
+       }
+
+       return rc;
+}
+
+/*
+ * Sysfs and file io operations
+ */
+static const struct file_operations pkey_fops = {
+       .owner          = THIS_MODULE,
+       .open           = nonseekable_open,
+       .llseek         = no_llseek,
+       .unlocked_ioctl = pkey_unlocked_ioctl,
+};
+
+static struct miscdevice pkey_dev = {
+       .name   = "pkey",
+       .minor  = MISC_DYNAMIC_MINOR,
+       .mode   = 0666,
+       .fops   = &pkey_fops,
+};
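The misc device registers as /dev/pkey with mode 0666, so the ioctls dispatched above are reachable from ordinary userspace. A minimal userspace sketch, assuming the s390 uapi header defining PKEY_SKEY2PKEY and the pkey structs is installed as <asm/pkey.h>; demo_skey2pkey() is a hypothetical wrapper, and the struct and field names follow the handler code above:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/pkey.h>           /* PKEY_SKEY2PKEY, struct pkey_skey2pkey */

/* Convert a CCA secure key blob into a protected key via /dev/pkey. */
int demo_skey2pkey(const unsigned char *blob, size_t bloblen,
                   struct pkey_protkey *out)
{
        struct pkey_skey2pkey ksp;
        int fd, rc;

        if (bloblen != sizeof(ksp.seckey.seckey))
                return -1;
        fd = open("/dev/pkey", O_RDWR);
        if (fd < 0)
                return -1;
        memset(&ksp, 0, sizeof(ksp));
        memcpy(ksp.seckey.seckey, blob, bloblen);
        rc = ioctl(fd, PKEY_SKEY2PKEY, &ksp);
        close(fd);
        if (rc == 0)
                *out = ksp.protkey;     /* protected key material, type, length */
        return rc;
}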
+
+/*
+ * Module init
+ */
+int __init pkey_init(void)
+{
+       cpacf_mask_t pckmo_functions;
+
+       /* check whether the pckmo instructions are available */
+       if (!cpacf_query(CPACF_PCKMO, &pckmo_functions))
+               return -EOPNOTSUPP;
+       if (!cpacf_test_func(&pckmo_functions, CPACF_PCKMO_ENC_AES_128_KEY) ||
+           !cpacf_test_func(&pckmo_functions, CPACF_PCKMO_ENC_AES_192_KEY) ||
+           !cpacf_test_func(&pckmo_functions, CPACF_PCKMO_ENC_AES_256_KEY))
+               return -EOPNOTSUPP;
+
+       pkey_debug_init();
+
+       return misc_register(&pkey_dev);
+}
+
+/*
+ * Module exit
+ */
+static void __exit pkey_exit(void)
+{
+       misc_deregister(&pkey_dev);
+       mkvp_cache_free();
+       pkey_debug_exit();
+}
+
+module_init(pkey_init);
+module_exit(pkey_exit);
index 144a17941e6fee1dabbe27f2c7a217e3a1b250bb..93015f85d4a6a9c17ff4daae5d4bcdebb774057c 100644 (file)
@@ -374,7 +374,7 @@ out:
        return rc;
 }
 
-static long zcrypt_send_cprb(struct ica_xcRB *xcRB)
+long zcrypt_send_cprb(struct ica_xcRB *xcRB)
 {
        struct zcrypt_card *zc, *pref_zc;
        struct zcrypt_queue *zq, *pref_zq;
@@ -444,6 +444,7 @@ out:
                              AP_QID_CARD(qid), AP_QID_QUEUE(qid));
        return rc;
 }
+EXPORT_SYMBOL(zcrypt_send_cprb);
 
 static bool is_desired_ep11_card(unsigned int dev_id,
                                 unsigned short target_num,
@@ -619,7 +620,7 @@ out:
        return rc;
 }
 
-static void zcrypt_device_status_mask(struct zcrypt_device_matrix *matrix)
+void zcrypt_device_status_mask(struct zcrypt_device_matrix *matrix)
 {
        struct zcrypt_card *zc;
        struct zcrypt_queue *zq;
index 274a590515347094b5b27a79efd6607dd50d085b..6c94efd23eacf7c6a20fa3880285485e26341f2c 100644 (file)
@@ -190,5 +190,7 @@ void zcrypt_msgtype_unregister(struct zcrypt_ops *);
 struct zcrypt_ops *zcrypt_msgtype(unsigned char *, int);
 int zcrypt_api_init(void);
 void zcrypt_api_exit(void);
+long zcrypt_send_cprb(struct ica_xcRB *xcRB);
+void zcrypt_device_status_mask(struct zcrypt_device_matrix *devstatus);
 
 #endif /* _ZCRYPT_API_H_ */
index 137d22d3a005ddab73e82beeca66a2d91c24586d..838347c44f322c274129c18a9e2253eea9dccb0a 100644 (file)
@@ -1630,7 +1630,7 @@ static int aac_acquire_resources(struct aac_dev *dev)
 
        if (!dev->sync_mode) {
                /* After EEH recovery or suspend resume, max_msix count
-                * may change, therfore updating in init as well.
+                * may change; therefore update it in init as well.
                 */
                dev->init->r7.no_of_msix_vectors = cpu_to_le32(dev->max_msix);
                aac_adapter_start(dev);
index ae5bfe039fcc0012ed65ca4d81c3e20be9e65070..ccbd9e31a5dee847ea27b3f125c83dc54774be34 100644 (file)
@@ -680,7 +680,7 @@ struct bfi_ioim_req_s {
 
        /*
         * SG elements array within the IO request must be double word
-        * aligned. This aligment is required to optimize SGM setup for the IO.
+        * aligned. This alignment is required to optimize SGM setup for the IO.
         */
        struct bfi_sge_s        sges[BFI_SGE_INLINE_MAX];
        u8      io_timeout;
index 90869cee2b20b2bcf682d54425dd9fe66b72b519..ef5bf55f08a4c6e837c10fe14785cb1a40a286a8 100644 (file)
@@ -1053,7 +1053,6 @@ out:
 
 /**
  * cxlflash_mmap_fault() - mmap fault handler for adapter file descriptor
- * @vma:       VM area associated with mapping.
  * @vmf:       VM fault associated with current fault.
  *
  * To support error notification via MMIO, faults are 'caught' by this routine
@@ -1067,8 +1066,9 @@ out:
  *
  * Return: 0 on success, VM_FAULT_SIGBUS on failure
  */
-static int cxlflash_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int cxlflash_mmap_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct file *file = vma->vm_file;
        struct cxl_context *ctx = cxl_fops_get_context(file);
        struct cxlflash_cfg *cfg = container_of(file->f_op, struct cxlflash_cfg,
@@ -1097,7 +1097,7 @@ static int cxlflash_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        if (likely(!ctxi->err_recovery_active)) {
                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-               rc = ctxi->cxl_mmap_vmops->fault(vma, vmf);
+               rc = ctxi->cxl_mmap_vmops->fault(vmf);
        } else {
                dev_dbg(dev, "%s: err recovery active, use err_page\n",
                        __func__);
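This hunk, like the sg, ion, lustre, tcmu, uio, usbmon and fb_defio hunks further down, adapts to the 4.11 change of the ->fault() prototype: the handler now receives only the struct vm_fault, and the VMA is reached through vmf->vma. Reduced to a sketch; the device structure and lookup helper are hypothetical:

static int demo_vma_fault(struct vm_fault *vmf)
{
        /* new-style prototype: the VMA rides along inside struct vm_fault */
        struct vm_area_struct *vma = vmf->vma;
        struct demo_dev *dd = vma->vm_private_data;     /* hypothetical */
        struct page *page;

        page = demo_lookup_page(dd, vmf->pgoff);        /* hypothetical */
        if (!page)
                return VM_FAULT_SIGBUS;
        get_page(page);
        vmf->page = page;
        return 0;
}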
index cea57e27e713a672d987a3fe2a471eb6aa74cbee..656463ff9ccb83e7edd4c8ad2c2a23a3fbcee304 100644 (file)
@@ -1387,7 +1387,7 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip,
        /*
         * Actually need to subtract 'sizeof(*mp) - sizeof(*wp)' from 'rlen'
         * before determining max Vx_Port descriptor but a buggy FCF could have
-        * omited either or both MAC Address and Name Identifier descriptors
+        * omitted either or both MAC Address and Name Identifier descriptors
         */
        num_vlink_desc = rlen / sizeof(*vp);
        if (num_vlink_desc)
index 835c59c777f20a506731fea77300f3f305e8ff9a..b29afafc28857e95bffd8946598748907ab77b17 100644 (file)
@@ -9330,7 +9330,7 @@ static pci_ers_result_t ipr_pci_error_detected(struct pci_dev *pdev,
  * ipr_probe_ioa_part2 - Initializes IOAs found in ipr_probe_ioa(..)
  * @ioa_cfg:   ioa cfg struct
  *
- * Description: This is the second phase of adapter intialization
+ * Description: This is the second phase of adapter initialization
 * This function takes care of initializing the adapter to the point
  * where it can accept new commands.
 
index 50cf402dea298119ddcc88c4e3d2ec0669dba204..03cb05abc821aaea2746ae1f9fabe1ebcfd271e0 100644 (file)
@@ -3329,7 +3329,7 @@ static DEVICE_ATTR(lpfc_static_vport, S_IRUGO,
  * @buf: Data buffer.
  * @count: Size of the data buffer.
  *
- * This function get called when an user write to the lpfc_stat_data_ctrl
+ * This function gets called when a user writes to the lpfc_stat_data_ctrl
  * sysfs file. This function parses the command written to the sysfs file
  * and takes appropriate action. These commands are used for controlling
  * driver statistical data collection.
index d977a472f89f0695940bef7a96584a798db22094..8e886caf245430d9a9a1108ac7ea30b4ad10ddf3 100644 (file)
@@ -4510,7 +4510,7 @@ lpfc_sli4_rb_setup(struct lpfc_hba *phba)
  * @phba: Pointer to HBA context object.
  * @sli_mode: sli mode - 2/3
  *
- * This function is called by the sli intialization code path
+ * This function is called by the sli initialization code path
  * to issue config_port mailbox command. This function restarts the
  * HBA firmware and issues a config_port mailbox command to configure
  * the SLI interface in the sli mode specified by sli_mode
@@ -4650,11 +4650,11 @@ do_prep_failed:
 
 
 /**
- * lpfc_sli_hba_setup - SLI intialization function
+ * lpfc_sli_hba_setup - SLI initialization function
  * @phba: Pointer to HBA context object.
  *
- * This function is the main SLI intialization function. This function
- * is called by the HBA intialization code, HBA reset code and HBA
+ * This function is the main SLI initialization function. This function
+ * is called by the HBA initialization code, HBA reset code and HBA
  * error attention handler code. Caller is not required to hold any
  * locks. This function issues config_port mailbox command to configure
  * the SLI, setup iocb rings and HBQ rings. In the end the function
@@ -6324,11 +6324,11 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 }
 
 /**
- * lpfc_sli4_hba_setup - SLI4 device intialization PCI function
+ * lpfc_sli4_hba_setup - SLI4 device initialization PCI function
  * @phba: Pointer to HBA context object.
  *
- * This function is the main SLI4 device intialization PCI function. This
- * function is called by the HBA intialization code, HBA reset code and
+ * This function is the main SLI4 device initialization PCI function. This
+ * function is called by the HBA initialization code, HBA reset code and
  * HBA error attention handler code. Caller is not required to hold any
  * locks.
  **/
@@ -12079,7 +12079,7 @@ lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
  * @phba: Pointer to HBA context object.
  * @wcqe: Pointer to work-queue completion queue entry.
  *
- * This routine handles slow-path WQ entry comsumed event by invoking the
+ * This routine handles a slow-path WQ entry consumed event by invoking the
  * proper WQ release routine to the slow-path WQ.
  **/
 static void
@@ -12451,7 +12451,7 @@ lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
  * @cq: Pointer to completion queue.
  * @wcqe: Pointer to work-queue completion queue entry.
  *
- * This routine handles an fast-path WQ entry comsumed event by invoking the
+ * This routine handles a fast-path WQ entry consumed event by invoking the
 * proper WQ release routine to the fast-path WQ.
  **/
 static void
index 02fe1c4aae2fdb1718f2e9ebf7a476d363159249..bdffb692bded453145765ab08aefa483122a0b6f 100644 (file)
@@ -1925,7 +1925,7 @@ mpt3sas_send_diag_release(struct MPT3SAS_ADAPTER *ioc, u8 buffer_type,
  *
  * This allows ownership of the specified buffer to be returned to the driver,
  * allowing an application to read the buffer without fear that firmware is
- * overwritting information in the buffer.
+ * overwriting information in the buffer.
  */
 static long
 _ctl_diag_release(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
index f3e17a8c1b07ba08e1ea4ca559ef0e7b5b598f89..a44046cff0f3d9a3d52b3d6adfb893341cb2bc9e 100644 (file)
@@ -390,7 +390,7 @@ struct mpt3_diag_query {
  *
  * This allows ownership of the specified buffer to be returned to the driver,
  * allowing an application to read the buffer without fear that firmware is
- * overwritting information in the buffer.
+ * overwriting information in the buffer.
  */
 struct mpt3_diag_release {
        struct mpt3_ioctl_header hdr;
index 30b905080c61412a3e8e1234157e687594e0a041..6903f03c88af46bb54f7d3bb1e07b3a16596714d 100644 (file)
@@ -1290,7 +1290,7 @@ int osd_req_add_get_attr_list(struct osd_request *or,
        or->enc_get_attr.total_bytes = total_bytes;
 
        OSD_DEBUG(
-              "get_attr.total_bytes=%u(%u) enc_get_attr.total_bytes=%u(%Zu)\n",
+              "get_attr.total_bytes=%u(%u) enc_get_attr.total_bytes=%u(%zu)\n",
               or->get_attr.total_bytes,
               or->get_attr.total_bytes - _osd_req_sizeof_alist_header(or),
               or->enc_get_attr.total_bytes,
@@ -1677,7 +1677,7 @@ int osd_finalize_request(struct osd_request *or,
                }
        } else {
                /* TODO: I think that for the GET_ATTR command these 2 should
-                * be reversed to keep them in execution order (for embeded
+                * be reversed to keep them in execution order (for embedded
                 * targets with low memory footprint)
                 */
                ret = _osd_req_finalize_set_attr_list(or);
index 451de6c5e3c9942b3a8094bd705fbd234b094f91..75ac662793a3cdff00aaa92febd80c507ccdd16f 100644 (file)
@@ -3435,7 +3435,7 @@ static ssize_t osst_write(struct file * filp, const char __user * buf, size_t co
 
        /* Write must be integral number of blocks */
        if (STp->block_size != 0 && (count % STp->block_size) != 0) {
-               printk(KERN_ERR "%s:E: Write (%Zd bytes) not multiple of tape block size (%d%c).\n",
+               printk(KERN_ERR "%s:E: Write (%zd bytes) not multiple of tape block size (%d%c).\n",
                                       name, count, STp->block_size<1024?
                                       STp->block_size:STp->block_size/1024, STp->block_size<1024?'b':'k');
                retval = (-EINVAL);
@@ -3756,7 +3756,7 @@ static ssize_t osst_read(struct file * filp, char __user * buf, size_t count, lo
 
        if ((count % STp->block_size) != 0) {
                printk(KERN_WARNING
-                   "%s:W: Read (%Zd bytes) not multiple of tape block size (%d%c).\n", name, count,
+                   "%s:W: Read (%zd bytes) not multiple of tape block size (%d%c).\n", name, count,
                    STp->block_size<1024?STp->block_size:STp->block_size/1024, STp->block_size<1024?'b':'k');
        }
 
@@ -3815,7 +3815,7 @@ static ssize_t osst_read(struct file * filp, char __user * buf, size_t count, lo
 
                        if (transfer == 0) {
                                printk(KERN_WARNING
-                                 "%s:W: Nothing can be transferred, requested %Zd, tape block size (%d%c).\n",
+                                 "%s:W: Nothing can be transferred, requested %zd, tape block size (%d%c).\n",
                                        name, count, STp->block_size < 1024?
                                        STp->block_size:STp->block_size/1024,
                                        STp->block_size<1024?'b':'k');
index 7b6317c8c2e93bef3509c7e3d15fbae080922788..265e1395bdb837315bfea3f698fc60270b794da9 100644 (file)
@@ -5669,7 +5669,7 @@ qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr)
        /* Validate firmware image by checking version. */
        if (blob->fw->size < 8 * sizeof(uint16_t)) {
                ql_log(ql_log_fatal, vha, 0x0085,
-                   "Unable to verify integrity of firmware image (%Zd).\n",
+                   "Unable to verify integrity of firmware image (%zd).\n",
                    blob->fw->size);
                goto fail_fw_integrity;
        }
@@ -5697,7 +5697,7 @@ qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr)
                if (blob->fw->size < fwclen) {
                        ql_log(ql_log_fatal, vha, 0x0088,
                            "Unable to verify integrity of firmware image "
-                           "(%Zd).\n", blob->fw->size);
+                           "(%zd).\n", blob->fw->size);
                        goto fail_fw_integrity;
                }
 
@@ -5778,7 +5778,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr)
        /* Validate firmware image by checking version. */
        if (blob->fw->size < 8 * sizeof(uint32_t)) {
                ql_log(ql_log_fatal, vha, 0x0093,
-                   "Unable to verify integrity of firmware image (%Zd).\n",
+                   "Unable to verify integrity of firmware image (%zd).\n",
                    blob->fw->size);
                return QLA_FUNCTION_FAILED;
        }
@@ -5789,7 +5789,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr)
            (dcode[0] == 0 && dcode[1] == 0 && dcode[2] == 0 &&
                dcode[3] == 0)) {
                ql_log(ql_log_fatal, vha, 0x0094,
-                   "Unable to verify integrity of firmware image (%Zd).\n",
+                   "Unable to verify integrity of firmware image (%zd).\n",
                    blob->fw->size);
                ql_log(ql_log_fatal, vha, 0x0095,
                    "Firmware data: %08x %08x %08x %08x.\n",
@@ -5807,7 +5807,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr)
                if (blob->fw->size < fwclen) {
                        ql_log(ql_log_fatal, vha, 0x0096,
                            "Unable to verify integrity of firmware image "
-                           "(%Zd).\n", blob->fw->size);
+                           "(%zd).\n", blob->fw->size);
                        return QLA_FUNCTION_FAILED;
                }
 
index 912fbc3b4543dd5e87b04341862293d46fd57f30..3e32dc954c3c8c6b05d5883615afc1b21864e401 100644 (file)
@@ -1167,7 +1167,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 
        /* zero out the cmd, except for the embedded scsi_request */
        memset((char *)cmd + sizeof(cmd->req), 0,
-               sizeof(*cmd) - sizeof(cmd->req));
+               sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size);
 
        cmd->device = dev;
        cmd->sense_buffer = buf;
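The widened length also clears the per-driver payload that the midlayer allocates directly behind the scsi_cmnd, sized by the host template's cmd_size, while still skipping the embedded scsi_request, which the start offset implies is the structure's leading member. The layout being zeroed, as an illustrative comment; offsets are schematic:

        /*
         * [ scsi_request req | rest of scsi_cmnd | cmd_size driver data ]
         *   ^ preserved        ^-- zeroed from (char *)cmd + sizeof(cmd->req)
         *       for sizeof(*cmd) - sizeof(cmd->req) + hostt->cmd_size bytes --^
         */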
index 126a5ee00987ee14e21553b035620bff88302252..cdbb293aca08fd3b386b9c03ca02c6f27fe2056e 100644 (file)
@@ -227,27 +227,31 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
                return 0;
        }
 
+       q = blk_alloc_queue(GFP_KERNEL);
+       if (!q)
+               return -ENOMEM;
+       q->cmd_size = sizeof(struct scsi_request);
+
        if (rphy) {
-               q = blk_init_queue(sas_non_host_smp_request, NULL);
+               q->request_fn = sas_non_host_smp_request;
                dev = &rphy->dev;
                name = dev_name(dev);
                release = NULL;
        } else {
-               q = blk_init_queue(sas_host_smp_request, NULL);
+               q->request_fn = sas_host_smp_request;
                dev = &shost->shost_gendev;
                snprintf(namebuf, sizeof(namebuf),
                         "sas_host%d", shost->host_no);
                name = namebuf;
                release = sas_host_release;
        }
-       if (!q)
-               return -ENOMEM;
+       error = blk_init_allocated_queue(q);
+       if (error)
+               goto out_cleanup_queue;
 
        error = bsg_register_queue(q, dev, name, release);
-       if (error) {
-               blk_cleanup_queue(q);
-               return -ENOMEM;
-       }
+       if (error)
+               goto out_cleanup_queue;
 
        if (rphy)
                rphy->q = q;
@@ -261,6 +265,10 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 
        queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
        return 0;
+
+out_cleanup_queue:
+       blk_cleanup_queue(q);
+       return error;
 }
 
 static void sas_bsg_remove(struct Scsi_Host *shost, struct sas_rphy *rphy)
@@ -1467,7 +1475,7 @@ static void sas_end_device_release(struct device *dev)
 }
 
 /**
- * sas_rphy_initialize - common rphy intialization
+ * sas_rphy_initialize - common rphy initialization
  * @rphy:      rphy to initialise
  *
  * Used by both sas_end_device_alloc() and sas_expander_alloc() to
index e831e01f9fa68546083ae43c82d708980425d83c..29b86505f796d9fa57c19b958c1c7ae4ef31b298 100644 (file)
@@ -1185,8 +1185,9 @@ sg_fasync(int fd, struct file *filp, int mode)
 }
 
 static int
-sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+sg_vma_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        Sg_fd *sfp;
        unsigned long offset, len, sa;
        Sg_scatter_hold *rsv_schp;
index 585e54f6512cdd773342a7a040a203045ea65f26..638e5f427c901fddee96a53328cd8cc76f1368be 100644 (file)
@@ -280,7 +280,7 @@ static const struct vmstor_protocol vmstor_protocols[] = {
 
 
 /*
- * This structure is sent during the intialization phase to get the different
+ * This structure is sent during the initialization phase to get the different
  * properties of the channel.
  */
 
index 969600779e44d7e664121177d86e95ea9d61089b..2c3ffbcbd621a5d3236e25e72ee8dda2884b75c3 100644 (file)
@@ -870,9 +870,9 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
        mutex_unlock(&buffer->lock);
 }
 
-static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ion_vm_fault(struct vm_fault *vmf)
 {
-       struct ion_buffer *buffer = vma->vm_private_data;
+       struct ion_buffer *buffer = vmf->vma->vm_private_data;
        unsigned long pfn;
        int ret;
 
@@ -881,7 +881,7 @@ static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        BUG_ON(!buffer->pages || !buffer->pages[vmf->pgoff]);
 
        pfn = page_to_pfn(ion_buffer_page(buffer->pages[vmf->pgoff]));
-       ret = vm_insert_pfn(vma, vmf->address, pfn);
+       ret = vm_insert_pfn(vmf->vma, vmf->address, pfn);
        mutex_unlock(&buffer->lock);
        if (ret)
                return VM_FAULT_ERROR;
index 7b8cc3a252144f6d9d829009a6014c3cc7c29dab..cd1eb2c4c9406d96c667589ec2f5d772aef46337 100644 (file)
@@ -39,7 +39,7 @@ struct fpgaimage {
        const struct    firmware        *fw_entry;
 
        /*
-        * the followings can be read from bitstream,
+        * the following can be read from bitstream,
         * but other image formats should have them as well
         */
        char    filename[MAX_STR];
index b0eb80d70c235f116df42052d04ede14685230a0..60b827eeefe2cd0b4b6e11188df69fb9063935e3 100644 (file)
@@ -1704,7 +1704,7 @@ struct ost_lvb {
  *   lquota data structures
  */
 
-/* The lquota_id structure is an union of all the possible identifier types that
+/* The lquota_id structure is a union of all the possible identifier types that
  * can be used with quota, this includes:
  * - 64-bit user ID
  * - 64-bit group ID
index 9afa6bec3e6f44e22f1bf4fbcbd76e4476348fb3..896196c74cd2b91766e45cefb7f8cf00f2f63fc5 100644 (file)
@@ -321,7 +321,7 @@ out:
        return fault_ret;
 }
 
-static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ll_fault(struct vm_fault *vmf)
 {
        int count = 0;
        bool printed = false;
@@ -335,7 +335,7 @@ static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
 
 restart:
-       result = ll_fault0(vma, vmf);
+       result = ll_fault0(vmf->vma, vmf);
        LASSERT(!(result & VM_FAULT_LOCKED));
        if (result == 0) {
                struct page *vmpage = vmf->page;
@@ -362,8 +362,9 @@ restart:
        return result;
 }
 
-static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ll_page_mkwrite(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        int count = 0;
        bool printed = false;
        bool retry;
index 3e9cf710501b7c5847d73414a6e689709de867a6..4c57755e06e75b834393a87e83ea4e5c4c7a4acb 100644 (file)
@@ -1014,7 +1014,7 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 {
        struct vm_fault *vmf = cfio->ft_vmf;
 
-       cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf);
+       cfio->ft_flags = filemap_fault(vmf);
        cfio->ft_flags_valid = 1;
 
        if (vmf->page) {
index 0d247058bce41d738586102b5f3a7eb1ee55ad27..097147071df0db9cbae8212b61390e3e56d478b9 100644 (file)
@@ -1953,7 +1953,7 @@ struct ieee80211_device {
 
        /* ask the driver to retune the radio.
         * This function can sleep. The driver should ensure
-        * the radio has been swithced before return.
+        * the radio has been switched before return.
         */
        void (*set_chan)(struct net_device *dev, short ch);
 
@@ -1964,7 +1964,7 @@ struct ieee80211_device {
         * The synchronous version is similar to start_scan but
         * does not return until all channels have been scanned.
         * This is called in user context and should sleep;
-        * it is called in a work_queue when swithcing to ad-hoc mode
+        * it is called in a work_queue when switching to ad-hoc mode
         * or on behalf of an iwlist scan when the card is associated
         * and the root user asks for a scan.
         * The function stop_scan should stop both the synchronous and
index 1bff0e91cc0c769faaf4d1cd259659c0cb81ac22..0ea90aae428371879846187bfb476c2550e8eda8 100644 (file)
@@ -2364,7 +2364,7 @@ static void ieee80211_start_ibss_wq(struct work_struct *work)
 //     if((IS_DOT11D_ENABLE(ieee)) && (ieee->state == IEEE80211_NOLINK))
        if (ieee->state == IEEE80211_NOLINK)
                ieee->current_network.channel = 6;
-       /* if not then the state is not linked. Maybe the user swithced to
+       /* if not then the state is not linked. Maybe the user switched to
         * ad-hoc mode just after being in monitor mode, or just after
         * being in managed mode only a very short time (so the card has had no
         * time to scan all the chans..) or we have just brought up the iface
index 1dc8627e65b05c8925e4c2578a0cc0d39444d8a4..cb0b7ca36b1ec66d283f694ca88f7c9dd8b40fa2 100644 (file)
@@ -1875,8 +1875,8 @@ vchiq_arm_init_state(VCHIQ_STATE_T *state, VCHIQ_ARM_STATE_T *arm_state)
 **
 ** VC_RESUME_IDLE - Initialise the resume completion at the same time.  The
 **                     resume completion is in its 'done' state whenever
-**                     videcore is running.  Therfore, the VC_RESUME_IDLE state
-**                     implies that videocore is suspended.
+**                     videocore is running.  Therefore, the VC_RESUME_IDLE
+**                     state implies that videocore is suspended.
 **                     Hence, any thread which needs to wait until videocore is
 **                     running can wait on this completion - it will only block
 **                     if videocore is suspended.
index 9ab43935869e6d586035923ab60c2494d0a476be..2eebc6215cacd55b055dd5751ea26382dacf7e96 100644 (file)
@@ -213,7 +213,7 @@ static void deinit_irq(struct net_device *dev)
        vif = netdev_priv(dev);
        wilc = vif->wilc;
 
-       /* Deintialize IRQ */
+       /* Deinitialize IRQ */
        if (wilc->dev_irq_num) {
                free_irq(wilc->dev_irq_num, wilc);
                gpio_free(wilc->gpio);
index f7ce47cac2aafb45dca8e041032aab74d13b1c58..7961d1c56847392c6d977e065cab76f435ea31de 100644 (file)
@@ -2357,7 +2357,7 @@ int wilc_deinit_host_int(struct net_device *net)
                del_timer_sync(&wilc_during_ip_timer);
 
        if (s32Error)
-               netdev_err(net, "Error while deintializing host interface\n");
+               netdev_err(net, "Error while deinitializing host interface\n");
 
        return s32Error;
 }
index 8041710b697298ec7073c4e5910849bd1a154703..c3adefe95e50f7f7054e272e15fc5e37663d11c9 100644 (file)
@@ -642,9 +642,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
                WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD);
 
                spin_lock(&udev->commands_lock);
-               cmd = idr_find(&udev->commands, entry->hdr.cmd_id);
-               if (cmd)
-                       idr_remove(&udev->commands, cmd->cmd_id);
+               cmd = idr_remove(&udev->commands, entry->hdr.cmd_id);
                spin_unlock(&udev->commands_lock);
 
                if (!cmd) {
@@ -783,15 +781,15 @@ static int tcmu_find_mem_index(struct vm_area_struct *vma)
        return -1;
 }
 
-static int tcmu_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int tcmu_vma_fault(struct vm_fault *vmf)
 {
-       struct tcmu_dev *udev = vma->vm_private_data;
+       struct tcmu_dev *udev = vmf->vma->vm_private_data;
        struct uio_info *info = &udev->uio_info;
        struct page *page;
        unsigned long offset;
        void *addr;
 
-       int mi = tcmu_find_mem_index(vma);
+       int mi = tcmu_find_mem_index(vmf->vma);
        if (mi < 0)
                return VM_FAULT_SIGBUS;
 
index eb278832f5ce52c880f76a8eb9baa5e9e68cf447..1bacbc3b19a05cc7b685ddf93b14d5ca10d67acf 100644 (file)
@@ -667,7 +667,7 @@ static ssize_t n_hdlc_tty_write(struct tty_struct *tty, struct file *file,
        struct n_hdlc_buf *tbuf;
 
        if (debuglevel >= DEBUG_LEVEL_INFO)     
-               printk("%s(%d)n_hdlc_tty_write() called count=%Zd\n",
+               printk("%s(%d)n_hdlc_tty_write() called count=%zd\n",
                        __FILE__,__LINE__,count);
                
        /* Verify pointers */
index 6ad26f802b511b0e5d78afd6bfa50ed349c60cc5..f96bcf9bee2591258409b1477cc63a6b83266ec3 100644 (file)
 #define IOC4_SSCR_PAUSE_STATE   0x40000000  /* Sets when PAUSE takes effect */
 #define IOC4_SSCR_RESET                0x80000000  /* Reset DMA channels */
 
-/* All producer/comsumer pointers are the same bitfield */
+/* All producer/consumer pointers are the same bitfield */
 #define IOC4_PROD_CONS_PTR_4K   0x00000ff8     /* For 4K buffers */
 #define IOC4_PROD_CONS_PTR_1K   0x000003f8     /* For 1K buffers */
 #define IOC4_PROD_CONS_PTR_OFF           3
index 73abd89c0108dc793159fe10d2567970de7d70ef..46e46894e918ab03c6e7821bb715332d276bf346 100644 (file)
@@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port)
 
 static int receive_chars_read(struct uart_port *port)
 {
-       int saw_console_brk = 0;
+       static int saw_console_brk;
        int limit = 10000;
 
        while (limit-- > 0) {
@@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port)
                        bytes_read = 0;
 
                        if (stat == CON_BREAK) {
+                               if (saw_console_brk)
+                                       sun_do_break();
+
                                if (uart_handle_break(port))
                                        continue;
                                saw_console_brk = 1;
@@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port)
                if (port->sysrq != 0 &&  *con_read_page) {
                        for (i = 0; i < bytes_read; i++)
                                uart_handle_sysrq_char(port, con_read_page[i]);
+                       saw_console_brk = 0;
                }
 
                if (port->state == NULL)
@@ -398,6 +402,12 @@ static struct uart_driver sunhv_reg = {
 
 static struct uart_port *sunhv_port;
 
+void sunhv_migrate_hvcons_irq(int cpu)
+{
+       /* Migrate the hvcons irq to the given cpu */
+       irq_force_affinity(sunhv_port->irq, cpumask_of(cpu));
+}
+
 /* Copy 's' into the con_write_page, decoding "\n" into
  * "\r\n" along the way.  We have to return two lengths
  * because the caller needs to know how much to advance
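The expansion described here has to track two lengths: how many source bytes were consumed and how many bytes landed in the output page. A standalone sketch of that idea; expand_newlines() is a hypothetical helper, not the driver's actual routine:

/* Expand '\n' to "\r\n" while copying; returns bytes written to dst and
 * stores the number of source bytes consumed in *consumed. */
static int expand_newlines(char *dst, int dst_len,
                           const char *src, int src_len, int *consumed)
{
        int in = 0, out = 0;

        while (in < src_len && out < dst_len) {
                if (src[in] == '\n') {
                        if (out + 2 > dst_len)
                                break;          /* no room for "\r\n" */
                        dst[out++] = '\r';
                }
                dst[out++] = src[in++];
        }
        *consumed = in;
        return out;
}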
index fba021f5736afdee41fd914da4648e8bf71874b6..31d95dc9c202daab21e996015ee7d4b8e466ea32 100644 (file)
@@ -597,14 +597,14 @@ static int uio_find_mem_index(struct vm_area_struct *vma)
        return -1;
 }
 
-static int uio_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int uio_vma_fault(struct vm_fault *vmf)
 {
-       struct uio_device *idev = vma->vm_private_data;
+       struct uio_device *idev = vmf->vma->vm_private_data;
        struct page *page;
        unsigned long offset;
        void *addr;
 
-       int mi = uio_find_mem_index(vma);
+       int mi = uio_find_mem_index(vmf->vma);
        if (mi < 0)
                return VM_FAULT_SIGBUS;
 
index 52747b6ac89ae7662f30ae36713a95e73684ff4b..ca425e8099ea816096d1962713a5c4eecfced49c 100644 (file)
@@ -2335,7 +2335,7 @@ static int proc_drop_privileges(struct usb_dev_state *ps, void __user *arg)
        if (copy_from_user(&data, arg, sizeof(data)))
                return -EFAULT;
 
-       /* This is an one way operation. Once privileges are
+       /* This is a one way operation. Once privileges are
         * dropped, you cannot regain them. You may however reissue
         * this ioctl to shrink the allowed interfaces mask.
         */
index 6bde4396927c997b686419cbc4fb15997b0e304f..a2615d64d07c1967d7cd2c25ab2e046747f6bd7d 100644 (file)
@@ -1848,7 +1848,7 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
 
 fail:
        spin_unlock_irq (&dev->lock);
-       pr_debug ("%s: %s fail %Zd, %p\n", shortname, __func__, value, dev);
+       pr_debug ("%s: %s fail %zd, %p\n", shortname, __func__, value, dev);
        kfree (dev->buf);
        dev->buf = NULL;
        return value;
index 2e41ef36b944d1c3da6952bf134e7a435f2dfecd..b76fcdb763a0d27c5606e97807b18bf14e05ba9f 100644 (file)
@@ -520,7 +520,7 @@ static void struct_ep_qh_setup(struct fsl_udc *udc, unsigned char ep_num,
 /* Setup qh structure and ep register for ep0. */
 static void ep0_setup(struct fsl_udc *udc)
 {
-       /* the intialization of an ep includes: fields in QH, Regs,
+       /* the initialization of an ep includes: fields in QH, Regs,
         * fsl_ep struct */
        struct_ep_qh_setup(udc, 0, USB_RECV, USB_ENDPOINT_XFER_CONTROL,
                        USB_MAX_CTRL_PAYLOAD, 0, 0);
@@ -2349,7 +2349,7 @@ static int struct_ep_setup(struct fsl_udc *udc, unsigned char index,
 }
 
 /* Driver probe function
- * all intialization operations implemented here except enabling usb_intr reg
+ * all initialization operations implemented here except enabling usb_intr reg
  * board setup should have been done in the platform code
  */
 static int fsl_udc_probe(struct platform_device *pdev)
index fb8fc34827aba12f5132b3232f0ffca3c24e89a8..2218f91e92a61cd8078afe857ba347463aa8ff63 100644 (file)
@@ -1791,7 +1791,7 @@ static int renesas_usb3_init_ep(struct renesas_usb3 *usb3, struct device *dev,
 
        dev_dbg(dev, "%s: num_usb3_eps = %d\n", __func__, usb3->num_usb3_eps);
        /*
-        * This driver prepares pipes as the followings:
+        * This driver prepares pipes as follows:
         *  - odd pipes = IN pipe
         *  - even pipes = OUT pipe (except pipe 0)
         */
@@ -1841,7 +1841,7 @@ static void renesas_usb3_init_ram(struct renesas_usb3 *usb3, struct device *dev,
        memset(basead, 0, sizeof(basead));
 
        /*
-        * This driver prepares pipes as the followings:
+        * This driver prepares pipes as follows:
         *  - all pipes = the same size as "ramsize_per_pipe"
         * Please refer to the "Method of Specifying RAM Mapping"
         */
index 063064801ceb0e37a0f4a5b45101f02f7c1def83..ac2c4eab478db7f91c8b0f705eeadd22cd818604 100644 (file)
@@ -1322,7 +1322,7 @@ static int __init ehci_hcd_init(void)
                printk(KERN_WARNING "Warning! ehci_hcd should always be loaded"
                                " before uhci_hcd and ohci_hcd, not after\n");
 
-       pr_debug("%s: block sizes: qh %Zd qtd %Zd itd %Zd sitd %Zd\n",
+       pr_debug("%s: block sizes: qh %zd qtd %zd itd %zd sitd %zd\n",
                 hcd_name,
                 sizeof(struct ehci_qh), sizeof(struct ehci_qtd),
                 sizeof(struct ehci_itd), sizeof(struct ehci_sitd));
index 9d0b0518290a1d596f7018216bc694d5bbc037b1..1c5b34b74860f1c3aed374f2cfcd0668b1124bcf 100644 (file)
@@ -5697,7 +5697,7 @@ static int __init fotg210_hcd_init(void)
                        test_bit(USB_OHCI_LOADED, &usb_hcds_loaded))
                pr_warn("Warning! fotg210_hcd should always be loaded before uhci_hcd and ohci_hcd, not after\n");
 
-       pr_debug("%s: block sizes: qh %Zd qtd %Zd itd %Zd\n",
+       pr_debug("%s: block sizes: qh %zd qtd %zd itd %zd\n",
                        hcd_name, sizeof(struct fotg210_qh),
                        sizeof(struct fotg210_qtd),
                        sizeof(struct fotg210_itd));
index 8685cf3e629271d9165d6dd7f21bb868a90d4de9..b6daf2e6998936021e590a444633dbe28e100878 100644 (file)
@@ -1252,7 +1252,7 @@ static int __init ohci_hcd_mod_init(void)
                return -ENODEV;
 
        printk(KERN_INFO "%s: " DRIVER_DESC "\n", hcd_name);
-       pr_debug ("%s: block sizes: ed %Zd td %Zd\n", hcd_name,
+       pr_debug ("%s: block sizes: ed %zd td %zd\n", hcd_name,
                sizeof (struct ed), sizeof (struct td));
        set_bit(USB_OHCI_LOADED, &usb_hcds_loaded);
 
index a540e4f206c48b75e03109cfb5121692f6cab67b..c5fa584d8f0a1788baf5514d318d64d14049a618 100644 (file)
@@ -563,20 +563,20 @@ static ssize_t adu_write(struct file *file, const __user char *buffer,
                        }
 
                        dev_dbg(&dev->udev->dev,
-                               "%s : in progress, count = %Zd\n",
+                               "%s : in progress, count = %zd\n",
                                __func__, count);
                } else {
                        spin_unlock_irqrestore(&dev->buflock, flags);
                        set_current_state(TASK_RUNNING);
                        remove_wait_queue(&dev->write_wait, &waita);
-                       dev_dbg(&dev->udev->dev, "%s : sending, count = %Zd\n",
+                       dev_dbg(&dev->udev->dev, "%s : sending, count = %zd\n",
                                __func__, count);
 
                        /* write the data into interrupt_out_buffer from userspace */
                        buffer_size = usb_endpoint_maxp(dev->interrupt_out_endpoint);
                        bytes_to_write = count > buffer_size ? buffer_size : count;
                        dev_dbg(&dev->udev->dev,
-                               "%s : buffer_size = %Zd, count = %Zd, bytes_to_write = %Zd\n",
+                               "%s : buffer_size = %zd, count = %zd, bytes_to_write = %zd\n",
                                __func__, buffer_size, count, bytes_to_write);
 
                        if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write) != 0) {
index b10e26c74a9088b20f44b5ad376bebfe869d196b..322a042d6e59ab55ec10e9fb8961a3080ec8f6f9 100644 (file)
@@ -673,7 +673,7 @@ static ssize_t tower_write (struct file *file, const char __user *buffer, size_t
 
        /* write the data into interrupt_out_buffer from userspace */
        bytes_to_write = min_t(int, count, write_buffer_size);
-       dev_dbg(&dev->udev->dev, "%s: count = %Zd, bytes_to_write = %Zd\n",
+       dev_dbg(&dev->udev->dev, "%s: count = %zd, bytes_to_write = %zd\n",
                __func__, count, bytes_to_write);
 
        if (copy_from_user (dev->interrupt_out_buffer, buffer, bytes_to_write)) {
index 356d312add57793aa92587571d0b398264166a25..0a643fa74cab747099d421d67ae8b668cf4d525d 100644 (file)
@@ -526,7 +526,7 @@ static size_t parport_uss720_epp_write_data(struct parport *pp, const void *buf,
                return 0;
        i = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, 1), (void *)buf, length, &rlen, 20000);
        if (i)
-               printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %Zu rlen %u\n", buf, length, rlen);
+               printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %zu rlen %u\n", buf, length, rlen);
        change_mode(pp, ECR_PS2);
        return rlen;
 #endif
@@ -587,7 +587,7 @@ static size_t parport_uss720_ecp_write_data(struct parport *pp, const void *buff
                return 0;
        i = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, 1), (void *)buffer, len, &rlen, 20000);
        if (i)
-               printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %Zu rlen %u\n", buffer, len, rlen);
+               printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %zu rlen %u\n", buffer, len, rlen);
        change_mode(pp, ECR_PS2);
        return rlen;
 }
@@ -605,7 +605,7 @@ static size_t parport_uss720_ecp_read_data(struct parport *pp, void *buffer, siz
                return 0;
        i = usb_bulk_msg(usbdev, usb_rcvbulkpipe(usbdev, 2), buffer, len, &rlen, 20000);
        if (i)
-               printk(KERN_ERR "uss720: recvbulk ep 2 buf %p len %Zu rlen %u\n", buffer, len, rlen);
+               printk(KERN_ERR "uss720: recvbulk ep 2 buf %p len %zu rlen %u\n", buffer, len, rlen);
        change_mode(pp, ECR_PS2);
        return rlen;
 }
@@ -638,7 +638,7 @@ static size_t parport_uss720_write_compat(struct parport *pp, const void *buffer
                return 0;
        i = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, 1), (void *)buffer, len, &rlen, 20000);
        if (i)
-               printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %Zu rlen %u\n", buffer, len, rlen);
+               printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %zu rlen %u\n", buffer, len, rlen);
        change_mode(pp, ECR_PS2);
        return rlen;
 }
index 91c22276c03b55c2c78e0de55e8a2a9b618607ea..9fb8b1e6ecc26dec4c93a24ee9192d7ccc0b8f19 100644 (file)
@@ -1223,9 +1223,9 @@ static void mon_bin_vma_close(struct vm_area_struct *vma)
 /*
  * Map ring pages to user space.
  */
-static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int mon_bin_vma_fault(struct vm_fault *vmf)
 {
-       struct mon_reader_bin *rp = vma->vm_private_data;
+       struct mon_reader_bin *rp = vmf->vma->vm_private_data;
        unsigned long offset, chunk_idx;
        struct page *pageptr;
 
index 8b232290be6b97a04be362582e4a4d87db8d9185..1a6f78d7d0275fb539dcbbc618ca00df6aa2e49e 100644 (file)
@@ -707,7 +707,7 @@ void usbip_pad_iso(struct usbip_device *ud, struct urb *urb)
                return;
 
        /*
-        * loop over all packets from last to first (to prevent overwritting
+        * loop over all packets from last to first (to prevent overwriting
         * memory when padding) and move them into the proper place
         */
        for (i = np-1; i > 0; i--) {
index a44f5627b82a3f4e7e9b079e2546ad4a5f4931c5..12ded23f1aaff70a136114e78d58b3efdfaddc37 100644 (file)
@@ -412,11 +412,9 @@ static void fbcon_add_cursor_timer(struct fb_info *info)
                if (!info->queue.func)
                        INIT_WORK(&info->queue, fb_flashcursor);
 
-               init_timer(&ops->cursor_timer);
-               ops->cursor_timer.function = cursor_timer_handler;
-               ops->cursor_timer.expires = jiffies + ops->cur_blink_jiffies;
-               ops->cursor_timer.data = (unsigned long ) info;
-               add_timer(&ops->cursor_timer);
+               setup_timer(&ops->cursor_timer, cursor_timer_handler,
+                           (unsigned long) info);
+               mod_timer(&ops->cursor_timer, jiffies + ops->cur_blink_jiffies);
                ops->flags |= FBCON_FLAGS_CURSOR_TIMER;
        }
 }
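setup_timer() collapses the three open-coded field assignments into a single call, and mod_timer() arms an inactive timer as well as re-arming a pending one, which is why the separate add_timer() path can go. A generic sketch of the idiom, assuming the pre-4.15 (unsigned long data) timer API; the context structure is hypothetical:

#include <linux/jiffies.h>
#include <linux/timer.h>

struct demo_ctx {                       /* hypothetical context */
        struct timer_list timer;
};

static void demo_timeout(unsigned long data)
{
        struct demo_ctx *ctx = (struct demo_ctx *)data;

        (void)ctx;      /* handle expiry here, possibly mod_timer() again */
}

static void demo_arm(struct demo_ctx *ctx)
{
        setup_timer(&ctx->timer, demo_timeout, (unsigned long)ctx);
        mod_timer(&ctx->timer, jiffies + HZ);
}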
@@ -1165,6 +1163,8 @@ static void fbcon_free_font(struct display *p, bool freefont)
        p->userfont = 0;
 }
 
+static void set_vc_hi_font(struct vc_data *vc, bool set);
+
 static void fbcon_deinit(struct vc_data *vc)
 {
        struct display *p = &fb_display[vc->vc_num];
@@ -1200,6 +1200,9 @@ finished:
        if (free_font)
                vc->vc_font.data = NULL;
 
+       if (vc->vc_hi_font_mask)
+               set_vc_hi_font(vc, false);
+
        if (!con_is_bound(&fb_con))
                fbcon_exit();
 
@@ -2436,32 +2439,10 @@ static int fbcon_get_font(struct vc_data *vc, struct console_font *font)
        return 0;
 }
 
-static int fbcon_do_set_font(struct vc_data *vc, int w, int h,
-                            const u8 * data, int userfont)
+/* set/clear vc_hi_font_mask and update vc attrs accordingly */
+static void set_vc_hi_font(struct vc_data *vc, bool set)
 {
-       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-       struct fbcon_ops *ops = info->fbcon_par;
-       struct display *p = &fb_display[vc->vc_num];
-       int resize;
-       int cnt;
-       char *old_data = NULL;
-
-       if (con_is_visible(vc) && softback_lines)
-               fbcon_set_origin(vc);
-
-       resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
-       if (p->userfont)
-               old_data = vc->vc_font.data;
-       if (userfont)
-               cnt = FNTCHARCNT(data);
-       else
-               cnt = 256;
-       vc->vc_font.data = (void *)(p->fontdata = data);
-       if ((p->userfont = userfont))
-               REFCOUNT(data)++;
-       vc->vc_font.width = w;
-       vc->vc_font.height = h;
-       if (vc->vc_hi_font_mask && cnt == 256) {
+       if (!set) {
                vc->vc_hi_font_mask = 0;
                if (vc->vc_can_do_color) {
                        vc->vc_complement_mask >>= 1;
@@ -2484,7 +2465,7 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h,
                            ((c & 0xfe00) >> 1) | (c & 0xff);
                        vc->vc_attr >>= 1;
                }
-       } else if (!vc->vc_hi_font_mask && cnt == 512) {
+       } else {
                vc->vc_hi_font_mask = 0x100;
                if (vc->vc_can_do_color) {
                        vc->vc_complement_mask <<= 1;
@@ -2516,8 +2497,38 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h,
                        } else
                                vc->vc_video_erase_char = c & ~0x100;
                }
-
        }
+}
+
+static int fbcon_do_set_font(struct vc_data *vc, int w, int h,
+                            const u8 * data, int userfont)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct display *p = &fb_display[vc->vc_num];
+       int resize;
+       int cnt;
+       char *old_data = NULL;
+
+       if (con_is_visible(vc) && softback_lines)
+               fbcon_set_origin(vc);
+
+       resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
+       if (p->userfont)
+               old_data = vc->vc_font.data;
+       if (userfont)
+               cnt = FNTCHARCNT(data);
+       else
+               cnt = 256;
+       vc->vc_font.data = (void *)(p->fontdata = data);
+       if ((p->userfont = userfont))
+               REFCOUNT(data)++;
+       vc->vc_font.width = w;
+       vc->vc_font.height = h;
+       if (vc->vc_hi_font_mask && cnt == 256)
+               set_vc_hi_font(vc, false);
+       else if (!vc->vc_hi_font_mask && cnt == 512)
+               set_vc_hi_font(vc, true);
 
        if (resize) {
                int cols, rows;
index 476ff3f4d46659f0b9537fcb07283259c2ff8d1e..cd2db1113e67c2be2607c8440214ba2100d4e5f0 100644 (file)
@@ -213,15 +213,8 @@ static void tpg110_init(struct device *dev, struct device_node *np,
        board->disable = tpg110_disable;
 }
 
-int nomadik_clcd_init_panel(struct clcd_fb *fb,
-                           struct device_node *endpoint)
+int nomadik_clcd_init_panel(struct clcd_fb *fb, struct device_node *panel)
 {
-       struct device_node *panel;
-
-       panel = of_graph_get_remote_port_parent(endpoint);
-       if (!panel)
-               return -ENODEV;
-
        if (of_device_is_compatible(panel, "tpo,tpg110"))
                tpg110_init(&fb->dev->dev, panel, fb->board);
        else
index 50aa9bda69fd42b40dcd48215dc76baafae35e0f..a24032c8156e0b913ebda42aa84ceef3e3668fbc 100644 (file)
@@ -6,8 +6,7 @@
 #ifdef CONFIG_ARCH_NOMADIK
 int nomadik_clcd_init_board(struct amba_device *adev,
                             struct clcd_board *board);
-int nomadik_clcd_init_panel(struct clcd_fb *fb,
-                           struct device_node *endpoint);
+int nomadik_clcd_init_panel(struct clcd_fb *fb, struct device_node *panel);
 #else
 static inline int nomadik_clcd_init_board(struct amba_device *adev,
                                          struct clcd_board *board)
@@ -15,7 +14,7 @@ static inline int nomadik_clcd_init_board(struct amba_device *adev,
        return 0;
 }
 static inline int nomadik_clcd_init_panel(struct clcd_fb *fb,
-                                         struct device_node *endpoint)
+                                         struct device_node *panel)
 {
        return 0;
 }
index e5d9bfc1703a5ff9fcaafe44fbb3c7e152fa16c7..d42047dc4e4ec2287f0d6558e013c5ca38d13c67 100644 (file)
@@ -452,11 +452,9 @@ static const struct versatile_panel versatile_panels[] = {
        },
 };
 
-static void versatile_panel_probe(struct device *dev,
-                                 struct device_node *endpoint)
+static void versatile_panel_probe(struct device *dev, struct device_node *panel)
 {
        struct versatile_panel const *vpanel = NULL;
-       struct device_node *panel = NULL;
        u32 val;
        int ret;
        int i;
@@ -488,11 +486,6 @@ static void versatile_panel_probe(struct device *dev,
                return;
        }
 
-       panel = of_graph_get_remote_port_parent(endpoint);
-       if (!panel) {
-               dev_err(dev, "could not locate panel in DT\n");
-               return;
-       }
        if (!of_device_is_compatible(panel, vpanel->compatible))
                dev_err(dev, "panel in DT is not compatible with the "
                        "auto-detected panel, continuing anyway\n");
@@ -514,8 +507,7 @@ static void versatile_panel_probe(struct device *dev,
        }
 }
 
-int versatile_clcd_init_panel(struct clcd_fb *fb,
-                             struct device_node *endpoint)
+int versatile_clcd_init_panel(struct clcd_fb *fb, struct device_node *panel)
 {
        const struct of_device_id *clcd_id;
        enum versatile_clcd versatile_clcd_type;
@@ -551,7 +543,7 @@ int versatile_clcd_init_panel(struct clcd_fb *fb,
                fb->board->enable = versatile_clcd_enable;
                fb->board->disable = versatile_clcd_disable;
                fb->board->decode = versatile_clcd_decode;
-               versatile_panel_probe(dev, endpoint);
+               versatile_panel_probe(dev, panel);
                dev_info(dev, "set up callbacks for Versatile\n");
                break;
        case REALVIEW_CLCD_EB:
index 1b14359c2cf6b3fc0865cb6510651a9cd9a4d9a4..4692c309282327f366a7da8aa3db48559dca7ef0 100644 (file)
@@ -6,11 +6,10 @@
 #include <linux/platform_data/video-clcd-versatile.h>
 
 #if defined(CONFIG_PLAT_VERSATILE_CLCD) && defined(CONFIG_OF)
-int versatile_clcd_init_panel(struct clcd_fb *fb,
-                             struct device_node *endpoint);
+int versatile_clcd_init_panel(struct clcd_fb *fb, struct device_node *panel);
 #else
 static inline int versatile_clcd_init_panel(struct clcd_fb *fb,
-                               struct device_node *endpoint)
+                                           struct device_node *panel)
 {
        return 0;
 }
index ec2671d98abcccb5f3afa6bfd68158a14557ce6e..0fab92c628280359ec79023b545ff893c12a2d6c 100644 (file)
  *
  *  ARM PrimeCell PL110 Color LCD Controller
  */
-#include <linux/dma-mapping.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/slab.h>
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/backlight.h>
+#include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/mm.h>
+#include <linux/dma-mapping.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/list.h>
-#include <linux/amba/bus.h>
-#include <linux/amba/clcd.h>
-#include <linux/bitops.h>
-#include <linux/clk.h>
-#include <linux/hardirq.h>
-#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_graph.h>
-#include <linux/backlight.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 #include <video/display_timing.h>
 #include <video/of_display_timing.h>
 #include <video/videomode.h>
@@ -629,16 +624,11 @@ static int clcdfb_snprintf_mode(char *buf, int size, struct fb_videomode *mode)
                        mode->refresh);
 }
 
-static int clcdfb_of_get_backlight(struct device_node *endpoint,
+static int clcdfb_of_get_backlight(struct device_node *panel,
                                   struct clcd_panel *clcd_panel)
 {
-       struct device_node *panel;
        struct device_node *backlight;
 
-       panel = of_graph_get_remote_port_parent(endpoint);
-       if (!panel)
-               return -ENODEV;
-
        /* Look up the optional backlight phandle */
        backlight = of_parse_phandle(panel, "backlight", 0);
        if (backlight) {
@@ -651,19 +641,14 @@ static int clcdfb_of_get_backlight(struct device_node *endpoint,
        return 0;
 }
 
-static int clcdfb_of_get_mode(struct device *dev, struct device_node *endpoint,
-               struct clcd_panel *clcd_panel)
+static int clcdfb_of_get_mode(struct device *dev, struct device_node *panel,
+                             struct clcd_panel *clcd_panel)
 {
        int err;
-       struct device_node *panel;
        struct fb_videomode *mode;
        char *name;
        int len;
 
-       panel = of_graph_get_remote_port_parent(endpoint);
-       if (!panel)
-               return -ENODEV;
-
        /* Only directly connected DPI panels supported for now */
        if (of_device_is_compatible(panel, "panel-dpi"))
                err = clcdfb_of_get_dpi_panel_mode(panel, clcd_panel);
@@ -769,7 +754,7 @@ static int clcdfb_of_init_tft_panel(struct clcd_fb *fb, u32 r0, u32 g0, u32 b0)
 
 static int clcdfb_of_init_display(struct clcd_fb *fb)
 {
-       struct device_node *endpoint;
+       struct device_node *endpoint, *panel;
        int err;
        unsigned int bpp;
        u32 max_bandwidth;
@@ -786,17 +771,21 @@ static int clcdfb_of_init_display(struct clcd_fb *fb)
        if (!endpoint)
                return -ENODEV;
 
+       panel = of_graph_get_remote_port_parent(endpoint);
+       if (!panel)
+               return -ENODEV;
+
        if (fb->vendor->init_panel) {
-               err = fb->vendor->init_panel(fb, endpoint);
+               err = fb->vendor->init_panel(fb, panel);
                if (err)
                        return err;
        }
 
-       err = clcdfb_of_get_backlight(endpoint, fb->panel);
+       err = clcdfb_of_get_backlight(panel, fb->panel);
        if (err)
                return err;
 
-       err = clcdfb_of_get_mode(&fb->dev->dev, endpoint, fb->panel);
+       err = clcdfb_of_get_mode(&fb->dev->dev, panel, fb->panel);
        if (err)
                return err;
 
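For reference, a hedged sketch of the endpoint-to-panel lookup that clcdfb_of_init_display() now performs once and shares with init_panel, the backlight lookup and the mode lookup; the helper name example_find_panel is illustrative, not from the patch:

#include <linux/of.h>
#include <linux/of_graph.h>

static struct device_node *example_find_panel(struct device_node *np)
{
        struct device_node *endpoint, *panel;

        endpoint = of_graph_get_next_endpoint(np, NULL);
        if (!endpoint)
                return NULL;

        panel = of_graph_get_remote_port_parent(endpoint);
        of_node_put(endpoint);  /* endpoint reference no longer needed */
        return panel;           /* NULL when no remote port is wired up */
}
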
index 1d702e13aaff08771f873d705f1fdeb79ad2a3ee..cc11c60612980208d8bb26bd4c92b8892d792bfe 100644 (file)
@@ -1484,13 +1484,11 @@ static int ami_decode_var(struct fb_var_screeninfo *var, struct amifb_par *par,
                par->xoffset = var->xoffset;
                par->yoffset = var->yoffset;
                if (par->vmode & FB_VMODE_YWRAP) {
-                       if (par->xoffset || par->yoffset < 0 ||
-                           par->yoffset >= par->vyres)
+                       if (par->yoffset >= par->vyres)
                                par->xoffset = par->yoffset = 0;
                } else {
-                       if (par->xoffset < 0 ||
-                           par->xoffset > upx(16 << maxfmode, par->vxres - par->xres) ||
-                           par->yoffset < 0 || par->yoffset > par->vyres - par->yres)
+                       if (par->xoffset > upx(16 << maxfmode, par->vxres - par->xres) ||
+                           par->yoffset > par->vyres - par->yres)
                                par->xoffset = par->yoffset = 0;
                }
        } else
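
The dropped comparisons above (and the matching fsl-diu cleanup further down) were dead code: xoffset and yoffset in struct fb_var_screeninfo are __u32, so a `< 0` test can never fire and compilers warn about it. Only the upper bounds carry information; a hedged illustration:

#include <linux/fb.h>

/* with unsigned offsets, only upper-bound checks are meaningful */
static bool example_pan_ok(const struct fb_var_screeninfo *var,
                           const struct fb_info *info)
{
        return var->xoffset + info->var.xres <= info->var.xres_virtual &&
               var->yoffset + info->var.yres <= info->var.yres_virtual;
}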
index 278b421ab3fedc607b53330648938782d9fb3e88..dd823f5fe4c9e81484184ededbbcedc6c9e551d2 100644 (file)
@@ -646,7 +646,7 @@ void radeon_probe_screens(struct radeonfb_info *rinfo,
 
 
 /*
- * This functions applyes any arch/model/machine specific fixups
+ * This function applies any arch/model/machine specific fixups
  * to the panel info. It may eventually alter EDID block as
  * well or whatever is specific to a given model and not probed
  * properly by the default code
index 74b5bcac8bf22af140f5c1afb9062eef0639f221..37f69c061210edf8a4b30799535555ed676fe2fd 100644 (file)
@@ -37,12 +37,11 @@ static struct page *fb_deferred_io_page(struct fb_info *info, unsigned long offs
 }
 
 /* this is to find and return the vmalloc-ed fb pages */
-static int fb_deferred_io_fault(struct vm_area_struct *vma,
-                               struct vm_fault *vmf)
+static int fb_deferred_io_fault(struct vm_fault *vmf)
 {
        unsigned long offset;
        struct page *page;
-       struct fb_info *info = vma->vm_private_data;
+       struct fb_info *info = vmf->vma->vm_private_data;
 
        offset = vmf->pgoff << PAGE_SHIFT;
        if (offset >= info->fix.smem_len)
@@ -54,8 +53,8 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma,
 
        get_page(page);
 
-       if (vma->vm_file)
-               page->mapping = vma->vm_file->f_mapping;
+       if (vmf->vma->vm_file)
+               page->mapping = vmf->vma->vm_file->f_mapping;
        else
                printk(KERN_ERR "no mapping available\n");
 
@@ -91,11 +90,10 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy
 EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);
 
 /* vm_ops->page_mkwrite handler */
-static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
-                                 struct vm_fault *vmf)
+static int fb_deferred_io_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct fb_info *info = vma->vm_private_data;
+       struct fb_info *info = vmf->vma->vm_private_data;
        struct fb_deferred_io *fbdefio = info->fbdefio;
        struct page *cur;
 
@@ -105,7 +103,7 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
        deferred framebuffer IO. then if userspace touches a page
        again, we repeat the same scheme */
 
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
 
        /* protect against the workqueue changing the page list */
        mutex_lock(&fbdefio->lock);
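
Both hunks track the mm API change in which ->fault and ->page_mkwrite lost their explicit vma parameter; the VMA is now reached through vmf->vma. A minimal sketch of a handler under the new prototype (names hypothetical, not the driver's code):

#include <linux/fb.h>
#include <linux/mm.h>

static int example_fault(struct vm_fault *vmf)
{
        /* the VMA used to arrive as a separate argument */
        struct fb_info *info = vmf->vma->vm_private_data;

        if ((vmf->pgoff << PAGE_SHIFT) >= info->fix.smem_len)
                return VM_FAULT_SIGBUS;
        return VM_FAULT_NOPAGE; /* a real handler installs a page first */
}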
index fe00a07c122e69f0735baf8832fdd112677b4dd6..ca3d6b36647155abfe2c587df6f8b8155f649edf 100644 (file)
@@ -439,12 +439,12 @@ static struct mfb_info mfb_template[] = {
 static void __attribute__ ((unused)) fsl_diu_dump(struct diu __iomem *hw)
 {
        mb();
-       pr_debug("DIU: desc=%08x,%08x,%08x, gamma=%08x pallete=%08x "
+       pr_debug("DIU: desc=%08x,%08x,%08x, gamma=%08x palette=%08x "
                 "cursor=%08x curs_pos=%08x diu_mode=%08x bgnd=%08x "
                 "disp_size=%08x hsyn_para=%08x vsyn_para=%08x syn_pol=%08x "
                 "thresholds=%08x int_mask=%08x plut=%08x\n",
                 hw->desc[0], hw->desc[1], hw->desc[2], hw->gamma,
-                hw->pallete, hw->cursor, hw->curs_pos, hw->diu_mode,
+                hw->palette, hw->cursor, hw->curs_pos, hw->diu_mode,
                 hw->bgnd, hw->disp_size, hw->hsyn_para, hw->vsyn_para,
                 hw->syn_pol, hw->thresholds, hw->int_mask, hw->plut);
        rmb();
@@ -703,12 +703,6 @@ static int fsl_diu_check_var(struct fb_var_screeninfo *var,
        if (var->yres_virtual < var->yres)
                var->yres_virtual = var->yres;
 
-       if (var->xoffset < 0)
-               var->xoffset = 0;
-
-       if (var->yoffset < 0)
-               var->yoffset = 0;
-
        if (var->xoffset + info->var.xres > info->var.xres_virtual)
                var->xoffset = info->var.xres_virtual - info->var.xres;
 
@@ -1254,8 +1248,7 @@ static int fsl_diu_pan_display(struct fb_var_screeninfo *var,
            (info->var.yoffset == var->yoffset))
                return 0;       /* No change, do nothing */
 
-       if (var->xoffset < 0 || var->yoffset < 0
-           || var->xoffset + info->var.xres > info->var.xres_virtual
+       if (var->xoffset + info->var.xres > info->var.xres_virtual
            || var->yoffset + info->var.yres > info->var.yres_virtual)
                return -EINVAL;
 
index fe0c4eeff2e4f1f11442272d03e3c784db06d49f..1b0faadb30801921d74231b6fc788fac5b67d9cd 100644 (file)
@@ -985,7 +985,11 @@ static int imxfb_probe(struct platform_device *pdev)
         */
        imxfb_check_var(&info->var, info);
 
-       ret = fb_alloc_cmap(&info->cmap, 1 << info->var.bits_per_pixel, 0);
+       /*
+        * For modes > 8bpp, the color map is bypassed.
+        * Therefore, 256 entries are enough.
+        */
+       ret = fb_alloc_cmap(&info->cmap, 256, 0);
        if (ret < 0)
                goto failed_cmap;
 
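The new comment encodes the reasoning: visuals deeper than 8bpp use the pseudo palette and never index the colormap, so 256 entries bound every case. The same logic, as a hedged sketch:

#include <linux/fb.h>

static int example_alloc_cmap(struct fb_info *info)
{
        /* >8bpp visuals bypass the colormap, so 256 always suffices */
        unsigned int len = info->var.bits_per_pixel > 8 ?
                           256 : 1 << info->var.bits_per_pixel;

        return fb_alloc_cmap(&info->cmap, len, 0);
}
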
index a01147fdf270f5e15416d163a614f1f3db810051..b380a393cbc3c24c28fc203b10be6a4c3166de46 100644 (file)
@@ -1088,14 +1088,20 @@ static void MGAG100_restore(struct matrox_fb_info *minfo)
 
 #ifdef CONFIG_FB_MATROX_MYSTIQUE
 struct matrox_switch matrox_mystique = {
-       MGA1064_preinit, MGA1064_reset, MGA1064_init, MGA1064_restore,
+       .preinit        = MGA1064_preinit,
+       .reset          = MGA1064_reset,
+       .init           = MGA1064_init,
+       .restore        = MGA1064_restore,
 };
 EXPORT_SYMBOL(matrox_mystique);
 #endif
 
 #ifdef CONFIG_FB_MATROX_G
 struct matrox_switch matrox_G100 = {
-       MGAG100_preinit, MGAG100_reset, MGAG100_init, MGAG100_restore,
+       .preinit        = MGAG100_preinit,
+       .reset          = MGAG100_reset,
+       .init           = MGAG100_init,
+       .restore        = MGAG100_restore,
 };
 EXPORT_SYMBOL(matrox_G100);
 #endif
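
The matrox_switch conversions above (and the Ti3026 one below) swap positional aggregate initializers for designated ones, so the binding survives any reordering of the ops structure. The idiom in miniature, with hypothetical types:

struct example_ops {
        int  (*preinit)(void *ctx);
        void (*reset)(void *ctx);
};

static int  demo_preinit(void *ctx) { return 0; }
static void demo_reset(void *ctx)   { }

/* fields bound by name, not by position */
static const struct example_ops demo_ops = {
        .preinit = demo_preinit,
        .reset   = demo_reset,
};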
index 68fa037d8cbc06a36c257f828264180efce6fd24..9ff9be85759e8fcff00a115d0ac75865b04dc448 100644 (file)
@@ -738,7 +738,10 @@ static int Ti3026_preinit(struct matrox_fb_info *minfo)
 }
 
 struct matrox_switch matrox_millennium = {
-       Ti3026_preinit, Ti3026_reset, Ti3026_init, Ti3026_restore
+       .preinit        = Ti3026_preinit,
+       .reset          = Ti3026_reset,
+       .init           = Ti3026_init,
+       .restore        = Ti3026_restore
 };
 EXPORT_SYMBOL(matrox_millennium);
 #endif
index 5cf52d3c8e75065bddeed87299c7aef7ddd82e87..cab7333208eab44e38b1ac8f33bfd1a3c230dcfc 100644 (file)
@@ -51,7 +51,7 @@ static struct fb_var_screeninfo maxinefb_defined = {
        .vmode =        FB_VMODE_NONINTERLACED,
 };
 
-static struct fb_fix_screeninfo maxinefb_fix = {
+static struct fb_fix_screeninfo maxinefb_fix __initdata = {
        .id =           "Maxine",
        .smem_len =     (1024*768),
        .type =         FB_TYPE_PACKED_PIXELS,
index e3bc00a75296558591884453c1dacfc5c1fa8465..2528d3e609a4794d3f0456c35c83640cd6abbed0 100644 (file)
@@ -15,12 +15,6 @@ struct mbxfb_debugfs_data {
        struct dentry *misc;
 };
 
-static int open_file_generic(struct inode *inode, struct file *file)
-{
-       file->private_data = inode->i_private;
-       return 0;
-}
-
 static ssize_t write_file_dummy(struct file *file, const char __user *buf,
                                size_t count, loff_t *ppos)
 {
@@ -174,42 +168,42 @@ static ssize_t misc_read_file(struct file *file, char __user *userbuf,
 static const struct file_operations sysconf_fops = {
        .read = sysconf_read_file,
        .write = write_file_dummy,
-       .open = open_file_generic,
+       .open = simple_open,
        .llseek = default_llseek,
 };
 
 static const struct file_operations clock_fops = {
        .read = clock_read_file,
        .write = write_file_dummy,
-       .open = open_file_generic,
+       .open = simple_open,
        .llseek = default_llseek,
 };
 
 static const struct file_operations display_fops = {
        .read = display_read_file,
        .write = write_file_dummy,
-       .open = open_file_generic,
+       .open = simple_open,
        .llseek = default_llseek,
 };
 
 static const struct file_operations gsctl_fops = {
        .read = gsctl_read_file,
        .write = write_file_dummy,
-       .open = open_file_generic,
+       .open = simple_open,
        .llseek = default_llseek,
 };
 
 static const struct file_operations sdram_fops = {
        .read = sdram_read_file,
        .write = write_file_dummy,
-       .open = open_file_generic,
+       .open = simple_open,
        .llseek = default_llseek,
 };
 
 static const struct file_operations misc_fops = {
        .read = misc_read_file,
        .write = write_file_dummy,
-       .open = open_file_generic,
+       .open = simple_open,
        .llseek = default_llseek,
 };
 
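open_file_generic() duplicated a helper the kernel already exports from fs/libfs.c; paraphrased from memory, simple_open() is essentially the same thing plus a NULL guard:

int simple_open(struct inode *inode, struct file *file)
{
        if (inode->i_private)
                file->private_data = inode->i_private;
        return 0;
}
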
index abb6bbf226d5232caa2b21495a0d65a5b204cbcf..9085e9525341e8c811e6e1432d4138d5194776a3 100644 (file)
@@ -187,7 +187,7 @@ static int load_waveform(u8 *mem, size_t size, int m, int t,
                epd_frame_table[par->dt].wfm_size = user_wfm_size;
 
        if (size != epd_frame_table[par->dt].wfm_size) {
-               dev_err(dev, "Error: unexpected size %Zd != %d\n", size,
+               dev_err(dev, "Error: unexpected size %zd != %d\n", size,
                                        epd_frame_table[par->dt].wfm_size);
                return -EINVAL;
        }
index 906c6e75c260180705d5bbdd1f6228cd0e0d315d..9be884b0c778b562a699d4ae620d136fa3ae4e2c 100644 (file)
@@ -668,14 +668,14 @@ static int __init offb_init(void)
                offb_init_nodriver(of_chosen, 1);
        }
 
-       for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) {
+       for_each_node_by_type(dp, "display") {
                if (of_get_property(dp, "linux,opened", NULL) &&
                    of_get_property(dp, "linux,boot-display", NULL)) {
                        boot_disp = dp;
                        offb_init_nodriver(dp, 0);
                }
        }
-       for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) {
+       for_each_node_by_type(dp, "display") {
                if (of_get_property(dp, "linux,opened", NULL) &&
                    dp != boot_disp)
                        offb_init_nodriver(dp, 0);
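
for_each_node_by_type() wraps exactly the walk the old loops open-coded, keeping the of_find_* refcounting behaviour identical. Roughly, from include/linux/of.h:

#define for_each_node_by_type(dn, type) \
        for (dn = of_find_node_by_type(NULL, type); dn; \
             dn = of_find_node_by_type(dn, type))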
index f912a207b3941ef163483311ba777e3fecb3618a..a4ee947006c77c5178161f52df2938db8a45c4cd 100644 (file)
@@ -136,11 +136,6 @@ static void ams_delta_panel_disable(struct lcd_panel *panel)
        gpio_set_value(AMS_DELTA_GPIO_PIN_LCD_NDISP, 0);
 }
 
-static unsigned long ams_delta_panel_get_caps(struct lcd_panel *panel)
-{
-       return 0;
-}
-
 static struct lcd_panel ams_delta_panel = {
        .name           = "ams-delta",
        .config         = 0,
@@ -163,7 +158,6 @@ static struct lcd_panel ams_delta_panel = {
        .cleanup        = ams_delta_panel_cleanup,
        .enable         = ams_delta_panel_enable,
        .disable        = ams_delta_panel_disable,
-       .get_caps       = ams_delta_panel_get_caps,
 };
 
 
@@ -195,27 +189,8 @@ static int ams_delta_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int ams_delta_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int ams_delta_panel_suspend(struct platform_device *pdev,
-               pm_message_t mesg)
-{
-       return 0;
-}
-
-static int ams_delta_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver ams_delta_panel_driver = {
        .probe          = ams_delta_panel_probe,
-       .remove         = ams_delta_panel_remove,
-       .suspend        = ams_delta_panel_suspend,
-       .resume         = ams_delta_panel_resume,
        .driver         = {
                .name   = "lcd_ams_delta",
        },
index 21512b027ff7abeea4847a4e2509776929960f20..9d2da146813ef0e2c88e8d4121657fa0400a76dd 100644 (file)
 
 #define MODULE_NAME    "omapfb-lcd_h3"
 
-static int h3_panel_init(struct lcd_panel *panel, struct omapfb_device *fbdev)
-{
-       return 0;
-}
-
-static void h3_panel_cleanup(struct lcd_panel *panel)
-{
-}
-
 static int h3_panel_enable(struct lcd_panel *panel)
 {
        int r = 0;
@@ -63,12 +54,7 @@ static void h3_panel_disable(struct lcd_panel *panel)
                pr_err(MODULE_NAME ": Unable to turn off LCD panel\n");
 }
 
-static unsigned long h3_panel_get_caps(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-struct lcd_panel h3_panel = {
+static struct lcd_panel h3_panel = {
        .name           = "h3",
        .config         = OMAP_LCDC_PANEL_TFT,
 
@@ -85,11 +71,8 @@ struct lcd_panel h3_panel = {
        .vbp            = 0,
        .pcd            = 0,
 
-       .init           = h3_panel_init,
-       .cleanup        = h3_panel_cleanup,
        .enable         = h3_panel_enable,
        .disable        = h3_panel_disable,
-       .get_caps       = h3_panel_get_caps,
 };
 
 static int h3_panel_probe(struct platform_device *pdev)
@@ -98,26 +81,8 @@ static int h3_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int h3_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int h3_panel_suspend(struct platform_device *pdev, pm_message_t mesg)
-{
-       return 0;
-}
-
-static int h3_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver h3_panel_driver = {
        .probe          = h3_panel_probe,
-       .remove         = h3_panel_remove,
-       .suspend        = h3_panel_suspend,
-       .resume         = h3_panel_resume,
        .driver         = {
                .name   = "lcd_h3",
        },
index 8b4dfa0582584aa3f507388ae426eae80d070ae5..9d692f5b80253c039bc878599f2c082daca5ff54 100644 (file)
 
 #include "omapfb.h"
 
-static int htcherald_panel_init(struct lcd_panel *panel,
-                                       struct omapfb_device *fbdev)
-{
-       return 0;
-}
-
-static void htcherald_panel_cleanup(struct lcd_panel *panel)
-{
-}
-
-static int htcherald_panel_enable(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-static void htcherald_panel_disable(struct lcd_panel *panel)
-{
-}
-
-static unsigned long htcherald_panel_get_caps(struct lcd_panel *panel)
-{
-       return 0;
-}
-
 /* Found on WIZ200 (miknix) and some HERA110 models (darkstar62) */
-struct lcd_panel htcherald_panel_1 = {
+static struct lcd_panel htcherald_panel_1 = {
        .name           = "lcd_herald",
        .config         = OMAP_LCDC_PANEL_TFT |
                          OMAP_LCDC_INV_HSYNC |
@@ -74,12 +50,6 @@ struct lcd_panel htcherald_panel_1 = {
        .vsw            = 3,
        .vfp            = 2,
        .vbp            = 2,
-
-       .init           = htcherald_panel_init,
-       .cleanup        = htcherald_panel_cleanup,
-       .enable         = htcherald_panel_enable,
-       .disable        = htcherald_panel_disable,
-       .get_caps       = htcherald_panel_get_caps,
 };
 
 static int htcherald_panel_probe(struct platform_device *pdev)
@@ -88,27 +58,8 @@ static int htcherald_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int htcherald_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int htcherald_panel_suspend(struct platform_device *pdev,
-                                               pm_message_t mesg)
-{
-       return 0;
-}
-
-static int htcherald_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver htcherald_panel_driver = {
        .probe          = htcherald_panel_probe,
-       .remove         = htcherald_panel_remove,
-       .suspend        = htcherald_panel_suspend,
-       .resume         = htcherald_panel_resume,
        .driver         = {
                .name   = "lcd_htcherald",
        },
index 49907fab36ac4ab017b5934af09037e51e4687b3..b284050f54717b132fdb1bd1f67eb44a19f7b0d3 100644 (file)
 
 #include "omapfb.h"
 
-static int innovator1510_panel_init(struct lcd_panel *panel,
-                                   struct omapfb_device *fbdev)
-{
-       return 0;
-}
-
-static void innovator1510_panel_cleanup(struct lcd_panel *panel)
-{
-}
-
 static int innovator1510_panel_enable(struct lcd_panel *panel)
 {
        __raw_writeb(0x7, OMAP1510_FPGA_LCD_PANEL_CONTROL);
@@ -48,12 +38,7 @@ static void innovator1510_panel_disable(struct lcd_panel *panel)
        __raw_writeb(0x0, OMAP1510_FPGA_LCD_PANEL_CONTROL);
 }
 
-static unsigned long innovator1510_panel_get_caps(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-struct lcd_panel innovator1510_panel = {
+static struct lcd_panel innovator1510_panel = {
        .name           = "inn1510",
        .config         = OMAP_LCDC_PANEL_TFT,
 
@@ -70,11 +55,8 @@ struct lcd_panel innovator1510_panel = {
        .vbp            = 0,
        .pcd            = 12,
 
-       .init           = innovator1510_panel_init,
-       .cleanup        = innovator1510_panel_cleanup,
        .enable         = innovator1510_panel_enable,
        .disable        = innovator1510_panel_disable,
-       .get_caps       = innovator1510_panel_get_caps,
 };
 
 static int innovator1510_panel_probe(struct platform_device *pdev)
@@ -83,27 +65,8 @@ static int innovator1510_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int innovator1510_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int innovator1510_panel_suspend(struct platform_device *pdev,
-                                      pm_message_t mesg)
-{
-       return 0;
-}
-
-static int innovator1510_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver innovator1510_panel_driver = {
        .probe          = innovator1510_panel_probe,
-       .remove         = innovator1510_panel_remove,
-       .suspend        = innovator1510_panel_suspend,
-       .resume         = innovator1510_panel_resume,
        .driver         = {
                .name   = "lcd_inn1510",
        },
index 8b42894eeb7756053b6c3250409c069eac6e2e0f..1841710e796f779453e1fda56fea2a45f78a71df 100644 (file)
@@ -69,12 +69,7 @@ static void innovator1610_panel_disable(struct lcd_panel *panel)
        gpio_set_value(15, 0);
 }
 
-static unsigned long innovator1610_panel_get_caps(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-struct lcd_panel innovator1610_panel = {
+static struct lcd_panel innovator1610_panel = {
        .name           = "inn1610",
        .config         = OMAP_LCDC_PANEL_TFT,
 
@@ -95,7 +90,6 @@ struct lcd_panel innovator1610_panel = {
        .cleanup        = innovator1610_panel_cleanup,
        .enable         = innovator1610_panel_enable,
        .disable        = innovator1610_panel_disable,
-       .get_caps       = innovator1610_panel_get_caps,
 };
 
 static int innovator1610_panel_probe(struct platform_device *pdev)
@@ -104,27 +98,8 @@ static int innovator1610_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int innovator1610_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int innovator1610_panel_suspend(struct platform_device *pdev,
-                                      pm_message_t mesg)
-{
-       return 0;
-}
-
-static int innovator1610_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver innovator1610_panel_driver = {
        .probe          = innovator1610_panel_probe,
-       .remove         = innovator1610_panel_remove,
-       .suspend        = innovator1610_panel_suspend,
-       .resume         = innovator1610_panel_resume,
        .driver         = {
                .name   = "lcd_inn1610",
        },
index b56886c7055e6d56dde4d14e127ac3d0306f12c8..b0be5771fe90ee6704fb0ec2882f2cc133162a62 100644 (file)
 
 #include "omapfb.h"
 
-static int osk_panel_init(struct lcd_panel *panel, struct omapfb_device *fbdev)
-{
-       /* gpio2 was allocated in board init */
-       return 0;
-}
-
-static void osk_panel_cleanup(struct lcd_panel *panel)
-{
-}
-
 static int osk_panel_enable(struct lcd_panel *panel)
 {
        /* configure PWL pin */
@@ -68,12 +58,7 @@ static void osk_panel_disable(struct lcd_panel *panel)
        gpio_set_value(2, 0);
 }
 
-static unsigned long osk_panel_get_caps(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-struct lcd_panel osk_panel = {
+static struct lcd_panel osk_panel = {
        .name           = "osk",
        .config         = OMAP_LCDC_PANEL_TFT,
 
@@ -90,11 +75,8 @@ struct lcd_panel osk_panel = {
        .vbp            = 0,
        .pcd            = 12,
 
-       .init           = osk_panel_init,
-       .cleanup        = osk_panel_cleanup,
        .enable         = osk_panel_enable,
        .disable        = osk_panel_disable,
-       .get_caps       = osk_panel_get_caps,
 };
 
 static int osk_panel_probe(struct platform_device *pdev)
@@ -103,26 +85,8 @@ static int osk_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int osk_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int osk_panel_suspend(struct platform_device *pdev, pm_message_t mesg)
-{
-       return 0;
-}
-
-static int osk_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver osk_panel_driver = {
        .probe          = osk_panel_probe,
-       .remove         = osk_panel_remove,
-       .suspend        = osk_panel_suspend,
-       .resume         = osk_panel_resume,
        .driver         = {
                .name   = "lcd_osk",
        },
index 2713fed286f75803b5f76582661a115b1c8eab9d..cef96386cf8021e0b16118cb017175e00f3a07ed 100644 (file)
 
 #include "omapfb.h"
 
-static int palmte_panel_init(struct lcd_panel *panel,
-                               struct omapfb_device *fbdev)
-{
-       return 0;
-}
-
-static void palmte_panel_cleanup(struct lcd_panel *panel)
-{
-}
-
-static int palmte_panel_enable(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-static void palmte_panel_disable(struct lcd_panel *panel)
-{
-}
-
-static unsigned long palmte_panel_get_caps(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-struct lcd_panel palmte_panel = {
+static struct lcd_panel palmte_panel = {
        .name           = "palmte",
        .config         = OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC |
                          OMAP_LCDC_INV_HSYNC | OMAP_LCDC_HSVS_RISING_EDGE |
@@ -67,12 +43,6 @@ struct lcd_panel palmte_panel = {
        .vfp            = 8,
        .vbp            = 7,
        .pcd            = 0,
-
-       .init           = palmte_panel_init,
-       .cleanup        = palmte_panel_cleanup,
-       .enable         = palmte_panel_enable,
-       .disable        = palmte_panel_disable,
-       .get_caps       = palmte_panel_get_caps,
 };
 
 static int palmte_panel_probe(struct platform_device *pdev)
@@ -81,26 +51,8 @@ static int palmte_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int palmte_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int palmte_panel_suspend(struct platform_device *pdev, pm_message_t mesg)
-{
-       return 0;
-}
-
-static int palmte_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver palmte_panel_driver = {
        .probe          = palmte_panel_probe,
-       .remove         = palmte_panel_remove,
-       .suspend        = palmte_panel_suspend,
-       .resume         = palmte_panel_resume,
        .driver         = {
                .name   = "lcd_palmte",
        },
index 1a936d5c7b6f7a7a231592658a44dfe8b5fd6788..627f13dae5ad69011e5f5c7e0e783459d6b939ea 100644 (file)
@@ -32,31 +32,12 @@ GPIO13 - screen blanking
 
 #include "omapfb.h"
 
-static int palmtt_panel_init(struct lcd_panel *panel,
-       struct omapfb_device *fbdev)
-{
-       return 0;
-}
-
-static void palmtt_panel_cleanup(struct lcd_panel *panel)
-{
-}
-
-static int palmtt_panel_enable(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-static void palmtt_panel_disable(struct lcd_panel *panel)
-{
-}
-
 static unsigned long palmtt_panel_get_caps(struct lcd_panel *panel)
 {
        return OMAPFB_CAPS_SET_BACKLIGHT;
 }
 
-struct lcd_panel palmtt_panel = {
+static struct lcd_panel palmtt_panel = {
        .name           = "palmtt",
        .config         = OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC |
                        OMAP_LCDC_INV_HSYNC | OMAP_LCDC_HSVS_RISING_EDGE |
@@ -74,10 +55,6 @@ struct lcd_panel palmtt_panel = {
        .vbp            = 7,
        .pcd            = 0,
 
-       .init           = palmtt_panel_init,
-       .cleanup        = palmtt_panel_cleanup,
-       .enable         = palmtt_panel_enable,
-       .disable        = palmtt_panel_disable,
        .get_caps       = palmtt_panel_get_caps,
 };
 
@@ -87,26 +64,8 @@ static int palmtt_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int palmtt_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int palmtt_panel_suspend(struct platform_device *pdev, pm_message_t mesg)
-{
-       return 0;
-}
-
-static int palmtt_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver palmtt_panel_driver = {
        .probe          = palmtt_panel_probe,
-       .remove         = palmtt_panel_remove,
-       .suspend        = palmtt_panel_suspend,
-       .resume         = palmtt_panel_resume,
        .driver         = {
                .name   = "lcd_palmtt",
        },
index a20db4f7ea99b3ff17d10d436456b474f53c282f..c46d4db1f839ff83b23a5ae6278e60bad418f681 100644 (file)
 
 #include "omapfb.h"
 
-static int palmz71_panel_init(struct lcd_panel *panel,
-                             struct omapfb_device *fbdev)
-{
-       return 0;
-}
-
-static void palmz71_panel_cleanup(struct lcd_panel *panel)
-{
-
-}
-
-static int palmz71_panel_enable(struct lcd_panel *panel)
-{
-       return 0;
-}
-
-static void palmz71_panel_disable(struct lcd_panel *panel)
-{
-}
-
 static unsigned long palmz71_panel_get_caps(struct lcd_panel *panel)
 {
        return OMAPFB_CAPS_SET_BACKLIGHT;
 }
 
-struct lcd_panel palmz71_panel = {
+static struct lcd_panel palmz71_panel = {
        .name           = "palmz71",
        .config         = OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC |
                          OMAP_LCDC_INV_HSYNC | OMAP_LCDC_HSVS_RISING_EDGE |
@@ -69,10 +49,6 @@ struct lcd_panel palmz71_panel = {
        .vbp            = 7,
        .pcd            = 0,
 
-       .init           = palmz71_panel_init,
-       .cleanup        = palmz71_panel_cleanup,
-       .enable         = palmz71_panel_enable,
-       .disable        = palmz71_panel_disable,
        .get_caps       = palmz71_panel_get_caps,
 };
 
@@ -82,27 +58,8 @@ static int palmz71_panel_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int palmz71_panel_remove(struct platform_device *pdev)
-{
-       return 0;
-}
-
-static int palmz71_panel_suspend(struct platform_device *pdev,
-                                pm_message_t mesg)
-{
-       return 0;
-}
-
-static int palmz71_panel_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
 static struct platform_driver palmz71_panel_driver = {
        .probe          = palmz71_panel_probe,
-       .remove         = palmz71_panel_remove,
-       .suspend        = palmz71_panel_suspend,
-       .resume         = palmz71_panel_resume,
        .driver         = {
                .name   = "lcd_palmz71",
        },
index 6429f33167f5ef99e00f09e1491d235c00e0fa53..1abba07b84b3efd014b28bfd0ef61f3cad7cbeda 100644 (file)
@@ -337,7 +337,8 @@ static int omapfb_blank(int blank, struct fb_info *fbi)
                if (fbdev->state == OMAPFB_SUSPENDED) {
                        if (fbdev->ctrl->resume)
                                fbdev->ctrl->resume();
-                       fbdev->panel->enable(fbdev->panel);
+                       if (fbdev->panel->enable)
+                               fbdev->panel->enable(fbdev->panel);
                        fbdev->state = OMAPFB_ACTIVE;
                        if (fbdev->ctrl->get_update_mode() ==
                                        OMAPFB_MANUAL_UPDATE)
@@ -346,7 +347,8 @@ static int omapfb_blank(int blank, struct fb_info *fbi)
                break;
        case FB_BLANK_POWERDOWN:
                if (fbdev->state == OMAPFB_ACTIVE) {
-                       fbdev->panel->disable(fbdev->panel);
+                       if (fbdev->panel->disable)
+                               fbdev->panel->disable(fbdev->panel);
                        if (fbdev->ctrl->suspend)
                                fbdev->ctrl->suspend();
                        fbdev->state = OMAPFB_SUSPENDED;
@@ -1030,7 +1032,8 @@ static void omapfb_get_caps(struct omapfb_device *fbdev, int plane,
 {
        memset(caps, 0, sizeof(*caps));
        fbdev->ctrl->get_caps(plane, caps);
-       caps->ctrl |= fbdev->panel->get_caps(fbdev->panel);
+       if (fbdev->panel->get_caps)
+               caps->ctrl |= fbdev->panel->get_caps(fbdev->panel);
 }
 
 /* For lcd testing */
@@ -1549,7 +1552,8 @@ static void omapfb_free_resources(struct omapfb_device *fbdev, int state)
        case 7:
                omapfb_unregister_sysfs(fbdev);
        case 6:
-               fbdev->panel->disable(fbdev->panel);
+               if (fbdev->panel->disable)
+                       fbdev->panel->disable(fbdev->panel);
        case 5:
                omapfb_set_update_mode(fbdev, OMAPFB_UPDATE_DISABLED);
        case 4:
@@ -1557,7 +1561,8 @@ static void omapfb_free_resources(struct omapfb_device *fbdev, int state)
        case 3:
                ctrl_cleanup(fbdev);
        case 2:
-               fbdev->panel->cleanup(fbdev->panel);
+               if (fbdev->panel->cleanup)
+                       fbdev->panel->cleanup(fbdev->panel);
        case 1:
                dev_set_drvdata(fbdev->dev, NULL);
                kfree(fbdev);
@@ -1680,9 +1685,11 @@ static int omapfb_do_probe(struct platform_device *pdev,
                goto cleanup;
        }
 
-       r = fbdev->panel->init(fbdev->panel, fbdev);
-       if (r)
-               goto cleanup;
+       if (fbdev->panel->init) {
+               r = fbdev->panel->init(fbdev->panel, fbdev);
+               if (r)
+                       goto cleanup;
+       }
 
        pr_info("omapfb: configured for panel %s\n", fbdev->panel->name);
 
@@ -1725,9 +1732,11 @@ static int omapfb_do_probe(struct platform_device *pdev,
                                   OMAPFB_MANUAL_UPDATE : OMAPFB_AUTO_UPDATE);
        init_state++;
 
-       r = fbdev->panel->enable(fbdev->panel);
-       if (r)
-               goto cleanup;
+       if (fbdev->panel->enable) {
+               r = fbdev->panel->enable(fbdev->panel);
+               if (r)
+                       goto cleanup;
+       }
        init_state++;
 
        r = omapfb_register_sysfs(fbdev);
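
All the lcd_* deletions above rely on these omapfb-side guards: with the empty stubs gone, the init/cleanup/enable/disable/get_caps hooks are treated as optional and only invoked when non-NULL. The pattern distilled, as a hedged sketch:

#include "omapfb.h"     /* for struct lcd_panel, as the drivers above do */

static int example_panel_enable(struct lcd_panel *panel)
{
        /* a NULL hook now simply means "nothing to do" */
        if (panel->enable)
                return panel->enable(panel);
        return 0;
}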
index 5872bc4af3cee8bfeec57b45d243fd64a25aa677..df02fb4b7fd1c2fe6363ef5173ed516479811da1 100644 (file)
@@ -129,7 +129,7 @@ static struct fb_ops pmagbafb_ops = {
 /*
  * Turn the hardware cursor off.
  */
-static void __init pmagbafb_erase_cursor(struct fb_info *info)
+static void pmagbafb_erase_cursor(struct fb_info *info)
 {
        struct pmagbafb_par *par = info->par;
 
index 0822b6f8dddcf57fda238b0d17bce74a9f97a9bf..a7a179a0bb33f2d85beb0301945375dad45e8f79 100644 (file)
@@ -133,7 +133,7 @@ static struct fb_ops pmagbbfb_ops = {
 /*
  * Turn the hardware cursor off.
  */
-static void __init pmagbbfb_erase_cursor(struct fb_info *info)
+static void pmagbbfb_erase_cursor(struct fb_info *info)
 {
        struct pmagbbfb_par *par = info->par;
 
index 82c0a8caa9b851126402b27a88a5d41ea9fc7386..885ee3a563aa3cb06da934c822232369a05fb9f9 100644 (file)
@@ -439,9 +439,9 @@ static unsigned long lcdc_sys_read_data(void *handle)
 }
 
 static struct sh_mobile_lcdc_sys_bus_ops sh_mobile_lcdc_sys_bus_ops = {
-       lcdc_sys_write_index,
-       lcdc_sys_write_data,
-       lcdc_sys_read_data,
+       .write_index    = lcdc_sys_write_index,
+       .write_data     = lcdc_sys_write_data,
+       .read_data      = lcdc_sys_read_data,
 };
 
 static int sh_mobile_lcdc_sginit(struct fb_info *info,
@@ -2782,8 +2782,10 @@ static int sh_mobile_lcdc_probe(struct platform_device *pdev)
                priv->forced_fourcc = pdata->ch[0].fourcc;
 
        priv->base = ioremap_nocache(res->start, resource_size(res));
-       if (!priv->base)
+       if (!priv->base) {
+               error = -ENOMEM;
                goto err1;
+       }
 
        error = sh_mobile_lcdc_setup_clocks(priv, pdata->clock_source);
        if (error) {
index 61f799a515dc797e36aaa3f203f76153f59d802e..a3c44ecf4523e8edb6d64c45a89f574924453d7f 100644 (file)
@@ -180,10 +180,12 @@ static int simplefb_parse_pd(struct platform_device *pdev,
 struct simplefb_par {
        u32 palette[PSEUDO_PALETTE_SIZE];
 #if defined CONFIG_OF && defined CONFIG_COMMON_CLK
+       bool clks_enabled;
        unsigned int clk_count;
        struct clk **clks;
 #endif
 #if defined CONFIG_OF && defined CONFIG_REGULATOR
+       bool regulators_enabled;
        u32 regulator_count;
        struct regulator **regulators;
 #endif
@@ -208,12 +210,12 @@ struct simplefb_par {
  * the fb probe will not help us much either. So just complain and carry on,
  * and hope that the user actually gets a working fb at the end of things.
  */
-static int simplefb_clocks_init(struct simplefb_par *par,
-                               struct platform_device *pdev)
+static int simplefb_clocks_get(struct simplefb_par *par,
+                              struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct clk *clock;
-       int i, ret;
+       int i;
 
        if (dev_get_platdata(&pdev->dev) || !np)
                return 0;
@@ -244,6 +246,14 @@ static int simplefb_clocks_init(struct simplefb_par *par,
                par->clks[i] = clock;
        }
 
+       return 0;
+}
+
+static void simplefb_clocks_enable(struct simplefb_par *par,
+                                  struct platform_device *pdev)
+{
+       int i, ret;
+
        for (i = 0; i < par->clk_count; i++) {
                if (par->clks[i]) {
                        ret = clk_prepare_enable(par->clks[i]);
@@ -256,8 +266,7 @@ static int simplefb_clocks_init(struct simplefb_par *par,
                        }
                }
        }
-
-       return 0;
+       par->clks_enabled = true;
 }
 
 static void simplefb_clocks_destroy(struct simplefb_par *par)
@@ -269,7 +278,8 @@ static void simplefb_clocks_destroy(struct simplefb_par *par)
 
        for (i = 0; i < par->clk_count; i++) {
                if (par->clks[i]) {
-                       clk_disable_unprepare(par->clks[i]);
+                       if (par->clks_enabled)
+                               clk_disable_unprepare(par->clks[i]);
                        clk_put(par->clks[i]);
                }
        }
@@ -277,8 +287,10 @@ static void simplefb_clocks_destroy(struct simplefb_par *par)
        kfree(par->clks);
 }
 #else
-static int simplefb_clocks_init(struct simplefb_par *par,
+static int simplefb_clocks_get(struct simplefb_par *par,
        struct platform_device *pdev) { return 0; }
+static void simplefb_clocks_enable(struct simplefb_par *par,
+       struct platform_device *pdev) { }
 static void simplefb_clocks_destroy(struct simplefb_par *par) { }
 #endif
 
@@ -305,14 +317,14 @@ static void simplefb_clocks_destroy(struct simplefb_par *par) { }
  * the fb probe will not help us much either. So just complain and carry on,
  * and hope that the user actually gets a working fb at the end of things.
  */
-static int simplefb_regulators_init(struct simplefb_par *par,
-       struct platform_device *pdev)
+static int simplefb_regulators_get(struct simplefb_par *par,
+                                  struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct property *prop;
        struct regulator *regulator;
        const char *p;
-       int count = 0, i = 0, ret;
+       int count = 0, i = 0;
 
        if (dev_get_platdata(&pdev->dev) || !np)
                return 0;
@@ -354,6 +366,14 @@ static int simplefb_regulators_init(struct simplefb_par *par,
        }
        par->regulator_count = i;
 
+       return 0;
+}
+
+static void simplefb_regulators_enable(struct simplefb_par *par,
+                                      struct platform_device *pdev)
+{
+       int i, ret;
+
        /* Enable all the regulators */
        for (i = 0; i < par->regulator_count; i++) {
                ret = regulator_enable(par->regulators[i]);
@@ -365,15 +385,14 @@ static int simplefb_regulators_init(struct simplefb_par *par,
                        par->regulators[i] = NULL;
                }
        }
-
-       return 0;
+       par->regulators_enabled = true;
 }
 
 static void simplefb_regulators_destroy(struct simplefb_par *par)
 {
        int i;
 
-       if (!par->regulators)
+       if (!par->regulators || !par->regulators_enabled)
                return;
 
        for (i = 0; i < par->regulator_count; i++)
@@ -381,8 +400,10 @@ static void simplefb_regulators_destroy(struct simplefb_par *par)
                        regulator_disable(par->regulators[i]);
 }
 #else
-static int simplefb_regulators_init(struct simplefb_par *par,
+static int simplefb_regulators_get(struct simplefb_par *par,
        struct platform_device *pdev) { return 0; }
+static void simplefb_regulators_enable(struct simplefb_par *par,
+       struct platform_device *pdev) { }
 static void simplefb_regulators_destroy(struct simplefb_par *par) { }
 #endif
 
@@ -453,14 +474,17 @@ static int simplefb_probe(struct platform_device *pdev)
        }
        info->pseudo_palette = par->palette;
 
-       ret = simplefb_clocks_init(par, pdev);
+       ret = simplefb_clocks_get(par, pdev);
        if (ret < 0)
                goto error_unmap;
 
-       ret = simplefb_regulators_init(par, pdev);
+       ret = simplefb_regulators_get(par, pdev);
        if (ret < 0)
                goto error_clocks;
 
+       simplefb_clocks_enable(par, pdev);
+       simplefb_regulators_enable(par, pdev);
+
        dev_info(&pdev->dev, "framebuffer at 0x%lx, 0x%x bytes, mapped to 0x%p\n",
                             info->fix.smem_start, info->fix.smem_len,
                             info->screen_base);
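
The simplefb rework splits resource handling into a get phase (lookup only) and an enable phase, with clks_enabled/regulators_enabled recording how far probe got so teardown only disables what was actually enabled. Schematically, with hypothetical helpers:

#include <linux/types.h>

struct example_par { bool enabled; };   /* stand-in for simplefb_par */

static int  example_resources_get(struct example_par *par)     { return 0; }
static void example_resources_enable(struct example_par *par)  { }
static void example_resources_disable(struct example_par *par) { }
static void example_resources_put(struct example_par *par)     { }

static int example_probe(struct example_par *par)
{
        int ret = example_resources_get(par);   /* lookup, no side effects */

        if (ret)
                return ret;
        example_resources_enable(par);
        par->enabled = true;                    /* remember for teardown */
        return 0;
}

static void example_remove(struct example_par *par)
{
        if (par->enabled)                       /* only undo what was done */
                example_resources_disable(par);
        example_resources_put(par);             /* safe in either case */
}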
index 2925d5ce8d3e561c1d674cab95be1ce757ed8f39..bd017b57c47f8af4ff1558cedaa8589a5f0ce9ff 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/backlight.h>
 #include <linux/delay.h>
 #include <linux/fb.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -16,6 +17,7 @@
 #include <linux/of_gpio.h>
 #include <linux/pwm.h>
 #include <linux/uaccess.h>
+#include <linux/regulator/consumer.h>
 
 #define SSD1307FB_DATA                 0x40
 #define SSD1307FB_COMMAND              0x80
@@ -73,7 +75,8 @@ struct ssd1307fb_par {
        u32 prechargep2;
        struct pwm_device *pwm;
        u32 pwm_period;
-       int reset;
+       struct gpio_desc *reset;
+       struct regulator *vbat_reg;
        u32 seg_remap;
        u32 vcomh;
        u32 width;
@@ -439,6 +442,9 @@ static int ssd1307fb_init(struct ssd1307fb_par *par)
        if (ret < 0)
                return ret;
 
+       /* Clear the screen */
+       ssd1307fb_update_display(par);
+
        /* Turn on the display */
        ret = ssd1307fb_write_cmd(par->client, SSD1307FB_DISPLAY_ON);
        if (ret < 0)
@@ -561,10 +567,20 @@ static int ssd1307fb_probe(struct i2c_client *client,
 
        par->device_info = of_device_get_match_data(&client->dev);
 
-       par->reset = of_get_named_gpio(client->dev.of_node,
-                                        "reset-gpios", 0);
-       if (!gpio_is_valid(par->reset)) {
-               ret = -EINVAL;
+       par->reset = devm_gpiod_get_optional(&client->dev, "reset",
+                                            GPIOD_OUT_LOW);
+       if (IS_ERR(par->reset)) {
+               dev_err(&client->dev, "failed to get reset gpio: %ld\n",
+                       PTR_ERR(par->reset));
+               ret = PTR_ERR(par->reset);
+               goto fb_alloc_error;
+       }
+
+       par->vbat_reg = devm_regulator_get_optional(&client->dev, "vbat");
+       if (IS_ERR(par->vbat_reg)) {
+               dev_err(&client->dev, "failed to get VBAT regulator: %ld\n",
+                       PTR_ERR(par->vbat_reg));
+               ret = PTR_ERR(par->vbat_reg);
                goto fb_alloc_error;
        }
 
@@ -642,27 +658,25 @@ static int ssd1307fb_probe(struct i2c_client *client,
 
        fb_deferred_io_init(info);
 
-       ret = devm_gpio_request_one(&client->dev, par->reset,
-                                   GPIOF_OUT_INIT_HIGH,
-                                   "oled-reset");
+       i2c_set_clientdata(client, info);
+
+       if (par->reset) {
+               /* Reset the screen */
+               gpiod_set_value(par->reset, 0);
+               udelay(4);
+               gpiod_set_value(par->reset, 1);
+               udelay(4);
+       }
+
+       ret = regulator_enable(par->vbat_reg);
        if (ret) {
-               dev_err(&client->dev,
-                       "failed to request gpio %d: %d\n",
-                       par->reset, ret);
+               dev_err(&client->dev, "failed to enable VBAT: %d\n", ret);
                goto reset_oled_error;
        }
 
-       i2c_set_clientdata(client, info);
-
-       /* Reset the screen */
-       gpio_set_value(par->reset, 0);
-       udelay(4);
-       gpio_set_value(par->reset, 1);
-       udelay(4);
-
        ret = ssd1307fb_init(par);
        if (ret)
-               goto reset_oled_error;
+               goto regulator_enable_error;
 
        ret = register_framebuffer(info);
        if (ret) {
@@ -695,6 +709,8 @@ panel_init_error:
                pwm_disable(par->pwm);
                pwm_put(par->pwm);
        };
+regulator_enable_error:
+       regulator_disable(par->vbat_reg);
 reset_oled_error:
        fb_deferred_io_cleanup(info);
 fb_alloc_error:
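
The probe rework above moves ssd1307fb from legacy GPIO numbers to GPIO descriptors. devm_gpiod_get_optional() returns NULL when no "reset" line is described in the device tree, so boards without one skip the pulse naturally; a hedged sketch of the reset sequence:

#include <linux/delay.h>
#include <linux/gpio/consumer.h>

static void example_oled_reset(struct gpio_desc *reset)
{
        if (!reset)             /* optional line absent: nothing to do */
                return;
        gpiod_set_value(reset, 0);
        udelay(4);
        gpiod_set_value(reset, 1);
        udelay(4);
}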
index accfef71e984763ce83a380337199d0915b5c649..6ded5c1989985c5889a69383e767d91670799f09 100644 (file)
@@ -1294,6 +1294,10 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
        strcpy(fix->id, "stifb");
        info->fbops = &stifb_ops;
        info->screen_base = ioremap_nocache(REGION_BASE(fb,1), fix->smem_len);
+       if (!info->screen_base) {
+               printk(KERN_ERR "stifb: failed to map memory\n");
+               goto out_err0;
+       }
        info->screen_size = fix->smem_len;
        info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA;
        info->pseudo_palette = &fb->pseudo_palette;
index e925619da39bf092e71f9fec3319bb516352cc30..253ffe9baab2b7a5515b8237d81379f41f0ce394 100644 (file)
@@ -182,7 +182,7 @@ static ssize_t contrast_store(struct device *dev,
        return count;
 }
 
-static DEVICE_ATTR(contrast, 0644, contrast_show, contrast_store);
+static DEVICE_ATTR_RW(contrast);
 
 static inline u_int chan_to_field(u_int chan, struct fb_bitfield *bf)
 {
index 181793f078524ae8c06751d4b03677a132b4a7c3..9d2738e9217f104ec0702245a2d9b48500924cf6 100644 (file)
@@ -615,8 +615,12 @@ static void virtballoon_remove(struct virtio_device *vdev)
        cancel_work_sync(&vb->update_balloon_stats_work);
 
        remove_common(vb);
+#ifdef CONFIG_BALLOON_COMPACTION
        if (vb->vb_dev_info.inode)
                iput(vb->vb_dev_info.inode);
+
+       kern_unmount(balloon_mnt);
+#endif
        kfree(vb);
 }
 
index acb00b53a5207b46252e8f06c1860702f1ccfbe4..c831b7967bf95f46139b55792e6d249a5c5812b2 100644 (file)
@@ -71,9 +71,17 @@ config SOFT_WATCHDOG
          To compile this driver as a module, choose M here: the
          module will be called softdog.
 
+config SOFT_WATCHDOG_PRETIMEOUT
+       bool "Software watchdog pretimeout governor support"
+       depends on SOFT_WATCHDOG && WATCHDOG_PRETIMEOUT_GOV
+       help
+         Enable this if you want to use pretimeout governors with the software
+         watchdog. Be aware that governors might affect the watchdog because it
+         is purely software, e.g. the panic governor will stall it!
+
 config DA9052_WATCHDOG
        tristate "Dialog DA9052 Watchdog"
-       depends on PMIC_DA9052
+       depends on PMIC_DA9052 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Support for the watchdog in the DA9052 PMIC. Watchdog trigger
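
# The recurring "depends on <platform> || COMPILE_TEST" relaxations in the
# hunks here follow one pattern: they let allmodconfig/allyesconfig builds
# on unrelated architectures compile these drivers for coverage, while
# COMPILE_TEST stays off in normal configurations, so end users are still
# only offered drivers for their own platform.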
@@ -85,7 +93,7 @@ config DA9052_WATCHDOG
 
 config DA9055_WATCHDOG
        tristate "Dialog Semiconductor DA9055 Watchdog"
-       depends on MFD_DA9055
+       depends on MFD_DA9055 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          If you say yes here you get support for watchdog on the Dialog
@@ -96,7 +104,7 @@ config DA9055_WATCHDOG
 
 config DA9063_WATCHDOG
        tristate "Dialog DA9063 Watchdog"
-       depends on MFD_DA9063
+       depends on MFD_DA9063 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Support for the watchdog in the DA9063 PMIC.
@@ -105,7 +113,7 @@ config DA9063_WATCHDOG
 
 config DA9062_WATCHDOG
        tristate "Dialog DA9062/61 Watchdog"
-       depends on MFD_DA9062
+       depends on MFD_DA9062 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Support for the watchdog in the DA9062 and DA9061 PMICs.
@@ -133,7 +141,7 @@ config GPIO_WATCHDOG_ARCH_INITCALL
 
 config MENF21BMC_WATCHDOG
        tristate "MEN 14F021P00 BMC Watchdog"
-       depends on MFD_MENF21BMC
+       depends on MFD_MENF21BMC || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the MEN 14F021P00 BMC Watchdog.
@@ -168,7 +176,7 @@ config WDAT_WDT
 
 config WM831X_WATCHDOG
        tristate "WM831x watchdog"
-       depends on MFD_WM831X
+       depends on MFD_WM831X || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Support for the watchdog in the WM831x AudioPlus PMICs.  When
@@ -209,7 +217,7 @@ config ZIIRAVE_WATCHDOG
 
 config ARM_SP805_WATCHDOG
        tristate "ARM SP805 Watchdog"
-       depends on (ARM || ARM64) && ARM_AMBA
+       depends on (ARM || ARM64) && (ARM_AMBA || COMPILE_TEST)
        select WATCHDOG_CORE
        help
          ARM Primecell SP805 Watchdog timer. This will reboot your system when
@@ -237,7 +245,7 @@ config ARM_SBSA_WATCHDOG
 
 config ASM9260_WATCHDOG
        tristate "Alphascale ASM9260 watchdog"
-       depends on MACH_ASM9260
+       depends on MACH_ASM9260 || COMPILE_TEST
        depends on OF
        select WATCHDOG_CORE
        select RESET_CONTROLLER
@@ -247,14 +255,14 @@ config ASM9260_WATCHDOG
 
 config AT91RM9200_WATCHDOG
        tristate "AT91RM9200 watchdog"
-       depends on SOC_AT91RM9200 && MFD_SYSCON
+       depends on (SOC_AT91RM9200 && MFD_SYSCON) || COMPILE_TEST
        help
          Watchdog timer embedded into AT91RM9200 chips. This will reboot your
          system when the timeout is reached.
 
 config AT91SAM9X_WATCHDOG
        tristate "AT91SAM9X / AT91CAP9 watchdog"
-       depends on ARCH_AT91
+       depends on ARCH_AT91 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will
@@ -262,7 +270,7 @@ config AT91SAM9X_WATCHDOG
 
 config SAMA5D4_WATCHDOG
        tristate "Atmel SAMA5D4 Watchdog Timer"
-       depends on ARCH_AT91
+       depends on ARCH_AT91 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Atmel SAMA5D4 watchdog timer is embedded into SAMA5D4 chips.
@@ -293,7 +301,7 @@ config 21285_WATCHDOG
 
 config 977_WATCHDOG
        tristate "NetWinder WB83C977 watchdog"
-       depends on FOOTBRIDGE && ARCH_NETWINDER
+       depends on (FOOTBRIDGE && ARCH_NETWINDER) || (ARM && COMPILE_TEST)
        help
          Say Y here to include support for the WB977 watchdog included in
          NetWinder machines. Alternatively say M to compile the driver as
@@ -301,6 +309,17 @@ config 977_WATCHDOG
 
          Not sure? It's safe to say N.
 
+config GEMINI_WATCHDOG
+       tristate "Gemini watchdog"
+       depends on ARCH_GEMINI
+       select WATCHDOG_CORE
+       help
+         Say Y here to include support for the watchdog timer
+         embedded in the Cortina Systems Gemini family of devices.
+
+         To compile this driver as a module, choose M here: the
+         module will be called gemini_wdt.
+
 config IXP4XX_WATCHDOG
        tristate "IXP4xx Watchdog"
        depends on ARCH_IXP4XX
@@ -333,9 +352,9 @@ config HAVE_S3C2410_WATCHDOG
 
 config S3C2410_WATCHDOG
        tristate "S3C2410 Watchdog"
-       depends on HAVE_S3C2410_WATCHDOG
+       depends on HAVE_S3C2410_WATCHDOG || COMPILE_TEST
        select WATCHDOG_CORE
-       select MFD_SYSCON if ARCH_EXYNOS5
+       select MFD_SYSCON if ARCH_EXYNOS
        help
          Watchdog timer block in the Samsung SoCs. This will reboot
          the system when the timer expires with the watchdog enabled.
@@ -372,7 +391,7 @@ config DW_WATCHDOG
 
 config EP93XX_WATCHDOG
        tristate "EP93xx Watchdog"
-       depends on ARCH_EP93XX
+       depends on ARCH_EP93XX || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
@@ -383,7 +402,7 @@ config EP93XX_WATCHDOG
 
 config OMAP_WATCHDOG
        tristate "OMAP Watchdog"
-       depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS
+       depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Support for TI OMAP1610/OMAP1710/OMAP2420/OMAP3430/OMAP4430 watchdog.  Say 'Y'
@@ -419,7 +438,7 @@ config IOP_WATCHDOG
 
 config DAVINCI_WATCHDOG
        tristate "DaVinci watchdog"
-       depends on ARCH_DAVINCI || ARCH_KEYSTONE
+       depends on ARCH_DAVINCI || ARCH_KEYSTONE || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
@@ -432,7 +451,7 @@ config DAVINCI_WATCHDOG
 
 config ORION_WATCHDOG
        tristate "Orion watchdog"
-       depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU
+       depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || COMPILE_TEST
        depends on ARM
        select WATCHDOG_CORE
        help
@@ -443,7 +462,7 @@ config ORION_WATCHDOG
 
 config RN5T618_WATCHDOG
        tristate "Ricoh RN5T618 watchdog"
-       depends on MFD_RN5T618
+       depends on MFD_RN5T618 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          If you say yes here you get support for watchdog on the Ricoh
@@ -454,7 +473,7 @@ config RN5T618_WATCHDOG
 
 config SUNXI_WATCHDOG
        tristate "Allwinner SoCs watchdog support"
-       depends on ARCH_SUNXI
+       depends on ARCH_SUNXI || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
@@ -464,7 +483,7 @@ config SUNXI_WATCHDOG
 
 config COH901327_WATCHDOG
        bool "ST-Ericsson COH 901 327 watchdog"
-       depends on ARCH_U300
+       depends on ARCH_U300 || (ARM && COMPILE_TEST)
        default y if MACH_U300
        select WATCHDOG_CORE
        help
@@ -483,7 +502,7 @@ config TWL4030_WATCHDOG
 
 config STMP3XXX_RTC_WATCHDOG
        tristate "Freescale STMP3XXX & i.MX23/28 watchdog"
-       depends on RTC_DRV_STMP
+       depends on RTC_DRV_STMP || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer inside
@@ -493,7 +512,7 @@ config STMP3XXX_RTC_WATCHDOG
 
 config NUC900_WATCHDOG
        tristate "Nuvoton NUC900 watchdog"
-       depends on ARCH_W90X900
+       depends on ARCH_W90X900 || COMPILE_TEST
        help
          Say Y here to include support for the watchdog timer
          for the Nuvoton NUC900 series SoCs.
@@ -513,7 +532,7 @@ config TS4800_WATCHDOG
 
 config TS72XX_WATCHDOG
        tristate "TS-72XX SBC Watchdog"
-       depends on MACH_TS72XX
+       depends on MACH_TS72XX || COMPILE_TEST
        help
          Technologic Systems TS-7200, TS-7250 and TS-7260 boards have a
          watchdog timer implemented in an external CPLD chip. Say Y here
@@ -531,7 +550,7 @@ config MAX63XX_WATCHDOG
 
 config MAX77620_WATCHDOG
        tristate "Maxim Max77620 Watchdog Timer"
-       depends on MFD_MAX77620
+       depends on MFD_MAX77620 || COMPILE_TEST
        help
         This is the driver for the Max77620 watchdog timer.
         Say 'Y' here to enable the watchdog timer support for
@@ -540,7 +559,7 @@ config MAX77620_WATCHDOG
 
 config IMX2_WDT
        tristate "IMX2+ Watchdog"
-       depends on ARCH_MXC || ARCH_LAYERSCAPE
+       depends on ARCH_MXC || ARCH_LAYERSCAPE || COMPILE_TEST
        select REGMAP_MMIO
        select WATCHDOG_CORE
        help
@@ -554,7 +573,7 @@ config IMX2_WDT
 
 config UX500_WATCHDOG
        tristate "ST-Ericsson Ux500 watchdog"
-       depends on MFD_DB8500_PRCMU
+       depends on MFD_DB8500_PRCMU || (ARM && COMPILE_TEST)
        select WATCHDOG_CORE
        default y
        help
@@ -566,7 +585,7 @@ config UX500_WATCHDOG
 
 config RETU_WATCHDOG
        tristate "Retu watchdog"
-       depends on MFD_RETU
+       depends on MFD_RETU || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Retu watchdog driver for Nokia Internet Tablets (770, N800,
@@ -578,7 +597,7 @@ config RETU_WATCHDOG
 
 config MOXART_WDT
        tristate "MOXART watchdog"
-       depends on ARCH_MOXART
+       depends on ARCH_MOXART || COMPILE_TEST
        help
          Say Y here to include Watchdog timer support for the watchdog
          existing on the MOXA ART SoC series platforms.
@@ -588,7 +607,7 @@ config MOXART_WDT
 
 config SIRFSOC_WATCHDOG
        tristate "SiRFSOC watchdog"
-       depends on ARCH_SIRF
+       depends on ARCH_SIRF || COMPILE_TEST
        select WATCHDOG_CORE
        default y
        help
@@ -597,7 +616,7 @@ config SIRFSOC_WATCHDOG
 
 config ST_LPC_WATCHDOG
        tristate "STMicroelectronics LPC Watchdog"
-       depends on ARCH_STI
+       depends on ARCH_STI || COMPILE_TEST
        depends on OF
        select WATCHDOG_CORE
        help
@@ -621,7 +640,7 @@ config TEGRA_WATCHDOG
 config QCOM_WDT
        tristate "QCOM watchdog"
        depends on HAS_IOMEM
-       depends on ARCH_QCOM
+       depends on ARCH_QCOM || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include Watchdog timer support for the watchdog found
@@ -633,7 +652,7 @@ config QCOM_WDT
 
 config MESON_GXBB_WATCHDOG
        tristate "Amlogic Meson GXBB SoCs watchdog support"
-       depends on ARCH_MESON
+       depends on ARCH_MESON || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
@@ -643,7 +662,7 @@ config MESON_GXBB_WATCHDOG
 
 config MESON_WATCHDOG
        tristate "Amlogic Meson SoCs watchdog support"
-       depends on ARCH_MESON
+       depends on ARCH_MESON || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
@@ -653,7 +672,7 @@ config MESON_WATCHDOG
 
 config MEDIATEK_WATCHDOG
        tristate "Mediatek SoCs watchdog support"
-       depends on ARCH_MEDIATEK
+       depends on ARCH_MEDIATEK || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
@@ -663,7 +682,7 @@ config MEDIATEK_WATCHDOG
 
 config DIGICOLOR_WATCHDOG
        tristate "Conexant Digicolor SoCs watchdog support"
-       depends on ARCH_DIGICOLOR
+       depends on ARCH_DIGICOLOR || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
@@ -685,7 +704,7 @@ config LPC18XX_WATCHDOG
 
 config ATLAS7_WATCHDOG
        tristate "CSRatlas7 watchdog"
-       depends on ARCH_ATLAS7
+       depends on ARCH_ATLAS7 || COMPILE_TEST
        help
          Say Y here to include Watchdog timer support for the watchdog
          found on the CSRatlas7 series platforms.
@@ -714,11 +733,21 @@ config ASPEED_WATCHDOG
          To compile this driver as a module, choose M here: the
          module will be called aspeed_wdt.
 
+config ZX2967_WATCHDOG
+       tristate "ZTE zx2967 SoCs watchdog support"
+       depends on ARCH_ZX
+       select WATCHDOG_CORE
+       help
+         Say Y here to include support for the watchdog timer
+         in ZTE zx2967 SoCs.
+         To compile this driver as a module, choose M here: the
+         module will be called zx2967_wdt.
+
 # AVR32 Architecture
 
 config AT32AP700X_WDT
        tristate "AT32AP700x watchdog"
-       depends on CPU_AT32AP700X
+       depends on CPU_AT32AP700X || COMPILE_TEST
        help
          Watchdog timer embedded into AT32AP700x devices. This will reboot
          your system when the timeout is reached.
@@ -822,7 +851,7 @@ config SP5100_TCO
 
 config GEODE_WDT
        tristate "AMD Geode CS5535/CS5536 Watchdog"
-       depends on CS5535_MFGPT
+       depends on CS5535_MFGPT || (X86 && COMPILE_TEST)
        help
          This driver enables a watchdog capability built into the
          CS5535/CS5536 companion chips for the AMD Geode GX and LX
@@ -835,7 +864,7 @@ config GEODE_WDT
 
 config SC520_WDT
        tristate "AMD Elan SC520 processor Watchdog"
-       depends on MELAN
+       depends on MELAN || COMPILE_TEST
        help
          This is the driver for the hardware watchdog built in to the
          AMD "Elan" SC520 microcomputer commonly used in embedded systems.
@@ -1034,7 +1063,7 @@ config HP_WATCHDOG
 
 config KEMPLD_WDT
        tristate "Kontron COM Watchdog Timer"
-       depends on MFD_KEMPLD
+       depends on MFD_KEMPLD || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Support for the PLD watchdog on some Kontron ETX and COMexpress
@@ -1108,7 +1137,8 @@ config NV_TCO
 
 config RDC321X_WDT
        tristate "RDC R-321x SoC watchdog"
-       depends on X86_RDC321X
+       depends on X86_RDC321X || COMPILE_TEST
+       depends on PCI
        help
          This is the driver for the built in hardware watchdog
          in the RDC R-321x SoC.
@@ -1326,6 +1356,16 @@ config NI903X_WDT
          To compile this driver as a module, choose M here: the module will be
          called ni903x_wdt.
 
+config NIC7018_WDT
+       tristate "NIC7018 Watchdog"
+       depends on X86 && ACPI
+       select WATCHDOG_CORE
+       ---help---
+         Support for National Instruments NIC7018 Watchdog.
+
+         To compile this driver as a module, choose M here: the module will be
+         called nic7018_wdt.
+
 # M32R Architecture
 
 # M68K Architecture
@@ -1343,14 +1383,14 @@ config M54xx_WATCHDOG
 
 config ATH79_WDT
        tristate "Atheros AR71XX/AR724X/AR913X hardware watchdog"
-       depends on ATH79
+       depends on ATH79 || (ARM && COMPILE_TEST)
        help
          Hardware driver for the built-in watchdog timer on the Atheros
          AR71XX/AR724X/AR913X SoCs.
 
 config BCM47XX_WDT
        tristate "Broadcom BCM47xx Watchdog Timer"
-       depends on BCM47XX || ARCH_BCM_5301X
+       depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Hardware driver for the Broadcom BCM47xx Watchdog Timer.
@@ -1367,7 +1407,7 @@ config RC32434_WDT
 
 config INDYDOG
        tristate "Indy/I2 Hardware Watchdog"
-       depends on SGI_HAS_INDYDOG
+       depends on SGI_HAS_INDYDOG || (MIPS && COMPILE_TEST)
        help
          Hardware driver for the Indy's/I2's watchdog. This is a
          watchdog timer that will reboot the machine after a 60 second
@@ -1383,7 +1423,7 @@ config JZ4740_WDT
 
 config WDT_MTX1
        tristate "MTX-1 Hardware Watchdog"
-       depends on MIPS_MTX1
+       depends on MIPS_MTX1 || (MIPS && COMPILE_TEST)
        help
          Hardware driver for the MTX-1 boards. This is a watchdog timer that
          will reboot the machine after a 100 second timer expires.
@@ -1391,6 +1431,7 @@ config WDT_MTX1
 config PNX833X_WDT
        tristate "PNX833x Hardware Watchdog"
        depends on SOC_PNX8335
+       depends on BROKEN
        help
          Hardware driver for the PNX833x's watchdog. This is a
          watchdog timer that will reboot the machine after a programmable
@@ -1399,7 +1440,7 @@ config PNX833X_WDT
 
 config SIBYTE_WDOG
        tristate "Sibyte SoC hardware watchdog"
-       depends on CPU_SB1
+       depends on CPU_SB1 || (MIPS && COMPILE_TEST)
        help
          Watchdog driver for the built in watchdog hardware in Sibyte
          SoC processors.  There are apparently two watchdog timers
@@ -1412,13 +1453,13 @@ config SIBYTE_WDOG
 
 config AR7_WDT
        tristate "TI AR7 Watchdog Timer"
-       depends on AR7
+       depends on AR7 || (MIPS && COMPILE_TEST)
        help
          Hardware driver for the TI AR7 Watchdog Timer.
 
 config TXX9_WDT
        tristate "Toshiba TXx9 Watchdog Timer"
-       depends on CPU_TX39XX || CPU_TX49XX
+       depends on CPU_TX39XX || CPU_TX49XX || (MIPS && COMPILE_TEST)
        select WATCHDOG_CORE
        help
          Hardware driver for the built-in watchdog timer on TXx9 MIPS SoCs.
@@ -1454,7 +1495,7 @@ config BCM63XX_WDT
 
 config BCM2835_WDT
        tristate "Broadcom BCM2835 hardware watchdog"
-       depends on ARCH_BCM2835
+       depends on ARCH_BCM2835 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Watchdog driver for the built in watchdog hardware in Broadcom
@@ -1465,7 +1506,7 @@ config BCM2835_WDT
 
 config BCM_KONA_WDT
        tristate "BCM Kona Watchdog"
-       depends on ARCH_BCM_MOBILE
+       depends on ARCH_BCM_MOBILE || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Support for the watchdog timer on the following Broadcom BCM281xx
@@ -1477,7 +1518,7 @@ config BCM_KONA_WDT
 
 config BCM_KONA_WDT_DEBUG
        bool "DEBUGFS support for BCM Kona Watchdog"
-       depends on BCM_KONA_WDT
+       depends on BCM_KONA_WDT || COMPILE_TEST
        help
          If enabled, adds /sys/kernel/debug/bcm_kona_wdt/info which provides
          access to the driver's internal data structures as well as watchdog
@@ -1538,7 +1579,7 @@ config MT7621_WDT
 config PIC32_WDT
        tristate "Microchip PIC32 hardware watchdog"
        select WATCHDOG_CORE
-       depends on MACH_PIC32
+       depends on MACH_PIC32 || (MIPS && COMPILE_TEST)
        help
          Watchdog driver for the built in watchdog hardware in a PIC32.
 
@@ -1551,7 +1592,7 @@ config PIC32_WDT
 config PIC32_DMT
        tristate "Microchip PIC32 Deadman Timer"
        select WATCHDOG_CORE
-       depends on MACH_PIC32
+       depends on MACH_PIC32 || (MIPS && COMPILE_TEST)
        help
          Watchdog driver for PIC32 instruction fetch counting timer. This specific
          timer is typically used in mission critical and safety critical
@@ -1573,7 +1614,7 @@ config GEF_WDT
 
 config MPC5200_WDT
        bool "MPC52xx Watchdog Timer"
-       depends on PPC_MPC52xx
+       depends on PPC_MPC52xx || COMPILE_TEST
        help
          Use General Purpose Timer (GPT) 0 on the MPC5200 as Watchdog.
 
@@ -1592,11 +1633,11 @@ config 8xxx_WDT
 
 config MV64X60_WDT
        tristate "MV64X60 (Marvell Discovery) Watchdog Timer"
-       depends on MV64X60
+       depends on MV64X60 || COMPILE_TEST
 
 config PIKA_WDT
        tristate "PIKA FPGA Watchdog"
-       depends on WARP
+       depends on WARP || (PPC64 && COMPILE_TEST)
        default y
        help
          This enables the watchdog in the PIKA FPGA. Currently used on
@@ -1646,7 +1687,7 @@ config MEN_A21_WDT
 
 config WATCHDOG_RTAS
        tristate "RTAS watchdog"
-       depends on PPC_RTAS
+       depends on PPC_RTAS || (PPC64 && COMPILE_TEST)
        help
          This driver adds watchdog support for the RTAS watchdog.
 
@@ -1674,7 +1715,7 @@ config DIAG288_WATCHDOG
 
 config SH_WDT
        tristate "SuperH Watchdog"
-       depends on SUPERH && (CPU_SH3 || CPU_SH4)
+       depends on SUPERH && (CPU_SH3 || CPU_SH4 || COMPILE_TEST)
        select WATCHDOG_CORE
        help
          This driver adds watchdog support for the integrated watchdog in the
@@ -1741,7 +1782,7 @@ config XEN_WDT
 
 config UML_WATCHDOG
        tristate "UML watchdog"
-       depends on UML
+       depends on UML || COMPILE_TEST
 
 #
 # ISA-based Watchdog Cards
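
All of the Kconfig hunks above apply the same idiom: each platform-only dependency gains a "|| COMPILE_TEST" alternative so allyesconfig/allmodconfig builds on other architectures can at least compile the driver and catch build breakage early; where the code still needs an architecture's headers or I/O accessors, the alternative is qualified, e.g. "(MIPS && COMPILE_TEST)". A minimal sketch of the pattern, using a hypothetical FOO_WATCHDOG entry:

config FOO_WATCHDOG
	tristate "Foo SoC watchdog"
	# Real users still need ARCH_FOO; COMPILE_TEST only adds build coverage.
	depends on ARCH_FOO || COMPILE_TEST
	select WATCHDOG_CORE
	help
	  Say Y here to include support for the watchdog timer
	  on Foo SoCs.
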
index 0c3d35e3c3341541fc4140797653611a1bed88a8..a2126e2a99ae85205bb96c7981f44cb36fc97efd 100644 (file)
@@ -45,6 +45,7 @@ obj-$(CONFIG_OMAP_WATCHDOG) += omap_wdt.o
 obj-$(CONFIG_TWL4030_WATCHDOG) += twl4030_wdt.o
 obj-$(CONFIG_21285_WATCHDOG) += wdt285.o
 obj-$(CONFIG_977_WATCHDOG) += wdt977.o
+obj-$(CONFIG_GEMINI_WATCHDOG) += gemini_wdt.o
 obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o
 obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o
 obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o
@@ -82,6 +83,7 @@ obj-$(CONFIG_BCM7038_WDT) += bcm7038_wdt.o
 obj-$(CONFIG_ATLAS7_WATCHDOG) += atlas7_wdt.o
 obj-$(CONFIG_RENESAS_WDT) += renesas_wdt.o
 obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
+obj-$(CONFIG_ZX2967_WATCHDOG) += zx2967_wdt.o
 
 # AVR32 Architecture
 obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
@@ -139,6 +141,7 @@ obj-$(CONFIG_INTEL_SCU_WATCHDOG) += intel_scu_watchdog.o
 obj-$(CONFIG_INTEL_MID_WATCHDOG) += intel-mid_wdt.o
 obj-$(CONFIG_INTEL_MEI_WDT) += mei_wdt.o
 obj-$(CONFIG_NI903X_WDT) += ni903x_wdt.o
+obj-$(CONFIG_NIC7018_WDT) += nic7018_wdt.o
 
 # M32R Architecture
 
index d0b59ba0f661a0be2dbfce74cc35851d08463ed5..53da001f0838eeb09303bce1725d316068b369c1 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
-#include <linux/reboot.h>
 #include <linux/reset.h>
 #include <linux/watchdog.h>
 
@@ -59,7 +58,6 @@ struct asm9260_wdt_priv {
        struct clk              *clk;
        struct clk              *clk_ahb;
        struct reset_control    *rst;
-       struct notifier_block   restart_handler;
 
        void __iomem            *iobase;
        int                     irq;
@@ -172,15 +170,14 @@ static irqreturn_t asm9260_wdt_irq(int irq, void *devid)
        return IRQ_HANDLED;
 }
 
-static int asm9260_restart_handler(struct notifier_block *this,
-                                  unsigned long mode, void *cmd)
+static int asm9260_restart(struct watchdog_device *wdd, unsigned long action,
+                          void *data)
 {
-       struct asm9260_wdt_priv *priv =
-               container_of(this, struct asm9260_wdt_priv, restart_handler);
+       struct asm9260_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
        asm9260_wdt_sys_reset(priv);
 
-       return NOTIFY_DONE;
+       return 0;
 }
 
 static const struct watchdog_info asm9260_wdt_ident = {
@@ -189,13 +186,14 @@ static const struct watchdog_info asm9260_wdt_ident = {
        .identity         =     "Alphascale asm9260 Watchdog",
 };
 
-static struct watchdog_ops asm9260_wdt_ops = {
+static const struct watchdog_ops asm9260_wdt_ops = {
        .owner          = THIS_MODULE,
        .start          = asm9260_wdt_enable,
        .stop           = asm9260_wdt_disable,
        .get_timeleft   = asm9260_wdt_gettimeleft,
        .ping           = asm9260_wdt_feed,
        .set_timeout    = asm9260_wdt_settimeout,
+       .restart        = asm9260_restart,
 };
 
 static int asm9260_wdt_get_dt_clks(struct asm9260_wdt_priv *priv)
@@ -335,18 +333,14 @@ static int asm9260_wdt_probe(struct platform_device *pdev)
                        dev_warn(&pdev->dev, "failed to request IRQ\n");
        }
 
+       watchdog_set_restart_priority(wdd, 128);
+
        ret = watchdog_register_device(wdd);
        if (ret)
                goto clk_off;
 
        platform_set_drvdata(pdev, priv);
 
-       priv->restart_handler.notifier_call = asm9260_restart_handler;
-       priv->restart_handler.priority = 128;
-       ret = register_restart_handler(&priv->restart_handler);
-       if (ret)
-               dev_warn(&pdev->dev, "cannot register restart handler\n");
-
        dev_info(&pdev->dev, "Watchdog enabled (timeout: %d sec, mode: %s)\n",
                 wdd->timeout, mode_name[priv->mode]);
        return 0;
@@ -370,8 +364,6 @@ static int asm9260_wdt_remove(struct platform_device *pdev)
 
        asm9260_wdt_disable(&priv->wdd);
 
-       unregister_restart_handler(&priv->restart_handler);
-
        watchdog_unregister_device(&priv->wdd);
 
        clk_disable_unprepare(priv->clk);
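
The asm9260 hunks above — and the bcm2835 and dw_wdt conversions later in this diff — all drop an open-coded reboot notifier in favour of the watchdog core's restart support: the driver supplies a .restart op, calls watchdog_set_restart_priority() before registration, and the core registers (and unregisters) the notifier itself. A minimal sketch of the resulting shape, for a hypothetical foo_wdt driver whose register layout is made up here:

#include <linux/io.h>
#include <linux/watchdog.h>

struct foo_wdt {
	void __iomem *base;	/* hypothetical register block */
};

static int foo_wdt_restart(struct watchdog_device *wdd,
			   unsigned long action, void *data)
{
	struct foo_wdt *priv = watchdog_get_drvdata(wdd);

	/* Trigger the hardware reset; register and value are illustrative. */
	writel(0x1, priv->base);
	return 0;
}

static const struct watchdog_ops foo_wdt_ops = {
	.owner		= THIS_MODULE,
	.restart	= foo_wdt_restart,
	/* .start, .stop, .ping elided for brevity */
};

/* In probe, before watchdog_register_device(wdd): */
/*	watchdog_set_restart_priority(wdd, 128);     */
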
index f5ad8023c2e6aeea92ddc245ebc2d034dd843ac6..1c652582de40036ed44d21d8c8d67bded52aa643 100644 (file)
@@ -136,15 +136,6 @@ static const struct watchdog_info aspeed_wdt_info = {
        .identity       = KBUILD_MODNAME,
 };
 
-static int aspeed_wdt_remove(struct platform_device *pdev)
-{
-       struct aspeed_wdt *wdt = platform_get_drvdata(pdev);
-
-       watchdog_unregister_device(&wdt->wdd);
-
-       return 0;
-}
-
 static int aspeed_wdt_probe(struct platform_device *pdev)
 {
        struct aspeed_wdt *wdt;
@@ -187,20 +178,17 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
                set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
        }
 
-       ret = watchdog_register_device(&wdt->wdd);
+       ret = devm_watchdog_register_device(&pdev->dev, &wdt->wdd);
        if (ret) {
                dev_err(&pdev->dev, "failed to register\n");
                return ret;
        }
 
-       platform_set_drvdata(pdev, wdt);
-
        return 0;
 }
 
 static struct platform_driver aspeed_watchdog_driver = {
        .probe = aspeed_wdt_probe,
-       .remove = aspeed_wdt_remove,
        .driver = {
                .name = KBUILD_MODNAME,
                .of_match_table = of_match_ptr(aspeed_wdt_of_table),
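
The aspeed conversion above is the template for several drivers further down (da9052, da9055, da9062, da9063, ebc_c384): devm_watchdog_register_device() ties the watchdog's lifetime to the struct device, so the explicit unregister call, the .remove callback, and any drvdata kept only for teardown can all go away. A sketch with hypothetical names:

static int foo_wdt_probe(struct platform_device *pdev)
{
	/* ... allocate and initialise wdt->wdd ... */

	/* Unregistered automatically when the device unbinds. */
	return devm_watchdog_register_device(&pdev->dev, &wdt->wdd);
}
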
index ed80734befae16ea294b97fa6f1bfc5249a5d6bc..4abdcabd8219e61ddc8adbbba23ac804f2f54f2c 100644 (file)
@@ -105,7 +105,7 @@ static const struct watchdog_info atlas7_wdt_ident = {
        .identity = "atlas7 Watchdog",
 };
 
-static struct watchdog_ops atlas7_wdt_ops = {
+static const struct watchdog_ops atlas7_wdt_ops = {
        .owner = THIS_MODULE,
        .start = atlas7_wdt_enable,
        .stop = atlas7_wdt_disable,
index c32c45bd8b097889c8f322255fa63c8ed507d6ab..b339e0e67b4c1275fd4992fea4f1e24c0575b783 100644 (file)
@@ -14,7 +14,6 @@
  */
 
 #include <linux/delay.h>
-#include <linux/reboot.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/io.h>
@@ -37,9 +36,9 @@
 #define PM_RSTC_RESET                  0x00000102
 
 /*
- * The Raspberry Pi firmware uses the RSTS register to know which partiton
- * to boot from. The partiton value is spread into bits 0, 2, 4, 6, 8, 10.
- * Partiton 63 is a special partition used by the firmware to indicate halt.
+ * The Raspberry Pi firmware uses the RSTS register to know which partition
+ * to boot from. The partition value is spread into bits 0, 2, 4, 6, 8, 10.
+ * Partition 63 is a special partition used by the firmware to indicate halt.
  */
 #define PM_RSTS_RASPBERRYPI_HALT       0x555
 
@@ -49,7 +48,6 @@
 struct bcm2835_wdt {
        void __iomem            *base;
        spinlock_t              lock;
-       struct notifier_block   restart_handler;
 };
 
 static unsigned int heartbeat;
@@ -99,11 +97,37 @@ static unsigned int bcm2835_wdt_get_timeleft(struct watchdog_device *wdog)
        return WDOG_TICKS_TO_SECS(ret & PM_WDOG_TIME_SET);
 }
 
+static void __bcm2835_restart(struct bcm2835_wdt *wdt)
+{
+       u32 val;
+
+       /* use a timeout of 10 ticks (~150us) */
+       writel_relaxed(10 | PM_PASSWORD, wdt->base + PM_WDOG);
+       val = readl_relaxed(wdt->base + PM_RSTC);
+       val &= PM_RSTC_WRCFG_CLR;
+       val |= PM_PASSWORD | PM_RSTC_WRCFG_FULL_RESET;
+       writel_relaxed(val, wdt->base + PM_RSTC);
+
+       /* No sleeping, possibly atomic. */
+       mdelay(1);
+}
+
+static int bcm2835_restart(struct watchdog_device *wdog,
+                          unsigned long action, void *data)
+{
+       struct bcm2835_wdt *wdt = watchdog_get_drvdata(wdog);
+
+       __bcm2835_restart(wdt);
+
+       return 0;
+}
+
 static const struct watchdog_ops bcm2835_wdt_ops = {
        .owner =        THIS_MODULE,
        .start =        bcm2835_wdt_start,
        .stop =         bcm2835_wdt_stop,
        .get_timeleft = bcm2835_wdt_get_timeleft,
+       .restart =      bcm2835_restart,
 };
 
 static const struct watchdog_info bcm2835_wdt_info = {
@@ -120,26 +144,6 @@ static struct watchdog_device bcm2835_wdt_wdd = {
        .timeout =      WDOG_TICKS_TO_SECS(PM_WDOG_TIME_SET),
 };
 
-static int
-bcm2835_restart(struct notifier_block *this, unsigned long mode, void *cmd)
-{
-       struct bcm2835_wdt *wdt = container_of(this, struct bcm2835_wdt,
-                                              restart_handler);
-       u32 val;
-
-       /* use a timeout of 10 ticks (~150us) */
-       writel_relaxed(10 | PM_PASSWORD, wdt->base + PM_WDOG);
-       val = readl_relaxed(wdt->base + PM_RSTC);
-       val &= PM_RSTC_WRCFG_CLR;
-       val |= PM_PASSWORD | PM_RSTC_WRCFG_FULL_RESET;
-       writel_relaxed(val, wdt->base + PM_RSTC);
-
-       /* No sleeping, possibly atomic. */
-       mdelay(1);
-
-       return 0;
-}
-
 /*
  * We can't really power off, but if we do the normal reset scheme, and
  * indicate to bootcode.bin not to reboot, then most of the chip will be
@@ -163,13 +167,13 @@ static void bcm2835_power_off(void)
        writel_relaxed(val, wdt->base + PM_RSTS);
 
        /* Continue with normal reset mechanism */
-       bcm2835_restart(&wdt->restart_handler, REBOOT_HARD, NULL);
+       __bcm2835_restart(wdt);
 }
 
 static int bcm2835_wdt_probe(struct platform_device *pdev)
 {
+       struct resource *res;
        struct device *dev = &pdev->dev;
-       struct device_node *np = dev->of_node;
        struct bcm2835_wdt *wdt;
        int err;
 
@@ -180,16 +184,15 @@ static int bcm2835_wdt_probe(struct platform_device *pdev)
 
        spin_lock_init(&wdt->lock);
 
-       wdt->base = of_iomap(np, 0);
-       if (!wdt->base) {
-               dev_err(dev, "Failed to remap watchdog regs");
-               return -ENODEV;
-       }
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       wdt->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(wdt->base))
+               return PTR_ERR(wdt->base);
 
        watchdog_set_drvdata(&bcm2835_wdt_wdd, wdt);
        watchdog_init_timeout(&bcm2835_wdt_wdd, heartbeat, dev);
        watchdog_set_nowayout(&bcm2835_wdt_wdd, nowayout);
-       bcm2835_wdt_wdd.parent = &pdev->dev;
+       bcm2835_wdt_wdd.parent = dev;
        if (bcm2835_wdt_is_running(wdt)) {
                /*
                 * The currently active timeout value (set by the
@@ -201,16 +204,16 @@ static int bcm2835_wdt_probe(struct platform_device *pdev)
                 */
                set_bit(WDOG_HW_RUNNING, &bcm2835_wdt_wdd.status);
        }
-       err = watchdog_register_device(&bcm2835_wdt_wdd);
+
+       watchdog_set_restart_priority(&bcm2835_wdt_wdd, 128);
+
+       watchdog_stop_on_reboot(&bcm2835_wdt_wdd);
+       err = devm_watchdog_register_device(dev, &bcm2835_wdt_wdd);
        if (err) {
                dev_err(dev, "Failed to register watchdog device");
-               iounmap(wdt->base);
                return err;
        }
 
-       wdt->restart_handler.notifier_call = bcm2835_restart;
-       wdt->restart_handler.priority = 128;
-       register_restart_handler(&wdt->restart_handler);
        if (pm_power_off == NULL)
                pm_power_off = bcm2835_power_off;
 
@@ -220,22 +223,12 @@ static int bcm2835_wdt_probe(struct platform_device *pdev)
 
 static int bcm2835_wdt_remove(struct platform_device *pdev)
 {
-       struct bcm2835_wdt *wdt = platform_get_drvdata(pdev);
-
-       unregister_restart_handler(&wdt->restart_handler);
        if (pm_power_off == bcm2835_power_off)
                pm_power_off = NULL;
-       watchdog_unregister_device(&bcm2835_wdt_wdd);
-       iounmap(wdt->base);
 
        return 0;
 }
 
-static void bcm2835_wdt_shutdown(struct platform_device *pdev)
-{
-       bcm2835_wdt_stop(&bcm2835_wdt_wdd);
-}
-
 static const struct of_device_id bcm2835_wdt_of_match[] = {
        { .compatible = "brcm,bcm2835-pm-wdt", },
        {},
@@ -245,7 +238,6 @@ MODULE_DEVICE_TABLE(of, bcm2835_wdt_of_match);
 static struct platform_driver bcm2835_wdt_driver = {
        .probe          = bcm2835_wdt_probe,
        .remove         = bcm2835_wdt_remove,
-       .shutdown       = bcm2835_wdt_shutdown,
        .driver = {
                .name =         "bcm2835-wdt",
                .of_match_table = bcm2835_wdt_of_match,
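
Note that the dropped bcm2835_wdt_shutdown() is not lost behaviour: watchdog_stop_on_reboot() asks the core to stop the watchdog from its reboot notifier, which also runs on shutdown, so a dedicated .shutdown hook that only called the stop op becomes redundant. The replacement is a one-liner before registration (foo_wdd is a placeholder name):

	watchdog_stop_on_reboot(&foo_wdd);	/* core stops the wdt on reboot/halt */
	err = devm_watchdog_register_device(dev, &foo_wdd);
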
index a1900b9ab6c4e651b799f39b6c0f80ae62657d05..35725e21b18a609d267b5dfd49ce4dcc803133af 100644 (file)
@@ -226,9 +226,6 @@ static int bcm47xx_wdt_remove(struct platform_device *pdev)
 {
        struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev);
 
-       if (!wdt)
-               return -ENXIO;
-
        watchdog_unregister_device(&wdt->wdd);
 
        return 0;
index 4814c00b32f6de8c47b6c112988ce3837ffe7f81..c1b8e534fb5585ad52c2b4d9693981d33b448dff 100644 (file)
@@ -101,7 +101,7 @@ static unsigned int bcm7038_wdt_get_timeleft(struct watchdog_device *wdog)
        return time_left / wdt->rate;
 }
 
-static struct watchdog_info bcm7038_wdt_info = {
+static const struct watchdog_info bcm7038_wdt_info = {
        .identity       = "Broadcom BCM7038 Watchdog Timer",
        .options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
                                WDIOF_MAGICCLOSE
index e0c98423f2c9d0c2203a14933a0061f3026b224e..6fce17d5b9f1a27f94e711d0e69d9844c920a91d 100644 (file)
@@ -266,7 +266,7 @@ static int bcm_kona_wdt_stop(struct watchdog_device *wdog)
                                            SECWDOG_SRSTEN_MASK, 0);
 }
 
-static struct watchdog_ops bcm_kona_wdt_ops = {
+static const struct watchdog_ops bcm_kona_wdt_ops = {
        .owner =        THIS_MODULE,
        .start =        bcm_kona_wdt_start,
        .stop =         bcm_kona_wdt_stop,
@@ -274,7 +274,7 @@ static struct watchdog_ops bcm_kona_wdt_ops = {
        .get_timeleft = bcm_kona_wdt_get_timeleft,
 };
 
-static struct watchdog_info bcm_kona_wdt_info = {
+static const struct watchdog_info bcm_kona_wdt_info = {
        .options =      WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE |
                        WDIOF_KEEPALIVEPING,
        .identity =     "Broadcom Kona Watchdog Timer",
index 04da4b66c75e361d191a0c0faac4fd4010d9d26a..3ad1e44bef44b16cf653eaeac430ee8316916f68 100644 (file)
@@ -192,12 +192,12 @@ static int booke_wdt_set_timeout(struct watchdog_device *wdt_dev,
        return 0;
 }
 
-static struct watchdog_info booke_wdt_info = {
+static struct watchdog_info booke_wdt_info __ro_after_init = {
        .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
        .identity = "PowerPC Book-E Watchdog",
 };
 
-static struct watchdog_ops booke_wdt_ops = {
+static const struct watchdog_ops booke_wdt_ops = {
        .owner = THIS_MODULE,
        .start = booke_wdt_start,
        .stop = booke_wdt_stop,
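
A recurring side change in this series: watchdog_ops and watchdog_info structures that are never written after initialisation become const (so they live in .rodata), while booke_wdt_info — presumably still written once during driver init — gets __ro_after_init, letting the kernel write-protect it after boot. A sketch with hypothetical names:

static const struct watchdog_ops foo_wdt_ops = {	/* never modified */
	.owner = THIS_MODULE,
	.start = foo_wdt_start,
	.stop  = foo_wdt_stop,
};

static struct watchdog_info foo_wdt_info __ro_after_init = {	/* patched once at init */
	.options  = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
	.identity = "Foo Watchdog",
};
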
index 98acef72334d7760296ab98ef30fa1d417a6ad83..8d61e8bfe60b1a418eab489e966652840dedccff 100644 (file)
@@ -262,7 +262,7 @@ static irqreturn_t cdns_wdt_irq_handler(int irq, void *dev_id)
  * Info structure used to indicate the features supported by the device
  * to the upper layers. This is defined in watchdog.h header file.
  */
-static struct watchdog_info cdns_wdt_info = {
+static const struct watchdog_info cdns_wdt_info = {
        .identity       = "cdns_wdt watchdog",
        .options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
                          WDIOF_MAGICCLOSE,
index a099b77fc0b91a076302f6657aaf87d4be9ed159..38dd60f0cfcc636d0b5df5197c06ffcb9f09adbf 100644 (file)
 
 /* Default timeout in seconds = 1 minute */
 static unsigned int margin = 60;
-static resource_size_t phybase;
-static resource_size_t physize;
 static int irq;
 static void __iomem *virtbase;
 static struct device *parent;
 
-/*
- * The watchdog block is of course always clocked, the
- * clk_enable()/clk_disable() calls are mainly for performing reference
- * counting higher up in the clock hierarchy.
- */
 static struct clk *clk;
 
 /*
@@ -90,7 +83,6 @@ static void coh901327_enable(u16 timeout)
        unsigned long freq;
        unsigned long delay_ns;
 
-       clk_enable(clk);
        /* Restart timer if it is disabled */
        val = readw(virtbase + U300_WDOG_D2R);
        if (val == U300_WDOG_D2R_DISABLE_STATUS_DISABLED)
@@ -118,7 +110,6 @@ static void coh901327_enable(u16 timeout)
         */
        (void) readw(virtbase + U300_WDOG_CR);
        val = readw(virtbase + U300_WDOG_D2R);
-       clk_disable(clk);
        if (val != U300_WDOG_D2R_DISABLE_STATUS_ENABLED)
                dev_err(parent,
                        "%s(): watchdog not enabled! D2R value %04x\n",
@@ -129,7 +120,6 @@ static void coh901327_disable(void)
 {
        u16 val;
 
-       clk_enable(clk);
        /* Disable the watchdog interrupt if it is active */
        writew(0x0000U, virtbase + U300_WDOG_IMR);
        /* If the watchdog is currently enabled, attempt to disable it */
@@ -144,7 +134,6 @@ static void coh901327_disable(void)
                       virtbase + U300_WDOG_D2R);
        }
        val = readw(virtbase + U300_WDOG_D2R);
-       clk_disable(clk);
        if (val != U300_WDOG_D2R_DISABLE_STATUS_DISABLED)
                dev_err(parent,
                        "%s(): watchdog not disabled! D2R value %04x\n",
@@ -165,11 +154,9 @@ static int coh901327_stop(struct watchdog_device *wdt_dev)
 
 static int coh901327_ping(struct watchdog_device *wdd)
 {
-       clk_enable(clk);
        /* Feed the watchdog */
        writew(U300_WDOG_FR_FEED_RESTART_TIMER,
               virtbase + U300_WDOG_FR);
-       clk_disable(clk);
        return 0;
 }
 
@@ -177,13 +164,11 @@ static int coh901327_settimeout(struct watchdog_device *wdt_dev,
                                unsigned int time)
 {
        wdt_dev->timeout = time;
-       clk_enable(clk);
        /* Set new timeout value */
        writew(time * 100, virtbase + U300_WDOG_TR);
        /* Feed the dog */
        writew(U300_WDOG_FR_FEED_RESTART_TIMER,
               virtbase + U300_WDOG_FR);
-       clk_disable(clk);
        return 0;
 }
 
@@ -191,13 +176,11 @@ static unsigned int coh901327_gettimeleft(struct watchdog_device *wdt_dev)
 {
        u16 val;
 
-       clk_enable(clk);
        /* Read repeatedly until the value is stable! */
        val = readw(virtbase + U300_WDOG_CR);
        while (val & U300_WDOG_CR_VALID_IND)
                val = readw(virtbase + U300_WDOG_CR);
        val &= U300_WDOG_CR_COUNT_VALUE_MASK;
-       clk_disable(clk);
        if (val != 0)
                val /= 100;
 
@@ -221,13 +204,11 @@ static irqreturn_t coh901327_interrupt(int irq, void *data)
         * to prevent a watchdog reset by feeding the watchdog at this
         * point.
         */
-       clk_enable(clk);
        val = readw(virtbase + U300_WDOG_IER);
        if (val == U300_WDOG_IER_WILL_BARK_IRQ_EVENT_IND)
                writew(U300_WDOG_IER_WILL_BARK_IRQ_ACK_ENABLE,
                       virtbase + U300_WDOG_IER);
        writew(0x0000U, virtbase + U300_WDOG_IMR);
-       clk_disable(clk);
        dev_crit(parent, "watchdog is barking!\n");
        return IRQ_HANDLED;
 }
@@ -263,81 +244,63 @@ static int __exit coh901327_remove(struct platform_device *pdev)
        watchdog_unregister_device(&coh901327_wdt);
        coh901327_disable();
        free_irq(irq, pdev);
-       clk_unprepare(clk);
+       clk_disable_unprepare(clk);
        clk_put(clk);
-       iounmap(virtbase);
-       release_mem_region(phybase, physize);
        return 0;
 }
 
 static int __init coh901327_probe(struct platform_device *pdev)
 {
+       struct device *dev = &pdev->dev;
        int ret;
        u16 val;
        struct resource *res;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENOENT;
-
-       parent = &pdev->dev;
-       physize = resource_size(res);
-       phybase = res->start;
+       parent = dev;
 
-       if (request_mem_region(phybase, physize, DRV_NAME) == NULL) {
-               ret = -EBUSY;
-               goto out;
-       }
-
-       virtbase = ioremap(phybase, physize);
-       if (!virtbase) {
-               ret = -ENOMEM;
-               goto out_no_remap;
-       }
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       virtbase = devm_ioremap_resource(dev, res);
+       if (IS_ERR(virtbase))
+               return PTR_ERR(virtbase);
 
-       clk = clk_get(&pdev->dev, NULL);
+       clk = clk_get(dev, NULL);
        if (IS_ERR(clk)) {
                ret = PTR_ERR(clk);
-               dev_err(&pdev->dev, "could not get clock\n");
-               goto out_no_clk;
+               dev_err(dev, "could not get clock\n");
+               return ret;
        }
        ret = clk_prepare_enable(clk);
        if (ret) {
-               dev_err(&pdev->dev, "could not prepare and enable clock\n");
+               dev_err(dev, "could not prepare and enable clock\n");
                goto out_no_clk_enable;
        }
 
        val = readw(virtbase + U300_WDOG_SR);
        switch (val) {
        case U300_WDOG_SR_STATUS_TIMED_OUT:
-               dev_info(&pdev->dev,
-                       "watchdog timed out since last chip reset!\n");
+               dev_info(dev, "watchdog timed out since last chip reset!\n");
                coh901327_wdt.bootstatus |= WDIOF_CARDRESET;
                /* Status will be cleared below */
                break;
        case U300_WDOG_SR_STATUS_NORMAL:
-               dev_info(&pdev->dev,
-                       "in normal status, no timeouts have occurred.\n");
+               dev_info(dev, "in normal status, no timeouts have occurred.\n");
                break;
        default:
-               dev_info(&pdev->dev,
-                       "contains an illegal status code (%08x)\n", val);
+               dev_info(dev, "contains an illegal status code (%08x)\n", val);
                break;
        }
 
        val = readw(virtbase + U300_WDOG_D2R);
        switch (val) {
        case U300_WDOG_D2R_DISABLE_STATUS_DISABLED:
-               dev_info(&pdev->dev, "currently disabled.\n");
+               dev_info(dev, "currently disabled.\n");
                break;
        case U300_WDOG_D2R_DISABLE_STATUS_ENABLED:
-               dev_info(&pdev->dev,
-                        "currently enabled! (disabling it now)\n");
+               dev_info(dev, "currently enabled! (disabling it now)\n");
                coh901327_disable();
                break;
        default:
-               dev_err(&pdev->dev,
-                       "contains an illegal enable/disable code (%08x)\n",
+               dev_err(dev, "contains an illegal enable/disable code (%08x)\n",
                        val);
                break;
        }
@@ -352,20 +315,16 @@ static int __init coh901327_probe(struct platform_device *pdev)
                goto out_no_irq;
        }
 
-       clk_disable(clk);
-
-       ret = watchdog_init_timeout(&coh901327_wdt, margin, &pdev->dev);
+       ret = watchdog_init_timeout(&coh901327_wdt, margin, dev);
        if (ret < 0)
                coh901327_wdt.timeout = 60;
 
-       coh901327_wdt.parent = &pdev->dev;
+       coh901327_wdt.parent = dev;
        ret = watchdog_register_device(&coh901327_wdt);
-       if (ret == 0)
-               dev_info(&pdev->dev,
-                        "initialized. timer margin=%d sec\n", margin);
-       else
+       if (ret)
                goto out_no_wdog;
 
+       dev_info(dev, "initialized. timer margin=%d sec\n", margin);
        return 0;
 
 out_no_wdog:
@@ -374,11 +333,6 @@ out_no_irq:
        clk_disable_unprepare(clk);
 out_no_clk_enable:
        clk_put(clk);
-out_no_clk:
-       iounmap(virtbase);
-out_no_remap:
-       release_mem_region(phybase, SZ_4K);
-out:
        return ret;
 }
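
The coh901327 rewrite also changes the clocking model: instead of bracketing every register access with clk_enable()/clk_disable() — pure reference counting, since the removed comment notes the block is always clocked — the clock is now taken once in probe and balanced once in remove. The skeleton, with error handling elided:

	/* probe */
	clk = clk_get(dev, NULL);
	ret = clk_prepare_enable(clk);	/* keep the clock on for the driver's lifetime */

	/* remove */
	clk_disable_unprepare(clk);
	clk_put(clk);
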
 
index 2fc19a32a320bfcd49113c30e441bb837054d588..d6d5006efa717df162a4746341467d96a677f412 100644 (file)
@@ -128,19 +128,17 @@ static int da9052_wdt_ping(struct watchdog_device *wdt_dev)
        ret = da9052_reg_update(da9052, DA9052_CONTROL_D_REG,
                                DA9052_CONTROLD_WATCHDOG, 1 << 7);
        if (ret < 0)
-               goto err_strobe;
+               return ret;
 
        /*
         * FIXME: Reset the watchdog core, in general PMIC
         * is supposed to do this
         */
-       ret = da9052_reg_update(da9052, DA9052_CONTROL_D_REG,
-                               DA9052_CONTROLD_WATCHDOG, 0 << 7);
-err_strobe:
-       return ret;
+       return da9052_reg_update(da9052, DA9052_CONTROL_D_REG,
+                                DA9052_CONTROLD_WATCHDOG, 0 << 7);
 }
 
-static struct watchdog_info da9052_wdt_info = {
+static const struct watchdog_info da9052_wdt_info = {
        .options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
        .identity       = "DA9052 Watchdog",
 };
@@ -163,10 +161,8 @@ static int da9052_wdt_probe(struct platform_device *pdev)
 
        driver_data = devm_kzalloc(&pdev->dev, sizeof(*driver_data),
                                   GFP_KERNEL);
-       if (!driver_data) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       if (!driver_data)
+               return -ENOMEM;
        driver_data->da9052 = da9052;
 
        da9052_wdt = &driver_data->wdt;
@@ -182,33 +178,21 @@ static int da9052_wdt_probe(struct platform_device *pdev)
        if (ret < 0) {
                dev_err(&pdev->dev, "Failed to disable watchdog bits, %d\n",
                        ret);
-               goto err;
+               return ret;
        }
 
-       ret = watchdog_register_device(&driver_data->wdt);
+       ret = devm_watchdog_register_device(&pdev->dev, &driver_data->wdt);
        if (ret != 0) {
                dev_err(da9052->dev, "watchdog_register_device() failed: %d\n",
                        ret);
-               goto err;
+               return ret;
        }
 
-       platform_set_drvdata(pdev, driver_data);
-err:
        return ret;
 }
 
-static int da9052_wdt_remove(struct platform_device *pdev)
-{
-       struct da9052_wdt_data *driver_data = platform_get_drvdata(pdev);
-
-       watchdog_unregister_device(&driver_data->wdt);
-
-       return 0;
-}
-
 static struct platform_driver da9052_wdt_driver = {
        .probe = da9052_wdt_probe,
-       .remove = da9052_wdt_remove,
        .driver = {
                .name   = "da9052-watchdog",
        },
index 8377c43f3f20ff0c7537642b05ac580badffdd71..50bdd10221864994a771b868acd98c07fc398528 100644 (file)
@@ -108,7 +108,7 @@ static int da9055_wdt_stop(struct watchdog_device *wdt_dev)
        return da9055_wdt_set_timeout(wdt_dev, 0);
 }
 
-static struct watchdog_info da9055_wdt_info = {
+static const struct watchdog_info da9055_wdt_info = {
        .options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
        .identity       = "DA9055 Watchdog",
 };
@@ -147,32 +147,19 @@ static int da9055_wdt_probe(struct platform_device *pdev)
        ret = da9055_wdt_stop(da9055_wdt);
        if (ret < 0) {
                dev_err(&pdev->dev, "Failed to stop watchdog, %d\n", ret);
-               goto err;
+               return ret;
        }
 
-       platform_set_drvdata(pdev, driver_data);
-
-       ret = watchdog_register_device(&driver_data->wdt);
+       ret = devm_watchdog_register_device(&pdev->dev, &driver_data->wdt);
        if (ret != 0)
                dev_err(da9055->dev, "watchdog_register_device() failed: %d\n",
                        ret);
 
-err:
        return ret;
 }
 
-static int da9055_wdt_remove(struct platform_device *pdev)
-{
-       struct da9055_wdt_data *driver_data = platform_get_drvdata(pdev);
-
-       watchdog_unregister_device(&driver_data->wdt);
-
-       return 0;
-}
-
 static struct platform_driver da9055_wdt_driver = {
        .probe = da9055_wdt_probe,
-       .remove = da9055_wdt_remove,
        .driver = {
                .name   = "da9055-watchdog",
        },
index a02cee6820a1593ede5ff6969c48451aa2727e8d..9083d3d922b0b4304a3623b307c3b6a66e99f2ef 100644 (file)
@@ -220,9 +220,8 @@ static int da9062_wdt_probe(struct platform_device *pdev)
        wdt->wdtdev.parent = &pdev->dev;
 
        watchdog_set_drvdata(&wdt->wdtdev, wdt);
-       dev_set_drvdata(&pdev->dev, wdt);
 
-       ret = watchdog_register_device(&wdt->wdtdev);
+       ret = devm_watchdog_register_device(&pdev->dev, &wdt->wdtdev);
        if (ret < 0) {
                dev_err(wdt->hw->dev,
                        "watchdog registration failed (%d)\n", ret);
@@ -231,24 +230,11 @@ static int da9062_wdt_probe(struct platform_device *pdev)
 
        da9062_set_window_start(wdt);
 
-       ret = da9062_wdt_ping(&wdt->wdtdev);
-       if (ret < 0)
-               watchdog_unregister_device(&wdt->wdtdev);
-
-       return ret;
-}
-
-static int da9062_wdt_remove(struct platform_device *pdev)
-{
-       struct da9062_watchdog *wdt = dev_get_drvdata(&pdev->dev);
-
-       watchdog_unregister_device(&wdt->wdtdev);
-       return 0;
+       return da9062_wdt_ping(&wdt->wdtdev);
 }
 
 static struct platform_driver da9062_wdt_driver = {
        .probe = da9062_wdt_probe,
-       .remove = da9062_wdt_remove,
        .driver = {
                .name = "da9062-watchdog",
                .of_match_table = da9062_compatible_id_table,
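
With the registration now device-managed, the old da9062 error path — ping the freshly registered watchdog and unregister it again on failure — collapses into a plain return: if da9062_wdt_ping() fails, probe fails and devres unwinds the registration automatically.

	/* tail of probe */
	return da9062_wdt_ping(&wdt->wdtdev);	/* on error, devres unregisters the wdd */
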
index 5d6b4e5f7989ccc4d63a88e8b366f9b6d48b63bb..4691c5509129ec51f3df37e63b3af584a833929d 100644 (file)
@@ -151,7 +151,6 @@ static const struct watchdog_ops da9063_watchdog_ops = {
 
 static int da9063_wdt_probe(struct platform_device *pdev)
 {
-       int ret;
        struct da9063 *da9063;
        struct da9063_watchdog *wdt;
 
@@ -181,27 +180,12 @@ static int da9063_wdt_probe(struct platform_device *pdev)
        watchdog_set_restart_priority(&wdt->wdtdev, 128);
 
        watchdog_set_drvdata(&wdt->wdtdev, wdt);
-       dev_set_drvdata(&pdev->dev, wdt);
-
-       ret = watchdog_register_device(&wdt->wdtdev);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-static int da9063_wdt_remove(struct platform_device *pdev)
-{
-       struct da9063_watchdog *wdt = dev_get_drvdata(&pdev->dev);
-
-       watchdog_unregister_device(&wdt->wdtdev);
 
-       return 0;
+       return devm_watchdog_register_device(&pdev->dev, &wdt->wdtdev);
 }
 
 static struct platform_driver da9063_wdt_driver = {
        .probe = da9063_wdt_probe,
-       .remove = da9063_wdt_remove,
        .driver = {
                .name = DA9063_DRVNAME_WATCHDOG,
        },
index 861d3d3133f8cac99ed2c07188ffb68947304e08..6f591084bb7af2fb64510171a8ba872c919f53b1 100644 (file)
@@ -205,7 +205,7 @@ static int wdt_set_timeout(struct watchdog_device * dev, unsigned int new_to)
        return wdt_ping(dev);
 }
 
-static struct watchdog_ops wdt_ops = {
+static const struct watchdog_ops wdt_ops = {
        .owner = THIS_MODULE,
        .start = wdt_start,
        .stop = wdt_stop,
index 77df772406b084b93104f13221bdc248d7317bef..5e4ef93caa02b603998a2ab4b5f71cbf5c2ca4df 100644 (file)
@@ -96,7 +96,7 @@ static unsigned int dc_wdt_get_timeleft(struct watchdog_device *wdog)
        return count / clk_get_rate(wdt->clk);
 }
 
-static struct watchdog_ops dc_wdt_ops = {
+static const struct watchdog_ops dc_wdt_ops = {
        .owner          = THIS_MODULE,
        .start          = dc_wdt_start,
        .stop           = dc_wdt_stop,
@@ -105,7 +105,7 @@ static struct watchdog_ops dc_wdt_ops = {
        .restart        = dc_wdt_restart,
 };
 
-static struct watchdog_info dc_wdt_info = {
+static const struct watchdog_info dc_wdt_info = {
        .options        = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE
                        | WDIOF_KEEPALIVEPING,
        .identity       = "Conexant Digicolor Watchdog",
@@ -119,62 +119,40 @@ static struct watchdog_device dc_wdt_wdd = {
 
 static int dc_wdt_probe(struct platform_device *pdev)
 {
+       struct resource *res;
        struct device *dev = &pdev->dev;
-       struct device_node *np = dev->of_node;
        struct dc_wdt *wdt;
        int ret;
 
        wdt = devm_kzalloc(dev, sizeof(struct dc_wdt), GFP_KERNEL);
        if (!wdt)
                return -ENOMEM;
-       platform_set_drvdata(pdev, wdt);
 
-       wdt->base = of_iomap(np, 0);
-       if (!wdt->base) {
-               dev_err(dev, "Failed to remap watchdog regs");
-               return -ENODEV;
-       }
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       wdt->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(wdt->base))
+               return PTR_ERR(wdt->base);
 
-       wdt->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(wdt->clk)) {
-               ret = PTR_ERR(wdt->clk);
-               goto err_iounmap;
-       }
+       wdt->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(wdt->clk))
+               return PTR_ERR(wdt->clk);
        dc_wdt_wdd.max_timeout = U32_MAX / clk_get_rate(wdt->clk);
        dc_wdt_wdd.timeout = dc_wdt_wdd.max_timeout;
-       dc_wdt_wdd.parent = &pdev->dev;
+       dc_wdt_wdd.parent = dev;
 
        spin_lock_init(&wdt->lock);
 
        watchdog_set_drvdata(&dc_wdt_wdd, wdt);
        watchdog_set_restart_priority(&dc_wdt_wdd, 128);
        watchdog_init_timeout(&dc_wdt_wdd, timeout, dev);
-       ret = watchdog_register_device(&dc_wdt_wdd);
+       watchdog_stop_on_reboot(&dc_wdt_wdd);
+       ret = devm_watchdog_register_device(dev, &dc_wdt_wdd);
        if (ret) {
                dev_err(dev, "Failed to register watchdog device");
-               goto err_iounmap;
+               return ret;
        }
 
        return 0;
-
-err_iounmap:
-       iounmap(wdt->base);
-       return ret;
-}
-
-static int dc_wdt_remove(struct platform_device *pdev)
-{
-       struct dc_wdt *wdt = platform_get_drvdata(pdev);
-
-       watchdog_unregister_device(&dc_wdt_wdd);
-       iounmap(wdt->base);
-
-       return 0;
-}
-
-static void dc_wdt_shutdown(struct platform_device *pdev)
-{
-       dc_wdt_stop(&dc_wdt_wdd);
 }
 
 static const struct of_device_id dc_wdt_of_match[] = {
@@ -185,8 +163,6 @@ MODULE_DEVICE_TABLE(of, dc_wdt_of_match);
 
 static struct platform_driver dc_wdt_driver = {
        .probe          = dc_wdt_probe,
-       .remove         = dc_wdt_remove,
-       .shutdown       = dc_wdt_shutdown,
        .driver = {
                .name =         "digicolor-wdt",
                .of_match_table = dc_wdt_of_match,
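
The digicolor and bcm2835 probes above switch from of_iomap() plus manual iounmap() on every error path to the managed idiom, which both shortens probe and removes the remove-time cleanup:

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);	/* requests + maps; auto-freed */
	if (IS_ERR(base))
		return PTR_ERR(base);
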
index 3c6a3de13a1bc1cf06c3cc2a85f9489227952c4f..914da3a4d3341514c4d27560f39cd27624afb9c6 100644 (file)
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/pm.h>
 #include <linux/platform_device.h>
-#include <linux/reboot.h>
 #include <linux/watchdog.h>
 
 #define WDOG_CONTROL_REG_OFFSET                    0x00
@@ -55,7 +53,6 @@ struct dw_wdt {
        void __iomem            *regs;
        struct clk              *clk;
        unsigned long           rate;
-       struct notifier_block   restart_handler;
        struct watchdog_device  wdd;
 };
 
@@ -136,14 +133,12 @@ static int dw_wdt_start(struct watchdog_device *wdd)
        return 0;
 }
 
-static int dw_wdt_restart_handle(struct notifier_block *this,
-                                unsigned long mode, void *cmd)
+static int dw_wdt_restart(struct watchdog_device *wdd,
+                         unsigned long action, void *data)
 {
-       struct dw_wdt *dw_wdt;
+       struct dw_wdt *dw_wdt = to_dw_wdt(wdd);
        u32 val;
 
-       dw_wdt = container_of(this, struct dw_wdt, restart_handler);
-
        writel(0, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
        val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
        if (val & WDOG_CONTROL_REG_WDT_EN_MASK)
@@ -156,7 +151,7 @@ static int dw_wdt_restart_handle(struct notifier_block *this,
        /* wait for reset to assert... */
        mdelay(500);
 
-       return NOTIFY_DONE;
+       return 0;
 }
 
 static unsigned int dw_wdt_get_timeleft(struct watchdog_device *wdd)
@@ -179,6 +174,7 @@ static const struct watchdog_ops dw_wdt_ops = {
        .ping           = dw_wdt_ping,
        .set_timeout    = dw_wdt_set_timeout,
        .get_timeleft   = dw_wdt_get_timeleft,
+       .restart        = dw_wdt_restart,
 };
 
 #ifdef CONFIG_PM_SLEEP
@@ -265,16 +261,12 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, dw_wdt);
 
+       watchdog_set_restart_priority(wdd, 128);
+
        ret = watchdog_register_device(wdd);
        if (ret)
                goto out_disable_clk;
 
-       dw_wdt->restart_handler.notifier_call = dw_wdt_restart_handle;
-       dw_wdt->restart_handler.priority = 128;
-       ret = register_restart_handler(&dw_wdt->restart_handler);
-       if (ret)
-               pr_warn("cannot register restart handler\n");
-
        return 0;
 
 out_disable_clk:
@@ -286,7 +278,6 @@ static int dw_wdt_drv_remove(struct platform_device *pdev)
 {
        struct dw_wdt *dw_wdt = platform_get_drvdata(pdev);
 
-       unregister_restart_handler(&dw_wdt->restart_handler);
        watchdog_unregister_device(&dw_wdt->wdd);
        clk_disable_unprepare(dw_wdt->clk);
 
index 4b849b8e37c2621ab6be50b1b4a0b2f6c81f9297..2170b275ea017ab2c0fcf35294111b2cf58da8ca 100644 (file)
@@ -121,18 +121,7 @@ static int ebc_c384_wdt_probe(struct device *dev, unsigned int id)
                dev_warn(dev, "Invalid timeout (%u seconds), using default (%u seconds)\n",
                        timeout, WATCHDOG_TIMEOUT);
 
-       dev_set_drvdata(dev, wdd);
-
-       return watchdog_register_device(wdd);
-}
-
-static int ebc_c384_wdt_remove(struct device *dev, unsigned int id)
-{
-       struct watchdog_device *wdd = dev_get_drvdata(dev);
-
-       watchdog_unregister_device(wdd);
-
-       return 0;
+       return devm_watchdog_register_device(dev, wdd);
 }
 
 static struct isa_driver ebc_c384_wdt_driver = {
@@ -140,7 +129,6 @@ static struct isa_driver ebc_c384_wdt_driver = {
        .driver = {
                .name = MODULE_NAME
        },
-       .remove = ebc_c384_wdt_remove
 };
 
 static int __init ebc_c384_wdt_init(void)
index 0a4d7cc05d5439346ae0b77633089a08ea0464d6..f9b14e6efd9ac226f56ca232812e61276266e090 100644 (file)
  * for us to rely on the user space daemon alone. So we ping the
  * wdt each ~200msec and eventually stop doing it if the user space
  * daemon dies.
- *
- * TODO:
- *
- *     - Test last reset from watchdog status
- *     - Add a few missing ioctls
  */
 
 #include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/watchdog.h>
-#include <linux/timer.h>
 #include <linux/io.h>
 
-#define WDT_VERSION    "0.4"
-
 /* default timeout (secs) */
 #define WDT_TIMEOUT 30
 
@@ -41,117 +33,101 @@ static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started");
 
-static unsigned int timeout = WDT_TIMEOUT;
+static unsigned int timeout;
 module_param(timeout, uint, 0);
-MODULE_PARM_DESC(timeout,
-       "Watchdog timeout in seconds. (1<=timeout<=3600, default="
-                               __MODULE_STRING(WDT_TIMEOUT) ")");
-
-static void __iomem *mmio_base;
-static struct timer_list timer;
-static unsigned long next_heartbeat;
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds.");
 
 #define EP93XX_WATCHDOG                0x00
 #define EP93XX_WDSTATUS                0x04
 
-/* reset the wdt every ~200ms - the heartbeat of the device is 0.250 seconds*/
-#define WDT_INTERVAL (HZ/5)
-
-static void ep93xx_wdt_timer_ping(unsigned long data)
-{
-       if (time_before(jiffies, next_heartbeat))
-               writel(0x5555, mmio_base + EP93XX_WATCHDOG);
-
-       /* Re-set the timer interval */
-       mod_timer(&timer, jiffies + WDT_INTERVAL);
-}
+struct ep93xx_wdt_priv {
+       void __iomem *mmio;
+       struct watchdog_device wdd;
+};
 
 static int ep93xx_wdt_start(struct watchdog_device *wdd)
 {
-       next_heartbeat = jiffies + (timeout * HZ);
+       struct ep93xx_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-       writel(0xaaaa, mmio_base + EP93XX_WATCHDOG);
-       mod_timer(&timer, jiffies + WDT_INTERVAL);
+       writel(0xaaaa, priv->mmio + EP93XX_WATCHDOG);
 
        return 0;
 }
 
 static int ep93xx_wdt_stop(struct watchdog_device *wdd)
 {
-       del_timer_sync(&timer);
-       writel(0xaa55, mmio_base + EP93XX_WATCHDOG);
+       struct ep93xx_wdt_priv *priv = watchdog_get_drvdata(wdd);
+
+       writel(0xaa55, priv->mmio + EP93XX_WATCHDOG);
 
        return 0;
 }
 
-static int ep93xx_wdt_keepalive(struct watchdog_device *wdd)
+static int ep93xx_wdt_ping(struct watchdog_device *wdd)
 {
-       /* user land ping */
-       next_heartbeat = jiffies + (timeout * HZ);
+       struct ep93xx_wdt_priv *priv = watchdog_get_drvdata(wdd);
+
+       writel(0x5555, priv->mmio + EP93XX_WATCHDOG);
 
        return 0;
 }
 
 static const struct watchdog_info ep93xx_wdt_ident = {
        .options        = WDIOF_CARDRESET |
+                         WDIOF_SETTIMEOUT |
                          WDIOF_MAGICCLOSE |
                          WDIOF_KEEPALIVEPING,
        .identity       = "EP93xx Watchdog",
 };
 
-static struct watchdog_ops ep93xx_wdt_ops = {
+static const struct watchdog_ops ep93xx_wdt_ops = {
        .owner          = THIS_MODULE,
        .start          = ep93xx_wdt_start,
        .stop           = ep93xx_wdt_stop,
-       .ping           = ep93xx_wdt_keepalive,
-};
-
-static struct watchdog_device ep93xx_wdt_wdd = {
-       .info           = &ep93xx_wdt_ident,
-       .ops            = &ep93xx_wdt_ops,
+       .ping           = ep93xx_wdt_ping,
 };
 
 static int ep93xx_wdt_probe(struct platform_device *pdev)
 {
+       struct ep93xx_wdt_priv *priv;
+       struct watchdog_device *wdd;
        struct resource *res;
        unsigned long val;
-       int err;
+       int ret;
+
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       mmio_base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(mmio_base))
-               return PTR_ERR(mmio_base);
+       priv->mmio = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(priv->mmio))
+               return PTR_ERR(priv->mmio);
 
-       if (timeout < 1 || timeout > 3600) {
-               timeout = WDT_TIMEOUT;
-               dev_warn(&pdev->dev,
-                       "timeout value must be 1<=x<=3600, using %d\n",
-                       timeout);
-       }
+       val = readl(priv->mmio + EP93XX_WATCHDOG);
 
-       val = readl(mmio_base + EP93XX_WATCHDOG);
-       ep93xx_wdt_wdd.bootstatus = (val & 0x01) ? WDIOF_CARDRESET : 0;
-       ep93xx_wdt_wdd.timeout = timeout;
-       ep93xx_wdt_wdd.parent = &pdev->dev;
+       wdd = &priv->wdd;
+       wdd->bootstatus = (val & 0x01) ? WDIOF_CARDRESET : 0;
+       wdd->info = &ep93xx_wdt_ident;
+       wdd->ops = &ep93xx_wdt_ops;
+       wdd->min_timeout = 1;
+       wdd->max_hw_heartbeat_ms = 200;
+       wdd->parent = &pdev->dev;
 
-       watchdog_set_nowayout(&ep93xx_wdt_wdd, nowayout);
+       watchdog_set_nowayout(wdd, nowayout);
 
-       setup_timer(&timer, ep93xx_wdt_timer_ping, 1);
+       wdd->timeout = WDT_TIMEOUT;
+       watchdog_init_timeout(wdd, timeout, &pdev->dev);
 
-       err = watchdog_register_device(&ep93xx_wdt_wdd);
-       if (err)
-               return err;
+       watchdog_set_drvdata(wdd, priv);
 
-       dev_info(&pdev->dev,
-               "EP93XX watchdog, driver version " WDT_VERSION "%s\n",
-               (val & 0x08) ? " (nCS1 disable detected)" : "");
+       ret = devm_watchdog_register_device(&pdev->dev, wdd);
+       if (ret)
+               return ret;
 
-       return 0;
-}
+       dev_info(&pdev->dev, "EP93XX watchdog driver%s\n",
+               (val & 0x08) ? " (nCS1 disable detected)" : "");
 
-static int ep93xx_wdt_remove(struct platform_device *pdev)
-{
-       watchdog_unregister_device(&ep93xx_wdt_wdd);
        return 0;
 }
 
@@ -160,7 +136,6 @@ static struct platform_driver ep93xx_wdt_driver = {
                .name   = "ep93xx-wdt",
        },
        .probe          = ep93xx_wdt_probe,
-       .remove         = ep93xx_wdt_remove,
 };
 
 module_platform_driver(ep93xx_wdt_driver);
@@ -170,4 +145,3 @@ MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
 MODULE_AUTHOR("H Hartley Sweeten <hsweeten@visionengravers.com>");
 MODULE_DESCRIPTION("EP93xx Watchdog");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(WDT_VERSION);
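
The ep93xx rewrite deletes the driver's private 200 ms kernel timer entirely. Per the removed comment, the hardware bites roughly 250 ms after the last feed, so the driver now declares that limit and lets the watchdog core do the feeding whenever the configured timeout is longer than the hardware allows:

	wdd->max_hw_heartbeat_ms = 200;	/* hw must be fed at least every ~200 ms */
	wdd->timeout = WDT_TIMEOUT;	/* user-visible timeout; core pings hw in between */

Moving the file-scope globals into struct ep93xx_wdt_priv also means the driver could in principle handle more than one instance.
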
diff --git a/drivers/watchdog/gemini_wdt.c b/drivers/watchdog/gemini_wdt.c
new file mode 100644 (file)
index 0000000..8155aa6
--- /dev/null
@@ -0,0 +1,229 @@
+/*
+ * Watchdog driver for Cortina Systems Gemini SoC
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ *
+ * Inspired by the out-of-tree drivers from OpenWRT:
+ * Copyright (C) 2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/watchdog.h>
+
+#define GEMINI_WDCOUNTER       0x0
+#define GEMINI_WDLOAD          0x4
+#define GEMINI_WDRESTART       0x8
+#define GEMINI_WDCR            0xC
+
+#define WDRESTART_MAGIC                0x5AB9
+
+#define WDCR_CLOCK_5MHZ                BIT(4)
+#define WDCR_SYS_RST           BIT(1)
+#define WDCR_ENABLE            BIT(0)
+
+#define WDT_CLOCK              5000000         /* 5 MHz */
+
+struct gemini_wdt {
+       struct watchdog_device  wdd;
+       struct device           *dev;
+       void __iomem            *base;
+};
+
+static inline
+struct gemini_wdt *to_gemini_wdt(struct watchdog_device *wdd)
+{
+       return container_of(wdd, struct gemini_wdt, wdd);
+}
+
+static int gemini_wdt_start(struct watchdog_device *wdd)
+{
+       struct gemini_wdt *gwdt = to_gemini_wdt(wdd);
+
+       writel(wdd->timeout * WDT_CLOCK, gwdt->base + GEMINI_WDLOAD);
+       writel(WDRESTART_MAGIC, gwdt->base + GEMINI_WDRESTART);
+       /* set clock before enabling */
+       writel(WDCR_CLOCK_5MHZ | WDCR_SYS_RST,
+                       gwdt->base + GEMINI_WDCR);
+       writel(WDCR_CLOCK_5MHZ | WDCR_SYS_RST | WDCR_ENABLE,
+                       gwdt->base + GEMINI_WDCR);
+
+       return 0;
+}
+
+static int gemini_wdt_stop(struct watchdog_device *wdd)
+{
+       struct gemini_wdt *gwdt = to_gemini_wdt(wdd);
+
+       writel(0, gwdt->base + GEMINI_WDCR);
+
+       return 0;
+}
+
+static int gemini_wdt_ping(struct watchdog_device *wdd)
+{
+       struct gemini_wdt *gwdt = to_gemini_wdt(wdd);
+
+       writel(WDRESTART_MAGIC, gwdt->base + GEMINI_WDRESTART);
+
+       return 0;
+}
+
+static int gemini_wdt_set_timeout(struct watchdog_device *wdd,
+                                 unsigned int timeout)
+{
+       wdd->timeout = timeout;
+       if (watchdog_active(wdd))
+               gemini_wdt_start(wdd);
+
+       return 0;
+}
+
+static irqreturn_t gemini_wdt_interrupt(int irq, void *data)
+{
+       struct gemini_wdt *gwdt = data;
+
+       watchdog_notify_pretimeout(&gwdt->wdd);
+
+       return IRQ_HANDLED;
+}
+
+static const struct watchdog_ops gemini_wdt_ops = {
+       .start          = gemini_wdt_start,
+       .stop           = gemini_wdt_stop,
+       .ping           = gemini_wdt_ping,
+       .set_timeout    = gemini_wdt_set_timeout,
+       .owner          = THIS_MODULE,
+};
+
+static const struct watchdog_info gemini_wdt_info = {
+       .options        = WDIOF_KEEPALIVEPING
+                       | WDIOF_MAGICCLOSE
+                       | WDIOF_SETTIMEOUT,
+       .identity       = KBUILD_MODNAME,
+};
+
+
+static int gemini_wdt_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *res;
+       struct gemini_wdt *gwdt;
+       unsigned int reg;
+       int irq;
+       int ret;
+
+       gwdt = devm_kzalloc(dev, sizeof(*gwdt), GFP_KERNEL);
+       if (!gwdt)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       gwdt->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(gwdt->base))
+               return PTR_ERR(gwdt->base);
+
+       irq = platform_get_irq(pdev, 0);
+       /* platform_get_irq() returns a negative errno on failure */
+       if (irq <= 0)
+               return irq ? irq : -EINVAL;
+
+       gwdt->dev = dev;
+       gwdt->wdd.info = &gemini_wdt_info;
+       gwdt->wdd.ops = &gemini_wdt_ops;
+       gwdt->wdd.min_timeout = 1;
+       gwdt->wdd.max_timeout = 0xFFFFFFFF / WDT_CLOCK;
+       gwdt->wdd.parent = dev;
+
+       /*
+        * If 'timeout-sec' is unspecified in the devicetree, assume a
+        * 13 second default.
+        */
+       gwdt->wdd.timeout = 13U;
+       watchdog_init_timeout(&gwdt->wdd, 0, dev);
+
+       reg = readw(gwdt->base + GEMINI_WDCR);
+       if (reg & WDCR_ENABLE) {
+               /* Watchdog was enabled by the bootloader, disable it. */
+               reg &= ~WDCR_ENABLE;
+               writel(reg, gwdt->base + GEMINI_WDCR);
+       }
+
+       ret = devm_request_irq(dev, irq, gemini_wdt_interrupt, 0,
+                              "watchdog bark", gwdt);
+       if (ret)
+               return ret;
+
+       ret = devm_watchdog_register_device(dev, &gwdt->wdd);
+       if (ret) {
+               dev_err(dev, "failed to register watchdog\n");
+               return ret;
+       }
+
+       /* Set up platform driver data */
+       platform_set_drvdata(pdev, gwdt);
+       dev_info(dev, "Gemini watchdog driver enabled\n");
+
+       return 0;
+}
+
+static int __maybe_unused gemini_wdt_suspend(struct device *dev)
+{
+       struct gemini_wdt *gwdt = dev_get_drvdata(dev);
+       unsigned int reg;
+
+       reg = readw(gwdt->base + GEMINI_WDCR);
+       reg &= ~WDCR_ENABLE;
+       writel(reg, gwdt->base + GEMINI_WDCR);
+
+       return 0;
+}
+
+static int __maybe_unused gemini_wdt_resume(struct device *dev)
+{
+       struct gemini_wdt *gwdt = dev_get_drvdata(dev);
+       unsigned int reg;
+
+       if (watchdog_active(&gwdt->wdd)) {
+               reg = readw(gwdt->base + GEMINI_WDCR);
+               reg |= WDCR_ENABLE;
+               writel(reg, gwdt->base + GEMINI_WDCR);
+       }
+
+       return 0;
+}
+
+static const struct dev_pm_ops gemini_wdt_dev_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(gemini_wdt_suspend,
+                               gemini_wdt_resume)
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id gemini_wdt_match[] = {
+       { .compatible = "cortina,gemini-watchdog" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, gemini_wdt_match);
+#endif
+
+static struct platform_driver gemini_wdt_driver = {
+       .probe          = gemini_wdt_probe,
+       .driver         = {
+               .name   = "gemini-wdt",
+               .of_match_table = of_match_ptr(gemini_wdt_match),
+               .pm = &gemini_wdt_dev_pm_ops,
+       },
+};
+module_platform_driver(gemini_wdt_driver);
+MODULE_AUTHOR("Linus Walleij");
+MODULE_DESCRIPTION("Watchdog driver for Gemini");
+MODULE_LICENSE("GPL");
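
For reference, the new driver loads the down-counter with timeout * WDT_CLOCK
ticks of the fixed 5 MHz clock, which is what bounds max_timeout at
0xFFFFFFFF / 5000000 = 858 seconds. A minimal standalone sketch of that
arithmetic (the helper name is illustrative only, not part of the driver):

	#include <stdint.h>

	#define WDT_CLOCK	5000000U	/* fixed 5 MHz watchdog clock */

	/* Value the driver writes to GEMINI_WDLOAD for a given timeout. */
	static uint32_t gemini_load_value(uint32_t timeout_secs)
	{
		return timeout_secs * WDT_CLOCK;
	}

	/* Ceiling: the 32-bit counter overflows past this point.
	 * 0xFFFFFFFF / 5000000 = 858 (integer division). */
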
index 06fcb6c8c9172379dd320aaaed10f368cdced739..3d0abc0d59b4f176d62ce30ebc236471cde051d2 100644 (file)
 
 /* Address definitions for the TCO */
 /* TCO base address */
-#define TCOBASE                (iTCO_wdt_private.tco_res->start)
+#define TCOBASE(p)     ((p)->tco_res->start)
 /* SMI Control and Enable Register */
-#define SMI_EN         (iTCO_wdt_private.smi_res->start)
-
-#define TCO_RLD                (TCOBASE + 0x00) /* TCO Timer Reload and Curr. Value */
-#define TCOv1_TMR      (TCOBASE + 0x01) /* TCOv1 Timer Initial Value   */
-#define TCO_DAT_IN     (TCOBASE + 0x02) /* TCO Data In Register        */
-#define TCO_DAT_OUT    (TCOBASE + 0x03) /* TCO Data Out Register       */
-#define TCO1_STS       (TCOBASE + 0x04) /* TCO1 Status Register        */
-#define TCO2_STS       (TCOBASE + 0x06) /* TCO2 Status Register        */
-#define TCO1_CNT       (TCOBASE + 0x08) /* TCO1 Control Register       */
-#define TCO2_CNT       (TCOBASE + 0x0a) /* TCO2 Control Register       */
-#define TCOv2_TMR      (TCOBASE + 0x12) /* TCOv2 Timer Initial Value   */
+#define SMI_EN(p)      ((p)->smi_res->start)
+
+#define TCO_RLD(p)     (TCOBASE(p) + 0x00) /* TCO Timer Reload/Curr. Value */
+#define TCOv1_TMR(p)   (TCOBASE(p) + 0x01) /* TCOv1 Timer Initial Value*/
+#define TCO_DAT_IN(p)  (TCOBASE(p) + 0x02) /* TCO Data In Register     */
+#define TCO_DAT_OUT(p) (TCOBASE(p) + 0x03) /* TCO Data Out Register    */
+#define TCO1_STS(p)    (TCOBASE(p) + 0x04) /* TCO1 Status Register     */
+#define TCO2_STS(p)    (TCOBASE(p) + 0x06) /* TCO2 Status Register     */
+#define TCO1_CNT(p)    (TCOBASE(p) + 0x08) /* TCO1 Control Register    */
+#define TCO2_CNT(p)    (TCOBASE(p) + 0x0a) /* TCO2 Control Register    */
+#define TCOv2_TMR(p)   (TCOBASE(p) + 0x12) /* TCOv2 Timer Initial Value*/
 
 /* internal variables */
-static struct {                /* this is private data for the iTCO_wdt device */
+struct iTCO_wdt_private {
+       struct watchdog_device wddev;
+
        /* TCO version/generation */
        unsigned int iTCO_version;
        struct resource *tco_res;
@@ -100,12 +102,11 @@ static struct {           /* this is private data for the iTCO_wdt device */
        unsigned long __iomem *gcs_pmc;
        /* the lock for io operations */
        spinlock_t io_lock;
-       struct platform_device *dev;
        /* the PCI-device */
-       struct pci_dev *pdev;
+       struct pci_dev *pci_dev;
        /* whether or not the watchdog has been suspended */
        bool suspended;
-} iTCO_wdt_private;
+};
 
 /* module parameters */
 #define WATCHDOG_TIMEOUT 30    /* 30 sec default heartbeat */
@@ -135,21 +136,23 @@ MODULE_PARM_DESC(turn_SMI_watchdog_clear_off,
  * every 0.6 seconds.  v3's internal timer is stored as seconds (some
  * datasheets incorrectly state 0.6 seconds).
  */
-static inline unsigned int seconds_to_ticks(int secs)
+static inline unsigned int seconds_to_ticks(struct iTCO_wdt_private *p,
+                                           int secs)
 {
-       return iTCO_wdt_private.iTCO_version == 3 ? secs : (secs * 10) / 6;
+       return p->iTCO_version == 3 ? secs : (secs * 10) / 6;
 }
 
-static inline unsigned int ticks_to_seconds(int ticks)
+static inline unsigned int ticks_to_seconds(struct iTCO_wdt_private *p,
+                                           int ticks)
 {
-       return iTCO_wdt_private.iTCO_version == 3 ? ticks : (ticks * 6) / 10;
+       return p->iTCO_version == 3 ? ticks : (ticks * 6) / 10;
 }
 
-static inline u32 no_reboot_bit(void)
+static inline u32 no_reboot_bit(struct iTCO_wdt_private *p)
 {
        u32 enable_bit;
 
-       switch (iTCO_wdt_private.iTCO_version) {
+       switch (p->iTCO_version) {
        case 5:
        case 3:
                enable_bit = 0x00000010;
@@ -167,40 +170,40 @@ static inline u32 no_reboot_bit(void)
        return enable_bit;
 }
 
-static void iTCO_wdt_set_NO_REBOOT_bit(void)
+static void iTCO_wdt_set_NO_REBOOT_bit(struct iTCO_wdt_private *p)
 {
        u32 val32;
 
        /* Set the NO_REBOOT bit: this disables reboots */
-       if (iTCO_wdt_private.iTCO_version >= 2) {
-               val32 = readl(iTCO_wdt_private.gcs_pmc);
-               val32 |= no_reboot_bit();
-               writel(val32, iTCO_wdt_private.gcs_pmc);
-       } else if (iTCO_wdt_private.iTCO_version == 1) {
-               pci_read_config_dword(iTCO_wdt_private.pdev, 0xd4, &val32);
-               val32 |= no_reboot_bit();
-               pci_write_config_dword(iTCO_wdt_private.pdev, 0xd4, val32);
+       if (p->iTCO_version >= 2) {
+               val32 = readl(p->gcs_pmc);
+               val32 |= no_reboot_bit(p);
+               writel(val32, p->gcs_pmc);
+       } else if (p->iTCO_version == 1) {
+               pci_read_config_dword(p->pci_dev, 0xd4, &val32);
+               val32 |= no_reboot_bit(p);
+               pci_write_config_dword(p->pci_dev, 0xd4, val32);
        }
 }
 
-static int iTCO_wdt_unset_NO_REBOOT_bit(void)
+static int iTCO_wdt_unset_NO_REBOOT_bit(struct iTCO_wdt_private *p)
 {
-       u32 enable_bit = no_reboot_bit();
+       u32 enable_bit = no_reboot_bit(p);
        u32 val32 = 0;
 
        /* Unset the NO_REBOOT bit: this enables reboots */
-       if (iTCO_wdt_private.iTCO_version >= 2) {
-               val32 = readl(iTCO_wdt_private.gcs_pmc);
+       if (p->iTCO_version >= 2) {
+               val32 = readl(p->gcs_pmc);
                val32 &= ~enable_bit;
-               writel(val32, iTCO_wdt_private.gcs_pmc);
+               writel(val32, p->gcs_pmc);
 
-               val32 = readl(iTCO_wdt_private.gcs_pmc);
-       } else if (iTCO_wdt_private.iTCO_version == 1) {
-               pci_read_config_dword(iTCO_wdt_private.pdev, 0xd4, &val32);
+               val32 = readl(p->gcs_pmc);
+       } else if (p->iTCO_version == 1) {
+               pci_read_config_dword(p->pci_dev, 0xd4, &val32);
                val32 &= ~enable_bit;
-               pci_write_config_dword(iTCO_wdt_private.pdev, 0xd4, val32);
+               pci_write_config_dword(p->pci_dev, 0xd4, val32);
 
-               pci_read_config_dword(iTCO_wdt_private.pdev, 0xd4, &val32);
+               pci_read_config_dword(p->pci_dev, 0xd4, &val32);
        }
 
        if (val32 & enable_bit)
@@ -211,32 +214,33 @@ static int iTCO_wdt_unset_NO_REBOOT_bit(void)
 
 static int iTCO_wdt_start(struct watchdog_device *wd_dev)
 {
+       struct iTCO_wdt_private *p = watchdog_get_drvdata(wd_dev);
        unsigned int val;
 
-       spin_lock(&iTCO_wdt_private.io_lock);
+       spin_lock(&p->io_lock);
 
-       iTCO_vendor_pre_start(iTCO_wdt_private.smi_res, wd_dev->timeout);
+       iTCO_vendor_pre_start(p->smi_res, wd_dev->timeout);
 
        /* disable chipset's NO_REBOOT bit */
-       if (iTCO_wdt_unset_NO_REBOOT_bit()) {
-               spin_unlock(&iTCO_wdt_private.io_lock);
+       if (iTCO_wdt_unset_NO_REBOOT_bit(p)) {
+               spin_unlock(&p->io_lock);
                pr_err("failed to reset NO_REBOOT flag, reboot disabled by hardware/BIOS\n");
                return -EIO;
        }
 
        /* Force the timer to its reload value by writing to the TCO_RLD
           register */
-       if (iTCO_wdt_private.iTCO_version >= 2)
-               outw(0x01, TCO_RLD);
-       else if (iTCO_wdt_private.iTCO_version == 1)
-               outb(0x01, TCO_RLD);
+       if (p->iTCO_version >= 2)
+               outw(0x01, TCO_RLD(p));
+       else if (p->iTCO_version == 1)
+               outb(0x01, TCO_RLD(p));
 
        /* Bit 11: TCO Timer Halt -> 0 = The TCO timer is enabled to count */
-       val = inw(TCO1_CNT);
+       val = inw(TCO1_CNT(p));
        val &= 0xf7ff;
-       outw(val, TCO1_CNT);
-       val = inw(TCO1_CNT);
-       spin_unlock(&iTCO_wdt_private.io_lock);
+       outw(val, TCO1_CNT(p));
+       val = inw(TCO1_CNT(p));
+       spin_unlock(&p->io_lock);
 
        if (val & 0x0800)
                return -1;
@@ -245,22 +249,23 @@ static int iTCO_wdt_start(struct watchdog_device *wd_dev)
 
 static int iTCO_wdt_stop(struct watchdog_device *wd_dev)
 {
+       struct iTCO_wdt_private *p = watchdog_get_drvdata(wd_dev);
        unsigned int val;
 
-       spin_lock(&iTCO_wdt_private.io_lock);
+       spin_lock(&p->io_lock);
 
-       iTCO_vendor_pre_stop(iTCO_wdt_private.smi_res);
+       iTCO_vendor_pre_stop(p->smi_res);
 
        /* Bit 11: TCO Timer Halt -> 1 = The TCO timer is disabled */
-       val = inw(TCO1_CNT);
+       val = inw(TCO1_CNT(p));
        val |= 0x0800;
-       outw(val, TCO1_CNT);
-       val = inw(TCO1_CNT);
+       outw(val, TCO1_CNT(p));
+       val = inw(TCO1_CNT(p));
 
        /* Set the NO_REBOOT bit to prevent later reboots, just to be sure */
-       iTCO_wdt_set_NO_REBOOT_bit();
+       iTCO_wdt_set_NO_REBOOT_bit(p);
 
-       spin_unlock(&iTCO_wdt_private.io_lock);
+       spin_unlock(&p->io_lock);
 
        if ((val & 0x0800) == 0)
                return -1;
@@ -269,67 +274,70 @@ static int iTCO_wdt_stop(struct watchdog_device *wd_dev)
 
 static int iTCO_wdt_ping(struct watchdog_device *wd_dev)
 {
-       spin_lock(&iTCO_wdt_private.io_lock);
+       struct iTCO_wdt_private *p = watchdog_get_drvdata(wd_dev);
 
-       iTCO_vendor_pre_keepalive(iTCO_wdt_private.smi_res, wd_dev->timeout);
+       spin_lock(&p->io_lock);
+
+       iTCO_vendor_pre_keepalive(p->smi_res, wd_dev->timeout);
 
        /* Reload the timer by writing to the TCO Timer Counter register */
-       if (iTCO_wdt_private.iTCO_version >= 2) {
-               outw(0x01, TCO_RLD);
-       } else if (iTCO_wdt_private.iTCO_version == 1) {
+       if (p->iTCO_version >= 2) {
+               outw(0x01, TCO_RLD(p));
+       } else if (p->iTCO_version == 1) {
                /* Reset the timeout status bit so that the timer
                 * needs to count down twice again before rebooting */
-               outw(0x0008, TCO1_STS); /* write 1 to clear bit */
+               outw(0x0008, TCO1_STS(p));      /* write 1 to clear bit */
 
-               outb(0x01, TCO_RLD);
+               outb(0x01, TCO_RLD(p));
        }
 
-       spin_unlock(&iTCO_wdt_private.io_lock);
+       spin_unlock(&p->io_lock);
        return 0;
 }
 
 static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t)
 {
+       struct iTCO_wdt_private *p = watchdog_get_drvdata(wd_dev);
        unsigned int val16;
        unsigned char val8;
        unsigned int tmrval;
 
-       tmrval = seconds_to_ticks(t);
+       tmrval = seconds_to_ticks(p, t);
 
        /* For TCO v1 the timer counts down twice before rebooting */
-       if (iTCO_wdt_private.iTCO_version == 1)
+       if (p->iTCO_version == 1)
                tmrval /= 2;
 
        /* from the specs: */
        /* "Values of 0h-3h are ignored and should not be attempted" */
        if (tmrval < 0x04)
                return -EINVAL;
-       if (((iTCO_wdt_private.iTCO_version >= 2) && (tmrval > 0x3ff)) ||
-           ((iTCO_wdt_private.iTCO_version == 1) && (tmrval > 0x03f)))
+       if ((p->iTCO_version >= 2 && tmrval > 0x3ff) ||
+           (p->iTCO_version == 1 && tmrval > 0x03f))
                return -EINVAL;
 
        iTCO_vendor_pre_set_heartbeat(tmrval);
 
        /* Write new heartbeat to watchdog */
-       if (iTCO_wdt_private.iTCO_version >= 2) {
-               spin_lock(&iTCO_wdt_private.io_lock);
-               val16 = inw(TCOv2_TMR);
+       if (p->iTCO_version >= 2) {
+               spin_lock(&p->io_lock);
+               val16 = inw(TCOv2_TMR(p));
                val16 &= 0xfc00;
                val16 |= tmrval;
-               outw(val16, TCOv2_TMR);
-               val16 = inw(TCOv2_TMR);
-               spin_unlock(&iTCO_wdt_private.io_lock);
+               outw(val16, TCOv2_TMR(p));
+               val16 = inw(TCOv2_TMR(p));
+               spin_unlock(&p->io_lock);
 
                if ((val16 & 0x3ff) != tmrval)
                        return -EINVAL;
-       } else if (iTCO_wdt_private.iTCO_version == 1) {
-               spin_lock(&iTCO_wdt_private.io_lock);
-               val8 = inb(TCOv1_TMR);
+       } else if (p->iTCO_version == 1) {
+               spin_lock(&p->io_lock);
+               val8 = inb(TCOv1_TMR(p));
                val8 &= 0xc0;
                val8 |= (tmrval & 0xff);
-               outb(val8, TCOv1_TMR);
-               val8 = inb(TCOv1_TMR);
-               spin_unlock(&iTCO_wdt_private.io_lock);
+               outb(val8, TCOv1_TMR(p));
+               val8 = inb(TCOv1_TMR(p));
+               spin_unlock(&p->io_lock);
 
                if ((val8 & 0x3f) != tmrval)
                        return -EINVAL;
@@ -341,27 +349,28 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t)
 
 static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev)
 {
+       struct iTCO_wdt_private *p = watchdog_get_drvdata(wd_dev);
        unsigned int val16;
        unsigned char val8;
        unsigned int time_left = 0;
 
        /* read the TCO Timer */
-       if (iTCO_wdt_private.iTCO_version >= 2) {
-               spin_lock(&iTCO_wdt_private.io_lock);
-               val16 = inw(TCO_RLD);
+       if (p->iTCO_version >= 2) {
+               spin_lock(&p->io_lock);
+               val16 = inw(TCO_RLD(p));
                val16 &= 0x3ff;
-               spin_unlock(&iTCO_wdt_private.io_lock);
+               spin_unlock(&p->io_lock);
 
-               time_left = ticks_to_seconds(val16);
-       } else if (iTCO_wdt_private.iTCO_version == 1) {
-               spin_lock(&iTCO_wdt_private.io_lock);
-               val8 = inb(TCO_RLD);
+               time_left = ticks_to_seconds(p, val16);
+       } else if (p->iTCO_version == 1) {
+               spin_lock(&p->io_lock);
+               val8 = inb(TCO_RLD(p));
                val8 &= 0x3f;
-               if (!(inw(TCO1_STS) & 0x0008))
-                       val8 += (inb(TCOv1_TMR) & 0x3f);
-               spin_unlock(&iTCO_wdt_private.io_lock);
+               if (!(inw(TCO1_STS(p)) & 0x0008))
+                       val8 += (inb(TCOv1_TMR(p)) & 0x3f);
+               spin_unlock(&p->io_lock);
 
-               time_left = ticks_to_seconds(val8);
+               time_left = ticks_to_seconds(p, val8);
        }
        return time_left;
 }
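
The conversions above follow from the TCO v1/v2 tick period of 0.6 seconds
(v3 counts whole seconds): ticks = secs * 10 / 6 and secs = ticks * 6 / 10.
A standalone sketch of the v1/v2 case, with a worked value:

	/* Mirror of the helpers above for iTCO_version < 3. */
	static unsigned int secs_to_ticks_v2(int secs)
	{
		return (secs * 10) / 6;
	}

	static unsigned int ticks_to_secs_v2(int ticks)
	{
		return (ticks * 6) / 10;
	}

	/* Example: the 30 s default heartbeat becomes 50 ticks, and
	 * 50 ticks reads back as 30 s. On v1 the written value is
	 * additionally halved, since v1 counts down twice. */
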
@@ -387,209 +396,152 @@ static const struct watchdog_ops iTCO_wdt_ops = {
        .get_timeleft =         iTCO_wdt_get_timeleft,
 };
 
-static struct watchdog_device iTCO_wdt_watchdog_dev = {
-       .info =         &ident,
-       .ops =          &iTCO_wdt_ops,
-};
-
 /*
  *     Init & exit routines
  */
 
-static void iTCO_wdt_cleanup(void)
-{
-       /* Stop the timer before we leave */
-       if (!nowayout)
-               iTCO_wdt_stop(&iTCO_wdt_watchdog_dev);
-
-       /* Deregister */
-       watchdog_unregister_device(&iTCO_wdt_watchdog_dev);
-
-       /* release resources */
-       release_region(iTCO_wdt_private.tco_res->start,
-                       resource_size(iTCO_wdt_private.tco_res));
-       release_region(iTCO_wdt_private.smi_res->start,
-                       resource_size(iTCO_wdt_private.smi_res));
-       if (iTCO_wdt_private.iTCO_version >= 2) {
-               iounmap(iTCO_wdt_private.gcs_pmc);
-               release_mem_region(iTCO_wdt_private.gcs_pmc_res->start,
-                               resource_size(iTCO_wdt_private.gcs_pmc_res));
-       }
-
-       iTCO_wdt_private.tco_res = NULL;
-       iTCO_wdt_private.smi_res = NULL;
-       iTCO_wdt_private.gcs_pmc_res = NULL;
-       iTCO_wdt_private.gcs_pmc = NULL;
-}
-
-static int iTCO_wdt_probe(struct platform_device *dev)
+static int iTCO_wdt_probe(struct platform_device *pdev)
 {
-       int ret = -ENODEV;
+       struct device *dev = &pdev->dev;
+       struct itco_wdt_platform_data *pdata = dev_get_platdata(dev);
+       struct iTCO_wdt_private *p;
        unsigned long val32;
-       struct itco_wdt_platform_data *pdata = dev_get_platdata(&dev->dev);
+       int ret;
 
        if (!pdata)
-               goto out;
+               return -ENODEV;
+
+       p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
 
-       spin_lock_init(&iTCO_wdt_private.io_lock);
+       spin_lock_init(&p->io_lock);
 
-       iTCO_wdt_private.tco_res =
-               platform_get_resource(dev, IORESOURCE_IO, ICH_RES_IO_TCO);
-       if (!iTCO_wdt_private.tco_res)
-               goto out;
+       p->tco_res = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_IO_TCO);
+       if (!p->tco_res)
+               return -ENODEV;
 
-       iTCO_wdt_private.smi_res =
-               platform_get_resource(dev, IORESOURCE_IO, ICH_RES_IO_SMI);
-       if (!iTCO_wdt_private.smi_res)
-               goto out;
+       p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_IO_SMI);
+       if (!p->smi_res)
+               return -ENODEV;
 
-       iTCO_wdt_private.iTCO_version = pdata->version;
-       iTCO_wdt_private.dev = dev;
-       iTCO_wdt_private.pdev = to_pci_dev(dev->dev.parent);
+       p->iTCO_version = pdata->version;
+       p->pci_dev = to_pci_dev(dev->parent);
 
        /*
         * Get the Memory-Mapped GCS or PMC register, we need it for the
         * NO_REBOOT flag (TCO v2 and v3).
         */
-       if (iTCO_wdt_private.iTCO_version >= 2) {
-               iTCO_wdt_private.gcs_pmc_res = platform_get_resource(dev,
-                                                       IORESOURCE_MEM,
-                                                       ICH_RES_MEM_GCS_PMC);
-
-               if (!iTCO_wdt_private.gcs_pmc_res)
-                       goto out;
-
-               if (!request_mem_region(iTCO_wdt_private.gcs_pmc_res->start,
-                       resource_size(iTCO_wdt_private.gcs_pmc_res), dev->name)) {
-                       ret = -EBUSY;
-                       goto out;
-               }
-               iTCO_wdt_private.gcs_pmc = ioremap(iTCO_wdt_private.gcs_pmc_res->start,
-                       resource_size(iTCO_wdt_private.gcs_pmc_res));
-               if (!iTCO_wdt_private.gcs_pmc) {
-                       ret = -EIO;
-                       goto unreg_gcs_pmc;
-               }
+       if (p->iTCO_version >= 2) {
+               p->gcs_pmc_res = platform_get_resource(pdev,
+                                                      IORESOURCE_MEM,
+                                                      ICH_RES_MEM_GCS_PMC);
+               p->gcs_pmc = devm_ioremap_resource(dev, p->gcs_pmc_res);
+               if (IS_ERR(p->gcs_pmc))
+                       return PTR_ERR(p->gcs_pmc);
        }
 
        /* Check chipset's NO_REBOOT bit */
-       if (iTCO_wdt_unset_NO_REBOOT_bit() && iTCO_vendor_check_noreboot_on()) {
+       if (iTCO_wdt_unset_NO_REBOOT_bit(p) &&
+           iTCO_vendor_check_noreboot_on()) {
                pr_info("unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n");
-               ret = -ENODEV;  /* Cannot reset NO_REBOOT bit */
-               goto unmap_gcs_pmc;
+               return -ENODEV; /* Cannot reset NO_REBOOT bit */
        }
 
        /* Set the NO_REBOOT bit to prevent later reboots, just to be sure */
-       iTCO_wdt_set_NO_REBOOT_bit();
+       iTCO_wdt_set_NO_REBOOT_bit(p);
 
        /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
-       if (!request_region(iTCO_wdt_private.smi_res->start,
-                       resource_size(iTCO_wdt_private.smi_res), dev->name)) {
+       if (!devm_request_region(dev, p->smi_res->start,
+                                resource_size(p->smi_res),
+                                pdev->name)) {
                pr_err("I/O address 0x%04llx already in use, device disabled\n",
-                      (u64)SMI_EN);
-               ret = -EBUSY;
-               goto unmap_gcs_pmc;
+                      (u64)SMI_EN(p));
+               return -EBUSY;
        }
-       if (turn_SMI_watchdog_clear_off >= iTCO_wdt_private.iTCO_version) {
+       if (turn_SMI_watchdog_clear_off >= p->iTCO_version) {
                /*
                 * Bit 13: TCO_EN -> 0
                 * Disables TCO logic generating an SMI#
                 */
-               val32 = inl(SMI_EN);
+               val32 = inl(SMI_EN(p));
                val32 &= 0xffffdfff;    /* Turn off SMI clearing watchdog */
-               outl(val32, SMI_EN);
+               outl(val32, SMI_EN(p));
        }
 
-       if (!request_region(iTCO_wdt_private.tco_res->start,
-                       resource_size(iTCO_wdt_private.tco_res), dev->name)) {
+       if (!devm_request_region(dev, p->tco_res->start,
+                                resource_size(p->tco_res),
+                                pdev->name)) {
                pr_err("I/O address 0x%04llx already in use, device disabled\n",
-                      (u64)TCOBASE);
-               ret = -EBUSY;
-               goto unreg_smi;
+                      (u64)TCOBASE(p));
+               return -EBUSY;
        }
 
        pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n",
-               pdata->name, pdata->version, (u64)TCOBASE);
+               pdata->name, pdata->version, (u64)TCOBASE(p));
 
        /* Clear out the (probably old) status */
-       switch (iTCO_wdt_private.iTCO_version) {
+       switch (p->iTCO_version) {
        case 5:
        case 4:
-               outw(0x0008, TCO1_STS); /* Clear the Time Out Status bit */
-               outw(0x0002, TCO2_STS); /* Clear SECOND_TO_STS bit */
+               outw(0x0008, TCO1_STS(p)); /* Clear the Time Out Status bit */
+               outw(0x0002, TCO2_STS(p)); /* Clear SECOND_TO_STS bit */
                break;
        case 3:
-               outl(0x20008, TCO1_STS);
+               outl(0x20008, TCO1_STS(p));
                break;
        case 2:
        case 1:
        default:
-               outw(0x0008, TCO1_STS); /* Clear the Time Out Status bit */
-               outw(0x0002, TCO2_STS); /* Clear SECOND_TO_STS bit */
-               outw(0x0004, TCO2_STS); /* Clear BOOT_STS bit */
+               outw(0x0008, TCO1_STS(p)); /* Clear the Time Out Status bit */
+               outw(0x0002, TCO2_STS(p)); /* Clear SECOND_TO_STS bit */
+               outw(0x0004, TCO2_STS(p)); /* Clear BOOT_STS bit */
                break;
        }
 
-       iTCO_wdt_watchdog_dev.bootstatus = 0;
-       iTCO_wdt_watchdog_dev.timeout = WATCHDOG_TIMEOUT;
-       watchdog_set_nowayout(&iTCO_wdt_watchdog_dev, nowayout);
-       iTCO_wdt_watchdog_dev.parent = &dev->dev;
+       p->wddev.info = &ident;
+       p->wddev.ops = &iTCO_wdt_ops;
+       p->wddev.bootstatus = 0;
+       p->wddev.timeout = WATCHDOG_TIMEOUT;
+       watchdog_set_nowayout(&p->wddev, nowayout);
+       p->wddev.parent = dev;
+
+       watchdog_set_drvdata(&p->wddev, p);
+       platform_set_drvdata(pdev, p);
 
        /* Make sure the watchdog is not running */
-       iTCO_wdt_stop(&iTCO_wdt_watchdog_dev);
+       iTCO_wdt_stop(&p->wddev);
 
        /* Check that the heartbeat value is within its range;
           if not, reset to the default */
-       if (iTCO_wdt_set_timeout(&iTCO_wdt_watchdog_dev, heartbeat)) {
-               iTCO_wdt_set_timeout(&iTCO_wdt_watchdog_dev, WATCHDOG_TIMEOUT);
+       if (iTCO_wdt_set_timeout(&p->wddev, heartbeat)) {
+               iTCO_wdt_set_timeout(&p->wddev, WATCHDOG_TIMEOUT);
                pr_info("timeout value out of range, using %d\n",
                        WATCHDOG_TIMEOUT);
        }
 
-       ret = watchdog_register_device(&iTCO_wdt_watchdog_dev);
+       watchdog_stop_on_reboot(&p->wddev);
+       ret = devm_watchdog_register_device(dev, &p->wddev);
        if (ret != 0) {
                pr_err("cannot register watchdog device (err=%d)\n", ret);
-               goto unreg_tco;
+               return ret;
        }
 
        pr_info("initialized. heartbeat=%d sec (nowayout=%d)\n",
                heartbeat, nowayout);
 
        return 0;
-
-unreg_tco:
-       release_region(iTCO_wdt_private.tco_res->start,
-                       resource_size(iTCO_wdt_private.tco_res));
-unreg_smi:
-       release_region(iTCO_wdt_private.smi_res->start,
-                       resource_size(iTCO_wdt_private.smi_res));
-unmap_gcs_pmc:
-       if (iTCO_wdt_private.iTCO_version >= 2)
-               iounmap(iTCO_wdt_private.gcs_pmc);
-unreg_gcs_pmc:
-       if (iTCO_wdt_private.iTCO_version >= 2)
-               release_mem_region(iTCO_wdt_private.gcs_pmc_res->start,
-                               resource_size(iTCO_wdt_private.gcs_pmc_res));
-out:
-       iTCO_wdt_private.tco_res = NULL;
-       iTCO_wdt_private.smi_res = NULL;
-       iTCO_wdt_private.gcs_pmc_res = NULL;
-       iTCO_wdt_private.gcs_pmc = NULL;
-
-       return ret;
 }
 
-static int iTCO_wdt_remove(struct platform_device *dev)
+static int iTCO_wdt_remove(struct platform_device *pdev)
 {
-       if (iTCO_wdt_private.tco_res || iTCO_wdt_private.smi_res)
-               iTCO_wdt_cleanup();
+       struct iTCO_wdt_private *p = platform_get_drvdata(pdev);
 
-       return 0;
-}
+       /* Stop the timer before we leave */
+       if (!nowayout)
+               iTCO_wdt_stop(&p->wddev);
 
-static void iTCO_wdt_shutdown(struct platform_device *dev)
-{
-       iTCO_wdt_stop(NULL);
+       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -610,21 +562,24 @@ static inline bool need_suspend(void) { return true; }
 
 static int iTCO_wdt_suspend_noirq(struct device *dev)
 {
+       struct iTCO_wdt_private *p = dev_get_drvdata(dev);
        int ret = 0;
 
-       iTCO_wdt_private.suspended = false;
-       if (watchdog_active(&iTCO_wdt_watchdog_dev) && need_suspend()) {
-               ret = iTCO_wdt_stop(&iTCO_wdt_watchdog_dev);
+       p->suspended = false;
+       if (watchdog_active(&p->wddev) && need_suspend()) {
+               ret = iTCO_wdt_stop(&p->wddev);
                if (!ret)
-                       iTCO_wdt_private.suspended = true;
+                       p->suspended = true;
        }
        return ret;
 }
 
 static int iTCO_wdt_resume_noirq(struct device *dev)
 {
-       if (iTCO_wdt_private.suspended)
-               iTCO_wdt_start(&iTCO_wdt_watchdog_dev);
+       struct iTCO_wdt_private *p = dev_get_drvdata(dev);
+
+       if (p->suspended)
+               iTCO_wdt_start(&p->wddev);
 
        return 0;
 }
@@ -642,7 +597,6 @@ static const struct dev_pm_ops iTCO_wdt_pm = {
 static struct platform_driver iTCO_wdt_driver = {
        .probe          = iTCO_wdt_probe,
        .remove         = iTCO_wdt_remove,
-       .shutdown       = iTCO_wdt_shutdown,
        .driver         = {
                .name   = DRV_NAME,
                .pm     = ITCO_WDT_PM_OPS,
@@ -651,15 +605,9 @@ static struct platform_driver iTCO_wdt_driver = {
 
 static int __init iTCO_wdt_init_module(void)
 {
-       int err;
-
        pr_info("Intel TCO WatchDog Timer Driver v%s\n", DRV_VERSION);
 
-       err = platform_driver_register(&iTCO_wdt_driver);
-       if (err)
-               return err;
-
-       return 0;
+       return platform_driver_register(&iTCO_wdt_driver);
 }
 
 static void __exit iTCO_wdt_cleanup_module(void)
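
This conversion shows the pattern repeated across the series: per-device
state lives in a devm_kzalloc()'d structure, I/O regions come from
devm_request_region()/devm_ioremap_resource(), and the watchdog is
registered with devm_watchdog_register_device(), so the goto-based error
unwinding and most of the remove() path disappear. A condensed sketch of
the shape (generic names, not any specific driver in this series):

	struct example_wdt {
		struct watchdog_device wddev;
	};

	static int example_wdt_probe(struct platform_device *pdev)
	{
		struct example_wdt *p;

		p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
		if (!p)
			return -ENOMEM;

		/* Every devm_* resource is released automatically on
		 * probe failure and on unbind, so no unwind labels. */
		return devm_watchdog_register_device(&pdev->dev, &p->wddev);
	}
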
index 516fbef00856edeceb59fba31968c1c2328bd24e..6ed39dee995f4f713845ce4bd60ac44d6e9d64fd 100644 (file)
@@ -161,7 +161,7 @@ static int pdc_wdt_restart(struct watchdog_device *wdt_dev,
        return 0;
 }
 
-static struct watchdog_info pdc_wdt_info = {
+static const struct watchdog_info pdc_wdt_info = {
        .identity       = "IMG PDC Watchdog",
        .options        = WDIOF_SETTIMEOUT |
                          WDIOF_KEEPALIVEPING |
index a4b729259b122fe4c18a9f4fcd6080446e635c34..45e4d02221b5849e1a3f61fa83d85b16bb8382d6 100644 (file)
@@ -137,7 +137,6 @@ static int mid_wdt_probe(struct platform_device *pdev)
        wdt_dev->parent = &pdev->dev;
 
        watchdog_set_drvdata(wdt_dev, &pdev->dev);
-       platform_set_drvdata(pdev, wdt_dev);
 
        ret = devm_request_irq(&pdev->dev, pdata->irq, mid_wdt_irq,
                               IRQF_SHARED | IRQF_NO_SUSPEND, "watchdog",
@@ -151,7 +150,7 @@ static int mid_wdt_probe(struct platform_device *pdev)
        /* Make sure the watchdog is not running */
        wdt_stop(wdt_dev);
 
-       ret = watchdog_register_device(wdt_dev);
+       ret = devm_watchdog_register_device(&pdev->dev, wdt_dev);
        if (ret) {
                dev_err(&pdev->dev, "error registering watchdog device\n");
                return ret;
@@ -162,16 +161,8 @@ static int mid_wdt_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int mid_wdt_remove(struct platform_device *pdev)
-{
-       struct watchdog_device *wd = platform_get_drvdata(pdev);
-       watchdog_unregister_device(wd);
-       return 0;
-}
-
 static struct platform_driver mid_wdt_driver = {
        .probe          = mid_wdt_probe,
-       .remove         = mid_wdt_remove,
        .driver         = {
                .name   = "intel_mid_wdt",
        },
index 8e302d0e346c9b6a617fd0c1f3a64c65f0a084df..73c46b3a09ab3ec80db0bfb7577fe289fc811b51 100644 (file)
@@ -422,7 +422,7 @@ static int kempld_wdt_probe_stages(struct watchdog_device *wdd)
        return 0;
 }
 
-static struct watchdog_info kempld_wdt_info = {
+static const struct watchdog_info kempld_wdt_info = {
        .identity       = "KEMPLD Watchdog",
        .options        = WDIOF_SETTIMEOUT |
                        WDIOF_KEEPALIVEPING |
index 582f2fa1b8d91709dde248463f20edabb5850461..e0823677d8c17a1b4733d1c111c3415f01b0ca72 100644 (file)
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  *  Based on EP93xx wdt driver
  */
 
@@ -240,6 +240,6 @@ module_platform_driver(ltq_wdt_driver);
 
 module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started");
-MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
+MODULE_AUTHOR("John Crispin <john@phrozen.org>");
 MODULE_DESCRIPTION("Lantiq SoC Watchdog");
 MODULE_LICENSE("GPL");
index fd171e6caa167457777ced3220d6bd277f0dde34..3b8bb59adf027bddf5fde1013c3621f4b480e9a3 100644 (file)
@@ -181,7 +181,7 @@ static int lpc18xx_wdt_restart(struct watchdog_device *wdt_dev,
        return 0;
 }
 
-static struct watchdog_info lpc18xx_wdt_info = {
+static const struct watchdog_info lpc18xx_wdt_info = {
        .identity       = "NXP LPC18xx Watchdog",
        .options        = WDIOF_SETTIMEOUT |
                          WDIOF_KEEPALIVEPING |
index af6a7c489f085aaa31e2d1361f059145f2116ba7..045201a6fdb38dfa8bc3bc75978455da2e8101a9 100644 (file)
@@ -212,33 +212,14 @@ static int a21_wdt_probe(struct platform_device *pdev)
        drv->wdt = a21_wdt;
        dev_set_drvdata(&pdev->dev, drv);
 
-       ret = watchdog_register_device(&a21_wdt);
+       ret = devm_watchdog_register_device(&pdev->dev, &a21_wdt);
        if (ret) {
                dev_err(&pdev->dev, "Cannot register watchdog device\n");
-               goto err_register_wd;
+               return ret;
        }
 
        dev_info(&pdev->dev, "MEN A21 watchdog timer driver enabled\n");
 
-       return 0;
-
-err_register_wd:
-       mutex_destroy(&drv->lock);
-
-       return ret;
-}
-
-static int a21_wdt_remove(struct platform_device *pdev)
-{
-       struct a21_wdt_drv *drv = dev_get_drvdata(&pdev->dev);
-
-       dev_warn(&pdev->dev,
-               "Unregistering A21 watchdog driver, board may reboot\n");
-
-       watchdog_unregister_device(&drv->wdt);
-
-       mutex_destroy(&drv->lock);
-
        return 0;
 }
 
@@ -257,7 +238,6 @@ MODULE_DEVICE_TABLE(of, a21_wdt_ids);
 
 static struct platform_driver a21_wdt_driver = {
        .probe = a21_wdt_probe,
-       .remove = a21_wdt_remove,
        .shutdown = a21_wdt_shutdown,
        .driver = {
                .name = "a21-watchdog",
index 56ea1caf71c35d3bfde164dacdb8775f4fba98bb..491b9bf13d849d2f2bc578538e113073b757f70f 100644 (file)
@@ -201,38 +201,19 @@ static int meson_wdt_probe(struct platform_device *pdev)
 
        meson_wdt_stop(&meson_wdt->wdt_dev);
 
-       err = watchdog_register_device(&meson_wdt->wdt_dev);
+       watchdog_stop_on_reboot(&meson_wdt->wdt_dev);
+       err = devm_watchdog_register_device(&pdev->dev, &meson_wdt->wdt_dev);
        if (err)
                return err;
 
-       platform_set_drvdata(pdev, meson_wdt);
-
        dev_info(&pdev->dev, "Watchdog enabled (timeout=%d sec, nowayout=%d)",
                 meson_wdt->wdt_dev.timeout, nowayout);
 
        return 0;
 }
 
-static int meson_wdt_remove(struct platform_device *pdev)
-{
-       struct meson_wdt_dev *meson_wdt = platform_get_drvdata(pdev);
-
-       watchdog_unregister_device(&meson_wdt->wdt_dev);
-
-       return 0;
-}
-
-static void meson_wdt_shutdown(struct platform_device *pdev)
-{
-       struct meson_wdt_dev *meson_wdt = platform_get_drvdata(pdev);
-
-       meson_wdt_stop(&meson_wdt->wdt_dev);
-}
-
 static struct platform_driver meson_wdt_driver = {
        .probe          = meson_wdt_probe,
-       .remove         = meson_wdt_remove,
-       .shutdown       = meson_wdt_shutdown,
        .driver         = {
                .name           = DRV_NAME,
                .of_match_table = meson_wdt_dt_ids,
index d5735c12067d609465c556e17422e86b08ffe140..48a06067075d56c2f90358527cdd351575992ea1 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Ralink MT7621/MT7628 built-in hardware watchdog timer
  *
- * Copyright (C) 2014 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2014 John Crispin <john@phrozen.org>
  *
  * This driver was based on: drivers/watchdog/rt2880_wdt.c
  *
@@ -110,7 +110,7 @@ static struct watchdog_info mt7621_wdt_info = {
        .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
 };
 
-static struct watchdog_ops mt7621_wdt_ops = {
+static const struct watchdog_ops mt7621_wdt_ops = {
        .owner = THIS_MODULE,
        .start = mt7621_wdt_start,
        .stop = mt7621_wdt_stop,
@@ -181,5 +181,5 @@ static struct platform_driver mt7621_wdt_driver = {
 module_platform_driver(mt7621_wdt_driver);
 
 MODULE_DESCRIPTION("MediaTek MT762x hardware watchdog driver");
-MODULE_AUTHOR("John Crispin <blogic@openwrt.org");
+MODULE_AUTHOR("John Crispin <john@phrozen.org");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/nic7018_wdt.c b/drivers/watchdog/nic7018_wdt.c
new file mode 100644 (file)
index 0000000..dcd2656
--- /dev/null
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2016 National Instruments Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+#define LOCK                   0xA5
+#define UNLOCK                 0x5A
+
+#define WDT_CTRL_RESET_EN      BIT(7)
+#define WDT_RELOAD_PORT_EN     BIT(7)
+
+#define WDT_CTRL               1
+#define WDT_RELOAD_CTRL                2
+#define WDT_PRESET_PRESCALE    4
+#define WDT_REG_LOCK           5
+#define WDT_COUNT              6
+#define WDT_RELOAD_PORT                7
+
+#define WDT_MIN_TIMEOUT                1
+#define WDT_MAX_TIMEOUT                464
+#define WDT_DEFAULT_TIMEOUT    80
+
+#define WDT_MAX_COUNTER                15
+
+static unsigned int timeout;
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout,
+                "Watchdog timeout in seconds. (default="
+                __MODULE_STRING(WDT_DEFAULT_TIMEOUT) ")");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout,
+                "Watchdog cannot be stopped once started. (default="
+                __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+struct nic7018_wdt {
+       u16 io_base;
+       u32 period;
+       struct watchdog_device wdd;
+};
+
+struct nic7018_config {
+       u32 period;
+       u8 divider;
+};
+
+static const struct nic7018_config nic7018_configs[] = {
+       {  2, 4 },
+       { 32, 5 },
+};
+
+static inline u32 nic7018_timeout(u32 period, u8 counter)
+{
+       return period * counter - period / 2;
+}
+
+static const struct nic7018_config *nic7018_get_config(u32 timeout,
+                                                      u8 *counter)
+{
+       const struct nic7018_config *config;
+       u8 count;
+
+       if (timeout < 30 && timeout != 16) {
+               config = &nic7018_configs[0];
+               count = timeout / 2 + 1;
+       } else {
+               config = &nic7018_configs[1];
+               count = DIV_ROUND_UP(timeout + 16, 32);
+
+               if (count > WDT_MAX_COUNTER)
+                       count = WDT_MAX_COUNTER;
+       }
+       *counter = count;
+       return config;
+}
+
+static int nic7018_set_timeout(struct watchdog_device *wdd,
+                              unsigned int timeout)
+{
+       struct nic7018_wdt *wdt = watchdog_get_drvdata(wdd);
+       const struct nic7018_config *config;
+       u8 counter;
+
+       config = nic7018_get_config(timeout, &counter);
+
+       outb(counter << 4 | config->divider,
+            wdt->io_base + WDT_PRESET_PRESCALE);
+
+       wdd->timeout = nic7018_timeout(config->period, counter);
+       wdt->period = config->period;
+
+       return 0;
+}
+
+static int nic7018_start(struct watchdog_device *wdd)
+{
+       struct nic7018_wdt *wdt = watchdog_get_drvdata(wdd);
+       u8 control;
+
+       nic7018_set_timeout(wdd, wdd->timeout);
+
+       control = inb(wdt->io_base + WDT_RELOAD_CTRL);
+       outb(control | WDT_RELOAD_PORT_EN, wdt->io_base + WDT_RELOAD_CTRL);
+
+       outb(1, wdt->io_base + WDT_RELOAD_PORT);
+
+       control = inb(wdt->io_base + WDT_CTRL);
+       outb(control | WDT_CTRL_RESET_EN, wdt->io_base + WDT_CTRL);
+
+       return 0;
+}
+
+static int nic7018_stop(struct watchdog_device *wdd)
+{
+       struct nic7018_wdt *wdt = watchdog_get_drvdata(wdd);
+
+       outb(0, wdt->io_base + WDT_CTRL);
+       outb(0, wdt->io_base + WDT_RELOAD_CTRL);
+       outb(0xF0, wdt->io_base + WDT_PRESET_PRESCALE);
+
+       return 0;
+}
+
+static int nic7018_ping(struct watchdog_device *wdd)
+{
+       struct nic7018_wdt *wdt = watchdog_get_drvdata(wdd);
+
+       outb(1, wdt->io_base + WDT_RELOAD_PORT);
+
+       return 0;
+}
+
+static unsigned int nic7018_get_timeleft(struct watchdog_device *wdd)
+{
+       struct nic7018_wdt *wdt = watchdog_get_drvdata(wdd);
+       u8 count;
+
+       count = inb(wdt->io_base + WDT_COUNT) & 0xF;
+       if (!count)
+               return 0;
+
+       return nic7018_timeout(wdt->period, count);
+}
+
+static const struct watchdog_info nic7018_wdd_info = {
+       .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+       .identity = "NIC7018 Watchdog",
+};
+
+static const struct watchdog_ops nic7018_wdd_ops = {
+       .owner = THIS_MODULE,
+       .start = nic7018_start,
+       .stop = nic7018_stop,
+       .ping = nic7018_ping,
+       .set_timeout = nic7018_set_timeout,
+       .get_timeleft = nic7018_get_timeleft,
+};
+
+static int nic7018_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct watchdog_device *wdd;
+       struct nic7018_wdt *wdt;
+       struct resource *io_rc;
+       int ret;
+
+       wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
+       if (!wdt)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, wdt);
+
+       io_rc = platform_get_resource(pdev, IORESOURCE_IO, 0);
+       if (!io_rc) {
+               dev_err(dev, "missing IO resources\n");
+               return -EINVAL;
+       }
+
+       if (!devm_request_region(dev, io_rc->start, resource_size(io_rc),
+                                KBUILD_MODNAME)) {
+               dev_err(dev, "failed to get IO region\n");
+               return -EBUSY;
+       }
+
+       wdt->io_base = io_rc->start;
+       wdd = &wdt->wdd;
+       wdd->info = &nic7018_wdd_info;
+       wdd->ops = &nic7018_wdd_ops;
+       wdd->min_timeout = WDT_MIN_TIMEOUT;
+       wdd->max_timeout = WDT_MAX_TIMEOUT;
+       wdd->timeout = WDT_DEFAULT_TIMEOUT;
+       wdd->parent = dev;
+
+       watchdog_set_drvdata(wdd, wdt);
+       watchdog_set_nowayout(wdd, nowayout);
+
+       ret = watchdog_init_timeout(wdd, timeout, dev);
+       if (ret)
+               dev_warn(dev, "unable to set timeout value, using default\n");
+
+       /* Unlock WDT register */
+       outb(UNLOCK, wdt->io_base + WDT_REG_LOCK);
+
+       ret = watchdog_register_device(wdd);
+       if (ret) {
+               outb(LOCK, wdt->io_base + WDT_REG_LOCK);
+               dev_err(dev, "failed to register watchdog\n");
+               return ret;
+       }
+
+       dev_dbg(dev, "io_base=0x%04X, timeout=%d, nowayout=%d\n",
+               wdt->io_base, timeout, nowayout);
+       return 0;
+}
+
+static int nic7018_remove(struct platform_device *pdev)
+{
+       struct nic7018_wdt *wdt = platform_get_drvdata(pdev);
+
+       watchdog_unregister_device(&wdt->wdd);
+
+       /* Lock WDT register */
+       outb(LOCK, wdt->io_base + WDT_REG_LOCK);
+
+       return 0;
+}
+
+static const struct acpi_device_id nic7018_device_ids[] = {
+       {"NIC7018", 0},
+       {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, nic7018_device_ids);
+
+static struct platform_driver watchdog_driver = {
+       .probe = nic7018_probe,
+       .remove = nic7018_remove,
+       .driver = {
+               .name = KBUILD_MODNAME,
+               .acpi_match_table = ACPI_PTR(nic7018_device_ids),
+       },
+};
+
+module_platform_driver(watchdog_driver);
+
+MODULE_DESCRIPTION("National Instruments NIC7018 Watchdog driver");
+MODULE_AUTHOR("Hui Chun Ong <hui.chun.ong@ni.com>");
+MODULE_LICENSE("GPL");
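
The NIC7018 encoding above is timeout = period * counter - period / 2, with a
2 s period for short timeouts and a 32 s period otherwise; timeout = 16 is
special-cased into the 32 s table because 32 * 1 - 16 hits it exactly.
Checking the driver's constants against that formula:

	/* Mirror of nic7018_timeout() for a quick sanity check. */
	static unsigned int timeout_calc(unsigned int period, unsigned int counter)
	{
		return period * counter - period / 2;
	}

	/* Default 80 s: DIV_ROUND_UP(80 + 16, 32) = 3 -> 32 * 3 - 16 = 80.
	 * Maximum:      counter = WDT_MAX_COUNTER = 15 -> 32 * 15 - 16 = 464,
	 *               matching WDT_MAX_TIMEOUT. */
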
index c6b8f4a43bdeff2df7faa71f6356819f8129c326..39be4dd8035ed0952af5dc6f81c6db888e1506ac 100644 (file)
@@ -395,7 +395,7 @@ static void __iomem *orion_wdt_ioremap_rstout(struct platform_device *pdev,
 
        rstout = internal_regs + ORION_RSTOUT_MASK_OFFSET;
 
-       WARN(1, FW_BUG "falling back to harcoded RSTOUT reg %pa\n", &rstout);
+       WARN(1, FW_BUG "falling back to hardcoded RSTOUT reg %pa\n", &rstout);
        return devm_ioremap(&pdev->dev, rstout, 0x4);
 }
 
index 0cdfee266690b70fccc93eee4573f0b0cb65766a..e35cf5e87907c3f98520ab0440d6088e6976f803 100644 (file)
@@ -54,7 +54,7 @@ static struct {
        struct timer_list timer;        /* The timer that pings the watchdog */
 } pikawdt_private;
 
-static struct watchdog_info ident = {
+static struct watchdog_info ident __ro_after_init = {
        .identity       = DRV_NAME,
        .options        = WDIOF_CARDRESET |
                          WDIOF_SETTIMEOUT |
index 0805ee2acd7a94bb913472f5bd33bea5d95bc0c8..e60f55702ab79d111078dae1b253799dfdc4684d 100644 (file)
@@ -130,7 +130,7 @@ static int rn5t618_wdt_ping(struct watchdog_device *wdt_dev)
                                  RN5T618_PWRIRQ_IR_WDOG, 0);
 }
 
-static struct watchdog_info rn5t618_wdt_info = {
+static const struct watchdog_info rn5t618_wdt_info = {
        .options        = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE |
                          WDIOF_KEEPALIVEPING,
        .identity       = DRIVER_NAME,
index 14b4fd428fffbbfaf539eb0ab06273858d15ed93..05524baf7dccba28fa84360a909bea22127f0ddc 100644 (file)
@@ -2,7 +2,7 @@
  * Ralink RT288x/RT3xxx/MT76xx built-in hardware watchdog timer
  *
  * Copyright (C) 2011 Gabor Juhos <juhosg@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  *
  * This driver was based on: drivers/watchdog/softdog.c
  *
@@ -124,7 +124,7 @@ static struct watchdog_info rt288x_wdt_info = {
        .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
 };
 
-static struct watchdog_ops rt288x_wdt_ops = {
+static const struct watchdog_ops rt288x_wdt_ops = {
        .owner = THIS_MODULE,
        .start = rt288x_wdt_start,
        .stop = rt288x_wdt_stop,
index 59e95762a6de776ba9a6bcc793138eccb8396675..6ed97596ca80685519ec327d4d6a847824788546 100644 (file)
@@ -23,8 +23,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/types.h>
@@ -46,6 +44,7 @@
 #define S3C2410_WTCON          0x00
 #define S3C2410_WTDAT          0x04
 #define S3C2410_WTCNT          0x08
+#define S3C2410_WTCLRINT       0x0c
 
 #define S3C2410_WTCNT_MAXCNT   0xffff
 
 #define S3C2410_WTCON_PRESCALE_MASK    (0xff << 8)
 #define S3C2410_WTCON_PRESCALE_MAX     0xff
 
-#define CONFIG_S3C2410_WATCHDOG_ATBOOT         (0)
-#define CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME   (15)
+#define S3C2410_WATCHDOG_ATBOOT                (0)
+#define S3C2410_WATCHDOG_DEFAULT_TIME  (15)
 
 #define EXYNOS5_RST_STAT_REG_OFFSET            0x0404
 #define EXYNOS5_WDT_DISABLE_REG_OFFSET         0x0408
 #define EXYNOS5_WDT_MASK_RESET_REG_OFFSET      0x040c
 #define QUIRK_HAS_PMU_CONFIG                   (1 << 0)
 #define QUIRK_HAS_RST_STAT                     (1 << 1)
+#define QUIRK_HAS_WTCLRINT_REG                 (1 << 2)
 
 /* These quirks require that we have a PMU register map */
 #define QUIRKS_HAVE_PMUREG                     (QUIRK_HAS_PMU_CONFIG | \
 
 static bool nowayout   = WATCHDOG_NOWAYOUT;
 static int tmr_margin;
-static int tmr_atboot  = CONFIG_S3C2410_WATCHDOG_ATBOOT;
+static int tmr_atboot  = S3C2410_WATCHDOG_ATBOOT;
 static int soft_noboot;
-static int debug;
 
 module_param(tmr_margin,  int, 0);
 module_param(tmr_atboot,  int, 0);
 module_param(nowayout,   bool, 0);
 module_param(soft_noboot, int, 0);
-module_param(debug,      int, 0);
 
 MODULE_PARM_DESC(tmr_margin, "Watchdog tmr_margin in seconds. (default="
-               __MODULE_STRING(CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME) ")");
+               __MODULE_STRING(S3C2410_WATCHDOG_DEFAULT_TIME) ")");
 MODULE_PARM_DESC(tmr_atboot,
                "Watchdog is started at boot time if set to 1, default="
-                       __MODULE_STRING(CONFIG_S3C2410_WATCHDOG_ATBOOT));
+                       __MODULE_STRING(S3C2410_WATCHDOG_ATBOOT));
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
                        __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, "
                        "0 to reboot (default 0)");
-MODULE_PARM_DESC(debug, "Watchdog debug, set to >1 for debug (default 0)");
 
 /**
  * struct s3c2410_wdt_variant - Per-variant config data
@@ -143,13 +140,18 @@ static const struct s3c2410_wdt_variant drv_data_s3c2410 = {
 };
 
 #ifdef CONFIG_OF
+static const struct s3c2410_wdt_variant drv_data_s3c6410 = {
+       .quirks = QUIRK_HAS_WTCLRINT_REG,
+};
+
 static const struct s3c2410_wdt_variant drv_data_exynos5250  = {
        .disable_reg = EXYNOS5_WDT_DISABLE_REG_OFFSET,
        .mask_reset_reg = EXYNOS5_WDT_MASK_RESET_REG_OFFSET,
        .mask_bit = 20,
        .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
        .rst_stat_bit = 20,
-       .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT,
+       .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT |
+                 QUIRK_HAS_WTCLRINT_REG,
 };
 
 static const struct s3c2410_wdt_variant drv_data_exynos5420 = {
@@ -158,7 +160,8 @@ static const struct s3c2410_wdt_variant drv_data_exynos5420 = {
        .mask_bit = 0,
        .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
        .rst_stat_bit = 9,
-       .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT,
+       .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT |
+                 QUIRK_HAS_WTCLRINT_REG,
 };
 
 static const struct s3c2410_wdt_variant drv_data_exynos7 = {
@@ -167,12 +170,15 @@ static const struct s3c2410_wdt_variant drv_data_exynos7 = {
        .mask_bit = 23,
        .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
        .rst_stat_bit = 23,     /* A57 WDTRESET */
-       .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT,
+       .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT |
+                 QUIRK_HAS_WTCLRINT_REG,
 };
 
 static const struct of_device_id s3c2410_wdt_match[] = {
        { .compatible = "samsung,s3c2410-wdt",
          .data = &drv_data_s3c2410 },
+       { .compatible = "samsung,s3c6410-wdt",
+         .data = &drv_data_s3c6410 },
        { .compatible = "samsung,exynos5250-wdt",
          .data = &drv_data_exynos5250 },
        { .compatible = "samsung,exynos5420-wdt",
@@ -193,14 +199,6 @@ static const struct platform_device_id s3c2410_wdt_ids[] = {
 };
 MODULE_DEVICE_TABLE(platform, s3c2410_wdt_ids);
 
-/* watchdog control routines */
-
-#define DBG(fmt, ...)                                  \
-do {                                                   \
-       if (debug)                                      \
-               pr_info(fmt, ##__VA_ARGS__);            \
-} while (0)
-
 /* functions */
 
 static inline unsigned int s3c2410wdt_max_timeout(struct clk *clock)
@@ -296,8 +294,8 @@ static int s3c2410wdt_start(struct watchdog_device *wdd)
                wtcon |= S3C2410_WTCON_RSTEN;
        }
 
-       DBG("%s: count=0x%08x, wtcon=%08lx\n",
-           __func__, wdt->count, wtcon);
+       dev_dbg(wdt->dev, "Starting watchdog: count=0x%08x, wtcon=%08lx\n",
+               wdt->count, wtcon);
 
        writel(wdt->count, wdt->reg_base + S3C2410_WTDAT);
        writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
@@ -326,8 +324,8 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou
        freq = DIV_ROUND_UP(freq, 128);
        count = timeout * freq;
 
-       DBG("%s: count=%d, timeout=%d, freq=%lu\n",
-           __func__, count, timeout, freq);
+       dev_dbg(wdt->dev, "Heartbeat: count=%d, timeout=%d, freq=%lu\n",
+               count, timeout, freq);
 
        /* if the count is bigger than the watchdog register,
           then work out what we need to do (and if) we can
@@ -343,8 +341,8 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou
                }
        }
 
-       DBG("%s: timeout=%d, divisor=%d, count=%d (%08x)\n",
-           __func__, timeout, divisor, count, DIV_ROUND_UP(count, divisor));
+       dev_dbg(wdt->dev, "Heartbeat: timeout=%d, divisor=%d, count=%d (%08x)\n",
+               timeout, divisor, count, DIV_ROUND_UP(count, divisor));
 
        count = DIV_ROUND_UP(count, divisor);
        wdt->count = count;
@@ -394,7 +392,7 @@ static const struct watchdog_info s3c2410_wdt_ident = {
        .identity         =     "S3C2410 Watchdog",
 };
 
-static struct watchdog_ops s3c2410wdt_ops = {
+static const struct watchdog_ops s3c2410wdt_ops = {
        .owner = THIS_MODULE,
        .start = s3c2410wdt_start,
        .stop = s3c2410wdt_stop,
@@ -406,7 +404,7 @@ static struct watchdog_ops s3c2410wdt_ops = {
 static struct watchdog_device s3c2410_wdd = {
        .info = &s3c2410_wdt_ident,
        .ops = &s3c2410wdt_ops,
-       .timeout = CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME,
+       .timeout = S3C2410_WATCHDOG_DEFAULT_TIME,
 };
 
 /* interrupt handler code */
@@ -418,6 +416,10 @@ static irqreturn_t s3c2410wdt_irq(int irqno, void *param)
        dev_info(wdt->dev, "watchdog timer expired (irq)\n");
 
        s3c2410wdt_keepalive(&wdt->wdt_device);
+
+       if (wdt->drv_data->quirks & QUIRK_HAS_WTCLRINT_REG)
+               writel(0x1, wdt->reg_base + S3C2410_WTCLRINT);
+
        return IRQ_HANDLED;
 }
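
The new quirk follows this driver's existing per-variant scheme: each SoC's
capabilities are flag bits in struct s3c2410_wdt_variant, tested at runtime.
Presumably the WTCLRINT write acknowledges the latched interrupt on the SoCs
that have the register (s3c6410 and later, per the match table above); a
short sketch of the pattern, assuming the driver's surrounding definitions:

	/* A variant that only carries the interrupt-clear capability. */
	static const struct s3c2410_wdt_variant drv_data_example = {
		.quirks = QUIRK_HAS_WTCLRINT_REG,
	};

	/* In the IRQ handler, after servicing the watchdog:
	 *   if (wdt->drv_data->quirks & QUIRK_HAS_WTCLRINT_REG)
	 *           writel(0x1, wdt->reg_base + S3C2410_WTCLRINT);
	 */
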
 
@@ -505,9 +507,8 @@ static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt)
        return 0;
 }
 
-/* s3c2410_get_wdt_driver_data */
 static inline struct s3c2410_wdt_variant *
-get_wdt_drv_data(struct platform_device *pdev)
+s3c2410_get_wdt_drv_data(struct platform_device *pdev)
 {
        if (pdev->dev.of_node) {
                const struct of_device_id *match;
@@ -529,8 +530,6 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
        int started = 0;
        int ret;
 
-       DBG("%s: probe=%p\n", __func__, pdev);
-
        dev = &pdev->dev;
 
        wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
@@ -541,7 +540,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
        spin_lock_init(&wdt->lock);
        wdt->wdt_device = s3c2410_wdd;
 
-       wdt->drv_data = get_wdt_drv_data(pdev);
+       wdt->drv_data = s3c2410_get_wdt_drv_data(pdev);
        if (wdt->drv_data->quirks & QUIRKS_HAVE_PMUREG) {
                wdt->pmureg = syscon_regmap_lookup_by_phandle(dev->of_node,
                                                "samsung,syscon-phandle");
@@ -566,8 +565,6 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
                goto err;
        }
 
-       DBG("probe: mapped reg_base=%p\n", wdt->reg_base);
-
        wdt->clock = devm_clk_get(dev, "watchdog");
        if (IS_ERR(wdt->clock)) {
                dev_err(dev, "failed to find watchdog clock source\n");
@@ -600,12 +597,12 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
                                        wdt->wdt_device.timeout);
        if (ret) {
                started = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
-                                       CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME);
+                                       S3C2410_WATCHDOG_DEFAULT_TIME);
 
                if (started == 0)
                        dev_info(dev,
                           "tmr_margin value out of range, default %d used\n",
-                              CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME);
+                              S3C2410_WATCHDOG_DEFAULT_TIME);
                else
                        dev_info(dev, "default timer value is out of range, "
                                                        "cannot start\n");
index 8965e3f536c3572516053c522eb1d4208309988a..d3be4f831db58a22cee4e868920923ad3bc5612d 100644 (file)
@@ -188,12 +188,14 @@ static int __init sa1100dog_init(void)
        pre_margin = oscr_freq * margin;
 
        ret = misc_register(&sa1100dog_miscdev);
-       if (ret == 0)
+       if (ret == 0) {
                pr_info("SA1100/PXA2xx Watchdog Timer: timer margin %d sec\n",
                        margin);
-       return ret;
-err:
+               return 0;
+       }
+
        clk_disable_unprepare(clk);
+err:
        clk_put(clk);
        return ret;
 }
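
This hunk untangles the init error path: a failed misc_register() used to return with the clock still prepared and enabled, and the err: label only did clk_put(). The rewrite unwinds in reverse order of acquisition, the standard kernel pattern; a sketch under that assumption, with hypothetical names:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/miscdevice.h>

static struct clk *my_clk;
static struct miscdevice my_miscdev;    /* assumed filled in elsewhere */

static int __init my_dog_init(void)
{
        int ret;

        my_clk = clk_get(NULL, "OSTIMER0");
        if (IS_ERR(my_clk))
                return PTR_ERR(my_clk);

        ret = clk_prepare_enable(my_clk);
        if (ret)
                goto err_put;

        ret = misc_register(&my_miscdev);
        if (ret)
                goto err_disable;

        return 0;

err_disable:
        clk_disable_unprepare(my_clk);  /* undo clk_prepare_enable() */
err_put:
        clk_put(my_clk);                /* undo clk_get() */
        return ret;
}
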
index a49634cdc1ccbc987576bae3480f6669db3c0cde..f709962018ac260107dd2c419c1fbcdfafd4599b 100644 (file)
@@ -28,7 +28,7 @@
 struct sama5d4_wdt {
        struct watchdog_device  wdd;
        void __iomem            *reg_base;
-       u32     config;
+       u32                     mr;
 };
 
 static int wdt_timeout = WDT_DEFAULT_TIMEOUT;
@@ -53,11 +53,9 @@ MODULE_PARM_DESC(nowayout,
 static int sama5d4_wdt_start(struct watchdog_device *wdd)
 {
        struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd);
-       u32 reg;
 
-       reg = wdt_read(wdt, AT91_WDT_MR);
-       reg &= ~AT91_WDT_WDDIS;
-       wdt_write(wdt, AT91_WDT_MR, reg);
+       wdt->mr &= ~AT91_WDT_WDDIS;
+       wdt_write(wdt, AT91_WDT_MR, wdt->mr);
 
        return 0;
 }
@@ -65,11 +63,9 @@ static int sama5d4_wdt_start(struct watchdog_device *wdd)
 static int sama5d4_wdt_stop(struct watchdog_device *wdd)
 {
        struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd);
-       u32 reg;
 
-       reg = wdt_read(wdt, AT91_WDT_MR);
-       reg |= AT91_WDT_WDDIS;
-       wdt_write(wdt, AT91_WDT_MR, reg);
+       wdt->mr |= AT91_WDT_WDDIS;
+       wdt_write(wdt, AT91_WDT_MR, wdt->mr);
 
        return 0;
 }
@@ -88,14 +84,12 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd,
 {
        struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd);
        u32 value = WDT_SEC2TICKS(timeout);
-       u32 reg;
 
-       reg = wdt_read(wdt, AT91_WDT_MR);
-       reg &= ~AT91_WDT_WDV;
-       reg &= ~AT91_WDT_WDD;
-       reg |= AT91_WDT_SET_WDV(value);
-       reg |= AT91_WDT_SET_WDD(value);
-       wdt_write(wdt, AT91_WDT_MR, reg);
+       wdt->mr &= ~AT91_WDT_WDV;
+       wdt->mr &= ~AT91_WDT_WDD;
+       wdt->mr |= AT91_WDT_SET_WDV(value);
+       wdt->mr |= AT91_WDT_SET_WDD(value);
+       wdt_write(wdt, AT91_WDT_MR, wdt->mr);
 
        wdd->timeout = timeout;
 
@@ -107,7 +101,7 @@ static const struct watchdog_info sama5d4_wdt_info = {
        .identity = "Atmel SAMA5D4 Watchdog",
 };
 
-static struct watchdog_ops sama5d4_wdt_ops = {
+static const struct watchdog_ops sama5d4_wdt_ops = {
        .owner = THIS_MODULE,
        .start = sama5d4_wdt_start,
        .stop = sama5d4_wdt_stop,
@@ -132,19 +126,19 @@ static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt)
 {
        const char *tmp;
 
-       wdt->config = AT91_WDT_WDDIS;
+       wdt->mr = AT91_WDT_WDDIS;
 
        if (!of_property_read_string(np, "atmel,watchdog-type", &tmp) &&
            !strcmp(tmp, "software"))
-               wdt->config |= AT91_WDT_WDFIEN;
+               wdt->mr |= AT91_WDT_WDFIEN;
        else
-               wdt->config |= AT91_WDT_WDRSTEN;
+               wdt->mr |= AT91_WDT_WDRSTEN;
 
        if (of_property_read_bool(np, "atmel,idle-halt"))
-               wdt->config |= AT91_WDT_WDIDLEHLT;
+               wdt->mr |= AT91_WDT_WDIDLEHLT;
 
        if (of_property_read_bool(np, "atmel,dbg-halt"))
-               wdt->config |= AT91_WDT_WDDBGHLT;
+               wdt->mr |= AT91_WDT_WDDBGHLT;
 
        return 0;
 }
@@ -163,11 +157,10 @@ static int sama5d4_wdt_init(struct sama5d4_wdt *wdt)
        reg &= ~AT91_WDT_WDDIS;
        wdt_write(wdt, AT91_WDT_MR, reg);
 
-       reg = wdt->config;
-       reg |= AT91_WDT_SET_WDD(value);
-       reg |= AT91_WDT_SET_WDV(value);
+       wdt->mr |= AT91_WDT_SET_WDD(value);
+       wdt->mr |= AT91_WDT_SET_WDV(value);
 
-       wdt_write(wdt, AT91_WDT_MR, reg);
+       wdt_write(wdt, AT91_WDT_MR, wdt->mr);
 
        return 0;
 }
@@ -211,7 +204,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
                        return ret;
        }
 
-       if ((wdt->config & AT91_WDT_WDFIEN) && irq) {
+       if ((wdt->mr & AT91_WDT_WDFIEN) && irq) {
                ret = devm_request_irq(&pdev->dev, irq, sama5d4_wdt_irq_handler,
                                       IRQF_SHARED | IRQF_IRQPOLL |
                                       IRQF_NO_SUSPEND, pdev->name, pdev);
@@ -265,11 +258,28 @@ static const struct of_device_id sama5d4_wdt_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, sama5d4_wdt_of_match);
 
+#ifdef CONFIG_PM_SLEEP
+static int sama5d4_wdt_resume(struct device *dev)
+{
+       struct sama5d4_wdt *wdt = dev_get_drvdata(dev);
+
+       wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS);
+       if (wdt->mr & AT91_WDT_WDDIS)
+               wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(sama5d4_wdt_pm_ops, NULL,
+                        sama5d4_wdt_resume);
+
 static struct platform_driver sama5d4_wdt_driver = {
        .probe          = sama5d4_wdt_probe,
        .remove         = sama5d4_wdt_remove,
        .driver         = {
                .name   = "sama5d4_wdt",
+               .pm     = &sama5d4_wdt_pm_ops,
                .of_match_table = sama5d4_wdt_of_match,
        }
 };
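
The sama5d4 hunks replace read-modify-write of AT91_WDT_MR with a shadow copy in wdt->mr, and that shadow is what enables the new resume hook: register contents are presumed lost across deep sleep, so resume just rewrites the cached value. SIMPLE_DEV_PM_OPS() expands to an empty dev_pm_ops when CONFIG_PM_SLEEP is off (its arguments are discarded unevaluated), which is why only the callback body needs the #ifdef. A sketch of the wiring, names hypothetical:

#include <linux/device.h>
#include <linux/pm.h>

#ifdef CONFIG_PM_SLEEP
static int my_wdt_resume(struct device *dev)
{
        /* Rewrite hardware registers from the shadow kept in drvdata. */
        return 0;
}
#endif

/* NULL suspend hook: the shadow copy is already current. */
static SIMPLE_DEV_PM_OPS(my_wdt_pm_ops, NULL, my_wdt_resume);
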
index ce0c38bd0f0078eb32826b9377724432530efcf3..316c2eb122d23d335d738947a63fc2f9db2e4f1b 100644 (file)
@@ -207,7 +207,7 @@ static irqreturn_t sbsa_gwdt_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static struct watchdog_info sbsa_gwdt_info = {
+static const struct watchdog_info sbsa_gwdt_info = {
        .identity       = WATCHDOG_NAME,
        .options        = WDIOF_SETTIMEOUT |
                          WDIOF_KEEPALIVEPING |
@@ -215,7 +215,7 @@ static struct watchdog_info sbsa_gwdt_info = {
                          WDIOF_CARDRESET,
 };
 
-static struct watchdog_ops sbsa_gwdt_ops = {
+static const struct watchdog_ops sbsa_gwdt_ops = {
        .owner          = THIS_MODULE,
        .start          = sbsa_gwdt_start,
        .stop           = sbsa_gwdt_stop,
index 3050a0031479f4737a3358d9152d973ad347fff1..4eea351e09b0d205d20e771e2349abc04d6310f8 100644 (file)
@@ -127,7 +127,7 @@ static const struct watchdog_info sirfsoc_wdt_ident = {
        .identity         =     "SiRFSOC Watchdog",
 };
 
-static struct watchdog_ops sirfsoc_wdt_ops = {
+static const struct watchdog_ops sirfsoc_wdt_ops = {
        .owner = THIS_MODULE,
        .start = sirfsoc_wdt_enable,
        .stop = sirfsoc_wdt_disable,
index c7bdc986dca1c249c2b61ad902504cdd1df4fb2d..7983029852ab0dc7e688b0766337fd0bb9da8c54 100644 (file)
@@ -87,11 +87,13 @@ static int softdog_ping(struct watchdog_device *w)
        if (!mod_timer(&softdog_ticktock, jiffies + (w->timeout * HZ)))
                __module_get(THIS_MODULE);
 
-       if (w->pretimeout)
-               mod_timer(&softdog_preticktock, jiffies +
-                         (w->timeout - w->pretimeout) * HZ);
-       else
-               del_timer(&softdog_preticktock);
+       if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) {
+               if (w->pretimeout)
+                       mod_timer(&softdog_preticktock, jiffies +
+                                 (w->timeout - w->pretimeout) * HZ);
+               else
+                       del_timer(&softdog_preticktock);
+       }
 
        return 0;
 }
@@ -101,15 +103,15 @@ static int softdog_stop(struct watchdog_device *w)
        if (del_timer(&softdog_ticktock))
                module_put(THIS_MODULE);
 
-       del_timer(&softdog_preticktock);
+       if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT))
+               del_timer(&softdog_preticktock);
 
        return 0;
 }
 
 static struct watchdog_info softdog_info = {
        .identity = "Software Watchdog",
-       .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE |
-                  WDIOF_PRETIMEOUT,
+       .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
 };
 
 static const struct watchdog_ops softdog_ops = {
@@ -134,6 +136,9 @@ static int __init softdog_init(void)
        watchdog_set_nowayout(&softdog_dev, nowayout);
        watchdog_stop_on_reboot(&softdog_dev);
 
+       if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT))
+               softdog_info.options |= WDIOF_PRETIMEOUT;
+
        ret = watchdog_register_device(&softdog_dev);
        if (ret)
                return ret;
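
softdog stops advertising WDIOF_PRETIMEOUT unconditionally and instead ORs it in only when CONFIG_SOFT_WATCHDOG_PRETIMEOUT is set. IS_ENABLED() evaluates to a constant 1 (=y or =m) or 0 (=n), so the dead branch is folded away yet still parsed and type-checked, unlike an #ifdef. A sketch contrasting the two styles, names hypothetical:

#include <linux/kconfig.h>
#include <linux/timer.h>

static struct timer_list my_preticktock;

static void my_stop_pretimeout(void)
{
#ifdef CONFIG_SOFT_WATCHDOG_PRETIMEOUT          /* old style: invisible when =n */
        del_timer(&my_preticktock);
#endif

        /* new style: always compiled, folded away by the optimizer when =n */
        if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT))
                del_timer(&my_preticktock);
}
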
index 1467fe50a76fad5399d3eb25babc4cdaed1d0049..00907973608c6ff7ef8a571c36f64af30945f3d9 100644 (file)
@@ -77,7 +77,7 @@ static const struct watchdog_info sun4v_wdt_ident = {
        .firmware_version = 0,
 };
 
-static struct watchdog_ops sun4v_wdt_ops = {
+static const struct watchdog_ops sun4v_wdt_ops = {
        .owner =        THIS_MODULE,
        .start =        sun4v_wdt_ping,
        .stop =         sun4v_wdt_stop,
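
A number of hunks in this stretch (s3c2410, sama5d4, sbsa, sirfsoc, sun4v here, and the W83627HF info below) only add const to a watchdog_ops or watchdog_info table, which the watchdog core now accepts; the tables land in .rodata, so a stray write becomes a compile error rather than silent corruption. A sketch with trivial callbacks:

#include <linux/module.h>
#include <linux/watchdog.h>

static int my_start(struct watchdog_device *wdd) { return 0; }
static int my_stop(struct watchdog_device *wdd)  { return 0; }

/* Lives in .rodata; "my_ops.start = ..." would no longer compile. */
static const struct watchdog_ops my_ops = {
        .owner = THIS_MODULE,
        .start = my_start,
        .stop  = my_stop,
};
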
index 953bb7b7446f95367d39923a6c6a11a186a9052a..9728fa32c357b88f5cab03300c96aeaf9f6f1968 100644 (file)
@@ -242,8 +242,6 @@ static int sunxi_wdt_probe(struct platform_device *pdev)
        if (!sunxi_wdt)
                return -EINVAL;
 
-       platform_set_drvdata(pdev, sunxi_wdt);
-
        device = of_match_device(sunxi_wdt_dt_ids, &pdev->dev);
        if (!device)
                return -ENODEV;
@@ -270,7 +268,8 @@ static int sunxi_wdt_probe(struct platform_device *pdev)
 
        sunxi_wdt_stop(&sunxi_wdt->wdt_dev);
 
-       err = watchdog_register_device(&sunxi_wdt->wdt_dev);
+       watchdog_stop_on_reboot(&sunxi_wdt->wdt_dev);
+       err = devm_watchdog_register_device(&pdev->dev, &sunxi_wdt->wdt_dev);
        if (unlikely(err))
                return err;
 
@@ -280,27 +279,8 @@ static int sunxi_wdt_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int sunxi_wdt_remove(struct platform_device *pdev)
-{
-       struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev);
-
-       watchdog_unregister_device(&sunxi_wdt->wdt_dev);
-       watchdog_set_drvdata(&sunxi_wdt->wdt_dev, NULL);
-
-       return 0;
-}
-
-static void sunxi_wdt_shutdown(struct platform_device *pdev)
-{
-       struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev);
-
-       sunxi_wdt_stop(&sunxi_wdt->wdt_dev);
-}
-
 static struct platform_driver sunxi_wdt_driver = {
        .probe          = sunxi_wdt_probe,
-       .remove         = sunxi_wdt_remove,
-       .shutdown       = sunxi_wdt_shutdown,
        .driver         = {
                .name           = DRV_NAME,
                .of_match_table = sunxi_wdt_dt_ids,
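
The sunxi conversion shows why the managed API pays off: devm_watchdog_register_device() unregisters on driver unbind, and watchdog_stop_on_reboot() has the core stop the timer from its own reboot notifier, so remove(), shutdown(), and the platform_set_drvdata() call that served them could all be deleted. A sketch of the resulting probe, names hypothetical:

#include <linux/platform_device.h>
#include <linux/watchdog.h>

static struct watchdog_device my_wdd;   /* info/ops/timeout assumed set up */

static int my_probe(struct platform_device *pdev)
{
        watchdog_stop_on_reboot(&my_wdd);       /* replaces .shutdown */
        /* devm unregistration replaces .remove */
        return devm_watchdog_register_device(&pdev->dev, &my_wdd);
}
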
index 202c4b9cc9212b779c9bc2145bbb9bac503c6cf6..d5fcce062920e0d1739e85b2956bd9d30b436daa 100644 (file)
@@ -15,9 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/notifier.h>
 #include <linux/platform_device.h>
-#include <linux/reboot.h>
 #include <linux/watchdog.h>
 
 #define DEFAULT_TIMEOUT 30
@@ -47,7 +45,6 @@ struct tangox_wdt_device {
        void __iomem *base;
        unsigned long clk_rate;
        struct clk *clk;
-       struct notifier_block restart;
 };
 
 static int tangox_wdt_set_timeout(struct watchdog_device *wdt,
@@ -96,24 +93,24 @@ static const struct watchdog_info tangox_wdt_info = {
        .identity = "tangox watchdog",
 };
 
+static int tangox_wdt_restart(struct watchdog_device *wdt,
+                             unsigned long action, void *data)
+{
+       struct tangox_wdt_device *dev = watchdog_get_drvdata(wdt);
+
+       writel(1, dev->base + WD_COUNTER);
+
+       return 0;
+}
+
 static const struct watchdog_ops tangox_wdt_ops = {
        .start          = tangox_wdt_start,
        .stop           = tangox_wdt_stop,
        .set_timeout    = tangox_wdt_set_timeout,
        .get_timeleft   = tangox_wdt_get_timeleft,
+       .restart        = tangox_wdt_restart,
 };
 
-static int tangox_wdt_restart(struct notifier_block *nb, unsigned long action,
-                             void *data)
-{
-       struct tangox_wdt_device *dev =
-               container_of(nb, struct tangox_wdt_device, restart);
-
-       writel(1, dev->base + WD_COUNTER);
-
-       return NOTIFY_DONE;
-}
-
 static int tangox_wdt_probe(struct platform_device *pdev)
 {
        struct tangox_wdt_device *dev;
@@ -174,18 +171,14 @@ static int tangox_wdt_probe(struct platform_device *pdev)
                tangox_wdt_start(&dev->wdt);
        }
 
+       watchdog_set_restart_priority(&dev->wdt, 128);
+
        err = watchdog_register_device(&dev->wdt);
        if (err)
                goto err;
 
        platform_set_drvdata(pdev, dev);
 
-       dev->restart.notifier_call = tangox_wdt_restart;
-       dev->restart.priority = 128;
-       err = register_restart_handler(&dev->restart);
-       if (err)
-               dev_warn(&pdev->dev, "failed to register restart handler\n");
-
        dev_info(&pdev->dev, "SMP86xx/SMP87xx watchdog registered\n");
 
        return 0;
@@ -202,7 +195,6 @@ static int tangox_wdt_remove(struct platform_device *pdev)
        tangox_wdt_stop(&dev->wdt);
        clk_disable_unprepare(dev->clk);
 
-       unregister_restart_handler(&dev->restart);
        watchdog_unregister_device(&dev->wdt);
 
        return 0;
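
tangox migrates its reset hook from a hand-rolled restart notifier_block to the watchdog core's .restart operation; watchdog_set_restart_priority() carries over the old notifier priority (128), and the core registers and unregisters the underlying handler itself. A sketch of the two pieces, names hypothetical:

#include <linux/watchdog.h>

static int my_restart(struct watchdog_device *wdd,
                      unsigned long action, void *data)
{
        /* Arm the hardware for an immediate reset here. */
        return 0;
}

static const struct watchdog_ops my_restart_ops = {
        .restart = my_restart,
};

static void my_setup(struct watchdog_device *wdd)
{
        /* Same weight the old notifier_block used. */
        watchdog_set_restart_priority(wdd, 128);
}
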
index 2d53c3f9394f2a39bf78175bf4c229c7f3bbab90..9403c08816e35f47e7338938674414953f184f20 100644 (file)
@@ -226,7 +226,7 @@ static int tegra_wdt_probe(struct platform_device *pdev)
 
        watchdog_set_nowayout(wdd, nowayout);
 
-       ret = watchdog_register_device(wdd);
+       ret = devm_watchdog_register_device(&pdev->dev, wdd);
        if (ret) {
                dev_err(&pdev->dev,
                        "failed to register watchdog device\n");
@@ -248,8 +248,6 @@ static int tegra_wdt_remove(struct platform_device *pdev)
 
        tegra_wdt_stop(&wdt->wdd);
 
-       watchdog_unregister_device(&wdt->wdd);
-
        dev_info(&pdev->dev, "removed wdt\n");
 
        return 0;
index 4b541934b6c59e6702345e14ea90360ff67a694e..17c25daebcceb6678467890593a0023b7167f82b 100644 (file)
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/fs.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/miscdevice.h>
-#include <linux/mutex.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/watchdog.h>
-#include <linux/uaccess.h>
+#include <linux/io.h>
 
-#define TS72XX_WDT_FEED_VAL            0x05
-#define TS72XX_WDT_DEFAULT_TIMEOUT     8
+#define TS72XX_WDT_DEFAULT_TIMEOUT     30
 
-static int timeout = TS72XX_WDT_DEFAULT_TIMEOUT;
+static int timeout;
 module_param(timeout, int, 0);
-MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds. "
-                         "(1 <= timeout <= 8, default="
-                         __MODULE_STRING(TS72XX_WDT_DEFAULT_TIMEOUT)
-                         ")");
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds.");
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close");
 
-/**
- * struct ts72xx_wdt - watchdog control structure
- * @lock: lock that protects this structure
- * @regval: watchdog timeout value suitable for control register
- * @flags: flags controlling watchdog device state
- * @control_reg: watchdog control register
- * @feed_reg: watchdog feed register
- * @pdev: back pointer to platform dev
- */
-struct ts72xx_wdt {
-       struct mutex    lock;
-       int             regval;
-
-#define TS72XX_WDT_BUSY_FLAG           1
-#define TS72XX_WDT_EXPECT_CLOSE_FLAG   2
-       int             flags;
+/* priv->control_reg */
+#define TS72XX_WDT_CTRL_DISABLE                0x00
+#define TS72XX_WDT_CTRL_250MS          0x01
+#define TS72XX_WDT_CTRL_500MS          0x02
+#define TS72XX_WDT_CTRL_1SEC           0x03
+#define TS72XX_WDT_CTRL_RESERVED       0x04
+#define TS72XX_WDT_CTRL_2SEC           0x05
+#define TS72XX_WDT_CTRL_4SEC           0x06
+#define TS72XX_WDT_CTRL_8SEC           0x07
+
+/* priv->feed_reg */
+#define TS72XX_WDT_FEED_VAL            0x05
 
+struct ts72xx_wdt_priv {
        void __iomem    *control_reg;
        void __iomem    *feed_reg;
-
-       struct platform_device *pdev;
+       struct watchdog_device wdd;
+       unsigned char regval;
 };
 
-static struct platform_device *ts72xx_wdt_pdev;
-
-/*
- * TS-72xx Watchdog supports following timeouts (value written
- * to control register):
- *     value   description
- *     -------------------------
- *     0x00    watchdog disabled
- *     0x01    250ms
- *     0x02    500ms
- *     0x03    1s
- *     0x04    reserved
- *     0x05    2s
- *     0x06    4s
- *     0x07    8s
- *
- * Timeouts below 1s are not very usable so we don't
- * allow them at all.
- *
- * We provide two functions that convert between these:
- * timeout_to_regval() and regval_to_timeout().
- */
-static const struct {
-       int     timeout;
-       int     regval;
-} ts72xx_wdt_map[] = {
-       { 1, 3 },
-       { 2, 5 },
-       { 4, 6 },
-       { 8, 7 },
-};
-
-/**
- * timeout_to_regval() - converts given timeout to control register value
- * @new_timeout: timeout in seconds to be converted
- *
- * Function converts given @new_timeout into valid value that can
- * be programmed into watchdog control register. When conversion is
- * not possible, function returns %-EINVAL.
- */
-static int timeout_to_regval(int new_timeout)
-{
-       int i;
-
-       /* first limit it to 1 - 8 seconds */
-       new_timeout = clamp_val(new_timeout, 1, 8);
-
-       for (i = 0; i < ARRAY_SIZE(ts72xx_wdt_map); i++) {
-               if (ts72xx_wdt_map[i].timeout >= new_timeout)
-                       return ts72xx_wdt_map[i].regval;
-       }
-
-       return -EINVAL;
-}
-
-/**
- * regval_to_timeout() - converts control register value to timeout
- * @regval: control register value to be converted
- *
- * Function converts given @regval to timeout in seconds (1, 2, 4 or 8).
- * If @regval cannot be converted, function returns %-EINVAL.
- */
-static int regval_to_timeout(int regval)
+static int ts72xx_wdt_start(struct watchdog_device *wdd)
 {
-       int i;
+       struct ts72xx_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-       for (i = 0; i < ARRAY_SIZE(ts72xx_wdt_map); i++) {
-               if (ts72xx_wdt_map[i].regval == regval)
-                       return ts72xx_wdt_map[i].timeout;
-       }
+       writeb(TS72XX_WDT_FEED_VAL, priv->feed_reg);
+       writeb(priv->regval, priv->control_reg);
 
-       return -EINVAL;
+       return 0;
 }
 
-/**
- * ts72xx_wdt_kick() - kick the watchdog
- * @wdt: watchdog to be kicked
- *
- * Called with @wdt->lock held.
- */
-static inline void ts72xx_wdt_kick(struct ts72xx_wdt *wdt)
+static int ts72xx_wdt_stop(struct watchdog_device *wdd)
 {
-       __raw_writeb(TS72XX_WDT_FEED_VAL, wdt->feed_reg);
-}
+       struct ts72xx_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-/**
- * ts72xx_wdt_start() - starts the watchdog timer
- * @wdt: watchdog to be started
- *
- * This function programs timeout to watchdog timer
- * and starts it.
- *
- * Called with @wdt->lock held.
- */
-static void ts72xx_wdt_start(struct ts72xx_wdt *wdt)
-{
-       /*
-        * To program the wdt, it first must be "fed" and
-        * only after that (within 30 usecs) the configuration
-        * can be changed.
-        */
-       ts72xx_wdt_kick(wdt);
-       __raw_writeb((u8)wdt->regval, wdt->control_reg);
-}
+       writeb(TS72XX_WDT_FEED_VAL, priv->feed_reg);
+       writeb(TS72XX_WDT_CTRL_DISABLE, priv->control_reg);
 
-/**
- * ts72xx_wdt_stop() - stops the watchdog timer
- * @wdt: watchdog to be stopped
- *
- * Called with @wdt->lock held.
- */
-static void ts72xx_wdt_stop(struct ts72xx_wdt *wdt)
-{
-       ts72xx_wdt_kick(wdt);
-       __raw_writeb(0, wdt->control_reg);
+       return 0;
 }
 
-static int ts72xx_wdt_open(struct inode *inode, struct file *file)
+static int ts72xx_wdt_ping(struct watchdog_device *wdd)
 {
-       struct ts72xx_wdt *wdt = platform_get_drvdata(ts72xx_wdt_pdev);
-       int regval;
-
-       /*
-        * Try to convert default timeout to valid register
-        * value first.
-        */
-       regval = timeout_to_regval(timeout);
-       if (regval < 0) {
-               dev_err(&wdt->pdev->dev,
-                       "failed to convert timeout (%d) to register value\n",
-                       timeout);
-               return regval;
-       }
-
-       if (mutex_lock_interruptible(&wdt->lock))
-               return -ERESTARTSYS;
+       struct ts72xx_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-       if ((wdt->flags & TS72XX_WDT_BUSY_FLAG) != 0) {
-               mutex_unlock(&wdt->lock);
-               return -EBUSY;
-       }
-
-       wdt->flags = TS72XX_WDT_BUSY_FLAG;
-       wdt->regval = regval;
-       file->private_data = wdt;
-
-       ts72xx_wdt_start(wdt);
+       writeb(TS72XX_WDT_FEED_VAL, priv->feed_reg);
 
-       mutex_unlock(&wdt->lock);
-       return nonseekable_open(inode, file);
+       return 0;
 }
 
-static int ts72xx_wdt_release(struct inode *inode, struct file *file)
+static int ts72xx_wdt_settimeout(struct watchdog_device *wdd, unsigned int to)
 {
-       struct ts72xx_wdt *wdt = file->private_data;
-
-       if (mutex_lock_interruptible(&wdt->lock))
-               return -ERESTARTSYS;
-
-       if ((wdt->flags & TS72XX_WDT_EXPECT_CLOSE_FLAG) != 0) {
-               ts72xx_wdt_stop(wdt);
+       struct ts72xx_wdt_priv *priv = watchdog_get_drvdata(wdd);
+
+       if (to == 1) {
+               priv->regval = TS72XX_WDT_CTRL_1SEC;
+       } else if (to == 2) {
+               priv->regval = TS72XX_WDT_CTRL_2SEC;
+       } else if (to <= 4) {
+               priv->regval = TS72XX_WDT_CTRL_4SEC;
+               to = 4;
        } else {
-               dev_warn(&wdt->pdev->dev,
-                        "TS-72XX WDT device closed unexpectly. "
-                        "Watchdog timer will not stop!\n");
-               /*
-                * Kick it one more time, to give userland some time
-                * to recover (for example, respawning the kicker
-                * daemon).
-                */
-               ts72xx_wdt_kick(wdt);
+               priv->regval = TS72XX_WDT_CTRL_8SEC;
+               if (to <= 8)
+                       to = 8;
        }
 
-       wdt->flags = 0;
+       wdd->timeout = to;
 
-       mutex_unlock(&wdt->lock);
-       return 0;
-}
-
-static ssize_t ts72xx_wdt_write(struct file *file,
-                               const char __user *data,
-                               size_t len,
-                               loff_t *ppos)
-{
-       struct ts72xx_wdt *wdt = file->private_data;
-
-       if (!len)
-               return 0;
-
-       if (mutex_lock_interruptible(&wdt->lock))
-               return -ERESTARTSYS;
-
-       ts72xx_wdt_kick(wdt);
-
-       /*
-        * Support for magic character closing. User process
-        * writes 'V' into the device, just before it is closed.
-        * This means that we know that the wdt timer can be
-        * stopped after user closes the device.
-        */
-       if (!nowayout) {
-               int i;
-
-               for (i = 0; i < len; i++) {
-                       char c;
-
-                       /* In case it was set long ago */
-                       wdt->flags &= ~TS72XX_WDT_EXPECT_CLOSE_FLAG;
-
-                       if (get_user(c, data + i)) {
-                               mutex_unlock(&wdt->lock);
-                               return -EFAULT;
-                       }
-                       if (c == 'V') {
-                               wdt->flags |= TS72XX_WDT_EXPECT_CLOSE_FLAG;
-                               break;
-                       }
-               }
+       if (watchdog_active(wdd)) {
+               ts72xx_wdt_stop(wdd);
+               ts72xx_wdt_start(wdd);
        }
 
-       mutex_unlock(&wdt->lock);
-       return len;
+       return 0;
 }
 
-static const struct watchdog_info winfo = {
-       .options                = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT |
+static const struct watchdog_info ts72xx_wdt_ident = {
+       .options                = WDIOF_KEEPALIVEPING |
+                                 WDIOF_SETTIMEOUT |
                                  WDIOF_MAGICCLOSE,
        .firmware_version       = 1,
        .identity               = "TS-72XX WDT",
 };
 
-static long ts72xx_wdt_ioctl(struct file *file, unsigned int cmd,
-                            unsigned long arg)
-{
-       struct ts72xx_wdt *wdt = file->private_data;
-       void __user *argp = (void __user *)arg;
-       int __user *p = (int __user *)argp;
-       int error = 0;
-
-       if (mutex_lock_interruptible(&wdt->lock))
-               return -ERESTARTSYS;
-
-       switch (cmd) {
-       case WDIOC_GETSUPPORT:
-               if (copy_to_user(argp, &winfo, sizeof(winfo)))
-                       error = -EFAULT;
-               break;
-
-       case WDIOC_GETSTATUS:
-       case WDIOC_GETBOOTSTATUS:
-               error = put_user(0, p);
-               break;
-
-       case WDIOC_KEEPALIVE:
-               ts72xx_wdt_kick(wdt);
-               break;
-
-       case WDIOC_SETOPTIONS: {
-               int options;
-
-               error = get_user(options, p);
-               if (error)
-                       break;
-
-               error = -EINVAL;
-
-               if ((options & WDIOS_DISABLECARD) != 0) {
-                       ts72xx_wdt_stop(wdt);
-                       error = 0;
-               }
-               if ((options & WDIOS_ENABLECARD) != 0) {
-                       ts72xx_wdt_start(wdt);
-                       error = 0;
-               }
-
-               break;
-       }
-
-       case WDIOC_SETTIMEOUT: {
-               int new_timeout;
-               int regval;
-
-               error = get_user(new_timeout, p);
-               if (error)
-                       break;
-
-               regval = timeout_to_regval(new_timeout);
-               if (regval < 0) {
-                       error = regval;
-                       break;
-               }
-               ts72xx_wdt_stop(wdt);
-               wdt->regval = regval;
-               ts72xx_wdt_start(wdt);
-
-               /*FALLTHROUGH*/
-       }
-
-       case WDIOC_GETTIMEOUT:
-               error = put_user(regval_to_timeout(wdt->regval), p);
-               break;
-
-       default:
-               error = -ENOTTY;
-               break;
-       }
-
-       mutex_unlock(&wdt->lock);
-       return error;
-}
-
-static const struct file_operations ts72xx_wdt_fops = {
+static struct watchdog_ops ts72xx_wdt_ops = {
        .owner          = THIS_MODULE,
-       .llseek         = no_llseek,
-       .open           = ts72xx_wdt_open,
-       .release        = ts72xx_wdt_release,
-       .write          = ts72xx_wdt_write,
-       .unlocked_ioctl = ts72xx_wdt_ioctl,
-};
-
-static struct miscdevice ts72xx_wdt_miscdev = {
-       .minor          = WATCHDOG_MINOR,
-       .name           = "watchdog",
-       .fops           = &ts72xx_wdt_fops,
+       .start          = ts72xx_wdt_start,
+       .stop           = ts72xx_wdt_stop,
+       .ping           = ts72xx_wdt_ping,
+       .set_timeout    = ts72xx_wdt_settimeout,
 };
 
 static int ts72xx_wdt_probe(struct platform_device *pdev)
 {
-       struct ts72xx_wdt *wdt;
-       struct resource *r1, *r2;
-       int error = 0;
+       struct ts72xx_wdt_priv *priv;
+       struct watchdog_device *wdd;
+       struct resource *res;
+       int ret;
 
-       wdt = devm_kzalloc(&pdev->dev, sizeof(struct ts72xx_wdt), GFP_KERNEL);
-       if (!wdt)
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
                return -ENOMEM;
 
-       r1 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       wdt->control_reg = devm_ioremap_resource(&pdev->dev, r1);
-       if (IS_ERR(wdt->control_reg))
-               return PTR_ERR(wdt->control_reg);
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       priv->control_reg = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(priv->control_reg))
+               return PTR_ERR(priv->control_reg);
 
-       r2 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       wdt->feed_reg = devm_ioremap_resource(&pdev->dev, r2);
-       if (IS_ERR(wdt->feed_reg))
-               return PTR_ERR(wdt->feed_reg);
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       priv->feed_reg = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(priv->feed_reg))
+               return PTR_ERR(priv->feed_reg);
 
-       platform_set_drvdata(pdev, wdt);
-       ts72xx_wdt_pdev = pdev;
-       wdt->pdev = pdev;
-       mutex_init(&wdt->lock);
+       wdd = &priv->wdd;
+       wdd->info = &ts72xx_wdt_ident;
+       wdd->ops = &ts72xx_wdt_ops;
+       wdd->min_timeout = 1;
+       wdd->max_hw_heartbeat_ms = 8000;
+       wdd->parent = &pdev->dev;
 
-       /* make sure that the watchdog is disabled */
-       ts72xx_wdt_stop(wdt);
+       watchdog_set_nowayout(wdd, nowayout);
 
-       error = misc_register(&ts72xx_wdt_miscdev);
-       if (error) {
-               dev_err(&pdev->dev, "failed to register miscdev\n");
-               return error;
-       }
+       wdd->timeout = TS72XX_WDT_DEFAULT_TIMEOUT;
+       watchdog_init_timeout(wdd, timeout, &pdev->dev);
 
-       dev_info(&pdev->dev, "TS-72xx Watchdog driver\n");
+       watchdog_set_drvdata(wdd, priv);
 
-       return 0;
-}
+       ret = devm_watchdog_register_device(&pdev->dev, wdd);
+       if (ret)
+               return ret;
+
+       dev_info(&pdev->dev, "TS-72xx Watchdog driver\n");
 
-static int ts72xx_wdt_remove(struct platform_device *pdev)
-{
-       misc_deregister(&ts72xx_wdt_miscdev);
        return 0;
 }
 
 static struct platform_driver ts72xx_wdt_driver = {
        .probe          = ts72xx_wdt_probe,
-       .remove         = ts72xx_wdt_remove,
        .driver         = {
                .name   = "ts72xx-wdt",
        },
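
The ts72xx rewrite drops a few hundred lines of hand-rolled miscdevice plumbing (open/release/write/ioctl, magic-close tracking, its own mutex) in favor of the watchdog framework, which supplies all of that generically. The key enabler for the new 30 s default on 8 s hardware is max_hw_heartbeat_ms: it tells the core the hardware's real limit so the core re-pings on the user's behalf for longer timeouts. A sketch of that configuration, names hypothetical:

#include <linux/watchdog.h>

static void my_cfg(struct watchdog_device *wdd)
{
        wdd->min_timeout = 1;
        wdd->max_hw_heartbeat_ms = 8000;  /* hardware can only count 8 s */
        wdd->timeout = 30;      /* user-visible; the core keeps hardware fed */
}
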
index ef2ecaf53a147e54ca96c862b43e4c87e59903b6..98fd186c6878833c830b7a45779789912c0c7973 100644 (file)
@@ -297,7 +297,7 @@ static unsigned int wdt_get_time(struct watchdog_device *wdog)
  *     Kernel Interfaces
  */
 
-static struct watchdog_info wdt_info = {
+static const struct watchdog_info wdt_info = {
        .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
        .identity = "W83627HF Watchdog",
 };
index 32930a073a12744b6d6cf1bcc0b154effea431ab..d5d2bbd8f428565357bb6e2f44be6adc9fa84701 100644 (file)
@@ -987,6 +987,11 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd)
        wdd->wd_data = NULL;
        mutex_unlock(&wd_data->lock);
 
+       if (watchdog_active(wdd) &&
+           test_bit(WDOG_STOP_ON_UNREGISTER, &wdd->status)) {
+               watchdog_stop(wdd);
+       }
+
        cancel_delayed_work_sync(&wd_data->work);
 
        kref_put(&wd_data->kref, watchdog_core_data_release);
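
The watchdog core hunk honors a new WDOG_STOP_ON_UNREGISTER status bit: if a driver opted in and the timer is still running when the char device is torn down, the core stops the hardware instead of leaving it ticking with nobody left to ping it. Opting in is one call before registration; a sketch, names hypothetical:

#include <linux/watchdog.h>

static void my_register_prep(struct watchdog_device *wdd)
{
        /* Have the core stop the hardware when wdd is unregistered. */
        watchdog_stop_on_unregister(wdd);
}
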
index 8d1184aee932e064240eba56195b812e32229f6e..1ddc1f742cd4fff66f2e3dd96f60de2fc2b24606 100644 (file)
@@ -194,7 +194,7 @@ static int wm831x_wdt_probe(struct platform_device *pdev)
        if (ret < 0) {
                dev_err(wm831x->dev, "Failed to read watchdog status: %d\n",
                        ret);
-               goto err;
+               return ret;
        }
        reg = ret;
 
@@ -203,10 +203,8 @@ static int wm831x_wdt_probe(struct platform_device *pdev)
 
        driver_data = devm_kzalloc(&pdev->dev, sizeof(*driver_data),
                                   GFP_KERNEL);
-       if (!driver_data) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       if (!driver_data)
+               return -ENOMEM;
 
        mutex_init(&driver_data->lock);
        driver_data->wm831x = wm831x;
@@ -253,7 +251,7 @@ static int wm831x_wdt_probe(struct platform_device *pdev)
                                dev_err(wm831x->dev,
                                        "Failed to request update GPIO: %d\n",
                                        ret);
-                               goto err;
+                               return ret;
                        }
 
                        driver_data->update_gpio = pdata->update_gpio;
@@ -269,37 +267,22 @@ static int wm831x_wdt_probe(struct platform_device *pdev)
                } else {
                        dev_err(wm831x->dev,
                                "Failed to unlock security key: %d\n", ret);
-                       goto err;
+                       return ret;
                }
        }
 
-       ret = watchdog_register_device(&driver_data->wdt);
+       ret = devm_watchdog_register_device(&pdev->dev, &driver_data->wdt);
        if (ret != 0) {
                dev_err(wm831x->dev, "watchdog_register_device() failed: %d\n",
                        ret);
-               goto err;
+               return ret;
        }
 
-       platform_set_drvdata(pdev, driver_data);
-
-       return 0;
-
-err:
-       return ret;
-}
-
-static int wm831x_wdt_remove(struct platform_device *pdev)
-{
-       struct wm831x_wdt_drvdata *driver_data = platform_get_drvdata(pdev);
-
-       watchdog_unregister_device(&driver_data->wdt);
-
        return 0;
 }
 
 static struct platform_driver wm831x_wdt_driver = {
        .probe = wm831x_wdt_probe,
-       .remove = wm831x_wdt_remove,
        .driver = {
                .name = "wm831x-watchdog",
        },
diff --git a/drivers/watchdog/zx2967_wdt.c b/drivers/watchdog/zx2967_wdt.c
new file mode 100644 (file)
index 0000000..e290d5a
--- /dev/null
@@ -0,0 +1,291 @@
+/*
+ * watchdog driver for ZTE's zx2967 family
+ *
+ * Copyright (C) 2017 ZTE Ltd.
+ *
+ * Author: Baoyou Xie <baoyou.xie@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/watchdog.h>
+
+#define ZX2967_WDT_CFG_REG                     0x4
+#define ZX2967_WDT_LOAD_REG                    0x8
+#define ZX2967_WDT_REFRESH_REG                 0x18
+#define ZX2967_WDT_START_REG                   0x1c
+
+#define ZX2967_WDT_REFRESH_MASK                        GENMASK(5, 0)
+
+#define ZX2967_WDT_CFG_DIV(n)                  ((((n) & 0xff) - 1) << 8)
+#define ZX2967_WDT_START_EN                    0x1
+
+/*
+ * Hardware magic number.
+ * When a watchdog register is written, the lowest 16 bits are valid, but
+ * the highest 16 bits must always be this number.
+ */
+#define ZX2967_WDT_WRITEKEY                    (0x1234 << 16)
+#define ZX2967_WDT_VAL_MASK                    GENMASK(15, 0)
+
+#define ZX2967_WDT_DIV_DEFAULT                 16
+#define ZX2967_WDT_DEFAULT_TIMEOUT             32
+#define ZX2967_WDT_MIN_TIMEOUT                 1
+#define ZX2967_WDT_MAX_TIMEOUT                 524
+#define ZX2967_WDT_MAX_COUNT                   0xffff
+
+#define ZX2967_WDT_CLK_FREQ                    0x8000
+
+#define ZX2967_WDT_FLAG_REBOOT_MON             BIT(0)
+
+struct zx2967_wdt {
+       struct watchdog_device  wdt_device;
+       void __iomem            *reg_base;
+       struct clk              *clock;
+};
+
+static inline u32 zx2967_wdt_readl(struct zx2967_wdt *wdt, u16 reg)
+{
+       return readl_relaxed(wdt->reg_base + reg);
+}
+
+static inline void zx2967_wdt_writel(struct zx2967_wdt *wdt, u16 reg, u32 val)
+{
+       writel_relaxed(val | ZX2967_WDT_WRITEKEY, wdt->reg_base + reg);
+}
+
+static void zx2967_wdt_refresh(struct zx2967_wdt *wdt)
+{
+       u32 val;
+
+       val = zx2967_wdt_readl(wdt, ZX2967_WDT_REFRESH_REG);
+       /*
+        * Bits 4-5 (values 1 and 2): refresh config info
+        * Bits 2-3 (values 1 and 2): refresh counter
+        * Bits 0-1 (values 1 and 2): refresh int-value
+        * Toggling each two-bit group between 1 and 2 refreshes all data.
+        */
+       val ^= ZX2967_WDT_REFRESH_MASK;
+       zx2967_wdt_writel(wdt, ZX2967_WDT_REFRESH_REG,
+                         val & ZX2967_WDT_VAL_MASK);
+}
+
+static int
+zx2967_wdt_set_timeout(struct watchdog_device *wdd, unsigned int timeout)
+{
+       struct zx2967_wdt *wdt = watchdog_get_drvdata(wdd);
+       unsigned int divisor = ZX2967_WDT_DIV_DEFAULT;
+       u32 count;
+
+       count = timeout * ZX2967_WDT_CLK_FREQ;
+       if (count > divisor * ZX2967_WDT_MAX_COUNT)
+               divisor = DIV_ROUND_UP(count, ZX2967_WDT_MAX_COUNT);
+       count = DIV_ROUND_UP(count, divisor);
+       zx2967_wdt_writel(wdt, ZX2967_WDT_CFG_REG,
+                       ZX2967_WDT_CFG_DIV(divisor) & ZX2967_WDT_VAL_MASK);
+       zx2967_wdt_writel(wdt, ZX2967_WDT_LOAD_REG,
+                       count & ZX2967_WDT_VAL_MASK);
+       zx2967_wdt_refresh(wdt);
+       wdd->timeout = (count * divisor) / ZX2967_WDT_CLK_FREQ;
+
+       return 0;
+}
+
+static void __zx2967_wdt_start(struct zx2967_wdt *wdt)
+{
+       u32 val;
+
+       val = zx2967_wdt_readl(wdt, ZX2967_WDT_START_REG);
+       val |= ZX2967_WDT_START_EN;
+       zx2967_wdt_writel(wdt, ZX2967_WDT_START_REG,
+                       val & ZX2967_WDT_VAL_MASK);
+}
+
+static void __zx2967_wdt_stop(struct zx2967_wdt *wdt)
+{
+       u32 val;
+
+       val = zx2967_wdt_readl(wdt, ZX2967_WDT_START_REG);
+       val &= ~ZX2967_WDT_START_EN;
+       zx2967_wdt_writel(wdt, ZX2967_WDT_START_REG,
+                       val & ZX2967_WDT_VAL_MASK);
+}
+
+static int zx2967_wdt_start(struct watchdog_device *wdd)
+{
+       struct zx2967_wdt *wdt = watchdog_get_drvdata(wdd);
+
+       zx2967_wdt_set_timeout(wdd, wdd->timeout);
+       __zx2967_wdt_start(wdt);
+
+       return 0;
+}
+
+static int zx2967_wdt_stop(struct watchdog_device *wdd)
+{
+       struct zx2967_wdt *wdt = watchdog_get_drvdata(wdd);
+
+       __zx2967_wdt_stop(wdt);
+
+       return 0;
+}
+
+static int zx2967_wdt_keepalive(struct watchdog_device *wdd)
+{
+       struct zx2967_wdt *wdt = watchdog_get_drvdata(wdd);
+
+       zx2967_wdt_refresh(wdt);
+
+       return 0;
+}
+
+#define ZX2967_WDT_OPTIONS \
+       (WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE)
+static const struct watchdog_info zx2967_wdt_ident = {
+       .options          =     ZX2967_WDT_OPTIONS,
+       .identity         =     "zx2967 watchdog",
+};
+
+static struct watchdog_ops zx2967_wdt_ops = {
+       .owner = THIS_MODULE,
+       .start = zx2967_wdt_start,
+       .stop = zx2967_wdt_stop,
+       .ping = zx2967_wdt_keepalive,
+       .set_timeout = zx2967_wdt_set_timeout,
+};
+
+static void zx2967_wdt_reset_sysctrl(struct device *dev)
+{
+       int ret;
+       void __iomem *regmap;
+       unsigned int offset, mask, config;
+       struct of_phandle_args out_args;
+
+       ret = of_parse_phandle_with_fixed_args(dev->of_node,
+                       "zte,wdt-reset-sysctrl", 3, 0, &out_args);
+       if (ret)
+               return;
+
+       offset = out_args.args[0];
+       config = out_args.args[1];
+       mask = out_args.args[2];
+
+       regmap = syscon_node_to_regmap(out_args.np);
+       if (IS_ERR(regmap)) {
+               of_node_put(out_args.np);
+               return;
+       }
+
+       regmap_update_bits(regmap, offset, mask, config);
+       of_node_put(out_args.np);
+}
+
+static int zx2967_wdt_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct zx2967_wdt *wdt;
+       struct resource *base;
+       int ret;
+       struct reset_control *rstc;
+
+       wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
+       if (!wdt)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, wdt);
+
+       wdt->wdt_device.info = &zx2967_wdt_ident;
+       wdt->wdt_device.ops = &zx2967_wdt_ops;
+       wdt->wdt_device.timeout = ZX2967_WDT_DEFAULT_TIMEOUT;
+       wdt->wdt_device.max_timeout = ZX2967_WDT_MAX_TIMEOUT;
+       wdt->wdt_device.min_timeout = ZX2967_WDT_MIN_TIMEOUT;
+       wdt->wdt_device.parent = &pdev->dev;
+
+       base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       wdt->reg_base = devm_ioremap_resource(dev, base);
+       if (IS_ERR(wdt->reg_base)) {
+               dev_err(dev, "ioremap failed\n");
+               return PTR_ERR(wdt->reg_base);
+       }
+
+       zx2967_wdt_reset_sysctrl(dev);
+
+       wdt->clock = devm_clk_get(dev, NULL);
+       if (IS_ERR(wdt->clock)) {
+               dev_err(dev, "failed to find watchdog clock source\n");
+               return PTR_ERR(wdt->clock);
+       }
+
+       ret = clk_prepare_enable(wdt->clock);
+       if (ret < 0) {
+               dev_err(dev, "failed to enable clock\n");
+               return ret;
+       }
+       clk_set_rate(wdt->clock, ZX2967_WDT_CLK_FREQ);
+
+       rstc = devm_reset_control_get(dev, NULL);
+       if (IS_ERR(rstc)) {
+               dev_err(dev, "failed to get rstc");
+               ret = PTR_ERR(rstc);
+               goto err;
+       }
+
+       reset_control_assert(rstc);
+       reset_control_deassert(rstc);
+
+       watchdog_set_drvdata(&wdt->wdt_device, wdt);
+       watchdog_init_timeout(&wdt->wdt_device,
+                       ZX2967_WDT_DEFAULT_TIMEOUT, dev);
+       watchdog_set_nowayout(&wdt->wdt_device, WATCHDOG_NOWAYOUT);
+
+       ret = watchdog_register_device(&wdt->wdt_device);
+       if (ret)
+               goto err;
+
+       dev_info(dev, "watchdog enabled (timeout=%d sec, nowayout=%d)",
+                wdt->wdt_device.timeout, WATCHDOG_NOWAYOUT);
+
+       return 0;
+
+err:
+       clk_disable_unprepare(wdt->clock);
+       return ret;
+}
+
+static int zx2967_wdt_remove(struct platform_device *pdev)
+{
+       struct zx2967_wdt *wdt = platform_get_drvdata(pdev);
+
+       watchdog_unregister_device(&wdt->wdt_device);
+       clk_disable_unprepare(wdt->clock);
+
+       return 0;
+}
+
+static const struct of_device_id zx2967_wdt_match[] = {
+       { .compatible = "zte,zx296718-wdt", },
+       {}
+};
+MODULE_DEVICE_TABLE(of, zx2967_wdt_match);
+
+static struct platform_driver zx2967_wdt_driver = {
+       .probe          = zx2967_wdt_probe,
+       .remove         = zx2967_wdt_remove,
+       .driver         = {
+               .name   = "zx2967-wdt",
+               .of_match_table = of_match_ptr(zx2967_wdt_match),
+       },
+};
+module_platform_driver(zx2967_wdt_driver);
+
+MODULE_AUTHOR("Baoyou Xie <baoyou.xie@linaro.org>");
+MODULE_DESCRIPTION("ZTE zx2967 Watchdog Device Driver");
+MODULE_LICENSE("GPL v2");
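
A new driver for ZTE's zx2967 family. Its one unusual wrinkle is the write key visible above: every register write must carry 0x1234 in the upper 16 bits or the hardware discards it, which the zx2967_wdt_writel() wrapper centralizes. A standalone sketch of that access pattern, assuming the same convention:

#include <linux/io.h>
#include <linux/types.h>

#define MY_WRITEKEY     (0x1234 << 16)  /* upper half must match */
#define MY_VAL_MASK     0xffffU

static void my_keyed_writel(void __iomem *base, u16 reg, u32 val)
{
        writel_relaxed((val & MY_VAL_MASK) | MY_WRITEKEY, base + reg);
}
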
index 2077a3ac7c0ca5ac12b111981ec7c64a59fa77f0..7a92a5e1d40c6f17227936ee2b6925286deab511 100644 (file)
@@ -804,10 +804,10 @@ static void privcmd_close(struct vm_area_struct *vma)
        kfree(pages);
 }
 
-static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int privcmd_fault(struct vm_fault *vmf)
 {
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
-              vma, vma->vm_start, vma->vm_end,
+              vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
               vmf->pgoff, (void *)vmf->address);
 
        return VM_FAULT_SIGBUS;
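
The filesystem hunks here (privcmd above, v9fs next) are fallout from an mm interface change in this window: ->fault and ->page_mkwrite handlers lost their separate struct vm_area_struct * argument because struct vm_fault now carries the VMA as vmf->vma. Converting a handler is mechanical; a sketch:

#include <linux/mm.h>
#include <linux/printk.h>

/* Old form: static int my_fault(struct vm_area_struct *vma, struct vm_fault *vmf) */
static int my_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;  /* the VMA now rides in vmf */

        pr_debug("fault in %lx-%lx at pgoff %lx\n",
                 vma->vm_start, vma->vm_end, vmf->pgoff);
        return VM_FAULT_SIGBUS;
}
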
index 6a0f3fa85ef7c3390fa00d79c0b1773f41a68778..3de3b4a89d89a16a2f23b63a990f8959568583df 100644 (file)
@@ -534,11 +534,11 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
 }
 
 static int
-v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+v9fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
        struct v9fs_inode *v9inode;
        struct page *page = vmf->page;
-       struct file *filp = vma->vm_file;
+       struct file *filp = vmf->vma->vm_file;
        struct inode *inode = file_inode(filp);
 
 
index 2f088773f1c0ac49672da64bfb7e49c15696a14b..2f8bab390d1379731ce889ff2b1beec5ddf91a53 100644 (file)
@@ -138,9 +138,9 @@ extern int  affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh);
 extern int     affs_remove_header(struct dentry *dentry);
 extern u32     affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
 extern void    affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
-extern void    secs_to_datestamp(time64_t secs, struct affs_date *ds);
-extern umode_t prot_to_mode(u32 prot);
-extern void    mode_to_prot(struct inode *inode);
+extern void    affs_secs_to_datestamp(time64_t secs, struct affs_date *ds);
+extern umode_t affs_prot_to_mode(u32 prot);
+extern void    affs_mode_to_prot(struct inode *inode);
 __printf(3, 4)
 extern void    affs_error(struct super_block *sb, const char *function,
                           const char *fmt, ...);
@@ -162,6 +162,7 @@ extern void affs_free_bitmap(struct super_block *sb);
 
 /* namei.c */
 
+extern const struct export_operations affs_export_ops;
 extern int     affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
 extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
 extern int     affs_unlink(struct inode *dir, struct dentry *dentry);
@@ -178,7 +179,6 @@ extern int  affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 /* inode.c */
 
-extern unsigned long            affs_parent_ino(struct inode *dir);
 extern struct inode            *affs_new_inode(struct inode *dir);
 extern int                      affs_notify_change(struct dentry *dentry, struct iattr *attr);
 extern void                     affs_evict_inode(struct inode *inode);
@@ -213,6 +213,12 @@ extern const struct address_space_operations        affs_aops_ofs;
 extern const struct dentry_operations   affs_dentry_operations;
 extern const struct dentry_operations   affs_intl_dentry_operations;
 
+static inline bool affs_validblock(struct super_block *sb, int block)
+{
+       return(block >= AFFS_SB(sb)->s_reserved &&
+              block < AFFS_SB(sb)->s_partition_size);
+}
+
 static inline void
 affs_set_blocksize(struct super_block *sb, int size)
 {
@@ -222,7 +228,7 @@ static inline struct buffer_head *
 affs_bread(struct super_block *sb, int block)
 {
        pr_debug("%s: %d\n", __func__, block);
-       if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
+       if (affs_validblock(sb, block))
                return sb_bread(sb, block);
        return NULL;
 }
@@ -230,7 +236,7 @@ static inline struct buffer_head *
 affs_getblk(struct super_block *sb, int block)
 {
        pr_debug("%s: %d\n", __func__, block);
-       if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
+       if (affs_validblock(sb, block))
                return sb_getblk(sb, block);
        return NULL;
 }
@@ -239,7 +245,7 @@ affs_getzeroblk(struct super_block *sb, int block)
 {
        struct buffer_head *bh;
        pr_debug("%s: %d\n", __func__, block);
-       if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
+       if (affs_validblock(sb, block)) {
                bh = sb_getblk(sb, block);
                lock_buffer(bh);
                memset(bh->b_data, 0 , sb->s_blocksize);
@@ -254,7 +260,7 @@ affs_getemptyblk(struct super_block *sb, int block)
 {
        struct buffer_head *bh;
        pr_debug("%s: %d\n", __func__, block);
-       if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
+       if (affs_validblock(sb, block)) {
                bh = sb_getblk(sb, block);
                wait_on_buffer(bh);
                set_buffer_uptodate(bh);
index 0ec65c133b93408408c21d70c66887567390d470..b573c3b9a3287014c469ea957be95546a23c8b77 100644 (file)
@@ -367,7 +367,7 @@ affs_fix_checksum(struct super_block *sb, struct buffer_head *bh)
 }
 
 void
-secs_to_datestamp(time64_t secs, struct affs_date *ds)
+affs_secs_to_datestamp(time64_t secs, struct affs_date *ds)
 {
        u32      days;
        u32      minute;
@@ -386,55 +386,55 @@ secs_to_datestamp(time64_t secs, struct affs_date *ds)
 }
 
 umode_t
-prot_to_mode(u32 prot)
+affs_prot_to_mode(u32 prot)
 {
        umode_t mode = 0;
 
        if (!(prot & FIBF_NOWRITE))
-               mode |= S_IWUSR;
+               mode |= 0200;
        if (!(prot & FIBF_NOREAD))
-               mode |= S_IRUSR;
+               mode |= 0400;
        if (!(prot & FIBF_NOEXECUTE))
-               mode |= S_IXUSR;
+               mode |= 0100;
        if (prot & FIBF_GRP_WRITE)
-               mode |= S_IWGRP;
+               mode |= 0020;
        if (prot & FIBF_GRP_READ)
-               mode |= S_IRGRP;
+               mode |= 0040;
        if (prot & FIBF_GRP_EXECUTE)
-               mode |= S_IXGRP;
+               mode |= 0010;
        if (prot & FIBF_OTR_WRITE)
-               mode |= S_IWOTH;
+               mode |= 0002;
        if (prot & FIBF_OTR_READ)
-               mode |= S_IROTH;
+               mode |= 0004;
        if (prot & FIBF_OTR_EXECUTE)
-               mode |= S_IXOTH;
+               mode |= 0001;
 
        return mode;
 }
 
 void
-mode_to_prot(struct inode *inode)
+affs_mode_to_prot(struct inode *inode)
 {
        u32 prot = AFFS_I(inode)->i_protect;
        umode_t mode = inode->i_mode;
 
-       if (!(mode & S_IXUSR))
+       if (!(mode & 0100))
                prot |= FIBF_NOEXECUTE;
-       if (!(mode & S_IRUSR))
+       if (!(mode & 0400))
                prot |= FIBF_NOREAD;
-       if (!(mode & S_IWUSR))
+       if (!(mode & 0200))
                prot |= FIBF_NOWRITE;
-       if (mode & S_IXGRP)
+       if (mode & 0010)
                prot |= FIBF_GRP_EXECUTE;
-       if (mode & S_IRGRP)
+       if (mode & 0040)
                prot |= FIBF_GRP_READ;
-       if (mode & S_IWGRP)
+       if (mode & 0020)
                prot |= FIBF_GRP_WRITE;
-       if (mode & S_IXOTH)
+       if (mode & 0001)
                prot |= FIBF_OTR_EXECUTE;
-       if (mode & S_IROTH)
+       if (mode & 0004)
                prot |= FIBF_OTR_READ;
-       if (mode & S_IWOTH)
+       if (mode & 0002)
                prot |= FIBF_OTR_WRITE;
 
        AFFS_I(inode)->i_protect = prot;
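
The affs permission hunks swap the S_I* macros for raw octal, in line with the kernel's drift toward plain octal mode bits. The values are identical; a compile-time check (illustrative only, not part of the patch) makes the equivalence explicit:

#include <linux/bug.h>
#include <linux/stat.h>

static inline void affs_mode_bits_check(void)
{
        BUILD_BUG_ON(S_IRUSR != 0400 || S_IWUSR != 0200 || S_IXUSR != 0100);
        BUILD_BUG_ON(S_IRGRP != 0040 || S_IWGRP != 0020 || S_IXGRP != 0010);
        BUILD_BUG_ON(S_IROTH != 0004 || S_IWOTH != 0002 || S_IXOTH != 0001);
}
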
index fe4e1290dbb5d59288b79463250ec9be378b0227..a5e6097eb5a9dd157576db7e546d6b94a5a54970 100644 (file)
@@ -69,7 +69,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
        if (affs_test_opt(sbi->s_flags, SF_SETMODE))
                inode->i_mode = sbi->s_mode;
        else
-               inode->i_mode = prot_to_mode(prot);
+               inode->i_mode = affs_prot_to_mode(prot);
 
        id = be16_to_cpu(tail->uid);
        if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
@@ -184,11 +184,12 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
        }
        tail = AFFS_TAIL(sb, bh);
        if (tail->stype == cpu_to_be32(ST_ROOT)) {
-               secs_to_datestamp(inode->i_mtime.tv_sec,&AFFS_ROOT_TAIL(sb, bh)->root_change);
+               affs_secs_to_datestamp(inode->i_mtime.tv_sec,
+                                      &AFFS_ROOT_TAIL(sb, bh)->root_change);
        } else {
                tail->protect = cpu_to_be32(AFFS_I(inode)->i_protect);
                tail->size = cpu_to_be32(inode->i_size);
-               secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change);
+               affs_secs_to_datestamp(inode->i_mtime.tv_sec, &tail->change);
                if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
                        uid = i_uid_read(inode);
                        gid = i_gid_read(inode);
@@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
        mark_inode_dirty(inode);
 
        if (attr->ia_valid & ATTR_MODE)
-               mode_to_prot(inode);
+               affs_mode_to_prot(inode);
 out:
        return error;
 }
index 29186d29a3b611809f70b1ee7ba0cce4eb85d824..96dd1d09a2735676f51b94ff9390bbc114cf8367 100644 (file)
@@ -9,29 +9,10 @@
  */
 
 #include "affs.h"
+#include <linux/exportfs.h>
 
 typedef int (*toupper_t)(int);
 
-static int      affs_toupper(int ch);
-static int      affs_hash_dentry(const struct dentry *, struct qstr *);
-static int       affs_compare_dentry(const struct dentry *dentry,
-               unsigned int len, const char *str, const struct qstr *name);
-static int      affs_intl_toupper(int ch);
-static int      affs_intl_hash_dentry(const struct dentry *, struct qstr *);
-static int       affs_intl_compare_dentry(const struct dentry *dentry,
-               unsigned int len, const char *str, const struct qstr *name);
-
-const struct dentry_operations affs_dentry_operations = {
-       .d_hash         = affs_hash_dentry,
-       .d_compare      = affs_compare_dentry,
-};
-
-const struct dentry_operations affs_intl_dentry_operations = {
-       .d_hash         = affs_intl_hash_dentry,
-       .d_compare      = affs_intl_compare_dentry,
-};
-
-
 /* Simple toupper() for DOS\1 */
 
 static int
@@ -271,7 +252,7 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
                return -ENOSPC;
 
        inode->i_mode = mode;
-       mode_to_prot(inode);
+       affs_mode_to_prot(inode);
        mark_inode_dirty(inode);
 
        inode->i_op = &affs_file_inode_operations;
@@ -301,7 +282,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
                return -ENOSPC;
 
        inode->i_mode = S_IFDIR | mode;
-       mode_to_prot(inode);
+       affs_mode_to_prot(inode);
 
        inode->i_op = &affs_dir_inode_operations;
        inode->i_fop = &affs_dir_operations;
@@ -347,7 +328,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
        inode_nohighmem(inode);
        inode->i_data.a_ops = &affs_symlink_aops;
        inode->i_mode = S_IFLNK | 0777;
-       mode_to_prot(inode);
+       affs_mode_to_prot(inode);
 
        error = -EIO;
        bh = affs_bread(sb, inode->i_ino);
@@ -465,3 +446,71 @@ done:
        affs_brelse(bh);
        return retval;
 }
+
+static struct dentry *affs_get_parent(struct dentry *child)
+{
+       struct inode *parent;
+       struct buffer_head *bh;
+
+       bh = affs_bread(child->d_sb, d_inode(child)->i_ino);
+       if (!bh)
+               return ERR_PTR(-EIO);
+
+       parent = affs_iget(child->d_sb,
+                          be32_to_cpu(AFFS_TAIL(child->d_sb, bh)->parent));
+       brelse(bh);
+       if (IS_ERR(parent))
+               return ERR_CAST(parent);
+
+       return d_obtain_alias(parent);
+}
+
+static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino,
+                                       u32 generation)
+{
+       struct inode *inode;
+
+       if (!affs_validblock(sb, ino))
+               return ERR_PTR(-ESTALE);
+
+       inode = affs_iget(sb, ino);
+       if (IS_ERR(inode))
+               return ERR_CAST(inode);
+
+       if (generation && inode->i_generation != generation) {
+               iput(inode);
+               return ERR_PTR(-ESTALE);
+       }
+
+       return inode;
+}
+
+static struct dentry *affs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+                                       int fh_len, int fh_type)
+{
+       return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+                                   affs_nfs_get_inode);
+}
+
+static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid *fid,
+                                       int fh_len, int fh_type)
+{
+       return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+                                   affs_nfs_get_inode);
+}
+
+const struct export_operations affs_export_ops = {
+       .fh_to_dentry = affs_fh_to_dentry,
+       .fh_to_parent = affs_fh_to_parent,
+       .get_parent = affs_get_parent,
+};
+
+const struct dentry_operations affs_dentry_operations = {
+       .d_hash         = affs_hash_dentry,
+       .d_compare      = affs_compare_dentry,
+};
+
+const struct dentry_operations affs_intl_dentry_operations = {
+       .d_hash         = affs_intl_hash_dentry,
+       .d_compare      = affs_intl_compare_dentry,
+};
index d6384863192ca5c47a3593680c59713dfff8ad29..37532538e8ab12fac95f5182512aedd104b56dbf 100644 (file)
@@ -32,7 +32,7 @@ affs_commit_super(struct super_block *sb, int wait)
        struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
 
        lock_buffer(bh);
-       secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
+       affs_secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
        affs_fix_checksum(sb, bh);
        unlock_buffer(bh);
 
@@ -507,6 +507,7 @@ got_root:
                return -ENOMEM;
        }
 
+       sb->s_export_op = &affs_export_ops;
        pr_debug("s_flags=%lX\n", sb->s_flags);
        return 0;
 }
index 51a241e09fbb9c96a19f5bc97cadce0ffa582514..949f960337f5b30a880d5f6c547dca6eccfc748d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -252,7 +252,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
                /* skip entries marked unused in the bitmap */
                if (!(block->pagehdr.bitmap[offset / 8] &
                      (1 << (offset % 8)))) {
-                       _debug("ENT[%Zu.%u]: unused",
+                       _debug("ENT[%zu.%u]: unused",
                               blkoff / sizeof(union afs_dir_block), offset);
                        if (offset >= curr)
                                ctx->pos = blkoff +
@@ -266,7 +266,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
                               sizeof(*block) -
                               offset * sizeof(union afs_dirent));
 
-               _debug("ENT[%Zu.%u]: %s %Zu \"%s\"",
+               _debug("ENT[%zu.%u]: %s %zu \"%s\"",
                       blkoff / sizeof(union afs_dir_block), offset,
                       (offset < curr ? "skip" : "fill"),
                       nlen, dire->u.name);
@@ -274,23 +274,23 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
                /* work out where the next possible entry is */
                for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_dirent)) {
                        if (next >= AFS_DIRENT_PER_BLOCK) {
-                               _debug("ENT[%Zu.%u]:"
+                               _debug("ENT[%zu.%u]:"
                                       " %u travelled beyond end dir block"
-                                      " (len %u/%Zu)",
+                                      " (len %u/%zu)",
                                       blkoff / sizeof(union afs_dir_block),
                                       offset, next, tmp, nlen);
                                return -EIO;
                        }
                        if (!(block->pagehdr.bitmap[next / 8] &
                              (1 << (next % 8)))) {
-                               _debug("ENT[%Zu.%u]:"
-                                      " %u unmarked extension (len %u/%Zu)",
+                               _debug("ENT[%zu.%u]:"
+                                      " %u unmarked extension (len %u/%zu)",
                                       blkoff / sizeof(union afs_dir_block),
                                       offset, next, tmp, nlen);
                                return -EIO;
                        }
 
-                       _debug("ENT[%Zu.%u]: ext %u/%Zu",
+                       _debug("ENT[%zu.%u]: ext %u/%zu",
                               blkoff / sizeof(union afs_dir_block),
                               next, tmp, nlen);
                        next++;
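
For context on the %Zu -> %zu sweep: %Z was a nonstandard kernel/old-glibc length modifier, while C99 standardized 'z' for size_t; these conversions were done in preparation for dropping %Z support from the kernel's vsnprintf(). Standard usage:

#include <stdio.h>

int main(void)
{
	size_t n = sizeof(long);

	printf("%zu\n", n);	/* 'z' is the C99 length modifier for size_t */
	return 0;
}
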
index 873b4ca82ccbcde4c108a2592fbd7f46b88273a0..7e2ab9c8e39c2772b2d5f18ec39996d6efe67758 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,7 +512,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 
        ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
                                       PROT_READ | PROT_WRITE,
-                                      MAP_SHARED, 0, &unused);
+                                      MAP_SHARED, 0, &unused, NULL);
        up_write(&mm->mmap_sem);
        if (IS_ERR((void *)ctx->mmap_base)) {
                ctx->mmap_size = 0;
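
The extra trailing NULL tracks a do_mmap_pgoff() signature change from the same merge window, which added a list to collect userfaultfd unmap events; NULL opts out. Assumed 4.11-era prototype, for reference only:

/* assumed prototype; the final parameter gathers userfaultfd unmap
 * events, NULL means the caller does not care */
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
			    unsigned long len, unsigned long prot,
			    unsigned long flags, unsigned long pgoff,
			    unsigned long *populate, struct list_head *uf);
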
index 6f48d670c9415ce610980e6288d118623cd237d1..806df746f1a93b5a06ca970735faa3e775c28566 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -38,8 +38,6 @@
  * which have been left busy at at service shutdown.
  */
 
-#define AUTOFS_DEV_IOCTL_SIZE  sizeof(struct autofs_dev_ioctl)
-
 typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *,
                        struct autofs_dev_ioctl *);
 
index 82e8f6edfb48d0e8670dd58e3fbdcfb4b5ceb85d..d79ced9258614010128dd8f1ed6a0cff2b525f21 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
                pr_debug("waiting for mount name=%pd\n", path->dentry);
                status = autofs4_wait(sbi, path, NFY_MOUNT);
                pr_debug("mount wait done status=%d\n", status);
+               ino->last_used = jiffies;
        }
-       ino->last_used = jiffies;
        return status;
 }
 
@@ -321,16 +321,21 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
         */
        if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
                struct dentry *parent = dentry->d_parent;
-               struct autofs_info *ino;
                struct dentry *new;
 
                new = d_lookup(parent, &dentry->d_name);
                if (!new)
                        return NULL;
-               ino = autofs4_dentry_ino(new);
-               ino->last_used = jiffies;
-               dput(path->dentry);
-               path->dentry = new;
+               if (new == dentry)
+                       dput(new);
+               else {
+                       struct autofs_info *ino;
+
+                       ino = autofs4_dentry_ino(new);
+                       ino->last_used = jiffies;
+                       dput(path->dentry);
+                       path->dentry = new;
+               }
        }
        return path->dentry;
 }
index 73031ec54a7be5ff34be619581cb20cef81cb8f5..77c30f15a02c3101c0dc4a96008c159c429b7441 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -989,7 +989,7 @@ struct block_device *bdget(dev_t dev)
                bdev->bd_super = NULL;
                bdev->bd_inode = inode;
                bdev->bd_bdi = &noop_backing_dev_info;
-               bdev->bd_block_size = (1 << inode->i_blkbits);
+               bdev->bd_block_size = i_blocksize(inode);
                bdev->bd_part_count = 0;
                bdev->bd_invalidated = 0;
                inode->i_mode = S_IFBLK;
@@ -1043,13 +1043,22 @@ static struct block_device *bd_acquire(struct inode *inode)
 
        spin_lock(&bdev_lock);
        bdev = inode->i_bdev;
-       if (bdev) {
+       if (bdev && !inode_unhashed(bdev->bd_inode)) {
                bdgrab(bdev);
                spin_unlock(&bdev_lock);
                return bdev;
        }
        spin_unlock(&bdev_lock);
 
+       /*
+        * i_bdev references block device inode that was already shut down
+        * (corresponding device got removed).  Remove the reference and look
+        * up block device inode again just in case new device got
+        * reestablished under the same device number.
+        */
+       if (bdev)
+               bd_forget(inode);
+
        bdev = bdget(inode->i_rdev);
        if (bdev) {
                spin_lock(&bdev_lock);
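
The i_blocksize() helper used in the bdget() hunk was introduced in the same series as a common wrapper; it should be equivalent to the open-coded shift it replaces:

/* include/linux/fs.h, as added by the i_blocksize() helper patch */
static inline unsigned int i_blocksize(const struct inode *node)
{
	return (1 << node->i_blkbits);
}
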
index 8299601a35493b28f137cce3074dec455efbb3bf..7699e16784d313459181c746d0b8c30d468e23a7 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -956,8 +956,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 /*
  * add all inline backrefs for bytenr to the list
  */
-static int __add_inline_refs(struct btrfs_fs_info *fs_info,
-                            struct btrfs_path *path, u64 bytenr,
+static int __add_inline_refs(struct btrfs_path *path, u64 bytenr,
                             int *info_level, struct list_head *prefs,
                             struct ref_root *ref_tree,
                             u64 *total_refs, u64 inum)
@@ -1284,7 +1283,7 @@ again:
                 */
                delayed_refs = &trans->transaction->delayed_refs;
                spin_lock(&delayed_refs->lock);
-               head = btrfs_find_delayed_ref_head(trans, bytenr);
+               head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
                if (head) {
                        if (!mutex_trylock(&head->mutex)) {
                                atomic_inc(&head->node.refs);
@@ -1354,7 +1353,7 @@ again:
                if (key.objectid == bytenr &&
                    (key.type == BTRFS_EXTENT_ITEM_KEY ||
                     key.type == BTRFS_METADATA_ITEM_KEY)) {
-                       ret = __add_inline_refs(fs_info, path, bytenr,
+                       ret = __add_inline_refs(path, bytenr,
                                                &info_level, &prefs,
                                                ref_tree, &total_refs,
                                                inum);
index 1a8fa46ff87eb6eccae9be16d5e600ff8de0ef8b..819a6d27218a902c6149ae73576afb90fe31b467 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -224,16 +224,16 @@ static inline void btrfs_insert_inode_hash(struct inode *inode)
        __insert_inode_hash(inode, h);
 }
 
-static inline u64 btrfs_ino(struct inode *inode)
+static inline u64 btrfs_ino(struct btrfs_inode *inode)
 {
-       u64 ino = BTRFS_I(inode)->location.objectid;
+       u64 ino = inode->location.objectid;
 
        /*
         * !ino: btree_inode
         * type == BTRFS_ROOT_ITEM_KEY: subvol dir
         */
-       if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
-               ino = inode->i_ino;
+       if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
+               ino = inode->vfs_inode.i_ino;
        return ino;
 }
 
@@ -248,23 +248,21 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
        if (root == root->fs_info->tree_root &&
-           btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
+           btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID)
                return true;
        if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
                return true;
        return false;
 }
 
-static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
+static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
        int ret = 0;
 
-       spin_lock(&BTRFS_I(inode)->lock);
-       if (BTRFS_I(inode)->logged_trans == generation &&
-           BTRFS_I(inode)->last_sub_trans <=
-           BTRFS_I(inode)->last_log_commit &&
-           BTRFS_I(inode)->last_sub_trans <=
-           BTRFS_I(inode)->root->last_log_commit) {
+       spin_lock(&inode->lock);
+       if (inode->logged_trans == generation &&
+           inode->last_sub_trans <= inode->last_log_commit &&
+           inode->last_sub_trans <= inode->root->last_log_commit) {
                /*
                 * After a ranged fsync we might have left some extent maps
                 * (that fall outside the fsync's range). So return false
@@ -272,10 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
                 * will be called and process those extent maps.
                 */
                smp_mb();
-               if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
+               if (list_empty(&inode->extent_tree.modified_extents))
                        ret = 1;
        }
-       spin_unlock(&BTRFS_I(inode)->lock);
+       spin_unlock(&inode->lock);
        return ret;
 }
 
@@ -326,6 +324,24 @@ static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
                  &BTRFS_I(inode)->runtime_flags);
 }
 
+static inline void btrfs_print_data_csum_error(struct inode *inode,
+               u64 logical_start, u32 csum, u32 csum_expected, int mirror_num)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+
+       /* Output minus objectid, which is more meaningful */
+       if (root->objectid >= BTRFS_LAST_FREE_OBJECTID)
+               btrfs_warn_rl(root->fs_info,
+       "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
+                       root->objectid, btrfs_ino(BTRFS_I(inode)),
+                       logical_start, csum, csum_expected, mirror_num);
+       else
+               btrfs_warn_rl(root->fs_info,
+       "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
+                       root->objectid, btrfs_ino(BTRFS_I(inode)),
+                       logical_start, csum, csum_expected, mirror_num);
+}
+
 bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
 
 #endif
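
The objectid trick above: btrfs pseudo-root ids are small negative numbers stored in a u64 (BTRFS_LAST_FREE_OBJECTID is -256ULL; the data reloc tree, for instance, is -9ULL), so printing them signed is far more readable. A userspace illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t objectid = -9ULL;	/* e.g. BTRFS_DATA_RELOC_TREE_OBJECTID */

	printf("%llu\n", (unsigned long long)objectid);	/* 18446744073709551607 */
	printf("%lld\n", (long long)objectid);		/* -9 */
	return 0;
}
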
index c4444d6f439f676cee59a20322d0f88fcb3cc4a3..903c32c9eb22214f10d6dbdadb64f6b698e2bf66 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -124,10 +124,8 @@ static int check_compressed_csum(struct inode *inode,
                kunmap_atomic(kaddr);
 
                if (csum != *cb_sum) {
-                       btrfs_info(BTRFS_I(inode)->root->fs_info,
-                          "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
-                          btrfs_ino(inode), disk_start, csum, *cb_sum,
-                          cb->mirror_num);
+                       btrfs_print_data_csum_error(inode, disk_start, csum,
+                                                   *cb_sum, cb->mirror_num);
                        ret = -EIO;
                        goto fail;
                }
index a426dc822d4de7ad309934276a945fca024f91bc..1192bc7d2ee782c1c915ba71aa771e57762b7d06 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -28,9 +28,9 @@
 
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_path *path, int level);
-static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
-                     *root, struct btrfs_key *ins_key,
-                     struct btrfs_path *path, int data_size, int extend);
+static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     const struct btrfs_key *ins_key, struct btrfs_path *path,
+                     int data_size, int extend);
 static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_fs_info *fs_info,
                          struct extent_buffer *dst,
@@ -426,7 +426,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
        tm_root = &fs_info->tree_mod_log;
        for (node = rb_first(tm_root); node; node = next) {
                next = rb_next(node);
-               tm = container_of(node, struct tree_mod_elem, node);
+               tm = rb_entry(node, struct tree_mod_elem, node);
                if (tm->seq > min_seq)
                        continue;
                rb_erase(node, tm_root);
@@ -460,7 +460,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
        tm_root = &fs_info->tree_mod_log;
        new = &tm_root->rb_node;
        while (*new) {
-               cur = container_of(*new, struct tree_mod_elem, node);
+               cur = rb_entry(*new, struct tree_mod_elem, node);
                parent = *new;
                if (cur->logical < tm->logical)
                        new = &((*new)->rb_left);
@@ -746,7 +746,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
        tm_root = &fs_info->tree_mod_log;
        node = tm_root->rb_node;
        while (node) {
-               cur = container_of(node, struct tree_mod_elem, node);
+               cur = rb_entry(node, struct tree_mod_elem, node);
                if (cur->logical < start) {
                        node = node->rb_left;
                } else if (cur->logical > start) {
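
These container_of() -> rb_entry() conversions are purely cosmetic; rb_entry() is a one-line alias that documents the rbtree intent:

/* include/linux/rbtree.h */
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
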
@@ -1074,7 +1074,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        ret = btrfs_dec_ref(trans, root, buf, 1);
                        BUG_ON(ret); /* -ENOMEM */
                }
-               clean_tree_block(trans, fs_info, buf);
+               clean_tree_block(fs_info, buf);
                *last_ref = 1;
        }
        return 0;
@@ -1326,7 +1326,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
                next = rb_next(&tm->node);
                if (!next)
                        break;
-               tm = container_of(next, struct tree_mod_elem, node);
+               tm = rb_entry(next, struct tree_mod_elem, node);
                if (tm->logical != first_tm->logical)
                        break;
        }
@@ -1580,7 +1580,8 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
 /*
  * compare two keys in a memcmp fashion
  */
-static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+static int comp_keys(const struct btrfs_disk_key *disk,
+                    const struct btrfs_key *k2)
 {
        struct btrfs_key k1;
 
@@ -1592,7 +1593,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 /*
  * same as comp_keys only with two btrfs_key's
  */
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
 {
        if (k1->objectid > k2->objectid)
                return 1;
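
The full comparator orders by objectid, then type, then offset; a self-contained userspace sketch of the same ordering:

#include <stdint.h>

struct btrfs_key {
	uint64_t objectid;
	uint8_t type;
	uint64_t offset;
};

static int btrfs_comp_cpu_keys(const struct btrfs_key *k1,
			       const struct btrfs_key *k2)
{
	if (k1->objectid > k2->objectid)
		return 1;
	if (k1->objectid < k2->objectid)
		return -1;
	if (k1->type > k2->type)
		return 1;
	if (k1->type < k2->type)
		return -1;
	if (k1->offset > k2->offset)
		return 1;
	if (k1->offset < k2->offset)
		return -1;
	return 0;
}
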
@@ -1732,8 +1733,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
  * slot may point to max if the key is bigger than all of the keys
  */
 static noinline int generic_bin_search(struct extent_buffer *eb,
-                                      unsigned long p,
-                                      int item_size, struct btrfs_key *key,
+                                      unsigned long p, int item_size,
+                                      const struct btrfs_key *key,
                                       int max, int *slot)
 {
        int low = 0;
@@ -1802,7 +1803,7 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
  * simple bin_search frontend that does the right thing for
  * leaves vs nodes
  */
-static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
                      int level, int *slot)
 {
        if (level == 0)
@@ -1819,7 +1820,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                                          slot);
 }
 
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
                     int level, int *slot)
 {
        return bin_search(eb, key, level, slot);
@@ -1937,7 +1938,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
                path->locks[level] = 0;
                path->nodes[level] = NULL;
-               clean_tree_block(trans, fs_info, mid);
+               clean_tree_block(fs_info, mid);
                btrfs_tree_unlock(mid);
                /* once for the path */
                free_extent_buffer(mid);
@@ -1998,7 +1999,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                if (wret < 0 && wret != -ENOSPC)
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
-                       clean_tree_block(trans, fs_info, right);
+                       clean_tree_block(fs_info, right);
                        btrfs_tree_unlock(right);
                        del_ptr(root, path, level + 1, pslot + 1);
                        root_sub_used(root, right->len);
@@ -2042,7 +2043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                BUG_ON(wret == 1);
        }
        if (btrfs_header_nritems(mid) == 0) {
-               clean_tree_block(trans, fs_info, mid);
+               clean_tree_block(fs_info, mid);
                btrfs_tree_unlock(mid);
                del_ptr(root, path, level + 1, pslot);
                root_sub_used(root, mid->len);
@@ -2437,10 +2438,9 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
  * reada.  -EAGAIN is returned and the search must be repeated.
  */
 static int
-read_block_for_search(struct btrfs_trans_handle *trans,
-                      struct btrfs_root *root, struct btrfs_path *p,
-                      struct extent_buffer **eb_ret, int level, int slot,
-                      struct btrfs_key *key, u64 time_seq)
+read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
+                     struct extent_buffer **eb_ret, int level, int slot,
+                     const struct btrfs_key *key)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 blocknr;
@@ -2587,7 +2587,7 @@ done:
 }
 
 static void key_search_validate(struct extent_buffer *b,
-                               struct btrfs_key *key,
+                               const struct btrfs_key *key,
                                int level)
 {
 #ifdef CONFIG_BTRFS_ASSERT
@@ -2606,7 +2606,7 @@ static void key_search_validate(struct extent_buffer *b,
 #endif
 }
 
-static int key_search(struct extent_buffer *b, struct btrfs_key *key,
+static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
                      int level, int *prev_cmp, int *slot)
 {
        if (*prev_cmp != 0) {
@@ -2668,9 +2668,9 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
  * possible)
  */
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
-                     *root, struct btrfs_key *key, struct btrfs_path *p, int
-                     ins_len, int cow)
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     const struct btrfs_key *key, struct btrfs_path *p,
+                     int ins_len, int cow)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *b;
@@ -2870,8 +2870,8 @@ cow_done:
                                goto done;
                        }
 
-                       err = read_block_for_search(trans, root, p,
-                                                   &b, level, slot, key, 0);
+                       err = read_block_for_search(root, p, &b, level,
+                                                   slot, key);
                        if (err == -EAGAIN)
                                goto again;
                        if (err) {
@@ -2953,7 +2953,7 @@ done:
  * The resulting path and return value will be set up as if we called
  * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
  */
-int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
+int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
                          struct btrfs_path *p, u64 time_seq)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -3014,8 +3014,8 @@ again:
                                goto done;
                        }
 
-                       err = read_block_for_search(NULL, root, p, &b, level,
-                                                   slot, key, time_seq);
+                       err = read_block_for_search(root, p, &b, level,
+                                                   slot, key);
                        if (err == -EAGAIN)
                                goto again;
                        if (err) {
@@ -3067,8 +3067,9 @@ done:
  * < 0 on error
  */
 int btrfs_search_slot_for_read(struct btrfs_root *root,
-                              struct btrfs_key *key, struct btrfs_path *p,
-                              int find_higher, int return_any)
+                              const struct btrfs_key *key,
+                              struct btrfs_path *p, int find_higher,
+                              int return_any)
 {
        int ret;
        struct extent_buffer *leaf;
@@ -3166,7 +3167,7 @@ static void fixup_low_keys(struct btrfs_fs_info *fs_info,
  */
 void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
                             struct btrfs_path *path,
-                            struct btrfs_key *new_key)
+                            const struct btrfs_key *new_key)
 {
        struct btrfs_disk_key disk_key;
        struct extent_buffer *eb;
@@ -3594,8 +3595,7 @@ noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
  * min slot controls the lowest index we're willing to push to the
  * right.  We'll push up to and including min_slot, but no lower
  */
-static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
-                                     struct btrfs_fs_info *fs_info,
+static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
                                      struct btrfs_path *path,
                                      int data_size, int empty,
                                      struct extent_buffer *right,
@@ -3704,7 +3704,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (left_nritems)
                btrfs_mark_buffer_dirty(left);
        else
-               clean_tree_block(trans, fs_info, left);
+               clean_tree_block(fs_info, left);
 
        btrfs_mark_buffer_dirty(right);
 
@@ -3716,7 +3716,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (path->slots[0] >= left_nritems) {
                path->slots[0] -= left_nritems;
                if (btrfs_header_nritems(path->nodes[0]) == 0)
-                       clean_tree_block(trans, fs_info, path->nodes[0]);
+                       clean_tree_block(fs_info, path->nodes[0]);
                btrfs_tree_unlock(path->nodes[0]);
                free_extent_buffer(path->nodes[0]);
                path->nodes[0] = right;
@@ -3809,7 +3809,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
                return 0;
        }
 
-       return __push_leaf_right(trans, fs_info, path, min_data_size, empty,
+       return __push_leaf_right(fs_info, path, min_data_size, empty,
                                right, free_space, left_nritems, min_slot);
 out_unlock:
        btrfs_tree_unlock(right);
@@ -3825,8 +3825,7 @@ out_unlock:
  * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
  * items
  */
-static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
-                                    struct btrfs_fs_info *fs_info,
+static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
                                     struct btrfs_path *path, int data_size,
                                     int empty, struct extent_buffer *left,
                                     int free_space, u32 right_nritems,
@@ -3945,7 +3944,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        if (right_nritems)
                btrfs_mark_buffer_dirty(right);
        else
-               clean_tree_block(trans, fs_info, right);
+               clean_tree_block(fs_info, right);
 
        btrfs_item_key(right, &disk_key, 0);
        fixup_low_keys(fs_info, path, &disk_key, 1);
@@ -4035,7 +4034,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
                goto out;
        }
 
-       return __push_leaf_left(trans, fs_info, path, min_data_size,
+       return __push_leaf_left(fs_info, path, min_data_size,
                               empty, left, free_space, right_nritems,
                               max_slot);
 out:
@@ -4180,7 +4179,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
  */
 static noinline int split_leaf(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
-                              struct btrfs_key *ins_key,
+                              const struct btrfs_key *ins_key,
                               struct btrfs_path *path, int data_size,
                               int extend)
 {
@@ -4412,10 +4411,9 @@ err:
        return ret;
 }
 
-static noinline int split_item(struct btrfs_trans_handle *trans,
-                              struct btrfs_fs_info *fs_info,
+static noinline int split_item(struct btrfs_fs_info *fs_info,
                               struct btrfs_path *path,
-                              struct btrfs_key *new_key,
+                              const struct btrfs_key *new_key,
                               unsigned long split_offset)
 {
        struct extent_buffer *leaf;
@@ -4501,7 +4499,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
 int btrfs_split_item(struct btrfs_trans_handle *trans,
                     struct btrfs_root *root,
                     struct btrfs_path *path,
-                    struct btrfs_key *new_key,
+                    const struct btrfs_key *new_key,
                     unsigned long split_offset)
 {
        int ret;
@@ -4510,7 +4508,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
-       ret = split_item(trans, root->fs_info, path, new_key, split_offset);
+       ret = split_item(root->fs_info, path, new_key, split_offset);
        return ret;
 }
 
@@ -4525,7 +4523,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
 int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         struct btrfs_path *path,
-                        struct btrfs_key *new_key)
+                        const struct btrfs_key *new_key)
 {
        struct extent_buffer *leaf;
        int ret;
@@ -4726,7 +4724,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
  * that doesn't call btrfs_search_slot
  */
 void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
-                           struct btrfs_key *cpu_key, u32 *data_size,
+                           const struct btrfs_key *cpu_key, u32 *data_size,
                            u32 total_data, u32 total_size, int nr)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4820,7 +4818,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            struct btrfs_path *path,
-                           struct btrfs_key *cpu_key, u32 *data_size,
+                           const struct btrfs_key *cpu_key, u32 *data_size,
                            int nr)
 {
        int ret = 0;
@@ -4851,9 +4849,9 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
  * Given a key and some data, insert an item into the tree.
  * This does all the path init required, making room in the tree if needed.
  */
-int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
-                     *root, struct btrfs_key *cpu_key, void *data, u32
-                     data_size)
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     const struct btrfs_key *cpu_key, void *data,
+                     u32 data_size)
 {
        int ret = 0;
        struct btrfs_path *path;
@@ -5008,7 +5006,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        btrfs_set_header_level(leaf, 0);
                } else {
                        btrfs_set_path_blocking(path);
-                       clean_tree_block(trans, fs_info, leaf);
+                       clean_tree_block(fs_info, leaf);
                        btrfs_del_leaf(trans, root, path, leaf);
                }
        } else {
@@ -5243,7 +5241,7 @@ out:
 
 static int tree_move_down(struct btrfs_fs_info *fs_info,
                           struct btrfs_path *path,
-                          int *level, int root_level)
+                          int *level)
 {
        struct extent_buffer *eb;
 
@@ -5258,8 +5256,7 @@ static int tree_move_down(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
-static int tree_move_next_or_upnext(struct btrfs_fs_info *fs_info,
-                                   struct btrfs_path *path,
+static int tree_move_next_or_upnext(struct btrfs_path *path,
                                    int *level, int root_level)
 {
        int ret = 0;
@@ -5298,10 +5295,9 @@ static int tree_advance(struct btrfs_fs_info *fs_info,
        int ret;
 
        if (*level == 0 || !allow_down) {
-               ret = tree_move_next_or_upnext(fs_info, path, level,
-                                              root_level);
+               ret = tree_move_next_or_upnext(path, level, root_level);
        } else {
-               ret = tree_move_down(fs_info, path, level, root_level);
+               ret = tree_move_down(fs_info, path, level);
        }
        if (ret >= 0) {
                if (*level == 0)
@@ -5784,8 +5780,8 @@ again:
 
                next = c;
                next_rw_lock = path->locks[level];
-               ret = read_block_for_search(NULL, root, path, &next, level,
-                                           slot, &key, 0);
+               ret = read_block_for_search(root, path, &next, level,
+                                           slot, &key);
                if (ret == -EAGAIN)
                        goto again;
 
@@ -5834,8 +5830,8 @@ again:
                if (!level)
                        break;
 
-               ret = read_block_for_search(NULL, root, path, &next, level,
-                                           0, &key, 0);
+               ret = read_block_for_search(root, path, &next, level,
+                                           0, &key);
                if (ret == -EAGAIN)
                        goto again;
 
index 6a823719b6c580557cccab3d01123d50857ad334..105d4d43993e9f46a800b8aa09b48ed391391921 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -97,6 +97,14 @@ static const int btrfs_csum_sizes[] = { 4 };
 
 #define BTRFS_MAX_EXTENT_SIZE SZ_128M
 
+/*
+ * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+ */
+static inline u32 count_max_extents(u64 size)
+{
+       return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+}
+
 struct btrfs_mapping_tree {
        struct extent_map_tree map_tree;
 };
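
count_max_extents() is plain round-up division; a quick userspace check of the arithmetic:

#include <stdio.h>
#include <stdint.h>

#define SZ_128M		(128ULL << 20)	/* BTRFS_MAX_EXTENT_SIZE */

/* same round-up semantics as the div_u64() in the helper */
static uint32_t count_max_extents(uint64_t size)
{
	return (size + SZ_128M - 1) / SZ_128M;
}

int main(void)
{
	printf("%u %u %u\n",
	       count_max_extents(1),		/* 1 */
	       count_max_extents(SZ_128M),	/* 1 */
	       count_max_extents(SZ_128M + 1));	/* 2 */
	return 0;
}
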
@@ -1953,7 +1961,7 @@ BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
 BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
 
 static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
-                                        struct btrfs_disk_key *disk)
+                                        const struct btrfs_disk_key *disk)
 {
        cpu->offset = le64_to_cpu(disk->offset);
        cpu->type = disk->type;
@@ -1961,7 +1969,7 @@ static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
 }
 
 static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
-                                        struct btrfs_key *cpu)
+                                        const struct btrfs_key *cpu)
 {
        disk->offset = cpu_to_le64(cpu->offset);
        disk->type = cpu->type;
@@ -1993,8 +2001,7 @@ static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
        btrfs_disk_key_to_cpu(key, &disk_key);
 }
 
-
-static inline u8 btrfs_key_type(struct btrfs_key *key)
+static inline u8 btrfs_key_type(const struct btrfs_key *key)
 {
        return key->type;
 }
@@ -2577,8 +2584,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
                                    u64 bytenr, u64 num_bytes);
 int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
                                 struct extent_buffer *eb);
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
+int btrfs_cross_ref_exist(struct btrfs_root *root,
                          u64 objectid, u64 offset, u64 bytenr);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
                                                 struct btrfs_fs_info *info,
@@ -2587,10 +2593,11 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int get_block_group_index(struct btrfs_block_group_cache *cache);
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
-                                       struct btrfs_root *root, u64 parent,
-                                       u64 root_objectid,
-                                       struct btrfs_disk_key *key, int level,
-                                       u64 hint, u64 empty_size);
+                                            struct btrfs_root *root,
+                                            u64 parent, u64 root_objectid,
+                                            const struct btrfs_disk_key *key,
+                                            int level, u64 hint,
+                                            u64 empty_size);
 void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct extent_buffer *buf,
@@ -2623,8 +2630,7 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
                               u64 start, u64 len, int delalloc);
 int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
                                       u64 start, u64 len);
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
-                                struct btrfs_fs_info *fs_info);
+void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                               struct btrfs_fs_info *fs_info);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2696,8 +2702,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                     int nitems,
                                     u64 *qgroup_reserved, bool use_global_rsv);
 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
-                                     struct btrfs_block_rsv *rsv,
-                                     u64 qgroup_reserved);
+                                     struct btrfs_block_rsv *rsv);
 int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
 int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len);
@@ -2724,7 +2729,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
                             struct btrfs_block_rsv *block_rsv,
                             u64 num_bytes);
-int btrfs_inc_block_group_ro(struct btrfs_root *root,
+int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
                             struct btrfs_block_group_cache *cache);
 void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
@@ -2750,9 +2755,9 @@ u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
                       struct btrfs_fs_info *info, u64 start, u64 end);
 
 /* ctree.c */
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
                     int level, int *slot);
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
 int btrfs_previous_item(struct btrfs_root *root,
                        struct btrfs_path *path, u64 min_objectid,
                        int type);
@@ -2760,7 +2765,7 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
                        struct btrfs_path *path, u64 min_objectid);
 void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
                             struct btrfs_path *path,
-                            struct btrfs_key *new_key);
+                            const struct btrfs_key *new_key);
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -2802,22 +2807,23 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
 int btrfs_split_item(struct btrfs_trans_handle *trans,
                     struct btrfs_root *root,
                     struct btrfs_path *path,
-                    struct btrfs_key *new_key,
+                    const struct btrfs_key *new_key,
                     unsigned long split_offset);
 int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         struct btrfs_path *path,
-                        struct btrfs_key *new_key);
+                        const struct btrfs_key *new_key);
 int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
                u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
-                     *root, struct btrfs_key *key, struct btrfs_path *p, int
-                     ins_len, int cow);
-int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     const struct btrfs_key *key, struct btrfs_path *p,
+                     int ins_len, int cow);
+int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
                          struct btrfs_path *p, u64 time_seq);
 int btrfs_search_slot_for_read(struct btrfs_root *root,
-                              struct btrfs_key *key, struct btrfs_path *p,
-                              int find_higher, int return_any);
+                              const struct btrfs_key *key,
+                              struct btrfs_path *p, int find_higher,
+                              int return_any);
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct extent_buffer *parent,
                       int start_slot, u64 *last_ret,
@@ -2840,19 +2846,20 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
 }
 
 void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
-                           struct btrfs_key *cpu_key, u32 *data_size,
+                           const struct btrfs_key *cpu_key, u32 *data_size,
                            u32 total_data, u32 total_size, int nr);
-int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
-                     *root, struct btrfs_key *key, void *data, u32 data_size);
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     const struct btrfs_key *key, void *data, u32 data_size);
 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_path *path,
-                            struct btrfs_key *cpu_key, u32 *data_size, int nr);
+                            const struct btrfs_key *cpu_key, u32 *data_size,
+                            int nr);
 
 static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
-                                         struct btrfs_key *key,
+                                         const struct btrfs_key *key,
                                          u32 data_size)
 {
        return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
@@ -2941,15 +2948,15 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
                       u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
                       const char *name, int name_len);
 int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                  struct btrfs_key *key);
-int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
-                     *root, struct btrfs_key *key, struct btrfs_root_item
-                     *item);
+                  const struct btrfs_key *key);
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     const struct btrfs_key *key,
+                     struct btrfs_root_item *item);
 int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct btrfs_key *key,
                                   struct btrfs_root_item *item);
-int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
+int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
                    struct btrfs_path *path, struct btrfs_root_item *root_item,
                    struct btrfs_key *root_key);
 int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info);
@@ -3119,7 +3126,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
 int btrfs_set_inode_index(struct inode *dir, u64 *index);
 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root,
-                      struct inode *dir, struct inode *inode,
+                      struct btrfs_inode *dir, struct btrfs_inode *inode,
                       const char *name, int name_len);
 int btrfs_add_link(struct btrfs_trans_handle *trans,
                   struct inode *parent_inode, struct inode *inode,
@@ -3147,7 +3154,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                         size_t size, struct bio *bio,
                         unsigned long bio_flags);
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+int btrfs_page_mkwrite(struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
@@ -3447,7 +3454,8 @@ do {                                                              \
                        "BTRFS: Transaction aborted (error %d)\n",      \
                        (errno));                                       \
                } else {                                                \
-                       pr_debug("BTRFS: Transaction aborted (error %d)\n", \
+                       btrfs_debug((trans)->fs_info,                   \
+                                   "Transaction aborted (error %d)", \
                                  (errno));                     \
                }                                               \
        }                                                       \
index 80982a83c9fdb9eac6e1fdd97623e836c3f6095f..f7a6ee5ccc809a0ccf8338e058fec98b9d98da33 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -72,14 +72,14 @@ static inline int btrfs_is_continuous_delayed_item(
        return 0;
 }
 
-static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
+static struct btrfs_delayed_node *btrfs_get_delayed_node(
+               struct btrfs_inode *btrfs_inode)
 {
-       struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
        struct btrfs_root *root = btrfs_inode->root;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(btrfs_inode);
        struct btrfs_delayed_node *node;
 
-       node = ACCESS_ONCE(btrfs_inode->delayed_node);
+       node = READ_ONCE(btrfs_inode->delayed_node);
        if (node) {
                atomic_inc(&node->refs);
                return node;
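
The ACCESS_ONCE() -> READ_ONCE() change follows the tree-wide conversion: ACCESS_ONCE() was only well-defined for scalar types, while READ_ONCE() also handles aggregates. For a pointer load like the one above the two are equivalent, roughly:

/* sketch of the scalar case only; the real READ_ONCE() in
 * linux/compiler.h additionally copes with non-scalar types */
#define READ_ONCE_scalar(x)	(*(volatile typeof(x) *)&(x))
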
@@ -107,16 +107,15 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
 
 /* Will return either the node or PTR_ERR(-ENOMEM) */
 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
-                                                       struct inode *inode)
+               struct btrfs_inode *btrfs_inode)
 {
        struct btrfs_delayed_node *node;
-       struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
        struct btrfs_root *root = btrfs_inode->root;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(btrfs_inode);
        int ret;
 
 again:
-       node = btrfs_get_delayed_node(inode);
+       node = btrfs_get_delayed_node(btrfs_inode);
        if (node)
                return node;
 
@@ -574,7 +573,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
 static int btrfs_delayed_inode_reserve_metadata(
                                        struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
-                                       struct inode *inode,
+                                       struct btrfs_inode *inode,
                                        struct btrfs_delayed_node *node)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -603,13 +602,13 @@ static int btrfs_delayed_inode_reserve_metadata(
         * worth which is less likely to hurt us.
         */
        if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
-               spin_lock(&BTRFS_I(inode)->lock);
+               spin_lock(&inode->lock);
                if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
-                                      &BTRFS_I(inode)->runtime_flags))
+                                      &inode->runtime_flags))
                        release = true;
                else
                        src_rsv = NULL;
-               spin_unlock(&BTRFS_I(inode)->lock);
+               spin_unlock(&inode->lock);
        }
 
        /*
@@ -1196,7 +1195,7 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
-                                    struct inode *inode)
+                                    struct btrfs_inode *inode)
 {
        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
        struct btrfs_path *path;
@@ -1233,9 +1232,9 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-int btrfs_commit_inode_delayed_inode(struct inode *inode)
+int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
        struct btrfs_trans_handle *trans;
        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
        struct btrfs_path *path;
@@ -1288,15 +1287,15 @@ out:
        return ret;
 }
 
-void btrfs_remove_delayed_node(struct inode *inode)
+void btrfs_remove_delayed_node(struct btrfs_inode *inode)
 {
        struct btrfs_delayed_node *delayed_node;
 
-       delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
+       delayed_node = READ_ONCE(inode->delayed_node);
        if (!delayed_node)
                return;
 
-       BTRFS_I(inode)->delayed_node = NULL;
+       inode->delayed_node = NULL;
        btrfs_release_delayed_node(delayed_node);
 }
 
@@ -1434,7 +1433,7 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
                                   const char *name, int name_len,
-                                  struct inode *dir,
+                                  struct btrfs_inode *dir,
                                   struct btrfs_disk_key *disk_key, u8 type,
                                   u64 index)
 {
@@ -1510,7 +1509,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
 
 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
-                                  struct inode *dir, u64 index)
+                                  struct btrfs_inode *dir, u64 index)
 {
        struct btrfs_delayed_node *node;
        struct btrfs_delayed_item *item;
@@ -1558,7 +1557,7 @@ end:
        return ret;
 }
 
-int btrfs_inode_delayed_dir_index_count(struct inode *inode)
+int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
 {
        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
 
@@ -1575,7 +1574,7 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
                return -EINVAL;
        }
 
-       BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
+       inode->index_cnt = delayed_node->index_cnt;
        btrfs_release_delayed_node(delayed_node);
        return 0;
 }
@@ -1587,7 +1586,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
        struct btrfs_delayed_node *delayed_node;
        struct btrfs_delayed_item *item;
 
-       delayed_node = btrfs_get_delayed_node(inode);
+       delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
        if (!delayed_node)
                return false;
 
@@ -1776,7 +1775,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
        struct btrfs_delayed_node *delayed_node;
        struct btrfs_inode_item *inode_item;
 
-       delayed_node = btrfs_get_delayed_node(inode);
+       delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
        if (!delayed_node)
                return -ENOENT;
 
@@ -1831,7 +1830,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_node *delayed_node;
        int ret = 0;
 
-       delayed_node = btrfs_get_or_create_delayed_node(inode);
+       delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
        if (IS_ERR(delayed_node))
                return PTR_ERR(delayed_node);
 
@@ -1841,7 +1840,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
                goto release_node;
        }
 
-       ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
+       ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
                                                   delayed_node);
        if (ret)
                goto release_node;
@@ -1856,9 +1855,9 @@ release_node:
        return ret;
 }
 
-int btrfs_delayed_delete_inode_ref(struct inode *inode)
+int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
        struct btrfs_delayed_node *delayed_node;
 
        /*
@@ -1933,7 +1932,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
        mutex_unlock(&delayed_node->mutex);
 }
 
-void btrfs_kill_delayed_inode_items(struct inode *inode)
+void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
 {
        struct btrfs_delayed_node *delayed_node;
 
index 8a2bf5e3e4cf6960fa0c6d9fe911cd3f8a79ea90..40327cc3b99a3bdcd517827652969299e1ce2a2b 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -101,15 +101,15 @@ static inline void btrfs_init_delayed_root(
 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
                                   const char *name, int name_len,
-                                  struct inode *dir,
+                                  struct btrfs_inode *dir,
                                   struct btrfs_disk_key *disk_key, u8 type,
                                   u64 index);
 
 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
-                                  struct inode *dir, u64 index);
+                                  struct btrfs_inode *dir, u64 index);
 
-int btrfs_inode_delayed_dir_index_count(struct inode *inode);
+int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
 
 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info);
@@ -119,17 +119,17 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info);
 
 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
-                                    struct inode *inode);
+                                    struct btrfs_inode *inode);
 /* Used for evicting the inode. */
-void btrfs_remove_delayed_node(struct inode *inode);
-void btrfs_kill_delayed_inode_items(struct inode *inode);
-int btrfs_commit_inode_delayed_inode(struct inode *inode);
+void btrfs_remove_delayed_node(struct btrfs_inode *inode);
+void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode);
+int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode);
 
 
 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root, struct inode *inode);
 int btrfs_fill_inode(struct inode *inode, u32 *rdev);
-int btrfs_delayed_delete_inode_ref(struct inode *inode);
+int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode);
 
 /* Used for drop dead root */
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
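
All of the struct inode * -> struct btrfs_inode * conversions in this series lean on the btrfs inode embedding its VFS inode, so callers now apply the BTRFS_I() accessor themselves instead of each helper doing it internally:

/* fs/btrfs/btrfs_inode.h */
static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
{
	return container_of(inode, struct btrfs_inode, vfs_inode);
}

The reverse direction is just &inode->vfs_inode, as seen in the btrfs_ino() hunk earlier.
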
index ef724a5fc30ef6f91bd67859966344ccc22b16d9..6eb80952efb3310ae55de9c2e234319abe14465a 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -550,13 +550,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                     struct btrfs_delayed_ref_node *ref,
                     struct btrfs_qgroup_extent_record *qrecord,
                     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
-                    int action, int is_data)
+                    int action, int is_data, int *qrecord_inserted_ret)
 {
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
        int count_mod = 1;
        int must_insert_reserved = 0;
+       int qrecord_inserted = 0;
 
        /* If reserved is provided, it must be a data extent. */
        BUG_ON(!is_data && reserved);
@@ -623,6 +624,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                if (btrfs_qgroup_trace_extent_nolock(fs_info,
                                        delayed_refs, qrecord))
                        kfree(qrecord);
+               else
+                       qrecord_inserted = 1;
        }
 
        spin_lock_init(&head_ref->lock);
@@ -650,6 +653,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
+       if (qrecord_inserted_ret)
+               *qrecord_inserted_ret = qrecord_inserted;
        return head_ref;
 }
 
@@ -779,6 +784,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
+       int qrecord_inserted;
 
        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -806,12 +812,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         * the spin lock
         */
        head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
-                                       bytenr, num_bytes, 0, 0, action, 0);
+                                       bytenr, num_bytes, 0, 0, action, 0,
+                                       &qrecord_inserted);
 
        add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                             num_bytes, parent, ref_root, level, action);
        spin_unlock(&delayed_refs->lock);
 
+       if (qrecord_inserted)
+               return btrfs_qgroup_trace_extent_post(fs_info, record);
        return 0;
 
 free_head_ref:
@@ -829,15 +838,14 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
-                              u64 owner, u64 offset, u64 reserved, int action,
-                              struct btrfs_delayed_extent_op *extent_op)
+                              u64 owner, u64 offset, u64 reserved, int action)
 {
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
+       int qrecord_inserted;
 
-       BUG_ON(extent_op && !extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;
@@ -859,7 +867,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                }
        }
 
-       head_ref->extent_op = extent_op;
+       head_ref->extent_op = NULL;
 
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
@@ -870,13 +878,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         */
        head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
                                        bytenr, num_bytes, ref_root, reserved,
-                                       action, 1);
+                                       action, 1, &qrecord_inserted);
 
        add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, owner, offset,
                                   action);
        spin_unlock(&delayed_refs->lock);
 
+       if (qrecord_inserted)
+               return btrfs_qgroup_trace_extent_post(fs_info, record);
        return 0;
 }
 
@@ -899,7 +909,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 
        add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
                             num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-                            extent_op->is_data);
+                            extent_op->is_data, NULL);
 
        spin_unlock(&delayed_refs->lock);
        return 0;
@@ -911,11 +921,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
  * the head node if one was found, or NULL if not.
  */
 struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
+btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
 {
-       struct btrfs_delayed_ref_root *delayed_refs;
-
-       delayed_refs = &trans->transaction->delayed_refs;
        return find_ref_head(&delayed_refs->href_root, bytenr, 0);
 }
 
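The qrecord_inserted plumbing added above keeps heavy qgroup work out of the locked section: add_delayed_ref_head() records, under delayed_refs->lock, whether the qgroup extent record was inserted, reports it through an optional out-parameter, and the callers invoke btrfs_qgroup_trace_extent_post() only after spin_unlock(). A simplified sketch of that lock-then-defer shape, with invented names and a trivial stand-in for the insertion:

#include <pthread.h>
#include <stdio.h>

static pthread_spinlock_t refs_lock;

/* cheap part, meant to run under the lock: remember if we inserted */
static void add_head(int bytenr, int *inserted_ret)
{
	int inserted = (bytenr % 2) == 0;	/* pretend odd keys collide */

	if (inserted_ret)			/* callers may pass NULL */
		*inserted_ret = inserted;
}

/* expensive part, run only once the lock has been dropped */
static void trace_extent_post(int bytenr)
{
	printf("heavy post-processing for %d\n", bytenr);
}

int main(void)
{
	int inserted = 0;

	pthread_spin_init(&refs_lock, PTHREAD_PROCESS_PRIVATE);

	pthread_spin_lock(&refs_lock);
	add_head(42, &inserted);
	pthread_spin_unlock(&refs_lock);

	if (inserted)
		trace_extent_post(42);
	return 0;
}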
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 50947b5a915252590be5f2caf8d024bfd9f15794..0e537f98f1a1c63c529c118baed1cac26efa194e 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,8 +250,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
-                              u64 owner, u64 offset, u64 reserved, int action,
-                              struct btrfs_delayed_extent_op *extent_op);
+                              u64 owner, u64 offset, u64 reserved, int action);
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
@@ -262,7 +261,8 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
                              struct btrfs_delayed_ref_head *head);
 
 struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
+btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
+                           u64 bytenr);
 int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
                           struct btrfs_delayed_ref_head *head);
 static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
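btrfs_find_delayed_ref_head() now takes the btrfs_delayed_ref_root it actually searches instead of a transaction handle it only used to reach that root. The payoff shows up in the extent-tree.c hunks further down: check_delayed_ref() can pass fs_info->running_transaction->delayed_refs without holding any handle. A sketch of the narrowed dependency, all names invented:

#include <stdio.h>

struct ref_root { int head_bytenr; };	/* stand-in for the rbtree root */
struct transaction { struct ref_root delayed_refs; };
struct trans_handle { struct transaction *transaction; };

/* after the change: the lookup needs only the structure it searches */
static int find_head(struct ref_root *refs, int bytenr)
{
	return refs->head_bytenr == bytenr;	/* pretend lookup */
}

int main(void)
{
	struct transaction running = { .delayed_refs = { .head_bytenr = 4096 } };
	struct trans_handle handle = { .transaction = &running };

	/* a caller with a handle digs the root out itself... */
	printf("%d\n", find_head(&handle.transaction->delayed_refs, 4096));

	/* ...and a caller with only the running transaction can call too */
	printf("%d\n", find_head(&running.delayed_refs, 4096));
	return 0;
}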
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index b039fe0c751a0ae3788f4dc3710c3ad4e0c365ea..724504a2d7ac56986cc67db5da8521353a049636 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -133,7 +133,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
        struct btrfs_disk_key disk_key;
        u32 data_size;
 
-       key.objectid = btrfs_ino(dir);
+       key.objectid = btrfs_ino(BTRFS_I(dir));
        key.type = BTRFS_DIR_ITEM_KEY;
        key.offset = btrfs_name_hash(name, name_len);
 
@@ -174,8 +174,7 @@ second_insert:
        btrfs_release_path(path);
 
        ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name,
-                                             name_len, dir, &disk_key, type,
-                                             index);
+                       name_len, BTRFS_I(dir), &disk_key, type, index);
 out_free:
        btrfs_free_path(path);
        if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 37a31b12bb0c02e40c183d3ad57dfdf7879efa91..207db0270b1502add56171c3d39e94de2b1472f6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -64,8 +64,7 @@
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
-                                   int read_only);
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                                      struct btrfs_fs_info *fs_info);
@@ -1005,7 +1004,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
        return ret;
 }
 
-static int check_async_write(struct inode *inode, unsigned long bio_flags)
+static int check_async_write(unsigned long bio_flags)
 {
        if (bio_flags & EXTENT_BIO_TREE_LOG)
                return 0;
@@ -1021,7 +1020,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
                                 u64 bio_offset)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       int async = check_async_write(inode, bio_flags);
+       int async = check_async_write(bio_flags);
        int ret;
 
        if (bio_op(bio) != REQ_OP_WRITE) {
@@ -1248,8 +1247,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
 
 }
 
-void clean_tree_block(struct btrfs_trans_handle *trans,
-                     struct btrfs_fs_info *fs_info,
+void clean_tree_block(struct btrfs_fs_info *fs_info,
                      struct extent_buffer *buf)
 {
        if (btrfs_header_generation(buf) ==
@@ -2802,7 +2800,7 @@ int open_ctree(struct super_block *sb,
 
        memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
 
-       ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+       ret = btrfs_check_super_valid(fs_info);
        if (ret) {
                btrfs_err(fs_info, "superblock contains fatal errors");
                err = -EINVAL;
@@ -3411,7 +3409,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
  */
 static int write_dev_supers(struct btrfs_device *device,
                            struct btrfs_super_block *sb,
-                           int do_barriers, int wait, int max_mirrors)
+                           int wait, int max_mirrors)
 {
        struct buffer_head *bh;
        int i;
@@ -3696,7 +3694,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
        return num_tolerated_disk_barrier_failures;
 }
 
-static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
+int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
 {
        struct list_head *head;
        struct btrfs_device *dev;
@@ -3753,7 +3751,7 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
                flags = btrfs_super_flags(sb);
                btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
 
-               ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
+               ret = write_dev_supers(dev, sb, 0, max_mirrors);
                if (ret)
                        total_errors++;
        }
@@ -3776,7 +3774,7 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
                if (!dev->in_fs_metadata || !dev->writeable)
                        continue;
 
-               ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
+               ret = write_dev_supers(dev, sb, 1, max_mirrors);
                if (ret)
                        total_errors++;
        }
@@ -3790,12 +3788,6 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
        return 0;
 }
 
-int write_ctree_super(struct btrfs_trans_handle *trans,
-                     struct btrfs_fs_info *fs_info, int max_mirrors)
-{
-       return write_all_supers(fs_info, max_mirrors);
-}
-
 /* Drop a fs root from the radix tree and free it. */
 void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
                                  struct btrfs_root *root)
@@ -4122,8 +4114,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
        return btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
 }
 
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
-                             int read_only)
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_super_block *sb = fs_info->super_copy;
        u64 nodesize = btrfs_super_nodesize(sb);
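Two cleanups recur through the disk-io.c hunks: parameters no caller uses anymore are dropped (the inode in check_async_write(), do_barriers in write_dev_supers(), read_only in btrfs_check_super_valid()), and the one-line write_ctree_super() wrapper is deleted in favor of exporting write_all_supers() directly. A before/after sketch of that wrapper removal, with simplified names and a stub body:

#include <stdio.h>

/*
 * before:
 *
 *	static int write_all_supers(int max_mirrors) { ... }
 *
 *	int write_ctree_super(void *trans, int max_mirrors)
 *	{
 *		return write_all_supers(max_mirrors);	// trans unused
 *	}
 *
 * after: the worker loses "static" and becomes the API itself.
 */
int write_all_supers(int max_mirrors)
{
	printf("writing %d superblock mirrors\n", max_mirrors);	/* stub */
	return 0;
}

int main(void)
{
	return write_all_supers(2);
}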
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 44dcd9af6b7c5c808e1a2a8d42a4ffc5d67d9d15..0be2d4fe705b4d563cb33134ea769d51c88aedbd 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,14 +52,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
 struct extent_buffer *btrfs_find_create_tree_block(
                                                struct btrfs_fs_info *fs_info,
                                                u64 bytenr);
-void clean_tree_block(struct btrfs_trans_handle *trans,
-                     struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
+void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
 int open_ctree(struct super_block *sb,
               struct btrfs_fs_devices *fs_devices,
               char *options);
 void close_ctree(struct btrfs_fs_info *fs_info);
-int write_ctree_super(struct btrfs_trans_handle *trans,
-                     struct btrfs_fs_info *fs_info, int max_mirrors);
+int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
                        struct buffer_head **bh_ret);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 340d90751263ca22c0fec999def023343401b3b4..87144c9f9593823922161c61a687c19ab13e7e71 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -30,7 +30,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
        len  = BTRFS_FID_SIZE_NON_CONNECTABLE;
        type = FILEID_BTRFS_WITHOUT_PARENT;
 
-       fid->objectid = btrfs_ino(inode);
+       fid->objectid = btrfs_ino(BTRFS_I(inode));
        fid->root_objectid = BTRFS_I(inode)->root->objectid;
        fid->gen = inode->i_generation;
 
@@ -166,13 +166,13 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
        if (!path)
                return ERR_PTR(-ENOMEM);
 
-       if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
+       if (btrfs_ino(BTRFS_I(dir)) == BTRFS_FIRST_FREE_OBJECTID) {
                key.objectid = root->root_key.objectid;
                key.type = BTRFS_ROOT_BACKREF_KEY;
                key.offset = (u64)-1;
                root = fs_info->tree_root;
        } else {
-               key.objectid = btrfs_ino(dir);
+               key.objectid = btrfs_ino(BTRFS_I(dir));
                key.type = BTRFS_INODE_REF_KEY;
                key.offset = (u64)-1;
        }
@@ -235,13 +235,10 @@ static int btrfs_get_name(struct dentry *parent, char *name,
        int ret;
        u64 ino;
 
-       if (!dir || !inode)
-               return -EINVAL;
-
        if (!S_ISDIR(dir->i_mode))
                return -EINVAL;
 
-       ino = btrfs_ino(inode);
+       ino = btrfs_ino(BTRFS_I(inode));
 
        path = btrfs_alloc_path();
        if (!path)
@@ -255,7 +252,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
                root = fs_info->tree_root;
        } else {
                key.objectid = ino;
-               key.offset = btrfs_ino(dir);
+               key.offset = btrfs_ino(BTRFS_I(dir));
                key.type = BTRFS_INODE_REF_KEY;
        }
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dcd2e798767e57a80e7f50ff791a675e394a62c0..c35b966335543cfa5f499e30efda9d5f6eb554af 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -888,7 +888,7 @@ search_again:
 
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
-       head = btrfs_find_delayed_ref_head(trans, bytenr);
+       head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
        if (head) {
                if (!mutex_trylock(&head->mutex)) {
                        atomic_inc(&head->node.refs);
@@ -1035,10 +1035,11 @@ out_free:
 
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
 static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
-                                 struct btrfs_root *root,
+                                 struct btrfs_fs_info *fs_info,
                                  struct btrfs_path *path,
                                  u64 owner, u32 extra_size)
 {
+       struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_extent_item *item;
        struct btrfs_extent_item_v0 *ei0;
        struct btrfs_extent_ref_v0 *ref0;
@@ -1092,7 +1093,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
                return ret;
        BUG_ON(ret); /* Corruption */
 
-       btrfs_extend_item(root->fs_info, path, new_size);
+       btrfs_extend_item(fs_info, path, new_size);
 
        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1151,12 +1152,13 @@ static int match_extent_data_ref(struct extent_buffer *leaf,
 }
 
 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
-                                          struct btrfs_root *root,
+                                          struct btrfs_fs_info *fs_info,
                                           struct btrfs_path *path,
                                           u64 bytenr, u64 parent,
                                           u64 root_objectid,
                                           u64 owner, u64 offset)
 {
+       struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_key key;
        struct btrfs_extent_data_ref *ref;
        struct extent_buffer *leaf;
@@ -1238,12 +1240,13 @@ fail:
 }
 
 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
-                                          struct btrfs_root *root,
+                                          struct btrfs_fs_info *fs_info,
                                           struct btrfs_path *path,
                                           u64 bytenr, u64 parent,
                                           u64 root_objectid, u64 owner,
                                           u64 offset, int refs_to_add)
 {
+       struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        u32 size;
@@ -1317,7 +1320,7 @@ fail:
 }
 
 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
-                                          struct btrfs_root *root,
+                                          struct btrfs_fs_info *fs_info,
                                           struct btrfs_path *path,
                                           int refs_to_drop, int *last_ref)
 {
@@ -1354,7 +1357,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
        num_refs -= refs_to_drop;
 
        if (num_refs == 0) {
-               ret = btrfs_del_item(trans, root, path);
+               ret = btrfs_del_item(trans, fs_info->extent_root, path);
                *last_ref = 1;
        } else {
                if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
@@ -1416,11 +1419,12 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
 }
 
 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
-                                         struct btrfs_root *root,
+                                         struct btrfs_fs_info *fs_info,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 root_objectid)
 {
+       struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_key key;
        int ret;
 
@@ -1449,7 +1453,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
 }
 
 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
-                                         struct btrfs_root *root,
+                                         struct btrfs_fs_info *fs_info,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 root_objectid)
@@ -1466,7 +1470,8 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
                key.offset = root_objectid;
        }
 
-       ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+       ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
+                                     path, &key, 0);
        btrfs_release_path(path);
        return ret;
 }
@@ -1524,14 +1529,14 @@ static int find_next_key(struct btrfs_path *path, int level,
  */
 static noinline_for_stack
 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
+                                struct btrfs_fs_info *fs_info,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref **ref_ret,
                                 u64 bytenr, u64 num_bytes,
                                 u64 parent, u64 root_objectid,
                                 u64 owner, u64 offset, int insert)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
@@ -1614,7 +1619,7 @@ again:
                        err = -ENOENT;
                        goto out;
                }
-               ret = convert_extent_item_v0(trans, root, path, owner,
+               ret = convert_extent_item_v0(trans, fs_info, path, owner,
                                             extra_size);
                if (ret < 0) {
                        err = ret;
@@ -1716,7 +1721,7 @@ out:
  * helper to add new inline back ref
  */
 static noinline_for_stack
-void setup_inline_extent_backref(struct btrfs_root *root,
+void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref *iref,
                                 u64 parent, u64 root_objectid,
@@ -1739,7 +1744,7 @@ void setup_inline_extent_backref(struct btrfs_root *root,
        type = extent_ref_type(parent, owner);
        size = btrfs_extent_inline_ref_size(type);
 
-       btrfs_extend_item(root->fs_info, path, size);
+       btrfs_extend_item(fs_info, path, size);
 
        ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, ei);
@@ -1777,7 +1782,7 @@ void setup_inline_extent_backref(struct btrfs_root *root,
 }
 
 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
+                                struct btrfs_fs_info *fs_info,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref **ref_ret,
                                 u64 bytenr, u64 num_bytes, u64 parent,
@@ -1785,7 +1790,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
 {
        int ret;
 
-       ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
+       ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
                                           bytenr, num_bytes, parent,
                                           root_objectid, owner, offset, 0);
        if (ret != -ENOENT)
@@ -1795,11 +1800,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
        *ref_ret = NULL;
 
        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-               ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
-                                           root_objectid);
+               ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
+                                           parent, root_objectid);
        } else {
-               ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
-                                            root_objectid, owner, offset);
+               ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
+                                            parent, root_objectid, owner,
+                                            offset);
        }
        return ret;
 }
@@ -1808,7 +1814,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
  * helper to update/remove inline back ref
  */
 static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_root *root,
+void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
                                  struct btrfs_path *path,
                                  struct btrfs_extent_inline_ref *iref,
                                  int refs_to_mod,
@@ -1866,14 +1872,14 @@ void update_inline_extent_backref(struct btrfs_root *root,
                        memmove_extent_buffer(leaf, ptr, ptr + size,
                                              end - ptr - size);
                item_size -= size;
-               btrfs_truncate_item(root->fs_info, path, item_size, 1);
+               btrfs_truncate_item(fs_info, path, item_size, 1);
        }
        btrfs_mark_buffer_dirty(leaf);
 }
 
 static noinline_for_stack
 int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
+                                struct btrfs_fs_info *fs_info,
                                 struct btrfs_path *path,
                                 u64 bytenr, u64 num_bytes, u64 parent,
                                 u64 root_objectid, u64 owner,
@@ -1883,15 +1889,15 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
        struct btrfs_extent_inline_ref *iref;
        int ret;
 
-       ret = lookup_inline_extent_backref(trans, root, path, &iref,
+       ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
                                           bytenr, num_bytes, parent,
                                           root_objectid, owner, offset, 1);
        if (ret == 0) {
                BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
-               update_inline_extent_backref(root, path, iref,
+               update_inline_extent_backref(fs_info, path, iref,
                                             refs_to_add, extent_op, NULL);
        } else if (ret == -ENOENT) {
-               setup_inline_extent_backref(root, path, iref, parent,
+               setup_inline_extent_backref(fs_info, path, iref, parent,
                                            root_objectid, owner, offset,
                                            refs_to_add, extent_op);
                ret = 0;
@@ -1900,7 +1906,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 }
 
 static int insert_extent_backref(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
+                                struct btrfs_fs_info *fs_info,
                                 struct btrfs_path *path,
                                 u64 bytenr, u64 parent, u64 root_objectid,
                                 u64 owner, u64 offset, int refs_to_add)
@@ -1908,10 +1914,10 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
        int ret;
        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
                BUG_ON(refs_to_add != 1);
-               ret = insert_tree_block_ref(trans, root, path, bytenr,
+               ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
                                            parent, root_objectid);
        } else {
-               ret = insert_extent_data_ref(trans, root, path, bytenr,
+               ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
                                             parent, root_objectid,
                                             owner, offset, refs_to_add);
        }
@@ -1919,7 +1925,7 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
 }
 
 static int remove_extent_backref(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
+                                struct btrfs_fs_info *fs_info,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref *iref,
                                 int refs_to_drop, int is_data, int *last_ref)
@@ -1928,14 +1934,14 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 
        BUG_ON(!is_data && refs_to_drop != 1);
        if (iref) {
-               update_inline_extent_backref(root, path, iref,
+               update_inline_extent_backref(fs_info, path, iref,
                                             -refs_to_drop, NULL, last_ref);
        } else if (is_data) {
-               ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
+               ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
                                             last_ref);
        } else {
                *last_ref = 1;
-               ret = btrfs_del_item(trans, root, path);
+               ret = btrfs_del_item(trans, fs_info->extent_root, path);
        }
        return ret;
 }
@@ -2089,7 +2095,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
                                        num_bytes, parent, root_objectid,
                                        owner, offset, 0,
-                                       BTRFS_ADD_DELAYED_REF, NULL);
+                                       BTRFS_ADD_DELAYED_REF);
        }
        return ret;
 }
@@ -2117,9 +2123,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        path->reada = READA_FORWARD;
        path->leave_spinning = 1;
        /* this will set up the path even if it fails to insert the back ref */
-       ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
-                                          bytenr, num_bytes, parent,
-                                          root_objectid, owner, offset,
+       ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
+                                          num_bytes, parent, root_objectid,
+                                          owner, offset,
                                           refs_to_add, extent_op);
        if ((ret < 0 && ret != -EAGAIN) || !ret)
                goto out;
@@ -2143,9 +2149,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        path->reada = READA_FORWARD;
        path->leave_spinning = 1;
        /* now insert the actual backref */
-       ret = insert_extent_backref(trans, fs_info->extent_root,
-                                   path, bytenr, parent, root_objectid,
-                                   owner, offset, refs_to_add);
+       ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
+                                   root_objectid, owner, offset, refs_to_add);
        if (ret)
                btrfs_abort_transaction(trans, ret);
 out:
@@ -2290,8 +2295,7 @@ again:
        item_size = btrfs_item_size_nr(leaf, path->slots[0]);
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        if (item_size < sizeof(*ei)) {
-               ret = convert_extent_item_v0(trans, fs_info->extent_root,
-                                            path, (u64)-1, 0);
+               ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
                if (ret < 0) {
                        err = ret;
                        goto out;
@@ -3028,8 +3032,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
-                                     struct btrfs_root *root,
+static noinline int check_delayed_ref(struct btrfs_root *root,
                                      struct btrfs_path *path,
                                      u64 objectid, u64 offset, u64 bytenr)
 {
@@ -3037,11 +3040,16 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_data_ref *data_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_transaction *cur_trans;
        int ret = 0;
 
-       delayed_refs = &trans->transaction->delayed_refs;
+       cur_trans = root->fs_info->running_transaction;
+       if (!cur_trans)
+               return 0;
+
+       delayed_refs = &cur_trans->delayed_refs;
        spin_lock(&delayed_refs->lock);
-       head = btrfs_find_delayed_ref_head(trans, bytenr);
+       head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
        if (!head) {
                spin_unlock(&delayed_refs->lock);
                return 0;
@@ -3090,8 +3098,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
-                                       struct btrfs_root *root,
+static noinline int check_committed_ref(struct btrfs_root *root,
                                        struct btrfs_path *path,
                                        u64 objectid, u64 offset, u64 bytenr)
 {
@@ -3162,9 +3169,8 @@ out:
        return ret;
 }
 
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
-                         u64 objectid, u64 offset, u64 bytenr)
+int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
+                         u64 bytenr)
 {
        struct btrfs_path *path;
        int ret;
@@ -3175,12 +3181,12 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
                return -ENOENT;
 
        do {
-               ret = check_committed_ref(trans, root, path, objectid,
+               ret = check_committed_ref(root, path, objectid,
                                          offset, bytenr);
                if (ret && ret != -ENOENT)
                        goto out;
 
-               ret2 = check_delayed_ref(trans, root, path, objectid,
+               ret2 = check_delayed_ref(root, path, objectid,
                                         offset, bytenr);
        } while (ret2 == -EAGAIN);
 
@@ -3368,7 +3374,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
        if (trans->aborted)
                return 0;
 again:
-       inode = lookup_free_space_inode(root, block_group, path);
+       inode = lookup_free_space_inode(fs_info, block_group, path);
        if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
                ret = PTR_ERR(inode);
                btrfs_release_path(path);
@@ -3382,7 +3388,8 @@ again:
                if (block_group->ro)
                        goto out_free;
 
-               ret = create_free_space_inode(root, trans, block_group, path);
+               ret = create_free_space_inode(fs_info, trans, block_group,
+                                             path);
                if (ret)
                        goto out_free;
                goto again;
@@ -3424,7 +3431,7 @@ again:
                if (ret)
                        goto out_put;
 
-               ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
+               ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
                if (ret)
                        goto out_put;
        }
@@ -4119,6 +4126,15 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
        return ret;
 }
 
+static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
+                                bool may_use_included)
+{
+       ASSERT(s_info);
+       return s_info->bytes_used + s_info->bytes_reserved +
+               s_info->bytes_pinned + s_info->bytes_readonly +
+               (may_use_included ? s_info->bytes_may_use : 0);
+}
+
 int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
 {
        struct btrfs_space_info *data_sinfo;
@@ -4144,9 +4160,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
 again:
        /* make sure we have enough space to handle the data first */
        spin_lock(&data_sinfo->lock);
-       used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
-               data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
-               data_sinfo->bytes_may_use;
+       used = btrfs_space_info_used(data_sinfo, true);
 
        if (used + bytes > data_sinfo->total_bytes) {
                struct btrfs_trans_handle *trans;
@@ -4421,9 +4435,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
 
        info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
        spin_lock(&info->lock);
-       left = info->total_bytes - info->bytes_used - info->bytes_pinned -
-               info->bytes_reserved - info->bytes_readonly -
-               info->bytes_may_use;
+       left = info->total_bytes - btrfs_space_info_used(info, true);
        spin_unlock(&info->lock);
 
        num_devs = get_profile_num_devs(fs_info, type);
@@ -4606,8 +4618,7 @@ static int can_overcommit(struct btrfs_root *root,
                return 0;
 
        profile = btrfs_get_alloc_profile(root, 0);
-       used = space_info->bytes_used + space_info->bytes_reserved +
-               space_info->bytes_pinned + space_info->bytes_readonly;
+       used = btrfs_space_info_used(space_info, false);
 
        /*
         * We only want to allow over committing if we have lots of actual space
@@ -4787,11 +4798,10 @@ skip_async:
  * get us somewhere and then commit the transaction if it does.  Otherwise it
  * will return -ENOSPC.
  */
-static int may_commit_transaction(struct btrfs_root *root,
+static int may_commit_transaction(struct btrfs_fs_info *fs_info,
                                  struct btrfs_space_info *space_info,
                                  u64 bytes, int force)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
        struct btrfs_trans_handle *trans;
 
@@ -4823,7 +4833,7 @@ static int may_commit_transaction(struct btrfs_root *root,
        spin_unlock(&delayed_rsv->lock);
 
 commit:
-       trans = btrfs_join_transaction(root);
+       trans = btrfs_join_transaction(fs_info->fs_root);
        if (IS_ERR(trans))
                return -ENOSPC;
 
@@ -4837,11 +4847,11 @@ struct reserve_ticket {
        wait_queue_head_t wait;
 };
 
-static int flush_space(struct btrfs_root *root,
+static int flush_space(struct btrfs_fs_info *fs_info,
                       struct btrfs_space_info *space_info, u64 num_bytes,
                       u64 orig_bytes, int state)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_root *root = fs_info->fs_root;
        struct btrfs_trans_handle *trans;
        int nr;
        int ret = 0;
@@ -4881,7 +4891,8 @@ static int flush_space(struct btrfs_root *root,
                        ret = 0;
                break;
        case COMMIT_TRANS:
-               ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+               ret = may_commit_transaction(fs_info, space_info,
+                                            orig_bytes, 0);
                break;
        default:
                ret = -ENOSPC;
@@ -4993,8 +5004,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
                struct reserve_ticket *ticket;
                int ret;
 
-               ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
-                           to_reclaim, flush_state);
+               ret = flush_space(fs_info, space_info, to_reclaim, to_reclaim,
+                                 flush_state);
                spin_lock(&space_info->lock);
                if (list_empty(&space_info->tickets)) {
                        space_info->flush = 0;
@@ -5049,8 +5060,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
        spin_unlock(&space_info->lock);
 
        do {
-               flush_space(fs_info->fs_root, space_info, to_reclaim,
-                           to_reclaim, flush_state);
+               flush_space(fs_info, space_info, to_reclaim, to_reclaim,
+                           flush_state);
                flush_state++;
                spin_lock(&space_info->lock);
                if (ticket->bytes == 0) {
@@ -5135,9 +5146,7 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
 
        spin_lock(&space_info->lock);
        ret = -ENOSPC;
-       used = space_info->bytes_used + space_info->bytes_reserved +
-               space_info->bytes_pinned + space_info->bytes_readonly +
-               space_info->bytes_may_use;
+       used = btrfs_space_info_used(space_info, true);
 
        /*
         * If we have enough space then hooray, make our reservation and carry
@@ -5630,9 +5639,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
        block_rsv->size = min_t(u64, num_bytes, SZ_512M);
 
        if (block_rsv->reserved < block_rsv->size) {
-               num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
-                       sinfo->bytes_reserved + sinfo->bytes_readonly +
-                       sinfo->bytes_may_use;
+               num_bytes = btrfs_space_info_used(sinfo, true);
                if (sinfo->total_bytes > num_bytes) {
                        num_bytes = sinfo->total_bytes - num_bytes;
                        num_bytes = min(num_bytes,
@@ -5756,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
        u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
        trace_btrfs_space_reservation(fs_info, "orphan",
-                                     btrfs_ino(inode), num_bytes, 1);
+                                     btrfs_ino(BTRFS_I(inode)), num_bytes, 1);
        return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
 }
 
@@ -5767,7 +5774,7 @@ void btrfs_orphan_release_metadata(struct inode *inode)
        u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
        trace_btrfs_space_reservation(fs_info, "orphan",
-                                     btrfs_ino(inode), num_bytes, 0);
+                                     btrfs_ino(BTRFS_I(inode)), num_bytes, 0);
        btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
 }
 
@@ -5799,7 +5806,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
                /* One for parent inode, two for dir entries */
                num_bytes = 3 * fs_info->nodesize;
-               ret = btrfs_qgroup_reserve_meta(root, num_bytes);
+               ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
                if (ret)
                        return ret;
        } else {
@@ -5824,8 +5831,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 }
 
 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
-                                     struct btrfs_block_rsv *rsv,
-                                     u64 qgroup_reserved)
+                                     struct btrfs_block_rsv *rsv)
 {
        btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
 }
@@ -5844,11 +5850,9 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
 {
        unsigned drop_inode_space = 0;
        unsigned dropped_extents = 0;
-       unsigned num_extents = 0;
+       unsigned num_extents;
 
-       num_extents = (unsigned)div64_u64(num_bytes +
-                                         BTRFS_MAX_EXTENT_SIZE - 1,
-                                         BTRFS_MAX_EXTENT_SIZE);
+       num_extents = count_max_extents(num_bytes);
        ASSERT(num_extents);
        ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
        BTRFS_I(inode)->outstanding_extents -= num_extents;
@@ -5927,7 +5931,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
        u64 to_reserve = 0;
        u64 csum_bytes;
-       unsigned nr_extents = 0;
+       unsigned nr_extents;
        enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
        int ret = 0;
        bool delalloc_lock = true;
@@ -5960,9 +5964,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
 
        spin_lock(&BTRFS_I(inode)->lock);
-       nr_extents = (unsigned)div64_u64(num_bytes +
-                                        BTRFS_MAX_EXTENT_SIZE - 1,
-                                        BTRFS_MAX_EXTENT_SIZE);
+       nr_extents = count_max_extents(num_bytes);
        BTRFS_I(inode)->outstanding_extents += nr_extents;
 
        nr_extents = 0;
@@ -5979,7 +5981,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
                ret = btrfs_qgroup_reserve_meta(root,
-                               nr_extents * fs_info->nodesize);
+                               nr_extents * fs_info->nodesize, true);
                if (ret)
                        goto out_fail;
        }
@@ -6005,7 +6007,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
        if (to_reserve)
                trace_btrfs_space_reservation(fs_info, "delalloc",
-                                             btrfs_ino(inode), to_reserve, 1);
+                                     btrfs_ino(BTRFS_I(inode)), to_reserve, 1);
        if (release_extra)
                btrfs_block_rsv_release(fs_info, block_rsv,
                                btrfs_calc_trans_metadata_size(fs_info, 1));
@@ -6068,7 +6070,7 @@ out_fail:
        if (to_free) {
                btrfs_block_rsv_release(fs_info, block_rsv, to_free);
                trace_btrfs_space_reservation(fs_info, "delalloc",
-                                             btrfs_ino(inode), to_free, 0);
+                                     btrfs_ino(BTRFS_I(inode)), to_free, 0);
        }
        if (delalloc_lock)
                mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
@@ -6104,7 +6106,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
                return;
 
        trace_btrfs_space_reservation(fs_info, "delalloc",
-                                     btrfs_ino(inode), to_free, 0);
+                                     btrfs_ino(BTRFS_I(inode)), to_free, 0);
 
        btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
 }
@@ -6561,8 +6563,7 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
        spin_unlock(&space_info->lock);
        return ret;
 }
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info)
+void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_caching_control *next;
        struct btrfs_caching_control *caching_ctl;
@@ -6845,7 +6846,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        if (is_data)
                skinny_metadata = 0;
 
-       ret = lookup_extent_backref(trans, extent_root, path, &iref,
+       ret = lookup_extent_backref(trans, info, path, &iref,
                                    bytenr, num_bytes, parent,
                                    root_objectid, owner_objectid,
                                    owner_offset);
@@ -6877,8 +6878,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 #endif
                if (!found_extent) {
                        BUG_ON(iref);
-                       ret = remove_extent_backref(trans, extent_root, path,
-                                                   NULL, refs_to_drop,
+                       ret = remove_extent_backref(trans, info, path, NULL,
+                                                   refs_to_drop,
                                                    is_data, &last_ref);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
@@ -6953,8 +6954,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        if (item_size < sizeof(*ei)) {
                BUG_ON(found_extent || extent_slot != path->slots[0]);
-               ret = convert_extent_item_v0(trans, extent_root, path,
-                                            owner_objectid, 0);
+               ret = convert_extent_item_v0(trans, info, path, owner_objectid,
+                                            0);
                if (ret < 0) {
                        btrfs_abort_transaction(trans, ret);
                        goto out;
@@ -7021,7 +7022,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        btrfs_mark_buffer_dirty(leaf);
                }
                if (found_extent) {
-                       ret = remove_extent_backref(trans, extent_root, path,
+                       ret = remove_extent_backref(trans, info, path,
                                                    iref, refs_to_drop,
                                                    is_data, &last_ref);
                        if (ret) {
@@ -7095,7 +7096,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
-       head = btrfs_find_delayed_ref_head(trans, bytenr);
+       head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
        if (!head)
                goto out_delayed_unlock;
 
@@ -7244,7 +7245,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                                                num_bytes,
                                                parent, root_objectid, owner,
                                                offset, 0,
-                                               BTRFS_DROP_DELAYED_REF, NULL);
+                                               BTRFS_DROP_DELAYED_REF);
        }
        return ret;
 }
@@ -7419,12 +7420,11 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
  * If there is no suitable free space, we will record the max size of
  * the free space extent currently.
  */
-static noinline int find_free_extent(struct btrfs_root *orig_root,
+static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
                                u64 ram_bytes, u64 num_bytes, u64 empty_size,
                                u64 hint_byte, struct btrfs_key *ins,
                                u64 flags, int delalloc)
 {
-       struct btrfs_fs_info *fs_info = orig_root->fs_info;
        int ret = 0;
        struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_free_cluster *last_ptr = NULL;
@@ -7716,18 +7716,20 @@ unclustered_alloc:
                        last_ptr->fragmented = 1;
                        spin_unlock(&last_ptr->lock);
                }
-               spin_lock(&block_group->free_space_ctl->tree_lock);
-               if (cached &&
-                   block_group->free_space_ctl->free_space <
-                   num_bytes + empty_cluster + empty_size) {
-                       if (block_group->free_space_ctl->free_space >
-                           max_extent_size)
-                               max_extent_size =
-                                       block_group->free_space_ctl->free_space;
-                       spin_unlock(&block_group->free_space_ctl->tree_lock);
-                       goto loop;
+               if (cached) {
+                       struct btrfs_free_space_ctl *ctl =
+                               block_group->free_space_ctl;
+
+                       spin_lock(&ctl->tree_lock);
+                       if (ctl->free_space <
+                           num_bytes + empty_cluster + empty_size) {
+                               if (ctl->free_space > max_extent_size)
+                                       max_extent_size = ctl->free_space;
+                               spin_unlock(&ctl->tree_lock);
+                               goto loop;
+                       }
+                       spin_unlock(&ctl->tree_lock);
                }
-               spin_unlock(&block_group->free_space_ctl->tree_lock);
 
                offset = btrfs_find_space_for_alloc(block_group, search_start,
                                                    num_bytes, empty_size,
@@ -7908,9 +7910,8 @@ static void dump_space_info(struct btrfs_fs_info *fs_info,
        spin_lock(&info->lock);
        btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
                   info->flags,
-                  info->total_bytes - info->bytes_used - info->bytes_pinned -
-                  info->bytes_reserved - info->bytes_readonly -
-                  info->bytes_may_use, (info->full) ? "" : "not ");
+                  info->total_bytes - btrfs_space_info_used(info, true),
+                  info->full ? "" : "not ");
        btrfs_info(fs_info,
                "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
                info->total_bytes, info->bytes_used, info->bytes_pinned,
@@ -7951,7 +7952,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
        flags = btrfs_get_alloc_profile(root, is_data);
 again:
        WARN_ON(num_bytes < fs_info->sectorsize);
-       ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+       ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
                               hint_byte, ins, flags, delalloc);
        if (!ret && !is_data) {
                btrfs_dec_block_group_reservations(fs_info, ins->objectid);
@@ -8194,8 +8195,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
                                         ins->offset, 0,
                                         root_objectid, owner, offset,
-                                        ram_bytes, BTRFS_ADD_DELAYED_EXTENT,
-                                        NULL);
+                                        ram_bytes, BTRFS_ADD_DELAYED_EXTENT);
        return ret;
 }
 
@@ -8256,7 +8256,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        btrfs_set_header_generation(buf, trans->transid);
        btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
        btrfs_tree_lock(buf);
-       clean_tree_block(trans, fs_info, buf);
+       clean_tree_block(fs_info, buf);
        clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
 
        btrfs_set_lock_blocking(buf);
@@ -8351,10 +8351,11 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
  * returns the tree buffer or an ERR_PTR on error.
  */
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
-                                       struct btrfs_root *root,
-                                       u64 parent, u64 root_objectid,
-                                       struct btrfs_disk_key *key, int level,
-                                       u64 hint, u64 empty_size)
+                                            struct btrfs_root *root,
+                                            u64 parent, u64 root_objectid,
+                                            const struct btrfs_disk_key *key,
+                                            int level, u64 hint,
+                                            u64 empty_size)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key ins;
@@ -8876,7 +8877,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        btrfs_set_lock_blocking(eb);
                        path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                }
-               clean_tree_block(trans, fs_info, eb);
+               clean_tree_block(fs_info, eb);
        }
 
        if (eb == root->node) {
@@ -9346,8 +9347,7 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
        num_bytes = cache->key.offset - cache->reserved - cache->pinned -
                    cache->bytes_super - btrfs_block_group_used(&cache->item);
 
-       if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
-           sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
+       if (btrfs_space_info_used(sinfo, true) + num_bytes +
            min_allocable_bytes <= sinfo->total_bytes) {
                sinfo->bytes_readonly += num_bytes;
                cache->ro++;
@@ -9360,17 +9360,16 @@ out:
        return ret;
 }
 
-int btrfs_inc_block_group_ro(struct btrfs_root *root,
+int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
                             struct btrfs_block_group_cache *cache)
 
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_trans_handle *trans;
        u64 alloc_flags;
        int ret;
 
 again:
-       trans = btrfs_join_transaction(root);
+       trans = btrfs_join_transaction(fs_info->extent_root);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
@@ -9557,9 +9556,8 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
         * all of the extents from this block group.  If we can, we're good
         */
        if ((space_info->total_bytes != block_group->key.offset) &&
-           (space_info->bytes_used + space_info->bytes_reserved +
-            space_info->bytes_pinned + space_info->bytes_readonly +
-            min_free < space_info->total_bytes)) {
+           (btrfs_space_info_used(space_info, false) + min_free <
+            space_info->total_bytes)) {
                spin_unlock(&space_info->lock);
                goto out;
        }
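
Both hunks above collapse the same open-coded sum into btrfs_space_info_used(). Reconstructing from the removed lines, the helper presumably looks like the sketch below; the may_use_included flag decides whether bytes_may_use joins the total (true in inc_block_group_ro, false in btrfs_can_relocate). This is a sketch inferred from the diff, not the patch's exact body:

static inline u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
                                        bool may_use_included)
{
        /* sum of every byte counter tracked for this space_info */
        return s_info->bytes_used + s_info->bytes_reserved +
               s_info->bytes_pinned + s_info->bytes_readonly +
               (may_use_included ? s_info->bytes_may_use : 0);
}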
@@ -10317,7 +10315,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         * get the inode first so any iput calls done for the io_list
         * aren't the final iput (no unlinks allowed now)
         */
-       inode = lookup_free_space_inode(tree_root, block_group, path);
+       inode = lookup_free_space_inode(fs_info, block_group, path);
 
        mutex_lock(&trans->transaction->cache_write_mutex);
        /*
index 4ac383a3a649106d29b68292b49108b4d833bcd0..d15b5ddb6732c6db31c472f6936760fff47c6310 100644 (file)
@@ -98,7 +98,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
        if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
                btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
                    "%s: ino %llu isize %llu odd range [%llu,%llu]",
-                               caller, btrfs_ino(inode), isize, start, end);
+                       caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
        }
 }
 #else
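
The btrfs_ino(BTRFS_I(inode)) conversions repeated throughout this patch follow from btrfs_ino() now taking the btrfs-specific inode type directly, so callers holding a VFS struct inode * must wrap it with BTRFS_I(). A sketch of the assumed post-patch helper, using the struct btrfs_inode field names of this era (vfs_inode, location); shown for orientation only:

static inline u64 btrfs_ino(struct btrfs_inode *inode)
{
        u64 ino = inode->location.objectid;

        /* subvolume dirs don't store a real ino in the location key */
        if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
                ino = inode->vfs_inode.i_ino;
        return ino;
}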
@@ -144,7 +144,7 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits,
        if (!set && (state->state & bits) == 0)
                return;
        changeset->bytes_changed += state->end - state->start + 1;
-       ret = ulist_add(changeset->range_changed, state->start, state->end,
+       ret = ulist_add(&changeset->range_changed, state->start, state->end,
                        GFP_ATOMIC);
        /* ENOMEM */
        BUG_ON(ret < 0);
@@ -226,6 +226,11 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 {
        struct extent_state *state;
 
+       /*
+        * The given mask might not be appropriate for the slab allocator;
+        * drop the unsupported bits.
+        */
+       mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
        state = kmem_cache_alloc(extent_state_cache, mask);
        if (!state)
                return state;
@@ -1549,33 +1554,24 @@ out:
        return found;
 }
 
+static int __process_pages_contig(struct address_space *mapping,
+                                 struct page *locked_page,
+                                 pgoff_t start_index, pgoff_t end_index,
+                                 unsigned long page_ops, pgoff_t *index_ret);
+
 static noinline void __unlock_for_delalloc(struct inode *inode,
                                           struct page *locked_page,
                                           u64 start, u64 end)
 {
-       int ret;
-       struct page *pages[16];
        unsigned long index = start >> PAGE_SHIFT;
        unsigned long end_index = end >> PAGE_SHIFT;
-       unsigned long nr_pages = end_index - index + 1;
-       int i;
 
+       ASSERT(locked_page);
        if (index == locked_page->index && end_index == index)
                return;
 
-       while (nr_pages > 0) {
-               ret = find_get_pages_contig(inode->i_mapping, index,
-                                    min_t(unsigned long, nr_pages,
-                                    ARRAY_SIZE(pages)), pages);
-               for (i = 0; i < ret; i++) {
-                       if (pages[i] != locked_page)
-                               unlock_page(pages[i]);
-                       put_page(pages[i]);
-               }
-               nr_pages -= ret;
-               index += ret;
-               cond_resched();
-       }
+       __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
+                              PAGE_UNLOCK, NULL);
 }
 
 static noinline int lock_delalloc_pages(struct inode *inode,
@@ -1584,59 +1580,19 @@ static noinline int lock_delalloc_pages(struct inode *inode,
                                        u64 delalloc_end)
 {
        unsigned long index = delalloc_start >> PAGE_SHIFT;
-       unsigned long start_index = index;
+       unsigned long index_ret = index;
        unsigned long end_index = delalloc_end >> PAGE_SHIFT;
-       unsigned long pages_locked = 0;
-       struct page *pages[16];
-       unsigned long nrpages;
        int ret;
-       int i;
 
-       /* the caller is responsible for locking the start index */
+       ASSERT(locked_page);
        if (index == locked_page->index && index == end_index)
                return 0;
 
-       /* skip the page at the start index */
-       nrpages = end_index - index + 1;
-       while (nrpages > 0) {
-               ret = find_get_pages_contig(inode->i_mapping, index,
-                                    min_t(unsigned long,
-                                    nrpages, ARRAY_SIZE(pages)), pages);
-               if (ret == 0) {
-                       ret = -EAGAIN;
-                       goto done;
-               }
-               /* now we have an array of pages, lock them all */
-               for (i = 0; i < ret; i++) {
-                       /*
-                        * the caller is taking responsibility for
-                        * locked_page
-                        */
-                       if (pages[i] != locked_page) {
-                               lock_page(pages[i]);
-                               if (!PageDirty(pages[i]) ||
-                                   pages[i]->mapping != inode->i_mapping) {
-                                       ret = -EAGAIN;
-                                       unlock_page(pages[i]);
-                                       put_page(pages[i]);
-                                       goto done;
-                               }
-                       }
-                       put_page(pages[i]);
-                       pages_locked++;
-               }
-               nrpages -= ret;
-               index += ret;
-               cond_resched();
-       }
-       ret = 0;
-done:
-       if (ret && pages_locked) {
-               __unlock_for_delalloc(inode, locked_page,
-                             delalloc_start,
-                             ((u64)(start_index + pages_locked - 1)) <<
-                             PAGE_SHIFT);
-       }
+       ret = __process_pages_contig(inode->i_mapping, locked_page, index,
+                                    end_index, PAGE_LOCK, &index_ret);
+       if (ret == -EAGAIN)
+               __unlock_for_delalloc(inode, locked_page, delalloc_start,
+                                     (u64)index_ret << PAGE_SHIFT);
        return ret;
 }
 
@@ -1726,37 +1682,47 @@ out_failed:
        return found;
 }
 
-void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
-                                u64 delalloc_end, struct page *locked_page,
-                                unsigned clear_bits,
-                                unsigned long page_ops)
+static int __process_pages_contig(struct address_space *mapping,
+                                 struct page *locked_page,
+                                 pgoff_t start_index, pgoff_t end_index,
+                                 unsigned long page_ops, pgoff_t *index_ret)
 {
-       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
-       int ret;
+       unsigned long nr_pages = end_index - start_index + 1;
+       unsigned long pages_locked = 0;
+       pgoff_t index = start_index;
        struct page *pages[16];
-       unsigned long index = start >> PAGE_SHIFT;
-       unsigned long end_index = end >> PAGE_SHIFT;
-       unsigned long nr_pages = end_index - index + 1;
+       unsigned ret;
+       int err = 0;
        int i;
 
-       clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
-       if (page_ops == 0)
-               return;
+       if (page_ops & PAGE_LOCK) {
+               ASSERT(page_ops == PAGE_LOCK);
+               ASSERT(index_ret && *index_ret == start_index);
+       }
 
        if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
-               mapping_set_error(inode->i_mapping, -EIO);
+               mapping_set_error(mapping, -EIO);
 
        while (nr_pages > 0) {
-               ret = find_get_pages_contig(inode->i_mapping, index,
+               ret = find_get_pages_contig(mapping, index,
                                     min_t(unsigned long,
                                     nr_pages, ARRAY_SIZE(pages)), pages);
-               for (i = 0; i < ret; i++) {
+               if (ret == 0) {
+                       /*
+                        * Finding nothing at @index can only happen when
+                        * we are going to lock these pages (PAGE_LOCK).
+                        */
+                       ASSERT(page_ops & PAGE_LOCK);
+                       return ret;
+               }
 
+               for (i = 0; i < ret; i++) {
                        if (page_ops & PAGE_SET_PRIVATE2)
                                SetPagePrivate2(pages[i]);
 
                        if (pages[i] == locked_page) {
                                put_page(pages[i]);
+                               pages_locked++;
                                continue;
                        }
                        if (page_ops & PAGE_CLEAR_DIRTY)
@@ -1769,12 +1735,40 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
                                end_page_writeback(pages[i]);
                        if (page_ops & PAGE_UNLOCK)
                                unlock_page(pages[i]);
+                       if (page_ops & PAGE_LOCK) {
+                               lock_page(pages[i]);
+                               if (!PageDirty(pages[i]) ||
+                                   pages[i]->mapping != mapping) {
+                                       unlock_page(pages[i]);
+                                       put_page(pages[i]);
+                                       err = -EAGAIN;
+                                       goto out;
+                               }
+                       }
                        put_page(pages[i]);
+                       pages_locked++;
                }
                nr_pages -= ret;
                index += ret;
                cond_resched();
        }
+out:
+       if (err && index_ret)
+               *index_ret = start_index + pages_locked - 1;
+       return err;
+}
+
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+                                u64 delalloc_end, struct page *locked_page,
+                                unsigned clear_bits,
+                                unsigned long page_ops)
+{
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
+                        NULL, GFP_NOFS);
+
+       __process_pages_contig(inode->i_mapping, locked_page,
+                              start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+                              page_ops, NULL);
 }
 
 /*
@@ -2060,7 +2054,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
 
        btrfs_info_rl_in_rcu(fs_info,
                "read error corrected: ino %llu off %llu (dev %s sector %llu)",
-                                 btrfs_ino(inode), start,
+                                 btrfs_ino(BTRFS_I(inode)), start,
                                  rcu_str_deref(dev->name), sector);
        btrfs_bio_counter_dec(fs_info);
        bio_put(bio);
@@ -2765,7 +2759,6 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
                              size_t size, unsigned long offset,
                              struct block_device *bdev,
                              struct bio **bio_ret,
-                             unsigned long max_pages,
                              bio_end_io_t end_io_func,
                              int mirror_num,
                              unsigned long prev_bio_flags,
@@ -2931,7 +2924,6 @@ static int __do_readpage(struct extent_io_tree *tree,
                }
        }
        while (cur <= end) {
-               unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1;
                bool force_bio_submit = false;
 
                if (cur >= last_byte) {
@@ -3066,10 +3058,9 @@ static int __do_readpage(struct extent_io_tree *tree,
                        continue;
                }
 
-               pnr -= page->index;
                ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL,
                                         page, sector, disk_io_size, pg_offset,
-                                        bdev, bio, pnr,
+                                        bdev, bio,
                                         end_bio_extent_readpage, mirror_num,
                                         *bio_flags,
                                         this_bio_flag,
@@ -3210,7 +3201,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
        return ret;
 }
 
-static void update_nr_written(struct page *page, struct writeback_control *wbc,
+static void update_nr_written(struct writeback_control *wbc,
                              unsigned long nr_written)
 {
        wbc->nr_to_write -= nr_written;
@@ -3330,7 +3321,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
        u64 block_start;
        u64 iosize;
        sector_t sector;
-       struct extent_state *cached_state = NULL;
        struct extent_map *em;
        struct block_device *bdev;
        size_t pg_offset = 0;
@@ -3349,10 +3339,9 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                        else
                                redirty_page_for_writepage(wbc, page);
 
-                       update_nr_written(page, wbc, nr_written);
+                       update_nr_written(wbc, nr_written);
                        unlock_page(page);
-                       ret = 1;
-                       goto done_unlocked;
+                       return 1;
                }
        }
 
@@ -3360,7 +3349,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
         * we don't want to touch the inode after unlocking the page,
         * so we update the mapping writeback index now
         */
-       update_nr_written(page, wbc, nr_written + 1);
+       update_nr_written(wbc, nr_written + 1);
 
        end = page_end;
        if (i_size <= start) {
@@ -3374,7 +3363,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 
        while (cur <= end) {
                u64 em_end;
-               unsigned long max_nr;
 
                if (cur >= i_size) {
                        if (tree->ops && tree->ops->writepage_end_io_hook)
@@ -3431,8 +3419,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                        continue;
                }
 
-               max_nr = (i_size >> PAGE_SHIFT) + 1;
-
                set_range_writeback(tree, cur, cur + iosize - 1);
                if (!PageWriteback(page)) {
                        btrfs_err(BTRFS_I(inode)->root->fs_info,
@@ -3442,11 +3428,14 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 
                ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
                                         page, sector, iosize, pg_offset,
-                                        bdev, &epd->bio, max_nr,
+                                        bdev, &epd->bio,
                                         end_bio_extent_writepage,
                                         0, 0, 0, false);
-               if (ret)
+               if (ret) {
                        SetPageError(page);
+                       if (PageWriteback(page))
+                               end_page_writeback(page);
+               }
 
                cur = cur + iosize;
                pg_offset += iosize;
@@ -3454,11 +3443,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
        }
 done:
        *nr_ret = nr;
-
-done_unlocked:
-
-       /* drop our reference on any cached states */
-       free_extent_state(cached_state);
        return ret;
 }
 
@@ -3761,20 +3745,21 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
                set_page_writeback(p);
                ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
                                         p, offset >> 9, PAGE_SIZE, 0, bdev,
-                                        &epd->bio, -1,
+                                        &epd->bio,
                                         end_bio_extent_buffer_writepage,
                                         0, epd->bio_flags, bio_flags, false);
                epd->bio_flags = bio_flags;
                if (ret) {
                        set_btree_ioerr(p);
-                       end_page_writeback(p);
+                       if (PageWriteback(p))
+                               end_page_writeback(p);
                        if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
                                end_extent_buffer_writeback(eb);
                        ret = -EIO;
                        break;
                }
                offset += PAGE_SIZE;
-               update_nr_written(p, wbc, 1);
+               update_nr_written(wbc, 1);
                unlock_page(p);
        }
 
@@ -3926,8 +3911,7 @@ retry:
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
  */
-static int extent_write_cache_pages(struct extent_io_tree *tree,
-                            struct address_space *mapping,
+static int extent_write_cache_pages(struct address_space *mapping,
                             struct writeback_control *wbc,
                             writepage_t writepage, void *data,
                             void (*flush_fn)(void *))
@@ -4168,8 +4152,7 @@ int extent_writepages(struct extent_io_tree *tree,
                .bio_flags = 0,
        };
 
-       ret = extent_write_cache_pages(tree, mapping, wbc,
-                                      __extent_writepage, &epd,
+       ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
                                       flush_write_bio);
        flush_epd_write_bio(&epd);
        return ret;
@@ -4264,8 +4247,6 @@ static int try_release_extent_state(struct extent_map_tree *map,
                           EXTENT_IOBITS, 0, NULL))
                ret = 0;
        else {
-               if ((mask & GFP_NOFS) == GFP_NOFS)
-                       mask = GFP_NOFS;
                /*
                 * at this point we can safely clear everything except the
                 * locked bit and the nodatasum bit
@@ -4410,8 +4391,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
         * lookup the last file extent.  We're not using i_size here
         * because there might be preallocation past i_size
         */
-       ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
-                                      0);
+       ret = btrfs_lookup_file_extent(NULL, root, path,
+                       btrfs_ino(BTRFS_I(inode)), -1, 0);
        if (ret < 0) {
                btrfs_free_path(path);
                return ret;
@@ -4426,7 +4407,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        found_type = found_key.type;
 
        /* No extents, but there might be delalloc bits */
-       if (found_key.objectid != btrfs_ino(inode) ||
+       if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
                /* have to trust i_size as the end */
                last = (u64)-1;
@@ -4535,8 +4516,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                         * lookup stuff.
                         */
                        ret = btrfs_check_shared(trans, root->fs_info,
-                                                root->objectid,
-                                                btrfs_ino(inode), bytenr);
+                                       root->objectid,
+                                       btrfs_ino(BTRFS_I(inode)), bytenr);
                        if (trans)
                                btrfs_end_transaction(trans);
                        if (ret < 0)
index 17f9ce479ed7fe12fb84f3bbfb0e4decbe29fcde..270d03be290eec9a53e214634d1aabb0135b6e6d 100644 (file)
 #define EXTENT_BUFFER_IN_TREE 10
 #define EXTENT_BUFFER_WRITE_ERR 11    /* write IO error */
 
-/* these are flags for extent_clear_unlock_delalloc */
+/* these are flags for __process_pages_contig */
 #define PAGE_UNLOCK            (1 << 0)
 #define PAGE_CLEAR_DIRTY       (1 << 1)
 #define PAGE_SET_WRITEBACK     (1 << 2)
 #define PAGE_END_WRITEBACK     (1 << 3)
 #define PAGE_SET_PRIVATE2      (1 << 4)
 #define PAGE_SET_ERROR         (1 << 5)
+#define PAGE_LOCK              (1 << 6)
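
With PAGE_LOCK added, the page_ops argument becomes a small bitmask contract: PAGE_LOCK must be passed alone (the ASSERT in __process_pages_contig above enforces this), while the remaining flags may be OR'ed together. An illustrative caller under that assumption, with mapping/locked_page/start_index/end_index standing in for whatever the real caller holds:

        /* unlock pages and finish writeback in one pass */
        __process_pages_contig(mapping, locked_page, start_index, end_index,
                               PAGE_UNLOCK | PAGE_END_WRITEBACK, NULL);

        /* lock pages; on -EAGAIN, *index_ret reports the last index locked */
        index_ret = start_index;
        err = __process_pages_contig(mapping, locked_page, start_index,
                                     end_index, PAGE_LOCK, &index_ret);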
 
 /*
  * page->private values.  Every page that is controlled by the extent
@@ -192,7 +193,7 @@ struct extent_changeset {
        u64 bytes_changed;
 
        /* Changed ranges */
-       struct ulist *range_changed;
+       struct ulist range_changed;
 };
 
 static inline void extent_set_compress_type(unsigned long *bio_flags,
index e97e322c28f0430753b77b3a2c240ea10e9c8e7e..f7b9a92ad56d1757d4a73b9058b0f1e5b2f68a1a 100644 (file)
@@ -255,7 +255,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
                                } else {
                                        btrfs_info_rl(fs_info,
                                                   "no csum found for inode %llu start %llu",
-                                              btrfs_ino(inode), offset);
+                                              btrfs_ino(BTRFS_I(inode)), offset);
                                }
                                item = NULL;
                                btrfs_release_path(path);
@@ -856,8 +856,8 @@ insert:
                tmp = min(tmp, (next_offset - file_key.offset) >>
                                         fs_info->sb->s_blocksize_bits);
 
-               tmp = max((u64)1, tmp);
-               tmp = min(tmp, (u64)MAX_CSUM_ITEMS(fs_info, csum_size));
+               tmp = max_t(u64, 1, tmp);
+               tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
                ins_size = csum_size * tmp;
        } else {
                ins_size = csum_size;
@@ -977,7 +977,7 @@ void btrfs_extent_item_to_extent_map(struct inode *inode,
        } else {
                btrfs_err(fs_info,
                          "unknown file extent item type %d, inode %llu, offset %llu, root %llu",
-                         type, btrfs_ino(inode), extent_start,
+                         type, btrfs_ino(BTRFS_I(inode)), extent_start,
                          root->root_key.objectid);
        }
 }
index b5c5da215d051e2d3ef13673917b2f86b042edc8..c1d2a07205daf02795f22ba23527472b871c6457 100644 (file)
@@ -168,7 +168,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
        if (!defrag)
                return -ENOMEM;
 
-       defrag->ino = btrfs_ino(inode);
+       defrag->ino = btrfs_ino(BTRFS_I(inode));
        defrag->transid = transid;
        defrag->root = root->root_key.objectid;
 
@@ -702,7 +702,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        struct btrfs_key new_key;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
        u64 search_start = start;
        u64 disk_bytenr = 0;
        u64 num_bytes = 0;
@@ -1102,7 +1102,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
        int del_slot = 0;
        int recow;
        int ret;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
 
        path = btrfs_alloc_path();
        if (!path)
@@ -2062,7 +2062,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         * commit does not start nor waits for ordered extents to complete.
         */
        smp_mb();
-       if (btrfs_inode_in_log(inode, fs_info->generation) ||
+       if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
            (full_sync && BTRFS_I(inode)->last_trans <=
             fs_info->last_trans_committed) ||
            (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
@@ -2203,7 +2203,7 @@ static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
                return 0;
 
        btrfs_item_key_to_cpu(leaf, &key, slot);
-       if (key.objectid != btrfs_ino(inode) ||
+       if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
            key.type != BTRFS_EXTENT_DATA_KEY)
                return 0;
 
@@ -2237,7 +2237,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
        if (btrfs_fs_incompat(fs_info, NO_HOLES))
                goto out;
 
-       key.objectid = btrfs_ino(inode);
+       key.objectid = btrfs_ino(BTRFS_I(inode));
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = offset;
 
@@ -2285,9 +2285,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
        }
        btrfs_release_path(path);
 
-       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
-                                      0, 0, end - offset, 0, end - offset,
-                                      0, 0, 0);
+       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+                       offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0);
        if (ret)
                return ret;
 
@@ -2876,7 +2875,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                if (!ret)
                        ret = btrfs_prealloc_file_range(inode, mode,
                                        range->start,
-                                       range->len, 1 << inode->i_blkbits,
+                                       range->len, i_blocksize(inode),
                                        offset + len, &alloc_hint);
                else
                        btrfs_free_reserved_data_space(inode, range->start,
index 7015892c9ee8263d09da8768d3407bd684fa3b2a..1a131f7d6c1bed3f751c4f25835216d0b0910ad4 100644 (file)
@@ -94,12 +94,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
        return inode;
 }
 
-struct inode *lookup_free_space_inode(struct btrfs_root *root,
+struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
                                      struct btrfs_block_group_cache
                                      *block_group, struct btrfs_path *path)
 {
        struct inode *inode = NULL;
-       struct btrfs_fs_info *fs_info = root->fs_info;
        u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
 
        spin_lock(&block_group->lock);
@@ -109,7 +108,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
        if (inode)
                return inode;
 
-       inode = __lookup_free_space_inode(root, path,
+       inode = __lookup_free_space_inode(fs_info->tree_root, path,
                                          block_group->key.objectid);
        if (IS_ERR(inode))
                return inode;
@@ -192,7 +191,7 @@ static int __create_free_space_inode(struct btrfs_root *root,
        return 0;
 }
 
-int create_free_space_inode(struct btrfs_root *root,
+int create_free_space_inode(struct btrfs_fs_info *fs_info,
                            struct btrfs_trans_handle *trans,
                            struct btrfs_block_group_cache *block_group,
                            struct btrfs_path *path)
@@ -200,11 +199,11 @@ int create_free_space_inode(struct btrfs_root *root,
        int ret;
        u64 ino;
 
-       ret = btrfs_find_free_objectid(root, &ino);
+       ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
        if (ret < 0)
                return ret;
 
-       return __create_free_space_inode(root, trans, path, ino,
+       return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
                                         block_group->key.objectid);
 }
 
@@ -227,21 +226,21 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
        return ret;
 }
 
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
-                                   struct btrfs_trans_handle *trans,
+int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
                                    struct btrfs_block_group_cache *block_group,
                                    struct inode *inode)
 {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
-       struct btrfs_path *path = btrfs_alloc_path();
        bool locked = false;
 
-       if (!path) {
-               ret = -ENOMEM;
-               goto fail;
-       }
-
        if (block_group) {
+               struct btrfs_path *path = btrfs_alloc_path();
+
+               if (!path) {
+                       ret = -ENOMEM;
+                       goto fail;
+               }
                locked = true;
                mutex_lock(&trans->transaction->cache_write_mutex);
                if (!list_empty(&block_group->io_list)) {
@@ -258,8 +257,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                spin_lock(&block_group->lock);
                block_group->disk_cache_state = BTRFS_DC_CLEAR;
                spin_unlock(&block_group->lock);
+               btrfs_free_path(path);
        }
-       btrfs_free_path(path);
 
        btrfs_i_size_write(inode, 0);
        truncate_pagecache(inode, 0);
@@ -286,14 +285,14 @@ fail:
        return ret;
 }
 
-static int readahead_cache(struct inode *inode)
+static void readahead_cache(struct inode *inode)
 {
        struct file_ra_state *ra;
        unsigned long last_index;
 
        ra = kzalloc(sizeof(*ra), GFP_NOFS);
        if (!ra)
-               return -ENOMEM;
+               return;
 
        file_ra_state_init(ra, inode->i_mapping);
        last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
@@ -301,8 +300,6 @@ static int readahead_cache(struct inode *inode)
        page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
 
        kfree(ra);
-
-       return 0;
 }
 
 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
@@ -313,7 +310,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
 
        num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 
-       if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
+       if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
                check_crcs = 1;
 
        /* Make sure we can fit our crcs into the first page */
@@ -730,9 +727,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
        if (ret)
                return ret;
 
-       ret = readahead_cache(inode);
-       if (ret)
-               goto out;
+       readahead_cache(inode);
 
        ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
        if (ret)
@@ -828,7 +823,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
                          struct btrfs_block_group_cache *block_group)
 {
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
-       struct btrfs_root *root = fs_info->tree_root;
        struct inode *inode;
        struct btrfs_path *path;
        int ret = 0;
@@ -852,7 +846,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
        path->search_commit_root = 1;
        path->skip_locking = 1;
 
-       inode = lookup_free_space_inode(root, block_group, path);
+       inode = lookup_free_space_inode(fs_info, block_group, path);
        if (IS_ERR(inode)) {
                btrfs_free_path(path);
                return 0;
@@ -1128,8 +1122,7 @@ cleanup_bitmap_list(struct list_head *bitmap_list)
 static void noinline_for_stack
 cleanup_write_cache_enospc(struct inode *inode,
                           struct btrfs_io_ctl *io_ctl,
-                          struct extent_state **cached_state,
-                          struct list_head *bitmap_list)
+                          struct extent_state **cached_state)
 {
        io_ctl_drop_pages(io_ctl);
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
@@ -1225,8 +1218,6 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
  * @ctl - the free space cache we are going to write out
  * @block_group - the block_group for this cache if it belongs to a block_group
  * @trans - the trans handle
- * @path - the path to use
- * @offset - the offset for the key we'll insert
  *
  * This function writes out a free space cache struct to disk for quick recovery
  * on mount.  This will return 0 if it was successful in writing the cache out,
@@ -1236,8 +1227,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                   struct btrfs_free_space_ctl *ctl,
                                   struct btrfs_block_group_cache *block_group,
                                   struct btrfs_io_ctl *io_ctl,
-                                  struct btrfs_trans_handle *trans,
-                                  struct btrfs_path *path, u64 offset)
+                                  struct btrfs_trans_handle *trans)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_state *cached_state = NULL;
@@ -1365,7 +1355,7 @@ out_nospc_locked:
        mutex_unlock(&ctl->cache_writeout_mutex);
 
 out_nospc:
-       cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
+       cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
 
        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
                up_write(&block_group->data_rwsem);
@@ -1378,7 +1368,6 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
                          struct btrfs_block_group_cache *block_group,
                          struct btrfs_path *path)
 {
-       struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct inode *inode;
        int ret = 0;
@@ -1390,13 +1379,12 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
        }
        spin_unlock(&block_group->lock);
 
-       inode = lookup_free_space_inode(root, block_group, path);
+       inode = lookup_free_space_inode(fs_info, block_group, path);
        if (IS_ERR(inode))
                return 0;
 
-       ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
-                                     &block_group->io_ctl, trans,
-                                     path, block_group->key.objectid);
+       ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
+                               block_group, &block_group->io_ctl, trans);
        if (ret) {
 #ifdef DEBUG
                btrfs_err(fs_info,
@@ -3543,8 +3531,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
                return 0;
 
        memset(&io_ctl, 0, sizeof(io_ctl));
-       ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
-                                     trans, path, 0);
+       ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
        if (!ret) {
                /*
                 * At this point writepages() didn't error out, so our metadata
index 6f3c025a2c6c8686daf8787377aae17358dfd2c3..79eca4cabb1c6ae6822c241dfa8cdf37ed908855 100644 (file)
@@ -51,18 +51,17 @@ struct btrfs_free_space_op {
 
 struct btrfs_io_ctl;
 
-struct inode *lookup_free_space_inode(struct btrfs_root *root,
+struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
                                      struct btrfs_block_group_cache
                                      *block_group, struct btrfs_path *path);
-int create_free_space_inode(struct btrfs_root *root,
+int create_free_space_inode(struct btrfs_fs_info *fs_info,
                            struct btrfs_trans_handle *trans,
                            struct btrfs_block_group_cache *block_group,
                            struct btrfs_path *path);
 
 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
                                       struct btrfs_block_rsv *rsv);
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
-                                   struct btrfs_trans_handle *trans,
+int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
                                    struct btrfs_block_group_cache *block_group,
                                    struct inode *inode);
 int load_free_space_cache(struct btrfs_fs_info *fs_info,
index ff0c55337c2e97ccd7b8a1d5dc9dbcd0bce60450..dd7fb22a955a6bff32be4632053bcd9086efe6e2 100644 (file)
@@ -1269,7 +1269,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
        list_del(&free_space_root->dirty_list);
 
        btrfs_tree_lock(free_space_root->node);
-       clean_tree_block(trans, fs_info, free_space_root->node);
+       clean_tree_block(fs_info, free_space_root->node);
        btrfs_tree_unlock(free_space_root->node);
        btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
                              0, 1);
index 144b119ff43f1e50b0eb2d7074c5112689c4df24..3bbb8f0959535235b1c043a6520e54d05ad291d5 100644 (file)
@@ -467,7 +467,7 @@ again:
        }
 
        if (i_size_read(inode) > 0) {
-               ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
+               ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
                if (ret) {
                        if (ret != -ENOSPC)
                                btrfs_abort_transaction(trans, ret);
index 1e861a063721e7c173a770cd68d0fea484cb75b3..f02823f088c2b1a4f3b22b0eda66ae3a03ba07d5 100644 (file)
@@ -71,6 +71,7 @@ struct btrfs_dio_data {
        u64 reserve;
        u64 unsubmitted_oe_range_start;
        u64 unsubmitted_oe_range_end;
+       int overwrite;
 };
 
 static const struct inode_operations btrfs_dir_inode_operations;
@@ -108,11 +109,11 @@ static noinline int cow_file_range(struct inode *inode,
                                   u64 start, u64 end, u64 delalloc_end,
                                   int *page_started, unsigned long *nr_written,
                                   int unlock, struct btrfs_dedupe_hash *hash);
-static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
-                                          u64 len, u64 orig_start,
-                                          u64 block_start, u64 block_len,
-                                          u64 orig_block_len, u64 ram_bytes,
-                                          int type);
+static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
+                                      u64 orig_start, u64 block_start,
+                                      u64 block_len, u64 orig_block_len,
+                                      u64 ram_bytes, int compress_type,
+                                      int type);
 
 static int btrfs_dirty_inode(struct inode *inode);
 
@@ -166,7 +167,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
                struct btrfs_key key;
                size_t datasize;
 
-               key.objectid = btrfs_ino(inode);
+               key.objectid = btrfs_ino(BTRFS_I(inode));
                key.offset = start;
                key.type = BTRFS_EXTENT_DATA_KEY;
 
@@ -388,6 +389,15 @@ static inline int inode_need_compress(struct inode *inode)
        return 0;
 }
 
+static inline void inode_should_defrag(struct inode *inode,
+               u64 start, u64 end, u64 num_bytes, u64 small_write)
+{
+       /* If this is a small write inside eof, kick off a defrag */
+       if (num_bytes < small_write &&
+           (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
+               btrfs_add_inode_defrag(NULL, inode);
+}
+
 /*
  * we create compressed extents in two phases.  The first
  * phase compresses a range of pages that have already been
@@ -430,10 +440,7 @@ static noinline void compress_file_range(struct inode *inode,
        int compress_type = fs_info->compress_type;
        int redirty = 0;
 
-       /* if this is a small write inside eof, kick off a defrag */
-       if ((end - start + 1) < SZ_16K &&
-           (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
-               btrfs_add_inode_defrag(NULL, inode);
+       inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
 
        actual_end = min_t(u64, isize, end + 1);
 again:
@@ -541,7 +548,7 @@ cont:
                         * to make an uncompressed inline extent.
                         */
                        ret = cow_file_range_inline(root, inode, start, end,
-                                                   0, 0, NULL);
+                                           0, BTRFS_COMPRESS_NONE, NULL);
                } else {
                        /* try making a compressed inline extent */
                        ret = cow_file_range_inline(root, inode, start, end,
@@ -690,7 +697,6 @@ static noinline void submit_compressed_extents(struct inode *inode,
        struct btrfs_key ins;
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct extent_io_tree *io_tree;
        int ret = 0;
 
@@ -778,46 +784,19 @@ retry:
                 * here we're doing allocation and writeback of the
                 * compressed pages
                 */
-               btrfs_drop_extent_cache(inode, async_extent->start,
-                                       async_extent->start +
-                                       async_extent->ram_size - 1, 0);
-
-               em = alloc_extent_map();
-               if (!em) {
-                       ret = -ENOMEM;
-                       goto out_free_reserve;
-               }
-               em->start = async_extent->start;
-               em->len = async_extent->ram_size;
-               em->orig_start = em->start;
-               em->mod_start = em->start;
-               em->mod_len = em->len;
-
-               em->block_start = ins.objectid;
-               em->block_len = ins.offset;
-               em->orig_block_len = ins.offset;
-               em->ram_bytes = async_extent->ram_size;
-               em->bdev = fs_info->fs_devices->latest_bdev;
-               em->compress_type = async_extent->compress_type;
-               set_bit(EXTENT_FLAG_PINNED, &em->flags);
-               set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
-               em->generation = -1;
-
-               while (1) {
-                       write_lock(&em_tree->lock);
-                       ret = add_extent_mapping(em_tree, em, 1);
-                       write_unlock(&em_tree->lock);
-                       if (ret != -EEXIST) {
-                               free_extent_map(em);
-                               break;
-                       }
-                       btrfs_drop_extent_cache(inode, async_extent->start,
-                                               async_extent->start +
-                                               async_extent->ram_size - 1, 0);
-               }
-
-               if (ret)
+               em = create_io_em(inode, async_extent->start,
+                                 async_extent->ram_size, /* len */
+                                 async_extent->start, /* orig_start */
+                                 ins.objectid, /* block_start */
+                                 ins.offset, /* block_len */
+                                 ins.offset, /* orig_block_len */
+                                 async_extent->ram_size, /* ram_bytes */
+                                 async_extent->compress_type,
+                                 BTRFS_ORDERED_COMPRESSED);
+               if (IS_ERR(em))
+                       /* no need to set ret: the function returns void */
                        goto out_free_reserve;
+               free_extent_map(em);
 
                ret = btrfs_add_ordered_extent_compress(inode,
                                                async_extent->start,
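
The open-coded extent_map setup removed above (and in the cow_file_range and run_delalloc_nocow hunks below) is folded into create_io_em(). Reconstructed from the removed code, the helper plausibly follows this shape; treat it as a sketch assembled from the three call sites, not the patch's exact body:

static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
                                       u64 orig_start, u64 block_start,
                                       u64 block_len, u64 orig_block_len,
                                       u64 ram_bytes, int compress_type,
                                       int type)
{
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct extent_map *em;
        int ret;

        em = alloc_extent_map();
        if (!em)
                return ERR_PTR(-ENOMEM);

        em->start = start;
        em->orig_start = orig_start;
        em->len = len;
        em->mod_start = start;
        em->mod_len = len;
        em->block_start = block_start;
        em->block_len = block_len;
        em->orig_block_len = orig_block_len;
        em->ram_bytes = ram_bytes;
        em->generation = -1;
        em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
        if (type == BTRFS_ORDERED_PREALLOC) {
                set_bit(EXTENT_FLAG_FILLING, &em->flags);
        } else if (type == BTRFS_ORDERED_COMPRESSED) {
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                em->compress_type = compress_type;
        }

        do {
                /* drop any stale cached mapping, then retry the insert */
                btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em, 1);
                write_unlock(&em_tree->lock);
        } while (ret == -EEXIST);

        if (ret) {
                free_extent_map(em);
                return ERR_PTR(ret);
        }
        return em;
}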
@@ -952,7 +931,6 @@ static noinline int cow_file_range(struct inode *inode,
        u64 blocksize = fs_info->sectorsize;
        struct btrfs_key ins;
        struct extent_map *em;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
 
        if (btrfs_is_free_space_inode(inode)) {
@@ -965,15 +943,12 @@ static noinline int cow_file_range(struct inode *inode,
        num_bytes = max(blocksize,  num_bytes);
        disk_num_bytes = num_bytes;
 
-       /* if this is a small write inside eof, kick off defrag */
-       if (num_bytes < SZ_64K &&
-           (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
-               btrfs_add_inode_defrag(NULL, inode);
+       inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
 
        if (start == 0) {
                /* lets try to make an inline extent */
-               ret = cow_file_range_inline(root, inode, start, end, 0, 0,
-                                           NULL);
+               ret = cow_file_range_inline(root, inode, start, end, 0,
+                                       BTRFS_COMPRESS_NONE, NULL);
                if (ret == 0) {
                        extent_clear_unlock_delalloc(inode, start, end,
                                     delalloc_end, NULL,
@@ -1008,39 +983,18 @@ static noinline int cow_file_range(struct inode *inode,
                if (ret < 0)
                        goto out_unlock;
 
-               em = alloc_extent_map();
-               if (!em) {
-                       ret = -ENOMEM;
-                       goto out_reserve;
-               }
-               em->start = start;
-               em->orig_start = em->start;
                ram_size = ins.offset;
-               em->len = ins.offset;
-               em->mod_start = em->start;
-               em->mod_len = em->len;
-
-               em->block_start = ins.objectid;
-               em->block_len = ins.offset;
-               em->orig_block_len = ins.offset;
-               em->ram_bytes = ram_size;
-               em->bdev = fs_info->fs_devices->latest_bdev;
-               set_bit(EXTENT_FLAG_PINNED, &em->flags);
-               em->generation = -1;
-
-               while (1) {
-                       write_lock(&em_tree->lock);
-                       ret = add_extent_mapping(em_tree, em, 1);
-                       write_unlock(&em_tree->lock);
-                       if (ret != -EEXIST) {
-                               free_extent_map(em);
-                               break;
-                       }
-                       btrfs_drop_extent_cache(inode, start,
-                                               start + ram_size - 1, 0);
-               }
-               if (ret)
+               em = create_io_em(inode, start, ins.offset, /* len */
+                                 start, /* orig_start */
+                                 ins.objectid, /* block_start */
+                                 ins.offset, /* block_len */
+                                 ins.offset, /* orig_block_len */
+                                 ram_size, /* ram_bytes */
+                                 BTRFS_COMPRESS_NONE, /* compress_type */
+                                 BTRFS_ORDERED_REGULAR /* type */);
+               if (IS_ERR(em))
                        goto out_reserve;
+               free_extent_map(em);
 
                cur_alloc_size = ins.offset;
                ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
@@ -1164,7 +1118,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        unsigned long nr_pages;
        u64 cur_end;
-       int limit = 10 * SZ_1M;
 
        clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
                         1, 0, NULL, GFP_NOFS);
@@ -1196,12 +1149,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 
                btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
 
-               if (atomic_read(&fs_info->async_delalloc_pages) > limit) {
-                       wait_event(fs_info->async_submit_wait,
-                                  (atomic_read(&fs_info->async_delalloc_pages) <
-                                   limit));
-               }
-
                while (atomic_read(&fs_info->async_submit_draining) &&
                       atomic_read(&fs_info->async_delalloc_pages)) {
                        wait_event(fs_info->async_submit_wait,
@@ -1250,11 +1197,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
        struct extent_buffer *leaf;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key found_key;
+       struct extent_map *em;
        u64 cow_start;
        u64 cur_offset;
        u64 extent_end;
@@ -1269,7 +1216,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        int nocow;
        int check_prev = 1;
        bool nolock;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
 
        path = btrfs_alloc_path();
        if (!path) {
@@ -1286,30 +1233,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 
        nolock = btrfs_is_free_space_inode(inode);
 
-       if (nolock)
-               trans = btrfs_join_transaction_nolock(root);
-       else
-               trans = btrfs_join_transaction(root);
-
-       if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode, start, end, end,
-                                            locked_page,
-                                            EXTENT_LOCKED | EXTENT_DELALLOC |
-                                            EXTENT_DO_ACCOUNTING |
-                                            EXTENT_DEFRAG, PAGE_UNLOCK |
-                                            PAGE_CLEAR_DIRTY |
-                                            PAGE_SET_WRITEBACK |
-                                            PAGE_END_WRITEBACK);
-               btrfs_free_path(path);
-               return PTR_ERR(trans);
-       }
-
-       trans->block_rsv = &fs_info->delalloc_block_rsv;
-
        cow_start = (u64)-1;
        cur_offset = start;
        while (1) {
-               ret = btrfs_lookup_file_extent(trans, root, path, ino,
+               ret = btrfs_lookup_file_extent(NULL, root, path, ino,
                                               cur_offset, 0);
                if (ret < 0)
                        goto error;
@@ -1382,7 +1309,7 @@ next_slot:
                                goto out_check;
                        if (btrfs_extent_readonly(fs_info, disk_bytenr))
                                goto out_check;
-                       if (btrfs_cross_ref_exist(trans, root, ino,
+                       if (btrfs_cross_ref_exist(root, ino,
                                                  found_key.offset -
                                                  extent_offset, disk_bytenr))
                                goto out_check;
@@ -1455,35 +1382,28 @@ out_check:
                }
 
                if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
-                       struct extent_map *em;
-                       struct extent_map_tree *em_tree;
-                       em_tree = &BTRFS_I(inode)->extent_tree;
-                       em = alloc_extent_map();
-                       BUG_ON(!em); /* -ENOMEM */
-                       em->start = cur_offset;
-                       em->orig_start = found_key.offset - extent_offset;
-                       em->len = num_bytes;
-                       em->block_len = num_bytes;
-                       em->block_start = disk_bytenr;
-                       em->orig_block_len = disk_num_bytes;
-                       em->ram_bytes = ram_bytes;
-                       em->bdev = fs_info->fs_devices->latest_bdev;
-                       em->mod_start = em->start;
-                       em->mod_len = em->len;
-                       set_bit(EXTENT_FLAG_PINNED, &em->flags);
-                       set_bit(EXTENT_FLAG_FILLING, &em->flags);
-                       em->generation = -1;
-                       while (1) {
-                               write_lock(&em_tree->lock);
-                               ret = add_extent_mapping(em_tree, em, 1);
-                               write_unlock(&em_tree->lock);
-                               if (ret != -EEXIST) {
-                                       free_extent_map(em);
-                                       break;
-                               }
-                               btrfs_drop_extent_cache(inode, em->start,
-                                               em->start + em->len - 1, 0);
+                       u64 orig_start = found_key.offset - extent_offset;
+
+                       em = create_io_em(inode, cur_offset, num_bytes,
+                                         orig_start,
+                                         disk_bytenr, /* block_start */
+                                         num_bytes, /* block_len */
+                                         disk_num_bytes, /* orig_block_len */
+                                         ram_bytes, BTRFS_COMPRESS_NONE,
+                                         BTRFS_ORDERED_PREALLOC);
+                       if (IS_ERR(em)) {
+                               if (!nolock && nocow)
+                                       btrfs_end_write_no_snapshoting(root);
+                               if (nocow)
+                                       btrfs_dec_nocow_writers(fs_info,
+                                                               disk_bytenr);
+                               ret = PTR_ERR(em);
+                               goto error;
                        }
+                       free_extent_map(em);
+               }
+
+               if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                        type = BTRFS_ORDERED_PREALLOC;
                } else {
                        type = BTRFS_ORDERED_NOCOW;
@@ -1534,10 +1454,6 @@ out_check:
        }
 
 error:
-       err = btrfs_end_transaction(trans);
-       if (!ret)
-               ret = err;
-
        if (ret && cur_offset < end)
                extent_clear_unlock_delalloc(inode, cur_offset, end, end,
                                             locked_page, EXTENT_LOCKED |
@@ -1609,7 +1525,7 @@ static void btrfs_split_extent_hook(struct inode *inode,
 
        size = orig->end - orig->start + 1;
        if (size > BTRFS_MAX_EXTENT_SIZE) {
-               u64 num_extents;
+               u32 num_extents;
                u64 new_size;
 
                /*
@@ -1617,13 +1533,10 @@ static void btrfs_split_extent_hook(struct inode *inode,
                 * applies here, just in reverse.
                 */
                new_size = orig->end - split + 1;
-               num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                                       BTRFS_MAX_EXTENT_SIZE);
+               num_extents = count_max_extents(new_size);
                new_size = split - orig->start;
-               num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                                       BTRFS_MAX_EXTENT_SIZE);
-               if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
-                             BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+               num_extents += count_max_extents(new_size);
+               if (count_max_extents(size) >= num_extents)
                        return;
        }
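
The open-coded rounding divisions over BTRFS_MAX_EXTENT_SIZE that this and the next few hunks remove are folded into a count_max_extents() helper. The helper itself is defined outside this file, so the following is only a sketch of what it presumably does, consistent with the u32 type the hunks switch to:

        /* hedged sketch: how many max-sized extents are needed to cover size */
        static inline u32 count_max_extents(u64 size)
        {
                return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
                               BTRFS_MAX_EXTENT_SIZE);
        }
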
 
@@ -1643,7 +1556,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
                                    struct extent_state *other)
 {
        u64 new_size, old_size;
-       u64 num_extents;
+       u32 num_extents;
 
        /* not delalloc, ignore it */
        if (!(other->state & EXTENT_DELALLOC))
@@ -1681,14 +1594,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
         * this case.
         */
        old_size = other->end - other->start + 1;
-       num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                               BTRFS_MAX_EXTENT_SIZE);
+       num_extents = count_max_extents(old_size);
        old_size = new->end - new->start + 1;
-       num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                                BTRFS_MAX_EXTENT_SIZE);
-
-       if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                     BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+       num_extents += count_max_extents(old_size);
+       if (count_max_extents(new_size) >= num_extents)
                return;
 
        spin_lock(&BTRFS_I(inode)->lock);
@@ -1797,8 +1706,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        u64 len = state->end + 1 - state->start;
-       u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
-                                   BTRFS_MAX_EXTENT_SIZE);
+       u32 num_extents = count_max_extents(len);
 
        spin_lock(&BTRFS_I(inode)->lock);
        if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1997,8 +1905,7 @@ out:
  * at IO completion time based on sums calculated at bio submission time.
  */
 static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
-                            struct inode *inode, u64 file_offset,
-                            struct list_head *list)
+                            struct inode *inode, struct list_head *list)
 {
        struct btrfs_ordered_sum *sum;
 
@@ -2161,7 +2068,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
                goto out;
 
        if (!extent_inserted) {
-               ins.objectid = btrfs_ino(inode);
+               ins.objectid = btrfs_ino(BTRFS_I(inode));
                ins.offset = file_pos;
                ins.type = BTRFS_EXTENT_DATA_KEY;
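
The btrfs_ino(BTRFS_I(inode)) pattern that recurs through the rest of this patch comes from btrfs_ino() now taking the btrfs-specific inode type rather than the VFS one. Inferred from the call sites, the new prototype is roughly:

        /* hedged prototype; callers wrap a VFS inode with BTRFS_I() */
        static inline u64 btrfs_ino(struct btrfs_inode *inode);
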
 
@@ -2194,8 +2101,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        ins.offset = disk_num_bytes;
        ins.type = BTRFS_EXTENT_ITEM_KEY;
        ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid,
-                                              btrfs_ino(inode), file_pos,
-                                              ram_bytes, &ins);
+                       btrfs_ino(BTRFS_I(inode)), file_pos, ram_bytes, &ins);
        /*
         * Release the reserved range from inode dirty range map, as it is
         * already moved into delayed_ref_head
@@ -2320,7 +2226,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
        u64 num_bytes;
 
        if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
-           inum == btrfs_ino(inode))
+           inum == btrfs_ino(BTRFS_I(inode)))
                return 0;
 
        key.objectid = root_id;
@@ -2589,7 +2495,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
        if (ret)
                goto out_free_path;
 again:
-       key.objectid = btrfs_ino(inode);
+       key.objectid = btrfs_ino(BTRFS_I(inode));
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = start;
 
@@ -2768,7 +2674,7 @@ record_old_file_extents(struct inode *inode,
        if (!path)
                goto out_kfree;
 
-       key.objectid = btrfs_ino(inode);
+       key.objectid = btrfs_ino(BTRFS_I(inode));
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = new->file_pos;
 
@@ -2803,7 +2709,7 @@ record_old_file_extents(struct inode *inode,
 
                btrfs_item_key_to_cpu(l, &key, slot);
 
-               if (key.objectid != btrfs_ino(inode))
+               if (key.objectid != btrfs_ino(BTRFS_I(inode)))
                        break;
                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        break;
@@ -2993,8 +2899,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out_unlock;
        }
 
-       add_pending_csums(trans, inode, ordered_extent->file_offset,
-                         &ordered_extent->list);
+       add_pending_csums(trans, inode, &ordered_extent->list);
 
        btrfs_ordered_update_i_size(inode, 0, ordered_extent);
        ret = btrfs_update_inode_fallback(trans, root, inode);
@@ -3123,9 +3028,8 @@ static int __readpage_endio_check(struct inode *inode,
        kunmap_atomic(kaddr);
        return 0;
 zeroit:
-       btrfs_warn_rl(BTRFS_I(inode)->root->fs_info,
-               "csum failed ino %llu off %llu csum %u expected csum %u",
-                          btrfs_ino(inode), start, csum, csum_expected);
+       btrfs_print_data_csum_error(inode, start, csum, csum_expected,
+                                   io_bio->mirror_num);
        memset(kaddr + pgoff, 1, len);
        flush_dcache_page(page);
        kunmap_atomic(kaddr);
@@ -3326,7 +3230,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 
        /* insert an orphan item to track this unlinked/truncated file */
        if (insert >= 1) {
-               ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
+               ret = btrfs_insert_orphan_item(trans, root,
+                               btrfs_ino(BTRFS_I(inode)));
                if (ret) {
                        atomic_dec(&root->orphan_inodes);
                        if (reserve) {
@@ -3382,7 +3287,7 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
                atomic_dec(&root->orphan_inodes);
                if (trans)
                        ret = btrfs_del_orphan_item(trans, root,
-                                                   btrfs_ino(inode));
+                                                   btrfs_ino(BTRFS_I(inode)));
        }
 
        if (release_rsv)
@@ -3789,7 +3694,7 @@ cache_index:
                goto cache_acl;
 
        btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
-       if (location.objectid != btrfs_ino(inode))
+       if (location.objectid != btrfs_ino(BTRFS_I(inode)))
                goto cache_acl;
 
        ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -3811,14 +3716,14 @@ cache_acl:
         * any xattrs or acls
         */
        maybe_acls = acls_after_inode_item(leaf, path->slots[0],
-                                          btrfs_ino(inode), &first_xattr_slot);
+                       btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
        if (first_xattr_slot != -1) {
                path->slots[0] = first_xattr_slot;
                ret = btrfs_load_inode_props(inode, path);
                if (ret)
                        btrfs_err(fs_info,
                                  "error loading props for ino %llu (root %llu): %d",
-                                 btrfs_ino(inode),
+                                 btrfs_ino(BTRFS_I(inode)),
                                  root->root_key.objectid, ret);
        }
        btrfs_free_path(path);
@@ -3993,7 +3898,8 @@ noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
  */
 static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                               struct inode *dir, struct inode *inode,
+                               struct btrfs_inode *dir,
+                               struct btrfs_inode *inode,
                                const char *name, int name_len)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4040,10 +3946,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
         * that we delay deleting it, and just do this deletion when
         * we update the inode item.
         */
-       if (BTRFS_I(inode)->dir_index) {
+       if (inode->dir_index) {
                ret = btrfs_delayed_delete_inode_ref(inode);
                if (!ret) {
-                       index = BTRFS_I(inode)->dir_index;
+                       index = inode->dir_index;
                        goto skip_backref;
                }
        }
@@ -4064,15 +3970,15 @@ skip_backref:
                goto err;
        }
 
-       ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
-                                        inode, dir_ino);
+       ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
+                       dir_ino);
        if (ret != 0 && ret != -ENOENT) {
                btrfs_abort_transaction(trans, ret);
                goto err;
        }
 
-       ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
-                                          dir, index);
+       ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
+                       index);
        if (ret == -ENOENT)
                ret = 0;
        else if (ret)
@@ -4082,26 +3988,27 @@ err:
        if (ret)
                goto out;
 
-       btrfs_i_size_write(dir, dir->i_size - name_len * 2);
-       inode_inc_iversion(inode);
-       inode_inc_iversion(dir);
-       inode->i_ctime = dir->i_mtime =
-               dir->i_ctime = current_time(inode);
-       ret = btrfs_update_inode(trans, root, dir);
+       btrfs_i_size_write(&dir->vfs_inode,
+                       dir->vfs_inode.i_size - name_len * 2);
+       inode_inc_iversion(&inode->vfs_inode);
+       inode_inc_iversion(&dir->vfs_inode);
+       inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
+               dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
+       ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
 out:
        return ret;
 }
 
 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root,
-                      struct inode *dir, struct inode *inode,
+                      struct btrfs_inode *dir, struct btrfs_inode *inode,
                       const char *name, int name_len)
 {
        int ret;
        ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
        if (!ret) {
-               drop_nlink(inode);
-               ret = btrfs_update_inode(trans, root, inode);
+               drop_nlink(&inode->vfs_inode);
+               ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
        }
        return ret;
 }
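
__btrfs_unlink_inode() and btrfs_unlink_inode() now take struct btrfs_inode * directly, reaching VFS state through the embedded vfs_inode member. For orientation, the usual btrfs layout behind BTRFS_I() looks like this (a sketch assuming the standard container_of() embedding, not code from this patch):

        struct btrfs_inode {
                struct btrfs_root *root;        /* subvolume this inode belongs to */
                /* ... btrfs-specific state elided ... */
                struct inode vfs_inode;         /* embedded VFS inode */
        };

        static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
        {
                return container_of(inode, struct btrfs_inode, vfs_inode);
        }
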
@@ -4139,10 +4046,12 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0);
+       btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+                       0);
 
-       ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
-                                dentry->d_name.name, dentry->d_name.len);
+       ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+                       BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+                       dentry->d_name.len);
        if (ret)
                goto out;
 
@@ -4170,7 +4079,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
        struct btrfs_key key;
        u64 index;
        int ret;
-       u64 dir_ino = btrfs_ino(dir);
+       u64 dir_ino = btrfs_ino(BTRFS_I(dir));
 
        path = btrfs_alloc_path();
        if (!path)
@@ -4222,7 +4131,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
        }
        btrfs_release_path(path);
 
-       ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index);
+       ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out;
@@ -4249,14 +4158,14 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 
        if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
-       if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+       if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
                return -EPERM;
 
        trans = __unlink_start_trans(dir);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+       if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
                err = btrfs_unlink_subvol(trans, root, dir,
                                          BTRFS_I(inode)->location.objectid,
                                          dentry->d_name.name,
@@ -4271,8 +4180,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
 
        /* now the directory is empty */
-       err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
-                                dentry->d_name.name, dentry->d_name.len);
+       err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+                       BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+                       dentry->d_name.len);
        if (!err) {
                btrfs_i_size_write(inode, 0);
                /*
@@ -4398,7 +4308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        int extent_type = -1;
        int ret;
        int err = 0;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
        u64 bytes_deleted = 0;
        bool be_nice = 0;
        bool should_throttle = 0;
@@ -4437,7 +4347,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
         * items.
         */
        if (min_type == 0 && root == BTRFS_I(inode)->root)
-               btrfs_kill_delayed_inode_items(inode);
+               btrfs_kill_delayed_inode_items(BTRFS_I(inode));
 
        key.objectid = ino;
        key.offset = (u64)-1;
@@ -4702,6 +4612,13 @@ error:
 
        btrfs_free_path(path);
 
+       if (err == 0) {
+               /* only an inline file may have last_size != new_size */
+               if (new_size >= fs_info->sectorsize ||
+                   new_size > fs_info->max_inline)
+                       ASSERT(last_size == new_size);
+       }
+
        if (be_nice && bytes_deleted > SZ_32M) {
                unsigned long updates = trans->delayed_ref_updates;
                if (updates) {
@@ -4870,8 +4787,8 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
                return ret;
        }
 
-       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
-                                      0, 0, len, 0, len, 0, 0, 0);
+       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+                       offset, 0, 0, len, 0, len, 0, 0, 0);
        if (ret)
                btrfs_abort_transaction(trans, ret);
        else
@@ -5087,6 +5004,13 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                if (ret && inode->i_nlink) {
                        int err;
 
+                       /* To get a stable disk_i_size */
+                       err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+                       if (err) {
+                               btrfs_orphan_del(NULL, inode);
+                               return err;
+                       }
+
                        /*
                         * failed to truncate, disk_i_size is only adjusted down
                         * as we remove extents, so it should represent the true
@@ -5282,7 +5206,7 @@ void btrfs_evict_inode(struct inode *inode)
                goto no_delete;
        }
 
-       ret = btrfs_commit_inode_delayed_inode(inode);
+       ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
        if (ret) {
                btrfs_orphan_del(NULL, inode);
                goto no_delete;
@@ -5402,12 +5326,12 @@ void btrfs_evict_inode(struct inode *inode)
        trans->block_rsv = &fs_info->trans_block_rsv;
        if (!(root == fs_info->tree_root ||
              root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
-               btrfs_return_ino(root, btrfs_ino(inode));
+               btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
 
        btrfs_end_transaction(trans);
        btrfs_btree_balance_dirty(fs_info);
 no_delete:
-       btrfs_remove_delayed_node(inode);
+       btrfs_remove_delayed_node(BTRFS_I(inode));
        clear_inode(inode);
 }
 
@@ -5429,8 +5353,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
        if (!path)
                return -ENOMEM;
 
-       di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
-                                   namelen, 0);
+       di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
+                       name, namelen, 0);
        if (IS_ERR(di))
                ret = PTR_ERR(di);
 
@@ -5485,7 +5409,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
 
        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
-       if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
+       if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
            btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
                goto out;
 
@@ -5520,7 +5444,7 @@ static void inode_tree_add(struct inode *inode)
        struct rb_node **p;
        struct rb_node *parent;
        struct rb_node *new = &BTRFS_I(inode)->rb_node;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
 
        if (inode_unhashed(inode))
                return;
@@ -5531,9 +5455,9 @@ static void inode_tree_add(struct inode *inode)
                parent = *p;
                entry = rb_entry(parent, struct btrfs_inode, rb_node);
 
-               if (ino < btrfs_ino(&entry->vfs_inode))
+               if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
                        p = &parent->rb_left;
-               else if (ino > btrfs_ino(&entry->vfs_inode))
+               else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
                        p = &parent->rb_right;
                else {
                        WARN_ON(!(entry->vfs_inode.i_state &
@@ -5593,9 +5517,9 @@ again:
                prev = node;
                entry = rb_entry(node, struct btrfs_inode, rb_node);
 
-               if (objectid < btrfs_ino(&entry->vfs_inode))
+               if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
                        node = node->rb_left;
-               else if (objectid > btrfs_ino(&entry->vfs_inode))
+               else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
                        node = node->rb_right;
                else
                        break;
@@ -5603,7 +5527,7 @@ again:
        if (!node) {
                while (prev) {
                        entry = rb_entry(prev, struct btrfs_inode, rb_node);
-                       if (objectid <= btrfs_ino(&entry->vfs_inode)) {
+                       if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
                                node = prev;
                                break;
                        }
@@ -5612,7 +5536,7 @@ again:
        }
        while (node) {
                entry = rb_entry(node, struct btrfs_inode, rb_node);
-               objectid = btrfs_ino(&entry->vfs_inode) + 1;
+               objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
                inode = igrab(&entry->vfs_inode);
                if (inode) {
                        spin_unlock(&root->inode_lock);
@@ -5796,7 +5720,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
                if (btrfs_root_refs(&root->root_item) == 0)
                        return 1;
 
-               if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+               if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
                        return 1;
        }
        return 0;
@@ -5865,7 +5789,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 
        key.type = BTRFS_DIR_INDEX_KEY;
        key.offset = ctx->pos;
-       key.objectid = btrfs_ino(inode);
+       key.objectid = btrfs_ino(BTRFS_I(inode));
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
@@ -6062,7 +5986,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
        struct extent_buffer *leaf;
        int ret;
 
-       key.objectid = btrfs_ino(inode);
+       key.objectid = btrfs_ino(BTRFS_I(inode));
        key.type = BTRFS_DIR_INDEX_KEY;
        key.offset = (u64)-1;
 
@@ -6094,7 +6018,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
-       if (found_key.objectid != btrfs_ino(inode) ||
+       if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
            found_key.type != BTRFS_DIR_INDEX_KEY) {
                BTRFS_I(inode)->index_cnt = 2;
                goto out;
@@ -6115,7 +6039,7 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
        int ret = 0;
 
        if (BTRFS_I(dir)->index_cnt == (u64)-1) {
-               ret = btrfs_inode_delayed_dir_index_count(dir);
+               ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir));
                if (ret) {
                        ret = btrfs_set_inode_index_count(dir);
                        if (ret)
@@ -6294,7 +6218,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (ret)
                btrfs_err(fs_info,
                          "error inheriting props for ino %llu (root %llu): %d",
-                         btrfs_ino(inode), root->root_key.objectid, ret);
+                       btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);
 
        return inode;
 
@@ -6327,8 +6251,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
        int ret = 0;
        struct btrfs_key key;
        struct btrfs_root *root = BTRFS_I(parent_inode)->root;
-       u64 ino = btrfs_ino(inode);
-       u64 parent_ino = btrfs_ino(parent_inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
+       u64 parent_ino = btrfs_ino(BTRFS_I(parent_inode));
 
        if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
                memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
@@ -6427,8 +6351,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-                               dentry->d_name.len, btrfs_ino(dir), objectid,
-                               mode, &index);
+                       dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+                       mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -6499,8 +6423,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-                               dentry->d_name.len, btrfs_ino(dir), objectid,
-                               mode, &index);
+                       dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+                       mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -6609,7 +6533,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                                goto fail;
                }
                d_instantiate(dentry, inode);
-               btrfs_log_new_name(trans, inode, NULL, parent);
+               btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
        }
 
        btrfs_balance_delayed_items(fs_info);
@@ -6649,8 +6573,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
                goto out_fail;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-                               dentry->d_name.len, btrfs_ino(dir), objectid,
-                               S_IFDIR | mode, &index);
+                       dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+                       S_IFDIR | mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_fail;
@@ -6810,7 +6734,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
        int err = 0;
        u64 extent_start = 0;
        u64 extent_end = 0;
-       u64 objectid = btrfs_ino(inode);
+       u64 objectid = btrfs_ino(BTRFS_I(inode));
        u32 found_type;
        struct btrfs_path *path = NULL;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -7068,7 +6992,7 @@ insert:
        write_unlock(&em_tree->lock);
 out:
 
-       trace_btrfs_get_extent(root, inode, em);
+       trace_btrfs_get_extent(root, BTRFS_I(inode), em);
 
        btrfs_free_path(path);
        if (trans) {
@@ -7225,9 +7149,11 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
        int ret;
 
        if (type != BTRFS_ORDERED_NOCOW) {
-               em = create_pinned_em(inode, start, len, orig_start,
-                                     block_start, block_len, orig_block_len,
-                                     ram_bytes, type);
+               em = create_io_em(inode, start, len, orig_start,
+                                 block_start, block_len, orig_block_len,
+                                 ram_bytes,
+                                 BTRFS_COMPRESS_NONE, /* compress_type */
+                                 type);
                if (IS_ERR(em))
                        goto out;
        }
@@ -7264,7 +7190,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 
        em = btrfs_create_dio_extent(inode, start, ins.offset, start,
                                     ins.objectid, ins.offset, ins.offset,
-                                    ins.offset, 0);
+                                    ins.offset, BTRFS_ORDERED_REGULAR);
        btrfs_dec_block_group_reservations(fs_info, ins.objectid);
        if (IS_ERR(em))
                btrfs_free_reserved_extent(fs_info, ins.objectid,
@@ -7282,7 +7208,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *ram_bytes)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_trans_handle *trans;
        struct btrfs_path *path;
        int ret;
        struct extent_buffer *leaf;
@@ -7302,8 +7227,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
        if (!path)
                return -ENOMEM;
 
-       ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
-                                      offset, 0);
+       ret = btrfs_lookup_file_extent(NULL, root, path,
+                       btrfs_ino(BTRFS_I(inode)), offset, 0);
        if (ret < 0)
                goto out;
 
@@ -7319,7 +7244,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
        ret = 0;
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, slot);
-       if (key.objectid != btrfs_ino(inode) ||
+       if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
            key.type != BTRFS_EXTENT_DATA_KEY) {
                /* not our file or wrong item type, must cow */
                goto out;
@@ -7385,15 +7310,9 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
         * look for other files referencing this extent; if we
         * find any we must cow
         */
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               ret = 0;
-               goto out;
-       }
 
-       ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
+       ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
                                    key.offset - backref_offset, disk_bytenr);
-       btrfs_end_transaction(trans);
        if (ret) {
                ret = 0;
                goto out;
@@ -7570,17 +7489,23 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
        return ret;
 }
 
-static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
-                                          u64 len, u64 orig_start,
-                                          u64 block_start, u64 block_len,
-                                          u64 orig_block_len, u64 ram_bytes,
-                                          int type)
+/* Callers of this must take lock_extent() on the range being mapped. */
+static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
+                                      u64 orig_start, u64 block_start,
+                                      u64 block_len, u64 orig_block_len,
+                                      u64 ram_bytes, int compress_type,
+                                      int type)
 {
        struct extent_map_tree *em_tree;
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
 
+       ASSERT(type == BTRFS_ORDERED_PREALLOC ||
+              type == BTRFS_ORDERED_COMPRESSED ||
+              type == BTRFS_ORDERED_NOCOW ||
+              type == BTRFS_ORDERED_REGULAR);
+
        em_tree = &BTRFS_I(inode)->extent_tree;
        em = alloc_extent_map();
        if (!em)
@@ -7588,8 +7513,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
        em->start = start;
        em->orig_start = orig_start;
-       em->mod_start = start;
-       em->mod_len = len;
        em->len = len;
        em->block_len = block_len;
        em->block_start = block_start;
@@ -7598,8 +7521,12 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
        em->ram_bytes = ram_bytes;
        em->generation = -1;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
-       if (type == BTRFS_ORDERED_PREALLOC)
+       if (type == BTRFS_ORDERED_PREALLOC) {
                set_bit(EXTENT_FLAG_FILLING, &em->flags);
+       } else if (type == BTRFS_ORDERED_COMPRESSED) {
+               set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+               em->compress_type = compress_type;
+       }
 
        do {
                btrfs_drop_extent_cache(inode, em->start,
@@ -7607,6 +7534,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em, 1);
                write_unlock(&em_tree->lock);
+               /*
+                * The caller has taken lock_extent(), so nothing else should
+                * race with us to add this em; loop defensively in case a
+                * stale overlapping em still has to be dropped.
+                */
        } while (ret == -EEXIST);
 
        if (ret) {
@@ -7614,6 +7545,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
                return ERR_PTR(ret);
        }
 
+       /* The em now holds two refs; the caller must call free_extent_map() once. */
        return em;
 }
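
Together with the run_delalloc_nocow hunk earlier in the patch, the expected calling convention is: create the em while holding the extent lock, then drop the local reference once it is in the tree. A usage sketch mirroring that hunk:

        em = create_io_em(inode, cur_offset, num_bytes, orig_start,
                          disk_bytenr, num_bytes, disk_num_bytes,
                          ram_bytes, BTRFS_COMPRESS_NONE,
                          BTRFS_ORDERED_PREALLOC);
        if (IS_ERR(em))
                return PTR_ERR(em);
        /* ... create the ordered extent for this range ... */
        free_extent_map(em);    /* the extent map tree keeps its own ref */
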
 
@@ -7621,10 +7553,8 @@ static void adjust_dio_outstanding_extents(struct inode *inode,
                                           struct btrfs_dio_data *dio_data,
                                           const u64 len)
 {
-       unsigned num_extents;
+       unsigned num_extents = count_max_extents(len);
 
-       num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
-                                          BTRFS_MAX_EXTENT_SIZE);
        /*
         * If we have an outstanding_extents count still set then we're
         * within our reservation, otherwise we need to adjust our inode
@@ -7804,7 +7734,7 @@ unlock:
                 * Need to update the i_size under the extent lock so buffered
                 * readers will get the updated i_size when we unlock.
                 */
-               if (start + len > i_size_read(inode))
+               if (!dio_data->overwrite && start + len > i_size_read(inode))
                        i_size_write(inode, start + len);
 
                adjust_dio_outstanding_extents(inode, dio_data, len);
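
The new dio_data->overwrite test works with the btrfs_direct_IO() hunk further down: when a DIO write stays within i_size, the inode lock is dropped early, so this path must not be the one to grow i_size behind a reader's back. The flag presumably lives in the private DIO state, along these lines (fields abridged, layout assumed):

        struct btrfs_dio_data {
                u64 outstanding_extents;        /* set in btrfs_direct_IO() */
                int overwrite;                  /* write stays within i_size */
                /* ... other fields elided ... */
        };
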
@@ -8254,7 +8184,8 @@ static void btrfs_end_dio_bio(struct bio *bio)
        if (err)
                btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
                           "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
-                          btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf,
+                          btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
+                          bio->bi_opf,
                           (unsigned long long)bio->bi_iter.bi_sector,
                           bio->bi_iter.bi_size, err);
 
@@ -8679,15 +8610,14 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                 * not unlock the i_mutex at this case.
                 */
                if (offset + count <= inode->i_size) {
+                       dio_data.overwrite = 1;
                        inode_unlock(inode);
                        relock = true;
                }
                ret = btrfs_delalloc_reserve_space(inode, offset, count);
                if (ret)
                        goto out;
-               dio_data.outstanding_extents = div64_u64(count +
-                                               BTRFS_MAX_EXTENT_SIZE - 1,
-                                               BTRFS_MAX_EXTENT_SIZE);
+               dio_data.outstanding_extents = count_max_extents(count);
 
                /*
                 * We need to know how many extents we reserved so that we can
@@ -8831,7 +8761,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 {
        if (PageWriteback(page) || PageDirty(page))
                return 0;
-       return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
+       return __btrfs_releasepage(page, gfp_flags);
 }
 
 static void btrfs_invalidatepage(struct page *page, unsigned int offset,
@@ -8964,10 +8894,10 @@ again:
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
  */
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int btrfs_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_ordered_extent *ordered;
@@ -9000,7 +8930,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        ret = btrfs_delalloc_reserve_space(inode, page_start,
                                           reserved_space);
        if (!ret) {
-               ret = file_update_time(vma->vm_file);
+               ret = file_update_time(vmf->vma->vm_file);
                reserved = 1;
        }
        if (ret) {
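
This hunk tracks a mm/VFS interface change: ->page_mkwrite() handlers now receive only the fault descriptor, which carries the VMA. In rough prototype form:

        /* old handler signature */
        int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);

        /* new handler signature; the VMA is reached via vmf->vma */
        int btrfs_page_mkwrite(struct vm_fault *vmf);
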
@@ -9032,7 +8962,7 @@ again:
         * we can't set the delalloc bits if there are pending ordered
         * extents.  Drop our locks and wait for them to finish
         */
-       ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
+       ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
        if (ordered) {
                unlock_extent_cached(io_tree, page_start, page_end,
                                     &cached_state, GFP_NOFS);
@@ -9056,11 +8986,11 @@ again:
        }
 
        /*
-        * XXX - page_mkwrite gets called every time the page is dirtied, even
-        * if it was already dirty, so for space accounting reasons we need to
-        * clear any delalloc bits for the range we are fixing to save.  There
-        * is probably a better way to do this, but for now keep consistent with
-        * prepare_pages in the normal write path.
+        * page_mkwrite is called when the page is first dirtied after being
+        * faulted in, but write(2) can also dirty a page and set delalloc
+        * bits. In that case, for correct space accounting, we still need to
+        * clear any delalloc bits within this page range, since the data and
+        * metadata space was reserved before lock_page() (see above comments).
         */
        clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
                          EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -9384,7 +9314,7 @@ void btrfs_destroy_inode(struct inode *inode)
        if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                     &BTRFS_I(inode)->runtime_flags)) {
                btrfs_info(fs_info, "inode %llu still on the orphan list",
-                          btrfs_ino(inode));
+                          btrfs_ino(BTRFS_I(inode)));
                atomic_dec(&root->orphan_inodes);
        }
 
@@ -9513,8 +9443,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
        struct inode *old_inode = old_dentry->d_inode;
        struct timespec ctime = current_time(old_inode);
        struct dentry *parent;
-       u64 old_ino = btrfs_ino(old_inode);
-       u64 new_ino = btrfs_ino(new_inode);
+       u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
+       u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
        u64 old_idx = 0;
        u64 new_idx = 0;
        u64 root_objectid;
@@ -9571,7 +9501,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                             new_dentry->d_name.name,
                                             new_dentry->d_name.len,
                                             old_ino,
-                                            btrfs_ino(new_dir), old_idx);
+                                            btrfs_ino(BTRFS_I(new_dir)),
+                                            old_idx);
                if (ret)
                        goto out_fail;
        }
@@ -9587,7 +9518,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                             old_dentry->d_name.name,
                                             old_dentry->d_name.len,
                                             new_ino,
-                                            btrfs_ino(old_dir), new_idx);
+                                            btrfs_ino(BTRFS_I(old_dir)),
+                                            new_idx);
                if (ret)
                        goto out_fail;
        }
@@ -9603,8 +9535,10 @@ static int btrfs_rename_exchange(struct inode *old_dir,
        new_inode->i_ctime = ctime;
 
        if (old_dentry->d_parent != new_dentry->d_parent) {
-               btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
-               btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
+               btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
+                               BTRFS_I(old_inode), 1);
+               btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
+                               BTRFS_I(new_inode), 1);
        }
 
        /* src is a subvolume */
@@ -9615,8 +9549,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                          old_dentry->d_name.name,
                                          old_dentry->d_name.len);
        } else { /* src is an inode */
-               ret = __btrfs_unlink_inode(trans, root, old_dir,
-                                          old_dentry->d_inode,
+               ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
+                                          BTRFS_I(old_dentry->d_inode),
                                           old_dentry->d_name.name,
                                           old_dentry->d_name.len);
                if (!ret)
@@ -9635,8 +9569,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                          new_dentry->d_name.name,
                                          new_dentry->d_name.len);
        } else { /* dest is an inode */
-               ret = __btrfs_unlink_inode(trans, dest, new_dir,
-                                          new_dentry->d_inode,
+               ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
+                                          BTRFS_I(new_dentry->d_inode),
                                           new_dentry->d_name.name,
                                           new_dentry->d_name.len);
                if (!ret)
@@ -9670,13 +9604,15 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 
        if (root_log_pinned) {
                parent = new_dentry->d_parent;
-               btrfs_log_new_name(trans, old_inode, old_dir, parent);
+               btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+                               parent);
                btrfs_end_log_trans(root);
                root_log_pinned = false;
        }
        if (dest_log_pinned) {
                parent = old_dentry->d_parent;
-               btrfs_log_new_name(trans, new_inode, new_dir, parent);
+               btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
+                               parent);
                btrfs_end_log_trans(dest);
                dest_log_pinned = false;
        }
@@ -9693,11 +9629,11 @@ out_fail:
         * allow the tasks to sync it.
         */
        if (ret && (root_log_pinned || dest_log_pinned)) {
-               if (btrfs_inode_in_log(old_dir, fs_info->generation) ||
-                   btrfs_inode_in_log(new_dir, fs_info->generation) ||
-                   btrfs_inode_in_log(old_inode, fs_info->generation) ||
+               if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
+                   btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
+                   btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
                    (new_inode &&
-                    btrfs_inode_in_log(new_inode, fs_info->generation)))
+                    btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
                        btrfs_set_log_full_commit(fs_info, trans);
 
                if (root_log_pinned) {
@@ -9736,7 +9672,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
        inode = btrfs_new_inode(trans, root, dir,
                                dentry->d_name.name,
                                dentry->d_name.len,
-                               btrfs_ino(dir),
+                               btrfs_ino(BTRFS_I(dir)),
                                objectid,
                                S_IFCHR | WHITEOUT_MODE,
                                &index);
@@ -9784,10 +9720,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        u64 index = 0;
        u64 root_objectid;
        int ret;
-       u64 old_ino = btrfs_ino(old_inode);
+       u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
        bool log_pinned = false;
 
-       if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+       if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
                return -EPERM;
 
        /* we only allow rename subvolume link between subvolumes */
@@ -9795,7 +9731,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                return -EXDEV;
 
        if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
-           (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
+           (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
                return -ENOTEMPTY;
 
        if (S_ISDIR(old_inode->i_mode) && new_inode &&
@@ -9870,7 +9806,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                             new_dentry->d_name.name,
                                             new_dentry->d_name.len,
                                             old_ino,
-                                            btrfs_ino(new_dir), index);
+                                            btrfs_ino(BTRFS_I(new_dir)), index);
                if (ret)
                        goto out_fail;
        }
@@ -9883,7 +9819,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        old_inode->i_ctime = current_time(old_dir);
 
        if (old_dentry->d_parent != new_dentry->d_parent)
-               btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+               btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
+                               BTRFS_I(old_inode), 1);
 
        if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
@@ -9891,8 +9828,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                        old_dentry->d_name.name,
                                        old_dentry->d_name.len);
        } else {
-               ret = __btrfs_unlink_inode(trans, root, old_dir,
-                                       d_inode(old_dentry),
+               ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
+                                       BTRFS_I(d_inode(old_dentry)),
                                        old_dentry->d_name.name,
                                        old_dentry->d_name.len);
                if (!ret)
@@ -9906,7 +9843,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (new_inode) {
                inode_inc_iversion(new_inode);
                new_inode->i_ctime = current_time(new_inode);
-               if (unlikely(btrfs_ino(new_inode) ==
+               if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
                             BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
                        root_objectid = BTRFS_I(new_inode)->location.objectid;
                        ret = btrfs_unlink_subvol(trans, dest, new_dir,
@@ -9915,8 +9852,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                                new_dentry->d_name.len);
                        BUG_ON(new_inode->i_nlink == 0);
                } else {
-                       ret = btrfs_unlink_inode(trans, dest, new_dir,
-                                                d_inode(new_dentry),
+                       ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
+                                                BTRFS_I(d_inode(new_dentry)),
                                                 new_dentry->d_name.name,
                                                 new_dentry->d_name.len);
                }
@@ -9942,7 +9879,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (log_pinned) {
                struct dentry *parent = new_dentry->d_parent;
 
-               btrfs_log_new_name(trans, old_inode, old_dir, parent);
+               btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+                               parent);
                btrfs_end_log_trans(root);
                log_pinned = false;
        }
@@ -9969,11 +9907,11 @@ out_fail:
         * allow the tasks to sync it.
         */
        if (ret && log_pinned) {
-               if (btrfs_inode_in_log(old_dir, fs_info->generation) ||
-                   btrfs_inode_in_log(new_dir, fs_info->generation) ||
-                   btrfs_inode_in_log(old_inode, fs_info->generation) ||
+               if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
+                   btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
+                   btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
                    (new_inode &&
-                    btrfs_inode_in_log(new_inode, fs_info->generation)))
+                    btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
                        btrfs_set_log_full_commit(fs_info, trans);
 
                btrfs_end_log_trans(root);
@@ -10237,8 +10175,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-                               dentry->d_name.len, btrfs_ino(dir), objectid,
-                               S_IFLNK|S_IRWXUGO, &index);
+                               dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
+                               objectid, S_IFLNK|S_IRWXUGO, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -10264,7 +10202,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                err = -ENOMEM;
                goto out_unlock_inode;
        }
-       key.objectid = btrfs_ino(inode);
+       key.objectid = btrfs_ino(BTRFS_I(inode));
        key.offset = 0;
        key.type = BTRFS_EXTENT_DATA_KEY;
        datasize = btrfs_file_extent_calc_inline_size(name_len);
@@ -10517,7 +10455,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
                goto out;
 
        inode = btrfs_new_inode(trans, root, dir, NULL, 0,
-                               btrfs_ino(dir), objectid, mode, &index);
+                       btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
        if (IS_ERR(inode)) {
                ret = PTR_ERR(inode);
                inode = NULL;
index 21e51b0ba188a37be6b71cab9218a8b9457b86f8..d8539979b44ff22609903cad4c8fd3dba882f65c 100644 (file)
@@ -395,7 +395,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
                q = bdev_get_queue(device->bdev);
                if (blk_queue_discard(q)) {
                        num_devices++;
-                       minlen = min((u64)q->limits.discard_granularity,
+                       minlen = min_t(u64, q->limits.discard_granularity,
                                     minlen);
                }
        }
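
min_t() is preferred over min() with an ad-hoc cast because it casts both operands to the named type before comparing, which keeps the kernel's strict type-checking macro satisfied. Its long-standing definition is essentially:

        #define min_t(type, x, y) ({            \
                type __min1 = (x);              \
                type __min2 = (y);              \
                __min1 < __min2 ? __min1 : __min2; })
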
@@ -487,8 +487,7 @@ static noinline int create_subvol(struct inode *dir,
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               btrfs_subvolume_release_metadata(fs_info, &block_rsv,
-                                                qgroup_reserved);
+               btrfs_subvolume_release_metadata(fs_info, &block_rsv);
                goto fail_free;
        }
        trans->block_rsv = &block_rsv;
@@ -601,7 +600,7 @@ static noinline int create_subvol(struct inode *dir,
 
        ret = btrfs_add_root_ref(trans, fs_info,
                                 objectid, root->root_key.objectid,
-                                btrfs_ino(dir), index, name, namelen);
+                                btrfs_ino(BTRFS_I(dir)), index, name, namelen);
        BUG_ON(ret);
 
        ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid,
@@ -613,7 +612,7 @@ fail:
        kfree(root_item);
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
-       btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved);
+       btrfs_subvolume_release_metadata(fs_info, &block_rsv);
 
        if (async_transid) {
                *async_transid = trans->transid;
@@ -657,7 +656,7 @@ static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root)
 }
 
 static int create_snapshot(struct btrfs_root *root, struct inode *dir,
-                          struct dentry *dentry, char *name, int namelen,
+                          struct dentry *dentry,
                           u64 *async_transid, bool readonly,
                           struct btrfs_qgroup_inherit *inherit)
 {
@@ -670,12 +669,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                return -EINVAL;
 
-       pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
+       pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
        if (!pending_snapshot)
                return -ENOMEM;
 
        pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
-                       GFP_NOFS);
+                       GFP_KERNEL);
        pending_snapshot->path = btrfs_alloc_path();
        if (!pending_snapshot->root_item || !pending_snapshot->path) {
                ret = -ENOMEM;
@@ -753,9 +752,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        d_instantiate(dentry, inode);
        ret = 0;
 fail:
-       btrfs_subvolume_release_metadata(fs_info,
-                                        &pending_snapshot->block_rsv,
-                                        pending_snapshot->qgroup_reserved);
+       btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
 dec_and_free:
        if (atomic_dec_and_test(&root->will_be_snapshoted))
                wake_up_atomic_t(&root->will_be_snapshoted);
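
All three updated call sites in this file drop the qgroup_reserved argument, implying btrfs_subvolume_release_metadata() now works that amount out from the rsv itself. The inferred prototype:

        /* hedged prototype, inferred from the updated call sites */
        void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
                                              struct btrfs_block_rsv *rsv);
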
@@ -874,7 +871,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
                goto out_up_read;
 
        if (snap_src) {
-               error = create_snapshot(snap_src, dir, dentry, name, namelen,
+               error = create_snapshot(snap_src, dir, dentry,
                                        async_transid, readonly, inherit);
        } else {
                error = create_subvol(dir, dentry, name, namelen,
@@ -941,7 +938,7 @@ static int find_new_extents(struct btrfs_root *root,
        struct btrfs_file_extent_item *extent;
        int type;
        int ret;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
 
        path = btrfs_alloc_path();
        if (!path)
@@ -1780,7 +1777,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
        int ret = 0;
        u64 flags = 0;
 
-       if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
+       if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
                return -EINVAL;
 
        down_read(&fs_info->subvol_sem);
@@ -1812,7 +1809,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
        if (ret)
                goto out;
 
-       if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+       if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
                ret = -EINVAL;
                goto out_drop_write;
        }
@@ -2446,7 +2443,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        if (err)
                goto out_dput;
 
-       if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+       if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
                err = -EINVAL;
                goto out_dput;
        }
@@ -2497,7 +2494,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        trans->block_rsv = &block_rsv;
        trans->bytes_reserved = block_rsv.size;
 
-       btrfs_record_snapshot_destroy(trans, dir);
+       btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
 
        ret = btrfs_unlink_subvol(trans, root, dir,
                                dest->root_key.objectid,
@@ -2555,7 +2552,7 @@ out_end_trans:
                err = ret;
        inode->i_flags |= S_DEAD;
 out_release:
-       btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved);
+       btrfs_subvolume_release_metadata(fs_info, &block_rsv);
 out_up_write:
        up_write(&fs_info->subvol_sem);
        if (err) {
@@ -2613,9 +2610,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
                        goto out;
                }
                ret = btrfs_defrag_root(root);
-               if (ret)
-                       goto out;
-               ret = btrfs_defrag_root(root->fs_info->extent_root);
                break;
        case S_IFREG:
                if (!(file->f_mode & FMODE_WRITE)) {
@@ -3047,11 +3041,21 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
        cmp->src_pages = src_pgarr;
        cmp->dst_pages = dst_pgarr;
 
-       ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
+       /*
+        * If deduping ranges in the same inode, locking rules make it mandatory
+        * to always lock pages in ascending order to avoid deadlocks with
+        * concurrent tasks (such as starting writeback/delalloc).
+        */
+       if (src == dst && dst_loff < loff) {
+               swap(src_pgarr, dst_pgarr);
+               swap(loff, dst_loff);
+       }
+
+       ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff);
        if (ret)
                goto out;
 
-       ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
+       ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff);
 
 out:
        if (ret)
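The swap above encodes the usual deadlock-avoidance rule: when one task must lock two ranges of the same inode, it always takes them in ascending offset order, so two concurrent tasks can never each hold one lock while waiting for the other. A minimal userspace sketch of the same idea, with hypothetical names (not btrfs code):

#include <pthread.h>
#include <stdint.h>

/* Hypothetical range lock; taking pairs in ascending offset order
 * rules out ABBA deadlocks between concurrent lockers. */
struct range_lock {
	uint64_t off;
	pthread_mutex_t lock;
};

static void lock_pair_ordered(struct range_lock *a, struct range_lock *b)
{
	if (b->off < a->off) {		/* mirrors the swap() above */
		struct range_lock *tmp = a;
		a = b;
		b = tmp;
	}
	pthread_mutex_lock(&a->lock);
	pthread_mutex_lock(&b->lock);
}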
@@ -3059,8 +3063,7 @@ out:
        return 0;
 }
 
-static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
-                         u64 dst_loff, u64 len, struct cmp_pages *cmp)
+static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
 {
        int ret = 0;
        int i;
@@ -3128,26 +3131,27 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
        int ret;
        u64 len = olen;
        struct cmp_pages cmp;
-       int same_inode = 0;
+       bool same_inode = (src == dst);
        u64 same_lock_start = 0;
        u64 same_lock_len = 0;
 
-       if (src == dst)
-               same_inode = 1;
-
        if (len == 0)
                return 0;
 
-       if (same_inode) {
+       if (same_inode)
                inode_lock(src);
+       else
+               btrfs_double_inode_lock(src, dst);
 
-               ret = extent_same_check_offsets(src, loff, &len, olen);
-               if (ret)
-                       goto out_unlock;
-               ret = extent_same_check_offsets(src, dst_loff, &len, olen);
-               if (ret)
-                       goto out_unlock;
+       ret = extent_same_check_offsets(src, loff, &len, olen);
+       if (ret)
+               goto out_unlock;
+
+       ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
+       if (ret)
+               goto out_unlock;
 
+       if (same_inode) {
                /*
                 * Single inode case wants the same checks, except we
                 * don't want our length pushed out past i_size as
@@ -3175,16 +3179,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 
                same_lock_start = min_t(u64, loff, dst_loff);
                same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
-       } else {
-               btrfs_double_inode_lock(src, dst);
-
-               ret = extent_same_check_offsets(src, loff, &len, olen);
-               if (ret)
-                       goto out_unlock;
-
-               ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
-               if (ret)
-                       goto out_unlock;
        }
 
        /* don't make the dst file partly checksummed */
@@ -3236,7 +3230,7 @@ again:
        }
 
        /* pass original length for comparison so we stay within i_size */
-       ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
+       ret = btrfs_cmp_data(olen, &cmp);
        if (ret == 0)
                ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
 
@@ -3399,8 +3393,7 @@ static void clone_update_extent_map(struct inode *inode,
  * data into the destination inode's inline extent if the latter is greater
  * than the former.
  */
-static int clone_copy_inline_extent(struct inode *src,
-                                   struct inode *dst,
+static int clone_copy_inline_extent(struct inode *dst,
                                    struct btrfs_trans_handle *trans,
                                    struct btrfs_path *path,
                                    struct btrfs_key *new_key,
@@ -3420,7 +3413,7 @@ static int clone_copy_inline_extent(struct inode *src,
        if (new_key->offset > 0)
                return -EOPNOTSUPP;
 
-       key.objectid = btrfs_ino(dst);
+       key.objectid = btrfs_ino(BTRFS_I(dst));
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -3435,7 +3428,7 @@ static int clone_copy_inline_extent(struct inode *src,
                                goto copy_inline_extent;
                }
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-               if (key.objectid == btrfs_ino(dst) &&
+               if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
                    key.type == BTRFS_EXTENT_DATA_KEY) {
                        ASSERT(key.offset > 0);
                        return -EOPNOTSUPP;
@@ -3469,7 +3462,7 @@ static int clone_copy_inline_extent(struct inode *src,
                } else if (ret == 0) {
                        btrfs_item_key_to_cpu(path->nodes[0], &key,
                                              path->slots[0]);
-                       if (key.objectid == btrfs_ino(dst) &&
+                       if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
                            key.type == BTRFS_EXTENT_DATA_KEY)
                                return -EOPNOTSUPP;
                }
@@ -3563,7 +3556,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 
        path->reada = READA_FORWARD;
        /* clone data */
-       key.objectid = btrfs_ino(src);
+       key.objectid = btrfs_ino(BTRFS_I(src));
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = off;
 
@@ -3606,7 +3599,7 @@ process_slot:
 
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (key.type > BTRFS_EXTENT_DATA_KEY ||
-                   key.objectid != btrfs_ino(src))
+                   key.objectid != btrfs_ino(BTRFS_I(src)))
                        break;
 
                if (key.type == BTRFS_EXTENT_DATA_KEY) {
@@ -3659,7 +3652,7 @@ process_slot:
                        path->leave_spinning = 0;
 
                        memcpy(&new_key, &key, sizeof(new_key));
-                       new_key.objectid = btrfs_ino(inode);
+                       new_key.objectid = btrfs_ino(BTRFS_I(inode));
                        if (off <= key.offset)
                                new_key.offset = key.offset + destoff - off;
                        else
@@ -3749,7 +3742,7 @@ process_slot:
                                                        fs_info,
                                                        disko, diskl, 0,
                                                        root->root_key.objectid,
-                                                       btrfs_ino(inode),
+                                                       btrfs_ino(BTRFS_I(inode)),
                                                        new_key.offset - datao);
                                        if (ret) {
                                                btrfs_abort_transaction(trans,
@@ -3779,7 +3772,7 @@ process_slot:
                                size -= skip + trim;
                                datal -= skip + trim;
 
-                               ret = clone_copy_inline_extent(src, inode,
+                               ret = clone_copy_inline_extent(inode,
                                                               trans, path,
                                                               &new_key,
                                                               drop_start,
@@ -5129,7 +5122,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
 
        down_write(&fs_info->subvol_sem);
 
-       if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+       if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
                ret = -EINVAL;
                goto out;
        }
index 041c3326d1091d3553d8f9111f9f77c4cdea0c05..bc2aba8106293c9fe63bbcfd1b20bf66f18a2dd0 100644 (file)
@@ -432,7 +432,7 @@ out:
 }
 
 /* Needs to either be called under a log transaction or the log_mutex */
-void btrfs_get_logged_extents(struct inode *inode,
+void btrfs_get_logged_extents(struct btrfs_inode *inode,
                              struct list_head *logged_list,
                              const loff_t start,
                              const loff_t end)
@@ -442,7 +442,7 @@ void btrfs_get_logged_extents(struct inode *inode,
        struct rb_node *n;
        struct rb_node *prev;
 
-       tree = &BTRFS_I(inode)->ordered_tree;
+       tree = &inode->ordered_tree;
        spin_lock_irq(&tree->lock);
        n = __tree_search(&tree->tree, end, &prev);
        if (!n)
@@ -984,8 +984,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
        }
        disk_i_size = BTRFS_I(inode)->disk_i_size;
 
-       /* truncate file */
-       if (disk_i_size > i_size) {
+       /*
+        * truncate file.
+        * If ordered is not NULL, then this is called from endio and
+        * disk_i_size will be updated by either truncate itself or any
+        * in-flight IOs which are inside the disk_i_size.
+        *
+        * Because btrfs_setsize() may set i_size with disk_i_size if truncate
+        * fails somehow, we need to make sure we have a precise disk_i_size by
+        * updating it as usual.
+        *
+        */
+       if (!ordered && disk_i_size > i_size) {
                BTRFS_I(inode)->disk_i_size = orig_offset;
                ret = 0;
                goto out;
@@ -1032,25 +1042,22 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
                /* We treat this entry as if it doesn't exist */
                if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
                        continue;
-               if (test->file_offset + test->len <= disk_i_size)
+
+               if (entry_end(test) <= disk_i_size)
                        break;
                if (test->file_offset >= i_size)
                        break;
-               if (entry_end(test) > disk_i_size) {
-                       /*
-                        * we don't update disk_i_size now, so record this
-                        * undealt i_size. Or we will not know the real
-                        * i_size.
-                        */
-                       if (test->outstanding_isize < offset)
-                               test->outstanding_isize = offset;
-                       if (ordered &&
-                           ordered->outstanding_isize >
-                           test->outstanding_isize)
-                               test->outstanding_isize =
-                                               ordered->outstanding_isize;
-                       goto out;
-               }
+
+               /*
+                * We don't update disk_i_size now, so record this pending
+                * i_size update; otherwise we will not know the real i_size.
+                */
+               if (test->outstanding_isize < offset)
+                       test->outstanding_isize = offset;
+               if (ordered &&
+                   ordered->outstanding_isize > test->outstanding_isize)
+                       test->outstanding_isize = ordered->outstanding_isize;
+               goto out;
        }
        new_i_size = min_t(u64, offset, i_size);
 
index 5f2b0ca287058438159cffb1a9d6917fa2b966bd..a8cb8efe6fae50c7869d08459a0a400ef10528f2 100644 (file)
@@ -75,6 +75,8 @@ struct btrfs_ordered_sum {
                                 * in the logging code. */
 #define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
                                  * complete in the current transaction. */
+#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */
+
 struct btrfs_ordered_extent {
        /* logical offset in the file */
        u64 file_offset;
@@ -201,7 +203,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
                               const u64 range_start, const u64 range_len);
 int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
                              const u64 range_start, const u64 range_len);
-void btrfs_get_logged_extents(struct inode *inode,
+void btrfs_get_logged_extents(struct btrfs_inode *inode,
                              struct list_head *logged_list,
                              const loff_t start,
                              const loff_t end);
index f2621e330954f5471a8e6a5e07277cfa4699e0d3..d6cb155ef7a15fa1cc780bd1f5e6cc9624b891b6 100644 (file)
@@ -279,7 +279,7 @@ static void inode_prop_iterator(void *ctx,
        if (unlikely(ret))
                btrfs_warn(root->fs_info,
                           "error applying prop %s to ino %llu (root %llu): %d",
-                          handler->xattr_name, btrfs_ino(inode),
+                          handler->xattr_name, btrfs_ino(BTRFS_I(inode)),
                           root->root_key.objectid, ret);
        else
                set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
@@ -288,7 +288,7 @@ static void inode_prop_iterator(void *ctx,
 int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
        int ret;
 
        ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
index 662821f1252c063b3659266833b8fb08fcc04365..a5da750c1087fdc118e3ba696962260a9a761fc6 100644 (file)
@@ -319,7 +319,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
        if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
                return 0;
 
-       fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
+       fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
        if (!fs_info->qgroup_ulist) {
                ret = -ENOMEM;
                goto out;
@@ -876,7 +876,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
                goto out;
        }
 
-       fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
+       fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
        if (!fs_info->qgroup_ulist) {
                ret = -ENOMEM;
                goto out;
@@ -1019,7 +1019,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        list_del(&quota_root->dirty_list);
 
        btrfs_tree_lock(quota_root->node);
-       clean_tree_block(trans, fs_info, quota_root->node);
+       clean_tree_block(fs_info, quota_root->node);
        btrfs_tree_unlock(quota_root->node);
        btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
 
@@ -1038,6 +1038,16 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
                list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
+static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
+                                     struct btrfs_qgroup *qgroup,
+                                     u64 num_bytes)
+{
+       btrfs_warn(fs_info,
+               "qgroup %llu reserved space underflow, have: %llu, to free: %llu",
+               qgroup->qgroupid, qgroup->reserved, num_bytes);
+       qgroup->reserved = 0;
+}
+
 /*
  * The easy accounting, if we are adding/removing the only ref for an extent
  * then this qgroup and all of the parent qgroups get their reference and
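report_reserved_underflow() exists because the reserved counter is unsigned: subtracting more than is actually reserved would silently wrap to a huge value and poison all later accounting, so the callers changed below warn and clamp to zero instead. The guarded-decrement pattern in isolation (plain C sketch, not kernel code):

#include <stdint.h>
#include <stdio.h>

static void reserved_sub(uint64_t *reserved, uint64_t num_bytes)
{
	if (*reserved < num_bytes) {
		/* Would wrap below zero: report and clamp. */
		fprintf(stderr, "underflow: have %llu, to free %llu\n",
			(unsigned long long)*reserved,
			(unsigned long long)num_bytes);
		*reserved = 0;
	} else {
		*reserved -= num_bytes;
	}
}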
@@ -1065,8 +1074,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
        WARN_ON(sign < 0 && qgroup->excl < num_bytes);
        qgroup->excl += sign * num_bytes;
        qgroup->excl_cmpr += sign * num_bytes;
-       if (sign > 0)
-               qgroup->reserved -= num_bytes;
+       if (sign > 0) {
+               if (WARN_ON(qgroup->reserved < num_bytes))
+                       report_reserved_underflow(fs_info, qgroup, num_bytes);
+               else
+                       qgroup->reserved -= num_bytes;
+       }
 
        qgroup_dirty(fs_info, qgroup);
 
@@ -1086,8 +1099,13 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
                qgroup->rfer_cmpr += sign * num_bytes;
                WARN_ON(sign < 0 && qgroup->excl < num_bytes);
                qgroup->excl += sign * num_bytes;
-               if (sign > 0)
-                       qgroup->reserved -= num_bytes;
+               if (sign > 0) {
+                       if (WARN_ON(qgroup->reserved < num_bytes))
+                               report_reserved_underflow(fs_info, qgroup,
+                                                         num_bytes);
+                       else
+                               qgroup->reserved -= num_bytes;
+               }
                qgroup->excl_cmpr += sign * num_bytes;
                qgroup_dirty(fs_info, qgroup);
 
@@ -1156,7 +1174,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
        if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
                return -EINVAL;
 
-       tmp = ulist_alloc(GFP_NOFS);
+       tmp = ulist_alloc(GFP_KERNEL);
        if (!tmp)
                return -ENOMEM;
 
@@ -1205,7 +1223,7 @@ out:
        return ret;
 }
 
-int __del_qgroup_relation(struct btrfs_trans_handle *trans,
+static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 {
        struct btrfs_root *quota_root;
@@ -1216,7 +1234,7 @@ int __del_qgroup_relation(struct btrfs_trans_handle *trans,
        int ret = 0;
        int err;
 
-       tmp = ulist_alloc(GFP_NOFS);
+       tmp = ulist_alloc(GFP_KERNEL);
        if (!tmp)
                return -ENOMEM;
 
@@ -1446,8 +1464,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
        while (node) {
                record = rb_entry(node, struct btrfs_qgroup_extent_record,
                                  node);
-               ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
-                                          &record->old_roots);
+               if (WARN_ON(!record->old_roots))
+                       ret = btrfs_find_all_roots(NULL, fs_info,
+                                       record->bytenr, 0, &record->old_roots);
                if (ret < 0)
                        break;
                if (qgroup_to_skip)
@@ -1486,6 +1505,28 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+                                  struct btrfs_qgroup_extent_record *qrecord)
+{
+       struct ulist *old_root;
+       u64 bytenr = qrecord->bytenr;
+       int ret;
+
+       ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * Here we don't need to take the lock of
+        * trans->transaction->delayed_refs, since the inserted qrecord won't
+        * be deleted; only qrecord->node may be modified (new qrecord insert).
+        *
+        * So modifying qrecord->old_roots is safe here.
+        */
+       qrecord->old_roots = old_root;
+       return 0;
+}
+
 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
                gfp_t gfp_flag)
@@ -1511,9 +1552,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
        spin_lock(&delayed_refs->lock);
        ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
        spin_unlock(&delayed_refs->lock);
-       if (ret > 0)
+       if (ret > 0) {
                kfree(record);
-       return 0;
+               return 0;
+       }
+       return btrfs_qgroup_trace_extent_post(fs_info, record);
 }
 
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -1571,8 +1614,7 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
  * If we increment the root nodes slot counter past the number of
  * elements, 1 is returned to signal completion of the search.
  */
-static int adjust_slots_upwards(struct btrfs_root *root,
-                               struct btrfs_path *path, int root_level)
+static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
 {
        int level = 0;
        int nr, slot;
@@ -1713,7 +1755,7 @@ walk_down:
                                goto out;
 
                        /* Nonzero return here means we completed our search */
-                       ret = adjust_slots_upwards(root, path, root_level);
+                       ret = adjust_slots_upwards(path, root_level);
                        if (ret)
                                break;
 
@@ -1927,13 +1969,14 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
        u64 nr_old_roots = 0;
        int ret = 0;
 
+       if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+               return 0;
+
        if (new_roots)
                nr_new_roots = new_roots->nnodes;
        if (old_roots)
                nr_old_roots = old_roots->nnodes;
 
-       if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
-               goto out_free;
        BUG_ON(!fs_info->quota_root);
 
        trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
@@ -2170,9 +2213,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                        goto out;
                }
 
-               rcu_read_lock();
                level_size = fs_info->nodesize;
-               rcu_read_unlock();
        }
 
        /*
@@ -2306,7 +2347,20 @@ out:
        return ret;
 }
 
-static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
+static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
+{
+       if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
+           qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
+               return false;
+
+       if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
+           qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
+               return false;
+
+       return true;
+}
+
+static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 {
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
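qgroup_check_limits() factors the two -EDQUOT checks out of qgroup_reserve() so the new enforce flag can skip both at once. A worked example for the referenced-bytes limit: with max_rfer = 1 MiB, rfer = 700 KiB and reserved = 200 KiB, reserving another 200 KiB fails, since 200 KiB + 700 KiB + 200 KiB > 1 MiB. A standalone sketch of that check, using trimmed stand-in types rather than the real struct btrfs_qgroup:

#include <stdbool.h>
#include <stdint.h>

#define LIMIT_MAX_RFER 0x1	/* stand-in for BTRFS_QGROUP_LIMIT_MAX_RFER */

struct qg {			/* trimmed stand-in for struct btrfs_qgroup */
	uint64_t lim_flags;
	uint64_t reserved;
	uint64_t rfer;
	uint64_t max_rfer;
};

static bool check_rfer_limit(const struct qg *qg, uint64_t num_bytes)
{
	if ((qg->lim_flags & LIMIT_MAX_RFER) &&
	    qg->reserved + qg->rfer + num_bytes > qg->max_rfer)
		return false;	/* reservation would exceed the limit */
	return true;
}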
@@ -2347,16 +2401,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 
                qg = unode_aux_to_qgroup(unode);
 
-               if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
-                   qg->reserved + (s64)qg->rfer + num_bytes >
-                   qg->max_rfer) {
-                       ret = -EDQUOT;
-                       goto out;
-               }
-
-               if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
-                   qg->reserved + (s64)qg->excl + num_bytes >
-                   qg->max_excl) {
+               if (enforce && !qgroup_check_limits(qg, num_bytes)) {
                        ret = -EDQUOT;
                        goto out;
                }
@@ -2424,7 +2469,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 
                qg = unode_aux_to_qgroup(unode);
 
-               qg->reserved -= num_bytes;
+               if (WARN_ON(qg->reserved < num_bytes))
+                       report_reserved_underflow(fs_info, qg, num_bytes);
+               else
+                       qg->reserved -= num_bytes;
 
                list_for_each_entry(glist, &qg->groups, next_group) {
                        ret = ulist_add(fs_info->qgroup_ulist,
@@ -2439,11 +2487,6 @@ out:
        spin_unlock(&fs_info->qgroup_lock);
 }
 
-static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes)
-{
-       return btrfs_qgroup_free_refroot(root->fs_info, root->objectid,
-                                        num_bytes);
-}
 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 {
        if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
@@ -2803,7 +2846,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
                return 0;
 
        changeset.bytes_changed = 0;
-       changeset.range_changed = ulist_alloc(GFP_NOFS);
+       ulist_init(&changeset.range_changed);
        ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
                        start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
        trace_btrfs_qgroup_reserve_data(inode, start, len,
@@ -2811,21 +2854,21 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
                                        QGROUP_RESERVE);
        if (ret < 0)
                goto cleanup;
-       ret = qgroup_reserve(root, changeset.bytes_changed);
+       ret = qgroup_reserve(root, changeset.bytes_changed, true);
        if (ret < 0)
                goto cleanup;
 
-       ulist_free(changeset.range_changed);
+       ulist_release(&changeset.range_changed);
        return ret;
 
 cleanup:
        /* cleanup already reserved ranges */
        ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(changeset.range_changed, &uiter)))
+       while ((unode = ulist_next(&changeset.range_changed, &uiter)))
                clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
                                 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
                                 GFP_NOFS);
-       ulist_free(changeset.range_changed);
+       ulist_release(&changeset.range_changed);
        return ret;
 }
 
@@ -2837,23 +2880,22 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
        int ret;
 
        changeset.bytes_changed = 0;
-       changeset.range_changed = ulist_alloc(GFP_NOFS);
-       if (!changeset.range_changed)
-               return -ENOMEM;
-
+       ulist_init(&changeset.range_changed);
        ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 
                        start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
        if (ret < 0)
                goto out;
 
        if (free) {
-               qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+               btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+                               BTRFS_I(inode)->root->objectid,
+                               changeset.bytes_changed);
                trace_op = QGROUP_FREE;
        }
        trace_btrfs_qgroup_release_data(inode, start, len,
                                        changeset.bytes_changed, trace_op);
 out:
-       ulist_free(changeset.range_changed);
+       ulist_release(&changeset.range_changed);
        return ret;
 }
 
@@ -2892,7 +2934,8 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
        return __btrfs_qgroup_release_data(inode, start, len, 0);
 }
 
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+                             bool enforce)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
@@ -2902,7 +2945,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
                return 0;
 
        BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
-       ret = qgroup_reserve(root, num_bytes);
+       ret = qgroup_reserve(root, num_bytes, enforce);
        if (ret < 0)
                return ret;
        atomic_add(num_bytes, &root->qgroup_meta_rsv);
@@ -2921,7 +2964,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
        reserved = atomic_xchg(&root->qgroup_meta_rsv, 0);
        if (reserved == 0)
                return;
-       qgroup_free(root, reserved);
+       btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
 }
 
 void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
@@ -2935,7 +2978,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
        BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
        WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);
        atomic_sub(num_bytes, &root->qgroup_meta_rsv);
-       qgroup_free(root, num_bytes);
+       btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
 }
 
 /*
@@ -2950,22 +2993,22 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
        int ret;
 
        changeset.bytes_changed = 0;
-       changeset.range_changed = ulist_alloc(GFP_NOFS);
-       if (WARN_ON(!changeset.range_changed))
-               return;
-
+       ulist_init(&changeset.range_changed);
        ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                        EXTENT_QGROUP_RESERVED, &changeset);
 
        WARN_ON(ret < 0);
        if (WARN_ON(changeset.bytes_changed)) {
                ULIST_ITER_INIT(&iter);
-               while ((unode = ulist_next(changeset.range_changed, &iter))) {
+               while ((unode = ulist_next(&changeset.range_changed, &iter))) {
                        btrfs_warn(BTRFS_I(inode)->root->fs_info,
                                "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
                                inode->i_ino, unode->val, unode->aux);
                }
-               qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+               btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+                               BTRFS_I(inode)->root->objectid,
+                               changeset.bytes_changed);
+
        }
-       ulist_free(changeset.range_changed);
+       ulist_release(&changeset.range_changed);
 }
index 416ae8e1d23c86bfecbbcbc7b9964629e5b21148..26932a8a19930bc48ff14ad3c154a7326ce3a111 100644 (file)
@@ -94,9 +94,10 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
                                         struct btrfs_fs_info *fs_info);
 /*
  * Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at commit trans time.
+ * So qgroup can account it at transaction commit time.
  *
- * No lock version, caller must acquire delayed ref lock and allocate memory.
+ * No lock version: caller must hold the delayed ref lock and have allocated
+ * the record, then call btrfs_qgroup_trace_extent_post() outside the lock.
  *
  * Return 0 for a successful insert.
  * Return >0 for existing record, caller can free @record safely.
@@ -107,12 +108,38 @@ int btrfs_qgroup_trace_extent_nolock(
                struct btrfs_delayed_ref_root *delayed_refs,
                struct btrfs_qgroup_extent_record *record);
 
+/*
+ * Post handler after qgroup_trace_extent_nolock().
+ *
+ * NOTE: Currently qgroup does the expensive backref walk at transaction
+ * commit time with TRANS_STATE_COMMIT_DOING set, which blocks any new
+ * incoming transaction.
+ * This is designed so that btrfs_find_all_roots() gets a correct new_roots
+ * result.
+ *
+ * However, for old_roots there is no need to do the backref walk at that
+ * time, since we search commit roots for the walk and the result will
+ * always be correct.
+ *
+ * Due to the nature of the no-lock version, we can't do the backref walk
+ * there, so we must call btrfs_qgroup_trace_extent_post() after exiting
+ * the spinlock context.
+ *
+ * TODO: If we can fix and prove that btrfs_find_all_roots() gets a correct
+ * result using the current root, we can move the expensive backref walk out
+ * of transaction commit, but not yet, as qgroup accounting would be wrong.
+ */
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+                                  struct btrfs_qgroup_extent_record *qrecord);
+
 /*
  * Inform qgroup to trace one dirty extent, specified by @bytenr and
  * @num_bytes.
  * So qgroup can account it at commit trans time.
  *
- * Better encapsulated version.
+ * Better encapsulated version, which does the memory allocation and the
+ * backref walk for commit roots.
+ * So this version can sleep.
  *
  * Return 0 if the operation is done.
  * Return <0 for error, like memory allocation failure or invalid parameter
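Taken together, the nolock/post pair is meant to be called as in the btrfs_qgroup_trace_extent() hunk of qgroup.c above; condensed here as a fragment (error handling trimmed), it shows why the sleeping backref walk must wait until the spinlock is dropped:

spin_lock(&delayed_refs->lock);
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
spin_unlock(&delayed_refs->lock);
if (ret > 0) {
	kfree(record);		/* record already existed, ours is redundant */
	return 0;
}
/* Outside the spinlock: safe to sleep in the backref walk. */
return btrfs_qgroup_trace_extent_post(fs_info, record);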
@@ -181,7 +208,8 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len);
 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
 int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
 
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes);
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+                             bool enforce);
 void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
 void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
 void btrfs_qgroup_check_reserved_leak(struct inode *inode);
index d2a9a1ee53611f081838790006ef6206e9bc2345..1571bf26dc077a0575b39977ead28e229d68550f 100644 (file)
@@ -677,11 +677,9 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
        struct btrfs_raid_bio *freeit = NULL;
        struct btrfs_raid_bio *cache_drop = NULL;
        int ret = 0;
-       int walk = 0;
 
        spin_lock_irqsave(&h->lock, flags);
        list_for_each_entry(cur, &h->hash_list, hash_list) {
-               walk++;
                if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
                        spin_lock(&cur->bio_list_lock);
 
index 379711048fb0407fa05c79f7bf12e92863a71da3..ddbde0f0836537aba470882528d808755d618cec 100644 (file)
@@ -1548,9 +1548,9 @@ again:
                prev = node;
                entry = rb_entry(node, struct btrfs_inode, rb_node);
 
-               if (objectid < btrfs_ino(&entry->vfs_inode))
+               if (objectid < btrfs_ino(entry))
                        node = node->rb_left;
-               else if (objectid > btrfs_ino(&entry->vfs_inode))
+               else if (objectid > btrfs_ino(entry))
                        node = node->rb_right;
                else
                        break;
@@ -1558,7 +1558,7 @@ again:
        if (!node) {
                while (prev) {
                        entry = rb_entry(prev, struct btrfs_inode, rb_node);
-                       if (objectid <= btrfs_ino(&entry->vfs_inode)) {
+                       if (objectid <= btrfs_ino(entry)) {
                                node = prev;
                                break;
                        }
@@ -1573,7 +1573,7 @@ again:
                        return inode;
                }
 
-               objectid = btrfs_ino(&entry->vfs_inode) + 1;
+               objectid = btrfs_ino(entry) + 1;
                if (cond_resched_lock(&root->inode_lock))
                        goto again;
 
@@ -1609,8 +1609,8 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
                return -ENOMEM;
 
        bytenr -= BTRFS_I(reloc_inode)->index_cnt;
-       ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
-                                      bytenr, 0);
+       ret = btrfs_lookup_file_extent(NULL, root, path,
+                       btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
        if (ret < 0)
                goto out;
        if (ret > 0) {
@@ -1698,11 +1698,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
                        if (first) {
                                inode = find_next_inode(root, key.objectid);
                                first = 0;
-                       } else if (inode && btrfs_ino(inode) < key.objectid) {
+                       } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
                                btrfs_add_delayed_iput(inode);
                                inode = find_next_inode(root, key.objectid);
                        }
-                       if (inode && btrfs_ino(inode) == key.objectid) {
+                       if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
                                end = key.offset +
                                      btrfs_file_extent_num_bytes(leaf, fi);
                                WARN_ON(!IS_ALIGNED(key.offset,
@@ -2088,7 +2088,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
                inode = find_next_inode(root, objectid);
                if (!inode)
                        break;
-               ino = btrfs_ino(inode);
+               ino = btrfs_ino(BTRFS_I(inode));
 
                if (ino > max_key->objectid) {
                        iput(inode);
@@ -3543,7 +3543,7 @@ truncate:
                goto out;
        }
 
-       ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode);
+       ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
 
        btrfs_end_transaction(trans);
        btrfs_btree_balance_dirty(fs_info);
@@ -4334,7 +4334,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
        rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
        BUG_ON(!rc->block_group);
 
-       ret = btrfs_inc_block_group_ro(extent_root, rc->block_group);
+       ret = btrfs_inc_block_group_ro(fs_info, rc->block_group);
        if (ret) {
                err = ret;
                goto out;
@@ -4347,8 +4347,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
                goto out;
        }
 
-       inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
-                                       path);
+       inode = lookup_free_space_inode(fs_info, rc->block_group, path);
        btrfs_free_path(path);
 
        if (!IS_ERR(inode))
index 4c6735491ee061cf7d6af58ded5064db2e4874cc..a08224eab8b47111b2fc6f7f51ac33e6ca69814e 100644 (file)
@@ -74,7 +74,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
  *
  * If we find something return 0, otherwise > 0, < 0 on error.
  */
-int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
+int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
                    struct btrfs_path *path, struct btrfs_root_item *root_item,
                    struct btrfs_key *root_key)
 {
@@ -207,7 +207,7 @@ out:
 }
 
 int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                     struct btrfs_key *key, struct btrfs_root_item *item)
+                     const struct btrfs_key *key, struct btrfs_root_item *item)
 {
        /*
         * Make sure generation v1 and v2 match. See update_root for details.
@@ -337,7 +337,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
 
 /* drop the root item for 'key' from 'root' */
 int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                  struct btrfs_key *key)
+                  const struct btrfs_key *key)
 {
        struct btrfs_path *path;
        int ret;
index 9a94670536a698c7508f289c263ff9408da18821..ff9a11c39f5e1d88f699e549401878b360775265 100644 (file)
@@ -282,9 +282,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
                               u64 *extent_physical,
                               struct btrfs_device **extent_dev,
                               int *extent_mirror_num);
-static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
-                             struct scrub_wr_ctx *wr_ctx,
-                             struct btrfs_fs_info *fs_info,
+static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
                              struct btrfs_device *dev,
                              int is_dev_replace);
 static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
@@ -501,7 +499,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
        spin_lock_init(&sctx->stat_lock);
        init_waitqueue_head(&sctx->list_wait);
 
-       ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
+       ret = scrub_setup_wr_ctx(&sctx->wr_ctx,
                                 fs_info->dev_replace.tgtdev, is_dev_replace);
        if (ret) {
                scrub_free_ctx(sctx);
@@ -3584,7 +3582,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                 * -> btrfs_scrub_pause()
                 */
                scrub_pause_on(fs_info);
-               ret = btrfs_inc_block_group_ro(root, cache);
+               ret = btrfs_inc_block_group_ro(fs_info, cache);
                if (!ret && is_dev_replace) {
                        /*
                         * If we are doing a device replace wait for any tasks
@@ -4084,9 +4082,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
        btrfs_put_bbio(bbio);
 }
 
-static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
-                             struct scrub_wr_ctx *wr_ctx,
-                             struct btrfs_fs_info *fs_info,
+static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
                              struct btrfs_device *dev,
                              int is_dev_replace)
 {
index b5ae7d3d189629ce3751d816b31b344fa4ad9999..da687dc79cce6155a278038a15775637c97ce3cc 100644 (file)
@@ -265,7 +265,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                           function, line, errstr);
                return;
        }
-       ACCESS_ONCE(trans->transaction->aborted) = errno;
+       WRITE_ONCE(trans->transaction->aborted, errno);
        /* Wake up anybody who may be waiting on this transaction */
        wake_up(&fs_info->transaction_wait);
        wake_up(&fs_info->transaction_blocked_wait);
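This hunk, and the matching READ_ONCE() hunks in transaction.c below, migrate away from the deprecated ACCESS_ONCE(), whose single macro had to serve both loads and stores and was unreliable on non-scalar types. READ_ONCE()/WRITE_ONCE() make the direction explicit; for scalars they boil down to a volatile access, roughly as in this userspace approximation (the real definitions in <linux/compiler.h> are more involved):

#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile __typeof__(x) *)&(x) = (val))

static int aborted;	/* stand-in for trans->transaction->aborted */

static void abort_example(int err)
{
	WRITE_ONCE(aborted, err);	/* one untorn, unelided store */
}

static int check_example(void)
{
	return READ_ONCE(aborted);	/* fresh load on every call */
}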
@@ -1114,7 +1114,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
 
 static int btrfs_fill_super(struct super_block *sb,
                            struct btrfs_fs_devices *fs_devices,
-                           void *data, int silent)
+                           void *data)
 {
        struct inode *inode;
        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
@@ -1611,8 +1611,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        } else {
                snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
                btrfs_sb(s)->bdev_holder = fs_type;
-               error = btrfs_fill_super(s, fs_devices, data,
-                                        flags & MS_SILENT ? 1 : 0);
+               error = btrfs_fill_super(s, fs_devices, data);
        }
        if (error) {
                deactivate_locked_super(s);
index 0e0508f488b273929a783ec1048d57cb4b9d1761..6b3e0fc2fe7ac28affd1f966b6e5c08f7493d805 100644 (file)
@@ -474,7 +474,8 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
 
 static struct btrfs_trans_handle *
 start_transaction(struct btrfs_root *root, unsigned int num_items,
-                 unsigned int type, enum btrfs_reserve_flush_enum flush)
+                 unsigned int type, enum btrfs_reserve_flush_enum flush,
+                 bool enforce_qgroups)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
 
@@ -505,9 +506,10 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
         * Do the reservation before we join the transaction so we can do all
         * the appropriate flushing if need be.
         */
-       if (num_items > 0 && root != fs_info->chunk_root) {
+       if (num_items && root != fs_info->chunk_root) {
                qgroup_reserved = num_items * fs_info->nodesize;
-               ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved);
+               ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved,
+                                               enforce_qgroups);
                if (ret)
                        return ERR_PTR(ret);
 
@@ -613,8 +615,9 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   unsigned int num_items)
 {
        return start_transaction(root, num_items, TRANS_START,
-                                BTRFS_RESERVE_FLUSH_ALL);
+                                BTRFS_RESERVE_FLUSH_ALL, true);
 }
+
 struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
                                        struct btrfs_root *root,
                                        unsigned int num_items,
@@ -625,7 +628,14 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
        u64 num_bytes;
        int ret;
 
-       trans = btrfs_start_transaction(root, num_items);
+       /*
+        * We have two callers: unlink and block group removal.  The
+        * former should succeed even if we temporarily exceed quota,
+        * and the latter operates on the extent root, so qgroup
+        * enforcement is ignored anyway.
+        */
+       trans = start_transaction(root, num_items, TRANS_START,
+                                 BTRFS_RESERVE_FLUSH_ALL, false);
        if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
                return trans;
 
@@ -654,25 +664,25 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush(
                                        unsigned int num_items)
 {
        return start_transaction(root, num_items, TRANS_START,
-                                BTRFS_RESERVE_FLUSH_LIMIT);
+                                BTRFS_RESERVE_FLUSH_LIMIT, true);
 }
 
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
 {
-       return start_transaction(root, 0, TRANS_JOIN,
-                                BTRFS_RESERVE_NO_FLUSH);
+       return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
+                                true);
 }
 
 struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
 {
        return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
-                                BTRFS_RESERVE_NO_FLUSH);
+                                BTRFS_RESERVE_NO_FLUSH, true);
 }
 
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
 {
        return start_transaction(root, 0, TRANS_USERSPACE,
-                                BTRFS_RESERVE_NO_FLUSH);
+                                BTRFS_RESERVE_NO_FLUSH, true);
 }
 
 /*
@@ -691,7 +701,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
 struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
 {
        return start_transaction(root, 0, TRANS_ATTACH,
-                                BTRFS_RESERVE_NO_FLUSH);
+                                BTRFS_RESERVE_NO_FLUSH, true);
 }
 
 /*
@@ -707,7 +717,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
        struct btrfs_trans_handle *trans;
 
        trans = start_transaction(root, 0, TRANS_ATTACH,
-                                 BTRFS_RESERVE_NO_FLUSH);
+                                 BTRFS_RESERVE_NO_FLUSH, true);
        if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
                btrfs_wait_for_commit(root->fs_info, 0);
 
@@ -866,14 +876,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
        if (lock && !atomic_read(&info->open_ioctl_trans) &&
            should_end_transaction(trans) &&
-           ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
+           READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
                spin_lock(&info->trans_lock);
                if (cur_trans->state == TRANS_STATE_RUNNING)
                        cur_trans->state = TRANS_STATE_BLOCKED;
                spin_unlock(&info->trans_lock);
        }
 
-       if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
+       if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
                if (throttle)
                        return btrfs_commit_transaction(trans);
                else
@@ -1354,12 +1364,8 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
         * enabled. If this check races with the ioctl, rescan will
         * kick in anyway.
         */
-       mutex_lock(&fs_info->qgroup_ioctl_lock);
-       if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-               mutex_unlock(&fs_info->qgroup_ioctl_lock);
+       if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
                return 0;
-       }
-       mutex_unlock(&fs_info->qgroup_ioctl_lock);
 
        /*
         * We are going to commit transaction, see btrfs_commit_transaction()
@@ -1504,7 +1510,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
        /* check if there is a file/dir which has the same name. */
        dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
-                                        btrfs_ino(parent_inode),
+                                        btrfs_ino(BTRFS_I(parent_inode)),
                                         dentry->d_name.name,
                                         dentry->d_name.len, 0);
        if (dir_item != NULL && !IS_ERR(dir_item)) {
@@ -1598,7 +1604,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         */
        ret = btrfs_add_root_ref(trans, fs_info, objectid,
                                 parent_root->root_key.objectid,
-                                btrfs_ino(parent_inode), index,
+                                btrfs_ino(BTRFS_I(parent_inode)), index,
                                 dentry->d_name.name, dentry->d_name.len);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
@@ -1940,7 +1946,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
        int ret;
 
        /* Stop the commit early if ->aborted is set */
-       if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+       if (unlikely(READ_ONCE(cur_trans->aborted))) {
                ret = cur_trans->aborted;
                btrfs_end_transaction(trans);
                return ret;
@@ -2080,7 +2086,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
                   atomic_read(&cur_trans->num_writers) == 1);
 
        /* ->aborted might be set after the previous check, so check it */
-       if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+       if (unlikely(READ_ONCE(cur_trans->aborted))) {
                ret = cur_trans->aborted;
                goto scrub_continue;
        }
@@ -2194,14 +2200,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
         * The tasks which save the space cache and inode cache may also
         * update ->aborted, check it.
         */
-       if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+       if (unlikely(READ_ONCE(cur_trans->aborted))) {
                ret = cur_trans->aborted;
                mutex_unlock(&fs_info->tree_log_mutex);
                mutex_unlock(&fs_info->reloc_mutex);
                goto scrub_continue;
        }
 
-       btrfs_prepare_extent_commit(trans, fs_info);
+       btrfs_prepare_extent_commit(fs_info);
 
        cur_trans = fs_info->running_transaction;
 
@@ -2251,7 +2257,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
                goto scrub_continue;
        }
 
-       ret = write_ctree_super(trans, fs_info, 0);
+       ret = write_all_supers(fs_info, 0);
        if (ret) {
                mutex_unlock(&fs_info->tree_log_mutex);
                goto scrub_continue;
index eeffff84f280958cfd117e860055d7b9fc683377..3806853cde08d802c708beb58e61f08b1ac0c567 100644 (file)
@@ -97,7 +97,7 @@
 #define LOG_WALK_REPLAY_ALL 3
 
 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, struct inode *inode,
+                          struct btrfs_root *root, struct btrfs_inode *inode,
                           int inode_only,
                           const loff_t start,
                           const loff_t end,
@@ -631,8 +631,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
         * file.  This must be done before the btrfs_drop_extents run
         * so we don't try to drop this extent.
         */
-       ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
-                                      start, 0);
+       ret = btrfs_lookup_file_extent(trans, root, path,
+                       btrfs_ino(BTRFS_I(inode)), start, 0);
 
        if (ret == 0 &&
            (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -843,7 +843,7 @@ out:
 static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root,
                                      struct btrfs_path *path,
-                                     struct inode *dir,
+                                     struct btrfs_inode *dir,
                                      struct btrfs_dir_item *di)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -875,7 +875,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
        if (ret)
                goto out;
 
-       ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
+       ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name,
+                       name_len);
        if (ret)
                goto out;
        else
@@ -991,8 +992,8 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  struct btrfs_path *path,
                                  struct btrfs_root *log_root,
-                                 struct inode *dir, struct inode *inode,
-                                 struct extent_buffer *eb,
+                                 struct btrfs_inode *dir,
+                                 struct btrfs_inode *inode,
                                  u64 inode_objectid, u64 parent_objectid,
                                  u64 ref_index, char *name, int namelen,
                                  int *search_done)
@@ -1047,12 +1048,11 @@ again:
                                            parent_objectid,
                                            victim_name,
                                            victim_name_len)) {
-                               inc_nlink(inode);
+                               inc_nlink(&inode->vfs_inode);
                                btrfs_release_path(path);
 
-                               ret = btrfs_unlink_inode(trans, root, dir,
-                                                        inode, victim_name,
-                                                        victim_name_len);
+                               ret = btrfs_unlink_inode(trans, root, dir, inode,
+                                               victim_name, victim_name_len);
                                kfree(victim_name);
                                if (ret)
                                        return ret;
@@ -1115,16 +1115,16 @@ again:
                                            victim_name_len)) {
                                ret = -ENOENT;
                                victim_parent = read_one_inode(root,
-                                                              parent_objectid);
+                                               parent_objectid);
                                if (victim_parent) {
-                                       inc_nlink(inode);
+                                       inc_nlink(&inode->vfs_inode);
                                        btrfs_release_path(path);
 
                                        ret = btrfs_unlink_inode(trans, root,
-                                                                victim_parent,
-                                                                inode,
-                                                                victim_name,
-                                                                victim_name_len);
+                                                       BTRFS_I(victim_parent),
+                                                       inode,
+                                                       victim_name,
+                                                       victim_name_len);
                                        if (!ret)
                                                ret = btrfs_run_delayed_items(
                                                                  trans,
@@ -1295,8 +1295,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
                        goto out;
 
                /* if we already have a perfect match, we're done */
-               if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
-                                 ref_index, name, namelen)) {
+               if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
+                                       btrfs_ino(BTRFS_I(inode)), ref_index,
+                                       name, namelen)) {
                        /*
                         * look for a conflicting back reference in the
                         * metadata. if we find one we have to unlink that name
@@ -1307,7 +1308,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 
                        if (!search_done) {
                                ret = __add_inode_ref(trans, root, path, log,
-                                                     dir, inode, eb,
+                                                     BTRFS_I(dir),
+                                                     BTRFS_I(inode),
                                                      inode_objectid,
                                                      parent_objectid,
                                                      ref_index, name, namelen,
@@ -1360,7 +1362,7 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
 }
 
 static int count_inode_extrefs(struct btrfs_root *root,
-                              struct inode *inode, struct btrfs_path *path)
+               struct btrfs_inode *inode, struct btrfs_path *path)
 {
        int ret = 0;
        int name_len;
@@ -1404,7 +1406,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
 }
 
 static int count_inode_refs(struct btrfs_root *root,
-                              struct inode *inode, struct btrfs_path *path)
+                       struct btrfs_inode *inode, struct btrfs_path *path)
 {
        int ret;
        struct btrfs_key key;
@@ -1477,19 +1479,19 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        int ret;
        u64 nlink = 0;
-       u64 ino = btrfs_ino(inode);
+       u64 ino = btrfs_ino(BTRFS_I(inode));
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       ret = count_inode_refs(root, inode, path);
+       ret = count_inode_refs(root, BTRFS_I(inode), path);
        if (ret < 0)
                goto out;
 
        nlink = ret;
 
-       ret = count_inode_extrefs(root, inode, path);
+       ret = count_inode_extrefs(root, BTRFS_I(inode), path);
        if (ret < 0)
                goto out;
 
@@ -1769,7 +1771,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
        if (!exists)
                goto out;
 
-       ret = drop_one_dir_item(trans, root, path, dir, dst_di);
+       ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di);
        if (ret)
                goto out;
 
@@ -2052,8 +2054,8 @@ again:
                        }
 
                        inc_nlink(inode);
-                       ret = btrfs_unlink_inode(trans, root, dir, inode,
-                                                name, name_len);
+                       ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+                                       BTRFS_I(inode), name, name_len);
                        if (!ret)
                                ret = btrfs_run_delayed_items(trans, fs_info);
                        kfree(name);
@@ -2469,7 +2471,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                if (trans) {
                                        btrfs_tree_lock(next);
                                        btrfs_set_lock_blocking(next);
-                                       clean_tree_block(trans, fs_info, next);
+                                       clean_tree_block(fs_info, next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
                                }
@@ -2549,7 +2551,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                if (trans) {
                                        btrfs_tree_lock(next);
                                        btrfs_set_lock_blocking(next);
-                                       clean_tree_block(trans, fs_info, next);
+                                       clean_tree_block(fs_info, next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
                                }
@@ -2627,7 +2629,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                        if (trans) {
                                btrfs_tree_lock(next);
                                btrfs_set_lock_blocking(next);
-                               clean_tree_block(trans, fs_info, next);
+                               clean_tree_block(fs_info, next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
                        }
@@ -2958,7 +2960,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         * the running transaction open, so a full commit can't hop
         * in and cause problems either.
         */
-       ret = write_ctree_super(trans, fs_info, 1);
+       ret = write_all_supers(fs_info, 1);
        if (ret) {
                btrfs_set_log_full_commit(fs_info, trans);
                btrfs_abort_transaction(trans, ret);
@@ -3084,7 +3086,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
 int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 const char *name, int name_len,
-                                struct inode *dir, u64 index)
+                                struct btrfs_inode *dir, u64 index)
 {
        struct btrfs_root *log;
        struct btrfs_dir_item *di;
@@ -3094,14 +3096,14 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
        int bytes_del = 0;
        u64 dir_ino = btrfs_ino(dir);
 
-       if (BTRFS_I(dir)->logged_trans < trans->transid)
+       if (dir->logged_trans < trans->transid)
                return 0;
 
        ret = join_running_log_trans(root);
        if (ret)
                return 0;
 
-       mutex_lock(&BTRFS_I(dir)->log_mutex);
+       mutex_lock(&dir->log_mutex);
 
        log = root->log_root;
        path = btrfs_alloc_path();
@@ -3176,7 +3178,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 fail:
        btrfs_free_path(path);
 out_unlock:
-       mutex_unlock(&BTRFS_I(dir)->log_mutex);
+       mutex_unlock(&dir->log_mutex);
        if (ret == -ENOSPC) {
                btrfs_set_log_full_commit(root->fs_info, trans);
                ret = 0;
@@ -3192,25 +3194,25 @@ out_unlock:
 int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               const char *name, int name_len,
-                              struct inode *inode, u64 dirid)
+                              struct btrfs_inode *inode, u64 dirid)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *log;
        u64 index;
        int ret;
 
-       if (BTRFS_I(inode)->logged_trans < trans->transid)
+       if (inode->logged_trans < trans->transid)
                return 0;
 
        ret = join_running_log_trans(root);
        if (ret)
                return 0;
        log = root->log_root;
-       mutex_lock(&BTRFS_I(inode)->log_mutex);
+       mutex_lock(&inode->log_mutex);
 
        ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
                                  dirid, &index);
-       mutex_unlock(&BTRFS_I(inode)->log_mutex);
+       mutex_unlock(&inode->log_mutex);
        if (ret == -ENOSPC) {
                btrfs_set_log_full_commit(fs_info, trans);
                ret = 0;
@@ -3260,7 +3262,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
  * to replay anything deleted before the fsync
  */
 static noinline int log_dir_items(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root, struct inode *inode,
+                         struct btrfs_root *root, struct btrfs_inode *inode,
                          struct btrfs_path *path,
                          struct btrfs_path *dst_path, int key_type,
                          struct btrfs_log_ctx *ctx,
@@ -3450,7 +3452,7 @@ done:
  * key logged by this transaction.
  */
 static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root, struct inode *inode,
+                         struct btrfs_root *root, struct btrfs_inode *inode,
                          struct btrfs_path *path,
                          struct btrfs_path *dst_path,
                          struct btrfs_log_ctx *ctx)
@@ -3464,9 +3466,8 @@ again:
        min_key = 0;
        max_key = 0;
        while (1) {
-               ret = log_dir_items(trans, root, inode, path,
-                                   dst_path, key_type, ctx, min_key,
-                                   &max_key);
+               ret = log_dir_items(trans, root, inode, path, dst_path, key_type,
+                               ctx, min_key, &max_key);
                if (ret)
                        return ret;
                if (max_key == (u64)-1)
@@ -3595,34 +3596,34 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 
 static int log_inode_item(struct btrfs_trans_handle *trans,
                          struct btrfs_root *log, struct btrfs_path *path,
-                         struct inode *inode)
+                         struct btrfs_inode *inode)
 {
        struct btrfs_inode_item *inode_item;
        int ret;
 
        ret = btrfs_insert_empty_item(trans, log, path,
-                                     &BTRFS_I(inode)->location,
-                                     sizeof(*inode_item));
+                                     &inode->location, sizeof(*inode_item));
        if (ret && ret != -EEXIST)
                return ret;
        inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                    struct btrfs_inode_item);
-       fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
+       fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode,
+                       0, 0);
        btrfs_release_path(path);
        return 0;
 }
 
 static noinline int copy_items(struct btrfs_trans_handle *trans,
-                              struct inode *inode,
+                              struct btrfs_inode *inode,
                               struct btrfs_path *dst_path,
                               struct btrfs_path *src_path, u64 *last_extent,
                               int start_slot, int nr, int inode_only,
                               u64 logged_isize)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
        unsigned long src_offset;
        unsigned long dst_offset;
-       struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
+       struct btrfs_root *log = inode->root->log_root;
        struct btrfs_file_extent_item *extent;
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *src = src_path->nodes[0];
@@ -3633,7 +3634,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        char *ins_data;
        int i;
        struct list_head ordered_sums;
-       int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+       int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
        bool has_extents = false;
        bool need_find_last_extent = true;
        bool done = false;
@@ -3675,7 +3676,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                                    dst_path->slots[0],
                                                    struct btrfs_inode_item);
                        fill_inode_item(trans, dst_path->nodes[0], inode_item,
-                                       inode, inode_only == LOG_INODE_EXISTS,
+                                       &inode->vfs_inode,
+                                       inode_only == LOG_INODE_EXISTS,
                                        logged_isize);
                } else {
                        copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
@@ -3783,7 +3785,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        if (need_find_last_extent) {
                u64 len;
 
-               ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
+               ret = btrfs_prev_leaf(inode->root, src_path);
                if (ret < 0)
                        return ret;
                if (ret)
@@ -3825,8 +3827,8 @@ fill_holes:
        if (need_find_last_extent) {
                /* btrfs_prev_leaf could return 1 without releasing the path */
                btrfs_release_path(src_path);
-               ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
-                                       src_path, 0, 0);
+               ret = btrfs_search_slot(NULL, inode->root, &first_key,
+                               src_path, 0, 0);
                if (ret < 0)
                        return ret;
                ASSERT(ret == 0);
@@ -3846,7 +3848,7 @@ fill_holes:
                u64 extent_end;
 
                if (i >= btrfs_header_nritems(src_path->nodes[0])) {
-                       ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
+                       ret = btrfs_next_leaf(inode->root, src_path);
                        if (ret < 0)
                                return ret;
                        ASSERT(ret == 0);
@@ -3881,8 +3883,7 @@ fill_holes:
                offset = *last_extent;
                len = key.offset - *last_extent;
                ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
-                                              offset, 0, 0, len, 0, len, 0,
-                                              0, 0);
+                               offset, 0, 0, len, 0, len, 0, 0, 0);
                if (ret)
                        break;
                *last_extent = extent_end;
@@ -4055,7 +4056,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
 }
 
 static int log_one_extent(struct btrfs_trans_handle *trans,
-                         struct inode *inode, struct btrfs_root *root,
+                         struct btrfs_inode *inode, struct btrfs_root *root,
                          const struct extent_map *em,
                          struct btrfs_path *path,
                          const struct list_head *logged_list,
@@ -4072,8 +4073,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
        int extent_inserted = 0;
        bool ordered_io_err = false;
 
-       ret = wait_ordered_extents(trans, inode, root, em, logged_list,
-                                  &ordered_io_err);
+       ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em,
+                       logged_list, &ordered_io_err);
        if (ret)
                return ret;
 
@@ -4084,7 +4085,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 
        btrfs_init_map_token(&token);
 
-       ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+       ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
                                   em->start + em->len, NULL, 0, 1,
                                   sizeof(*fi), &extent_inserted);
        if (ret)
@@ -4150,7 +4151,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 
 static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
-                                    struct inode *inode,
+                                    struct btrfs_inode *inode,
                                     struct btrfs_path *path,
                                     struct list_head *logged_list,
                                     struct btrfs_log_ctx *ctx,
@@ -4159,14 +4160,14 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 {
        struct extent_map *em, *n;
        struct list_head extents;
-       struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map_tree *tree = &inode->extent_tree;
        u64 test_gen;
        int ret = 0;
        int num = 0;
 
        INIT_LIST_HEAD(&extents);
 
-       down_write(&BTRFS_I(inode)->dio_sem);
+       down_write(&inode->dio_sem);
        write_lock(&tree->lock);
        test_gen = root->fs_info->last_trans_committed;
 
@@ -4206,7 +4207,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
         * without writing to the log tree and the fsync must report the
         * file data write error and not commit the current transaction.
         */
-       ret = filemap_check_errors(inode->i_mapping);
+       ret = filemap_check_errors(inode->vfs_inode.i_mapping);
        if (ret)
                ctx->io_err = ret;
 process:
@@ -4235,13 +4236,13 @@ process:
        }
        WARN_ON(!list_empty(&extents));
        write_unlock(&tree->lock);
-       up_write(&BTRFS_I(inode)->dio_sem);
+       up_write(&inode->dio_sem);
 
        btrfs_release_path(path);
        return ret;
 }
 
-static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
+static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
                             struct btrfs_path *path, u64 *size_ret)
 {
        struct btrfs_key key;
@@ -4279,7 +4280,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
  */
 static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                               struct inode *inode,
+                               struct btrfs_inode *inode,
                                struct btrfs_path *path,
                                struct btrfs_path *dst_path)
 {
@@ -4374,7 +4375,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
  */
 static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
-                                  struct inode *inode,
+                                  struct btrfs_inode *inode,
                                   struct btrfs_path *path)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4385,7 +4386,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        struct btrfs_root *log = root->log_root;
        const u64 ino = btrfs_ino(inode);
-       const u64 i_size = i_size_read(inode);
+       const u64 i_size = i_size_read(&inode->vfs_inode);
 
        if (!btrfs_fs_incompat(fs_info, NO_HOLES))
                return 0;
@@ -4495,7 +4496,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                         const int slot,
                                         const struct btrfs_key *key,
-                                        struct inode *inode,
+                                        struct btrfs_inode *inode,
                                         u64 *other_ino)
 {
        int ret;
@@ -4551,9 +4552,8 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                }
 
                read_extent_buffer(eb, name, name_ptr, this_name_len);
-               di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
-                                          search_path, parent,
-                                          name, this_name_len, 0);
+               di = btrfs_lookup_dir_item(NULL, inode->root, search_path,
+                               parent, name, this_name_len, 0);
                if (di && !IS_ERR(di)) {
                        struct btrfs_key di_key;
 
@@ -4596,7 +4596,7 @@ out:
  * This handles both files and directories.
  */
 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, struct inode *inode,
+                          struct btrfs_root *root, struct btrfs_inode *inode,
                           int inode_only,
                           const loff_t start,
                           const loff_t end,
@@ -4618,7 +4618,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        int ins_nr;
        bool fast_search = false;
        u64 ino = btrfs_ino(inode);
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map_tree *em_tree = &inode->extent_tree;
        u64 logged_isize = 0;
        bool need_log_inode_item = true;
 
@@ -4639,9 +4639,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
 
        /* today the code can only do partial logging of directories */
-       if (S_ISDIR(inode->i_mode) ||
+       if (S_ISDIR(inode->vfs_inode.i_mode) ||
            (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                      &BTRFS_I(inode)->runtime_flags) &&
+                      &inode->runtime_flags) &&
             inode_only >= LOG_INODE_EXISTS))
                max_key.type = BTRFS_XATTR_ITEM_KEY;
        else
@@ -4654,8 +4654,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
         * order for the log replay code to mark inodes for link count
         * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
         */
-       if (S_ISDIR(inode->i_mode) ||
-           BTRFS_I(inode)->generation > fs_info->last_trans_committed)
+       if (S_ISDIR(inode->vfs_inode.i_mode) ||
+           inode->generation > fs_info->last_trans_committed)
                ret = btrfs_commit_inode_delayed_items(trans, inode);
        else
                ret = btrfs_commit_inode_delayed_inode(inode);
@@ -4668,17 +4668,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
        if (inode_only == LOG_OTHER_INODE) {
                inode_only = LOG_INODE_EXISTS;
-               mutex_lock_nested(&BTRFS_I(inode)->log_mutex,
-                                 SINGLE_DEPTH_NESTING);
+               mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING);
        } else {
-               mutex_lock(&BTRFS_I(inode)->log_mutex);
+               mutex_lock(&inode->log_mutex);
        }
 
        /*
         * a brute force approach to making sure we get the most uptodate
         * copies of everything.
         */
-       if (S_ISDIR(inode->i_mode)) {
+       if (S_ISDIR(inode->vfs_inode.i_mode)) {
                int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
 
                if (inode_only == LOG_INODE_EXISTS)
@@ -4699,31 +4698,30 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                         * (zeroes), as if an expanding truncate happened,
                         * instead of getting a file of 4Kb only.
                         */
-                       err = logged_inode_size(log, inode, path,
-                                               &logged_isize);
+                       err = logged_inode_size(log, inode, path, &logged_isize);
                        if (err)
                                goto out_unlock;
                }
                if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                            &BTRFS_I(inode)->runtime_flags)) {
+                            &inode->runtime_flags)) {
                        if (inode_only == LOG_INODE_EXISTS) {
                                max_key.type = BTRFS_XATTR_ITEM_KEY;
                                ret = drop_objectid_items(trans, log, path, ino,
                                                          max_key.type);
                        } else {
                                clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                                         &BTRFS_I(inode)->runtime_flags);
+                                         &inode->runtime_flags);
                                clear_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                         &BTRFS_I(inode)->runtime_flags);
+                                         &inode->runtime_flags);
                                while(1) {
                                        ret = btrfs_truncate_inode_items(trans,
-                                                        log, inode, 0, 0);
+                                               log, &inode->vfs_inode, 0, 0);
                                        if (ret != -EAGAIN)
                                                break;
                                }
                        }
                } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                             &BTRFS_I(inode)->runtime_flags) ||
+                                             &inode->runtime_flags) ||
                           inode_only == LOG_INODE_EXISTS) {
                        if (inode_only == LOG_INODE_ALL)
                                fast_search = true;
@@ -4764,18 +4762,17 @@ again:
 
                if ((min_key.type == BTRFS_INODE_REF_KEY ||
                     min_key.type == BTRFS_INODE_EXTREF_KEY) &&
-                   BTRFS_I(inode)->generation == trans->transid) {
+                   inode->generation == trans->transid) {
                        u64 other_ino = 0;
 
                        ret = btrfs_check_ref_name_override(path->nodes[0],
-                                                           path->slots[0],
-                                                           &min_key, inode,
-                                                           &other_ino);
+                                       path->slots[0], &min_key, inode,
+                                       &other_ino);
                        if (ret < 0) {
                                err = ret;
                                goto out_unlock;
                        } else if (ret > 0 && ctx &&
-                                  other_ino != btrfs_ino(ctx->inode)) {
+                                  other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
                                struct btrfs_key inode_key;
                                struct inode *other_inode;
 
@@ -4823,9 +4820,10 @@ again:
                                 * update the log with the new name before we
                                 * unpin it.
                                 */
-                               err = btrfs_log_inode(trans, root, other_inode,
-                                                     LOG_OTHER_INODE,
-                                                     0, LLONG_MAX, ctx);
+                               err = btrfs_log_inode(trans, root,
+                                               BTRFS_I(other_inode),
+                                               LOG_OTHER_INODE, 0, LLONG_MAX,
+                                               ctx);
                                iput(other_inode);
                                if (err)
                                        goto out_unlock;
@@ -4979,25 +4977,25 @@ log_extents:
                write_unlock(&em_tree->lock);
        }
 
-       if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
+       if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) {
                ret = log_directory_changes(trans, root, inode, path, dst_path,
-                                           ctx);
+                                       ctx);
                if (ret) {
                        err = ret;
                        goto out_unlock;
                }
        }
 
-       spin_lock(&BTRFS_I(inode)->lock);
-       BTRFS_I(inode)->logged_trans = trans->transid;
-       BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
-       spin_unlock(&BTRFS_I(inode)->lock);
+       spin_lock(&inode->lock);
+       inode->logged_trans = trans->transid;
+       inode->last_log_commit = inode->last_sub_trans;
+       spin_unlock(&inode->lock);
 out_unlock:
        if (unlikely(err))
                btrfs_put_logged_extents(&logged_list);
        else
                btrfs_submit_logged_extents(&logged_list, log);
-       mutex_unlock(&BTRFS_I(inode)->log_mutex);
+       mutex_unlock(&inode->log_mutex);
 
        btrfs_free_path(path);
        btrfs_free_path(dst_path);
@@ -5021,13 +5019,13 @@ out_unlock:
  * we logged the inode or it might have also done the unlink).
  */
 static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
-                                         struct inode *inode)
+                                         struct btrfs_inode *inode)
 {
-       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        bool ret = false;
 
-       mutex_lock(&BTRFS_I(inode)->log_mutex);
-       if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) {
+       mutex_lock(&inode->log_mutex);
+       if (inode->last_unlink_trans > fs_info->last_trans_committed) {
                /*
                 * Make sure any commits to the log are forced to be full
                 * commits.
@@ -5035,7 +5033,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
                btrfs_set_log_full_commit(fs_info, trans);
                ret = true;
        }
-       mutex_unlock(&BTRFS_I(inode)->log_mutex);
+       mutex_unlock(&inode->log_mutex);
 
        return ret;
 }
@@ -5084,7 +5082,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                        BTRFS_I(inode)->logged_trans = trans->transid;
                smp_mb();
 
-               if (btrfs_must_commit_transaction(trans, inode)) {
+               if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) {
                        ret = 1;
                        break;
                }
@@ -5094,7 +5092,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 
                if (IS_ROOT(parent)) {
                        inode = d_inode(parent);
-                       if (btrfs_must_commit_transaction(trans, inode))
+                       if (btrfs_must_commit_transaction(trans, BTRFS_I(inode)))
                                ret = 1;
                        break;
                }
@@ -5159,7 +5157,7 @@ struct btrfs_dir_list {
  */
 static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                               struct inode *start_inode,
+                               struct btrfs_inode *start_inode,
                                struct btrfs_log_ctx *ctx)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5237,7 +5235,7 @@ process_leaf:
                                goto next_dir_inode;
                        }
 
-                       if (btrfs_inode_in_log(di_inode, trans->transid)) {
+                       if (btrfs_inode_in_log(BTRFS_I(di_inode), trans->transid)) {
                                iput(di_inode);
                                break;
                        }
@@ -5245,10 +5243,10 @@ process_leaf:
                        ctx->log_new_dentries = false;
                        if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
                                log_mode = LOG_INODE_ALL;
-                       ret = btrfs_log_inode(trans, root, di_inode,
+                       ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode),
                                              log_mode, 0, LLONG_MAX, ctx);
                        if (!ret &&
-                           btrfs_must_commit_transaction(trans, di_inode))
+                           btrfs_must_commit_transaction(trans, BTRFS_I(di_inode)))
                                ret = 1;
                        iput(di_inode);
                        if (ret)
@@ -5297,7 +5295,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       const u64 ino = btrfs_ino(inode);
+       const u64 ino = btrfs_ino(BTRFS_I(inode));
 
        path = btrfs_alloc_path();
        if (!path)
@@ -5365,14 +5363,14 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
 
                        if (ctx)
                                ctx->log_new_dentries = false;
-                       ret = btrfs_log_inode(trans, root, dir_inode,
+                       ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode),
                                              LOG_INODE_ALL, 0, LLONG_MAX, ctx);
                        if (!ret &&
-                           btrfs_must_commit_transaction(trans, dir_inode))
+                           btrfs_must_commit_transaction(trans, BTRFS_I(dir_inode)))
                                ret = 1;
                        if (!ret && ctx && ctx->log_new_dentries)
                                ret = log_new_dir_dentries(trans, root,
-                                                          dir_inode, ctx);
+                                                  BTRFS_I(dir_inode), ctx);
                        iput(dir_inode);
                        if (ret)
                                goto out;
@@ -5436,7 +5434,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
        if (ret)
                goto end_no_trans;
 
-       if (btrfs_inode_in_log(inode, trans->transid)) {
+       if (btrfs_inode_in_log(BTRFS_I(inode), trans->transid)) {
                ret = BTRFS_NO_LOG_SYNC;
                goto end_no_trans;
        }
@@ -5445,7 +5443,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
        if (ret)
                goto end_no_trans;
 
-       ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx);
+       ret = btrfs_log_inode(trans, root, BTRFS_I(inode), inode_only,
+                       start, end, ctx);
        if (ret)
                goto end_trans;
 
@@ -5521,7 +5520,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                        break;
 
                if (BTRFS_I(inode)->generation > last_committed) {
-                       ret = btrfs_log_inode(trans, root, inode,
+                       ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
                                              LOG_INODE_EXISTS,
                                              0, LLONG_MAX, ctx);
                        if (ret)
@@ -5535,7 +5534,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                old_parent = parent;
        }
        if (log_dentries)
-               ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+               ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), ctx);
        else
                ret = 0;
 end_trans:
@@ -5730,7 +5729,7 @@ error:
  * inodes, etc) are done.
  */
 void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
-                            struct inode *dir, struct inode *inode,
+                            struct btrfs_inode *dir, struct btrfs_inode *inode,
                             int for_rename)
 {
        /*
@@ -5743,23 +5742,23 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
         * into the file.  When the file is logged we check it and
         * don't log the parents if the file is fully on disk.
         */
-       mutex_lock(&BTRFS_I(inode)->log_mutex);
-       BTRFS_I(inode)->last_unlink_trans = trans->transid;
-       mutex_unlock(&BTRFS_I(inode)->log_mutex);
+       mutex_lock(&inode->log_mutex);
+       inode->last_unlink_trans = trans->transid;
+       mutex_unlock(&inode->log_mutex);
 
        /*
         * if this directory was already logged any new
         * names for this file/dir will get recorded
         */
        smp_mb();
-       if (BTRFS_I(dir)->logged_trans == trans->transid)
+       if (dir->logged_trans == trans->transid)
                return;
 
        /*
         * if the inode we're about to unlink was logged,
         * the log will be properly updated for any new names
         */
-       if (BTRFS_I(inode)->logged_trans == trans->transid)
+       if (inode->logged_trans == trans->transid)
                return;
 
        /*
@@ -5776,9 +5775,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
        return;
 
 record:
-       mutex_lock(&BTRFS_I(dir)->log_mutex);
-       BTRFS_I(dir)->last_unlink_trans = trans->transid;
-       mutex_unlock(&BTRFS_I(dir)->log_mutex);
+       mutex_lock(&dir->log_mutex);
+       dir->last_unlink_trans = trans->transid;
+       mutex_unlock(&dir->log_mutex);
 }
 
 /*
@@ -5794,11 +5793,11 @@ record:
  * parent root and tree of tree roots trees, etc) are done.
  */
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
-                                  struct inode *dir)
+                                  struct btrfs_inode *dir)
 {
-       mutex_lock(&BTRFS_I(dir)->log_mutex);
-       BTRFS_I(dir)->last_unlink_trans = trans->transid;
-       mutex_unlock(&BTRFS_I(dir)->log_mutex);
+       mutex_lock(&dir->log_mutex);
+       dir->last_unlink_trans = trans->transid;
+       mutex_unlock(&dir->log_mutex);
 }
 
 /*
@@ -5809,30 +5808,28 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
  * full transaction commit is required.
  */
 int btrfs_log_new_name(struct btrfs_trans_handle *trans,
-                       struct inode *inode, struct inode *old_dir,
+                       struct btrfs_inode *inode, struct btrfs_inode *old_dir,
                        struct dentry *parent)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root * root = BTRFS_I(inode)->root;
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+       struct btrfs_root *root = inode->root;
 
        /*
         * this will force the logging code to walk the dentry chain
         * up for the file
         */
-       if (S_ISREG(inode->i_mode))
-               BTRFS_I(inode)->last_unlink_trans = trans->transid;
+       if (S_ISREG(inode->vfs_inode.i_mode))
+               inode->last_unlink_trans = trans->transid;
 
        /*
         * if this inode hasn't been logged and directory we're renaming it
         * from hasn't been logged, we don't need to log it
         */
-       if (BTRFS_I(inode)->logged_trans <=
-           fs_info->last_trans_committed &&
-           (!old_dir || BTRFS_I(old_dir)->logged_trans <=
-                   fs_info->last_trans_committed))
+       if (inode->logged_trans <= fs_info->last_trans_committed &&
+           (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
                return 0;
 
-       return btrfs_log_inode_parent(trans, root, inode, parent, 0,
+       return btrfs_log_inode_parent(trans, root, &inode->vfs_inode, parent, 0,
                                      LLONG_MAX, 1, NULL);
 }
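
The tree-log.c hunks above are one mechanical conversion: functions that took a VFS struct inode * now take struct btrfs_inode *, so the BTRFS_I() unwrapping moves out to the callers and the VFS object is reached through ->vfs_inode. A minimal sketch of the embedding that makes both directions essentially free (btrfs-private fields elided, names as in the btrfs sources):

	struct btrfs_inode {
		/* ... btrfs-private fields (root, log_mutex, flags, ...) ... */
		struct inode vfs_inode;		/* embedded VFS inode */
	};

	static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
	{
		return container_of(inode, struct btrfs_inode, vfs_inode);
	}

Callers convert one way with BTRFS_I(inode) and back with &bi->vfs_inode; both are pointer arithmetic, so the churn is purely about type safety at the interfaces.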
 
index ab858e31ccbc2210a01a9b7ce69bf52127d00b8b..483027f9a7f4f8dbc469c54c04ab47b4a9b4a116 100644 (file)
@@ -48,13 +48,13 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
 static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info,
                                             struct btrfs_trans_handle *trans)
 {
-       ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid;
+       WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid);
 }
 
 static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info,
                                             struct btrfs_trans_handle *trans)
 {
-       return ACCESS_ONCE(fs_info->last_trans_log_full_commit) ==
+       return READ_ONCE(fs_info->last_trans_log_full_commit) ==
                trans->transid;
 }
 
@@ -72,19 +72,19 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
 int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 const char *name, int name_len,
-                                struct inode *dir, u64 index);
+                                struct btrfs_inode *dir, u64 index);
 int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               const char *name, int name_len,
-                              struct inode *inode, u64 dirid);
+                              struct btrfs_inode *inode, u64 dirid);
 void btrfs_end_log_trans(struct btrfs_root *root);
 int btrfs_pin_log_trans(struct btrfs_root *root);
 void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
-                            struct inode *dir, struct inode *inode,
+                            struct btrfs_inode *dir, struct btrfs_inode *inode,
                             int for_rename);
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
-                                  struct inode *dir);
+                                  struct btrfs_inode *dir);
 int btrfs_log_new_name(struct btrfs_trans_handle *trans,
-                       struct inode *inode, struct inode *old_dir,
+                       struct btrfs_inode *inode, struct btrfs_inode *old_dir,
                        struct dentry *parent);
 #endif
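
The ACCESS_ONCE() changes at the top of this header are part of the tree-wide move to READ_ONCE()/WRITE_ONCE() from <linux/compiler.h>, which name the direction of the access and, unlike the old macro, also handle non-scalar types. A small sketch of the lockless publish/check idiom they serve (the field name here is invented for illustration):

	static u64 last_full_commit;	/* hypothetical shared, lock-free field */

	static void publish(u64 transid)
	{
		WRITE_ONCE(last_full_commit, transid);	/* single, untorn store */
	}

	static bool is_published(u64 transid)
	{
		return READ_ONCE(last_full_commit) == transid; /* forced fresh load */
	}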
index b1434bb57e36e323a6cd30f08011f26410a99570..d8edf164f81cb44603f9d05f6ea6a6f79675c3c4 100644 (file)
@@ -52,13 +52,13 @@ void ulist_init(struct ulist *ulist)
 }
 
 /**
- * ulist_fini - free up additionally allocated memory for the ulist
+ * ulist_release - free up additionally allocated memory for the ulist
  * @ulist:     the ulist from which to free the additional memory
  *
  * This is useful in cases where the base 'struct ulist' has been statically
  * allocated.
  */
-static void ulist_fini(struct ulist *ulist)
+void ulist_release(struct ulist *ulist)
 {
        struct ulist_node *node;
        struct ulist_node *next;
@@ -79,7 +79,7 @@ static void ulist_fini(struct ulist *ulist)
  */
 void ulist_reinit(struct ulist *ulist)
 {
-       ulist_fini(ulist);
+       ulist_release(ulist);
        ulist_init(ulist);
 }
 
@@ -105,13 +105,13 @@ struct ulist *ulist_alloc(gfp_t gfp_mask)
  * ulist_free - free dynamically allocated ulist
  * @ulist:     ulist to free
  *
- * It is not necessary to call ulist_fini before.
+ * It is not necessary to call ulist_release before.
  */
 void ulist_free(struct ulist *ulist)
 {
        if (!ulist)
                return;
-       ulist_fini(ulist);
+       ulist_release(ulist);
        kfree(ulist);
 }
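
Exporting ulist_release() makes the two ulist lifecycles symmetric: ulist_init()/ulist_release() for a caller-owned (static or on-stack) base struct, ulist_alloc()/ulist_free() for a fully dynamic one. A rough usage sketch of the caller-owned variant:

	struct ulist seen;

	ulist_init(&seen);
	/* ... ulist_add(&seen, val, aux, GFP_NOFS), iteration, etc. ... */
	ulist_release(&seen);	/* frees the extra nodes, not 'seen' itself */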
 
index a01a2c45825f5c8cfa569218850acd91c3cf3e1c..53c9136327337f0732e42e8e276e2335d02ac92d 100644 (file)
@@ -19,9 +19,6 @@
  *
  */
 struct ulist_iterator {
-#ifdef CONFIG_BTRFS_DEBUG
-       int i;
-#endif
        struct list_head *cur_list;  /* hint to start search */
 };
 
@@ -32,10 +29,6 @@ struct ulist_node {
        u64 val;                /* value to store */
        u64 aux;                /* auxiliary value saved along with the val */
 
-#ifdef CONFIG_BTRFS_DEBUG
-       int seqnum;             /* sequence number this node is added */
-#endif
-
        struct list_head list;  /* used to link node */
        struct rb_node rb_node; /* used to speed up search */
 };
@@ -51,6 +44,7 @@ struct ulist {
 };
 
 void ulist_init(struct ulist *ulist);
+void ulist_release(struct ulist *ulist);
 void ulist_reinit(struct ulist *ulist);
 struct ulist *ulist_alloc(gfp_t gfp_mask);
 void ulist_free(struct ulist *ulist);
index b2e70073a10dc3cf3e1bb37771e33ffb8263a8bb..13e55d13045d03d84d910594b8bf033a1f323adf 100644 (file)
@@ -134,8 +134,7 @@ const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
 };
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info,
-                               struct btrfs_device *device);
+                               struct btrfs_fs_info *fs_info);
 static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
@@ -2440,7 +2439,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path)
 
        if (seeding_dev) {
                mutex_lock(&fs_info->chunk_mutex);
-               ret = init_first_rw_device(trans, fs_info, device);
+               ret = init_first_rw_device(trans, fs_info);
                mutex_unlock(&fs_info->chunk_mutex);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
@@ -4584,8 +4583,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
                                / sizeof(struct btrfs_stripe) + 1)
 
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-                              struct btrfs_fs_info *fs_info, u64 start,
-                              u64 type)
+                              u64 start, u64 type)
 {
        struct btrfs_fs_info *info = trans->fs_info;
        struct btrfs_fs_devices *fs_devices = info->fs_devices;
@@ -5009,12 +5007,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
        ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
        chunk_offset = find_next_chunk(fs_info);
-       return __btrfs_alloc_chunk(trans, fs_info, chunk_offset, type);
+       return __btrfs_alloc_chunk(trans, chunk_offset, type);
 }
 
 static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
-                                        struct btrfs_fs_info *fs_info,
-                                        struct btrfs_device *device)
+                                        struct btrfs_fs_info *fs_info)
 {
        struct btrfs_root *extent_root = fs_info->extent_root;
        u64 chunk_offset;
@@ -5024,14 +5021,13 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
        chunk_offset = find_next_chunk(fs_info);
        alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
-       ret = __btrfs_alloc_chunk(trans, fs_info, chunk_offset, alloc_profile);
+       ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile);
        if (ret)
                return ret;
 
        sys_chunk_offset = find_next_chunk(fs_info);
        alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
-       ret = __btrfs_alloc_chunk(trans, fs_info, sys_chunk_offset,
-                                 alloc_profile);
+       ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile);
        return ret;
 }
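
The volumes.c changes drop arguments the callee can already reach or never used: __btrfs_alloc_chunk() takes its fs_info from trans->fs_info (visible above as info = trans->fs_info), and init_first_rw_device() ignored its device parameter. The shape of the cleanup, sketched on an invented helper:

	/* before: fs_info threaded alongside the handle that already carries it */
	static int do_work(struct btrfs_trans_handle *trans,
			   struct btrfs_fs_info *fs_info, u64 start);

	/* after: one source of truth, no way to pass a mismatched pair */
	static int do_work(struct btrfs_trans_handle *trans, u64 start)
	{
		struct btrfs_fs_info *fs_info = trans->fs_info;
		/* ... */
		return 0;
	}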
 
index 9621c7f2503ed9b2e13961bc592d4a78e39feed2..b3cbf80c5acfa2ebe0541f14d60278f477664908 100644 (file)
@@ -47,8 +47,8 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
                return -ENOMEM;
 
        /* lookup the xattr by name */
-       di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
-                               strlen(name), 0);
+       di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(BTRFS_I(inode)),
+                       name, strlen(name), 0);
        if (!di) {
                ret = -ENODATA;
                goto out;
@@ -108,8 +108,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
        path->skip_release_on_error = 1;
 
        if (!value) {
-               di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
-                                       name, name_len, -1);
+               di = btrfs_lookup_xattr(trans, root, path,
+                               btrfs_ino(BTRFS_I(inode)), name, name_len, -1);
                if (!di && (flags & XATTR_REPLACE))
                        ret = -ENODATA;
                else if (IS_ERR(di))
@@ -128,8 +128,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
         */
        if (flags & XATTR_REPLACE) {
                ASSERT(inode_is_locked(inode));
-               di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
-                                       name, name_len, 0);
+               di = btrfs_lookup_xattr(NULL, root, path,
+                               btrfs_ino(BTRFS_I(inode)), name, name_len, 0);
                if (!di)
                        ret = -ENODATA;
                else if (IS_ERR(di))
@@ -140,7 +140,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                di = NULL;
        }
 
-       ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+       ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(BTRFS_I(inode)),
                                      name, name_len, value, size);
        if (ret == -EOVERFLOW) {
                /*
@@ -278,7 +278,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
         * NOTE: we set key.offset = 0; because we want to start with the
         * first xattr that we find and walk forward
         */
-       key.objectid = btrfs_ino(inode);
+       key.objectid = btrfs_ino(BTRFS_I(inode));
        key.type = BTRFS_XATTR_ITEM_KEY;
        key.offset = 0;
 
index 0e87401cf33535b03a1d2aa9da6e919d8a56a906..28484b3ebc98c76a563e7fe7fa4d3780ec2241ee 100644 (file)
@@ -2395,7 +2395,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
                            loff_t pos, loff_t *bytes)
 {
        struct inode *inode = mapping->host;
-       unsigned blocksize = 1 << inode->i_blkbits;
+       unsigned int blocksize = i_blocksize(inode);
        struct page *page;
        void *fsdata;
        pgoff_t index, curidx;
@@ -2475,8 +2475,8 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
                        get_block_t *get_block, loff_t *bytes)
 {
        struct inode *inode = mapping->host;
-       unsigned blocksize = 1 << inode->i_blkbits;
-       unsigned zerofrom;
+       unsigned int blocksize = i_blocksize(inode);
+       unsigned int zerofrom;
        int err;
 
        err = cont_expand_zero(file, mapping, pos, bytes);
@@ -2838,7 +2838,7 @@ int nobh_truncate_page(struct address_space *mapping,
        struct buffer_head map_bh;
        int err;
 
-       blocksize = 1 << inode->i_blkbits;
+       blocksize = i_blocksize(inode);
        length = offset & (blocksize - 1);
 
        /* Block boundary? Nothing to do */
@@ -2916,7 +2916,7 @@ int block_truncate_page(struct address_space *mapping,
        struct buffer_head *bh;
        int err;
 
-       blocksize = 1 << inode->i_blkbits;
+       blocksize = i_blocksize(inode);
        length = offset & (blocksize - 1);
 
        /* Block boundary? Nothing to do */
@@ -3028,7 +3028,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
        struct inode *inode = mapping->host;
        tmp.b_state = 0;
        tmp.b_blocknr = 0;
-       tmp.b_size = 1 << inode->i_blkbits;
+       tmp.b_size = i_blocksize(inode);
        get_block(inode, block, &tmp, 0);
        return tmp.b_blocknr;
 }
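
All four buffer.c hunks make the same substitution: the open-coded 1 << inode->i_blkbits becomes the i_blocksize() helper, and the bare unsigned locals become unsigned int. The helper is tiny; roughly, as added to <linux/fs.h> in this kernel generation:

	static inline unsigned int i_blocksize(const struct inode *node)
	{
		return (1 << node->i_blkbits);
	}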
index e4b066cd912ad9ea249c3c88f81026a68186095c..f297a9e1864293d4eedfac5dea5957e65af1b25d 100644 (file)
@@ -391,6 +391,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
                        nr_pages = i;
                        if (nr_pages > 0) {
                                len = nr_pages << PAGE_SHIFT;
+                               osd_req_op_extent_update(req, 0, len);
                                break;
                        }
                        goto out_pages;
@@ -751,7 +752,7 @@ static int ceph_writepages_start(struct address_space *mapping,
        struct pagevec pvec;
        int done = 0;
        int rc = 0;
-       unsigned wsize = 1 << inode->i_blkbits;
+       unsigned int wsize = i_blocksize(inode);
        struct ceph_osd_request *req = NULL;
        int do_sync = 0;
        loff_t snap_size, i_size;
@@ -771,7 +772,7 @@ static int ceph_writepages_start(struct address_space *mapping,
             wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
             (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-       if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+       if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
                if (ci->i_wrbuffer_ref > 0) {
                        pr_warn_ratelimited(
                                "writepage_start %p %lld forced umount\n",
@@ -1017,8 +1018,7 @@ new_request:
                                        &ci->i_layout, vino,
                                        offset, &len, 0, num_ops,
                                        CEPH_OSD_OP_WRITE,
-                                       CEPH_OSD_FLAG_WRITE |
-                                       CEPH_OSD_FLAG_ONDISK,
+                                       CEPH_OSD_FLAG_WRITE,
                                        snapc, truncate_seq,
                                        truncate_size, false);
                if (IS_ERR(req)) {
@@ -1028,8 +1028,7 @@ new_request:
                                                min(num_ops,
                                                    CEPH_OSD_SLAB_OPS),
                                                CEPH_OSD_OP_WRITE,
-                                               CEPH_OSD_FLAG_WRITE |
-                                               CEPH_OSD_FLAG_ONDISK,
+                                               CEPH_OSD_FLAG_WRITE,
                                                snapc, truncate_seq,
                                                truncate_size, true);
                        BUG_ON(IS_ERR(req));
@@ -1194,7 +1193,7 @@ static int ceph_update_writeable_page(struct file *file,
        int r;
        struct ceph_snap_context *snapc, *oldest;
 
-       if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+       if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
                dout(" page %p forced umount\n", page);
                unlock_page(page);
                return -EIO;
@@ -1386,8 +1385,9 @@ static void ceph_restore_sigs(sigset_t *oldset)
 /*
  * vm ops
  */
-static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ceph_filemap_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct inode *inode = file_inode(vma->vm_file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_file_info *fi = vma->vm_file->private_data;
@@ -1416,7 +1416,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
            ci->i_inline_version == CEPH_INLINE_NONE) {
                current->journal_info = vma->vm_file;
-               ret = filemap_fault(vma, vmf);
+               ret = filemap_fault(vmf);
                current->journal_info = NULL;
        } else
                ret = -EAGAIN;
@@ -1477,8 +1477,9 @@ out_restore:
 /*
  * Reuse write_begin here for simplicity.
  */
-static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ceph_page_mkwrite(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct inode *inode = file_inode(vma->vm_file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_file_info *fi = vma->vm_file->private_data;
@@ -1679,8 +1680,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 
        req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                    ceph_vino(inode), 0, &len, 0, 1,
-                                   CEPH_OSD_OP_CREATE,
-                                   CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+                                   CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE,
                                    NULL, 0, 0, false);
        if (IS_ERR(req)) {
                err = PTR_ERR(req);
@@ -1697,8 +1697,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 
        req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                    ceph_vino(inode), 0, &len, 1, 3,
-                                   CEPH_OSD_OP_WRITE,
-                                   CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+                                   CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
                                    NULL, ci->i_truncate_seq,
                                    ci->i_truncate_size, false);
        if (IS_ERR(req)) {
@@ -1871,7 +1870,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
                goto out_unlock;
        }
 
-       wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK;
+       wr_req->r_flags = CEPH_OSD_FLAG_WRITE;
        osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
        ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
        ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
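
The ceph_filemap_fault()/ceph_page_mkwrite() hunks track a VFS interface change: ->fault and ->page_mkwrite handlers lose their vm_area_struct argument because struct vm_fault now carries it as vmf->vma. A converted handler recovers the VMA on its first line; sketched on a hypothetical filesystem:

	static int example_fault(struct vm_fault *vmf)	/* illustrative only */
	{
		struct vm_area_struct *vma = vmf->vma;	/* vma now rides in vmf */
		struct inode *inode = file_inode(vma->vm_file);

		/* ... per-fs checks on inode ... */
		return filemap_fault(vmf);	/* generic path, same new signature */
	}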
index 5bc5d37b121712a2f288ede38b46420d13a2f0e5..4e7421caf3804c49ef052c02a736965e29437876 100644 (file)
@@ -234,7 +234,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
                fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
                                inode);
                if (fscache_cookie_enabled(ci->fscache)) {
-                       dout("fscache_file_set_cookie %p %p enabing cache\n",
+                       dout("fscache_file_set_cookie %p %p enabling cache\n",
                             inode, filp);
                }
        }
index 94fd76d04683d88103b42ff71a02490201a9783f..cd966f276a8d70ee9a3daa50c46eee5b1284f37a 100644 (file)
@@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
 /*
  * Return caps we have registered with the MDS(s) as 'wanted'.
  */
-int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
+int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
 {
        struct ceph_cap *cap;
        struct rb_node *p;
@@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
 
        for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
                cap = rb_entry(p, struct ceph_cap, ci_node);
-               if (!__cap_is_valid(cap))
+               if (check && !__cap_is_valid(cap))
                        continue;
                if (cap == ci->i_auth_cap)
                        mds_wanted |= cap->mds_wanted;
@@ -1184,6 +1184,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
                delayed = 1;
        }
        ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
+       if (want & ~cap->mds_wanted) {
+       /* user space may open/close a single file frequently.
+        * This avoids dropping mds_wanted immediately after
+                * requesting new mds_wanted.
+                */
+               __cap_set_timeouts(mdsc, ci);
+       }
 
        cap->issued &= retain;  /* drop bits we don't want */
        if (cap->implemented & ~cap->issued) {
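
The new want & ~cap->mds_wanted test above is plain mask arithmetic: it is non-zero exactly when the caller wants a capability bit the MDS does not already track as wanted, and only then are the cap timeouts refreshed. A worked example with made-up bit values:

	/* hypothetical values, for illustration only */
	int want       = 0x3;    /* e.g. FILE_RD | FILE_WR            */
	int mds_wanted = 0x1;    /* MDS currently tracks FILE_RD only */

	/* want & ~mds_wanted == 0x2: WR is newly wanted, so
	 * __cap_set_timeouts() keeps mds_wanted from being dropped
	 * right after it was requested */
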
@@ -2084,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
        dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
 
-       ceph_sync_write_wait(inode);
-
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret < 0)
                goto out;
@@ -2477,23 +2482,22 @@ again:
 
                if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
                        int mds_wanted;
-                       if (ACCESS_ONCE(mdsc->fsc->mount_state) ==
+                       if (READ_ONCE(mdsc->fsc->mount_state) ==
                            CEPH_MOUNT_SHUTDOWN) {
                                dout("get_cap_refs %p forced umount\n", inode);
                                *err = -EIO;
                                ret = 1;
                                goto out_unlock;
                        }
-                       mds_wanted = __ceph_caps_mds_wanted(ci);
-                       if ((mds_wanted & need) != need) {
+                       mds_wanted = __ceph_caps_mds_wanted(ci, false);
+                       if (need & ~(mds_wanted & need)) {
                                dout("get_cap_refs %p caps were dropped"
                                     " (session killed?)\n", inode);
                                *err = -ESTALE;
                                ret = 1;
                                goto out_unlock;
                        }
-                       if ((mds_wanted & file_wanted) ==
-                           (file_wanted & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR)))
+                       if (!(file_wanted & ~mds_wanted))
                                ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
                }
 
@@ -3404,6 +3408,7 @@ retry:
                        tcap->implemented |= issued;
                        if (cap == ci->i_auth_cap)
                                ci->i_auth_cap = tcap;
+
                        if (!list_empty(&ci->i_cap_flush_list) &&
                            ci->i_auth_cap == tcap) {
                                spin_lock(&mdsc->cap_dirty_lock);
@@ -3417,9 +3422,18 @@ retry:
        } else if (tsession) {
                /* add placeholder for the export target */
                int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
+               tcap = new_cap;
                ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
                             t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
 
+               if (!list_empty(&ci->i_cap_flush_list) &&
+                   ci->i_auth_cap == tcap) {
+                       spin_lock(&mdsc->cap_dirty_lock);
+                       list_move_tail(&ci->i_flushing_item,
+                                      &tcap->session->s_cap_flushing);
+                       spin_unlock(&mdsc->cap_dirty_lock);
+               }
+
                __ceph_remove_cap(cap, false);
                goto out_unlock;
        }
@@ -3924,9 +3938,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 }
 
 int ceph_encode_dentry_release(void **p, struct dentry *dentry,
+                              struct inode *dir,
                               int mds, int drop, int unless)
 {
-       struct inode *dir = d_inode(dentry->d_parent);
+       struct dentry *parent = NULL;
        struct ceph_mds_request_release *rel = *p;
        struct ceph_dentry_info *di = ceph_dentry(dentry);
        int force = 0;
@@ -3941,9 +3956,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
        spin_lock(&dentry->d_lock);
        if (di->lease_session && di->lease_session->s_mds == mds)
                force = 1;
+       if (!dir) {
+               parent = dget(dentry->d_parent);
+               dir = d_inode(parent);
+       }
        spin_unlock(&dentry->d_lock);
 
        ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
+       dput(parent);
 
        spin_lock(&dentry->d_lock);
        if (ret && di->lease_session && di->lease_session->s_mds == mds) {
index 39ff678e567fcb5c31d9729081119adaa4578def..f2ae393e2c31a2b3dbca7a5f81eeb5b81afa5e1b 100644 (file)
@@ -70,7 +70,7 @@ static int mdsc_show(struct seq_file *s, void *p)
 
                seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
 
-               if (req->r_got_unsafe)
+               if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
                        seq_puts(s, "\t(unsafe)");
                else
                        seq_puts(s, "\t");
index 8ab1fdf0bd49b74f380a578aea92ce738393403d..3e9ad501addfe92f171a40dffb93c65209819cbe 100644 (file)
@@ -371,7 +371,7 @@ more:
                /* hints to request -> mds selection code */
                req->r_direct_mode = USE_AUTH_MDS;
                req->r_direct_hash = ceph_frag_value(frag);
-               req->r_direct_is_hash = true;
+               __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
                if (fi->last_name) {
                        req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
                        if (!req->r_path2) {
@@ -417,7 +417,7 @@ more:
                fi->frag = frag;
                fi->last_readdir = req;
 
-               if (req->r_did_prepopulate) {
+               if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
                        fi->readdir_cache_idx = req->r_readdir_cache_idx;
                        if (fi->readdir_cache_idx < 0) {
                                /* preclude from marking dir ordered */
@@ -752,7 +752,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                mask |= CEPH_CAP_XATTR_SHARED;
        req->r_args.getattr.mask = cpu_to_le32(mask);
 
-       req->r_locked_dir = dir;
+       req->r_parent = dir;
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        err = ceph_mdsc_do_request(mdsc, NULL, req);
        err = ceph_handle_snapdir(req, dentry, err);
        dentry = ceph_finish_lookup(req, dentry, err);
@@ -813,7 +814,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
        }
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
-       req->r_locked_dir = dir;
+       req->r_parent = dir;
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_args.mknod.mode = cpu_to_le32(mode);
        req->r_args.mknod.rdev = cpu_to_le32(rdev);
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -864,7 +866,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
                ceph_mdsc_put_request(req);
                goto out;
        }
-       req->r_locked_dir = dir;
+       req->r_parent = dir;
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -913,7 +916,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
-       req->r_locked_dir = dir;
+       req->r_parent = dir;
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_args.mkdir.mode = cpu_to_le32(mode);
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -957,7 +961,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        req->r_old_dentry = dget(old_dentry);
-       req->r_locked_dir = dir;
+       req->r_parent = dir;
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
        /* release LINK_SHARED on source inode (mds will lock it) */
@@ -1023,7 +1028,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
        }
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
-       req->r_locked_dir = dir;
+       req->r_parent = dir;
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
        req->r_inode_drop = drop_caps_for_unlink(inode);
@@ -1066,7 +1072,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
        req->r_num_caps = 2;
        req->r_old_dentry = dget(old_dentry);
        req->r_old_dentry_dir = old_dir;
-       req->r_locked_dir = new_dir;
+       req->r_parent = new_dir;
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -1194,7 +1201,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
        struct inode *dir;
 
        if (flags & LOOKUP_RCU) {
-               parent = ACCESS_ONCE(dentry->d_parent);
+               parent = READ_ONCE(dentry->d_parent);
                dir = d_inode_rcu(parent);
                if (!dir)
                        return -ECHILD;
@@ -1237,11 +1244,12 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
                        return -ECHILD;
 
                op = ceph_snap(dir) == CEPH_SNAPDIR ?
-                       CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR;
+                       CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
                req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
                if (!IS_ERR(req)) {
                        req->r_dentry = dget(dentry);
-                       req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2;
+                       req->r_num_caps = 2;
+                       req->r_parent = dir;
 
                        mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
                        if (ceph_security_xattr_wanted(dir))
index 180bbef760f2c8c12fd94d458c246014634233dd..e8f11fa565c53ac58fddf402f6ade6320d47d490 100644 (file)
@@ -207,7 +207,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
        req->r_inode = d_inode(child);
        ihold(d_inode(child));
        req->r_ino2 = ceph_vino(d_inode(parent));
-       req->r_locked_dir = d_inode(parent);
+       req->r_parent = d_inode(parent);
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
 
index 045d30d2662485a4207945757659383fb314fa10..26cc95421cca6e62ef10bfd32b18cf1e526b7c51 100644 (file)
@@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file)
        spin_lock(&ci->i_ceph_lock);
        if (__ceph_is_any_real_caps(ci) &&
            (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
-               int mds_wanted = __ceph_caps_mds_wanted(ci);
+               int mds_wanted = __ceph_caps_mds_wanted(ci, true);
                int issued = __ceph_caps_issued(ci, NULL);
 
                dout("open %p fmode %d want %s issued %s using existing\n",
@@ -379,7 +379,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                mask |= CEPH_CAP_XATTR_SHARED;
        req->r_args.open.mask = cpu_to_le32(mask);
 
-       req->r_locked_dir = dir;           /* caller holds dir->i_mutex */
+       req->r_parent = dir;
+       set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        err = ceph_mdsc_do_request(mdsc,
                                   (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
                                   req);
@@ -758,9 +759,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
                goto out;
        }
 
-       req->r_flags =  CEPH_OSD_FLAG_ORDERSNAP |
-                       CEPH_OSD_FLAG_ONDISK |
-                       CEPH_OSD_FLAG_WRITE;
+       req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
        ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
        ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
 
@@ -794,89 +793,6 @@ out:
        kfree(aio_work);
 }
 
-/*
- * Write commit request unsafe callback, called to tell us when a
- * request is unsafe (that is, in flight--has been handed to the
- * messenger to send to its target osd).  It is called again when
- * we've received a response message indicating the request is
- * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
- * is completed early (and unsuccessfully) due to a timeout or
- * interrupt.
- *
- * This is used if we requested both an ACK and ONDISK commit reply
- * from the OSD.
- */
-static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
-{
-       struct ceph_inode_info *ci = ceph_inode(req->r_inode);
-
-       dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
-               unsafe ? "un" : "");
-       if (unsafe) {
-               ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
-               spin_lock(&ci->i_unsafe_lock);
-               list_add_tail(&req->r_unsafe_item,
-                             &ci->i_unsafe_writes);
-               spin_unlock(&ci->i_unsafe_lock);
-
-               complete_all(&req->r_completion);
-       } else {
-               spin_lock(&ci->i_unsafe_lock);
-               list_del_init(&req->r_unsafe_item);
-               spin_unlock(&ci->i_unsafe_lock);
-               ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
-       }
-}
-
-/*
- * Wait on any unsafe replies for the given inode.  First wait on the
- * newest request, and make that the upper bound.  Then, if there are
- * more requests, keep waiting on the oldest as long as it is still older
- * than the original request.
- */
-void ceph_sync_write_wait(struct inode *inode)
-{
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       struct list_head *head = &ci->i_unsafe_writes;
-       struct ceph_osd_request *req;
-       u64 last_tid;
-
-       if (!S_ISREG(inode->i_mode))
-               return;
-
-       spin_lock(&ci->i_unsafe_lock);
-       if (list_empty(head))
-               goto out;
-
-       /* set upper bound as _last_ entry in chain */
-
-       req = list_last_entry(head, struct ceph_osd_request,
-                             r_unsafe_item);
-       last_tid = req->r_tid;
-
-       do {
-               ceph_osdc_get_request(req);
-               spin_unlock(&ci->i_unsafe_lock);
-
-               dout("sync_write_wait on tid %llu (until %llu)\n",
-                    req->r_tid, last_tid);
-               wait_for_completion(&req->r_done_completion);
-               ceph_osdc_put_request(req);
-
-               spin_lock(&ci->i_unsafe_lock);
-               /*
-                * from here on look at first entry in chain, since we
-                * only want to wait for anything older than last_tid
-                */
-               if (list_empty(head))
-                       break;
-               req = list_first_entry(head, struct ceph_osd_request,
-                                      r_unsafe_item);
-       } while (req->r_tid < last_tid);
-out:
-       spin_unlock(&ci->i_unsafe_lock);
-}
-
 static ssize_t
 ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                       struct ceph_snap_context *snapc,
@@ -915,9 +831,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                if (ret2 < 0)
                        dout("invalidate_inode_pages2_range returned %d\n", ret2);
 
-               flags = CEPH_OSD_FLAG_ORDERSNAP |
-                       CEPH_OSD_FLAG_ONDISK |
-                       CEPH_OSD_FLAG_WRITE;
+               flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
        } else {
                flags = CEPH_OSD_FLAG_READ;
        }
@@ -1116,10 +1030,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
        if (ret < 0)
                dout("invalidate_inode_pages2_range returned %d\n", ret);
 
-       flags = CEPH_OSD_FLAG_ORDERSNAP |
-               CEPH_OSD_FLAG_ONDISK |
-               CEPH_OSD_FLAG_WRITE |
-               CEPH_OSD_FLAG_ACK;
+       flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
 
        while ((len = iov_iter_count(from)) > 0) {
                size_t left;
@@ -1165,8 +1076,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                        goto out;
                }
 
-               /* get a second commit callback */
-               req->r_unsafe_callback = ceph_sync_write_unsafe;
                req->r_inode = inode;
 
                osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
@@ -1616,8 +1525,7 @@ static int ceph_zero_partial_object(struct inode *inode,
                                        ceph_vino(inode),
                                        offset, length,
                                        0, 1, op,
-                                       CEPH_OSD_FLAG_WRITE |
-                                       CEPH_OSD_FLAG_ONDISK,
+                                       CEPH_OSD_FLAG_WRITE,
                                        NULL, 0, 0, false);
        if (IS_ERR(req)) {
                ret = PTR_ERR(req);
index 5e659d054b40ae6faac23af26c5321c5af6ff69b..fd8f771f99b7d7c0943170df1003a1c78e423af0 100644 (file)
@@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_rdcache_gen = 0;
        ci->i_rdcache_revoking = 0;
 
-       INIT_LIST_HEAD(&ci->i_unsafe_writes);
        INIT_LIST_HEAD(&ci->i_unsafe_dirops);
        INIT_LIST_HEAD(&ci->i_unsafe_iops);
        spin_lock_init(&ci->i_unsafe_lock);
@@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode)
        return 1;
 }
 
-void ceph_evict_inode(struct inode *inode)
-{
-       /* wait unsafe sync writes */
-       ceph_sync_write_wait(inode);
-       truncate_inode_pages_final(&inode->i_data);
-       clear_inode(inode);
-}
-
 static inline blkcnt_t calc_inode_blocks(u64 size)
 {
        return (size + (1<<9) - 1) >> 9;
@@ -1016,7 +1007,9 @@ out:
 static void update_dentry_lease(struct dentry *dentry,
                                struct ceph_mds_reply_lease *lease,
                                struct ceph_mds_session *session,
-                               unsigned long from_time)
+                               unsigned long from_time,
+                               struct ceph_vino *tgt_vino,
+                               struct ceph_vino *dir_vino)
 {
        struct ceph_dentry_info *di = ceph_dentry(dentry);
        long unsigned duration = le32_to_cpu(lease->duration_ms);
@@ -1024,13 +1017,27 @@ static void update_dentry_lease(struct dentry *dentry,
        long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
        struct inode *dir;
 
+       /*
+        * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
+        * we expect a negative dentry.
+        */
+       if (!tgt_vino && d_really_is_positive(dentry))
+               return;
+
+       if (tgt_vino && (d_really_is_negative(dentry) ||
+                       !ceph_ino_compare(d_inode(dentry), tgt_vino)))
+               return;
+
        spin_lock(&dentry->d_lock);
        dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
             dentry, duration, ttl);
 
-       /* make lease_rdcache_gen match directory */
        dir = d_inode(dentry->d_parent);
 
+       /* make sure parent matches dir_vino */
+       if (!ceph_ino_compare(dir, dir_vino))
+               goto out_unlock;
+
        /* only track leases on regular dentries */
        if (ceph_snap(dir) != CEPH_NOSNAP)
                goto out_unlock;
@@ -1108,61 +1115,27 @@ out:
  *
  * Called with snap_rwsem (read).
  */
-int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
-                   struct ceph_mds_session *session)
+int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
 {
+       struct ceph_mds_session *session = req->r_session;
        struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
        struct inode *in = NULL;
-       struct ceph_vino vino;
+       struct ceph_vino tvino, dvino;
        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
        int err = 0;
 
        dout("fill_trace %p is_dentry %d is_target %d\n", req,
             rinfo->head->is_dentry, rinfo->head->is_target);
 
-#if 0
-       /*
-        * Debugging hook:
-        *
-        * If we resend completed ops to a recovering mds, we get no
-        * trace.  Since that is very rare, pretend this is the case
-        * to ensure the 'no trace' handlers in the callers behave.
-        *
-        * Fill in inodes unconditionally to avoid breaking cap
-        * invariants.
-        */
-       if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
-               pr_info("fill_trace faking empty trace on %lld %s\n",
-                       req->r_tid, ceph_mds_op_name(rinfo->head->op));
-               if (rinfo->head->is_dentry) {
-                       rinfo->head->is_dentry = 0;
-                       err = fill_inode(req->r_locked_dir,
-                                        &rinfo->diri, rinfo->dirfrag,
-                                        session, req->r_request_started, -1);
-               }
-               if (rinfo->head->is_target) {
-                       rinfo->head->is_target = 0;
-                       ininfo = rinfo->targeti.in;
-                       vino.ino = le64_to_cpu(ininfo->ino);
-                       vino.snap = le64_to_cpu(ininfo->snapid);
-                       in = ceph_get_inode(sb, vino);
-                       err = fill_inode(in, &rinfo->targeti, NULL,
-                                        session, req->r_request_started,
-                                        req->r_fmode);
-                       iput(in);
-               }
-       }
-#endif
-
        if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
                dout("fill_trace reply is empty!\n");
-               if (rinfo->head->result == 0 && req->r_locked_dir)
+               if (rinfo->head->result == 0 && req->r_parent)
                        ceph_invalidate_dir_request(req);
                return 0;
        }
 
        if (rinfo->head->is_dentry) {
-               struct inode *dir = req->r_locked_dir;
+               struct inode *dir = req->r_parent;
 
                if (dir) {
                        err = fill_inode(dir, NULL,
@@ -1188,8 +1161,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                        dname.name = rinfo->dname;
                        dname.len = rinfo->dname_len;
                        dname.hash = full_name_hash(parent, dname.name, dname.len);
-                       vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
-                       vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+                       tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+                       tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
 retry_lookup:
                        dn = d_lookup(parent, &dname);
                        dout("d_lookup on parent=%p name=%.*s got %p\n",
@@ -1206,8 +1179,8 @@ retry_lookup:
                                }
                                err = 0;
                        } else if (d_really_is_positive(dn) &&
-                                  (ceph_ino(d_inode(dn)) != vino.ino ||
-                                   ceph_snap(d_inode(dn)) != vino.snap)) {
+                                  (ceph_ino(d_inode(dn)) != tvino.ino ||
+                                   ceph_snap(d_inode(dn)) != tvino.snap)) {
                                dout(" dn %p points to wrong inode %p\n",
                                     dn, d_inode(dn));
                                d_delete(dn);
@@ -1221,10 +1194,10 @@ retry_lookup:
        }
 
        if (rinfo->head->is_target) {
-               vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
-               vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+               tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+               tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
 
-               in = ceph_get_inode(sb, vino);
+               in = ceph_get_inode(sb, tvino);
                if (IS_ERR(in)) {
                        err = PTR_ERR(in);
                        goto done;
@@ -1233,8 +1206,8 @@ retry_lookup:
 
                err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
                                session, req->r_request_started,
-                               (!req->r_aborted && rinfo->head->result == 0) ?
-                               req->r_fmode : -1,
+                               (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+                               rinfo->head->result == 0) ?  req->r_fmode : -1,
                                &req->r_caps_reservation);
                if (err < 0) {
                        pr_err("fill_inode badness %p %llx.%llx\n",
@@ -1247,8 +1220,9 @@ retry_lookup:
         * ignore null lease/binding on snapdir ENOENT, or else we
         * will have trouble splicing in the virtual snapdir later
         */
-       if (rinfo->head->is_dentry && !req->r_aborted &&
-           req->r_locked_dir &&
+       if (rinfo->head->is_dentry &&
+            !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+           test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
            (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
                                               fsc->mount_options->snapdir_name,
                                               req->r_dentry->d_name.len))) {
@@ -1257,17 +1231,19 @@ retry_lookup:
                 * mknod symlink mkdir  : null -> new inode
                 * unlink               : linked -> null
                 */
-               struct inode *dir = req->r_locked_dir;
+               struct inode *dir = req->r_parent;
                struct dentry *dn = req->r_dentry;
                bool have_dir_cap, have_lease;
 
                BUG_ON(!dn);
                BUG_ON(!dir);
                BUG_ON(d_inode(dn->d_parent) != dir);
-               BUG_ON(ceph_ino(dir) !=
-                      le64_to_cpu(rinfo->diri.in->ino));
-               BUG_ON(ceph_snap(dir) !=
-                      le64_to_cpu(rinfo->diri.in->snapid));
+
+               dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
+               dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
+
+               BUG_ON(ceph_ino(dir) != dvino.ino);
+               BUG_ON(ceph_snap(dir) != dvino.snap);
 
                /* do we have a lease on the whole dir? */
                have_dir_cap =
@@ -1319,12 +1295,13 @@ retry_lookup:
                                ceph_dir_clear_ordered(dir);
                                dout("d_delete %p\n", dn);
                                d_delete(dn);
-                       } else {
-                               if (have_lease && d_unhashed(dn))
+                       } else if (have_lease) {
+                               if (d_unhashed(dn))
                                        d_add(dn, NULL);
                                update_dentry_lease(dn, rinfo->dlease,
                                                    session,
-                                                   req->r_request_started);
+                                                   req->r_request_started,
+                                                   NULL, &dvino);
                        }
                        goto done;
                }
@@ -1347,15 +1324,19 @@ retry_lookup:
                        have_lease = false;
                }
 
-               if (have_lease)
+               if (have_lease) {
+                       tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+                       tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
                        update_dentry_lease(dn, rinfo->dlease, session,
-                                           req->r_request_started);
+                                           req->r_request_started,
+                                           &tvino, &dvino);
+               }
                dout(" final dn %p\n", dn);
-       } else if (!req->r_aborted &&
-                  (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
-                   req->r_op == CEPH_MDS_OP_MKSNAP)) {
+       } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+                   req->r_op == CEPH_MDS_OP_MKSNAP) &&
+                  !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
                struct dentry *dn = req->r_dentry;
-               struct inode *dir = req->r_locked_dir;
+               struct inode *dir = req->r_parent;
 
                /* fill out a snapdir LOOKUPSNAP dentry */
                BUG_ON(!dn);
@@ -1370,6 +1351,26 @@ retry_lookup:
                        goto done;
                }
                req->r_dentry = dn;  /* may have spliced */
+       } else if (rinfo->head->is_dentry) {
+               struct ceph_vino *ptvino = NULL;
+
+               if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
+                   le32_to_cpu(rinfo->dlease->duration_ms)) {
+                       dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
+                       dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
+
+                       if (rinfo->head->is_target) {
+                               tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+                               tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+                               ptvino = &tvino;
+                       }
+
+                       update_dentry_lease(req->r_dentry, rinfo->dlease,
+                               session, req->r_request_started, ptvino,
+                               &dvino);
+               } else {
+                       dout("%s: no dentry lease or dir cap\n", __func__);
+               }
        }
 done:
        dout("fill_trace done err=%d\n", err);
@@ -1478,7 +1479,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
        u32 fpos_offset;
        struct ceph_readdir_cache_control cache_ctl = {};
 
-       if (req->r_aborted)
+       if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
                return readdir_prepopulate_inodes_only(req, session);
 
        if (rinfo->hash_order && req->r_path2) {
@@ -1523,14 +1524,14 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
        /* FIXME: release caps/leases if error occurs */
        for (i = 0; i < rinfo->dir_nr; i++) {
                struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
-               struct ceph_vino vino;
+               struct ceph_vino tvino, dvino;
 
                dname.name = rde->name;
                dname.len = rde->name_len;
                dname.hash = full_name_hash(parent, dname.name, dname.len);
 
-               vino.ino = le64_to_cpu(rde->inode.in->ino);
-               vino.snap = le64_to_cpu(rde->inode.in->snapid);
+               tvino.ino = le64_to_cpu(rde->inode.in->ino);
+               tvino.snap = le64_to_cpu(rde->inode.in->snapid);
 
                if (rinfo->hash_order) {
                        u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
@@ -1559,8 +1560,8 @@ retry_lookup:
                                goto out;
                        }
                } else if (d_really_is_positive(dn) &&
-                          (ceph_ino(d_inode(dn)) != vino.ino ||
-                           ceph_snap(d_inode(dn)) != vino.snap)) {
+                          (ceph_ino(d_inode(dn)) != tvino.ino ||
+                           ceph_snap(d_inode(dn)) != tvino.snap)) {
                        dout(" dn %p points to wrong inode %p\n",
                             dn, d_inode(dn));
                        d_delete(dn);
@@ -1572,7 +1573,7 @@ retry_lookup:
                if (d_really_is_positive(dn)) {
                        in = d_inode(dn);
                } else {
-                       in = ceph_get_inode(parent->d_sb, vino);
+                       in = ceph_get_inode(parent->d_sb, tvino);
                        if (IS_ERR(in)) {
                                dout("new_inode badness\n");
                                d_drop(dn);
@@ -1617,8 +1618,9 @@ retry_lookup:
 
                ceph_dentry(dn)->offset = rde->offset;
 
+               dvino = ceph_vino(d_inode(parent));
                update_dentry_lease(dn, rde->lease, req->r_session,
-                                   req->r_request_started);
+                                   req->r_request_started, &tvino, &dvino);
 
                if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
                        ret = fill_readdir_cache(d_inode(parent), dn,
@@ -1632,7 +1634,7 @@ next_item:
        }
 out:
        if (err == 0 && skipped == 0) {
-               req->r_did_prepopulate = true;
+               set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags);
                req->r_readdir_cache_idx = cache_ctl.index;
        }
        ceph_readdir_cache_release(&cache_ctl);
@@ -1720,7 +1722,7 @@ static void ceph_invalidate_work(struct work_struct *work)
 
        mutex_lock(&ci->i_truncate_mutex);
 
-       if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+       if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
                pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
                                    inode, ceph_ino(inode));
                mapping_set_error(inode->i_mapping, -EIO);
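
The ACCESS_ONCE() to READ_ONCE() conversions that recur through these hunks are mechanical: READ_ONCE() (from linux/compiler.h) is the replacement primitive that forces a single, untorn load and, unlike ACCESS_ONCE(), also behaves correctly for non-scalar types. The usage pattern is unchanged:

	/* one forced load of a field another task may be updating */
	if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
		return;
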
index 7d752d53353a24e742fff660ab33f2435727f67e..4c9c72f26eb90c6fd3693dc8f6ad7e9eda458ca3 100644 (file)
@@ -25,7 +25,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
                l.stripe_count = ci->i_layout.stripe_count;
                l.object_size = ci->i_layout.object_size;
                l.data_pool = ci->i_layout.pool_id;
-               l.preferred_osd = (s32)-1;
+               l.preferred_osd = -1;
                if (copy_to_user(arg, &l, sizeof(l)))
                        return -EFAULT;
        }
@@ -97,7 +97,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
                nl.data_pool = ci->i_layout.pool_id;
 
        /* this is obsolete, and always -1 */
-       nl.preferred_osd = le64_to_cpu(-1);
+       nl.preferred_osd = -1;
 
        err = __validate_layout(mdsc, &nl);
        if (err)
index c9d2e553a6c487f01bd11ed4c7a2c15ddfcd058d..c681762d76e66be1edf7004b1de8c13568ec6022 100644 (file)
@@ -547,8 +547,8 @@ void ceph_mdsc_release_request(struct kref *kref)
                ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
                iput(req->r_inode);
        }
-       if (req->r_locked_dir)
-               ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+       if (req->r_parent)
+               ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
        iput(req->r_target_inode);
        if (req->r_dentry)
                dput(req->r_dentry);
@@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 {
        dout("__unregister_request %p tid %lld\n", req, req->r_tid);
 
+       /* Never leave an unregistered request on an unsafe list! */
+       list_del_init(&req->r_unsafe_item);
+
        if (req->r_tid == mdsc->oldest_tid) {
                struct rb_node *p = rb_next(&req->r_node);
                mdsc->oldest_tid = 0;
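
The unconditional list_del_init() added above is safe even for a request that was never placed on an unsafe list: deleting an empty (self-pointing) list node simply re-points it at itself. That property is what allows the explicit deletions in cleanup_session_requests() and in the safe-reply path further down to be removed. A minimal sketch:

	LIST_HEAD(node);        /* initialized node points at itself */
	list_del_init(&node);   /* harmless no-op on an empty node   */
	/* list_empty(&node) is still true; no state was corrupted   */
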
@@ -644,13 +647,15 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 
        erase_request(&mdsc->request_tree, req);
 
-       if (req->r_unsafe_dir && req->r_got_unsafe) {
+       if (req->r_unsafe_dir &&
+           test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
                struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
                spin_lock(&ci->i_unsafe_lock);
                list_del_init(&req->r_unsafe_dir_item);
                spin_unlock(&ci->i_unsafe_lock);
        }
-       if (req->r_target_inode && req->r_got_unsafe) {
+       if (req->r_target_inode &&
+           test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
                struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
                spin_lock(&ci->i_unsafe_lock);
                list_del_init(&req->r_unsafe_target_item);
@@ -667,6 +672,28 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
        ceph_mdsc_put_request(req);
 }
 
+/*
+ * Walk back up the dentry tree until we hit a dentry representing a
+ * non-snapshot inode. We do this using the rcu_read_lock (which must be held
+ * when calling this) to ensure that the objects won't disappear while we're
+ * working with them. Once we hit a candidate dentry, we attempt to take a
+ * reference to it, and return that as the result.
+ */
+static struct inode *get_nonsnap_parent(struct dentry *dentry)
+{
+       struct inode *inode = NULL;
+
+       while (dentry && !IS_ROOT(dentry)) {
+               inode = d_inode_rcu(dentry);
+               if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
+                       break;
+               dentry = dentry->d_parent;
+       }
+       if (inode)
+               inode = igrab(inode);
+       return inode;
+}
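
Per the comment above, the contract for get_nonsnap_parent() is: the caller holds the RCU read lock across the walk, and the igrab() reference on the returned inode is the caller's to drop. A hedged caller sketch, mirroring how __choose_mds() uses it below:

	rcu_read_lock();
	inode = get_nonsnap_parent(parent);   /* may return NULL */
	rcu_read_unlock();

	if (inode) {
		/* ... inode is pinned by igrab(), safe to use ... */
		iput(inode);                  /* drop the reference */
	}
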
+
 /*
  * Choose mds to send request to next.  If there is a hint set in the
  * request (e.g., due to a prior forward hint from the mds), use that.
@@ -675,19 +702,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
  *
  * Called under mdsc->mutex.
  */
-static struct dentry *get_nonsnap_parent(struct dentry *dentry)
-{
-       /*
-        * we don't need to worry about protecting the d_parent access
-        * here because we never renaming inside the snapped namespace
-        * except to resplice to another snapdir, and either the old or new
-        * result is a valid result.
-        */
-       while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
-               dentry = dentry->d_parent;
-       return dentry;
-}
-
 static int __choose_mds(struct ceph_mds_client *mdsc,
                        struct ceph_mds_request *req)
 {
@@ -697,7 +711,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
        int mode = req->r_direct_mode;
        int mds = -1;
        u32 hash = req->r_direct_hash;
-       bool is_hash = req->r_direct_is_hash;
+       bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
 
        /*
         * is there a specific mds we should try?  ignore hint if we have
@@ -717,30 +731,39 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
        inode = NULL;
        if (req->r_inode) {
                inode = req->r_inode;
+               ihold(inode);
        } else if (req->r_dentry) {
                /* ignore race with rename; old or new d_parent is okay */
-               struct dentry *parent = req->r_dentry->d_parent;
-               struct inode *dir = d_inode(parent);
+               struct dentry *parent;
+               struct inode *dir;
+
+               rcu_read_lock();
+               parent = req->r_dentry->d_parent;
+               dir = req->r_parent ? : d_inode_rcu(parent);
 
-               if (dir->i_sb != mdsc->fsc->sb) {
-                       /* not this fs! */
+               if (!dir || dir->i_sb != mdsc->fsc->sb) {
+               /* not this fs or parent went negative */
                        inode = d_inode(req->r_dentry);
+                       if (inode)
+                               ihold(inode);
                } else if (ceph_snap(dir) != CEPH_NOSNAP) {
                        /* direct snapped/virtual snapdir requests
                         * based on parent dir inode */
-                       struct dentry *dn = get_nonsnap_parent(parent);
-                       inode = d_inode(dn);
+                       inode = get_nonsnap_parent(parent);
                        dout("__choose_mds using nonsnap parent %p\n", inode);
                } else {
                        /* dentry target */
                        inode = d_inode(req->r_dentry);
                        if (!inode || mode == USE_AUTH_MDS) {
                                /* dir + name */
-                               inode = dir;
+                               inode = igrab(dir);
                                hash = ceph_dentry_hash(dir, req->r_dentry);
                                is_hash = true;
+                       } else {
+                               ihold(inode);
                        }
                }
+               rcu_read_unlock();
        }
 
        dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
@@ -769,7 +792,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                                     (int)r, frag.ndist);
                                if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
                                    CEPH_MDS_STATE_ACTIVE)
-                                       return mds;
+                                       goto out;
                        }
 
                        /* since this file/dir wasn't known to be
@@ -784,7 +807,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                                     inode, ceph_vinop(inode), frag.frag, mds);
                                if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
                                    CEPH_MDS_STATE_ACTIVE)
-                                       return mds;
+                                       goto out;
                        }
                }
        }
@@ -797,6 +820,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
        if (!cap) {
                spin_unlock(&ci->i_ceph_lock);
+               iput(inode);
                goto random;
        }
        mds = cap->session->s_mds;
@@ -804,6 +828,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
             inode, ceph_vinop(inode), mds,
             cap == ci->i_auth_cap ? "auth " : "", cap);
        spin_unlock(&ci->i_ceph_lock);
+out:
+       iput(inode);
        return mds;
 
 random:
@@ -1036,7 +1062,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
        while (!list_empty(&session->s_unsafe)) {
                req = list_first_entry(&session->s_unsafe,
                                       struct ceph_mds_request, r_unsafe_item);
-               list_del_init(&req->r_unsafe_item);
                pr_warn_ratelimited(" dropping unsafe request %llu\n",
                                    req->r_tid);
                __unregister_request(mdsc, req);
@@ -1146,7 +1171,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 
                if (ci->i_wrbuffer_ref > 0 &&
-                   ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+                   READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
                        invalidate = true;
 
                while (!list_empty(&ci->i_cap_flush_list)) {
@@ -1775,18 +1800,23 @@ retry:
        return path;
 }
 
-static int build_dentry_path(struct dentry *dentry,
+static int build_dentry_path(struct dentry *dentry, struct inode *dir,
                             const char **ppath, int *ppathlen, u64 *pino,
                             int *pfreepath)
 {
        char *path;
 
-       if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
-               *pino = ceph_ino(d_inode(dentry->d_parent));
+       rcu_read_lock();
+       if (!dir)
+               dir = d_inode_rcu(dentry->d_parent);
+       if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
+               *pino = ceph_ino(dir);
+               rcu_read_unlock();
                *ppath = dentry->d_name.name;
                *ppathlen = dentry->d_name.len;
                return 0;
        }
+       rcu_read_unlock();
        path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
        if (IS_ERR(path))
                return PTR_ERR(path);
@@ -1822,8 +1852,8 @@ static int build_inode_path(struct inode *inode,
  * an explicit ino+path.
  */
 static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
-                                 const char *rpath, u64 rino,
-                                 const char **ppath, int *pathlen,
+                                 struct inode *rdiri, const char *rpath,
+                                 u64 rino, const char **ppath, int *pathlen,
                                  u64 *ino, int *freepath)
 {
        int r = 0;
@@ -1833,7 +1863,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
                dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
                     ceph_snap(rinode));
        } else if (rdentry) {
-               r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
+               r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
+                                       freepath);
                dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
                     *ppath);
        } else if (rpath || rino) {
@@ -1866,7 +1897,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
        int ret;
 
        ret = set_request_path_attr(req->r_inode, req->r_dentry,
-                             req->r_path1, req->r_ino1.ino,
+                             req->r_parent, req->r_path1, req->r_ino1.ino,
                              &path1, &pathlen1, &ino1, &freepath1);
        if (ret < 0) {
                msg = ERR_PTR(ret);
@@ -1874,6 +1905,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
        }
 
        ret = set_request_path_attr(NULL, req->r_old_dentry,
+                             req->r_old_dentry_dir,
                              req->r_path2, req->r_ino2.ino,
                              &path2, &pathlen2, &ino2, &freepath2);
        if (ret < 0) {
@@ -1927,10 +1959,13 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
                      mds, req->r_inode_drop, req->r_inode_unless, 0);
        if (req->r_dentry_drop)
                releases += ceph_encode_dentry_release(&p, req->r_dentry,
-                      mds, req->r_dentry_drop, req->r_dentry_unless);
+                               req->r_parent, mds, req->r_dentry_drop,
+                               req->r_dentry_unless);
        if (req->r_old_dentry_drop)
                releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
-                      mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
+                               req->r_old_dentry_dir, mds,
+                               req->r_old_dentry_drop,
+                               req->r_old_dentry_unless);
        if (req->r_old_inode_drop)
                releases += ceph_encode_inode_release(&p,
                      d_inode(req->r_old_dentry),
@@ -2012,7 +2047,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
        dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
             req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
 
-       if (req->r_got_unsafe) {
+       if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
                void *p;
                /*
                 * Replay.  Do not regenerate message (and rebuild
@@ -2061,16 +2096,16 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 
        rhead = msg->front.iov_base;
        rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
-       if (req->r_got_unsafe)
+       if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
                flags |= CEPH_MDS_FLAG_REPLAY;
-       if (req->r_locked_dir)
+       if (req->r_parent)
                flags |= CEPH_MDS_FLAG_WANT_DENTRY;
        rhead->flags = cpu_to_le32(flags);
        rhead->num_fwd = req->r_num_fwd;
        rhead->num_retry = req->r_attempts - 1;
        rhead->ino = 0;
 
-       dout(" r_locked_dir = %p\n", req->r_locked_dir);
+       dout(" r_parent = %p\n", req->r_parent);
        return 0;
 }
 
@@ -2084,8 +2119,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
        int mds = -1;
        int err = 0;
 
-       if (req->r_err || req->r_got_result) {
-               if (req->r_aborted)
+       if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
+               if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
                        __unregister_request(mdsc, req);
                goto out;
        }
@@ -2096,12 +2131,12 @@ static int __do_request(struct ceph_mds_client *mdsc,
                err = -EIO;
                goto finish;
        }
-       if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+       if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
                dout("do_request forced umount\n");
                err = -EIO;
                goto finish;
        }
-       if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
+       if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
                if (mdsc->mdsmap_err) {
                        err = mdsc->mdsmap_err;
                        dout("do_request mdsmap err %d\n", err);
@@ -2215,7 +2250,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
        while (p) {
                req = rb_entry(p, struct ceph_mds_request, r_node);
                p = rb_next(p);
-               if (req->r_got_unsafe)
+               if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
                        continue;
                if (req->r_attempts > 0)
                        continue; /* only new requests */
@@ -2250,11 +2285,11 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
 
        dout("do_request on %p\n", req);
 
-       /* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */
+       /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
        if (req->r_inode)
                ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
-       if (req->r_locked_dir)
-               ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+       if (req->r_parent)
+               ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
        if (req->r_old_dentry_dir)
                ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
                                  CEPH_CAP_PIN);
@@ -2289,7 +2324,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
        mutex_lock(&mdsc->mutex);
 
        /* only abort if we didn't race with a real reply */
-       if (req->r_got_result) {
+       if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
                err = le32_to_cpu(req->r_reply_info.head->result);
        } else if (err < 0) {
                dout("aborted request %lld with %d\n", req->r_tid, err);
@@ -2301,10 +2336,10 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
                 */
                mutex_lock(&req->r_fill_mutex);
                req->r_err = err;
-               req->r_aborted = true;
+               set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
                mutex_unlock(&req->r_fill_mutex);
 
-               if (req->r_locked_dir &&
+               if (req->r_parent &&
                    (req->r_op & CEPH_MDS_OP_WRITE))
                        ceph_invalidate_dir_request(req);
        } else {
@@ -2323,7 +2358,7 @@ out:
  */
 void ceph_invalidate_dir_request(struct ceph_mds_request *req)
 {
-       struct inode *inode = req->r_locked_dir;
+       struct inode *inode = req->r_parent;
 
        dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
 
@@ -2379,14 +2414,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        }
 
        /* dup? */
-       if ((req->r_got_unsafe && !head->safe) ||
-           (req->r_got_safe && head->safe)) {
+       if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) ||
+           (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) {
                pr_warn("got a dup %s reply on %llu from mds%d\n",
                           head->safe ? "safe" : "unsafe", tid, mds);
                mutex_unlock(&mdsc->mutex);
                goto out;
        }
-       if (req->r_got_safe) {
+       if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) {
                pr_warn("got unsafe after safe on %llu from mds%d\n",
                           tid, mds);
                mutex_unlock(&mdsc->mutex);
@@ -2425,10 +2460,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 
 
        if (head->safe) {
-               req->r_got_safe = true;
+               set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
                __unregister_request(mdsc, req);
 
-               if (req->r_got_unsafe) {
+               if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
                        /*
                         * We already handled the unsafe response, now do the
                         * cleanup.  No need to examine the response; the MDS
@@ -2437,7 +2472,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
                         * useful we could do with a revised return value.
                         */
                        dout("got safe reply %llu, mds%d\n", tid, mds);
-                       list_del_init(&req->r_unsafe_item);
 
                        /* last unsafe request during umount? */
                        if (mdsc->stopping && !__get_oldest_req(mdsc))
@@ -2446,7 +2480,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
                        goto out;
                }
        } else {
-               req->r_got_unsafe = true;
+               set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags);
                list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
                if (req->r_unsafe_dir) {
                        struct ceph_inode_info *ci =
@@ -2486,7 +2520,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        /* insert trace into our cache */
        mutex_lock(&req->r_fill_mutex);
        current->journal_info = req;
-       err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
+       err = ceph_fill_trace(mdsc->fsc->sb, req);
        if (err == 0) {
                if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
                                    req->r_op == CEPH_MDS_OP_LSSNAP))
@@ -2500,7 +2534,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        if (realm)
                ceph_put_snap_realm(mdsc, realm);
 
-       if (err == 0 && req->r_got_unsafe && req->r_target_inode) {
+       if (err == 0 && req->r_target_inode &&
+           test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
                struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
                spin_lock(&ci->i_unsafe_lock);
                list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops);
@@ -2508,12 +2543,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        }
 out_err:
        mutex_lock(&mdsc->mutex);
-       if (!req->r_aborted) {
+       if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
                if (err) {
                        req->r_err = err;
                } else {
                        req->r_reply =  ceph_msg_get(msg);
-                       req->r_got_result = true;
+                       set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags);
                }
        } else {
                dout("reply arrived after request %lld was aborted\n", tid);
@@ -2557,7 +2592,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
                goto out;  /* dup reply? */
        }
 
-       if (req->r_aborted) {
+       if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
                dout("forward tid %llu aborted, unregistering\n", tid);
                __unregister_request(mdsc, req);
        } else if (fwd_seq <= req->r_num_fwd) {
@@ -2567,7 +2602,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
                /* resend. forward race not possible; mds would drop */
                dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
                BUG_ON(req->r_err);
-               BUG_ON(req->r_got_result);
+               BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags));
                req->r_attempts = 0;
                req->r_num_fwd = fwd_seq;
                req->r_resend_mds = next_mds;
@@ -2732,7 +2767,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
        while (p) {
                req = rb_entry(p, struct ceph_mds_request, r_node);
                p = rb_next(p);
-               if (req->r_got_unsafe)
+               if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
                        continue;
                if (req->r_attempts == 0)
                        continue; /* only old requests */
@@ -3556,7 +3591,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 {
        u64 want_tid, want_flush;
 
-       if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+       if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
                return;
 
        dout("sync\n");
@@ -3587,7 +3622,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
  */
 static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
 {
-       if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+       if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
                return true;
        return atomic_read(&mdsc->num_sessions) <= skipped;
 }
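
The ceph hunks above collapse a pile of per-request bools (r_got_safe, r_got_unsafe, r_aborted, ...) into numbered bits in a single r_req_flags word, tested and set through the kernel's atomic bitops. A minimal userspace model of the layout (plain C stand-ins for set_bit()/test_bit(); the kernel versions are atomic and take a bit number, not a mask):

    #include <stdio.h>

    /* bit numbers mirror the new CEPH_MDS_R_* defines */
    #define R_GOT_UNSAFE 3
    #define R_GOT_SAFE   4

    static void set_flag(unsigned long *word, int bit)
    {
            *word |= 1UL << bit;             /* kernel: set_bit(bit, word) */
    }

    static int test_flag(const unsigned long *word, int bit)
    {
            return !!(*word & (1UL << bit)); /* kernel: test_bit(bit, word) */
    }

    int main(void)
    {
            unsigned long req_flags = 0;     /* models req->r_req_flags */

            set_flag(&req_flags, R_GOT_UNSAFE);
            printf("unsafe=%d safe=%d\n",
                   test_flag(&req_flags, R_GOT_UNSAFE),
                   test_flag(&req_flags, R_GOT_SAFE)); /* unsafe=1 safe=0 */
            return 0;
    }
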
index 3c6f77b7bb02107f9edc579fd5dc12ce57c8b7f5..ac0475a2daa749d3d689956cc45a2913c955ca8f 100644 (file)
@@ -202,9 +202,18 @@ struct ceph_mds_request {
        char *r_path1, *r_path2;
        struct ceph_vino r_ino1, r_ino2;
 
-       struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
+       struct inode *r_parent;             /* parent dir inode */
        struct inode *r_target_inode;       /* resulting inode */
 
+#define CEPH_MDS_R_DIRECT_IS_HASH      (1) /* r_direct_hash is valid */
+#define CEPH_MDS_R_ABORTED             (2) /* call was aborted */
+#define CEPH_MDS_R_GOT_UNSAFE          (3) /* got an unsafe reply */
+#define CEPH_MDS_R_GOT_SAFE            (4) /* got a safe reply */
+#define CEPH_MDS_R_GOT_RESULT          (5) /* got a result */
+#define CEPH_MDS_R_DID_PREPOPULATE     (6) /* prepopulated readdir */
+#define CEPH_MDS_R_PARENT_LOCKED       (7) /* is r_parent->i_rwsem wlocked? */
+       unsigned long   r_req_flags;
+
        struct mutex r_fill_mutex;
 
        union ceph_mds_request_args r_args;
@@ -216,7 +225,6 @@ struct ceph_mds_request {
        /* for choosing which mds to send this request to */
        int r_direct_mode;
        u32 r_direct_hash;      /* choose dir frag based on this dentry hash */
-       bool r_direct_is_hash;  /* true if r_direct_hash is valid */
 
        /* data payload is used for xattr ops */
        struct ceph_pagelist *r_pagelist;
@@ -234,7 +242,6 @@ struct ceph_mds_request {
        struct ceph_mds_reply_info_parsed r_reply_info;
        struct page *r_locked_page;
        int r_err;
-       bool r_aborted;
 
        unsigned long r_timeout;  /* optional.  jiffies, 0 is "wait forever" */
        unsigned long r_started;  /* start time to measure timeout against */
@@ -262,9 +269,7 @@ struct ceph_mds_request {
        ceph_mds_request_callback_t r_callback;
        ceph_mds_request_wait_callback_t r_wait_for_completion;
        struct list_head  r_unsafe_item;  /* per-session unsafe list item */
-       bool              r_got_unsafe, r_got_safe, r_got_result;
 
-       bool              r_did_prepopulate;
        long long         r_dir_release_cnt;
        long long         r_dir_ordered_cnt;
        int               r_readdir_cache_idx;
index 6bd20d707bfd885aff2f89a4b7266cc1c05fd5c8..0ec8d0114e57ba80fdc46b1acdc9b7de7373e276 100644 (file)
@@ -757,7 +757,6 @@ static const struct super_operations ceph_super_ops = {
        .destroy_inode  = ceph_destroy_inode,
        .write_inode    = ceph_write_inode,
        .drop_inode     = ceph_drop_inode,
-       .evict_inode    = ceph_evict_inode,
        .sync_fs        = ceph_sync_fs,
        .put_super      = ceph_put_super,
        .show_options   = ceph_show_options,
@@ -952,6 +951,14 @@ static int ceph_register_bdi(struct super_block *sb,
                fsc->backing_dev_info.ra_pages =
                        VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
 
+       if (fsc->mount_options->rsize > fsc->mount_options->rasize &&
+           fsc->mount_options->rsize >= PAGE_SIZE)
+               fsc->backing_dev_info.io_pages =
+                       (fsc->mount_options->rsize + PAGE_SIZE - 1)
+                       >> PAGE_SHIFT;
+       else if (fsc->mount_options->rsize == 0)
+               fsc->backing_dev_info.io_pages = ULONG_MAX;
+
        err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
                           atomic_long_inc_return(&bdi_seq));
        if (!err)
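
The new io_pages hunk caps the BDI's per-call I/O size at the rsize mount option, rounding the byte count up to whole pages (rsize == 0 means unlimited). A standalone sketch of the arithmetic, assuming the common 4 KiB page (PAGE_SHIFT 12):

    #include <stdio.h>

    #define PAGE_SHIFT 12                    /* assumption: 4 KiB pages */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /* round a byte count up to whole pages, as the io_pages hunk does */
    static unsigned long bytes_to_pages(unsigned long rsize)
    {
            return (rsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
    }

    int main(void)
    {
            printf("%lu\n", bytes_to_pages(64UL << 20)); /* 16384 pages */
            printf("%lu\n", bytes_to_pages(1));          /* rounds up to 1 */
            return 0;
    }
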
index 3373b61faefd0fac7d240438e5bb2dca7e3433db..e9410bcf41135b72d6a782c9d5dbf1df29bcd911 100644 (file)
@@ -45,8 +45,8 @@
 #define ceph_test_mount_opt(fsc, opt) \
        (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
 
-#define CEPH_RSIZE_DEFAULT             0           /* max read size */
-#define CEPH_RASIZE_DEFAULT            (8192*1024) /* readahead */
+#define CEPH_RSIZE_DEFAULT              (64*1024*1024) /* max read size */
+#define CEPH_RASIZE_DEFAULT             (8192*1024)    /* max readahead */
 #define CEPH_MAX_READDIR_DEFAULT        1024
 #define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
 #define CEPH_SNAPDIRNAME_DEFAULT        ".snap"
@@ -343,7 +343,6 @@ struct ceph_inode_info {
        u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
        u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
 
-       struct list_head i_unsafe_writes; /* uncommitted sync writes */
        struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
        struct list_head i_unsafe_iops;   /* uncommitted mds inode ops */
        spinlock_t i_unsafe_lock;
@@ -602,7 +601,7 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
 }
 
 /* what the mds thinks we want */
-extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
+extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
 
 extern void ceph_caps_init(struct ceph_mds_client *mdsc);
 extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
@@ -753,7 +752,6 @@ extern const struct inode_operations ceph_file_iops;
 extern struct inode *ceph_alloc_inode(struct super_block *sb);
 extern void ceph_destroy_inode(struct inode *inode);
 extern int ceph_drop_inode(struct inode *inode);
-extern void ceph_evict_inode(struct inode *inode);
 
 extern struct inode *ceph_get_inode(struct super_block *sb,
                                    struct ceph_vino vino);
@@ -764,8 +762,7 @@ extern void ceph_fill_file_time(struct inode *inode, int issued,
                                u64 time_warp_seq, struct timespec *ctime,
                                struct timespec *mtime, struct timespec *atime);
 extern int ceph_fill_trace(struct super_block *sb,
-                          struct ceph_mds_request *req,
-                          struct ceph_mds_session *session);
+                          struct ceph_mds_request *req);
 extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                                    struct ceph_mds_session *session);
 
@@ -904,6 +901,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
 extern int ceph_encode_inode_release(void **p, struct inode *inode,
                                     int mds, int drop, int unless, int force);
 extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
+                                     struct inode *dir,
                                      int mds, int drop, int unless);
 
 extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
@@ -933,7 +931,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
                                  char *data, size_t len);
-extern void ceph_sync_write_wait(struct inode *inode);
+
 /* dir.c */
 extern const struct file_operations ceph_dir_fops;
 extern const struct file_operations ceph_snapdir_fops;
index 98dc842e724512cb5eb71d5f1d94ccb0afe1222e..aa3debbba82648944a1e1426516abebb268b7ca0 100644 (file)
@@ -3282,7 +3282,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
  * sure that it doesn't change while being written back.
  */
 static int
-cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+cifs_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
 
index 3f1181563fb173a268147f8626a072ac289269fa..7436c98b92c82dc43ad1a80fa5b5e49f26ade500 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -925,12 +925,11 @@ static int dax_insert_mapping(struct address_space *mapping,
 
 /**
  * dax_pfn_mkwrite - handle first write to DAX page
- * @vma: The virtual memory area where the fault occurred
  * @vmf: The description of the fault
  */
-int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int dax_pfn_mkwrite(struct vm_fault *vmf)
 {
-       struct file *file = vma->vm_file;
+       struct file *file = vmf->vma->vm_file;
        struct address_space *mapping = file->f_mapping;
        void *entry, **slot;
        pgoff_t index = vmf->pgoff;
@@ -1119,20 +1118,10 @@ static int dax_fault_return(int error)
        return VM_FAULT_SIGBUS;
 }
 
-/**
- * dax_iomap_fault - handle a page fault on a DAX file
- * @vma: The virtual memory area where the fault occurred
- * @vmf: The description of the fault
- * @ops: iomap ops passed from the file system
- *
- * When a page fault occurs, filesystems may call this helper in their fault
- * or mkwrite handler for DAX files. Assumes the caller has done all the
- * necessary locking for the page fault to proceed successfully.
- */
-int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-                       const struct iomap_ops *ops)
+static int dax_iomap_pte_fault(struct vm_fault *vmf,
+                              const struct iomap_ops *ops)
 {
-       struct address_space *mapping = vma->vm_file->f_mapping;
+       struct address_space *mapping = vmf->vma->vm_file->f_mapping;
        struct inode *inode = mapping->host;
        unsigned long vaddr = vmf->address;
        loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
@@ -1205,11 +1194,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
        case IOMAP_MAPPED:
                if (iomap.flags & IOMAP_F_NEW) {
                        count_vm_event(PGMAJFAULT);
-                       mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+                       mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
                        major = VM_FAULT_MAJOR;
                }
                error = dax_insert_mapping(mapping, iomap.bdev, sector,
-                               PAGE_SIZE, &entry, vma, vmf);
+                               PAGE_SIZE, &entry, vmf->vma, vmf);
                /* -EBUSY is fine, somebody else faulted on the same PTE */
                if (error == -EBUSY)
                        error = 0;
@@ -1247,7 +1236,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
        }
        return vmf_ret;
 }
-EXPORT_SYMBOL_GPL(dax_iomap_fault);
 
 #ifdef CONFIG_FS_DAX_PMD
 /*
@@ -1338,7 +1326,8 @@ fallback:
        return VM_FAULT_FALLBACK;
 }
 
-int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops)
+static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+                              const struct iomap_ops *ops)
 {
        struct vm_area_struct *vma = vmf->vma;
        struct address_space *mapping = vma->vm_file->f_mapping;
@@ -1446,5 +1435,34 @@ out:
        trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
        return result;
 }
-EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
+#else
+static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+                              const struct iomap_ops *ops)
+{
+       return VM_FAULT_FALLBACK;
+}
 #endif /* CONFIG_FS_DAX_PMD */
+
+/**
+ * dax_iomap_fault - handle a page fault on a DAX file
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in
+ * their fault handler for DAX files. dax_iomap_fault() assumes the caller
+ * has done all the necessary locking for the page fault to proceed
+ * successfully.
+ */
+int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+                   const struct iomap_ops *ops)
+{
+       switch (pe_size) {
+       case PE_SIZE_PTE:
+               return dax_iomap_pte_fault(vmf, ops);
+       case PE_SIZE_PMD:
+               return dax_iomap_pmd_fault(vmf, ops);
+       default:
+               return VM_FAULT_FALLBACK;
+       }
+}
+EXPORT_SYMBOL_GPL(dax_iomap_fault);
index c87bae4376b848f4204c06016eeb223f675dc7db..a04ebea77de89b4a9bd74cd99f7a1740db7bbb4a 100644 (file)
@@ -587,7 +587,7 @@ static int dio_set_defer_completion(struct dio *dio)
 /*
  * Call into the fs to map some more disk blocks.  We record the current number
  * of available blocks at sdio->blocks_available.  These are in units of the
- * fs blocksize, (1 << inode->i_blkbits).
+ * fs blocksize, i_blocksize(inode).
  *
  * The fs is allowed to map lots of blocks at once.  If it wants to do that,
  * it uses the passed inode-relative block number as the file offset, as usual.
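
i_blocksize(), substituted for open-coded `1 << inode->i_blkbits` throughout these hunks, is a small <linux/fs.h> helper. A standalone model:

    #include <stdio.h>

    struct inode { unsigned char i_blkbits; }; /* only the field that matters */

    /* same shape as the kernel helper in <linux/fs.h> */
    static unsigned int i_blocksize(const struct inode *inode)
    {
            return 1U << inode->i_blkbits;
    }

    int main(void)
    {
            struct inode inode = { .i_blkbits = 12 };
            printf("%u\n", i_blocksize(&inode)); /* 4096 */
            return 0;
    }
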
index 866bb18efefea9953250ba1cbdc145e7d4be49af..e00d45af84eab54cbbc60a21ece9b840851bc194 100644 (file)
@@ -123,7 +123,7 @@ void ecryptfs_destroy_kthread(void)
  * @lower_dentry: Lower dentry for file to open
  * @lower_mnt: Lower vfsmount for file to open
  *
- * This function gets a r/w file opened againt the lower dentry.
+ * This function gets a r/w file opened against the lower dentry.
  *
  * Returns zero on success; non-zero otherwise
  */
index bcb68fcc844515c67e7efc83710dabdce361f8b9..5ec16313da1a1cc59f462038f331f50c35f8b89d 100644 (file)
@@ -1895,7 +1895,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
         * so EPOLLEXCLUSIVE is not allowed for an EPOLL_CTL_MOD operation.
         * Also, we do not currently support nested exclusive wakeups.
         */
-       if (epds.events & EPOLLEXCLUSIVE) {
+       if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) {
                if (op == EPOLL_CTL_MOD)
                        goto error_tgt_fput;
                if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
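
The epoll fix guards the EPOLLEXCLUSIVE check with ep_op_has_event() because EPOLL_CTL_DEL ignores its event argument, so epds holds uninitialized data for that op and must not be inspected. The predicate itself is trivial; a runnable illustration:

    #include <stdio.h>
    #include <sys/epoll.h>

    /* mirrors ep_op_has_event() in fs/eventpoll.c */
    static int op_has_event(int op)
    {
            return op != EPOLL_CTL_DEL;
    }

    int main(void)
    {
            printf("ADD:%d DEL:%d MOD:%d\n",
                   op_has_event(EPOLL_CTL_ADD),
                   op_has_event(EPOLL_CTL_DEL),
                   op_has_event(EPOLL_CTL_MOD)); /* ADD:1 DEL:0 MOD:1 */
            return 0;
    }
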
index b0f241528a30a5e631e134556c1e9db9e3772c41..b21891a6bfca6611f9ad89a412594c2e92d695cc 100644 (file)
@@ -87,19 +87,19 @@ out_unlock:
  * The default page_lock and i_size verification done by non-DAX fault paths
  * is sufficient because ext2 doesn't support hole punching.
  */
-static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ext2_dax_fault(struct vm_fault *vmf)
 {
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct ext2_inode_info *ei = EXT2_I(inode);
        int ret;
 
        if (vmf->flags & FAULT_FLAG_WRITE) {
                sb_start_pagefault(inode->i_sb);
-               file_update_time(vma->vm_file);
+               file_update_time(vmf->vma->vm_file);
        }
        down_read(&ei->dax_sem);
 
-       ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
+       ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &ext2_iomap_ops);
 
        up_read(&ei->dax_sem);
        if (vmf->flags & FAULT_FLAG_WRITE)
@@ -107,16 +107,15 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return ret;
 }
 
-static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
-               struct vm_fault *vmf)
+static int ext2_dax_pfn_mkwrite(struct vm_fault *vmf)
 {
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct ext2_inode_info *ei = EXT2_I(inode);
        loff_t size;
        int ret;
 
        sb_start_pagefault(inode->i_sb);
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
        down_read(&ei->dax_sem);
 
        /* check that the faulting page hasn't raced with truncate */
@@ -124,7 +123,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
        else
-               ret = dax_pfn_mkwrite(vma, vmf);
+               ret = dax_pfn_mkwrite(vmf);
 
        up_read(&ei->dax_sem);
        sb_end_pagefault(inode->i_sb);
@@ -134,7 +133,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 static const struct vm_operations_struct ext2_dax_vm_ops = {
        .fault          = ext2_dax_fault,
        /*
-        * .pmd_fault is not supported for DAX because allocation in ext2
+        * .huge_fault is not supported for DAX because allocation in ext2
         * cannot be reliably aligned to huge page sizes and so pmd faults
         * will always fail and fall back to regular faults.
         */
index cee23b684f4784e5ed067f7bbd4e5fb7d3c57384..2fd17e8e498416a360ee04bc9a612df0096c5a42 100644 (file)
@@ -2483,8 +2483,8 @@ extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
                             loff_t lstart, loff_t lend);
-extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
-extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_page_mkwrite(struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
 extern void ext4_da_update_reserve_space(struct inode *inode,
index 37e059202cd2fa333dff053b327d8faf85f9d4ac..e7f12a204cbcaee34d75b4ae1675127ff68c6d5c 100644 (file)
@@ -84,7 +84,7 @@
  *   --        writeout
  *     Writeout looks up whole page cache to see if a buffer is
 *     mapped. If there are not very many delayed buffers, then it is
- *     time comsuming.
+ *     time consuming.
  *
  * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
  * bigalloc and writeout can figure out if a block or a range of
index 13021a054fc080e4a75de2ecec11daa139cd7443..8210c1f43556f4358e9b602a93d158e5c0780c44 100644 (file)
@@ -253,19 +253,20 @@ out:
 }
 
 #ifdef CONFIG_FS_DAX
-static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ext4_dax_huge_fault(struct vm_fault *vmf,
+               enum page_entry_size pe_size)
 {
        int result;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct super_block *sb = inode->i_sb;
        bool write = vmf->flags & FAULT_FLAG_WRITE;
 
        if (write) {
                sb_start_pagefault(sb);
-               file_update_time(vma->vm_file);
+               file_update_time(vmf->vma->vm_file);
        }
        down_read(&EXT4_I(inode)->i_mmap_sem);
-       result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
+       result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
        up_read(&EXT4_I(inode)->i_mmap_sem);
        if (write)
                sb_end_pagefault(sb);
@@ -273,25 +274,9 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return result;
 }
 
-static int
-ext4_dax_pmd_fault(struct vm_fault *vmf)
+static int ext4_dax_fault(struct vm_fault *vmf)
 {
-       int result;
-       struct inode *inode = file_inode(vmf->vma->vm_file);
-       struct super_block *sb = inode->i_sb;
-       bool write = vmf->flags & FAULT_FLAG_WRITE;
-
-       if (write) {
-               sb_start_pagefault(sb);
-               file_update_time(vmf->vma->vm_file);
-       }
-       down_read(&EXT4_I(inode)->i_mmap_sem);
-       result = dax_iomap_pmd_fault(vmf, &ext4_iomap_ops);
-       up_read(&EXT4_I(inode)->i_mmap_sem);
-       if (write)
-               sb_end_pagefault(sb);
-
-       return result;
+       return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
 }
 
 /*
@@ -303,22 +288,21 @@ ext4_dax_pmd_fault(struct vm_fault *vmf)
  * wp_pfn_shared() fails. Thus fault gets retried and things work out as
  * desired.
  */
-static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
-                               struct vm_fault *vmf)
+static int ext4_dax_pfn_mkwrite(struct vm_fault *vmf)
 {
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct super_block *sb = inode->i_sb;
        loff_t size;
        int ret;
 
        sb_start_pagefault(sb);
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
        down_read(&EXT4_I(inode)->i_mmap_sem);
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
        else
-               ret = dax_pfn_mkwrite(vma, vmf);
+               ret = dax_pfn_mkwrite(vmf);
        up_read(&EXT4_I(inode)->i_mmap_sem);
        sb_end_pagefault(sb);
 
@@ -327,7 +311,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
        .fault          = ext4_dax_fault,
-       .pmd_fault      = ext4_dax_pmd_fault,
+       .huge_fault     = ext4_dax_huge_fault,
        .page_mkwrite   = ext4_dax_fault,
        .pfn_mkwrite    = ext4_dax_pfn_mkwrite,
 };
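
With ->pmd_fault replaced by the size-agnostic ->huge_fault, the PTE handler becomes a one-line wrapper and dax_iomap_fault() does the fan-out on pe_size. A userspace model of that dispatch (mock types; the real enum page_entry_size and fault codes live in <linux/mm.h>):

    #include <stdio.h>

    enum page_entry_size { PE_SIZE_PTE, PE_SIZE_PMD, PE_SIZE_PUD };
    #define VM_FAULT_FALLBACK 0x0800

    struct vm_fault { int unused; };         /* mock */

    static int pte_fault(struct vm_fault *vmf) { (void)vmf; return 0; }
    static int pmd_fault(struct vm_fault *vmf) { (void)vmf; return 0; }

    static int huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size)
    {
            switch (pe_size) {
            case PE_SIZE_PTE: return pte_fault(vmf);
            case PE_SIZE_PMD: return pmd_fault(vmf);
            default:          return VM_FAULT_FALLBACK; /* unsupported size */
            }
    }

    int main(void)
    {
            struct vm_fault vmf = { 0 };
            printf("%#x\n", huge_fault(&vmf, PE_SIZE_PUD)); /* 0x800: fallback */
            return 0;
    }
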
index 75212a6e69f8e7ceb05e1ac38ab46f9d71e0f7ce..971f663420803224b91febde1c6ce317d42fb429 100644 (file)
@@ -2221,7 +2221,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
 {
        struct inode *inode = mpd->inode;
        int err;
-       ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+       ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
                                                        >> inode->i_blkbits;
 
        do {
@@ -3577,7 +3577,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
        if (overwrite)
                get_block_func = ext4_dio_get_block_overwrite;
        else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
-                  round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
+                  round_down(offset, i_blocksize(inode)) >= inode->i_size) {
                get_block_func = ext4_dio_get_block;
                dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
        } else if (is_sync_kiocb(iocb)) {
@@ -5179,7 +5179,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
         * do. We do the check mainly to optimize the common PAGE_SIZE ==
         * blocksize case
         */
-       if (offset > PAGE_SIZE - (1 << inode->i_blkbits))
+       if (offset > PAGE_SIZE - i_blocksize(inode))
                return;
        while (1) {
                page = find_lock_page(inode->i_mapping,
@@ -5821,8 +5821,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
        return !buffer_mapped(bh);
 }
 
-int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int ext4_page_mkwrite(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct page *page = vmf->page;
        loff_t size;
        unsigned long len;
@@ -5912,13 +5913,13 @@ out:
        return ret;
 }
 
-int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int ext4_filemap_fault(struct vm_fault *vmf)
 {
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        int err;
 
        down_read(&EXT4_I(inode)->i_mmap_sem);
-       err = filemap_fault(vma, vmf);
+       err = filemap_fault(vmf);
        up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return err;
index 10c62de642c6f9e24d82e6017f65e436683cb7ad..354dc1a894c29bb86dcafcd1b2a5d8a53ca7119d 100644 (file)
@@ -838,7 +838,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
        inode = page->mapping->host;
        sb = inode->i_sb;
        ngroups = ext4_get_groups_count(sb);
-       blocksize = 1 << inode->i_blkbits;
+       blocksize = i_blocksize(inode);
        blocks_per_page = PAGE_SIZE / blocksize;
 
        groups_per_page = blocks_per_page >> 1;
index 6fc14def0c707231ba6430dfb851307a7a13c3eb..578f8c33fb44ad34062e978277f5def1d8aeebe1 100644 (file)
@@ -187,7 +187,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
        if (PageUptodate(page))
                return 0;
 
-       blocksize = 1 << inode->i_blkbits;
+       blocksize = i_blocksize(inode);
        if (!page_has_buffers(page))
                create_empty_buffers(page, blocksize, 0);
 
index 49f10dce817dc9e4806b6a417b96391a1c794fd1..1edc86e874e37054a10d367b0a44c1f80b7bebfd 100644 (file)
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
-static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
-                                               struct vm_fault *vmf)
+static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct dnode_of_data dn;
        int err;
@@ -58,7 +57,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
 
        f2fs_balance_fs(sbi, dn.node_changed);
 
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
        lock_page(page);
        if (unlikely(page->mapping != inode->i_mapping ||
                        page_offset(page) > i_size_read(inode) ||
index 2401c5dabb2a227b6511be01b1589ffa5180e636..e80bfd06daf5fc760a16a4a4315a791a1bc347e5 100644 (file)
@@ -2043,12 +2043,12 @@ static void fuse_vma_close(struct vm_area_struct *vma)
  * - sync(2)
  * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
  */
-static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int fuse_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
 
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
        lock_page(page);
        if (page->mapping != inode->i_mapping) {
                unlock_page(page);
index 016c11eaca7cdd687bbbf88fb8d979f4eddc6010..6fe2a59c6a9a5e8ba14e7ff4f4fefd729686bf58 100644 (file)
@@ -379,10 +379,10 @@ static int gfs2_allocate_page_backing(struct page *page)
  * blocks allocated on disk to back that page.
  */
 
-static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int gfs2_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_alloc_parms ap = { .aflags = 0, };
@@ -399,7 +399,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        if (ret)
                goto out;
 
-       gfs2_size_hint(vma->vm_file, pos, PAGE_SIZE);
+       gfs2_size_hint(vmf->vma->vm_file, pos, PAGE_SIZE);
 
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
@@ -407,7 +407,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                goto out_uninit;
 
        /* Update file times before taking page lock */
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
 
        set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
        set_bit(GIF_SW_PAGED, &ip->i_flags);
index a3ec3ae7d34796877c2cfc7d08c631128569ca93..482081bcdf70c86da21f22ca1c443373b7327180 100644 (file)
@@ -38,7 +38,7 @@ static int hfs_get_last_session(struct super_block *sb,
 
        /* default values */
        *start = 0;
-       *size = sb->s_bdev->bd_inode->i_size >> 9;
+       *size = i_size_read(sb->s_bdev->bd_inode) >> 9;
 
        if (HFS_SB(sb)->session >= 0) {
                te.cdte_track = HFS_SB(sb)->session;
index ebb85e5f65499f3098fdecdc76333b40d7ad3d6b..e254fa0f069710aae74e88f41484de4d39ef7d0a 100644 (file)
@@ -132,7 +132,7 @@ static int hfsplus_get_last_session(struct super_block *sb,
 
        /* default values */
        *start = 0;
-       *size = sb->s_bdev->bd_inode->i_size >> 9;
+       *size = i_size_read(sb->s_bdev->bd_inode) >> 9;
 
        if (HFSPLUS_SB(sb)->session >= 0) {
                te.cdte_track = HFSPLUS_SB(sb)->session;
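
The hfs/hfsplus hunks swap a raw i_size dereference for i_size_read(), which on 32-bit SMP kernels wraps the 64-bit load in a seqcount so a concurrent writer cannot be observed half-way. A simplified userspace model of that reader-retry pattern (C11 atomics standing in for the kernel's seqcount API):

    #include <stdatomic.h>
    #include <stdio.h>

    struct sized {
            _Atomic unsigned int seq;  /* odd while a writer is mid-update */
            long long size;            /* 64-bit value that could tear */
    };

    static long long read_size(struct sized *s)
    {
            unsigned int start;
            long long v;

            do {
                    start = atomic_load(&s->seq);
                    v = s->size;       /* may tear on 32-bit... */
            } while ((start & 1) || atomic_load(&s->seq) != start);
            return v;                  /* ...but torn reads are retried */
    }

    int main(void)
    {
            struct sized s = { 0, 1LL << 40 };
            printf("%lld\n", read_size(&s));
            return 0;
    }
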
index d89f70bbb952f0d9b72a54f02a5d76c76f0a8888..0f85f24106054ace2f2d3133c0e649424b90a27f 100644 (file)
@@ -420,8 +420,8 @@ int
 iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
                const struct iomap_ops *ops)
 {
-       unsigned blocksize = (1 << inode->i_blkbits);
-       unsigned off = pos & (blocksize - 1);
+       unsigned int blocksize = i_blocksize(inode);
+       unsigned int off = pos & (blocksize - 1);
 
        /* Block boundary? Nothing to do */
        if (!off)
@@ -445,11 +445,10 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
        return length;
 }
 
-int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-               const struct iomap_ops *ops)
+int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 {
        struct page *page = vmf->page;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        unsigned long length;
        loff_t offset, size;
        ssize_t ret;
@@ -736,9 +735,9 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
                void *data, struct iomap *iomap)
 {
        struct iomap_dio *dio = data;
-       unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
-       unsigned fs_block_size = (1 << inode->i_blkbits), pad;
-       unsigned align = iov_iter_alignment(dio->submit.iter);
+       unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
+       unsigned int fs_block_size = i_blocksize(inode), pad;
+       unsigned int align = iov_iter_alignment(dio->submit.iter);
        struct iov_iter iter;
        struct bio *bio;
        bool need_zeroout = false;
index 2be7c9ce6663ad8614737878225b384ac1a62d16..c64c2574a0aad7eecf4922a6a005341c26b3fa24 100644 (file)
@@ -758,7 +758,7 @@ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
                                sb->s_blocksize - offset : toread;
 
                tmp_bh.b_state = 0;
-               tmp_bh.b_size = 1 << inode->i_blkbits;
+               tmp_bh.b_size = i_blocksize(inode);
                err = jfs_get_block(inode, blk, &tmp_bh, 0);
                if (err)
                        return err;
@@ -798,7 +798,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
                                sb->s_blocksize - offset : towrite;
 
                tmp_bh.b_state = 0;
-               tmp_bh.b_size = 1 << inode->i_blkbits;
+               tmp_bh.b_size = i_blocksize(inode);
                err = jfs_get_block(inode, blk, &tmp_bh, 1);
                if (err)
                        goto out;
index 439b946c48080f1ffad11427c8f4c3d68b2e9fa6..db5900aaa55a47c7d804ac8a51072a577298f517 100644 (file)
@@ -478,7 +478,7 @@ static void kernfs_drain(struct kernfs_node *kn)
                rwsem_release(&kn->dep_map, 1, _RET_IP_);
        }
 
-       kernfs_unmap_bin_file(kn);
+       kernfs_drain_open_files(kn);
 
        mutex_lock(&kernfs_mutex);
 }
index 78219d5644e90aacaf3aeb9fdfe2a234f4e31b84..35043a8c452905173487e10dc37f9d749cd1e9ab 100644 (file)
@@ -348,9 +348,9 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
        kernfs_put_active(of->kn);
 }
 
-static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kernfs_vma_fault(struct vm_fault *vmf)
 {
-       struct file *file = vma->vm_file;
+       struct file *file = vmf->vma->vm_file;
        struct kernfs_open_file *of = kernfs_of(file);
        int ret;
 
@@ -362,16 +362,15 @@ static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        ret = VM_FAULT_SIGBUS;
        if (of->vm_ops->fault)
-               ret = of->vm_ops->fault(vma, vmf);
+               ret = of->vm_ops->fault(vmf);
 
        kernfs_put_active(of->kn);
        return ret;
 }
 
-static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
-                                  struct vm_fault *vmf)
+static int kernfs_vma_page_mkwrite(struct vm_fault *vmf)
 {
-       struct file *file = vma->vm_file;
+       struct file *file = vmf->vma->vm_file;
        struct kernfs_open_file *of = kernfs_of(file);
        int ret;
 
@@ -383,7 +382,7 @@ static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
 
        ret = 0;
        if (of->vm_ops->page_mkwrite)
-               ret = of->vm_ops->page_mkwrite(vma, vmf);
+               ret = of->vm_ops->page_mkwrite(vmf);
        else
                file_update_time(file);
 
@@ -516,7 +515,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
                goto out_put;
 
        rc = 0;
-       of->mmapped = 1;
+       of->mmapped = true;
        of->vm_ops = vma->vm_ops;
        vma->vm_ops = &kernfs_vm_ops;
 out_put:
@@ -708,7 +707,8 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
        if (error)
                goto err_free;
 
-       ((struct seq_file *)file->private_data)->private = of;
+       of->seq_file = file->private_data;
+       of->seq_file->private = of;
 
        /* seq_file clears PWRITE unconditionally, restore it if WRITE */
        if (file->f_mode & FMODE_WRITE)
@@ -717,13 +717,22 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
        /* make sure we have open node struct */
        error = kernfs_get_open_node(kn, of);
        if (error)
-               goto err_close;
+               goto err_seq_release;
+
+       if (ops->open) {
+               /* nobody has access to @of yet, skip @of->mutex */
+               error = ops->open(of);
+               if (error)
+                       goto err_put_node;
+       }
 
        /* open succeeded, put active references */
        kernfs_put_active(kn);
        return 0;
 
-err_close:
+err_put_node:
+       kernfs_put_open_node(kn, of);
+err_seq_release:
        seq_release(inode, file);
 err_free:
        kfree(of->prealloc_buf);
@@ -733,11 +742,41 @@ err_out:
        return error;
 }
 
+/* used from release/drain to ensure that ->release() is called exactly once */
+static void kernfs_release_file(struct kernfs_node *kn,
+                               struct kernfs_open_file *of)
+{
+       /*
+        * @of is guaranteed to have no other file operations in flight and
+        * we just want to synchronize release and drain paths.
+        * @kernfs_open_file_mutex is enough.  @of->mutex can't be used
+        * here because drain path may be called from places which can
+        * cause circular dependency.
+        */
+       lockdep_assert_held(&kernfs_open_file_mutex);
+
+       if (!of->released) {
+               /*
+                * A file is never detached without being released and we
+                * need to be able to release files which are deactivated
+                * and being drained.  Don't use kernfs_ops().
+                */
+               kn->attr.ops->release(of);
+               of->released = true;
+       }
+}
+
 static int kernfs_fop_release(struct inode *inode, struct file *filp)
 {
        struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
        struct kernfs_open_file *of = kernfs_of(filp);
 
+       if (kn->flags & KERNFS_HAS_RELEASE) {
+               mutex_lock(&kernfs_open_file_mutex);
+               kernfs_release_file(kn, of);
+               mutex_unlock(&kernfs_open_file_mutex);
+       }
+
        kernfs_put_open_node(kn, of);
        seq_release(inode, filp);
        kfree(of->prealloc_buf);
@@ -746,12 +785,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
-void kernfs_unmap_bin_file(struct kernfs_node *kn)
+void kernfs_drain_open_files(struct kernfs_node *kn)
 {
        struct kernfs_open_node *on;
        struct kernfs_open_file *of;
 
-       if (!(kn->flags & KERNFS_HAS_MMAP))
+       if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
                return;
 
        spin_lock_irq(&kernfs_open_node_lock);
@@ -763,10 +802,16 @@ void kernfs_unmap_bin_file(struct kernfs_node *kn)
                return;
 
        mutex_lock(&kernfs_open_file_mutex);
+
        list_for_each_entry(of, &on->files, list) {
                struct inode *inode = file_inode(of->file);
-               unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+               if (kn->flags & KERNFS_HAS_MMAP)
+                       unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+               kernfs_release_file(kn, of);
        }
+
        mutex_unlock(&kernfs_open_file_mutex);
 
        kernfs_put_open_node(kn, NULL);
@@ -965,6 +1010,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
                kn->flags |= KERNFS_HAS_SEQ_SHOW;
        if (ops->mmap)
                kn->flags |= KERNFS_HAS_MMAP;
+       if (ops->release)
+               kn->flags |= KERNFS_HAS_RELEASE;
 
        rc = kernfs_add_one(kn);
        if (rc) {
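
Together these kernfs changes add paired ->open()/->release() callbacks, with kernfs_release_file() guaranteeing exactly one release per open file even when the node is drained while still open or mapped. A hypothetical user, sketched against the new ops (demo_state, demo_open and demo_release are illustrative names, not part of the patch):

    struct demo_state { int counter; };   /* hypothetical per-open state */

    static int demo_open(struct kernfs_open_file *of)
    {
            of->priv = kzalloc(sizeof(struct demo_state), GFP_KERNEL);
            return of->priv ? 0 : -ENOMEM;
    }

    static void demo_release(struct kernfs_open_file *of)
    {
            kfree(of->priv);              /* runs exactly once: on the last
                                           * fput() or when the node is
                                           * drained, whichever comes first */
    }

    static const struct kernfs_ops demo_ops = {
            .open    = demo_open,
            .release = demo_release,
            /* .seq_show etc. as usual */
    };
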
index bfd551bbf2312c10e71099dc4a49d076d36d2e28..3100987cf8baf7e923f788ca9d41350ca47de77d 100644 (file)
@@ -104,7 +104,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
  */
 extern const struct file_operations kernfs_file_fops;
 
-void kernfs_unmap_bin_file(struct kernfs_node *kn);
+void kernfs_drain_open_files(struct kernfs_node *kn);
 
 /*
  * symlink.c
index 1c13dd80744ff99cc0691476c3a2920eca9757cc..7e4ea3b9f4724f2b62f2aa7fe5d89844d07812cd 100644 (file)
@@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
                dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
                sin6.sin6_family = AF_INET6;
                sin6.sin6_addr = ifa->addr;
+               if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+                       sin6.sin6_scope_id = ifa->idev->dev->ifindex;
                svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
                        (struct sockaddr *)&sin6);
        }
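
The lockd fix matters because fe80::/10 addresses are per-link: without sin6_scope_id the sockaddr does not name a unique peer, so the temp-xprt aging cannot reliably match a link-local transport. Userspace faces the same rule when filling a sockaddr_in6 (the interface name below is illustrative):

    #include <arpa/inet.h>
    #include <net/if.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            struct sockaddr_in6 sin6;

            memset(&sin6, 0, sizeof(sin6));
            sin6.sin6_family = AF_INET6;
            inet_pton(AF_INET6, "fe80::1", &sin6.sin6_addr);

            /* a link-local address is only unique per interface */
            if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr))
                    sin6.sin6_scope_id = if_nametoindex("eth0"); /* 0 if absent */

            printf("scope id %u\n", sin6.sin6_scope_id);
            return 0;
    }
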
index 28af984a3d96f11f8848d0c5e04911773a3cc0f8..baff8f820c290e6256c274056171eee1f18c1cf7 100644 (file)
@@ -115,7 +115,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
                        SetPageUptodate(page);    
                        return;
                }
-               create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+               create_empty_buffers(page, i_blocksize(inode), 0);
        }
        head = page_buffers(page);
        page_bh = head;
index 39f57bef85318e343857e23452f60c478b804005..0c3905e0542e8902f5d988a0fe89491a378cbe51 100644 (file)
  * XXX: how are we excluding truncate/invalidate here? Maybe need to lock
  * page?
  */
-static int ncp_file_mmap_fault(struct vm_area_struct *area,
-                                       struct vm_fault *vmf)
+static int ncp_file_mmap_fault(struct vm_fault *vmf)
 {
-       struct inode *inode = file_inode(area->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        char *pg_addr;
        unsigned int already_read;
        unsigned int count;
@@ -90,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
         * -- nyc
         */
        count_vm_event(PGMAJFAULT);
-       mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT);
+       mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
        return VM_FAULT_MAJOR;
 }
 
index f32f272ee501183af0d281d3143449734f4b4452..97b111d79489c55e535c017f71379c6b3642f368 100644 (file)
@@ -525,7 +525,7 @@ static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len)
                return result;
        }
        if (result > len) {
-               pr_err("tcp: bug in recvmsg (%u > %Zu)\n", result, len);
+               pr_err("tcp: bug in recvmsg (%u > %zu)\n", result, len);
                return -EIO;                    
        }
        return result;
@@ -619,7 +619,7 @@ skipdata:;
                                        goto skipdata2;
                                }
                                if (datalen > req->datalen + 8) {
-                                       pr_err("tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8);
+                                       pr_err("tcp: Unexpected reply len %d (expected at most %zd)\n", datalen, req->datalen + 8);
                                        server->rcv.state = 3;
                                        goto skipdata;
                                }
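
The %Zu-to-%zu sweep in these ncpfs and nfs hunks replaces an old glibc-only length modifier with the standard C99 one for size_t; the printed output is identical. For reference:

    #include <stdio.h>

    int main(void)
    {
            size_t n = sizeof(long);

            printf("%zu\n", n); /* %zu: C99 length modifier for size_t */
            return 0;
    }
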
index 2905479f214a4654223ec90ae13e21138ac2be4d..0ca370d23ddb2a771b9172b260b010da2969d81d 100644 (file)
@@ -381,7 +381,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
        struct blk_plug plug;
        int i;
 
-       dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
+       dprintk("%s enter, %zu@%lld\n", __func__, count, offset);
 
        /* At this point, header->page_array is a (sequential) list of nfs_pages.
         * We want to write each, and if there is an error set pnfs_error
index eb094c6011d85bb7ce7bd544a3d203defd50b03a..fd0284c1dc328b92520aa0c39b4ca2a4b9899915 100644 (file)
@@ -1083,7 +1083,8 @@ struct svc_version nfs4_callback_version1 = {
        .vs_proc = nfs4_callback_procedures1,
        .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
        .vs_dispatch = NULL,
-       .vs_hidden = 1,
+       .vs_hidden = true,
+       .vs_need_cong_ctrl = true,
 };
 
 struct svc_version nfs4_callback_version4 = {
@@ -1092,5 +1093,6 @@ struct svc_version nfs4_callback_version4 = {
        .vs_proc = nfs4_callback_procedures1,
        .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
        .vs_dispatch = NULL,
-       .vs_hidden = 1,
+       .vs_hidden = true,
+       .vs_need_cong_ctrl = true,
 };
index 26dbe8b0c10dbf00c0aca1f800c1f99eb87c5cc7..668213984d68708c3d54cccce36ad3a0c048657e 100644 (file)
@@ -528,10 +528,10 @@ const struct address_space_operations nfs_file_aops = {
  * writable, implying that someone is about to modify the page through a
  * shared-writable mapping
  */
-static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int nfs_vm_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct file *filp = vma->vm_file;
+       struct file *filp = vmf->vma->vm_file;
        struct inode *inode = file_inode(filp);
        unsigned pagelen;
        int ret = VM_FAULT_NOPAGE;
index a3fc48ba4931d7a25c56077ac114ef279eaa4f3a..18f98e08544db97757a7701838996f368c27555d 100644 (file)
@@ -482,7 +482,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
        u32 j, idx;
        struct nfs_fh *fh;
 
-       dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
+       dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
                __func__, hdr->inode->i_ino,
                hdr->args.pgbase, (size_t)hdr->args.count, offset);
 
@@ -540,7 +540,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
        if (IS_ERR(ds_clnt))
                return PNFS_NOT_ATTEMPTED;
 
-       dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
+       dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
                __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
                offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
index 0ca4af8cca5d89f9d0ecb4d164c5164a62585b4f..d6acc688df7ed74334ae823af65a24c73f6cb4ff 100644 (file)
@@ -1751,7 +1751,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
        int vers;
        struct nfs_fh *fh;
 
-       dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
+       dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
                __func__, hdr->inode->i_ino,
                hdr->args.pgbase, (size_t)hdr->args.count, offset);
 
@@ -1828,7 +1828,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 
        vers = nfs4_ff_layout_ds_version(lseg, idx);
 
-       dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n",
+       dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
                __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
                offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count),
                vers);
index 2a4cdce939a05c3049933aa58c9f1356a921fe8c..8f3d2acb81c3d816f7bb12f78fa6c50fb8a20528 100644 (file)
@@ -291,7 +291,7 @@ objlayout_read_pagelist(struct nfs_pgio_header *hdr)
                              &hdr->args.pgbase,
                              hdr->args.offset, hdr->args.count);
 
-       dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
+       dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n",
                __func__, inode->i_ino, offset, count, hdr->res.eof);
 
        err = objio_read_pagelist(hdr);
index a06115e3161244f37f549b3d83629a620d0a2019..92b4b41d19d2a2e23b469ce993e3a9f159f5f578 100644 (file)
@@ -24,7 +24,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
 {
        struct nfsd4_layout_seg *seg = &args->lg_seg;
        struct super_block *sb = inode->i_sb;
-       u32 block_size = (1 << inode->i_blkbits);
+       u32 block_size = i_blocksize(inode);
        struct pnfs_block_extent *bex;
        struct iomap iomap;
        u32 device_generation = 0;
@@ -181,7 +181,7 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
        int nr_iomaps;
 
        nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
-                       lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+                       lcp->lc_up_len, &iomaps, i_blocksize(inode));
        if (nr_iomaps < 0)
                return nfserrno(nr_iomaps);
 
@@ -375,7 +375,7 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
        int nr_iomaps;
 
        nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
-                       lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+                       lcp->lc_up_len, &iomaps, i_blocksize(inode));
        if (nr_iomaps < 0)
                return nfserrno(nr_iomaps);
 
index 43e109cc0ccc39e8293a7c8926bcb1c105951714..e71f11b1a180c4c0ff0d3ea30d21b568e3c11511 100644 (file)
@@ -1102,6 +1102,7 @@ static struct flags {
        { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
        { NFSEXP_V4ROOT, {"v4root", ""}},
        { NFSEXP_PNFS, {"pnfs", ""}},
+       { NFSEXP_SECURITY_LABEL, {"security_label", ""}},
        { 0, {"", ""}}
 };
 
index d08cd88155c75278c4607f49c078622bf87ab5ee..838f90f3f890a00f0f0989e5c3abb79e20b273d0 100644 (file)
@@ -376,5 +376,4 @@ struct svc_version  nfsd_acl_version2 = {
                .vs_proc        = nfsd_acl_procedures2,
                .vs_dispatch    = nfsd_dispatch,
                .vs_xdrsize     = NFS3_SVC_XDRSIZE,
-               .vs_hidden      = 0,
 };
index 0c890347cde3d9559b0b0103c2c2c11825d51fae..dcb5f79076c0cb3cb12400575cb7e3d3cfa9e26d 100644 (file)
@@ -266,6 +266,5 @@ struct svc_version  nfsd_acl_version3 = {
                .vs_proc        = nfsd_acl_procedures3,
                .vs_dispatch    = nfsd_dispatch,
                .vs_xdrsize     = NFS3_SVC_XDRSIZE,
-               .vs_hidden      = 0,
 };
 
index d818e4ffd79f9acd01c5f08384cd99a1bfad7243..045c9081eabeb0242a0f60d49ec9177dc9c0c6f4 100644 (file)
@@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
 
        fh_copy(&resp->fh, &argp->fh);
        resp->committed = argp->stable;
-       nfserr = nfsd_write(rqstp, &resp->fh, NULL,
-                                  argp->offset,
-                                  rqstp->rq_vec, argp->vlen,
-                                  &cnt,
-                                  &resp->committed);
+       nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
+                               rqstp->rq_vec, argp->vlen,
+                               &cnt, resp->committed);
        resp->count = cnt;
        RETURN_STATUS(nfserr);
 }
index eb78109d666c1a4d8cc62fea22919f824ae93024..0274db6e65d0d6775d0b6c9c9e72e2f0c6a5fa57 100644 (file)
@@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
        p = xdr_inline_decode(xdr, length + 4);
        if (unlikely(p == NULL))
                goto out_overflow;
+       p += XDR_QUADLEN(length);
        hdr->nops = be32_to_cpup(p);
        return 0;
 out_overflow:
@@ -396,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
                                    struct nfsd4_callback *cb)
 {
        struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
-       struct nfs4_sessionid id;
-       int status;
+       int status = -ESERVERFAULT;
        __be32 *p;
        u32 dummy;
 
-       status = -ESERVERFAULT;
-
        /*
         * If the server returns different values for sessionID, slotID or
         * sequence number, the server is looney tunes.
@@ -410,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
        p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
        if (unlikely(p == NULL))
                goto out_overflow;
-       memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
-       if (memcmp(id.data, session->se_sessionid.data,
-                                       NFS4_MAX_SESSIONID_LEN) != 0) {
+
+       if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
                dprintk("NFS: %s Invalid session id\n", __func__);
                goto out;
        }
@@ -753,6 +750,14 @@ int set_callback_cred(void)
        return 0;
 }
 
+void cleanup_callback_cred(void)
+{
+       if (callback_cred) {
+               put_rpccred(callback_cred);
+               callback_cred = NULL;
+       }
+}
+
 static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
 {
        if (clp->cl_minorversion == 0) {
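
The one-line decode_cb_compound4res() fix above is easy to miss: after pulling the reply tag (an XDR opaque of `length` bytes, padded to a 4-byte boundary), the cursor must advance past the opaque body before nops is read, otherwise nops is decoded from the tag itself. A standalone model of the cursor arithmetic:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    /* same rounding as the kernel's XDR_QUADLEN(): bytes -> padded words */
    #define XDR_QUADLEN(l) (((l) + 3) >> 2)

    int main(void)
    {
            /* opaque tag "cb" (length 2, padded to one word), then nops = 1 */
            uint32_t buf[] = { htonl(2), htonl(0x63620000), htonl(1) };
            uint32_t *p = buf;

            uint32_t length = ntohl(*p++);    /* tag length */
            p += XDR_QUADLEN(length);         /* the fix: skip tag body + pad */
            printf("nops = %u\n", ntohl(*p)); /* nops = 1 */
            return 0;
    }
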
index 5b20577dcdd233162d8030003758274d7619d038..6b9b6cca469f427fed55ec5d892141e38be23eb4 100644 (file)
@@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
 {
        __be32 status;
        u32 id = -1;
+
+       if (name == NULL || namelen == 0)
+               return nfserr_inval;
+
        status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
        *uid = make_kuid(&init_user_ns, id);
        if (!uid_valid(*uid))
@@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
 {
        __be32 status;
        u32 id = -1;
+
+       if (name == NULL || namelen == 0)
+               return nfserr_inval;
+
        status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
        *gid = make_kgid(&init_user_ns, id);
        if (!gid_valid(*gid))
index 74a6e573e061afa73fba49d8c65a09b7d470229d..cbeeda1e94a2fbbba61e2adeeb4f9ba89287eaf9 100644 (file)
@@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                   u32 *bmval, u32 *writable)
 {
        struct dentry *dentry = cstate->current_fh.fh_dentry;
+       struct svc_export *exp = cstate->current_fh.fh_export;
 
        if (!nfsd_attrs_supported(cstate->minorversion, bmval))
                return nfserr_attrnotsupp;
        if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
                return nfserr_attrnotsupp;
+       if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) &&
+                       !(exp->ex_flags & NFSEXP_SECURITY_LABEL))
+               return nfserr_attrnotsupp;
        if (writable && !bmval_is_subset(bmval, writable))
                return nfserr_inval;
        if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
@@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
        status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
                                write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
-                               &write->wr_how_written);
+                               write->wr_how_written);
        fput(filp);
 
        write->wr_bytes_written = cnt;
@@ -1838,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd
        return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
 }
 
+static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       /* ac_supported, ac_resp_access */
+       return (op_encode_hdr_size + 2)* sizeof(__be32);
+}
+
 static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
@@ -1892,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
        return ret;
 }
 
+static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
+}
+
 static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1933,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
                XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
+}
+
 static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1952,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
                + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
+               * sizeof(__be32);
+}
+
 static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
+               (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
+}
+
 static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
@@ -2011,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 }
 
 #ifdef CONFIG_NFSD_PNFS
+static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       u32 maxcount = 0, rlen = 0;
+
+       maxcount = svc_max_payload(rqstp);
+       rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
+
+       return (op_encode_hdr_size +
+               1 /* gd_layout_type*/ +
+               XDR_QUADLEN(rlen) +
+               2 /* gd_notify_types */) * sizeof(__be32);
+}
+
 /*
  * At this stage we don't really know what layout driver will handle the request,
  * so we need to define an arbitrary upper bound here.
@@ -2040,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
 }
 #endif /* CONFIG_NFSD_PNFS */
 
+
+static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + 3) * sizeof(__be32);
+}
+
 static struct nfsd4_operation nfsd4_ops[] = {
        [OP_ACCESS] = {
                .op_func = (nfsd4op_func)nfsd4_access,
                .op_name = "OP_ACCESS",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
        },
        [OP_CLOSE] = {
                .op_func = (nfsd4op_func)nfsd4_close,
@@ -2081,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_GETFH] = {
                .op_func = (nfsd4op_func)nfsd4_getfh,
                .op_name = "OP_GETFH",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
        },
        [OP_LINK] = {
                .op_func = (nfsd4op_func)nfsd4_link,
@@ -2099,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_LOCKT] = {
                .op_func = (nfsd4op_func)nfsd4_lockt,
                .op_name = "OP_LOCKT",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
        },
        [OP_LOCKU] = {
                .op_func = (nfsd4op_func)nfsd4_locku,
@@ -2111,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_lookup,
                .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
                .op_name = "OP_LOOKUP",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_LOOKUPP] = {
                .op_func = (nfsd4op_func)nfsd4_lookupp,
                .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
                .op_name = "OP_LOOKUPP",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_NVERIFY] = {
                .op_func = (nfsd4op_func)nfsd4_nverify,
                .op_name = "OP_NVERIFY",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_OPEN] = {
                .op_func = (nfsd4op_func)nfsd4_open,
@@ -2177,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_READLINK] = {
                .op_func = (nfsd4op_func)nfsd4_readlink,
                .op_name = "OP_READLINK",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
        },
        [OP_REMOVE] = {
                .op_func = (nfsd4op_func)nfsd4_remove,
@@ -2215,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_secinfo,
                .op_flags = OP_HANDLES_WRONGSEC,
                .op_name = "OP_SECINFO",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
        },
        [OP_SETATTR] = {
                .op_func = (nfsd4op_func)nfsd4_setattr,
@@ -2240,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_VERIFY] = {
                .op_func = (nfsd4op_func)nfsd4_verify,
                .op_name = "OP_VERIFY",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_WRITE] = {
                .op_func = (nfsd4op_func)nfsd4_write,
@@ -2314,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
                .op_flags = OP_HANDLES_WRONGSEC,
                .op_name = "OP_SECINFO_NO_NAME",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
        },
        [OP_TEST_STATEID] = {
                .op_func = (nfsd4op_func)nfsd4_test_stateid,
                .op_flags = ALLOWED_WITHOUT_FH,
                .op_name = "OP_TEST_STATEID",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
        },
        [OP_FREE_STATEID] = {
                .op_func = (nfsd4op_func)nfsd4_free_stateid,
@@ -2332,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
                .op_flags = ALLOWED_WITHOUT_FH,
                .op_name = "OP_GETDEVICEINFO",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
        },
        [OP_LAYOUTGET] = {
                .op_func = (nfsd4op_func)nfsd4_layoutget,
@@ -2381,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_SEEK] = {
                .op_func = (nfsd4op_func)nfsd4_seek,
                .op_name = "OP_SEEK",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
        },
 };
 
@@ -2425,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
 
 int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-       struct nfsd4_operation *opdesc;
-       nfsd4op_rsize estimator;
-
        if (op->opnum == OP_ILLEGAL)
                return op_encode_hdr_size * sizeof(__be32);
-       opdesc = OPDESC(op);
-       estimator = opdesc->op_rsize_bop;
-       return estimator ? estimator(rqstp, op) : PAGE_SIZE;
+
+       BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
+       return OPDESC(op)->op_rsize_bop(rqstp, op);
 }
 
 void warn_on_nonidempotent_op(struct nfsd4_op *op)
@@ -2476,12 +2537,13 @@ static struct svc_procedure             nfsd_procedures4[2] = {
 };
 
 struct svc_version     nfsd_version4 = {
-               .vs_vers        = 4,
-               .vs_nproc       = 2,
-               .vs_proc        = nfsd_procedures4,
-               .vs_dispatch    = nfsd_dispatch,
-               .vs_xdrsize     = NFS4_SVC_XDRSIZE,
-               .vs_rpcb_optnl  = 1,
+       .vs_vers                = 4,
+       .vs_nproc               = 2,
+       .vs_proc                = nfsd_procedures4,
+       .vs_dispatch            = nfsd_dispatch,
+       .vs_xdrsize             = NFS4_SVC_XDRSIZE,
+       .vs_rpcb_optnl          = true,
+       .vs_need_cong_ctrl      = true,
 };
 
 /*
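
All of the op_rsize_bop estimators added above follow one pattern: count the worst-case reply in 4-byte XDR words, multiply by sizeof(__be32), and add any variable-length payload on top (NFS4_FHSIZE for GETFH, PAGE_SIZE for READLINK). nfsd4_max_reply() now BUG_ON()s a missing estimator instead of silently assuming PAGE_SIZE, which is why every remaining operation gains one. A user-space sketch of the ACCESS arithmetic, with op_encode_hdr_size an assumed stand-in for the kernel's value:

    typedef unsigned int be32;          /* stands in for __be32 */
    enum { op_encode_hdr_size = 2 };    /* assumption: status + opcode words */

    static unsigned int access_rsize_bytes(void)
    {
            /* ac_supported, ac_resp_access: two 32-bit words */
            return (op_encode_hdr_size + 2) * sizeof(be32);
    }
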
index a0dee8ae9f97f16a18e40ba19f8e84a45ad1a02b..e9ef50addddb4489534bc07f138cd8c321d9193d 100644 (file)
@@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
 out_err:
        conn->cb_addr.ss_family = AF_UNSPEC;
        conn->cb_addrlen = 0;
-       dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+       dprintk("NFSD: this client (clientid %08x/%08x) "
                "will not receive delegations\n",
                clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 
@@ -7012,23 +7012,24 @@ nfs4_state_start(void)
 
        ret = set_callback_cred();
        if (ret)
-               return -ENOMEM;
+               return ret;
+
        laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
        if (laundry_wq == NULL) {
                ret = -ENOMEM;
-               goto out_recovery;
+               goto out_cleanup_cred;
        }
        ret = nfsd4_create_callback_queue();
        if (ret)
                goto out_free_laundry;
 
        set_max_delegations();
-
        return 0;
 
 out_free_laundry:
        destroy_workqueue(laundry_wq);
-out_recovery:
+out_cleanup_cred:
+       cleanup_callback_cred();
        return ret;
 }
 
@@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void)
 {
        destroy_workqueue(laundry_wq);
        nfsd4_destroy_callback_queue();
+       cleanup_callback_cred();
 }
 
 static void
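
The nfs4_state_start() changes do two things: propagate the real error from set_callback_cred() instead of flattening it to -ENOMEM, and release the callback credential on the failure path (and, above, in nfs4_state_shutdown()). The result is the usual goto unwind ladder; a sketch with placeholder names, not kernel symbols:

    int acquire_cred(void);             /* cf. set_callback_cred() */
    int acquire_wq(void);               /* cf. alloc_workqueue() */
    int acquire_cb_queue(void);         /* cf. nfsd4_create_callback_queue() */
    void release_wq(void);
    void release_cred(void);            /* cf. cleanup_callback_cred() */

    int start(void)
    {
            int ret;

            ret = acquire_cred();
            if (ret)
                    return ret;         /* nothing to unwind yet */
            ret = acquire_wq();
            if (ret)
                    goto out_cleanup_cred;
            ret = acquire_cb_queue();
            if (ret)
                    goto out_free_wq;
            return 0;

    out_free_wq:
            release_wq();
    out_cleanup_cred:
            release_cred();
            return ret;
    }
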
index 8fae53ce21d16c8406ff01425d924eb044edee34..382c1fd05b4c8dfe2973d466bae01d6963eb7c43 100644 (file)
@@ -58,7 +58,7 @@
 
 #define NFSDDBG_FACILITY               NFSDDBG_XDR
 
-u32 nfsd_suppattrs[3][3] = {
+const u32 nfsd_suppattrs[3][3] = {
        {NFSD4_SUPPORTED_ATTRS_WORD0,
         NFSD4_SUPPORTED_ATTRS_WORD1,
         NFSD4_SUPPORTED_ATTRS_WORD2},
@@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
        READ_BUF(16);
        p = xdr_decode_hyper(p, &write->wr_offset);
        write->wr_stable_how = be32_to_cpup(p++);
-       if (write->wr_stable_how > 2)
+       if (write->wr_stable_how > NFS_FILE_SYNC)
                goto xdr_error;
        write->wr_buflen = be32_to_cpup(p++);
 
@@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
                } else
                        max_reply += nfsd4_max_reply(argp->rqstp, op);
                /*
-                * OP_LOCK may return a conflicting lock.  (Special case
-                * because it will just skip encoding this if it runs
-                * out of xdr buffer space, and it is the only operation
-                * that behaves this way.)
+                * OP_LOCK and OP_LOCKT may return a conflicting lock.
+                * (Special case because it will just skip encoding this
+                * if it runs out of xdr buffer space, and it is the only
+                * operation that behaves this way.)
                 */
-               if (op->opnum == OP_LOCK)
+               if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT)
                        max_reply += NFS4_OPAQUE_LIMIT;
 
                if (op->status) {
@@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
        DECODE_TAIL;
 }
 
-static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+                            struct svc_export *exp)
 {
-       if (IS_I_VERSION(inode)) {
+       if (exp->ex_flags & NFSEXP_V4ROOT) {
+               *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
+               *p++ = 0;
+       } else if (IS_I_VERSION(inode)) {
                p = xdr_encode_hyper(p, inode->i_version);
        } else {
                *p++ = cpu_to_be32(stat->ctime.tv_sec);
@@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
        if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
             bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
-               err = security_inode_getsecctx(d_inode(dentry),
+               if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
+                       err = security_inode_getsecctx(d_inode(dentry),
                                                &context, &contextlen);
+               else
+                       err = -EOPNOTSUPP;
                contextsupport = (err == 0);
                if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
                        if (err == -EOPNOTSUPP)
@@ -2490,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                p = xdr_reserve_space(xdr, 8);
                if (!p)
                        goto out_resource;
-               p = encode_change(p, &stat, d_inode(dentry));
+               p = encode_change(p, &stat, d_inode(dentry), exp);
        }
        if (bmval0 & FATTR4_WORD0_SIZE) {
                p = xdr_reserve_space(xdr, 8);
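
encode_change() still emits exactly 8 bytes, but now from one of three sources: a pseudo-change value derived from the export's flush time for NFSEXP_V4ROOT exports, the inode's i_version where the filesystem maintains one, or the ctime fallback. A user-space sketch of the fallback packing (two big-endian 32-bit words):

    #include <stdint.h>
    #include <arpa/inet.h>      /* htonl() */

    static uint32_t *encode_ctime_change(uint32_t *p, uint32_t sec, uint32_t nsec)
    {
            *p++ = htonl(sec);  /* stat->ctime.tv_sec */
            *p++ = htonl(nsec); /* stat->ctime.tv_nsec */
            return p;
    }
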
index d6b97b424ad1729ee8e542abbead3040c2e98ecb..96fd15979cbd44d4275b47276e1619b5c9ee3339 100644 (file)
@@ -578,7 +578,7 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
        struct kvec     *vec = &rqstp->rq_res.head[0];
 
        if (vec->iov_len + data->iov_len > PAGE_SIZE) {
-               printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n",
+               printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
                                data->iov_len);
                return 0;
        }
index f3b2f34b10a3f19cd018c9fd8d176dfae44ea70b..73e75ac905258c17bdc107c0c071e8d14df739f0 100644 (file)
@@ -536,6 +536,19 @@ out_free:
        return rv;
 }
 
+static ssize_t
+nfsd_print_version_support(char *buf, int remaining, const char *sep,
+               unsigned vers, unsigned minor)
+{
+       const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
+       bool supported = !!nfsd_vers(vers, NFSD_TEST);
+
+       if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
+               supported = false;
+       return snprintf(buf, remaining, format, sep,
+                       supported ? '+' : '-', vers, minor);
+}
+
 static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 {
        char *mesg = buf;
@@ -561,6 +574,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                len = qword_get(&mesg, vers, size);
                if (len <= 0) return -EINVAL;
                do {
+                       enum vers_op cmd;
                        sign = *vers;
                        if (sign == '+' || sign == '-')
                                num = simple_strtol((vers+1), &minorp, 0);
@@ -569,24 +583,22 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                        if (*minorp == '.') {
                                if (num != 4)
                                        return -EINVAL;
-                               minor = simple_strtoul(minorp+1, NULL, 0);
-                               if (minor == 0)
-                                       return -EINVAL;
-                               if (nfsd_minorversion(minor, sign == '-' ?
-                                                    NFSD_CLEAR : NFSD_SET) < 0)
+                               if (kstrtouint(minorp+1, 0, &minor) < 0)
                                        return -EINVAL;
-                               goto next;
-                       }
+                       } else
+                               minor = 0;
+                       cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
                        switch(num) {
                        case 2:
                        case 3:
-                       case 4:
-                               nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
+                               nfsd_vers(num, cmd);
                                break;
+                       case 4:
+                               if (nfsd_minorversion(minor, cmd) >= 0)
+                                       break;
                        default:
                                return -EINVAL;
                        }
-               next:
                        vers += len + 1;
                } while ((len = qword_get(&mesg, vers, size)) > 0);
                /* If all get turned off, turn them back on, as
@@ -599,35 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
        len = 0;
        sep = "";
        remaining = SIMPLE_TRANSACTION_LIMIT;
-       for (num=2 ; num <= 4 ; num++)
-               if (nfsd_vers(num, NFSD_AVAIL)) {
-                       len = snprintf(buf, remaining, "%s%c%d", sep,
-                                      nfsd_vers(num, NFSD_TEST)?'+':'-',
-                                      num);
-                       sep = " ";
-
-                       if (len >= remaining)
-                               break;
-                       remaining -= len;
-                       buf += len;
-                       tlen += len;
-               }
-       if (nfsd_vers(4, NFSD_AVAIL))
-               for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
-                    minor++) {
-                       len = snprintf(buf, remaining, " %c4.%u",
-                                       (nfsd_vers(4, NFSD_TEST) &&
-                                        nfsd_minorversion(minor, NFSD_TEST)) ?
-                                               '+' : '-',
-                                       minor);
-
+       for (num=2 ; num <= 4 ; num++) {
+               if (!nfsd_vers(num, NFSD_AVAIL))
+                       continue;
+               minor = 0;
+               do {
+                       len = nfsd_print_version_support(buf, remaining,
+                                       sep, num, minor);
                        if (len >= remaining)
-                               break;
+                               goto out;
                        remaining -= len;
                        buf += len;
                        tlen += len;
-               }
-
+                       minor++;
+                       sep = " ";
+               } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
+       }
+out:
        len = snprintf(buf, remaining, "\n");
        if (len >= remaining)
                return -EINVAL;
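
The rewritten __write_versions() folds each "+N"/"-N[.M]" token into a single NFSD_SET/NFSD_CLEAR command, and the output side prints every version, including the 4.x minors, through one helper, giving a line of roughly the form "+2 +3 +4 +4.1 -4.2" (exact content depends on configuration). A user-space sketch of the helper's format choice; note that for minor == 0 the short format simply ignores the trailing argument, so 4.0 support prints as "+4"/"-4":

    #include <stdio.h>

    static int print_vers(char *buf, size_t remaining, const char *sep,
                          unsigned vers, unsigned minor, int supported)
    {
            const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";

            /* the extra "minor" argument is evaluated but unused for minor == 0 */
            return snprintf(buf, remaining, format, sep,
                            supported ? '+' : '-', vers, minor);
    }
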
index d74c8c44dc3536ffdd6a93b0cd340121233e5a95..d96606801d47ae6ee9927a2991263780c00840d7 100644 (file)
@@ -362,16 +362,16 @@ void              nfsd_lockd_shutdown(void);
        FATTR4_WORD2_MODE_UMASK | \
        NFSD4_2_SECURITY_ATTRS)
 
-extern u32 nfsd_suppattrs[3][3];
+extern const u32 nfsd_suppattrs[3][3];
 
-static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
+static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
 {
        return !((bm1[0] & ~bm2[0]) ||
                 (bm1[1] & ~bm2[1]) ||
                 (bm1[2] & ~bm2[2]));
 }
 
-static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
+static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
 {
        return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
 }
index 010aff5c5a79f2e91eaefaa671f77cafdf4c1cb5..fa82b7707e8531f9b7e8065391c3f54387c2740d 100644 (file)
@@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
                                        struct nfsd_attrstat  *resp)
 {
        __be32  nfserr;
-       int     stable = 1;
        unsigned long cnt = argp->len;
 
        dprintk("nfsd: WRITE    %s %d bytes at %d\n",
                SVCFH_fmt(&argp->fh),
                argp->len, argp->offset);
 
-       nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
-                                  argp->offset,
-                                  rqstp->rq_vec, argp->vlen,
-                                  &cnt,
-                                  &stable);
+       nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
+                               rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
        return nfsd_return_attrs(nfserr, resp);
 }
 
index e6bfd96734c006587bd1709d0df48818c2065789..efd66da992010ffe5aeb877e2e6f5ab0d850bced 100644 (file)
@@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change)
        return 0;
 }
 
+static void
+nfsd_adjust_nfsd_versions4(void)
+{
+       unsigned i;
+
+       for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
+               if (nfsd_supported_minorversions[i])
+                       return;
+       }
+       nfsd_vers(4, NFSD_CLEAR);
+}
+
 int nfsd_minorversion(u32 minorversion, enum vers_op change)
 {
        if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
@@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
        switch(change) {
        case NFSD_SET:
                nfsd_supported_minorversions[minorversion] = true;
+               nfsd_vers(4, NFSD_SET);
                break;
        case NFSD_CLEAR:
                nfsd_supported_minorversions[minorversion] = false;
+               nfsd_adjust_nfsd_versions4();
                break;
        case NFSD_TEST:
                return nfsd_supported_minorversions[minorversion];
@@ -354,6 +368,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
                dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
                sin6.sin6_family = AF_INET6;
                sin6.sin6_addr = ifa->addr;
+               if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+                       sin6.sin6_scope_id = ifa->idev->dev->ifindex;
                svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
        }
 
index 4516e8b7d776305d94fb89f86256ee3fc54dec27..005c911b34ac4553a2c02da05b4e5d975b660710 100644 (file)
@@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
                struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
 extern int set_callback_cred(void);
+extern void cleanup_callback_cred(void);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
 extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
index 26c6fdb4bf67cf1e3e3a843e8e816d7a76eae265..19d50f600e8d48c6f493130076606a6213de258d 100644 (file)
@@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        __be32          err;
        int             host_err;
        bool            get_write_count;
-       int             size_change = 0;
+       bool            size_change = (iap->ia_valid & ATTR_SIZE);
 
        if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
                accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        /* Get inode */
        err = fh_verify(rqstp, fhp, ftype, accmode);
        if (err)
-               goto out;
+               return err;
        if (get_write_count) {
                host_err = fh_want_write(fhp);
                if (host_err)
-                       return nfserrno(host_err);
+                       goto out;
        }
 
        dentry = fhp->fh_dentry;
@@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                iap->ia_valid &= ~ATTR_MODE;
 
        if (!iap->ia_valid)
-               goto out;
+               return 0;
 
        nfsd_sanitize_attrs(inode, iap);
 
+       if (check_guard && guardtime != inode->i_ctime.tv_sec)
+               return nfserr_notsync;
+
        /*
         * The size case is special, it changes the file in addition to the
-        * attributes.
+        * attributes, and file systems don't expect it to be mixed with
+        * "random" attribute changes.  We thus split out the size change
+        * into a separate call to ->setattr, and do the rest as a separate
+        * setattr call.
         */
-       if (iap->ia_valid & ATTR_SIZE) {
+       if (size_change) {
                err = nfsd_get_write_access(rqstp, fhp, iap);
                if (err)
-                       goto out;
-               size_change = 1;
+                       return err;
+       }
 
+       fh_lock(fhp);
+       if (size_change) {
                /*
                 * RFC5661, Section 18.30.4:
                 *   Changing the size of a file with SETATTR indirectly
@@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                 *
                 * (and similar for the older RFCs)
                 */
-               if (iap->ia_size != i_size_read(inode))
-                       iap->ia_valid |= ATTR_MTIME;
-       }
+               struct iattr size_attr = {
+                       .ia_valid       = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+                       .ia_size        = iap->ia_size,
+               };
 
-       iap->ia_valid |= ATTR_CTIME;
+               host_err = notify_change(dentry, &size_attr, NULL);
+               if (host_err)
+                       goto out_unlock;
+               iap->ia_valid &= ~ATTR_SIZE;
 
-       if (check_guard && guardtime != inode->i_ctime.tv_sec) {
-               err = nfserr_notsync;
-               goto out_put_write_access;
+               /*
+                * Avoid the additional setattr call below if the only other
+                * attribute that the client sends is the mtime, as we update
+                * it as part of the size change above.
+                */
+               if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+                       goto out_unlock;
        }
 
-       fh_lock(fhp);
+       iap->ia_valid |= ATTR_CTIME;
        host_err = notify_change(dentry, iap, NULL);
-       fh_unlock(fhp);
-       err = nfserrno(host_err);
 
-out_put_write_access:
+out_unlock:
+       fh_unlock(fhp);
        if (size_change)
                put_write_access(inode);
-       if (!err)
-               err = nfserrno(commit_metadata(fhp));
 out:
-       return err;
+       if (!host_err)
+               host_err = commit_metadata(fhp);
+       return nfserrno(host_err);
 }
 
 #if defined(CONFIG_NFSD_V4)
@@ -940,14 +955,12 @@ static int wait_for_concurrent_writes(struct file *file)
 __be32
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                                loff_t offset, struct kvec *vec, int vlen,
-                               unsigned long *cnt, int *stablep)
+                               unsigned long *cnt, int stable)
 {
        struct svc_export       *exp;
-       struct inode            *inode;
        mm_segment_t            oldfs;
        __be32                  err = 0;
        int                     host_err;
-       int                     stable = *stablep;
        int                     use_wgather;
        loff_t                  pos = offset;
        unsigned int            pflags = current->flags;
@@ -962,13 +975,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                 */
                current->flags |= PF_LESS_THROTTLE;
 
-       inode = file_inode(file);
-       exp   = fhp->fh_export;
-
+       exp = fhp->fh_export;
        use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
 
        if (!EX_ISSYNC(exp))
-               stable = 0;
+               stable = NFS_UNSTABLE;
 
        if (stable && !use_wgather)
                flags |= RWF_SYNC;
@@ -1035,35 +1046,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
  * N.B. After this call fhp needs an fh_put
  */
 __be32
-nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
-               loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
-               int *stablep)
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+          struct kvec *vec, int vlen, unsigned long *cnt, int stable)
 {
-       __be32                  err = 0;
+       struct file *file = NULL;
+       __be32 err = 0;
 
        trace_write_start(rqstp, fhp, offset, vlen);
 
-       if (file) {
-               err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
-                               NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
-               if (err)
-                       goto out;
-               trace_write_opened(rqstp, fhp, offset, vlen);
-               err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
-                               stablep);
-               trace_write_io_done(rqstp, fhp, offset, vlen);
-       } else {
-               err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
-               if (err)
-                       goto out;
+       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+       if (err)
+               goto out;
 
-               trace_write_opened(rqstp, fhp, offset, vlen);
-               if (cnt)
-                       err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
-                                            cnt, stablep);
-               trace_write_io_done(rqstp, fhp, offset, vlen);
-               fput(file);
-       }
+       trace_write_opened(rqstp, fhp, offset, vlen);
+       err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
+       trace_write_io_done(rqstp, fhp, offset, vlen);
+       fput(file);
 out:
        trace_write_done(rqstp, fhp, offset, vlen);
        return err;
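
nfsd_write() and nfsd_vfs_write() now take the write-stability level by value instead of through an in/out pointer, and NFSv2 WRITE passes NFS_DATA_SYNC outright rather than a local variable. For reference, the three levels from the NFS headers, whose ordering is what lets the decoder check "> NFS_FILE_SYNC" instead of the magic "> 2":

    enum nfs3_stable_how {
            NFS_UNSTABLE  = 0,  /* server may cache; client must COMMIT */
            NFS_DATA_SYNC = 1,  /* data on stable storage before replying */
            NFS_FILE_SYNC = 2,  /* data and metadata stable before replying */
    };
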
index 0bf9e7bf5800af3855e3d93aaec194dcbea93ba6..db98c48c735aaae5a914a6e2073391ceab436a2f 100644 (file)
@@ -83,12 +83,12 @@ __be32              nfsd_readv(struct file *, loff_t, struct kvec *, int,
                                unsigned long *);
 __be32                 nfsd_read(struct svc_rqst *, struct svc_fh *,
                                loff_t, struct kvec *, int, unsigned long *);
-__be32                 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
-                               loff_t, struct kvec *,int, unsigned long *, int *);
+__be32                 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
+                               struct kvec *, int, unsigned long *, int);
 __be32         nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                struct file *file, loff_t offset,
                                struct kvec *vec, int vlen, unsigned long *cnt,
-                               int *stablep);
+                               int stable);
 __be32         nfsd_readlink(struct svc_rqst *, struct svc_fh *,
                                char *, int *);
 __be32         nfsd_symlink(struct svc_rqst *, struct svc_fh *,
index 2c90e285d7c6725089641bc2a3fcb336b35d054b..03b8ba933eb2a3dcc781eaf9e57594da5a40f48b 100644 (file)
@@ -34,7 +34,7 @@
 static inline unsigned long
 nilfs_palloc_groups_per_desc_block(const struct inode *inode)
 {
-       return (1UL << inode->i_blkbits) /
+       return i_blocksize(inode) /
                sizeof(struct nilfs_palloc_group_desc);
 }
 
index d5c23da43513cc60531cf3711afb27517858f5c1..c21e0b4454a6762a2d6bbbed84dd03858afa2185 100644 (file)
@@ -50,7 +50,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
                brelse(bh);
                BUG();
        }
-       memset(bh->b_data, 0, 1 << inode->i_blkbits);
+       memset(bh->b_data, 0, i_blocksize(inode));
        bh->b_bdev = inode->i_sb->s_bdev;
        bh->b_blocknr = blocknr;
        set_buffer_mapped(bh);
index 2e315f9f2e51d6f19fae04ced022777af1186cdf..06ffa135dfa651590a7a0dc25c76d0cb632bfe94 100644 (file)
@@ -119,7 +119,7 @@ nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 
 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 {
-       return 1 << btree->b_inode->i_blkbits;
+       return i_blocksize(btree->b_inode);
 }
 
 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
@@ -1870,7 +1870,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
                di = &dreq;
                ni = NULL;
        } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
-                          1 << btree->b_inode->i_blkbits)) {
+                          nilfs_btree_node_size(btree))) {
                di = &dreq;
                ni = &nreq;
        } else {
index 547381f3ce137f1371805fea00a94206bb8e1b56..c5fa3dee72fc0b962dec9d3134d8c0d723422134 100644 (file)
@@ -51,8 +51,9 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        return err;
 }
 
-static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int nilfs_page_mkwrite(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct page *page = vmf->page;
        struct inode *inode = file_inode(vma->vm_file);
        struct nilfs_transaction_info ti;
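
The nilfs_page_mkwrite() change here, like the ocfs2 mmap hunks further down, tracks the vm_operations_struct signature change: fault handlers no longer receive the vm_area_struct and instead recover it from the new vmf->vma field. A kernel-context sketch (assumes <linux/mm.h>; the handler name is a placeholder):

    static int example_fault(struct vm_fault *vmf)
    {
            struct vm_area_struct *vma = vmf->vma;  /* was a parameter */

            if (!vma->vm_file)
                    return VM_FAULT_SIGBUS;
            return filemap_fault(vmf);  /* core helpers also take only vmf now */
    }
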
index c7f4fef9ebf5fa8b1215fc065dbdc68ab84dd039..7ffe71a8dfb9b3de3d2d952931fa36f0ac6d90c6 100644 (file)
@@ -51,7 +51,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n)
 {
        struct nilfs_root *root = NILFS_I(inode)->i_root;
 
-       inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
+       inode_add_bytes(inode, i_blocksize(inode) * n);
        if (root)
                atomic64_add(n, &root->blocks_count);
 }
@@ -60,7 +60,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
 {
        struct nilfs_root *root = NILFS_I(inode)->i_root;
 
-       inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
+       inode_sub_bytes(inode, i_blocksize(inode) * n);
        if (root)
                atomic64_sub(n, &root->blocks_count);
 }
index d56d3a5bea88d19f6bb1e1ed5854d8c890bd2d48..98835ed6bef40c8ccdc81a0707538a6489f8e57e 100644 (file)
@@ -57,7 +57,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
        set_buffer_mapped(bh);
 
        kaddr = kmap_atomic(bh->b_page);
-       memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
+       memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
        if (init_block)
                init_block(inode, bh, kaddr);
        flush_dcache_page(bh->b_page);
@@ -501,7 +501,7 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
        struct nilfs_mdt_info *mi = NILFS_MDT(inode);
 
        mi->mi_entry_size = entry_size;
-       mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size;
+       mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
        mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
 }
 
index bedcae2c28e623291d89a8056290092ff55d9dd9..7d18d62e8e079cf2e40a2364ec20d40f64407c39 100644 (file)
@@ -723,7 +723,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
 
                lock_page(page);
                if (!page_has_buffers(page))
-                       create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+                       create_empty_buffers(page, i_blocksize(inode), 0);
                unlock_page(page);
 
                bh = head = page_buffers(page);
index 11556b7d93ecdf934ede8efb431615d4d43ec0eb..88a31e9340a0fb8c658a48b764e66b0926345848 100644 (file)
@@ -608,7 +608,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
        int ret = 0;
        struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
        unsigned int block_end, block_start;
-       unsigned int bsize = 1 << inode->i_blkbits;
+       unsigned int bsize = i_blocksize(inode);
 
        if (!page_has_buffers(page))
                create_empty_buffers(page, bsize, 0);
index 7025d8c279991e36ec5f9eaaebf585254b3604dd..3e04279446e8d2270cf1fdf11b589a08765952f9 100644 (file)
@@ -2924,7 +2924,7 @@ again:
        /*
         * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
         * another try; otherwise, we are sure the MIGRATING state is there,
-        * drop the unneded state which blocked threads trying to DIRTY
+        * drop the unneeded state which blocked threads trying to DIRTY
         */
        spin_lock(&res->spinlock);
        BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
index 7b6a146327d7166f7505af53c1c50e07ae1310a1..8836305eb3786598ed6e9e29d3837ded620e3997 100644 (file)
@@ -808,7 +808,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
        /* We know that zero_from is block aligned */
        for (block_start = zero_from; block_start < zero_to;
             block_start = block_end) {
-               block_end = block_start + (1 << inode->i_blkbits);
+               block_end = block_start + i_blocksize(inode);
 
                /*
                 * block_start is block-aligned.  Bump it by one to force
index 429088786e93d95c6e107783ea4d09cbefaab290..098f5c71256966f492bb7d1e17b908776b48f7df 100644 (file)
 #include "ocfs2_trace.h"
 
 
-static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
+static int ocfs2_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        sigset_t oldset;
        int ret;
 
        ocfs2_block_signals(&oldset);
-       ret = filemap_fault(area, vmf);
+       ret = filemap_fault(vmf);
        ocfs2_unblock_signals(&oldset);
 
-       trace_ocfs2_fault(OCFS2_I(area->vm_file->f_mapping->host)->ip_blkno,
-                         area, vmf->page, vmf->pgoff);
+       trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno,
+                         vma, vmf->page, vmf->pgoff);
        return ret;
 }
 
@@ -127,10 +128,10 @@ out:
        return ret;
 }
 
-static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ocfs2_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct buffer_head *di_bh = NULL;
        sigset_t oldset;
        int ret;
@@ -160,7 +161,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
         */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
-       ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page);
+       ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
 
        up_write(&OCFS2_I(inode)->ip_alloc_sem);
 
index b0ced669427e154cc67c7b4b4f9e4630f934919f..c4ab6fdf17a01426db5d6e2bb9638130147bee61 100644 (file)
@@ -400,8 +400,9 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
        /* remove the op from the in progress hash table */
        op = orangefs_devreq_remove_op(head.tag);
        if (!op) {
-               gossip_err("WARNING: No one's waiting for tag %llu\n",
-                          llu(head.tag));
+               gossip_debug(GOSSIP_DEV_DEBUG,
+                            "%s: No one's waiting for tag %llu\n",
+                            __func__, llu(head.tag));
                return ret;
        }
 
index 551bc74ed2b822fe0f211d90527397da2d86bb38..5cd617980fbfa2a38d8d08ac4017bae67679bd16 100644 (file)
@@ -136,12 +136,6 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
        return -EINVAL;
 }
 
-struct backing_dev_info orangefs_backing_dev_info = {
-       .name = "orangefs",
-       .ra_pages = 0,
-       .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
-};
-
 /** ORANGEFS2 implementation of address space operations */
 const struct address_space_operations orangefs_address_operations = {
        .readpage = orangefs_readpage,
index 75375e90a63f32e4bac0ef3e996c960c673b250e..6333cbbdfef7ae652c1a4e6c4d2818ae1cd188d7 100644 (file)
@@ -344,6 +344,11 @@ int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
                     user_desc->size,
                     user_desc->count);
 
+       if (user_desc->total_size < 0 ||
+           user_desc->size < 0 ||
+           user_desc->count < 0)
+               goto out;
+
        /*
         * sanity check alignment and size of buffer that caller wants to
         * work with
index 27e75cf28b3a0c386c645bf714975c4772e14866..791912da97d7487d99c618ce8ac6f5fbe3cd11fb 100644 (file)
@@ -967,13 +967,13 @@ int orangefs_debugfs_new_client_string(void __user *arg)
        int ret;
 
        ret = copy_from_user(&client_debug_array_string,
-                                     (void __user *)arg,
-                                     ORANGEFS_MAX_DEBUG_STRING_LEN);
+                            (void __user *)arg,
+                            ORANGEFS_MAX_DEBUG_STRING_LEN);
 
        if (ret != 0) {
                pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
                        __func__);
-               return -EIO;
+               return -EFAULT;
        }
 
        /*
@@ -988,17 +988,18 @@ int orangefs_debugfs_new_client_string(void __user *arg)
         */
        client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
                '\0';
-       
+
        pr_info("%s: client debug array string has been received.\n",
                __func__);
 
        if (!help_string_initialized) {
 
                /* Build a proper debug help string. */
-               if (orangefs_prepare_debugfs_help_string(0)) {
+               ret = orangefs_prepare_debugfs_help_string(0);
+               if (ret) {
                        gossip_err("%s: no debug help string \n",
                                   __func__);
-                       return -EIO;
+                       return ret;
                }
 
        }
@@ -1011,7 +1012,7 @@ int orangefs_debugfs_new_client_string(void __user *arg)
 
        help_string_initialized++;
 
-       return ret;
+       return 0;
 }
 
 int orangefs_debugfs_new_debug(void __user *arg) 
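
Two fixes above: a copy_from_user() failure now maps to -EFAULT (a bad user pointer) rather than -EIO, and the help-string error is propagated instead of being flattened. A kernel-context sketch of the convention (assumes <linux/uaccess.h>; the wrapper name is a placeholder):

    static int fetch_from_user(void *dst, const void __user *src, size_t len)
    {
            /* copy_from_user() returns the number of bytes NOT copied */
            if (copy_from_user(dst, src, len))
                    return -EFAULT;
            return 0;
    }
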
index a3d84ffee9050a33532f3927ce9750937f27e22f..f380f9ed1b286a7eedfb41fa5f7f8f9259abfdf3 100644 (file)
@@ -50,8 +50,7 @@
  * Misc constants. Please retain them as multiples of 8!
  * Otherwise 32-64 bit interactions will be messed up :)
  */
-#define ORANGEFS_MAX_DEBUG_STRING_LEN  0x00000400
-#define ORANGEFS_MAX_DEBUG_ARRAY_LEN   0x00000800
+#define ORANGEFS_MAX_DEBUG_STRING_LEN  0x00000800
 
 /*
  * The maximum number of directory entries in a single request is 96.
index 3bf803d732c5b3702f735c5776cae6d249b85364..70355a9a25969bb681612cee734cf4341afd2428 100644 (file)
@@ -529,7 +529,6 @@ extern spinlock_t orangefs_htable_ops_in_progress_lock;
 extern int hash_table_size;
 
 extern const struct address_space_operations orangefs_address_operations;
-extern struct backing_dev_info orangefs_backing_dev_info;
 extern const struct inode_operations orangefs_file_inode_operations;
 extern const struct file_operations orangefs_file_operations;
 extern const struct inode_operations orangefs_symlink_inode_operations;
index 4113eb0495bf90549daca478dd0f8c5a7940680a..c1b5174cb5a9fd5fa0963c22478f1457503e6657 100644 (file)
@@ -80,11 +80,6 @@ static int __init orangefs_init(void)
        int ret = -1;
        __u32 i = 0;
 
-       ret = bdi_init(&orangefs_backing_dev_info);
-
-       if (ret)
-               return ret;
-
        if (op_timeout_secs < 0)
                op_timeout_secs = 0;
 
@@ -94,7 +89,7 @@ static int __init orangefs_init(void)
        /* initialize global book keeping data structures */
        ret = op_cache_initialize();
        if (ret < 0)
-               goto err;
+               goto out;
 
        ret = orangefs_inode_cache_initialize();
        if (ret < 0)
@@ -181,9 +176,6 @@ cleanup_inode:
 cleanup_op:
        op_cache_finalize();
 
-err:
-       bdi_destroy(&orangefs_backing_dev_info);
-
 out:
        return ret;
 }
@@ -207,8 +199,6 @@ static void __exit orangefs_exit(void)
 
        kfree(orangefs_htable_ops_in_progress);
 
-       bdi_destroy(&orangefs_backing_dev_info);
-
        pr_info("orangefs: module version %s unloaded\n", ORANGEFS_VERSION);
 }
 
index 084954448f1868840cbf3291cbb226206c2ffd51..afd2f523b2837988ae30975c231d83a21869c533 100644 (file)
  * Description:
  *                     Readahead cache buffer count and size.
  *
+ * What:               /sys/fs/orangefs/readahead_readcnt
+ * Date:               Jan 2017
+ * Contact:            Martin Brandenburg <martin@omnibond.com>
+ * Description:
+ *                     Number of buffers (in multiples of readahead_size)
+ *                     which can be read ahead for a single file at once.
+ *
  * What:               /sys/fs/orangefs/acache/...
  * Date:               Jun 2015
  * Contact:            Martin Brandenburg <martin@omnibond.com>
@@ -329,7 +336,8 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj,
                if (!(orangefs_features & ORANGEFS_FEATURE_READAHEAD) &&
                    (!strcmp(attr->attr.name, "readahead_count") ||
                    !strcmp(attr->attr.name, "readahead_size") ||
-                   !strcmp(attr->attr.name, "readahead_count_size"))) {
+                   !strcmp(attr->attr.name, "readahead_count_size") ||
+                   !strcmp(attr->attr.name, "readahead_readcnt"))) {
                        rc = -EINVAL;
                        goto out;
                }
@@ -360,6 +368,11 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj,
                                 "readahead_count_size"))
                        new_op->upcall.req.param.op =
                                ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT_SIZE;
+
+               else if (!strcmp(attr->attr.name,
+                                "readahead_readcnt"))
+                       new_op->upcall.req.param.op =
+                               ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT;
        } else if (!strcmp(kobj->name, ACACHE_KOBJ_ID)) {
                if (!strcmp(attr->attr.name, "timeout_msecs"))
                        new_op->upcall.req.param.op =
@@ -542,7 +555,8 @@ static ssize_t sysfs_service_op_store(struct kobject *kobj,
                if (!(orangefs_features & ORANGEFS_FEATURE_READAHEAD) &&
                    (!strcmp(attr->attr.name, "readahead_count") ||
                    !strcmp(attr->attr.name, "readahead_size") ||
-                   !strcmp(attr->attr.name, "readahead_count_size"))) {
+                   !strcmp(attr->attr.name, "readahead_count_size") ||
+                   !strcmp(attr->attr.name, "readahead_readcnt"))) {
                        rc = -EINVAL;
                        goto out;
                }
@@ -609,6 +623,15 @@ static ssize_t sysfs_service_op_store(struct kobject *kobj,
                        new_op->upcall.req.param.u.value32[0] = val1;
                        new_op->upcall.req.param.u.value32[1] = val2;
                        goto value_set;
+               } else if (!strcmp(attr->attr.name,
+                                  "readahead_readcnt")) {
+                       if ((val >= 0)) {
+                               new_op->upcall.req.param.op =
+                               ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT;
+                       } else {
+                               rc = 0;
+                               goto out;
+                       }
                }
 
        } else if (!strcmp(kobj->name, ACACHE_KOBJ_ID)) {
@@ -812,6 +835,10 @@ static struct orangefs_attribute readahead_count_size_attribute =
        __ATTR(readahead_count_size, 0664, sysfs_service_op_show,
               sysfs_service_op_store);
 
+static struct orangefs_attribute readahead_readcnt_attribute =
+       __ATTR(readahead_readcnt, 0664, sysfs_service_op_show,
+              sysfs_service_op_store);
+
 static struct orangefs_attribute perf_counter_reset_attribute =
        __ATTR(perf_counter_reset,
               0664,
@@ -838,6 +865,7 @@ static struct attribute *orangefs_default_attrs[] = {
        &readahead_count_attribute.attr,
        &readahead_size_attribute.attr,
        &readahead_count_size_attribute.attr,
+       &readahead_readcnt_attribute.attr,
        &perf_counter_reset_attribute.attr,
        &perf_history_size_attribute.attr,
        &perf_time_interval_secs_attribute.attr,
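
Wiring up a new orangefs sysfs knob, as this patch does for readahead_readcnt, takes three touches: an __ATTR() definition, name checks in sysfs_service_op_show()/sysfs_service_op_store() mapping the attribute to its upcall op, and an entry in orangefs_default_attrs[]. The definition step, sketched with a placeholder name:

    static struct orangefs_attribute my_knob_attribute =
            __ATTR(my_knob, 0664, sysfs_service_op_show,
                   sysfs_service_op_store);
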
index 06af81f71e10233b4bd9afa23b952eef96fb5f77..9b96b99539d62501718bd752f26d059900095864 100644 (file)
@@ -306,7 +306,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
                break;
        case S_IFDIR:
                inode->i_size = PAGE_SIZE;
-               orangefs_inode->blksize = (1 << inode->i_blkbits);
+               orangefs_inode->blksize = i_blocksize(inode);
                spin_lock(&inode->i_lock);
                inode_set_bytes(inode, inode->i_size);
                spin_unlock(&inode->i_lock);
@@ -316,7 +316,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
                if (new) {
                        inode->i_size = (loff_t)strlen(new_op->
                            downcall.resp.getattr.link_target);
-                       orangefs_inode->blksize = (1 << inode->i_blkbits);
+                       orangefs_inode->blksize = i_blocksize(inode);
                        ret = strscpy(orangefs_inode->link_target,
                            new_op->downcall.resp.getattr.link_target,
                            ORANGEFS_NAME_MAX);
index af0b0e36d5595f5b996fd0d041e763976aacc931..b8249f8fdd8027b8552e4b019203b4ea2dd6ea2a 100644 (file)
@@ -182,6 +182,7 @@ enum orangefs_param_request_op {
        ORANGEFS_PARAM_REQUEST_OP_READAHEAD_SIZE = 26,
        ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT = 27,
        ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT_SIZE = 28,
+       ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT = 29,
 };
 
 struct orangefs_param_request_s {
index b73b4de8fb36468cc7153f4a95f4460bd28aff4c..1e1e182d571b4afa5033977a64b7710abb90ab61 100644 (file)
@@ -291,102 +291,70 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
                        rv      += nr_read;
                }
        } else {
-               /*
-                * Command line (1 string) occupies ARGV and maybe
-                * extends into ENVP.
-                */
-               if (len1 + len2 <= *pos)
-                       goto skip_argv_envp;
-               if (len1 <= *pos)
-                       goto skip_argv;
-
-               p = arg_start + *pos;
-               len = len1 - *pos;
-               while (count > 0 && len > 0) {
-                       unsigned int _count, l;
-                       int nr_read;
-                       bool final;
-
-                       _count = min3(count, len, PAGE_SIZE);
-                       nr_read = access_remote_vm(mm, p, page, _count, 0);
-                       if (nr_read < 0)
-                               rv = nr_read;
-                       if (nr_read <= 0)
-                               goto out_free_page;
-
-                       /*
-                        * Command line can be shorter than whole ARGV
-                        * even if last "marker" byte says it is not.
-                        */
-                       final = false;
-                       l = strnlen(page, nr_read);
-                       if (l < nr_read) {
-                               nr_read = l;
-                               final = true;
-                       }
-
-                       if (copy_to_user(buf, page, nr_read)) {
-                               rv = -EFAULT;
-                               goto out_free_page;
-                       }
-
-                       p       += nr_read;
-                       len     -= nr_read;
-                       buf     += nr_read;
-                       count   -= nr_read;
-                       rv      += nr_read;
-
-                       if (final)
-                               goto out_free_page;
-               }
-skip_argv:
                /*
                 * Command line (1 string) occupies ARGV and
                 * extends into ENVP.
                 */
-               if (len1 <= *pos) {
-                       p = env_start + *pos - len1;
-                       len = len1 + len2 - *pos;
-               } else {
-                       p = env_start;
-                       len = len2;
+               struct {
+                       unsigned long p;
+                       unsigned long len;
+               } cmdline[2] = {
+                       { .p = arg_start, .len = len1 },
+                       { .p = env_start, .len = len2 },
+               };
+               loff_t pos1 = *pos;
+               unsigned int i;
+
+               i = 0;
+               while (i < 2 && pos1 >= cmdline[i].len) {
+                       pos1 -= cmdline[i].len;
+                       i++;
                }
-               while (count > 0 && len > 0) {
-                       unsigned int _count, l;
-                       int nr_read;
-                       bool final;
-
-                       _count = min3(count, len, PAGE_SIZE);
-                       nr_read = access_remote_vm(mm, p, page, _count, 0);
-                       if (nr_read < 0)
-                               rv = nr_read;
-                       if (nr_read <= 0)
-                               goto out_free_page;
-
-                       /* Find EOS. */
-                       final = false;
-                       l = strnlen(page, nr_read);
-                       if (l < nr_read) {
-                               nr_read = l;
-                               final = true;
-                       }
-
-                       if (copy_to_user(buf, page, nr_read)) {
-                               rv = -EFAULT;
-                               goto out_free_page;
+               while (i < 2) {
+                       p = cmdline[i].p + pos1;
+                       len = cmdline[i].len - pos1;
+                       while (count > 0 && len > 0) {
+                               unsigned int _count, l;
+                               int nr_read;
+                               bool final;
+
+                               _count = min3(count, len, PAGE_SIZE);
+                               nr_read = access_remote_vm(mm, p, page, _count, 0);
+                               if (nr_read < 0)
+                                       rv = nr_read;
+                               if (nr_read <= 0)
+                                       goto out_free_page;
+
+                               /*
+                                * Command line can be shorter than whole ARGV
+                                * even if last "marker" byte says it is not.
+                                */
+                               final = false;
+                               l = strnlen(page, nr_read);
+                               if (l < nr_read) {
+                                       nr_read = l;
+                                       final = true;
+                               }
+
+                               if (copy_to_user(buf, page, nr_read)) {
+                                       rv = -EFAULT;
+                                       goto out_free_page;
+                               }
+
+                               p       += nr_read;
+                               len     -= nr_read;
+                               buf     += nr_read;
+                               count   -= nr_read;
+                               rv      += nr_read;
+
+                               if (final)
+                                       goto out_free_page;
                        }
 
-                       p       += nr_read;
-                       len     -= nr_read;
-                       buf     += nr_read;
-                       count   -= nr_read;
-                       rv      += nr_read;
-
-                       if (final)
-                               goto out_free_page;
+                       /* Only first chunk can be read partially. */
+                       pos1 = 0;
+                       i++;
                }
-skip_argv_envp:
-               ;
        }
 
 out_free_page:
@@ -729,11 +697,11 @@ static int proc_pid_permission(struct inode *inode, int mask)
        task = get_proc_task(inode);
        if (!task)
                return -ESRCH;
-       has_perms = has_pid_permissions(pid, task, 1);
+       has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
        put_task_struct(task);
 
        if (!has_perms) {
-               if (pid->hide_pid == 2) {
+               if (pid->hide_pid == HIDEPID_INVISIBLE) {
                        /*
                         * Let's make getdents(), stat(), and open()
                         * consistent with each other.  If a process
@@ -798,7 +766,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
 
                if (!IS_ERR_OR_NULL(mm)) {
                        /* ensure this mm_struct can't be freed */
-                       atomic_inc(&mm->mm_count);
+                       mmgrab(mm);
                        /* but do not pin its memory */
                        mmput(mm);
                }
@@ -845,7 +813,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
                return -ENOMEM;
 
        copied = 0;
-       if (!atomic_inc_not_zero(&mm->mm_users))
+       if (!mmget_not_zero(mm))
                goto free;
 
        /* Maybe we should limit FOLL_FORCE to actual ptrace users? */
@@ -953,7 +921,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
                return -ENOMEM;
 
        ret = 0;
-       if (!atomic_inc_not_zero(&mm->mm_users))
+       if (!mmget_not_zero(mm))
                goto free;
 
        down_read(&mm->mmap_sem);
@@ -1096,7 +1064,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
                if (p) {
                        if (atomic_read(&p->mm->mm_users) > 1) {
                                mm = p->mm;
-                               atomic_inc(&mm->mm_count);
+                               mmgrab(mm);
                        }
                        task_unlock(p);
                }
@@ -1769,7 +1737,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        stat->gid = GLOBAL_ROOT_GID;
        task = pid_task(proc_pid(inode), PIDTYPE_PID);
        if (task) {
-               if (!has_pid_permissions(pid, task, 2)) {
+               if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
                        rcu_read_unlock();
                        /*
                         * This doesn't prevent learning whether PID exists,
@@ -3200,7 +3168,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
                int len;
 
                cond_resched();
-               if (!has_pid_permissions(ns, iter.task, 2))
+               if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
                        continue;
 
                len = snprintf(name, sizeof(name), "%d", iter.tgid);
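
Alongside the cmdline-read restructuring, this file switches its open-coded mm refcounting to the helpers introduced in the same cycle. The distinction they encode: mm_count keeps the struct mm_struct itself from being freed, while mm_users pins the whole address space. The helpers are essentially:

    static inline void mmgrab(struct mm_struct *mm)
    {
            atomic_inc(&mm->mm_count);          /* pin the struct only */
    }

    static inline bool mmget_not_zero(struct mm_struct *mm)
    {
            return atomic_inc_not_zero(&mm->mm_users); /* fails once it hit 0 */
    }
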
index f6a01f09f79d51457dae4226c9882bf60076c588..06c73904d497ad644fae969774fa8f8477486b4f 100644 (file)
@@ -57,9 +57,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
        struct rb_node *node = dir->subdir.rb_node;
 
        while (node) {
-               struct proc_dir_entry *de = container_of(node,
-                                                        struct proc_dir_entry,
-                                                        subdir_node);
+               struct proc_dir_entry *de = rb_entry(node,
+                                                    struct proc_dir_entry,
+                                                    subdir_node);
                int result = proc_match(len, name, de);
 
                if (result < 0)
@@ -80,8 +80,9 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
 
        /* Figure out where to put new node */
        while (*new) {
-               struct proc_dir_entry *this =
-                       container_of(*new, struct proc_dir_entry, subdir_node);
+               struct proc_dir_entry *this = rb_entry(*new,
+                                                      struct proc_dir_entry,
+                                                      subdir_node);
                int result = proc_match(de->namelen, de->name, this);
 
                parent = *new;
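Both hunks above are readability changes: rb_entry() is the conventional accessor for rbtree payloads, and as far as these call sites show it is a thin alias, roughly:

/* sketch: rb_entry() specializes container_of() for struct rb_node members */
#define rb_entry(ptr, type, member) container_of(ptr, type, member)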
index 7ad9ed7958af47a355be9e27ed8501d98d4de0d1..2cc7a8030275c1bc98a0da9adb870b33d99f5433 100644 (file)
@@ -107,7 +107,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
 
        if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
                seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
-       if (pid->hide_pid != 0)
+       if (pid->hide_pid != HIDEPID_OFF)
                seq_printf(seq, ",hidepid=%u", pid->hide_pid);
 
        return 0;
index 0b80ad87b4d6699076317eae8b08eda14fa74c96..ea9f3d1ae83063289989539ede4a8c8ab8151485 100644 (file)
@@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
                phdr->p_flags   = PF_R|PF_W|PF_X;
                phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
                phdr->p_vaddr   = (size_t)m->addr;
-               phdr->p_paddr   = 0;
+               if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+                       phdr->p_paddr   = __pa(m->addr);
+               else
+                       phdr->p_paddr   = (elf_addr_t)-1;
                phdr->p_filesz  = phdr->p_memsz = m->size;
                phdr->p_align   = PAGE_SIZE;
        }
index 1988440b20496386303daec0190546261131a3b0..b90da888b81a3aed2a64d83b9167ab81036fc452 100644 (file)
@@ -58,7 +58,8 @@ int proc_parse_options(char *options, struct pid_namespace *pid)
                case Opt_hidepid:
                        if (match_int(&args[0], &option))
                                return 0;
-                       if (option < 0 || option > 2) {
+                       if (option < HIDEPID_OFF ||
+                           option > HIDEPID_INVISIBLE) {
                                pr_err("proc: hidepid value must be between 0 and 2.\n");
                                return 0;
                        }
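The hidepid hunks replace the magic values 0, 1 and 2 with symbolic names. A sketch of the constants they imply; the numeric values must stay in the 0..2 range checked above, since hidepid= is an existing mount-option ABI:

enum {
	HIDEPID_OFF	  = 0,	/* everyone may inspect every /proc/<pid> */
	HIDEPID_NO_ACCESS = 1,	/* /proc/<pid> contents restricted to the owner */
	HIDEPID_INVISIBLE = 2,	/* other users' pids fully hidden */
};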
index 8f96a49178d0d48ae220a029aa9c6f25c6bedab6..ee3efb229ef6a61839ad04f70122d7be286ca730 100644 (file)
@@ -167,7 +167,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
                return ERR_PTR(-ESRCH);
 
        mm = priv->mm;
-       if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+       if (!mm || !mmget_not_zero(mm))
                return NULL;
 
        down_read(&mm->mmap_sem);
@@ -1352,7 +1352,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        unsigned long end_vaddr;
        int ret = 0, copied = 0;
 
-       if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+       if (!mm || !mmget_not_zero(mm))
                goto out;
 
        ret = -EINVAL;
index 37175621e8906881adf4e034ce06224664120031..1ef97cfcf4228737dcf02a74efea47bb8b75c574 100644 (file)
@@ -219,7 +219,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
                return ERR_PTR(-ESRCH);
 
        mm = priv->mm;
-       if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+       if (!mm || !mmget_not_zero(mm))
                return NULL;
 
        down_read(&mm->mmap_sem);
index 5105b1599981e176908eb3ebc79d2352f43b9c96..885d445afa0d9b27aea63002413d78bee3bc77e7 100644 (file)
@@ -265,10 +265,10 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
  * On s390 the fault handler is used for memory regions that can't be mapped
  * directly with remap_pfn_range().
  */
-static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int mmap_vmcore_fault(struct vm_fault *vmf)
 {
 #ifdef CONFIG_S390
-       struct address_space *mapping = vma->vm_file->f_mapping;
+       struct address_space *mapping = vmf->vma->vm_file->f_mapping;
        pgoff_t index = vmf->pgoff;
        struct page *page;
        loff_t offset;
@@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
        }
        return 0;
 fail:
-       do_munmap(vma->vm_mm, from, len);
+       do_munmap(vma->vm_mm, from, len, NULL);
        return -EAGAIN;
 }
 
@@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 
        return 0;
 fail:
-       do_munmap(vma->vm_mm, vma->vm_start, len);
+       do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
        return -EAGAIN;
 }
 #else
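do_munmap() grows a fourth parameter here: a list_head used to collect userfaultfd unmap contexts (see the userfaultfd hunks below); callers that cannot deliver events pass NULL. A sketch of the pattern an event-delivering caller would follow, assuming the usual mmap_sem discipline:

LIST_HEAD(uf);

down_write(&mm->mmap_sem);
ret = do_munmap(mm, start, len, &uf);
up_write(&mm->mmap_sem);
/* deliver UFFD_EVENT_UNMAP only after the lock is dropped */
userfaultfd_unmap_complete(mm, &uf);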
index 729677e18e364fbff1d66e89fb93450d592f55e8..efab7b64925ba76e2aa0a2a39d9e659c0a94472e 100644 (file)
@@ -342,31 +342,35 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
        int ret;
 
-       ret = lz4_compress(in, inlen, out, &outlen, workspace);
-       if (ret) {
-               pr_err("lz4_compress error, ret = %d!\n", ret);
+       ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+       if (!ret) {
+               pr_err("LZ4_compress_default error; compression failed!\n");
                return -EIO;
        }
 
-       return outlen;
+       return ret;
 }
 
 static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
        int ret;
 
-       ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
-       if (ret) {
-               pr_err("lz4_decompress error, ret = %d!\n", ret);
+       ret = LZ4_decompress_safe(in, out, inlen, outlen);
+       if (ret < 0) {
+               /*
+                * LZ4_decompress_safe returns an error code
+                * (< 0) if decompression fails
+                */
+               pr_err("LZ4_decompress_safe error, ret = %d!\n", ret);
                return -EIO;
        }
 
-       return outlen;
+       return ret;
 }
 
 static void allocate_lz4(void)
 {
-       big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+       big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
        big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
        if (big_oops_buf) {
                workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
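The pstore conversion tracks an LZ4 library update that changed the calling convention: the old lz4_compress()/lz4_decompress_unknownoutputsize() returned 0 on success and passed sizes back through a pointer, while LZ4_compress_default() returns the compressed size directly (0 on failure) and LZ4_decompress_safe() returns the decompressed size (negative on error). A sketch of the new convention, with illustrative buffer names:

clen = LZ4_compress_default(src, dst, src_len, dst_capacity, wrkmem);
if (!clen)		/* 0 means the output did not fit or the input was bad */
	return -EIO;

dlen = LZ4_decompress_safe(dst, out, clen, out_capacity);
if (dlen < 0)		/* a negative return means corrupt input */
	return -EIO;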
index 2f8c5c9bdaf600abdee1883ceab6633d5caf6467..b396eb09f288864ae27ce6a65108fa1e6f819820 100644 (file)
@@ -189,7 +189,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
        int ret = 0;
 
        th.t_trans_id = 0;
-       blocksize = 1 << inode->i_blkbits;
+       blocksize = i_blocksize(inode);
 
        if (logit) {
                reiserfs_write_lock(s);
index cfeae9b0a2b77c3d571c35e85ee7bd20d1357ff3..a6ab9d64ea1b32777fa79a42f6396f9f52bcab08 100644 (file)
@@ -525,7 +525,7 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
         * referenced in convert_tail_for_hole() that may be called from
         * reiserfs_get_block()
         */
-       bh_result->b_size = (1 << inode->i_blkbits);
+       bh_result->b_size = i_blocksize(inode);
 
        ret = reiserfs_get_block(inode, iblock, bh_result,
                                 create | GET_BLOCK_NO_DANGLE);
index e314cb30a181e1017f7df769f7faf1bef89bdef1..feabcde0290d27103a11813682091abd1a191661 100644 (file)
@@ -1166,7 +1166,7 @@ static int reiserfs_parse_options(struct super_block *s,
                        if (!strcmp(arg, "auto")) {
                                /* From JFS code, to auto-get the size. */
                                *blocks =
-                                   s->s_bdev->bd_inode->i_size >> s->
+                                   i_size_read(s->s_bdev->bd_inode) >> s->
                                    s_blocksize_bits;
                        } else {
                                *blocks = simple_strtoul(arg, &p, 0);
index ff4468bd18b02586394e8442abe2631b1fb548aa..95da653665485974cb8281fab13eb6515f6d857b 100644 (file)
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
        struct squashfs_lz4 *stream = strm;
        void *buff = stream->input, *data;
        int avail, i, bytes = length, res;
-       size_t dest_len = output->length;
 
        for (i = 0; i < b; i++) {
                avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
                put_bh(bh[i]);
        }
 
-       res = lz4_decompress_unknownoutputsize(stream->input, length,
-                                       stream->output, &dest_len);
-       if (res)
+       res = LZ4_decompress_safe(stream->input, stream->output,
+               length, output->length);
+
+       if (res < 0)
                return -EIO;
 
-       bytes = dest_len;
+       bytes = res;
        data = squashfs_first_page(output);
        buff = stream->output;
        while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
        }
        squashfs_finish_page(output);
 
-       return dest_len;
+       return res;
 }
 
 const struct squashfs_decompressor squashfs_lz4_comp_ops = {
index a268b7f27adf62fc8ffd5fddd0d7012d35bc5295..3f14d1ef086805ee56590d32f678c5099131ef5a 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -31,7 +31,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
        stat->atime = inode->i_atime;
        stat->mtime = inode->i_mtime;
        stat->ctime = inode->i_ctime;
-       stat->blksize = (1 << inode->i_blkbits);
+       stat->blksize = i_blocksize(inode);
        stat->blocks = inode->i_blocks;
 }
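This hunk and its siblings in reiserfs, udf and xfs replace the open-coded shift with i_blocksize(). Judging by the one-for-one substitution, the helper is equivalent to the expression it replaces; a minimal sketch:

static inline unsigned int i_blocksize(const struct inode *inode)
{
	/* same value as the old open-coded (1 << inode->i_blkbits) */
	return 1U << inode->i_blkbits;
}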
 
index b0d783774c963c97f32df7649feeb8fb7e4e3e4f..d9ae86f96df7b52255c259489b5343a7cd94faf8 100644 (file)
@@ -1506,11 +1506,10 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
  * mmap()d file has taken write protection fault and is being made writable.
  * UBIFS must ensure page is budgeted for.
  */
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
-                                struct vm_fault *vmf)
+static int ubifs_vm_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        struct ubifs_info *c = inode->i_sb->s_fs_info;
        struct timespec now = ubifs_current_time(inode);
        struct ubifs_budget_req req = { .new_page = 1 };
index 8ec6b3df0bc7f7dc47c3c690259df74023bb6841..a8d8f71ef8bdb577675284e9cb5f03d38f1d34f7 100644 (file)
@@ -1193,7 +1193,7 @@ int udf_setsize(struct inode *inode, loff_t newsize)
 {
        int err;
        struct udf_inode_info *iinfo;
-       int bsize = 1 << inode->i_blkbits;
+       int bsize = i_blocksize(inode);
 
        if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
              S_ISLNK(inode->i_mode)))
index 18406158e13fbf5e9b4e7041489f2d20e404c075..3c421d06a18e6ee1a7fde0d09030c9f7306cb989 100644 (file)
@@ -71,6 +71,13 @@ struct userfaultfd_fork_ctx {
        struct list_head list;
 };
 
+struct userfaultfd_unmap_ctx {
+       struct userfaultfd_ctx *ctx;
+       unsigned long start;
+       unsigned long end;
+       struct list_head list;
+};
+
 struct userfaultfd_wait_queue {
        struct uffd_msg msg;
        wait_queue_t wq;
@@ -681,16 +688,16 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
        userfaultfd_event_wait_completion(ctx, &ewq);
 }
 
-void madvise_userfault_dontneed(struct vm_area_struct *vma,
-                               struct vm_area_struct **prev,
-                               unsigned long start, unsigned long end)
+void userfaultfd_remove(struct vm_area_struct *vma,
+                       struct vm_area_struct **prev,
+                       unsigned long start, unsigned long end)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct userfaultfd_ctx *ctx;
        struct userfaultfd_wait_queue ewq;
 
        ctx = vma->vm_userfaultfd_ctx.ctx;
-       if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_MADVDONTNEED))
+       if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
                return;
 
        userfaultfd_ctx_get(ctx);
@@ -700,15 +707,101 @@ void madvise_userfault_dontneed(struct vm_area_struct *vma,
 
        msg_init(&ewq.msg);
 
-       ewq.msg.event = UFFD_EVENT_MADVDONTNEED;
-       ewq.msg.arg.madv_dn.start = start;
-       ewq.msg.arg.madv_dn.end = end;
+       ewq.msg.event = UFFD_EVENT_REMOVE;
+       ewq.msg.arg.remove.start = start;
+       ewq.msg.arg.remove.end = end;
 
        userfaultfd_event_wait_completion(ctx, &ewq);
 
        down_read(&mm->mmap_sem);
 }
 
+static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
+                         unsigned long start, unsigned long end)
+{
+       struct userfaultfd_unmap_ctx *unmap_ctx;
+
+       list_for_each_entry(unmap_ctx, unmaps, list)
+               if (unmap_ctx->ctx == ctx && unmap_ctx->start == start &&
+                   unmap_ctx->end == end)
+                       return true;
+
+       return false;
+}
+
+int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+                          unsigned long start, unsigned long end,
+                          struct list_head *unmaps)
+{
+       for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+               struct userfaultfd_unmap_ctx *unmap_ctx;
+               struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+               if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
+                   has_unmap_ctx(ctx, unmaps, start, end))
+                       continue;
+
+               unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
+               if (!unmap_ctx)
+                       return -ENOMEM;
+
+               userfaultfd_ctx_get(ctx);
+               unmap_ctx->ctx = ctx;
+               unmap_ctx->start = start;
+               unmap_ctx->end = end;
+               list_add_tail(&unmap_ctx->list, unmaps);
+       }
+
+       return 0;
+}
+
+void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
+{
+       struct userfaultfd_unmap_ctx *ctx, *n;
+       struct userfaultfd_wait_queue ewq;
+
+       list_for_each_entry_safe(ctx, n, uf, list) {
+               msg_init(&ewq.msg);
+
+               ewq.msg.event = UFFD_EVENT_UNMAP;
+               ewq.msg.arg.remove.start = ctx->start;
+               ewq.msg.arg.remove.end = ctx->end;
+
+               userfaultfd_event_wait_completion(ctx->ctx, &ewq);
+
+               list_del(&ctx->list);
+               kfree(ctx);
+       }
+}
+
+void userfaultfd_exit(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma = mm->mmap;
+
+       /*
+        * We can do the vma walk without locking because the caller
+        * (exit_mm) knows it now has exclusive access
+        */
+       while (vma) {
+               struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+               if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
+                       struct userfaultfd_wait_queue ewq;
+
+                       userfaultfd_ctx_get(ctx);
+
+                       msg_init(&ewq.msg);
+                       ewq.msg.event = UFFD_EVENT_EXIT;
+
+                       userfaultfd_event_wait_completion(ctx, &ewq);
+
+                       ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
+               }
+
+               vma = vma->vm_next;
+       }
+}
+
 static int userfaultfd_release(struct inode *inode, struct file *file)
 {
        struct userfaultfd_ctx *ctx = file->private_data;
@@ -1514,6 +1607,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
                ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
                                   uffdio_copy.len);
                mmput(ctx->mm);
+       } else {
+               return -ENOSPC;
        }
        if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
                return -EFAULT;
@@ -1712,17 +1807,17 @@ static void init_once_userfaultfd_ctx(void *mem)
 }
 
 /**
- * userfaultfd_file_create - Creates an userfaultfd file pointer.
+ * userfaultfd_file_create - Creates a userfaultfd file pointer.
  * @flags: Flags for the userfaultfd file.
  *
- * This function creates an userfaultfd file pointer, w/out installing
+ * This function creates a userfaultfd file pointer, w/out installing
  * it into the fd table. This is useful when the userfaultfd file is
  * used during the initialization of data structures that require
  * extra setup after the userfaultfd creation. So the userfaultfd
  * creation is split into the file pointer creation phase, and the
  * file descriptor installation phase.  In this way races with
  * userspace closing the newly installed file descriptor can be
- * avoided.  Returns an userfaultfd file pointer, or a proper error
+ * avoided.  Returns a userfaultfd file pointer, or a proper error
  * pointer.
  */
 static struct file *userfaultfd_file_create(int flags)
@@ -1752,7 +1847,7 @@ static struct file *userfaultfd_file_create(int flags)
        ctx->released = false;
        ctx->mm = current->mm;
        /* prevent the mm struct to be freed */
-       atomic_inc(&ctx->mm->mm_count);
+       mmgrab(ctx->mm);
 
        file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
                                  O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
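The userfaultfd changes above generalize the old MADV_DONTNEED event into UFFD_EVENT_REMOVE and add UNMAP and EXIT events. From a monitor's point of view the new events arrive through the same read() interface; a hypothetical userspace sketch (forget_range() is made up for illustration):

struct uffd_msg msg;

while (read(uffd, &msg, sizeof(msg)) == sizeof(msg)) {
	if (msg.event == UFFD_EVENT_UNMAP ||
	    msg.event == UFFD_EVENT_REMOVE)
		/* this range no longer needs to be tracked */
		forget_range(msg.arg.remove.start, msg.arg.remove.end);
	else if (msg.event == UFFD_EVENT_EXIT)
		break;	/* the monitored process is gone */
}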
index 1ff9df7a3ce8620ef701a323bc89ddc1f447186f..bf65a9ea864293d48e5326178336680c2eb29758 100644 (file)
@@ -103,9 +103,9 @@ xfs_finish_page_writeback(
        unsigned int            bsize;
 
        ASSERT(bvec->bv_offset < PAGE_SIZE);
-       ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
+       ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
        ASSERT(end < PAGE_SIZE);
-       ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);
+       ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
 
        bh = head = page_buffers(bvec->bv_page);
 
@@ -349,7 +349,7 @@ xfs_map_blocks(
 {
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
-       ssize_t                 count = 1 << inode->i_blkbits;
+       ssize_t                 count = i_blocksize(inode);
        xfs_fileoff_t           offset_fsb, end_fsb;
        int                     error = 0;
        int                     bmapi_flags = XFS_BMAPI_ENTIRE;
@@ -758,7 +758,7 @@ xfs_aops_discard_page(
                        break;
                }
 next_buffer:
-               offset += 1 << inode->i_blkbits;
+               offset += i_blocksize(inode);
 
        } while ((bh = bh->b_this_page) != head);
 
@@ -846,7 +846,7 @@ xfs_writepage_map(
        LIST_HEAD(submit_list);
        struct xfs_ioend        *ioend, *next;
        struct buffer_head      *bh, *head;
-       ssize_t                 len = 1 << inode->i_blkbits;
+       ssize_t                 len = i_blocksize(inode);
        int                     error = 0;
        int                     count = 0;
        int                     uptodate = 1;
@@ -1210,7 +1210,7 @@ xfs_map_trim_size(
            offset + mapping_size >= i_size_read(inode)) {
                /* limit mapping to block that spans EOF */
                mapping_size = roundup_64(i_size_read(inode) - offset,
-                                         1 << inode->i_blkbits);
+                                         i_blocksize(inode));
        }
        if (mapping_size > LONG_MAX)
                mapping_size = LONG_MAX;
@@ -1241,7 +1241,7 @@ xfs_get_blocks(
                return -EIO;
 
        offset = (xfs_off_t)iblock << inode->i_blkbits;
-       ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+       ASSERT(bh_result->b_size >= i_blocksize(inode));
        size = bh_result->b_size;
 
        if (offset >= i_size_read(inode))
@@ -1389,7 +1389,7 @@ xfs_vm_set_page_dirty(
                        if (offset < end_offset)
                                set_buffer_dirty(bh);
                        bh = bh->b_this_page;
-                       offset += 1 << inode->i_blkbits;
+                       offset += i_blocksize(inode);
                } while (bh != head);
        }
        /*
index 022014016d806aabd95e81a4652a38576779dc3f..35703a80137208b81cb38ab91ce50e28d80a19f2 100644 (file)
@@ -754,7 +754,7 @@ xfs_file_fallocate(
                if (error)
                        goto out_unlock;
        } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
-               unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+               unsigned int blksize_mask = i_blocksize(inode) - 1;
 
                if (offset & blksize_mask || len & blksize_mask) {
                        error = -EINVAL;
@@ -776,7 +776,7 @@ xfs_file_fallocate(
                if (error)
                        goto out_unlock;
        } else if (mode & FALLOC_FL_INSERT_RANGE) {
-               unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+               unsigned int blksize_mask = i_blocksize(inode) - 1;
 
                new_size = i_size_read(inode) + len;
                if (offset & blksize_mask || len & blksize_mask) {
@@ -1379,22 +1379,21 @@ xfs_file_llseek(
  */
 STATIC int
 xfs_filemap_page_mkwrite(
-       struct vm_area_struct   *vma,
        struct vm_fault         *vmf)
 {
-       struct inode            *inode = file_inode(vma->vm_file);
+       struct inode            *inode = file_inode(vmf->vma->vm_file);
        int                     ret;
 
        trace_xfs_filemap_page_mkwrite(XFS_I(inode));
 
        sb_start_pagefault(inode->i_sb);
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
        if (IS_DAX(inode)) {
-               ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+               ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
        } else {
-               ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
+               ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
                ret = block_page_mkwrite_return(ret);
        }
 
@@ -1406,23 +1405,22 @@ xfs_filemap_page_mkwrite(
 
 STATIC int
 xfs_filemap_fault(
-       struct vm_area_struct   *vma,
        struct vm_fault         *vmf)
 {
-       struct inode            *inode = file_inode(vma->vm_file);
+       struct inode            *inode = file_inode(vmf->vma->vm_file);
        int                     ret;
 
        trace_xfs_filemap_fault(XFS_I(inode));
 
        /* DAX can shortcut the normal fault path on write faults! */
        if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
-               return xfs_filemap_page_mkwrite(vma, vmf);
+               return xfs_filemap_page_mkwrite(vmf);
 
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
        if (IS_DAX(inode))
-               ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+               ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
        else
-               ret = filemap_fault(vma, vmf);
+               ret = filemap_fault(vmf);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
        return ret;
@@ -1431,13 +1429,14 @@ xfs_filemap_fault(
 /*
  * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
  * both read and write faults. Hence we need to handle both cases. There is no
- * ->pmd_mkwrite callout for huge pages, so we have a single function here to
+ * ->huge_mkwrite callout for huge pages, so we have a single function here to
 * handle both cases. @flags carries the information on the type of fault
 * occurring.
  */
 STATIC int
-xfs_filemap_pmd_fault(
-       struct vm_fault         *vmf)
+xfs_filemap_huge_fault(
+       struct vm_fault         *vmf,
+       enum page_entry_size    pe_size)
 {
        struct inode            *inode = file_inode(vmf->vma->vm_file);
        struct xfs_inode        *ip = XFS_I(inode);
@@ -1446,7 +1445,7 @@ xfs_filemap_pmd_fault(
        if (!IS_DAX(inode))
                return VM_FAULT_FALLBACK;
 
-       trace_xfs_filemap_pmd_fault(ip);
+       trace_xfs_filemap_huge_fault(ip);
 
        if (vmf->flags & FAULT_FLAG_WRITE) {
                sb_start_pagefault(inode->i_sb);
@@ -1454,7 +1453,7 @@ xfs_filemap_pmd_fault(
        }
 
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-       ret = dax_iomap_pmd_fault(vmf, &xfs_iomap_ops);
+       ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
        if (vmf->flags & FAULT_FLAG_WRITE)
@@ -1471,11 +1470,10 @@ xfs_filemap_pmd_fault(
  */
 static int
 xfs_filemap_pfn_mkwrite(
-       struct vm_area_struct   *vma,
        struct vm_fault         *vmf)
 {
 
-       struct inode            *inode = file_inode(vma->vm_file);
+       struct inode            *inode = file_inode(vmf->vma->vm_file);
        struct xfs_inode        *ip = XFS_I(inode);
        int                     ret = VM_FAULT_NOPAGE;
        loff_t                  size;
@@ -1483,7 +1481,7 @@ xfs_filemap_pfn_mkwrite(
        trace_xfs_filemap_pfn_mkwrite(ip);
 
        sb_start_pagefault(inode->i_sb);
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
 
        /* check if the faulting page hasn't raced with truncate */
        xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
@@ -1491,7 +1489,7 @@ xfs_filemap_pfn_mkwrite(
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
        else if (IS_DAX(inode))
-               ret = dax_pfn_mkwrite(vma, vmf);
+               ret = dax_pfn_mkwrite(vmf);
        xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
        sb_end_pagefault(inode->i_sb);
        return ret;
@@ -1500,7 +1498,7 @@ xfs_filemap_pfn_mkwrite(
 
 static const struct vm_operations_struct xfs_file_vm_ops = {
        .fault          = xfs_filemap_fault,
-       .pmd_fault      = xfs_filemap_pmd_fault,
+       .huge_fault     = xfs_filemap_huge_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = xfs_filemap_page_mkwrite,
        .pfn_mkwrite    = xfs_filemap_pfn_mkwrite,
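The xfs hunks follow a tree-wide change to the fault-handler signature: the separate VMA argument is gone, handlers reach it through vmf->vma, and ->pmd_fault is generalized into ->huge_fault taking an enum page_entry_size. A sketch of the new handler shape (example_fault() is illustrative, not from this patch):

static int example_fault(struct vm_fault *vmf)
{
	/* the VMA now travels inside struct vm_fault */
	struct inode *inode = file_inode(vmf->vma->vm_file);

	(void)inode;	/* filesystem-specific locking would go here */
	return filemap_fault(vmf);
}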
index fb7555e73a62ef586a13979d1296df5eb95cb905..383ac227ce2c324cee29b5e8a209fa6b98583192 100644 (file)
@@ -687,7 +687,7 @@ DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
 
 DEFINE_INODE_EVENT(xfs_filemap_fault);
-DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);
+DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
 DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
 DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
 
index 9ed8b987185b45b1157993abf4b0fe5a6c0b23a8..3f38eb03649c93873c678677964425a1daf09c26 100644 (file)
@@ -223,6 +223,7 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)            (xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)    (cmpxchg(&((v)->counter), (old), (new)))
 
+#ifndef __atomic_add_unless
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
        int c, old;
@@ -231,5 +232,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
                c = old;
        return c;
 }
+#endif
 
 #endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h
new file mode 100644 (file)
index 0000000..57af9f2
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _ASM_GENERIC_KPROBES_H
+#define _ASM_GENERIC_KPROBES_H
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef CONFIG_KPROBES
+/*
+ * Blacklist generating macro. Use this macro to specify functions that
+ * must not be probed.
+ */
+# define __NOKPROBE_SYMBOL(fname)                              \
+static unsigned long __used                                    \
+       __attribute__((__section__("_kprobe_blacklist")))       \
+       _kbl_addr_##fname = (unsigned long)fname;
+# define NOKPROBE_SYMBOL(fname)        __NOKPROBE_SYMBOL(fname)
+/* Use this to forbid kprobes from attaching to very low-level functions */
+# define __kprobes     __attribute__((__section__(".kprobes.text")))
+# define nokprobe_inline       __always_inline
+#else
+# define NOKPROBE_SYMBOL(fname)
+# define __kprobes
+# define nokprobe_inline       inline
+#endif
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+
+#endif /* _ASM_GENERIC_KPROBES_H */
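The new asm-generic header gives every architecture the NOKPROBE_SYMBOL()/__kprobes annotations without a hand-rolled copy. A hypothetical use (function names made up; in real code the definitions are normally reached via <linux/kprobes.h> rather than this header directly):

static nokprobe_inline int low_level_helper(int x)
{
	/* always inlined, so it cannot be probed independently */
	return x + 1;
}

static int sensitive_entry(int x)
{
	return low_level_helper(x);
}
/* records the address in the _kprobe_blacklist section */
NOKPROBE_SYMBOL(sensitive_entry);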
index 18af2bcefe6a7cc564934479d8c0390cb77498fa..f4ca23b158b3b7aace85c4899385e980fd4c42b8 100644 (file)
@@ -36,6 +36,9 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma,
 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp,
                                 pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+                                unsigned long address, pud_t *pudp,
+                                pud_t entry, int dirty);
 #else
 static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
                                        unsigned long address, pmd_t *pmdp,
@@ -44,6 +47,13 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
        BUILD_BUG();
        return 0;
 }
+static inline int pudp_set_access_flags(struct vm_area_struct *vma,
+                                       unsigned long address, pud_t *pudp,
+                                       pud_t entry, int dirty)
+{
+       BUILD_BUG();
+       return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
@@ -121,8 +131,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 }
 #endif
 
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
                                            unsigned long address,
                                            pmd_t *pmdp)
@@ -131,20 +141,40 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
        pmd_clear(pmdp);
        return pmd;
 }
+#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+                                           unsigned long address,
+                                           pud_t *pudp)
+{
+       pud_t pud = *pudp;
+
+       pud_clear(pudp);
+       return pud;
+}
+#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#endif
 
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address, pmd_t *pmdp,
                                            int full)
 {
        return pmdp_huge_get_and_clear(mm, address, pmdp);
 }
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+                                           unsigned long address, pud_t *pudp,
+                                           int full)
+{
+       return pudp_huge_get_and_clear(mm, address, pudp);
+}
+#endif
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address, pte_t *ptep,
@@ -181,6 +211,9 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
 extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
                              unsigned long address,
                              pmd_t *pmdp);
+extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
+                             unsigned long address,
+                             pud_t *pudp);
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -192,6 +225,30 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 }
 #endif
 
+#ifndef pte_savedwrite
+#define pte_savedwrite pte_write
+#endif
+
+#ifndef pte_mk_savedwrite
+#define pte_mk_savedwrite pte_mkwrite
+#endif
+
+#ifndef pte_clear_savedwrite
+#define pte_clear_savedwrite pte_wrprotect
+#endif
+
+#ifndef pmd_savedwrite
+#define pmd_savedwrite pmd_write
+#endif
+
+#ifndef pmd_mk_savedwrite
+#define pmd_mk_savedwrite pmd_mkwrite
+#endif
+
+#ifndef pmd_clear_savedwrite
+#define pmd_clear_savedwrite pmd_wrprotect
+#endif
+
 #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
@@ -208,6 +265,23 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
+#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+                                     unsigned long address, pud_t *pudp)
+{
+       pud_t old_pud = *pudp;
+
+       set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
+}
+#else
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+                                     unsigned long address, pud_t *pudp)
+{
+       BUILD_BUG();
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+#endif
 
 #ifndef pmdp_collapse_flush
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -273,12 +347,23 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 {
        return pmd_val(pmd_a) == pmd_val(pmd_b);
 }
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+       return pud_val(pud_a) == pud_val(pud_b);
+}
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 {
        BUILD_BUG();
        return 0;
 }
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+       BUILD_BUG();
+       return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
@@ -640,6 +725,15 @@ static inline int pmd_write(pmd_t pmd)
 #endif /* __HAVE_ARCH_PMD_WRITE */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
+       (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+        !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+static inline int pud_trans_huge(pud_t pud)
+{
+       return 0;
+}
+#endif
+
 #ifndef pmd_read_atomic
 static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 {
@@ -785,8 +879,10 @@ static inline int pmd_clear_huge(pmd_t *pmd)
  * e.g. see arch/arc: flush_pmd_tlb_range
  */
 #define flush_pmd_tlb_range(vma, addr, end)    flush_tlb_range(vma, addr, end)
+#define flush_pud_tlb_range(vma, addr, end)    flush_tlb_range(vma, addr, end)
 #else
 #define flush_pmd_tlb_range(vma, addr, end)    BUILD_BUG()
+#define flush_pud_tlb_range(vma, addr, end)    BUILD_BUG()
 #endif
 #endif
 
index 7eed8cf3130ada017b658649f543aba980085da1..4329bc6ef04b7b555337dc2f558ff7d7321668c4 100644 (file)
@@ -232,6 +232,20 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
                __tlb_remove_pmd_tlb_entry(tlb, pmdp, address);         \
        } while (0)
 
+/**
+ * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
+ * invalidation. This is a nop so far, because only x86 needs it.
+ */
+#ifndef __tlb_remove_pud_tlb_entry
+#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
+#endif
+
+#define tlb_remove_pud_tlb_entry(tlb, pudp, address)                   \
+       do {                                                            \
+               __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);       \
+               __tlb_remove_pud_tlb_entry(tlb, pudp, address);         \
+       } while (0)
+
 /*
  * For things like page tables caches (ie caching addresses "inside" the
  * page tables, like x86 does), for legacy reasons, flushing an
index 360e00cefd35679b49890234b5c369fb52b89e20..a0c812b0fa391d149b4f546db39bdc4bef207960 100644 (file)
@@ -64,3 +64,5 @@
 #define BCM2835_CLOCK_CAM1             46
 #define BCM2835_CLOCK_DSI0E            47
 #define BCM2835_CLOCK_DSI1E            48
+#define BCM2835_CLOCK_DSI0P            49
+#define BCM2835_CLOCK_DSI1P            50
diff --git a/include/dt-bindings/clock/exynos4415.h b/include/dt-bindings/clock/exynos4415.h
deleted file mode 100644 (file)
index 7eed551..0000000
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright (c) 2014 Samsung Electronics Co., Ltd.
- * Author: Chanwoo Choi <cw00.choi@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Device Tree binding constants for Samsung Exynos4415 clock controllers.
- */
-
-#ifndef _DT_BINDINGS_CLOCK_SAMSUNG_EXYNOS4415_CLOCK_H
-#define _DT_BINDINGS_CLOCK_SAMSUNG_EXYNOS4415_CLOCK_H
-
-/*
- * Let each exported clock get a unique index, which is used on DT-enabled
- * platforms to lookup the clock from a clock specifier. These indices are
- * therefore considered an ABI and so must not be changed. This implies
- * that new clocks should be added either in free spaces between clock groups
- * or at the end.
- */
-
-/*
- * Main CMU
- */
-
-#define CLK_OSCSEL                     1
-#define CLK_FIN_PLL                    2
-#define CLK_FOUT_APLL                  3
-#define CLK_FOUT_MPLL                  4
-#define CLK_FOUT_EPLL                  5
-#define CLK_FOUT_G3D_PLL               6
-#define CLK_FOUT_ISP_PLL               7
-#define CLK_FOUT_DISP_PLL              8
-
-/* Muxes */
-#define CLK_MOUT_MPLL_USER_L           16
-#define CLK_MOUT_GDL                   17
-#define CLK_MOUT_MPLL_USER_R           18
-#define CLK_MOUT_GDR                   19
-#define CLK_MOUT_EBI                   20
-#define CLK_MOUT_ACLK_200              21
-#define CLK_MOUT_ACLK_160              22
-#define CLK_MOUT_ACLK_100              23
-#define CLK_MOUT_ACLK_266              24
-#define CLK_MOUT_G3D_PLL               25
-#define CLK_MOUT_EPLL                  26
-#define CLK_MOUT_EBI_1                 27
-#define CLK_MOUT_ISP_PLL               28
-#define CLK_MOUT_DISP_PLL              29
-#define CLK_MOUT_MPLL_USER_T           30
-#define CLK_MOUT_ACLK_400_MCUISP       31
-#define CLK_MOUT_G3D_PLLSRC            32
-#define CLK_MOUT_CSIS1                 33
-#define CLK_MOUT_CSIS0                 34
-#define CLK_MOUT_CAM1                  35
-#define CLK_MOUT_FIMC3_LCLK            36
-#define CLK_MOUT_FIMC2_LCLK            37
-#define CLK_MOUT_FIMC1_LCLK            38
-#define CLK_MOUT_FIMC0_LCLK            39
-#define CLK_MOUT_MFC                   40
-#define CLK_MOUT_MFC_1                 41
-#define CLK_MOUT_MFC_0                 42
-#define CLK_MOUT_G3D                   43
-#define CLK_MOUT_G3D_1                 44
-#define CLK_MOUT_G3D_0                 45
-#define CLK_MOUT_MIPI0                 46
-#define CLK_MOUT_FIMD0                 47
-#define CLK_MOUT_TSADC_ISP             48
-#define CLK_MOUT_UART_ISP              49
-#define CLK_MOUT_SPI1_ISP              50
-#define CLK_MOUT_SPI0_ISP              51
-#define CLK_MOUT_PWM_ISP               52
-#define CLK_MOUT_AUDIO0                        53
-#define CLK_MOUT_TSADC                 54
-#define CLK_MOUT_MMC2                  55
-#define CLK_MOUT_MMC1                  56
-#define CLK_MOUT_MMC0                  57
-#define CLK_MOUT_UART3                 58
-#define CLK_MOUT_UART2                 59
-#define CLK_MOUT_UART1                 60
-#define CLK_MOUT_UART0                 61
-#define CLK_MOUT_SPI2                  62
-#define CLK_MOUT_SPI1                  63
-#define CLK_MOUT_SPI0                  64
-#define CLK_MOUT_SPDIF                 65
-#define CLK_MOUT_AUDIO2                        66
-#define CLK_MOUT_AUDIO1                        67
-#define CLK_MOUT_MPLL_USER_C           68
-#define CLK_MOUT_HPM                   69
-#define CLK_MOUT_CORE                  70
-#define CLK_MOUT_APLL                  71
-#define CLK_MOUT_PXLASYNC_CSIS1_FIMC   72
-#define CLK_MOUT_PXLASYNC_CSIS0_FIMC   73
-#define CLK_MOUT_JPEG                  74
-#define CLK_MOUT_JPEG1                 75
-#define CLK_MOUT_JPEG0                 76
-#define CLK_MOUT_ACLK_ISP0_300         77
-#define CLK_MOUT_ACLK_ISP0_400         78
-#define CLK_MOUT_ACLK_ISP0_300_USER    79
-#define CLK_MOUT_ACLK_ISP1_300         80
-#define CLK_MOUT_ACLK_ISP1_300_USER    81
-#define CLK_MOUT_HDMI                  82
-
-/* Dividers */
-#define CLK_DIV_GPL                    90
-#define CLK_DIV_GDL                    91
-#define CLK_DIV_GPR                    92
-#define CLK_DIV_GDR                    93
-#define CLK_DIV_ACLK_400_MCUISP                94
-#define CLK_DIV_EBI                    95
-#define CLK_DIV_ACLK_200               96
-#define CLK_DIV_ACLK_160               97
-#define CLK_DIV_ACLK_100               98
-#define CLK_DIV_ACLK_266               99
-#define CLK_DIV_CSIS1                  100
-#define CLK_DIV_CSIS0                  101
-#define CLK_DIV_CAM1                   102
-#define CLK_DIV_FIMC3_LCLK             103
-#define CLK_DIV_FIMC2_LCLK             104
-#define CLK_DIV_FIMC1_LCLK             105
-#define CLK_DIV_FIMC0_LCLK             106
-#define CLK_DIV_TV_BLK                 107
-#define CLK_DIV_MFC                    108
-#define CLK_DIV_G3D                    109
-#define CLK_DIV_MIPI0_PRE              110
-#define CLK_DIV_MIPI0                  111
-#define CLK_DIV_FIMD0                  112
-#define CLK_DIV_UART_ISP               113
-#define CLK_DIV_SPI1_ISP_PRE           114
-#define CLK_DIV_SPI1_ISP               115
-#define CLK_DIV_SPI0_ISP_PRE           116
-#define CLK_DIV_SPI0_ISP               117
-#define CLK_DIV_PWM_ISP                        118
-#define CLK_DIV_PCM0                   119
-#define CLK_DIV_AUDIO0                 120
-#define CLK_DIV_TSADC_PRE              121
-#define CLK_DIV_TSADC                  122
-#define CLK_DIV_MMC1_PRE               123
-#define CLK_DIV_MMC1                   124
-#define CLK_DIV_MMC0_PRE               125
-#define CLK_DIV_MMC0                   126
-#define CLK_DIV_MMC2_PRE               127
-#define CLK_DIV_MMC2                   128
-#define CLK_DIV_UART3                  129
-#define CLK_DIV_UART2                  130
-#define CLK_DIV_UART1                  131
-#define CLK_DIV_UART0                  132
-#define CLK_DIV_SPI1_PRE               133
-#define CLK_DIV_SPI1                   134
-#define CLK_DIV_SPI0_PRE               135
-#define CLK_DIV_SPI0                   136
-#define CLK_DIV_SPI2_PRE               137
-#define CLK_DIV_SPI2                   138
-#define CLK_DIV_PCM2                   139
-#define CLK_DIV_AUDIO2                 140
-#define CLK_DIV_PCM1                   141
-#define CLK_DIV_AUDIO1                 142
-#define CLK_DIV_I2S1                   143
-#define CLK_DIV_PXLASYNC_CSIS1_FIMC    144
-#define CLK_DIV_PXLASYNC_CSIS0_FIMC    145
-#define CLK_DIV_JPEG                   146
-#define CLK_DIV_CORE2                  147
-#define CLK_DIV_APLL                   148
-#define CLK_DIV_PCLK_DBG               149
-#define CLK_DIV_ATB                    150
-#define CLK_DIV_PERIPH                 151
-#define CLK_DIV_COREM1                 152
-#define CLK_DIV_COREM0                 153
-#define CLK_DIV_CORE                   154
-#define CLK_DIV_HPM                    155
-#define CLK_DIV_COPY                   156
-
-/* Gates */
-#define CLK_ASYNC_G3D                  180
-#define CLK_ASYNC_MFCL                 181
-#define CLK_ASYNC_TVX                  182
-#define CLK_PPMULEFT                   183
-#define CLK_GPIO_LEFT                  184
-#define CLK_PPMUIMAGE                  185
-#define CLK_QEMDMA2                    186
-#define CLK_QEROTATOR                  187
-#define CLK_SMMUMDMA2                  188
-#define CLK_SMMUROTATOR                        189
-#define CLK_MDMA2                      190
-#define CLK_ROTATOR                    191
-#define CLK_ASYNC_ISPMX                        192
-#define CLK_ASYNC_MAUDIOX              193
-#define CLK_ASYNC_MFCR                 194
-#define CLK_ASYNC_FSYSD                        195
-#define CLK_ASYNC_LCD0X                        196
-#define CLK_ASYNC_CAMX                 197
-#define CLK_PPMURIGHT                  198
-#define CLK_GPIO_RIGHT                 199
-#define CLK_ANTIRBK_APBIF              200
-#define CLK_EFUSE_WRITER_APBIF         201
-#define CLK_MONOCNT                    202
-#define CLK_TZPC6                      203
-#define CLK_PROVISIONKEY1              204
-#define CLK_PROVISIONKEY0              205
-#define CLK_CMU_ISPPART                        206
-#define CLK_TMU_APBIF                  207
-#define CLK_KEYIF                      208
-#define CLK_RTC                                209
-#define CLK_WDT                                210
-#define CLK_MCT                                211
-#define CLK_SECKEY                     212
-#define CLK_HDMI_CEC                   213
-#define CLK_TZPC5                      214
-#define CLK_TZPC4                      215
-#define CLK_TZPC3                      216
-#define CLK_TZPC2                      217
-#define CLK_TZPC1                      218
-#define CLK_TZPC0                      219
-#define CLK_CMU_COREPART               220
-#define CLK_CMU_TOPPART                        221
-#define CLK_PMU_APBIF                  222
-#define CLK_SYSREG                     223
-#define CLK_CHIP_ID                    224
-#define CLK_SMMUFIMC_LITE2             225
-#define CLK_FIMC_LITE2                 226
-#define CLK_PIXELASYNCM1               227
-#define CLK_PIXELASYNCM0               228
-#define CLK_PPMUCAMIF                  229
-#define CLK_SMMUJPEG                   230
-#define CLK_SMMUFIMC3                  231
-#define CLK_SMMUFIMC2                  232
-#define CLK_SMMUFIMC1                  233
-#define CLK_SMMUFIMC0                  234
-#define CLK_JPEG                       235
-#define CLK_CSIS1                      236
-#define CLK_CSIS0                      237
-#define CLK_FIMC3                      238
-#define CLK_FIMC2                      239
-#define CLK_FIMC1                      240
-#define CLK_FIMC0                      241
-#define CLK_PPMUTV                     242
-#define CLK_SMMUTV                     243
-#define CLK_HDMI                       244
-#define CLK_MIXER                      245
-#define CLK_VP                         246
-#define CLK_PPMUMFC_R                  247
-#define CLK_PPMUMFC_L                  248
-#define CLK_SMMUMFC_R                  249
-#define CLK_SMMUMFC_L                  250
-#define CLK_MFC                                251
-#define CLK_PPMUG3D                    252
-#define CLK_G3D                                253
-#define CLK_PPMULCD0                   254
-#define CLK_SMMUFIMD0                  255
-#define CLK_DSIM0                      256
-#define CLK_SMIES                      257
-#define CLK_MIE0                       258
-#define CLK_FIMD0                      259
-#define CLK_TSADC                      260
-#define CLK_PPMUFILE                   261
-#define CLK_NFCON                      262
-#define CLK_USBDEVICE                  263
-#define CLK_USBHOST                    264
-#define CLK_SROMC                      265
-#define CLK_SDMMC2                     266
-#define CLK_SDMMC1                     267
-#define CLK_SDMMC0                     268
-#define CLK_PDMA1                      269
-#define CLK_PDMA0                      270
-#define CLK_SPDIF                      271
-#define CLK_PWM                                272
-#define CLK_PCM2                       273
-#define CLK_PCM1                       274
-#define CLK_I2S1                       275
-#define CLK_SPI2                       276
-#define CLK_SPI1                       277
-#define CLK_SPI0                       278
-#define CLK_I2CHDMI                    279
-#define CLK_I2C7                       280
-#define CLK_I2C6                       281
-#define CLK_I2C5                       282
-#define CLK_I2C4                       283
-#define CLK_I2C3                       284
-#define CLK_I2C2                       285
-#define CLK_I2C1                       286
-#define CLK_I2C0                       287
-#define CLK_UART3                      288
-#define CLK_UART2                      289
-#define CLK_UART1                      290
-#define CLK_UART0                      291
-
-/* Special clocks */
-#define CLK_SCLK_PXLAYSNC_CSIS1_FIMC   330
-#define CLK_SCLK_PXLAYSNC_CSIS0_FIMC   331
-#define CLK_SCLK_JPEG                  332
-#define CLK_SCLK_CSIS1                 333
-#define CLK_SCLK_CSIS0                 334
-#define CLK_SCLK_CAM1                  335
-#define CLK_SCLK_FIMC3_LCLK            336
-#define CLK_SCLK_FIMC2_LCLK            337
-#define CLK_SCLK_FIMC1_LCLK            338
-#define CLK_SCLK_FIMC0_LCLK            339
-#define CLK_SCLK_PIXEL                 340
-#define CLK_SCLK_HDMI                  341
-#define CLK_SCLK_MIXER                 342
-#define CLK_SCLK_MFC                   343
-#define CLK_SCLK_G3D                   344
-#define CLK_SCLK_MIPIDPHY4L            345
-#define CLK_SCLK_MIPI0                 346
-#define CLK_SCLK_MDNIE0                        347
-#define CLK_SCLK_FIMD0                 348
-#define CLK_SCLK_PCM0                  349
-#define CLK_SCLK_AUDIO0                        350
-#define CLK_SCLK_TSADC                 351
-#define CLK_SCLK_EBI                   352
-#define CLK_SCLK_MMC2                  353
-#define CLK_SCLK_MMC1                  354
-#define CLK_SCLK_MMC0                  355
-#define CLK_SCLK_I2S                   356
-#define CLK_SCLK_PCM2                  357
-#define CLK_SCLK_PCM1                  358
-#define CLK_SCLK_AUDIO2                        359
-#define CLK_SCLK_AUDIO1                        360
-#define CLK_SCLK_SPDIF                 361
-#define CLK_SCLK_SPI2                  362
-#define CLK_SCLK_SPI1                  363
-#define CLK_SCLK_SPI0                  364
-#define CLK_SCLK_UART3                 365
-#define CLK_SCLK_UART2                 366
-#define CLK_SCLK_UART1                 367
-#define CLK_SCLK_UART0                 368
-#define CLK_SCLK_HDMIPHY               369
-
-/*
- * Total number of clocks of main CMU.
- * NOTE: Must be equal to last clock ID increased by one.
- */
-#define CLK_NR_CLKS                    370
-
-/*
- * CMU DMC
- */
-#define CLK_DMC_FOUT_MPLL              1
-#define CLK_DMC_FOUT_BPLL              2
-
-#define CLK_DMC_MOUT_MPLL              3
-#define CLK_DMC_MOUT_BPLL              4
-#define CLK_DMC_MOUT_DPHY              5
-#define CLK_DMC_MOUT_DMC_BUS           6
-
-#define CLK_DMC_DIV_DMC                        7
-#define CLK_DMC_DIV_DPHY               8
-#define CLK_DMC_DIV_DMC_PRE            9
-#define CLK_DMC_DIV_DMCP               10
-#define CLK_DMC_DIV_DMCD               11
-#define CLK_DMC_DIV_MPLL_PRE           12
-
-/*
- * Total number of clocks of CMU_DMC.
- * NOTE: Must be equal to highest clock ID increased by one.
- */
-#define NR_CLKS_DMC                    13
-
-#endif /* _DT_BINDINGS_CLOCK_SAMSUNG_EXYNOS4415_CLOCK_H */
index 4fa6bb2136e373205d47e592eb441db003c4d3df..be39d23e6a32ecf3e4fa24343ea518352db75f8e 100644 (file)
 
 #define CLK_PCLK_DECON                                 113
 
-#define DISP_NR_CLK                                    114
+#define CLK_PHYCLK_MIPIDPHY0_BITCLKDIV8_PHY            114
+#define CLK_PHYCLK_MIPIDPHY0_RXCLKESC0_PHY             115
+
+#define DISP_NR_CLK                                    116
 
 /* CMU_AUD */
 #define CLK_MOUT_AUD_PLL_USER                          1
diff --git a/include/dt-bindings/clock/hi3660-clock.h b/include/dt-bindings/clock/hi3660-clock.h
new file mode 100644 (file)
index 0000000..1c00b7f
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016-2017 Linaro Ltd.
+ * Copyright (c) 2016-2017 HiSilicon Technologies Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __DTS_HI3660_CLOCK_H
+#define __DTS_HI3660_CLOCK_H
+
+/* fixed rate clocks */
+#define HI3660_CLKIN_SYS               0
+#define HI3660_CLKIN_REF               1
+#define HI3660_CLK_FLL_SRC             2
+#define HI3660_CLK_PPLL0               3
+#define HI3660_CLK_PPLL1               4
+#define HI3660_CLK_PPLL2               5
+#define HI3660_CLK_PPLL3               6
+#define HI3660_CLK_SCPLL               7
+#define HI3660_PCLK                    8
+#define HI3660_CLK_UART0_DBG           9
+#define HI3660_CLK_UART6               10
+#define HI3660_OSC32K                  11
+#define HI3660_OSC19M                  12
+#define HI3660_CLK_480M                        13
+#define HI3660_CLK_INV                 14
+
+/* clk in crgctrl */
+#define HI3660_FACTOR_UART3            15
+#define HI3660_CLK_FACTOR_MMC          16
+#define HI3660_CLK_GATE_I2C0           17
+#define HI3660_CLK_GATE_I2C1           18
+#define HI3660_CLK_GATE_I2C2           19
+#define HI3660_CLK_GATE_I2C6           20
+#define HI3660_CLK_DIV_SYSBUS          21
+#define HI3660_CLK_DIV_320M            22
+#define HI3660_CLK_DIV_A53             23
+#define HI3660_CLK_GATE_SPI0           24
+#define HI3660_CLK_GATE_SPI2           25
+#define HI3660_PCIEPHY_REF             26
+#define HI3660_CLK_ABB_USB             27
+#define HI3660_HCLK_GATE_SDIO0         28
+#define HI3660_HCLK_GATE_SD            29
+#define HI3660_CLK_GATE_AOMM           30
+#define HI3660_PCLK_GPIO0              31
+#define HI3660_PCLK_GPIO1              32
+#define HI3660_PCLK_GPIO2              33
+#define HI3660_PCLK_GPIO3              34
+#define HI3660_PCLK_GPIO4              35
+#define HI3660_PCLK_GPIO5              36
+#define HI3660_PCLK_GPIO6              37
+#define HI3660_PCLK_GPIO7              38
+#define HI3660_PCLK_GPIO8              39
+#define HI3660_PCLK_GPIO9              40
+#define HI3660_PCLK_GPIO10             41
+#define HI3660_PCLK_GPIO11             42
+#define HI3660_PCLK_GPIO12             43
+#define HI3660_PCLK_GPIO13             44
+#define HI3660_PCLK_GPIO14             45
+#define HI3660_PCLK_GPIO15             46
+#define HI3660_PCLK_GPIO16             47
+#define HI3660_PCLK_GPIO17             48
+#define HI3660_PCLK_GPIO18             49
+#define HI3660_PCLK_GPIO19             50
+#define HI3660_PCLK_GPIO20             51
+#define HI3660_PCLK_GPIO21             52
+#define HI3660_CLK_GATE_SPI3           53
+#define HI3660_CLK_GATE_I2C7           54
+#define HI3660_CLK_GATE_I2C3           55
+#define HI3660_CLK_GATE_SPI1           56
+#define HI3660_CLK_GATE_UART1          57
+#define HI3660_CLK_GATE_UART2          58
+#define HI3660_CLK_GATE_UART4          59
+#define HI3660_CLK_GATE_UART5          60
+#define HI3660_CLK_GATE_I2C4           61
+#define HI3660_CLK_GATE_DMAC           62
+#define HI3660_PCLK_GATE_DSS           63
+#define HI3660_ACLK_GATE_DSS           64
+#define HI3660_CLK_GATE_LDI1           65
+#define HI3660_CLK_GATE_LDI0           66
+#define HI3660_CLK_GATE_VIVOBUS                67
+#define HI3660_CLK_GATE_EDC0           68
+#define HI3660_CLK_GATE_TXDPHY0_CFG    69
+#define HI3660_CLK_GATE_TXDPHY0_REF    70
+#define HI3660_CLK_GATE_TXDPHY1_CFG    71
+#define HI3660_CLK_GATE_TXDPHY1_REF    72
+#define HI3660_ACLK_GATE_USB3OTG       73
+#define HI3660_CLK_GATE_SPI4           74
+#define HI3660_CLK_GATE_SD             75
+#define HI3660_CLK_GATE_SDIO0          76
+#define HI3660_CLK_GATE_UFS_SUBSYS     77
+#define HI3660_PCLK_GATE_DSI0          78
+#define HI3660_PCLK_GATE_DSI1          79
+#define HI3660_ACLK_GATE_PCIE          80
+#define HI3660_PCLK_GATE_PCIE_SYS       81
+#define HI3660_CLK_GATE_PCIEAUX                82
+#define HI3660_PCLK_GATE_PCIE_PHY      83
+#define HI3660_CLK_ANDGT_LDI0          84
+#define HI3660_CLK_ANDGT_LDI1          85
+#define HI3660_CLK_ANDGT_EDC0          86
+#define HI3660_CLK_GATE_UFSPHY_GT      87
+#define HI3660_CLK_ANDGT_MMC           88
+#define HI3660_CLK_ANDGT_SD            89
+#define HI3660_CLK_A53HPM_ANDGT                90
+#define HI3660_CLK_ANDGT_SDIO          91
+#define HI3660_CLK_ANDGT_UART0         92
+#define HI3660_CLK_ANDGT_UART1         93
+#define HI3660_CLK_ANDGT_UARTH         94
+#define HI3660_CLK_ANDGT_SPI           95
+#define HI3660_CLK_VIVOBUS_ANDGT       96
+#define HI3660_CLK_AOMM_ANDGT          97
+#define HI3660_CLK_320M_PLL_GT         98
+#define HI3660_AUTODIV_EMMC0BUS                99
+#define HI3660_AUTODIV_SYSBUS          100
+#define HI3660_CLK_GATE_UFSPHY_CFG     101
+#define HI3660_CLK_GATE_UFSIO_REF      102
+#define HI3660_CLK_MUX_SYSBUS          103
+#define HI3660_CLK_MUX_UART0           104
+#define HI3660_CLK_MUX_UART1           105
+#define HI3660_CLK_MUX_UARTH           106
+#define HI3660_CLK_MUX_SPI             107
+#define HI3660_CLK_MUX_I2C             108
+#define HI3660_CLK_MUX_MMC_PLL         109
+#define HI3660_CLK_MUX_LDI1            110
+#define HI3660_CLK_MUX_LDI0            111
+#define HI3660_CLK_MUX_SD_PLL          112
+#define HI3660_CLK_MUX_SD_SYS          113
+#define HI3660_CLK_MUX_EDC0            114
+#define HI3660_CLK_MUX_SDIO_SYS                115
+#define HI3660_CLK_MUX_SDIO_PLL                116
+#define HI3660_CLK_MUX_VIVOBUS         117
+#define HI3660_CLK_MUX_A53HPM          118
+#define HI3660_CLK_MUX_320M            119
+#define HI3660_CLK_MUX_IOPERI          120
+#define HI3660_CLK_DIV_UART0           121
+#define HI3660_CLK_DIV_UART1           122
+#define HI3660_CLK_DIV_UARTH           123
+#define HI3660_CLK_DIV_MMC             124
+#define HI3660_CLK_DIV_SD              125
+#define HI3660_CLK_DIV_EDC0            126
+#define HI3660_CLK_DIV_LDI0            127
+#define HI3660_CLK_DIV_SDIO            128
+#define HI3660_CLK_DIV_LDI1            129
+#define HI3660_CLK_DIV_SPI             130
+#define HI3660_CLK_DIV_VIVOBUS         131
+#define HI3660_CLK_DIV_I2C             132
+#define HI3660_CLK_DIV_UFSPHY          133
+#define HI3660_CLK_DIV_CFGBUS          134
+#define HI3660_CLK_DIV_MMC0BUS         135
+#define HI3660_CLK_DIV_MMC1BUS         136
+#define HI3660_CLK_DIV_UFSPERI         137
+#define HI3660_CLK_DIV_AOMM            138
+#define HI3660_CLK_DIV_IOPERI          139
+
+/* clk in pmuctrl */
+#define HI3660_GATE_ABB_192            0
+
+/* clk in pctrl */
+#define HI3660_GATE_UFS_TCXO_EN                0
+#define HI3660_GATE_USB_TCXO_EN                1
+
+/* clk in sctrl */
+#define HI3660_PCLK_AO_GPIO0           0
+#define HI3660_PCLK_AO_GPIO1           1
+#define HI3660_PCLK_AO_GPIO2           2
+#define HI3660_PCLK_AO_GPIO3           3
+#define HI3660_PCLK_AO_GPIO4           4
+#define HI3660_PCLK_AO_GPIO5           5
+#define HI3660_PCLK_AO_GPIO6           6
+#define HI3660_PCLK_GATE_MMBUF         7
+#define HI3660_CLK_GATE_DSS_AXI_MM     8
+#define HI3660_PCLK_MMBUF_ANDGT                9
+#define HI3660_CLK_MMBUF_PLL_ANDGT     10
+#define HI3660_CLK_FLL_MMBUF_ANDGT     11
+#define HI3660_CLK_SYS_MMBUF_ANDGT     12
+#define HI3660_CLK_GATE_PCIEPHY_GT     13
+#define HI3660_ACLK_MUX_MMBUF          14
+#define HI3660_CLK_SW_MMBUF            15
+#define HI3660_CLK_DIV_AOBUS           16
+#define HI3660_PCLK_DIV_MMBUF          17
+#define HI3660_ACLK_DIV_MMBUF          18
+#define HI3660_CLK_DIV_PCIEPHY         19
+
+/* clk in iomcu */
+#define HI3660_CLK_I2C0_IOMCU          0
+#define HI3660_CLK_I2C1_IOMCU          1
+#define HI3660_CLK_I2C2_IOMCU          2
+#define HI3660_CLK_I2C6_IOMCU          3
+#define HI3660_CLK_IOMCU_PERI0         4
+
+#endif /* __DTS_HI3660_CLOCK_H */
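The indices above are a device-tree ABI: each number selects one clk_hw slot exported by the clock provider, so values must stay stable and gaps are never reused. As a minimal sketch (not the actual hi3660 driver; the register offset, bit position, clock names and node label are illustrative assumptions), a provider maps a binding index to a registered gate like this:

	#include <linux/clk-provider.h>
	#include <linux/device.h>
	#include <linux/of.h>
	#include <linux/spinlock.h>
	#include <dt-bindings/clock/hi3660-clock.h>

	static DEFINE_SPINLOCK(example_clk_lock);

	static int example_register_uart5_gate(struct device *dev,
					       struct device_node *np,
					       void __iomem *base)
	{
		struct clk_hw_onecell_data *data;
		struct clk_hw *hw;
		unsigned int nr = HI3660_CLK_DIV_IOPERI + 1;

		data = devm_kzalloc(dev,
				    sizeof(*data) + nr * sizeof(data->hws[0]),
				    GFP_KERNEL);
		if (!data)
			return -ENOMEM;
		data->num = nr;

		/* offset 0x20 and bit 14 are made-up placeholder values */
		hw = clk_hw_register_gate(dev, "clk_gate_uart5", "clk_mux_uarth",
					  0, base + 0x20, 14, 0,
					  &example_clk_lock);
		if (IS_ERR(hw))
			return PTR_ERR(hw);
		data->hws[HI3660_CLK_GATE_UART5] = hw;

		/* consumers can now resolve clocks by the binding index */
		return of_clk_add_hw_provider(np, of_clk_hw_onecell_get, data);
	}

A consumer node then picks the clock by index alone, e.g. clocks = <&crg_ctrl HI3660_CLK_GATE_UART5>; (the &crg_ctrl label is likewise an assumption for the example).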
index 1183347c383fe10d28f6c9171021283a2fefc4b8..a7a1a50f33efe98b00f6f818d2cca609019673bc 100644 (file)
 #define IMX7D_ADC_ROOT_CLK             436
 #define IMX7D_CLK_ARM                  437
 #define IMX7D_CKIL                     438
-#define IMX7D_CLK_END                  439
+#define IMX7D_OCOTP_CLK                        439
+#define IMX7D_CLK_END                  440
 #endif /* __DT_BINDINGS_CLOCK_IMX7D_H */
index 6240e5b0e9005e68dc034d1c82120b569adbb684..7e8a7be6dcdaa90051155131f93e119bdaa3f71a 100644 (file)
 #define GCC_WCSS5G_CLK                                 62
 #define GCC_WCSS5G_REF_CLK                             63
 #define GCC_WCSS5G_RTC_CLK                             64
+#define GCC_APSS_DDRPLL_VCO                            65
+#define GCC_SDCC_PLLDIV_CLK                            66
+#define GCC_FEPLL_VCO                                  67
+#define GCC_FEPLL125_CLK                               68
+#define GCC_FEPLL125DLY_CLK                            69
+#define GCC_FEPLL200_CLK                               70
+#define GCC_FEPLL500_CLK                               71
+#define GCC_FEPLL_WCSS2G_CLK                           72
+#define GCC_FEPLL_WCSS5G_CLK                           73
+#define GCC_APSS_CPU_PLLDIV_CLK                                74
+#define GCC_PCNOC_AHB_CLK_SRC                          75
 
 #define WIFI0_CPU_INIT_RESET                           0
 #define WIFI0_RADIO_SRIF_RESET                         1
index 9ab2c4087120c6cf8b20f21bc7dbd74a2137407b..787e448958bd492b811871931df38e08b8bf8c2c 100644 (file)
 #define CE3_H_CLK                              305
 #define USB_HS1_SYSTEM_CLK_SRC                 306
 #define USB_HS1_SYSTEM_CLK                     307
+#define EBI2_CLK                               308
+#define EBI2_AON_CLK                           309
 
 #endif
index 8fa535be2ebc1fe4839d4d0389ec13becd1c6e24..df47da0860f74f33e4f114a28e2f7c9b2142c8d4 100644 (file)
 #define GCC_USB30_MOCK_UTMI_CLK                        115
 #define GCC_USB3_PHY_AUX_CLK                   116
 #define GCC_USB_HS_SYSTEM_CLK                  117
+#define GCC_SDCC1_AHB_CLK                      118
 
 #endif
index 1828723eb621225d106622515be190f73540bfea..1f5c42254798e34ac6d6ea56a049f3e0a13dfc49 100644 (file)
 #define GCC_PCIE_PHY_COM_NOCSR_BCR                             102
 #define GCC_USB3_PHY_BCR                                       103
 #define GCC_USB3PHY_PHY_BCR                                    104
+#define GCC_MSS_RESTART                                                105
 
 
 /* Indexes for GDSCs */
index 5924cdb7133621f0e4a4427c1208b8cc6fbc0d80..96b63c00249eeb201fef1f994905d586b5608c72 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef _DT_BINDINGS_CLK_MSM_RPMCC_H
 #define _DT_BINDINGS_CLK_MSM_RPMCC_H
 
-/* apq8064 */
+/* RPM clocks */
 #define RPM_PXO_CLK                            0
 #define RPM_PXO_A_CLK                          1
 #define RPM_CXO_CLK                            2
@@ -38,7 +38,7 @@
 #define RPM_SFPB_CLK                           20
 #define RPM_SFPB_A_CLK                         21
 
-/* msm8916 */
+/* SMD RPM clocks */
 #define RPM_SMD_XO_CLK_SRC                             0
 #define RPM_SMD_XO_A_CLK_SRC                   1
 #define RPM_SMD_PCNOC_CLK                              2
 #define RPM_SMD_RF_CLK1_A_PIN                  23
 #define RPM_SMD_RF_CLK2_PIN                            24
 #define RPM_SMD_RF_CLK2_A_PIN                  25
+#define RPM_SMD_PNOC_CLK                       26
+#define RPM_SMD_PNOC_A_CLK                     27
+#define RPM_SMD_CNOC_CLK                       28
+#define RPM_SMD_CNOC_A_CLK                     29
+#define RPM_SMD_MMSSNOC_AHB_CLK                        30
+#define RPM_SMD_MMSSNOC_AHB_A_CLK              31
+#define RPM_SMD_GFX3D_CLK_SRC                  32
+#define RPM_SMD_GFX3D_A_CLK_SRC                        33
+#define RPM_SMD_OCMEMGX_CLK                    34
+#define RPM_SMD_OCMEMGX_A_CLK                  35
+#define RPM_SMD_CXO_D0                         36
+#define RPM_SMD_CXO_D0_A                       37
+#define RPM_SMD_CXO_D1                         38
+#define RPM_SMD_CXO_D1_A                       39
+#define RPM_SMD_CXO_A0                         40
+#define RPM_SMD_CXO_A0_A                       41
+#define RPM_SMD_CXO_A1                         42
+#define RPM_SMD_CXO_A1_A                       43
+#define RPM_SMD_CXO_A2                         44
+#define RPM_SMD_CXO_A2_A                       45
+#define RPM_SMD_DIV_CLK1                       46
+#define RPM_SMD_DIV_A_CLK1                     47
+#define RPM_SMD_DIV_CLK2                       48
+#define RPM_SMD_DIV_A_CLK2                     49
+#define RPM_SMD_DIFF_CLK                       50
+#define RPM_SMD_DIFF_A_CLK                     51
+#define RPM_SMD_CXO_D0_PIN                     52
+#define RPM_SMD_CXO_D0_A_PIN                   53
+#define RPM_SMD_CXO_D1_PIN                     54
+#define RPM_SMD_CXO_D1_A_PIN                   55
+#define RPM_SMD_CXO_A0_PIN                     56
+#define RPM_SMD_CXO_A0_A_PIN                   57
+#define RPM_SMD_CXO_A1_PIN                     58
+#define RPM_SMD_CXO_A1_A_PIN                   59
+#define RPM_SMD_CXO_A2_PIN                     60
+#define RPM_SMD_CXO_A2_A_PIN                   61
 
 #endif
index d141c1f0c77824e0ee4d42be87785b171b1457d9..eff4319d008bf8d723c39033112bfdaa453b9fc2 100644 (file)
 #define PCLK_TSADC             349
 #define PCLK_CPU               350
 #define PCLK_PERI              351
+#define PCLK_DDRUPCTL          352
+#define PCLK_PUBL              353
 
 /* hclk gates */
 #define HCLK_SDMMC             448
index 9a586e2d9c91fd3c81c74a3d927a2cd2973af563..d7b6c83ea63f1883e09aa8424d23667ee79e9832 100644 (file)
@@ -88,6 +88,7 @@
 #define SCLK_PVTM_GPU          124
 #define SCLK_CRYPTO            125
 #define SCLK_MIPIDSI_24M       126
+#define SCLK_VIP_OUT           127
 
 #define SCLK_MAC               151
 #define SCLK_MACREF_OUT                152
 #define PCLK_WDT               368
 #define PCLK_EFUSE256          369
 #define PCLK_EFUSE1024         370
+#define PCLK_ISP_IN            371
 
 /* hclk gates */
 #define HCLK_GPS               448
diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h
new file mode 100644 (file)
index 0000000..ee702c8
--- /dev/null
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd.
+ * Author: Elaine <zhangqing@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK3328_H
+#define _DT_BINDINGS_CLK_ROCKCHIP_RK3328_H
+
+/* core clocks */
+#define PLL_APLL               1
+#define PLL_DPLL               2
+#define PLL_CPLL               3
+#define PLL_GPLL               4
+#define PLL_NPLL               5
+#define ARMCLK                 6
+
+/* sclk gates (special clocks) */
+#define SCLK_RTC32K            30
+#define SCLK_SDMMC_EXT         31
+#define SCLK_SPI               32
+#define SCLK_SDMMC             33
+#define SCLK_SDIO              34
+#define SCLK_EMMC              35
+#define SCLK_TSADC             36
+#define SCLK_SARADC            37
+#define SCLK_UART0             38
+#define SCLK_UART1             39
+#define SCLK_UART2             40
+#define SCLK_I2S0              41
+#define SCLK_I2S1              42
+#define SCLK_I2S2              43
+#define SCLK_I2S1_OUT          44
+#define SCLK_I2S2_OUT          45
+#define SCLK_SPDIF             46
+#define SCLK_TIMER0            47
+#define SCLK_TIMER1            48
+#define SCLK_TIMER2            49
+#define SCLK_TIMER3            50
+#define SCLK_TIMER4            51
+#define SCLK_TIMER5            52
+#define SCLK_WIFI              53
+#define SCLK_CIF_OUT           54
+#define SCLK_I2C0              55
+#define SCLK_I2C1              56
+#define SCLK_I2C2              57
+#define SCLK_I2C3              58
+#define SCLK_CRYPTO            59
+#define SCLK_PWM               60
+#define SCLK_PDM               61
+#define SCLK_EFUSE             62
+#define SCLK_OTP               63
+#define SCLK_DDRCLK            64
+#define SCLK_VDEC_CABAC                65
+#define SCLK_VDEC_CORE         66
+#define SCLK_VENC_DSP          67
+#define SCLK_VENC_CORE         68
+#define SCLK_RGA               69
+#define SCLK_HDMI_SFC          70
+#define SCLK_HDMI_CEC          71
+#define SCLK_USB3_REF          72
+#define SCLK_USB3_SUSPEND      73
+#define SCLK_SDMMC_DRV         74
+#define SCLK_SDIO_DRV          75
+#define SCLK_EMMC_DRV          76
+#define SCLK_SDMMC_EXT_DRV     77
+#define SCLK_SDMMC_SAMPLE      78
+#define SCLK_SDIO_SAMPLE       79
+#define SCLK_EMMC_SAMPLE       80
+#define SCLK_SDMMC_EXT_SAMPLE  81
+#define SCLK_VOP               82
+#define SCLK_MAC2PHY_RXTX      83
+#define SCLK_MAC2PHY_SRC       84
+#define SCLK_MAC2PHY_REF       85
+#define SCLK_MAC2PHY_OUT       86
+#define SCLK_MAC2IO_RX         87
+#define SCLK_MAC2IO_TX         88
+#define SCLK_MAC2IO_REFOUT     89
+#define SCLK_MAC2IO_REF                90
+#define SCLK_MAC2IO_OUT                91
+#define SCLK_TSP               92
+#define SCLK_HSADC_TSP         93
+#define SCLK_USB3PHY_REF       94
+#define SCLK_REF_USB3OTG       95
+#define SCLK_USB3OTG_REF       96
+#define SCLK_USB3OTG_SUSPEND   97
+#define SCLK_REF_USB3OTG_SRC   98
+#define SCLK_MAC2IO_SRC                99
+#define SCLK_MAC2IO            100
+#define SCLK_MAC2PHY           101
+
+/* dclk gates */
+#define DCLK_LCDC              120
+#define DCLK_HDMIPHY           121
+#define HDMIPHY                        122
+#define USB480M                        123
+#define DCLK_LCDC_SRC          124
+
+/* aclk gates */
+#define ACLK_AXISRAM           130
+#define ACLK_VOP_PRE           131
+#define ACLK_USB3OTG           132
+#define ACLK_RGA_PRE           133
+#define ACLK_DMAC              134
+#define ACLK_GPU               135
+#define ACLK_BUS_PRE           136
+#define ACLK_PERI_PRE          137
+#define ACLK_RKVDEC_PRE                138
+#define ACLK_RKVDEC            139
+#define ACLK_RKVENC            140
+#define ACLK_VPU_PRE           141
+#define ACLK_VIO_PRE           142
+#define ACLK_VPU               143
+#define ACLK_VIO               144
+#define ACLK_VOP               145
+#define ACLK_GMAC              146
+#define ACLK_H265              147
+#define ACLK_H264              148
+#define ACLK_MAC2PHY           149
+#define ACLK_MAC2IO            150
+#define ACLK_DCF               151
+#define ACLK_TSP               152
+#define ACLK_PERI              153
+#define ACLK_RGA               154
+#define ACLK_IEP               155
+#define ACLK_CIF               156
+#define ACLK_HDCP              157
+
+/* pclk gates */
+#define PCLK_GPIO0             200
+#define PCLK_GPIO1             201
+#define PCLK_GPIO2             202
+#define PCLK_GPIO3             203
+#define PCLK_GRF               204
+#define PCLK_I2C0              205
+#define PCLK_I2C1              206
+#define PCLK_I2C2              207
+#define PCLK_I2C3              208
+#define PCLK_SPI               209
+#define PCLK_UART0             210
+#define PCLK_UART1             211
+#define PCLK_UART2             212
+#define PCLK_TSADC             213
+#define PCLK_PWM               214
+#define PCLK_TIMER             215
+#define PCLK_BUS_PRE           216
+#define PCLK_PERI_PRE          217
+#define PCLK_HDMI_CTRL         218
+#define PCLK_HDMI_PHY          219
+#define PCLK_GMAC              220
+#define PCLK_H265              221
+#define PCLK_MAC2PHY           222
+#define PCLK_MAC2IO            223
+#define PCLK_USB3PHY_OTG       224
+#define PCLK_USB3PHY_PIPE      225
+#define PCLK_USB3_GRF          226
+#define PCLK_USB2_GRF          227
+#define PCLK_HDMIPHY           228
+#define PCLK_DDR               229
+#define PCLK_PERI              230
+#define PCLK_HDMI              231
+#define PCLK_HDCP              232
+#define PCLK_DCF               233
+#define PCLK_SARADC            234
+
+/* hclk gates */
+#define HCLK_PERI              308
+#define HCLK_TSP               309
+#define HCLK_GMAC              310
+#define HCLK_I2S0_8CH          311
+#define HCLK_I2S1_8CH          312
+#define HCLK_I2S2_2CH          313
+#define HCLK_SPDIF_8CH         314
+#define HCLK_VOP               315
+#define HCLK_NANDC             316
+#define HCLK_SDMMC             317
+#define HCLK_SDIO              318
+#define HCLK_EMMC              319
+#define HCLK_SDMMC_EXT         320
+#define HCLK_RKVDEC_PRE                321
+#define HCLK_RKVDEC            322
+#define HCLK_RKVENC            323
+#define HCLK_VPU_PRE           324
+#define HCLK_VIO_PRE           325
+#define HCLK_VPU               326
+#define HCLK_VIO               327
+#define HCLK_BUS_PRE           328
+#define HCLK_PERI_PRE          329
+#define HCLK_H264              330
+#define HCLK_CIF               331
+#define HCLK_OTG_PMU           332
+#define HCLK_OTG               333
+#define HCLK_HOST0             334
+#define HCLK_HOST0_ARB         335
+#define HCLK_CRYPTO_MST                336
+#define HCLK_CRYPTO_SLV                337
+#define HCLK_PDM               338
+#define HCLK_IEP               339
+#define HCLK_RGA               340
+#define HCLK_HDCP              341
+
+#define CLK_NR_CLKS            (HCLK_HDCP + 1)
+
+/* soft-reset indices */
+#define SRST_CORE0_PO          0
+#define SRST_CORE1_PO          1
+#define SRST_CORE2_PO          2
+#define SRST_CORE3_PO          3
+#define SRST_CORE0             4
+#define SRST_CORE1             5
+#define SRST_CORE2             6
+#define SRST_CORE3             7
+#define SRST_CORE0_DBG         8
+#define SRST_CORE1_DBG         9
+#define SRST_CORE2_DBG         10
+#define SRST_CORE3_DBG         11
+#define SRST_TOPDBG            12
+#define SRST_CORE_NIU          13
+#define SRST_STRC_A            14
+#define SRST_L2C               15
+
+#define SRST_A53_GIC           18
+#define SRST_DAP               19
+#define SRST_PMU_P             21
+#define SRST_EFUSE             22
+#define SRST_BUSSYS_H          23
+#define SRST_BUSSYS_P          24
+#define SRST_SPDIF             25
+#define SRST_INTMEM            26
+#define SRST_ROM               27
+#define SRST_GPIO0             28
+#define SRST_GPIO1             29
+#define SRST_GPIO2             30
+#define SRST_GPIO3             31
+
+#define SRST_I2S0              32
+#define SRST_I2S1              33
+#define SRST_I2S2              34
+#define SRST_I2S0_H            35
+#define SRST_I2S1_H            36
+#define SRST_I2S2_H            37
+#define SRST_UART0             38
+#define SRST_UART1             39
+#define SRST_UART2             40
+#define SRST_UART0_P           41
+#define SRST_UART1_P           42
+#define SRST_UART2_P           43
+#define SRST_I2C0              44
+#define SRST_I2C1              45
+#define SRST_I2C2              46
+#define SRST_I2C3              47
+
+#define SRST_I2C0_P            48
+#define SRST_I2C1_P            49
+#define SRST_I2C2_P            50
+#define SRST_I2C3_P            51
+#define SRST_EFUSE_SE_P                52
+#define SRST_EFUSE_NS_P                53
+#define SRST_PWM0              54
+#define SRST_PWM0_P            55
+#define SRST_DMA               56
+#define SRST_TSP_A             57
+#define SRST_TSP_H             58
+#define SRST_TSP               59
+#define SRST_TSP_HSADC         60
+#define SRST_DCF_A             61
+#define SRST_DCF_P             62
+
+#define SRST_SCR               64
+#define SRST_SPI               65
+#define SRST_TSADC             66
+#define SRST_TSADC_P           67
+#define SRST_CRYPTO            68
+#define SRST_SGRF              69
+#define SRST_GRF               70
+#define SRST_USB_GRF           71
+#define SRST_TIMER_6CH_P       72
+#define SRST_TIMER0            73
+#define SRST_TIMER1            74
+#define SRST_TIMER2            75
+#define SRST_TIMER3            76
+#define SRST_TIMER4            77
+#define SRST_TIMER5            78
+#define SRST_USB3GRF           79
+
+#define SRST_PHYNIU            80
+#define SRST_HDMIPHY           81
+#define SRST_VDAC              82
+#define SRST_ACODEC_p          83
+#define SRST_SARADC            85
+#define SRST_SARADC_P          86
+#define SRST_GRF_DDR           87
+#define SRST_DFIMON            88
+#define SRST_MSCH              89
+#define SRST_DDRMSCH           91
+#define SRST_DDRCTRL           92
+#define SRST_DDRCTRL_P         93
+#define SRST_DDRPHY            94
+#define SRST_DDRPHY_P          95
+
+#define SRST_GMAC_NIU_A                96
+#define SRST_GMAC_NIU_P                97
+#define SRST_GMAC2PHY_A                98
+#define SRST_GMAC2IO_A         99
+#define SRST_MACPHY            100
+#define SRST_OTP_PHY           101
+#define SRST_GPU_A             102
+#define SRST_GPU_NIU_A         103
+#define SRST_SDMMCEXT          104
+#define SRST_PERIPH_NIU_A      105
+#define SRST_PERIHP_NIU_H      106
+#define SRST_PERIHP_P          107
+#define SRST_PERIPHSYS_H       108
+#define SRST_MMC0              109
+#define SRST_SDIO              110
+#define SRST_EMMC              111
+
+#define SRST_USB2OTG_H         112
+#define SRST_USB2OTG           113
+#define SRST_USB2OTG_ADP       114
+#define SRST_USB2HOST_H                115
+#define SRST_USB2HOST_ARB      116
+#define SRST_USB2HOST_AUX      117
+#define SRST_USB2HOST_EHCIPHY  118
+#define SRST_USB2HOST_UTMI     119
+#define SRST_USB3OTG           120
+#define SRST_USBPOR            121
+#define SRST_USB2OTG_UTMI      122
+#define SRST_USB2HOST_PHY_UTMI 123
+#define SRST_USB3OTG_UTMI      124
+#define SRST_USB3PHY_U2                125
+#define SRST_USB3PHY_U3                126
+#define SRST_USB3PHY_PIPE      127
+
+#define SRST_VIO_A             128
+#define SRST_VIO_BUS_H         129
+#define SRST_VIO_H2P_H         130
+#define SRST_VIO_ARBI_H                131
+#define SRST_VOP_NIU_A         132
+#define SRST_VOP_A             133
+#define SRST_VOP_H             134
+#define SRST_VOP_D             135
+#define SRST_RGA               136
+#define SRST_RGA_NIU_A         137
+#define SRST_RGA_A             138
+#define SRST_RGA_H             139
+#define SRST_IEP_A             140
+#define SRST_IEP_H             141
+#define SRST_HDMI              142
+#define SRST_HDMI_P            143
+
+#define SRST_HDCP_A            144
+#define SRST_HDCP              145
+#define SRST_HDCP_H            146
+#define SRST_CIF_A             147
+#define SRST_CIF_H             148
+#define SRST_CIF_P             149
+#define SRST_OTP_P             150
+#define SRST_OTP_SBPI          151
+#define SRST_OTP_USER          152
+#define SRST_DDRCTRL_A         153
+#define SRST_DDRSTDY_P         154
+#define SRST_DDRSTDY           155
+#define SRST_PDM_H             156
+#define SRST_PDM               157
+#define SRST_USB3PHY_OTG_P     158
+#define SRST_USB3PHY_PIPE_P    159
+
+#define SRST_VCODEC_A          160
+#define SRST_VCODEC_NIU_A      161
+#define SRST_VCODEC_H          162
+#define SRST_VCODEC_NIU_H      163
+#define SRST_VDEC_A            164
+#define SRST_VDEC_NIU_A                165
+#define SRST_VDEC_H            166
+#define SRST_VDEC_NIU_H                167
+#define SRST_VDEC_CORE         168
+#define SRST_VDEC_CABAC                169
+#define SRST_DDRPHYDIV         175
+
+#define SRST_RKVENC_NIU_A      176
+#define SRST_RKVENC_NIU_H      177
+#define SRST_RKVENC_H265_A     178
+#define SRST_RKVENC_H265_P     179
+#define SRST_RKVENC_H265_CORE  180
+#define SRST_RKVENC_H265_DSP   181
+#define SRST_RKVENC_H264_A     182
+#define SRST_RKVENC_H264_H     183
+#define SRST_RKVENC_INTMEM     184
+
+#endif
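Note that CLK_NR_CLKS above is derived as the highest clock index plus one, which is how the provider sizes its clock table. The SRST_* values are consumed through the reset-controller framework in the same index-as-ABI fashion as the clock IDs. A hedged consumer sketch (not part of this series; the "axi" line name, DT label and hold time are assumptions), for a node carrying resets = <&cru SRST_VOP_A>; reset-names = "axi";:

	#include <linux/delay.h>
	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/reset.h>

	static int example_hw_reset(struct device *dev)
	{
		struct reset_control *rst;

		/* resolves the "axi" entry, i.e. index SRST_VOP_A above */
		rst = devm_reset_control_get(dev, "axi");
		if (IS_ERR(rst))
			return PTR_ERR(rst);

		reset_control_assert(rst);
		usleep_range(10, 20);	/* hold time is hardware-specific */
		return reset_control_deassert(rst);
	}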
diff --git a/include/dt-bindings/clock/ste-ab8500.h b/include/dt-bindings/clock/ste-ab8500.h
new file mode 100644 (file)
index 0000000..6731f1f
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef __STE_CLK_AB8500_H__
+#define __STE_CLK_AB8500_H__
+
+#define AB8500_SYSCLK_BUF2     0
+#define AB8500_SYSCLK_BUF3     1
+#define AB8500_SYSCLK_BUF4     2
+#define AB8500_SYSCLK_ULP      3
+#define AB8500_SYSCLK_INT      4
+#define AB8500_SYSCLK_AUDIO    5
+
+#endif
index 08bcab61b7140422d6f63d5d0fbbd473d5e05c11..49bb3c203e5c0988a2c8dd2ecfb9ad786bdf3f82 100644 (file)
 
 #define END_PRIMARY_CLK                14
 
+#define CLK_HSI                        14
+#define CLK_SYSCLK             15
+#define CLK_HDMI_CEC           16
+#define CLK_SPDIF              17
+#define CLK_USART1             18
+#define CLK_USART2             19
+#define CLK_USART3             20
+#define CLK_UART4              21
+#define CLK_UART5              22
+#define CLK_USART6             23
+#define CLK_UART7              24
+#define CLK_UART8              25
+#define CLK_I2C1               26
+#define CLK_I2C2               27
+#define CLK_I2C3               28
+#define CLK_I2C4               29
+#define CLK_LPTIMER            30
+
+#define END_PRIMARY_CLK_F7     31
+
 #endif
diff --git a/include/dt-bindings/clock/sun5i-ccu.h b/include/dt-bindings/clock/sun5i-ccu.h
new file mode 100644 (file)
index 0000000..aeb2e2f
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2016 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN5I_H_
+#define _DT_BINDINGS_CLK_SUN5I_H_
+
+#define CLK_HOSC               1
+
+#define CLK_CPU                        17
+
+#define CLK_AHB_OTG            23
+#define CLK_AHB_EHCI           24
+#define CLK_AHB_OHCI           25
+#define CLK_AHB_SS             26
+#define CLK_AHB_DMA            27
+#define CLK_AHB_BIST           28
+#define CLK_AHB_MMC0           29
+#define CLK_AHB_MMC1           30
+#define CLK_AHB_MMC2           31
+#define CLK_AHB_NAND           32
+#define CLK_AHB_SDRAM          33
+#define CLK_AHB_EMAC           34
+#define CLK_AHB_TS             35
+#define CLK_AHB_SPI0           36
+#define CLK_AHB_SPI1           37
+#define CLK_AHB_SPI2           38
+#define CLK_AHB_GPS            39
+#define CLK_AHB_HSTIMER                40
+#define CLK_AHB_VE             41
+#define CLK_AHB_TVE            42
+#define CLK_AHB_LCD            43
+#define CLK_AHB_CSI            44
+#define CLK_AHB_HDMI           45
+#define CLK_AHB_DE_BE          46
+#define CLK_AHB_DE_FE          47
+#define CLK_AHB_IEP            48
+#define CLK_AHB_GPU            49
+#define CLK_APB0_CODEC         50
+#define CLK_APB0_SPDIF         51
+#define CLK_APB0_I2S           52
+#define CLK_APB0_PIO           53
+#define CLK_APB0_IR            54
+#define CLK_APB0_KEYPAD                55
+#define CLK_APB1_I2C0          56
+#define CLK_APB1_I2C1          57
+#define CLK_APB1_I2C2          58
+#define CLK_APB1_UART0         59
+#define CLK_APB1_UART1         60
+#define CLK_APB1_UART2         61
+#define CLK_APB1_UART3         62
+#define CLK_NAND               63
+#define CLK_MMC0               64
+#define CLK_MMC1               65
+#define CLK_MMC2               66
+#define CLK_TS                 67
+#define CLK_SS                 68
+#define CLK_SPI0               69
+#define CLK_SPI1               70
+#define CLK_SPI2               71
+#define CLK_IR                 72
+#define CLK_I2S                        73
+#define CLK_SPDIF              74
+#define CLK_KEYPAD             75
+#define CLK_USB_OHCI           76
+#define CLK_USB_PHY0           77
+#define CLK_USB_PHY1           78
+#define CLK_GPS                        79
+#define CLK_DRAM_VE            80
+#define CLK_DRAM_CSI           81
+#define CLK_DRAM_TS            82
+#define CLK_DRAM_TVE           83
+#define CLK_DRAM_DE_FE         84
+#define CLK_DRAM_DE_BE         85
+#define CLK_DRAM_ACE           86
+#define CLK_DRAM_IEP           87
+#define CLK_DE_BE              88
+#define CLK_DE_FE              89
+#define CLK_TCON_CH0           90
+
+#define CLK_TCON_CH1           92
+#define CLK_CSI                        93
+#define CLK_VE                 94
+#define CLK_CODEC              95
+#define CLK_AVS                        96
+#define CLK_HDMI               97
+#define CLK_GPU                        98
+
+#define CLK_IEP                        100
+
+#endif /* _DT_BINDINGS_CLK_SUN5I_H_ */
diff --git a/include/dt-bindings/clock/sun8i-v3s-ccu.h b/include/dt-bindings/clock/sun8i-v3s-ccu.h
new file mode 100644 (file)
index 0000000..c0d5d55
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * Based on sun8i-h3-ccu.h, which is:
+ * Copyright (C) 2016 Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN8I_V3S_H_
+#define _DT_BINDINGS_CLK_SUN8I_V3S_H_
+
+#define CLK_CPU                        14
+
+#define CLK_BUS_CE             20
+#define CLK_BUS_DMA            21
+#define CLK_BUS_MMC0           22
+#define CLK_BUS_MMC1           23
+#define CLK_BUS_MMC2           24
+#define CLK_BUS_DRAM           25
+#define CLK_BUS_EMAC           26
+#define CLK_BUS_HSTIMER                27
+#define CLK_BUS_SPI0           28
+#define CLK_BUS_OTG            29
+#define CLK_BUS_EHCI0          30
+#define CLK_BUS_OHCI0          31
+#define CLK_BUS_VE             32
+#define CLK_BUS_TCON0          33
+#define CLK_BUS_CSI            34
+#define CLK_BUS_DE             35
+#define CLK_BUS_CODEC          36
+#define CLK_BUS_PIO            37
+#define CLK_BUS_I2C0           38
+#define CLK_BUS_I2C1           39
+#define CLK_BUS_UART0          40
+#define CLK_BUS_UART1          41
+#define CLK_BUS_UART2          42
+#define CLK_BUS_EPHY           43
+#define CLK_BUS_DBG            44
+
+#define CLK_MMC0               45
+#define CLK_MMC0_SAMPLE                46
+#define CLK_MMC0_OUTPUT                47
+#define CLK_MMC1               48
+#define CLK_MMC1_SAMPLE                49
+#define CLK_MMC1_OUTPUT                50
+#define CLK_MMC2               51
+#define CLK_MMC2_SAMPLE                52
+#define CLK_MMC2_OUTPUT                53
+#define CLK_CE                 54
+#define CLK_SPI0               55
+#define CLK_USB_PHY0           56
+#define CLK_USB_OHCI0          57
+
+#define CLK_DRAM_VE            59
+#define CLK_DRAM_CSI           60
+#define CLK_DRAM_EHCI          61
+#define CLK_DRAM_OHCI          62
+#define CLK_DE                 63
+#define CLK_TCON0              64
+#define CLK_CSI_MISC           65
+#define CLK_CSI0_MCLK          66
+#define CLK_CSI1_SCLK          67
+#define CLK_CSI1_MCLK          68
+#define CLK_VE                 69
+#define CLK_AC_DIG             70
+#define CLK_AVS                        71
+
+#define CLK_MIPI_CSI           73
+
+#endif /* _DT_BINDINGS_CLK_SUN8I_V3S_H_ */
diff --git a/include/dt-bindings/clock/sun9i-a80-ccu.h b/include/dt-bindings/clock/sun9i-a80-ccu.h
new file mode 100644 (file)
index 0000000..6ea1492
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2016 Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_SUN9I_A80_CCU_H_
+#define _DT_BINDINGS_CLOCK_SUN9I_A80_CCU_H_
+
+#define CLK_PLL_AUDIO          2
+#define CLK_PLL_PERIPH0                3
+
+#define CLK_C0CPUX             12
+#define CLK_C1CPUX             13
+
+#define CLK_OUT_A              27
+#define CLK_OUT_B              28
+
+#define CLK_NAND0_0            29
+#define CLK_NAND0_1            30
+#define CLK_NAND1_0            31
+#define CLK_NAND1_1            32
+#define CLK_MMC0               33
+#define CLK_MMC0_SAMPLE                34
+#define CLK_MMC0_OUTPUT                35
+#define CLK_MMC1               36
+#define CLK_MMC1_SAMPLE                37
+#define CLK_MMC1_OUTPUT                38
+#define CLK_MMC2               39
+#define CLK_MMC2_SAMPLE                40
+#define CLK_MMC2_OUTPUT                41
+#define CLK_MMC3               42
+#define CLK_MMC3_SAMPLE                43
+#define CLK_MMC3_OUTPUT                44
+#define CLK_TS                 45
+#define CLK_SS                 46
+#define CLK_SPI0               47
+#define CLK_SPI1               48
+#define CLK_SPI2               49
+#define CLK_SPI3               50
+#define CLK_I2S0               51
+#define CLK_I2S1               52
+#define CLK_SPDIF              53
+#define CLK_SDRAM              54
+#define CLK_DE                 55
+#define CLK_EDP                        56
+#define CLK_MP                 57
+#define CLK_LCD0               58
+#define CLK_LCD1               59
+#define CLK_MIPI_DSI0          60
+#define CLK_MIPI_DSI1          61
+#define CLK_HDMI               62
+#define CLK_HDMI_SLOW          63
+#define CLK_MIPI_CSI           64
+#define CLK_CSI_ISP            65
+#define CLK_CSI_MISC           66
+#define CLK_CSI0_MCLK          67
+#define CLK_CSI1_MCLK          68
+#define CLK_FD                 69
+#define CLK_VE                 70
+#define CLK_AVS                        71
+#define CLK_GPU_CORE           72
+#define CLK_GPU_MEMORY         73
+#define CLK_GPU_AXI            74
+#define CLK_SATA               75
+#define CLK_AC97               76
+#define CLK_MIPI_HSI           77
+#define CLK_GPADC              78
+#define CLK_CIR_TX             79
+
+#define CLK_BUS_FD             80
+#define CLK_BUS_VE             81
+#define CLK_BUS_GPU_CTRL       82
+#define CLK_BUS_SS             83
+#define CLK_BUS_MMC            84
+#define CLK_BUS_NAND0          85
+#define CLK_BUS_NAND1          86
+#define CLK_BUS_SDRAM          87
+#define CLK_BUS_MIPI_HSI       88
+#define CLK_BUS_SATA           89
+#define CLK_BUS_TS             90
+#define CLK_BUS_SPI0           91
+#define CLK_BUS_SPI1           92
+#define CLK_BUS_SPI2           93
+#define CLK_BUS_SPI3           94
+
+#define CLK_BUS_OTG            95
+#define CLK_BUS_USB            96
+#define CLK_BUS_GMAC           97
+#define CLK_BUS_MSGBOX         98
+#define CLK_BUS_SPINLOCK       99
+#define CLK_BUS_HSTIMER                100
+#define CLK_BUS_DMA            101
+
+#define CLK_BUS_LCD0           102
+#define CLK_BUS_LCD1           103
+#define CLK_BUS_EDP            104
+#define CLK_BUS_CSI            105
+#define CLK_BUS_HDMI           106
+#define CLK_BUS_DE             107
+#define CLK_BUS_MP             108
+#define CLK_BUS_MIPI_DSI       109
+
+#define CLK_BUS_SPDIF          110
+#define CLK_BUS_PIO            111
+#define CLK_BUS_AC97           112
+#define CLK_BUS_I2S0           113
+#define CLK_BUS_I2S1           114
+#define CLK_BUS_LRADC          115
+#define CLK_BUS_GPADC          116
+#define CLK_BUS_TWD            117
+#define CLK_BUS_CIR_TX         118
+
+#define CLK_BUS_I2C0           119
+#define CLK_BUS_I2C1           120
+#define CLK_BUS_I2C2           121
+#define CLK_BUS_I2C3           122
+#define CLK_BUS_I2C4           123
+#define CLK_BUS_UART0          124
+#define CLK_BUS_UART1          125
+#define CLK_BUS_UART2          126
+#define CLK_BUS_UART3          127
+#define CLK_BUS_UART4          128
+#define CLK_BUS_UART5          129
+
+#endif /* _DT_BINDINGS_CLOCK_SUN9I_A80_CCU_H_ */
diff --git a/include/dt-bindings/clock/sun9i-a80-de.h b/include/dt-bindings/clock/sun9i-a80-de.h
new file mode 100644 (file)
index 0000000..3dad6c3
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_SUN9I_A80_DE_H_
+#define _DT_BINDINGS_CLOCK_SUN9I_A80_DE_H_
+
+#define CLK_FE0                        0
+#define CLK_FE1                        1
+#define CLK_FE2                        2
+#define CLK_IEP_DEU0           3
+#define CLK_IEP_DEU1           4
+#define CLK_BE0                        5
+#define CLK_BE1                        6
+#define CLK_BE2                        7
+#define CLK_IEP_DRC0           8
+#define CLK_IEP_DRC1           9
+#define CLK_MERGE              10
+
+#define CLK_DRAM_FE0           11
+#define CLK_DRAM_FE1           12
+#define CLK_DRAM_FE2           13
+#define CLK_DRAM_DEU0          14
+#define CLK_DRAM_DEU1          15
+#define CLK_DRAM_BE0           16
+#define CLK_DRAM_BE1           17
+#define CLK_DRAM_BE2           18
+#define CLK_DRAM_DRC0          19
+#define CLK_DRAM_DRC1          20
+
+#define CLK_BUS_FE0            21
+#define CLK_BUS_FE1            22
+#define CLK_BUS_FE2            23
+#define CLK_BUS_DEU0           24
+#define CLK_BUS_DEU1           25
+#define CLK_BUS_BE0            26
+#define CLK_BUS_BE1            27
+#define CLK_BUS_BE2            28
+#define CLK_BUS_DRC0           29
+#define CLK_BUS_DRC1           30
+
+#endif /* _DT_BINDINGS_CLOCK_SUN9I_A80_DE_H_ */
diff --git a/include/dt-bindings/clock/sun9i-a80-usb.h b/include/dt-bindings/clock/sun9i-a80-usb.h
new file mode 100644 (file)
index 0000000..783a60d
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_SUN9I_A80_USB_H_
+#define _DT_BINDINGS_CLOCK_SUN9I_A80_USB_H_
+
+#define CLK_BUS_HCI0   0
+#define CLK_USB_OHCI0  1
+#define CLK_BUS_HCI1   2
+#define CLK_BUS_HCI2   3
+#define CLK_USB_OHCI2  4
+
+#define CLK_USB0_PHY   5
+#define CLK_USB1_HSIC  6
+#define CLK_USB1_PHY   7
+#define CLK_USB2_HSIC  8
+#define CLK_USB2_PHY   9
+#define CLK_USB_HSIC   10
+
+#endif /* _DT_BINDINGS_CLOCK_SUN9I_A80_USB_H_ */
diff --git a/include/dt-bindings/reset/sun5i-ccu.h b/include/dt-bindings/reset/sun5i-ccu.h
new file mode 100644 (file)
index 0000000..c2b9726
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2016 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _RST_SUN5I_H_
+#define _RST_SUN5I_H_
+
+#define RST_USB_PHY0   0
+#define RST_USB_PHY1   1
+#define RST_GPS                2
+#define RST_DE_BE      3
+#define RST_DE_FE      4
+#define RST_TVE                5
+#define RST_LCD                6
+#define RST_CSI                7
+#define RST_VE         8
+#define RST_GPU                9
+#define RST_IEP                10
+
+#endif /* _RST_SUN5I_H_ */
diff --git a/include/dt-bindings/reset/sun8i-v3s-ccu.h b/include/dt-bindings/reset/sun8i-v3s-ccu.h
new file mode 100644 (file)
index 0000000..b58ef21
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2016 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ * Based on sun8i-h3-ccu.h, which is:
+ * Copyright (C) 2016 Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_RST_SUN8I_V3S_H_
+#define _DT_BINDINGS_RST_SUN8I_V3S_H_
+
+#define RST_USB_PHY0           0
+
+#define RST_MBUS               1
+
+#define RST_BUS_CE             5
+#define RST_BUS_DMA            6
+#define RST_BUS_MMC0           7
+#define RST_BUS_MMC1           8
+#define RST_BUS_MMC2           9
+#define RST_BUS_DRAM           11
+#define RST_BUS_EMAC           12
+#define RST_BUS_HSTIMER                14
+#define RST_BUS_SPI0           15
+#define RST_BUS_OTG            17
+#define RST_BUS_EHCI0          18
+#define RST_BUS_OHCI0          22
+#define RST_BUS_VE             26
+#define RST_BUS_TCON0          27
+#define RST_BUS_CSI            30
+#define RST_BUS_DE             34
+#define RST_BUS_DBG            38
+#define RST_BUS_EPHY           39
+#define RST_BUS_CODEC          40
+#define RST_BUS_I2C0           46
+#define RST_BUS_I2C1           47
+#define RST_BUS_UART0          49
+#define RST_BUS_UART1          50
+#define RST_BUS_UART2          51
+
+#endif /* _DT_BINDINGS_RST_SUN8I_V3S_H_ */
diff --git a/include/dt-bindings/reset/sun9i-a80-ccu.h b/include/dt-bindings/reset/sun9i-a80-ccu.h
new file mode 100644 (file)
index 0000000..4b8df4b
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2016 Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_RESET_SUN9I_A80_CCU_H_
+#define _DT_BINDINGS_RESET_SUN9I_A80_CCU_H_
+
+#define RST_BUS_FD             0
+#define RST_BUS_VE             1
+#define RST_BUS_GPU_CTRL       2
+#define RST_BUS_SS             3
+#define RST_BUS_MMC            4
+#define RST_BUS_NAND0          5
+#define RST_BUS_NAND1          6
+#define RST_BUS_SDRAM          7
+#define RST_BUS_SATA           8
+#define RST_BUS_TS             9
+#define RST_BUS_SPI0           10
+#define RST_BUS_SPI1           11
+#define RST_BUS_SPI2           12
+#define RST_BUS_SPI3           13
+
+#define RST_BUS_OTG            14
+#define RST_BUS_OTG_PHY                15
+#define RST_BUS_MIPI_HSI       16
+#define RST_BUS_GMAC           17
+#define RST_BUS_MSGBOX         18
+#define RST_BUS_SPINLOCK       19
+#define RST_BUS_HSTIMER                20
+#define RST_BUS_DMA            21
+
+#define RST_BUS_LCD0           22
+#define RST_BUS_LCD1           23
+#define RST_BUS_EDP            24
+#define RST_BUS_LVDS           25
+#define RST_BUS_CSI            26
+#define RST_BUS_HDMI0          27
+#define RST_BUS_HDMI1          28
+#define RST_BUS_DE             29
+#define RST_BUS_MP             30
+#define RST_BUS_GPU            31
+#define RST_BUS_MIPI_DSI       32
+
+#define RST_BUS_SPDIF          33
+#define RST_BUS_AC97           34
+#define RST_BUS_I2S0           35
+#define RST_BUS_I2S1           36
+#define RST_BUS_LRADC          37
+#define RST_BUS_GPADC          38
+#define RST_BUS_CIR_TX         39
+
+#define RST_BUS_I2C0           40
+#define RST_BUS_I2C1           41
+#define RST_BUS_I2C2           42
+#define RST_BUS_I2C3           43
+#define RST_BUS_I2C4           44
+#define RST_BUS_UART0          45
+#define RST_BUS_UART1          46
+#define RST_BUS_UART2          47
+#define RST_BUS_UART3          48
+#define RST_BUS_UART4          49
+#define RST_BUS_UART5          50
+
+#endif /* _DT_BINDINGS_RESET_SUN9I_A80_CCU_H_ */
diff --git a/include/dt-bindings/reset/sun9i-a80-de.h b/include/dt-bindings/reset/sun9i-a80-de.h
new file mode 100644 (file)
index 0000000..2050727
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2016 Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_RESET_SUN9I_A80_DE_H_
+#define _DT_BINDINGS_RESET_SUN9I_A80_DE_H_
+
+#define RST_FE0                0
+#define RST_FE1                1
+#define RST_FE2                2
+#define RST_DEU0       3
+#define RST_DEU1       4
+#define RST_BE0                5
+#define RST_BE1                6
+#define RST_BE2                7
+#define RST_DRC0       8
+#define RST_DRC1       9
+#define RST_MERGE      10
+
+#endif /* _DT_BINDINGS_RESET_SUN9I_A80_DE_H_ */
diff --git a/include/dt-bindings/reset/sun9i-a80-usb.h b/include/dt-bindings/reset/sun9i-a80-usb.h
new file mode 100644 (file)
index 0000000..ee49286
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2016 Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_RESET_SUN9I_A80_USB_H_
+#define _DT_BINDINGS_RESET_SUN9I_A80_USB_H_
+
+#define RST_USB0_HCI   0
+#define RST_USB1_HCI   1
+#define RST_USB2_HCI   2
+
+#define RST_USB0_PHY   3
+#define RST_USB1_HSIC  4
+#define RST_USB1_PHY   5
+#define RST_USB2_HSIC  6
+#define RST_USB2_PHY   7
+
+#endif /* _DT_BINDINGS_RESET_SUN9I_A80_USB_H_ */
index 7cf8a6c70a3f71c5eca6100dfda8d67f10b20e3b..8e521194f6fc4ad32138a51c962a365c74debaed 100644 (file)
@@ -183,7 +183,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 
 #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
 
-static inline unsigned bio_segments(struct bio *bio)
+static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec)
 {
        unsigned segs = 0;
        struct bio_vec bv;
@@ -205,12 +205,17 @@ static inline unsigned bio_segments(struct bio *bio)
                break;
        }
 
-       bio_for_each_segment(bv, bio, iter)
+       __bio_for_each_segment(bv, bio, iter, *bvec)
                segs++;
 
        return segs;
 }
 
+static inline unsigned bio_segments(struct bio *bio)
+{
+       return __bio_segments(bio, &bio->bi_iter);
+}
+
 /*
  * get a reference to a bio, so it won't disappear. the intended use is
  * something like:
@@ -384,6 +389,8 @@ extern void bio_put(struct bio *);
 extern void __bio_clone_fast(struct bio *, struct bio *);
 extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
+extern struct bio *bio_clone_bioset_partial(struct bio *, gfp_t,
+                                           struct bio_set *, int, int);
 
 extern struct bio_set *fs_bio_set;
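The bio_segments() split lets a caller count segments starting from any saved iterator rather than only from bio->bi_iter. A minimal hedged sketch (the helper name below is illustrative, not part of this patch):

/* Count the segments a saved iterator still covers. The iterator is
 * copied inside __bio_for_each_segment(), so neither 'iter' nor the
 * bio itself is advanced. */
static unsigned count_segs_from(struct bio *bio, struct bvec_iter iter)
{
        return __bio_segments(bio, &iter);
}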
 
index 8e4df3d6c8cd9dbd1ddd96e25e1abae9a4aa4b6f..001d30d727c56c4d46e5e572ded575ca50cc85ff 100644 (file)
@@ -33,6 +33,7 @@ struct blk_mq_hw_ctx {
        struct blk_mq_ctx       **ctxs;
        unsigned int            nr_ctx;
 
+       wait_queue_t            dispatch_wait;
        atomic_t                wait_index;
 
        struct blk_mq_tags      *tags;
@@ -160,6 +161,7 @@ enum {
        BLK_MQ_S_STOPPED        = 0,
        BLK_MQ_S_TAG_ACTIVE     = 1,
        BLK_MQ_S_SCHED_RESTART  = 2,
+       BLK_MQ_S_TAG_WAITING    = 3,
 
        BLK_MQ_MAX_DEPTH        = 10240,
 
index baff2e8fc8a82792045c3bb0112fb52cead7e6cb..5828489309bbd22a255f11064418dc3a6b9366de 100644 (file)
@@ -124,18 +124,20 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
 
 /*
  * Since detected data corruption should stop operation on the affected
- * structures, this returns false if the corruption condition is found.
+ * structures, the return value must be checked and sanely acted on by the caller.
  */
+static inline __must_check bool check_data_corruption(bool v) { return v; }
 #define CHECK_DATA_CORRUPTION(condition, fmt, ...)                      \
-       do {                                                             \
-               if (unlikely(condition)) {                               \
+       check_data_corruption(({                                         \
+               bool corruption = unlikely(condition);                   \
+               if (corruption) {                                        \
                        if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
                                pr_err(fmt, ##__VA_ARGS__);              \
                                BUG();                                   \
                        } else                                           \
                                WARN(1, fmt, ##__VA_ARGS__);             \
-                       return false;                                    \
                }                                                        \
-       } while (0)
+               corruption;                                              \
+       }))
 
 #endif /* _LINUX_BUG_H */
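With the check rewritten as a __must_check expression, callers now consume the result instead of relying on the old hidden "return false". A hedged sketch in the spirit of lib/list_debug.c (function name and message are assumptions, not part of this hunk):

static bool my_list_add_valid(struct list_head *new, struct list_head *prev,
                              struct list_head *next)
{
        /* Ignoring the result is now a compile-time warning. */
        if (CHECK_DATA_CORRUPTION(next->prev != prev,
                        "list_add corruption: next->prev should be %p\n", prev))
                return false;
        return true;
}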
index 03a6653d329a01b90803d373f7a955230d787675..2ea0c282f3dc9326f7b3c4b7a3883758831ed251 100644 (file)
@@ -22,7 +22,6 @@ struct ceph_osd_client;
  * completion callback for async writepages
  */
 typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
-typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
 
 #define CEPH_HOMELESS_OSD      -1
 
@@ -170,15 +169,12 @@ struct ceph_osd_request {
        unsigned int            r_num_ops;
 
        int               r_result;
-       bool              r_got_reply;
 
        struct ceph_osd_client *r_osdc;
        struct kref       r_kref;
        bool              r_mempool;
-       struct completion r_completion;
-       struct completion r_done_completion;  /* fsync waiter */
+       struct completion r_completion;       /* private to osd_client.c */
        ceph_osdc_callback_t r_callback;
-       ceph_osdc_unsafe_callback_t r_unsafe_callback;
        struct list_head  r_unsafe_item;
 
        struct inode *r_inode;                /* for use by callbacks */
index 9a9041784dcff383169a169c39203dd79f383438..938656f708078e8a2fd078bc7cace623a32b755c 100644 (file)
@@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
        case CEPH_POOL_TYPE_EC:
                return false;
        default:
-               BUG_ON(1);
+               BUG();
        }
 }
 
@@ -81,13 +81,6 @@ void ceph_oloc_copy(struct ceph_object_locator *dest,
                    const struct ceph_object_locator *src);
 void ceph_oloc_destroy(struct ceph_object_locator *oloc);
 
-/*
- * Maximum supported by kernel client object name length
- *
- * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
- */
-#define CEPH_MAX_OID_NAME_LEN 100
-
 /*
  * 51-char inline_name is long enough for all cephfs and all but one
  * rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be
@@ -173,8 +166,8 @@ struct ceph_osdmap {
         * the list of osds that store+replicate them. */
        struct crush_map *crush;
 
-       struct mutex crush_scratch_mutex;
-       int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
+       struct mutex crush_workspace_mutex;
+       void *crush_workspace;
 };
 
 static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd)
index 5c0da61cb763124c651e3287f4ecaf69f8cdfbc8..5d0018782d504ce61fa3dff2ed526636302b7ef4 100644 (file)
@@ -50,7 +50,7 @@ struct ceph_timespec {
 #define CEPH_PG_LAYOUT_LINEAR 2
 #define CEPH_PG_LAYOUT_HYBRID 3
 
-#define CEPH_PG_MAX_SIZE      16  /* max # osds in a single pg */
+#define CEPH_PG_MAX_SIZE      32  /* max # osds in a single pg */
 
 /*
  * placement group.
index 861b4677fc5b41134f96da33710a79735b827fe0..3c02404cfce9b239ab527d9748d092a4cc98cfb1 100644 (file)
@@ -148,14 +148,18 @@ struct cgroup_subsys_state {
  * set for a task.
  */
 struct css_set {
-       /* Reference count */
-       atomic_t refcount;
-
        /*
-        * List running through all cgroup groups in the same hash
-        * slot. Protected by css_set_lock
+        * Set of subsystem states, one for each subsystem. This array is
+        * immutable after creation apart from the init_css_set during
+        * subsystem registration (at boot time).
         */
-       struct hlist_node hlist;
+       struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+       /* reference count */
+       atomic_t refcount;
+
+       /* the default cgroup associated with this css_set */
+       struct cgroup *dfl_cgrp;
 
        /*
         * Lists running through all tasks using this cgroup group.
@@ -167,21 +171,29 @@ struct css_set {
        struct list_head tasks;
        struct list_head mg_tasks;
 
+       /* all css_task_iters currently walking this cset */
+       struct list_head task_iters;
+
        /*
-        * List of cgrp_cset_links pointing at cgroups referenced from this
-        * css_set.  Protected by css_set_lock.
+        * On the default hierarchy, ->subsys[ssid] may point to a css
+        * attached to an ancestor instead of the cgroup this css_set is
+        * associated with.  The following node is anchored at
+        * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
+        * iterate through all css's attached to a given cgroup.
         */
-       struct list_head cgrp_links;
+       struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
 
-       /* the default cgroup associated with this css_set */
-       struct cgroup *dfl_cgrp;
+       /*
+        * List running through all cgroup groups in the same hash
+        * slot. Protected by css_set_lock
+        */
+       struct hlist_node hlist;
 
        /*
-        * Set of subsystem states, one for each subsystem. This array is
-        * immutable after creation apart from the init_css_set during
-        * subsystem registration (at boot time).
+        * List of cgrp_cset_links pointing at cgroups referenced from this
+        * css_set.  Protected by css_set_lock.
         */
-       struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+       struct list_head cgrp_links;
 
        /*
         * List of csets participating in the on-going migration either as
@@ -201,18 +213,6 @@ struct css_set {
        struct cgroup *mg_dst_cgrp;
        struct css_set *mg_dst_cset;
 
-       /*
-        * On the default hierarhcy, ->subsys[ssid] may point to a css
-        * attached to an ancestor instead of the cgroup this css_set is
-        * associated with.  The following node is anchored at
-        * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
-        * iterate through all css's attached to a given cgroup.
-        */
-       struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
-
-       /* all css_task_iters currently walking this cset */
-       struct list_head task_iters;
-
        /* dead and being drained, ignore for migration */
        bool dead;
 
@@ -388,6 +388,9 @@ struct cftype {
        struct list_head node;          /* anchored at ss->cfts */
        struct kernfs_ops *kf_ops;
 
+       int (*open)(struct kernfs_open_file *of);
+       void (*release)(struct kernfs_open_file *of);
+
        /*
         * read_u64() is a shortcut for the common case of returning a
         * single integer. Use it in place of read()
index c83c23f0577bd908df08298dcbde74a7961dcf8c..f6b43fbb141c9ad03c1e05880e8c2228743d076e 100644 (file)
@@ -266,7 +266,7 @@ void css_task_iter_end(struct css_task_iter *it);
  * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
  * @leader: the loop cursor
  * @dst_css: the destination css
- * @tset: takset to iterate
+ * @tset: taskset to iterate
  *
  * Iterate threadgroup leaders of @tset.  For single-task migrations, @tset
  * may not contain any.
diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h
new file mode 100644 (file)
index 0000000..e94290b
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License. See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#ifndef _CGROUP_RDMA_H
+#define _CGROUP_RDMA_H
+
+#include <linux/cgroup.h>
+
+enum rdmacg_resource_type {
+       RDMACG_RESOURCE_HCA_HANDLE,
+       RDMACG_RESOURCE_HCA_OBJECT,
+       RDMACG_RESOURCE_MAX,
+};
+
+#ifdef CONFIG_CGROUP_RDMA
+
+struct rdma_cgroup {
+       struct cgroup_subsys_state      css;
+
+       /*
+        * head to keep track of all resource pools
+        * that belong to this cgroup.
+        */
+       struct list_head                rpools;
+};
+
+struct rdmacg_device {
+       struct list_head        dev_node;
+       struct list_head        rpools;
+       char                    *name;
+};
+
+/*
+ * APIs for the RDMA/IB stack to publish when a device wants to
+ * participate in resource accounting
+ */
+int rdmacg_register_device(struct rdmacg_device *device);
+void rdmacg_unregister_device(struct rdmacg_device *device);
+
+/* APIs for RDMA/IB stack to charge/uncharge pool specific resources */
+int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
+                     struct rdmacg_device *device,
+                     enum rdmacg_resource_type index);
+void rdmacg_uncharge(struct rdma_cgroup *cg,
+                    struct rdmacg_device *device,
+                    enum rdmacg_resource_type index);
+#endif /* CONFIG_CGROUP_RDMA */
+#endif /* _CGROUP_RDMA_H */
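A hedged sketch of the intended charge/uncharge flow in an IB driver (the device name and call sites are assumptions, not part of this patch):

static struct rdmacg_device my_rdmacg_dev = {
        .name = "hca_hypothetical0",    /* registered once at driver probe
                                           via rdmacg_register_device() */
};

static int my_create_hca_object(void)
{
        struct rdma_cgroup *cg;
        int ret;

        /* Charge the calling task's rdma cgroup before creating the object. */
        ret = rdmacg_try_charge(&cg, &my_rdmacg_dev,
                                RDMACG_RESOURCE_HCA_OBJECT);
        if (ret)
                return ret;
        /* ... create and use the object; when it is destroyed: */
        rdmacg_uncharge(cg, &my_rdmacg_dev, RDMACG_RESOURCE_HCA_OBJECT);
        return 0;
}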
index 0df0336acee9ec10fef79f878c299f1aae33b143..d0e597c445854531b21a0915d1c42190833e5a2e 100644 (file)
@@ -56,6 +56,10 @@ SUBSYS(hugetlb)
 SUBSYS(pids)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_RDMA)
+SUBSYS(rdma)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
index 6f0a91b37f683fd4788ef82e6fb49f1701abb5d5..03f32d0bd1d8a7be5a0fddbc058e0a7f6b059efe 100644 (file)
@@ -29,6 +29,7 @@ extern int __init cma_declare_contiguous(phys_addr_t base,
 extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
                                        unsigned int order_per_bit,
                                        struct cma **res_cma);
-extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align);
+extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
+                             gfp_t gfp_mask);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
 #endif
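Callers now control the reclaim behavior of the allocation. A hedged sketch (the cma handle and flag choice are illustrative):

static struct page *grab_movable_pages(struct cma *cma, size_t nr_pages,
                                       unsigned int align)
{
        /* The gfp mask flows through instead of an implicit GFP_KERNEL. */
        return cma_alloc(cma, nr_pages, align, GFP_KERNEL | __GFP_NOWARN);
}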
index 9e40be522793eb3d8f533cb3e2d7cccc0b3ceabf..aef47be2a5c1a3fd3ea75161f5bc93627772515c 100644 (file)
@@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long);
        compat_stack_t __user *__uss = uss; \
        struct task_struct *t = current; \
        put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \
-       put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \
+       put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
        put_user_ex(t->sas_ss_size, &__uss->ss_size); \
+       if (t->sas_ss_flags & SS_AUTODISARM) \
+               sas_ss_reset(t); \
 } while (0);
 
 asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
index fddd1a5eb322b7fd7b1a62d3385cac3d7e4b6f24..76e28c22980586a6aa1cc3114b79de7cb5001fcd 100644 (file)
 #define __attribute_const__    __attribute__((__const__))
 #define __maybe_unused         __attribute__((unused))
 #define __always_unused                __attribute__((unused))
+#define __mode(x)               __attribute__((mode(x)))
 
 /* gcc version specific checks */
 
 #endif
 #endif
 
+#ifdef CONFIG_STACK_VALIDATION
+#define annotate_unreachable() ({                                      \
+       asm("%c0:\t\n"                                                  \
+           ".pushsection __unreachable, \"a\"\t\n"                     \
+           ".long %c0b\t\n"                                            \
+           ".popsection\t\n" : : "i" (__LINE__));                      \
+})
+#else
+#define annotate_unreachable()
+#endif
+
 /*
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
  * this in the preprocessor, but we can live with this because they're
  * unreleased.  Really, we need to have autoconf for the kernel.
  */
-#define unreachable() __builtin_unreachable()
+#define unreachable() \
+       do { annotate_unreachable(); __builtin_unreachable(); } while (0)
 
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone      __attribute__((__noclone__, __optimize__("no-tracer")))
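A hedged sketch of what the annotation buys: with CONFIG_STACK_VALIDATION, every unreachable() also records its address in the __unreachable section, so the stack validator (objtool, by our reading; not named in this hunk) can tell a deliberate dead end from a fall-off-the-end bug:

static int nibble_width(int fmt)
{
        switch (fmt) {
        case 0: return 4;
        case 1: return 8;
        }
        /* Deliberately unreachable: emits a section entry, then calls
         * __builtin_unreachable() for the optimizer. */
        unreachable();
}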
index 91c30cba984e70af923202ea0cca58f6b6cd3e32..f8110051188f475c8574aa04787fb04dc9e42e05 100644 (file)
@@ -105,29 +105,36 @@ struct ftrace_branch_data {
        };
 };
 
+struct ftrace_likely_data {
+       struct ftrace_branch_data       data;
+       unsigned long                   constant;
+};
+
 /*
  * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
  * to disable branch tracing on a per file basis.
  */
 #if defined(CONFIG_TRACE_BRANCH_PROFILING) \
     && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
-void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
+void ftrace_likely_update(struct ftrace_likely_data *f, int val,
+                         int expect, int is_constant);
 
 #define likely_notrace(x)      __builtin_expect(!!(x), 1)
 #define unlikely_notrace(x)    __builtin_expect(!!(x), 0)
 
-#define __branch_check__(x, expect) ({                                 \
+#define __branch_check__(x, expect, is_constant) ({                    \
                        int ______r;                                    \
-                       static struct ftrace_branch_data                \
+                       static struct ftrace_likely_data                \
                                __attribute__((__aligned__(4)))         \
                                __attribute__((section("_ftrace_annotated_branch"))) \
                                ______f = {                             \
-                               .func = __func__,                       \
-                               .file = __FILE__,                       \
-                               .line = __LINE__,                       \
+                               .data.func = __func__,                  \
+                               .data.file = __FILE__,                  \
+                               .data.line = __LINE__,                  \
                        };                                              \
-                       ______r = likely_notrace(x);                    \
-                       ftrace_likely_update(&______f, ______r, expect); \
+                       ______r = __builtin_expect(!!(x), expect);      \
+                       ftrace_likely_update(&______f, ______r,         \
+                                            expect, is_constant);      \
                        ______r;                                        \
                })
 
@@ -137,10 +144,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
  * written by Daniel Walker.
  */
 # ifndef likely
-#  define likely(x)    (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1))
+#  define likely(x)    (__branch_check__(x, 1, __builtin_constant_p(x)))
 # endif
 # ifndef unlikely
-#  define unlikely(x)  (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))
+#  define unlikely(x)  (__branch_check__(x, 0, __builtin_constant_p(x)))
 # endif
 
 #ifdef CONFIG_PROFILE_ALL_BRANCHES
@@ -570,12 +577,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
        (_________p1); \
 })
 
-/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
-#ifdef CONFIG_KPROBES
-# define __kprobes     __attribute__((__section__(".kprobes.text")))
-# define nokprobe_inline       __always_inline
-#else
-# define __kprobes
-# define nokprobe_inline       inline
-#endif
 #endif /* __LINUX_COMPILER_H */
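A hedged sketch of the consumer side of the new is_constant flag (the real updater lives in kernel/trace/trace_branch.c; field names beyond this hunk, such as data.correct/data.incorrect, are assumptions):

void ftrace_likely_update(struct ftrace_likely_data *f, int val,
                          int expect, int is_constant)
{
        /* Constant-folded uses are now counted rather than skipped. */
        if (is_constant) {
                f->constant++;
                val = expect;   /* a constant always matches its prediction */
        }
        if (val == expect)
                f->data.correct++;
        else
                f->data.incorrect++;
}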
index be8f12b8f1950499380c10de27ab6928df25fd8e..fbecbd089d75f4a9eead25b1439e6629893c3639 100644 (file)
@@ -135,13 +135,6 @@ struct crush_bucket {
        __u32 size;      /* num items */
        __s32 *items;
 
-       /*
-        * cached random permutation: used for uniform bucket and for
-        * the linear search fallback for the other bucket types.
-        */
-       __u32 perm_x;  /* @x for which *perm is defined */
-       __u32 perm_n;  /* num elements of *perm that are permuted/defined */
-       __u32 *perm;
 };
 
 struct crush_bucket_uniform {
@@ -211,6 +204,21 @@ struct crush_map {
         * device fails. */
        __u8 chooseleaf_stable;
 
+       /*
+        * This value is calculated after decode or construction by
+        * the builder. It is exposed here (rather than having a
+        * 'build CRUSH working space' function) so that callers can
+        * reserve a static buffer, allocate space on the stack, or
+        * otherwise avoid calling into the heap allocator if they
+        * want to. The size of the working space depends on the map,
+        * while the size of the scratch vector passed to the mapper
+        * depends on the size of the desired result set.
+        *
+        * Nothing stops the caller from allocating both in one fell
+        * swoop and passing in two pointers, though.
+        */
+       size_t working_size;
+
 #ifndef __KERNEL__
        /*
         * version 0 (original) of straw_calc has various flaws.  version 1
@@ -248,4 +256,23 @@ static inline int crush_calc_tree_node(int i)
        return ((i+1) << 1)-1;
 }
 
+/*
+ * These data structures are private to the CRUSH implementation. They
+ * are exposed in this header file because the builder needs their
+ * definitions to calculate the total working size.
+ *
+ * Moving this out of the crush map allows us to treat the CRUSH map as
+ * immutable within the mapper and removes the requirement for a CRUSH
+ * map lock.
+ */
+struct crush_work_bucket {
+       __u32 perm_x; /* @x for which *perm is defined */
+       __u32 perm_n; /* num elements of *perm that are permuted/defined */
+       __u32 *perm;  /* Permutation of the bucket's items */
+};
+
+struct crush_work {
+       struct crush_work_bucket **work; /* Per-bucket working store */
+};
+
 #endif
index 5dfd5b1125d2b257a4a00d1e77661613ca2227ec..c95e19e1ff11c5f69e4b3d05d328ce071ef74856 100644 (file)
@@ -15,6 +15,20 @@ extern int crush_do_rule(const struct crush_map *map,
                         int ruleno,
                         int x, int *result, int result_max,
                         const __u32 *weights, int weight_max,
-                        int *scratch);
+                        void *cwin);
+
+/*
+ * Returns the exact amount of workspace that will need to be used
+ * for a given combination of crush_map and result_max. The caller can
+ * then allocate this much on its own, either on the stack, in a
+ * per-thread long-lived buffer, or however it likes.
+ */
+static inline size_t crush_work_size(const struct crush_map *map,
+                                    int result_max)
+{
+       return map->working_size + result_max * 3 * sizeof(__u32);
+}
+
+void crush_init_workspace(const struct crush_map *map, void *v);
 
 #endif
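Putting the two additions together, a hedged sketch of a caller sizing, initializing, and passing the workspace (the allocation context is an assumption):

static int map_pg_to_osds(const struct crush_map *map, int ruleno, int x,
                          int *result, int result_max,
                          const __u32 *weights, int weight_max)
{
        size_t wsize = crush_work_size(map, result_max);
        void *work = kmalloc(wsize, GFP_NOIO);
        int n;

        if (!work)
                return -ENOMEM;
        crush_init_workspace(map, work);  /* lays out crush_work + buckets */
        n = crush_do_rule(map, ruleno, x, result, result_max,
                          weights, weight_max, work);
        kfree(work);
        return n;
}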
index 1e77ff5818f14e72f66e90d06275eb8994cfc3eb..d8a3dc042e1cbb81f1c3a906ee6fc0d28fead8f8 100644 (file)
@@ -38,8 +38,8 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
                const struct iomap_ops *ops);
-int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-                       const struct iomap_ops *ops);
+int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+                   const struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
@@ -71,19 +71,13 @@ static inline unsigned int dax_radix_order(void *entry)
                return PMD_SHIFT - PAGE_SHIFT;
        return 0;
 }
-int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops);
 #else
 static inline unsigned int dax_radix_order(void *entry)
 {
        return 0;
 }
-static inline int dax_iomap_pmd_fault(struct vm_fault *vmf,
-               const struct iomap_ops *ops)
-{
-       return VM_FAULT_FALLBACK;
-}
 #endif
-int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
+int dax_pfn_mkwrite(struct vm_fault *vmf);
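+
The fault entry points now take the fault size directly. A hedged sketch of a filesystem ->huge_fault handler on the new signature (the filesystem and its iomap ops are hypothetical):

static int myfs_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size)
{
        /* One entry point for all fault sizes; dax dispatches on pe_size. */
        return dax_iomap_fault(vmf, pe_size, &myfs_iomap_ops);
}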
 
 static inline bool vma_is_dax(struct vm_area_struct *vma)
 {
index c965e44694997ea0ea18ca17fd1a7eb1596b3a53..591b6c16f9c12e08c6e91c51f6309fb3d73b8533 100644 (file)
@@ -562,7 +562,7 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * @inode: inode to select the dentry from multiple layers (can be NULL)
  * @flags: open flags to control copy-up behavior
  *
- * If dentry is on an union/overlay, then return the underlying, real dentry.
+ * If dentry is on a union/overlay, then return the underlying, real dentry.
  * Otherwise return the dentry itself.
  *
  * See also: Documentation/filesystems/vfs.txt
@@ -581,7 +581,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
  * d_real_inode - Return the real inode
  * @dentry: The dentry to query
  *
- * If dentry is on an union/overlay, then return the underlying, real inode.
+ * If dentry is on a union/overlay, then return the underlying, real inode.
  * Otherwise return d_inode().
  */
 static inline struct inode *d_real_inode(const struct dentry *dentry)
index bd684fc8ec1d83b80f393e9b2e5dc5cfb0776ad6..30c4570e928dfe871bc84382f14eb49b5cac018e 100644 (file)
@@ -925,6 +925,7 @@ struct device {
 #ifdef CONFIG_NUMA
        int             numa_node;      /* NUMA node this device is close to */
 #endif
+       const struct dma_map_ops *dma_ops;
        u64             *dma_mask;      /* dma mask (if dma'able device) */
        u64             coherent_dma_mask;/* Like dma_mask, but for
                                             alloc_coherent mappings as
@@ -1139,6 +1140,7 @@ static inline bool device_supports_offline(struct device *dev)
 extern void lock_device_hotplug(void);
 extern void unlock_device_hotplug(void);
 extern int lock_device_hotplug_sysfs(void);
+void assert_held_device_hotplug(void);
 extern int device_offline(struct device *dev);
 extern int device_online(struct device *dev);
 extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
index fec734df1524799e1e7137e943098c907d0fce8c..b67bf6ac907d8f324494efaf1d441b0ee7955a13 100644 (file)
@@ -112,7 +112,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 }
 
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-                                      unsigned int order);
+                                      unsigned int order, gfp_t gfp_mask);
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
                                 int count);
 
@@ -145,7 +145,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 
 static inline
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-                                      unsigned int order)
+                                      unsigned int order, gfp_t gfp_mask)
 {
        return NULL;
 }
index c24721a33b4c5d7816d1da34d3440a2d0d67be2c..0977317c6835c2526428f61c12fcfab976650b99 100644 (file)
@@ -134,7 +134,8 @@ struct dma_map_ops {
        int is_phys;
 };
 
-extern struct dma_map_ops dma_noop_ops;
+extern const struct dma_map_ops dma_noop_ops;
+extern const struct dma_map_ops dma_virt_ops;
 
 #define DMA_BIT_MASK(n)        (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
 
@@ -171,14 +172,26 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
 
 #ifdef CONFIG_HAS_DMA
 #include <asm/dma-mapping.h>
+static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+       if (dev && dev->dma_ops)
+               return dev->dma_ops;
+       return get_arch_dma_ops(dev ? dev->bus : NULL);
+}
+
+static inline void set_dma_ops(struct device *dev,
+                              const struct dma_map_ops *dma_ops)
+{
+       dev->dma_ops = dma_ops;
+}
 #else
 /*
  * Define the dma api to allow compilation but not linking of
  * dma dependent code.  Code that depends on the dma-mapping
  * API needs to set 'depends on HAS_DMA' in its Kconfig
  */
-extern struct dma_map_ops bad_dma_ops;
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+extern const struct dma_map_ops bad_dma_ops;
+static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
 {
        return &bad_dma_ops;
 }
@@ -189,7 +202,7 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
                                              enum dma_data_direction dir,
                                              unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        dma_addr_t addr;
 
        kmemcheck_mark_initialized(ptr, size);
@@ -208,7 +221,7 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
                                          enum dma_data_direction dir,
                                          unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
        if (ops->unmap_page)
@@ -224,7 +237,7 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
                                   int nents, enum dma_data_direction dir,
                                   unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        int i, ents;
        struct scatterlist *s;
 
@@ -242,7 +255,7 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
                                      int nents, enum dma_data_direction dir,
                                      unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
        debug_dma_unmap_sg(dev, sg, nents, dir);
@@ -256,7 +269,7 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev,
                                            enum dma_data_direction dir,
                                            unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        dma_addr_t addr;
 
        kmemcheck_mark_initialized(page_address(page) + offset, size);
@@ -272,7 +285,7 @@ static inline void dma_unmap_page_attrs(struct device *dev,
                                        enum dma_data_direction dir,
                                        unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
        if (ops->unmap_page)
@@ -286,7 +299,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev,
                                          enum dma_data_direction dir,
                                          unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        dma_addr_t addr;
 
        BUG_ON(!valid_dma_direction(dir));
@@ -307,7 +320,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
                                      size_t size, enum dma_data_direction dir,
                                      unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
        if (ops->unmap_resource)
@@ -319,7 +332,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
                                           size_t size,
                                           enum dma_data_direction dir)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
        if (ops->sync_single_for_cpu)
@@ -331,7 +344,7 @@ static inline void dma_sync_single_for_device(struct device *dev,
                                              dma_addr_t addr, size_t size,
                                              enum dma_data_direction dir)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
        if (ops->sync_single_for_device)
@@ -371,7 +384,7 @@ static inline void
 dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                    int nelems, enum dma_data_direction dir)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
        if (ops->sync_sg_for_cpu)
@@ -383,7 +396,7 @@ static inline void
 dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                       int nelems, enum dma_data_direction dir)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
        if (ops->sync_sg_for_device)
@@ -428,7 +441,7 @@ static inline int
 dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
               dma_addr_t dma_addr, size_t size, unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        BUG_ON(!ops);
        if (ops->mmap)
                return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
@@ -446,7 +459,7 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
                      dma_addr_t dma_addr, size_t size,
                      unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        BUG_ON(!ops);
        if (ops->get_sgtable)
                return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
@@ -464,7 +477,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
                                       dma_addr_t *dma_handle, gfp_t flag,
                                       unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        void *cpu_addr;
 
        BUG_ON(!ops);
@@ -486,7 +499,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
                                     void *cpu_addr, dma_addr_t dma_handle,
                                     unsigned long attrs)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!ops);
        WARN_ON(irqs_disabled());
@@ -544,7 +557,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 #ifndef HAVE_ARCH_DMA_SUPPORTED
 static inline int dma_supported(struct device *dev, u64 mask)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        if (!ops)
                return 0;
@@ -557,7 +570,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
 #ifndef HAVE_ARCH_DMA_SET_MASK
 static inline int dma_set_mask(struct device *dev, u64 mask)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
 
        if (ops->set_dma_mask)
                return ops->set_dma_mask(dev, mask);
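A hedged sketch of the per-device override this series enables (the bus hook is hypothetical; real method pointers are elided):

static const struct dma_map_ops my_bounce_ops = {
        .is_phys = 0,   /* map/unmap/sync methods elided in this sketch */
};

static void my_bus_setup_dev(struct device *dev)
{
        /* When dev->dma_ops is set, get_dma_ops() prefers it over the
         * architecture default from get_arch_dma_ops(). */
        set_dma_ops(dev, &my_bounce_ops);
}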
index c930cbc193420978dc20b8310abc4f3a445fd69c..c64f2cb7d3647ceb4817acbdc49b8ce4bbe7a697 100644 (file)
@@ -655,6 +655,11 @@ struct inode {
        void                    *i_private; /* fs or device private pointer */
 };
 
+static inline unsigned int i_blocksize(const struct inode *node)
+{
+       return (1 << node->i_blkbits);
+}
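+
A one-helper hedged sketch of what the accessor tidies up (the helper below is illustrative, not part of the patch):

static inline loff_t blocks_needed(const struct inode *inode, loff_t bytes)
{
        /* i_blocksize() replaces open-coded (1 << inode->i_blkbits). */
        return DIV_ROUND_UP(bytes, i_blocksize(inode));
}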
+
 static inline int inode_unhashed(struct inode *inode)
 {
        return hlist_unhashed(&inode->i_hash);
index a1e8277120c71b93b4b85db620dcee91925ef333..c46eab5bc8937f1b27f9e11faa221b6cccc6994e 100644 (file)
@@ -73,7 +73,7 @@ struct diu_ad {
        /* Word 0(32-bit) in DDR memory */
 /*     __u16 comp; */
 /*     __u16 pixel_s:2; */
-/*     __u16 pallete:1; */
+/*     __u16 palette:1; */
 /*     __u16 red_c:2; */
 /*     __u16 green_c:2; */
 /*     __u16 blue_c:2; */
@@ -142,7 +142,7 @@ struct diu_ad {
 struct diu {
        __be32 desc[3];
        __be32 gamma;
-       __be32 pallete;
+       __be32 palette;
        __be32 cursor;
        __be32 curs_pos;
        __be32 diu_mode;
index 0fe0b6295ab58edfe6745467487fe19beba0d723..db373b9d322361f7553cfbe36026d918fb9d09ee 100644 (file)
@@ -541,7 +541,7 @@ static inline bool pm_suspended_storage(void)
 #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
-                             unsigned migratetype);
+                             unsigned migratetype, gfp_t gfp_mask);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
 #endif
 
index f0029e786205a38a7255260a4d99665912399f1b..a3762d49ba397244f105bade8ee058879c2f08e4 100644 (file)
@@ -6,6 +6,18 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                         pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
                         struct vm_area_struct *vma);
 extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+                        pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+                        struct vm_area_struct *vma);
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+#else
+static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
+{
+}
+#endif
+
 extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                                          unsigned long addr,
@@ -17,6 +29,9 @@ extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
 extern int zap_huge_pmd(struct mmu_gather *tlb,
                        struct vm_area_struct *vma,
                        pmd_t *pmd, unsigned long addr);
+extern int zap_huge_pud(struct mmu_gather *tlb,
+                       struct vm_area_struct *vma,
+                       pud_t *pud, unsigned long addr);
 extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                        unsigned long addr, unsigned long end,
                        unsigned char *vec);
@@ -26,8 +41,10 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                        unsigned long addr, pgprot_t newprot,
                        int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *,
-                       pfn_t pfn, bool write);
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+                       pmd_t *pmd, pfn_t pfn, bool write);
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+                       pud_t *pud, pfn_t pfn, bool write);
 enum transparent_hugepage_flag {
        TRANSPARENT_HUGEPAGE_FLAG,
        TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
@@ -58,13 +75,14 @@ extern struct kobj_attribute shmem_enabled_attr;
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
-               pmd_t *pmd, int flags);
-
 #define HPAGE_PMD_SHIFT PMD_SHIFT
 #define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
 #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))
 
+#define HPAGE_PUD_SHIFT PUD_SHIFT
+#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT)
+#define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1))
+
 extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
 #define transparent_hugepage_enabled(__vma)                            \
@@ -118,6 +136,17 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
                bool freeze, struct page *page);
 
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+               unsigned long address);
+
+#define split_huge_pud(__vma, __pud, __address)                                \
+       do {                                                            \
+               pud_t *____pud = (__pud);                               \
+               if (pud_trans_huge(*____pud)                            \
+                                       || pud_devmap(*____pud))        \
+                       __split_huge_pud(__vma, __pud, __address);      \
+       }  while (0)
+
 extern int hugepage_madvise(struct vm_area_struct *vma,
                            unsigned long *vm_flags, int advice);
 extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -126,6 +155,8 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                    long adjust_next);
 extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
                struct vm_area_struct *vma);
+extern spinlock_t *__pud_trans_huge_lock(pud_t *pud,
+               struct vm_area_struct *vma);
 /* mmap_sem must be held on entry */
 static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
                struct vm_area_struct *vma)
@@ -136,6 +167,15 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
        else
                return NULL;
 }
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+               struct vm_area_struct *vma)
+{
+       VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
+       if (pud_trans_huge(*pud) || pud_devmap(*pud))
+               return __pud_trans_huge_lock(pud, vma);
+       else
+               return NULL;
+}
 static inline int hpage_nr_pages(struct page *page)
 {
        if (unlikely(PageTransHuge(page)))
@@ -143,6 +183,11 @@ static inline int hpage_nr_pages(struct page *page)
        return 1;
 }
 
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+               pmd_t *pmd, int flags);
+struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+               pud_t *pud, int flags);
+
 extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
@@ -157,6 +202,11 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
        return is_huge_zero_page(pmd_page(pmd));
 }
 
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+       return false;
+}
+
 struct page *mm_get_huge_zero_page(struct mm_struct *mm);
 void mm_put_huge_zero_page(struct mm_struct *mm);
 
@@ -167,6 +217,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
 
+#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
+
 #define hpage_nr_pages(x) 1
 
 #define transparent_hugepage_enabled(__vma) 0
@@ -195,6 +249,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
                unsigned long address, bool freeze, struct page *page) {}
 
+#define split_huge_pud(__vma, __pud, __address)        \
+       do { } while (0)
+
 static inline int hugepage_madvise(struct vm_area_struct *vma,
                                   unsigned long *vm_flags, int advice)
 {
@@ -212,6 +269,11 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 {
        return NULL;
 }
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+               struct vm_area_struct *vma)
+{
+       return NULL;
+}
 
 static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
@@ -223,6 +285,11 @@ static inline bool is_huge_zero_page(struct page *page)
        return false;
 }
 
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+       return false;
+}
+
 static inline void mm_put_huge_zero_page(struct mm_struct *mm)
 {
        return;
@@ -233,6 +300,12 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
 {
        return NULL;
 }
+
+static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
+               unsigned long addr, pud_t *pud, int flags)
+{
+       return NULL;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
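A hedged sketch of the PUD walk pattern the new lock helper enables, mirroring the established PMD pattern (the caller name is illustrative):

static int touch_huge_pud(struct vm_area_struct *vma, pud_t *pud)
{
        /* Returns the held ptl only for a huge or devmap PUD. */
        spinlock_t *ptl = pud_trans_huge_lock(pud, vma);

        if (!ptl)
                return 0;
        /* ... operate on the huge PUD while its lock is held ... */
        spin_unlock(ptl);
        return 1;
}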
index 7b23a3316dcb1f24694d759a7b5ddaf149ce5376..bed8fbb45f31fb1b8efdf68c319b33fe8b55bab8 100644 (file)
@@ -283,6 +283,7 @@ enum i2c_slave_event {
 
 extern int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb);
 extern int i2c_slave_unregister(struct i2c_client *client);
+extern bool i2c_detect_slave_mode(struct device *dev);
 
 static inline int i2c_slave_event(struct i2c_client *client,
                                  enum i2c_slave_event event, u8 *val)
index 3c01b89aed67623ca22e10c3b70e913197620cd4..bf70b3ef0a073bbcb301ddf0141d90e8ac6fa018 100644 (file)
 #ifndef __IDR_H__
 #define __IDR_H__
 
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <linux/init.h>
-#include <linux/rcupdate.h>
+#include <linux/radix-tree.h>
+#include <linux/gfp.h>
+#include <linux/percpu.h>
+
+struct idr {
+       struct radix_tree_root  idr_rt;
+       unsigned int            idr_next;
+};
 
 /*
- * Using 6 bits at each layer allows us to allocate 7 layers out of each page.
- * 8 bits only gave us 3 layers out of every pair of pages, which is less
- * efficient except for trees with a largest element between 192-255 inclusive.
+ * The IDR API does not expose the tagging functionality of the radix tree
+ * to users.  Use tag 0 to track whether a node has free space below it.
  */
-#define IDR_BITS 6
-#define IDR_SIZE (1 << IDR_BITS)
-#define IDR_MASK ((1 << IDR_BITS)-1)
-
-struct idr_layer {
-       int                     prefix; /* the ID prefix of this idr_layer */
-       int                     layer;  /* distance from leaf */
-       struct idr_layer __rcu  *ary[1<<IDR_BITS];
-       int                     count;  /* When zero, we can release it */
-       union {
-               /* A zero bit means "space here" */
-               DECLARE_BITMAP(bitmap, IDR_SIZE);
-               struct rcu_head         rcu_head;
-       };
-};
+#define IDR_FREE       0
 
-struct idr {
-       struct idr_layer __rcu  *hint;  /* the last layer allocated from */
-       struct idr_layer __rcu  *top;
-       int                     layers; /* only valid w/o concurrent changes */
-       int                     cur;    /* current pos for cyclic allocation */
-       spinlock_t              lock;
-       int                     id_free_cnt;
-       struct idr_layer        *id_free;
-};
+/* Set the IDR flag and the IDR_FREE tag */
+#define IDR_RT_MARKER          ((__force gfp_t)(3 << __GFP_BITS_SHIFT))
 
-#define IDR_INIT(name)                                                 \
+#define IDR_INIT                                                       \
 {                                                                      \
-       .lock                   = __SPIN_LOCK_UNLOCKED(name.lock),      \
+       .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER)                        \
 }
-#define DEFINE_IDR(name)       struct idr name = IDR_INIT(name)
+#define DEFINE_IDR(name)       struct idr name = IDR_INIT
 
 /**
  * idr_get_cursor - Return the current position of the cyclic allocator
@@ -62,9 +44,9 @@ struct idr {
  * idr_alloc_cyclic() if it is free (otherwise the search will start from
  * this position).
  */
-static inline unsigned int idr_get_cursor(struct idr *idr)
+static inline unsigned int idr_get_cursor(const struct idr *idr)
 {
-       return READ_ONCE(idr->cur);
+       return READ_ONCE(idr->idr_next);
 }
 
 /**
@@ -77,7 +59,7 @@ static inline unsigned int idr_get_cursor(struct idr *idr)
  */
 static inline void idr_set_cursor(struct idr *idr, unsigned int val)
 {
-       WRITE_ONCE(idr->cur, val);
+       WRITE_ONCE(idr->idr_next, val);
 }
 
 /**
@@ -97,22 +79,31 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
  * period).
  */
 
-/*
- * This is what we export.
- */
-
-void *idr_find_slowpath(struct idr *idp, int id);
 void idr_preload(gfp_t gfp_mask);
-int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask);
-int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask);
-int idr_for_each(struct idr *idp,
+int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t);
+int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t);
+int idr_for_each(const struct idr *,
                 int (*fn)(int id, void *p, void *data), void *data);
-void *idr_get_next(struct idr *idp, int *nextid);
-void *idr_replace(struct idr *idp, void *ptr, int id);
-void idr_remove(struct idr *idp, int id);
-void idr_destroy(struct idr *idp);
-void idr_init(struct idr *idp);
-bool idr_is_empty(struct idr *idp);
+void *idr_get_next(struct idr *, int *nextid);
+void *idr_replace(struct idr *, void *, int id);
+void idr_destroy(struct idr *);
+
+static inline void *idr_remove(struct idr *idr, int id)
+{
+       return radix_tree_delete_item(&idr->idr_rt, id, NULL);
+}
+
+static inline void idr_init(struct idr *idr)
+{
+       INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
+       idr->idr_next = 0;
+}
+
+static inline bool idr_is_empty(const struct idr *idr)
+{
+       return radix_tree_empty(&idr->idr_rt) &&
+               radix_tree_tagged(&idr->idr_rt, IDR_FREE);
+}
 
 /**
  * idr_preload_end - end preload section started with idr_preload()
@@ -137,19 +128,14 @@ static inline void idr_preload_end(void)
  * This function can be called under rcu_read_lock(), given that the leaf
  * pointers lifetimes are correctly managed.
  */
-static inline void *idr_find(struct idr *idr, int id)
+static inline void *idr_find(const struct idr *idr, int id)
 {
-       struct idr_layer *hint = rcu_dereference_raw(idr->hint);
-
-       if (hint && (id & ~IDR_MASK) == hint->prefix)
-               return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
-
-       return idr_find_slowpath(idr, id);
+       return radix_tree_lookup(&idr->idr_rt, id);
 }
 
 /**
  * idr_for_each_entry - iterate over an idr's elements of a given type
- * @idp:     idr handle
+ * @idr:     idr handle
  * @entry:   the type * to use as cursor
  * @id:      id entry's key
  *
@@ -157,57 +143,60 @@ static inline void *idr_find(struct idr *idr, int id)
 * After normal termination, @entry is left with the value NULL.  This
  * is convenient for a "not found" value.
  */
-#define idr_for_each_entry(idp, entry, id)                     \
-       for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
+#define idr_for_each_entry(idr, entry, id)                     \
+       for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id)
 
 /**
- * idr_for_each_entry - continue iteration over an idr's elements of a given type
- * @idp:     idr handle
+ * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
+ * @idr:     idr handle
  * @entry:   the type * to use as cursor
  * @id:      id entry's key
  *
  * Continue to iterate over list of given type, continuing after
  * the current position.
  */
-#define idr_for_each_entry_continue(idp, entry, id)                    \
-       for ((entry) = idr_get_next((idp), &(id));                      \
+#define idr_for_each_entry_continue(idr, entry, id)                    \
+       for ((entry) = idr_get_next((idr), &(id));                      \
             entry;                                                     \
-            ++id, (entry) = idr_get_next((idp), &(id)))
+            ++id, (entry) = idr_get_next((idr), &(id)))
 
 /*
  * IDA - IDR based id allocator, use when translation from id to
  * pointer isn't necessary.
- *
- * IDA_BITMAP_LONGS is calculated to be one less to accommodate
- * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes.
  */
 #define IDA_CHUNK_SIZE         128     /* 128 bytes per chunk */
-#define IDA_BITMAP_LONGS       (IDA_CHUNK_SIZE / sizeof(long) - 1)
+#define IDA_BITMAP_LONGS       (IDA_CHUNK_SIZE / sizeof(long))
 #define IDA_BITMAP_BITS        (IDA_BITMAP_LONGS * sizeof(long) * 8)
 
 struct ida_bitmap {
-       long                    nr_busy;
        unsigned long           bitmap[IDA_BITMAP_LONGS];
 };
 
+DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap);
+
 struct ida {
-       struct idr              idr;
-       struct ida_bitmap       *free_bitmap;
+       struct radix_tree_root  ida_rt;
 };
 
-#define IDA_INIT(name)         { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
-#define DEFINE_IDA(name)       struct ida name = IDA_INIT(name)
+#define IDA_INIT       {                                               \
+       .ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),          \
+}
+#define DEFINE_IDA(name)       struct ida name = IDA_INIT
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
 void ida_remove(struct ida *ida, int id);
 void ida_destroy(struct ida *ida);
-void ida_init(struct ida *ida);
 
 int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
                   gfp_t gfp_mask);
 void ida_simple_remove(struct ida *ida, unsigned int id);
 
+static inline void ida_init(struct ida *ida)
+{
+       INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
+}
+
 /**
  * ida_get_new - allocate new ID
  * @ida:       idr handle
@@ -220,11 +209,8 @@ static inline int ida_get_new(struct ida *ida, int *p_id)
        return ida_get_new_above(ida, 0, p_id);
 }
 
-static inline bool ida_is_empty(struct ida *ida)
+static inline bool ida_is_empty(const struct ida *ida)
 {
-       return idr_is_empty(&ida->idr);
+       return radix_tree_empty(&ida->ida_rt);
 }
-
-void __init idr_init_cache(void);
-
 #endif /* __IDR_H__ */
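The external contract is unchanged by the radix-tree rewrite. A hedged sketch of the usual allocate/look-up/remove cycle (object type and names are illustrative):

static DEFINE_IDR(obj_idr);

static int obj_track(void *obj)
{
        int id;

        idr_preload(GFP_KERNEL);        /* preallocate so the hot path need not sleep */
        id = idr_alloc(&obj_idr, obj, 1, 0, GFP_NOWAIT);
        idr_preload_end();
        return id;      /* >= 1 on success, -ENOMEM/-ENOSPC on failure */
}

static void obj_untrack(int id)
{
        /* idr_remove() now returns the removed pointer (or NULL). */
        WARN_ON(!idr_remove(&obj_idr, id));
}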
index 891459caa278001d8412d1e2fa729bbbbbe7bfcf..7291810067eb636dfa622b8b385ac45d7a520395 100644 (file)
@@ -79,8 +79,7 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
                bool *did_zero, const struct iomap_ops *ops);
 int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
                const struct iomap_ops *ops);
-int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-               const struct iomap_ops *ops);
+int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                loff_t start, loff_t len, const struct iomap_ops *ops);
 
index 1c30014ed1760811a42f05a47ad3943e58b53579..d29e1e21bf3f80bdbade2da428a5e5366a549d9f 100644 (file)
@@ -17,7 +17,7 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/hrtimer.h>
+#include <linux/ktime.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/io.h>
index 78c5d5ae385740f38086ceaa5babf63baef76877..f1045b2c6a00a0543da1892eebff3f120c32a679 100644 (file)
@@ -100,7 +100,7 @@ struct ipmi_user_hndl {
 
 /* Create a new user of the IPMI layer on the given interface number. */
 int ipmi_create_user(unsigned int          if_num,
-                    struct ipmi_user_hndl *handler,
+                    const struct ipmi_user_hndl *handler,
                     void                  *handler_data,
                     ipmi_user_t           *user);
 
index b63d6b7b0db0a295432320d432da5005f718a0f2..8e06d758ee48a2d92da7b9cfba79175334423b48 100644 (file)
@@ -89,11 +89,17 @@ extern bool static_key_initialized;
 
 struct static_key {
        atomic_t enabled;
-/* Set lsb bit to 1 if branch is default true, 0 ot */
-       struct jump_entry *entries;
-#ifdef CONFIG_MODULES
-       struct static_key_mod *next;
-#endif
+/*
+ * bit 0 => 1 if key is initially true
+ *         0 if initially false
+ * bit 1 => 1 if points to struct static_key_mod
+ *         0 if points to struct jump_entry
+ */
+       union {
+               unsigned long type;
+               struct jump_entry *entries;
+               struct static_key_mod *next;
+       };
 };
 
 #else
@@ -118,9 +124,10 @@ struct module;
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_TYPE_FALSE        0UL
-#define JUMP_TYPE_TRUE 1UL
-#define JUMP_TYPE_MASK 1UL
+#define JUMP_TYPE_FALSE                0UL
+#define JUMP_TYPE_TRUE         1UL
+#define JUMP_TYPE_LINKED       2UL
+#define JUMP_TYPE_MASK         3UL
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
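
Since the union overlays the entries pointer with a 'type' word whose two low bits now carry tags, the pointer must be masked before use. A sketch of the decoding (the helper name is hypothetical; kernel/jump_label.c performs the equivalent masking internally):

#include <linux/jump_label.h>

static inline struct jump_entry *example_jump_entries(struct static_key *key)
{
	/* strip the JUMP_TYPE_TRUE/JUMP_TYPE_LINKED tag bits */
	return (struct jump_entry *)(key->type & ~JUMP_TYPE_MASK);
}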
index 820c0ad54a0117596e63bae6845b1ebd771c3412..c908b25bf5a558bf7b07abb050cd5a148ed62780 100644 (file)
@@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order);
 void kasan_cache_create(struct kmem_cache *cache, size_t *size,
                        unsigned long *flags);
 void kasan_cache_shrink(struct kmem_cache *cache);
-void kasan_cache_destroy(struct kmem_cache *cache);
+void kasan_cache_shutdown(struct kmem_cache *cache);
 
 void kasan_poison_slab(struct page *page);
 void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
                                      size_t *size,
                                      unsigned long *flags) {}
 static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
-static inline void kasan_cache_destroy(struct kmem_cache *cache) {}
+static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
 
 static inline void kasan_poison_slab(struct page *page) {}
 static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
index 8f2e059e4d45559b54c1fbd087181865beac7af7..4d748603e818307feb491f26cafb9c4170586e5d 100644 (file)
@@ -8,7 +8,7 @@
 
 /*
  * The use of "&&" / "||" is limited in certain expressions.
- * The followings enable to calculate "and" / "or" with macro expansion only.
+ * The following enable to calculate "and" / "or" with macro expansion only.
  */
 #define __and(x, y)                    ___and(x, y)
 #define ___and(x, y)                   ____and(__ARG_PLACEHOLDER_##x, y)
index cb09238f6d32be355a9b7e2347e48746de7eae77..4c26dc3a8295a9fb2910dcb18946ee86f54ceac8 100644 (file)
 )
 
 /*
- * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors and
- * for negative dividends if the divisor variable type is unsigned.
+ * Divide positive or negative dividend by positive or negative divisor
+ * and round to closest integer. Result is undefined for negative
+ * divisors if the dividend variable type is unsigned and for negative
+ * dividends if the divisor variable type is unsigned.
  */
 #define DIV_ROUND_CLOSEST(x, divisor)(                 \
 {                                                      \
        typeof(x) __x = x;                              \
        typeof(divisor) __d = divisor;                  \
        (((typeof(x))-1) > 0 ||                         \
-        ((typeof(divisor))-1) > 0 || (__x) > 0) ?      \
+        ((typeof(divisor))-1) > 0 ||                   \
+        (((__x) > 0) == ((__d) > 0))) ?                \
                (((__x) + ((__d) / 2)) / (__d)) :       \
                (((__x) - ((__d) / 2)) / (__d));        \
 }                                                      \
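
The added sign test (((__x) > 0) == ((__d) > 0)) selects the adding branch when dividend and divisor agree in sign and the subtracting branch otherwise, which is what extends correct rounding to negative divisors. A few illustrative cases:

#include <linux/kernel.h>

static void example_rounding(void)
{
	int a = DIV_ROUND_CLOSEST(7, 2);	/*  3.5 rounds to  4 */
	int b = DIV_ROUND_CLOSEST(-7, 2);	/* -3.5 rounds to -4 */
	int c = DIV_ROUND_CLOSEST(7, -2);	/* -3.5 rounds to -4 (newly correct) */
	int d = DIV_ROUND_CLOSEST(-7, -2);	/*  3.5 rounds to  4 (newly correct) */
}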
index 7056238fd9f5cfd0b495edbbd75093f9141977e5..a9b11b8d06f2ebcee9ecc40f143c1468d182236a 100644 (file)
@@ -46,6 +46,7 @@ enum kernfs_node_flag {
        KERNFS_SUICIDAL         = 0x0400,
        KERNFS_SUICIDED         = 0x0800,
        KERNFS_EMPTY_DIR        = 0x1000,
+       KERNFS_HAS_RELEASE      = 0x2000,
 };
 
 /* @flags for kernfs_create_root() */
@@ -175,6 +176,7 @@ struct kernfs_open_file {
        /* published fields */
        struct kernfs_node      *kn;
        struct file             *file;
+       struct seq_file         *seq_file;
        void                    *priv;
 
        /* private fields, do not use outside kernfs proper */
@@ -185,11 +187,19 @@ struct kernfs_open_file {
        char                    *prealloc_buf;
 
        size_t                  atomic_write_len;
-       bool                    mmapped;
+       bool                    mmapped:1;
+       bool                    released:1;
        const struct vm_operations_struct *vm_ops;
 };
 
 struct kernfs_ops {
+       /*
+        * Optional open/release methods.  Both are called with
+        * @of->seq_file populated.
+        */
+       int (*open)(struct kernfs_open_file *of);
+       void (*release)(struct kernfs_open_file *of);
+
        /*
         * Read is handled by either seq_file or raw_read().
         *
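
A sketch of how a kernfs user might wire the new optional hooks (all 'example_' names are illustrative; only the .open/.release members are the interface added here, and both run with of->seq_file populated, as the comment above states):

#include <linux/kernfs.h>

static int example_open(struct kernfs_open_file *of)
{
	/* of->seq_file is already valid at this point */
	return 0;
}

static void example_release(struct kernfs_open_file *of)
{
	/* runs once per open file, tracked via the new 'released' flag */
}

static const struct kernfs_ops example_kf_ops = {
	.open    = example_open,
	.release = example_release,
};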
index 16ddfb8b304a330ab2e2594cab78bc481930f32c..c328e4f7dcadb4276bddcfbcc97caa323c48a0a3 100644 (file)
@@ -29,7 +29,7 @@
  *             <jkenisto@us.ibm.com>  and Prasanna S Panchamukhi
  *             <prasanna@in.ibm.com> added function-return probes.
  */
-#include <linux/compiler.h>    /* for __kprobes */
+#include <linux/compiler.h>
 #include <linux/linkage.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
@@ -40,9 +40,9 @@
 #include <linux/rcupdate.h>
 #include <linux/mutex.h>
 #include <linux/ftrace.h>
+#include <asm/kprobes.h>
 
 #ifdef CONFIG_KPROBES
-#include <asm/kprobes.h>
 
 /* kprobe_status settings */
 #define KPROBE_HIT_ACTIVE      0x00000001
@@ -51,6 +51,7 @@
 #define KPROBE_HIT_SSDONE      0x00000008
 
 #else /* CONFIG_KPROBES */
+#include <asm-generic/kprobes.h>
 typedef int kprobe_opcode_t;
 struct arch_specific_insn {
        int dummy;
@@ -509,18 +510,4 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
 }
 #endif
 
-#ifdef CONFIG_KPROBES
-/*
- * Blacklist ganerating macro. Specify functions which is not probed
- * by using this macro.
- */
-#define __NOKPROBE_SYMBOL(fname)                       \
-static unsigned long __used                            \
-       __attribute__((section("_kprobe_blacklist")))   \
-       _kbl_addr_##fname = (unsigned long)fname;
-#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname)
-#else
-#define NOKPROBE_SYMBOL(fname)
-#endif
-
 #endif /* _LINUX_KPROBES_H */
index 6b784c59f321f413b30fec0ba6048db7f3cf617b..394e3d9213b8d58cab811737cc8dbf1029650852 100644 (file)
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
  */
-#define LZ4_MEM_COMPRESS       (16384)
-#define LZ4HC_MEM_COMPRESS     (262144 + (2 * sizeof(unsigned char *)))
 
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>       /* memset, memcpy */
+
+/*-************************************************************************
+ *     CONSTANTS
+ **************************************************************************/
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  */
-static inline size_t lz4_compressbound(size_t isize)
-{
-       return isize + (isize / 255) + 16;
-}
+#define LZ4_MEMORY_USAGE 14
+
+#define LZ4_MAX_INPUT_SIZE     0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)       (\
+       (unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+       ? 0 \
+       : (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG     (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL                       3
+#define LZ4HC_DEFAULT_CLEVEL                   9
+#define LZ4HC_MAX_CLEVEL                       16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE - 1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *     STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
+#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64       4
+#define LZ4_STREAMDECODESIZE            (LZ4_STREAMDECODESIZE_U64 * \
+       sizeof(unsigned long long))
 
 /*
- * lz4_compress()
- *     src     : source address of the original data
- *     src_len : size of the original data
- *     dst     : output buffer address of the compressed data
- *             This requires 'dst' of size LZ4_COMPRESSBOUND.
- *     dst_len : is the output size, which is returned after compress done
- *     workmem : address of the working memory.
- *             This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *     return  : Success if return 0
- *               Error if return (< 0)
- *     note :  Destination buffer and workmem must be already allocated with
- *             the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len,
-               unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *     src     : source address of the original data
-  *     src_len : size of the original data
-  *     dst     : output buffer address of the compressed data
-  *            This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *     dst_len : is the output size, which is returned after compress done
-  *     workmem : address of the working memory.
-  *            This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *     return  : Success if return 0
-  *               Error if return (< 0)
-  *     note :  Destination buffer and workmem must be already allocated with
-  *             the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-               unsigned char *dst, size_t *dst_len, void *wrkmem);
+ * LZ4_stream_t - information structure to track an LZ4 stream.
+ */
+typedef struct {
+       uint32_t hashTable[LZ4_HASH_SIZE_U32];
+       uint32_t currentOffset;
+       uint32_t initCheck;
+       const uint8_t *dictionary;
+       uint8_t *bufferStart;
+       uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+       unsigned long long table[LZ4_STREAMSIZE_U64];
+       LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
 
 /*
- * lz4_decompress()
- *     src     : source address of the compressed data
- *     src_len : is the input size, whcih is returned after decompress done
- *     dest    : output buffer address of the decompressed data
- *     actual_dest_len: is the size of uncompressed data, supposing it's known
- *     return  : Success if return 0
- *               Error if return (< 0)
- *     note :  Destination buffer must be already allocated.
- *             slightly faster than lz4_decompress_unknownoutputsize()
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-               unsigned char *dest, size_t actual_dest_len);
+ * LZ4_streamHC_t - information structure to track an LZ4HC stream.
+ */
+typedef struct {
+       unsigned int     hashTable[LZ4HC_HASHTABLESIZE];
+       unsigned short   chainTable[LZ4HC_MAXD];
+       /* next block to continue on current prefix */
+       const unsigned char *end;
+       /* All index relative to this position */
+       const unsigned char *base;
+       /* alternate base for extDict */
+       const unsigned char *dictBase;
+       /* below that point, need extDict */
+       unsigned int     dictLimit;
+       /* below that point, no more dict */
+       unsigned int     lowLimit;
+       /* index from which to continue dict update */
+       unsigned int     nextToUpdate;
+       unsigned int     compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+       size_t table[LZ4_STREAMHCSIZE_SIZET];
+       LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
 
 /*
- * lz4_decompress_unknownoutputsize()
- *     src     : source address of the compressed data
- *     src_len : is the input size, therefore the compressed size
- *     dest    : output buffer address of the decompressed data
- *     dest_len: is the max size of the destination buffer, which is
- *                     returned with actual size of decompressed data after
- *                     decompress done
- *     return  : Success if return 0
- *               Error if return (< 0)
- *     note :  Destination buffer must be already allocated.
- */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-               unsigned char *dest, size_t *dest_len);
+ * LZ4_streamDecode_t - information structure to track an
+ *     LZ4 stream during decompression.
+ *
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ */
+typedef struct {
+       const uint8_t *externalDict;
+       size_t extDictSize;
+       const uint8_t *prefixEnd;
+       size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+       unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+       LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *     SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS       LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS     LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *     Compression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_compressBound() - Max. output size in worst case scenarios
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" scenario
+ * (data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
+{
+       return LZ4_COMPRESSBOUND(isize);
+}
+
+/**
+ * LZ4_compress_default() - Compress data from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *     which must be already allocated
+ * @wrkmem: address of the working memory.
+ *	This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Compresses 'inputSize' bytes from buffer 'source'
+ * into already allocated 'dest' buffer of size 'maxOutputSize'.
+ * Compression is guaranteed to succeed if
+ * 'maxOutputSize' >= LZ4_compressBound(inputSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'source' into a more limited 'dest' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * As a consequence, 'dest' content is not valid.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *     (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+       int maxOutputSize, void *wrkmem);
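
A minimal one-shot compression sketch against the declaration above (the function and buffer names are illustrative; error handling beyond the essentials is elided):

#include <linux/lz4.h>
#include <linux/slab.h>
#include <linux/errno.h>

static int example_compress(const char *src, int src_len, char *dst, int dst_cap)
{
	void *wrkmem = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
	int out;

	if (!wrkmem)
		return -ENOMEM;
	/* guaranteed to succeed when dst_cap >= LZ4_compressBound(src_len) */
	out = LZ4_compress_default(src, dst, src_len, dst_cap, wrkmem);
	kfree(wrkmem);
	return out ? out : -E2BIG;	/* 0 means 'dst' was too small */
}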
+
+/**
+ * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *     which must be already allocated
+ * @acceleration: acceleration factor
+ * @wrkmem: address of the working memory.
+ *	This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Same as LZ4_compress_default(), but allows selection of an "acceleration"
+ * factor. The larger the acceleration value, the faster the algorithm,
+ * but also the lower the compression ratio. It's a trade-off, which can be
+ * fine-tuned, with each successive value providing roughly +3% more speed.
+ * An acceleration value of "1" is the same as regular LZ4_compress_default().
+ * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *     (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+       int maxOutputSize, int acceleration, void *wrkmem);
+
+/**
+ * LZ4_compress_destSize() - Compress as much data as possible
+ *     from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @sourceSizePtr: will be modified to indicate how many bytes were read
+ *     from 'source' to fill 'dest'. New value is necessarily <= old value.
+ * @targetDestSize: Size of buffer 'dest' which must be already allocated
+ * @wrkmem: address of the working memory.
+ *	This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Reverses the usual logic by compressing as much data as possible
+ * from the 'source' buffer into the already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fills 'dest' completely with as much data as
+ * possible from 'source'.
+ *
+ * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize)
+ *     or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+       int targetDestSize, void *wrkmem);
+
+/*-************************************************************************
+ *     Decompression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * This function fully respects memory boundaries for properly formed
+ * compressed data.
+ * It is a bit faster than LZ4_decompress_safe().
+ * However, it does not provide any protection against intentionally
+ * modified data stream (malicious input).
+ * Use this function in trusted environment only
+ * (data to decode comes from a trusted source).
+ *
+ * Return: number of bytes read from the source buffer
+ *     or a negative result if decompression fails.
+ */
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe() - Decompression protected against buffer overflow
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * If the source stream is detected malformed, the function will
+ * stop decoding and return a negative result.
+ * This function is protected against buffer overflow exploits,
+ * including malicious data packets. It never writes outside output buffer,
+ * nor reads outside input buffer.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+       int maxDecompressedSize);
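
And the matching safe-decompression sketch (names illustrative; per the comment above, a negative return indicates a malformed stream and nothing is written past 'out_cap'):

#include <linux/lz4.h>
#include <linux/errno.h>

static int example_decompress(const char *comp, int comp_len, char *out, int out_cap)
{
	int n = LZ4_decompress_safe(comp, out, comp_len, out_cap);

	return n < 0 ? -EINVAL : n;	/* n is the decompressed length */
}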
+
+/**
+ * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize'
+ *     at position 'source' into buffer 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the decompressed data which must be
+ *     already allocated
+ * @compressedSize: is the precise full size of the compressed block.
+ * @targetOutputSize: the decompression operation will try
+ *     to stop as soon as 'targetOutputSize' has been reached
+ * @maxDecompressedSize: is the size of destination buffer
+ *
+ * This function decompresses a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ * This function never writes outside of output buffer,
+ * and never reads outside of input buffer.
+ * It is therefore protected against malicious data packets.
+ *
+ * Return: the number of bytes decoded in the destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ *
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+       int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+/*-************************************************************************
+ *     LZ4 HC Compression
+ **************************************************************************/
+
+/**
+ * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @dstCapacity: full or partial size of buffer 'dst',
+ *     which must be already allocated
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *     value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *     Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ * @wrkmem: address of the working memory.
+ *     This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Compress data from 'src' into 'dst', using the more powerful
+ * but slower "HC" algorithm. Compression is guaranteed to succeed if
+ * 'dstCapacity' >= LZ4_compressBound(srcSize).
+ *
+ * Return: the number of bytes written into 'dst' or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+       int compressionLevel, void *wrkmem);
+
+/**
+ * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *     value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *     Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *
+ * An LZ4_streamHC_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_streamHC_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+
+/**
+ * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC
+ * @streamHCPtr: pointer to the LZ4HC_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4HC_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return: dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int    LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+       int dictSize);
+
+/**
+ * LZ4_compress_HC_continue() - Compress 'src' using data from previously
+ *     compressed blocks as a dictionary using the HC algorithm
+ * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *     which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dst',
+ *     which must be already allocated
+ *
+ * These functions compress data in successive blocks of any size, using
+ * previous blocks as dictionary. One key assumption is that previous
+ * blocks (up to 64 KB) remain read-accessible while
+ * compressing next blocks. There is an exception for ring buffers,
+ * which can be smaller than 64 KB.
+ * The ring buffer scenario is automatically detected and handled by
+ * LZ4_compress_HC_continue().
+ * Before starting compression, state must be properly initialized,
+ * using LZ4_resetStreamHC().
+ * A first "fictional block" can then be designated as
+ * initial dictionary, using LZ4_loadDictHC() (Optional).
+ * Then, use LZ4_compress_HC_continue()
+ * to compress each successive block. Previous memory blocks
+ * (including initial dictionary when present) must remain accessible
+ * and unmodified during compression.
+ * 'dst' buffer should be sized to handle worst case scenarios, using
+ * LZ4_compressBound(), to ensure operation success.
+ * If, for any reason, previous data blocks can't be preserved unmodified
+ * in memory while the next block is compressed,
+ * you must save them to a safer memory space, using LZ4_saveDictHC().
+ * Return value of LZ4_saveDictHC() is the size of dictionary
+ * effectively saved into 'safeBuffer'.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+       char *dst, int srcSize, int maxDstSize);
+
+/**
+ * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream
+ * @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @maxDictSize: size of 'safeBuffer'
+ *
+ * If the previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note: you don't need to call LZ4_loadDictHC() afterwards; the
+ * dictionary is immediately usable, so you can go on to call
+ * LZ4_compress_HC_continue().
+ *
+ * Return: saved dictionary size in bytes (necessarily <= maxDictSize),
+ *	or 0 if error.
+ */
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+       int maxDictSize);
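
Putting the HC streaming pieces together, a sketch of the sequence the comments above describe: reset, optionally load a dictionary, compress block after block, and save the window if the old blocks cannot stay in place (have_block() and emit() are hypothetical helpers, not part of this API; buffer names are illustrative):

#include <linux/lz4.h>
#include <linux/slab.h>

static void example_hc_stream(const char *dict, int dict_len,
			      char *dst, int dst_cap,
			      char *save_buf, int save_cap)
{
	LZ4_streamHC_t *s = kmalloc(sizeof(*s), GFP_KERNEL);
	const char *src;
	int src_len;

	if (!s)
		return;
	LZ4_resetStreamHC(s, LZ4HC_DEFAULT_CLEVEL);
	LZ4_loadDictHC(s, dict, dict_len);	/* optional */

	while (have_block(&src, &src_len)) {	/* hypothetical source */
		int n = LZ4_compress_HC_continue(s, src, dst,
						 src_len, dst_cap);
		if (n == 0)
			break;			/* 'dst' too small */
		emit(dst, n);			/* hypothetical sink */
	}
	LZ4_saveDictHC(s, save_buf, save_cap);	/* if old blocks will move */
	kfree(s);
}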
+
+/*-*********************************************
+ *     Streaming Compression Functions
+ ***********************************************/
+
+/**
+ * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure
+ * @LZ4_stream: pointer to the 'LZ4_stream_t' structure
+ *
+ * An LZ4_stream_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_stream_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/**
+ * LZ4_loadDict() - Load a static dictionary into LZ4_stream
+ * @streamPtr: pointer to the LZ4_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return: dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+       int dictSize);
+
+/**
+ * LZ4_saveDict() - Save static dictionary from LZ4_stream
+ * @streamPtr: pointer to the 'LZ4_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @dictSize: size of 'safeBuffer'
+ *
+ * If the previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note: you don't need to call LZ4_loadDict() afterwards; the
+ * dictionary is immediately usable, so you can go on to call
+ * LZ4_compress_fast_continue().
+ *
+ * Return: saved dictionary size in bytes (necessarily <= dictSize),
+ *	or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/**
+ * LZ4_compress_fast_continue() - Compress 'src' using data from previously
+ *     compressed blocks as a dictionary
+ * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *     which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dst',
+ *     which must be already allocated
+ * @acceleration: acceleration factor
+ *
+ * Compress buffer content 'src', using data from previously compressed blocks
+ * as dictionary to improve compression ratio.
+ * Important: previous data blocks are assumed to still
+ * be present and unmodified!
+ * If maxDstSize >= LZ4_compressBound(srcSize),
+ * compression is guaranteed to succeed, and runs faster.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+       char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/**
+ * LZ4_setStreamDecode() - Instruct where to find dictionary
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @dictionary: dictionary to use
+ * @dictSize: size of dictionary
+ *
+ * Use this function to instruct where to find the dictionary.
+ *     Setting a size of 0 is allowed (same effect as reset).
+ *
+ * Return: 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *dictionary, int dictSize);
+
+/**
+ * LZ4_decompress_safe_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * This decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB).
+ * In the case of ring buffers, the decoding buffer must be either:
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer.
+ * Whenever these conditions cannot be met, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode().
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *source, char *dest, int compressedSize,
+       int maxDecompressedSize);
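
A sketch of the corresponding streaming-decode loop (next_block() is a hypothetical source helper; note the requirement above that already-decoded data stay in place, which appending into one contiguous 'out' buffer satisfies):

#include <linux/lz4.h>

static void example_stream_decode(char *out, int out_cap)
{
	LZ4_streamDecode_t sd;
	const char *comp;
	int comp_len;

	LZ4_setStreamDecode(&sd, NULL, 0);	/* no initial dictionary */

	while (next_block(&comp, &comp_len)) {	/* hypothetical source */
		int n = LZ4_decompress_safe_continue(&sd, comp, out,
						     comp_len, out_cap);
		if (n < 0)
			break;			/* malformed block */
		out += n;			/* decoded data must stay put */
		out_cap -= n;
	}
}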
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * This decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB).
+ * In the case of ring buffers, the decoding buffer must be either:
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer.
+ * Whenever these conditions cannot be met, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode().
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= originalSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode()
+ *     followed by LZ4_decompress_safe_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * This decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue().
+ * It is stand-alone, and doesn't need an LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+       int compressedSize, int maxDecompressedSize, const char *dictStart,
+       int dictSize);
+
+/**
+ * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode()
+ *     followed by LZ4_decompress_fast_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * This decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_fast_continue().
+ * It is stand-alone, and doesn't need an LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= originalSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+       int originalSize, const char *dictStart, int dictSize);
+
 #endif
index 38bcf00cbed3e53bb14a15dafb38db82d94db13c..bdfc65af4152047c7b34f9b459f095a6d675cd11 100644 (file)
@@ -42,6 +42,7 @@ struct memblock_type {
        unsigned long max;      /* size of the allocated array */
        phys_addr_t total_size; /* size of all regions */
        struct memblock_region *regions;
+       char *name;
 };
 
 struct memblock {
index 093607f90b9116f091d631a5e7516cff8d816cbd..b723a686fc1048f47eb94132ebdfdc0da0b0e6ea 100644 (file)
@@ -109,9 +109,6 @@ extern void unregister_memory_notifier(struct notifier_block *nb);
 extern int register_memory_isolate_notifier(struct notifier_block *nb);
 extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 extern int register_new_memory(int, struct mem_section *);
-extern int memory_block_change_state(struct memory_block *mem,
-                                    unsigned long to_state,
-                                    unsigned long from_state_req);
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern int unregister_memory_section(struct mem_section *);
 #endif
index 6483a6fdce594a9036cad2cd859dff64c81e87cf..ffb21e79204d6a1e91acfe414c264261ab4a115c 100644 (file)
 
 /* RTC_CTRL_REG bitfields */
 #define TPS65910_RTC_CTRL_STOP_RTC                     0x01 /*0=stop, 1=run */
+#define TPS65910_RTC_CTRL_AUTO_COMP                    0x04
 #define TPS65910_RTC_CTRL_GET_TIME                     0x40
 
 /* RTC_STATUS_REG bitfields */
index 27d7c95fd0da0cf88ecb0d7c7a03c0b5c96e0fe4..504d54c71bdba2e9c4f8d1dc84d0d937cf31fcc4 100644 (file)
@@ -90,7 +90,7 @@ struct mbus_hw_ops {
 };
 
 struct mbus_device *
-mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
+mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
                     struct mbus_hw_ops *hw_ops, int index,
                     void __iomem *mmio_va);
 void mbus_unregister_device(struct mbus_device *mbdev);
index ae8d475a9385226a7af27a39d40a10937b0721e0..fa76b516fa473bdfd803d09e0206923153613d65 100644 (file)
@@ -37,7 +37,7 @@ extern int migrate_page(struct address_space *,
                        struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
                unsigned long private, enum migrate_mode mode, int reason);
-extern bool isolate_movable_page(struct page *page, isolate_mode_t mode);
+extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
 extern void putback_movable_page(struct page *page);
 
 extern int migrate_prep(void);
@@ -56,6 +56,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
                free_page_t free, unsigned long private, enum migrate_mode mode,
                int reason)
        { return -ENOSYS; }
+static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
+       { return -EBUSY; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
index e965e5090d9622d57f1b16c1b7f9fe2d18532473..a858bcb6220b5d090c2ca828527b38e6d7c0380e 100644 (file)
@@ -109,7 +109,7 @@ static inline void mlx4_u64_to_mac(u8 *addr, u64 mac)
        int i;
 
        for (i = ETH_ALEN; i > 0; i--) {
-               addr[i - 1] = mac && 0xFF;
+               addr[i - 1] = mac & 0xFF;
                mac >>= 8;
        }
 }
index 574bc157a27c3e358aae6fa6f5690a15c2a82635..0d65dd72c0f49e230613ac268d29c7b377962836 100644 (file)
@@ -314,6 +314,9 @@ struct vm_fault {
        unsigned long address;          /* Faulting virtual address */
        pmd_t *pmd;                     /* Pointer to pmd entry matching
                                         * the 'address' */
+       pud_t *pud;                     /* Pointer to pud entry matching
+                                        * the 'address'
+                                        */
        pte_t orig_pte;                 /* Value of PTE at the time of fault */
 
        struct page *cow_page;          /* Page handler may use for COW fault */
@@ -341,6 +344,13 @@ struct vm_fault {
                                         */
 };
 
+/* page entry size for vm->huge_fault() */
+enum page_entry_size {
+       PE_SIZE_PTE = 0,
+       PE_SIZE_PMD,
+       PE_SIZE_PUD,
+};
+
 /*
  * These are the virtual MM functions - opening of an area, closing and
  * unmapping it (needed to keep files on disk up-to-date etc), pointer
@@ -350,17 +360,17 @@ struct vm_operations_struct {
        void (*open)(struct vm_area_struct * area);
        void (*close)(struct vm_area_struct * area);
        int (*mremap)(struct vm_area_struct * area);
-       int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
-       int (*pmd_fault)(struct vm_fault *vmf);
+       int (*fault)(struct vm_fault *vmf);
+       int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
        void (*map_pages)(struct vm_fault *vmf,
                        pgoff_t start_pgoff, pgoff_t end_pgoff);
 
        /* notification that a previously read-only page is about to become
         * writable, if an error is returned it will cause a SIGBUS */
-       int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+       int (*page_mkwrite)(struct vm_fault *vmf);
 
        /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
-       int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+       int (*pfn_mkwrite)(struct vm_fault *vmf);
 
        /* called by access_process_vm when get_user_pages() fails, typically
         * for use by special VMAs that can switch between memory and hardware
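
Under the new prototypes a handler receives only the vm_fault and recovers the VMA from it; a sketch, assuming struct vm_fault already carries a 'vma' member at this point in the series (handler and ops names are illustrative):

#include <linux/mm.h>

static int example_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;	/* no longer a parameter */

	/* resolve vmf->address within vma and install a page here */
	return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct example_vm_ops = {
	.fault		= example_fault,
	.page_mkwrite	= example_fault,	/* same prototype now */
};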
@@ -416,6 +426,10 @@ static inline int pmd_devmap(pmd_t pmd)
 {
        return 0;
 }
+static inline int pud_devmap(pud_t pud)
+{
+       return 0;
+}
 #endif
 
 /*
@@ -1154,16 +1168,6 @@ extern void pagefault_out_of_memory(void);
 
 extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
 
-int shmem_zero_setup(struct vm_area_struct *);
-#ifdef CONFIG_SHMEM
-bool shmem_mapping(struct address_space *mapping);
-#else
-static inline bool shmem_mapping(struct address_space *mapping)
-{
-       return false;
-}
-#endif
-
 extern bool can_do_mlock(void);
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
@@ -1191,6 +1195,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 
 /**
  * mm_walk - callbacks for walk_page_range
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ *            this handler should only handle pud_trans_huge() puds.
+ *            the pmd_entry or pte_entry callbacks will be used for
+ *            regular PUDs.
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  *            this handler is required to be able to handle
  *            pmd_trans_huge() pmds.  They may simply choose to
@@ -1210,6 +1218,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
  * (see the comment on walk_page_range() for more details)
  */
 struct mm_walk {
+       int (*pud_entry)(pud_t *pud, unsigned long addr,
+                        unsigned long next, struct mm_walk *walk);
        int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
                         unsigned long next, struct mm_walk *walk);
        int (*pte_entry)(pte_t *pte, unsigned long addr,
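
A sketch of a walker opting into the new PUD-level callback (names illustrative; pud_trans_huge() is assumed to be provided by the architecture in this series, and per the comment above the handler should only act on transparent-huge PUDs):

#include <linux/mm.h>

static int example_pud_entry(pud_t *pud, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	if (pud_trans_huge(*pud)) {
		/* handle the huge PUD mapping [addr, next) here */
	}
	return 0;	/* regular PUDs fall through to pmd/pte callbacks */
}

static struct mm_walk example_walk = {
	.pud_entry = example_pud_entry,
};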
@@ -1793,8 +1803,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
        return ptl;
 }
 
-extern void __init pagecache_init(void);
+/*
+ * No scalability reason to split PUD locks yet, but follow the same pattern
+ * as the PMD locks to make it easier if we decide to.  The VM should not be
+ * considered ready to switch to split PUD locks yet; there may be places
+ * which need to be converted from page_table_lock.
+ */
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+       return &mm->page_table_lock;
+}
 
+static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
+{
+       spinlock_t *ptl = pud_lockptr(mm, pud);
+
+       spin_lock(ptl);
+       return ptl;
+}
+
+extern void __init pagecache_init(void);
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
                unsigned long zone_start_pfn, unsigned long *zholes_size);
@@ -2003,8 +2031,10 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *,
        unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
        struct mempolicy *, struct vm_userfaultfd_ctx);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
-extern int split_vma(struct mm_struct *,
-       struct vm_area_struct *, unsigned long addr, int new_below);
+extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
+       unsigned long addr, int new_below);
+extern int split_vma(struct mm_struct *, struct vm_area_struct *,
+       unsigned long addr, int new_below);
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
        struct rb_node **, struct rb_node *);
@@ -2052,18 +2082,22 @@ extern int install_special_mapping(struct mm_struct *mm,
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
-       unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
+       unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+       struct list_head *uf);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot, unsigned long flags,
-       vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
-extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+       vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
+       struct list_head *uf);
+extern int do_munmap(struct mm_struct *, unsigned long, size_t,
+                    struct list_head *uf);
 
 static inline unsigned long
 do_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot, unsigned long flags,
-       unsigned long pgoff, unsigned long *populate)
+       unsigned long pgoff, unsigned long *populate,
+       struct list_head *uf)
 {
-       return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
+       return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
 }
 
 #ifdef CONFIG_MMU
@@ -2124,10 +2158,10 @@ extern void truncate_inode_pages_range(struct address_space *,
 extern void truncate_inode_pages_final(struct address_space *);
 
 /* generic vm_area_ops exported for stackable file systems */
-extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
+extern int filemap_fault(struct vm_fault *vmf);
 extern void filemap_map_pages(struct vm_fault *vmf,
                pgoff_t start_pgoff, pgoff_t end_pgoff);
-extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int filemap_page_mkwrite(struct vm_fault *vmf);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
index 41d376e7116dccae22d9881312cfb660cd9fd58e..e030a68ead7e211fcd1e19277fd3708e665bf78b 100644 (file)
@@ -50,6 +50,13 @@ static __always_inline void add_page_to_lru_list(struct page *page,
        list_add(&page->lru, &lruvec->lists[lru]);
 }
 
+static __always_inline void add_page_to_lru_list_tail(struct page *page,
+                               struct lruvec *lruvec, enum lru_list lru)
+{
+       update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
+       list_add_tail(&page->lru, &lruvec->lists[lru]);
+}
+
 static __always_inline void del_page_from_lru_list(struct page *page,
                                struct lruvec *lruvec, enum lru_list lru)
 {
index 808751d7b737e28cd0b933d19b6544cae85bab79..4f6d440ad78551e919cf3988d436de1b9b2a80c2 100644 (file)
@@ -407,8 +407,27 @@ struct mm_struct {
        unsigned long task_size;                /* size of task vm space */
        unsigned long highest_vm_end;           /* highest vma end address */
        pgd_t * pgd;
-       atomic_t mm_users;                      /* How many users with user space? */
-       atomic_t mm_count;                      /* How many references to "struct mm_struct" (users count as 1) */
+
+       /**
+        * @mm_users: The number of users including userspace.
+        *
+        * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
+        * to 0 (i.e. when the task exits and there are no other temporary
+        * reference holders), we also release a reference on @mm_count
+        * (which may then free the &struct mm_struct if @mm_count also
+        * drops to 0).
+        */
+       atomic_t mm_users;
+
+       /**
+        * @mm_count: The number of references to &struct mm_struct
+        * (@mm_users count as 1).
+        *
+        * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
+        * &struct mm_struct is freed.
+        */
+       atomic_t mm_count;
+
        atomic_long_t nr_ptes;                  /* PTE page table pages */
 #if CONFIG_PGTABLE_LEVELS > 2
        atomic_long_t nr_pmds;                  /* PMD page table pages */
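
A sketch of the two-level pinning the new comments document (the function is illustrative; mmgrab()/mmdrop() and mmget()/mmput() are the helpers the comments name):

#include <linux/sched.h>

static void example_pin_mm(struct mm_struct *mm)
{
	mmgrab(mm);	/* pin the struct itself via mm_count */

	/*
	 * mm's fields may be inspected here, but the user address space
	 * may already be gone; take mmget()/mmget_not_zero() (mm_users)
	 * if the mappings themselves are needed.
	 */

	mmdrop(mm);	/* may free the struct once mm_count hits 0 */
}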
index a1a210d59961a855964b03ee9a7eae7c74d7c86f..51891fb0d3ce075e9343495e54de6d8d03211ac9 100644 (file)
@@ -381,6 +381,19 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
        ___pmd;                                                         \
 })
 
+#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud)            \
+({                                                                     \
+       unsigned long ___haddr = __haddr & HPAGE_PUD_MASK;              \
+       struct mm_struct *___mm = (__vma)->vm_mm;                       \
+       pud_t ___pud;                                                   \
+                                                                       \
+       ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud);          \
+       mmu_notifier_invalidate_range(___mm, ___haddr,                  \
+                                     ___haddr + HPAGE_PUD_SIZE);       \
+                                                                       \
+       ___pud;                                                         \
+})
+
 #define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)           \
 ({                                                                     \
        unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;              \
@@ -475,6 +488,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define pmdp_clear_young_notify pmdp_test_and_clear_young
 #define        ptep_clear_flush_notify ptep_clear_flush
 #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
+#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
 #define set_pte_at_notify set_pte_at
 
index 82fc632fd11d1f70a40826778df66efff20d0c76..8e02b3750fe0f6e18afeb8cfc096705f23405728 100644 (file)
@@ -236,8 +236,6 @@ struct lruvec {
 #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
 #define LRU_ALL             ((1 << NR_LRU_LISTS) - 1)
 
-/* Isolate clean file */
-#define ISOLATE_CLEAN          ((__force isolate_mode_t)0x1)
 /* Isolate unmapped file */
 #define ISOLATE_UNMAPPED       ((__force isolate_mode_t)0x2)
 /* Isolate for asynchronous migration */
index 7b3d487d8b3ff29decb7409f55f57e35dc529ef4..b532ce524dae9e9e987854111b53445ab8cc54c0 100644 (file)
@@ -14,7 +14,7 @@
  * @DevId - Chip Device ID
  * @qinfo - pointer to qinfo records describing the chip
 * @numchips - number of chips including virtual RWW partitions
- * @chipshift - Chip/partiton size 2^chipshift
+ * @chipshift - Chip/partition size 2^chipshift
  * @chips - per-chip data structure
  */
 struct lpddr_private {
index f40f0ab3847a8caaf46bd4d5f224c65014f501cc..97456b2539e46d6232dda804f6a434db6fd7134f 100644 (file)
@@ -330,6 +330,7 @@ struct napi_struct {
 
 enum {
        NAPI_STATE_SCHED,       /* Poll is scheduled */
+       NAPI_STATE_MISSED,      /* reschedule a napi */
        NAPI_STATE_DISABLE,     /* Disable pending */
        NAPI_STATE_NPSVC,       /* Netpoll - don't dequeue from poll_list */
        NAPI_STATE_HASHED,      /* In NAPI hash (busy polling possible) */
@@ -338,12 +339,13 @@ enum {
 };
 
 enum {
-       NAPIF_STATE_SCHED        = (1UL << NAPI_STATE_SCHED),
-       NAPIF_STATE_DISABLE      = (1UL << NAPI_STATE_DISABLE),
-       NAPIF_STATE_NPSVC        = (1UL << NAPI_STATE_NPSVC),
-       NAPIF_STATE_HASHED       = (1UL << NAPI_STATE_HASHED),
-       NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
-       NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
+       NAPIF_STATE_SCHED        = BIT(NAPI_STATE_SCHED),
+       NAPIF_STATE_MISSED       = BIT(NAPI_STATE_MISSED),
+       NAPIF_STATE_DISABLE      = BIT(NAPI_STATE_DISABLE),
+       NAPIF_STATE_NPSVC        = BIT(NAPI_STATE_NPSVC),
+       NAPIF_STATE_HASHED       = BIT(NAPI_STATE_HASHED),
+       NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
+       NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
 };
 
 enum gro_result {
@@ -414,20 +416,7 @@ static inline bool napi_disable_pending(struct napi_struct *n)
        return test_bit(NAPI_STATE_DISABLE, &n->state);
 }
 
-/**
- *     napi_schedule_prep - check if NAPI can be scheduled
- *     @n: NAPI context
- *
- * Test if NAPI routine is already running, and if not mark
- * it as running.  This is used as a condition variable to
- * insure only one NAPI poll instance runs.  We also make
- * sure there is no pending NAPI disable.
- */
-static inline bool napi_schedule_prep(struct napi_struct *n)
-{
-       return !napi_disable_pending(n) &&
-               !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
-}
+bool napi_schedule_prep(struct napi_struct *n);
 
 /**
  *     napi_schedule - schedule NAPI poll
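
Out-of-lining napi_schedule_prep() lets the core arbitrate the new MISSED bit, but the driver-side contract is unchanged. A hedged sketch of the usual interrupt-handler pattern; mydev_priv and mydev_mask_irqs() are illustrative names, only napi_schedule_prep() and __napi_schedule() are real kernel APIs:

/* Hedged sketch: scheduling NAPI from a device interrupt. */
static irqreturn_t mydev_interrupt(int irq, void *data)
{
        struct mydev_priv *priv = data;         /* illustrative */

        if (napi_schedule_prep(&priv->napi)) {
                mydev_mask_irqs(priv);          /* illustrative helper */
                __napi_schedule(&priv->napi);   /* poll runs soon */
        }
        return IRQ_HANDLED;
}
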
index bf240a3cbf9990835e3e3106eff701ec86f8c73e..a72fd04aa5e1eac103dd94165edece813663b456 100644 (file)
@@ -29,6 +29,30 @@ enum nvme_rdma_cm_status {
        NVME_RDMA_CM_INVALID_ORD        = 0x08,
 };
 
+static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
+{
+       switch (status) {
+       case NVME_RDMA_CM_INVALID_LEN:
+               return "invalid length";
+       case NVME_RDMA_CM_INVALID_RECFMT:
+               return "invalid record format";
+       case NVME_RDMA_CM_INVALID_QID:
+               return "invalid queue ID";
+       case NVME_RDMA_CM_INVALID_HSQSIZE:
+               return "invalid host SQ size";
+       case NVME_RDMA_CM_INVALID_HRQSIZE:
+               return "invalid host RQ size";
+       case NVME_RDMA_CM_NO_RSC:
+               return "resource not found";
+       case NVME_RDMA_CM_INVALID_IRD:
+               return "invalid IRD";
+       case NVME_RDMA_CM_INVALID_ORD:
+               return "invalid ORD";
+       default:
+               return "unrecognized reason";
+       }
+}
+
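+
+/* Hedged usage sketch (not part of this patch): translating a CM
+ * reject status for a log message.  The function name and caller
+ * context are illustrative; only nvme_rdma_cm_msg() is real.
+ */
+static inline void mydrv_report_reject(enum nvme_rdma_cm_status status)
+{
+       pr_err("RDMA CM connect rejected: %s (status %d)\n",
+              nvme_rdma_cm_msg(status), status);
+}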
 /**
  * struct nvme_rdma_cm_req - rdma connect request
  *
index 0b676a02cf3e0899cb27c068bb6d3c1225a2506a..c43d435d422552d029bd569157d9aa352348747f 100644 (file)
@@ -579,6 +579,12 @@ struct nvme_write_zeroes_cmd {
        __le16                  appmask;
 };
 
+/* Features */
+
+struct nvme_feat_auto_pst {
+       __le64 entries[32];
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
@@ -644,7 +650,9 @@ struct nvme_identify {
        __le32                  nsid;
        __u64                   rsvd2[2];
        union nvme_data_ptr     dptr;
-       __le32                  cns;
+       __u8                    cns;
+       __u8                    rsvd3;
+       __le16                  ctrlid;
        __u32                   rsvd11[5];
 };
 
index 033fc7bbcefaa3a45b9474732433c4ffe712c6fe..a49b3259cad728e4cbe8e2e90f820980c8e0862c 100644 (file)
@@ -90,6 +90,13 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
 {
        return pfn_pmd(pfn_t_to_pfn(pfn), pgprot);
 }
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
+{
+       return pfn_pud(pfn_t_to_pfn(pfn), pgprot);
+}
+#endif
 #endif
 
 #ifdef __HAVE_ARCH_PTE_DEVMAP
@@ -106,5 +113,10 @@ static inline bool pfn_t_devmap(pfn_t pfn)
 }
 pte_t pte_mkdevmap(pte_t pte);
 pmd_t pmd_mkdevmap(pmd_t pmd);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+       defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pud_mkdevmap(pud_t pud);
 #endif
+#endif /* __HAVE_ARCH_PTE_DEVMAP */
+
 #endif /* _LINUX_PFN_T_H_ */
index 23705a53abbaad29347a2724f24e80c7f44e9341..298ead5512e55d2dd47aed1a035560cbbae80ab7 100644 (file)
@@ -191,10 +191,10 @@ pid_t pid_vnr(struct pid *pid);
 #define do_each_pid_thread(pid, type, task)                            \
        do_each_pid_task(pid, type, task) {                             \
                struct task_struct *tg___ = task;                       \
-               do {
+               for_each_thread(tg___, task) {
 
 #define while_each_pid_thread(pid, type, task)                         \
-               } while_each_thread(tg___, task);                       \
+               }                                                       \
                task = tg___;                                           \
        } while_each_pid_task(pid, type, task)
 #endif /* _LINUX_PID_H */
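
The macro pair now builds on for_each_thread() internally, but callers use it exactly as before. A hedged sketch of the iteration pattern (nr is an illustrative pid number; find_vpid() and the RCU locking are real kernel APIs):

struct task_struct *task;
struct pid *pid;

rcu_read_lock();
pid = find_vpid(nr);                    /* nr: illustrative pid number */
do_each_pid_thread(pid, PIDTYPE_PID, task) {
        /* visits every thread of every task attached to this pid */
        pr_info("thread %d\n", task->pid);
} while_each_pid_thread(pid, PIDTYPE_PID, task);
rcu_read_unlock();
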
index 34cce96741bc1dfb9966cb8b84da8eb6452464ed..c2a989dee876360d85f305965e34c1dddc0a9c12 100644 (file)
@@ -21,6 +21,12 @@ struct pidmap {
 
 struct fs_pin;
 
+enum { /* definitions for pid_namespace's hide_pid field */
+       HIDEPID_OFF       = 0,
+       HIDEPID_NO_ACCESS = 1,
+       HIDEPID_INVISIBLE = 2,
+};
+
 struct pid_namespace {
        struct kref kref;
        struct pidmap pidmap[PIDMAP_ENTRIES];
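
These values mirror the proc hidepid= mount option (hidepid=0/1/2 respectively). A hedged userspace sketch of selecting HIDEPID_INVISIBLE by remounting /proc; error handling is elided and the wrapper name is illustrative:

/* Hedged sketch (userspace): equivalent of
 * 'mount -o remount,hidepid=2 /proc'.
 */
#include <sys/mount.h>

int hide_other_pids(void)
{
        return mount("proc", "/proc", "proc", MS_REMOUNT, "hidepid=2");
}
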
diff --git a/include/linux/platform_data/rtc-m48t86.h b/include/linux/platform_data/rtc-m48t86.h
deleted file mode 100644 (file)
index 915d6b4..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * ST M48T86 / Dallas DS12887 RTC driver
- * Copyright (c) 2006 Tower Technologies
- *
- * Author: Alessandro Zummo <a.zummo@towertech.it>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-struct m48t86_ops
-{
-       void (*writebyte)(unsigned char value, unsigned long addr);
-       unsigned char (*readbyte)(unsigned long addr);
-};
index 18e908324549d47e82de5f5017842e1e2d14a432..a5c0a71ec9147692f6a4dc995b71375f977bb71c 100644 (file)
 #define LSCR1_GRAY2(x)            (((x) & 0xf) << 4)
 #define LSCR1_GRAY1(x)            (((x) & 0xf))
 
-#define DMACR_BURST    (1 << 31)
-#define DMACR_HM(x)    (((x) & 0xf) << 16)
-#define DMACR_TM(x)    ((x) & 0xf)
-
 struct imx_fb_videomode {
        struct fb_videomode mode;
        u32 pcr;
diff --git a/include/linux/platform_data/x86/clk-pmc-atom.h b/include/linux/platform_data/x86/clk-pmc-atom.h
new file mode 100644 (file)
index 0000000..3ab8922
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Intel Atom platform clocks for BayTrail and CherryTrail SoC.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Irina Tirdea <irina.tirdea@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __PLATFORM_DATA_X86_CLK_PMC_ATOM_H
+#define __PLATFORM_DATA_X86_CLK_PMC_ATOM_H
+
+/**
+ * struct pmc_clk - PMC platform clock configuration
+ *
+ * @name:      identifier, typically pmc_plt_clk_<x>, x=[0..5]
+ * @freq:      in Hz; 19.2MHz and 25MHz (BayTrail only) are supported
+ * @parent_name: one of 'xtal' or 'osc'
+ */
+struct pmc_clk {
+       const char *name;
+       unsigned long freq;
+       const char *parent_name;
+};
+
+/**
+ * struct pmc_clk_data - common PMC clock configuration
+ *
+ * @base:      PMC clock register base offset
+ * @clks:      pointer to set of registered clocks, typically 0..5
+ */
+struct pmc_clk_data {
+       void __iomem *base;
+       const struct pmc_clk *clks;
+};
+
+#endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */
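
A hedged sketch of how a board file might instantiate this platform data, following the kernel-doc above. The field values, the register base, and how the consumer counts entries are all illustrative, not taken from this patch:

/* Hedged sketch: board-side PMC clock description. */
static const struct pmc_clk my_clks[] = {
        { .name = "pmc_plt_clk_0", .freq = 19200000, .parent_name = "xtal" },
        { .name = "pmc_plt_clk_1", .freq = 19200000, .parent_name = "osc"  },
};

static struct pmc_clk_data my_clk_data = {
        .base = NULL,   /* ioremapped PMC base would go here, illustrative */
        .clks = my_clks,
};
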
diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h
new file mode 100644 (file)
index 0000000..e4905fe
--- /dev/null
@@ -0,0 +1,158 @@
+/*
+ * Intel Atom SOC Power Management Controller Header File
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef PMC_ATOM_H
+#define PMC_ATOM_H
+
+/* ValleyView Power Control Unit PCI Device ID */
+#define        PCI_DEVICE_ID_VLV_PMC   0x0F1C
+/* CherryTrail Power Control Unit PCI Device ID */
+#define        PCI_DEVICE_ID_CHT_PMC   0x229C
+
+/* PMC Memory mapped IO registers */
+#define        PMC_BASE_ADDR_OFFSET    0x44
+#define        PMC_BASE_ADDR_MASK      0xFFFFFE00
+#define        PMC_MMIO_REG_LEN        0x100
+#define        PMC_REG_BIT_WIDTH       32
+
+/* BIOS uses FUNC_DIS to disable a specific function */
+#define        PMC_FUNC_DIS            0x34
+#define        PMC_FUNC_DIS_2          0x38
+
+/* CHT specific bits in FUNC_DIS2 register */
+#define        BIT_FD_GMM              BIT(3)
+#define        BIT_FD_ISH              BIT(4)
+
+/* S0ix wake event control */
+#define        PMC_S0IX_WAKE_EN        0x3C
+
+#define        BIT_LPC_CLOCK_RUN               BIT(4)
+#define        BIT_SHARED_IRQ_GPSC             BIT(5)
+#define        BIT_ORED_DEDICATED_IRQ_GPSS     BIT(18)
+#define        BIT_ORED_DEDICATED_IRQ_GPSC     BIT(19)
+#define        BIT_SHARED_IRQ_GPSS             BIT(20)
+
+#define        PMC_WAKE_EN_SETTING     ~(BIT_LPC_CLOCK_RUN | \
+                               BIT_SHARED_IRQ_GPSC | \
+                               BIT_ORED_DEDICATED_IRQ_GPSS | \
+                               BIT_ORED_DEDICATED_IRQ_GPSC | \
+                               BIT_SHARED_IRQ_GPSS)
+
+/* The timers accumulate time spent in sleep state */
+#define        PMC_S0IR_TMR            0x80
+#define        PMC_S0I1_TMR            0x84
+#define        PMC_S0I2_TMR            0x88
+#define        PMC_S0I3_TMR            0x8C
+#define        PMC_S0_TMR              0x90
+/* Sleep state counter is in units of 32us */
+#define        PMC_TMR_SHIFT           5
+
+/* Power status of power islands */
+#define        PMC_PSS                 0x98
+
+#define PMC_PSS_BIT_GBE                        BIT(0)
+#define PMC_PSS_BIT_SATA               BIT(1)
+#define PMC_PSS_BIT_HDA                        BIT(2)
+#define PMC_PSS_BIT_SEC                        BIT(3)
+#define PMC_PSS_BIT_PCIE               BIT(4)
+#define PMC_PSS_BIT_LPSS               BIT(5)
+#define PMC_PSS_BIT_LPE                        BIT(6)
+#define PMC_PSS_BIT_DFX                        BIT(7)
+#define PMC_PSS_BIT_USH_CTRL           BIT(8)
+#define PMC_PSS_BIT_USH_SUS            BIT(9)
+#define PMC_PSS_BIT_USH_VCCS           BIT(10)
+#define PMC_PSS_BIT_USH_VCCA           BIT(11)
+#define PMC_PSS_BIT_OTG_CTRL           BIT(12)
+#define PMC_PSS_BIT_OTG_VCCS           BIT(13)
+#define PMC_PSS_BIT_OTG_VCCA_CLK       BIT(14)
+#define PMC_PSS_BIT_OTG_VCCA           BIT(15)
+#define PMC_PSS_BIT_USB                        BIT(16)
+#define PMC_PSS_BIT_USB_SUS            BIT(17)
+
+/* CHT specific bits in PSS register */
+#define        PMC_PSS_BIT_CHT_UFS             BIT(7)
+#define        PMC_PSS_BIT_CHT_UXD             BIT(11)
+#define        PMC_PSS_BIT_CHT_UXD_FD          BIT(12)
+#define        PMC_PSS_BIT_CHT_UX_ENG          BIT(15)
+#define        PMC_PSS_BIT_CHT_USB_SUS         BIT(16)
+#define        PMC_PSS_BIT_CHT_GMM             BIT(17)
+#define        PMC_PSS_BIT_CHT_ISH             BIT(18)
+#define        PMC_PSS_BIT_CHT_DFX_MASTER      BIT(26)
+#define        PMC_PSS_BIT_CHT_DFX_CLUSTER1    BIT(27)
+#define        PMC_PSS_BIT_CHT_DFX_CLUSTER2    BIT(28)
+#define        PMC_PSS_BIT_CHT_DFX_CLUSTER3    BIT(29)
+#define        PMC_PSS_BIT_CHT_DFX_CLUSTER4    BIT(30)
+#define        PMC_PSS_BIT_CHT_DFX_CLUSTER5    BIT(31)
+
+/* These registers reflect D3 status of functions */
+#define        PMC_D3_STS_0            0xA0
+
+#define        BIT_LPSS1_F0_DMA        BIT(0)
+#define        BIT_LPSS1_F1_PWM1       BIT(1)
+#define        BIT_LPSS1_F2_PWM2       BIT(2)
+#define        BIT_LPSS1_F3_HSUART1    BIT(3)
+#define        BIT_LPSS1_F4_HSUART2    BIT(4)
+#define        BIT_LPSS1_F5_SPI        BIT(5)
+#define        BIT_LPSS1_F6_XXX        BIT(6)
+#define        BIT_LPSS1_F7_XXX        BIT(7)
+#define        BIT_SCC_EMMC            BIT(8)
+#define        BIT_SCC_SDIO            BIT(9)
+#define        BIT_SCC_SDCARD          BIT(10)
+#define        BIT_SCC_MIPI            BIT(11)
+#define        BIT_HDA                 BIT(12)
+#define        BIT_LPE                 BIT(13)
+#define        BIT_OTG                 BIT(14)
+#define        BIT_USH                 BIT(15)
+#define        BIT_GBE                 BIT(16)
+#define        BIT_SATA                BIT(17)
+#define        BIT_USB_EHCI            BIT(18)
+#define        BIT_SEC                 BIT(19)
+#define        BIT_PCIE_PORT0          BIT(20)
+#define        BIT_PCIE_PORT1          BIT(21)
+#define        BIT_PCIE_PORT2          BIT(22)
+#define        BIT_PCIE_PORT3          BIT(23)
+#define        BIT_LPSS2_F0_DMA        BIT(24)
+#define        BIT_LPSS2_F1_I2C1       BIT(25)
+#define        BIT_LPSS2_F2_I2C2       BIT(26)
+#define        BIT_LPSS2_F3_I2C3       BIT(27)
+#define        BIT_LPSS2_F4_I2C4       BIT(28)
+#define        BIT_LPSS2_F5_I2C5       BIT(29)
+#define        BIT_LPSS2_F6_I2C6       BIT(30)
+#define        BIT_LPSS2_F7_I2C7       BIT(31)
+
+#define        PMC_D3_STS_1            0xA4
+#define        BIT_SMB                 BIT(0)
+#define        BIT_OTG_SS_PHY          BIT(1)
+#define        BIT_USH_SS_PHY          BIT(2)
+#define        BIT_DFX                 BIT(3)
+
+/* CHT specific bits in PMC_D3_STS_1 register */
+#define        BIT_STS_GMM             BIT(1)
+#define        BIT_STS_ISH             BIT(2)
+
+/* PMC I/O Registers */
+#define        ACPI_BASE_ADDR_OFFSET   0x40
+#define        ACPI_BASE_ADDR_MASK     0xFFFFFE00
+#define        ACPI_MMIO_REG_LEN       0x100
+
+#define        PM1_CNT                 0x4
+#define        SLEEP_TYPE_MASK         0xFFFFECFF
+#define        SLEEP_TYPE_S5           0x1C00
+#define        SLEEP_ENABLE            0x2000
+
+extern int pmc_atom_read(int offset, u32 *value);
+extern int pmc_atom_write(int offset, u32 value);
+
+#endif /* PMC_ATOM_H */
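
A hedged sketch of reading the power-island status register through the accessors declared above. The bit polarity is deliberately not asserted; the function name and printout are illustrative:

/* Hedged sketch: sampling PMC_PSS via pmc_atom_read(). */
static void mydrv_dump_pss(void)
{
        u32 pss;

        if (pmc_atom_read(PMC_PSS, &pss))
                return;         /* PMC not probed / unavailable */

        pr_info("PSS=%#x, SATA bit %s\n", pss,
                (pss & PMC_PSS_BIT_SATA) ? "set" : "clear");
}
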
index 52bda854593b4a9eae9bad2d9990f9dd166ed609..3e5735064b7166837e7eae615a4cde2d3a9cd727 100644 (file)
 #define _LINUX_RADIX_TREE_H
 
 #include <linux/bitops.h>
-#include <linux/preempt.h>
-#include <linux/types.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/preempt.h>
 #include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
 
 /*
  * The bottom two bits of the slot determine how the remaining bits in the
@@ -94,7 +96,7 @@ struct radix_tree_node {
        unsigned char   count;          /* Total entry count */
        unsigned char   exceptional;    /* Exceptional entry count */
        struct radix_tree_node *parent;         /* Used when ascending tree */
-       void *private_data;                     /* For tree user */
+       struct radix_tree_root *root;           /* The tree we belong to */
        union {
                struct list_head private_list;  /* For tree user */
                struct rcu_head rcu_head;       /* Used when freeing node */
@@ -103,7 +105,10 @@ struct radix_tree_node {
        unsigned long   tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
+/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
+#define ROOT_IS_IDR    ((__force gfp_t)(1 << __GFP_BITS_SHIFT))
+#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT + 1)
+
 struct radix_tree_root {
        gfp_t                   gfp_mask;
        struct radix_tree_node  __rcu *rnode;
@@ -123,7 +128,7 @@ do {                                                                        \
        (root)->rnode = NULL;                                           \
 } while (0)
 
-static inline bool radix_tree_empty(struct radix_tree_root *root)
+static inline bool radix_tree_empty(const struct radix_tree_root *root)
 {
        return root->rnode == NULL;
 }
@@ -216,10 +221,8 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
  */
 
 /**
- * radix_tree_deref_slot       - dereference a slot
- * @pslot:     pointer to slot, returned by radix_tree_lookup_slot
- * Returns:    item that was stored in that slot with any direct pointer flag
- *             removed.
+ * radix_tree_deref_slot - dereference a slot
+ * @slot: slot pointer, returned by radix_tree_lookup_slot
  *
  * For use with radix_tree_lookup_slot().  Caller must hold tree at least read
  * locked across slot lookup and dereference. Not required if write lock is
@@ -227,26 +230,27 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
  *
  * radix_tree_deref_retry must be used to confirm validity of the pointer if
  * only the read lock is held.
+ *
+ * Return: entry stored in that slot.
  */
-static inline void *radix_tree_deref_slot(void **pslot)
+static inline void *radix_tree_deref_slot(void __rcu **slot)
 {
-       return rcu_dereference(*pslot);
+       return rcu_dereference(*slot);
 }
 
 /**
- * radix_tree_deref_slot_protected     - dereference a slot without RCU lock but with tree lock held
- * @pslot:     pointer to slot, returned by radix_tree_lookup_slot
- * Returns:    item that was stored in that slot with any direct pointer flag
- *             removed.
- *
- * Similar to radix_tree_deref_slot but only used during migration when a pages
- * mapping is being moved. The caller does not hold the RCU read lock but it
- * must hold the tree lock to prevent parallel updates.
+ * radix_tree_deref_slot_protected - dereference a slot with tree lock held
+ * @slot: slot pointer, returned by radix_tree_lookup_slot
+ *
+ * Similar to radix_tree_deref_slot.  The caller does not hold the RCU read
+ * lock but it must hold the tree lock to prevent parallel updates.
+ *
+ * Return: entry stored in that slot.
  */
-static inline void *radix_tree_deref_slot_protected(void **pslot,
+static inline void *radix_tree_deref_slot_protected(void __rcu **slot,
                                                        spinlock_t *treelock)
 {
-       return rcu_dereference_protected(*pslot, lockdep_is_held(treelock));
+       return rcu_dereference_protected(*slot, lockdep_is_held(treelock));
 }
 
 /**
@@ -282,9 +286,9 @@ static inline int radix_tree_exception(void *arg)
        return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
 }
 
-int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
+int __radix_tree_create(struct radix_tree_root *, unsigned long index,
                        unsigned order, struct radix_tree_node **nodep,
-                       void ***slotp);
+                       void __rcu ***slotp);
 int __radix_tree_insert(struct radix_tree_root *, unsigned long index,
                        unsigned order, void *);
 static inline int radix_tree_insert(struct radix_tree_root *root,
@@ -292,55 +296,56 @@ static inline int radix_tree_insert(struct radix_tree_root *root,
 {
        return __radix_tree_insert(root, index, 0, entry);
 }
-void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
-                         struct radix_tree_node **nodep, void ***slotp);
-void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
-void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index,
+                         struct radix_tree_node **nodep, void __rcu ***slotp);
+void *radix_tree_lookup(const struct radix_tree_root *, unsigned long);
+void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *,
+                                       unsigned long index);
 typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *);
-void __radix_tree_replace(struct radix_tree_root *root,
-                         struct radix_tree_node *node,
-                         void **slot, void *item,
+void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *,
+                         void __rcu **slot, void *entry,
                          radix_tree_update_node_t update_node, void *private);
 void radix_tree_iter_replace(struct radix_tree_root *,
-               const struct radix_tree_iter *, void **slot, void *item);
-void radix_tree_replace_slot(struct radix_tree_root *root,
-                            void **slot, void *item);
-void __radix_tree_delete_node(struct radix_tree_root *root,
-                             struct radix_tree_node *node,
+               const struct radix_tree_iter *, void __rcu **slot, void *entry);
+void radix_tree_replace_slot(struct radix_tree_root *,
+                            void __rcu **slot, void *entry);
+void __radix_tree_delete_node(struct radix_tree_root *,
+                             struct radix_tree_node *,
                              radix_tree_update_node_t update_node,
                              void *private);
+void radix_tree_iter_delete(struct radix_tree_root *,
+                       struct radix_tree_iter *iter, void __rcu **slot);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);
-void radix_tree_clear_tags(struct radix_tree_root *root,
-                          struct radix_tree_node *node,
-                          void **slot);
-unsigned int radix_tree_gang_lookup(struct radix_tree_root *root,
+void radix_tree_clear_tags(struct radix_tree_root *, struct radix_tree_node *,
+                          void __rcu **slot);
+unsigned int radix_tree_gang_lookup(const struct radix_tree_root *,
                        void **results, unsigned long first_index,
                        unsigned int max_items);
-unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
-                       void ***results, unsigned long *indices,
+unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *,
+                       void __rcu ***results, unsigned long *indices,
                        unsigned long first_index, unsigned int max_items);
 int radix_tree_preload(gfp_t gfp_mask);
 int radix_tree_maybe_preload(gfp_t gfp_mask);
 int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order);
 void radix_tree_init(void);
-void *radix_tree_tag_set(struct radix_tree_root *root,
+void *radix_tree_tag_set(struct radix_tree_root *,
                        unsigned long index, unsigned int tag);
-void *radix_tree_tag_clear(struct radix_tree_root *root,
+void *radix_tree_tag_clear(struct radix_tree_root *,
                        unsigned long index, unsigned int tag);
-int radix_tree_tag_get(struct radix_tree_root *root,
+int radix_tree_tag_get(const struct radix_tree_root *,
                        unsigned long index, unsigned int tag);
-void radix_tree_iter_tag_set(struct radix_tree_root *root,
+void radix_tree_iter_tag_set(struct radix_tree_root *,
+               const struct radix_tree_iter *iter, unsigned int tag);
+void radix_tree_iter_tag_clear(struct radix_tree_root *,
                const struct radix_tree_iter *iter, unsigned int tag);
-unsigned int
-radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
-               unsigned long first_index, unsigned int max_items,
-               unsigned int tag);
-unsigned int
-radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
-               unsigned long first_index, unsigned int max_items,
-               unsigned int tag);
-int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *,
+               void **results, unsigned long first_index,
+               unsigned int max_items, unsigned int tag);
+unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *,
+               void __rcu ***results, unsigned long first_index,
+               unsigned int max_items, unsigned int tag);
+int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag);
 
 static inline void radix_tree_preload_end(void)
 {
@@ -352,10 +357,14 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index,
                        unsigned new_order);
 int radix_tree_join(struct radix_tree_root *, unsigned long index,
                        unsigned new_order, void *);
+void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
+                       gfp_t, int end);
 
-#define RADIX_TREE_ITER_TAG_MASK       0x00FF  /* tag index in lower byte */
-#define RADIX_TREE_ITER_TAGGED         0x0100  /* lookup tagged slots */
-#define RADIX_TREE_ITER_CONTIG         0x0200  /* stop at first hole */
+enum {
+       RADIX_TREE_ITER_TAG_MASK = 0x0f,        /* tag index in lower nybble */
+       RADIX_TREE_ITER_TAGGED   = 0x10,        /* lookup tagged slots */
+       RADIX_TREE_ITER_CONTIG   = 0x20,        /* stop at first hole */
+};
 
 /**
  * radix_tree_iter_init - initialize radix tree iterator
@@ -364,7 +373,7 @@ int radix_tree_join(struct radix_tree_root *, unsigned long index,
  * @start:     iteration starting index
  * Returns:    NULL
  */
-static __always_inline void **
+static __always_inline void __rcu **
 radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
 {
        /*
@@ -393,9 +402,45 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
  * Also it fills @iter with data about chunk: position in the tree (index),
  * its end (next_index), and constructs a bit mask for tagged iterating (tags).
  */
-void **radix_tree_next_chunk(struct radix_tree_root *root,
+void __rcu **radix_tree_next_chunk(const struct radix_tree_root *,
                             struct radix_tree_iter *iter, unsigned flags);
 
+/**
+ * radix_tree_iter_lookup - look up an index in the radix tree
+ * @root: radix tree root
+ * @iter: iterator state
+ * @index: key to look up
+ *
+ * If @index is present in the radix tree, this function returns the slot
+ * containing it and updates @iter to describe the entry.  If @index is not
+ * present, it returns NULL.
+ */
+static inline void __rcu **
+radix_tree_iter_lookup(const struct radix_tree_root *root,
+                       struct radix_tree_iter *iter, unsigned long index)
+{
+       radix_tree_iter_init(iter, index);
+       return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG);
+}
+
+/**
+ * radix_tree_iter_find - find a present entry
+ * @root: radix tree root
+ * @iter: iterator state
+ * @index: start location
+ *
+ * This function returns the slot containing the entry with the lowest index
+ * which is at least @index.  If @index is larger than any present entry, this
+ * function returns NULL.  The @iter is updated to describe the entry found.
+ */
+static inline void __rcu **
+radix_tree_iter_find(const struct radix_tree_root *root,
+                       struct radix_tree_iter *iter, unsigned long index)
+{
+       radix_tree_iter_init(iter, index);
+       return radix_tree_next_chunk(root, iter, 0);
+}
+
 /**
  * radix_tree_iter_retry - retry this chunk of the iteration
  * @iter:      iterator state
@@ -406,7 +451,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
  * and continue the iteration.
  */
 static inline __must_check
-void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter)
 {
        iter->next_index = iter->index;
        iter->tags = 0;
@@ -429,7 +474,7 @@ __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots)
  * have been invalidated by an insertion or deletion.  Call this function
  * before releasing the lock to continue the iteration from the next index.
  */
-void **__must_check radix_tree_iter_resume(void **slot,
+void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot,
                                        struct radix_tree_iter *iter);
 
 /**
@@ -445,11 +490,11 @@ radix_tree_chunk_size(struct radix_tree_iter *iter)
 }
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
-                               unsigned flags);
+void __rcu **__radix_tree_next_slot(void __rcu **slot,
+                               struct radix_tree_iter *iter, unsigned flags);
 #else
 /* Can't happen without sibling entries, but the compiler can't tell that */
-static inline void ** __radix_tree_next_slot(void **slot,
+static inline void __rcu **__radix_tree_next_slot(void __rcu **slot,
                                struct radix_tree_iter *iter, unsigned flags)
 {
        return slot;
@@ -475,8 +520,8 @@ static inline void ** __radix_tree_next_slot(void **slot,
  * b) we are doing non-tagged iteration, and iter->index and iter->next_index
  *    have been set up so that radix_tree_chunk_size() returns 1 or 0.
  */
-static __always_inline void **
-radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
+static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot,
+                               struct radix_tree_iter *iter, unsigned flags)
 {
        if (flags & RADIX_TREE_ITER_TAGGED) {
                iter->tags >>= 1;
@@ -514,7 +559,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
        return NULL;
 
  found:
-       if (unlikely(radix_tree_is_internal_node(*slot)))
+       if (unlikely(radix_tree_is_internal_node(rcu_dereference_raw(*slot))))
                return __radix_tree_next_slot(slot, iter, flags);
        return slot;
 }
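
The new lookup helpers combine iterator initialization with a contiguous-chunk walk. A hedged sketch of a plain lookup; 'tree' and 'index' are illustrative, and the RCU read lock must be held across lookup and dereference as the kernel-doc above requires:

struct radix_tree_iter iter;
void __rcu **slot;
void *entry = NULL;

rcu_read_lock();
slot = radix_tree_iter_lookup(&tree, &iter, index);
if (slot)
        entry = radix_tree_deref_slot(slot);
rcu_read_unlock();
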
index d076183e49bec0b0c843f8b067fbd3d25cc4bce0..9702b6e183bc6ab2432573170e1b43db4ff19288 100644 (file)
@@ -90,7 +90,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)     \
        old->rbaugmented = rbcompute(old);                              \
 }                                                                      \
 rbstatic const struct rb_augment_callbacks rbname = {                  \
-       rbname ## _propagate, rbname ## _copy, rbname ## _rotate        \
+       .propagate = rbname ## _propagate,                              \
+       .copy = rbname ## _copy,                                        \
+       .rotate = rbname ## _rotate                                     \
 };
 
 
index 600aadf9cca445a437f97a5c71a21febf537845f..0023fee4bbbcb4f38c4d3f6f9181e57261c4188a 100644 (file)
@@ -1,54 +1,10 @@
 #ifndef _LINUX_REFCOUNT_H
 #define _LINUX_REFCOUNT_H
 
-/*
- * Variant of atomic_t specialized for reference counts.
- *
- * The interface matches the atomic_t interface (to aid in porting) but only
- * provides the few functions one should use for reference counting.
- *
- * It differs in that the counter saturates at UINT_MAX and will not move once
- * there. This avoids wrapping the counter and causing 'spurious'
- * use-after-free issues.
- *
- * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
- * and provide only what is strictly required for refcounts.
- *
- * The increments are fully relaxed; these will not provide ordering. The
- * rationale is that whatever is used to obtain the object we're increasing the
- * reference count on will provide the ordering. For locked data structures,
- * its the lock acquire, for RCU/lockless data structures its the dependent
- * load.
- *
- * Do note that inc_not_zero() provides a control dependency which will order
- * future stores against the inc, this ensures we'll never modify the object
- * if we did not in fact acquire a reference.
- *
- * The decrements will provide release order, such that all the prior loads and
- * stores will be issued before, it also provides a control dependency, which
- * will order us against the subsequent free().
- *
- * The control dependency is against the load of the cmpxchg (ll/sc) that
- * succeeded. This means the stores aren't fully ordered, but this is fine
- * because the 1->0 transition indicates no concurrency.
- *
- * Note that the allocator is responsible for ordering things between free()
- * and alloc().
- *
- */
-
 #include <linux/atomic.h>
-#include <linux/bug.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
-
-#ifdef CONFIG_DEBUG_REFCOUNT
-#define REFCOUNT_WARN(cond, str) WARN_ON(cond)
-#define __refcount_check       __must_check
-#else
-#define REFCOUNT_WARN(cond, str) (void)(cond)
-#define __refcount_check
-#endif
+#include <linux/kernel.h>
 
 typedef struct refcount_struct {
        atomic_t refs;
@@ -66,229 +22,21 @@ static inline unsigned int refcount_read(const refcount_t *r)
        return atomic_read(&r->refs);
 }
 
-static inline __refcount_check
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
-{
-       unsigned int old, new, val = atomic_read(&r->refs);
-
-       for (;;) {
-               if (!val)
-                       return false;
-
-               if (unlikely(val == UINT_MAX))
-                       return true;
-
-               new = val + i;
-               if (new < val)
-                       new = UINT_MAX;
-               old = atomic_cmpxchg_relaxed(&r->refs, val, new);
-               if (old == val)
-                       break;
-
-               val = old;
-       }
-
-       REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
-
-       return true;
-}
-
-static inline void refcount_add(unsigned int i, refcount_t *r)
-{
-       REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
-}
-
-/*
- * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
- *
- * Provides no memory ordering, it is assumed the caller has guaranteed the
- * object memory to be stable (RCU, etc.). It does provide a control dependency
- * and thereby orders future stores. See the comment on top.
- */
-static inline __refcount_check
-bool refcount_inc_not_zero(refcount_t *r)
-{
-       unsigned int old, new, val = atomic_read(&r->refs);
-
-       for (;;) {
-               new = val + 1;
-
-               if (!val)
-                       return false;
-
-               if (unlikely(!new))
-                       return true;
-
-               old = atomic_cmpxchg_relaxed(&r->refs, val, new);
-               if (old == val)
-                       break;
-
-               val = old;
-       }
-
-       REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
-
-       return true;
-}
-
-/*
- * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
- *
- * Provides no memory ordering, it is assumed the caller already has a
- * reference on the object, will WARN when this is not so.
- */
-static inline void refcount_inc(refcount_t *r)
-{
-       REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
-}
-
-/*
- * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
- * decrement when saturated at UINT_MAX.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
- */
-static inline __refcount_check
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
-{
-       unsigned int old, new, val = atomic_read(&r->refs);
-
-       for (;;) {
-               if (unlikely(val == UINT_MAX))
-                       return false;
-
-               new = val - i;
-               if (new > val) {
-                       REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
-                       return false;
-               }
-
-               old = atomic_cmpxchg_release(&r->refs, val, new);
-               if (old == val)
-                       break;
-
-               val = old;
-       }
-
-       return !new;
-}
-
-static inline __refcount_check
-bool refcount_dec_and_test(refcount_t *r)
-{
-       return refcount_sub_and_test(1, r);
-}
+extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
+extern void refcount_add(unsigned int i, refcount_t *r);
 
-/*
- * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
- * when saturated at UINT_MAX.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before.
- */
-static inline
-void refcount_dec(refcount_t *r)
-{
-       REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
-}
-
-/*
- * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the
- * success thereof.
- *
- * Like all decrement operations, it provides release memory order and provides
- * a control dependency.
- *
- * It can be used like a try-delete operator; this explicit case is provided
- * and not cmpxchg in generic, because that would allow implementing unsafe
- * operations.
- */
-static inline __refcount_check
-bool refcount_dec_if_one(refcount_t *r)
-{
-       return atomic_cmpxchg_release(&r->refs, 1, 0) == 1;
-}
-
-/*
- * No atomic_t counterpart, it decrements unless the value is 1, in which case
- * it will return false.
- *
- * Was often done like: atomic_add_unless(&var, -1, 1)
- */
-static inline __refcount_check
-bool refcount_dec_not_one(refcount_t *r)
-{
-       unsigned int old, new, val = atomic_read(&r->refs);
+extern __must_check bool refcount_inc_not_zero(refcount_t *r);
+extern void refcount_inc(refcount_t *r);
 
-       for (;;) {
-               if (unlikely(val == UINT_MAX))
-                       return true;
+extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
+extern void refcount_sub(unsigned int i, refcount_t *r);
 
-               if (val == 1)
-                       return false;
+extern __must_check bool refcount_dec_and_test(refcount_t *r);
+extern void refcount_dec(refcount_t *r);
 
-               new = val - 1;
-               if (new > val) {
-                       REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
-                       return true;
-               }
-
-               old = atomic_cmpxchg_release(&r->refs, val, new);
-               if (old == val)
-                       break;
-
-               val = old;
-       }
-
-       return true;
-}
-
-/*
- * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail
- * to decrement when saturated at UINT_MAX.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
- */
-static inline __refcount_check
-bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock)
-{
-       if (refcount_dec_not_one(r))
-               return false;
-
-       mutex_lock(lock);
-       if (!refcount_dec_and_test(r)) {
-               mutex_unlock(lock);
-               return false;
-       }
-
-       return true;
-}
-
-/*
- * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to
- * decrement when saturated at UINT_MAX.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
- */
-static inline __refcount_check
-bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
-{
-       if (refcount_dec_not_one(r))
-               return false;
-
-       spin_lock(lock);
-       if (!refcount_dec_and_test(r)) {
-               spin_unlock(lock);
-               return false;
-       }
-
-       return true;
-}
+extern __must_check bool refcount_dec_if_one(refcount_t *r);
+extern __must_check bool refcount_dec_not_one(refcount_t *r);
+extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
+extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock);
 
 #endif /* _LINUX_REFCOUNT_H */
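
The API is now out-of-line, but the get/put pattern it serves is unchanged. A hedged sketch; 'struct obj' and obj_free() are illustrative:

struct obj {
        refcount_t ref;
};

static void obj_get(struct obj *o)
{
        refcount_inc(&o->ref);          /* WARNs if the count was 0 */
}

static void obj_put(struct obj *o)
{
        if (refcount_dec_and_test(&o->ref))
                obj_free(o);            /* last reference dropped */
}
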
index 15321fb1df6b5a70263fef971654ec709b1731ba..8c89e902df3e7ad35ecbff3009e2d88d06026b2a 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/rwsem.h>
 #include <linux/memcontrol.h>
+#include <linux/highmem.h>
 
 /*
  * The anon_vma heads a list of private "related" vmas, to scan if
@@ -196,41 +197,30 @@ int page_referenced(struct page *, int is_locked,
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
 
-/*
- * Used by uprobes to replace a userspace page safely
- */
-pte_t *__page_check_address(struct page *, struct mm_struct *,
-                               unsigned long, spinlock_t **, int);
-
-static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-                                       unsigned long address,
-                                       spinlock_t **ptlp, int sync)
-{
-       pte_t *ptep;
+/* Avoid racy checks */
+#define PVMW_SYNC              (1 << 0)
+/* Look for migration entries rather than present PTEs */
+#define PVMW_MIGRATION         (1 << 1)
 
-       __cond_lock(*ptlp, ptep = __page_check_address(page, mm, address,
-                                                      ptlp, sync));
-       return ptep;
-}
+struct page_vma_mapped_walk {
+       struct page *page;
+       struct vm_area_struct *vma;
+       unsigned long address;
+       pmd_t *pmd;
+       pte_t *pte;
+       spinlock_t *ptl;
+       unsigned int flags;
+};
 
-/*
- * Used by idle page tracking to check if a page was referenced via page
- * tables.
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
-                                 unsigned long address, pmd_t **pmdp,
-                                 pte_t **ptep, spinlock_t **ptlp);
-#else
-static inline bool page_check_address_transhuge(struct page *page,
-                               struct mm_struct *mm, unsigned long address,
-                               pmd_t **pmdp, pte_t **ptep, spinlock_t **ptlp)
+static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 {
-       *ptep = page_check_address(page, mm, address, ptlp, 0);
-       *pmdp = NULL;
-       return !!*ptep;
+       if (pvmw->pte)
+               pte_unmap(pvmw->pte);
+       if (pvmw->ptl)
+               spin_unlock(pvmw->ptl);
 }
-#endif
+
+bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
 
 /*
  * Used by swapoff to help locate where page is expected in vma.
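
A hedged sketch of the new walker replacing page_check_address(): it visits each place 'page' is mapped inside 'vma'. page_vma_mapped_walk() takes and releases the pte mapping and ptl itself as it iterates; the loop body is illustrative:

struct page_vma_mapped_walk pvmw = {
        .page = page,
        .vma = vma,
        .address = address,
        .flags = 0,
};

while (page_vma_mapped_walk(&pvmw)) {
        /* pvmw.pte (or pvmw.pmd for a huge mapping) is valid here,
         * with pvmw.ptl held; inspect or update the entry.
         */
}
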
diff --git a/include/linux/rodata_test.h b/include/linux/rodata_test.h
new file mode 100644 (file)
index 0000000..ea05f6c
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * rodata_test.h: functional test for mark_rodata_ro function
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifndef _RODATA_TEST_H
+#define _RODATA_TEST_H
+
+#ifdef CONFIG_DEBUG_RODATA_TEST
+extern const int rodata_test_data;
+void rodata_test(void);
+#else
+static inline void rodata_test(void) {}
+#endif
+
+#endif /* _RODATA_TEST_H */
index 451e241f32c5d25f7c1667314eb17b9de9981b03..4a28deb5f210a103d486fa12c509405f92f4ca60 100644 (file)
@@ -2904,6 +2904,28 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
  */
 extern struct mm_struct * mm_alloc(void);
 
+/**
+ * mmgrab() - Pin a &struct mm_struct.
+ * @mm: The &struct mm_struct to pin.
+ *
+ * Make sure that @mm will not get freed even after the owning task
+ * exits. This doesn't guarantee that the associated address space
+ * will still exist later on and mmget_not_zero() has to be used before
+ * accessing it.
+ *
+ * This is a preferred way to pin @mm for a longer/unbounded amount
+ * of time.
+ *
+ * Use mmdrop() to release the reference acquired by mmgrab().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmgrab(struct mm_struct *mm)
+{
+       atomic_inc(&mm->mm_count);
+}
+
 /* mmdrop drops the mm and the page tables */
 extern void __mmdrop(struct mm_struct *);
 static inline void mmdrop(struct mm_struct *mm)
@@ -2926,6 +2948,27 @@ static inline void mmdrop_async(struct mm_struct *mm)
        }
 }
 
+/**
+ * mmget() - Pin the address space associated with a &struct mm_struct.
+ * @mm: The address space to pin.
+ *
+ * Make sure that the address space of the given &struct mm_struct doesn't
+ * go away. This does not protect against parts of the address space being
+ * modified or freed, however.
+ *
+ * Never use this function to pin this address space for an
+ * unbounded/indefinite amount of time.
+ *
+ * Use mmput() to release the reference acquired by mmget().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmget(struct mm_struct *mm)
+{
+       atomic_inc(&mm->mm_users);
+}
+
 static inline bool mmget_not_zero(struct mm_struct *mm)
 {
        return atomic_inc_not_zero(&mm->mm_users);
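
A hedged sketch contrasting the two reference types the kernel-doc above distinguishes; the surrounding context is illustrative:

struct mm_struct *mm = current->mm;

mmgrab(mm);                     /* mm_count: keeps the struct alive */
/* ... possibly much later, even after the owner exited ... */
if (mmget_not_zero(mm)) {       /* mm_users: address space still there */
        /* safe to operate on the address space here */
        mmput(mm);
}
mmdrop(mm);                     /* drop the mm_count pin */
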
index deee23d012e7f8f6864845040b5a9a4586689809..04b124fca51e36e635c7039fad0cd7d9741df253 100644 (file)
@@ -27,6 +27,7 @@ typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer,
                size_t len, bool send);
 
 #ifdef CONFIG_BLK_SED_OPAL
+void free_opal_dev(struct opal_dev *dev);
 bool opal_unlock_from_suspend(struct opal_dev *dev);
 struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv);
 int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
@@ -51,6 +52,10 @@ static inline bool is_sed_ioctl(unsigned int cmd)
        return false;
 }
 #else
+static inline void free_opal_dev(struct opal_dev *dev)
+{
+}
+
 static inline bool is_sed_ioctl(unsigned int cmd)
 {
        return false;
index d0efd6e6c20a6a6a39273639dbd33f3d77c2e156..4fc222f8755d82113deba19eea85e8c47a844818 100644 (file)
@@ -21,7 +21,7 @@ struct sem_array {
        struct list_head        list_id;        /* undo requests on this array */
        int                     sem_nsems;      /* no. of semaphores in array */
        int                     complex_count;  /* pending complex operations */
-       bool                    complex_mode;   /* no parallel simple ops */
+       unsigned int            use_global_lock;/* >0: global lock required */
 };
 
 #ifdef CONFIG_SYSVIPC
index fdaac9d4d46d28a4106ef7295e1ced78bfda9afb..a7d6bd2a918f73a71abcc2daf92fb412c22ac342 100644 (file)
@@ -57,7 +57,14 @@ extern int shmem_zero_setup(struct vm_area_struct *);
 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+#ifdef CONFIG_SHMEM
 extern bool shmem_mapping(struct address_space *mapping);
+#else
+static inline bool shmem_mapping(struct address_space *mapping)
+{
+       return false;
+}
+#endif /* CONFIG_SHMEM */
 extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
                                        pgoff_t index, gfp_t gfp_mask);
index 3f22932e67a45ce7314651e720830c4083f8c458..f4199e758f97d5a90518d2336c955b3d3c4fe182 100644 (file)
@@ -7,7 +7,7 @@ struct mtd_partition;
  * struct flash_platform_data: board-specific flash data
  * @name: optional flash device name (eg, as used with mtdparts=)
  * @parts: optional array of mtd_partitions for static partitioning
- * @nr_parts: number of mtd_partitions for static partitoning
+ * @nr_parts: number of mtd_partitions for static partitioning
  * @type: optional flash device type (e.g. m25p80 vs m25p64), for use
  *     with chips that can't be queried for JEDEC or other IDs
  *
index 8a511c0985aafe0a18722c9dd701cf8326dcad59..20d157a518a7dcb14763f5bfd7e317c0a6537387 100644 (file)
@@ -204,8 +204,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
        kref_put(&h->ref, cd->cache_put);
 }
 
-static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
+static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h)
 {
+       if (!test_bit(CACHE_VALID, &h->flags))
+               return false;
+
        return  (h->expiry_time < seconds_since_boot()) ||
                (detail->flush_time >= h->last_refresh);
 }
@@ -227,6 +230,7 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd);
 extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
                                        umode_t, struct cache_detail *);
 extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
+extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *);
 
 /* Must store cache_detail in seq_file->private if using next three functions */
 extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
index cfda6adcf33cfcf3c28e46066ec294c6d2902389..245fc59b73247d744682c128bfcae1270e146c26 100644 (file)
@@ -109,6 +109,15 @@ struct rpcrdma_msg {
        } rm_body;
 };
 
+/*
+ * XDR sizes, in quads
+ */
+enum {
+       rpcrdma_fixed_maxsz     = 4,
+       rpcrdma_segment_maxsz   = 4,
+       rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz,
+};
+
 /*
  * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks
  */
index 7321ae933867566013a250623564d722d2800305..e770abeed32d7117c4f2d363f9d7370a60d2c55f 100644 (file)
@@ -400,10 +400,14 @@ struct svc_version {
        struct svc_procedure *  vs_proc;        /* per-procedure info */
        u32                     vs_xdrsize;     /* xdrsize needed for this version */
 
-       unsigned int            vs_hidden : 1,  /* Don't register with portmapper.
-                                                * Only used for nfsacl so far. */
-                               vs_rpcb_optnl:1;/* Don't care the result of register.
-                                                * Only used for nfsv4. */
+       /* Don't register with rpcbind */
+       bool                    vs_hidden;
+
+       /* Don't care if the rpcbind registration fails */
+       bool                    vs_rpcb_optnl;
+
+       /* Need xprt with congestion control */
+       bool                    vs_need_cong_ctrl;
 
        /* Override dispatch function (e.g. when caching replies).
         * A return value of 0 means drop the request. 
index 757fb963696c76b3ab24f754ff89424fb428fc79..b105f73e3ca26355b2ee8b32651b48526a945899 100644 (file)
@@ -70,7 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
-       struct list_head free;
+       struct list_head list;
        struct svc_rdma_op_ctxt *read_hdr;
        struct svc_rdma_fastreg_mr *frmr;
        int hdr_count;
@@ -78,7 +78,6 @@ struct svc_rdma_op_ctxt {
        struct ib_cqe cqe;
        struct ib_cqe reg_cqe;
        struct ib_cqe inv_cqe;
-       struct list_head dto_q;
        u32 byte_len;
        u32 position;
        struct svcxprt_rdma *xprt;
@@ -141,7 +140,8 @@ struct svcxprt_rdma {
        atomic_t             sc_sq_avail;       /* SQEs ready to be consumed */
        unsigned int         sc_sq_depth;       /* Depth of SQ */
        unsigned int         sc_rq_depth;       /* Depth of RQ */
-       u32                  sc_max_requests;   /* Forward credits */
+       __be32               sc_fc_credits;     /* Forward credits */
+       u32                  sc_max_requests;   /* Max requests */
        u32                  sc_max_bc_requests;/* Backward credits */
        int                  sc_max_req_size;   /* Size of each RQ WR buf */
 
@@ -171,7 +171,6 @@ struct svcxprt_rdma {
 
        wait_queue_head_t    sc_send_wait;      /* SQ exhaustion waitlist */
        unsigned long        sc_flags;
-       struct list_head     sc_dto_q;          /* DTO tasklet I/O pending Q */
        struct list_head     sc_read_complete_q;
        struct work_struct   sc_work;
 };
@@ -214,11 +213,7 @@ extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
 extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
 extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
                                            __be32, __be64, u32);
-extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
-                                            struct rpcrdma_msg *,
-                                            struct rpcrdma_msg *,
-                                            enum rpcrdma_proc);
-extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
+extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp);
 
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
index 7440290f64acd3694dfc5c17618c55f6253aae01..ddb7f94a9d06ecc48828b7b00230662b85768d64 100644 (file)
@@ -67,6 +67,7 @@ struct svc_xprt {
 #define XPT_CACHE_AUTH 11              /* cache auth info */
 #define XPT_LOCAL      12              /* connection from loopback interface */
 #define XPT_KILL_TEMP   13             /* call xpo_kill_temp_xprt before closing */
+#define XPT_CONG_CTRL  14              /* has congestion control */
 
        struct svc_serv         *xpt_server;    /* service for transport */
        atomic_t                xpt_reserved;   /* space on outq that is rsvd */
index 5a209b84fd9e48e937d6d5c44592038f3f65276d..c7bdf895179c92b6b318f31a0f39f4a1d6b93078 100644 (file)
@@ -61,6 +61,8 @@ struct timer_list {
 #define TIMER_ARRAYSHIFT       22
 #define TIMER_ARRAYMASK                0xFFC00000
 
+#define TIMER_TRACE_FLAGMASK   (TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE)
+
 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
                .entry = { .next = TIMER_ENTRY_STATIC },        \
                .function = (_function),                        \
index f431861f22f1d8fa9a8e5f1353e4774147ceaa2a..0468548acebfef5431ea7bfd6f565cfdfb73f348 100644 (file)
@@ -61,10 +61,18 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
                                        unsigned long from, unsigned long to,
                                        unsigned long len);
 
-extern void madvise_userfault_dontneed(struct vm_area_struct *vma,
-                                      struct vm_area_struct **prev,
-                                      unsigned long start,
-                                      unsigned long end);
+extern void userfaultfd_remove(struct vm_area_struct *vma,
+                              struct vm_area_struct **prev,
+                              unsigned long start,
+                              unsigned long end);
+
+extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+                                 unsigned long start, unsigned long end,
+                                 struct list_head *uf);
+extern void userfaultfd_unmap_complete(struct mm_struct *mm,
+                                      struct list_head *uf);
+
+extern void userfaultfd_exit(struct mm_struct *mm);
 
 #else /* CONFIG_USERFAULTFD */
 
@@ -112,12 +120,29 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
 {
 }
 
-static inline void madvise_userfault_dontneed(struct vm_area_struct *vma,
-                                             struct vm_area_struct **prev,
-                                             unsigned long start,
-                                             unsigned long end)
+static inline void userfaultfd_remove(struct vm_area_struct *vma,
+                                     struct vm_area_struct **prev,
+                                     unsigned long start,
+                                     unsigned long end)
 {
 }
+
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+                                        unsigned long start, unsigned long end,
+                                        struct list_head *uf)
+{
+       return 0;
+}
+
+static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
+                                             struct list_head *uf)
+{
+}
+
+static inline void userfaultfd_exit(struct mm_struct *mm)
+{
+}
+
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
index 35a4d8185b51cd83492743c043aea3ffa1eca9f9..a786e5e8973bff15e47c91b5ba8abbcf7e1bf4c8 100644 (file)
@@ -117,6 +117,7 @@ struct watchdog_device {
 #define WDOG_NO_WAY_OUT                1       /* Is 'nowayout' feature set ? */
 #define WDOG_STOP_ON_REBOOT    2       /* Should be stopped on reboot */
 #define WDOG_HW_RUNNING                3       /* True if HW watchdog running */
+#define WDOG_STOP_ON_UNREGISTER        4       /* Should be stopped on unregister */
        struct list_head deferred;
 };
 
@@ -151,6 +152,12 @@ static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd)
        set_bit(WDOG_STOP_ON_REBOOT, &wdd->status);
 }
 
+/* Use the following function to stop the watchdog when unregistering it */
+static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd)
+{
+       set_bit(WDOG_STOP_ON_UNREGISTER, &wdd->status);
+}
+
 /* Use the following function to check if a timeout value is invalid */
 static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t)
 {
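
A minimal usage sketch for the new helper (the foo_wdt names are hypothetical; only the two watchdog-core calls are real API). Requesting the stop before registration lets the core stop the hardware from watchdog_unregister_device():

    static int foo_wdt_probe(struct platform_device *pdev)
    {
            struct watchdog_device *wdd;

            wdd = devm_kzalloc(&pdev->dev, sizeof(*wdd), GFP_KERNEL);
            if (!wdd)
                    return -ENOMEM;
            wdd->info = &foo_wdt_info;              /* hypothetical */
            wdd->ops = &foo_wdt_ops;                /* hypothetical */
            watchdog_stop_on_unregister(wdd);       /* sets WDOG_STOP_ON_UNREGISTER */
            return watchdog_register_device(wdd);
    }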
index a26cc437293cffbd2259951563eafcd8f194c686..bde063cefd047c1bb42ac02a02522ae88863c723 100644 (file)
@@ -106,9 +106,9 @@ struct work_struct {
 #endif
 };
 
-#define WORK_DATA_INIT()       ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL)
+#define WORK_DATA_INIT()       ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
 #define WORK_DATA_STATIC_INIT()        \
-       ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
+       ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC))
 
 struct delayed_work {
        struct work_struct work;
index 5527d910ba3d12ee622cd23bd5aa8f62f2926043..a3c0cbd7c88824a297cd725e36c931c223c3e106 100644 (file)
@@ -46,7 +46,7 @@ enum writeback_sync_modes {
  */
 enum wb_reason {
        WB_REASON_BACKGROUND,
-       WB_REASON_TRY_TO_FREE_PAGES,
+       WB_REASON_VMSCAN,
        WB_REASON_SYNC,
        WB_REASON_PERIODIC,
        WB_REASON_LAPTOP_TIMER,
index e1006b391cdc53044fa99003ad354b82109d97ad..bee1404391dd5565abe2d324f4a196f0602fc032 100644 (file)
@@ -174,10 +174,10 @@ typedef void (*v4l2_ctrl_notify_fnc)(struct v4l2_ctrl *ctrl, void *priv);
  *             not freed when the control is deleted. Should this be needed
  *             then a new internal bitfield can be added to tell the framework
  *             to free this pointer.
- * @p_cur:     The control's current value represented via an union with
+ * @p_cur:     The control's current value represented via a union which
  *             provides a standard way of accessing control types
  *             through a pointer.
- * @p_new:     The control's new value represented via an union with provides
+ * @p_new:     The control's new value represented via a union which provides
  *             a standard way of accessing control types
  *             through a pointer.
  */
index c92dc03c852825fd3f92df6bbae0e7e77409c6d4..ead1aa6d003ef97b09847e8175b704da31c341d1 100644 (file)
@@ -1948,7 +1948,7 @@ struct cfg80211_deauth_request {
  * struct cfg80211_disassoc_request - Disassociation request data
  *
  * This structure provides information needed to complete IEEE 802.11
- * disassocation.
+ * disassociation.
  *
  * @bss: the BSS to disassociate from
  * @ie: Extra IEs to add to Disassociation frame or %NULL
index b9a08cd1d97d8486294197ccd72d3d4538f5e7ca..a3bab3c5ecfb302e5df2d9e5d5ec36e2c368ef9b 100644 (file)
@@ -3392,7 +3392,7 @@ enum ieee80211_reconfig_type {
  *     since there won't be any time to beacon before the switch anyway.
  * @pre_channel_switch: This is an optional callback that is called
  *     before a channel switch procedure is started (ie. when a STA
- *     gets a CSA or an userspace initiated channel-switch), allowing
+ *     gets a CSA or a userspace initiated channel-switch), allowing
  *     the driver to prepare for the channel switch.
  * @post_channel_switch: This is an optional callback that is called
  *     after a channel switch procedure is completed, allowing the
index 89f5bd4e1d5201c847ff77823b6b4159a741ef2f..0f1813c1368795994e012d00c607499879c130aa 100644 (file)
@@ -60,6 +60,7 @@
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
 #include <linux/uaccess.h>
+#include <linux/cgroup_rdma.h>
 
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
@@ -1356,6 +1357,12 @@ struct ib_fmr_attr {
 
 struct ib_umem;
 
+struct ib_rdmacg_object {
+#ifdef CONFIG_CGROUP_RDMA
+       struct rdma_cgroup      *cg;            /* owner rdma cgroup */
+#endif
+};
+
 struct ib_ucontext {
        struct ib_device       *device;
        struct list_head        pd_list;
@@ -1388,6 +1395,8 @@ struct ib_ucontext {
        struct list_head        no_private_counters;
        int                     odp_mrs_count;
 #endif
+
+       struct ib_rdmacg_object cg_obj;
 };
 
 struct ib_uobject {
@@ -1395,6 +1404,7 @@ struct ib_uobject {
        struct ib_ucontext     *context;        /* associated user context */
        void                   *object;         /* containing object */
        struct list_head        list;           /* link to context's list */
+       struct ib_rdmacg_object cg_obj;         /* rdmacg object */
        int                     id;             /* index into kernel idr */
        struct kref             ref;
        struct rw_semaphore     mutex;          /* protects .live */
@@ -1843,53 +1853,6 @@ struct ib_cache {
        struct ib_port_cache   *ports;
 };
 
-struct ib_dma_mapping_ops {
-       int             (*mapping_error)(struct ib_device *dev,
-                                        u64 dma_addr);
-       u64             (*map_single)(struct ib_device *dev,
-                                     void *ptr, size_t size,
-                                     enum dma_data_direction direction);
-       void            (*unmap_single)(struct ib_device *dev,
-                                       u64 addr, size_t size,
-                                       enum dma_data_direction direction);
-       u64             (*map_page)(struct ib_device *dev,
-                                   struct page *page, unsigned long offset,
-                                   size_t size,
-                                   enum dma_data_direction direction);
-       void            (*unmap_page)(struct ib_device *dev,
-                                     u64 addr, size_t size,
-                                     enum dma_data_direction direction);
-       int             (*map_sg)(struct ib_device *dev,
-                                 struct scatterlist *sg, int nents,
-                                 enum dma_data_direction direction);
-       void            (*unmap_sg)(struct ib_device *dev,
-                                   struct scatterlist *sg, int nents,
-                                   enum dma_data_direction direction);
-       int             (*map_sg_attrs)(struct ib_device *dev,
-                                       struct scatterlist *sg, int nents,
-                                       enum dma_data_direction direction,
-                                       unsigned long attrs);
-       void            (*unmap_sg_attrs)(struct ib_device *dev,
-                                         struct scatterlist *sg, int nents,
-                                         enum dma_data_direction direction,
-                                         unsigned long attrs);
-       void            (*sync_single_for_cpu)(struct ib_device *dev,
-                                              u64 dma_handle,
-                                              size_t size,
-                                              enum dma_data_direction dir);
-       void            (*sync_single_for_device)(struct ib_device *dev,
-                                                 u64 dma_handle,
-                                                 size_t size,
-                                                 enum dma_data_direction dir);
-       void            *(*alloc_coherent)(struct ib_device *dev,
-                                          size_t size,
-                                          u64 *dma_handle,
-                                          gfp_t flag);
-       void            (*free_coherent)(struct ib_device *dev,
-                                        size_t size, void *cpu_addr,
-                                        u64 dma_handle);
-};
-
 struct iw_cm_verbs;
 
 struct ib_port_immutable {
@@ -1900,8 +1863,6 @@ struct ib_port_immutable {
 };
 
 struct ib_device {
-       struct device                *dma_device;
-
        char                          name[IB_DEVICE_NAME_MAX];
 
        struct list_head              event_handler_list;
@@ -2151,7 +2112,6 @@ struct ib_device {
                                                           struct ib_rwq_ind_table_init_attr *init_attr,
                                                           struct ib_udata *udata);
        int                        (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
-       struct ib_dma_mapping_ops   *dma_ops;
 
        struct module               *owner;
        struct device                dev;
@@ -2178,6 +2138,10 @@ struct ib_device {
        struct attribute_group       *hw_stats_ag;
        struct rdma_hw_stats         *hw_stats;
 
+#ifdef CONFIG_CGROUP_RDMA
+       struct rdmacg_device         cg_device;
+#endif
+
        /**
         * The following mandatory functions are used only at device
         * registration.  Keep functions such as these at the end of this
@@ -3043,9 +3007,7 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
  */
 static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->mapping_error(dev, dma_addr);
-       return dma_mapping_error(dev->dma_device, dma_addr);
+       return dma_mapping_error(&dev->dev, dma_addr);
 }
 
 /**
@@ -3059,9 +3021,7 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
                                    void *cpu_addr, size_t size,
                                    enum dma_data_direction direction)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->map_single(dev, cpu_addr, size, direction);
-       return dma_map_single(dev->dma_device, cpu_addr, size, direction);
+       return dma_map_single(&dev->dev, cpu_addr, size, direction);
 }
 
 /**
@@ -3075,28 +3035,7 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
                                       u64 addr, size_t size,
                                       enum dma_data_direction direction)
 {
-       if (dev->dma_ops)
-               dev->dma_ops->unmap_single(dev, addr, size, direction);
-       else
-               dma_unmap_single(dev->dma_device, addr, size, direction);
-}
-
-static inline u64 ib_dma_map_single_attrs(struct ib_device *dev,
-                                         void *cpu_addr, size_t size,
-                                         enum dma_data_direction direction,
-                                         unsigned long dma_attrs)
-{
-       return dma_map_single_attrs(dev->dma_device, cpu_addr, size,
-                                   direction, dma_attrs);
-}
-
-static inline void ib_dma_unmap_single_attrs(struct ib_device *dev,
-                                            u64 addr, size_t size,
-                                            enum dma_data_direction direction,
-                                            unsigned long dma_attrs)
-{
-       return dma_unmap_single_attrs(dev->dma_device, addr, size,
-                                     direction, dma_attrs);
+       dma_unmap_single(&dev->dev, addr, size, direction);
 }
 
 /**
@@ -3113,9 +3052,7 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
                                  size_t size,
                                         enum dma_data_direction direction)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->map_page(dev, page, offset, size, direction);
-       return dma_map_page(dev->dma_device, page, offset, size, direction);
+       return dma_map_page(&dev->dev, page, offset, size, direction);
 }
 
 /**
@@ -3129,10 +3066,7 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
                                     u64 addr, size_t size,
                                     enum dma_data_direction direction)
 {
-       if (dev->dma_ops)
-               dev->dma_ops->unmap_page(dev, addr, size, direction);
-       else
-               dma_unmap_page(dev->dma_device, addr, size, direction);
+       dma_unmap_page(&dev->dev, addr, size, direction);
 }
 
 /**
@@ -3146,9 +3080,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
                                struct scatterlist *sg, int nents,
                                enum dma_data_direction direction)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->map_sg(dev, sg, nents, direction);
-       return dma_map_sg(dev->dma_device, sg, nents, direction);
+       return dma_map_sg(&dev->dev, sg, nents, direction);
 }
 
 /**
@@ -3162,10 +3094,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
                                   struct scatterlist *sg, int nents,
                                   enum dma_data_direction direction)
 {
-       if (dev->dma_ops)
-               dev->dma_ops->unmap_sg(dev, sg, nents, direction);
-       else
-               dma_unmap_sg(dev->dma_device, sg, nents, direction);
+       dma_unmap_sg(&dev->dev, sg, nents, direction);
 }
 
 static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
@@ -3173,12 +3102,7 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
                                      enum dma_data_direction direction,
                                      unsigned long dma_attrs)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->map_sg_attrs(dev, sg, nents, direction,
-                                                 dma_attrs);
-       else
-               return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
-                                       dma_attrs);
+       return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
 }
 
 static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
@@ -3186,12 +3110,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
                                         enum dma_data_direction direction,
                                         unsigned long dma_attrs)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->unmap_sg_attrs(dev, sg, nents, direction,
-                                                 dma_attrs);
-       else
-               dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
-                                  dma_attrs);
+       dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
 }
 /**
  * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
@@ -3233,10 +3152,7 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
                                              size_t size,
                                              enum dma_data_direction dir)
 {
-       if (dev->dma_ops)
-               dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir);
-       else
-               dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+       dma_sync_single_for_cpu(&dev->dev, addr, size, dir);
 }
 
 /**
@@ -3251,10 +3167,7 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
                                                 size_t size,
                                                 enum dma_data_direction dir)
 {
-       if (dev->dma_ops)
-               dev->dma_ops->sync_single_for_device(dev, addr, size, dir);
-       else
-               dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+       dma_sync_single_for_device(&dev->dev, addr, size, dir);
 }
 
 /**
@@ -3266,19 +3179,10 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
  */
 static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
                                           size_t size,
-                                          u64 *dma_handle,
+                                          dma_addr_t *dma_handle,
                                           gfp_t flag)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag);
-       else {
-               dma_addr_t handle;
-               void *ret;
-
-               ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag);
-               *dma_handle = handle;
-               return ret;
-       }
+       return dma_alloc_coherent(&dev->dev, size, dma_handle, flag);
 }
 
 /**
@@ -3290,12 +3194,9 @@ static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
  */
 static inline void ib_dma_free_coherent(struct ib_device *dev,
                                        size_t size, void *cpu_addr,
-                                       u64 dma_handle)
+                                       dma_addr_t dma_handle)
 {
-       if (dev->dma_ops)
-               dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-       else
-               dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+       dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle);
 }
 
 /**
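
With struct ib_dma_mapping_ops gone, the ib_dma_*() helpers above become thin wrappers over the generic DMA API on &dev->dev. A hedged consumer-side sketch (example_map_buf is illustrative, not from this patch); callers are unchanged, only the dispatch underneath is simpler:

    static int example_map_buf(struct ib_device *ibdev, void *buf,
                               size_t len, u64 *dma_addr)
    {
            /* now resolves directly to dma_map_single(&ibdev->dev, ...) */
            *dma_addr = ib_dma_map_single(ibdev, buf, len, DMA_TO_DEVICE);
            if (ib_dma_mapping_error(ibdev, *dma_addr))
                    return -ENOMEM;
            return 0;
    }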
old mode 100755 (executable)
new mode 100644 (file)
index 88d18a8ceb59f9c6e3c5630a491cf6d3c91cf73c..a3c3cab643a9528dd5f8ad50dfac29b53384f1db 100644 (file)
@@ -184,7 +184,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
 
 TRACE_EVENT_CONDITION(btrfs_get_extent,
 
-       TP_PROTO(struct btrfs_root *root, struct inode *inode,
+       TP_PROTO(struct btrfs_root *root, struct btrfs_inode *inode,
                 struct extent_map *map),
 
        TP_ARGS(root, inode, map),
index 593f586545eba9477006405d288a731f67e4372e..39123c06a5661316a80dd677b43b1e581a17e2e7 100644 (file)
@@ -119,6 +119,7 @@ enum rxrpc_recvmsg_trace {
        rxrpc_recvmsg_full,
        rxrpc_recvmsg_hole,
        rxrpc_recvmsg_next,
+       rxrpc_recvmsg_requeue,
        rxrpc_recvmsg_return,
        rxrpc_recvmsg_terminal,
        rxrpc_recvmsg_to_be_accepted,
@@ -277,6 +278,7 @@ enum rxrpc_congest_change {
        EM(rxrpc_recvmsg_full,                  "FULL") \
        EM(rxrpc_recvmsg_hole,                  "HOLE") \
        EM(rxrpc_recvmsg_next,                  "NEXT") \
+       EM(rxrpc_recvmsg_requeue,               "REQU") \
        EM(rxrpc_recvmsg_return,                "RETN") \
        EM(rxrpc_recvmsg_terminal,              "TERM") \
        EM(rxrpc_recvmsg_to_be_accepted,        "TBAC") \
index 1bca99dbb98f868ee2b066c5b80f55a59ecee086..80787eafba99f485f4dd5f32dc72e0e7902dd5a1 100644 (file)
@@ -36,6 +36,13 @@ DEFINE_EVENT(timer_class, timer_init,
        TP_ARGS(timer)
 );
 
+#define decode_timer_flags(flags)                      \
+       __print_flags(flags, "|",                       \
+               {  TIMER_MIGRATING,     "M" },          \
+               {  TIMER_DEFERRABLE,    "D" },          \
+               {  TIMER_PINNED,        "P" },          \
+               {  TIMER_IRQSAFE,       "I" })
+
 /**
  * timer_start - called when the timer is started
  * @timer:     pointer to struct timer_list
@@ -65,9 +72,12 @@ TRACE_EVENT(timer_start,
                __entry->flags          = flags;
        ),
 
-       TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x",
+       TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] cpu=%u idx=%u flags=%s",
                  __entry->timer, __entry->function, __entry->expires,
-                 (long)__entry->expires - __entry->now, __entry->flags)
+                 (long)__entry->expires - __entry->now,
+                 __entry->flags & TIMER_CPUMASK,
+                 __entry->flags >> TIMER_ARRAYSHIFT,
+                 decode_timer_flags(__entry->flags & TIMER_TRACE_FLAGMASK))
 );
 
 /**
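
Given the TP_printk() format above, a timer_start record now also carries the decoded CPU, wheel index and flag letters, shaped roughly like this (all values illustrative):

    timer_start: timer=ffff88807c2f8a00 function=delayed_work_timer_fn expires=4294939421 [timeout=100] cpu=1 idx=73 flags=D|I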
index 2ccd9ccbf9efeaa7794eda0ca20589d42cf67784..7bd8783a590ff99aed51310d4e4e4f05f2f7cb38 100644 (file)
@@ -31,7 +31,7 @@
 
 #define WB_WORK_REASON                                                 \
        EM( WB_REASON_BACKGROUND,               "background")           \
-       EM( WB_REASON_TRY_TO_FREE_PAGES,        "try_to_free_pages")    \
+       EM( WB_REASON_VMSCAN,                   "vmscan")               \
        EM( WB_REASON_SYNC,                     "sync")                 \
        EM( WB_REASON_PERIODIC,                 "periodic")             \
        EM( WB_REASON_LAPTOP_TIMER,             "laptop_timer")         \
index 021ed331dd716d0b663cb9b77d51595999155047..744b3d0609687b82bb198ed76c158417c22f09f9 100644 (file)
@@ -113,17 +113,13 @@ struct autofs_dev_ioctl {
 
 static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
 {
-       memset(in, 0, sizeof(struct autofs_dev_ioctl));
+       memset(in, 0, AUTOFS_DEV_IOCTL_SIZE);
        in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
        in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
-       in->size = sizeof(struct autofs_dev_ioctl);
+       in->size = AUTOFS_DEV_IOCTL_SIZE;
        in->ioctlfd = -1;
 }
 
-/*
- * If you change this make sure you make the corresponding change
- * to autofs-dev-ioctl.c:lookup_ioctl()
- */
 enum {
        /* Get various version info */
        AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
@@ -160,8 +156,6 @@ enum {
        AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
 };
 
-#define AUTOFS_IOCTL 0x93
-
 #define AUTOFS_DEV_IOCTL_VERSION \
        _IOWR(AUTOFS_IOCTL, \
              AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
index 1bfc3ed8b2841d7a5c099f1a2bd3310720720a6a..aa63451ef20aacdf3677dca02b990a3e8e73cdc0 100644 (file)
@@ -61,12 +61,23 @@ struct autofs_packet_expire {
        char name[NAME_MAX+1];
 };
 
-#define AUTOFS_IOC_READY      _IO(0x93, 0x60)
-#define AUTOFS_IOC_FAIL       _IO(0x93, 0x61)
-#define AUTOFS_IOC_CATATONIC  _IO(0x93, 0x62)
-#define AUTOFS_IOC_PROTOVER   _IOR(0x93, 0x63, int)
-#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93, 0x64, compat_ulong_t)
-#define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93, 0x64, unsigned long)
-#define AUTOFS_IOC_EXPIRE     _IOR(0x93, 0x65, struct autofs_packet_expire)
+#define AUTOFS_IOCTL 0x93
+
+enum {
+       AUTOFS_IOC_READY_CMD = 0x60,
+       AUTOFS_IOC_FAIL_CMD,
+       AUTOFS_IOC_CATATONIC_CMD,
+       AUTOFS_IOC_PROTOVER_CMD,
+       AUTOFS_IOC_SETTIMEOUT_CMD,
+       AUTOFS_IOC_EXPIRE_CMD,
+};
+
+#define AUTOFS_IOC_READY        _IO(AUTOFS_IOCTL, AUTOFS_IOC_READY_CMD)
+#define AUTOFS_IOC_FAIL         _IO(AUTOFS_IOCTL, AUTOFS_IOC_FAIL_CMD)
+#define AUTOFS_IOC_CATATONIC    _IO(AUTOFS_IOCTL, AUTOFS_IOC_CATATONIC_CMD)
+#define AUTOFS_IOC_PROTOVER     _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOVER_CMD, int)
+#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, compat_ulong_t)
+#define AUTOFS_IOC_SETTIMEOUT   _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, unsigned long)
+#define AUTOFS_IOC_EXPIRE       _IOR(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_CMD, struct autofs_packet_expire)
 
 #endif /* _UAPI_LINUX_AUTO_FS_H */
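
Because the enum members keep the historical command numbers, the generated ioctl codes are byte-for-byte identical to the old literals. A small user-space spot check (hypothetical test program, assuming installed uapi headers):

    #include <linux/auto_fs.h>
    #include <sys/ioctl.h>
    #include <assert.h>

    int main(void)
    {
            assert(AUTOFS_IOC_READY    == _IO(0x93, 0x60));
            assert(AUTOFS_IOC_PROTOVER == _IOR(0x93, 0x63, int));
            return 0;
    }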
index 8f8f1bdcca8c07c8b237daf71525768d4685fc0f..7c6da423d54ee6f3298cdf479d91a900cb5b2c4c 100644 (file)
@@ -148,10 +148,16 @@ union autofs_v5_packet_union {
        autofs_packet_expire_direct_t expire_direct;
 };
 
-#define AUTOFS_IOC_EXPIRE_MULTI                _IOW(0x93, 0x66, int)
-#define AUTOFS_IOC_EXPIRE_INDIRECT     AUTOFS_IOC_EXPIRE_MULTI
-#define AUTOFS_IOC_EXPIRE_DIRECT       AUTOFS_IOC_EXPIRE_MULTI
-#define AUTOFS_IOC_PROTOSUBVER         _IOR(0x93, 0x67, int)
-#define AUTOFS_IOC_ASKUMOUNT           _IOR(0x93, 0x70, int)
+enum {
+       AUTOFS_IOC_EXPIRE_MULTI_CMD = 0x66, /* AUTOFS_IOC_EXPIRE_CMD + 1 */
+       AUTOFS_IOC_PROTOSUBVER_CMD,
+       AUTOFS_IOC_ASKUMOUNT_CMD = 0x70, /* AUTOFS_DEV_IOCTL_VERSION_CMD - 1 */
+};
+
+#define AUTOFS_IOC_EXPIRE_MULTI    _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int)
+#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI
+#define AUTOFS_IOC_EXPIRE_DIRECT   AUTOFS_IOC_EXPIRE_MULTI
+#define AUTOFS_IOC_PROTOSUBVER     _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int)
+#define AUTOFS_IOC_ASKUMOUNT       _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int)
 
 #endif /* _LINUX_AUTO_FS4_H */
index d0a2b8e89813934725dd217da8081cff7d2ce813..bbd5116ea7397e4020ecc190b2daebb0dbf53eb0 100644 (file)
@@ -18,6 +18,8 @@
 #ifndef _LINUX_MQUEUE_H
 #define _LINUX_MQUEUE_H
 
+#include <linux/types.h>
+
 #define MQ_PRIO_MAX    32768
 /* per-uid limit of kernel memory used by mqueue, in bytes */
 #define MQ_BYTES_MAX   819200
index 7550e9176a54ea839ec792d13f223ac0be82f1a1..c111a91adcc05ab5de87f561aef99e50fbe94666 100644 (file)
@@ -3,7 +3,6 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
-#include <linux/sysctl.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 
index 3efc0ca18345e397e6c9704f2777d61b192ed455..79da349f10605be2aa890e10b563008082cfd9ff 100644 (file)
@@ -2,6 +2,7 @@
 #define _UAPI_XT_HASHLIMIT_H
 
 #include <linux/types.h>
+#include <linux/limits.h>
 #include <linux/if.h>
 
 /* timings are in milliseconds. */
index 0df7bd5d2fb17cf4b9df3b300451cce13075d194..c3be256107c6421432e8a63b041306e224874c2e 100644 (file)
@@ -32,7 +32,8 @@
 #define NFSEXP_ASYNC           0x0010
 #define NFSEXP_GATHERED_WRITES 0x0020
 #define NFSEXP_NOREADDIRPLUS    0x0040
-/* 80 100 currently unused */
+#define NFSEXP_SECURITY_LABEL  0x0080
+/* 0x100 currently unused */
 #define NFSEXP_NOHIDE          0x0200
 #define NFSEXP_NOSUBTREECHECK  0x0400
 #define        NFSEXP_NOAUTHNLM        0x0800          /* Don't authenticate NLM requests - just trust */
@@ -53,7 +54,7 @@
 #define NFSEXP_PNFS            0x20000
 
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS                0x3FE7F
+#define NFSEXP_ALLFLAGS                0x3FEFF
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS   (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
index 9ac4b68c54d18ed806534b1de726a1ea4b86ca7d..c055947c5c989fa7e399a7b0dcaba8640014b548 100644 (file)
  * means the userland is reading).
  */
 #define UFFD_API ((__u64)0xAA)
-#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK |           \
+#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT |           \
+                          UFFD_FEATURE_EVENT_FORK |            \
                           UFFD_FEATURE_EVENT_REMAP |           \
-                          UFFD_FEATURE_EVENT_MADVDONTNEED |    \
+                          UFFD_FEATURE_EVENT_REMOVE |          \
+                          UFFD_FEATURE_EVENT_UNMAP |           \
                           UFFD_FEATURE_MISSING_HUGETLBFS |     \
                           UFFD_FEATURE_MISSING_SHMEM)
 #define UFFD_API_IOCTLS                                \
@@ -92,7 +94,7 @@ struct uffd_msg {
                struct {
                        __u64   start;
                        __u64   end;
-               } madv_dn;
+               } remove;
 
                struct {
                        /* unused reserved fields */
@@ -109,7 +111,9 @@ struct uffd_msg {
 #define UFFD_EVENT_PAGEFAULT   0x12
 #define UFFD_EVENT_FORK                0x13
 #define UFFD_EVENT_REMAP       0x14
-#define UFFD_EVENT_MADVDONTNEED        0x15
+#define UFFD_EVENT_REMOVE      0x15
+#define UFFD_EVENT_UNMAP       0x16
+#define UFFD_EVENT_EXIT                0x17
 
 /* flags for UFFD_EVENT_PAGEFAULT */
 #define UFFD_PAGEFAULT_FLAG_WRITE      (1<<0)  /* If this was a write fault */
@@ -155,9 +159,11 @@ struct uffdio_api {
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP         (1<<0)
 #define UFFD_FEATURE_EVENT_FORK                        (1<<1)
 #define UFFD_FEATURE_EVENT_REMAP               (1<<2)
-#define UFFD_FEATURE_EVENT_MADVDONTNEED                (1<<3)
+#define UFFD_FEATURE_EVENT_REMOVE              (1<<3)
 #define UFFD_FEATURE_MISSING_HUGETLBFS         (1<<4)
 #define UFFD_FEATURE_MISSING_SHMEM             (1<<5)
+#define UFFD_FEATURE_EVENT_UNMAP               (1<<6)
+#define UFFD_FEATURE_EVENT_EXIT                        (1<<7)
        __u64 features;
 
        __u64 ioctls;
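
On the user-space side the new events are opt-in through the existing UFFDIO_API handshake. A hedged sketch (assumes a kernel with this patch; error handling trimmed):

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>

    static int open_uffd_with_unmap_events(void)
    {
            int fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
            struct uffdio_api api = {
                    .api = UFFD_API,
                    .features = UFFD_FEATURE_EVENT_UNMAP | UFFD_FEATURE_EVENT_EXIT,
            };

            if (fd < 0 || ioctl(fd, UFFDIO_API, &api) < 0)
                    return -1;
            return fd;      /* read() now also yields UFFD_EVENT_UNMAP/_EXIT msgs */
    }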
index 95251512e2c4cbffa40c61443a659607b90e81bd..44b587b49904f5f55f40a81a02d6464da8aafed3 100644 (file)
@@ -18,7 +18,7 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
        return PARAVIRT_LAZY_NONE;
 }
 
-extern struct dma_map_ops *xen_dma_ops;
+extern const struct dma_map_ops *xen_dma_ops;
 
 #ifdef CONFIG_XEN
 void __init xen_early_init(void);
index 56806bc90c2fbe0227a87b8c3bec2c38256d95a5..7fb7112d667c41247f46e1e00ab84de7669a11ba 100644 (file)
@@ -181,7 +181,7 @@ struct grant_entry_header {
 };
 
 /*
- * Version 2 of the grant entry structure, here is an union because three
+ * Version 2 of the grant entry structure, here is a union because three
 * different types are supported: full_page, sub_page and transitive.
  */
 union grant_entry_v2 {
index 8c39615165b75d52490838ccb52a67ca1c791c18..a92f27da4a272ec873707bc4b7a68e46a4340b86 100644 (file)
@@ -1078,6 +1078,16 @@ config CGROUP_PIDS
          since the PIDs limit only affects a process's ability to fork, not to
          attach to a cgroup.
 
+config CGROUP_RDMA
+       bool "RDMA controller"
+       help
+         Provides enforcement of RDMA resources defined by the IB stack.
+         It is fairly easy for consumers to exhaust RDMA resources, which
+         can result in resource unavailability for other consumers.
+         The RDMA controller is designed to stop this from happening.
+         Attaching processes with active RDMA resources to the cgroup
+         hierarchy is allowed even if doing so crosses the hierarchy's limit.
+
 config CGROUP_FREEZER
        bool "Freezer controller"
        help
index b32ad7d97ac94f52a0c50acd2a904e8a0c2f888d..981f286c1d16ab57dff10cb10099f573b62f07b9 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/dirent.h>
 #include <linux/syscalls.h>
 #include <linux/utime.h>
+#include <linux/file.h>
 
 static ssize_t __init xwrite(int fd, const char *p, size_t count)
 {
@@ -647,6 +648,7 @@ static int __init populate_rootfs(void)
                        printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
                free_initrd();
 #endif
+               flush_delayed_fput();
                /*
                 * Try loading default modules from initramfs.  This gives
                 * us a chance to load before device_initcalls.
index 24ea48745061aff8bf529dfa49706757356c27af..ae9f2008fb86834f20d8160936601ee908740a66 100644 (file)
@@ -71,7 +71,6 @@
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/perf_event.h>
-#include <linux/file.h>
 #include <linux/ptrace.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
@@ -83,6 +82,7 @@
 #include <linux/proc_ns.h>
 #include <linux/io.h>
 #include <linux/cache.h>
+#include <linux/rodata_test.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -554,7 +554,7 @@ asmlinkage __visible void __init start_kernel(void)
        if (WARN(!irqs_disabled(),
                 "Interrupts were enabled *very* early, fixing it\n"))
                local_irq_disable();
-       idr_init_cache();
+       radix_tree_init();
 
        /*
         * Allow workqueue creation and work item queueing/cancelling
@@ -569,7 +569,6 @@ asmlinkage __visible void __init start_kernel(void)
        trace_init();
 
        context_tracking_init();
-       radix_tree_init();
        /* init some links before init_ISA_irqs() */
        early_irq_init();
        init_IRQ();
@@ -936,9 +935,10 @@ __setup("rodata=", set_debug_rodata);
 #ifdef CONFIG_STRICT_KERNEL_RWX
 static void mark_readonly(void)
 {
-       if (rodata_enabled)
+       if (rodata_enabled) {
                mark_rodata_ro();
-       else
+               rodata_test();
+       } else
                pr_info("Kernel memory protection disabled.\n");
 }
 #else
@@ -960,8 +960,6 @@ static int __ref kernel_init(void *unused)
        system_state = SYSTEM_RUNNING;
        numa_default_policy();
 
-       flush_delayed_fput();
-
        rcu_end_inkernel_boot();
 
        if (ramdisk_execute_command) {
index 7a2d8f0c8ae506d631bf3fc455f53b042ef29401..4fdd970314315a6cd29df52902a12759aff41869 100644 (file)
@@ -558,6 +558,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
  */
 static int wq_sleep(struct mqueue_inode_info *info, int sr,
                    ktime_t *timeout, struct ext_wait_queue *ewp)
+       __releases(&info->lock)
 {
        int retval;
        signed long time;
index 3ec5742b5640f5d265ea8f0d9b8f986cf25b8f1c..e468cd1c12f0d6bb6d78fc583d4fb83e13a2c927 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -158,23 +158,43 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #define SEMMSL_FAST    256 /* 512 bytes on stack */
 #define SEMOPM_FAST    64  /* ~ 372 bytes on stack */
 
+/*
+ * Switching from the mode suitable for simple ops
+ * to the mode for complex ops is costly. Therefore,
+ * use some hysteresis.
+ */
+#define USE_GLOBAL_LOCK_HYSTERESIS     10
+
 /*
  * Locking:
  * a) global sem_lock() for read/write
  *     sem_undo.id_next,
  *     sem_array.complex_count,
- *     sem_array.complex_mode
  *     sem_array.pending{_alter,_const},
  *     sem_array.sem_undo
  *
  * b) global or semaphore sem_lock() for read/write:
  *     sem_array.sem_base[i].pending_{const,alter}:
- *     sem_array.complex_mode (for read)
  *
  * c) special:
  *     sem_undo_list.list_proc:
  *     * undo_list->lock for write
  *     * rcu for read
+ *     use_global_lock:
+ *     * global sem_lock() for write
+ *     * either local or global sem_lock() for read.
+ *
+ * Memory ordering:
+ * Most ordering is enforced by using spin_lock() and spin_unlock().
+ * The special case is use_global_lock:
+ * Setting it from non-zero to 0 is a RELEASE, this is ensured by
+ * using smp_store_release().
+ * Testing if it is non-zero is an ACQUIRE, this is ensured by using
+ * smp_load_acquire().
+ * Setting it from 0 to non-zero must be ordered with regard to
+ * this smp_load_acquire(); this is guaranteed because the smp_load_acquire()
+ * is inside a spin_lock(), and after a write from 0 to non-zero a
+ * spin_lock()+spin_unlock() is done.
  */
 
 #define sc_semmsl      sem_ctls[0]
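
The RELEASE/ACQUIRE pairing described above, distilled to its two sides (a reading aid extracted from the hunks below, not additional code):

    /* writer, complexmode_tryleave(): the global-lock mode ends */
    smp_store_release(&sma->use_global_lock, 0);

    /* reader, sem_lock() fast path: per-semaphore lock first, then check */
    spin_lock(&sem->lock);
    if (!smp_load_acquire(&sma->use_global_lock))
            return sops->sem_num;   /* simple-op locking is safe */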
@@ -273,29 +293,22 @@ static void complexmode_enter(struct sem_array *sma)
        int i;
        struct sem *sem;
 
-       if (sma->complex_mode)  {
-               /* We are already in complex_mode. Nothing to do */
+       if (sma->use_global_lock > 0)  {
+               /*
+                * We are already in global lock mode.
+                * Nothing to do, just reset the
+                * counter until we return to simple mode.
+                */
+               sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
                return;
        }
-
-       /* We need a full barrier after seting complex_mode:
-        * The write to complex_mode must be visible
-        * before we read the first sem->lock spinlock state.
-        */
-       smp_store_mb(sma->complex_mode, true);
+       sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 
        for (i = 0; i < sma->sem_nsems; i++) {
                sem = sma->sem_base + i;
-               spin_unlock_wait(&sem->lock);
+               spin_lock(&sem->lock);
+               spin_unlock(&sem->lock);
        }
-       /*
-        * spin_unlock_wait() is not a memory barriers, it is only a
-        * control barrier. The code must pair with spin_unlock(&sem->lock),
-        * thus just the control barrier is insufficient.
-        *
-        * smp_rmb() is sufficient, as writes cannot pass the control barrier.
-        */
-       smp_rmb();
 }
 
 /*
@@ -310,13 +323,17 @@ static void complexmode_tryleave(struct sem_array *sma)
                 */
                return;
        }
-       /*
-        * Immediately after setting complex_mode to false,
-        * a simple op can start. Thus: all memory writes
-        * performed by the current operation must be visible
-        * before we set complex_mode to false.
-        */
-       smp_store_release(&sma->complex_mode, false);
+       if (sma->use_global_lock == 1) {
+               /*
+                * Immediately after setting use_global_lock to 0,
+                * a simple op can start. Thus: all memory writes
+                * performed by the current operation must be visible
+                * before we set use_global_lock to 0.
+                */
+               smp_store_release(&sma->use_global_lock, 0);
+       } else {
+               sma->use_global_lock--;
+       }
 }
 
 #define SEM_GLOBAL_LOCK        (-1)
@@ -346,30 +363,23 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
         * Optimized locking is possible if no complex operation
         * is either enqueued or processed right now.
         *
-        * Both facts are tracked by complex_mode.
+        * Both facts are tracked by use_global_lock.
         */
        sem = sma->sem_base + sops->sem_num;
 
        /*
-        * Initial check for complex_mode. Just an optimization,
+        * Initial check for use_global_lock. Just an optimization,
         * no locking, no memory barrier.
         */
-       if (!sma->complex_mode) {
+       if (!sma->use_global_lock) {
                /*
                 * It appears that no complex operation is around.
                 * Acquire the per-semaphore lock.
                 */
                spin_lock(&sem->lock);
 
-               /*
-                * See 51d7d5205d33
-                * ("powerpc: Add smp_mb() to arch_spin_is_locked()"):
-                * A full barrier is required: the write of sem->lock
-                * must be visible before the read is executed
-                */
-               smp_mb();
-
-               if (!smp_load_acquire(&sma->complex_mode)) {
+               /* pairs with smp_store_release() */
+               if (!smp_load_acquire(&sma->use_global_lock)) {
                        /* fast path successful! */
                        return sops->sem_num;
                }
@@ -379,19 +389,26 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
        /* slow path: acquire the full lock */
        ipc_lock_object(&sma->sem_perm);
 
-       if (sma->complex_count == 0) {
-               /* False alarm:
-                * There is no complex operation, thus we can switch
-                * back to the fast path.
+       if (sma->use_global_lock == 0) {
+               /*
+                * The use_global_lock mode ended while we waited for
+                * sma->sem_perm.lock. Thus we must switch to locking
+                * with sem->lock.
+                * Unlike in the fast path, there is no need to recheck
+                * sma->use_global_lock after we have acquired sem->lock:
+                * We own sma->sem_perm.lock, thus use_global_lock cannot
+                * change.
                 */
                spin_lock(&sem->lock);
+
                ipc_unlock_object(&sma->sem_perm);
                return sops->sem_num;
        } else {
-               /* Not a false alarm, thus complete the sequence for a
-                * full lock.
+               /*
+                * Not a false alarm, thus continue to use the global lock
+                * mode. No need for complexmode_enter(); this was done by
+                * the caller that has set use_global_lock to non-zero.
                 */
-               complexmode_enter(sma);
                return SEM_GLOBAL_LOCK;
        }
 }
@@ -495,7 +512,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
        }
 
        sma->complex_count = 0;
-       sma->complex_mode = true; /* dropped by sem_unlock below */
+       sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
        INIT_LIST_HEAD(&sma->pending_alter);
        INIT_LIST_HEAD(&sma->pending_const);
        INIT_LIST_HEAD(&sma->list_id);
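
Net effect of the hysteresis, sketched roughly as a lifecycle of the counter (USE_GLOBAL_LOCK_HYSTERESIS == 10 from this patch):

    /*
     * complex semop          -> complexmode_enter(): use_global_lock = 10
     * next 9 simple semops   -> still global mode; tryleave counts 10 -> 1
     * following simple semop -> counter hits 1: smp_store_release(..., 0)
     * later simple semops    -> fast path, per-semaphore spinlock only
     */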
index 81203e8ba013597895444a02cdd90c5798f70c34..06ea9ef7f54a77267ebe9405c998884fa3d7df73 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -374,12 +374,12 @@ void exit_shm(struct task_struct *task)
        up_write(&shm_ids(ns).rwsem);
 }
 
-static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int shm_fault(struct vm_fault *vmf)
 {
-       struct file *file = vma->vm_file;
+       struct file *file = vmf->vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
 
-       return sfd->vm_ops->fault(vma, vmf);
+       return sfd->vm_ops->fault(vmf);
 }
 
 #ifdef CONFIG_NUMA
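
The same signature change ripples to every .fault implementation. A hypothetical handler in the post-patch style (foo_fault and foo_lookup_page are illustrative, not real kernel symbols):

    static int foo_fault(struct vm_fault *vmf)
    {
            /* the VMA now travels inside struct vm_fault */
            struct file *file = vmf->vma->vm_file;

            vmf->page = foo_lookup_page(file, vmf->pgoff);  /* assumed helper */
            return vmf->page ? 0 : VM_FAULT_SIGBUS;
    }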
@@ -1091,8 +1091,8 @@ out_unlock1:
  * "raddr" thing points to kernel space, and there has to be a wrapper around
  * this.
  */
-long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
-             unsigned long shmlba)
+long do_shmat(int shmid, char __user *shmaddr, int shmflg,
+             ulong *raddr, unsigned long shmlba)
 {
        struct shmid_kernel *shp;
        unsigned long addr;
@@ -1113,8 +1113,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
                goto out;
        else if ((addr = (ulong)shmaddr)) {
                if (addr & (shmlba - 1)) {
-                       if (shmflg & SHM_RND)
-                               addr &= ~(shmlba - 1);     /* round down */
+                       /*
+                        * Round down to the nearest multiple of shmlba.
+                        * For sane do_mmap_pgoff() parameters, avoid
+                        * round-downs that would map the nil page as if MAP_FIXED.
+                        */
+                       if ((shmflg & SHM_RND) && addr >= shmlba)
+                               addr &= ~(shmlba - 1);
                        else
 #ifndef __ARCH_FORCE_SHMLBA
                                if (addr & ~PAGE_MASK)
@@ -1222,7 +1227,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
                        goto invalid;
        }
 
-       addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
+       addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
        *raddr = addr;
        err = 0;
        if (IS_ERR_VALUE(addr))
@@ -1329,7 +1334,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
                         */
                        file = vma->vm_file;
                        size = i_size_read(file_inode(vma->vm_file));
-                       do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+                       do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                        /*
                         * We discovered the size of the shm segment, so
                         * break out of here and fall through to the next
@@ -1356,7 +1361,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
                if ((vma->vm_ops == &shm_vm_ops) &&
                    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
                    (vma->vm_file == file))
-                       do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+                       do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                vma = next;
        }
 
@@ -1365,7 +1370,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
         * given
         */
        if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
-               do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+               do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                retval = 0;
        }
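
A worked example of the new SHM_RND guard above (numbers illustrative, shmlba == 0x4000):

    /*
     * old: addr = 0x2fff, SHM_RND -> addr &= ~0x3fff -> 0x0, i.e. the
     *      nil page, then attached as if MAP_FIXED had been requested
     * new: addr = 0x2fff < shmlba -> round-down skipped -> the address
     *      fails the page-alignment check and do_shmat() bails out
     */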
 
index 12c679f769c6f82af6d65caf8f06d2707f86fb3f..b302b4731d16547a88e4ddc6db21e130ae821113 100644 (file)
@@ -64,10 +64,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
-obj-$(CONFIG_CGROUPS) += cgroup.o
-obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
-obj-$(CONFIG_CGROUP_PIDS) += cgroup_pids.o
-obj-$(CONFIG_CPUSETS) += cpuset.o
+obj-$(CONFIG_CGROUPS) += cgroup/
 obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_USER_NS) += user_namespace.o
 obj-$(CONFIG_PID_NS) += pid_namespace.o
index 3fc6e39b223e2cb4dc2985d33696a858a116d8d0..796b68d001198a39186cba850fe8161476a17bfa 100644 (file)
@@ -33,7 +33,7 @@
  * - out of bounds or malformed jumps
  * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
- * analysis is limited to 32k insn, which may be hit even if total number of
+ * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
  * Number of 'branches to be analyzed' is limited to 1k
  *
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
deleted file mode 100644 (file)
index 53bbca7..0000000
+++ /dev/null
@@ -1,6658 +0,0 @@
-/*
- *  Generic process-grouping system.
- *
- *  Based originally on the cpuset system, extracted by Paul Menage
- *  Copyright (C) 2006 Google, Inc
- *
- *  Notifications support
- *  Copyright (C) 2009 Nokia Corporation
- *  Author: Kirill A. Shutemov
- *
- *  Copyright notices from the original cpuset code:
- *  --------------------------------------------------
- *  Copyright (C) 2003 BULL SA.
- *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
- *
- *  Portions derived from Patrick Mochel's sysfs code.
- *  sysfs is Copyright (c) 2001-3 Patrick Mochel
- *
- *  2003-10-10 Written by Simon Derr.
- *  2003-10-22 Updates by Stephen Hemminger.
- *  2004 May-July Rework by Paul Jackson.
- *  ---------------------------------------------------
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of the Linux
- *  distribution for more details.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/cgroup.h>
-#include <linux/cred.h>
-#include <linux/ctype.h>
-#include <linux/errno.h>
-#include <linux/init_task.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/magic.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/proc_fs.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/percpu-rwsem.h>
-#include <linux/string.h>
-#include <linux/sort.h>
-#include <linux/kmod.h>
-#include <linux/delayacct.h>
-#include <linux/cgroupstats.h>
-#include <linux/hashtable.h>
-#include <linux/pid_namespace.h>
-#include <linux/idr.h>
-#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
-#include <linux/kthread.h>
-#include <linux/delay.h>
-#include <linux/atomic.h>
-#include <linux/cpuset.h>
-#include <linux/proc_ns.h>
-#include <linux/nsproxy.h>
-#include <linux/file.h>
-#include <net/sock.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/cgroup.h>
-
-/*
- * pidlists linger the following amount before being destroyed.  The goal
- * is avoiding frequent destruction in the middle of consecutive read calls
- * Expiring in the middle is a performance problem not a correctness one.
- * 1 sec should be enough.
- */
-#define CGROUP_PIDLIST_DESTROY_DELAY   HZ
-
-#define CGROUP_FILE_NAME_MAX           (MAX_CGROUP_TYPE_NAMELEN +      \
-                                        MAX_CFTYPE_NAME + 2)
-
-/*
- * cgroup_mutex is the master lock.  Any modification to cgroup or its
- * hierarchy must be performed while holding it.
- *
- * css_set_lock protects task->cgroups pointer, the list of css_set
- * objects, and the chain of tasks off each css_set.
- *
- * These locks are exported if CONFIG_PROVE_RCU so that accessors in
- * cgroup.h can use them for lockdep annotations.
- */
-#ifdef CONFIG_PROVE_RCU
-DEFINE_MUTEX(cgroup_mutex);
-DEFINE_SPINLOCK(css_set_lock);
-EXPORT_SYMBOL_GPL(cgroup_mutex);
-EXPORT_SYMBOL_GPL(css_set_lock);
-#else
-static DEFINE_MUTEX(cgroup_mutex);
-static DEFINE_SPINLOCK(css_set_lock);
-#endif
-
-/*
- * Protects cgroup_idr and css_idr so that IDs can be released without
- * grabbing cgroup_mutex.
- */
-static DEFINE_SPINLOCK(cgroup_idr_lock);
-
-/*
- * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
- * against file removal/re-creation across css hiding.
- */
-static DEFINE_SPINLOCK(cgroup_file_kn_lock);
-
-/*
- * Protects cgroup_subsys->release_agent_path.  Modifying it also requires
- * cgroup_mutex.  Reading requires either cgroup_mutex or this spinlock.
- */
-static DEFINE_SPINLOCK(release_agent_path_lock);
-
-struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
-
-#define cgroup_assert_mutex_or_rcu_locked()                            \
-       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
-                          !lockdep_is_held(&cgroup_mutex),             \
-                          "cgroup_mutex or RCU read lock required");
-
-/*
- * cgroup destruction makes heavy use of work items and there can be a lot
- * of concurrent destructions.  Use a separate workqueue so that cgroup
- * destruction work items don't end up filling up max_active of system_wq
- * which may lead to deadlock.
- */
-static struct workqueue_struct *cgroup_destroy_wq;
-
-/*
- * pidlist destructions need to be flushed on cgroup destruction.  Use a
- * separate workqueue as flush domain.
- */
-static struct workqueue_struct *cgroup_pidlist_destroy_wq;
-
-/* generate an array of cgroup subsystem pointers */
-#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
-static struct cgroup_subsys *cgroup_subsys[] = {
-#include <linux/cgroup_subsys.h>
-};
-#undef SUBSYS
-
-/* array of cgroup subsystem names */
-#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
-static const char *cgroup_subsys_name[] = {
-#include <linux/cgroup_subsys.h>
-};
-#undef SUBSYS
-
-/* array of static_keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
-#define SUBSYS(_x)                                                             \
-       DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);                 \
-       DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);                  \
-       EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);                      \
-       EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
-#include <linux/cgroup_subsys.h>
-#undef SUBSYS
-
-#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
-static struct static_key_true *cgroup_subsys_enabled_key[] = {
-#include <linux/cgroup_subsys.h>
-};
-#undef SUBSYS
-
-#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
-static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
-#include <linux/cgroup_subsys.h>
-};
-#undef SUBSYS
-
-/*
- * The default hierarchy, reserved for the subsystems that are otherwise
- * unattached - it never has more than a single cgroup, and all tasks are
- * part of that cgroup.
- */
-struct cgroup_root cgrp_dfl_root;
-EXPORT_SYMBOL_GPL(cgrp_dfl_root);
-
-/*
- * The default hierarchy always exists but is hidden until mounted for the
- * first time.  This is for backward compatibility.
- */
-static bool cgrp_dfl_visible;
-
-/* Controllers blocked by the commandline in v1 */
-static u16 cgroup_no_v1_mask;
-
-/* some controllers are not supported in the default hierarchy */
-static u16 cgrp_dfl_inhibit_ss_mask;
-
-/* some controllers are implicitly enabled on the default hierarchy */
-static unsigned long cgrp_dfl_implicit_ss_mask;
-
-/* The list of hierarchy roots */
-
-static LIST_HEAD(cgroup_roots);
-static int cgroup_root_count;
-
-/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
-static DEFINE_IDR(cgroup_hierarchy_idr);
-
-/*
- * Assign a monotonically increasing serial number to csses.  It guarantees
- * cgroups with bigger numbers are newer than those with smaller numbers.
- * Also, as csses are always appended to the parent's ->children list, it
- * guarantees that sibling csses are always sorted in the ascending serial
- * number order on the list.  Protected by cgroup_mutex.
- */
-static u64 css_serial_nr_next = 1;
-
-/*
- * These bitmask flags indicate whether tasks in the fork and exit paths have
- * fork/exit handlers to call. This avoids us having to do extra work in the
- * fork/exit path to check which subsystems have fork/exit callbacks.
- */
-static u16 have_fork_callback __read_mostly;
-static u16 have_exit_callback __read_mostly;
-static u16 have_free_callback __read_mostly;
-
-/* cgroup namespace for init task */
-struct cgroup_namespace init_cgroup_ns = {
-       .count          = { .counter = 2, },
-       .user_ns        = &init_user_ns,
-       .ns.ops         = &cgroupns_operations,
-       .ns.inum        = PROC_CGROUP_INIT_INO,
-       .root_cset      = &init_css_set,
-};
-
-/* Ditto for the can_fork callback. */
-static u16 have_canfork_callback __read_mostly;
-
-static struct file_system_type cgroup2_fs_type;
-static struct cftype cgroup_dfl_base_files[];
-static struct cftype cgroup_legacy_base_files[];
-
-static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
-static void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
-static int cgroup_apply_control(struct cgroup *cgrp);
-static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
-static void css_task_iter_advance(struct css_task_iter *it);
-static int cgroup_destroy_locked(struct cgroup *cgrp);
-static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
-                                             struct cgroup_subsys *ss);
-static void css_release(struct percpu_ref *ref);
-static void kill_css(struct cgroup_subsys_state *css);
-static int cgroup_addrm_files(struct cgroup_subsys_state *css,
-                             struct cgroup *cgrp, struct cftype cfts[],
-                             bool is_add);
-
-/**
- * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
- * @ssid: subsys ID of interest
- *
- * cgroup_subsys_enabled() can only be used with literal subsys names which
- * is fine for individual subsystems but unsuitable for cgroup core.  This
- * is a slower static_key_enabled()-based test indexed by @ssid.
- */
-static bool cgroup_ssid_enabled(int ssid)
-{
-       if (CGROUP_SUBSYS_COUNT == 0)
-               return false;
-
-       return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
-}
-
-static bool cgroup_ssid_no_v1(int ssid)
-{
-       return cgroup_no_v1_mask & (1 << ssid);
-}
-
-/**
- * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
- * @cgrp: the cgroup of interest
- *
- * The default hierarchy is the v2 interface of cgroup and this function
- * can be used to test whether a cgroup is on the default hierarchy for
- * cases where a subsystem should behave differently depending on the
- * interface version.
- *
- * The set of behaviors which change on the default hierarchy are still
- * being determined and the mount option is prefixed with __DEVEL__.
- *
- * List of changed behaviors:
- *
- * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
- *   and "name" are disallowed.
- *
- * - When mounting an existing superblock, mount options should match.
- *
- * - Remount is disallowed.
- *
- * - rename(2) is disallowed.
- *
- * - "tasks" is removed.  Everything should be at process granularity.  Use
- *   "cgroup.procs" instead.
- *
- * - "cgroup.procs" is not sorted.  pids will be unique unless they got
- *   recycled in between reads.
- *
- * - "release_agent" and "notify_on_release" are removed.  Replacement
- *   notification mechanism will be implemented.
- *
- * - "cgroup.clone_children" is removed.
- *
- * - "cgroup.subtree_populated" is available.  Its value is 0 if the cgroup
- *   and its descendants contain no task; otherwise, 1.  The file also
- *   generates kernfs notification which can be monitored through poll and
- *   [di]notify when the value of the file changes.
- *
- * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
- *   take masks of ancestors with non-empty cpus/mems, instead of being
- *   moved to an ancestor.
- *
- * - cpuset: a task can be moved into an empty cpuset, and again it takes
- *   masks of ancestors.
- *
- * - memcg: use_hierarchy is on by default and the cgroup file for the flag
- *   is not created.
- *
- * - blkcg: blk-throttle becomes properly hierarchical.
- *
- * - debug: disallowed on the default hierarchy.
- */
-static bool cgroup_on_dfl(const struct cgroup *cgrp)
-{
-       return cgrp->root == &cgrp_dfl_root;
-}
-
-/* IDR wrappers which synchronize using cgroup_idr_lock */
-static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
-                           gfp_t gfp_mask)
-{
-       int ret;
-
-       idr_preload(gfp_mask);
-       spin_lock_bh(&cgroup_idr_lock);
-       ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
-       spin_unlock_bh(&cgroup_idr_lock);
-       idr_preload_end();
-       return ret;
-}
-
-static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
-{
-       void *ret;
-
-       spin_lock_bh(&cgroup_idr_lock);
-       ret = idr_replace(idr, ptr, id);
-       spin_unlock_bh(&cgroup_idr_lock);
-       return ret;
-}
-
-static void cgroup_idr_remove(struct idr *idr, int id)
-{
-       spin_lock_bh(&cgroup_idr_lock);
-       idr_remove(idr, id);
-       spin_unlock_bh(&cgroup_idr_lock);
-}
-
-static struct cgroup *cgroup_parent(struct cgroup *cgrp)
-{
-       struct cgroup_subsys_state *parent_css = cgrp->self.parent;
-
-       if (parent_css)
-               return container_of(parent_css, struct cgroup, self);
-       return NULL;
-}
-
-/* subsystems visibly enabled on a cgroup */
-static u16 cgroup_control(struct cgroup *cgrp)
-{
-       struct cgroup *parent = cgroup_parent(cgrp);
-       u16 root_ss_mask = cgrp->root->subsys_mask;
-
-       if (parent)
-               return parent->subtree_control;
-
-       if (cgroup_on_dfl(cgrp))
-               root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
-                                 cgrp_dfl_implicit_ss_mask);
-       return root_ss_mask;
-}
-
-/* subsystems enabled on a cgroup */
-static u16 cgroup_ss_mask(struct cgroup *cgrp)
-{
-       struct cgroup *parent = cgroup_parent(cgrp);
-
-       if (parent)
-               return parent->subtree_ss_mask;
-
-       return cgrp->root->subsys_mask;
-}
-
-/**
- * cgroup_css - obtain a cgroup's css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns @cgrp->self)
- *
- * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
- * function must be called either under cgroup_mutex or rcu_read_lock() and
- * the caller is responsible for pinning the returned css if it wants to
- * keep accessing it outside the said locks.  This function may return
- * %NULL if @cgrp doesn't have @ss enabled.
- */
-static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
-                                             struct cgroup_subsys *ss)
-{
-       if (ss)
-               return rcu_dereference_check(cgrp->subsys[ss->id],
-                                       lockdep_is_held(&cgroup_mutex));
-       else
-               return &cgrp->self;
-}
-
-/**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns @cgrp->self)
- *
- * Similar to cgroup_css() but returns the effective css, which is defined
- * as the matching css of the nearest ancestor including self which has @ss
- * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
- * function is guaranteed to return non-NULL css.
- */
-static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
-                                               struct cgroup_subsys *ss)
-{
-       lockdep_assert_held(&cgroup_mutex);
-
-       if (!ss)
-               return &cgrp->self;
-
-       /*
-        * This function is used while updating css associations and thus
-        * can't test the csses directly.  Test ss_mask.
-        */
-       while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
-               cgrp = cgroup_parent(cgrp);
-               if (!cgrp)
-                       return NULL;
-       }
-
-       return cgroup_css(cgrp, ss);
-}
-
-/**
- * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest
- *
- * Find and get the effective css of @cgrp for @ss.  The effective css is
- * defined as the matching css of the nearest ancestor including self which
- * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
- * the root css is returned, so this function always returns a valid css.
- * The returned css must be put using css_put().
- */
-struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
-                                            struct cgroup_subsys *ss)
-{
-       struct cgroup_subsys_state *css;
-
-       rcu_read_lock();
-
-       do {
-               css = cgroup_css(cgrp, ss);
-
-               if (css && css_tryget_online(css))
-                       goto out_unlock;
-               cgrp = cgroup_parent(cgrp);
-       } while (cgrp);
-
-       css = init_css_set.subsys[ss->id];
-       css_get(css);
-out_unlock:
-       rcu_read_unlock();
-       return css;
-}
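-
-/*
- * Sketch of a hypothetical caller: cgroup_get_e_css() always returns a
- * valid, pinned css which must be released with css_put() when done.
- *
- *	css = cgroup_get_e_css(cgrp, ss);
- *	... use css ...
- *	css_put(css);
- */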
-
-/* convenient tests for these bits */
-static inline bool cgroup_is_dead(const struct cgroup *cgrp)
-{
-       return !(cgrp->self.flags & CSS_ONLINE);
-}
-
-static void cgroup_get(struct cgroup *cgrp)
-{
-       WARN_ON_ONCE(cgroup_is_dead(cgrp));
-       css_get(&cgrp->self);
-}
-
-static bool cgroup_tryget(struct cgroup *cgrp)
-{
-       return css_tryget(&cgrp->self);
-}
-
-struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
-{
-       struct cgroup *cgrp = of->kn->parent->priv;
-       struct cftype *cft = of_cft(of);
-
-       /*
-        * This is an open and unprotected implementation of cgroup_css().
-        * seq_css() is only called from a kernfs file operation which has
-        * an active reference on the file.  Because all the subsystem
-        * files are drained before a css is disassociated from a cgroup,
-        * the matching css from the cgroup's subsys table is guaranteed to
-        * be and stay valid until the enclosing operation is complete.
-        */
-       if (cft->ss)
-               return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
-       else
-               return &cgrp->self;
-}
-EXPORT_SYMBOL_GPL(of_css);
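-
-/*
- * Sketch (hypothetical seq_show handler): for cgroup control files the
- * seq_file's private pointer is the kernfs_open_file, so the backing
- * css can be fetched through of_css() without extra locking.
- *
- *	static int my_seq_show(struct seq_file *sf, void *v)
- *	{
- *		struct cgroup_subsys_state *css = of_css(sf->private);
- *		...
- *	}
- */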
-
-static int notify_on_release(const struct cgroup *cgrp)
-{
-       return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
-}
-
-/**
- * for_each_css - iterate all css's of a cgroup
- * @css: the iteration cursor
- * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
- * @cgrp: the target cgroup to iterate css's of
- *
- * Should be called under cgroup_[tree_]mutex.
- */
-#define for_each_css(css, ssid, cgrp)                                  \
-       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)        \
-               if (!((css) = rcu_dereference_check(                    \
-                               (cgrp)->subsys[(ssid)],                 \
-                               lockdep_is_held(&cgroup_mutex)))) { }   \
-               else
-
-/**
- * for_each_e_css - iterate all effective css's of a cgroup
- * @css: the iteration cursor
- * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
- * @cgrp: the target cgroup to iterate css's of
- *
- * Should be called under cgroup_[tree_]mutex.
- */
-#define for_each_e_css(css, ssid, cgrp)                                        \
-       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)        \
-               if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
-                       ;                                               \
-               else
-
-/**
- * for_each_subsys - iterate all enabled cgroup subsystems
- * @ss: the iteration cursor
- * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
- */
-#define for_each_subsys(ss, ssid)                                      \
-       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT &&                \
-            (((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
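-
-/*
- * Illustrative sketch: for_each_subsys() visits every registered
- * subsystem; @ss and @ssid are caller-supplied cursors and the pr_info()
- * body is just an example.
- *
- *	struct cgroup_subsys *ss;
- *	int ssid;
- *
- *	for_each_subsys(ss, ssid)
- *		pr_info("subsys %d: %s\n", ssid, ss->name);
- */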
-
-/**
- * do_each_subsys_mask - filter for_each_subsys with a bitmask
- * @ss: the iteration cursor
- * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
- * @ss_mask: the bitmask
- *
- * The block will only run for cases where the ssid-th bit (1 << ssid) of
- * @ss_mask is set.
- */
-#define do_each_subsys_mask(ss, ssid, ss_mask) do {                    \
-       unsigned long __ss_mask = (ss_mask);                            \
-       if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
-               (ssid) = 0;                                             \
-               break;                                                  \
-       }                                                               \
-       for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {       \
-               (ss) = cgroup_subsys[ssid];                             \
-               {
-
-#define while_each_subsys_mask()                                       \
-               }                                                       \
-       }                                                               \
-} while (false)
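-
-/*
- * Illustrative sketch: the do/while pair above visits only the
- * subsystems whose bit is set in the mask, e.g.:
- *
- *	do_each_subsys_mask(ss, ssid, ss_mask) {
- *		pr_info("masked-in subsys: %s\n", ss->name);
- *	} while_each_subsys_mask();
- */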
-
-/* iterate across the hierarchies */
-#define for_each_root(root)                                            \
-       list_for_each_entry((root), &cgroup_roots, root_list)
-
-/* iterate over child cgrps; cgroup_mutex should be held throughout iteration */
-#define cgroup_for_each_live_child(child, cgrp)                                \
-       list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
-               if (({ lockdep_assert_held(&cgroup_mutex);              \
-                      cgroup_is_dead(child); }))                       \
-                       ;                                               \
-               else
-
-/* walk live descendants in preorder */
-#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)         \
-       css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))  \
-               if (({ lockdep_assert_held(&cgroup_mutex);              \
-                      (dsct) = (d_css)->cgroup;                        \
-                      cgroup_is_dead(dsct); }))                        \
-                       ;                                               \
-               else
-
-/* walk live descendants in postorder */
-#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)                \
-       css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
-               if (({ lockdep_assert_held(&cgroup_mutex);              \
-                      (dsct) = (d_css)->cgroup;                        \
-                      cgroup_is_dead(dsct); }))                        \
-                       ;                                               \
-               else
-
-static void cgroup_release_agent(struct work_struct *work);
-static void check_for_release(struct cgroup *cgrp);
-
-/*
- * A cgroup can be associated with multiple css_sets as different tasks may
- * belong to different cgroups on different hierarchies.  In the other
- * direction, a css_set is naturally associated with multiple cgroups.
- * This M:N relationship is represented by the following link structure
- * which exists for each association and allows traversing the associations
- * from both sides.
- */
-struct cgrp_cset_link {
-       /* the cgroup and css_set this link associates */
-       struct cgroup           *cgrp;
-       struct css_set          *cset;
-
-       /* list of cgrp_cset_links anchored at cgrp->cset_links */
-       struct list_head        cset_link;
-
-       /* list of cgrp_cset_links anchored at css_set->cgrp_links */
-       struct list_head        cgrp_link;
-};
-
-/*
- * The default css_set - used by init and its children prior to any
- * hierarchies being mounted. It contains a pointer to the root state
- * for each subsystem. Also used to anchor the list of css_sets. Not
- * reference-counted, to improve performance when child cgroups
- * haven't been created.
- */
-struct css_set init_css_set = {
-       .refcount               = ATOMIC_INIT(1),
-       .cgrp_links             = LIST_HEAD_INIT(init_css_set.cgrp_links),
-       .tasks                  = LIST_HEAD_INIT(init_css_set.tasks),
-       .mg_tasks               = LIST_HEAD_INIT(init_css_set.mg_tasks),
-       .mg_preload_node        = LIST_HEAD_INIT(init_css_set.mg_preload_node),
-       .mg_node                = LIST_HEAD_INIT(init_css_set.mg_node),
-       .task_iters             = LIST_HEAD_INIT(init_css_set.task_iters),
-};
-
-static int css_set_count       = 1;    /* 1 for init_css_set */
-
-/**
- * css_set_populated - does a css_set contain any tasks?
- * @cset: target css_set
- */
-static bool css_set_populated(struct css_set *cset)
-{
-       lockdep_assert_held(&css_set_lock);
-
-       return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
-}
-
-/**
- * cgroup_update_populated - update the populated count of a cgroup
- * @cgrp: the target cgroup
- * @populated: inc or dec populated count
- *
- * One of the css_sets associated with @cgrp is either getting its first
- * task or losing the last.  Update @cgrp->populated_cnt accordingly.  The
- * count is propagated towards root so that a given cgroup's populated_cnt
- * is zero iff the cgroup and all its descendants don't contain any tasks.
- *
- * @cgrp's interface file "cgroup.populated" is zero if
- * @cgrp->populated_cnt is zero and 1 otherwise.  When @cgrp->populated_cnt
- * changes from or to zero, userland is notified that the content of the
- * interface file has changed.  This can be used to detect when @cgrp and
- * its descendants become populated or empty.
- */
-static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
-{
-       lockdep_assert_held(&css_set_lock);
-
-       do {
-               bool trigger;
-
-               if (populated)
-                       trigger = !cgrp->populated_cnt++;
-               else
-                       trigger = !--cgrp->populated_cnt;
-
-               if (!trigger)
-                       break;
-
-               check_for_release(cgrp);
-               cgroup_file_notify(&cgrp->events_file);
-
-               cgrp = cgroup_parent(cgrp);
-       } while (cgrp);
-}
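-
-/*
- * Worked example (hypothetical cgroups a and a/b): when the first task
- * enters a css_set linked to b, b->populated_cnt goes 0 -> 1; if b was
- * a's only populated descendant, a's count transitions as well, and each
- * transition fires check_for_release() plus a notification on that
- * cgroup's events file.
- */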
-
-/**
- * css_set_update_populated - update populated state of a css_set
- * @cset: target css_set
- * @populated: whether @cset is populated or depopulated
- *
- * @cset is either getting the first task or losing the last.  Update the
- * ->populated_cnt of all associated cgroups accordingly.
- */
-static void css_set_update_populated(struct css_set *cset, bool populated)
-{
-       struct cgrp_cset_link *link;
-
-       lockdep_assert_held(&css_set_lock);
-
-       list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
-               cgroup_update_populated(link->cgrp, populated);
-}
-
-/**
- * css_set_move_task - move a task from one css_set to another
- * @task: task being moved
- * @from_cset: css_set @task currently belongs to (may be NULL)
- * @to_cset: new css_set @task is being moved to (may be NULL)
- * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
- *
- * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
- * css_set, @from_cset can be NULL.  If @task is being disassociated
- * instead of moved, @to_cset can be NULL.
- *
- * This function automatically handles populated_cnt updates and
- * css_task_iter adjustments but the caller is responsible for managing
- * @from_cset and @to_cset's reference counts.
- */
-static void css_set_move_task(struct task_struct *task,
-                             struct css_set *from_cset, struct css_set *to_cset,
-                             bool use_mg_tasks)
-{
-       lockdep_assert_held(&css_set_lock);
-
-       if (to_cset && !css_set_populated(to_cset))
-               css_set_update_populated(to_cset, true);
-
-       if (from_cset) {
-               struct css_task_iter *it, *pos;
-
-               WARN_ON_ONCE(list_empty(&task->cg_list));
-
-               /*
-                * @task is leaving, advance task iterators which are
-                * pointing to it so that they can resume at the next
-                * position.  Advancing an iterator might remove it from
-                * the list, use safe walk.  See css_task_iter_advance*()
-                * for details.
-                */
-               list_for_each_entry_safe(it, pos, &from_cset->task_iters,
-                                        iters_node)
-                       if (it->task_pos == &task->cg_list)
-                               css_task_iter_advance(it);
-
-               list_del_init(&task->cg_list);
-               if (!css_set_populated(from_cset))
-                       css_set_update_populated(from_cset, false);
-       } else {
-               WARN_ON_ONCE(!list_empty(&task->cg_list));
-       }
-
-       if (to_cset) {
-               /*
-                * We are synchronized through cgroup_threadgroup_rwsem
-                * against PF_EXITING setting such that we can't race
-                * against cgroup_exit() changing the css_set to
-                * init_css_set and dropping the old one.
-                */
-               WARN_ON_ONCE(task->flags & PF_EXITING);
-
-               rcu_assign_pointer(task->cgroups, to_cset);
-               list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
-                                                            &to_cset->tasks);
-       }
-}
-
-/*
- * hash table for css_sets.  This speeds up finding an existing
- * css_set.  This hash doesn't (currently) take into
- * account cgroups in empty hierarchies.
- */
-#define CSS_SET_HASH_BITS      7
-static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
-
-static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
-{
-       unsigned long key = 0UL;
-       struct cgroup_subsys *ss;
-       int i;
-
-       for_each_subsys(ss, i)
-               key += (unsigned long)css[i];
-       key = (key >> 16) ^ key;
-
-       return key;
-}
-
-static void put_css_set_locked(struct css_set *cset)
-{
-       struct cgrp_cset_link *link, *tmp_link;
-       struct cgroup_subsys *ss;
-       int ssid;
-
-       lockdep_assert_held(&css_set_lock);
-
-       if (!atomic_dec_and_test(&cset->refcount))
-               return;
-
-       /* This css_set is dead.  Unlink it and release cgroup and css refs. */
-       for_each_subsys(ss, ssid) {
-               list_del(&cset->e_cset_node[ssid]);
-               css_put(cset->subsys[ssid]);
-       }
-       hash_del(&cset->hlist);
-       css_set_count--;
-
-       list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
-               list_del(&link->cset_link);
-               list_del(&link->cgrp_link);
-               if (cgroup_parent(link->cgrp))
-                       cgroup_put(link->cgrp);
-               kfree(link);
-       }
-
-       kfree_rcu(cset, rcu_head);
-}
-
-static void put_css_set(struct css_set *cset)
-{
-       unsigned long flags;
-
-       /*
-        * Ensure that the refcount doesn't hit zero while any readers
-        * can see it.  Similar to atomic_dec_and_lock(), but for a
-        * spinlock.
-        */
-       if (atomic_add_unless(&cset->refcount, -1, 1))
-               return;
-
-       spin_lock_irqsave(&css_set_lock, flags);
-       put_css_set_locked(cset);
-       spin_unlock_irqrestore(&css_set_lock, flags);
-}
-
-/*
- * refcounted get/put for css_set objects
- */
-static inline void get_css_set(struct css_set *cset)
-{
-       atomic_inc(&cset->refcount);
-}
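-
-/*
- * Sketch of the expected pairing (hypothetical caller): each
- * get_css_set() must eventually be balanced by a put_css_set(), which
- * frees @cset once the last reference is gone.
- *
- *	get_css_set(cset);
- *	... cset is guaranteed to stay alive here ...
- *	put_css_set(cset);
- */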
-
-/**
- * compare_css_sets - helper function for find_existing_css_set().
- * @cset: candidate css_set being tested
- * @old_cset: existing css_set for a task
- * @new_cgrp: cgroup that's being entered by the task
- * @template: desired set of css pointers in css_set (pre-calculated)
- *
- * Returns true if "cset" matches "old_cset" except for the hierarchy
- * which "new_cgrp" belongs to, for which it should match "new_cgrp".
- */
-static bool compare_css_sets(struct css_set *cset,
-                            struct css_set *old_cset,
-                            struct cgroup *new_cgrp,
-                            struct cgroup_subsys_state *template[])
-{
-       struct list_head *l1, *l2;
-
-       /*
-        * On the default hierarchy, there can be csets which are
-        * associated with the same set of cgroups but different csses.
-        * Let's first ensure that csses match.
-        */
-       if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
-               return false;
-
-       /*
-        * Compare cgroup pointers in order to distinguish between
-        * different cgroups in hierarchies.  As different cgroups may
-        * share the same effective css, this comparison is always
-        * necessary.
-        */
-       l1 = &cset->cgrp_links;
-       l2 = &old_cset->cgrp_links;
-       while (1) {
-               struct cgrp_cset_link *link1, *link2;
-               struct cgroup *cgrp1, *cgrp2;
-
-               l1 = l1->next;
-               l2 = l2->next;
-               /* See if we reached the end - both lists are the same length. */
-               if (l1 == &cset->cgrp_links) {
-                       BUG_ON(l2 != &old_cset->cgrp_links);
-                       break;
-               } else {
-                       BUG_ON(l2 == &old_cset->cgrp_links);
-               }
-               /* Locate the cgroups associated with these links. */
-               link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
-               link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
-               cgrp1 = link1->cgrp;
-               cgrp2 = link2->cgrp;
-               /* Hierarchies should be linked in the same order. */
-               BUG_ON(cgrp1->root != cgrp2->root);
-
-               /*
-                * If this hierarchy is the hierarchy of the cgroup
-                * that's changing, then we need to check that this
-                * css_set points to the new cgroup; if it's any other
-                * hierarchy, then this css_set should point to the
-                * same cgroup as the old css_set.
-                */
-               if (cgrp1->root == new_cgrp->root) {
-                       if (cgrp1 != new_cgrp)
-                               return false;
-               } else {
-                       if (cgrp1 != cgrp2)
-                               return false;
-               }
-       }
-       return true;
-}
-
-/**
- * find_existing_css_set - init css array and find the matching css_set
- * @old_cset: the css_set that we're using before the cgroup transition
- * @cgrp: the cgroup that we're moving into
- * @template: out param for the new set of csses, should be clear on entry
- */
-static struct css_set *find_existing_css_set(struct css_set *old_cset,
-                                       struct cgroup *cgrp,
-                                       struct cgroup_subsys_state *template[])
-{
-       struct cgroup_root *root = cgrp->root;
-       struct cgroup_subsys *ss;
-       struct css_set *cset;
-       unsigned long key;
-       int i;
-
-       /*
-        * Build the set of subsystem state objects that we want to see in the
-        * new css_set.  While subsystems can change globally, the entries here
-        * won't change, so no need for locking.
-        */
-       for_each_subsys(ss, i) {
-               if (root->subsys_mask & (1UL << i)) {
-                       /*
-                        * @ss is in this hierarchy, so we want the
-                        * effective css from @cgrp.
-                        */
-                       template[i] = cgroup_e_css(cgrp, ss);
-               } else {
-                       /*
-                        * @ss is not in this hierarchy, so we don't want
-                        * to change the css.
-                        */
-                       template[i] = old_cset->subsys[i];
-               }
-       }
-
-       key = css_set_hash(template);
-       hash_for_each_possible(css_set_table, cset, hlist, key) {
-               if (!compare_css_sets(cset, old_cset, cgrp, template))
-                       continue;
-
-               /* This css_set matches what we need */
-               return cset;
-       }
-
-       /* No existing css_set matched */
-       return NULL;
-}
-
-static void free_cgrp_cset_links(struct list_head *links_to_free)
-{
-       struct cgrp_cset_link *link, *tmp_link;
-
-       list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
-               list_del(&link->cset_link);
-               kfree(link);
-       }
-}
-
-/**
- * allocate_cgrp_cset_links - allocate cgrp_cset_links
- * @count: the number of links to allocate
- * @tmp_links: list_head the allocated links are put on
- *
- * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
- * through ->cset_link.  Returns 0 on success or -errno.
- */
-static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
-{
-       struct cgrp_cset_link *link;
-       int i;
-
-       INIT_LIST_HEAD(tmp_links);
-
-       for (i = 0; i < count; i++) {
-               link = kzalloc(sizeof(*link), GFP_KERNEL);
-               if (!link) {
-                       free_cgrp_cset_links(tmp_links);
-                       return -ENOMEM;
-               }
-               list_add(&link->cset_link, tmp_links);
-       }
-       return 0;
-}
-
-/**
- * link_css_set - a helper function to link a css_set to a cgroup
- * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
- * @cset: the css_set to be linked
- * @cgrp: the destination cgroup
- */
-static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
-                        struct cgroup *cgrp)
-{
-       struct cgrp_cset_link *link;
-
-       BUG_ON(list_empty(tmp_links));
-
-       if (cgroup_on_dfl(cgrp))
-               cset->dfl_cgrp = cgrp;
-
-       link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
-       link->cset = cset;
-       link->cgrp = cgrp;
-
-       /*
-        * Always add links to the tail of the lists so that the lists are
-        * in chronological order.
-        */
-       list_move_tail(&link->cset_link, &cgrp->cset_links);
-       list_add_tail(&link->cgrp_link, &cset->cgrp_links);
-
-       if (cgroup_parent(cgrp))
-               cgroup_get(cgrp);
-}
-
-/**
- * find_css_set - return a new css_set with one cgroup updated
- * @old_cset: the baseline css_set
- * @cgrp: the cgroup to be updated
- *
- * Return a new css_set that's equivalent to @old_cset, but with @cgrp
- * substituted into the appropriate hierarchy.
- */
-static struct css_set *find_css_set(struct css_set *old_cset,
-                                   struct cgroup *cgrp)
-{
-       struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
-       struct css_set *cset;
-       struct list_head tmp_links;
-       struct cgrp_cset_link *link;
-       struct cgroup_subsys *ss;
-       unsigned long key;
-       int ssid;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       /*
-        * First see if we already have a css_set that matches
-        * the desired set.
-        */
-       spin_lock_irq(&css_set_lock);
-       cset = find_existing_css_set(old_cset, cgrp, template);
-       if (cset)
-               get_css_set(cset);
-       spin_unlock_irq(&css_set_lock);
-
-       if (cset)
-               return cset;
-
-       cset = kzalloc(sizeof(*cset), GFP_KERNEL);
-       if (!cset)
-               return NULL;
-
-       /* Allocate all the cgrp_cset_link objects that we'll need */
-       if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
-               kfree(cset);
-               return NULL;
-       }
-
-       atomic_set(&cset->refcount, 1);
-       INIT_LIST_HEAD(&cset->cgrp_links);
-       INIT_LIST_HEAD(&cset->tasks);
-       INIT_LIST_HEAD(&cset->mg_tasks);
-       INIT_LIST_HEAD(&cset->mg_preload_node);
-       INIT_LIST_HEAD(&cset->mg_node);
-       INIT_LIST_HEAD(&cset->task_iters);
-       INIT_HLIST_NODE(&cset->hlist);
-
-       /*
-        * Copy the set of subsystem state objects generated in
-        * find_existing_css_set().
-        */
-       memcpy(cset->subsys, template, sizeof(cset->subsys));
-
-       spin_lock_irq(&css_set_lock);
-       /* Add reference counts and links from the new css_set. */
-       list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
-               struct cgroup *c = link->cgrp;
-
-               if (c->root == cgrp->root)
-                       c = cgrp;
-               link_css_set(&tmp_links, cset, c);
-       }
-
-       BUG_ON(!list_empty(&tmp_links));
-
-       css_set_count++;
-
-       /* Add @cset to the hash table */
-       key = css_set_hash(cset->subsys);
-       hash_add(css_set_table, &cset->hlist, key);
-
-       for_each_subsys(ss, ssid) {
-               struct cgroup_subsys_state *css = cset->subsys[ssid];
-
-               list_add_tail(&cset->e_cset_node[ssid],
-                             &css->cgroup->e_csets[ssid]);
-               css_get(css);
-       }
-
-       spin_unlock_irq(&css_set_lock);
-
-       return cset;
-}
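-
-/*
- * Hypothetical caller sketch: find_css_set() must be called with
- * cgroup_mutex held and returns a reference (or NULL on allocation
- * failure) which is dropped with put_css_set().
- *
- *	cset = find_css_set(old_cset, cgrp);
- *	if (!cset)
- *		return -ENOMEM;
- *	... migrate the task to cset ...
- *	put_css_set(cset);
- */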
-
-static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
-{
-       struct cgroup *root_cgrp = kf_root->kn->priv;
-
-       return root_cgrp->root;
-}
-
-static int cgroup_init_root_id(struct cgroup_root *root)
-{
-       int id;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
-       if (id < 0)
-               return id;
-
-       root->hierarchy_id = id;
-       return 0;
-}
-
-static void cgroup_exit_root_id(struct cgroup_root *root)
-{
-       lockdep_assert_held(&cgroup_mutex);
-
-       idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
-}
-
-static void cgroup_free_root(struct cgroup_root *root)
-{
-       if (root) {
-               idr_destroy(&root->cgroup_idr);
-               kfree(root);
-       }
-}
-
-static void cgroup_destroy_root(struct cgroup_root *root)
-{
-       struct cgroup *cgrp = &root->cgrp;
-       struct cgrp_cset_link *link, *tmp_link;
-
-       trace_cgroup_destroy_root(root);
-
-       cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
-
-       BUG_ON(atomic_read(&root->nr_cgrps));
-       BUG_ON(!list_empty(&cgrp->self.children));
-
-       /* Rebind all subsystems back to the default hierarchy */
-       WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
-
-       /*
-        * Release all the links from cset_links to this hierarchy's
-        * root cgroup
-        */
-       spin_lock_irq(&css_set_lock);
-
-       list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
-               list_del(&link->cset_link);
-               list_del(&link->cgrp_link);
-               kfree(link);
-       }
-
-       spin_unlock_irq(&css_set_lock);
-
-       if (!list_empty(&root->root_list)) {
-               list_del(&root->root_list);
-               cgroup_root_count--;
-       }
-
-       cgroup_exit_root_id(root);
-
-       mutex_unlock(&cgroup_mutex);
-
-       kernfs_destroy_root(root->kf_root);
-       cgroup_free_root(root);
-}
-
-/*
- * look up cgroup associated with current task's cgroup namespace on the
- * specified hierarchy
- */
-static struct cgroup *
-current_cgns_cgroup_from_root(struct cgroup_root *root)
-{
-       struct cgroup *res = NULL;
-       struct css_set *cset;
-
-       lockdep_assert_held(&css_set_lock);
-
-       rcu_read_lock();
-
-       cset = current->nsproxy->cgroup_ns->root_cset;
-       if (cset == &init_css_set) {
-               res = &root->cgrp;
-       } else {
-               struct cgrp_cset_link *link;
-
-               list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
-                       struct cgroup *c = link->cgrp;
-
-                       if (c->root == root) {
-                               res = c;
-                               break;
-                       }
-               }
-       }
-       rcu_read_unlock();
-
-       BUG_ON(!res);
-       return res;
-}
-
-/* look up cgroup associated with given css_set on the specified hierarchy */
-static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
-                                           struct cgroup_root *root)
-{
-       struct cgroup *res = NULL;
-
-       lockdep_assert_held(&cgroup_mutex);
-       lockdep_assert_held(&css_set_lock);
-
-       if (cset == &init_css_set) {
-               res = &root->cgrp;
-       } else {
-               struct cgrp_cset_link *link;
-
-               list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
-                       struct cgroup *c = link->cgrp;
-
-                       if (c->root == root) {
-                               res = c;
-                               break;
-                       }
-               }
-       }
-
-       BUG_ON(!res);
-       return res;
-}
-
-/*
- * Return the cgroup for "task" from the given hierarchy. Must be
- * called with cgroup_mutex and css_set_lock held.
- */
-static struct cgroup *task_cgroup_from_root(struct task_struct *task,
-                                           struct cgroup_root *root)
-{
-       /*
-        * No need to lock the task - since we hold cgroup_mutex the
-        * task can't change groups, so the only thing that can happen
-        * is that it exits and its css_set is set back to init_css_set.
-        */
-       return cset_cgroup_from_root(task_css_set(task), root);
-}
-
-/*
- * A task must hold cgroup_mutex to modify cgroups.
- *
- * Any task can increment and decrement the count field without a lock.
- * So in general, code holding cgroup_mutex can't rely on the count
- * field not changing.  However, if the count goes to zero, then only
- * cgroup_attach_task() can increment it again.  Because a count of zero
- * means that no tasks are currently attached, therefore there is no
- * way a task attached to that cgroup can fork (the other way to
- * increment the count).  So code holding cgroup_mutex can safely
- * assume that if the count is zero, it will stay zero. Similarly, if
- * a task holds cgroup_mutex on a cgroup with zero count, it
- * knows that the cgroup won't be removed, as cgroup_rmdir()
- * needs that mutex.
- *
- * A cgroup can only be deleted if both its 'count' of using tasks
- * is zero and its list of 'children' cgroups is empty.  Since all
- * tasks in the system use _some_ cgroup, and since there is always at
- * least one task in the system (init, pid == 1), the root cgroup
- * always has children cgroups and/or using tasks.  So we don't
- * need a special hack to ensure that the root cgroup cannot be deleted.
- *
- * P.S.  One more locking exception.  RCU is used to guard the
- * update of a task's cgroup pointer by cgroup_attach_task().
- */
-
-static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
-static const struct file_operations proc_cgroupstats_operations;
-
-static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
-                             char *buf)
-{
-       struct cgroup_subsys *ss = cft->ss;
-
-       if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
-           !(cgrp->root->flags & CGRP_ROOT_NOPREFIX))
-               snprintf(buf, CGROUP_FILE_NAME_MAX, "%s.%s",
-                        cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
-                        cft->name);
-       else
-               strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
-       return buf;
-}
-
-/**
- * cgroup_file_mode - deduce file mode of a control file
- * @cft: the control file in question
- *
- * S_IRUGO for read, S_IWUSR for write.
- */
-static umode_t cgroup_file_mode(const struct cftype *cft)
-{
-       umode_t mode = 0;
-
-       if (cft->read_u64 || cft->read_s64 || cft->seq_show)
-               mode |= S_IRUGO;
-
-       if (cft->write_u64 || cft->write_s64 || cft->write) {
-               if (cft->flags & CFTYPE_WORLD_WRITABLE)
-                       mode |= S_IWUGO;
-               else
-                       mode |= S_IWUSR;
-       }
-
-       return mode;
-}
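-
-/*
- * Worked example (illustrative): a cftype with ->seq_show and ->write
- * but without CFTYPE_WORLD_WRITABLE yields S_IRUGO | S_IWUSR, i.e. 0644.
- */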
-
-/**
- * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
- * @subtree_control: the new subtree_control mask to consider
- * @this_ss_mask: available subsystems
- *
- * On the default hierarchy, a subsystem may request other subsystems to be
- * enabled together through its ->depends_on mask.  In such cases, more
- * subsystems than specified in "cgroup.subtree_control" may be enabled.
- *
- * This function calculates which subsystems need to be enabled if
- * @subtree_control is to be applied while restricted to @this_ss_mask.
- */
-static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
-{
-       u16 cur_ss_mask = subtree_control;
-       struct cgroup_subsys *ss;
-       int ssid;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
-
-       while (true) {
-               u16 new_ss_mask = cur_ss_mask;
-
-               do_each_subsys_mask(ss, ssid, cur_ss_mask) {
-                       new_ss_mask |= ss->depends_on;
-               } while_each_subsys_mask();
-
-               /*
-                * Mask out subsystems which aren't available.  This can
-                * happen only if some depended-upon subsystems were bound
-                * to non-default hierarchies.
-                */
-               new_ss_mask &= this_ss_mask;
-
-               if (new_ss_mask == cur_ss_mask)
-                       break;
-               cur_ss_mask = new_ss_mask;
-       }
-
-       return cur_ss_mask;
-}
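-
-/*
- * Worked example (hypothetical controllers A and B): if @subtree_control
- * enables only A and A's ->depends_on contains B, the first pass adds
- * B's bit, the second pass finds nothing new and the loop terminates,
- * returning (A | B) & @this_ss_mask.
- */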
-
-/**
- * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
- * @kn: the kernfs_node being serviced
- *
- * This helper undoes cgroup_kn_lock_live() and should be invoked before
- * the method finishes if locking succeeded.  Note that once this function
- * returns the cgroup returned by cgroup_kn_lock_live() may become
- * inaccessible at any time.  If the caller intends to continue to access the
- * cgroup, it should pin it before invoking this function.
- */
-static void cgroup_kn_unlock(struct kernfs_node *kn)
-{
-       struct cgroup *cgrp;
-
-       if (kernfs_type(kn) == KERNFS_DIR)
-               cgrp = kn->priv;
-       else
-               cgrp = kn->parent->priv;
-
-       mutex_unlock(&cgroup_mutex);
-
-       kernfs_unbreak_active_protection(kn);
-       cgroup_put(cgrp);
-}
-
-/**
- * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
- * @kn: the kernfs_node being serviced
- * @drain_offline: perform offline draining on the cgroup
- *
- * This helper is to be used by a cgroup kernfs method currently servicing
- * @kn.  It breaks the active protection, performs cgroup locking and
- * verifies that the associated cgroup is alive.  Returns the cgroup if
- * alive; otherwise, %NULL.  A successful return should be undone by a
- * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
- * cgroup is drained of offlining csses before return.
- *
- * Any cgroup kernfs method implementation which requires locking the
- * associated cgroup should use this helper.  It avoids nesting cgroup
- * locking under kernfs active protection and allows all kernfs operations
- * including self-removal.
- */
-static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn,
-                                         bool drain_offline)
-{
-       struct cgroup *cgrp;
-
-       if (kernfs_type(kn) == KERNFS_DIR)
-               cgrp = kn->priv;
-       else
-               cgrp = kn->parent->priv;
-
-       /*
-        * We're gonna grab cgroup_mutex which nests outside kernfs
-        * active_ref.  The cgroup liveness check alone provides enough
-        * protection against removal.  Ensure @cgrp stays accessible and
-        * break the active_ref protection.
-        */
-       if (!cgroup_tryget(cgrp))
-               return NULL;
-       kernfs_break_active_protection(kn);
-
-       if (drain_offline)
-               cgroup_lock_and_drain_offline(cgrp);
-       else
-               mutex_lock(&cgroup_mutex);
-
-       if (!cgroup_is_dead(cgrp))
-               return cgrp;
-
-       cgroup_kn_unlock(kn);
-       return NULL;
-}
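-
-/*
- * Sketch of the expected pattern in a cgroup kernfs method (hypothetical
- * handler; the error code is just an example):
- *
- *	cgrp = cgroup_kn_lock_live(of->kn, false);
- *	if (!cgrp)
- *		return -ENODEV;
- *	... operate on cgrp ...
- *	cgroup_kn_unlock(of->kn);
- */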
-
-static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
-{
-       char name[CGROUP_FILE_NAME_MAX];
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       if (cft->file_offset) {
-               struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
-               struct cgroup_file *cfile = (void *)css + cft->file_offset;
-
-               spin_lock_irq(&cgroup_file_kn_lock);
-               cfile->kn = NULL;
-               spin_unlock_irq(&cgroup_file_kn_lock);
-       }
-
-       kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
-}
-
-/**
- * css_clear_dir - remove subsys files in a cgroup directory
- * @css: target css
- */
-static void css_clear_dir(struct cgroup_subsys_state *css)
-{
-       struct cgroup *cgrp = css->cgroup;
-       struct cftype *cfts;
-
-       if (!(css->flags & CSS_VISIBLE))
-               return;
-
-       css->flags &= ~CSS_VISIBLE;
-
-       list_for_each_entry(cfts, &css->ss->cfts, node)
-               cgroup_addrm_files(css, cgrp, cfts, false);
-}
-
-/**
- * css_populate_dir - create subsys files in a cgroup directory
- * @css: target css
- *
- * On failure, no file is added.
- */
-static int css_populate_dir(struct cgroup_subsys_state *css)
-{
-       struct cgroup *cgrp = css->cgroup;
-       struct cftype *cfts, *failed_cfts;
-       int ret;
-
-       if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
-               return 0;
-
-       if (!css->ss) {
-               if (cgroup_on_dfl(cgrp))
-                       cfts = cgroup_dfl_base_files;
-               else
-                       cfts = cgroup_legacy_base_files;
-
-               return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
-       }
-
-       list_for_each_entry(cfts, &css->ss->cfts, node) {
-               ret = cgroup_addrm_files(css, cgrp, cfts, true);
-               if (ret < 0) {
-                       failed_cfts = cfts;
-                       goto err;
-               }
-       }
-
-       css->flags |= CSS_VISIBLE;
-
-       return 0;
-err:
-       list_for_each_entry(cfts, &css->ss->cfts, node) {
-               if (cfts == failed_cfts)
-                       break;
-               cgroup_addrm_files(css, cgrp, cfts, false);
-       }
-       return ret;
-}
-
-static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
-{
-       struct cgroup *dcgrp = &dst_root->cgrp;
-       struct cgroup_subsys *ss;
-       int ssid, i, ret;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       do_each_subsys_mask(ss, ssid, ss_mask) {
-               /*
-                * If @ss has non-root csses attached to it, can't move.
-                * If @ss is an implicit controller, it is exempt from this
-                * rule and can be stolen.
-                */
-               if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
-                   !ss->implicit_on_dfl)
-                       return -EBUSY;
-
-               /* can't move between two non-dummy roots either */
-               if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
-                       return -EBUSY;
-       } while_each_subsys_mask();
-
-       do_each_subsys_mask(ss, ssid, ss_mask) {
-               struct cgroup_root *src_root = ss->root;
-               struct cgroup *scgrp = &src_root->cgrp;
-               struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
-               struct css_set *cset;
-
-               WARN_ON(!css || cgroup_css(dcgrp, ss));
-
-               /* disable from the source */
-               src_root->subsys_mask &= ~(1 << ssid);
-               WARN_ON(cgroup_apply_control(scgrp));
-               cgroup_finalize_control(scgrp, 0);
-
-               /* rebind */
-               RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
-               rcu_assign_pointer(dcgrp->subsys[ssid], css);
-               ss->root = dst_root;
-               css->cgroup = dcgrp;
-
-               spin_lock_irq(&css_set_lock);
-               hash_for_each(css_set_table, i, cset, hlist)
-                       list_move_tail(&cset->e_cset_node[ss->id],
-                                      &dcgrp->e_csets[ss->id]);
-               spin_unlock_irq(&css_set_lock);
-
-               /* default hierarchy doesn't enable controllers by default */
-               dst_root->subsys_mask |= 1 << ssid;
-               if (dst_root == &cgrp_dfl_root) {
-                       static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
-               } else {
-                       dcgrp->subtree_control |= 1 << ssid;
-                       static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
-               }
-
-               ret = cgroup_apply_control(dcgrp);
-               if (ret)
-                       pr_warn("partial failure to rebind %s controller (err=%d)\n",
-                               ss->name, ret);
-
-               if (ss->bind)
-                       ss->bind(css);
-       } while_each_subsys_mask();
-
-       kernfs_activate(dcgrp->kn);
-       return 0;
-}
-
-static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
-                           struct kernfs_root *kf_root)
-{
-       int len = 0;
-       char *buf = NULL;
-       struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
-       struct cgroup *ns_cgroup;
-
-       buf = kmalloc(PATH_MAX, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-
-       spin_lock_irq(&css_set_lock);
-       ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
-       len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
-       spin_unlock_irq(&css_set_lock);
-
-       if (len >= PATH_MAX)
-               len = -ERANGE;
-       else if (len > 0) {
-               seq_escape(sf, buf, " \t\n\\");
-               len = 0;
-       }
-       kfree(buf);
-       return len;
-}
-
-static int cgroup_show_options(struct seq_file *seq,
-                              struct kernfs_root *kf_root)
-{
-       struct cgroup_root *root = cgroup_root_from_kf(kf_root);
-       struct cgroup_subsys *ss;
-       int ssid;
-
-       if (root != &cgrp_dfl_root)
-               for_each_subsys(ss, ssid)
-                       if (root->subsys_mask & (1 << ssid))
-                               seq_show_option(seq, ss->legacy_name, NULL);
-       if (root->flags & CGRP_ROOT_NOPREFIX)
-               seq_puts(seq, ",noprefix");
-       if (root->flags & CGRP_ROOT_XATTR)
-               seq_puts(seq, ",xattr");
-
-       spin_lock(&release_agent_path_lock);
-       if (strlen(root->release_agent_path))
-               seq_show_option(seq, "release_agent",
-                               root->release_agent_path);
-       spin_unlock(&release_agent_path_lock);
-
-       if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
-               seq_puts(seq, ",clone_children");
-       if (strlen(root->name))
-               seq_show_option(seq, "name", root->name);
-       return 0;
-}
-
-struct cgroup_sb_opts {
-       u16 subsys_mask;
-       unsigned int flags;
-       char *release_agent;
-       bool cpuset_clone_children;
-       char *name;
-       /* User explicitly requested empty subsystem */
-       bool none;
-};
-
-static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
-{
-       char *token, *o = data;
-       bool all_ss = false, one_ss = false;
-       u16 mask = U16_MAX;
-       struct cgroup_subsys *ss;
-       int nr_opts = 0;
-       int i;
-
-#ifdef CONFIG_CPUSETS
-       mask = ~((u16)1 << cpuset_cgrp_id);
-#endif
-
-       memset(opts, 0, sizeof(*opts));
-
-       while ((token = strsep(&o, ",")) != NULL) {
-               nr_opts++;
-
-               if (!*token)
-                       return -EINVAL;
-               if (!strcmp(token, "none")) {
-                       /* Explicitly have no subsystems */
-                       opts->none = true;
-                       continue;
-               }
-               if (!strcmp(token, "all")) {
-                       /* Mutually exclusive option 'all' + subsystem name */
-                       if (one_ss)
-                               return -EINVAL;
-                       all_ss = true;
-                       continue;
-               }
-               if (!strcmp(token, "noprefix")) {
-                       opts->flags |= CGRP_ROOT_NOPREFIX;
-                       continue;
-               }
-               if (!strcmp(token, "clone_children")) {
-                       opts->cpuset_clone_children = true;
-                       continue;
-               }
-               if (!strcmp(token, "xattr")) {
-                       opts->flags |= CGRP_ROOT_XATTR;
-                       continue;
-               }
-               if (!strncmp(token, "release_agent=", 14)) {
-                       /* Specifying two release agents is forbidden */
-                       if (opts->release_agent)
-                               return -EINVAL;
-                       opts->release_agent =
-                               kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
-                       if (!opts->release_agent)
-                               return -ENOMEM;
-                       continue;
-               }
-               if (!strncmp(token, "name=", 5)) {
-                       const char *name = token + 5;
-                       /* Can't specify an empty name */
-                       if (!strlen(name))
-                               return -EINVAL;
-                       /* Must match [\w.-]+ */
-                       for (i = 0; i < strlen(name); i++) {
-                               char c = name[i];
-                               if (isalnum(c))
-                                       continue;
-                               if ((c == '.') || (c == '-') || (c == '_'))
-                                       continue;
-                               return -EINVAL;
-                       }
-                       /* Specifying two names is forbidden */
-                       if (opts->name)
-                               return -EINVAL;
-                       opts->name = kstrndup(name,
-                                             MAX_CGROUP_ROOT_NAMELEN - 1,
-                                             GFP_KERNEL);
-                       if (!opts->name)
-                               return -ENOMEM;
-
-                       continue;
-               }
-
-               for_each_subsys(ss, i) {
-                       if (strcmp(token, ss->legacy_name))
-                               continue;
-                       if (!cgroup_ssid_enabled(i))
-                               continue;
-                       if (cgroup_ssid_no_v1(i))
-                               continue;
-
-                       /* Mutually exclusive option 'all' + subsystem name */
-                       if (all_ss)
-                               return -EINVAL;
-                       opts->subsys_mask |= (1 << i);
-                       one_ss = true;
-
-                       break;
-               }
-               if (i == CGROUP_SUBSYS_COUNT)
-                       return -ENOENT;
-       }
-
-       /*
-        * If the 'all' option was specified, select all the subsystems.
-        * Otherwise, if neither 'none', 'name=' nor a subsystem name was
-        * specified, default to 'all'.
-        */
-       if (all_ss || (!one_ss && !opts->none && !opts->name))
-               for_each_subsys(ss, i)
-                       if (cgroup_ssid_enabled(i) && !cgroup_ssid_no_v1(i))
-                               opts->subsys_mask |= (1 << i);
-
-       /*
-        * We either have to specify by name or by subsystems. (So all
-        * empty hierarchies must have a name).
-        */
-       if (!opts->subsys_mask && !opts->name)
-               return -EINVAL;
-
-       /*
-        * Option noprefix was introduced just for backward compatibility
-        * with the old cpuset, so we allow noprefix only if mounting just
-        * the cpuset subsystem.
-        */
-       if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
-               return -EINVAL;
-
-       /* Can't specify "none" and some subsystems */
-       if (opts->subsys_mask && opts->none)
-               return -EINVAL;
-
-       return 0;
-}
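-
-/*
- * Worked example (illustrative): mount data "name=mygrp,noprefix,cpuset"
- * sets opts->name to "mygrp", CGRP_ROOT_NOPREFIX in opts->flags and the
- * cpuset bit in opts->subsys_mask; noprefix passes the check above only
- * because cpuset is the sole subsystem requested.
- */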
-
-static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
-{
-       int ret = 0;
-       struct cgroup_root *root = cgroup_root_from_kf(kf_root);
-       struct cgroup_sb_opts opts;
-       u16 added_mask, removed_mask;
-
-       if (root == &cgrp_dfl_root) {
-               pr_err("remount is not allowed\n");
-               return -EINVAL;
-       }
-
-       cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
-
-       /* See what subsystems are wanted */
-       ret = parse_cgroupfs_options(data, &opts);
-       if (ret)
-               goto out_unlock;
-
-       if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
-               pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
-                       task_tgid_nr(current), current->comm);
-
-       added_mask = opts.subsys_mask & ~root->subsys_mask;
-       removed_mask = root->subsys_mask & ~opts.subsys_mask;
-
-       /* Don't allow flags or name to change at remount */
-       if ((opts.flags ^ root->flags) ||
-           (opts.name && strcmp(opts.name, root->name))) {
-               pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
-                      opts.flags, opts.name ?: "", root->flags, root->name);
-               ret = -EINVAL;
-               goto out_unlock;
-       }
-
-       /* remounting is not allowed for populated hierarchies */
-       if (!list_empty(&root->cgrp.self.children)) {
-               ret = -EBUSY;
-               goto out_unlock;
-       }
-
-       ret = rebind_subsystems(root, added_mask);
-       if (ret)
-               goto out_unlock;
-
-       WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
-
-       if (opts.release_agent) {
-               spin_lock(&release_agent_path_lock);
-               strcpy(root->release_agent_path, opts.release_agent);
-               spin_unlock(&release_agent_path_lock);
-       }
-
-       trace_cgroup_remount(root);
-
- out_unlock:
-       kfree(opts.release_agent);
-       kfree(opts.name);
-       mutex_unlock(&cgroup_mutex);
-       return ret;
-}
-
-/*
- * To reduce the fork() overhead for systems that are not actually using
- * their cgroups capability, we don't maintain the lists running through
- * each css_set to its tasks until we see the list actually used - in other
- * words after the first mount.
- */
-static bool use_task_css_set_links __read_mostly;
-
-static void cgroup_enable_task_cg_lists(void)
-{
-       struct task_struct *p, *g;
-
-       spin_lock_irq(&css_set_lock);
-
-       if (use_task_css_set_links)
-               goto out_unlock;
-
-       use_task_css_set_links = true;
-
-       /*
-        * We need tasklist_lock because RCU is not safe against
-        * while_each_thread(). Besides, a forking task that has passed
-        * cgroup_post_fork() without seeing use_task_css_set_links = 1
-        * is not guaranteed to have its child immediately visible in the
-        * tasklist if we walk through it with RCU.
-        */
-       read_lock(&tasklist_lock);
-       do_each_thread(g, p) {
-               WARN_ON_ONCE(!list_empty(&p->cg_list) ||
-                            task_css_set(p) != &init_css_set);
-
-               /*
-                * We should check whether the process is exiting; otherwise
-                * we will race with cgroup_exit() and the list entry won't
-                * be deleted even though the process has exited.
-                * Do it while holding siglock so that we don't end up
-                * racing against cgroup_exit().
-                *
-                * Interrupts were already disabled while acquiring
-                * the css_set_lock, so we do not need to disable it
-                * again when acquiring the sighand->siglock here.
-                */
-               spin_lock(&p->sighand->siglock);
-               if (!(p->flags & PF_EXITING)) {
-                       struct css_set *cset = task_css_set(p);
-
-                       if (!css_set_populated(cset))
-                               css_set_update_populated(cset, true);
-                       list_add_tail(&p->cg_list, &cset->tasks);
-                       get_css_set(cset);
-               }
-               spin_unlock(&p->sighand->siglock);
-       } while_each_thread(g, p);
-       read_unlock(&tasklist_lock);
-out_unlock:
-       spin_unlock_irq(&css_set_lock);
-}
-
-static void init_cgroup_housekeeping(struct cgroup *cgrp)
-{
-       struct cgroup_subsys *ss;
-       int ssid;
-
-       INIT_LIST_HEAD(&cgrp->self.sibling);
-       INIT_LIST_HEAD(&cgrp->self.children);
-       INIT_LIST_HEAD(&cgrp->cset_links);
-       INIT_LIST_HEAD(&cgrp->pidlists);
-       mutex_init(&cgrp->pidlist_mutex);
-       cgrp->self.cgroup = cgrp;
-       cgrp->self.flags |= CSS_ONLINE;
-
-       for_each_subsys(ss, ssid)
-               INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
-
-       init_waitqueue_head(&cgrp->offline_waitq);
-       INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
-}
-
-static void init_cgroup_root(struct cgroup_root *root,
-                            struct cgroup_sb_opts *opts)
-{
-       struct cgroup *cgrp = &root->cgrp;
-
-       INIT_LIST_HEAD(&root->root_list);
-       atomic_set(&root->nr_cgrps, 1);
-       cgrp->root = root;
-       init_cgroup_housekeeping(cgrp);
-       idr_init(&root->cgroup_idr);
-
-       root->flags = opts->flags;
-       if (opts->release_agent)
-               strcpy(root->release_agent_path, opts->release_agent);
-       if (opts->name)
-               strcpy(root->name, opts->name);
-       if (opts->cpuset_clone_children)
-               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
-}
-
-static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
-{
-       LIST_HEAD(tmp_links);
-       struct cgroup *root_cgrp = &root->cgrp;
-       struct css_set *cset;
-       int i, ret;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
-       if (ret < 0)
-               goto out;
-       root_cgrp->id = ret;
-       root_cgrp->ancestor_ids[0] = ret;
-
-       ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
-                             GFP_KERNEL);
-       if (ret)
-               goto out;
-
-       /*
-        * We're accessing css_set_count without locking css_set_lock here,
-        * but that's OK - it can only be increased by someone holding
-        * cgroup_lock, and that's us.  Later rebinding may disable
-        * controllers on the default hierarchy and thus create new csets,
-        * which can't be more than the existing ones.  Allocate 2x.
-        */
-       ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
-       if (ret)
-               goto cancel_ref;
-
-       ret = cgroup_init_root_id(root);
-       if (ret)
-               goto cancel_ref;
-
-       root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
-                                          KERNFS_ROOT_CREATE_DEACTIVATED,
-                                          root_cgrp);
-       if (IS_ERR(root->kf_root)) {
-               ret = PTR_ERR(root->kf_root);
-               goto exit_root_id;
-       }
-       root_cgrp->kn = root->kf_root->kn;
-
-       ret = css_populate_dir(&root_cgrp->self);
-       if (ret)
-               goto destroy_root;
-
-       ret = rebind_subsystems(root, ss_mask);
-       if (ret)
-               goto destroy_root;
-
-       trace_cgroup_setup_root(root);
-
-       /*
-        * There must be no failure case after here, since rebinding takes
-        * care of subsystems' refcounts, which are explicitly dropped in
-        * the failure exit path.
-        */
-       list_add(&root->root_list, &cgroup_roots);
-       cgroup_root_count++;
-
-       /*
-        * Link the root cgroup in this hierarchy into all the css_set
-        * objects.
-        */
-       spin_lock_irq(&css_set_lock);
-       hash_for_each(css_set_table, i, cset, hlist) {
-               link_css_set(&tmp_links, cset, root_cgrp);
-               if (css_set_populated(cset))
-                       cgroup_update_populated(root_cgrp, true);
-       }
-       spin_unlock_irq(&css_set_lock);
-
-       BUG_ON(!list_empty(&root_cgrp->self.children));
-       BUG_ON(atomic_read(&root->nr_cgrps) != 1);
-
-       kernfs_activate(root_cgrp->kn);
-       ret = 0;
-       goto out;
-
-destroy_root:
-       kernfs_destroy_root(root->kf_root);
-       root->kf_root = NULL;
-exit_root_id:
-       cgroup_exit_root_id(root);
-cancel_ref:
-       percpu_ref_exit(&root_cgrp->self.refcnt);
-out:
-       free_cgrp_cset_links(&tmp_links);
-       return ret;
-}
-
-static struct dentry *cgroup_mount(struct file_system_type *fs_type,
-                        int flags, const char *unused_dev_name,
-                        void *data)
-{
-       bool is_v2 = fs_type == &cgroup2_fs_type;
-       struct super_block *pinned_sb = NULL;
-       struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
-       struct cgroup_subsys *ss;
-       struct cgroup_root *root;
-       struct cgroup_sb_opts opts;
-       struct dentry *dentry;
-       int ret;
-       int i;
-       bool new_sb;
-
-       get_cgroup_ns(ns);
-
-       /* Check if the caller has permission to mount. */
-       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
-               put_cgroup_ns(ns);
-               return ERR_PTR(-EPERM);
-       }
-
-       /*
-        * The first time anyone tries to mount a cgroup, enable the list
-        * linking each css_set to its tasks and fix up all existing tasks.
-        */
-       if (!use_task_css_set_links)
-               cgroup_enable_task_cg_lists();
-
-       if (is_v2) {
-               if (data) {
-                       pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
-                       put_cgroup_ns(ns);
-                       return ERR_PTR(-EINVAL);
-               }
-               cgrp_dfl_visible = true;
-               root = &cgrp_dfl_root;
-               cgroup_get(&root->cgrp);
-               goto out_mount;
-       }
-
-       cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
-
-       /* First find the desired set of subsystems */
-       ret = parse_cgroupfs_options(data, &opts);
-       if (ret)
-               goto out_unlock;
-
-       /*
-        * Destruction of cgroup root is asynchronous, so subsystems may
-        * still be dying after the previous unmount.  Let's drain the
-        * dying subsystems.  We just need to ensure that the ones
-        * unmounted previously finish dying and don't care about new ones
-        * starting.  Testing ref liveness is good enough.
-        */
-       for_each_subsys(ss, i) {
-               if (!(opts.subsys_mask & (1 << i)) ||
-                   ss->root == &cgrp_dfl_root)
-                       continue;
-
-               if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
-                       mutex_unlock(&cgroup_mutex);
-                       msleep(10);
-                       ret = restart_syscall();
-                       goto out_free;
-               }
-               cgroup_put(&ss->root->cgrp);
-       }
-
-       for_each_root(root) {
-               bool name_match = false;
-
-               if (root == &cgrp_dfl_root)
-                       continue;
-
-               /*
-                * If we asked for a name then it must match.  Also, if
-                * name matches but subsys_mask doesn't, we should fail.
-                * Remember whether name matched.
-                */
-               if (opts.name) {
-                       if (strcmp(opts.name, root->name))
-                               continue;
-                       name_match = true;
-               }
-
-               /*
-                * If we asked for subsystems (or explicitly for no
-                * subsystems) then they must match.
-                */
-               if ((opts.subsys_mask || opts.none) &&
-                   (opts.subsys_mask != root->subsys_mask)) {
-                       if (!name_match)
-                               continue;
-                       ret = -EBUSY;
-                       goto out_unlock;
-               }
-
-               if (root->flags ^ opts.flags)
-                       pr_warn("new mount options do not match the existing superblock, will be ignored\n");
-
-               /*
-                * We want to reuse @root whose lifetime is governed by its
-                * ->cgrp.  Let's check whether @root is alive and keep it
-                * that way.  As cgroup_kill_sb() can happen anytime, we
-                * want to block it by pinning the sb so that @root doesn't
-                * get killed before mount is complete.
-                *
-                * With the sb pinned, tryget_live can reliably indicate
-                * whether @root can be reused.  If it's being killed,
-                * drain it.  We can use wait_queue for the wait but this
-                * path is super cold.  Let's just sleep a bit and retry.
-                */
-               pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
-               if (IS_ERR(pinned_sb) ||
-                   !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
-                       mutex_unlock(&cgroup_mutex);
-                       if (!IS_ERR_OR_NULL(pinned_sb))
-                               deactivate_super(pinned_sb);
-                       msleep(10);
-                       ret = restart_syscall();
-                       goto out_free;
-               }
-
-               ret = 0;
-               goto out_unlock;
-       }
-
-       /*
-        * No such thing, create a new one.  name= matching without subsys
-        * specification is allowed for already existing hierarchies but we
-        * can't create a new one without subsys specification.
-        */
-       if (!opts.subsys_mask && !opts.none) {
-               ret = -EINVAL;
-               goto out_unlock;
-       }
-
-       /* Hierarchies may only be created in the initial cgroup namespace. */
-       if (ns != &init_cgroup_ns) {
-               ret = -EPERM;
-               goto out_unlock;
-       }
-
-       root = kzalloc(sizeof(*root), GFP_KERNEL);
-       if (!root) {
-               ret = -ENOMEM;
-               goto out_unlock;
-       }
-
-       init_cgroup_root(root, &opts);
-
-       ret = cgroup_setup_root(root, opts.subsys_mask);
-       if (ret)
-               cgroup_free_root(root);
-
-out_unlock:
-       mutex_unlock(&cgroup_mutex);
-out_free:
-       kfree(opts.release_agent);
-       kfree(opts.name);
-
-       if (ret) {
-               put_cgroup_ns(ns);
-               return ERR_PTR(ret);
-       }
-out_mount:
-       dentry = kernfs_mount(fs_type, flags, root->kf_root,
-                             is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
-                             &new_sb);
-
-       /*
-        * In a non-init cgroup namespace, instead of the root cgroup's
-        * dentry, we return the dentry corresponding to the
-        * cgroupns->root_cgrp.
-        */
-       if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
-               struct dentry *nsdentry;
-               struct cgroup *cgrp;
-
-               mutex_lock(&cgroup_mutex);
-               spin_lock_irq(&css_set_lock);
-
-               cgrp = cset_cgroup_from_root(ns->root_cset, root);
-
-               spin_unlock_irq(&css_set_lock);
-               mutex_unlock(&cgroup_mutex);
-
-               nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
-               dput(dentry);
-               dentry = nsdentry;
-       }
-
-       if (IS_ERR(dentry) || !new_sb)
-               cgroup_put(&root->cgrp);
-
-       /*
-        * If @pinned_sb, we're reusing an existing root and holding an
-        * extra ref on its sb.  Mount is complete.  Put the extra ref.
-        */
-       if (pinned_sb) {
-               WARN_ON(new_sb);
-               deactivate_super(pinned_sb);
-       }
-
-       put_cgroup_ns(ns);
-       return dentry;
-}
-
-static void cgroup_kill_sb(struct super_block *sb)
-{
-       struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
-       struct cgroup_root *root = cgroup_root_from_kf(kf_root);
-
-       /*
-        * If @root doesn't have any mounts or children, start killing it.
-        * This prevents new mounts by disabling percpu_ref_tryget_live().
-        * cgroup_mount() may wait for @root's release.
-        *
-        * And don't kill the default root.
-        */
-       if (!list_empty(&root->cgrp.self.children) ||
-           root == &cgrp_dfl_root)
-               cgroup_put(&root->cgrp);
-       else
-               percpu_ref_kill(&root->cgrp.self.refcnt);
-
-       kernfs_kill_sb(sb);
-}
-
-static struct file_system_type cgroup_fs_type = {
-       .name = "cgroup",
-       .mount = cgroup_mount,
-       .kill_sb = cgroup_kill_sb,
-       .fs_flags = FS_USERNS_MOUNT,
-};
-
-static struct file_system_type cgroup2_fs_type = {
-       .name = "cgroup2",
-       .mount = cgroup_mount,
-       .kill_sb = cgroup_kill_sb,
-       .fs_flags = FS_USERNS_MOUNT,
-};
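
From userspace, the two filesystem types registered above are mounted with
mount(2); a minimal sketch follows (the mount points and the "cpu" option
string are illustrative assumptions, not taken from this patch):

        /* userspace sketch: mount v2 (cgroup2) and a v1 hierarchy with
         * the cpu controller; the target directories are assumed to exist */
        #include <sys/mount.h>

        mount("cgroup2", "/sys/fs/cgroup/unified", "cgroup2", 0, NULL);
        mount("cgroup", "/sys/fs/cgroup/cpu", "cgroup", 0, "cpu");
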
-
-static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-                                struct cgroup_namespace *ns)
-{
-       struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
-
-       return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
-}
-
-int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-                  struct cgroup_namespace *ns)
-{
-       int ret;
-
-       mutex_lock(&cgroup_mutex);
-       spin_lock_irq(&css_set_lock);
-
-       ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
-
-       spin_unlock_irq(&css_set_lock);
-       mutex_unlock(&cgroup_mutex);
-
-       return ret;
-}
-EXPORT_SYMBOL_GPL(cgroup_path_ns);
-
-/**
- * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
- * @task: target task
- * @buf: the buffer to write the path into
- * @buflen: the length of the buffer
- *
- * Determine @task's cgroup on the first (the one with the lowest non-zero
- * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
- * function grabs cgroup_mutex and shouldn't be used inside locks used by
- * cgroup controller callbacks.
- *
- * Return value is the same as kernfs_path().
- */
-int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
-{
-       struct cgroup_root *root;
-       struct cgroup *cgrp;
-       int hierarchy_id = 1;
-       int ret;
-
-       mutex_lock(&cgroup_mutex);
-       spin_lock_irq(&css_set_lock);
-
-       root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
-
-       if (root) {
-               cgrp = task_cgroup_from_root(task, root);
-               ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
-       } else {
-               /* if no hierarchy exists, everyone is in "/" */
-               ret = strlcpy(buf, "/", buflen);
-       }
-
-       spin_unlock_irq(&css_set_lock);
-       mutex_unlock(&cgroup_mutex);
-       return ret;
-}
-EXPORT_SYMBOL_GPL(task_cgroup_path);
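
As a usage sketch, a caller that only wants to log a task's path on the
first hierarchy might do the following; the PATH_MAX buffer and the
pr_info() call are assumptions for illustration, not part of this diff:

        char buf[PATH_MAX];

        /* returns a kernfs_path()-style length; "/" if no hierarchy */
        if (task_cgroup_path(current, buf, sizeof(buf)) >= 0)
                pr_info("task cgroup path: %s\n", buf);
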
-
-/* used to track tasks and other necessary states during migration */
-struct cgroup_taskset {
-       /* the src and dst cset list running through cset->mg_node */
-       struct list_head        src_csets;
-       struct list_head        dst_csets;
-
-       /* the subsys currently being processed */
-       int                     ssid;
-
-       /*
-        * Fields for cgroup_taskset_*() iteration.
-        *
-        * Before migration is committed, the target migration tasks are on
-        * ->mg_tasks of the csets on ->src_csets.  After, on ->mg_tasks of
-        * the csets on ->dst_csets.  ->csets points to either ->src_csets
-        * or ->dst_csets depending on whether migration is committed.
-        *
-        * ->cur_cset and ->cur_task point to the current task position
-        * during iteration.
-        */
-       struct list_head        *csets;
-       struct css_set          *cur_cset;
-       struct task_struct      *cur_task;
-};
-
-#define CGROUP_TASKSET_INIT(tset)      (struct cgroup_taskset){        \
-       .src_csets              = LIST_HEAD_INIT(tset.src_csets),       \
-       .dst_csets              = LIST_HEAD_INIT(tset.dst_csets),       \
-       .csets                  = &tset.src_csets,                      \
-}
-
-/**
- * cgroup_taskset_add - try to add a migration target task to a taskset
- * @task: target task
- * @tset: target taskset
- *
- * Add @task, which is a migration target, to @tset.  This function becomes
- * a noop if @task doesn't need to be migrated.  @task's css_set should have
- * been added as a migration source and @task->cg_list will be moved from
- * the css_set's tasks list to the mg_tasks one.
- */
-static void cgroup_taskset_add(struct task_struct *task,
-                              struct cgroup_taskset *tset)
-{
-       struct css_set *cset;
-
-       lockdep_assert_held(&css_set_lock);
-
-       /* @task either already exited or can't exit until the end */
-       if (task->flags & PF_EXITING)
-               return;
-
-       /* leave @task alone if post_fork() hasn't linked it yet */
-       if (list_empty(&task->cg_list))
-               return;
-
-       cset = task_css_set(task);
-       if (!cset->mg_src_cgrp)
-               return;
-
-       list_move_tail(&task->cg_list, &cset->mg_tasks);
-       if (list_empty(&cset->mg_node))
-               list_add_tail(&cset->mg_node, &tset->src_csets);
-       if (list_empty(&cset->mg_dst_cset->mg_node))
-               list_move_tail(&cset->mg_dst_cset->mg_node,
-                              &tset->dst_csets);
-}
-
-/**
- * cgroup_taskset_first - reset taskset and return the first task
- * @tset: taskset of interest
- * @dst_cssp: output variable for the destination css
- *
- * @tset iteration is initialized and the first task is returned.
- */
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
-                                        struct cgroup_subsys_state **dst_cssp)
-{
-       tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
-       tset->cur_task = NULL;
-
-       return cgroup_taskset_next(tset, dst_cssp);
-}
-
-/**
- * cgroup_taskset_next - iterate to the next task in taskset
- * @tset: taskset of interest
- * @dst_cssp: output variable for the destination css
- *
- * Return the next task in @tset.  Iteration must have been initialized
- * with cgroup_taskset_first().
- */
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
-                                       struct cgroup_subsys_state **dst_cssp)
-{
-       struct css_set *cset = tset->cur_cset;
-       struct task_struct *task = tset->cur_task;
-
-       while (&cset->mg_node != tset->csets) {
-               if (!task)
-                       task = list_first_entry(&cset->mg_tasks,
-                                               struct task_struct, cg_list);
-               else
-                       task = list_next_entry(task, cg_list);
-
-               if (&task->cg_list != &cset->mg_tasks) {
-                       tset->cur_cset = cset;
-                       tset->cur_task = task;
-
-                       /*
-                        * This function may be called both before and
-                        * after cgroup_taskset_migrate().  The two cases
-                        * can be distinguished by looking at whether @cset
-                        * has its ->mg_dst_cset set.
-                        */
-                       if (cset->mg_dst_cset)
-                               *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
-                       else
-                               *dst_cssp = cset->subsys[tset->ssid];
-
-                       return task;
-               }
-
-               cset = list_next_entry(cset, mg_node);
-               task = NULL;
-       }
-
-       return NULL;
-}
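
Controller callbacks normally consume a taskset through the
cgroup_taskset_for_each() helper in include/linux/cgroup.h, which wraps the
two iterators above. A minimal sketch of a hypothetical ->attach()
implementation (foo_attach() and foo_charge_task() are made-up names):

        static void foo_attach(struct cgroup_taskset *tset)
        {
                struct cgroup_subsys_state *dst_css;
                struct task_struct *task;

                /* iterates via cgroup_taskset_first()/_next() above */
                cgroup_taskset_for_each(task, dst_css, tset)
                        foo_charge_task(task, dst_css);
        }
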
-
-/**
- * cgroup_taskset_migrate - migrate a taskset
- * @tset: target taskset
- * @root: cgroup root the migration is taking place on
- *
- * Migrate tasks in @tset as setup by migration preparation functions.
- * This function fails iff one of the ->can_attach callbacks fails and
- * guarantees that either all or none of the tasks in @tset are migrated.
- * @tset is consumed regardless of success.
- */
-static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
-                                 struct cgroup_root *root)
-{
-       struct cgroup_subsys *ss;
-       struct task_struct *task, *tmp_task;
-       struct css_set *cset, *tmp_cset;
-       int ssid, failed_ssid, ret;
-
-       /* methods shouldn't be called if no task is actually migrating */
-       if (list_empty(&tset->src_csets))
-               return 0;
-
-       /* check that we can legitimately attach to the cgroup */
-       do_each_subsys_mask(ss, ssid, root->subsys_mask) {
-               if (ss->can_attach) {
-                       tset->ssid = ssid;
-                       ret = ss->can_attach(tset);
-                       if (ret) {
-                               failed_ssid = ssid;
-                               goto out_cancel_attach;
-                       }
-               }
-       } while_each_subsys_mask();
-
-       /*
-        * Now that we're guaranteed success, proceed to move all tasks to
-        * the new cgroup.  There are no failure cases after here, so this
-        * is the commit point.
-        */
-       spin_lock_irq(&css_set_lock);
-       list_for_each_entry(cset, &tset->src_csets, mg_node) {
-               list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
-                       struct css_set *from_cset = task_css_set(task);
-                       struct css_set *to_cset = cset->mg_dst_cset;
-
-                       get_css_set(to_cset);
-                       css_set_move_task(task, from_cset, to_cset, true);
-                       put_css_set_locked(from_cset);
-               }
-       }
-       spin_unlock_irq(&css_set_lock);
-
-       /*
-        * Migration is committed, all target tasks are now on dst_csets.
-        * Nothing is sensitive to fork() after this point.  Notify
-        * controllers that migration is complete.
-        */
-       tset->csets = &tset->dst_csets;
-
-       do_each_subsys_mask(ss, ssid, root->subsys_mask) {
-               if (ss->attach) {
-                       tset->ssid = ssid;
-                       ss->attach(tset);
-               }
-       } while_each_subsys_mask();
-
-       ret = 0;
-       goto out_release_tset;
-
-out_cancel_attach:
-       do_each_subsys_mask(ss, ssid, root->subsys_mask) {
-               if (ssid == failed_ssid)
-                       break;
-               if (ss->cancel_attach) {
-                       tset->ssid = ssid;
-                       ss->cancel_attach(tset);
-               }
-       } while_each_subsys_mask();
-out_release_tset:
-       spin_lock_irq(&css_set_lock);
-       list_splice_init(&tset->dst_csets, &tset->src_csets);
-       list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
-               list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
-               list_del_init(&cset->mg_node);
-       }
-       spin_unlock_irq(&css_set_lock);
-       return ret;
-}
-
-/**
- * cgroup_may_migrate_to - verify whether a cgroup can be a migration destination
- * @dst_cgrp: destination cgroup to test
- *
- * On the default hierarchy, except for the root, subtree_control must be
- * zero for migration destination cgroups with tasks so that child cgroups
- * don't compete against tasks.
- */
-static bool cgroup_may_migrate_to(struct cgroup *dst_cgrp)
-{
-       return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) ||
-               !dst_cgrp->subtree_control;
-}
-
-/**
- * cgroup_migrate_finish - cleanup after attach
- * @preloaded_csets: list of preloaded css_sets
- *
- * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst().  See
- * those functions for details.
- */
-static void cgroup_migrate_finish(struct list_head *preloaded_csets)
-{
-       struct css_set *cset, *tmp_cset;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       spin_lock_irq(&css_set_lock);
-       list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
-               cset->mg_src_cgrp = NULL;
-               cset->mg_dst_cgrp = NULL;
-               cset->mg_dst_cset = NULL;
-               list_del_init(&cset->mg_preload_node);
-               put_css_set_locked(cset);
-       }
-       spin_unlock_irq(&css_set_lock);
-}
-
-/**
- * cgroup_migrate_add_src - add a migration source css_set
- * @src_cset: the source css_set to add
- * @dst_cgrp: the destination cgroup
- * @preloaded_csets: list of preloaded css_sets
- *
- * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
- * @src_cset and add it to @preloaded_csets, which should later be cleaned
- * up by cgroup_migrate_finish().
- *
- * This function may be called without holding cgroup_threadgroup_rwsem
- * even if the target is a process.  Threads may be created and destroyed
- * but as long as cgroup_mutex is not dropped, no new css_set can be put
- * into play and the preloaded css_sets are guaranteed to cover all
- * migrations.
- */
-static void cgroup_migrate_add_src(struct css_set *src_cset,
-                                  struct cgroup *dst_cgrp,
-                                  struct list_head *preloaded_csets)
-{
-       struct cgroup *src_cgrp;
-
-       lockdep_assert_held(&cgroup_mutex);
-       lockdep_assert_held(&css_set_lock);
-
-       /*
-        * If ->dead, @src_cset is associated with one or more dead cgroups
-        * and doesn't contain any migratable tasks.  Ignore it early so
-        * that the rest of migration path doesn't get confused by it.
-        */
-       if (src_cset->dead)
-               return;
-
-       src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
-
-       if (!list_empty(&src_cset->mg_preload_node))
-               return;
-
-       WARN_ON(src_cset->mg_src_cgrp);
-       WARN_ON(src_cset->mg_dst_cgrp);
-       WARN_ON(!list_empty(&src_cset->mg_tasks));
-       WARN_ON(!list_empty(&src_cset->mg_node));
-
-       src_cset->mg_src_cgrp = src_cgrp;
-       src_cset->mg_dst_cgrp = dst_cgrp;
-       get_css_set(src_cset);
-       list_add(&src_cset->mg_preload_node, preloaded_csets);
-}
-
-/**
- * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
- * @preloaded_csets: list of preloaded source css_sets
- *
- * Tasks are about to be moved and all the source css_sets have been
- * preloaded to @preloaded_csets.  This function looks up and pins all
- * destination css_sets, links each to its source, and appends them to
- * @preloaded_csets.
- *
- * This function must be called after cgroup_migrate_add_src() has been
- * called on each migration source css_set.  After migration is performed
- * using cgroup_migrate(), cgroup_migrate_finish() must be called on
- * @preloaded_csets.
- */
-static int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets)
-{
-       LIST_HEAD(csets);
-       struct css_set *src_cset, *tmp_cset;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       /* look up the dst cset for each src cset and link it to src */
-       list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
-               struct css_set *dst_cset;
-
-               dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
-               if (!dst_cset)
-                       goto err;
-
-               WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
-
-               /*
-                * If src cset equals dst, it's a noop.  Drop the src.
-                * cgroup_migrate() will skip the cset too.  Note that we
-                * can't handle src == dst as some nodes are used by both.
-                */
-               if (src_cset == dst_cset) {
-                       src_cset->mg_src_cgrp = NULL;
-                       src_cset->mg_dst_cgrp = NULL;
-                       list_del_init(&src_cset->mg_preload_node);
-                       put_css_set(src_cset);
-                       put_css_set(dst_cset);
-                       continue;
-               }
-
-               src_cset->mg_dst_cset = dst_cset;
-
-               if (list_empty(&dst_cset->mg_preload_node))
-                       list_add(&dst_cset->mg_preload_node, &csets);
-               else
-                       put_css_set(dst_cset);
-       }
-
-       list_splice_tail(&csets, preloaded_csets);
-       return 0;
-err:
-       cgroup_migrate_finish(&csets);
-       return -ENOMEM;
-}
-
-/**
- * cgroup_migrate - migrate a process or task to a cgroup
- * @leader: the leader of the process or the task to migrate
- * @threadgroup: whether @leader points to the whole process or a single task
- * @root: cgroup root migration is taking place on
- *
- * Migrate a process or task denoted by @leader.  If migrating a process,
- * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
- * responsible for invoking cgroup_migrate_add_src() and
- * cgroup_migrate_prepare_dst() on the targets before invoking this
- * function and following up with cgroup_migrate_finish().
- *
- * As long as a controller's ->can_attach() doesn't fail, this function is
- * guaranteed to succeed.  This means that, excluding ->can_attach()
- * failure, when migrating multiple targets, the success or failure can be
- * decided for all targets by invoking cgroup_migrate_prepare_dst() before
- * actually starting to migrate.
- */
-static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
-                         struct cgroup_root *root)
-{
-       struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
-       struct task_struct *task;
-
-       /*
-        * Prevent freeing of tasks while we take a snapshot. Tasks that are
-        * already PF_EXITING could be freed from underneath us unless we
-        * take an rcu_read_lock.
-        */
-       spin_lock_irq(&css_set_lock);
-       rcu_read_lock();
-       task = leader;
-       do {
-               cgroup_taskset_add(task, &tset);
-               if (!threadgroup)
-                       break;
-       } while_each_thread(leader, task);
-       rcu_read_unlock();
-       spin_unlock_irq(&css_set_lock);
-
-       return cgroup_taskset_migrate(&tset, root);
-}
-
-/**
- * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
- * @dst_cgrp: the cgroup to attach to
- * @leader: the task or the leader of the threadgroup to be attached
- * @threadgroup: attach the whole threadgroup?
- *
- * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
- */
-static int cgroup_attach_task(struct cgroup *dst_cgrp,
-                             struct task_struct *leader, bool threadgroup)
-{
-       LIST_HEAD(preloaded_csets);
-       struct task_struct *task;
-       int ret;
-
-       if (!cgroup_may_migrate_to(dst_cgrp))
-               return -EBUSY;
-
-       /* look up all src csets */
-       spin_lock_irq(&css_set_lock);
-       rcu_read_lock();
-       task = leader;
-       do {
-               cgroup_migrate_add_src(task_css_set(task), dst_cgrp,
-                                      &preloaded_csets);
-               if (!threadgroup)
-                       break;
-       } while_each_thread(leader, task);
-       rcu_read_unlock();
-       spin_unlock_irq(&css_set_lock);
-
-       /* prepare dst csets and commit */
-       ret = cgroup_migrate_prepare_dst(&preloaded_csets);
-       if (!ret)
-               ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
-
-       cgroup_migrate_finish(&preloaded_csets);
-
-       if (!ret)
-               trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
-
-       return ret;
-}
-
-static int cgroup_procs_write_permission(struct task_struct *task,
-                                        struct cgroup *dst_cgrp,
-                                        struct kernfs_open_file *of)
-{
-       const struct cred *cred = current_cred();
-       const struct cred *tcred = get_task_cred(task);
-       int ret = 0;
-
-       /*
-        * even if we're attaching all tasks in the thread group, we only
-        * need to check permissions on one of them.
-        */
-       if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
-           !uid_eq(cred->euid, tcred->uid) &&
-           !uid_eq(cred->euid, tcred->suid))
-               ret = -EACCES;
-
-       if (!ret && cgroup_on_dfl(dst_cgrp)) {
-               struct super_block *sb = of->file->f_path.dentry->d_sb;
-               struct cgroup *cgrp;
-               struct inode *inode;
-
-               spin_lock_irq(&css_set_lock);
-               cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
-               spin_unlock_irq(&css_set_lock);
-
-               while (!cgroup_is_descendant(dst_cgrp, cgrp))
-                       cgrp = cgroup_parent(cgrp);
-
-               ret = -ENOMEM;
-               inode = kernfs_get_inode(sb, cgrp->procs_file.kn);
-               if (inode) {
-                       ret = inode_permission(inode, MAY_WRITE);
-                       iput(inode);
-               }
-       }
-
-       put_cred(tcred);
-       return ret;
-}
-
-/*
- * Find the task_struct of the task to attach by vpid and pass it along to the
- * function to attach either it or all tasks in its threadgroup. Will lock
- * cgroup_mutex and threadgroup.
- */
-static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
-                                   size_t nbytes, loff_t off, bool threadgroup)
-{
-       struct task_struct *tsk;
-       struct cgroup_subsys *ss;
-       struct cgroup *cgrp;
-       pid_t pid;
-       int ssid, ret;
-
-       if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
-               return -EINVAL;
-
-       cgrp = cgroup_kn_lock_live(of->kn, false);
-       if (!cgrp)
-               return -ENODEV;
-
-       percpu_down_write(&cgroup_threadgroup_rwsem);
-       rcu_read_lock();
-       if (pid) {
-               tsk = find_task_by_vpid(pid);
-               if (!tsk) {
-                       ret = -ESRCH;
-                       goto out_unlock_rcu;
-               }
-       } else {
-               tsk = current;
-       }
-
-       if (threadgroup)
-               tsk = tsk->group_leader;
-
-       /*
-        * Workqueue threads may acquire PF_NO_SETAFFINITY and become
-        * trapped in a cpuset, or an RT worker may be born in a cgroup
-        * with no rt_runtime allocated.  Just say no.
-        */
-       if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
-               ret = -EINVAL;
-               goto out_unlock_rcu;
-       }
-
-       get_task_struct(tsk);
-       rcu_read_unlock();
-
-       ret = cgroup_procs_write_permission(tsk, cgrp, of);
-       if (!ret)
-               ret = cgroup_attach_task(cgrp, tsk, threadgroup);
-
-       put_task_struct(tsk);
-       goto out_unlock_threadgroup;
-
-out_unlock_rcu:
-       rcu_read_unlock();
-out_unlock_threadgroup:
-       percpu_up_write(&cgroup_threadgroup_rwsem);
-       for_each_subsys(ss, ssid)
-               if (ss->post_attach)
-                       ss->post_attach();
-       cgroup_kn_unlock(of->kn);
-       return ret ?: nbytes;
-}
-
-/**
- * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
- * @from: attach to all cgroups of a given task
- * @tsk: the task to be attached
- */
-int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
-{
-       struct cgroup_root *root;
-       int retval = 0;
-
-       mutex_lock(&cgroup_mutex);
-       percpu_down_write(&cgroup_threadgroup_rwsem);
-       for_each_root(root) {
-               struct cgroup *from_cgrp;
-
-               if (root == &cgrp_dfl_root)
-                       continue;
-
-               spin_lock_irq(&css_set_lock);
-               from_cgrp = task_cgroup_from_root(from, root);
-               spin_unlock_irq(&css_set_lock);
-
-               retval = cgroup_attach_task(from_cgrp, tsk, false);
-               if (retval)
-                       break;
-       }
-       percpu_up_write(&cgroup_threadgroup_rwsem);
-       mutex_unlock(&cgroup_mutex);
-
-       return retval;
-}
-EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
-
-static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
-                                 char *buf, size_t nbytes, loff_t off)
-{
-       return __cgroup_procs_write(of, buf, nbytes, off, false);
-}
-
-static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
-                                 char *buf, size_t nbytes, loff_t off)
-{
-       return __cgroup_procs_write(of, buf, nbytes, off, true);
-}
-
-static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
-                                         char *buf, size_t nbytes, loff_t off)
-{
-       struct cgroup *cgrp;
-
-       BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
-
-       cgrp = cgroup_kn_lock_live(of->kn, false);
-       if (!cgrp)
-               return -ENODEV;
-       spin_lock(&release_agent_path_lock);
-       strlcpy(cgrp->root->release_agent_path, strstrip(buf),
-               sizeof(cgrp->root->release_agent_path));
-       spin_unlock(&release_agent_path_lock);
-       cgroup_kn_unlock(of->kn);
-       return nbytes;
-}
-
-static int cgroup_release_agent_show(struct seq_file *seq, void *v)
-{
-       struct cgroup *cgrp = seq_css(seq)->cgroup;
-
-       spin_lock(&release_agent_path_lock);
-       seq_puts(seq, cgrp->root->release_agent_path);
-       spin_unlock(&release_agent_path_lock);
-       seq_putc(seq, '\n');
-       return 0;
-}
-
-static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
-{
-       seq_puts(seq, "0\n");
-       return 0;
-}
-
-static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
-{
-       struct cgroup_subsys *ss;
-       bool printed = false;
-       int ssid;
-
-       do_each_subsys_mask(ss, ssid, ss_mask) {
-               if (printed)
-                       seq_putc(seq, ' ');
-               seq_printf(seq, "%s", ss->name);
-               printed = true;
-       } while_each_subsys_mask();
-       if (printed)
-               seq_putc(seq, '\n');
-}
-
-/* show controllers which are enabled from the parent */
-static int cgroup_controllers_show(struct seq_file *seq, void *v)
-{
-       struct cgroup *cgrp = seq_css(seq)->cgroup;
-
-       cgroup_print_ss_mask(seq, cgroup_control(cgrp));
-       return 0;
-}
-
-/* show controllers which are enabled for a given cgroup's children */
-static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
-{
-       struct cgroup *cgrp = seq_css(seq)->cgroup;
-
-       cgroup_print_ss_mask(seq, cgrp->subtree_control);
-       return 0;
-}
-
-/**
- * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
- * @cgrp: root of the subtree to update csses for
- *
- * @cgrp's control masks have changed and its subtree's css associations
- * need to be updated accordingly.  This function looks up all css_sets
- * which are attached to the subtree, creates the matching updated css_sets
- * and migrates the tasks to the new ones.
- */
-static int cgroup_update_dfl_csses(struct cgroup *cgrp)
-{
-       LIST_HEAD(preloaded_csets);
-       struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
-       struct cgroup_subsys_state *d_css;
-       struct cgroup *dsct;
-       struct css_set *src_cset;
-       int ret;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       percpu_down_write(&cgroup_threadgroup_rwsem);
-
-       /* look up all csses currently attached to @cgrp's subtree */
-       spin_lock_irq(&css_set_lock);
-       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
-               struct cgrp_cset_link *link;
-
-               list_for_each_entry(link, &dsct->cset_links, cset_link)
-                       cgroup_migrate_add_src(link->cset, dsct,
-                                              &preloaded_csets);
-       }
-       spin_unlock_irq(&css_set_lock);
-
-       /* NULL dst indicates self on default hierarchy */
-       ret = cgroup_migrate_prepare_dst(&preloaded_csets);
-       if (ret)
-               goto out_finish;
-
-       spin_lock_irq(&css_set_lock);
-       list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
-               struct task_struct *task, *ntask;
-
-               /* src_csets precede dst_csets, break on the first dst_cset */
-               if (!src_cset->mg_src_cgrp)
-                       break;
-
-               /* all tasks in src_csets need to be migrated */
-               list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
-                       cgroup_taskset_add(task, &tset);
-       }
-       spin_unlock_irq(&css_set_lock);
-
-       ret = cgroup_taskset_migrate(&tset, cgrp->root);
-out_finish:
-       cgroup_migrate_finish(&preloaded_csets);
-       percpu_up_write(&cgroup_threadgroup_rwsem);
-       return ret;
-}
-
-/**
- * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
- * @cgrp: root of the target subtree
- *
- * Because css offlining is asynchronous, userland may try to re-enable a
- * controller while the previous css is still around.  This function grabs
- * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
- */
-static void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
-       __acquires(&cgroup_mutex)
-{
-       struct cgroup *dsct;
-       struct cgroup_subsys_state *d_css;
-       struct cgroup_subsys *ss;
-       int ssid;
-
-restart:
-       mutex_lock(&cgroup_mutex);
-
-       cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
-               for_each_subsys(ss, ssid) {
-                       struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
-                       DEFINE_WAIT(wait);
-
-                       if (!css || !percpu_ref_is_dying(&css->refcnt))
-                               continue;
-
-                       cgroup_get(dsct);
-                       prepare_to_wait(&dsct->offline_waitq, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-
-                       mutex_unlock(&cgroup_mutex);
-                       schedule();
-                       finish_wait(&dsct->offline_waitq, &wait);
-
-                       cgroup_put(dsct);
-                       goto restart;
-               }
-       }
-}
-
-/**
- * cgroup_save_control - save control masks of a subtree
- * @cgrp: root of the target subtree
- *
- * Save ->subtree_control and ->subtree_ss_mask to the respective old_
- * prefixed fields for @cgrp's subtree including @cgrp itself.
- */
-static void cgroup_save_control(struct cgroup *cgrp)
-{
-       struct cgroup *dsct;
-       struct cgroup_subsys_state *d_css;
-
-       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
-               dsct->old_subtree_control = dsct->subtree_control;
-               dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
-       }
-}
-
-/**
- * cgroup_propagate_control - refresh control masks of a subtree
- * @cgrp: root of the target subtree
- *
- * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
- * ->subtree_control and propagate controller availability through the
- * subtree so that descendants don't have unavailable controllers enabled.
- */
-static void cgroup_propagate_control(struct cgroup *cgrp)
-{
-       struct cgroup *dsct;
-       struct cgroup_subsys_state *d_css;
-
-       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
-               dsct->subtree_control &= cgroup_control(dsct);
-               dsct->subtree_ss_mask =
-                       cgroup_calc_subtree_ss_mask(dsct->subtree_control,
-                                                   cgroup_ss_mask(dsct));
-       }
-}
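
A small worked example of the mask arithmetic above, with made-up
controller bits:

        /*
         * Hypothetical masks: if cgroup_control(dsct) is cpu|memory and
         * dsct->subtree_control was cpu|io, the io bit is cleared since
         * the parent no longer offers it:
         *
         *      (cpu|io) & (cpu|memory) == cpu
         *
         * subtree_ss_mask is then recomputed from the surviving bits,
         * picking up implicit dependencies via
         * cgroup_calc_subtree_ss_mask().
         */
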
-
-/**
- * cgroup_restore_control - restore control masks of a subtree
- * @cgrp: root of the target subtree
- *
- * Restore ->subtree_control and ->subtree_ss_mask from the respective old_
- * prefixed fields for @cgrp's subtree including @cgrp itself.
- */
-static void cgroup_restore_control(struct cgroup *cgrp)
-{
-       struct cgroup *dsct;
-       struct cgroup_subsys_state *d_css;
-
-       cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
-               dsct->subtree_control = dsct->old_subtree_control;
-               dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
-       }
-}
-
-static bool css_visible(struct cgroup_subsys_state *css)
-{
-       struct cgroup_subsys *ss = css->ss;
-       struct cgroup *cgrp = css->cgroup;
-
-       if (cgroup_control(cgrp) & (1 << ss->id))
-               return true;
-       if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
-               return false;
-       return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
-}
-
-/**
- * cgroup_apply_control_enable - enable or show csses according to control
- * @cgrp: root of the target subtree
- *
- * Walk @cgrp's subtree and create new csses or make the existing ones
- * visible.  A css is created invisible if it's being implicitly enabled
- * through dependency.  An invisible css is made visible when the userland
- * explicitly enables it.
- *
- * Returns 0 on success, -errno on failure.  On failure, csses which have
- * been processed already aren't cleaned up.  The caller is responsible for
- * cleaning up with cgroup_apply_control_disable().
- */
-static int cgroup_apply_control_enable(struct cgroup *cgrp)
-{
-       struct cgroup *dsct;
-       struct cgroup_subsys_state *d_css;
-       struct cgroup_subsys *ss;
-       int ssid, ret;
-
-       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
-               for_each_subsys(ss, ssid) {
-                       struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
-
-                       WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
-
-                       if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
-                               continue;
-
-                       if (!css) {
-                               css = css_create(dsct, ss);
-                               if (IS_ERR(css))
-                                       return PTR_ERR(css);
-                       }
-
-                       if (css_visible(css)) {
-                               ret = css_populate_dir(css);
-                               if (ret)
-                                       return ret;
-                       }
-               }
-       }
-
-       return 0;
-}
-
-/**
- * cgroup_apply_control_disable - kill or hide csses according to control
- * @cgrp: root of the target subtree
- *
- * Walk @cgrp's subtree and kill and hide csses so that they match
- * cgroup_ss_mask() and cgroup_visible_mask().
- *
- * A css is hidden when the userland requests it to be disabled while other
- * subsystems are still depending on it.  The css must not be actively
- * controlling resources and must be in the vanilla state if it's made
- * visible again later.
- * Controllers which may be depended upon should provide ->css_reset() for
- * this purpose.
- */
-static void cgroup_apply_control_disable(struct cgroup *cgrp)
-{
-       struct cgroup *dsct;
-       struct cgroup_subsys_state *d_css;
-       struct cgroup_subsys *ss;
-       int ssid;
-
-       cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
-               for_each_subsys(ss, ssid) {
-                       struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
-
-                       WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
-
-                       if (!css)
-                               continue;
-
-                       if (css->parent &&
-                           !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
-                               kill_css(css);
-                       } else if (!css_visible(css)) {
-                               css_clear_dir(css);
-                               if (ss->css_reset)
-                                       ss->css_reset(css);
-                       }
-               }
-       }
-}
-
-/**
- * cgroup_apply_control - apply control mask updates to the subtree
- * @cgrp: root of the target subtree
- *
- * Subsystems can be enabled and disabled in a subtree using the following
- * steps.
- *
- * 1. Call cgroup_save_control() to stash the current state.
- * 2. Update ->subtree_control masks in the subtree as desired.
- * 3. Call cgroup_apply_control() to apply the changes.
- * 4. Optionally perform other related operations.
- * 5. Call cgroup_finalize_control() to finish up.
- *
- * This function implements step 3 and propagates the mask changes
- * throughout @cgrp's subtree, updates csses accordingly and performs
- * process migrations.
- */
-static int cgroup_apply_control(struct cgroup *cgrp)
-{
-       int ret;
-
-       cgroup_propagate_control(cgrp);
-
-       ret = cgroup_apply_control_enable(cgrp);
-       if (ret)
-               return ret;
-
-       /*
-        * At this point, cgroup_e_css() results reflect the new csses
-        * making the following cgroup_update_dfl_csses() properly update
-        * css associations of all tasks in the subtree.
-        */
-       ret = cgroup_update_dfl_csses(cgrp);
-       if (ret)
-               return ret;
-
-       return 0;
-}
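
Tying the five steps from the comment above together, a caller holding
cgroup_mutex follows roughly this shape; it mirrors what
cgroup_subtree_control_write() does further below ("enable" and "disable"
stand for masks the caller computed):

        cgroup_save_control(cgrp);              /* step 1: stash state */

        cgrp->subtree_control |= enable;        /* step 2: update masks */
        cgrp->subtree_control &= ~disable;

        ret = cgroup_apply_control(cgrp);       /* step 3: apply */

        /* step 4: optional related operations go here */

        cgroup_finalize_control(cgrp, ret);     /* step 5: finish up */
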
-
-/**
- * cgroup_finalize_control - finalize control mask update
- * @cgrp: root of the target subtree
- * @ret: the result of the update
- *
- * Finalize control mask update.  See cgroup_apply_control() for more info.
- */
-static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
-{
-       if (ret) {
-               cgroup_restore_control(cgrp);
-               cgroup_propagate_control(cgrp);
-       }
-
-       cgroup_apply_control_disable(cgrp);
-}
-
-/* change the enabled child controllers for a cgroup in the default hierarchy */
-static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
-                                           char *buf, size_t nbytes,
-                                           loff_t off)
-{
-       u16 enable = 0, disable = 0;
-       struct cgroup *cgrp, *child;
-       struct cgroup_subsys *ss;
-       char *tok;
-       int ssid, ret;
-
-       /*
-        * Parse input - space separated list of subsystem names prefixed
-        * with either + or -.
-        */
-       buf = strstrip(buf);
-       while ((tok = strsep(&buf, " "))) {
-               if (tok[0] == '\0')
-                       continue;
-               do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
-                       if (!cgroup_ssid_enabled(ssid) ||
-                           strcmp(tok + 1, ss->name))
-                               continue;
-
-                       if (*tok == '+') {
-                               enable |= 1 << ssid;
-                               disable &= ~(1 << ssid);
-                       } else if (*tok == '-') {
-                               disable |= 1 << ssid;
-                               enable &= ~(1 << ssid);
-                       } else {
-                               return -EINVAL;
-                       }
-                       break;
-               } while_each_subsys_mask();
-               if (ssid == CGROUP_SUBSYS_COUNT)
-                       return -EINVAL;
-       }
-
-       cgrp = cgroup_kn_lock_live(of->kn, true);
-       if (!cgrp)
-               return -ENODEV;
-
-       for_each_subsys(ss, ssid) {
-               if (enable & (1 << ssid)) {
-                       if (cgrp->subtree_control & (1 << ssid)) {
-                               enable &= ~(1 << ssid);
-                               continue;
-                       }
-
-                       if (!(cgroup_control(cgrp) & (1 << ssid))) {
-                               ret = -ENOENT;
-                               goto out_unlock;
-                       }
-               } else if (disable & (1 << ssid)) {
-                       if (!(cgrp->subtree_control & (1 << ssid))) {
-                               disable &= ~(1 << ssid);
-                               continue;
-                       }
-
-                       /* a child has it enabled? */
-                       cgroup_for_each_live_child(child, cgrp) {
-                               if (child->subtree_control & (1 << ssid)) {
-                                       ret = -EBUSY;
-                                       goto out_unlock;
-                               }
-                       }
-               }
-       }
-
-       if (!enable && !disable) {
-               ret = 0;
-               goto out_unlock;
-       }
-
-       /*
-        * Except for the root, subtree_control must be zero for a cgroup
-        * with tasks so that child cgroups don't compete against tasks.
-        */
-       if (enable && cgroup_parent(cgrp)) {
-               struct cgrp_cset_link *link;
-
-               /*
-                * Because namespaces pin csets too, @cgrp->cset_links
-                * might not be empty even when @cgrp is empty.  Walk and
-                * verify each cset.
-                */
-               spin_lock_irq(&css_set_lock);
-
-               ret = 0;
-               list_for_each_entry(link, &cgrp->cset_links, cset_link) {
-                       if (css_set_populated(link->cset)) {
-                               ret = -EBUSY;
-                               break;
-                       }
-               }
-
-               spin_unlock_irq(&css_set_lock);
-
-               if (ret)
-                       goto out_unlock;
-       }
-
-       /* save and update control masks and prepare csses */
-       cgroup_save_control(cgrp);
-
-       cgrp->subtree_control |= enable;
-       cgrp->subtree_control &= ~disable;
-
-       ret = cgroup_apply_control(cgrp);
-
-       cgroup_finalize_control(cgrp, ret);
-
-       kernfs_activate(cgrp->kn);
-       ret = 0;
-out_unlock:
-       cgroup_kn_unlock(of->kn);
-       return ret ?: nbytes;
-}
-
-static int cgroup_events_show(struct seq_file *seq, void *v)
-{
-       seq_printf(seq, "populated %d\n",
-                  cgroup_is_populated(seq_css(seq)->cgroup));
-       return 0;
-}
-
-static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
-                                size_t nbytes, loff_t off)
-{
-       struct cgroup *cgrp = of->kn->parent->priv;
-       struct cftype *cft = of->kn->priv;
-       struct cgroup_subsys_state *css;
-       int ret;
-
-       if (cft->write)
-               return cft->write(of, buf, nbytes, off);
-
-       /*
-        * kernfs guarantees that a file isn't deleted with operations in
-        * flight, which means that the matching css is and stays alive and
-        * doesn't need to be pinned.  The RCU locking is not necessary
-        * either.  It's just for the convenience of using cgroup_css().
-        */
-       rcu_read_lock();
-       css = cgroup_css(cgrp, cft->ss);
-       rcu_read_unlock();
-
-       if (cft->write_u64) {
-               unsigned long long v;
-               ret = kstrtoull(buf, 0, &v);
-               if (!ret)
-                       ret = cft->write_u64(css, cft, v);
-       } else if (cft->write_s64) {
-               long long v;
-               ret = kstrtoll(buf, 0, &v);
-               if (!ret)
-                       ret = cft->write_s64(css, cft, v);
-       } else {
-               ret = -EINVAL;
-       }
-
-       return ret ?: nbytes;
-}
-
-static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
-{
-       return seq_cft(seq)->seq_start(seq, ppos);
-}
-
-static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
-{
-       return seq_cft(seq)->seq_next(seq, v, ppos);
-}
-
-static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
-{
-       seq_cft(seq)->seq_stop(seq, v);
-}
-
-static int cgroup_seqfile_show(struct seq_file *m, void *arg)
-{
-       struct cftype *cft = seq_cft(m);
-       struct cgroup_subsys_state *css = seq_css(m);
-
-       if (cft->seq_show)
-               return cft->seq_show(m, arg);
-
-       if (cft->read_u64)
-               seq_printf(m, "%llu\n", cft->read_u64(css, cft));
-       else if (cft->read_s64)
-               seq_printf(m, "%lld\n", cft->read_s64(css, cft));
-       else
-               return -EINVAL;
-       return 0;
-}
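
As a hedged illustration of the dispatch above, a controller exposing a
single integer can supply only .read_u64 and let cgroup_seqfile_show()
handle the formatting; foo_weight_read() and foo_files are hypothetical
names:

        /* hypothetical file; cgroup_seqfile_show() prints the u64 */
        static u64 foo_weight_read(struct cgroup_subsys_state *css,
                                   struct cftype *cft)
        {
                return 100;
        }

        static struct cftype foo_files[] = {
                {
                        .name           = "weight",
                        .read_u64       = foo_weight_read,
                },
                { }     /* terminator */
        };
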
-
-static struct kernfs_ops cgroup_kf_single_ops = {
-       .atomic_write_len       = PAGE_SIZE,
-       .write                  = cgroup_file_write,
-       .seq_show               = cgroup_seqfile_show,
-};
-
-static struct kernfs_ops cgroup_kf_ops = {
-       .atomic_write_len       = PAGE_SIZE,
-       .write                  = cgroup_file_write,
-       .seq_start              = cgroup_seqfile_start,
-       .seq_next               = cgroup_seqfile_next,
-       .seq_stop               = cgroup_seqfile_stop,
-       .seq_show               = cgroup_seqfile_show,
-};
-
-/*
- * cgroup_rename - Only allow simple rename of directories in place.
- */
-static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
-                        const char *new_name_str)
-{
-       struct cgroup *cgrp = kn->priv;
-       int ret;
-
-       if (kernfs_type(kn) != KERNFS_DIR)
-               return -ENOTDIR;
-       if (kn->parent != new_parent)
-               return -EIO;
-
-       /*
-        * This isn't a proper migration and its usefulness is very
-        * limited.  Disallow on the default hierarchy.
-        */
-       if (cgroup_on_dfl(cgrp))
-               return -EPERM;
-
-       /*
-        * We're gonna grab cgroup_mutex which nests outside kernfs
-        * active_ref.  kernfs_rename() doesn't require active_ref
-        * protection.  Break them before grabbing cgroup_mutex.
-        */
-       kernfs_break_active_protection(new_parent);
-       kernfs_break_active_protection(kn);
-
-       mutex_lock(&cgroup_mutex);
-
-       ret = kernfs_rename(kn, new_parent, new_name_str);
-       if (!ret)
-               trace_cgroup_rename(cgrp);
-
-       mutex_unlock(&cgroup_mutex);
-
-       kernfs_unbreak_active_protection(kn);
-       kernfs_unbreak_active_protection(new_parent);
-       return ret;
-}
-
-/* set uid and gid of cgroup dirs and files to that of the creator */
-static int cgroup_kn_set_ugid(struct kernfs_node *kn)
-{
-       struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
-                              .ia_uid = current_fsuid(),
-                              .ia_gid = current_fsgid(), };
-
-       if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
-           gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
-               return 0;
-
-       return kernfs_setattr(kn, &iattr);
-}
-
-static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
-                          struct cftype *cft)
-{
-       char name[CGROUP_FILE_NAME_MAX];
-       struct kernfs_node *kn;
-       struct lock_class_key *key = NULL;
-       int ret;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       key = &cft->lockdep_key;
-#endif
-       kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
-                                 cgroup_file_mode(cft), 0, cft->kf_ops, cft,
-                                 NULL, key);
-       if (IS_ERR(kn))
-               return PTR_ERR(kn);
-
-       ret = cgroup_kn_set_ugid(kn);
-       if (ret) {
-               kernfs_remove(kn);
-               return ret;
-       }
-
-       if (cft->file_offset) {
-               struct cgroup_file *cfile = (void *)css + cft->file_offset;
-
-               spin_lock_irq(&cgroup_file_kn_lock);
-               cfile->kn = kn;
-               spin_unlock_irq(&cgroup_file_kn_lock);
-       }
-
-       return 0;
-}
-
-/**
- * cgroup_addrm_files - add or remove files to a cgroup directory
- * @css: the target css
- * @cgrp: the target cgroup (usually css->cgroup)
- * @cfts: array of cftypes to be added
- * @is_add: whether to add or remove
- *
- * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
- * For removals, this function never fails.
- */
-static int cgroup_addrm_files(struct cgroup_subsys_state *css,
-                             struct cgroup *cgrp, struct cftype cfts[],
-                             bool is_add)
-{
-       struct cftype *cft, *cft_end = NULL;
-       int ret = 0;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-restart:
-       for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
-               /* does cft->flags tell us to skip this file on @cgrp? */
-               if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
-                       continue;
-               if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
-                       continue;
-               if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
-                       continue;
-               if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
-                       continue;
-
-               if (is_add) {
-                       ret = cgroup_add_file(css, cgrp, cft);
-                       if (ret) {
-                               pr_warn("%s: failed to add %s, err=%d\n",
-                                       __func__, cft->name, ret);
-                               cft_end = cft;
-                               is_add = false;
-                               goto restart;
-                       }
-               } else {
-                       cgroup_rm_file(cgrp, cft);
-               }
-       }
-       return ret;
-}
-
-static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
-{
-       LIST_HEAD(pending);
-       struct cgroup_subsys *ss = cfts[0].ss;
-       struct cgroup *root = &ss->root->cgrp;
-       struct cgroup_subsys_state *css;
-       int ret = 0;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       /* add/rm files for all cgroups created before */
-       css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
-               struct cgroup *cgrp = css->cgroup;
-
-               if (!(css->flags & CSS_VISIBLE))
-                       continue;
-
-               ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
-               if (ret)
-                       break;
-       }
-
-       if (is_add && !ret)
-               kernfs_activate(root->kn);
-       return ret;
-}
-
-static void cgroup_exit_cftypes(struct cftype *cfts)
-{
-       struct cftype *cft;
-
-       for (cft = cfts; cft->name[0] != '\0'; cft++) {
-               /* free copy for custom atomic_write_len, see init_cftypes() */
-               if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
-                       kfree(cft->kf_ops);
-               cft->kf_ops = NULL;
-               cft->ss = NULL;
-
-               /* revert flags set by cgroup core while adding @cfts */
-               cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
-       }
-}
-
-static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
-{
-       struct cftype *cft;
-
-       for (cft = cfts; cft->name[0] != '\0'; cft++) {
-               struct kernfs_ops *kf_ops;
-
-               WARN_ON(cft->ss || cft->kf_ops);
-
-               if (cft->seq_start)
-                       kf_ops = &cgroup_kf_ops;
-               else
-                       kf_ops = &cgroup_kf_single_ops;
-
-               /*
-                * Ugh... if @cft wants a custom max_write_len, we need to
-                * make a copy of kf_ops to set its atomic_write_len.
-                */
-               if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
-                       kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
-                       if (!kf_ops) {
-                               cgroup_exit_cftypes(cfts);
-                               return -ENOMEM;
-                       }
-                       kf_ops->atomic_write_len = cft->max_write_len;
-               }
-
-               cft->kf_ops = kf_ops;
-               cft->ss = ss;
-       }
-
-       return 0;
-}
-
-static int cgroup_rm_cftypes_locked(struct cftype *cfts)
-{
-       lockdep_assert_held(&cgroup_mutex);
-
-       if (!cfts || !cfts[0].ss)
-               return -ENOENT;
-
-       list_del(&cfts->node);
-       cgroup_apply_cftypes(cfts, false);
-       cgroup_exit_cftypes(cfts);
-       return 0;
-}
-
-/**
- * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
- * @cfts: zero-length name terminated array of cftypes
- *
- * Unregister @cfts.  Files described by @cfts are removed from all
- * existing cgroups and all future cgroups won't have them either.  This
- * function can be called anytime whether @cfts' subsys is attached or not.
- *
- * Returns 0 on successful unregistration, -ENOENT if @cfts is not
- * registered.
- */
-int cgroup_rm_cftypes(struct cftype *cfts)
-{
-       int ret;
-
-       mutex_lock(&cgroup_mutex);
-       ret = cgroup_rm_cftypes_locked(cfts);
-       mutex_unlock(&cgroup_mutex);
-       return ret;
-}
-
-/**
- * cgroup_add_cftypes - add an array of cftypes to a subsystem
- * @ss: target cgroup subsystem
- * @cfts: zero-length name terminated array of cftypes
- *
- * Register @cfts to @ss.  Files described by @cfts are created for all
- * existing cgroups to which @ss is attached and all future cgroups will
- * have them too.  This function can be called anytime whether @ss is
- * attached or not.
- *
- * Returns 0 on successful registration, -errno on failure.  Note that this
- * function currently returns 0 as long as @cfts registration is successful
- * even if some file creation attempts on existing cgroups fail.
- */
-static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
-{
-       int ret;
-
-       if (!cgroup_ssid_enabled(ss->id))
-               return 0;
-
-       if (!cfts || cfts[0].name[0] == '\0')
-               return 0;
-
-       ret = cgroup_init_cftypes(ss, cfts);
-       if (ret)
-               return ret;
-
-       mutex_lock(&cgroup_mutex);
-
-       list_add_tail(&cfts->node, &ss->cfts);
-       ret = cgroup_apply_cftypes(cfts, true);
-       if (ret)
-               cgroup_rm_cftypes_locked(cfts);
-
-       mutex_unlock(&cgroup_mutex);
-       return ret;
-}
-
-/**
- * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
- * @ss: target cgroup subsystem
- * @cfts: zero-length name terminated array of cftypes
- *
- * Similar to cgroup_add_cftypes() but the added files are only used for
- * the default hierarchy.
- */
-int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
-{
-       struct cftype *cft;
-
-       for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
-               cft->flags |= __CFTYPE_ONLY_ON_DFL;
-       return cgroup_add_cftypes(ss, cfts);
-}
-
-/**
- * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
- * @ss: target cgroup subsystem
- * @cfts: zero-length name terminated array of cftypes
- *
- * Similar to cgroup_add_cftypes() but the added files are only used for
- * the legacy hierarchies.
- */
-int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
-{
-       struct cftype *cft;
-
-       for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
-               cft->flags |= __CFTYPE_NOT_ON_DFL;
-       return cgroup_add_cftypes(ss, cfts);
-}
-
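Registering an array like the hypothetical my_files sketch earlier is then a single call from subsystem init; my_subsys is a stand-in for a real struct cgroup_subsys:

        static int __init my_subsys_files_init(void)
        {
                /* my_subsys and my_files are the hypothetical names from above */
                return cgroup_add_legacy_cftypes(&my_subsys, my_files);
        }
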
-/**
- * cgroup_file_notify - generate a file modified event for a cgroup_file
- * @cfile: target cgroup_file
- *
- * @cfile must have been obtained by setting cftype->file_offset.
- */
-void cgroup_file_notify(struct cgroup_file *cfile)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&cgroup_file_kn_lock, flags);
-       if (cfile->kn)
-               kernfs_notify(cfile->kn);
-       spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
-}
-
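A sketch of how a controller would wire a pollable file to cgroup_file_notify(), under stated assumptions: struct my_css, my_events_show() and the "my.events" name are invented, and the embedded css must sit first so the cft->file_offset arithmetic in cgroup_add_file() lands on events_file:

        struct my_css {
                struct cgroup_subsys_state css;         /* must be the first member */
                struct cgroup_file events_file;
        };

        static struct cftype my_event_files[] = {
                {
                        .name = "my.events",
                        .file_offset = offsetof(struct my_css, events_file),
                        .seq_show = my_events_show,     /* hypothetical */
                },
                { }     /* terminate */
        };

        static void my_state_changed(struct my_css *mcss)
        {
                /* wakes poll() waiters on "my.events" and generates a
                 * notification event */
                cgroup_file_notify(&mcss->events_file);
        }
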
-/**
- * cgroup_task_count - count the number of tasks in a cgroup.
- * @cgrp: the cgroup in question
- *
- * Return the number of tasks in the cgroup.  The returned number can be
- * higher than the actual number of tasks due to css_set references from
- * namespace roots and temporary usages.
- */
-static int cgroup_task_count(const struct cgroup *cgrp)
-{
-       int count = 0;
-       struct cgrp_cset_link *link;
-
-       spin_lock_irq(&css_set_lock);
-       list_for_each_entry(link, &cgrp->cset_links, cset_link)
-               count += atomic_read(&link->cset->refcount);
-       spin_unlock_irq(&css_set_lock);
-       return count;
-}
-
-/**
- * css_next_child - find the next child of a given css
- * @pos: the current position (%NULL to initiate traversal)
- * @parent: css whose children to walk
- *
- * This function returns the next child of @parent and should be called
- * under either cgroup_mutex or RCU read lock.  The only requirement is
- * that @parent and @pos are accessible.  The next sibling is guaranteed to
- * be returned regardless of their states.
- *
- * If a subsystem synchronizes ->css_online() and the start of iteration, a
- * css which finished ->css_online() is guaranteed to be visible in the
- * future iterations and will stay visible until the last reference is put.
- * A css which hasn't finished ->css_online() or already finished
- * ->css_offline() may show up during traversal.  It's each subsystem's
- * responsibility to synchronize against on/offlining.
- */
-struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
-                                          struct cgroup_subsys_state *parent)
-{
-       struct cgroup_subsys_state *next;
-
-       cgroup_assert_mutex_or_rcu_locked();
-
-       /*
-        * @pos could already have been unlinked from the sibling list.
-        * Once a cgroup is removed, its ->sibling.next is no longer
-        * updated when its next sibling changes.  CSS_RELEASED is set when
-        * @pos is taken off list, at which time its next pointer is valid,
-        * and, as releases are serialized, the one pointed to by the next
-        * pointer is guaranteed to not have started release yet.  This
-        * implies that if we observe !CSS_RELEASED on @pos in this RCU
-        * critical section, the one pointed to by its next pointer is
-        * guaranteed to not have finished its RCU grace period even if we
-        * have dropped rcu_read_lock() in between iterations.
-        *
-        * If @pos has CSS_RELEASED set, its next pointer can't be
-        * dereferenced; however, as each css is given a monotonically
-        * increasing unique serial number and always appended to the
-        * sibling list, the next one can be found by walking the parent's
-        * children until the first css with higher serial number than
-        * @pos's.  While this path can be slower, it happens iff iteration
-        * races against release and the race window is very small.
-        */
-       if (!pos) {
-               next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
-       } else if (likely(!(pos->flags & CSS_RELEASED))) {
-               next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
-       } else {
-               list_for_each_entry_rcu(next, &parent->children, sibling)
-                       if (next->serial_nr > pos->serial_nr)
-                               break;
-       }
-
-       /*
-        * @next, if not pointing to the head, can be dereferenced and is
-        * the next sibling.
-        */
-       if (&next->sibling != &parent->children)
-               return next;
-       return NULL;
-}
-
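A minimal sketch of the locking contract above: walk the children inside one RCU read section, much as css_has_online_children() does further down; my_count_online() is a made-up name.

        static int my_count_online(struct cgroup_subsys_state *parent)
        {
                struct cgroup_subsys_state *child;
                int n = 0;

                rcu_read_lock();
                css_for_each_child(child, parent)
                        if (child->flags & CSS_ONLINE)
                                n++;
                rcu_read_unlock();
                return n;
        }
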
-/**
- * css_next_descendant_pre - find the next descendant for pre-order walk
- * @pos: the current position (%NULL to initiate traversal)
- * @root: css whose descendants to walk
- *
- * To be used by css_for_each_descendant_pre().  Find the next descendant
- * to visit for pre-order traversal of @root's descendants.  @root is
- * included in the iteration and the first node to be visited.
- *
- * While this function requires cgroup_mutex or RCU read locking, it
- * doesn't require the whole traversal to be contained in a single critical
- * section.  This function will return the correct next descendant as long
- * as both @pos and @root are accessible and @pos is a descendant of @root.
- *
- * If a subsystem synchronizes ->css_online() and the start of iteration, a
- * css which finished ->css_online() is guaranteed to be visible in the
- * future iterations and will stay visible until the last reference is put.
- * A css which hasn't finished ->css_online() or already finished
- * ->css_offline() may show up during traversal.  It's each subsystem's
- * responsibility to synchronize against on/offlining.
- */
-struct cgroup_subsys_state *
-css_next_descendant_pre(struct cgroup_subsys_state *pos,
-                       struct cgroup_subsys_state *root)
-{
-       struct cgroup_subsys_state *next;
-
-       cgroup_assert_mutex_or_rcu_locked();
-
-       /* if first iteration, visit @root */
-       if (!pos)
-               return root;
-
-       /* visit the first child if exists */
-       next = css_next_child(NULL, pos);
-       if (next)
-               return next;
-
-       /* no child, visit my or the closest ancestor's next sibling */
-       while (pos != root) {
-               next = css_next_child(pos, pos->parent);
-               if (next)
-                       return next;
-               pos = pos->parent;
-       }
-
-       return NULL;
-}
-
-/**
- * css_rightmost_descendant - return the rightmost descendant of a css
- * @pos: css of interest
- *
- * Return the rightmost descendant of @pos.  If there's no descendant, @pos
- * is returned.  This can be used during pre-order traversal to skip
- * subtree of @pos.
- *
- * While this function requires cgroup_mutex or RCU read locking, it
- * doesn't require the whole traversal to be contained in a single critical
- * section.  This function will return the correct rightmost descendant as
- * long as @pos is accessible.
- */
-struct cgroup_subsys_state *
-css_rightmost_descendant(struct cgroup_subsys_state *pos)
-{
-       struct cgroup_subsys_state *last, *tmp;
-
-       cgroup_assert_mutex_or_rcu_locked();
-
-       do {
-               last = pos;
-               /* ->prev isn't RCU safe, walk ->next till the end */
-               pos = NULL;
-               css_for_each_child(tmp, last)
-                       pos = tmp;
-       } while (pos);
-
-       return last;
-}
-
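Subtree pruning during a pre-order walk, as the comment above suggests, might look like this sketch; my_should_skip() and my_visit() are hypothetical, and the whole walk sits in a single RCU section for simplicity:

        static void my_walk(struct cgroup_subsys_state *root)
        {
                struct cgroup_subsys_state *pos;

                rcu_read_lock();
                css_for_each_descendant_pre(pos, root) {
                        if (my_should_skip(pos))
                                /* resume past this entire subtree */
                                pos = css_rightmost_descendant(pos);
                        else
                                my_visit(pos);
                }
                rcu_read_unlock();
        }
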
-static struct cgroup_subsys_state *
-css_leftmost_descendant(struct cgroup_subsys_state *pos)
-{
-       struct cgroup_subsys_state *last;
-
-       do {
-               last = pos;
-               pos = css_next_child(NULL, pos);
-       } while (pos);
-
-       return last;
-}
-
-/**
- * css_next_descendant_post - find the next descendant for post-order walk
- * @pos: the current position (%NULL to initiate traversal)
- * @root: css whose descendants to walk
- *
- * To be used by css_for_each_descendant_post().  Find the next descendant
- * to visit for post-order traversal of @root's descendants.  @root is
- * included in the iteration and the last node to be visited.
- *
- * While this function requires cgroup_mutex or RCU read locking, it
- * doesn't require the whole traversal to be contained in a single critical
- * section.  This function will return the correct next descendant as long
- * as both @pos and @root are accessible and @pos is a descendant of
- * @root.
- *
- * If a subsystem synchronizes ->css_online() and the start of iteration, a
- * css which finished ->css_online() is guaranteed to be visible in the
- * future iterations and will stay visible until the last reference is put.
- * A css which hasn't finished ->css_online() or already finished
- * ->css_offline() may show up during traversal.  It's each subsystem's
- * responsibility to synchronize against on/offlining.
- */
-struct cgroup_subsys_state *
-css_next_descendant_post(struct cgroup_subsys_state *pos,
-                        struct cgroup_subsys_state *root)
-{
-       struct cgroup_subsys_state *next;
-
-       cgroup_assert_mutex_or_rcu_locked();
-
-       /* if first iteration, visit leftmost descendant which may be @root */
-       if (!pos)
-               return css_leftmost_descendant(root);
-
-       /* if we visited @root, we're done */
-       if (pos == root)
-               return NULL;
-
-       /* if there's an unvisited sibling, visit its leftmost descendant */
-       next = css_next_child(pos, pos->parent);
-       if (next)
-               return css_leftmost_descendant(next);
-
-       /* no sibling left, visit parent */
-       return pos->parent;
-}
-
-/**
- * css_has_online_children - does a css have online children
- * @css: the target css
- *
- * Returns %true if @css has any online children; otherwise, %false.  This
- * function can be called from any context but the caller is responsible
- * for synchronizing against on/offlining as necessary.
- */
-bool css_has_online_children(struct cgroup_subsys_state *css)
-{
-       struct cgroup_subsys_state *child;
-       bool ret = false;
-
-       rcu_read_lock();
-       css_for_each_child(child, css) {
-               if (child->flags & CSS_ONLINE) {
-                       ret = true;
-                       break;
-               }
-       }
-       rcu_read_unlock();
-       return ret;
-}
-
-/**
- * css_task_iter_advance_css_set - advance a task iterator to the next css_set
- * @it: the iterator to advance
- *
- * Advance @it to the next css_set to walk.
- */
-static void css_task_iter_advance_css_set(struct css_task_iter *it)
-{
-       struct list_head *l = it->cset_pos;
-       struct cgrp_cset_link *link;
-       struct css_set *cset;
-
-       lockdep_assert_held(&css_set_lock);
-
-       /* Advance to the next non-empty css_set */
-       do {
-               l = l->next;
-               if (l == it->cset_head) {
-                       it->cset_pos = NULL;
-                       it->task_pos = NULL;
-                       return;
-               }
-
-               if (it->ss) {
-                       cset = container_of(l, struct css_set,
-                                           e_cset_node[it->ss->id]);
-               } else {
-                       link = list_entry(l, struct cgrp_cset_link, cset_link);
-                       cset = link->cset;
-               }
-       } while (!css_set_populated(cset));
-
-       it->cset_pos = l;
-
-       if (!list_empty(&cset->tasks))
-               it->task_pos = cset->tasks.next;
-       else
-               it->task_pos = cset->mg_tasks.next;
-
-       it->tasks_head = &cset->tasks;
-       it->mg_tasks_head = &cset->mg_tasks;
-
-       /*
-        * We don't keep css_sets locked across iteration steps and thus
-        * need to take steps to ensure that iteration can be resumed after
-        * the lock is re-acquired.  Iteration is performed at two levels -
-        * css_sets and tasks in them.
-        *
-        * Once created, a css_set never leaves its cgroup lists, so a
-        * pinned css_set is guaranteed to stay put and we can resume
-        * iteration afterwards.
-        *
-        * Tasks may leave @cset across iteration steps.  This is resolved
-        * by registering each iterator with the css_set currently being
-        * walked and making css_set_move_task() advance iterators whose
-        * next task is leaving.
-        */
-       if (it->cur_cset) {
-               list_del(&it->iters_node);
-               put_css_set_locked(it->cur_cset);
-       }
-       get_css_set(cset);
-       it->cur_cset = cset;
-       list_add(&it->iters_node, &cset->task_iters);
-}
-
-static void css_task_iter_advance(struct css_task_iter *it)
-{
-       struct list_head *l = it->task_pos;
-
-       lockdep_assert_held(&css_set_lock);
-       WARN_ON_ONCE(!l);
-
-       /*
-        * Advance iterator to find next entry.  cset->tasks is consumed
-        * first and then ->mg_tasks.  After ->mg_tasks, we move onto the
-        * next cset.
-        */
-       l = l->next;
-
-       if (l == it->tasks_head)
-               l = it->mg_tasks_head->next;
-
-       if (l == it->mg_tasks_head)
-               css_task_iter_advance_css_set(it);
-       else
-               it->task_pos = l;
-}
-
-/**
- * css_task_iter_start - initiate task iteration
- * @css: the css to walk tasks of
- * @it: the task iterator to use
- *
- * Initiate iteration through the tasks of @css.  The caller can call
- * css_task_iter_next() to walk through the tasks until the function
- * returns NULL.  On completion of iteration, css_task_iter_end() must be
- * called.
- */
-void css_task_iter_start(struct cgroup_subsys_state *css,
-                        struct css_task_iter *it)
-{
-       /* no one should try to iterate before mounting cgroups */
-       WARN_ON_ONCE(!use_task_css_set_links);
-
-       memset(it, 0, sizeof(*it));
-
-       spin_lock_irq(&css_set_lock);
-
-       it->ss = css->ss;
-
-       if (it->ss)
-               it->cset_pos = &css->cgroup->e_csets[css->ss->id];
-       else
-               it->cset_pos = &css->cgroup->cset_links;
-
-       it->cset_head = it->cset_pos;
-
-       css_task_iter_advance_css_set(it);
-
-       spin_unlock_irq(&css_set_lock);
-}
-
-/**
- * css_task_iter_next - return the next task for the iterator
- * @it: the task iterator being iterated
- *
- * The "next" function for task iteration.  @it should have been
- * initialized via css_task_iter_start().  Returns NULL when the iteration
- * reaches the end.
- */
-struct task_struct *css_task_iter_next(struct css_task_iter *it)
-{
-       if (it->cur_task) {
-               put_task_struct(it->cur_task);
-               it->cur_task = NULL;
-       }
-
-       spin_lock_irq(&css_set_lock);
-
-       if (it->task_pos) {
-               it->cur_task = list_entry(it->task_pos, struct task_struct,
-                                         cg_list);
-               get_task_struct(it->cur_task);
-               css_task_iter_advance(it);
-       }
-
-       spin_unlock_irq(&css_set_lock);
-
-       return it->cur_task;
-}
-
-/**
- * css_task_iter_end - finish task iteration
- * @it: the task iterator to finish
- *
- * Finish task iteration started by css_task_iter_start().
- */
-void css_task_iter_end(struct css_task_iter *it)
-{
-       if (it->cur_cset) {
-               spin_lock_irq(&css_set_lock);
-               list_del(&it->iters_node);
-               put_css_set_locked(it->cur_cset);
-               spin_unlock_irq(&css_set_lock);
-       }
-
-       if (it->cur_task)
-               put_task_struct(it->cur_task);
-}
-
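The three iterator calls compose into the documented start/next/end protocol; a consumer sketch (my_inspect() is invented, and each returned task stays referenced until the next css_task_iter_next() call):

        static void my_for_each_task(struct cgroup_subsys_state *css)
        {
                struct css_task_iter it;
                struct task_struct *task;

                css_task_iter_start(css, &it);
                while ((task = css_task_iter_next(&it)))
                        my_inspect(task);
                css_task_iter_end(&it);
        }
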
-/**
- * cgroup_transfer_tasks - move tasks from one cgroup to another
- * @to: cgroup to which the tasks will be moved
- * @from: cgroup in which the tasks currently reside
- *
- * Locking rules between cgroup_post_fork() and the migration path
- * guarantee that, if a task is forking while being migrated, the new child
- * is guaranteed to be either visible in the source cgroup after the
- * parent's migration is complete or put into the target cgroup.  No task
- * can slip out of migration through forking.
- */
-int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
-{
-       LIST_HEAD(preloaded_csets);
-       struct cgrp_cset_link *link;
-       struct css_task_iter it;
-       struct task_struct *task;
-       int ret;
-
-       if (!cgroup_may_migrate_to(to))
-               return -EBUSY;
-
-       mutex_lock(&cgroup_mutex);
-
-       percpu_down_write(&cgroup_threadgroup_rwsem);
-
-       /* all tasks in @from are being moved, all csets are source */
-       spin_lock_irq(&css_set_lock);
-       list_for_each_entry(link, &from->cset_links, cset_link)
-               cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
-       spin_unlock_irq(&css_set_lock);
-
-       ret = cgroup_migrate_prepare_dst(&preloaded_csets);
-       if (ret)
-               goto out_err;
-
-       /*
-        * Migrate tasks one-by-one until @from is empty.  This fails iff
-        * ->can_attach() fails.
-        */
-       do {
-               css_task_iter_start(&from->self, &it);
-               task = css_task_iter_next(&it);
-               if (task)
-                       get_task_struct(task);
-               css_task_iter_end(&it);
-
-               if (task) {
-                       ret = cgroup_migrate(task, false, to->root);
-                       if (!ret)
-                               trace_cgroup_transfer_tasks(to, task, false);
-                       put_task_struct(task);
-               }
-       } while (task && !ret);
-out_err:
-       cgroup_migrate_finish(&preloaded_csets);
-       percpu_up_write(&cgroup_threadgroup_rwsem);
-       mutex_unlock(&cgroup_mutex);
-       return ret;
-}
-
-/*
- * Stuff for reading the 'tasks'/'procs' files.
- *
- * Reading this file can return large amounts of data if a cgroup has
- * *lots* of attached tasks. So it may need several calls to read(),
- * but we cannot guarantee that the information we produce is correct
- * unless we produce it entirely atomically.
- *
- */
-
-/* which pidlist file are we talking about? */
-enum cgroup_filetype {
-       CGROUP_FILE_PROCS,
-       CGROUP_FILE_TASKS,
-};
-
-/*
- * A pidlist is a list of pids that virtually represents the contents of one
- * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
- * a pair (one each for procs, tasks) for each pid namespace that's relevant
- * to the cgroup.
- */
-struct cgroup_pidlist {
-       /*
-        * used to find which pidlist is wanted. doesn't change as long as
-        * this particular list stays in the list.
-        */
-       struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
-       /* array of xids */
-       pid_t *list;
-       /* how many elements the above list has */
-       int length;
-       /* each of these stored in a list by its cgroup */
-       struct list_head links;
-       /* pointer to the cgroup we belong to, for list removal purposes */
-       struct cgroup *owner;
-       /* for delayed destruction */
-       struct delayed_work destroy_dwork;
-};
-
-/*
- * The following two functions "fix" the issue where there are more pids
- * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
- * TODO: replace with a kernel-wide solution to this problem
- */
-#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
-static void *pidlist_allocate(int count)
-{
-       if (PIDLIST_TOO_LARGE(count))
-               return vmalloc(count * sizeof(pid_t));
-       else
-               return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
-}
-
-static void pidlist_free(void *p)
-{
-       kvfree(p);
-}
-
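The TODO above has an obvious shape with the kvmalloc helpers; a hedged sketch, assuming kvmalloc_array() is available in this tree (it pairs with the kvfree() already used by pidlist_free()):

        static void *pidlist_allocate(int count)
        {
                /* falls back from kmalloc to vmalloc automatically and
                 * checks the count * sizeof(pid_t) multiplication for
                 * overflow */
                return kvmalloc_array(count, sizeof(pid_t), GFP_KERNEL);
        }
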
-/*
- * Used to destroy all pidlists lingering waiting for destroy timer.  None
- * should be left afterwards.
- */
-static void cgroup_pidlist_destroy_all(struct cgroup *cgrp)
-{
-       struct cgroup_pidlist *l, *tmp_l;
-
-       mutex_lock(&cgrp->pidlist_mutex);
-       list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
-               mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
-       mutex_unlock(&cgrp->pidlist_mutex);
-
-       flush_workqueue(cgroup_pidlist_destroy_wq);
-       BUG_ON(!list_empty(&cgrp->pidlists));
-}
-
-static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
-{
-       struct delayed_work *dwork = to_delayed_work(work);
-       struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
-                                               destroy_dwork);
-       struct cgroup_pidlist *tofree = NULL;
-
-       mutex_lock(&l->owner->pidlist_mutex);
-
-       /*
-        * Destroy iff we didn't get queued again.  The state won't change
-        * as destroy_dwork can only be queued while locked.
-        */
-       if (!delayed_work_pending(dwork)) {
-               list_del(&l->links);
-               pidlist_free(l->list);
-               put_pid_ns(l->key.ns);
-               tofree = l;
-       }
-
-       mutex_unlock(&l->owner->pidlist_mutex);
-       kfree(tofree);
-}
-
-/*
- * pidlist_uniq - given a sorted pidlist, strip out all duplicate entries.
- * Returns the number of unique elements; e.g. a sorted {3, 3, 5, 5, 7}
- * becomes {3, 5, 7} and 3 is returned.
- */
-static int pidlist_uniq(pid_t *list, int length)
-{
-       int src, dest = 1;
-
-       /*
-        * we presume the 0th element is unique, so src starts at 1. trivial
-        * edge cases first; no work needs to be done for either
-        */
-       if (length == 0 || length == 1)
-               return length;
-       /* src and dest walk down the list; dest counts unique elements */
-       for (src = 1; src < length; src++) {
-               /* find next unique element */
-               while (list[src] == list[src-1]) {
-                       src++;
-                       if (src == length)
-                               goto after;
-               }
-               /* dest always points to where the next unique element goes */
-               list[dest] = list[src];
-               dest++;
-       }
-after:
-       return dest;
-}
-
-/*
- * The two pid files - tasks and cgroup.procs - guarantee that the result
- * is sorted, which forced this whole pidlist fiasco.  As pid order is
- * different per namespace, each namespace needs a differently sorted list,
- * making it impossible to use, for example, single rbtree of member tasks
- * sorted by task pointer.  As pidlists can be fairly large, allocating one
- * per open file is dangerous, so cgroup had to implement shared pool of
- * pidlists keyed by cgroup and namespace.
- *
- * All this extra complexity was caused by the original implementation
- * committing to an entirely unnecessary property.  In the long term, we
- * want to do away with it.  Explicitly scramble sort order if on the
- * default hierarchy so that no such expectation exists in the new
- * interface.
- *
- * Scrambling is done by swapping every two consecutive bits, which is a
- * non-identity one-to-one mapping that disturbs sort order sufficiently.
- */
-static pid_t pid_fry(pid_t pid)
-{
-       unsigned a = pid & 0x55555555;
-       unsigned b = pid & 0xAAAAAAAA;
-
-       return (a << 1) | (b >> 1);
-}
-
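A quick userspace check of the scramble, with fry() copied verbatim from pid_fry() above:

        #include <assert.h>

        static int fry(int pid)
        {
                unsigned a = pid & 0x55555555;
                unsigned b = pid & 0xAAAAAAAA;

                return (a << 1) | (b >> 1);
        }

        int main(void)
        {
                assert(fry(1) == 2);            /* 0b01   -> 0b10   */
                assert(fry(6) == 9);            /* 0b0110 -> 0b1001 */
                assert(fry(fry(123)) == 123);   /* swapping pairs twice is a no-op */
                return 0;
        }
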
-static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid)
-{
-       if (cgroup_on_dfl(cgrp))
-               return pid_fry(pid);
-       else
-               return pid;
-}
-
-static int cmppid(const void *a, const void *b)
-{
-       return *(pid_t *)a - *(pid_t *)b;
-}
-
-static int fried_cmppid(const void *a, const void *b)
-{
-       return pid_fry(*(pid_t *)a) - pid_fry(*(pid_t *)b);
-}
-
-static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
-                                                 enum cgroup_filetype type)
-{
-       struct cgroup_pidlist *l;
-       /* don't need task_nsproxy() if we're looking at ourself */
-       struct pid_namespace *ns = task_active_pid_ns(current);
-
-       lockdep_assert_held(&cgrp->pidlist_mutex);
-
-       list_for_each_entry(l, &cgrp->pidlists, links)
-               if (l->key.type == type && l->key.ns == ns)
-                       return l;
-       return NULL;
-}
-
-/*
- * find the appropriate pidlist for our purpose (given procs vs tasks);
- * returns the matching pidlist, creating it if necessary, or NULL if
- * we're out of memory.  Must be called with cgrp->pidlist_mutex held,
- * which remains held either way.
- */
-static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
-                                               enum cgroup_filetype type)
-{
-       struct cgroup_pidlist *l;
-
-       lockdep_assert_held(&cgrp->pidlist_mutex);
-
-       l = cgroup_pidlist_find(cgrp, type);
-       if (l)
-               return l;
-
-       /* entry not found; create a new one */
-       l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
-       if (!l)
-               return l;
-
-       INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
-       l->key.type = type;
-       /* don't need task_nsproxy() if we're looking at ourself */
-       l->key.ns = get_pid_ns(task_active_pid_ns(current));
-       l->owner = cgrp;
-       list_add(&l->links, &cgrp->pidlists);
-       return l;
-}
-
-/*
- * Load a cgroup's pidarray with either procs' tgids or tasks' pids
- */
-static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
-                             struct cgroup_pidlist **lp)
-{
-       pid_t *array;
-       int length;
-       int pid, n = 0; /* used for populating the array */
-       struct css_task_iter it;
-       struct task_struct *tsk;
-       struct cgroup_pidlist *l;
-
-       lockdep_assert_held(&cgrp->pidlist_mutex);
-
-       /*
-        * If cgroup gets more users after we read count, we won't have
-        * enough space - tough.  This race is indistinguishable to the
-        * caller from the case that the additional cgroup users didn't
-        * show up until sometime later on.
-        */
-       length = cgroup_task_count(cgrp);
-       array = pidlist_allocate(length);
-       if (!array)
-               return -ENOMEM;
-       /* now, populate the array */
-       css_task_iter_start(&cgrp->self, &it);
-       while ((tsk = css_task_iter_next(&it))) {
-               if (unlikely(n == length))
-                       break;
-               /* get tgid or pid for procs or tasks file respectively */
-               if (type == CGROUP_FILE_PROCS)
-                       pid = task_tgid_vnr(tsk);
-               else
-                       pid = task_pid_vnr(tsk);
-               if (pid > 0) /* make sure to only use valid results */
-                       array[n++] = pid;
-       }
-       css_task_iter_end(&it);
-       length = n;
-       /* now sort & (if procs) strip out duplicates */
-       if (cgroup_on_dfl(cgrp))
-               sort(array, length, sizeof(pid_t), fried_cmppid, NULL);
-       else
-               sort(array, length, sizeof(pid_t), cmppid, NULL);
-       if (type == CGROUP_FILE_PROCS)
-               length = pidlist_uniq(array, length);
-
-       l = cgroup_pidlist_find_create(cgrp, type);
-       if (!l) {
-               pidlist_free(array);
-               return -ENOMEM;
-       }
-
-       /* store array, freeing old if necessary */
-       pidlist_free(l->list);
-       l->list = array;
-       l->length = length;
-       *lp = l;
-       return 0;
-}
-
-/**
- * cgroupstats_build - build and fill cgroupstats
- * @stats: cgroupstats to fill information into
- * @dentry: A dentry entry belonging to the cgroup for which stats have
- * been requested.
- *
- * Build and fill cgroupstats so that taskstats can export it to user
- * space.
- */
-int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
-{
-       struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
-       struct cgroup *cgrp;
-       struct css_task_iter it;
-       struct task_struct *tsk;
-
-       /* the kernfs_node should belong to cgroupfs and be a directory */
-       if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
-           kernfs_type(kn) != KERNFS_DIR)
-               return -EINVAL;
-
-       mutex_lock(&cgroup_mutex);
-
-       /*
-        * We aren't being called from kernfs and there's no guarantee on
-        * @kn->priv's validity.  For this and css_tryget_online_from_dir(),
-        * @kn->priv is RCU safe.  Let's do the RCU dancing.
-        */
-       rcu_read_lock();
-       cgrp = rcu_dereference(kn->priv);
-       if (!cgrp || cgroup_is_dead(cgrp)) {
-               rcu_read_unlock();
-               mutex_unlock(&cgroup_mutex);
-               return -ENOENT;
-       }
-       rcu_read_unlock();
-
-       css_task_iter_start(&cgrp->self, &it);
-       while ((tsk = css_task_iter_next(&it))) {
-               switch (tsk->state) {
-               case TASK_RUNNING:
-                       stats->nr_running++;
-                       break;
-               case TASK_INTERRUPTIBLE:
-                       stats->nr_sleeping++;
-                       break;
-               case TASK_UNINTERRUPTIBLE:
-                       stats->nr_uninterruptible++;
-                       break;
-               case TASK_STOPPED:
-                       stats->nr_stopped++;
-                       break;
-               default:
-                       if (delayacct_is_task_waiting_on_io(tsk))
-                               stats->nr_io_wait++;
-                       break;
-               }
-       }
-       css_task_iter_end(&it);
-
-       mutex_unlock(&cgroup_mutex);
-       return 0;
-}
-
-
-/*
- * seq_file methods for the tasks/procs files. The seq_file position is the
- * next pid to display; the seq_file iterator is a pointer to the pid
- * in the matching cgroup_pidlist's ->list array.
- */
-
-static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
-{
-       /*
-        * Initially we receive a position value that corresponds to
-        * one more than the last pid shown (or 0 on the first call or
-        * after a seek to the start). Use a binary search to find the
-        * next pid to display, if any.
-        */
-       struct kernfs_open_file *of = s->private;
-       struct cgroup *cgrp = seq_css(s)->cgroup;
-       struct cgroup_pidlist *l;
-       enum cgroup_filetype type = seq_cft(s)->private;
-       int index = 0, pid = *pos;
-       int *iter, ret;
-
-       mutex_lock(&cgrp->pidlist_mutex);
-
-       /*
-        * !NULL @of->priv indicates that this isn't the first start()
-        * after open.  If the matching pidlist is around, we can use that.
-        * Look for it.  Note that @of->priv can't be used directly.  It
-        * could already have been destroyed.
-        */
-       if (of->priv)
-               of->priv = cgroup_pidlist_find(cgrp, type);
-
-       /*
-        * Either this is the first start() after open or the matching
-        * pidlist has been destroyed in between.  Create a new one.
-        */
-       if (!of->priv) {
-               ret = pidlist_array_load(cgrp, type,
-                                        (struct cgroup_pidlist **)&of->priv);
-               if (ret)
-                       return ERR_PTR(ret);
-       }
-       l = of->priv;
-
-       if (pid) {
-               int end = l->length;
-
-               while (index < end) {
-                       int mid = (index + end) / 2;
-                       if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) {
-                               index = mid;
-                               break;
-                       } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid)
-                               index = mid + 1;
-                       else
-                               end = mid;
-               }
-       }
-       /* If we're off the end of the array, we're done */
-       if (index >= l->length)
-               return NULL;
-       /* Update the abstract position to be the actual pid that we found */
-       iter = l->list + index;
-       *pos = cgroup_pid_fry(cgrp, *iter);
-       return iter;
-}
-
-static void cgroup_pidlist_stop(struct seq_file *s, void *v)
-{
-       struct kernfs_open_file *of = s->private;
-       struct cgroup_pidlist *l = of->priv;
-
-       if (l)
-               mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
-                                CGROUP_PIDLIST_DESTROY_DELAY);
-       mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
-}
-
-static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
-{
-       struct kernfs_open_file *of = s->private;
-       struct cgroup_pidlist *l = of->priv;
-       pid_t *p = v;
-       pid_t *end = l->list + l->length;
-       /*
-        * Advance to the next pid in the array. If this goes off the
-        * end, we're done
-        */
-       p++;
-       if (p >= end) {
-               return NULL;
-       } else {
-               *pos = cgroup_pid_fry(seq_css(s)->cgroup, *p);
-               return p;
-       }
-}
-
-static int cgroup_pidlist_show(struct seq_file *s, void *v)
-{
-       seq_printf(s, "%d\n", *(int *)v);
-
-       return 0;
-}
-
-static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
-                                        struct cftype *cft)
-{
-       return notify_on_release(css->cgroup);
-}
-
-static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
-                                         struct cftype *cft, u64 val)
-{
-       if (val)
-               set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
-       else
-               clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
-       return 0;
-}
-
-static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
-                                     struct cftype *cft)
-{
-       return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
-}
-
-static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
-                                      struct cftype *cft, u64 val)
-{
-       if (val)
-               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
-       else
-               clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
-       return 0;
-}
-
-/* cgroup core interface files for the default hierarchy */
-static struct cftype cgroup_dfl_base_files[] = {
-       {
-               .name = "cgroup.procs",
-               .file_offset = offsetof(struct cgroup, procs_file),
-               .seq_start = cgroup_pidlist_start,
-               .seq_next = cgroup_pidlist_next,
-               .seq_stop = cgroup_pidlist_stop,
-               .seq_show = cgroup_pidlist_show,
-               .private = CGROUP_FILE_PROCS,
-               .write = cgroup_procs_write,
-       },
-       {
-               .name = "cgroup.controllers",
-               .seq_show = cgroup_controllers_show,
-       },
-       {
-               .name = "cgroup.subtree_control",
-               .seq_show = cgroup_subtree_control_show,
-               .write = cgroup_subtree_control_write,
-       },
-       {
-               .name = "cgroup.events",
-               .flags = CFTYPE_NOT_ON_ROOT,
-               .file_offset = offsetof(struct cgroup, events_file),
-               .seq_show = cgroup_events_show,
-       },
-       { }     /* terminate */
-};
-
-/* cgroup core interface files for the legacy hierarchies */
-static struct cftype cgroup_legacy_base_files[] = {
-       {
-               .name = "cgroup.procs",
-               .seq_start = cgroup_pidlist_start,
-               .seq_next = cgroup_pidlist_next,
-               .seq_stop = cgroup_pidlist_stop,
-               .seq_show = cgroup_pidlist_show,
-               .private = CGROUP_FILE_PROCS,
-               .write = cgroup_procs_write,
-       },
-       {
-               .name = "cgroup.clone_children",
-               .read_u64 = cgroup_clone_children_read,
-               .write_u64 = cgroup_clone_children_write,
-       },
-       {
-               .name = "cgroup.sane_behavior",
-               .flags = CFTYPE_ONLY_ON_ROOT,
-               .seq_show = cgroup_sane_behavior_show,
-       },
-       {
-               .name = "tasks",
-               .seq_start = cgroup_pidlist_start,
-               .seq_next = cgroup_pidlist_next,
-               .seq_stop = cgroup_pidlist_stop,
-               .seq_show = cgroup_pidlist_show,
-               .private = CGROUP_FILE_TASKS,
-               .write = cgroup_tasks_write,
-       },
-       {
-               .name = "notify_on_release",
-               .read_u64 = cgroup_read_notify_on_release,
-               .write_u64 = cgroup_write_notify_on_release,
-       },
-       {
-               .name = "release_agent",
-               .flags = CFTYPE_ONLY_ON_ROOT,
-               .seq_show = cgroup_release_agent_show,
-               .write = cgroup_release_agent_write,
-               .max_write_len = PATH_MAX - 1,
-       },
-       { }     /* terminate */
-};
-
-/*
- * css destruction is four-stage process.
- *
- * 1. Destruction starts.  Killing of the percpu_ref is initiated.
- *    Implemented in kill_css().
- *
- * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
- *    and thus css_tryget_online() is guaranteed to fail, the css can be
- *    offlined by invoking offline_css().  After offlining, the base ref is
- *    put.  Implemented in css_killed_work_fn().
- *
- * 3. When the percpu_ref reaches zero, the only possible remaining
- *    accessors are inside RCU read sections.  css_release() schedules the
- *    RCU callback.
- *
- * 4. After the grace period, the css can be freed.  Implemented in
- *    css_free_work_fn().
- *
- * It is actually hairier because both steps 2 and 4 require process context
- * and thus involve punting to css->destroy_work adding two additional
- * steps to the already complex sequence.
- */
-static void css_free_work_fn(struct work_struct *work)
-{
-       struct cgroup_subsys_state *css =
-               container_of(work, struct cgroup_subsys_state, destroy_work);
-       struct cgroup_subsys *ss = css->ss;
-       struct cgroup *cgrp = css->cgroup;
-
-       percpu_ref_exit(&css->refcnt);
-
-       if (ss) {
-               /* css free path */
-               struct cgroup_subsys_state *parent = css->parent;
-               int id = css->id;
-
-               ss->css_free(css);
-               cgroup_idr_remove(&ss->css_idr, id);
-               cgroup_put(cgrp);
-
-               if (parent)
-                       css_put(parent);
-       } else {
-               /* cgroup free path */
-               atomic_dec(&cgrp->root->nr_cgrps);
-               cgroup_pidlist_destroy_all(cgrp);
-               cancel_work_sync(&cgrp->release_agent_work);
-
-               if (cgroup_parent(cgrp)) {
-                       /*
-                        * We get a ref to the parent, and put the ref when
-                        * this cgroup is being freed, so it's guaranteed
-                        * that the parent won't be destroyed before its
-                        * children.
-                        */
-                       cgroup_put(cgroup_parent(cgrp));
-                       kernfs_put(cgrp->kn);
-                       kfree(cgrp);
-               } else {
-                       /*
-                        * This is root cgroup's refcnt reaching zero,
-                        * which indicates that the root should be
-                        * released.
-                        */
-                       cgroup_destroy_root(cgrp->root);
-               }
-       }
-}
-
-static void css_free_rcu_fn(struct rcu_head *rcu_head)
-{
-       struct cgroup_subsys_state *css =
-               container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
-
-       INIT_WORK(&css->destroy_work, css_free_work_fn);
-       queue_work(cgroup_destroy_wq, &css->destroy_work);
-}
-
-static void css_release_work_fn(struct work_struct *work)
-{
-       struct cgroup_subsys_state *css =
-               container_of(work, struct cgroup_subsys_state, destroy_work);
-       struct cgroup_subsys *ss = css->ss;
-       struct cgroup *cgrp = css->cgroup;
-
-       mutex_lock(&cgroup_mutex);
-
-       css->flags |= CSS_RELEASED;
-       list_del_rcu(&css->sibling);
-
-       if (ss) {
-               /* css release path */
-               cgroup_idr_replace(&ss->css_idr, NULL, css->id);
-               if (ss->css_released)
-                       ss->css_released(css);
-       } else {
-               /* cgroup release path */
-               trace_cgroup_release(cgrp);
-
-               cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
-               cgrp->id = -1;
-
-               /*
-                * There are two control paths which try to determine
-                * cgroup from dentry without going through kernfs -
-                * cgroupstats_build() and css_tryget_online_from_dir().
-                * Those are supported by RCU protecting clearing of
-                * cgrp->kn->priv backpointer.
-                */
-               if (cgrp->kn)
-                       RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
-                                        NULL);
-
-               cgroup_bpf_put(cgrp);
-       }
-
-       mutex_unlock(&cgroup_mutex);
-
-       call_rcu(&css->rcu_head, css_free_rcu_fn);
-}
-
-static void css_release(struct percpu_ref *ref)
-{
-       struct cgroup_subsys_state *css =
-               container_of(ref, struct cgroup_subsys_state, refcnt);
-
-       INIT_WORK(&css->destroy_work, css_release_work_fn);
-       queue_work(cgroup_destroy_wq, &css->destroy_work);
-}
-
-static void init_and_link_css(struct cgroup_subsys_state *css,
-                             struct cgroup_subsys *ss, struct cgroup *cgrp)
-{
-       lockdep_assert_held(&cgroup_mutex);
-
-       cgroup_get(cgrp);
-
-       memset(css, 0, sizeof(*css));
-       css->cgroup = cgrp;
-       css->ss = ss;
-       css->id = -1;
-       INIT_LIST_HEAD(&css->sibling);
-       INIT_LIST_HEAD(&css->children);
-       css->serial_nr = css_serial_nr_next++;
-       atomic_set(&css->online_cnt, 0);
-
-       if (cgroup_parent(cgrp)) {
-               css->parent = cgroup_css(cgroup_parent(cgrp), ss);
-               css_get(css->parent);
-       }
-
-       BUG_ON(cgroup_css(cgrp, ss));
-}
-
-/* invoke ->css_online() on a new CSS and mark it online if successful */
-static int online_css(struct cgroup_subsys_state *css)
-{
-       struct cgroup_subsys *ss = css->ss;
-       int ret = 0;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       if (ss->css_online)
-               ret = ss->css_online(css);
-       if (!ret) {
-               css->flags |= CSS_ONLINE;
-               rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
-
-               atomic_inc(&css->online_cnt);
-               if (css->parent)
-                       atomic_inc(&css->parent->online_cnt);
-       }
-       return ret;
-}
-
-/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
-static void offline_css(struct cgroup_subsys_state *css)
-{
-       struct cgroup_subsys *ss = css->ss;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       if (!(css->flags & CSS_ONLINE))
-               return;
-
-       if (ss->css_reset)
-               ss->css_reset(css);
-
-       if (ss->css_offline)
-               ss->css_offline(css);
-
-       css->flags &= ~CSS_ONLINE;
-       RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
-
-       wake_up_all(&css->cgroup->offline_waitq);
-}
-
-/**
- * css_create - create a cgroup_subsys_state
- * @cgrp: the cgroup new css will be associated with
- * @ss: the subsys of new css
- *
- * Create a new css associated with @cgrp - @ss pair.  On success, the new
- * css is online and installed in @cgrp.  This function doesn't create the
- * interface files.  Returns the new css on success, an ERR_PTR on failure.
- */
-static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
-                                             struct cgroup_subsys *ss)
-{
-       struct cgroup *parent = cgroup_parent(cgrp);
-       struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
-       struct cgroup_subsys_state *css;
-       int err;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       css = ss->css_alloc(parent_css);
-       if (!css)
-               css = ERR_PTR(-ENOMEM);
-       if (IS_ERR(css))
-               return css;
-
-       init_and_link_css(css, ss, cgrp);
-
-       err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
-       if (err)
-               goto err_free_css;
-
-       err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
-       if (err < 0)
-               goto err_free_css;
-       css->id = err;
-
-       /* @css is ready to be brought online now, make it visible */
-       list_add_tail_rcu(&css->sibling, &parent_css->children);
-       cgroup_idr_replace(&ss->css_idr, css, css->id);
-
-       err = online_css(css);
-       if (err)
-               goto err_list_del;
-
-       if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
-           cgroup_parent(parent)) {
-               pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
-                       current->comm, current->pid, ss->name);
-               if (!strcmp(ss->name, "memory"))
-                       pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
-               ss->warned_broken_hierarchy = true;
-       }
-
-       return css;
-
-err_list_del:
-       list_del_rcu(&css->sibling);
-err_free_css:
-       call_rcu(&css->rcu_head, css_free_rcu_fn);
-       return ERR_PTR(err);
-}
-
-/*
- * The returned cgroup is fully initialized including its control mask, but
- * it isn't associated with its kernfs_node and doesn't have the control
- * mask applied.
- */
-static struct cgroup *cgroup_create(struct cgroup *parent)
-{
-       struct cgroup_root *root = parent->root;
-       struct cgroup *cgrp, *tcgrp;
-       int level = parent->level + 1;
-       int ret;
-
-       /* allocate the cgroup and its ID, 0 is reserved for the root */
-       cgrp = kzalloc(sizeof(*cgrp) +
-                      sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL);
-       if (!cgrp)
-               return ERR_PTR(-ENOMEM);
-
-       ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
-       if (ret)
-               goto out_free_cgrp;
-
-       /*
-        * Temporarily set the pointer to NULL, so idr_find() won't return
-        * a half-baked cgroup.
-        */
-       cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
-       if (cgrp->id < 0) {
-               ret = -ENOMEM;
-               goto out_cancel_ref;
-       }
-
-       init_cgroup_housekeeping(cgrp);
-
-       cgrp->self.parent = &parent->self;
-       cgrp->root = root;
-       cgrp->level = level;
-
-       for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp))
-               cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
-
-       if (notify_on_release(parent))
-               set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
-
-       if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
-               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
-
-       cgrp->self.serial_nr = css_serial_nr_next++;
-
-       /* allocation complete, commit to creation */
-       list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
-       atomic_inc(&root->nr_cgrps);
-       cgroup_get(parent);
-
-       /*
-        * @cgrp is now fully operational.  If something fails after this
-        * point, it'll be released via the normal destruction path.
-        */
-       cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
-
-       /*
-        * On the default hierarchy, a child doesn't automatically inherit
-        * subtree_control from the parent.  Each is configured manually.
-        */
-       if (!cgroup_on_dfl(cgrp))
-               cgrp->subtree_control = cgroup_control(cgrp);
-
-       if (parent)
-               cgroup_bpf_inherit(cgrp, parent);
-
-       cgroup_propagate_control(cgrp);
-
-       return cgrp;
-
-out_cancel_ref:
-       percpu_ref_exit(&cgrp->self.refcnt);
-out_free_cgrp:
-       kfree(cgrp);
-       return ERR_PTR(ret);
-}
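
For context on the subtree_control comment in cgroup_create() above: on the default hierarchy the per-level configuration is done from userspace by writing to cgroup.subtree_control. A minimal sketch, assuming a v2 hierarchy mounted at /sys/fs/cgroup with an existing "parent" group (path and controller names are illustrative):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* illustrative path; requires an already-created "parent" group */
	int fd = open("/sys/fs/cgroup/parent/cgroup.subtree_control", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* enable the memory and pids controllers for parent's children */
	if (write(fd, "+memory +pids", strlen("+memory +pids")) < 0)
		perror("write");
	close(fd);
	return 0;
}
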
-
-static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
-                       umode_t mode)
-{
-       struct cgroup *parent, *cgrp;
-       struct kernfs_node *kn;
-       int ret;
-
-       /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
-       if (strchr(name, '\n'))
-               return -EINVAL;
-
-       parent = cgroup_kn_lock_live(parent_kn, false);
-       if (!parent)
-               return -ENODEV;
-
-       cgrp = cgroup_create(parent);
-       if (IS_ERR(cgrp)) {
-               ret = PTR_ERR(cgrp);
-               goto out_unlock;
-       }
-
-       /* create the directory */
-       kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
-       if (IS_ERR(kn)) {
-               ret = PTR_ERR(kn);
-               goto out_destroy;
-       }
-       cgrp->kn = kn;
-
-       /*
-        * This extra ref will be put in cgroup_free_fn() and guarantees
-        * that @cgrp->kn is always accessible.
-        */
-       kernfs_get(kn);
-
-       ret = cgroup_kn_set_ugid(kn);
-       if (ret)
-               goto out_destroy;
-
-       ret = css_populate_dir(&cgrp->self);
-       if (ret)
-               goto out_destroy;
-
-       ret = cgroup_apply_control_enable(cgrp);
-       if (ret)
-               goto out_destroy;
-
-       trace_cgroup_mkdir(cgrp);
-
-       /* let's create and online css's */
-       kernfs_activate(kn);
-
-       ret = 0;
-       goto out_unlock;
-
-out_destroy:
-       cgroup_destroy_locked(cgrp);
-out_unlock:
-       cgroup_kn_unlock(parent_kn);
-       return ret;
-}
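
cgroup_mkdir() is not called directly by userspace; it is reached through kernfs_syscall_ops when a directory is created under a cgroup mount. A hedged sketch of the triggering side (the mount point is an assumption):

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	/* a plain mkdir() on a cgroup mount lands in cgroup_mkdir() above */
	if (mkdir("/sys/fs/cgroup/unified/mygrp", 0755) < 0) {
		perror("mkdir");
		return 1;
	}
	return 0;
}
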
-
-/*
- * This is called when the refcnt of a css is confirmed to be killed.
- * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
- * initiate destruction and put the css ref from kill_css().
- */
-static void css_killed_work_fn(struct work_struct *work)
-{
-       struct cgroup_subsys_state *css =
-               container_of(work, struct cgroup_subsys_state, destroy_work);
-
-       mutex_lock(&cgroup_mutex);
-
-       do {
-               offline_css(css);
-               css_put(css);
-               /* @css can't go away while we're holding cgroup_mutex */
-               css = css->parent;
-       } while (css && atomic_dec_and_test(&css->online_cnt));
-
-       mutex_unlock(&cgroup_mutex);
-}
-
-/* css kill confirmation processing requires process context, bounce */
-static void css_killed_ref_fn(struct percpu_ref *ref)
-{
-       struct cgroup_subsys_state *css =
-               container_of(ref, struct cgroup_subsys_state, refcnt);
-
-       if (atomic_dec_and_test(&css->online_cnt)) {
-               INIT_WORK(&css->destroy_work, css_killed_work_fn);
-               queue_work(cgroup_destroy_wq, &css->destroy_work);
-       }
-}
-
-/**
- * kill_css - destroy a css
- * @css: css to destroy
- *
- * This function initiates destruction of @css by removing cgroup interface
- * files and putting its base reference.  ->css_offline() will be invoked
- * asynchronously once css_tryget_online() is guaranteed to fail and when
- * the reference count reaches zero, @css will be released.
- */
-static void kill_css(struct cgroup_subsys_state *css)
-{
-       lockdep_assert_held(&cgroup_mutex);
-
-       /*
-        * This must happen before css is disassociated with its cgroup.
-        * See seq_css() for details.
-        */
-       css_clear_dir(css);
-
-       /*
-        * Killing would put the base ref, but we need to keep it alive
-        * until after ->css_offline().
-        */
-       css_get(css);
-
-       /*
-        * cgroup core guarantees that, by the time ->css_offline() is
-        * invoked, no new css reference will be given out via
-        * css_tryget_online().  We can't simply call percpu_ref_kill() and
-        * proceed to offlining css's because percpu_ref_kill() doesn't
-        * guarantee that the ref is seen as killed on all CPUs on return.
-        *
-        * Use percpu_ref_kill_and_confirm() to get notifications as each
-        * css is confirmed to be seen as killed on all CPUs.
-        */
-       percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
-}
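
The kill-and-confirm pattern in isolation, as a hedged sketch with made-up names (my_obj, my_confirm_kill, my_kill); in the code above the confirmation callback is css_killed_ref_fn():

#include <linux/percpu-refcount.h>
#include <linux/workqueue.h>

struct my_obj {
	struct percpu_ref	ref;
	struct work_struct	teardown_work;
};

/* runs once every CPU is guaranteed to see the ref as killed */
static void my_confirm_kill(struct percpu_ref *ref)
{
	struct my_obj *obj = container_of(ref, struct my_obj, ref);

	/* tryget-style lookups now reliably fail; finish in process context */
	schedule_work(&obj->teardown_work);
}

static void my_kill(struct my_obj *obj)
{
	/* percpu_ref_kill() alone may return before all CPUs see the kill */
	percpu_ref_kill_and_confirm(&obj->ref, my_confirm_kill);
}
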
-
-/**
- * cgroup_destroy_locked - the first stage of cgroup destruction
- * @cgrp: cgroup to be destroyed
- *
- * css's make use of percpu refcnts whose killing latency shouldn't be
- * exposed to userland and are RCU protected.  Also, cgroup core needs to
- * guarantee that css_tryget_online() won't succeed by the time
- * ->css_offline() is invoked.  To satisfy all the requirements,
- * destruction is implemented in the following two steps.
- *
- * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
- *     userland visible parts and start killing the percpu refcnts of
- *     css's.  Set up so that the next stage will be kicked off once all
- *     the percpu refcnts are confirmed to be killed.
- *
- * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
- *     rest of destruction.  Once all cgroup references are gone, the
- *     cgroup is RCU-freed.
- *
- * This function implements s1.  After this step, @cgrp is gone as far as
- * the userland is concerned and a new cgroup with the same name may be
- * created.  As cgroup doesn't care about the names internally, this
- * doesn't cause any problem.
- */
-static int cgroup_destroy_locked(struct cgroup *cgrp)
-       __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
-{
-       struct cgroup_subsys_state *css;
-       struct cgrp_cset_link *link;
-       int ssid;
-
-       lockdep_assert_held(&cgroup_mutex);
-
-       /*
-        * Only migration can raise populated from zero and we're already
-        * holding cgroup_mutex.
-        */
-       if (cgroup_is_populated(cgrp))
-               return -EBUSY;
-
-       /*
-        * Make sure there are no live children.  We can't test emptiness of
-        * ->self.children as dead children linger on it while being
-        * drained; otherwise, "rmdir parent/child parent" may fail.
-        */
-       if (css_has_online_children(&cgrp->self))
-               return -EBUSY;
-
-       /*
-        * Mark @cgrp and the associated csets dead.  The former prevents
-        * further task migration and child creation by disabling
-        * cgroup_lock_live_group().  The latter makes the csets ignored by
-        * the migration path.
-        */
-       cgrp->self.flags &= ~CSS_ONLINE;
-
-       spin_lock_irq(&css_set_lock);
-       list_for_each_entry(link, &cgrp->cset_links, cset_link)
-               link->cset->dead = true;
-       spin_unlock_irq(&css_set_lock);
-
-       /* initiate massacre of all css's */
-       for_each_css(css, ssid, cgrp)
-               kill_css(css);
-
-       /*
-        * Remove @cgrp directory along with the base files.  @cgrp has an
-        * extra ref on its kn.
-        */
-       kernfs_remove(cgrp->kn);
-
-       check_for_release(cgroup_parent(cgrp));
-
-       /* put the base reference */
-       percpu_ref_kill(&cgrp->self.refcnt);
-
-       return 0;
-}
-
-static int cgroup_rmdir(struct kernfs_node *kn)
-{
-       struct cgroup *cgrp;
-       int ret = 0;
-
-       cgrp = cgroup_kn_lock_live(kn, false);
-       if (!cgrp)
-               return 0;
-
-       ret = cgroup_destroy_locked(cgrp);
-
-       if (!ret)
-               trace_cgroup_rmdir(cgrp);
-
-       cgroup_kn_unlock(kn);
-       return ret;
-}
-
-static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
-       .remount_fs             = cgroup_remount,
-       .show_options           = cgroup_show_options,
-       .mkdir                  = cgroup_mkdir,
-       .rmdir                  = cgroup_rmdir,
-       .rename                 = cgroup_rename,
-       .show_path              = cgroup_show_path,
-};
-
-static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
-{
-       struct cgroup_subsys_state *css;
-
-       pr_debug("Initializing cgroup subsys %s\n", ss->name);
-
-       mutex_lock(&cgroup_mutex);
-
-       idr_init(&ss->css_idr);
-       INIT_LIST_HEAD(&ss->cfts);
-
-       /* Create the root cgroup state for this subsystem */
-       ss->root = &cgrp_dfl_root;
-       css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
-       /* We don't handle early failures gracefully */
-       BUG_ON(IS_ERR(css));
-       init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
-
-       /*
-        * Root csses are never destroyed and we can't initialize
-        * percpu_ref during early init.  Disable refcnting.
-        */
-       css->flags |= CSS_NO_REF;
-
-       if (early) {
-               /* allocation can't be done safely during early init */
-               css->id = 1;
-       } else {
-               css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
-               BUG_ON(css->id < 0);
-       }
-
-       /*
-        * Update the init_css_set to contain a subsys pointer to this
-        * state - since the subsystem is newly registered, all tasks and
-        * hence the init_css_set is in the subsystem's root cgroup.
-        */
-       init_css_set.subsys[ss->id] = css;
-
-       have_fork_callback |= (bool)ss->fork << ss->id;
-       have_exit_callback |= (bool)ss->exit << ss->id;
-       have_free_callback |= (bool)ss->free << ss->id;
-       have_canfork_callback |= (bool)ss->can_fork << ss->id;
-
-       /*
-        * At system boot, before all subsystems have been registered, no
-        * tasks have been forked, so we don't need to invoke fork
-        * callbacks here.
-        */
-       BUG_ON(!list_empty(&init_task.tasks));
-
-       BUG_ON(online_css(css));
-
-       mutex_unlock(&cgroup_mutex);
-}
-
-/**
- * cgroup_init_early - cgroup initialization at system boot
- *
- * Initialize cgroups at system boot, and initialize any
- * subsystems that request early init.
- */
-int __init cgroup_init_early(void)
-{
-       static struct cgroup_sb_opts __initdata opts;
-       struct cgroup_subsys *ss;
-       int i;
-
-       init_cgroup_root(&cgrp_dfl_root, &opts);
-       cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
-
-       RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
-
-       for_each_subsys(ss, i) {
-               WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
-                    "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
-                    i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
-                    ss->id, ss->name);
-               WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
-                    "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
-
-               ss->id = i;
-               ss->name = cgroup_subsys_name[i];
-               if (!ss->legacy_name)
-                       ss->legacy_name = cgroup_subsys_name[i];
-
-               if (ss->early_init)
-                       cgroup_init_subsys(ss, true);
-       }
-       return 0;
-}
-
-static u16 cgroup_disable_mask __initdata;
-
-/**
- * cgroup_init - cgroup initialization
- *
- * Register cgroup filesystem and /proc file, and initialize
- * any subsystems that didn't request early init.
- */
-int __init cgroup_init(void)
-{
-       struct cgroup_subsys *ss;
-       int ssid;
-
-       BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
-       BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
-       BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
-       BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
-
-       /*
-        * The latency of the synchronize_sched() is too high for cgroups,
-        * avoid it at the cost of forcing all readers into the slow path.
-        */
-       rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
-
-       get_user_ns(init_cgroup_ns.user_ns);
-
-       mutex_lock(&cgroup_mutex);
-
-       /*
-        * Add init_css_set to the hash table so that dfl_root can link to
-        * it during init.
-        */
-       hash_add(css_set_table, &init_css_set.hlist,
-                css_set_hash(init_css_set.subsys));
-
-       BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
-
-       mutex_unlock(&cgroup_mutex);
-
-       for_each_subsys(ss, ssid) {
-               if (ss->early_init) {
-                       struct cgroup_subsys_state *css =
-                               init_css_set.subsys[ss->id];
-
-                       css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
-                                                  GFP_KERNEL);
-                       BUG_ON(css->id < 0);
-               } else {
-                       cgroup_init_subsys(ss, false);
-               }
-
-               list_add_tail(&init_css_set.e_cset_node[ssid],
-                             &cgrp_dfl_root.cgrp.e_csets[ssid]);
-
-               /*
-                * Setting dfl_root subsys_mask needs to consider the
-                * disabled flag and cftype registration needs kmalloc,
-                * both of which aren't available during early_init.
-                */
-               if (cgroup_disable_mask & (1 << ssid)) {
-                       static_branch_disable(cgroup_subsys_enabled_key[ssid]);
-                       printk(KERN_INFO "Disabling %s control group subsystem\n",
-                              ss->name);
-                       continue;
-               }
-
-               if (cgroup_ssid_no_v1(ssid))
-                       printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
-                              ss->name);
-
-               cgrp_dfl_root.subsys_mask |= 1 << ss->id;
-
-               if (ss->implicit_on_dfl)
-                       cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
-               else if (!ss->dfl_cftypes)
-                       cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;
-
-               if (ss->dfl_cftypes == ss->legacy_cftypes) {
-                       WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
-               } else {
-                       WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
-                       WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
-               }
-
-               if (ss->bind)
-                       ss->bind(init_css_set.subsys[ssid]);
-       }
-
-       /* init_css_set.subsys[] has been updated, re-hash */
-       hash_del(&init_css_set.hlist);
-       hash_add(css_set_table, &init_css_set.hlist,
-                css_set_hash(init_css_set.subsys));
-
-       WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
-       WARN_ON(register_filesystem(&cgroup_fs_type));
-       WARN_ON(register_filesystem(&cgroup2_fs_type));
-       WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
-
-       return 0;
-}
-
-static int __init cgroup_wq_init(void)
-{
-       /*
-        * There isn't much point in executing the destruction path in
-        * parallel.  A good chunk of it is serialized with cgroup_mutex anyway.
-        * Use 1 for @max_active.
-        *
-        * We would prefer to do this in cgroup_init() above, but that
-        * is called before init_workqueues(): so leave this until after.
-        */
-       cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
-       BUG_ON(!cgroup_destroy_wq);
-
-       /*
-        * Used to destroy pidlists; kept separate to serve as the flush domain.
-        * Cap @max_active to 1 too.
-        */
-       cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
-                                                   0, 1);
-       BUG_ON(!cgroup_pidlist_destroy_wq);
-
-       return 0;
-}
-core_initcall(cgroup_wq_init);
-
-/*
- * proc_cgroup_show()
- *  - Print task's cgroup paths into seq_file, one line for each hierarchy
- *  - Used for /proc/<pid>/cgroup.
- */
-int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
-                    struct pid *pid, struct task_struct *tsk)
-{
-       char *buf;
-       int retval;
-       struct cgroup_root *root;
-
-       retval = -ENOMEM;
-       buf = kmalloc(PATH_MAX, GFP_KERNEL);
-       if (!buf)
-               goto out;
-
-       mutex_lock(&cgroup_mutex);
-       spin_lock_irq(&css_set_lock);
-
-       for_each_root(root) {
-               struct cgroup_subsys *ss;
-               struct cgroup *cgrp;
-               int ssid, count = 0;
-
-               if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
-                       continue;
-
-               seq_printf(m, "%d:", root->hierarchy_id);
-               if (root != &cgrp_dfl_root)
-                       for_each_subsys(ss, ssid)
-                               if (root->subsys_mask & (1 << ssid))
-                                       seq_printf(m, "%s%s", count++ ? "," : "",
-                                                  ss->legacy_name);
-               if (strlen(root->name))
-                       seq_printf(m, "%sname=%s", count ? "," : "",
-                                  root->name);
-               seq_putc(m, ':');
-
-               cgrp = task_cgroup_from_root(tsk, root);
-
-               /*
-                * On traditional hierarchies, all zombie tasks show up as
-                * belonging to the root cgroup.  On the default hierarchy,
-                * while a zombie doesn't show up in "cgroup.procs" and
-                * thus can't be migrated, its /proc/PID/cgroup keeps
-                * reporting the cgroup it belonged to before exiting.  If
-                * the cgroup is removed before the zombie is reaped,
-                * " (deleted)" is appended to the cgroup path.
-                */
-               if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
-                       retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
-                                               current->nsproxy->cgroup_ns);
-                       if (retval >= PATH_MAX)
-                               retval = -ENAMETOOLONG;
-                       if (retval < 0)
-                               goto out_unlock;
-
-                       seq_puts(m, buf);
-               } else {
-                       seq_puts(m, "/");
-               }
-
-               if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
-                       seq_puts(m, " (deleted)\n");
-               else
-                       seq_putc(m, '\n');
-       }
-
-       retval = 0;
-out_unlock:
-       spin_unlock_irq(&css_set_lock);
-       mutex_unlock(&cgroup_mutex);
-       kfree(buf);
-out:
-       return retval;
-}
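
To make the format concrete, reading /proc/self/cgroup on a system with several v1 hierarchies plus the default hierarchy might produce something like the following (hierarchy IDs, controllers and paths are illustrative; the last line is the v2 hierarchy, which prints no controller list):

5:cpuacct,cpu:/user.slice
4:memory:/user.slice
1:name=systemd:/user.slice/user-1000.slice/session-2.scope
0::/user.slice/user-1000.slice/session-2.scope
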
-
-/* Display information about each subsystem and each hierarchy */
-static int proc_cgroupstats_show(struct seq_file *m, void *v)
-{
-       struct cgroup_subsys *ss;
-       int i;
-
-       seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
-       /*
-        * ideally we don't want subsystems moving around while we do this.
-        * cgroup_mutex is also necessary to guarantee an atomic snapshot of
-        * subsys/hierarchy state.
-        */
-       mutex_lock(&cgroup_mutex);
-
-       for_each_subsys(ss, i)
-               seq_printf(m, "%s\t%d\t%d\t%d\n",
-                          ss->legacy_name, ss->root->hierarchy_id,
-                          atomic_read(&ss->root->nr_cgrps),
-                          cgroup_ssid_enabled(i));
-
-       mutex_unlock(&cgroup_mutex);
-       return 0;
-}
-
-static int cgroupstats_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, proc_cgroupstats_show, NULL);
-}
-
-static const struct file_operations proc_cgroupstats_operations = {
-       .open = cgroupstats_open,
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-};
-
-/**
- * cgroup_fork - initialize cgroup related fields during copy_process()
- * @child: pointer to task_struct of the newly forked child process.
- *
- * A task is associated with the init_css_set until cgroup_post_fork()
- * attaches it to the parent's css_set.  Empty cg_list indicates that
- * @child isn't holding a reference to its css_set.
- */
-void cgroup_fork(struct task_struct *child)
-{
-       RCU_INIT_POINTER(child->cgroups, &init_css_set);
-       INIT_LIST_HEAD(&child->cg_list);
-}
-
-/**
- * cgroup_can_fork - called on a new task before the process is exposed
- * @child: the task in question.
- *
- * This calls the subsystem can_fork() callbacks. If the can_fork() callback
- * returns an error, the fork aborts with that error code.  This allows
- * a cgroup subsystem to conditionally allow or deny new forks.
- */
-int cgroup_can_fork(struct task_struct *child)
-{
-       struct cgroup_subsys *ss;
-       int i, j, ret;
-
-       do_each_subsys_mask(ss, i, have_canfork_callback) {
-               ret = ss->can_fork(child);
-               if (ret)
-                       goto out_revert;
-       } while_each_subsys_mask();
-
-       return 0;
-
-out_revert:
-       for_each_subsys(ss, j) {
-               if (j >= i)
-                       break;
-               if (ss->cancel_fork)
-                       ss->cancel_fork(child);
-       }
-
-       return ret;
-}
-
-/**
- * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
- * @child: the task in question
- *
- * This calls the cancel_fork() callbacks if a fork failed *after*
- * cgroup_can_fork() succeeded.
- */
-void cgroup_cancel_fork(struct task_struct *child)
-{
-       struct cgroup_subsys *ss;
-       int i;
-
-       for_each_subsys(ss, i)
-               if (ss->cancel_fork)
-                       ss->cancel_fork(child);
-}
-
-/**
- * cgroup_post_fork - called on a new task after adding it to the task list
- * @child: the task in question
- *
- * Adds the task to the list running through its css_set if necessary and
- * calls the subsystem fork() callbacks.  Has to be after the task is
- * visible on the task list in case we race with the first call to
- * css_task_iter_start() - to guarantee that the new task ends up on its
- * list.
- */
-void cgroup_post_fork(struct task_struct *child)
-{
-       struct cgroup_subsys *ss;
-       int i;
-
-       /*
-        * This may race against cgroup_enable_task_cg_lists().  As that
-        * function sets use_task_css_set_links before grabbing
-        * tasklist_lock and we just went through tasklist_lock to add
-        * @child, it's guaranteed that either we see the set
-        * use_task_css_set_links or cgroup_enable_task_cg_lists() sees
-        * @child during its iteration.
-        *
-        * If we won the race, @child is associated with %current's
-        * css_set.  Grabbing css_set_lock guarantees both that the
-        * association is stable, and, on completion of the parent's
-        * migration, @child is visible in the source of migration or
-        * already in the destination cgroup.  This guarantee is necessary
-        * when implementing operations which need to migrate all tasks of
-        * a cgroup to another.
-        *
-        * Note that if we lose to cgroup_enable_task_cg_lists(), @child
-        * will remain in init_css_set.  This is safe because all tasks are
-        * in the init_css_set before cg_links is enabled and there's no
-        * operation which transfers all tasks out of init_css_set.
-        */
-       if (use_task_css_set_links) {
-               struct css_set *cset;
-
-               spin_lock_irq(&css_set_lock);
-               cset = task_css_set(current);
-               if (list_empty(&child->cg_list)) {
-                       get_css_set(cset);
-                       css_set_move_task(child, NULL, cset, false);
-               }
-               spin_unlock_irq(&css_set_lock);
-       }
-
-       /*
-        * Call ss->fork().  This must happen after @child is linked on
-        * css_set; otherwise, @child might change state between ->fork()
-        * and addition to css_set.
-        */
-       do_each_subsys_mask(ss, i, have_fork_callback) {
-               ss->fork(child);
-       } while_each_subsys_mask();
-}
-
-/**
- * cgroup_exit - detach cgroup from exiting task
- * @tsk: pointer to task_struct of exiting process
- *
- * Description: Detach cgroup from @tsk and release it.
- *
- * Note that cgroups marked notify_on_release force every task in
- * them to take the global cgroup_mutex when exiting.
- * This could impact scaling on very large systems.  Be reluctant to
- * use notify_on_release cgroups where very high task exit scaling
- * is required on large systems.
- *
- * We set the exiting task's cgroup to the root cgroup (top_cgroup).  We
- * call cgroup_exit() while the task is still competent to handle
- * notify_on_release(), then leave the task attached to the root cgroup in
- * each hierarchy for the remainder of its exit.  No need to bother with
- * init_css_set refcnting.  init_css_set never goes away and we can't race
- * with migration path - PF_EXITING is visible to migration path.
- */
-void cgroup_exit(struct task_struct *tsk)
-{
-       struct cgroup_subsys *ss;
-       struct css_set *cset;
-       int i;
-
-       /*
-        * Unlink @tsk from its css_set.  As the migration path can't race
-        * with us, we can check css_set and cg_list without synchronization.
-        */
-       cset = task_css_set(tsk);
-
-       if (!list_empty(&tsk->cg_list)) {
-               spin_lock_irq(&css_set_lock);
-               css_set_move_task(tsk, cset, NULL, false);
-               spin_unlock_irq(&css_set_lock);
-       } else {
-               get_css_set(cset);
-       }
-
-       /* see cgroup_post_fork() for details */
-       do_each_subsys_mask(ss, i, have_exit_callback) {
-               ss->exit(tsk);
-       } while_each_subsys_mask();
-}
-
-void cgroup_free(struct task_struct *task)
-{
-       struct css_set *cset = task_css_set(task);
-       struct cgroup_subsys *ss;
-       int ssid;
-
-       do_each_subsys_mask(ss, ssid, have_free_callback) {
-               ss->free(task);
-       } while_each_subsys_mask();
-
-       put_css_set(cset);
-}
-
-static void check_for_release(struct cgroup *cgrp)
-{
-       if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
-           !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
-               schedule_work(&cgrp->release_agent_work);
-}
-
-/*
- * Notify userspace when a cgroup is released, by running the
- * configured release agent with the name of the cgroup (path
- * relative to the root of cgroup file system) as the argument.
- *
- * Most likely, this user command will try to rmdir this cgroup.
- *
- * This races with the possibility that some other task will be
- * attached to this cgroup before it is removed, or that some other
- * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
- * The presumed 'rmdir' will fail quietly if this cgroup is no longer
- * unused, and this cgroup will be reprieved from its death sentence,
- * to continue to serve a useful existence.  Next time it's released,
- * we will get notified again, if it still has 'notify_on_release' set.
- *
- * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
- * means only wait until the task is successfully execve()'d.  The
- * separate release agent task is forked by call_usermodehelper(),
- * then control in this thread returns here, without waiting for the
- * release agent task.  We don't bother to wait because the caller of
- * this routine has no use for the exit status of the release agent
- * task, so no sense holding our caller up for that.
- */
-static void cgroup_release_agent(struct work_struct *work)
-{
-       struct cgroup *cgrp =
-               container_of(work, struct cgroup, release_agent_work);
-       char *pathbuf = NULL, *agentbuf = NULL;
-       char *argv[3], *envp[3];
-       int ret;
-
-       mutex_lock(&cgroup_mutex);
-
-       pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
-       agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
-       if (!pathbuf || !agentbuf)
-               goto out;
-
-       spin_lock_irq(&css_set_lock);
-       ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
-       spin_unlock_irq(&css_set_lock);
-       if (ret < 0 || ret >= PATH_MAX)
-               goto out;
-
-       argv[0] = agentbuf;
-       argv[1] = pathbuf;
-       argv[2] = NULL;
-
-       /* minimal command environment */
-       envp[0] = "HOME=/";
-       envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-       envp[2] = NULL;
-
-       mutex_unlock(&cgroup_mutex);
-       call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-       goto out_free;
-out:
-       mutex_unlock(&cgroup_mutex);
-out_free:
-       kfree(agentbuf);
-       kfree(pathbuf);
-}
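
On the userspace side, the release agent is simply an executable invoked with the released cgroup's path (relative to the hierarchy root) as argv[1]. A minimal hypothetical agent that just removes the now-empty group, assuming the hierarchy is mounted at /sys/fs/cgroup/memory:

#include <stdio.h>
#include <unistd.h>

/* invoked by the kernel as: <agent-path> /relative/cgroup/path */
int main(int argc, char **argv)
{
	char buf[4096];

	if (argc < 2)
		return 1;
	/* prepend the (assumed) mount point of this hierarchy */
	snprintf(buf, sizeof(buf), "/sys/fs/cgroup/memory%s", argv[1]);
	return rmdir(buf) ? 1 : 0;
}
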
-
-static int __init cgroup_disable(char *str)
-{
-       struct cgroup_subsys *ss;
-       char *token;
-       int i;
-
-       while ((token = strsep(&str, ",")) != NULL) {
-               if (!*token)
-                       continue;
-
-               for_each_subsys(ss, i) {
-                       if (strcmp(token, ss->name) &&
-                           strcmp(token, ss->legacy_name))
-                               continue;
-                       cgroup_disable_mask |= 1 << i;
-               }
-       }
-       return 1;
-}
-__setup("cgroup_disable=", cgroup_disable);
-
-static int __init cgroup_no_v1(char *str)
-{
-       struct cgroup_subsys *ss;
-       char *token;
-       int i;
-
-       while ((token = strsep(&str, ",")) != NULL) {
-               if (!*token)
-                       continue;
-
-               if (!strcmp(token, "all")) {
-                       cgroup_no_v1_mask = U16_MAX;
-                       break;
-               }
-
-               for_each_subsys(ss, i) {
-                       if (strcmp(token, ss->name) &&
-                           strcmp(token, ss->legacy_name))
-                               continue;
-
-                       cgroup_no_v1_mask |= 1 << i;
-               }
-       }
-       return 1;
-}
-__setup("cgroup_no_v1=", cgroup_no_v1);
-
-/**
- * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
- * @dentry: directory dentry of interest
- * @ss: subsystem of interest
- *
- * If @dentry is a directory for a cgroup which has @ss enabled on it, try
- * to get the corresponding css and return it.  If such css doesn't exist
- * or can't be pinned, an ERR_PTR value is returned.
- */
-struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
-                                                      struct cgroup_subsys *ss)
-{
-       struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
-       struct file_system_type *s_type = dentry->d_sb->s_type;
-       struct cgroup_subsys_state *css = NULL;
-       struct cgroup *cgrp;
-
-       /* is @dentry a cgroup dir? */
-       if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
-           !kn || kernfs_type(kn) != KERNFS_DIR)
-               return ERR_PTR(-EBADF);
-
-       rcu_read_lock();
-
-       /*
-        * This path doesn't originate from kernfs and @kn could already
-        * have been or be removed at any point.  @kn->priv is RCU
-        * protected for this access.  See css_release_work_fn() for details.
-        */
-       cgrp = rcu_dereference(kn->priv);
-       if (cgrp)
-               css = cgroup_css(cgrp, ss);
-
-       if (!css || !css_tryget_online(css))
-               css = ERR_PTR(-ENOENT);
-
-       rcu_read_unlock();
-       return css;
-}
-
-/**
- * css_from_id - lookup css by id
- * @id: the cgroup id
- * @ss: cgroup subsys to be looked into
- *
- * Returns the css if there's valid one with @id, otherwise returns NULL.
- * Should be called under rcu_read_lock().
- */
-struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
-{
-       WARN_ON_ONCE(!rcu_read_lock_held());
-       return idr_find(&ss->css_idr, id);
-}
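
A hedged caller sketch showing the rcu_read_lock() requirement and the usual pinning step before the css is used outside the RCU section (pin_css_by_id() is hypothetical):

/* hypothetical helper; @ss would be e.g. &memory_cgrp_subsys */
static struct cgroup_subsys_state *pin_css_by_id(int id,
						 struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();
	css = css_from_id(id, ss);		/* may return NULL */
	if (css && !css_tryget_online(css))	/* pin before leaving RCU */
		css = NULL;
	rcu_read_unlock();

	return css;				/* caller does css_put() */
}
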
-
-/**
- * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
- * @path: path on the default hierarchy
- *
- * Find the cgroup at @path on the default hierarchy, increment its
- * reference count and return it.  Returns pointer to the found cgroup on
- * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
- * if @path points to a non-directory.
- */
-struct cgroup *cgroup_get_from_path(const char *path)
-{
-       struct kernfs_node *kn;
-       struct cgroup *cgrp;
-
-       mutex_lock(&cgroup_mutex);
-
-       kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
-       if (kn) {
-               if (kernfs_type(kn) == KERNFS_DIR) {
-                       cgrp = kn->priv;
-                       cgroup_get(cgrp);
-               } else {
-                       cgrp = ERR_PTR(-ENOTDIR);
-               }
-               kernfs_put(kn);
-       } else {
-               cgrp = ERR_PTR(-ENOENT);
-       }
-
-       mutex_unlock(&cgroup_mutex);
-       return cgrp;
-}
-EXPORT_SYMBOL_GPL(cgroup_get_from_path);
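
A hypothetical in-kernel caller, illustrating the error and reference-count contract:

static int use_cgroup_at(const char *path)	/* hypothetical */
{
	struct cgroup *cgrp = cgroup_get_from_path(path);

	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);	/* -ENOENT or -ENOTDIR */

	/* ... use cgrp ... */

	cgroup_put(cgrp);		/* drop the reference taken above */
	return 0;
}
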
-
-/**
- * cgroup_get_from_fd - get a cgroup pointer from a fd
- * @fd: fd obtained by open(cgroup2_dir)
- *
- * Find the cgroup from an fd which should be obtained
- * by opening a cgroup directory.  Returns a pointer to the
- * cgroup on success. ERR_PTR is returned if the cgroup
- * cannot be found.
- */
-struct cgroup *cgroup_get_from_fd(int fd)
-{
-       struct cgroup_subsys_state *css;
-       struct cgroup *cgrp;
-       struct file *f;
-
-       f = fget_raw(fd);
-       if (!f)
-               return ERR_PTR(-EBADF);
-
-       css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
-       fput(f);
-       if (IS_ERR(css))
-               return ERR_CAST(css);
-
-       cgrp = css->cgroup;
-       if (!cgroup_on_dfl(cgrp)) {
-               cgroup_put(cgrp);
-               return ERR_PTR(-EBADF);
-       }
-
-       return cgrp;
-}
-EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
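
The fd in question is nothing special from userspace's point of view; it is simply an open directory on the v2 hierarchy (path assumed):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* an open cgroup2 directory is all cgroup_get_from_fd() needs */
	int fd = open("/sys/fs/cgroup/unified/mygrp", O_RDONLY | O_DIRECTORY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* hand fd to an interface that resolves it, e.g. a BPF attach */
	close(fd);
	return 0;
}
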
-
-/*
- * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
- * definition in cgroup-defs.h.
- */
-#ifdef CONFIG_SOCK_CGROUP_DATA
-
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-
-DEFINE_SPINLOCK(cgroup_sk_update_lock);
-static bool cgroup_sk_alloc_disabled __read_mostly;
-
-void cgroup_sk_alloc_disable(void)
-{
-       if (cgroup_sk_alloc_disabled)
-               return;
-       pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
-       cgroup_sk_alloc_disabled = true;
-}
-
-#else
-
-#define cgroup_sk_alloc_disabled       false
-
-#endif
-
-void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
-{
-       if (cgroup_sk_alloc_disabled)
-               return;
-
-       /* Socket clone path */
-       if (skcd->val) {
-               cgroup_get(sock_cgroup_ptr(skcd));
-               return;
-       }
-
-       rcu_read_lock();
-
-       while (true) {
-               struct css_set *cset;
-
-               cset = task_css_set(current);
-               if (likely(cgroup_tryget(cset->dfl_cgrp))) {
-                       skcd->val = (unsigned long)cset->dfl_cgrp;
-                       break;
-               }
-               cpu_relax();
-       }
-
-       rcu_read_unlock();
-}
-
-void cgroup_sk_free(struct sock_cgroup_data *skcd)
-{
-       cgroup_put(sock_cgroup_ptr(skcd));
-}
-
-#endif /* CONFIG_SOCK_CGROUP_DATA */
-
-/* cgroup namespaces */
-
-static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
-{
-       return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
-}
-
-static void dec_cgroup_namespaces(struct ucounts *ucounts)
-{
-       dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
-}
-
-static struct cgroup_namespace *alloc_cgroup_ns(void)
-{
-       struct cgroup_namespace *new_ns;
-       int ret;
-
-       new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
-       if (!new_ns)
-               return ERR_PTR(-ENOMEM);
-       ret = ns_alloc_inum(&new_ns->ns);
-       if (ret) {
-               kfree(new_ns);
-               return ERR_PTR(ret);
-       }
-       atomic_set(&new_ns->count, 1);
-       new_ns->ns.ops = &cgroupns_operations;
-       return new_ns;
-}
-
-void free_cgroup_ns(struct cgroup_namespace *ns)
-{
-       put_css_set(ns->root_cset);
-       dec_cgroup_namespaces(ns->ucounts);
-       put_user_ns(ns->user_ns);
-       ns_free_inum(&ns->ns);
-       kfree(ns);
-}
-EXPORT_SYMBOL(free_cgroup_ns);
-
-struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
-                                       struct user_namespace *user_ns,
-                                       struct cgroup_namespace *old_ns)
-{
-       struct cgroup_namespace *new_ns;
-       struct ucounts *ucounts;
-       struct css_set *cset;
-
-       BUG_ON(!old_ns);
-
-       if (!(flags & CLONE_NEWCGROUP)) {
-               get_cgroup_ns(old_ns);
-               return old_ns;
-       }
-
-       /* Allow only sysadmin to create cgroup namespace. */
-       if (!ns_capable(user_ns, CAP_SYS_ADMIN))
-               return ERR_PTR(-EPERM);
-
-       ucounts = inc_cgroup_namespaces(user_ns);
-       if (!ucounts)
-               return ERR_PTR(-ENOSPC);
-
-       /* It is not safe to take cgroup_mutex here */
-       spin_lock_irq(&css_set_lock);
-       cset = task_css_set(current);
-       get_css_set(cset);
-       spin_unlock_irq(&css_set_lock);
-
-       new_ns = alloc_cgroup_ns();
-       if (IS_ERR(new_ns)) {
-               put_css_set(cset);
-               dec_cgroup_namespaces(ucounts);
-               return new_ns;
-       }
-
-       new_ns->user_ns = get_user_ns(user_ns);
-       new_ns->ucounts = ucounts;
-       new_ns->root_cset = cset;
-
-       return new_ns;
-}
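
From userspace this path is exercised through clone(2) or unshare(2) with CLONE_NEWCGROUP. A minimal sketch (requires CAP_SYS_ADMIN in the owning user namespace):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* needs CAP_SYS_ADMIN; roots the new ns at the current css_set */
	if (unshare(CLONE_NEWCGROUP) < 0) {
		perror("unshare");
		return 1;
	}
	/* /proc/self/cgroup paths are now relative to the new root */
	return 0;
}
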
-
-static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
-{
-       return container_of(ns, struct cgroup_namespace, ns);
-}
-
-static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
-{
-       struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
-
-       if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
-           !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
-               return -EPERM;
-
-       /* Don't need to do anything if we are attaching to our own cgroupns. */
-       if (cgroup_ns == nsproxy->cgroup_ns)
-               return 0;
-
-       get_cgroup_ns(cgroup_ns);
-       put_cgroup_ns(nsproxy->cgroup_ns);
-       nsproxy->cgroup_ns = cgroup_ns;
-
-       return 0;
-}
-
-static struct ns_common *cgroupns_get(struct task_struct *task)
-{
-       struct cgroup_namespace *ns = NULL;
-       struct nsproxy *nsproxy;
-
-       task_lock(task);
-       nsproxy = task->nsproxy;
-       if (nsproxy) {
-               ns = nsproxy->cgroup_ns;
-               get_cgroup_ns(ns);
-       }
-       task_unlock(task);
-
-       return ns ? &ns->ns : NULL;
-}
-
-static void cgroupns_put(struct ns_common *ns)
-{
-       put_cgroup_ns(to_cg_ns(ns));
-}
-
-static struct user_namespace *cgroupns_owner(struct ns_common *ns)
-{
-       return to_cg_ns(ns)->user_ns;
-}
-
-const struct proc_ns_operations cgroupns_operations = {
-       .name           = "cgroup",
-       .type           = CLONE_NEWCGROUP,
-       .get            = cgroupns_get,
-       .put            = cgroupns_put,
-       .install        = cgroupns_install,
-       .owner          = cgroupns_owner,
-};
-
-static __init int cgroup_namespaces_init(void)
-{
-       return 0;
-}
-subsys_initcall(cgroup_namespaces_init);
-
-#ifdef CONFIG_CGROUP_BPF
-int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
-                     enum bpf_attach_type type, bool overridable)
-{
-       struct cgroup *parent = cgroup_parent(cgrp);
-       int ret;
-
-       mutex_lock(&cgroup_mutex);
-       ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
-       mutex_unlock(&cgroup_mutex);
-       return ret;
-}
-#endif /* CONFIG_CGROUP_BPF */
-
-#ifdef CONFIG_CGROUP_DEBUG
-static struct cgroup_subsys_state *
-debug_css_alloc(struct cgroup_subsys_state *parent_css)
-{
-       struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
-
-       if (!css)
-               return ERR_PTR(-ENOMEM);
-
-       return css;
-}
-
-static void debug_css_free(struct cgroup_subsys_state *css)
-{
-       kfree(css);
-}
-
-static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
-                               struct cftype *cft)
-{
-       return cgroup_task_count(css->cgroup);
-}
-
-static u64 current_css_set_read(struct cgroup_subsys_state *css,
-                               struct cftype *cft)
-{
-       return (u64)(unsigned long)current->cgroups;
-}
-
-static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
-                                        struct cftype *cft)
-{
-       u64 count;
-
-       rcu_read_lock();
-       count = atomic_read(&task_css_set(current)->refcount);
-       rcu_read_unlock();
-       return count;
-}
-
-static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
-{
-       struct cgrp_cset_link *link;
-       struct css_set *cset;
-       char *name_buf;
-
-       name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
-       if (!name_buf)
-               return -ENOMEM;
-
-       spin_lock_irq(&css_set_lock);
-       rcu_read_lock();
-       cset = rcu_dereference(current->cgroups);
-       list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
-               struct cgroup *c = link->cgrp;
-
-               cgroup_name(c, name_buf, NAME_MAX + 1);
-               seq_printf(seq, "Root %d group %s\n",
-                          c->root->hierarchy_id, name_buf);
-       }
-       rcu_read_unlock();
-       spin_unlock_irq(&css_set_lock);
-       kfree(name_buf);
-       return 0;
-}
-
-#define MAX_TASKS_SHOWN_PER_CSS 25
-static int cgroup_css_links_read(struct seq_file *seq, void *v)
-{
-       struct cgroup_subsys_state *css = seq_css(seq);
-       struct cgrp_cset_link *link;
-
-       spin_lock_irq(&css_set_lock);
-       list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
-               struct css_set *cset = link->cset;
-               struct task_struct *task;
-               int count = 0;
-
-               seq_printf(seq, "css_set %p\n", cset);
-
-               list_for_each_entry(task, &cset->tasks, cg_list) {
-                       if (count++ > MAX_TASKS_SHOWN_PER_CSS)
-                               goto overflow;
-                       seq_printf(seq, "  task %d\n", task_pid_vnr(task));
-               }
-
-               list_for_each_entry(task, &cset->mg_tasks, cg_list) {
-                       if (count++ > MAX_TASKS_SHOWN_PER_CSS)
-                               goto overflow;
-                       seq_printf(seq, "  task %d\n", task_pid_vnr(task));
-               }
-               continue;
-       overflow:
-               seq_puts(seq, "  ...\n");
-       }
-       spin_unlock_irq(&css_set_lock);
-       return 0;
-}
-
-static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
-{
-       return (!cgroup_is_populated(css->cgroup) &&
-               !css_has_online_children(&css->cgroup->self));
-}
-
-static struct cftype debug_files[] =  {
-       {
-               .name = "taskcount",
-               .read_u64 = debug_taskcount_read,
-       },
-
-       {
-               .name = "current_css_set",
-               .read_u64 = current_css_set_read,
-       },
-
-       {
-               .name = "current_css_set_refcount",
-               .read_u64 = current_css_set_refcount_read,
-       },
-
-       {
-               .name = "current_css_set_cg_links",
-               .seq_show = current_css_set_cg_links_read,
-       },
-
-       {
-               .name = "cgroup_css_links",
-               .seq_show = cgroup_css_links_read,
-       },
-
-       {
-               .name = "releasable",
-               .read_u64 = releasable_read,
-       },
-
-       { }     /* terminate */
-};
-
-struct cgroup_subsys debug_cgrp_subsys = {
-       .css_alloc = debug_css_alloc,
-       .css_free = debug_css_free,
-       .legacy_cftypes = debug_files,
-};
-#endif /* CONFIG_CGROUP_DEBUG */
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
new file mode 100644 (file)
index 0000000..387348a
--- /dev/null
+++ b/kernel/cgroup/Makefile
@@ -0,0 +1,6 @@
+obj-y := cgroup.o namespace.o cgroup-v1.o
+
+obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
+obj-$(CONFIG_CGROUP_PIDS) += pids.o
+obj-$(CONFIG_CGROUP_RDMA) += rdma.o
+obj-$(CONFIG_CPUSETS) += cpuset.o
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
new file mode 100644 (file)
index 0000000..9203bfb
--- /dev/null
+++ b/kernel/cgroup/cgroup-internal.h
@@ -0,0 +1,214 @@
+#ifndef __CGROUP_INTERNAL_H
+#define __CGROUP_INTERNAL_H
+
+#include <linux/cgroup.h>
+#include <linux/kernfs.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+
+/*
+ * A cgroup can be associated with multiple css_sets as different tasks may
+ * belong to different cgroups on different hierarchies.  In the other
+ * direction, a css_set is naturally associated with multiple cgroups.
+ * This M:N relationship is represented by the following link structure
+ * which exists for each association and allows traversing the associations
+ * from both sides.
+ */
+struct cgrp_cset_link {
+       /* the cgroup and css_set this link associates */
+       struct cgroup           *cgrp;
+       struct css_set          *cset;
+
+       /* list of cgrp_cset_links anchored at cgrp->cset_links */
+       struct list_head        cset_link;
+
+       /* list of cgrp_cset_links anchored at css_set->cgrp_links */
+       struct list_head        cgrp_link;
+};
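
To illustrate the two traversal directions the comment above describes, a hedged sketch (walk_links() is hypothetical; the real code holds css_set_lock across such walks):

/* sketch only; locking elided */
static void walk_links(struct cgroup *cgrp, struct css_set *cset)
{
	struct cgrp_cset_link *link;

	/* every css_set associated with @cgrp */
	list_for_each_entry(link, &cgrp->cset_links, cset_link)
		pr_info("cset %p\n", link->cset);

	/* every cgroup associated with @cset */
	list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
		pr_info("cgrp %p\n", link->cgrp);
}
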
+
+/* used to track tasks and csets during migration */
+struct cgroup_taskset {
+       /* the src and dst cset list running through cset->mg_node */
+       struct list_head        src_csets;
+       struct list_head        dst_csets;
+
+       /* the subsys currently being processed */
+       int                     ssid;
+
+       /*
+        * Fields for cgroup_taskset_*() iteration.
+        *
+        * Before migration is committed, the target migration tasks are on
+        * ->mg_tasks of the csets on ->src_csets.  After, on ->mg_tasks of
+        * the csets on ->dst_csets.  ->csets point to either ->src_csets
+        * or ->dst_csets depending on whether migration is committed.
+        *
+        * ->cur_cset and ->cur_task point to the current task position
+        * during iteration.
+        */
+       struct list_head        *csets;
+       struct css_set          *cur_cset;
+       struct task_struct      *cur_task;
+};
+
+/* migration context also tracks preloading */
+struct cgroup_mgctx {
+       /*
+        * Preloaded source and destination csets.  Used to guarantee
+        * atomic success or failure on actual migration.
+        */
+       struct list_head        preloaded_src_csets;
+       struct list_head        preloaded_dst_csets;
+
+       /* tasks and csets to migrate */
+       struct cgroup_taskset   tset;
+
+       /* subsystems affected by migration */
+       u16                     ss_mask;
+};
+
+#define CGROUP_TASKSET_INIT(tset)                                              \
+{                                                                              \
+       .src_csets              = LIST_HEAD_INIT(tset.src_csets),               \
+       .dst_csets              = LIST_HEAD_INIT(tset.dst_csets),               \
+       .csets                  = &tset.src_csets,                              \
+}
+
+#define CGROUP_MGCTX_INIT(name)                                                        \
+{                                                                              \
+       LIST_HEAD_INIT(name.preloaded_src_csets),                               \
+       LIST_HEAD_INIT(name.preloaded_dst_csets),                               \
+       CGROUP_TASKSET_INIT(name.tset),                                         \
+}
+
+#define DEFINE_CGROUP_MGCTX(name)                                              \
+       struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
+
+struct cgroup_sb_opts {
+       u16 subsys_mask;
+       unsigned int flags;
+       char *release_agent;
+       bool cpuset_clone_children;
+       char *name;
+       /* User explicitly requested empty subsystem */
+       bool none;
+};
+
+extern struct mutex cgroup_mutex;
+extern spinlock_t css_set_lock;
+extern struct cgroup_subsys *cgroup_subsys[];
+extern struct list_head cgroup_roots;
+extern struct file_system_type cgroup_fs_type;
+
+/* iterate across the hierarchies */
+#define for_each_root(root)                                            \
+       list_for_each_entry((root), &cgroup_roots, root_list)
+
+/**
+ * for_each_subsys - iterate all enabled cgroup subsystems
+ * @ss: the iteration cursor
+ * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
+ */
+#define for_each_subsys(ss, ssid)                                      \
+       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT &&                \
+            (((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
+
+static inline bool cgroup_is_dead(const struct cgroup *cgrp)
+{
+       return !(cgrp->self.flags & CSS_ONLINE);
+}
+
+static inline bool notify_on_release(const struct cgroup *cgrp)
+{
+       return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+}
+
+void put_css_set_locked(struct css_set *cset);
+
+static inline void put_css_set(struct css_set *cset)
+{
+       unsigned long flags;
+
+       /*
+        * Ensure that the refcount doesn't hit zero while any readers
+        * can see it. Similar to atomic_dec_and_lock(), but for an
+        * rwlock
+        */
+       if (atomic_add_unless(&cset->refcount, -1, 1))
+               return;
+
+       spin_lock_irqsave(&css_set_lock, flags);
+       put_css_set_locked(cset);
+       spin_unlock_irqrestore(&css_set_lock, flags);
+}
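
The same fastpath idiom in generic form, for illustration only (my_obj, obj_lock and destroy_obj() are made up):

#include <linux/atomic.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_obj {
	atomic_t	refcount;
};

static DEFINE_SPINLOCK(obj_lock);

static void destroy_obj(struct my_obj *obj)
{
	kfree(obj);
}

static void put_obj(struct my_obj *obj)
{
	unsigned long flags;

	/* fast path: drop a ref that is provably not the final one */
	if (atomic_add_unless(&obj->refcount, -1, 1))
		return;

	/* slow path: may be the last ref, serialize against readers */
	spin_lock_irqsave(&obj_lock, flags);
	if (atomic_dec_and_test(&obj->refcount))
		destroy_obj(obj);
	spin_unlock_irqrestore(&obj_lock, flags);
}
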
+
+/*
+ * refcounted get/put for css_set objects
+ */
+static inline void get_css_set(struct css_set *cset)
+{
+       atomic_inc(&cset->refcount);
+}
+
+bool cgroup_ssid_enabled(int ssid);
+bool cgroup_on_dfl(const struct cgroup *cgrp);
+
+struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root);
+struct cgroup *task_cgroup_from_root(struct task_struct *task,
+                                    struct cgroup_root *root);
+struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline);
+void cgroup_kn_unlock(struct kernfs_node *kn);
+int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+                         struct cgroup_namespace *ns);
+
+void cgroup_free_root(struct cgroup_root *root);
+void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
+int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
+int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
+struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
+                              struct cgroup_root *root, unsigned long magic,
+                              struct cgroup_namespace *ns);
+
+bool cgroup_may_migrate_to(struct cgroup *dst_cgrp);
+void cgroup_migrate_finish(struct cgroup_mgctx *mgctx);
+void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp,
+                           struct cgroup_mgctx *mgctx);
+int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx);
+int cgroup_migrate(struct task_struct *leader, bool threadgroup,
+                  struct cgroup_mgctx *mgctx);
+
+int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
+                      bool threadgroup);
+ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
+                            size_t nbytes, loff_t off, bool threadgroup);
+ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
+                          loff_t off);
+
+void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
+
+int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode);
+int cgroup_rmdir(struct kernfs_node *kn);
+int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
+                    struct kernfs_root *kf_root);
+
+/*
+ * namespace.c
+ */
+extern const struct proc_ns_operations cgroupns_operations;
+
+/*
+ * cgroup-v1.c
+ */
+extern struct cftype cgroup1_base_files[];
+extern const struct file_operations proc_cgroupstats_operations;
+extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
+
+bool cgroup1_ssid_disabled(int ssid);
+void cgroup1_pidlist_destroy_all(struct cgroup *cgrp);
+void cgroup1_release_agent(struct work_struct *work);
+void cgroup1_check_for_release(struct cgroup *cgrp);
+struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
+                            void *data, unsigned long magic,
+                            struct cgroup_namespace *ns);
+
+#endif /* __CGROUP_INTERNAL_H */
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
new file mode 100644 (file)
index 0000000..fc34bcf
--- /dev/null
+++ b/kernel/cgroup/cgroup-v1.c
@@ -0,0 +1,1395 @@
+#include "cgroup-internal.h"
+
+#include <linux/ctype.h>
+#include <linux/kmod.h>
+#include <linux/sort.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/delayacct.h>
+#include <linux/pid_namespace.h>
+#include <linux/cgroupstats.h>
+
+#include <trace/events/cgroup.h>
+
+/*
+ * pidlists linger the following amount before being destroyed.  The goal
+ * is avoiding frequent destruction in the middle of consecutive read calls.
+ * Expiring in the middle is a performance problem, not a correctness one.
+ * 1 sec should be enough.
+ */
+#define CGROUP_PIDLIST_DESTROY_DELAY   HZ
+
+/* Controllers blocked by the commandline in v1 */
+static u16 cgroup_no_v1_mask;
+
+/*
+ * pidlist destructions need to be flushed on cgroup destruction.  Use a
+ * separate workqueue as flush domain.
+ */
+static struct workqueue_struct *cgroup_pidlist_destroy_wq;
+
+/*
+ * Protects cgroup_root->release_agent_path.  Modifying it also requires
+ * cgroup_mutex.  Reading requires either cgroup_mutex or this spinlock.
+ */
+static DEFINE_SPINLOCK(release_agent_path_lock);
+
+bool cgroup1_ssid_disabled(int ssid)
+{
+       return cgroup_no_v1_mask & (1 << ssid);
+}
+
+/**
+ * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
+ * @from: attach to all cgroups of a given task
+ * @tsk: the task to be attached
+ */
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
+{
+       struct cgroup_root *root;
+       int retval = 0;
+
+       mutex_lock(&cgroup_mutex);
+       percpu_down_write(&cgroup_threadgroup_rwsem);
+       for_each_root(root) {
+               struct cgroup *from_cgrp;
+
+               if (root == &cgrp_dfl_root)
+                       continue;
+
+               spin_lock_irq(&css_set_lock);
+               from_cgrp = task_cgroup_from_root(from, root);
+               spin_unlock_irq(&css_set_lock);
+
+               retval = cgroup_attach_task(from_cgrp, tsk, false);
+               if (retval)
+                       break;
+       }
+       percpu_up_write(&cgroup_threadgroup_rwsem);
+       mutex_unlock(&cgroup_mutex);
+
+       return retval;
+}
+EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
+
+/**
+ * cgroup_transfer_tasks - move tasks from one cgroup to another
+ * @to: cgroup to which the tasks will be moved
+ * @from: cgroup in which the tasks currently reside
+ *
+ * Locking rules between cgroup_post_fork() and the migration path
+ * guarantee that, if a task is forking while being migrated, the new child
+ * ends up either visible in the source cgroup after the parent's
+ * migration is complete or put into the target cgroup.  No task
+ * can slip out of migration through forking.
+ */
+int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
+{
+       DEFINE_CGROUP_MGCTX(mgctx);
+       struct cgrp_cset_link *link;
+       struct css_task_iter it;
+       struct task_struct *task;
+       int ret;
+
+       if (cgroup_on_dfl(to))
+               return -EINVAL;
+
+       if (!cgroup_may_migrate_to(to))
+               return -EBUSY;
+
+       mutex_lock(&cgroup_mutex);
+
+       percpu_down_write(&cgroup_threadgroup_rwsem);
+
+       /* all tasks in @from are being moved, all csets are source */
+       spin_lock_irq(&css_set_lock);
+       list_for_each_entry(link, &from->cset_links, cset_link)
+               cgroup_migrate_add_src(link->cset, to, &mgctx);
+       spin_unlock_irq(&css_set_lock);
+
+       ret = cgroup_migrate_prepare_dst(&mgctx);
+       if (ret)
+               goto out_err;
+
+       /*
+        * Migrate tasks one-by-one until @from is empty.  This fails iff
+        * ->can_attach() fails.
+        */
+       do {
+               css_task_iter_start(&from->self, &it);
+               task = css_task_iter_next(&it);
+               if (task)
+                       get_task_struct(task);
+               css_task_iter_end(&it);
+
+               if (task) {
+                       ret = cgroup_migrate(task, false, &mgctx);
+                       if (!ret)
+                               trace_cgroup_transfer_tasks(to, task, false);
+                       put_task_struct(task);
+               }
+       } while (task && !ret);
+out_err:
+       cgroup_migrate_finish(&mgctx);
+       percpu_up_write(&cgroup_threadgroup_rwsem);
+       mutex_unlock(&cgroup_mutex);
+       return ret;
+}
+
+/*
+ * Stuff for reading the 'tasks'/'procs' files.
+ *
+ * Reading this file can return large amounts of data if a cgroup has
+ * *lots* of attached tasks. So it may need several calls to read(),
+ * but we cannot guarantee that the information we produce is correct
+ * unless we produce it entirely atomically.
+ */
+
+/* which pidlist file are we talking about? */
+enum cgroup_filetype {
+       CGROUP_FILE_PROCS,
+       CGROUP_FILE_TASKS,
+};
+
+/*
+ * A pidlist is a list of pids that virtually represents the contents of one
+ * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
+ * a pair (one each for procs, tasks) for each pid namespace that's relevant
+ * to the cgroup.
+ */
+struct cgroup_pidlist {
+       /*
+        * used to find which pidlist is wanted. doesn't change as long as
+        * this particular list stays in the list.
+        */
+       struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
+       /* array of pids (or tgids for the procs file) */
+       pid_t *list;
+       /* how many elements the above list has */
+       int length;
+       /* each of these stored in a list by its cgroup */
+       struct list_head links;
+       /* pointer to the cgroup we belong to, for list removal purposes */
+       struct cgroup *owner;
+       /* for delayed destruction */
+       struct delayed_work destroy_dwork;
+};
+
+/*
+ * The following two functions "fix" the issue where there are more pids
+ * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
+ * TODO: replace with a kernel-wide solution to this problem
+ */
+#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
+static void *pidlist_allocate(int count)
+{
+       if (PIDLIST_TOO_LARGE(count))
+               return vmalloc(count * sizeof(pid_t));
+       else
+               return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
+}
+
+static void pidlist_free(void *p)
+{
+       kvfree(p);
+}
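(For illustration only, not part of this patch: the allocate/free pair above open-codes the kmalloc-or-vmalloc fallback that later kernels expose as kvmalloc_array()/kvfree(). Assuming a tree where kvmalloc_array() is available, the helper would collapse to a one-liner:)

    /* Sketch only: kvmalloc_array() postdates this patch. */
    static void *pidlist_allocate(int count)
    {
            /* Falls back from kmalloc to vmalloc for large allocations. */
            return kvmalloc_array(count, sizeof(pid_t), GFP_KERNEL);
    }

Note that pidlist_free() already relies on kvfree(), which accepts both kinds of pointer.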
+
+/*
+ * Used to destroy all pidlists that are lingering, waiting for the
+ * destroy timer.  None should be left afterwards.
+ */
+void cgroup1_pidlist_destroy_all(struct cgroup *cgrp)
+{
+       struct cgroup_pidlist *l, *tmp_l;
+
+       mutex_lock(&cgrp->pidlist_mutex);
+       list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
+               mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
+       mutex_unlock(&cgrp->pidlist_mutex);
+
+       flush_workqueue(cgroup_pidlist_destroy_wq);
+       BUG_ON(!list_empty(&cgrp->pidlists));
+}
+
+static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
+{
+       struct delayed_work *dwork = to_delayed_work(work);
+       struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
+                                               destroy_dwork);
+       struct cgroup_pidlist *tofree = NULL;
+
+       mutex_lock(&l->owner->pidlist_mutex);
+
+       /*
+        * Destroy iff we didn't get queued again.  The state won't change
+        * as destroy_dwork can only be queued while locked.
+        */
+       if (!delayed_work_pending(dwork)) {
+               list_del(&l->links);
+               pidlist_free(l->list);
+               put_pid_ns(l->key.ns);
+               tofree = l;
+       }
+
+       mutex_unlock(&l->owner->pidlist_mutex);
+       kfree(tofree);
+}
+
+/*
+ * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
+ * Returns the number of unique elements.
+ */
+static int pidlist_uniq(pid_t *list, int length)
+{
+       int src, dest = 1;
+
+       /*
+        * we presume the 0th element is unique, so src starts at 1. trivial
+        * edge cases first; no work needs to be done for either
+        */
+       if (length == 0 || length == 1)
+               return length;
+       /* src and dest walk down the list; dest counts unique elements */
+       for (src = 1; src < length; src++) {
+               /* find next unique element */
+               while (list[src] == list[src-1]) {
+                       src++;
+                       if (src == length)
+                               goto after;
+               }
+               /* dest always points to where the next unique element goes */
+               list[dest] = list[src];
+               dest++;
+       }
+after:
+       return dest;
+}
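pidlist_uniq() compacts duplicates in place and returns the new length; since it only compares neighbouring entries, the input must already be sorted. A self-contained userspace sketch of the same compaction (illustrative, mirroring the loop above):

    #include <stdio.h>

    /* In-place dedup of a sorted array; returns the number of unique entries. */
    static int uniq(int *list, int length)
    {
            int src, dest = 1;

            if (length < 2)
                    return length;
            for (src = 1; src < length; src++)
                    if (list[src] != list[dest - 1])
                            list[dest++] = list[src];
            return dest;
    }

    int main(void)
    {
            int pids[] = { 100, 100, 101, 230, 230, 230, 512 };
            int i, n = uniq(pids, 7);

            for (i = 0; i < n; i++)
                    printf("%d\n", pids[i]);        /* 100, 101, 230, 512 */
            return 0;
    }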
+
+/*
+ * The two pid files - 'tasks' and 'cgroup.procs' - guarantee that the
+ * result is sorted, which forced this whole pidlist fiasco.  As pid order
+ * differs per namespace, each namespace needs a differently sorted list,
+ * making it impossible to use, for example, a single rbtree of member
+ * tasks sorted by task pointer.  As pidlists can be fairly large,
+ * allocating one per open file is dangerous, so cgroup had to implement a
+ * shared pool of pidlists keyed by cgroup and namespace.
+ */
+static int cmppid(const void *a, const void *b)
+{
+       return *(pid_t *)a - *(pid_t *)b;
+}
+
+static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
+                                                 enum cgroup_filetype type)
+{
+       struct cgroup_pidlist *l;
+       /* don't need task_nsproxy() if we're looking at ourselves */
+       struct pid_namespace *ns = task_active_pid_ns(current);
+
+       lockdep_assert_held(&cgrp->pidlist_mutex);
+
+       list_for_each_entry(l, &cgrp->pidlists, links)
+               if (l->key.type == type && l->key.ns == ns)
+                       return l;
+       return NULL;
+}
+
+/*
+ * find the appropriate pidlist for our purpose (given procs vs tasks)
+ * returns with the lock on that pidlist already held, and takes care
+ * of the use count, or returns NULL with no locks held if we're out of
+ * memory.
+ */
+static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
+                                               enum cgroup_filetype type)
+{
+       struct cgroup_pidlist *l;
+
+       lockdep_assert_held(&cgrp->pidlist_mutex);
+
+       l = cgroup_pidlist_find(cgrp, type);
+       if (l)
+               return l;
+
+       /* entry not found; create a new one */
+       l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
+       if (!l)
+               return l;
+
+       INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
+       l->key.type = type;
+       /* don't need task_nsproxy() if we're looking at ourselves */
+       l->key.ns = get_pid_ns(task_active_pid_ns(current));
+       l->owner = cgrp;
+       list_add(&l->links, &cgrp->pidlists);
+       return l;
+}
+
+/**
+ * cgroup_task_count - count the number of tasks in a cgroup.
+ * @cgrp: the cgroup in question
+ *
+ * Return the number of tasks in the cgroup.  The returned number can be
+ * higher than the actual number of tasks due to css_set references from
+ * namespace roots and temporary usages.
+ */
+static int cgroup_task_count(const struct cgroup *cgrp)
+{
+       int count = 0;
+       struct cgrp_cset_link *link;
+
+       spin_lock_irq(&css_set_lock);
+       list_for_each_entry(link, &cgrp->cset_links, cset_link)
+               count += atomic_read(&link->cset->refcount);
+       spin_unlock_irq(&css_set_lock);
+       return count;
+}
+
+/*
+ * Load a cgroup's pidarray with either procs' tgids or tasks' pids
+ */
+static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
+                             struct cgroup_pidlist **lp)
+{
+       pid_t *array;
+       int length;
+       int pid, n = 0; /* used for populating the array */
+       struct css_task_iter it;
+       struct task_struct *tsk;
+       struct cgroup_pidlist *l;
+
+       lockdep_assert_held(&cgrp->pidlist_mutex);
+
+       /*
+        * If the cgroup gains more users after we read the count, we won't
+        * have enough space - tough.  To the caller, this race is
+        * indistinguishable from the case where the additional users didn't
+        * show up until some time later.
+        */
+       length = cgroup_task_count(cgrp);
+       array = pidlist_allocate(length);
+       if (!array)
+               return -ENOMEM;
+       /* now, populate the array */
+       css_task_iter_start(&cgrp->self, &it);
+       while ((tsk = css_task_iter_next(&it))) {
+               if (unlikely(n == length))
+                       break;
+               /* get tgid or pid for procs or tasks file respectively */
+               if (type == CGROUP_FILE_PROCS)
+                       pid = task_tgid_vnr(tsk);
+               else
+                       pid = task_pid_vnr(tsk);
+               if (pid > 0) /* make sure to only use valid results */
+                       array[n++] = pid;
+       }
+       css_task_iter_end(&it);
+       length = n;
+       /* now sort & (if procs) strip out duplicates */
+       sort(array, length, sizeof(pid_t), cmppid, NULL);
+       if (type == CGROUP_FILE_PROCS)
+               length = pidlist_uniq(array, length);
+
+       l = cgroup_pidlist_find_create(cgrp, type);
+       if (!l) {
+               pidlist_free(array);
+               return -ENOMEM;
+       }
+
+       /* store array, freeing old if necessary */
+       pidlist_free(l->list);
+       l->list = array;
+       l->length = length;
+       *lp = l;
+       return 0;
+}
+
+/*
+ * seq_file methods for the tasks/procs files. The seq_file position is the
+ * next pid to display; the seq_file iterator is a pointer to the pid
+ * in the cgroup->l->list array.
+ */
+
+static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
+{
+       /*
+        * Initially we receive a position value that corresponds to
+        * one more than the last pid shown (or 0 on the first call or
+        * after a seek to the start). Use a binary-search to find the
+        * next pid to display, if any
+        */
+       struct kernfs_open_file *of = s->private;
+       struct cgroup *cgrp = seq_css(s)->cgroup;
+       struct cgroup_pidlist *l;
+       enum cgroup_filetype type = seq_cft(s)->private;
+       int index = 0, pid = *pos;
+       int *iter, ret;
+
+       mutex_lock(&cgrp->pidlist_mutex);
+
+       /*
+        * !NULL @of->priv indicates that this isn't the first start()
+        * after open.  If the matching pidlist is around, we can use that.
+        * Look for it.  Note that @of->priv can't be used directly.  It
+        * could already have been destroyed.
+        */
+       if (of->priv)
+               of->priv = cgroup_pidlist_find(cgrp, type);
+
+       /*
+        * Either this is the first start() after open or the matching
+        * pidlist has been destroyed in between.  Create a new one.
+        */
+       if (!of->priv) {
+               ret = pidlist_array_load(cgrp, type,
+                                        (struct cgroup_pidlist **)&of->priv);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
+       l = of->priv;
+
+       if (pid) {
+               int end = l->length;
+
+               while (index < end) {
+                       int mid = (index + end) / 2;
+                       if (l->list[mid] == pid) {
+                               index = mid;
+                               break;
+                       } else if (l->list[mid] <= pid)
+                               index = mid + 1;
+                       else
+                               end = mid;
+               }
+       }
+       /* If we're off the end of the array, we're done */
+       if (index >= l->length)
+               return NULL;
+       /* Update the abstract position to be the actual pid that we found */
+       iter = l->list + index;
+       *pos = *iter;
+       return iter;
+}
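The binary search above is effectively a lower-bound lookup: *pos holds one past the last pid shown, and the loop finds the first slot whose pid is >= that value, so a sequence of read() calls resumes correctly even if pids vanished from the list in the meantime. The same search in isolation (a sketch, not kernel code):

    #include <sys/types.h>

    /* Return the index of the first element >= pid, or length if none. */
    static int pidlist_lower_bound(const pid_t *list, int length, pid_t pid)
    {
            int index = 0, end = length;

            while (index < end) {
                    int mid = (index + end) / 2;

                    if (list[mid] == pid)
                            return mid;
                    else if (list[mid] < pid)
                            index = mid + 1;
                    else
                            end = mid;
            }
            return index;
    }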
+
+static void cgroup_pidlist_stop(struct seq_file *s, void *v)
+{
+       struct kernfs_open_file *of = s->private;
+       struct cgroup_pidlist *l = of->priv;
+
+       if (l)
+               mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
+                                CGROUP_PIDLIST_DESTROY_DELAY);
+       mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
+}
+
+static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct kernfs_open_file *of = s->private;
+       struct cgroup_pidlist *l = of->priv;
+       pid_t *p = v;
+       pid_t *end = l->list + l->length;
+       /*
+        * Advance to the next pid in the array. If this goes off the
+        * end, we're done
+        */
+       p++;
+       if (p >= end) {
+               return NULL;
+       } else {
+               *pos = *p;
+               return p;
+       }
+}
+
+static int cgroup_pidlist_show(struct seq_file *s, void *v)
+{
+       seq_printf(s, "%d\n", *(int *)v);
+
+       return 0;
+}
+
+static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
+                                 char *buf, size_t nbytes, loff_t off)
+{
+       return __cgroup_procs_write(of, buf, nbytes, off, false);
+}
+
+static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
+                                         char *buf, size_t nbytes, loff_t off)
+{
+       struct cgroup *cgrp;
+
+       BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+
+       cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!cgrp)
+               return -ENODEV;
+       spin_lock(&release_agent_path_lock);
+       strlcpy(cgrp->root->release_agent_path, strstrip(buf),
+               sizeof(cgrp->root->release_agent_path));
+       spin_unlock(&release_agent_path_lock);
+       cgroup_kn_unlock(of->kn);
+       return nbytes;
+}
+
+static int cgroup_release_agent_show(struct seq_file *seq, void *v)
+{
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       spin_lock(&release_agent_path_lock);
+       seq_puts(seq, cgrp->root->release_agent_path);
+       spin_unlock(&release_agent_path_lock);
+       seq_putc(seq, '\n');
+       return 0;
+}
+
+static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
+{
+       seq_puts(seq, "0\n");
+       return 0;
+}
+
+static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
+                                        struct cftype *cft)
+{
+       return notify_on_release(css->cgroup);
+}
+
+static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
+                                         struct cftype *cft, u64 val)
+{
+       if (val)
+               set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
+       else
+               clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
+       return 0;
+}
+
+static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
+                                     struct cftype *cft)
+{
+       return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
+}
+
+static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
+                                      struct cftype *cft, u64 val)
+{
+       if (val)
+               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
+       else
+               clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
+       return 0;
+}
+
+/* cgroup core interface files for the legacy hierarchies */
+struct cftype cgroup1_base_files[] = {
+       {
+               .name = "cgroup.procs",
+               .seq_start = cgroup_pidlist_start,
+               .seq_next = cgroup_pidlist_next,
+               .seq_stop = cgroup_pidlist_stop,
+               .seq_show = cgroup_pidlist_show,
+               .private = CGROUP_FILE_PROCS,
+               .write = cgroup_procs_write,
+       },
+       {
+               .name = "cgroup.clone_children",
+               .read_u64 = cgroup_clone_children_read,
+               .write_u64 = cgroup_clone_children_write,
+       },
+       {
+               .name = "cgroup.sane_behavior",
+               .flags = CFTYPE_ONLY_ON_ROOT,
+               .seq_show = cgroup_sane_behavior_show,
+       },
+       {
+               .name = "tasks",
+               .seq_start = cgroup_pidlist_start,
+               .seq_next = cgroup_pidlist_next,
+               .seq_stop = cgroup_pidlist_stop,
+               .seq_show = cgroup_pidlist_show,
+               .private = CGROUP_FILE_TASKS,
+               .write = cgroup_tasks_write,
+       },
+       {
+               .name = "notify_on_release",
+               .read_u64 = cgroup_read_notify_on_release,
+               .write_u64 = cgroup_write_notify_on_release,
+       },
+       {
+               .name = "release_agent",
+               .flags = CFTYPE_ONLY_ON_ROOT,
+               .seq_show = cgroup_release_agent_show,
+               .write = cgroup_release_agent_write,
+               .max_write_len = PATH_MAX - 1,
+       },
+       { }     /* terminate */
+};
+
+/* Display information about each subsystem and each hierarchy */
+static int proc_cgroupstats_show(struct seq_file *m, void *v)
+{
+       struct cgroup_subsys *ss;
+       int i;
+
+       seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
+       /*
+        * ideally we don't want subsystems moving around while we do this.
+        * cgroup_mutex is also necessary to guarantee an atomic snapshot of
+        * subsys/hierarchy state.
+        */
+       mutex_lock(&cgroup_mutex);
+
+       for_each_subsys(ss, i)
+               seq_printf(m, "%s\t%d\t%d\t%d\n",
+                          ss->legacy_name, ss->root->hierarchy_id,
+                          atomic_read(&ss->root->nr_cgrps),
+                          cgroup_ssid_enabled(i));
+
+       mutex_unlock(&cgroup_mutex);
+       return 0;
+}
+
+static int cgroupstats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, proc_cgroupstats_show, NULL);
+}
+
+const struct file_operations proc_cgroupstats_operations = {
+       .open = cgroupstats_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+/**
+ * cgroupstats_build - build and fill cgroupstats
+ * @stats: cgroupstats to fill information into
+ * @dentry: A dentry entry belonging to the cgroup for which stats have
+ * been requested.
+ *
+ * Build and fill cgroupstats so that taskstats can export it to user
+ * space.
+ */
+int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
+{
+       struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
+       struct cgroup *cgrp;
+       struct css_task_iter it;
+       struct task_struct *tsk;
+
+       /* the kernfs_node should belong to cgroupfs and be a directory */
+       if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
+           kernfs_type(kn) != KERNFS_DIR)
+               return -EINVAL;
+
+       mutex_lock(&cgroup_mutex);
+
+       /*
+        * We aren't being called from kernfs and there's no guarantee on
+        * @kn->priv's validity.  For this and css_tryget_online_from_dir(),
+        * @kn->priv is RCU safe.  Let's do the RCU dancing.
+        */
+       rcu_read_lock();
+       cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
+       if (!cgrp || cgroup_is_dead(cgrp)) {
+               rcu_read_unlock();
+               mutex_unlock(&cgroup_mutex);
+               return -ENOENT;
+       }
+       rcu_read_unlock();
+
+       css_task_iter_start(&cgrp->self, &it);
+       while ((tsk = css_task_iter_next(&it))) {
+               switch (tsk->state) {
+               case TASK_RUNNING:
+                       stats->nr_running++;
+                       break;
+               case TASK_INTERRUPTIBLE:
+                       stats->nr_sleeping++;
+                       break;
+               case TASK_UNINTERRUPTIBLE:
+                       stats->nr_uninterruptible++;
+                       break;
+               case TASK_STOPPED:
+                       stats->nr_stopped++;
+                       break;
+               default:
+                       if (delayacct_is_task_waiting_on_io(tsk))
+                               stats->nr_io_wait++;
+                       break;
+               }
+       }
+       css_task_iter_end(&it);
+
+       mutex_unlock(&cgroup_mutex);
+       return 0;
+}
+
+void cgroup1_check_for_release(struct cgroup *cgrp)
+{
+       if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
+           !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
+               schedule_work(&cgrp->release_agent_work);
+}
+
+/*
+ * Notify userspace when a cgroup is released, by running the
+ * configured release agent with the name of the cgroup (path
+ * relative to the root of cgroup file system) as the argument.
+ *
+ * Most likely, this user command will try to rmdir this cgroup.
+ *
+ * This races with the possibility that some other task will be
+ * attached to this cgroup before it is removed, or that some other
+ * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
+ * The presumed 'rmdir' will fail quietly if this cgroup is no longer
+ * unused, and this cgroup will be reprieved from its death sentence,
+ * to continue to serve a useful existence.  Next time it's released,
+ * we will get notified again, if it still has 'notify_on_release' set.
+ *
+ * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
+ * means only wait until the task is successfully execve()'d.  The
+ * separate release agent task is forked by call_usermodehelper(),
+ * then control in this thread returns here, without waiting for the
+ * release agent task.  We don't bother to wait because the caller of
+ * this routine has no use for the exit status of the release agent
+ * task, so no sense holding our caller up for that.
+ */
+void cgroup1_release_agent(struct work_struct *work)
+{
+       struct cgroup *cgrp =
+               container_of(work, struct cgroup, release_agent_work);
+       char *pathbuf = NULL, *agentbuf = NULL;
+       char *argv[3], *envp[3];
+       int ret;
+
+       mutex_lock(&cgroup_mutex);
+
+       pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+       agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+       if (!pathbuf || !agentbuf)
+               goto out;
+
+       spin_lock_irq(&css_set_lock);
+       ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+       spin_unlock_irq(&css_set_lock);
+       if (ret < 0 || ret >= PATH_MAX)
+               goto out;
+
+       argv[0] = agentbuf;
+       argv[1] = pathbuf;
+       argv[2] = NULL;
+
+       /* minimal command environment */
+       envp[0] = "HOME=/";
+       envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+       envp[2] = NULL;
+
+       mutex_unlock(&cgroup_mutex);
+       call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+       goto out_free;
+out:
+       mutex_unlock(&cgroup_mutex);
+out_free:
+       kfree(agentbuf);
+       kfree(pathbuf);
+}
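The agent is executed with exactly one argument: the released cgroup's path relative to the hierarchy root. A minimal hypothetical agent that simply removes the now-empty cgroup could look like the sketch below; the mount point used here is an assumption and must match the hierarchy the agent is registered on:

    /* Hypothetical release agent: rmdir the cgroup named in argv[1]. */
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            char path[4096];

            if (argc < 2)
                    return 1;
            /* argv[1] is relative to the hierarchy root, e.g. "/mygrp". */
            snprintf(path, sizeof(path), "/sys/fs/cgroup/memory%s", argv[1]);
            return rmdir(path) ? 1 : 0;
    }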
+
+/*
+ * cgroup_rename - Only allow simple rename of directories in place.
+ */
+static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
+                         const char *new_name_str)
+{
+       struct cgroup *cgrp = kn->priv;
+       int ret;
+
+       if (kernfs_type(kn) != KERNFS_DIR)
+               return -ENOTDIR;
+       if (kn->parent != new_parent)
+               return -EIO;
+
+       /*
+        * We're gonna grab cgroup_mutex which nests outside kernfs
+        * active_ref.  kernfs_rename() doesn't require active_ref
+        * protection.  Break them before grabbing cgroup_mutex.
+        */
+       kernfs_break_active_protection(new_parent);
+       kernfs_break_active_protection(kn);
+
+       mutex_lock(&cgroup_mutex);
+
+       ret = kernfs_rename(kn, new_parent, new_name_str);
+       if (!ret)
+               trace_cgroup_rename(cgrp);
+
+       mutex_unlock(&cgroup_mutex);
+
+       kernfs_unbreak_active_protection(kn);
+       kernfs_unbreak_active_protection(new_parent);
+       return ret;
+}
+
+static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
+{
+       struct cgroup_root *root = cgroup_root_from_kf(kf_root);
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       for_each_subsys(ss, ssid)
+               if (root->subsys_mask & (1 << ssid))
+                       seq_show_option(seq, ss->legacy_name, NULL);
+       if (root->flags & CGRP_ROOT_NOPREFIX)
+               seq_puts(seq, ",noprefix");
+       if (root->flags & CGRP_ROOT_XATTR)
+               seq_puts(seq, ",xattr");
+
+       spin_lock(&release_agent_path_lock);
+       if (strlen(root->release_agent_path))
+               seq_show_option(seq, "release_agent",
+                               root->release_agent_path);
+       spin_unlock(&release_agent_path_lock);
+
+       if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
+               seq_puts(seq, ",clone_children");
+       if (strlen(root->name))
+               seq_show_option(seq, "name", root->name);
+       return 0;
+}
+
+static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
+{
+       char *token, *o = data;
+       bool all_ss = false, one_ss = false;
+       u16 mask = U16_MAX;
+       struct cgroup_subsys *ss;
+       int nr_opts = 0;
+       int i;
+
+#ifdef CONFIG_CPUSETS
+       mask = ~((u16)1 << cpuset_cgrp_id);
+#endif
+
+       memset(opts, 0, sizeof(*opts));
+
+       while ((token = strsep(&o, ",")) != NULL) {
+               nr_opts++;
+
+               if (!*token)
+                       return -EINVAL;
+               if (!strcmp(token, "none")) {
+                       /* Explicitly have no subsystems */
+                       opts->none = true;
+                       continue;
+               }
+               if (!strcmp(token, "all")) {
+                       /* Mutually exclusive option 'all' + subsystem name */
+                       if (one_ss)
+                               return -EINVAL;
+                       all_ss = true;
+                       continue;
+               }
+               if (!strcmp(token, "noprefix")) {
+                       opts->flags |= CGRP_ROOT_NOPREFIX;
+                       continue;
+               }
+               if (!strcmp(token, "clone_children")) {
+                       opts->cpuset_clone_children = true;
+                       continue;
+               }
+               if (!strcmp(token, "xattr")) {
+                       opts->flags |= CGRP_ROOT_XATTR;
+                       continue;
+               }
+               if (!strncmp(token, "release_agent=", 14)) {
+                       /* Specifying two release agents is forbidden */
+                       if (opts->release_agent)
+                               return -EINVAL;
+                       opts->release_agent =
+                               kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
+                       if (!opts->release_agent)
+                               return -ENOMEM;
+                       continue;
+               }
+               if (!strncmp(token, "name=", 5)) {
+                       const char *name = token + 5;
+                       /* Can't specify an empty name */
+                       if (!strlen(name))
+                               return -EINVAL;
+                       /* Must match [\w.-]+ */
+                       for (i = 0; i < strlen(name); i++) {
+                               char c = name[i];
+                               if (isalnum(c))
+                                       continue;
+                               if ((c == '.') || (c == '-') || (c == '_'))
+                                       continue;
+                               return -EINVAL;
+                       }
+                       /* Specifying two names is forbidden */
+                       if (opts->name)
+                               return -EINVAL;
+                       opts->name = kstrndup(name,
+                                             MAX_CGROUP_ROOT_NAMELEN - 1,
+                                             GFP_KERNEL);
+                       if (!opts->name)
+                               return -ENOMEM;
+
+                       continue;
+               }
+
+               for_each_subsys(ss, i) {
+                       if (strcmp(token, ss->legacy_name))
+                               continue;
+                       if (!cgroup_ssid_enabled(i))
+                               continue;
+                       if (cgroup1_ssid_disabled(i))
+                               continue;
+
+                       /* Mutually exclusive option 'all' + subsystem name */
+                       if (all_ss)
+                               return -EINVAL;
+                       opts->subsys_mask |= (1 << i);
+                       one_ss = true;
+
+                       break;
+               }
+               if (i == CGROUP_SUBSYS_COUNT)
+                       return -ENOENT;
+       }
+
+       /*
+        * If the 'all' option was specified, select all the subsystems;
+        * otherwise, if none of 'none', 'name=' or a subsystem name was
+        * specified, default to 'all'.
+        */
+       if (all_ss || (!one_ss && !opts->none && !opts->name))
+               for_each_subsys(ss, i)
+                       if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i))
+                               opts->subsys_mask |= (1 << i);
+
+       /*
+        * We either have to specify by name or by subsystems. (So all
+        * empty hierarchies must have a name).
+        */
+       if (!opts->subsys_mask && !opts->name)
+               return -EINVAL;
+
+       /*
+        * Option noprefix was introduced just for backward compatibility
+        * with the old cpuset, so we allow noprefix only if mounting just
+        * the cpuset subsystem.
+        */
+       if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
+               return -EINVAL;
+
+       /* Can't specify "none" and some subsystems */
+       if (opts->subsys_mask && opts->none)
+               return -EINVAL;
+
+       return 0;
+}
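The option string parsed here is the data argument of mount(2); for example, "none,name=mygrp" yields opts->none and opts->name with no controllers bound. A hedged userspace sketch (the mount point is an assumption and must already exist; CAP_SYS_ADMIN is required):

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* Mount a named, controller-less v1 hierarchy. */
            if (mount("cgroup", "/sys/fs/cgroup/mygrp", "cgroup", 0,
                      "none,name=mygrp") < 0) {
                    perror("mount");
                    return 1;
            }
            return 0;
    }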
+
+static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data)
+{
+       int ret = 0;
+       struct cgroup_root *root = cgroup_root_from_kf(kf_root);
+       struct cgroup_sb_opts opts;
+       u16 added_mask, removed_mask;
+
+       cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
+
+       /* See what subsystems are wanted */
+       ret = parse_cgroupfs_options(data, &opts);
+       if (ret)
+               goto out_unlock;
+
+       if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
+               pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
+                       task_tgid_nr(current), current->comm);
+
+       added_mask = opts.subsys_mask & ~root->subsys_mask;
+       removed_mask = root->subsys_mask & ~opts.subsys_mask;
+
+       /* Don't allow flags or name to change at remount */
+       if ((opts.flags ^ root->flags) ||
+           (opts.name && strcmp(opts.name, root->name))) {
+               pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
+                      opts.flags, opts.name ?: "", root->flags, root->name);
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       /* remounting is not allowed for populated hierarchies */
+       if (!list_empty(&root->cgrp.self.children)) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+
+       ret = rebind_subsystems(root, added_mask);
+       if (ret)
+               goto out_unlock;
+
+       WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
+
+       if (opts.release_agent) {
+               spin_lock(&release_agent_path_lock);
+               strcpy(root->release_agent_path, opts.release_agent);
+               spin_unlock(&release_agent_path_lock);
+       }
+
+       trace_cgroup_remount(root);
+
+ out_unlock:
+       kfree(opts.release_agent);
+       kfree(opts.name);
+       mutex_unlock(&cgroup_mutex);
+       return ret;
+}
+
+struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
+       .rename                 = cgroup1_rename,
+       .show_options           = cgroup1_show_options,
+       .remount_fs             = cgroup1_remount,
+       .mkdir                  = cgroup_mkdir,
+       .rmdir                  = cgroup_rmdir,
+       .show_path              = cgroup_show_path,
+};
+
+struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
+                            void *data, unsigned long magic,
+                            struct cgroup_namespace *ns)
+{
+       struct super_block *pinned_sb = NULL;
+       struct cgroup_sb_opts opts;
+       struct cgroup_root *root;
+       struct cgroup_subsys *ss;
+       struct dentry *dentry;
+       int i, ret;
+
+       cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
+
+       /* First find the desired set of subsystems */
+       ret = parse_cgroupfs_options(data, &opts);
+       if (ret)
+               goto out_unlock;
+
+       /*
+        * Destruction of cgroup root is asynchronous, so subsystems may
+        * still be dying after the previous unmount.  Let's drain the
+        * dying subsystems.  We just need to ensure that the ones
+        * unmounted previously finish dying and don't care about new ones
+        * starting.  Testing ref liveliness is good enough.
+        */
+       for_each_subsys(ss, i) {
+               if (!(opts.subsys_mask & (1 << i)) ||
+                   ss->root == &cgrp_dfl_root)
+                       continue;
+
+               if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+                       mutex_unlock(&cgroup_mutex);
+                       msleep(10);
+                       ret = restart_syscall();
+                       goto out_free;
+               }
+               cgroup_put(&ss->root->cgrp);
+       }
+
+       for_each_root(root) {
+               bool name_match = false;
+
+               if (root == &cgrp_dfl_root)
+                       continue;
+
+               /*
+                * If we asked for a name then it must match.  Also, if
+                * name matches but subsys_mask doesn't, we should fail.
+                * Remember whether name matched.
+                */
+               if (opts.name) {
+                       if (strcmp(opts.name, root->name))
+                               continue;
+                       name_match = true;
+               }
+
+               /*
+                * If we asked for subsystems (or explicitly for no
+                * subsystems) then they must match.
+                */
+               if ((opts.subsys_mask || opts.none) &&
+                   (opts.subsys_mask != root->subsys_mask)) {
+                       if (!name_match)
+                               continue;
+                       ret = -EBUSY;
+                       goto out_unlock;
+               }
+
+               if (root->flags ^ opts.flags)
+                       pr_warn("new mount options do not match the existing superblock, will be ignored\n");
+
+               /*
+                * We want to reuse @root whose lifetime is governed by its
+                * ->cgrp.  Let's check whether @root is alive and keep it
+                * that way.  As cgroup_kill_sb() can happen anytime, we
+                * want to block it by pinning the sb so that @root doesn't
+                * get killed before mount is complete.
+                *
+                * With the sb pinned, tryget_live can reliably indicate
+                * whether @root can be reused.  If it's being killed,
+                * drain it.  We can use wait_queue for the wait but this
+                * path is super cold.  Let's just sleep a bit and retry.
+                */
+               pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+               if (IS_ERR(pinned_sb) ||
+                   !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+                       mutex_unlock(&cgroup_mutex);
+                       if (!IS_ERR_OR_NULL(pinned_sb))
+                               deactivate_super(pinned_sb);
+                       msleep(10);
+                       ret = restart_syscall();
+                       goto out_free;
+               }
+
+               ret = 0;
+               goto out_unlock;
+       }
+
+       /*
+        * No such thing, create a new one.  name= matching without subsys
+        * specification is allowed for already existing hierarchies but we
+        * can't create a new one without a subsys specification.
+        */
+       if (!opts.subsys_mask && !opts.none) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       /* Hierarchies may only be created in the initial cgroup namespace. */
+       if (ns != &init_cgroup_ns) {
+               ret = -EPERM;
+               goto out_unlock;
+       }
+
+       root = kzalloc(sizeof(*root), GFP_KERNEL);
+       if (!root) {
+               ret = -ENOMEM;
+               goto out_unlock;
+       }
+
+       init_cgroup_root(root, &opts);
+
+       ret = cgroup_setup_root(root, opts.subsys_mask);
+       if (ret)
+               cgroup_free_root(root);
+
+out_unlock:
+       mutex_unlock(&cgroup_mutex);
+out_free:
+       kfree(opts.release_agent);
+       kfree(opts.name);
+
+       if (ret)
+               return ERR_PTR(ret);
+
+       dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
+                                CGROUP_SUPER_MAGIC, ns);
+
+       /*
+        * If @pinned_sb, we're reusing an existing root and holding an
+        * extra ref on its sb.  Mount is complete.  Put the extra ref.
+        */
+       if (pinned_sb)
+               deactivate_super(pinned_sb);
+
+       return dentry;
+}
+
+static int __init cgroup1_wq_init(void)
+{
+       /*
+        * Used to destroy pidlists; kept separate so it can serve as the
+        * flush domain.  @max_active is capped at 1 as well.
+        */
+       cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
+                                                   0, 1);
+       BUG_ON(!cgroup_pidlist_destroy_wq);
+       return 0;
+}
+core_initcall(cgroup1_wq_init);
+
+static int __init cgroup_no_v1(char *str)
+{
+       struct cgroup_subsys *ss;
+       char *token;
+       int i;
+
+       while ((token = strsep(&str, ",")) != NULL) {
+               if (!*token)
+                       continue;
+
+               if (!strcmp(token, "all")) {
+                       cgroup_no_v1_mask = U16_MAX;
+                       break;
+               }
+
+               for_each_subsys(ss, i) {
+                       if (strcmp(token, ss->name) &&
+                           strcmp(token, ss->legacy_name))
+                               continue;
+
+                       cgroup_no_v1_mask |= 1 << i;
+               }
+       }
+       return 1;
+}
+__setup("cgroup_no_v1=", cgroup_no_v1);
+
+
+#ifdef CONFIG_CGROUP_DEBUG
+static struct cgroup_subsys_state *
+debug_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+       struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
+
+       if (!css)
+               return ERR_PTR(-ENOMEM);
+
+       return css;
+}
+
+static void debug_css_free(struct cgroup_subsys_state *css)
+{
+       kfree(css);
+}
+
+static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
+                               struct cftype *cft)
+{
+       return cgroup_task_count(css->cgroup);
+}
+
+static u64 current_css_set_read(struct cgroup_subsys_state *css,
+                               struct cftype *cft)
+{
+       return (u64)(unsigned long)current->cgroups;
+}
+
+static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
+                                        struct cftype *cft)
+{
+       u64 count;
+
+       rcu_read_lock();
+       count = atomic_read(&task_css_set(current)->refcount);
+       rcu_read_unlock();
+       return count;
+}
+
+static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
+{
+       struct cgrp_cset_link *link;
+       struct css_set *cset;
+       char *name_buf;
+
+       name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
+       if (!name_buf)
+               return -ENOMEM;
+
+       spin_lock_irq(&css_set_lock);
+       rcu_read_lock();
+       cset = rcu_dereference(current->cgroups);
+       list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+               struct cgroup *c = link->cgrp;
+
+               cgroup_name(c, name_buf, NAME_MAX + 1);
+               seq_printf(seq, "Root %d group %s\n",
+                          c->root->hierarchy_id, name_buf);
+       }
+       rcu_read_unlock();
+       spin_unlock_irq(&css_set_lock);
+       kfree(name_buf);
+       return 0;
+}
+
+#define MAX_TASKS_SHOWN_PER_CSS 25
+static int cgroup_css_links_read(struct seq_file *seq, void *v)
+{
+       struct cgroup_subsys_state *css = seq_css(seq);
+       struct cgrp_cset_link *link;
+
+       spin_lock_irq(&css_set_lock);
+       list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
+               struct css_set *cset = link->cset;
+               struct task_struct *task;
+               int count = 0;
+
+               seq_printf(seq, "css_set %p\n", cset);
+
+               list_for_each_entry(task, &cset->tasks, cg_list) {
+                       if (count++ > MAX_TASKS_SHOWN_PER_CSS)
+                               goto overflow;
+                       seq_printf(seq, "  task %d\n", task_pid_vnr(task));
+               }
+
+               list_for_each_entry(task, &cset->mg_tasks, cg_list) {
+                       if (count++ > MAX_TASKS_SHOWN_PER_CSS)
+                               goto overflow;
+                       seq_printf(seq, "  task %d\n", task_pid_vnr(task));
+               }
+               continue;
+       overflow:
+               seq_puts(seq, "  ...\n");
+       }
+       spin_unlock_irq(&css_set_lock);
+       return 0;
+}
+
+static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       return (!cgroup_is_populated(css->cgroup) &&
+               !css_has_online_children(&css->cgroup->self));
+}
+
+static struct cftype debug_files[] =  {
+       {
+               .name = "taskcount",
+               .read_u64 = debug_taskcount_read,
+       },
+
+       {
+               .name = "current_css_set",
+               .read_u64 = current_css_set_read,
+       },
+
+       {
+               .name = "current_css_set_refcount",
+               .read_u64 = current_css_set_refcount_read,
+       },
+
+       {
+               .name = "current_css_set_cg_links",
+               .seq_show = current_css_set_cg_links_read,
+       },
+
+       {
+               .name = "cgroup_css_links",
+               .seq_show = cgroup_css_links_read,
+       },
+
+       {
+               .name = "releasable",
+               .read_u64 = releasable_read,
+       },
+
+       { }     /* terminate */
+};
+
+struct cgroup_subsys debug_cgrp_subsys = {
+       .css_alloc = debug_css_alloc,
+       .css_free = debug_css_free,
+       .legacy_cftypes = debug_files,
+};
+#endif /* CONFIG_CGROUP_DEBUG */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
new file mode 100644 (file)
index 0000000..e8f87bf
--- /dev/null
@@ -0,0 +1,5067 @@
+/*
+ *  Generic process-grouping system.
+ *
+ *  Based originally on the cpuset system, extracted by Paul Menage
+ *  Copyright (C) 2006 Google, Inc
+ *
+ *  Notifications support
+ *  Copyright (C) 2009 Nokia Corporation
+ *  Author: Kirill A. Shutemov
+ *
+ *  Copyright notices from the original cpuset code:
+ *  --------------------------------------------------
+ *  Copyright (C) 2003 BULL SA.
+ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *
+ *  Portions derived from Patrick Mochel's sysfs code.
+ *  sysfs is Copyright (c) 2001-3 Patrick Mochel
+ *
+ *  2003-10-10 Written by Simon Derr.
+ *  2003-10-22 Updates by Stephen Hemminger.
+ *  2004 May-July Rework by Paul Jackson.
+ *  ---------------------------------------------------
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "cgroup-internal.h"
+
+#include <linux/cred.h>
+#include <linux/errno.h>
+#include <linux/init_task.h>
+#include <linux/kernel.h>
+#include <linux/magic.h>
+#include <linux/mutex.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/string.h>
+#include <linux/hashtable.h>
+#include <linux/idr.h>
+#include <linux/kthread.h>
+#include <linux/atomic.h>
+#include <linux/cpuset.h>
+#include <linux/proc_ns.h>
+#include <linux/nsproxy.h>
+#include <linux/file.h>
+#include <net/sock.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/cgroup.h>
+
+#define CGROUP_FILE_NAME_MAX           (MAX_CGROUP_TYPE_NAMELEN +      \
+                                        MAX_CFTYPE_NAME + 2)
+
+/*
+ * cgroup_mutex is the master lock.  Any modification to cgroup or its
+ * hierarchy must be performed while holding it.
+ *
+ * css_set_lock protects task->cgroups pointer, the list of css_set
+ * objects, and the chain of tasks off each css_set.
+ *
+ * These locks are exported if CONFIG_PROVE_RCU so that accessors in
+ * cgroup.h can use them for lockdep annotations.
+ */
+DEFINE_MUTEX(cgroup_mutex);
+DEFINE_SPINLOCK(css_set_lock);
+
+#ifdef CONFIG_PROVE_RCU
+EXPORT_SYMBOL_GPL(cgroup_mutex);
+EXPORT_SYMBOL_GPL(css_set_lock);
+#endif
+
+/*
+ * Protects cgroup_idr and css_idr so that IDs can be released without
+ * grabbing cgroup_mutex.
+ */
+static DEFINE_SPINLOCK(cgroup_idr_lock);
+
+/*
+ * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
+ * against file removal/re-creation across css hiding.
+ */
+static DEFINE_SPINLOCK(cgroup_file_kn_lock);
+
+struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
+
+#define cgroup_assert_mutex_or_rcu_locked()                            \
+       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
+                          !lockdep_is_held(&cgroup_mutex),             \
+                          "cgroup_mutex or RCU read lock required");
+
+/*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions.  Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
+/* generate an array of cgroup subsystem pointers */
+#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
+struct cgroup_subsys *cgroup_subsys[] = {
+#include <linux/cgroup_subsys.h>
+};
+#undef SUBSYS
+
+/* array of cgroup subsystem names */
+#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
+static const char *cgroup_subsys_name[] = {
+#include <linux/cgroup_subsys.h>
+};
+#undef SUBSYS
+
+/* array of static_keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
+#define SUBSYS(_x)                                                             \
+       DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);                 \
+       DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);                  \
+       EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);                      \
+       EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+
+#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
+static struct static_key_true *cgroup_subsys_enabled_key[] = {
+#include <linux/cgroup_subsys.h>
+};
+#undef SUBSYS
+
+#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
+static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
+#include <linux/cgroup_subsys.h>
+};
+#undef SUBSYS
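The repeated #include of linux/cgroup_subsys.h under changing SUBSYS() definitions is the classic X-macro pattern: a single authoritative list expands into an enum of IDs, a pointer table, a name table and per-subsystem static keys that can never drift out of sync. A self-contained sketch of the technique with made-up names:

    #include <stdio.h>

    /* The single authoritative list; the kernel keeps its in a header. */
    #define SUBSYS_LIST \
            SUBSYS(cpu) \
            SUBSYS(memory)

    #define SUBSYS(_x) _x ## _id,
    enum { SUBSYS_LIST subsys_count };      /* cpu_id, memory_id, ... */
    #undef SUBSYS

    #define SUBSYS(_x) #_x,
    static const char *subsys_name[] = { SUBSYS_LIST };
    #undef SUBSYS

    int main(void)
    {
            int i;

            for (i = 0; i < subsys_count; i++)
                    printf("%d: %s\n", i, subsys_name[i]);
            return 0;
    }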
+
+/*
+ * The default hierarchy, reserved for the subsystems that are otherwise
+ * unattached - it never has more than a single cgroup, and all tasks are
+ * part of that cgroup.
+ */
+struct cgroup_root cgrp_dfl_root;
+EXPORT_SYMBOL_GPL(cgrp_dfl_root);
+
+/*
+ * The default hierarchy always exists but is hidden until mounted for the
+ * first time.  This is for backward compatibility.
+ */
+static bool cgrp_dfl_visible;
+
+/* some controllers are not supported in the default hierarchy */
+static u16 cgrp_dfl_inhibit_ss_mask;
+
+/* some controllers are implicitly enabled on the default hierarchy */
+static u16 cgrp_dfl_implicit_ss_mask;
+
+/* The list of hierarchy roots */
+LIST_HEAD(cgroup_roots);
+static int cgroup_root_count;
+
+/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
+static DEFINE_IDR(cgroup_hierarchy_idr);
+
+/*
+ * Assign a monotonically increasing serial number to csses.  It guarantees
+ * cgroups with bigger numbers are newer than those with smaller numbers.
+ * Also, as csses are always appended to the parent's ->children list, it
+ * guarantees that sibling csses are always sorted in the ascending serial
+ * number order on the list.  Protected by cgroup_mutex.
+ */
+static u64 css_serial_nr_next = 1;
+
+/*
+ * These bitmasks identify subsystems with specific features so that we
+ * can avoid iterating over all subsystems repeatedly.
+ */
+static u16 have_fork_callback __read_mostly;
+static u16 have_exit_callback __read_mostly;
+static u16 have_free_callback __read_mostly;
+static u16 have_canfork_callback __read_mostly;
+
+/* cgroup namespace for init task */
+struct cgroup_namespace init_cgroup_ns = {
+       .count          = { .counter = 2, },
+       .user_ns        = &init_user_ns,
+       .ns.ops         = &cgroupns_operations,
+       .ns.inum        = PROC_CGROUP_INIT_INO,
+       .root_cset      = &init_css_set,
+};
+
+static struct file_system_type cgroup2_fs_type;
+static struct cftype cgroup_base_files[];
+
+static int cgroup_apply_control(struct cgroup *cgrp);
+static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
+static void css_task_iter_advance(struct css_task_iter *it);
+static int cgroup_destroy_locked(struct cgroup *cgrp);
+static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
+                                             struct cgroup_subsys *ss);
+static void css_release(struct percpu_ref *ref);
+static void kill_css(struct cgroup_subsys_state *css);
+static int cgroup_addrm_files(struct cgroup_subsys_state *css,
+                             struct cgroup *cgrp, struct cftype cfts[],
+                             bool is_add);
+
+/**
+ * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
+ * @ssid: subsys ID of interest
+ *
+ * cgroup_subsys_enabled() can only be used with literal subsys names,
+ * which is fine for individual subsystems but unsuitable for cgroup core.
+ * This is a slower, static_key_enabled()-based test indexed by @ssid.
+ */
+bool cgroup_ssid_enabled(int ssid)
+{
+       if (CGROUP_SUBSYS_COUNT == 0)
+               return false;
+
+       return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
+}
+
+/**
+ * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
+ * @cgrp: the cgroup of interest
+ *
+ * The default hierarchy is the v2 interface of cgroup and this function
+ * can be used to test whether a cgroup is on the default hierarchy for
+ * cases where a subsystem should behave differently depending on the
+ * interface version.
+ *
+ * The set of behaviors which change on the default hierarchy are still
+ * being determined and the mount option is prefixed with __DEVEL__.
+ *
+ * List of changed behaviors:
+ *
+ * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
+ *   and "name" are disallowed.
+ *
+ * - When mounting an existing superblock, mount options should match.
+ *
+ * - Remount is disallowed.
+ *
+ * - rename(2) is disallowed.
+ *
+ * - "tasks" is removed.  Everything should be at process granularity.  Use
+ *   "cgroup.procs" instead.
+ *
+ * - "cgroup.procs" is not sorted.  pids will be unique unless they got
+ *   recycled in between reads.
+ *
+ * - "release_agent" and "notify_on_release" are removed.  Replacement
+ *   notification mechanism will be implemented.
+ *
+ * - "cgroup.clone_children" is removed.
+ *
+ * - "cgroup.subtree_populated" is available.  Its value is 0 if the cgroup
+ *   and its descendants contain no task; otherwise, 1.  The file also
+ *   generates kernfs notification which can be monitored through poll and
+ *   [di]notify when the value of the file changes.
+ *
+ * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
+ *   take masks of ancestors with non-empty cpus/mems, instead of being
+ *   moved to an ancestor.
+ *
+ * - cpuset: a task can be moved into an empty cpuset, and again it takes
+ *   masks of ancestors.
+ *
+ * - memcg: use_hierarchy is on by default and the cgroup file for the flag
+ *   is not created.
+ *
+ * - blkcg: blk-throttle becomes properly hierarchical.
+ *
+ * - debug: disallowed on the default hierarchy.
+ */
+bool cgroup_on_dfl(const struct cgroup *cgrp)
+{
+       return cgrp->root == &cgrp_dfl_root;
+}
+
+/* IDR wrappers which synchronize using cgroup_idr_lock */
+static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
+                           gfp_t gfp_mask)
+{
+       int ret;
+
+       idr_preload(gfp_mask);
+       spin_lock_bh(&cgroup_idr_lock);
+       ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
+       spin_unlock_bh(&cgroup_idr_lock);
+       idr_preload_end();
+       return ret;
+}
+
+static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
+{
+       void *ret;
+
+       spin_lock_bh(&cgroup_idr_lock);
+       ret = idr_replace(idr, ptr, id);
+       spin_unlock_bh(&cgroup_idr_lock);
+       return ret;
+}
+
+static void cgroup_idr_remove(struct idr *idr, int id)
+{
+       spin_lock_bh(&cgroup_idr_lock);
+       idr_remove(idr, id);
+       spin_unlock_bh(&cgroup_idr_lock);
+}
+
+static struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+       struct cgroup_subsys_state *parent_css = cgrp->self.parent;
+
+       if (parent_css)
+               return container_of(parent_css, struct cgroup, self);
+       return NULL;
+}
+
+/* subsystems visibly enabled on a cgroup */
+static u16 cgroup_control(struct cgroup *cgrp)
+{
+       struct cgroup *parent = cgroup_parent(cgrp);
+       u16 root_ss_mask = cgrp->root->subsys_mask;
+
+       if (parent)
+               return parent->subtree_control;
+
+       if (cgroup_on_dfl(cgrp))
+               root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
+                                 cgrp_dfl_implicit_ss_mask);
+       return root_ss_mask;
+}
+
+/* subsystems enabled on a cgroup */
+static u16 cgroup_ss_mask(struct cgroup *cgrp)
+{
+       struct cgroup *parent = cgroup_parent(cgrp);
+
+       if (parent)
+               return parent->subtree_ss_mask;
+
+       return cgrp->root->subsys_mask;
+}
+
+/**
+ * cgroup_css - obtain a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
+ *
+ * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
+ * function must be called either under cgroup_mutex or rcu_read_lock() and
+ * the caller is responsible for pinning the returned css if it wants to
+ * keep accessing it outside the said locks.  This function may return
+ * %NULL if @cgrp doesn't have @ss enabled.
+ */
+static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
+                                             struct cgroup_subsys *ss)
+{
+       if (ss)
+               return rcu_dereference_check(cgrp->subsys[ss->id],
+                                       lockdep_is_held(&cgroup_mutex));
+       else
+               return &cgrp->self;
+}
+
+/**
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
+ *
+ * Similar to cgroup_css() but returns the effective css, which is defined
+ * as the matching css of the nearest ancestor including self which has @ss
+ * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
+ * function is guaranteed to return non-NULL css.
+ */
+static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+                                               struct cgroup_subsys *ss)
+{
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (!ss)
+               return &cgrp->self;
+
+       /*
+        * This function is used while updating css associations and thus
+        * can't test the csses directly.  Test ss_mask.
+        */
+       while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
+               cgrp = cgroup_parent(cgrp);
+               if (!cgrp)
+                       return NULL;
+       }
+
+       return cgroup_css(cgrp, ss);
+}
+
+/**
+ * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get the effective css of @cgrp for @ss.  The effective css is
+ * defined as the matching css of the nearest ancestor including self which
+ * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
+ * the root css is returned, so this function always returns a valid css.
+ * The returned css must be put using css_put().
+ */
+struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
+                                            struct cgroup_subsys *ss)
+{
+       struct cgroup_subsys_state *css;
+
+       rcu_read_lock();
+
+       do {
+               css = cgroup_css(cgrp, ss);
+
+               if (css && css_tryget_online(css))
+                       goto out_unlock;
+               cgrp = cgroup_parent(cgrp);
+       } while (cgrp);
+
+       css = init_css_set.subsys[ss->id];
+       css_get(css);
+out_unlock:
+       rcu_read_unlock();
+       return css;
+}
+
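A caller would typically use this to pin the effective css across a sleeping section; a minimal sketch (the memory controller is an arbitrary choice and assumes CONFIG_MEMCG):

static void example_use_e_css(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css;

	/* always returns a valid, pinned css; falls back to the root css */
	css = cgroup_get_e_css(cgrp, &memory_cgrp_subsys);

	/* ... css may be dereferenced here, even across sleeping calls ... */

	css_put(css);	/* drop the reference taken above */
}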
+static void cgroup_get(struct cgroup *cgrp)
+{
+       WARN_ON_ONCE(cgroup_is_dead(cgrp));
+       css_get(&cgrp->self);
+}
+
+static bool cgroup_tryget(struct cgroup *cgrp)
+{
+       return css_tryget(&cgrp->self);
+}
+
+struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
+{
+       struct cgroup *cgrp = of->kn->parent->priv;
+       struct cftype *cft = of_cft(of);
+
+       /*
+        * This is an open-coded and unprotected version of cgroup_css().
+        * seq_css() is only called from a kernfs file operation which has
+        * an active reference on the file.  Because all the subsystem
+        * files are drained before a css is disassociated from a cgroup,
+        * the matching css from the cgroup's subsys table is guaranteed to
+        * be and stay valid until the enclosing operation is complete.
+        */
+       if (cft->ss)
+               return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
+       else
+               return &cgrp->self;
+}
+EXPORT_SYMBOL_GPL(of_css);
+
+/**
+ * for_each_css - iterate all css's of a cgroup
+ * @css: the iteration cursor
+ * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
+ * @cgrp: the target cgroup to iterate css's of
+ *
+ * Should be called under cgroup_[tree_]mutex.
+ */
+#define for_each_css(css, ssid, cgrp)                                  \
+       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)        \
+               if (!((css) = rcu_dereference_check(                    \
+                               (cgrp)->subsys[(ssid)],                 \
+                               lockdep_is_held(&cgroup_mutex)))) { }   \
+               else
+
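A hedged usage sketch of the iterator (the printout is illustrative only):

static void example_walk_csses(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	/* visits only the subsystems that have a css on @cgrp */
	for_each_css(css, ssid, cgrp)
		pr_info("subsys %d has a css on this cgroup\n", ssid);
}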
+/**
+ * for_each_e_css - iterate all effective css's of a cgroup
+ * @css: the iteration cursor
+ * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
+ * @cgrp: the target cgroup to iterate css's of
+ *
+ * Should be called under cgroup_[tree_]mutex.
+ */
+#define for_each_e_css(css, ssid, cgrp)                                        \
+       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)        \
+               if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
+                       ;                                               \
+               else
+
+/**
+ * do_each_subsys_mask - filter for_each_subsys with a bitmask
+ * @ss: the iteration cursor
+ * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
+ * @ss_mask: the bitmask
+ *
+ * The block will only run for cases where the ssid-th bit (1 << ssid) of
+ * @ss_mask is set.
+ */
+#define do_each_subsys_mask(ss, ssid, ss_mask) do {                    \
+       unsigned long __ss_mask = (ss_mask);                            \
+       if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
+               (ssid) = 0;                                             \
+               break;                                                  \
+       }                                                               \
+       for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {       \
+               (ss) = cgroup_subsys[ssid];                             \
+               {
+
+#define while_each_subsys_mask()                                       \
+               }                                                       \
+       }                                                               \
+} while (false)
+
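The pair is used like an ordinary loop construct; a minimal sketch:

static void example_walk_mask(u16 ss_mask)
{
	struct cgroup_subsys *ss;
	int ssid;

	do_each_subsys_mask(ss, ssid, ss_mask) {
		/* runs only for subsystems whose bit is set in @ss_mask */
		pr_info("subsystem %s (id %d) is in the mask\n", ss->name, ssid);
	} while_each_subsys_mask();
}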
+/* iterate over child cgrps, lock should be held throughout iteration */
+#define cgroup_for_each_live_child(child, cgrp)                                \
+       list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
+               if (({ lockdep_assert_held(&cgroup_mutex);              \
+                      cgroup_is_dead(child); }))                       \
+                       ;                                               \
+               else
+
+/* walk live descendants in preorder */
+#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)         \
+       css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))  \
+               if (({ lockdep_assert_held(&cgroup_mutex);              \
+                      (dsct) = (d_css)->cgroup;                        \
+                      cgroup_is_dead(dsct); }))                        \
+                       ;                                               \
+               else
+
+/* walk live descendants in postorder */
+#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)                \
+       css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
+               if (({ lockdep_assert_held(&cgroup_mutex);              \
+                      (dsct) = (d_css)->cgroup;                        \
+                      cgroup_is_dead(dsct); }))                        \
+                       ;                                               \
+               else
+
+/*
+ * The default css_set - used by init and its children prior to any
+ * hierarchies being mounted. It contains a pointer to the root state
+ * for each subsystem. Also used to anchor the list of css_sets. Not
+ * reference-counted, to improve performance when child cgroups
+ * haven't been created.
+ */
+struct css_set init_css_set = {
+       .refcount               = ATOMIC_INIT(1),
+       .tasks                  = LIST_HEAD_INIT(init_css_set.tasks),
+       .mg_tasks               = LIST_HEAD_INIT(init_css_set.mg_tasks),
+       .task_iters             = LIST_HEAD_INIT(init_css_set.task_iters),
+       .cgrp_links             = LIST_HEAD_INIT(init_css_set.cgrp_links),
+       .mg_preload_node        = LIST_HEAD_INIT(init_css_set.mg_preload_node),
+       .mg_node                = LIST_HEAD_INIT(init_css_set.mg_node),
+};
+
+static int css_set_count       = 1;    /* 1 for init_css_set */
+
+/**
+ * css_set_populated - does a css_set contain any tasks?
+ * @cset: target css_set
+ */
+static bool css_set_populated(struct css_set *cset)
+{
+       lockdep_assert_held(&css_set_lock);
+
+       return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
+}
+
+/**
+ * cgroup_update_populated - update the populated count of a cgroup
+ * @cgrp: the target cgroup
+ * @populated: inc or dec populated count
+ *
+ * One of the css_sets associated with @cgrp is either getting its first
+ * task or losing the last.  Update @cgrp->populated_cnt accordingly.  The
+ * count is propagated towards root so that a given cgroup's populated_cnt
+ * is zero iff the cgroup and all its descendants don't contain any tasks.
+ *
+ * @cgrp's interface file "cgroup.populated" is zero if
+ * @cgrp->populated_cnt is zero and 1 otherwise.  When @cgrp->populated_cnt
+ * changes from or to zero, userland is notified that the content of the
+ * interface file has changed.  This can be used to detect when @cgrp and
+ * its descendants become populated or empty.
+ */
+static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
+{
+       lockdep_assert_held(&css_set_lock);
+
+       do {
+               bool trigger;
+
+               if (populated)
+                       trigger = !cgrp->populated_cnt++;
+               else
+                       trigger = !--cgrp->populated_cnt;
+
+               if (!trigger)
+                       break;
+
+               cgroup1_check_for_release(cgrp);
+               cgroup_file_notify(&cgrp->events_file);
+
+               cgrp = cgroup_parent(cgrp);
+       } while (cgrp);
+}
+
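Userspace consumes these notifications by polling the interface file; a minimal userspace sketch, assuming a cgroup mounted at the hypothetical path /sys/fs/cgroup/mygrp (on the default hierarchy the populated state is reported through the "populated" field of cgroup.events):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[128];
	ssize_t n;
	int fd = open("/sys/fs/cgroup/mygrp/cgroup.events", O_RDONLY);
	struct pollfd pfd = { .fd = fd, .events = POLLPRI };

	if (fd < 0)
		return 1;
	for (;;) {
		poll(&pfd, 1, -1);		/* kernfs_notify() wakes us up */
		lseek(fd, 0, SEEK_SET);
		n = read(fd, buf, sizeof(buf) - 1);
		if (n <= 0)
			break;
		buf[n] = '\0';
		fputs(buf, stdout);		/* e.g. "populated 1" */
	}
	close(fd);
	return 0;
}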
+/**
+ * css_set_update_populated - update populated state of a css_set
+ * @cset: target css_set
+ * @populated: whether @cset is populated or depopulated
+ *
+ * @cset is either getting the first task or losing the last.  Update the
+ * ->populated_cnt of all associated cgroups accordingly.
+ */
+static void css_set_update_populated(struct css_set *cset, bool populated)
+{
+       struct cgrp_cset_link *link;
+
+       lockdep_assert_held(&css_set_lock);
+
+       list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
+               cgroup_update_populated(link->cgrp, populated);
+}
+
+/**
+ * css_set_move_task - move a task from one css_set to another
+ * @task: task being moved
+ * @from_cset: css_set @task currently belongs to (may be NULL)
+ * @to_cset: new css_set @task is being moved to (may be NULL)
+ * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
+ *
+ * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
+ * css_set, @from_cset can be NULL.  If @task is being disassociated
+ * instead of moved, @to_cset can be NULL.
+ *
+ * This function automatically handles populated_cnt updates and
+ * css_task_iter adjustments but the caller is responsible for managing
+ * @from_cset and @to_cset's reference counts.
+ */
+static void css_set_move_task(struct task_struct *task,
+                             struct css_set *from_cset, struct css_set *to_cset,
+                             bool use_mg_tasks)
+{
+       lockdep_assert_held(&css_set_lock);
+
+       if (to_cset && !css_set_populated(to_cset))
+               css_set_update_populated(to_cset, true);
+
+       if (from_cset) {
+               struct css_task_iter *it, *pos;
+
+               WARN_ON_ONCE(list_empty(&task->cg_list));
+
+               /*
+                * @task is leaving, advance task iterators which are
+                * pointing to it so that they can resume at the next
+                * position.  Advancing an iterator might remove it from
+                * the list, use safe walk.  See css_task_iter_advance*()
+                * for details.
+                */
+               list_for_each_entry_safe(it, pos, &from_cset->task_iters,
+                                        iters_node)
+                       if (it->task_pos == &task->cg_list)
+                               css_task_iter_advance(it);
+
+               list_del_init(&task->cg_list);
+               if (!css_set_populated(from_cset))
+                       css_set_update_populated(from_cset, false);
+       } else {
+               WARN_ON_ONCE(!list_empty(&task->cg_list));
+       }
+
+       if (to_cset) {
+               /*
+                * We are synchronized through cgroup_threadgroup_rwsem
+                * against PF_EXITING setting such that we can't race
+                * against cgroup_exit() changing the css_set to
+                * init_css_set and dropping the old one.
+                */
+               WARN_ON_ONCE(task->flags & PF_EXITING);
+
+               rcu_assign_pointer(task->cgroups, to_cset);
+               list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
+                                                            &to_cset->tasks);
+       }
+}
+
+/*
+ * hash table for css_sets. This improves the performance of finding
+ * an existing css_set. This hash doesn't (currently) take into
+ * account cgroups in empty hierarchies.
+ */
+#define CSS_SET_HASH_BITS      7
+static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
+
+static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
+{
+       unsigned long key = 0UL;
+       struct cgroup_subsys *ss;
+       int i;
+
+       for_each_subsys(ss, i)
+               key += (unsigned long)css[i];
+       key = (key >> 16) ^ key;
+
+       return key;
+}
+
+void put_css_set_locked(struct css_set *cset)
+{
+       struct cgrp_cset_link *link, *tmp_link;
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       lockdep_assert_held(&css_set_lock);
+
+       if (!atomic_dec_and_test(&cset->refcount))
+               return;
+
+       /* This css_set is dead. unlink it and release cgroup and css refs */
+       for_each_subsys(ss, ssid) {
+               list_del(&cset->e_cset_node[ssid]);
+               css_put(cset->subsys[ssid]);
+       }
+       hash_del(&cset->hlist);
+       css_set_count--;
+
+       list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
+               list_del(&link->cset_link);
+               list_del(&link->cgrp_link);
+               if (cgroup_parent(link->cgrp))
+                       cgroup_put(link->cgrp);
+               kfree(link);
+       }
+
+       kfree_rcu(cset, rcu_head);
+}
+
+/**
+ * compare_css_sets - helper function for find_existing_css_set().
+ * @cset: candidate css_set being tested
+ * @old_cset: existing css_set for a task
+ * @new_cgrp: cgroup that's being entered by the task
+ * @template: desired set of css pointers in css_set (pre-calculated)
+ *
+ * Returns true if "cset" matches "old_cset" except for the hierarchy
+ * which "new_cgrp" belongs to, for which it should match "new_cgrp".
+ */
+static bool compare_css_sets(struct css_set *cset,
+                            struct css_set *old_cset,
+                            struct cgroup *new_cgrp,
+                            struct cgroup_subsys_state *template[])
+{
+       struct list_head *l1, *l2;
+
+       /*
+        * On the default hierarchy, there can be csets which are
+        * associated with the same set of cgroups but different csses.
+        * Let's first ensure that csses match.
+        */
+       if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
+               return false;
+
+       /*
+        * Compare cgroup pointers in order to distinguish between
+        * different cgroups in hierarchies.  As different cgroups may
+        * share the same effective css, this comparison is always
+        * necessary.
+        */
+       l1 = &cset->cgrp_links;
+       l2 = &old_cset->cgrp_links;
+       while (1) {
+               struct cgrp_cset_link *link1, *link2;
+               struct cgroup *cgrp1, *cgrp2;
+
+               l1 = l1->next;
+               l2 = l2->next;
+               /* See if we reached the end - both lists are equal length. */
+               if (l1 == &cset->cgrp_links) {
+                       BUG_ON(l2 != &old_cset->cgrp_links);
+                       break;
+               } else {
+                       BUG_ON(l2 == &old_cset->cgrp_links);
+               }
+               /* Locate the cgroups associated with these links. */
+               link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
+               link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
+               cgrp1 = link1->cgrp;
+               cgrp2 = link2->cgrp;
+               /* Hierarchies should be linked in the same order. */
+               BUG_ON(cgrp1->root != cgrp2->root);
+
+               /*
+                * If this hierarchy is the hierarchy of the cgroup
+                * that's changing, then we need to check that this
+                * css_set points to the new cgroup; if it's any other
+                * hierarchy, then this css_set should point to the
+                * same cgroup as the old css_set.
+                */
+               if (cgrp1->root == new_cgrp->root) {
+                       if (cgrp1 != new_cgrp)
+                               return false;
+               } else {
+                       if (cgrp1 != cgrp2)
+                               return false;
+               }
+       }
+       return true;
+}
+
+/**
+ * find_existing_css_set - init css array and find the matching css_set
+ * @old_cset: the css_set that we're using before the cgroup transition
+ * @cgrp: the cgroup that we're moving into
+ * @template: out param for the new set of csses, should be clear on entry
+ */
+static struct css_set *find_existing_css_set(struct css_set *old_cset,
+                                       struct cgroup *cgrp,
+                                       struct cgroup_subsys_state *template[])
+{
+       struct cgroup_root *root = cgrp->root;
+       struct cgroup_subsys *ss;
+       struct css_set *cset;
+       unsigned long key;
+       int i;
+
+       /*
+        * Build the set of subsystem state objects that we want to see in the
+        * new css_set. While subsystems can change globally, the entries here
+        * won't change, so no need for locking.
+        */
+       for_each_subsys(ss, i) {
+               if (root->subsys_mask & (1UL << i)) {
+                       /*
+                        * @ss is in this hierarchy, so we want the
+                        * effective css from @cgrp.
+                        */
+                       template[i] = cgroup_e_css(cgrp, ss);
+               } else {
+                       /*
+                        * @ss is not in this hierarchy, so we don't want
+                        * to change the css.
+                        */
+                       template[i] = old_cset->subsys[i];
+               }
+       }
+
+       key = css_set_hash(template);
+       hash_for_each_possible(css_set_table, cset, hlist, key) {
+               if (!compare_css_sets(cset, old_cset, cgrp, template))
+                       continue;
+
+               /* This css_set matches what we need */
+               return cset;
+       }
+
+       /* No existing css_set matched */
+       return NULL;
+}
+
+static void free_cgrp_cset_links(struct list_head *links_to_free)
+{
+       struct cgrp_cset_link *link, *tmp_link;
+
+       list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
+               list_del(&link->cset_link);
+               kfree(link);
+       }
+}
+
+/**
+ * allocate_cgrp_cset_links - allocate cgrp_cset_links
+ * @count: the number of links to allocate
+ * @tmp_links: list_head the allocated links are put on
+ *
+ * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
+ * through ->cset_link.  Returns 0 on success or -errno.
+ */
+static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
+{
+       struct cgrp_cset_link *link;
+       int i;
+
+       INIT_LIST_HEAD(tmp_links);
+
+       for (i = 0; i < count; i++) {
+               link = kzalloc(sizeof(*link), GFP_KERNEL);
+               if (!link) {
+                       free_cgrp_cset_links(tmp_links);
+                       return -ENOMEM;
+               }
+               list_add(&link->cset_link, tmp_links);
+       }
+       return 0;
+}
+
+/**
+ * link_css_set - a helper function to link a css_set to a cgroup
+ * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
+ * @cset: the css_set to be linked
+ * @cgrp: the destination cgroup
+ */
+static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
+                        struct cgroup *cgrp)
+{
+       struct cgrp_cset_link *link;
+
+       BUG_ON(list_empty(tmp_links));
+
+       if (cgroup_on_dfl(cgrp))
+               cset->dfl_cgrp = cgrp;
+
+       link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
+       link->cset = cset;
+       link->cgrp = cgrp;
+
+       /*
+        * Always add links to the tail of the lists so that the lists are
+        * in chronological order.
+        */
+       list_move_tail(&link->cset_link, &cgrp->cset_links);
+       list_add_tail(&link->cgrp_link, &cset->cgrp_links);
+
+       if (cgroup_parent(cgrp))
+               cgroup_get(cgrp);
+}
+
+/**
+ * find_css_set - return a new css_set with one cgroup updated
+ * @old_cset: the baseline css_set
+ * @cgrp: the cgroup to be updated
+ *
+ * Return a new css_set that's equivalent to @old_cset, but with @cgrp
+ * substituted into the appropriate hierarchy.
+ */
+static struct css_set *find_css_set(struct css_set *old_cset,
+                                   struct cgroup *cgrp)
+{
+       struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
+       struct css_set *cset;
+       struct list_head tmp_links;
+       struct cgrp_cset_link *link;
+       struct cgroup_subsys *ss;
+       unsigned long key;
+       int ssid;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       /* First see if we already have a css_set that matches
+        * the desired set */
+       spin_lock_irq(&css_set_lock);
+       cset = find_existing_css_set(old_cset, cgrp, template);
+       if (cset)
+               get_css_set(cset);
+       spin_unlock_irq(&css_set_lock);
+
+       if (cset)
+               return cset;
+
+       cset = kzalloc(sizeof(*cset), GFP_KERNEL);
+       if (!cset)
+               return NULL;
+
+       /* Allocate all the cgrp_cset_link objects that we'll need */
+       if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
+               kfree(cset);
+               return NULL;
+       }
+
+       atomic_set(&cset->refcount, 1);
+       INIT_LIST_HEAD(&cset->tasks);
+       INIT_LIST_HEAD(&cset->mg_tasks);
+       INIT_LIST_HEAD(&cset->task_iters);
+       INIT_HLIST_NODE(&cset->hlist);
+       INIT_LIST_HEAD(&cset->cgrp_links);
+       INIT_LIST_HEAD(&cset->mg_preload_node);
+       INIT_LIST_HEAD(&cset->mg_node);
+
+       /* Copy the set of subsystem state objects generated in
+        * find_existing_css_set() */
+       memcpy(cset->subsys, template, sizeof(cset->subsys));
+
+       spin_lock_irq(&css_set_lock);
+       /* Add reference counts and links from the new css_set. */
+       list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
+               struct cgroup *c = link->cgrp;
+
+               if (c->root == cgrp->root)
+                       c = cgrp;
+               link_css_set(&tmp_links, cset, c);
+       }
+
+       BUG_ON(!list_empty(&tmp_links));
+
+       css_set_count++;
+
+       /* Add @cset to the hash table */
+       key = css_set_hash(cset->subsys);
+       hash_add(css_set_table, &cset->hlist, key);
+
+       for_each_subsys(ss, ssid) {
+               struct cgroup_subsys_state *css = cset->subsys[ssid];
+
+               list_add_tail(&cset->e_cset_node[ssid],
+                             &css->cgroup->e_csets[ssid]);
+               css_get(css);
+       }
+
+       spin_unlock_irq(&css_set_lock);
+
+       return cset;
+}
+
+struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
+{
+       struct cgroup *root_cgrp = kf_root->kn->priv;
+
+       return root_cgrp->root;
+}
+
+static int cgroup_init_root_id(struct cgroup_root *root)
+{
+       int id;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
+       if (id < 0)
+               return id;
+
+       root->hierarchy_id = id;
+       return 0;
+}
+
+static void cgroup_exit_root_id(struct cgroup_root *root)
+{
+       lockdep_assert_held(&cgroup_mutex);
+
+       idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
+}
+
+void cgroup_free_root(struct cgroup_root *root)
+{
+       if (root) {
+               idr_destroy(&root->cgroup_idr);
+               kfree(root);
+       }
+}
+
+static void cgroup_destroy_root(struct cgroup_root *root)
+{
+       struct cgroup *cgrp = &root->cgrp;
+       struct cgrp_cset_link *link, *tmp_link;
+
+       trace_cgroup_destroy_root(root);
+
+       cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
+
+       BUG_ON(atomic_read(&root->nr_cgrps));
+       BUG_ON(!list_empty(&cgrp->self.children));
+
+       /* Rebind all subsystems back to the default hierarchy */
+       WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
+
+       /*
+        * Release all the links from cset_links to this hierarchy's
+        * root cgroup
+        */
+       spin_lock_irq(&css_set_lock);
+
+       list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
+               list_del(&link->cset_link);
+               list_del(&link->cgrp_link);
+               kfree(link);
+       }
+
+       spin_unlock_irq(&css_set_lock);
+
+       if (!list_empty(&root->root_list)) {
+               list_del(&root->root_list);
+               cgroup_root_count--;
+       }
+
+       cgroup_exit_root_id(root);
+
+       mutex_unlock(&cgroup_mutex);
+
+       kernfs_destroy_root(root->kf_root);
+       cgroup_free_root(root);
+}
+
+/*
+ * look up cgroup associated with current task's cgroup namespace on the
+ * specified hierarchy
+ */
+static struct cgroup *
+current_cgns_cgroup_from_root(struct cgroup_root *root)
+{
+       struct cgroup *res = NULL;
+       struct css_set *cset;
+
+       lockdep_assert_held(&css_set_lock);
+
+       rcu_read_lock();
+
+       cset = current->nsproxy->cgroup_ns->root_cset;
+       if (cset == &init_css_set) {
+               res = &root->cgrp;
+       } else {
+               struct cgrp_cset_link *link;
+
+               list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+                       struct cgroup *c = link->cgrp;
+
+                       if (c->root == root) {
+                               res = c;
+                               break;
+                       }
+               }
+       }
+       rcu_read_unlock();
+
+       BUG_ON(!res);
+       return res;
+}
+
+/* look up cgroup associated with given css_set on the specified hierarchy */
+static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
+                                           struct cgroup_root *root)
+{
+       struct cgroup *res = NULL;
+
+       lockdep_assert_held(&cgroup_mutex);
+       lockdep_assert_held(&css_set_lock);
+
+       if (cset == &init_css_set) {
+               res = &root->cgrp;
+       } else {
+               struct cgrp_cset_link *link;
+
+               list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+                       struct cgroup *c = link->cgrp;
+
+                       if (c->root == root) {
+                               res = c;
+                               break;
+                       }
+               }
+       }
+
+       BUG_ON(!res);
+       return res;
+}
+
+/*
+ * Return the cgroup for "task" from the given hierarchy. Must be
+ * called with cgroup_mutex and css_set_lock held.
+ */
+struct cgroup *task_cgroup_from_root(struct task_struct *task,
+                                    struct cgroup_root *root)
+{
+       /*
+        * No need to lock the task - since we hold cgroup_mutex the
+        * task can't change groups, so the only thing that can happen
+        * is that it exits and its css is set back to init_css_set.
+        */
+       return cset_cgroup_from_root(task_css_set(task), root);
+}
+
+/*
+ * A task must hold cgroup_mutex to modify cgroups.
+ *
+ * Any task can increment and decrement the count field without a lock.
+ * So in general, code holding cgroup_mutex can't rely on the count
+ * field not changing.  However, if the count goes to zero, then only
+ * cgroup_attach_task() can increment it again.  Because a count of zero
+ * means that no tasks are currently attached, therefore there is no
+ * way a task attached to that cgroup can fork (the other way to
+ * increment the count).  So code holding cgroup_mutex can safely
+ * assume that if the count is zero, it will stay zero. Similarly, if
+ * a task holds cgroup_mutex on a cgroup with zero count, it
+ * knows that the cgroup won't be removed, as cgroup_rmdir()
+ * needs that mutex.
+ *
+ * A cgroup can only be deleted if both its 'count' of using tasks
+ * is zero, and its list of 'children' cgroups is empty.  Since all
+ * tasks in the system use _some_ cgroup, and since there is always at
+ * least one task in the system (init, pid == 1), the root cgroup
+ * always has child cgroups and/or using tasks.  So we don't
+ * need a special hack to ensure that root cgroup cannot be deleted.
+ *
+ * P.S.  One more locking exception.  RCU is used to guard the
+ * update of a task's cgroup pointer by cgroup_attach_task()
+ */
+
+static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
+
+static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
+                             char *buf)
+{
+       struct cgroup_subsys *ss = cft->ss;
+
+       if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
+           !(cgrp->root->flags & CGRP_ROOT_NOPREFIX))
+               snprintf(buf, CGROUP_FILE_NAME_MAX, "%s.%s",
+                        cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
+                        cft->name);
+       else
+               strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
+       return buf;
+}
+
+/**
+ * cgroup_file_mode - deduce file mode of a control file
+ * @cft: the control file in question
+ *
+ * S_IRUGO for read, S_IWUSR for write.
+ */
+static umode_t cgroup_file_mode(const struct cftype *cft)
+{
+       umode_t mode = 0;
+
+       if (cft->read_u64 || cft->read_s64 || cft->seq_show)
+               mode |= S_IRUGO;
+
+       if (cft->write_u64 || cft->write_s64 || cft->write) {
+               if (cft->flags & CFTYPE_WORLD_WRITABLE)
+                       mode |= S_IWUGO;
+               else
+                       mode |= S_IWUSR;
+       }
+
+       return mode;
+}
+
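For instance, given a hypothetical cftype array like the sketch below, this function would derive 0644 for the read-write file and 0444 for the read-only one:

static u64 example_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return 0;
}

static int example_write(struct cgroup_subsys_state *css, struct cftype *cft,
			 u64 val)
{
	return 0;
}

static struct cftype example_files[] = {
	{
		/* read_u64 + write_u64 -> S_IRUGO | S_IWUSR (0644) */
		.name = "example.value",
		.read_u64 = example_read,
		.write_u64 = example_write,
	},
	{
		/* read_u64 alone -> S_IRUGO (0444) */
		.name = "example.stat",
		.read_u64 = example_read,
	},
	{ }	/* terminator */
};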
+/**
+ * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
+ * @subtree_control: the new subtree_control mask to consider
+ * @this_ss_mask: available subsystems
+ *
+ * On the default hierarchy, a subsystem may request other subsystems to be
+ * enabled together through its ->depends_on mask.  In such cases, more
+ * subsystems than specified in "cgroup.subtree_control" may be enabled.
+ *
+ * This function calculates which subsystems need to be enabled if
+ * @subtree_control is to be applied while restricted to @this_ss_mask.
+ */
+static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
+{
+       u16 cur_ss_mask = subtree_control;
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
+
+       while (true) {
+               u16 new_ss_mask = cur_ss_mask;
+
+               do_each_subsys_mask(ss, ssid, cur_ss_mask) {
+                       new_ss_mask |= ss->depends_on;
+               } while_each_subsys_mask();
+
+               /*
+                * Mask out subsystems which aren't available.  This can
+                * happen only if some depended-upon subsystems were bound
+                * to non-default hierarchies.
+                */
+               new_ss_mask &= this_ss_mask;
+
+               if (new_ss_mask == cur_ss_mask)
+                       break;
+               cur_ss_mask = new_ss_mask;
+       }
+
+       return cur_ss_mask;
+}
+
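The loop above is a plain fixed-point iteration over the ->depends_on masks; the same computation in isolation looks like this toy sketch (depends_on[] is made up, not a real controller table):

static u16 example_closure(u16 control, const u16 *depends_on, int nr_ss)
{
	u16 cur = control;

	for (;;) {
		u16 next = cur;
		int i;

		for (i = 0; i < nr_ss; i++)
			if (cur & (1 << i))
				next |= depends_on[i];	/* pull in dependencies */
		if (next == cur)
			return cur;	/* no change - fixed point reached */
		cur = next;
	}
}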
+/**
+ * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
+ * @kn: the kernfs_node being serviced
+ *
+ * This helper undoes cgroup_kn_lock_live() and should be invoked before
+ * the method finishes if locking succeeded.  Note that once this function
+ * returns the cgroup returned by cgroup_kn_lock_live() may become
+ * inaccessible any time.  If the caller intends to continue to access the
+ * cgroup, it should pin it before invoking this function.
+ */
+void cgroup_kn_unlock(struct kernfs_node *kn)
+{
+       struct cgroup *cgrp;
+
+       if (kernfs_type(kn) == KERNFS_DIR)
+               cgrp = kn->priv;
+       else
+               cgrp = kn->parent->priv;
+
+       mutex_unlock(&cgroup_mutex);
+
+       kernfs_unbreak_active_protection(kn);
+       cgroup_put(cgrp);
+}
+
+/**
+ * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
+ * @kn: the kernfs_node being serviced
+ * @drain_offline: perform offline draining on the cgroup
+ *
+ * This helper is to be used by a cgroup kernfs method currently servicing
+ * @kn.  It breaks the active protection, performs cgroup locking and
+ * verifies that the associated cgroup is alive.  Returns the cgroup if
+ * alive; otherwise, %NULL.  A successful return should be undone by a
+ * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
+ * cgroup is drained of offlining csses before return.
+ *
+ * Any cgroup kernfs method implementation which requires locking the
+ * associated cgroup should use this helper.  It avoids nesting cgroup
+ * locking under kernfs active protection and allows all kernfs operations
+ * including self-removal.
+ */
+struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
+{
+       struct cgroup *cgrp;
+
+       if (kernfs_type(kn) == KERNFS_DIR)
+               cgrp = kn->priv;
+       else
+               cgrp = kn->parent->priv;
+
+       /*
+        * We're gonna grab cgroup_mutex which nests outside kernfs
+        * active_ref.  The cgroup liveness check alone provides enough
+        * protection against removal.  Ensure @cgrp stays accessible and
+        * break the active_ref protection.
+        */
+       if (!cgroup_tryget(cgrp))
+               return NULL;
+       kernfs_break_active_protection(kn);
+
+       if (drain_offline)
+               cgroup_lock_and_drain_offline(cgrp);
+       else
+               mutex_lock(&cgroup_mutex);
+
+       if (!cgroup_is_dead(cgrp))
+               return cgrp;
+
+       cgroup_kn_unlock(kn);
+       return NULL;
+}
+
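A cgroup kernfs method brackets its work with this pair; a minimal sketch of a write handler, modeled on how the handlers in this file use it (name and body hypothetical):

static ssize_t example_write(struct kernfs_open_file *of, char *buf,
			     size_t nbytes, loff_t off)
{
	struct cgroup *cgrp;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;	/* the cgroup is already dead */

	/* ... @cgrp is alive and cgroup_mutex is held here ... */

	cgroup_kn_unlock(of->kn);
	return nbytes;
}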
+static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
+{
+       char name[CGROUP_FILE_NAME_MAX];
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (cft->file_offset) {
+               struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
+               struct cgroup_file *cfile = (void *)css + cft->file_offset;
+
+               spin_lock_irq(&cgroup_file_kn_lock);
+               cfile->kn = NULL;
+               spin_unlock_irq(&cgroup_file_kn_lock);
+       }
+
+       kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
+}
+
+/**
+ * css_clear_dir - remove subsys files in a cgroup directory
+ * @css: target css
+ */
+static void css_clear_dir(struct cgroup_subsys_state *css)
+{
+       struct cgroup *cgrp = css->cgroup;
+       struct cftype *cfts;
+
+       if (!(css->flags & CSS_VISIBLE))
+               return;
+
+       css->flags &= ~CSS_VISIBLE;
+
+       list_for_each_entry(cfts, &css->ss->cfts, node)
+               cgroup_addrm_files(css, cgrp, cfts, false);
+}
+
+/**
+ * css_populate_dir - create subsys files in a cgroup directory
+ * @css: target css
+ *
+ * On failure, no file is added.
+ */
+static int css_populate_dir(struct cgroup_subsys_state *css)
+{
+       struct cgroup *cgrp = css->cgroup;
+       struct cftype *cfts, *failed_cfts;
+       int ret;
+
+       if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
+               return 0;
+
+       if (!css->ss) {
+               if (cgroup_on_dfl(cgrp))
+                       cfts = cgroup_base_files;
+               else
+                       cfts = cgroup1_base_files;
+
+               return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
+       }
+
+       list_for_each_entry(cfts, &css->ss->cfts, node) {
+               ret = cgroup_addrm_files(css, cgrp, cfts, true);
+               if (ret < 0) {
+                       failed_cfts = cfts;
+                       goto err;
+               }
+       }
+
+       css->flags |= CSS_VISIBLE;
+
+       return 0;
+err:
+       list_for_each_entry(cfts, &css->ss->cfts, node) {
+               if (cfts == failed_cfts)
+                       break;
+               cgroup_addrm_files(css, cgrp, cfts, false);
+       }
+       return ret;
+}
+
+int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
+{
+       struct cgroup *dcgrp = &dst_root->cgrp;
+       struct cgroup_subsys *ss;
+       int ssid, i, ret;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       do_each_subsys_mask(ss, ssid, ss_mask) {
+               /*
+                * If @ss has non-root csses attached to it, can't move.
+                * If @ss is an implicit controller, it is exempt from this
+                * rule and can be stolen.
+                */
+               if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
+                   !ss->implicit_on_dfl)
+                       return -EBUSY;
+
+               /* can't move between two non-dummy roots either */
+               if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
+                       return -EBUSY;
+       } while_each_subsys_mask();
+
+       do_each_subsys_mask(ss, ssid, ss_mask) {
+               struct cgroup_root *src_root = ss->root;
+               struct cgroup *scgrp = &src_root->cgrp;
+               struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
+               struct css_set *cset;
+
+               WARN_ON(!css || cgroup_css(dcgrp, ss));
+
+               /* disable from the source */
+               src_root->subsys_mask &= ~(1 << ssid);
+               WARN_ON(cgroup_apply_control(scgrp));
+               cgroup_finalize_control(scgrp, 0);
+
+               /* rebind */
+               RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
+               rcu_assign_pointer(dcgrp->subsys[ssid], css);
+               ss->root = dst_root;
+               css->cgroup = dcgrp;
+
+               spin_lock_irq(&css_set_lock);
+               hash_for_each(css_set_table, i, cset, hlist)
+                       list_move_tail(&cset->e_cset_node[ss->id],
+                                      &dcgrp->e_csets[ss->id]);
+               spin_unlock_irq(&css_set_lock);
+
+               /* default hierarchy doesn't enable controllers by default */
+               dst_root->subsys_mask |= 1 << ssid;
+               if (dst_root == &cgrp_dfl_root) {
+                       static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
+               } else {
+                       dcgrp->subtree_control |= 1 << ssid;
+                       static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
+               }
+
+               ret = cgroup_apply_control(dcgrp);
+               if (ret)
+                       pr_warn("partial failure to rebind %s controller (err=%d)\n",
+                               ss->name, ret);
+
+               if (ss->bind)
+                       ss->bind(css);
+       } while_each_subsys_mask();
+
+       kernfs_activate(dcgrp->kn);
+       return 0;
+}
+
+int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
+                    struct kernfs_root *kf_root)
+{
+       int len = 0;
+       char *buf = NULL;
+       struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
+       struct cgroup *ns_cgroup;
+
+       buf = kmalloc(PATH_MAX, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       spin_lock_irq(&css_set_lock);
+       ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
+       len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
+       spin_unlock_irq(&css_set_lock);
+
+       if (len >= PATH_MAX)
+               len = -ERANGE;
+       else if (len > 0) {
+               seq_escape(sf, buf, " \t\n\\");
+               len = 0;
+       }
+       kfree(buf);
+       return len;
+}
+
+static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
+{
+       pr_err("remount is not allowed\n");
+       return -EINVAL;
+}
+
+/*
+ * To reduce the fork() overhead for systems that are not actually using
+ * their cgroups capability, we don't maintain the lists running through
+ * each css_set to its tasks until we see the list actually used - in other
+ * words after the first mount.
+ */
+static bool use_task_css_set_links __read_mostly;
+
+static void cgroup_enable_task_cg_lists(void)
+{
+       struct task_struct *p, *g;
+
+       spin_lock_irq(&css_set_lock);
+
+       if (use_task_css_set_links)
+               goto out_unlock;
+
+       use_task_css_set_links = true;
+
+       /*
+        * We need tasklist_lock because RCU is not safe against
+        * while_each_thread(). Besides, a forking task that has passed
+        * cgroup_post_fork() without seeing use_task_css_set_links = 1
+        * is not guaranteed to have its child immediately visible in the
+        * tasklist if we walk through it with RCU.
+        */
+       read_lock(&tasklist_lock);
+       do_each_thread(g, p) {
+               WARN_ON_ONCE(!list_empty(&p->cg_list) ||
+                            task_css_set(p) != &init_css_set);
+
+               /*
+                * We must check whether the process is exiting; otherwise
+                * we race with cgroup_exit() and the list entry would never
+                * be deleted even though the process has exited.
+                * Do it while holding siglock so that we don't end up
+                * racing against cgroup_exit().
+                *
+                * Interrupts were already disabled while acquiring
+                * the css_set_lock, so we do not need to disable it
+                * again when acquiring the sighand->siglock here.
+                */
+               spin_lock(&p->sighand->siglock);
+               if (!(p->flags & PF_EXITING)) {
+                       struct css_set *cset = task_css_set(p);
+
+                       if (!css_set_populated(cset))
+                               css_set_update_populated(cset, true);
+                       list_add_tail(&p->cg_list, &cset->tasks);
+                       get_css_set(cset);
+               }
+               spin_unlock(&p->sighand->siglock);
+       } while_each_thread(g, p);
+       read_unlock(&tasklist_lock);
+out_unlock:
+       spin_unlock_irq(&css_set_lock);
+}
+
+static void init_cgroup_housekeeping(struct cgroup *cgrp)
+{
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       INIT_LIST_HEAD(&cgrp->self.sibling);
+       INIT_LIST_HEAD(&cgrp->self.children);
+       INIT_LIST_HEAD(&cgrp->cset_links);
+       INIT_LIST_HEAD(&cgrp->pidlists);
+       mutex_init(&cgrp->pidlist_mutex);
+       cgrp->self.cgroup = cgrp;
+       cgrp->self.flags |= CSS_ONLINE;
+
+       for_each_subsys(ss, ssid)
+               INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
+
+       init_waitqueue_head(&cgrp->offline_waitq);
+       INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
+}
+
+void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
+{
+       struct cgroup *cgrp = &root->cgrp;
+
+       INIT_LIST_HEAD(&root->root_list);
+       atomic_set(&root->nr_cgrps, 1);
+       cgrp->root = root;
+       init_cgroup_housekeeping(cgrp);
+       idr_init(&root->cgroup_idr);
+
+       root->flags = opts->flags;
+       if (opts->release_agent)
+               strcpy(root->release_agent_path, opts->release_agent);
+       if (opts->name)
+               strcpy(root->name, opts->name);
+       if (opts->cpuset_clone_children)
+               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
+}
+
+int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
+{
+       LIST_HEAD(tmp_links);
+       struct cgroup *root_cgrp = &root->cgrp;
+       struct kernfs_syscall_ops *kf_sops;
+       struct css_set *cset;
+       int i, ret;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
+       if (ret < 0)
+               goto out;
+       root_cgrp->id = ret;
+       root_cgrp->ancestor_ids[0] = ret;
+
+       ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
+                             GFP_KERNEL);
+       if (ret)
+               goto out;
+
+       /*
+        * We're accessing css_set_count without locking css_set_lock here,
+        * but that's OK - it can only be increased by someone holding
+        * cgroup_lock, and that's us.  Later rebinding may disable
+        * controllers on the default hierarchy and thus create new csets,
+        * which can't be more than the existing ones.  Allocate 2x.
+        */
+       ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
+       if (ret)
+               goto cancel_ref;
+
+       ret = cgroup_init_root_id(root);
+       if (ret)
+               goto cancel_ref;
+
+       kf_sops = root == &cgrp_dfl_root ?
+               &cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;
+
+       root->kf_root = kernfs_create_root(kf_sops,
+                                          KERNFS_ROOT_CREATE_DEACTIVATED,
+                                          root_cgrp);
+       if (IS_ERR(root->kf_root)) {
+               ret = PTR_ERR(root->kf_root);
+               goto exit_root_id;
+       }
+       root_cgrp->kn = root->kf_root->kn;
+
+       ret = css_populate_dir(&root_cgrp->self);
+       if (ret)
+               goto destroy_root;
+
+       ret = rebind_subsystems(root, ss_mask);
+       if (ret)
+               goto destroy_root;
+
+       trace_cgroup_setup_root(root);
+
+       /*
+        * There must be no failure case after here, since rebinding takes
+        * care of subsystems' refcounts, which are explicitly dropped in
+        * the failure exit path.
+        */
+       list_add(&root->root_list, &cgroup_roots);
+       cgroup_root_count++;
+
+       /*
+        * Link the root cgroup in this hierarchy into all the css_set
+        * objects.
+        */
+       spin_lock_irq(&css_set_lock);
+       hash_for_each(css_set_table, i, cset, hlist) {
+               link_css_set(&tmp_links, cset, root_cgrp);
+               if (css_set_populated(cset))
+                       cgroup_update_populated(root_cgrp, true);
+       }
+       spin_unlock_irq(&css_set_lock);
+
+       BUG_ON(!list_empty(&root_cgrp->self.children));
+       BUG_ON(atomic_read(&root->nr_cgrps) != 1);
+
+       kernfs_activate(root_cgrp->kn);
+       ret = 0;
+       goto out;
+
+destroy_root:
+       kernfs_destroy_root(root->kf_root);
+       root->kf_root = NULL;
+exit_root_id:
+       cgroup_exit_root_id(root);
+cancel_ref:
+       percpu_ref_exit(&root_cgrp->self.refcnt);
+out:
+       free_cgrp_cset_links(&tmp_links);
+       return ret;
+}
+
+struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
+                              struct cgroup_root *root, unsigned long magic,
+                              struct cgroup_namespace *ns)
+{
+       struct dentry *dentry;
+       bool new_sb;
+
+       dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
+
+        * In a non-init cgroup namespace, instead of the root cgroup's
+        * dentry, we return the dentry corresponding to the cgroupns->root_cgrp.
+        * we return the dentry corresponding to the cgroupns->root_cgrp.
+        */
+       if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
+               struct dentry *nsdentry;
+               struct cgroup *cgrp;
+
+               mutex_lock(&cgroup_mutex);
+               spin_lock_irq(&css_set_lock);
+
+               cgrp = cset_cgroup_from_root(ns->root_cset, root);
+
+               spin_unlock_irq(&css_set_lock);
+               mutex_unlock(&cgroup_mutex);
+
+               nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
+               dput(dentry);
+               dentry = nsdentry;
+       }
+
+       if (IS_ERR(dentry) || !new_sb)
+               cgroup_put(&root->cgrp);
+
+       return dentry;
+}
+
+static struct dentry *cgroup_mount(struct file_system_type *fs_type,
+                        int flags, const char *unused_dev_name,
+                        void *data)
+{
+       struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
+       struct dentry *dentry;
+
+       get_cgroup_ns(ns);
+
+       /* Check if the caller has permission to mount. */
+       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
+               put_cgroup_ns(ns);
+               return ERR_PTR(-EPERM);
+       }
+
+       /*
+        * The first time anyone tries to mount a cgroup, enable the list
+        * linking each css_set to its tasks and fix up all existing tasks.
+        */
+       if (!use_task_css_set_links)
+               cgroup_enable_task_cg_lists();
+
+       if (fs_type == &cgroup2_fs_type) {
+               if (data) {
+                       pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
+                       put_cgroup_ns(ns);
+                       return ERR_PTR(-EINVAL);
+               }
+               cgrp_dfl_visible = true;
+               cgroup_get(&cgrp_dfl_root.cgrp);
+
+               dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
+                                        CGROUP2_SUPER_MAGIC, ns);
+       } else {
+               dentry = cgroup1_mount(&cgroup_fs_type, flags, data,
+                                      CGROUP_SUPER_MAGIC, ns);
+       }
+
+       put_cgroup_ns(ns);
+       return dentry;
+}
+
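From userspace, the two filesystem types registered below are reached through plain mount(2) calls; a minimal sketch (mount points hypothetical):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* v2: no mount options accepted, as enforced by cgroup_mount() above */
	if (mount("none", "/mnt/cgroup2", "cgroup2", 0, NULL))
		perror("mount cgroup2");

	/* v1: controllers are selected through mount options instead */
	if (mount("none", "/mnt/cpuset", "cgroup", 0, "cpuset"))
		perror("mount cgroup v1");
	return 0;
}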
+static void cgroup_kill_sb(struct super_block *sb)
+{
+       struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
+       struct cgroup_root *root = cgroup_root_from_kf(kf_root);
+
+       /*
+        * If @root doesn't have any mounts or children, start killing it.
+        * This prevents new mounts by disabling percpu_ref_tryget_live().
+        * cgroup_mount() may wait for @root's release.
+        *
+        * And don't kill the default root.
+        */
+       if (!list_empty(&root->cgrp.self.children) ||
+           root == &cgrp_dfl_root)
+               cgroup_put(&root->cgrp);
+       else
+               percpu_ref_kill(&root->cgrp.self.refcnt);
+
+       kernfs_kill_sb(sb);
+}
+
+struct file_system_type cgroup_fs_type = {
+       .name = "cgroup",
+       .mount = cgroup_mount,
+       .kill_sb = cgroup_kill_sb,
+       .fs_flags = FS_USERNS_MOUNT,
+};
+
+static struct file_system_type cgroup2_fs_type = {
+       .name = "cgroup2",
+       .mount = cgroup_mount,
+       .kill_sb = cgroup_kill_sb,
+       .fs_flags = FS_USERNS_MOUNT,
+};
+
+int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+                         struct cgroup_namespace *ns)
+{
+       struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
+
+       return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
+}
+
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+                  struct cgroup_namespace *ns)
+{
+       int ret;
+
+       mutex_lock(&cgroup_mutex);
+       spin_lock_irq(&css_set_lock);
+
+       ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
+
+       spin_unlock_irq(&css_set_lock);
+       mutex_unlock(&cgroup_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(cgroup_path_ns);
+
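A hedged in-kernel usage sketch (helper name hypothetical; passing init_cgroup_ns yields the path as seen from the initial cgroup namespace):

static void example_print_path(struct cgroup *cgrp)
{
	char *buf = kmalloc(PATH_MAX, GFP_KERNEL);

	if (!buf)
		return;
	if (cgroup_path_ns(cgrp, buf, PATH_MAX, &init_cgroup_ns) > 0)
		pr_info("cgroup path: %s\n", buf);
	kfree(buf);
}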
+/**
+ * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
+ * @task: target task
+ * @buf: the buffer to write the path into
+ * @buflen: the length of the buffer
+ *
+ * Determine @task's cgroup on the first (the one with the lowest non-zero
+ * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
+ * function grabs cgroup_mutex and shouldn't be used inside locks used by
+ * cgroup controller callbacks.
+ *
+ * Return value is the same as kernfs_path().
+ */
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+{
+       struct cgroup_root *root;
+       struct cgroup *cgrp;
+       int hierarchy_id = 1;
+       int ret;
+
+       mutex_lock(&cgroup_mutex);
+       spin_lock_irq(&css_set_lock);
+
+       root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
+
+       if (root) {
+               cgrp = task_cgroup_from_root(task, root);
+               ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
+       } else {
+               /* if no hierarchy exists, everyone is in "/" */
+               ret = strlcpy(buf, "/", buflen);
+       }
+
+       spin_unlock_irq(&css_set_lock);
+       mutex_unlock(&cgroup_mutex);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(task_cgroup_path);
+
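For example, a debugging helper might report a task's position like this sketch (name and buffer size arbitrary):

static void example_report_task(struct task_struct *task)
{
	char buf[256];

	/* path on the first hierarchy, or "/" if none is mounted */
	if (task_cgroup_path(task, buf, sizeof(buf)) >= 0)
		pr_info("%s(%d) in cgroup %s\n", task->comm, task->pid, buf);
}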
+/**
+ * cgroup_migrate_add_task - add a migration target task to a migration context
+ * @task: target task
+ * @mgctx: target migration context
+ *
+ * Add @task, which is a migration target, to @mgctx->tset.  This function
+ * becomes a noop if @task doesn't need to be migrated.  @task's css_set
+ * should have been added as a migration source and @task->cg_list will be
+ * moved from the css_set's tasks list to its mg_tasks list.
+ */
+static void cgroup_migrate_add_task(struct task_struct *task,
+                                   struct cgroup_mgctx *mgctx)
+{
+       struct css_set *cset;
+
+       lockdep_assert_held(&css_set_lock);
+
+       /* @task either already exited or can't exit until the end */
+       if (task->flags & PF_EXITING)
+               return;
+
+       /* leave @task alone if post_fork() hasn't linked it yet */
+       if (list_empty(&task->cg_list))
+               return;
+
+       cset = task_css_set(task);
+       if (!cset->mg_src_cgrp)
+               return;
+
+       list_move_tail(&task->cg_list, &cset->mg_tasks);
+       if (list_empty(&cset->mg_node))
+               list_add_tail(&cset->mg_node,
+                             &mgctx->tset.src_csets);
+       if (list_empty(&cset->mg_dst_cset->mg_node))
+               list_add_tail(&cset->mg_dst_cset->mg_node,
+                             &mgctx->tset.dst_csets);
+}
+
+/**
+ * cgroup_taskset_first - reset taskset and return the first task
+ * @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
+ *
+ * @tset iteration is initialized and the first task is returned.
+ */
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+                                        struct cgroup_subsys_state **dst_cssp)
+{
+       tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
+       tset->cur_task = NULL;
+
+       return cgroup_taskset_next(tset, dst_cssp);
+}
+
+/**
+ * cgroup_taskset_next - iterate to the next task in taskset
+ * @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
+ *
+ * Return the next task in @tset.  Iteration must have been initialized
+ * with cgroup_taskset_first().
+ */
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+                                       struct cgroup_subsys_state **dst_cssp)
+{
+       struct css_set *cset = tset->cur_cset;
+       struct task_struct *task = tset->cur_task;
+
+       while (&cset->mg_node != tset->csets) {
+               if (!task)
+                       task = list_first_entry(&cset->mg_tasks,
+                                               struct task_struct, cg_list);
+               else
+                       task = list_next_entry(task, cg_list);
+
+               if (&task->cg_list != &cset->mg_tasks) {
+                       tset->cur_cset = cset;
+                       tset->cur_task = task;
+
+                       /*
+                        * This function may be called both before and
+                        * after cgroup_taskset_migrate().  The two cases
+                        * can be distinguished by looking at whether @cset
+                        * has its ->mg_dst_cset set.
+                        */
+                       if (cset->mg_dst_cset)
+                               *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
+                       else
+                               *dst_cssp = cset->subsys[tset->ssid];
+
+                       return task;
+               }
+
+               cset = list_next_entry(cset, mg_node);
+               task = NULL;
+       }
+
+       return NULL;
+}
+
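Controllers consume this iterator pair from their ->can_attach()/->attach() callbacks; the canonical loop, which the cgroup_taskset_for_each() helper wraps, looks like this sketch:

static void example_attach(struct cgroup_taskset *tset)
{
	struct cgroup_subsys_state *dst_css;
	struct task_struct *task;

	for (task = cgroup_taskset_first(tset, &dst_css); task;
	     task = cgroup_taskset_next(tset, &dst_css)) {
		/* @task is migrating and @dst_css is its destination css */
	}
}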
+/**
+ * cgroup_migrate_execute - migrate a taskset
+ * @mgctx: migration context
+ *
+ * Migrate tasks in @mgctx as set up by the migration preparation functions.
+ * This function fails iff one of the ->can_attach callbacks fails and
+ * guarantees that either all or none of the tasks in @mgctx are migrated.
+ * @mgctx is consumed regardless of success.
+ */
+static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
+{
+       struct cgroup_taskset *tset = &mgctx->tset;
+       struct cgroup_subsys *ss;
+       struct task_struct *task, *tmp_task;
+       struct css_set *cset, *tmp_cset;
+       int ssid, failed_ssid, ret;
+
+       /* methods shouldn't be called if no task is actually migrating */
+       if (list_empty(&tset->src_csets))
+               return 0;
+
+       /* check that we can legitimately attach to the cgroup */
+       do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
+               if (ss->can_attach) {
+                       tset->ssid = ssid;
+                       ret = ss->can_attach(tset);
+                       if (ret) {
+                               failed_ssid = ssid;
+                               goto out_cancel_attach;
+                       }
+               }
+       } while_each_subsys_mask();
+
+       /*
+        * Now that we're guaranteed success, proceed to move all tasks to
+        * the new cgroup.  There are no failure cases after here, so this
+        * is the commit point.
+        */
+       spin_lock_irq(&css_set_lock);
+       list_for_each_entry(cset, &tset->src_csets, mg_node) {
+               list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
+                       struct css_set *from_cset = task_css_set(task);
+                       struct css_set *to_cset = cset->mg_dst_cset;
+
+                       get_css_set(to_cset);
+                       css_set_move_task(task, from_cset, to_cset, true);
+                       put_css_set_locked(from_cset);
+               }
+       }
+       spin_unlock_irq(&css_set_lock);
+
+       /*
+        * Migration is committed, all target tasks are now on dst_csets.
+        * Nothing is sensitive to fork() after this point.  Notify
+        * controllers that migration is complete.
+        */
+       tset->csets = &tset->dst_csets;
+
+       do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
+               if (ss->attach) {
+                       tset->ssid = ssid;
+                       ss->attach(tset);
+               }
+       } while_each_subsys_mask();
+
+       ret = 0;
+       goto out_release_tset;
+
+out_cancel_attach:
+       do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
+               if (ssid == failed_ssid)
+                       break;
+               if (ss->cancel_attach) {
+                       tset->ssid = ssid;
+                       ss->cancel_attach(tset);
+               }
+       } while_each_subsys_mask();
+out_release_tset:
+       spin_lock_irq(&css_set_lock);
+       list_splice_init(&tset->dst_csets, &tset->src_csets);
+       list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
+               list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
+               list_del_init(&cset->mg_node);
+       }
+       spin_unlock_irq(&css_set_lock);
+       return ret;
+}
+
+/**
+ * cgroup_may_migrate_to - verify whether a cgroup can be a migration destination
+ * @dst_cgrp: destination cgroup to test
+ *
+ * On the default hierarchy, except for the root, subtree_control must be
+ * zero for migration destination cgroups with tasks so that child cgroups
+ * don't compete against tasks.
+ */
+bool cgroup_may_migrate_to(struct cgroup *dst_cgrp)
+{
+       return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) ||
+               !dst_cgrp->subtree_control;
+}
+
+/**
+ * cgroup_migrate_finish - cleanup after attach
+ * @mgctx: migration context
+ *
+ * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst().  See
+ * those functions for details.
+ */
+void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
+{
+       LIST_HEAD(preloaded);
+       struct css_set *cset, *tmp_cset;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       spin_lock_irq(&css_set_lock);
+
+       list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
+       list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
+
+       list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
+               cset->mg_src_cgrp = NULL;
+               cset->mg_dst_cgrp = NULL;
+               cset->mg_dst_cset = NULL;
+               list_del_init(&cset->mg_preload_node);
+               put_css_set_locked(cset);
+       }
+
+       spin_unlock_irq(&css_set_lock);
+}
+
+/**
+ * cgroup_migrate_add_src - add a migration source css_set
+ * @src_cset: the source css_set to add
+ * @dst_cgrp: the destination cgroup
+ * @mgctx: migration context
+ *
+ * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
+ * @src_cset and add it to @mgctx->src_csets, which should later be cleaned
+ * up by cgroup_migrate_finish().
+ *
+ * This function may be called without holding cgroup_threadgroup_rwsem
+ * even if the target is a process.  Threads may be created and destroyed
+ * but as long as cgroup_mutex is not dropped, no new css_set can be put
+ * into play and the preloaded css_sets are guaranteed to cover all
+ * migrations.
+ */
+void cgroup_migrate_add_src(struct css_set *src_cset,
+                           struct cgroup *dst_cgrp,
+                           struct cgroup_mgctx *mgctx)
+{
+       struct cgroup *src_cgrp;
+
+       lockdep_assert_held(&cgroup_mutex);
+       lockdep_assert_held(&css_set_lock);
+
+       /*
+        * If ->dead, @src_cset is associated with one or more dead cgroups
+        * and doesn't contain any migratable tasks.  Ignore it early so
+        * that the rest of migration path doesn't get confused by it.
+        */
+       if (src_cset->dead)
+               return;
+
+       src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
+
+       if (!list_empty(&src_cset->mg_preload_node))
+               return;
+
+       WARN_ON(src_cset->mg_src_cgrp);
+       WARN_ON(src_cset->mg_dst_cgrp);
+       WARN_ON(!list_empty(&src_cset->mg_tasks));
+       WARN_ON(!list_empty(&src_cset->mg_node));
+
+       src_cset->mg_src_cgrp = src_cgrp;
+       src_cset->mg_dst_cgrp = dst_cgrp;
+       get_css_set(src_cset);
+       list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
+}
+
+/**
+ * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
+ * @mgctx: migration context
+ *
+ * Tasks are about to be moved and all the source css_sets have been
+ * preloaded to @mgctx->preloaded_src_csets.  This function looks up and
+ * pins all destination css_sets, links each to its source, and appends them
+ * to @mgctx->preloaded_dst_csets.
+ *
+ * This function must be called after cgroup_migrate_add_src() has been
+ * called on each migration source css_set.  After migration is performed
+ * using cgroup_migrate(), cgroup_migrate_finish() must be called on
+ * @mgctx.
+ */
+int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
+{
+       struct css_set *src_cset, *tmp_cset;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       /* look up the dst cset for each src cset and link it to src */
+       list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
+                                mg_preload_node) {
+               struct css_set *dst_cset;
+               struct cgroup_subsys *ss;
+               int ssid;
+
+               dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
+               if (!dst_cset)
+                       goto err;
+
+               WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
+
+               /*
+                * If src cset equals dst, it's a noop.  Drop the src.
+                * cgroup_migrate() will skip the cset too.  Note that we
+                * can't handle src == dst as some nodes are used by both.
+                */
+               if (src_cset == dst_cset) {
+                       src_cset->mg_src_cgrp = NULL;
+                       src_cset->mg_dst_cgrp = NULL;
+                       list_del_init(&src_cset->mg_preload_node);
+                       put_css_set(src_cset);
+                       put_css_set(dst_cset);
+                       continue;
+               }
+
+               src_cset->mg_dst_cset = dst_cset;
+
+               if (list_empty(&dst_cset->mg_preload_node))
+                       list_add_tail(&dst_cset->mg_preload_node,
+                                     &mgctx->preloaded_dst_csets);
+               else
+                       put_css_set(dst_cset);
+
+               for_each_subsys(ss, ssid)
+                       if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
+                               mgctx->ss_mask |= 1 << ssid;
+       }
+
+       return 0;
+err:
+       cgroup_migrate_finish(mgctx);
+       return -ENOMEM;
+}
+
+/**
+ * cgroup_migrate - migrate a process or task to a cgroup
+ * @leader: the leader of the process or the task to migrate
+ * @threadgroup: whether @leader points to the whole process or a single task
+ * @mgctx: migration context
+ *
+ * Migrate a process or task denoted by @leader.  If migrating a process,
+ * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
+ * responsible for invoking cgroup_migrate_add_src() and
+ * cgroup_migrate_prepare_dst() on the targets before invoking this
+ * function and following up with cgroup_migrate_finish().
+ *
+ * As long as a controller's ->can_attach() doesn't fail, this function is
+ * guaranteed to succeed.  This means that, excluding ->can_attach()
+ * failure, when migrating multiple targets, the success or failure can be
+ * decided for all targets by invoking cgroup_migrate_prepare_dst() before
+ * actually starting the migration.
+ */
+int cgroup_migrate(struct task_struct *leader, bool threadgroup,
+                  struct cgroup_mgctx *mgctx)
+{
+       struct task_struct *task;
+
+       /*
+        * Prevent freeing of tasks while we take a snapshot. Tasks that are
+        * already PF_EXITING could be freed from underneath us unless we
+        * take an rcu_read_lock.
+        */
+       spin_lock_irq(&css_set_lock);
+       rcu_read_lock();
+       task = leader;
+       do {
+               cgroup_migrate_add_task(task, mgctx);
+               if (!threadgroup)
+                       break;
+       } while_each_thread(leader, task);
+       rcu_read_unlock();
+       spin_unlock_irq(&css_set_lock);
+
+       return cgroup_migrate_execute(mgctx);
+}
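+
+/*
+ * Illustrative sketch (not part of this change): with cgroup_mutex held,
+ * a caller pairs the migration helpers above the way
+ * cgroup_attach_task() below does:
+ *
+ *      DEFINE_CGROUP_MGCTX(mgctx);
+ *
+ *      spin_lock_irq(&css_set_lock);
+ *      cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
+ *      spin_unlock_irq(&css_set_lock);
+ *
+ *      ret = cgroup_migrate_prepare_dst(&mgctx);
+ *      if (!ret)
+ *              ret = cgroup_migrate(task, false, &mgctx);
+ *      cgroup_migrate_finish(&mgctx);
+ */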
+
+/**
+ * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
+ * @dst_cgrp: the cgroup to attach to
+ * @leader: the task or the leader of the threadgroup to be attached
+ * @threadgroup: attach the whole threadgroup?
+ *
+ * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
+ */
+int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
+                      bool threadgroup)
+{
+       DEFINE_CGROUP_MGCTX(mgctx);
+       struct task_struct *task;
+       int ret;
+
+       if (!cgroup_may_migrate_to(dst_cgrp))
+               return -EBUSY;
+
+       /* look up all src csets */
+       spin_lock_irq(&css_set_lock);
+       rcu_read_lock();
+       task = leader;
+       do {
+               cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
+               if (!threadgroup)
+                       break;
+       } while_each_thread(leader, task);
+       rcu_read_unlock();
+       spin_unlock_irq(&css_set_lock);
+
+       /* prepare dst csets and commit */
+       ret = cgroup_migrate_prepare_dst(&mgctx);
+       if (!ret)
+               ret = cgroup_migrate(leader, threadgroup, &mgctx);
+
+       cgroup_migrate_finish(&mgctx);
+
+       if (!ret)
+               trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+
+       return ret;
+}
+
+static int cgroup_procs_write_permission(struct task_struct *task,
+                                        struct cgroup *dst_cgrp,
+                                        struct kernfs_open_file *of)
+{
+       int ret = 0;
+
+       if (cgroup_on_dfl(dst_cgrp)) {
+               struct super_block *sb = of->file->f_path.dentry->d_sb;
+               struct cgroup *cgrp;
+               struct inode *inode;
+
+               spin_lock_irq(&css_set_lock);
+               cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
+               spin_unlock_irq(&css_set_lock);
+
+               while (!cgroup_is_descendant(dst_cgrp, cgrp))
+                       cgrp = cgroup_parent(cgrp);
+
+               ret = -ENOMEM;
+               inode = kernfs_get_inode(sb, cgrp->procs_file.kn);
+               if (inode) {
+                       ret = inode_permission(inode, MAY_WRITE);
+                       iput(inode);
+               }
+       } else {
+               const struct cred *cred = current_cred();
+               const struct cred *tcred = get_task_cred(task);
+
+               /*
+                * even if we're attaching all tasks in the thread group,
+                * we only need to check permissions on one of them.
+                */
+               if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+                   !uid_eq(cred->euid, tcred->uid) &&
+                   !uid_eq(cred->euid, tcred->suid))
+                       ret = -EACCES;
+               put_cred(tcred);
+       }
+
+       return ret;
+}
+
+/*
+ * Find the task_struct of the task to attach by vpid and pass it along to the
+ * function to attach either it or all tasks in its threadgroup. Will lock
+ * cgroup_mutex and threadgroup.
+ */
+ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
+                            size_t nbytes, loff_t off, bool threadgroup)
+{
+       struct task_struct *tsk;
+       struct cgroup_subsys *ss;
+       struct cgroup *cgrp;
+       pid_t pid;
+       int ssid, ret;
+
+       if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
+               return -EINVAL;
+
+       cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!cgrp)
+               return -ENODEV;
+
+       percpu_down_write(&cgroup_threadgroup_rwsem);
+       rcu_read_lock();
+       if (pid) {
+               tsk = find_task_by_vpid(pid);
+               if (!tsk) {
+                       ret = -ESRCH;
+                       goto out_unlock_rcu;
+               }
+       } else {
+               tsk = current;
+       }
+
+       if (threadgroup)
+               tsk = tsk->group_leader;
+
+       /*
+        * Workqueue threads may acquire PF_NO_SETAFFINITY and become
+        * trapped in a cpuset, or an RT worker may be born in a cgroup
+        * with no rt_runtime allocated.  Just say no.
+        */
+       if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
+               ret = -EINVAL;
+               goto out_unlock_rcu;
+       }
+
+       get_task_struct(tsk);
+       rcu_read_unlock();
+
+       ret = cgroup_procs_write_permission(tsk, cgrp, of);
+       if (!ret)
+               ret = cgroup_attach_task(cgrp, tsk, threadgroup);
+
+       put_task_struct(tsk);
+       goto out_unlock_threadgroup;
+
+out_unlock_rcu:
+       rcu_read_unlock();
+out_unlock_threadgroup:
+       percpu_up_write(&cgroup_threadgroup_rwsem);
+       for_each_subsys(ss, ssid)
+               if (ss->post_attach)
+                       ss->post_attach();
+       cgroup_kn_unlock(of->kn);
+       return ret ?: nbytes;
+}
+
+ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
+                          loff_t off)
+{
+       return __cgroup_procs_write(of, buf, nbytes, off, true);
+}
+
+static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
+{
+       struct cgroup_subsys *ss;
+       bool printed = false;
+       int ssid;
+
+       do_each_subsys_mask(ss, ssid, ss_mask) {
+               if (printed)
+                       seq_putc(seq, ' ');
+               seq_printf(seq, "%s", ss->name);
+               printed = true;
+       } while_each_subsys_mask();
+       if (printed)
+               seq_putc(seq, '\n');
+}
+
+/* show controllers which are enabled from the parent */
+static int cgroup_controllers_show(struct seq_file *seq, void *v)
+{
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       cgroup_print_ss_mask(seq, cgroup_control(cgrp));
+       return 0;
+}
+
+/* show controllers which are enabled for a given cgroup's children */
+static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
+{
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       cgroup_print_ss_mask(seq, cgrp->subtree_control);
+       return 0;
+}
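+
+/*
+ * Illustrative example (not part of this change): the two files above
+ * surface the masks as space-separated controller names.  With cpu, io
+ * and memory available from the parent and only memory delegated
+ * further, reads would look roughly like:
+ *
+ *      # cat cgroup.controllers
+ *      cpu io memory
+ *      # cat cgroup.subtree_control
+ *      memory
+ */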
+
+/**
+ * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
+ * @cgrp: root of the subtree to update csses for
+ *
+ * @cgrp's control masks have changed and its subtree's css associations
+ * need to be updated accordingly.  This function looks up all css_sets
+ * which are attached to the subtree, creates the matching updated css_sets
+ * and migrates the tasks to the new ones.
+ */
+static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+{
+       DEFINE_CGROUP_MGCTX(mgctx);
+       struct cgroup_subsys_state *d_css;
+       struct cgroup *dsct;
+       struct css_set *src_cset;
+       int ret;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       percpu_down_write(&cgroup_threadgroup_rwsem);
+
+       /* look up all csses currently attached to @cgrp's subtree */
+       spin_lock_irq(&css_set_lock);
+       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+               struct cgrp_cset_link *link;
+
+               list_for_each_entry(link, &dsct->cset_links, cset_link)
+                       cgroup_migrate_add_src(link->cset, dsct, &mgctx);
+       }
+       spin_unlock_irq(&css_set_lock);
+
+       /* NULL dst indicates self on default hierarchy */
+       ret = cgroup_migrate_prepare_dst(&mgctx);
+       if (ret)
+               goto out_finish;
+
+       spin_lock_irq(&css_set_lock);
+       list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
+               struct task_struct *task, *ntask;
+
+               /* all tasks in src_csets need to be migrated */
+               list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
+                       cgroup_migrate_add_task(task, &mgctx);
+       }
+       spin_unlock_irq(&css_set_lock);
+
+       ret = cgroup_migrate_execute(&mgctx);
+out_finish:
+       cgroup_migrate_finish(&mgctx);
+       percpu_up_write(&cgroup_threadgroup_rwsem);
+       return ret;
+}
+
+/**
+ * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
+ * @cgrp: root of the target subtree
+ *
+ * Because css offlining is asynchronous, userland may try to re-enable a
+ * controller while the previous css is still around.  This function grabs
+ * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
+ */
+void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
+       __acquires(&cgroup_mutex)
+{
+       struct cgroup *dsct;
+       struct cgroup_subsys_state *d_css;
+       struct cgroup_subsys *ss;
+       int ssid;
+
+restart:
+       mutex_lock(&cgroup_mutex);
+
+       cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
+               for_each_subsys(ss, ssid) {
+                       struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
+                       DEFINE_WAIT(wait);
+
+                       if (!css || !percpu_ref_is_dying(&css->refcnt))
+                               continue;
+
+                       cgroup_get(dsct);
+                       prepare_to_wait(&dsct->offline_waitq, &wait,
+                                       TASK_UNINTERRUPTIBLE);
+
+                       mutex_unlock(&cgroup_mutex);
+                       schedule();
+                       finish_wait(&dsct->offline_waitq, &wait);
+
+                       cgroup_put(dsct);
+                       goto restart;
+               }
+       }
+}
+
+/**
+ * cgroup_save_control - save control masks of a subtree
+ * @cgrp: root of the target subtree
+ *
+ * Save ->subtree_control and ->subtree_ss_mask to the respective old_
+ * prefixed fields for @cgrp's subtree including @cgrp itself.
+ */
+static void cgroup_save_control(struct cgroup *cgrp)
+{
+       struct cgroup *dsct;
+       struct cgroup_subsys_state *d_css;
+
+       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+               dsct->old_subtree_control = dsct->subtree_control;
+               dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
+       }
+}
+
+/**
+ * cgroup_propagate_control - refresh control masks of a subtree
+ * @cgrp: root of the target subtree
+ *
+ * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
+ * ->subtree_control and propagate controller availability through the
+ * subtree so that descendants don't have unavailable controllers enabled.
+ */
+static void cgroup_propagate_control(struct cgroup *cgrp)
+{
+       struct cgroup *dsct;
+       struct cgroup_subsys_state *d_css;
+
+       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+               dsct->subtree_control &= cgroup_control(dsct);
+               dsct->subtree_ss_mask =
+                       cgroup_calc_subtree_ss_mask(dsct->subtree_control,
+                                                   cgroup_ss_mask(dsct));
+       }
+}
+
+/**
+ * cgroup_restore_control - restore control masks of a subtree
+ * @cgrp: root of the target subtree
+ *
+ * Restore ->subtree_control and ->subtree_ss_mask from the respective old_
+ * prefixed fields for @cgrp's subtree including @cgrp itself.
+ */
+static void cgroup_restore_control(struct cgroup *cgrp)
+{
+       struct cgroup *dsct;
+       struct cgroup_subsys_state *d_css;
+
+       cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
+               dsct->subtree_control = dsct->old_subtree_control;
+               dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
+       }
+}
+
+static bool css_visible(struct cgroup_subsys_state *css)
+{
+       struct cgroup_subsys *ss = css->ss;
+       struct cgroup *cgrp = css->cgroup;
+
+       if (cgroup_control(cgrp) & (1 << ss->id))
+               return true;
+       if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
+               return false;
+       return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
+}
+
+/**
+ * cgroup_apply_control_enable - enable or show csses according to control
+ * @cgrp: root of the target subtree
+ *
+ * Walk @cgrp's subtree and create new csses or make the existing ones
+ * visible.  A css is created invisible if it's being implicitly enabled
+ * through dependency.  An invisible css is made visible when the userland
+ * explicitly enables it.
+ *
+ * Returns 0 on success, -errno on failure.  On failure, csses which have
+ * been processed already aren't cleaned up.  The caller is responsible for
+ * cleaning up with cgroup_apply_control_disable().
+ */
+static int cgroup_apply_control_enable(struct cgroup *cgrp)
+{
+       struct cgroup *dsct;
+       struct cgroup_subsys_state *d_css;
+       struct cgroup_subsys *ss;
+       int ssid, ret;
+
+       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+               for_each_subsys(ss, ssid) {
+                       struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
+
+                       WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
+
+                       if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
+                               continue;
+
+                       if (!css) {
+                               css = css_create(dsct, ss);
+                               if (IS_ERR(css))
+                                       return PTR_ERR(css);
+                       }
+
+                       if (css_visible(css)) {
+                               ret = css_populate_dir(css);
+                               if (ret)
+                                       return ret;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * cgroup_apply_control_disable - kill or hide csses according to control
+ * @cgrp: root of the target subtree
+ *
+ * Walk @cgrp's subtree and kill and hide csses so that they match
+ * cgroup_ss_mask() and cgroup_visible_mask().
+ *
+ * A css is hidden when the userland requests it to be disabled while other
+ * subsystems are still depending on it.  The css must not actively control
+ * resources and must be in the vanilla state if it's made visible again later.
+ * Controllers which may be depended upon should provide ->css_reset() for
+ * this purpose.
+ */
+static void cgroup_apply_control_disable(struct cgroup *cgrp)
+{
+       struct cgroup *dsct;
+       struct cgroup_subsys_state *d_css;
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
+               for_each_subsys(ss, ssid) {
+                       struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
+
+                       WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
+
+                       if (!css)
+                               continue;
+
+                       if (css->parent &&
+                           !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
+                               kill_css(css);
+                       } else if (!css_visible(css)) {
+                               css_clear_dir(css);
+                               if (ss->css_reset)
+                                       ss->css_reset(css);
+                       }
+               }
+       }
+}
+
+/**
+ * cgroup_apply_control - apply control mask updates to the subtree
+ * @cgrp: root of the target subtree
+ *
+ * Subsystems can be enabled and disabled in a subtree using the following
+ * steps.
+ *
+ * 1. Call cgroup_save_control() to stash the current state.
+ * 2. Update ->subtree_control masks in the subtree as desired.
+ * 3. Call cgroup_apply_control() to apply the changes.
+ * 4. Optionally perform other related operations.
+ * 5. Call cgroup_finalize_control() to finish up.
+ *
+ * This function implements step 3 and propagates the mask changes
+ * throughout @cgrp's subtree, updates csses accordingly, and performs
+ * process migrations.
+ */
+static int cgroup_apply_control(struct cgroup *cgrp)
+{
+       int ret;
+
+       cgroup_propagate_control(cgrp);
+
+       ret = cgroup_apply_control_enable(cgrp);
+       if (ret)
+               return ret;
+
+       /*
+        * At this point, cgroup_e_css() results reflect the new csses
+        * making the following cgroup_update_dfl_csses() properly update
+        * css associations of all tasks in the subtree.
+        */
+       ret = cgroup_update_dfl_csses(cgrp);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+/**
+ * cgroup_finalize_control - finalize control mask update
+ * @cgrp: root of the target subtree
+ * @ret: the result of the update
+ *
+ * Finalize control mask update.  See cgroup_apply_control() for more info.
+ */
+static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
+{
+       if (ret) {
+               cgroup_restore_control(cgrp);
+               cgroup_propagate_control(cgrp);
+       }
+
+       cgroup_apply_control_disable(cgrp);
+}
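+
+/*
+ * Illustrative sketch (not part of this change) of the five steps the
+ * cgroup_apply_control() comment describes; cgroup_subtree_control_write()
+ * below follows exactly this shape:
+ *
+ *      cgroup_save_control(cgrp);
+ *      cgrp->subtree_control |= enable;
+ *      cgrp->subtree_control &= ~disable;
+ *      ret = cgroup_apply_control(cgrp);
+ *      cgroup_finalize_control(cgrp, ret);
+ */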
+
+/* change the enabled child controllers for a cgroup in the default hierarchy */
+static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
+                                           char *buf, size_t nbytes,
+                                           loff_t off)
+{
+       u16 enable = 0, disable = 0;
+       struct cgroup *cgrp, *child;
+       struct cgroup_subsys *ss;
+       char *tok;
+       int ssid, ret;
+
+       /*
+        * Parse input - space separated list of subsystem names prefixed
+        * with either + or -.
+        */
+       buf = strstrip(buf);
+       while ((tok = strsep(&buf, " "))) {
+               if (tok[0] == '\0')
+                       continue;
+               do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
+                       if (!cgroup_ssid_enabled(ssid) ||
+                           strcmp(tok + 1, ss->name))
+                               continue;
+
+                       if (*tok == '+') {
+                               enable |= 1 << ssid;
+                               disable &= ~(1 << ssid);
+                       } else if (*tok == '-') {
+                               disable |= 1 << ssid;
+                               enable &= ~(1 << ssid);
+                       } else {
+                               return -EINVAL;
+                       }
+                       break;
+               } while_each_subsys_mask();
+               if (ssid == CGROUP_SUBSYS_COUNT)
+                       return -EINVAL;
+       }
+
+       cgrp = cgroup_kn_lock_live(of->kn, true);
+       if (!cgrp)
+               return -ENODEV;
+
+       for_each_subsys(ss, ssid) {
+               if (enable & (1 << ssid)) {
+                       if (cgrp->subtree_control & (1 << ssid)) {
+                               enable &= ~(1 << ssid);
+                               continue;
+                       }
+
+                       if (!(cgroup_control(cgrp) & (1 << ssid))) {
+                               ret = -ENOENT;
+                               goto out_unlock;
+                       }
+               } else if (disable & (1 << ssid)) {
+                       if (!(cgrp->subtree_control & (1 << ssid))) {
+                               disable &= ~(1 << ssid);
+                               continue;
+                       }
+
+                       /* a child has it enabled? */
+                       cgroup_for_each_live_child(child, cgrp) {
+                               if (child->subtree_control & (1 << ssid)) {
+                                       ret = -EBUSY;
+                                       goto out_unlock;
+                               }
+                       }
+               }
+       }
+
+       if (!enable && !disable) {
+               ret = 0;
+               goto out_unlock;
+       }
+
+       /*
+        * Except for the root, subtree_control must be zero for a cgroup
+        * with tasks so that child cgroups don't compete against tasks.
+        */
+       if (enable && cgroup_parent(cgrp)) {
+               struct cgrp_cset_link *link;
+
+               /*
+                * Because namespaces pin csets too, @cgrp->cset_links
+                * might not be empty even when @cgrp is empty.  Walk and
+                * verify each cset.
+                */
+               spin_lock_irq(&css_set_lock);
+
+               ret = 0;
+               list_for_each_entry(link, &cgrp->cset_links, cset_link) {
+                       if (css_set_populated(link->cset)) {
+                               ret = -EBUSY;
+                               break;
+                       }
+               }
+
+               spin_unlock_irq(&css_set_lock);
+
+               if (ret)
+                       goto out_unlock;
+       }
+
+       /* save and update control masks and prepare csses */
+       cgroup_save_control(cgrp);
+
+       cgrp->subtree_control |= enable;
+       cgrp->subtree_control &= ~disable;
+
+       ret = cgroup_apply_control(cgrp);
+
+       cgroup_finalize_control(cgrp, ret);
+
+       kernfs_activate(cgrp->kn);
+       ret = 0;
+out_unlock:
+       cgroup_kn_unlock(of->kn);
+       return ret ?: nbytes;
+}
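+
+/*
+ * Illustrative example (not part of this change): the parser above takes
+ * a space-separated list of "+name"/"-name" tokens, so from userspace
+ *
+ *      echo "+memory -io" > cgroup.subtree_control
+ *
+ * enables the memory controller and disables the io controller for the
+ * cgroup's children in a single write.
+ */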
+
+static int cgroup_events_show(struct seq_file *seq, void *v)
+{
+       seq_printf(seq, "populated %d\n",
+                  cgroup_is_populated(seq_css(seq)->cgroup));
+       return 0;
+}
+
+static int cgroup_file_open(struct kernfs_open_file *of)
+{
+       struct cftype *cft = of->kn->priv;
+
+       if (cft->open)
+               return cft->open(of);
+       return 0;
+}
+
+static void cgroup_file_release(struct kernfs_open_file *of)
+{
+       struct cftype *cft = of->kn->priv;
+
+       if (cft->release)
+               cft->release(of);
+}
+
+static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
+                                size_t nbytes, loff_t off)
+{
+       struct cgroup *cgrp = of->kn->parent->priv;
+       struct cftype *cft = of->kn->priv;
+       struct cgroup_subsys_state *css;
+       int ret;
+
+       if (cft->write)
+               return cft->write(of, buf, nbytes, off);
+
+       /*
+        * kernfs guarantees that a file isn't deleted with operations in
+        * flight, which means that the matching css is and stays alive and
+        * doesn't need to be pinned.  The RCU locking is not necessary
+        * either.  It's just for the convenience of using cgroup_css().
+        */
+       rcu_read_lock();
+       css = cgroup_css(cgrp, cft->ss);
+       rcu_read_unlock();
+
+       if (cft->write_u64) {
+               unsigned long long v;
+               ret = kstrtoull(buf, 0, &v);
+               if (!ret)
+                       ret = cft->write_u64(css, cft, v);
+       } else if (cft->write_s64) {
+               long long v;
+               ret = kstrtoll(buf, 0, &v);
+               if (!ret)
+                       ret = cft->write_s64(css, cft, v);
+       } else {
+               ret = -EINVAL;
+       }
+
+       return ret ?: nbytes;
+}
+
+static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
+{
+       return seq_cft(seq)->seq_start(seq, ppos);
+}
+
+static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
+{
+       return seq_cft(seq)->seq_next(seq, v, ppos);
+}
+
+static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
+{
+       if (seq_cft(seq)->seq_stop)
+               seq_cft(seq)->seq_stop(seq, v);
+}
+
+static int cgroup_seqfile_show(struct seq_file *m, void *arg)
+{
+       struct cftype *cft = seq_cft(m);
+       struct cgroup_subsys_state *css = seq_css(m);
+
+       if (cft->seq_show)
+               return cft->seq_show(m, arg);
+
+       if (cft->read_u64)
+               seq_printf(m, "%llu\n", cft->read_u64(css, cft));
+       else if (cft->read_s64)
+               seq_printf(m, "%lld\n", cft->read_s64(css, cft));
+       else
+               return -EINVAL;
+       return 0;
+}
+
+static struct kernfs_ops cgroup_kf_single_ops = {
+       .atomic_write_len       = PAGE_SIZE,
+       .open                   = cgroup_file_open,
+       .release                = cgroup_file_release,
+       .write                  = cgroup_file_write,
+       .seq_show               = cgroup_seqfile_show,
+};
+
+static struct kernfs_ops cgroup_kf_ops = {
+       .atomic_write_len       = PAGE_SIZE,
+       .open                   = cgroup_file_open,
+       .release                = cgroup_file_release,
+       .write                  = cgroup_file_write,
+       .seq_start              = cgroup_seqfile_start,
+       .seq_next               = cgroup_seqfile_next,
+       .seq_stop               = cgroup_seqfile_stop,
+       .seq_show               = cgroup_seqfile_show,
+};
+
+/* set uid and gid of cgroup dirs and files to that of the creator */
+static int cgroup_kn_set_ugid(struct kernfs_node *kn)
+{
+       struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
+                              .ia_uid = current_fsuid(),
+                              .ia_gid = current_fsgid(), };
+
+       if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
+           gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
+               return 0;
+
+       return kernfs_setattr(kn, &iattr);
+}
+
+static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
+                          struct cftype *cft)
+{
+       char name[CGROUP_FILE_NAME_MAX];
+       struct kernfs_node *kn;
+       struct lock_class_key *key = NULL;
+       int ret;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       key = &cft->lockdep_key;
+#endif
+       kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
+                                 cgroup_file_mode(cft), 0, cft->kf_ops, cft,
+                                 NULL, key);
+       if (IS_ERR(kn))
+               return PTR_ERR(kn);
+
+       ret = cgroup_kn_set_ugid(kn);
+       if (ret) {
+               kernfs_remove(kn);
+               return ret;
+       }
+
+       if (cft->file_offset) {
+               struct cgroup_file *cfile = (void *)css + cft->file_offset;
+
+               spin_lock_irq(&cgroup_file_kn_lock);
+               cfile->kn = kn;
+               spin_unlock_irq(&cgroup_file_kn_lock);
+       }
+
+       return 0;
+}
+
+/**
+ * cgroup_addrm_files - add or remove files to a cgroup directory
+ * @css: the target css
+ * @cgrp: the target cgroup (usually css->cgroup)
+ * @cfts: array of cftypes to be added
+ * @is_add: whether to add or remove
+ *
+ * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
+ * For removals, this function never fails.
+ */
+static int cgroup_addrm_files(struct cgroup_subsys_state *css,
+                             struct cgroup *cgrp, struct cftype cfts[],
+                             bool is_add)
+{
+       struct cftype *cft, *cft_end = NULL;
+       int ret = 0;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+restart:
+       for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
+               /* does cft->flags tell us to skip this file on @cgrp? */
+               if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
+                       continue;
+               if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
+                       continue;
+               if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
+                       continue;
+               if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
+                       continue;
+
+               if (is_add) {
+                       ret = cgroup_add_file(css, cgrp, cft);
+                       if (ret) {
+                               pr_warn("%s: failed to add %s, err=%d\n",
+                                       __func__, cft->name, ret);
+                               cft_end = cft;
+                               is_add = false;
+                               goto restart;
+                       }
+               } else {
+                       cgroup_rm_file(cgrp, cft);
+               }
+       }
+       return ret;
+}
+
+static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
+{
+       LIST_HEAD(pending);
+       struct cgroup_subsys *ss = cfts[0].ss;
+       struct cgroup *root = &ss->root->cgrp;
+       struct cgroup_subsys_state *css;
+       int ret = 0;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       /* add/rm files for all cgroups created before */
+       css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
+               struct cgroup *cgrp = css->cgroup;
+
+               if (!(css->flags & CSS_VISIBLE))
+                       continue;
+
+               ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
+               if (ret)
+                       break;
+       }
+
+       if (is_add && !ret)
+               kernfs_activate(root->kn);
+       return ret;
+}
+
+static void cgroup_exit_cftypes(struct cftype *cfts)
+{
+       struct cftype *cft;
+
+       for (cft = cfts; cft->name[0] != '\0'; cft++) {
+               /* free copy for custom atomic_write_len, see cgroup_init_cftypes() */
+               if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
+                       kfree(cft->kf_ops);
+               cft->kf_ops = NULL;
+               cft->ss = NULL;
+
+               /* revert flags set by cgroup core while adding @cfts */
+               cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
+       }
+}
+
+static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+{
+       struct cftype *cft;
+
+       for (cft = cfts; cft->name[0] != '\0'; cft++) {
+               struct kernfs_ops *kf_ops;
+
+               WARN_ON(cft->ss || cft->kf_ops);
+
+               if (cft->seq_start)
+                       kf_ops = &cgroup_kf_ops;
+               else
+                       kf_ops = &cgroup_kf_single_ops;
+
+               /*
+                * Ugh... if @cft wants a custom max_write_len, we need to
+                * make a copy of kf_ops to set its atomic_write_len.
+                */
+               if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
+                       kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
+                       if (!kf_ops) {
+                               cgroup_exit_cftypes(cfts);
+                               return -ENOMEM;
+                       }
+                       kf_ops->atomic_write_len = cft->max_write_len;
+               }
+
+               cft->kf_ops = kf_ops;
+               cft->ss = ss;
+       }
+
+       return 0;
+}
+
+static int cgroup_rm_cftypes_locked(struct cftype *cfts)
+{
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (!cfts || !cfts[0].ss)
+               return -ENOENT;
+
+       list_del(&cfts->node);
+       cgroup_apply_cftypes(cfts, false);
+       cgroup_exit_cftypes(cfts);
+       return 0;
+}
+
+/**
+ * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Unregister @cfts.  Files described by @cfts are removed from all
+ * existing cgroups and all future cgroups won't have them either.  This
+ * function can be called anytime whether @cfts' subsys is attached or not.
+ *
+ * Returns 0 on successful unregistration, -ENOENT if @cfts is not
+ * registered.
+ */
+int cgroup_rm_cftypes(struct cftype *cfts)
+{
+       int ret;
+
+       mutex_lock(&cgroup_mutex);
+       ret = cgroup_rm_cftypes_locked(cfts);
+       mutex_unlock(&cgroup_mutex);
+       return ret;
+}
+
+/**
+ * cgroup_add_cftypes - add an array of cftypes to a subsystem
+ * @ss: target cgroup subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Register @cfts to @ss.  Files described by @cfts are created for all
+ * existing cgroups to which @ss is attached and all future cgroups will
+ * have them too.  This function can be called anytime whether @ss is
+ * attached or not.
+ *
+ * Returns 0 on successful registration, -errno on failure.  Note that this
+ * function currently returns 0 as long as @cfts registration is successful
+ * even if some file creation attempts on existing cgroups fail.
+ */
+static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+{
+       int ret;
+
+       if (!cgroup_ssid_enabled(ss->id))
+               return 0;
+
+       if (!cfts || cfts[0].name[0] == '\0')
+               return 0;
+
+       ret = cgroup_init_cftypes(ss, cfts);
+       if (ret)
+               return ret;
+
+       mutex_lock(&cgroup_mutex);
+
+       list_add_tail(&cfts->node, &ss->cfts);
+       ret = cgroup_apply_cftypes(cfts, true);
+       if (ret)
+               cgroup_rm_cftypes_locked(cfts);
+
+       mutex_unlock(&cgroup_mutex);
+       return ret;
+}
+
+/**
+ * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
+ * @ss: target cgroup subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Similar to cgroup_add_cftypes() but the added files are only used for
+ * the default hierarchy.
+ */
+int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+{
+       struct cftype *cft;
+
+       for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
+               cft->flags |= __CFTYPE_ONLY_ON_DFL;
+       return cgroup_add_cftypes(ss, cfts);
+}
+
+/**
+ * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
+ * @ss: target cgroup subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Similar to cgroup_add_cftypes() but the added files are only used for
+ * the legacy hierarchies.
+ */
+int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+{
+       struct cftype *cft;
+
+       for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
+               cft->flags |= __CFTYPE_NOT_ON_DFL;
+       return cgroup_add_cftypes(ss, cfts);
+}
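+
+/*
+ * Illustrative sketch (not part of this change): a controller registers
+ * files by declaring a name-terminated cftype array and handing it to
+ * one of the helpers above, e.g.:
+ *
+ *      static struct cftype foo_files[] = {
+ *              {
+ *                      .name = "foo.value",
+ *                      .read_u64 = foo_read_u64,
+ *                      .write_u64 = foo_write_u64,
+ *              },
+ *              { }     (zero-length name terminates the array)
+ *      };
+ *
+ *      cgroup_add_legacy_cftypes(&foo_cgrp_subsys, foo_files);
+ *
+ * All foo_* names are hypothetical, for illustration only.
+ */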
+
+/**
+ * cgroup_file_notify - generate a file modified event for a cgroup_file
+ * @cfile: target cgroup_file
+ *
+ * @cfile must have been obtained by setting cftype->file_offset.
+ */
+void cgroup_file_notify(struct cgroup_file *cfile)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&cgroup_file_kn_lock, flags);
+       if (cfile->kn)
+               kernfs_notify(cfile->kn);
+       spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
+}
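+
+/*
+ * Illustrative sketch (not part of this change): a controller that set
+ * .file_offset in a cftype (as cgroup_base_files does for "cgroup.procs"
+ * below) can wake pollers on state changes with something like:
+ *
+ *      cgroup_file_notify(&cgrp->events_file);
+ *
+ * where events_file stands for whichever struct cgroup_file field the
+ * cftype's .file_offset points at.
+ */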
+
+/**
+ * css_next_child - find the next child of a given css
+ * @pos: the current position (%NULL to initiate traversal)
+ * @parent: css whose children to walk
+ *
+ * This function returns the next child of @parent and should be called
+ * under either cgroup_mutex or RCU read lock.  The only requirement is
+ * that @parent and @pos are accessible.  The next sibling is guaranteed to
+ * be returned regardless of their states.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ */
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+                                          struct cgroup_subsys_state *parent)
+{
+       struct cgroup_subsys_state *next;
+
+       cgroup_assert_mutex_or_rcu_locked();
+
+       /*
+        * @pos could already have been unlinked from the sibling list.
+        * Once a cgroup is removed, its ->sibling.next is no longer
+        * updated when its next sibling changes.  CSS_RELEASED is set when
+        * @pos is taken off list, at which time its next pointer is valid,
+        * and, as releases are serialized, the one pointed to by the next
+        * pointer is guaranteed to not have started release yet.  This
+        * implies that if we observe !CSS_RELEASED on @pos in this RCU
+        * critical section, the one pointed to by its next pointer is
+        * guaranteed to not have finished its RCU grace period even if we
+        * have dropped rcu_read_lock() in between iterations.
+        *
+        * If @pos has CSS_RELEASED set, its next pointer can't be
+        * dereferenced; however, as each css is given a monotonically
+        * increasing unique serial number and always appended to the
+        * sibling list, the next one can be found by walking the parent's
+        * children until the first css with higher serial number than
+        * @pos's.  While this path can be slower, it happens iff iteration
+        * races against release and the race window is very small.
+        */
+       if (!pos) {
+               next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
+       } else if (likely(!(pos->flags & CSS_RELEASED))) {
+               next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
+       } else {
+               list_for_each_entry_rcu(next, &parent->children, sibling)
+                       if (next->serial_nr > pos->serial_nr)
+                               break;
+       }
+
+       /*
+        * @next, if not pointing to the head, can be dereferenced and is
+        * the next sibling.
+        */
+       if (&next->sibling != &parent->children)
+               return next;
+       return NULL;
+}
+
+/**
+ * css_next_descendant_pre - find the next descendant for pre-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @root: css whose descendants to walk
+ *
+ * To be used by css_for_each_descendant_pre().  Find the next descendant
+ * to visit for pre-order traversal of @root's descendants.  @root is
+ * included in the iteration and the first node to be visited.
+ *
+ * While this function requires cgroup_mutex or RCU read locking, it
+ * doesn't require the whole traversal to be contained in a single critical
+ * section.  This function will return the correct next descendant as long
+ * as both @pos and @root are accessible and @pos is a descendant of @root.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ */
+struct cgroup_subsys_state *
+css_next_descendant_pre(struct cgroup_subsys_state *pos,
+                       struct cgroup_subsys_state *root)
+{
+       struct cgroup_subsys_state *next;
+
+       cgroup_assert_mutex_or_rcu_locked();
+
+       /* if first iteration, visit @root */
+       if (!pos)
+               return root;
+
+       /* visit the first child if exists */
+       next = css_next_child(NULL, pos);
+       if (next)
+               return next;
+
+       /* no child, visit my or the closest ancestor's next sibling */
+       while (pos != root) {
+               next = css_next_child(pos, pos->parent);
+               if (next)
+                       return next;
+               pos = pos->parent;
+       }
+
+       return NULL;
+}
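+
+/*
+ * Illustrative sketch (not part of this change): pre-order walks are
+ * normally written with the css_for_each_descendant_pre() wrapper under
+ * RCU read lock, e.g.:
+ *
+ *      struct cgroup_subsys_state *pos;
+ *
+ *      rcu_read_lock();
+ *      css_for_each_descendant_pre(pos, root_css)
+ *              visit(pos);     (parents are visited before children)
+ *      rcu_read_unlock();
+ *
+ * visit() is a placeholder for whatever per-css work the caller does.
+ */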
+
+/**
+ * css_rightmost_descendant - return the rightmost descendant of a css
+ * @pos: css of interest
+ *
+ * Return the rightmost descendant of @pos.  If there's no descendant, @pos
+ * is returned.  This can be used during pre-order traversal to skip the
+ * subtree of @pos.
+ *
+ * While this function requires cgroup_mutex or RCU read locking, it
+ * doesn't require the whole traversal to be contained in a single critical
+ * section.  This function will return the correct rightmost descendant as
+ * long as @pos is accessible.
+ */
+struct cgroup_subsys_state *
+css_rightmost_descendant(struct cgroup_subsys_state *pos)
+{
+       struct cgroup_subsys_state *last, *tmp;
+
+       cgroup_assert_mutex_or_rcu_locked();
+
+       do {
+               last = pos;
+               /* ->prev isn't RCU safe, walk ->next till the end */
+               pos = NULL;
+               css_for_each_child(tmp, last)
+                       pos = tmp;
+       } while (pos);
+
+       return last;
+}
+
+static struct cgroup_subsys_state *
+css_leftmost_descendant(struct cgroup_subsys_state *pos)
+{
+       struct cgroup_subsys_state *last;
+
+       do {
+               last = pos;
+               pos = css_next_child(NULL, pos);
+       } while (pos);
+
+       return last;
+}
+
+/**
+ * css_next_descendant_post - find the next descendant for post-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @root: css whose descendants to walk
+ *
+ * To be used by css_for_each_descendant_post().  Find the next descendant
+ * to visit for post-order traversal of @root's descendants.  @root is
+ * included in the iteration and the last node to be visited.
+ *
+ * While this function requires cgroup_mutex or RCU read locking, it
+ * doesn't require the whole traversal to be contained in a single critical
+ * section.  This function will return the correct next descendant as long
+ * as both @pos and @root are accessible and @pos is a descendant of
+ * @root.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ */
+struct cgroup_subsys_state *
+css_next_descendant_post(struct cgroup_subsys_state *pos,
+                        struct cgroup_subsys_state *root)
+{
+       struct cgroup_subsys_state *next;
+
+       cgroup_assert_mutex_or_rcu_locked();
+
+       /* if first iteration, visit leftmost descendant which may be @root */
+       if (!pos)
+               return css_leftmost_descendant(root);
+
+       /* if we visited @root, we're done */
+       if (pos == root)
+               return NULL;
+
+       /* if there's an unvisited sibling, visit its leftmost descendant */
+       next = css_next_child(pos, pos->parent);
+       if (next)
+               return css_leftmost_descendant(next);
+
+       /* no sibling left, visit parent */
+       return pos->parent;
+}
+
+/**
+ * css_has_online_children - does a css have online children
+ * @css: the target css
+ *
+ * Returns %true if @css has any online children; otherwise, %false.  This
+ * function can be called from any context but the caller is responsible
+ * for synchronizing against on/offlining as necessary.
+ */
+bool css_has_online_children(struct cgroup_subsys_state *css)
+{
+       struct cgroup_subsys_state *child;
+       bool ret = false;
+
+       rcu_read_lock();
+       css_for_each_child(child, css) {
+               if (child->flags & CSS_ONLINE) {
+                       ret = true;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return ret;
+}
+
+/**
+ * css_task_iter_advance_css_set - advance a task iterator to the next css_set
+ * @it: the iterator to advance
+ *
+ * Advance @it to the next css_set to walk.
+ */
+static void css_task_iter_advance_css_set(struct css_task_iter *it)
+{
+       struct list_head *l = it->cset_pos;
+       struct cgrp_cset_link *link;
+       struct css_set *cset;
+
+       lockdep_assert_held(&css_set_lock);
+
+       /* Advance to the next non-empty css_set */
+       do {
+               l = l->next;
+               if (l == it->cset_head) {
+                       it->cset_pos = NULL;
+                       it->task_pos = NULL;
+                       return;
+               }
+
+               if (it->ss) {
+                       cset = container_of(l, struct css_set,
+                                           e_cset_node[it->ss->id]);
+               } else {
+                       link = list_entry(l, struct cgrp_cset_link, cset_link);
+                       cset = link->cset;
+               }
+       } while (!css_set_populated(cset));
+
+       it->cset_pos = l;
+
+       if (!list_empty(&cset->tasks))
+               it->task_pos = cset->tasks.next;
+       else
+               it->task_pos = cset->mg_tasks.next;
+
+       it->tasks_head = &cset->tasks;
+       it->mg_tasks_head = &cset->mg_tasks;
+
+       /*
+        * We don't keep css_sets locked across iteration steps and thus
+        * need to take steps to ensure that iteration can be resumed after
+        * the lock is re-acquired.  Iteration is performed at two levels -
+        * css_sets and tasks in them.
+        *
+        * Once created, a css_set never leaves its cgroup lists, so a
+        * pinned css_set is guaranteed to stay put and we can resume
+        * iteration afterwards.
+        *
+        * Tasks may leave @cset across iteration steps.  This is resolved
+        * by registering each iterator with the css_set currently being
+        * walked and making css_set_move_task() advance iterators whose
+        * next task is leaving.
+        */
+       if (it->cur_cset) {
+               list_del(&it->iters_node);
+               put_css_set_locked(it->cur_cset);
+       }
+       get_css_set(cset);
+       it->cur_cset = cset;
+       list_add(&it->iters_node, &cset->task_iters);
+}
+
+static void css_task_iter_advance(struct css_task_iter *it)
+{
+       struct list_head *l = it->task_pos;
+
+       lockdep_assert_held(&css_set_lock);
+       WARN_ON_ONCE(!l);
+
+       /*
+        * Advance iterator to find next entry.  cset->tasks is consumed
+        * first and then ->mg_tasks.  After ->mg_tasks, we move onto the
+        * next cset.
+        */
+       l = l->next;
+
+       if (l == it->tasks_head)
+               l = it->mg_tasks_head->next;
+
+       if (l == it->mg_tasks_head)
+               css_task_iter_advance_css_set(it);
+       else
+               it->task_pos = l;
+}
+
+/**
+ * css_task_iter_start - initiate task iteration
+ * @css: the css to walk tasks of
+ * @it: the task iterator to use
+ *
+ * Initiate iteration through the tasks of @css.  The caller can call
+ * css_task_iter_next() to walk through the tasks until the function
+ * returns NULL.  On completion of iteration, css_task_iter_end() must be
+ * called.
+ */
+void css_task_iter_start(struct cgroup_subsys_state *css,
+                        struct css_task_iter *it)
+{
+       /* no one should try to iterate before mounting cgroups */
+       WARN_ON_ONCE(!use_task_css_set_links);
+
+       memset(it, 0, sizeof(*it));
+
+       spin_lock_irq(&css_set_lock);
+
+       it->ss = css->ss;
+
+       if (it->ss)
+               it->cset_pos = &css->cgroup->e_csets[css->ss->id];
+       else
+               it->cset_pos = &css->cgroup->cset_links;
+
+       it->cset_head = it->cset_pos;
+
+       css_task_iter_advance_css_set(it);
+
+       spin_unlock_irq(&css_set_lock);
+}
+
+/**
+ * css_task_iter_next - return the next task for the iterator
+ * @it: the task iterator being iterated
+ *
+ * The "next" function for task iteration.  @it should have been
+ * initialized via css_task_iter_start().  Returns NULL when the iteration
+ * reaches the end.
+ */
+struct task_struct *css_task_iter_next(struct css_task_iter *it)
+{
+       if (it->cur_task) {
+               put_task_struct(it->cur_task);
+               it->cur_task = NULL;
+       }
+
+       spin_lock_irq(&css_set_lock);
+
+       if (it->task_pos) {
+               it->cur_task = list_entry(it->task_pos, struct task_struct,
+                                         cg_list);
+               get_task_struct(it->cur_task);
+               css_task_iter_advance(it);
+       }
+
+       spin_unlock_irq(&css_set_lock);
+
+       return it->cur_task;
+}
+
+/**
+ * css_task_iter_end - finish task iteration
+ * @it: the task iterator to finish
+ *
+ * Finish task iteration started by css_task_iter_start().
+ */
+void css_task_iter_end(struct css_task_iter *it)
+{
+       if (it->cur_cset) {
+               spin_lock_irq(&css_set_lock);
+               list_del(&it->iters_node);
+               put_css_set_locked(it->cur_cset);
+               spin_unlock_irq(&css_set_lock);
+       }
+
+       if (it->cur_task)
+               put_task_struct(it->cur_task);
+}
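+
+/*
+ * Illustrative usage of the iterator API above (a sketch only, not part
+ * of this change; "count" and the caller are hypothetical and @css is
+ * assumed to be a pinned, valid css):
+ *
+ *     struct css_task_iter it;
+ *     struct task_struct *task;
+ *     int count = 0;
+ *
+ *     css_task_iter_start(css, &it);
+ *     while ((task = css_task_iter_next(&it)))
+ *             count++;
+ *     css_task_iter_end(&it);
+ */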
+
+static void cgroup_procs_release(struct kernfs_open_file *of)
+{
+       if (of->priv) {
+               css_task_iter_end(of->priv);
+               kfree(of->priv);
+       }
+}
+
+static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct kernfs_open_file *of = s->private;
+       struct css_task_iter *it = of->priv;
+       struct task_struct *task;
+
+       do {
+               task = css_task_iter_next(it);
+       } while (task && !thread_group_leader(task));
+
+       return task;
+}
+
+static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
+{
+       struct kernfs_open_file *of = s->private;
+       struct cgroup *cgrp = seq_css(s)->cgroup;
+       struct css_task_iter *it = of->priv;
+
+       /*
+        * When a seq_file is seeked, it's always traversed sequentially
+        * from position 0, so we can simply keep iterating on !0 *pos.
+        */
+       if (!it) {
+               if (WARN_ON_ONCE((*pos)++))
+                       return ERR_PTR(-EINVAL);
+
+               it = kzalloc(sizeof(*it), GFP_KERNEL);
+               if (!it)
+                       return ERR_PTR(-ENOMEM);
+               of->priv = it;
+               css_task_iter_start(&cgrp->self, it);
+       } else if (!(*pos)++) {
+               css_task_iter_end(it);
+               css_task_iter_start(&cgrp->self, it);
+       }
+
+       return cgroup_procs_next(s, NULL, NULL);
+}
+
+static int cgroup_procs_show(struct seq_file *s, void *v)
+{
+       seq_printf(s, "%d\n", task_tgid_vnr(v));
+       return 0;
+}
+
+/* cgroup core interface files for the default hierarchy */
+static struct cftype cgroup_base_files[] = {
+       {
+               .name = "cgroup.procs",
+               .file_offset = offsetof(struct cgroup, procs_file),
+               .release = cgroup_procs_release,
+               .seq_start = cgroup_procs_start,
+               .seq_next = cgroup_procs_next,
+               .seq_show = cgroup_procs_show,
+               .write = cgroup_procs_write,
+       },
+       {
+               .name = "cgroup.controllers",
+               .seq_show = cgroup_controllers_show,
+       },
+       {
+               .name = "cgroup.subtree_control",
+               .seq_show = cgroup_subtree_control_show,
+               .write = cgroup_subtree_control_write,
+       },
+       {
+               .name = "cgroup.events",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .file_offset = offsetof(struct cgroup, events_file),
+               .seq_show = cgroup_events_show,
+       },
+       { }     /* terminate */
+};
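+
+/*
+ * For reference (not part of this change): the cftypes above surface as
+ * files in every v2 cgroup directory.  Assuming cgroup2 is mounted at
+ * /sys/fs/cgroup and "mygroup" is a hypothetical child cgroup:
+ *
+ *     cat /sys/fs/cgroup/mygroup/cgroup.procs         # list member PIDs
+ *     echo $$ > /sys/fs/cgroup/mygroup/cgroup.procs   # migrate the shell
+ *     cat /sys/fs/cgroup/mygroup/cgroup.controllers   # available controllers
+ */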
+
+/*
+ * css destruction is a four-stage process.
+ *
+ * 1. Destruction starts.  Killing of the percpu_ref is initiated.
+ *    Implemented in kill_css().
+ *
+ * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
+ *    and thus css_tryget_online() is guaranteed to fail, the css can be
+ *    offlined by invoking offline_css().  After offlining, the base ref is
+ *    put.  Implemented in css_killed_work_fn().
+ *
+ * 3. When the percpu_ref reaches zero, the only possible remaining
+ *    accessors are inside RCU read sections.  css_release() schedules the
+ *    RCU callback.
+ *
+ * 4. After the grace period, the css can be freed.  Implemented in
+ *    css_free_work_fn().
+ *
+ * It is actually hairier because both steps 2 and 4 require process context
+ * and thus involve punting to css->destroy_work, adding two additional
+ * steps to the already complex sequence.
+ */
+static void css_free_work_fn(struct work_struct *work)
+{
+       struct cgroup_subsys_state *css =
+               container_of(work, struct cgroup_subsys_state, destroy_work);
+       struct cgroup_subsys *ss = css->ss;
+       struct cgroup *cgrp = css->cgroup;
+
+       percpu_ref_exit(&css->refcnt);
+
+       if (ss) {
+               /* css free path */
+               struct cgroup_subsys_state *parent = css->parent;
+               int id = css->id;
+
+               ss->css_free(css);
+               cgroup_idr_remove(&ss->css_idr, id);
+               cgroup_put(cgrp);
+
+               if (parent)
+                       css_put(parent);
+       } else {
+               /* cgroup free path */
+               atomic_dec(&cgrp->root->nr_cgrps);
+               cgroup1_pidlist_destroy_all(cgrp);
+               cancel_work_sync(&cgrp->release_agent_work);
+
+               if (cgroup_parent(cgrp)) {
+                       /*
+                        * We get a ref to the parent, and put the ref when
+                        * this cgroup is being freed, so it's guaranteed
+                        * that the parent won't be destroyed before its
+                        * children.
+                        */
+                       cgroup_put(cgroup_parent(cgrp));
+                       kernfs_put(cgrp->kn);
+                       kfree(cgrp);
+               } else {
+                       /*
+                        * This is the root cgroup's refcnt reaching zero,
+                        * which indicates that the root should be
+                        * released.
+                        */
+                       cgroup_destroy_root(cgrp->root);
+               }
+       }
+}
+
+static void css_free_rcu_fn(struct rcu_head *rcu_head)
+{
+       struct cgroup_subsys_state *css =
+               container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
+
+       INIT_WORK(&css->destroy_work, css_free_work_fn);
+       queue_work(cgroup_destroy_wq, &css->destroy_work);
+}
+
+static void css_release_work_fn(struct work_struct *work)
+{
+       struct cgroup_subsys_state *css =
+               container_of(work, struct cgroup_subsys_state, destroy_work);
+       struct cgroup_subsys *ss = css->ss;
+       struct cgroup *cgrp = css->cgroup;
+
+       mutex_lock(&cgroup_mutex);
+
+       css->flags |= CSS_RELEASED;
+       list_del_rcu(&css->sibling);
+
+       if (ss) {
+               /* css release path */
+               cgroup_idr_replace(&ss->css_idr, NULL, css->id);
+               if (ss->css_released)
+                       ss->css_released(css);
+       } else {
+               /* cgroup release path */
+               trace_cgroup_release(cgrp);
+
+               cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+               cgrp->id = -1;
+
+               /*
+                * There are two control paths which try to determine
+                * cgroup from dentry without going through kernfs -
+                * cgroupstats_build() and css_tryget_online_from_dir().
+                * Those are supported by RCU protecting clearing of
+                * cgrp->kn->priv backpointer.
+                */
+               if (cgrp->kn)
+                       RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
+                                        NULL);
+
+               cgroup_bpf_put(cgrp);
+       }
+
+       mutex_unlock(&cgroup_mutex);
+
+       call_rcu(&css->rcu_head, css_free_rcu_fn);
+}
+
+static void css_release(struct percpu_ref *ref)
+{
+       struct cgroup_subsys_state *css =
+               container_of(ref, struct cgroup_subsys_state, refcnt);
+
+       INIT_WORK(&css->destroy_work, css_release_work_fn);
+       queue_work(cgroup_destroy_wq, &css->destroy_work);
+}
+
+static void init_and_link_css(struct cgroup_subsys_state *css,
+                             struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+       lockdep_assert_held(&cgroup_mutex);
+
+       cgroup_get(cgrp);
+
+       memset(css, 0, sizeof(*css));
+       css->cgroup = cgrp;
+       css->ss = ss;
+       css->id = -1;
+       INIT_LIST_HEAD(&css->sibling);
+       INIT_LIST_HEAD(&css->children);
+       css->serial_nr = css_serial_nr_next++;
+       atomic_set(&css->online_cnt, 0);
+
+       if (cgroup_parent(cgrp)) {
+               css->parent = cgroup_css(cgroup_parent(cgrp), ss);
+               css_get(css->parent);
+       }
+
+       BUG_ON(cgroup_css(cgrp, ss));
+}
+
+/* invoke ->css_online() on a new CSS and mark it online if successful */
+static int online_css(struct cgroup_subsys_state *css)
+{
+       struct cgroup_subsys *ss = css->ss;
+       int ret = 0;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (ss->css_online)
+               ret = ss->css_online(css);
+       if (!ret) {
+               css->flags |= CSS_ONLINE;
+               rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
+
+               atomic_inc(&css->online_cnt);
+               if (css->parent)
+                       atomic_inc(&css->parent->online_cnt);
+       }
+       return ret;
+}
+
+/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
+static void offline_css(struct cgroup_subsys_state *css)
+{
+       struct cgroup_subsys *ss = css->ss;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (!(css->flags & CSS_ONLINE))
+               return;
+
+       if (ss->css_reset)
+               ss->css_reset(css);
+
+       if (ss->css_offline)
+               ss->css_offline(css);
+
+       css->flags &= ~CSS_ONLINE;
+       RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
+
+       wake_up_all(&css->cgroup->offline_waitq);
+}
+
+/**
+ * css_create - create a cgroup_subsys_state
+ * @cgrp: the cgroup new css will be associated with
+ * @ss: the subsys of new css
+ *
+ * Create a new css associated with @cgrp - @ss pair.  On success, the new
+ * css is online and installed in @cgrp.  This function doesn't create the
+ * interface files.  Returns the new css on success or an ERR_PTR-encoded
+ * -errno on failure.
+ */
+static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
+                                             struct cgroup_subsys *ss)
+{
+       struct cgroup *parent = cgroup_parent(cgrp);
+       struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
+       struct cgroup_subsys_state *css;
+       int err;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       css = ss->css_alloc(parent_css);
+       if (!css)
+               css = ERR_PTR(-ENOMEM);
+       if (IS_ERR(css))
+               return css;
+
+       init_and_link_css(css, ss, cgrp);
+
+       err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
+       if (err)
+               goto err_free_css;
+
+       err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
+       if (err < 0)
+               goto err_free_css;
+       css->id = err;
+
+       /* @css is ready to be brought online now, make it visible */
+       list_add_tail_rcu(&css->sibling, &parent_css->children);
+       cgroup_idr_replace(&ss->css_idr, css, css->id);
+
+       err = online_css(css);
+       if (err)
+               goto err_list_del;
+
+       if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
+           cgroup_parent(parent)) {
+               pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
+                       current->comm, current->pid, ss->name);
+               if (!strcmp(ss->name, "memory"))
+                       pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
+               ss->warned_broken_hierarchy = true;
+       }
+
+       return css;
+
+err_list_del:
+       list_del_rcu(&css->sibling);
+err_free_css:
+       call_rcu(&css->rcu_head, css_free_rcu_fn);
+       return ERR_PTR(err);
+}
+
+/*
+ * The returned cgroup is fully initialized including its control mask, but
+ * it isn't associated with its kernfs_node and doesn't have the control
+ * mask applied.
+ */
+static struct cgroup *cgroup_create(struct cgroup *parent)
+{
+       struct cgroup_root *root = parent->root;
+       struct cgroup *cgrp, *tcgrp;
+       int level = parent->level + 1;
+       int ret;
+
+       /* allocate the cgroup and its ID, 0 is reserved for the root */
+       cgrp = kzalloc(sizeof(*cgrp) +
+                      sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL);
+       if (!cgrp)
+               return ERR_PTR(-ENOMEM);
+
+       ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
+       if (ret)
+               goto out_free_cgrp;
+
+       /*
+        * Temporarily set the pointer to NULL, so idr_find() won't return
+        * a half-baked cgroup.
+        */
+       cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
+       if (cgrp->id < 0) {
+               ret = -ENOMEM;
+               goto out_cancel_ref;
+       }
+
+       init_cgroup_housekeeping(cgrp);
+
+       cgrp->self.parent = &parent->self;
+       cgrp->root = root;
+       cgrp->level = level;
+
+       for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp))
+               cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
+
+       if (notify_on_release(parent))
+               set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+
+       if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
+               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
+
+       cgrp->self.serial_nr = css_serial_nr_next++;
+
+       /* allocation complete, commit to creation */
+       list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
+       atomic_inc(&root->nr_cgrps);
+       cgroup_get(parent);
+
+       /*
+        * @cgrp is now fully operational.  If something fails after this
+        * point, it'll be released via the normal destruction path.
+        */
+       cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
+
+       /*
+        * On the default hierarchy, a child doesn't automatically inherit
+        * subtree_control from the parent.  Each is configured manually.
+        */
+       if (!cgroup_on_dfl(cgrp))
+               cgrp->subtree_control = cgroup_control(cgrp);
+
+       if (parent)
+               cgroup_bpf_inherit(cgrp, parent);
+
+       cgroup_propagate_control(cgrp);
+
+       return cgrp;
+
+out_cancel_ref:
+       percpu_ref_exit(&cgrp->self.refcnt);
+out_free_cgrp:
+       kfree(cgrp);
+       return ERR_PTR(ret);
+}
+
+int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
+{
+       struct cgroup *parent, *cgrp;
+       struct kernfs_node *kn;
+       int ret;
+
+       /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
+       if (strchr(name, '\n'))
+               return -EINVAL;
+
+       parent = cgroup_kn_lock_live(parent_kn, false);
+       if (!parent)
+               return -ENODEV;
+
+       cgrp = cgroup_create(parent);
+       if (IS_ERR(cgrp)) {
+               ret = PTR_ERR(cgrp);
+               goto out_unlock;
+       }
+
+       /* create the directory */
+       kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
+       if (IS_ERR(kn)) {
+               ret = PTR_ERR(kn);
+               goto out_destroy;
+       }
+       cgrp->kn = kn;
+
+       /*
+        * This extra ref will be put in css_free_work_fn() and guarantees
+        * that @cgrp->kn is always accessible.
+        */
+       kernfs_get(kn);
+
+       ret = cgroup_kn_set_ugid(kn);
+       if (ret)
+               goto out_destroy;
+
+       ret = css_populate_dir(&cgrp->self);
+       if (ret)
+               goto out_destroy;
+
+       ret = cgroup_apply_control_enable(cgrp);
+       if (ret)
+               goto out_destroy;
+
+       trace_cgroup_mkdir(cgrp);
+
+       /* let's create and online css's */
+       kernfs_activate(kn);
+
+       ret = 0;
+       goto out_unlock;
+
+out_destroy:
+       cgroup_destroy_locked(cgrp);
+out_unlock:
+       cgroup_kn_unlock(parent_kn);
+       return ret;
+}
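+
+/*
+ * From userspace the above is reached via plain mkdir(2); e.g. (an
+ * illustration, the mount point and name are arbitrary):
+ *
+ *     mkdir /sys/fs/cgroup/mygroup
+ *
+ * kernfs routes the mkdir to cgroup_mkdir() through
+ * cgroup_kf_syscall_ops below.
+ */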
+
+/*
+ * This is called when the refcnt of a css is confirmed to be killed.
+ * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
+ * initiate destruction and put the css ref from kill_css().
+ */
+static void css_killed_work_fn(struct work_struct *work)
+{
+       struct cgroup_subsys_state *css =
+               container_of(work, struct cgroup_subsys_state, destroy_work);
+
+       mutex_lock(&cgroup_mutex);
+
+       do {
+               offline_css(css);
+               css_put(css);
+               /* @css can't go away while we're holding cgroup_mutex */
+               css = css->parent;
+       } while (css && atomic_dec_and_test(&css->online_cnt));
+
+       mutex_unlock(&cgroup_mutex);
+}
+
+/* css kill confirmation processing requires process context, bounce */
+static void css_killed_ref_fn(struct percpu_ref *ref)
+{
+       struct cgroup_subsys_state *css =
+               container_of(ref, struct cgroup_subsys_state, refcnt);
+
+       if (atomic_dec_and_test(&css->online_cnt)) {
+               INIT_WORK(&css->destroy_work, css_killed_work_fn);
+               queue_work(cgroup_destroy_wq, &css->destroy_work);
+       }
+}
+
+/**
+ * kill_css - destroy a css
+ * @css: css to destroy
+ *
+ * This function initiates destruction of @css by removing cgroup interface
+ * files and putting its base reference.  ->css_offline() will be invoked
+ * asynchronously once css_tryget_online() is guaranteed to fail and when
+ * the reference count reaches zero, @css will be released.
+ */
+static void kill_css(struct cgroup_subsys_state *css)
+{
+       lockdep_assert_held(&cgroup_mutex);
+
+       /*
+        * This must happen before css is disassociated with its cgroup.
+        * See seq_css() for details.
+        */
+       css_clear_dir(css);
+
+       /*
+        * Killing would put the base ref, but we need to keep it alive
+        * until after ->css_offline().
+        */
+       css_get(css);
+
+       /*
+        * cgroup core guarantees that, by the time ->css_offline() is
+        * invoked, no new css reference will be given out via
+        * css_tryget_online().  We can't simply call percpu_ref_kill() and
+        * proceed to offlining css's because percpu_ref_kill() doesn't
+        * guarantee that the ref is seen as killed on all CPUs on return.
+        *
+        * Use percpu_ref_kill_and_confirm() to get notifications as each
+        * css is confirmed to be seen as killed on all CPUs.
+        */
+       percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
+}
+
+/**
+ * cgroup_destroy_locked - the first stage of cgroup destruction
+ * @cgrp: cgroup to be destroyed
+ *
+ * css's make use of percpu refcnts whose killing latency shouldn't be
+ * exposed to userland and are RCU protected.  Also, cgroup core needs to
+ * guarantee that css_tryget_online() won't succeed by the time
+ * ->css_offline() is invoked.  To satisfy all the requirements,
+ * destruction is implemented in the following two steps.
+ *
+ * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
+ *     userland visible parts and start killing the percpu refcnts of
+ *     css's.  Set up so that the next stage will be kicked off once all
+ *     the percpu refcnts are confirmed to be killed.
+ *
+ * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
+ *     rest of destruction.  Once all cgroup references are gone, the
+ *     cgroup is RCU-freed.
+ *
+ * This function implements s1.  After this step, @cgrp is gone as far as
+ * the userland is concerned and a new cgroup with the same name may be
+ * created.  As cgroup doesn't care about the names internally, this
+ * doesn't cause any problem.
+ */
+static int cgroup_destroy_locked(struct cgroup *cgrp)
+       __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
+{
+       struct cgroup_subsys_state *css;
+       struct cgrp_cset_link *link;
+       int ssid;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       /*
+        * Only migration can raise populated from zero and we're already
+        * holding cgroup_mutex.
+        */
+       if (cgroup_is_populated(cgrp))
+               return -EBUSY;
+
+       /*
+        * Make sure there are no live children.  We can't test emptiness of
+        * ->self.children as dead children linger on it while being
+        * drained; otherwise, "rmdir parent/child parent" may fail.
+        */
+       if (css_has_online_children(&cgrp->self))
+               return -EBUSY;
+
+       /*
+        * Mark @cgrp and the associated csets dead.  The former prevents
+        * further task migration and child creation by disabling
+        * cgroup_lock_live_group().  The latter makes the csets ignored by
+        * the migration path.
+        */
+       cgrp->self.flags &= ~CSS_ONLINE;
+
+       spin_lock_irq(&css_set_lock);
+       list_for_each_entry(link, &cgrp->cset_links, cset_link)
+               link->cset->dead = true;
+       spin_unlock_irq(&css_set_lock);
+
+       /* initiate massacre of all css's */
+       for_each_css(css, ssid, cgrp)
+               kill_css(css);
+
+       /*
+        * Remove @cgrp directory along with the base files.  @cgrp has an
+        * extra ref on its kn.
+        */
+       kernfs_remove(cgrp->kn);
+
+       cgroup1_check_for_release(cgroup_parent(cgrp));
+
+       /* put the base reference */
+       percpu_ref_kill(&cgrp->self.refcnt);
+
+       return 0;
+}
+
+int cgroup_rmdir(struct kernfs_node *kn)
+{
+       struct cgroup *cgrp;
+       int ret = 0;
+
+       cgrp = cgroup_kn_lock_live(kn, false);
+       if (!cgrp)
+               return 0;
+
+       ret = cgroup_destroy_locked(cgrp);
+
+       if (!ret)
+               trace_cgroup_rmdir(cgrp);
+
+       cgroup_kn_unlock(kn);
+       return ret;
+}
+
+static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
+       .remount_fs             = cgroup_remount,
+       .mkdir                  = cgroup_mkdir,
+       .rmdir                  = cgroup_rmdir,
+       .show_path              = cgroup_show_path,
+};
+
+static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
+{
+       struct cgroup_subsys_state *css;
+
+       pr_debug("Initializing cgroup subsys %s\n", ss->name);
+
+       mutex_lock(&cgroup_mutex);
+
+       idr_init(&ss->css_idr);
+       INIT_LIST_HEAD(&ss->cfts);
+
+       /* Create the root cgroup state for this subsystem */
+       ss->root = &cgrp_dfl_root;
+       css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
+       /* We don't handle early failures gracefully */
+       BUG_ON(IS_ERR(css));
+       init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
+
+       /*
+        * Root csses are never destroyed and we can't initialize
+        * percpu_ref during early init.  Disable refcnting.
+        */
+       css->flags |= CSS_NO_REF;
+
+       if (early) {
+               /* allocation can't be done safely during early init */
+               css->id = 1;
+       } else {
+               css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
+               BUG_ON(css->id < 0);
+       }
+
+       /* Update the init_css_set to contain a subsys
+        * pointer to this state - since the subsystem is
+        * newly registered, all tasks - and hence the
+        * init_css_set - are in the subsystem's root cgroup. */
+       init_css_set.subsys[ss->id] = css;
+
+       have_fork_callback |= (bool)ss->fork << ss->id;
+       have_exit_callback |= (bool)ss->exit << ss->id;
+       have_free_callback |= (bool)ss->free << ss->id;
+       have_canfork_callback |= (bool)ss->can_fork << ss->id;
+
+       /* At system boot, before all subsystems have been
+        * registered, no tasks have been forked, so we don't
+        * need to invoke fork callbacks here. */
+       BUG_ON(!list_empty(&init_task.tasks));
+
+       BUG_ON(online_css(css));
+
+       mutex_unlock(&cgroup_mutex);
+}
+
+/**
+ * cgroup_init_early - cgroup initialization at system boot
+ *
+ * Initialize cgroups at system boot, and initialize any
+ * subsystems that request early init.
+ */
+int __init cgroup_init_early(void)
+{
+       static struct cgroup_sb_opts __initdata opts;
+       struct cgroup_subsys *ss;
+       int i;
+
+       init_cgroup_root(&cgrp_dfl_root, &opts);
+       cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
+
+       RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
+
+       for_each_subsys(ss, i) {
+               WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
+                    "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
+                    i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
+                    ss->id, ss->name);
+               WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
+                    "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
+
+               ss->id = i;
+               ss->name = cgroup_subsys_name[i];
+               if (!ss->legacy_name)
+                       ss->legacy_name = cgroup_subsys_name[i];
+
+               if (ss->early_init)
+                       cgroup_init_subsys(ss, true);
+       }
+       return 0;
+}
+
+static u16 cgroup_disable_mask __initdata;
+
+/**
+ * cgroup_init - cgroup initialization
+ *
+ * Register cgroup filesystem and /proc file, and initialize
+ * any subsystems that didn't request early init.
+ */
+int __init cgroup_init(void)
+{
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
+       BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
+       BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
+       BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
+
+       /*
+        * The latency of the synchronize_sched() is too high for cgroups,
+        * avoid it at the cost of forcing all readers into the slow path.
+        */
+       rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
+
+       get_user_ns(init_cgroup_ns.user_ns);
+
+       mutex_lock(&cgroup_mutex);
+
+       /*
+        * Add init_css_set to the hash table so that dfl_root can link to
+        * it during init.
+        */
+       hash_add(css_set_table, &init_css_set.hlist,
+                css_set_hash(init_css_set.subsys));
+
+       BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
+
+       mutex_unlock(&cgroup_mutex);
+
+       for_each_subsys(ss, ssid) {
+               if (ss->early_init) {
+                       struct cgroup_subsys_state *css =
+                               init_css_set.subsys[ss->id];
+
+                       css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
+                                                  GFP_KERNEL);
+                       BUG_ON(css->id < 0);
+               } else {
+                       cgroup_init_subsys(ss, false);
+               }
+
+               list_add_tail(&init_css_set.e_cset_node[ssid],
+                             &cgrp_dfl_root.cgrp.e_csets[ssid]);
+
+               /*
+                * Setting dfl_root subsys_mask needs to consider the
+                * disabled flag, and cftype registration needs kmalloc,
+                * neither of which is available during early_init.
+                */
+               if (cgroup_disable_mask & (1 << ssid)) {
+                       static_branch_disable(cgroup_subsys_enabled_key[ssid]);
+                       printk(KERN_INFO "Disabling %s control group subsystem\n",
+                              ss->name);
+                       continue;
+               }
+
+               if (cgroup1_ssid_disabled(ssid))
+                       printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
+                              ss->name);
+
+               cgrp_dfl_root.subsys_mask |= 1 << ss->id;
+
+               if (ss->implicit_on_dfl)
+                       cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
+               else if (!ss->dfl_cftypes)
+                       cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;
+
+               if (ss->dfl_cftypes == ss->legacy_cftypes) {
+                       WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
+               } else {
+                       WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
+                       WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
+               }
+
+               if (ss->bind)
+                       ss->bind(init_css_set.subsys[ssid]);
+       }
+
+       /* init_css_set.subsys[] has been updated, re-hash */
+       hash_del(&init_css_set.hlist);
+       hash_add(css_set_table, &init_css_set.hlist,
+                css_set_hash(init_css_set.subsys));
+
+       WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
+       WARN_ON(register_filesystem(&cgroup_fs_type));
+       WARN_ON(register_filesystem(&cgroup2_fs_type));
+       WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
+
+       return 0;
+}
+
+static int __init cgroup_wq_init(void)
+{
+       /*
+        * There isn't much point in executing the destruction path in
+        * parallel.  A good chunk of it is serialized with cgroup_mutex anyway.
+        * Use 1 for @max_active.
+        *
+        * We would prefer to do this in cgroup_init() above, but that
+        * is called before init_workqueues(), so we leave this until after.
+        */
+       cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+       BUG_ON(!cgroup_destroy_wq);
+       return 0;
+}
+core_initcall(cgroup_wq_init);
+
+/*
+ * proc_cgroup_show()
+ *  - Print task's cgroup paths into seq_file, one line for each hierarchy
+ *  - Used for /proc/<pid>/cgroup.
+ */
+int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+                    struct pid *pid, struct task_struct *tsk)
+{
+       char *buf;
+       int retval;
+       struct cgroup_root *root;
+
+       retval = -ENOMEM;
+       buf = kmalloc(PATH_MAX, GFP_KERNEL);
+       if (!buf)
+               goto out;
+
+       mutex_lock(&cgroup_mutex);
+       spin_lock_irq(&css_set_lock);
+
+       for_each_root(root) {
+               struct cgroup_subsys *ss;
+               struct cgroup *cgrp;
+               int ssid, count = 0;
+
+               if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
+                       continue;
+
+               seq_printf(m, "%d:", root->hierarchy_id);
+               if (root != &cgrp_dfl_root)
+                       for_each_subsys(ss, ssid)
+                               if (root->subsys_mask & (1 << ssid))
+                                       seq_printf(m, "%s%s", count++ ? "," : "",
+                                                  ss->legacy_name);
+               if (strlen(root->name))
+                       seq_printf(m, "%sname=%s", count ? "," : "",
+                                  root->name);
+               seq_putc(m, ':');
+
+               cgrp = task_cgroup_from_root(tsk, root);
+
+               /*
+                * On traditional hierarchies, all zombie tasks show up as
+                * belonging to the root cgroup.  On the default hierarchy,
+                * while a zombie doesn't show up in "cgroup.procs" and
+                * thus can't be migrated, its /proc/PID/cgroup keeps
+                * reporting the cgroup it belonged to before exiting.  If
+                * the cgroup is removed before the zombie is reaped,
+                * " (deleted)" is appended to the cgroup path.
+                */
+               if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
+                       retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+                                               current->nsproxy->cgroup_ns);
+                       if (retval >= PATH_MAX)
+                               retval = -ENAMETOOLONG;
+                       if (retval < 0)
+                               goto out_unlock;
+
+                       seq_puts(m, buf);
+               } else {
+                       seq_puts(m, "/");
+               }
+
+               if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
+                       seq_puts(m, " (deleted)\n");
+               else
+                       seq_putc(m, '\n');
+       }
+
+       retval = 0;
+out_unlock:
+       spin_unlock_irq(&css_set_lock);
+       mutex_unlock(&cgroup_mutex);
+       kfree(buf);
+out:
+       return retval;
+}
+
+/**
+ * cgroup_fork - initialize cgroup related fields during copy_process()
+ * @child: pointer to task_struct of the child process.
+ *
+ * A task is associated with the init_css_set until cgroup_post_fork()
+ * attaches it to the parent's css_set.  An empty cg_list indicates that
+ * @child isn't holding a reference to its css_set.
+ */
+void cgroup_fork(struct task_struct *child)
+{
+       RCU_INIT_POINTER(child->cgroups, &init_css_set);
+       INIT_LIST_HEAD(&child->cg_list);
+}
+
+/**
+ * cgroup_can_fork - called on a new task before the process is exposed
+ * @child: the task in question.
+ *
+ * This calls the subsystem can_fork() callbacks. If the can_fork() callback
+ * returns an error, the fork aborts with that error code. This allows for
+ * a cgroup subsystem to conditionally allow or deny new forks.
+ */
+int cgroup_can_fork(struct task_struct *child)
+{
+       struct cgroup_subsys *ss;
+       int i, j, ret;
+
+       do_each_subsys_mask(ss, i, have_canfork_callback) {
+               ret = ss->can_fork(child);
+               if (ret)
+                       goto out_revert;
+       } while_each_subsys_mask();
+
+       return 0;
+
+out_revert:
+       for_each_subsys(ss, j) {
+               if (j >= i)
+                       break;
+               if (ss->cancel_fork)
+                       ss->cancel_fork(child);
+       }
+
+       return ret;
+}
+
+/**
+ * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
+ * @child: the task in question
+ *
+ * This calls the cancel_fork() callbacks if a fork failed *after*
+ * cgroup_can_fork() succeeded.
+ */
+void cgroup_cancel_fork(struct task_struct *child)
+{
+       struct cgroup_subsys *ss;
+       int i;
+
+       for_each_subsys(ss, i)
+               if (ss->cancel_fork)
+                       ss->cancel_fork(child);
+}
+
+/**
+ * cgroup_post_fork - called on a new task after adding it to the task list
+ * @child: the task in question
+ *
+ * Adds the task to the list running through its css_set if necessary and
+ * calls the subsystem fork() callbacks.  Has to be after the task is
+ * visible on the task list in case we race with the first call to
+ * css_task_iter_start() - to guarantee that the new task ends up on its
+ * list.
+ */
+void cgroup_post_fork(struct task_struct *child)
+{
+       struct cgroup_subsys *ss;
+       int i;
+
+       /*
+        * This may race against cgroup_enable_task_cg_lists().  As that
+        * function sets use_task_css_set_links before grabbing
+        * tasklist_lock and we just went through tasklist_lock to add
+        * @child, it's guaranteed that either we see the set
+        * use_task_css_set_links or cgroup_enable_task_cg_lists() sees
+        * @child during its iteration.
+        *
+        * If we won the race, @child is associated with %current's
+        * css_set.  Grabbing css_set_lock guarantees both that the
+        * association is stable, and, on completion of the parent's
+        * migration, @child is visible in the source of migration or
+        * already in the destination cgroup.  This guarantee is necessary
+        * when implementing operations which need to migrate all tasks of
+        * a cgroup to another.
+        *
+        * Note that if we lose to cgroup_enable_task_cg_lists(), @child
+        * will remain in init_css_set.  This is safe because all tasks are
+        * in the init_css_set before cg_links is enabled and there's no
+        * operation which transfers all tasks out of init_css_set.
+        */
+       if (use_task_css_set_links) {
+               struct css_set *cset;
+
+               spin_lock_irq(&css_set_lock);
+               cset = task_css_set(current);
+               if (list_empty(&child->cg_list)) {
+                       get_css_set(cset);
+                       css_set_move_task(child, NULL, cset, false);
+               }
+               spin_unlock_irq(&css_set_lock);
+       }
+
+       /*
+        * Call ss->fork().  This must happen after @child is linked on
+        * css_set; otherwise, @child might change state between ->fork()
+        * and addition to css_set.
+        */
+       do_each_subsys_mask(ss, i, have_fork_callback) {
+               ss->fork(child);
+       } while_each_subsys_mask();
+}
+
+/**
+ * cgroup_exit - detach cgroup from exiting task
+ * @tsk: pointer to task_struct of exiting process
+ *
+ * Description: Detach cgroup from @tsk and release it.
+ *
+ * Note that cgroups marked notify_on_release force every task in
+ * them to take the global cgroup_mutex mutex when exiting.
+ * This could impact scaling on very large systems.  Be reluctant to
+ * use notify_on_release cgroups where very high task exit scaling
+ * is required on large systems.
+ *
+ * We set the exiting task's cgroup to the root cgroup (top_cgroup).  We
+ * call cgroup_exit() while the task is still competent to handle
+ * notify_on_release(), then leave the task attached to the root cgroup in
+ * each hierarchy for the remainder of its exit.  No need to bother with
+ * init_css_set refcnting.  init_css_set never goes away and we can't race
+ * with the migration path - PF_EXITING is visible to the migration path.
+ */
+void cgroup_exit(struct task_struct *tsk)
+{
+       struct cgroup_subsys *ss;
+       struct css_set *cset;
+       int i;
+
+       /*
+        * Unlink @tsk from its css_set.  As the migration path can't race
+        * with us, we can check css_set and cg_list without synchronization.
+        */
+       cset = task_css_set(tsk);
+
+       if (!list_empty(&tsk->cg_list)) {
+               spin_lock_irq(&css_set_lock);
+               css_set_move_task(tsk, cset, NULL, false);
+               spin_unlock_irq(&css_set_lock);
+       } else {
+               get_css_set(cset);
+       }
+
+       /* see cgroup_post_fork() for details */
+       do_each_subsys_mask(ss, i, have_exit_callback) {
+               ss->exit(tsk);
+       } while_each_subsys_mask();
+}
+
+void cgroup_free(struct task_struct *task)
+{
+       struct css_set *cset = task_css_set(task);
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       do_each_subsys_mask(ss, ssid, have_free_callback) {
+               ss->free(task);
+       } while_each_subsys_mask();
+
+       put_css_set(cset);
+}
+
+static int __init cgroup_disable(char *str)
+{
+       struct cgroup_subsys *ss;
+       char *token;
+       int i;
+
+       while ((token = strsep(&str, ",")) != NULL) {
+               if (!*token)
+                       continue;
+
+               for_each_subsys(ss, i) {
+                       if (strcmp(token, ss->name) &&
+                           strcmp(token, ss->legacy_name))
+                               continue;
+                       cgroup_disable_mask |= 1 << i;
+               }
+       }
+       return 1;
+}
+__setup("cgroup_disable=", cgroup_disable);
+
+/**
+ * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
+ * @dentry: directory dentry of interest
+ * @ss: subsystem of interest
+ *
+ * If @dentry is a directory for a cgroup which has @ss enabled on it, try
+ * to get the corresponding css and return it.  If such css doesn't exist
+ * or can't be pinned, an ERR_PTR value is returned.
+ */
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+                                                      struct cgroup_subsys *ss)
+{
+       struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
+       struct file_system_type *s_type = dentry->d_sb->s_type;
+       struct cgroup_subsys_state *css = NULL;
+       struct cgroup *cgrp;
+
+       /* is @dentry a cgroup dir? */
+       if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
+           !kn || kernfs_type(kn) != KERNFS_DIR)
+               return ERR_PTR(-EBADF);
+
+       rcu_read_lock();
+
+       /*
+        * This path doesn't originate from kernfs and @kn could already
+        * have been or be removed at any point.  @kn->priv is RCU
+        * protected for this access.  See css_release_work_fn() for details.
+        */
+       cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
+       if (cgrp)
+               css = cgroup_css(cgrp, ss);
+
+       if (!css || !css_tryget_online(css))
+               css = ERR_PTR(-ENOENT);
+
+       rcu_read_unlock();
+       return css;
+}
+
+/**
+ * css_from_id - lookup css by id
+ * @id: the cgroup id
+ * @ss: cgroup subsys to be looked into
+ *
+ * Returns the css if there's valid one with @id, otherwise returns NULL.
+ * Should be called under rcu_read_lock().
+ */
+struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
+{
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       return idr_find(&ss->css_idr, id);
+}
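+
+/*
+ * Typical caller pattern for css_from_id() (a sketch; the memory
+ * controller is an arbitrary example and "id" is a previously saved
+ * css id):
+ *
+ *     rcu_read_lock();
+ *     css = css_from_id(id, &memory_cgrp_subsys);
+ *     if (css && css_tryget_online(css)) {
+ *             ...
+ *             css_put(css);
+ *     }
+ *     rcu_read_unlock();
+ */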
+
+/**
+ * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
+ * @path: path on the default hierarchy
+ *
+ * Find the cgroup at @path on the default hierarchy, increment its
+ * reference count and return it.  Returns pointer to the found cgroup on
+ * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
+ * if @path points to a non-directory.
+ */
+struct cgroup *cgroup_get_from_path(const char *path)
+{
+       struct kernfs_node *kn;
+       struct cgroup *cgrp;
+
+       mutex_lock(&cgroup_mutex);
+
+       kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
+       if (kn) {
+               if (kernfs_type(kn) == KERNFS_DIR) {
+                       cgrp = kn->priv;
+                       cgroup_get(cgrp);
+               } else {
+                       cgrp = ERR_PTR(-ENOTDIR);
+               }
+               kernfs_put(kn);
+       } else {
+               cgrp = ERR_PTR(-ENOENT);
+       }
+
+       mutex_unlock(&cgroup_mutex);
+       return cgrp;
+}
+EXPORT_SYMBOL_GPL(cgroup_get_from_path);
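+
+/*
+ * Example caller pattern (illustrative; "/mygroup" is a path relative
+ * to the default hierarchy root):
+ *
+ *     struct cgroup *cgrp = cgroup_get_from_path("/mygroup");
+ *
+ *     if (!IS_ERR(cgrp)) {
+ *             ...
+ *             cgroup_put(cgrp);
+ *     }
+ */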
+
+/**
+ * cgroup_get_from_fd - get a cgroup pointer from a fd
+ * @fd: fd obtained by open(cgroup2_dir)
+ *
+ * Find the cgroup from an fd which should be obtained
+ * by opening a cgroup directory.  Returns a pointer to the
+ * cgroup on success. ERR_PTR is returned if the cgroup
+ * cannot be found.
+ */
+struct cgroup *cgroup_get_from_fd(int fd)
+{
+       struct cgroup_subsys_state *css;
+       struct cgroup *cgrp;
+       struct file *f;
+
+       f = fget_raw(fd);
+       if (!f)
+               return ERR_PTR(-EBADF);
+
+       css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
+       fput(f);
+       if (IS_ERR(css))
+               return ERR_CAST(css);
+
+       cgrp = css->cgroup;
+       if (!cgroup_on_dfl(cgrp)) {
+               cgroup_put(cgrp);
+               return ERR_PTR(-EBADF);
+       }
+
+       return cgrp;
+}
+EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
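+
+/*
+ * The fd is expected to come from userspace, e.g. (illustrative):
+ *
+ *     int fd = open("/sys/fs/cgroup/mygroup", O_RDONLY);
+ *
+ * and is then handed to the kernel (as bpf(2) does for BPF_PROG_ATTACH)
+ * and resolved here.  The caller must cgroup_put() the returned cgroup
+ * when done with it.
+ */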
+
+/*
+ * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
+ * definition in cgroup-defs.h.
+ */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+
+#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+
+DEFINE_SPINLOCK(cgroup_sk_update_lock);
+static bool cgroup_sk_alloc_disabled __read_mostly;
+
+void cgroup_sk_alloc_disable(void)
+{
+       if (cgroup_sk_alloc_disabled)
+               return;
+       pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
+       cgroup_sk_alloc_disabled = true;
+}
+
+#else
+
+#define cgroup_sk_alloc_disabled       false
+
+#endif
+
+void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
+{
+       if (cgroup_sk_alloc_disabled)
+               return;
+
+       /* Socket clone path */
+       if (skcd->val) {
+               cgroup_get(sock_cgroup_ptr(skcd));
+               return;
+       }
+
+       rcu_read_lock();
+
+       while (true) {
+               struct css_set *cset;
+
+               cset = task_css_set(current);
+               if (likely(cgroup_tryget(cset->dfl_cgrp))) {
+                       skcd->val = (unsigned long)cset->dfl_cgrp;
+                       break;
+               }
+               cpu_relax();
+       }
+
+       rcu_read_unlock();
+}
+
+void cgroup_sk_free(struct sock_cgroup_data *skcd)
+{
+       cgroup_put(sock_cgroup_ptr(skcd));
+}
+
+#endif /* CONFIG_SOCK_CGROUP_DATA */
+
+#ifdef CONFIG_CGROUP_BPF
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+                     enum bpf_attach_type type, bool overridable)
+{
+       struct cgroup *parent = cgroup_parent(cgrp);
+       int ret;
+
+       mutex_lock(&cgroup_mutex);
+       ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
+       mutex_unlock(&cgroup_mutex);
+       return ret;
+}
+#endif /* CONFIG_CGROUP_BPF */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
new file mode 100644 (file)
index 0000000..b308888
--- /dev/null
@@ -0,0 +1,2752 @@
+/*
+ *  kernel/cpuset.c
+ *
+ *  Processor and Memory placement constraints for sets of tasks.
+ *
+ *  Copyright (C) 2003 BULL SA.
+ *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
+ *  Copyright (C) 2006 Google, Inc
+ *
+ *  Portions derived from Patrick Mochel's sysfs code.
+ *  sysfs is Copyright (c) 2001-3 Patrick Mochel
+ *
+ *  2003-10-10 Written by Simon Derr.
+ *  2003-10-22 Updates by Stephen Hemminger.
+ *  2004 May-July Rework by Paul Jackson.
+ *  2006 Rework by Paul Menage to use generic cgroups
+ *  2008 Rework of the scheduler domains and CPU hotplug handling
+ *       by Max Krasnyansky
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpuset.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/mempolicy.h>
+#include <linux/mm.h>
+#include <linux/memory.h>
+#include <linux/export.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/time64.h>
+#include <linux/backing-dev.h>
+#include <linux/sort.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/cgroup.h>
+#include <linux/wait.h>
+
+DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
+
+/* See "Frequency meter" comments, below. */
+
+struct fmeter {
+       int cnt;                /* unprocessed events count */
+       int val;                /* most recent output value */
+       time64_t time;          /* clock (secs) when val computed */
+       spinlock_t lock;        /* guards read or write of above */
+};
+
+struct cpuset {
+       struct cgroup_subsys_state css;
+
+       unsigned long flags;            /* "unsigned long" so bitops work */
+
+       /*
+        * On default hierarchy:
+        *
+        * The user-configured masks can only be changed by writing to
+        * cpuset.cpus and cpuset.mems, and won't be limited by the
+        * parent masks.
+        *
+        * The effective masks are the real masks that apply to the tasks
+        * in the cpuset. They may be changed if the configured masks are
+        * changed or hotplug happens.
+        *
+        * effective_mask == configured_mask & parent's effective_mask,
+        * and if it ends up empty, it will inherit the parent's mask.
+        *
+        *
+        * On legacy hierarchy:
+        *
+        * The user-configured masks are always the same as the effective masks.
+        */
+
+       /* user-configured CPUs and Memory Nodes allowed to tasks */
+       cpumask_var_t cpus_allowed;
+       nodemask_t mems_allowed;
+
+       /* effective CPUs and Memory Nodes allowed to tasks */
+       cpumask_var_t effective_cpus;
+       nodemask_t effective_mems;
+
+       /*
+        * These are the old Memory Nodes that tasks took on.
+        *
+        * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
+        * - A new cpuset's old_mems_allowed is initialized when some
+        *   task is moved into it.
+        * - old_mems_allowed is used in cpuset_migrate_mm() when we change
+        *   cpuset.mems_allowed and have tasks' nodemask updated, and
+        *   then old_mems_allowed is updated to mems_allowed.
+        */
+       nodemask_t old_mems_allowed;
+
+       struct fmeter fmeter;           /* memory_pressure filter */
+
+       /*
+        * Tasks are being attached to this cpuset.  Used to prevent
+        * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
+        */
+       int attach_in_progress;
+
+       /* partition number for rebuild_sched_domains() */
+       int pn;
+
+       /* for custom sched domain */
+       int relax_domain_level;
+};
+
+static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
+{
+       return css ? container_of(css, struct cpuset, css) : NULL;
+}
+
+/* Retrieve the cpuset for a task */
+static inline struct cpuset *task_cs(struct task_struct *task)
+{
+       return css_cs(task_css(task, cpuset_cgrp_id));
+}
+
+static inline struct cpuset *parent_cs(struct cpuset *cs)
+{
+       return css_cs(cs->css.parent);
+}
+
+#ifdef CONFIG_NUMA
+static inline bool task_has_mempolicy(struct task_struct *task)
+{
+       return task->mempolicy;
+}
+#else
+static inline bool task_has_mempolicy(struct task_struct *task)
+{
+       return false;
+}
+#endif
+
+
+/* bits in struct cpuset flags field */
+typedef enum {
+       CS_ONLINE,
+       CS_CPU_EXCLUSIVE,
+       CS_MEM_EXCLUSIVE,
+       CS_MEM_HARDWALL,
+       CS_MEMORY_MIGRATE,
+       CS_SCHED_LOAD_BALANCE,
+       CS_SPREAD_PAGE,
+       CS_SPREAD_SLAB,
+} cpuset_flagbits_t;
+
+/* convenient tests for these bits */
+static inline bool is_cpuset_online(const struct cpuset *cs)
+{
+       return test_bit(CS_ONLINE, &cs->flags);
+}
+
+static inline int is_cpu_exclusive(const struct cpuset *cs)
+{
+       return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
+}
+
+static inline int is_mem_exclusive(const struct cpuset *cs)
+{
+       return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
+}
+
+static inline int is_mem_hardwall(const struct cpuset *cs)
+{
+       return test_bit(CS_MEM_HARDWALL, &cs->flags);
+}
+
+static inline int is_sched_load_balance(const struct cpuset *cs)
+{
+       return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+}
+
+static inline int is_memory_migrate(const struct cpuset *cs)
+{
+       return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
+}
+
+static inline int is_spread_page(const struct cpuset *cs)
+{
+       return test_bit(CS_SPREAD_PAGE, &cs->flags);
+}
+
+static inline int is_spread_slab(const struct cpuset *cs)
+{
+       return test_bit(CS_SPREAD_SLAB, &cs->flags);
+}
+
+static struct cpuset top_cpuset = {
+       .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
+                 (1 << CS_MEM_EXCLUSIVE)),
+};
+
+/**
+ * cpuset_for_each_child - traverse online children of a cpuset
+ * @child_cs: loop cursor pointing to the current child
+ * @pos_css: used for iteration
+ * @parent_cs: target cpuset to walk children of
+ *
+ * Walk @child_cs through the online children of @parent_cs.  Must be used
+ * with RCU read locked.
+ */
+#define cpuset_for_each_child(child_cs, pos_css, parent_cs)            \
+       css_for_each_child((pos_css), &(parent_cs)->css)                \
+               if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
+
+/**
+ * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
+ * @des_cs: loop cursor pointing to the current descendant
+ * @pos_css: used for iteration
+ * @root_cs: target cpuset to walk descendants of
+ *
+ * Walk @des_cs through the online descendants of @root_cs.  Must be used
+ * with RCU read locked.  The caller may modify @pos_css by calling
+ * css_rightmost_descendant() to skip a subtree.  @root_cs is included in
+ * the iteration and is the first node to be visited.
+ */
+#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)       \
+       css_for_each_descendant_pre((pos_css), &(root_cs)->css)         \
+               if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
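+
+/*
+ * Usage sketch for the walkers above (illustrative; the loop body is
+ * elided):
+ *
+ *     struct cpuset *cp;
+ *     struct cgroup_subsys_state *pos_css;
+ *
+ *     rcu_read_lock();
+ *     cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
+ *             ...
+ *     }
+ *     rcu_read_unlock();
+ */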
+
+/*
+ * There are two global locks guarding cpuset structures - cpuset_mutex and
+ * callback_lock. We also require taking task_lock() when dereferencing a
+ * task's cpuset pointer. See "The task_lock() exception", at the end of this
+ * comment.
+ *
+ * A task must hold both locks to modify cpusets.  If a task holds
+ * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
+ * is the only task able to also acquire callback_lock and be able to
+ * modify cpusets.  It can perform various checks on the cpuset structure
+ * first, knowing nothing will change.  It can also allocate memory while
+ * just holding cpuset_mutex.  While it is performing these checks, various
+ * callback routines can briefly acquire callback_lock to query cpusets.
+ * Once it is ready to make the changes, it takes callback_lock, blocking
+ * everyone else.
+ *
+ * Calls to the kernel memory allocator cannot be made while holding
+ * callback_lock, as that would risk double tripping on callback_lock
+ * from one of the callbacks into the cpuset code from within
+ * __alloc_pages().
+ *
+ * If a task is only holding callback_lock, then it has read-only
+ * access to cpusets.
+ *
+ * Now, the task_struct fields mems_allowed and mempolicy may be changed
+ * by another task, so we use alloc_lock in the task_struct to protect
+ * them.
+ *
+ * The cpuset_common_file_read() handlers only hold callback_lock across
+ * small pieces of code, such as when reading out possibly multi-word
+ * cpumasks and nodemasks.
+ *
+ * Accessing a task's cpuset should be done in accordance with the
+ * guidelines for accessing subsystem state in kernel/cgroup.c
+ */
+
+static DEFINE_MUTEX(cpuset_mutex);
+static DEFINE_SPINLOCK(callback_lock);
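+
+/*
+ * A sketch of the writer-side pattern the comment above describes
+ * (illustrative only):
+ *
+ *     mutex_lock(&cpuset_mutex);
+ *     ... check invariants, allocate ...
+ *     spin_lock_irq(&callback_lock);
+ *     ... publish the change ...
+ *     spin_unlock_irq(&callback_lock);
+ *     mutex_unlock(&cpuset_mutex);
+ */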
+
+static struct workqueue_struct *cpuset_migrate_mm_wq;
+
+/*
+ * CPU / memory hotplug is handled asynchronously.
+ */
+static void cpuset_hotplug_workfn(struct work_struct *work);
+static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
+
+static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
+
+/*
+ * This is ugly, but preserves the userspace API for existing cpuset
+ * users. If someone tries to mount the "cpuset" filesystem, we
+ * silently switch it to mount "cgroup" instead
+ */
+static struct dentry *cpuset_mount(struct file_system_type *fs_type,
+                        int flags, const char *unused_dev_name, void *data)
+{
+       struct file_system_type *cgroup_fs = get_fs_type("cgroup");
+       struct dentry *ret = ERR_PTR(-ENODEV);
+       if (cgroup_fs) {
+               char mountopts[] =
+                       "cpuset,noprefix,"
+                       "release_agent=/sbin/cpuset_release_agent";
+               ret = cgroup_fs->mount(cgroup_fs, flags,
+                                          unused_dev_name, mountopts);
+               put_filesystem(cgroup_fs);
+       }
+       return ret;
+}
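+
+/*
+ * Net effect (illustrative): a userspace
+ *
+ *     mount -t cpuset none /dev/cpuset
+ *
+ * behaves as if
+ *
+ *     mount -t cgroup -o cpuset,noprefix none /dev/cpuset
+ *
+ * had been requested, with the release_agent option from the string
+ * above added as well.
+ */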
+
+static struct file_system_type cpuset_fs_type = {
+       .name = "cpuset",
+       .mount = cpuset_mount,
+};
+
+/*
+ * Return in pmask the portion of a cpuset's cpus_allowed that
+ * are online.  If none are online, walk up the cpuset hierarchy
+ * until we find one that does have some online cpus.
+ *
+ * One way or another, we guarantee to return some non-empty subset
+ * of cpu_online_mask.
+ *
+ * Call with callback_lock or cpuset_mutex held.
+ */
+static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
+{
+       while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
+               cs = parent_cs(cs);
+               if (unlikely(!cs)) {
+                       /*
+                        * The top cpuset doesn't have any online cpu as a
+                        * consequence of a race between cpuset_hotplug_work
+                        * and cpu hotplug notifier.  But we know the top
+                        * cpuset's effective_cpus is on its way to being
+                        * identical to cpu_online_mask.
+                        */
+                       cpumask_copy(pmask, cpu_online_mask);
+                       return;
+               }
+       }
+       cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
+}
+
+/*
+ * Return in *pmask the portion of a cpuset's mems_allowed that
+ * are online, with memory.  If none are online with memory, walk
+ * up the cpuset hierarchy until we find one that does have some
+ * online mems.  The top cpuset always has some mems online.
+ *
+ * One way or another, we guarantee to return some non-empty subset
+ * of node_states[N_MEMORY].
+ *
+ * Call with callback_lock or cpuset_mutex held.
+ */
+static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
+{
+       while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY]))
+               cs = parent_cs(cs);
+       nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]);
+}
+
+/*
+ * update task's spread flag if cpuset's page/slab spread flag is set
+ *
+ * Call with callback_lock or cpuset_mutex held.
+ */
+static void cpuset_update_task_spread_flag(struct cpuset *cs,
+                                       struct task_struct *tsk)
+{
+       if (is_spread_page(cs))
+               task_set_spread_page(tsk);
+       else
+               task_clear_spread_page(tsk);
+
+       if (is_spread_slab(cs))
+               task_set_spread_slab(tsk);
+       else
+               task_clear_spread_slab(tsk);
+}
+
+/*
+ * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
+ *
+ * One cpuset is a subset of another if all its allowed CPUs and
+ * Memory Nodes are a subset of the other, and its exclusive flags
+ * are only set if the other's are set.  Call holding cpuset_mutex.
+ */
+
+static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
+{
+       return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
+               nodes_subset(p->mems_allowed, q->mems_allowed) &&
+               is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
+               is_mem_exclusive(p) <= is_mem_exclusive(q);
+}
+
+/**
+ * alloc_trial_cpuset - allocate a trial cpuset
+ * @cs: the cpuset that the trial cpuset duplicates
+ */
+static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
+{
+       struct cpuset *trial;
+
+       trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
+       if (!trial)
+               return NULL;
+
+       if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL))
+               goto free_cs;
+       if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL))
+               goto free_cpus;
+
+       cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+       cpumask_copy(trial->effective_cpus, cs->effective_cpus);
+       return trial;
+
+free_cpus:
+       free_cpumask_var(trial->cpus_allowed);
+free_cs:
+       kfree(trial);
+       return NULL;
+}
+
+/**
+ * free_trial_cpuset - free the trial cpuset
+ * @trial: the trial cpuset to be freed
+ */
+static void free_trial_cpuset(struct cpuset *trial)
+{
+       free_cpumask_var(trial->effective_cpus);
+       free_cpumask_var(trial->cpus_allowed);
+       kfree(trial);
+}
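+
+/*
+ * The usual pattern, as in update_flag() below: duplicate the live
+ * cpuset, apply the proposed change to the copy, run validate_change()
+ * against the original, and only then publish:
+ *
+ *     trialcs = alloc_trial_cpuset(cs);
+ *     if (!trialcs)
+ *             return -ENOMEM;
+ *     set_bit(CS_SPREAD_PAGE, &trialcs->flags);
+ *     err = validate_change(cs, trialcs);
+ *     ...
+ *     free_trial_cpuset(trialcs);
+ */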
+
+/*
+ * validate_change() - Used to validate that any proposed cpuset change
+ *                    follows the structural rules for cpusets.
+ *
+ * If we replaced the flag and mask values of the current cpuset
+ * (cur) with those values in the trial cpuset (trial), would
+ * our various subset and exclusive rules still be valid?  Presumes
+ * cpuset_mutex held.
+ *
+ * 'cur' is the address of an actual, in-use cpuset.  Operations
+ * such as list traversal that depend on the actual address of the
+ * cpuset in the list must use cur below, not trial.
+ *
+ * 'trial' is the address of a bulk structure copy of cur, with
+ * perhaps one or more of the fields cpus_allowed, mems_allowed,
+ * or flags changed to new, trial values.
+ *
+ * Return 0 if valid, -errno if not.
+ */
+
+static int validate_change(struct cpuset *cur, struct cpuset *trial)
+{
+       struct cgroup_subsys_state *css;
+       struct cpuset *c, *par;
+       int ret;
+
+       rcu_read_lock();
+
+       /* Each of our child cpusets must be a subset of us */
+       ret = -EBUSY;
+       cpuset_for_each_child(c, css, cur)
+               if (!is_cpuset_subset(c, trial))
+                       goto out;
+
+       /* Remaining checks don't apply to root cpuset */
+       ret = 0;
+       if (cur == &top_cpuset)
+               goto out;
+
+       par = parent_cs(cur);
+
+       /* On legacy hierarchy, we must be a subset of our parent cpuset. */
+       ret = -EACCES;
+       if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+           !is_cpuset_subset(trial, par))
+               goto out;
+
+       /*
+        * If either I or some sibling (!= me) is exclusive, we can't
+        * overlap
+        */
+       ret = -EINVAL;
+       cpuset_for_each_child(c, css, par) {
+               if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
+                   c != cur &&
+                   cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
+                       goto out;
+               if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
+                   c != cur &&
+                   nodes_intersects(trial->mems_allowed, c->mems_allowed))
+                       goto out;
+       }
+
+       /*
+        * Cpusets with tasks - existing or newly being attached - can't
+        * be changed to have empty cpus_allowed or mems_allowed.
+        */
+       ret = -ENOSPC;
+       if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
+               if (!cpumask_empty(cur->cpus_allowed) &&
+                   cpumask_empty(trial->cpus_allowed))
+                       goto out;
+               if (!nodes_empty(cur->mems_allowed) &&
+                   nodes_empty(trial->mems_allowed))
+                       goto out;
+       }
+
+       /*
+        * We can't shrink if we won't have enough room for SCHED_DEADLINE
+        * tasks.
+        */
+       ret = -EBUSY;
+       if (is_cpu_exclusive(cur) &&
+           !cpuset_cpumask_can_shrink(cur->cpus_allowed,
+                                      trial->cpus_allowed))
+               goto out;
+
+       ret = 0;
+out:
+       rcu_read_unlock();
+       return ret;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Helper routine for generate_sched_domains().
+ * Do cpusets a, b have overlapping effective_cpus masks?
+ */
+static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
+{
+       return cpumask_intersects(a->effective_cpus, b->effective_cpus);
+}
+
+static void
+update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
+{
+       if (dattr->relax_domain_level < c->relax_domain_level)
+               dattr->relax_domain_level = c->relax_domain_level;
+       return;
+}
+
+static void update_domain_attr_tree(struct sched_domain_attr *dattr,
+                                   struct cpuset *root_cs)
+{
+       struct cpuset *cp;
+       struct cgroup_subsys_state *pos_css;
+
+       rcu_read_lock();
+       cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
+               /* skip the whole subtree if @cp doesn't have any CPU */
+               if (cpumask_empty(cp->cpus_allowed)) {
+                       pos_css = css_rightmost_descendant(pos_css);
+                       continue;
+               }
+
+               if (is_sched_load_balance(cp))
+                       update_domain_attr(dattr, cp);
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * generate_sched_domains()
+ *
+ * This function builds a partial partition of the system's CPUs.
+ * A 'partial partition' is a set of non-overlapping subsets whose
+ * union is a subset of that set.
+ * The output of this function needs to be passed to kernel/sched/core.c
+ * partition_sched_domains() routine, which will rebuild the scheduler's
+ * load balancing domains (sched domains) as specified by that partial
+ * partition.
+ *
+ * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
+ * for a background explanation of this.
+ *
+ * Does not return errors, on the theory that the callers of this
+ * routine would rather not worry about failures to rebuild sched
+ * domains when operating in the severe memory shortage situations
+ * that could cause allocation failures below.
+ *
+ * Must be called with cpuset_mutex held.
+ *
+ * The three key local variables below are:
+ *   cp  - cpuset pointer, used (with pos_css) to perform a
+ *        top-down scan of all cpusets.  This scan loads a pointer
+ *        to each cpuset marked is_sched_load_balance into the
+ *        array 'csa'.  For our purposes, rebuilding the scheduler's
+ *        sched domains, we can ignore !is_sched_load_balance cpusets.
+ *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
+ *        that need to be load balanced, for convenient iterative
+ *        access by the subsequent code that finds the best partition,
+ *        i.e the set of domains (subsets) of CPUs such that the
+ *        cpus_allowed of every cpuset marked is_sched_load_balance
+ *        is a subset of one of these domains, while there are as
+ *        many such domains as possible, each as small as possible.
+ * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
+ *        the kernel/sched/core.c routine partition_sched_domains() in a
+ *        convenient format, that can be easily compared to the prior
+ *        value to determine what partition elements (sched domains)
+ *        were changed (added or removed).
+ *
+ * Finding the best partition (set of domains):
+ *     The triple nested loops below over i, j, k scan over the
+ *     load balanced cpusets (using the array of cpuset pointers in
+ *     csa[]) looking for pairs of cpusets that have overlapping
+ *     cpus_allowed, but which don't have the same 'pn' partition
+ *     number, and merges them into the same partition.  It keeps
+ *     looping on the 'restart' label until it can no longer find
+ *     any such pairs.
+ *
+ *     The union of the cpus_allowed masks from the set of
+ *     all cpusets having the same 'pn' value then form the one
+ *     element of the partition (one sched domain) to be passed to
+ *     partition_sched_domains().
+ */
+static int generate_sched_domains(cpumask_var_t **domains,
+                       struct sched_domain_attr **attributes)
+{
+       struct cpuset *cp;      /* top-down scan of cpusets */
+       struct cpuset **csa;    /* array of all cpuset ptrs */
+       int csn;                /* how many cpuset ptrs in csa so far */
+       int i, j, k;            /* indices for partition finding loops */
+       cpumask_var_t *doms;    /* resulting partition; i.e. sched domains */
+       cpumask_var_t non_isolated_cpus;  /* load balanced CPUs */
+       struct sched_domain_attr *dattr;  /* attributes for custom domains */
+       int ndoms = 0;          /* number of sched domains in result */
+       int nslot;              /* next empty doms[] struct cpumask slot */
+       struct cgroup_subsys_state *pos_css;
+
+       doms = NULL;
+       dattr = NULL;
+       csa = NULL;
+
+       if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
+               goto done;
+       cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+
+       /* Special case for the 99% of systems with one, full, sched domain */
+       if (is_sched_load_balance(&top_cpuset)) {
+               ndoms = 1;
+               doms = alloc_sched_domains(ndoms);
+               if (!doms)
+                       goto done;
+
+               dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
+               if (dattr) {
+                       *dattr = SD_ATTR_INIT;
+                       update_domain_attr_tree(dattr, &top_cpuset);
+               }
+               cpumask_and(doms[0], top_cpuset.effective_cpus,
+                                    non_isolated_cpus);
+
+               goto done;
+       }
+
+       csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL);
+       if (!csa)
+               goto done;
+       csn = 0;
+
+       rcu_read_lock();
+       cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
+               if (cp == &top_cpuset)
+                       continue;
+               /*
+                * Continue traversing beyond @cp iff @cp has some CPUs and
+                * isn't load balancing.  The former is obvious.  The
+                * latter: All child cpusets contain a subset of the
+                * parent's cpus, so just skip them, and then we call
+                * update_domain_attr_tree() to calc relax_domain_level of
+                * the corresponding sched domain.
+                */
+               if (!cpumask_empty(cp->cpus_allowed) &&
+                   !(is_sched_load_balance(cp) &&
+                     cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
+                       continue;
+
+               if (is_sched_load_balance(cp))
+                       csa[csn++] = cp;
+
+               /* skip @cp's subtree */
+               pos_css = css_rightmost_descendant(pos_css);
+       }
+       rcu_read_unlock();
+
+       for (i = 0; i < csn; i++)
+               csa[i]->pn = i;
+       ndoms = csn;
+
+restart:
+       /* Find the best partition (set of sched domains) */
+       for (i = 0; i < csn; i++) {
+               struct cpuset *a = csa[i];
+               int apn = a->pn;
+
+               for (j = 0; j < csn; j++) {
+                       struct cpuset *b = csa[j];
+                       int bpn = b->pn;
+
+                       if (apn != bpn && cpusets_overlap(a, b)) {
+                               for (k = 0; k < csn; k++) {
+                                       struct cpuset *c = csa[k];
+
+                                       if (c->pn == bpn)
+                                               c->pn = apn;
+                               }
+                               ndoms--;        /* one less element */
+                               goto restart;
+                       }
+               }
+       }
+
+       /*
+        * Now we know how many domains to create.
+        * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
+        */
+       doms = alloc_sched_domains(ndoms);
+       if (!doms)
+               goto done;
+
+       /*
+        * The rest of the code, including the scheduler, can deal with
+        * the dattr==NULL case.  No need to abort if the allocation fails.
+        */
+       dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
+
+       for (nslot = 0, i = 0; i < csn; i++) {
+               struct cpuset *a = csa[i];
+               struct cpumask *dp;
+               int apn = a->pn;
+
+               if (apn < 0) {
+                       /* Skip completed partitions */
+                       continue;
+               }
+
+               dp = doms[nslot];
+
+               if (nslot == ndoms) {
+                       static int warnings = 10;
+                       if (warnings) {
+                               pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n",
+                                       nslot, ndoms, csn, i, apn);
+                               warnings--;
+                       }
+                       continue;
+               }
+
+               cpumask_clear(dp);
+               if (dattr)
+                       *(dattr + nslot) = SD_ATTR_INIT;
+               for (j = i; j < csn; j++) {
+                       struct cpuset *b = csa[j];
+
+                       if (apn == b->pn) {
+                               cpumask_or(dp, dp, b->effective_cpus);
+                               cpumask_and(dp, dp, non_isolated_cpus);
+                               if (dattr)
+                                       update_domain_attr_tree(dattr + nslot, b);
+
+                               /* Done with this partition */
+                               b->pn = -1;
+                       }
+               }
+               nslot++;
+       }
+       BUG_ON(nslot != ndoms);
+
+done:
+       free_cpumask_var(non_isolated_cpus);
+       kfree(csa);
+
+       /*
+        * Fallback to the default domain if kmalloc() failed.
+        * See comments in partition_sched_domains().
+        */
+       if (doms == NULL)
+               ndoms = 1;
+
+       *domains    = doms;
+       *attributes = dattr;
+       return ndoms;
+}
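+
+/*
+ * A worked example, assuming no isolated CPUs and effective_cpus equal
+ * to cpus_allowed: three load-balanced sibling cpusets A = 0-3,
+ * B = 2-5 and C = 6-7 start with pn = 0, 1, 2 and ndoms = 3.  A and B
+ * overlap, so B's pn is folded into A's and ndoms drops to 2.  The
+ * partition handed to partition_sched_domains() is then { 0-5, 6-7 }:
+ * one sched domain spanning A and B, and one for C.
+ */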
+
+/*
+ * Rebuild scheduler domains.
+ *
+ * If the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
+ * which has that flag enabled, or if any cpuset with a non-empty
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
+ *
+ * Call with cpuset_mutex held.  Takes get_online_cpus().
+ */
+static void rebuild_sched_domains_locked(void)
+{
+       struct sched_domain_attr *attr;
+       cpumask_var_t *doms;
+       int ndoms;
+
+       lockdep_assert_held(&cpuset_mutex);
+       get_online_cpus();
+
+       /*
+        * We have raced with CPU hotplug.  Bail out, to avoid passing
+        * doms with an offlined cpu to partition_sched_domains().
+        * Anyway, the hotplug work item will rebuild the sched domains.
+        */
+       if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
+               goto out;
+
+       /* Generate domain masks and attrs */
+       ndoms = generate_sched_domains(&doms, &attr);
+
+       /* Have scheduler rebuild the domains */
+       partition_sched_domains(ndoms, doms, attr);
+out:
+       put_online_cpus();
+}
+#else /* !CONFIG_SMP */
+static void rebuild_sched_domains_locked(void)
+{
+}
+#endif /* CONFIG_SMP */
+
+void rebuild_sched_domains(void)
+{
+       mutex_lock(&cpuset_mutex);
+       rebuild_sched_domains_locked();
+       mutex_unlock(&cpuset_mutex);
+}
+
+/**
+ * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ *
+ * Iterate through each task of @cs updating its cpus_allowed to the
+ * effective cpuset's.  As this function is called with cpuset_mutex held,
+ * cpuset membership stays stable.
+ */
+static void update_tasks_cpumask(struct cpuset *cs)
+{
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       css_task_iter_start(&cs->css, &it);
+       while ((task = css_task_iter_next(&it)))
+               set_cpus_allowed_ptr(task, cs->effective_cpus);
+       css_task_iter_end(&it);
+}
+
+/*
+ * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
+ * @cs: the cpuset to consider
+ * @new_cpus: temp variable for calculating new effective_cpus
+ *
+ * When the configured cpumask is changed, the effective cpumasks of this
+ * cpuset and all its descendants need to be updated.
+ *
+ * On legacy hierarchy, effective_cpus will be the same as cpus_allowed.
+ *
+ * Called with cpuset_mutex held
+ */
+static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
+{
+       struct cpuset *cp;
+       struct cgroup_subsys_state *pos_css;
+       bool need_rebuild_sched_domains = false;
+
+       rcu_read_lock();
+       cpuset_for_each_descendant_pre(cp, pos_css, cs) {
+               struct cpuset *parent = parent_cs(cp);
+
+               cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus);
+
+               /*
+                * If it becomes empty, inherit the effective mask of the
+                * parent, which is guaranteed to have some CPUs.
+                */
+               if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+                   cpumask_empty(new_cpus))
+                       cpumask_copy(new_cpus, parent->effective_cpus);
+
+               /* Skip the whole subtree if the cpumask remains the same. */
+               if (cpumask_equal(new_cpus, cp->effective_cpus)) {
+                       pos_css = css_rightmost_descendant(pos_css);
+                       continue;
+               }
+
+               if (!css_tryget_online(&cp->css))
+                       continue;
+               rcu_read_unlock();
+
+               spin_lock_irq(&callback_lock);
+               cpumask_copy(cp->effective_cpus, new_cpus);
+               spin_unlock_irq(&callback_lock);
+
+               WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+                       !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
+
+               update_tasks_cpumask(cp);
+
+               /*
+                * If the effective cpumask of any non-empty cpuset is changed,
+                * we need to rebuild sched domains.
+                */
+               if (!cpumask_empty(cp->cpus_allowed) &&
+                   is_sched_load_balance(cp))
+                       need_rebuild_sched_domains = true;
+
+               rcu_read_lock();
+               css_put(&cp->css);
+       }
+       rcu_read_unlock();
+
+       if (need_rebuild_sched_domains)
+               rebuild_sched_domains_locked();
+}
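+
+/*
+ * Example (illustrative): with a parent whose effective_cpus is 0-3
+ * and a child configured with cpus_allowed = 2-5, the child's new
+ * effective_cpus is the intersection 2-3.  On the default hierarchy,
+ * a child whose intersection is empty inherits the parent's effective
+ * mask instead, so effective_cpus never goes empty.
+ */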
+
+/**
+ * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
+ * @cs: the cpuset to consider
+ * @trialcs: trial cpuset
+ * @buf: buffer of cpu numbers written to this cpuset
+ */
+static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+                         const char *buf)
+{
+       int retval;
+
+       /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
+       if (cs == &top_cpuset)
+               return -EACCES;
+
+       /*
+        * An empty cpus_allowed is ok only if the cpuset has no tasks.
+        * Since cpulist_parse() fails on an empty mask, we special case
+        * that parsing.  The validate_change() call ensures that cpusets
+        * with tasks have cpus.
+        */
+       if (!*buf) {
+               cpumask_clear(trialcs->cpus_allowed);
+       } else {
+               retval = cpulist_parse(buf, trialcs->cpus_allowed);
+               if (retval < 0)
+                       return retval;
+
+               if (!cpumask_subset(trialcs->cpus_allowed,
+                                   top_cpuset.cpus_allowed))
+                       return -EINVAL;
+       }
+
+       /* Nothing to do if the cpus didn't change */
+       if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
+               return 0;
+
+       retval = validate_change(cs, trialcs);
+       if (retval < 0)
+               return retval;
+
+       spin_lock_irq(&callback_lock);
+       cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+       spin_unlock_irq(&callback_lock);
+
+       /* use trialcs->cpus_allowed as a temp variable */
+       update_cpumasks_hier(cs, trialcs->cpus_allowed);
+       return 0;
+}
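+
+/*
+ * From userspace this is reached by writing a cpu list to the "cpus"
+ * file, e.g. (paths illustrative):
+ *
+ *     echo 0-3,6 > /sys/fs/cgroup/cpuset/mygroup/cpuset.cpus
+ *
+ * An empty write clears cpus_allowed, which validate_change() rejects
+ * while the cpuset still contains tasks.
+ */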
+
+/*
+ * Migrate memory region from one set of nodes to another.  This is
+ * performed asynchronously as it can be called from process migration path
+ * holding locks involved in process management.  All mm migrations are
+ * performed in the queued order and can be waited for by flushing
+ * cpuset_migrate_mm_wq.
+ */
+
+struct cpuset_migrate_mm_work {
+       struct work_struct      work;
+       struct mm_struct        *mm;
+       nodemask_t              from;
+       nodemask_t              to;
+};
+
+static void cpuset_migrate_mm_workfn(struct work_struct *work)
+{
+       struct cpuset_migrate_mm_work *mwork =
+               container_of(work, struct cpuset_migrate_mm_work, work);
+
+       /* on a wq worker, no need to worry about %current's mems_allowed */
+       do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
+       mmput(mwork->mm);
+       kfree(mwork);
+}
+
+static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
+                                                       const nodemask_t *to)
+{
+       struct cpuset_migrate_mm_work *mwork;
+
+       mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
+       if (mwork) {
+               mwork->mm = mm;
+               mwork->from = *from;
+               mwork->to = *to;
+               INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
+               queue_work(cpuset_migrate_mm_wq, &mwork->work);
+       } else {
+               mmput(mm);
+       }
+}
+
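+/*
+ * Note the reference convention: the caller hands its mm reference to
+ * cpuset_migrate_mm(), and the reference is dropped by the work item
+ * once migration finishes (or immediately if the work item can't be
+ * allocated), so the caller must not mmput() an mm it has passed in.
+ */
+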
+static void cpuset_post_attach(void)
+{
+       flush_workqueue(cpuset_migrate_mm_wq);
+}
+
+/*
+ * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
+ * @tsk: the task to change
+ * @newmems: new nodes that the task will be set
+ *
+ * In order to avoid seeing no nodes if the old and new nodes are disjoint,
+ * we structure updates as setting all new allowed nodes, then clearing newly
+ * disallowed ones.
+ */
+static void cpuset_change_task_nodemask(struct task_struct *tsk,
+                                       nodemask_t *newmems)
+{
+       bool need_loop;
+
+       task_lock(tsk);
+       /*
+        * Determine if a loop is necessary if another thread is doing
+        * read_mems_allowed_begin().  If at least one node remains unchanged and
+        * tsk does not have a mempolicy, then an empty nodemask will not be
+        * possible when mems_allowed is larger than a word.
+        */
+       need_loop = task_has_mempolicy(tsk) ||
+                       !nodes_intersects(*newmems, tsk->mems_allowed);
+
+       if (need_loop) {
+               local_irq_disable();
+               write_seqcount_begin(&tsk->mems_allowed_seq);
+       }
+
+       nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
+       mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
+
+       mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
+       tsk->mems_allowed = *newmems;
+
+       if (need_loop) {
+               write_seqcount_end(&tsk->mems_allowed_seq);
+               local_irq_enable();
+       }
+
+       task_unlock(tsk);
+}
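+
+/*
+ * Example (illustrative): moving a task from mems 0-1 to the disjoint
+ * mems 2-3.  nodes_or() first widens tsk->mems_allowed to 0-3, so a
+ * concurrent allocation always sees at least one allowed node; the
+ * final assignment then narrows the mask to 2-3.
+ */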
+
+static void *cpuset_being_rebound;
+
+/**
+ * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
+ *
+ * Iterate through each task of @cs updating its mems_allowed to the
+ * effective cpuset's.  As this function is called with cpuset_mutex held,
+ * cpuset membership stays stable.
+ */
+static void update_tasks_nodemask(struct cpuset *cs)
+{
+       static nodemask_t newmems;      /* protected by cpuset_mutex */
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       cpuset_being_rebound = cs;              /* causes mpol_dup() rebind */
+
+       guarantee_online_mems(cs, &newmems);
+
+       /*
+        * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
+        * take while holding tasklist_lock.  Forks can happen - the
+        * mpol_dup() cpuset_being_rebound check will catch such forks,
+        * and rebind their vma mempolicies too.  Because we still hold
+        * the global cpuset_mutex, we know that no other rebind effort
+        * will be contending for the global variable cpuset_being_rebound.
+        * It's ok if we rebind the same mm twice; mpol_rebind_mm()
+        * is idempotent.  Also migrate pages in each mm to new nodes.
+        */
+       css_task_iter_start(&cs->css, &it);
+       while ((task = css_task_iter_next(&it))) {
+               struct mm_struct *mm;
+               bool migrate;
+
+               cpuset_change_task_nodemask(task, &newmems);
+
+               mm = get_task_mm(task);
+               if (!mm)
+                       continue;
+
+               migrate = is_memory_migrate(cs);
+
+               mpol_rebind_mm(mm, &cs->mems_allowed);
+               if (migrate)
+                       cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
+               else
+                       mmput(mm);
+       }
+       css_task_iter_end(&it);
+
+       /*
+        * All the tasks' nodemasks have been updated, update
+        * cs->old_mems_allowed.
+        */
+       cs->old_mems_allowed = newmems;
+
+       /* We're done rebinding vmas to this cpuset's new mems_allowed. */
+       cpuset_being_rebound = NULL;
+}
+
+/*
+ * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
+ * @cs: the cpuset to consider
+ * @new_mems: a temp variable for calculating new effective_mems
+ *
+ * When configured nodemask is changed, the effective nodemasks of this cpuset
+ * and all its descendants need to be updated.
+ *
+ * On legacy hierarchy, effective_mems will be the same as mems_allowed.
+ *
+ * Called with cpuset_mutex held
+ */
+static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
+{
+       struct cpuset *cp;
+       struct cgroup_subsys_state *pos_css;
+
+       rcu_read_lock();
+       cpuset_for_each_descendant_pre(cp, pos_css, cs) {
+               struct cpuset *parent = parent_cs(cp);
+
+               nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems);
+
+               /*
+                * If it becomes empty, inherit the effective mask of the
+                * parent, which is guaranteed to have some MEMs.
+                */
+               if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+                   nodes_empty(*new_mems))
+                       *new_mems = parent->effective_mems;
+
+               /* Skip the whole subtree if the nodemask remains the same. */
+               if (nodes_equal(*new_mems, cp->effective_mems)) {
+                       pos_css = css_rightmost_descendant(pos_css);
+                       continue;
+               }
+
+               if (!css_tryget_online(&cp->css))
+                       continue;
+               rcu_read_unlock();
+
+               spin_lock_irq(&callback_lock);
+               cp->effective_mems = *new_mems;
+               spin_unlock_irq(&callback_lock);
+
+               WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+                       !nodes_equal(cp->mems_allowed, cp->effective_mems));
+
+               update_tasks_nodemask(cp);
+
+               rcu_read_lock();
+               css_put(&cp->css);
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset.  Needs to validate the request, update the cpuset's
+ * mems_allowed, and for each task in the cpuset, update mems_allowed,
+ * rebind the task's mempolicy and any vma mempolicies, and, if the
+ * cpuset is marked 'memory_migrate', migrate the task's pages to the
+ * new memory.
+ *
+ * Call with cpuset_mutex held. May take callback_lock during call.
+ * Will take tasklist_lock, scan the tasklist for tasks in cpuset cs,
+ * lock each such task's mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpuset's new mems_allowed.
+ */
+static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
+                          const char *buf)
+{
+       int retval;
+
+       /*
+        * top_cpuset.mems_allowed tracks node_states[N_MEMORY];
+        * it's read-only
+        */
+       if (cs == &top_cpuset) {
+               retval = -EACCES;
+               goto done;
+       }
+
+       /*
+        * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+        * Since nodelist_parse() fails on an empty mask, we special case
+        * that parsing.  The validate_change() call ensures that cpusets
+        * with tasks have memory.
+        */
+       if (!*buf) {
+               nodes_clear(trialcs->mems_allowed);
+       } else {
+               retval = nodelist_parse(buf, trialcs->mems_allowed);
+               if (retval < 0)
+                       goto done;
+
+               if (!nodes_subset(trialcs->mems_allowed,
+                                 top_cpuset.mems_allowed)) {
+                       retval = -EINVAL;
+                       goto done;
+               }
+       }
+
+       if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
+               retval = 0;             /* Too easy - nothing to do */
+               goto done;
+       }
+       retval = validate_change(cs, trialcs);
+       if (retval < 0)
+               goto done;
+
+       spin_lock_irq(&callback_lock);
+       cs->mems_allowed = trialcs->mems_allowed;
+       spin_unlock_irq(&callback_lock);
+
+       /* use trialcs->mems_allowed as a temp variable */
+       update_nodemasks_hier(cs, &trialcs->mems_allowed);
+done:
+       return retval;
+}
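+
+/*
+ * For example (illustrative), writing "0,2" to the "mems" file places
+ * the cpuset on nodes 0 and 2; if memory_migrate is enabled, each
+ * task's pages are then migrated from the old nodes to the new ones
+ * via cpuset_migrate_mm().
+ */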
+
+int current_cpuset_is_being_rebound(void)
+{
+       int ret;
+
+       rcu_read_lock();
+       ret = task_cs(current) == cpuset_being_rebound;
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static int update_relax_domain_level(struct cpuset *cs, s64 val)
+{
+#ifdef CONFIG_SMP
+       if (val < -1 || val >= sched_domain_level_max)
+               return -EINVAL;
+#endif
+
+       if (val != cs->relax_domain_level) {
+               cs->relax_domain_level = val;
+               if (!cpumask_empty(cs->cpus_allowed) &&
+                   is_sched_load_balance(cs))
+                       rebuild_sched_domains_locked();
+       }
+
+       return 0;
+}
+
+/**
+ * update_tasks_flags - update the spread flags of tasks in the cpuset.
+ * @cs: the cpuset in which each task's spread flags needs to be changed
+ *
+ * Iterate through each task of @cs updating its spread flags.  As this
+ * function is called with cpuset_mutex held, cpuset membership stays
+ * stable.
+ */
+static void update_tasks_flags(struct cpuset *cs)
+{
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       css_task_iter_start(&cs->css, &it);
+       while ((task = css_task_iter_next(&it)))
+               cpuset_update_task_spread_flag(cs, task);
+       css_task_iter_end(&it);
+}
+
+/*
+ * update_flag - read a 0 or a 1 in a file and update associated flag
+ * bit:                the bit to update (see cpuset_flagbits_t)
+ * cs:         the cpuset to update
+ * turning_on:         whether the flag is being set or cleared
+ *
+ * Call with cpuset_mutex held.
+ */
+
+static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+                      int turning_on)
+{
+       struct cpuset *trialcs;
+       int balance_flag_changed;
+       int spread_flag_changed;
+       int err;
+
+       trialcs = alloc_trial_cpuset(cs);
+       if (!trialcs)
+               return -ENOMEM;
+
+       if (turning_on)
+               set_bit(bit, &trialcs->flags);
+       else
+               clear_bit(bit, &trialcs->flags);
+
+       err = validate_change(cs, trialcs);
+       if (err < 0)
+               goto out;
+
+       balance_flag_changed = (is_sched_load_balance(cs) !=
+                               is_sched_load_balance(trialcs));
+
+       spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
+                       || (is_spread_page(cs) != is_spread_page(trialcs)));
+
+       spin_lock_irq(&callback_lock);
+       cs->flags = trialcs->flags;
+       spin_unlock_irq(&callback_lock);
+
+       if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
+               rebuild_sched_domains_locked();
+
+       if (spread_flag_changed)
+               update_tasks_flags(cs);
+out:
+       free_trial_cpuset(trialcs);
+       return err;
+}
+
+/*
+ * Frequency meter - How fast is some event occurring?
+ *
+ * These routines manage a digitally filtered, constant time based,
+ * event frequency meter.  There are four routines:
+ *   fmeter_init() - initialize a frequency meter.
+ *   fmeter_markevent() - called each time the event happens.
+ *   fmeter_getrate() - returns the recent rate of such events.
+ *   fmeter_update() - internal routine used to update fmeter.
+ *
+ * A common data structure is passed to each of these routines,
+ * which is used to keep track of the state required to manage the
+ * frequency meter and its digital filter.
+ *
+ * The filter works on the number of events marked per unit time.
+ * The filter is single-pole low-pass recursive (IIR).  The time unit
+ * is 1 second.  Arithmetic is done using 32-bit integers scaled to
+ * simulate 3 decimal digits of precision (multiplied by 1000).
+ *
+ * With an FM_COEF of 933, and a time base of 1 second, the filter
+ * has a half-life of 10 seconds, meaning that if the events quit
+ * happening, then the rate returned from the fmeter_getrate()
+ * will be cut in half each 10 seconds, until it converges to zero.
+ *
+ * It is not worth doing a real infinitely recursive filter.  If more
+ * than FM_MAXTICKS ticks have elapsed since the last filter event,
+ * just compute FM_MAXTICKS ticks worth, by which point the level
+ * will be stable.
+ *
+ * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
+ * arithmetic overflow in the fmeter_update() routine.
+ *
+ * Given the simple 32 bit integer arithmetic used, this meter works
+ * best for reporting rates between one per millisecond (msec) and
+ * one per 32 (approx) seconds.  At constant rates faster than one
+ * per msec it maxes out at values just under 1,000,000.  At constant
+ * rates between one per msec, and one per second it will stabilize
+ * to a value N*1000, where N is the rate of events per second.
+ * At constant rates between one per second and one per 32 seconds,
+ * it will be choppy, moving up on the seconds that have an event,
+ * and then decaying until the next event.  At rates slower than
+ * about one in 32 seconds, it decays all the way back to zero between
+ * each event.
+ */
+
+#define FM_COEF 933            /* coefficient for half-life of 10 secs */
+#define FM_MAXTICKS ((u32)99)   /* useless computing more ticks than this */
+#define FM_MAXCNT 1000000      /* limit cnt to avoid overflow */
+#define FM_SCALE 1000          /* faux fixed point scale */
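+
+/*
+ * Sanity check of the half-life claim above: after ten 1-second ticks
+ * with no events, val has been multiplied by (933/1000)^10 ~= 0.50,
+ * i.e. the reported rate halves roughly every 10 seconds.
+ */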
+
+/* Initialize a frequency meter */
+static void fmeter_init(struct fmeter *fmp)
+{
+       fmp->cnt = 0;
+       fmp->val = 0;
+       fmp->time = 0;
+       spin_lock_init(&fmp->lock);
+}
+
+/* Internal meter update - process cnt events and update value */
+static void fmeter_update(struct fmeter *fmp)
+{
+       time64_t now;
+       u32 ticks;
+
+       now = ktime_get_seconds();
+       ticks = now - fmp->time;
+
+       if (ticks == 0)
+               return;
+
+       ticks = min(FM_MAXTICKS, ticks);
+       while (ticks-- > 0)
+               fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
+       fmp->time = now;
+
+       fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
+       fmp->cnt = 0;
+}
+
+/* Process any previous ticks, then bump cnt by one (times scale). */
+static void fmeter_markevent(struct fmeter *fmp)
+{
+       spin_lock(&fmp->lock);
+       fmeter_update(fmp);
+       fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
+       spin_unlock(&fmp->lock);
+}
+
+/* Process any previous ticks, then return current value. */
+static int fmeter_getrate(struct fmeter *fmp)
+{
+       int val;
+
+       spin_lock(&fmp->lock);
+       fmeter_update(fmp);
+       val = fmp->val;
+       spin_unlock(&fmp->lock);
+       return val;
+}
+
+static struct cpuset *cpuset_attach_old_cs;
+
+/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
+static int cpuset_can_attach(struct cgroup_taskset *tset)
+{
+       struct cgroup_subsys_state *css;
+       struct cpuset *cs;
+       struct task_struct *task;
+       int ret;
+
+       /* used later by cpuset_attach() */
+       cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
+       cs = css_cs(css);
+
+       mutex_lock(&cpuset_mutex);
+
+       /* allow moving tasks into an empty cpuset if on default hierarchy */
+       ret = -ENOSPC;
+       if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+           (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
+               goto out_unlock;
+
+       cgroup_taskset_for_each(task, css, tset) {
+               ret = task_can_attach(task, cs->cpus_allowed);
+               if (ret)
+                       goto out_unlock;
+               ret = security_task_setscheduler(task);
+               if (ret)
+                       goto out_unlock;
+       }
+
+       /*
+        * Mark attach is in progress.  This makes validate_change() fail
+        * changes which zero cpus/mems_allowed.
+        */
+       cs->attach_in_progress++;
+       ret = 0;
+out_unlock:
+       mutex_unlock(&cpuset_mutex);
+       return ret;
+}
+
+static void cpuset_cancel_attach(struct cgroup_taskset *tset)
+{
+       struct cgroup_subsys_state *css;
+       struct cpuset *cs;
+
+       cgroup_taskset_first(tset, &css);
+       cs = css_cs(css);
+
+       mutex_lock(&cpuset_mutex);
+       css_cs(css)->attach_in_progress--;
+       mutex_unlock(&cpuset_mutex);
+}
+
+/*
+ * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
+ * but we can't allocate it dynamically there.  Define it globally and
+ * allocate from cpuset_init().
+ */
+static cpumask_var_t cpus_attach;
+
+static void cpuset_attach(struct cgroup_taskset *tset)
+{
+       /* static buf protected by cpuset_mutex */
+       static nodemask_t cpuset_attach_nodemask_to;
+       struct task_struct *task;
+       struct task_struct *leader;
+       struct cgroup_subsys_state *css;
+       struct cpuset *cs;
+       struct cpuset *oldcs = cpuset_attach_old_cs;
+
+       cgroup_taskset_first(tset, &css);
+       cs = css_cs(css);
+
+       mutex_lock(&cpuset_mutex);
+
+       /* prepare for attach */
+       if (cs == &top_cpuset)
+               cpumask_copy(cpus_attach, cpu_possible_mask);
+       else
+               guarantee_online_cpus(cs, cpus_attach);
+
+       guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+
+       cgroup_taskset_for_each(task, css, tset) {
+               /*
+                * can_attach beforehand should guarantee that this doesn't
+                * fail.  TODO: have a better way to handle failure here
+                */
+               WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+
+               cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+               cpuset_update_task_spread_flag(cs, task);
+       }
+
+       /*
+        * Change mm for all threadgroup leaders. This is expensive and may
+        * sleep and should be moved outside migration path proper.
+        */
+       cpuset_attach_nodemask_to = cs->effective_mems;
+       cgroup_taskset_for_each_leader(leader, css, tset) {
+               struct mm_struct *mm = get_task_mm(leader);
+
+               if (mm) {
+                       mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
+
+                       /*
+                        * old_mems_allowed is the same as mems_allowed
+                        * here, except if this task is being moved
+                        * automatically due to hotplug.  In that case
+                        * @mems_allowed has been updated and is empty, so
+                        * @old_mems_allowed is the right nodemask that we
+                        * migrate the mm from.
+                        */
+                       if (is_memory_migrate(cs))
+                               cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
+                                                 &cpuset_attach_nodemask_to);
+                       else
+                               mmput(mm);
+               }
+       }
+
+       cs->old_mems_allowed = cpuset_attach_nodemask_to;
+
+       cs->attach_in_progress--;
+       if (!cs->attach_in_progress)
+               wake_up(&cpuset_attach_wq);
+
+       mutex_unlock(&cpuset_mutex);
+}
+
+/* The various types of files and directories in a cpuset file system */
+
+typedef enum {
+       FILE_MEMORY_MIGRATE,
+       FILE_CPULIST,
+       FILE_MEMLIST,
+       FILE_EFFECTIVE_CPULIST,
+       FILE_EFFECTIVE_MEMLIST,
+       FILE_CPU_EXCLUSIVE,
+       FILE_MEM_EXCLUSIVE,
+       FILE_MEM_HARDWALL,
+       FILE_SCHED_LOAD_BALANCE,
+       FILE_SCHED_RELAX_DOMAIN_LEVEL,
+       FILE_MEMORY_PRESSURE_ENABLED,
+       FILE_MEMORY_PRESSURE,
+       FILE_SPREAD_PAGE,
+       FILE_SPREAD_SLAB,
+} cpuset_filetype_t;
+
+static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
+                           u64 val)
+{
+       struct cpuset *cs = css_cs(css);
+       cpuset_filetype_t type = cft->private;
+       int retval = 0;
+
+       mutex_lock(&cpuset_mutex);
+       if (!is_cpuset_online(cs)) {
+               retval = -ENODEV;
+               goto out_unlock;
+       }
+
+       switch (type) {
+       case FILE_CPU_EXCLUSIVE:
+               retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
+               break;
+       case FILE_MEM_EXCLUSIVE:
+               retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
+               break;
+       case FILE_MEM_HARDWALL:
+               retval = update_flag(CS_MEM_HARDWALL, cs, val);
+               break;
+       case FILE_SCHED_LOAD_BALANCE:
+               retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
+               break;
+       case FILE_MEMORY_MIGRATE:
+               retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
+               break;
+       case FILE_MEMORY_PRESSURE_ENABLED:
+               cpuset_memory_pressure_enabled = !!val;
+               break;
+       case FILE_SPREAD_PAGE:
+               retval = update_flag(CS_SPREAD_PAGE, cs, val);
+               break;
+       case FILE_SPREAD_SLAB:
+               retval = update_flag(CS_SPREAD_SLAB, cs, val);
+               break;
+       default:
+               retval = -EINVAL;
+               break;
+       }
+out_unlock:
+       mutex_unlock(&cpuset_mutex);
+       return retval;
+}
+
+static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
+                           s64 val)
+{
+       struct cpuset *cs = css_cs(css);
+       cpuset_filetype_t type = cft->private;
+       int retval = -ENODEV;
+
+       mutex_lock(&cpuset_mutex);
+       if (!is_cpuset_online(cs))
+               goto out_unlock;
+
+       switch (type) {
+       case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+               retval = update_relax_domain_level(cs, val);
+               break;
+       default:
+               retval = -EINVAL;
+               break;
+       }
+out_unlock:
+       mutex_unlock(&cpuset_mutex);
+       return retval;
+}
+
+/*
+ * Common handling for a write to a "cpus" or "mems" file.
+ */
+static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
+                                   char *buf, size_t nbytes, loff_t off)
+{
+       struct cpuset *cs = css_cs(of_css(of));
+       struct cpuset *trialcs;
+       int retval = -ENODEV;
+
+       buf = strstrip(buf);
+
+       /*
+        * CPU or memory hotunplug may leave @cs w/o any execution
+        * resources, in which case the hotplug code asynchronously updates
+        * configuration and transfers all tasks to the nearest ancestor
+        * which can execute.
+        *
+        * As writes to "cpus" or "mems" may restore @cs's execution
+        * resources, wait for the previously scheduled operations before
+        * proceeding, so that we don't keep removing tasks added
+        * after execution capability is restored.
+        *
+        * cpuset_hotplug_work calls back into cgroup core via
+        * cgroup_transfer_tasks() and waiting for it from a cgroupfs
+        * operation like this one can lead to a deadlock through kernfs
+        * active_ref protection.  Let's break the protection.  Losing the
+        * protection is okay as we check whether @cs is online after
+        * grabbing cpuset_mutex anyway.  This only happens on the legacy
+        * hierarchies.
+        */
+       css_get(&cs->css);
+       kernfs_break_active_protection(of->kn);
+       flush_work(&cpuset_hotplug_work);
+
+       mutex_lock(&cpuset_mutex);
+       if (!is_cpuset_online(cs))
+               goto out_unlock;
+
+       trialcs = alloc_trial_cpuset(cs);
+       if (!trialcs) {
+               retval = -ENOMEM;
+               goto out_unlock;
+       }
+
+       switch (of_cft(of)->private) {
+       case FILE_CPULIST:
+               retval = update_cpumask(cs, trialcs, buf);
+               break;
+       case FILE_MEMLIST:
+               retval = update_nodemask(cs, trialcs, buf);
+               break;
+       default:
+               retval = -EINVAL;
+               break;
+       }
+
+       free_trial_cpuset(trialcs);
+out_unlock:
+       mutex_unlock(&cpuset_mutex);
+       kernfs_unbreak_active_protection(of->kn);
+       css_put(&cs->css);
+       flush_workqueue(cpuset_migrate_mm_wq);
+       return retval ?: nbytes;
+}
+
+/*
+ * These ASCII lists should be read in a single call, by using a user
+ * buffer large enough to hold the entire map.  If read in smaller
+ * chunks, there is no guarantee of atomicity.  Since the display format
+ * used - a list of ranges of sequential numbers - is variable length,
+ * and since these maps can change value dynamically, one could read
+ * gibberish by doing partial reads while a list was changing.
+ */
+static int cpuset_common_seq_show(struct seq_file *sf, void *v)
+{
+       struct cpuset *cs = css_cs(seq_css(sf));
+       cpuset_filetype_t type = seq_cft(sf)->private;
+       int ret = 0;
+
+       spin_lock_irq(&callback_lock);
+
+       switch (type) {
+       case FILE_CPULIST:
+               seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
+               break;
+       case FILE_MEMLIST:
+               seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
+               break;
+       case FILE_EFFECTIVE_CPULIST:
+               seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus));
+               break;
+       case FILE_EFFECTIVE_MEMLIST:
+               seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       spin_unlock_irq(&callback_lock);
+       return ret;
+}
+
+static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       struct cpuset *cs = css_cs(css);
+       cpuset_filetype_t type = cft->private;
+       switch (type) {
+       case FILE_CPU_EXCLUSIVE:
+               return is_cpu_exclusive(cs);
+       case FILE_MEM_EXCLUSIVE:
+               return is_mem_exclusive(cs);
+       case FILE_MEM_HARDWALL:
+               return is_mem_hardwall(cs);
+       case FILE_SCHED_LOAD_BALANCE:
+               return is_sched_load_balance(cs);
+       case FILE_MEMORY_MIGRATE:
+               return is_memory_migrate(cs);
+       case FILE_MEMORY_PRESSURE_ENABLED:
+               return cpuset_memory_pressure_enabled;
+       case FILE_MEMORY_PRESSURE:
+               return fmeter_getrate(&cs->fmeter);
+       case FILE_SPREAD_PAGE:
+               return is_spread_page(cs);
+       case FILE_SPREAD_SLAB:
+               return is_spread_slab(cs);
+       default:
+               BUG();
+       }
+
+       /* Unreachable but makes gcc happy */
+       return 0;
+}
+
+static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       struct cpuset *cs = css_cs(css);
+       cpuset_filetype_t type = cft->private;
+       switch (type) {
+       case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+               return cs->relax_domain_level;
+       default:
+               BUG();
+       }
+
+       /* Unreachable but makes gcc happy */
+       return 0;
+}
+
+
+/*
+ * for the common functions, 'private' gives the type of file
+ */
+
+static struct cftype files[] = {
+       {
+               .name = "cpus",
+               .seq_show = cpuset_common_seq_show,
+               .write = cpuset_write_resmask,
+               .max_write_len = (100U + 6 * NR_CPUS),
+               .private = FILE_CPULIST,
+       },
+
+       {
+               .name = "mems",
+               .seq_show = cpuset_common_seq_show,
+               .write = cpuset_write_resmask,
+               .max_write_len = (100U + 6 * MAX_NUMNODES),
+               .private = FILE_MEMLIST,
+       },
+
+       {
+               .name = "effective_cpus",
+               .seq_show = cpuset_common_seq_show,
+               .private = FILE_EFFECTIVE_CPULIST,
+       },
+
+       {
+               .name = "effective_mems",
+               .seq_show = cpuset_common_seq_show,
+               .private = FILE_EFFECTIVE_MEMLIST,
+       },
+
+       {
+               .name = "cpu_exclusive",
+               .read_u64 = cpuset_read_u64,
+               .write_u64 = cpuset_write_u64,
+               .private = FILE_CPU_EXCLUSIVE,
+       },
+
+       {
+               .name = "mem_exclusive",
+               .read_u64 = cpuset_read_u64,
+               .write_u64 = cpuset_write_u64,
+               .private = FILE_MEM_EXCLUSIVE,
+       },
+
+       {
+               .name = "mem_hardwall",
+               .read_u64 = cpuset_read_u64,
+               .write_u64 = cpuset_write_u64,
+               .private = FILE_MEM_HARDWALL,
+       },
+
+       {
+               .name = "sched_load_balance",
+               .read_u64 = cpuset_read_u64,
+               .write_u64 = cpuset_write_u64,
+               .private = FILE_SCHED_LOAD_BALANCE,
+       },
+
+       {
+               .name = "sched_relax_domain_level",
+               .read_s64 = cpuset_read_s64,
+               .write_s64 = cpuset_write_s64,
+               .private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
+       },
+
+       {
+               .name = "memory_migrate",
+               .read_u64 = cpuset_read_u64,
+               .write_u64 = cpuset_write_u64,
+               .private = FILE_MEMORY_MIGRATE,
+       },
+
+       {
+               .name = "memory_pressure",
+               .read_u64 = cpuset_read_u64,
+       },
+
+       {
+               .name = "memory_spread_page",
+               .read_u64 = cpuset_read_u64,
+               .write_u64 = cpuset_write_u64,
+               .private = FILE_SPREAD_PAGE,
+       },
+
+       {
+               .name = "memory_spread_slab",
+               .read_u64 = cpuset_read_u64,
+               .write_u64 = cpuset_write_u64,
+               .private = FILE_SPREAD_SLAB,
+       },
+
+       {
+               .name = "memory_pressure_enabled",
+               .flags = CFTYPE_ONLY_ON_ROOT,
+               .read_u64 = cpuset_read_u64,
+               .write_u64 = cpuset_write_u64,
+               .private = FILE_MEMORY_PRESSURE_ENABLED,
+       },
+
+       { }     /* terminate */
+};
+
+/*
+ *     cpuset_css_alloc - allocate a cpuset css
+ *     parent_css: css of the parent cpuset, or NULL for the top cpuset
+ */
+
+static struct cgroup_subsys_state *
+cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+       struct cpuset *cs;
+
+       if (!parent_css)
+               return &top_cpuset.css;
+
+       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+       if (!cs)
+               return ERR_PTR(-ENOMEM);
+       if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
+               goto free_cs;
+       if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
+               goto free_cpus;
+
+       set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+       cpumask_clear(cs->cpus_allowed);
+       nodes_clear(cs->mems_allowed);
+       cpumask_clear(cs->effective_cpus);
+       nodes_clear(cs->effective_mems);
+       fmeter_init(&cs->fmeter);
+       cs->relax_domain_level = -1;
+
+       return &cs->css;
+
+free_cpus:
+       free_cpumask_var(cs->cpus_allowed);
+free_cs:
+       kfree(cs);
+       return ERR_PTR(-ENOMEM);
+}
+
+static int cpuset_css_online(struct cgroup_subsys_state *css)
+{
+       struct cpuset *cs = css_cs(css);
+       struct cpuset *parent = parent_cs(cs);
+       struct cpuset *tmp_cs;
+       struct cgroup_subsys_state *pos_css;
+
+       if (!parent)
+               return 0;
+
+       mutex_lock(&cpuset_mutex);
+
+       set_bit(CS_ONLINE, &cs->flags);
+       if (is_spread_page(parent))
+               set_bit(CS_SPREAD_PAGE, &cs->flags);
+       if (is_spread_slab(parent))
+               set_bit(CS_SPREAD_SLAB, &cs->flags);
+
+       cpuset_inc();
+
+       spin_lock_irq(&callback_lock);
+       if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
+               cpumask_copy(cs->effective_cpus, parent->effective_cpus);
+               cs->effective_mems = parent->effective_mems;
+       }
+       spin_unlock_irq(&callback_lock);
+
+       if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
+               goto out_unlock;
+
+       /*
+        * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is
+        * set.  This flag handling is implemented in cgroup core for
+        * historical reasons - the flag may be specified during mount.
+        *
+        * Currently, if any sibling cpusets have exclusive cpus or mem, we
+        * refuse to clone the configuration - thereby refusing to let the
+        * task enter, and as a result failing the sys_unshare() or
+        * clone() which initiated it.  If this becomes a problem for some
+        * users who wish to allow that scenario, then this could be
+        * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
+        * (and likewise for mems) to the new cgroup.
+        */
+       rcu_read_lock();
+       cpuset_for_each_child(tmp_cs, pos_css, parent) {
+               if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
+                       rcu_read_unlock();
+                       goto out_unlock;
+               }
+       }
+       rcu_read_unlock();
+
+       spin_lock_irq(&callback_lock);
+       cs->mems_allowed = parent->mems_allowed;
+       cs->effective_mems = parent->mems_allowed;
+       cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+       cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
+       spin_unlock_irq(&callback_lock);
+out_unlock:
+       mutex_unlock(&cpuset_mutex);
+       return 0;
+}
+
+/*
+ * If the cpuset being removed has its flag 'sched_load_balance'
+ * enabled, then simulate turning sched_load_balance off, which
+ * will call rebuild_sched_domains_locked().
+ */
+
+static void cpuset_css_offline(struct cgroup_subsys_state *css)
+{
+       struct cpuset *cs = css_cs(css);
+
+       mutex_lock(&cpuset_mutex);
+
+       if (is_sched_load_balance(cs))
+               update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
+
+       cpuset_dec();
+       clear_bit(CS_ONLINE, &cs->flags);
+
+       mutex_unlock(&cpuset_mutex);
+}
+
+static void cpuset_css_free(struct cgroup_subsys_state *css)
+{
+       struct cpuset *cs = css_cs(css);
+
+       free_cpumask_var(cs->effective_cpus);
+       free_cpumask_var(cs->cpus_allowed);
+       kfree(cs);
+}
+
+static void cpuset_bind(struct cgroup_subsys_state *root_css)
+{
+       mutex_lock(&cpuset_mutex);
+       spin_lock_irq(&callback_lock);
+
+       if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
+               cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
+               top_cpuset.mems_allowed = node_possible_map;
+       } else {
+               cpumask_copy(top_cpuset.cpus_allowed,
+                            top_cpuset.effective_cpus);
+               top_cpuset.mems_allowed = top_cpuset.effective_mems;
+       }
+
+       spin_unlock_irq(&callback_lock);
+       mutex_unlock(&cpuset_mutex);
+}
+
+/*
+ * Make sure the new task conforms to the current state of its parent,
+ * which could have been changed by cpuset just after the task inherits
+ * the state from the parent and before it sits on the cgroup's task list.
+ */
+static void cpuset_fork(struct task_struct *task)
+{
+       if (task_css_is_root(task, cpuset_cgrp_id))
+               return;
+
+       set_cpus_allowed_ptr(task, &current->cpus_allowed);
+       task->mems_allowed = current->mems_allowed;
+}
+
+struct cgroup_subsys cpuset_cgrp_subsys = {
+       .css_alloc      = cpuset_css_alloc,
+       .css_online     = cpuset_css_online,
+       .css_offline    = cpuset_css_offline,
+       .css_free       = cpuset_css_free,
+       .can_attach     = cpuset_can_attach,
+       .cancel_attach  = cpuset_cancel_attach,
+       .attach         = cpuset_attach,
+       .post_attach    = cpuset_post_attach,
+       .bind           = cpuset_bind,
+       .fork           = cpuset_fork,
+       .legacy_cftypes = files,
+       .early_init     = true,
+};
+
+/**
+ * cpuset_init - initialize cpusets at system boot
+ *
+ * Description: Initialize top_cpuset and the cpuset internal file system.
+ **/
+
+int __init cpuset_init(void)
+{
+       int err = 0;
+
+       if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
+               BUG();
+       if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
+               BUG();
+
+       cpumask_setall(top_cpuset.cpus_allowed);
+       nodes_setall(top_cpuset.mems_allowed);
+       cpumask_setall(top_cpuset.effective_cpus);
+       nodes_setall(top_cpuset.effective_mems);
+
+       fmeter_init(&top_cpuset.fmeter);
+       set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
+       top_cpuset.relax_domain_level = -1;
+
+       err = register_filesystem(&cpuset_fs_type);
+       if (err < 0)
+               return err;
+
+       if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
+               BUG();
+
+       return 0;
+}
+
+/*
+ * If CPU and/or memory hotplug handlers, below, unplug any CPUs
+ * or memory nodes, we need to walk over the cpuset hierarchy,
+ * removing that CPU or node from all cpusets.  If this removes the
+ * last CPU or node from a cpuset, then move the tasks in the empty
+ * cpuset to its next-highest non-empty parent.
+ */
+static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
+{
+       struct cpuset *parent;
+
+       /*
+        * Find its next-highest non-empty parent (the top cpuset
+        * always has online cpus, so it can't be empty).
+        */
+       parent = parent_cs(cs);
+       while (cpumask_empty(parent->cpus_allowed) ||
+                       nodes_empty(parent->mems_allowed))
+               parent = parent_cs(parent);
+
+       if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
+               pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
+               pr_cont_cgroup_name(cs->css.cgroup);
+               pr_cont("\n");
+       }
+}
+
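+/*
+ * Worked example of the walk above (illustrative): given
+ *
+ *      top (cpus 0-7)
+ *       `- A (cpus 4-5)
+ *           `- B (cpus 5)
+ *
+ * hotunplugging CPU 5 empties B; parent_cs(B) is A, which still has
+ * CPU 4, so B's tasks are transferred to A.  Had A become empty as
+ * well, the loop would continue upward to top, which always has
+ * online cpus and so can never be empty.
+ */
+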
+static void
+hotplug_update_tasks_legacy(struct cpuset *cs,
+                           struct cpumask *new_cpus, nodemask_t *new_mems,
+                           bool cpus_updated, bool mems_updated)
+{
+       bool is_empty;
+
+       spin_lock_irq(&callback_lock);
+       cpumask_copy(cs->cpus_allowed, new_cpus);
+       cpumask_copy(cs->effective_cpus, new_cpus);
+       cs->mems_allowed = *new_mems;
+       cs->effective_mems = *new_mems;
+       spin_unlock_irq(&callback_lock);
+
+       /*
+        * Don't call update_tasks_cpumask() if the cpuset becomes empty,
+        * as the tasks will be migrated to an ancestor.
+        */
+       if (cpus_updated && !cpumask_empty(cs->cpus_allowed))
+               update_tasks_cpumask(cs);
+       if (mems_updated && !nodes_empty(cs->mems_allowed))
+               update_tasks_nodemask(cs);
+
+       is_empty = cpumask_empty(cs->cpus_allowed) ||
+                  nodes_empty(cs->mems_allowed);
+
+       mutex_unlock(&cpuset_mutex);
+
+       /*
+        * Move tasks to the nearest ancestor with execution resources.
+        * This is a full cgroup operation which will also call back into
+        * cpuset, so it should be done outside any lock.
+        */
+       if (is_empty)
+               remove_tasks_in_empty_cpuset(cs);
+
+       mutex_lock(&cpuset_mutex);
+}
+
+static void
+hotplug_update_tasks(struct cpuset *cs,
+                    struct cpumask *new_cpus, nodemask_t *new_mems,
+                    bool cpus_updated, bool mems_updated)
+{
+       if (cpumask_empty(new_cpus))
+               cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus);
+       if (nodes_empty(*new_mems))
+               *new_mems = parent_cs(cs)->effective_mems;
+
+       spin_lock_irq(&callback_lock);
+       cpumask_copy(cs->effective_cpus, new_cpus);
+       cs->effective_mems = *new_mems;
+       spin_unlock_irq(&callback_lock);
+
+       if (cpus_updated)
+               update_tasks_cpumask(cs);
+       if (mems_updated)
+               update_tasks_nodemask(cs);
+}
+
+/**
+ * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
+ * @cs: cpuset of interest
+ *
+ * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
+ * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
+ * all its tasks are moved to the nearest ancestor with both resources.
+ */
+static void cpuset_hotplug_update_tasks(struct cpuset *cs)
+{
+       static cpumask_t new_cpus;
+       static nodemask_t new_mems;
+       bool cpus_updated;
+       bool mems_updated;
+retry:
+       wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);
+
+       mutex_lock(&cpuset_mutex);
+
+       /*
+        * We have raced with task attaching. We wait until attaching
+        * is finished, so we won't attach a task to an empty cpuset.
+        */
+       if (cs->attach_in_progress) {
+               mutex_unlock(&cpuset_mutex);
+               goto retry;
+       }
+
+       cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
+       nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
+
+       cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
+       mems_updated = !nodes_equal(new_mems, cs->effective_mems);
+
+       if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+               hotplug_update_tasks(cs, &new_cpus, &new_mems,
+                                    cpus_updated, mems_updated);
+       else
+               hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
+                                           cpus_updated, mems_updated);
+
+       mutex_unlock(&cpuset_mutex);
+}
+
+/**
+ * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
+ *
+ * This function is called after either CPU or memory configuration has
+ * changed and updates cpuset accordingly.  The top_cpuset is always
+ * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
+ * order to make cpusets transparent (of no effect) on systems that are
+ * actively using CPU hotplug but making no active use of cpusets.
+ *
+ * Non-root cpusets are only affected by offlining.  If any CPUs or memory
+ * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on
+ * all descendants.
+ *
+ * Note that CPU offlining during suspend is ignored.  We don't modify
+ * cpusets across suspend/resume cycles at all.
+ */
+static void cpuset_hotplug_workfn(struct work_struct *work)
+{
+       static cpumask_t new_cpus;
+       static nodemask_t new_mems;
+       bool cpus_updated, mems_updated;
+       bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
+
+       mutex_lock(&cpuset_mutex);
+
+       /* fetch the available cpus/mems and find out which changed how */
+       cpumask_copy(&new_cpus, cpu_active_mask);
+       new_mems = node_states[N_MEMORY];
+
+       cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
+       mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
+
+       /* synchronize cpus_allowed to cpu_active_mask */
+       if (cpus_updated) {
+               spin_lock_irq(&callback_lock);
+               if (!on_dfl)
+                       cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
+               cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
+               spin_unlock_irq(&callback_lock);
+               /* we don't mess with cpumasks of tasks in top_cpuset */
+       }
+
+       /* synchronize mems_allowed to N_MEMORY */
+       if (mems_updated) {
+               spin_lock_irq(&callback_lock);
+               if (!on_dfl)
+                       top_cpuset.mems_allowed = new_mems;
+               top_cpuset.effective_mems = new_mems;
+               spin_unlock_irq(&callback_lock);
+               update_tasks_nodemask(&top_cpuset);
+       }
+
+       mutex_unlock(&cpuset_mutex);
+
+       /* if cpus or mems changed, we need to propagate to descendants */
+       if (cpus_updated || mems_updated) {
+               struct cpuset *cs;
+               struct cgroup_subsys_state *pos_css;
+
+               rcu_read_lock();
+               cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
+                       if (cs == &top_cpuset || !css_tryget_online(&cs->css))
+                               continue;
+                       rcu_read_unlock();
+
+                       cpuset_hotplug_update_tasks(cs);
+
+                       rcu_read_lock();
+                       css_put(&cs->css);
+               }
+               rcu_read_unlock();
+       }
+
+       /* rebuild sched domains if cpus_allowed has changed */
+       if (cpus_updated)
+               rebuild_sched_domains();
+}
+
+void cpuset_update_active_cpus(bool cpu_online)
+{
+       /*
+        * We're inside cpu hotplug critical region which usually nests
+        * inside cgroup synchronization.  Bounce actual hotplug processing
+        * to a work item to avoid reverse locking order.
+        *
+        * We still need to do partition_sched_domains() synchronously;
+        * otherwise, the scheduler will get confused and put tasks on the
+        * dead CPU.  Fall back to the default single domain.
+        * cpuset_hotplug_workfn() will rebuild it as necessary.
+        */
+       partition_sched_domains(1, NULL, NULL);
+       schedule_work(&cpuset_hotplug_work);
+}
+
+/*
+ * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
+ * Call this routine anytime after node_states[N_MEMORY] changes.
+ * See cpuset_update_active_cpus() for CPU hotplug handling.
+ */
+static int cpuset_track_online_nodes(struct notifier_block *self,
+                               unsigned long action, void *arg)
+{
+       schedule_work(&cpuset_hotplug_work);
+       return NOTIFY_OK;
+}
+
+static struct notifier_block cpuset_track_online_nodes_nb = {
+       .notifier_call = cpuset_track_online_nodes,
+       .priority = 10,         /* ??! */
+};
+
+/**
+ * cpuset_init_smp - initialize cpus_allowed
+ *
+ * Description: Finish the top cpuset after the cpu and node maps are initialized
+ */
+void __init cpuset_init_smp(void)
+{
+       cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
+       top_cpuset.mems_allowed = node_states[N_MEMORY];
+       top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
+
+       cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
+       top_cpuset.effective_mems = node_states[N_MEMORY];
+
+       register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
+
+       cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
+       BUG_ON(!cpuset_migrate_mm_wq);
+}
+
+/**
+ * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
+ * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
+ * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
+ *
+ * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
+ * attached to the specified @tsk.  Guaranteed to return some non-empty
+ * subset of cpu_online_mask, even if this means going outside the
+ * task's cpuset.
+ **/
+
+void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&callback_lock, flags);
+       rcu_read_lock();
+       guarantee_online_cpus(task_cs(tsk), pmask);
+       rcu_read_unlock();
+       spin_unlock_irqrestore(&callback_lock, flags);
+}
+
+void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+{
+       rcu_read_lock();
+       do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
+       rcu_read_unlock();
+
+       /*
+        * We own tsk->cpus_allowed, nobody can change it under us.
+        *
+        * But we used cs && cs->cpus_allowed lockless and thus can
+        * race with cgroup_attach_task() or update_cpumask() and get
+        * the wrong tsk->cpus_allowed. However, both cases imply the
+        * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
+        * which takes task_rq_lock().
+        *
+        * If we are called after it dropped the lock we must see all
+        * changes in tsk_cs()->cpus_allowed. Otherwise we can temporarily
+        * set any mask even if it is not right from task_cs() pov;
+        * the pending set_cpus_allowed_ptr() will fix things.
+        *
+        * select_fallback_rq() will fix things up and set cpu_possible_mask
+        * if required.
+        */
+}
+
+void __init cpuset_init_current_mems_allowed(void)
+{
+       nodes_setall(current->mems_allowed);
+}
+
+/**
+ * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
+ * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
+ *
+ * Description: Returns the nodemask_t mems_allowed of the cpuset
+ * attached to the specified @tsk.  Guaranteed to return some non-empty
+ * subset of node_states[N_MEMORY], even if this means going outside the
+ * task's cpuset.
+ **/
+
+nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
+{
+       nodemask_t mask;
+       unsigned long flags;
+
+       spin_lock_irqsave(&callback_lock, flags);
+       rcu_read_lock();
+       guarantee_online_mems(task_cs(tsk), &mask);
+       rcu_read_unlock();
+       spin_unlock_irqrestore(&callback_lock, flags);
+
+       return mask;
+}
+
+/**
+ * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
+ * @nodemask: the nodemask to be checked
+ *
+ * Are any of the nodes in the nodemask allowed in current->mems_allowed?
+ */
+int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
+{
+       return nodes_intersects(*nodemask, current->mems_allowed);
+}
+
+/*
+ * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
+ * mem_hardwall ancestor to the specified cpuset.  Call holding
+ * callback_lock.  If no ancestor is mem_exclusive or mem_hardwall
+ * (an unusual configuration), then returns the root cpuset.
+ */
+static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
+{
+       while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs))
+               cs = parent_cs(cs);
+       return cs;
+}
+
+/**
+ * cpuset_node_allowed - Can we allocate on a memory node?
+ * @node: is this an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.  If @node is set in
+ * current's mems_allowed, yes.  If it's not a __GFP_HARDWALL request and this
+ * node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
+ * yes.  If current has access to memory reserves due to TIF_MEMDIE, yes.
+ * Otherwise, no.
+ *
+ * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
+ * and do not allow allocations outside the current task's cpuset
+ * unless the task has been OOM killed and is marked TIF_MEMDIE.
+ * GFP_KERNEL allocations are not so marked, so can escape to the
+ * nearest enclosing hardwalled ancestor cpuset.
+ *
+ * Scanning up parent cpusets requires callback_lock.  The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current task's mems_allowed came up empty on the first pass over
+ * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_lock.
+ *
+ * The first call here from mm/page_alloc:get_page_from_freelist()
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
+ *
+ * The second pass through get_page_from_freelist() doesn't even call
+ * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
+ * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
+ * in alloc_flags.  That logic and the checks below have the combined
+ * effect that:
+ *     in_interrupt - any node ok (current task context irrelevant)
+ *     GFP_ATOMIC   - any node ok
+ *     TIF_MEMDIE   - any node ok
+ *     GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
+ *     GFP_USER     - only nodes in the current task's mems_allowed ok.
+ */
+bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
+{
+       struct cpuset *cs;              /* current cpuset ancestors */
+       bool allowed;                   /* is allocation on @node allowed? */
+       unsigned long flags;
+
+       if (in_interrupt())
+               return true;
+       if (node_isset(node, current->mems_allowed))
+               return true;
+       /*
+        * Allow tasks that have access to memory reserves because they have
+        * been OOM killed to get memory anywhere.
+        */
+       if (unlikely(test_thread_flag(TIF_MEMDIE)))
+               return true;
+       if (gfp_mask & __GFP_HARDWALL)  /* If hardwall request, stop here */
+               return false;
+
+       if (current->flags & PF_EXITING) /* Let dying task have memory */
+               return true;
+
+       /* Not hardwall and node outside mems_allowed: scan up cpusets */
+       spin_lock_irqsave(&callback_lock, flags);
+
+       rcu_read_lock();
+       cs = nearest_hardwall_ancestor(task_cs(current));
+       allowed = node_isset(node, cs->mems_allowed);
+       rcu_read_unlock();
+
+       spin_unlock_irqrestore(&callback_lock, flags);
+       return allowed;
+}
+
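+/*
+ * Illustrative calls (a sketch, not part of this change): with current
+ * in a non-hardwalled cpuset whose mems_allowed is just node 0,
+ *
+ *      __cpuset_node_allowed(0, GFP_USER)    - true, node in mems_allowed
+ *      __cpuset_node_allowed(1, GFP_USER)    - false, __GFP_HARDWALL stops
+ *                                              the ancestor scan
+ *      __cpuset_node_allowed(1, GFP_KERNEL)  - true iff node 1 is set in
+ *                                              the nearest hardwall ancestor
+ */
+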
+/**
+ * cpuset_mem_spread_node() - On which node to begin search for a file page
+ * cpuset_slab_spread_node() - On which node to begin search for a slab page
+ *
+ * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
+ * tasks in a cpuset with is_spread_page or is_spread_slab set),
+ * and if the memory allocation used cpuset_mem_spread_node()
+ * to determine on which node to start looking, as it will for
+ * certain page cache or slab cache pages such as those used for
+ * file system buffers and inode caches, then instead of starting
+ * the search for a free page on the local node, the starting node
+ * is spread around the task's mems_allowed nodes.
+ *
+ * We don't have to worry about the returned node being offline
+ * because "it can't happen", and even if it did, it would be ok.
+ *
+ * The routines calling guarantee_online_mems() are careful to
+ * only set nodes in task->mems_allowed that are online.  So it
+ * should not be possible for the following code to return an
+ * offline node.  But if it did, that would be ok, as this routine
+ * is not returning the node where the allocation must be, only
+ * the node where the search should start.  The zonelist passed to
+ * __alloc_pages() will include all nodes.  If the slab allocator
+ * is passed an offline node, it will fall back to the local node.
+ * See kmem_cache_alloc_node().
+ */
+
+static int cpuset_spread_node(int *rotor)
+{
+       return *rotor = next_node_in(*rotor, current->mems_allowed);
+}
+
+int cpuset_mem_spread_node(void)
+{
+       if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
+               current->cpuset_mem_spread_rotor =
+                       node_random(&current->mems_allowed);
+
+       return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
+}
+
+int cpuset_slab_spread_node(void)
+{
+       if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
+               current->cpuset_slab_spread_rotor =
+                       node_random(&current->mems_allowed);
+
+       return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
+}
+
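+/*
+ * Example of the rotor above (illustrative): with mems_allowed = {0,2,3}
+ * and the rotor at 0, successive cpuset_mem_spread_node() calls return
+ * 2, 3, 0, 2, ... - next_node_in() wraps around the allowed set, so
+ * allocations are spread round-robin over the allowed nodes.
+ */
+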
+EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
+
+/**
+ * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
+ * @tsk1: pointer to task_struct of some task.
+ * @tsk2: pointer to task_struct of some other task.
+ *
+ * Description: Return true if @tsk1's mems_allowed intersects the
+ * mems_allowed of @tsk2.  Used by the OOM killer to determine if
+ * one task's memory usage might impact the memory available
+ * to the other.
+ **/
+
+int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
+                                  const struct task_struct *tsk2)
+{
+       return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
+}
+
+/**
+ * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
+ *
+ * Description: Prints current's name, cpuset name, and cached copy of its
+ * mems_allowed to the kernel log.
+ */
+void cpuset_print_current_mems_allowed(void)
+{
+       struct cgroup *cgrp;
+
+       rcu_read_lock();
+
+       cgrp = task_cs(current)->css.cgroup;
+       pr_info("%s cpuset=", current->comm);
+       pr_cont_cgroup_name(cgrp);
+       pr_cont(" mems_allowed=%*pbl\n",
+               nodemask_pr_args(&current->mems_allowed));
+
+       rcu_read_unlock();
+}
+
+/*
+ * Collection of memory_pressure is suppressed unless
+ * this flag is enabled by writing "1" to the special
+ * cpuset file 'memory_pressure_enabled' in the root cpuset.
+ */
+
+int cpuset_memory_pressure_enabled __read_mostly;
+
+/**
+ * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
+ *
+ * Keep a running average of the rate of synchronous (direct)
+ * page reclaim efforts initiated by tasks in each cpuset.
+ *
+ * This represents the rate at which some task in the cpuset
+ * ran low on memory on all nodes it was allowed to use, and
+ * had to enter the kernel's page reclaim code in an effort to
+ * create more free memory by tossing clean pages or swapping
+ * or writing dirty pages.
+ *
+ * Displayed to user space in the per-cpuset read-only file
+ * "memory_pressure".  The value displayed is an integer
+ * representing the recent rate of entry into the synchronous
+ * (direct) page reclaim by any task attached to the cpuset.
+ **/
+
+void __cpuset_memory_pressure_bump(void)
+{
+       rcu_read_lock();
+       fmeter_markevent(&task_cs(current)->fmeter);
+       rcu_read_unlock();
+}
+
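+/*
+ * Usage sketch (paths illustrative, assuming a legacy cpuset mount):
+ *
+ *      echo 1 > /sys/fs/cgroup/cpuset/cpuset.memory_pressure_enabled
+ *      cat /sys/fs/cgroup/cpuset/mygrp/cpuset.memory_pressure
+ *
+ * Each direct-reclaim entry by an attached task calls the function
+ * above; the fmeter digests those events into the rate that the
+ * read-only memory_pressure file reports.
+ */
+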
+#ifdef CONFIG_PROC_PID_CPUSET
+/*
+ * proc_cpuset_show()
+ *  - Print task's cpuset path into seq_file.
+ *  - Used for /proc/<pid>/cpuset.
+ *  - No need to task_lock(tsk) on this tsk->cpuset reference, as it
+ *    doesn't really matter if tsk->cpuset changes after we read it,
+ *    and we take cpuset_mutex, keeping cpuset_attach() from changing it
+ *    anyway.
+ */
+int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
+                    struct pid *pid, struct task_struct *tsk)
+{
+       char *buf;
+       struct cgroup_subsys_state *css;
+       int retval;
+
+       retval = -ENOMEM;
+       buf = kmalloc(PATH_MAX, GFP_KERNEL);
+       if (!buf)
+               goto out;
+
+       css = task_get_css(tsk, cpuset_cgrp_id);
+       retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
+                               current->nsproxy->cgroup_ns);
+       css_put(css);
+       if (retval >= PATH_MAX)
+               retval = -ENAMETOOLONG;
+       if (retval < 0)
+               goto out_free;
+       seq_puts(m, buf);
+       seq_putc(m, '\n');
+       retval = 0;
+out_free:
+       kfree(buf);
+out:
+       return retval;
+}
+#endif /* CONFIG_PROC_PID_CPUSET */
+
+/* Display task mems_allowed in /proc/<pid>/status file. */
+void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
+{
+       seq_printf(m, "Mems_allowed:\t%*pb\n",
+                  nodemask_pr_args(&task->mems_allowed));
+       seq_printf(m, "Mems_allowed_list:\t%*pbl\n",
+                  nodemask_pr_args(&task->mems_allowed));
+}
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c
new file mode 100644 (file)
index 0000000..1b72d56
--- /dev/null
@@ -0,0 +1,481 @@
+/*
+ * cgroup_freezer.c -  control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+
+/*
+ * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
+ * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
+ * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING
+ * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of
+ * its ancestors has FREEZING_SELF set.
+ */
+enum freezer_state_flags {
+       CGROUP_FREEZER_ONLINE   = (1 << 0), /* freezer is fully online */
+       CGROUP_FREEZING_SELF    = (1 << 1), /* this freezer is freezing */
+       CGROUP_FREEZING_PARENT  = (1 << 2), /* the parent freezer is freezing */
+       CGROUP_FROZEN           = (1 << 3), /* this and its descendants frozen */
+
+       /* mask for all FREEZING flags */
+       CGROUP_FREEZING         = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
+};
+
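+/*
+ * Example combinations (illustrative): a cgroup whose ancestor is being
+ * frozen has CGROUP_FREEZING_PARENT set but not CGROUP_FREEZING_SELF;
+ * writing "FROZEN" to its own freezer.state additionally sets
+ * CGROUP_FREEZING_SELF.  CGROUP_FROZEN is added on top of FREEZING once
+ * every task and every online descendant is observed frozen.
+ */
+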
+struct freezer {
+       struct cgroup_subsys_state      css;
+       unsigned int                    state;
+};
+
+static DEFINE_MUTEX(freezer_mutex);
+
+static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
+{
+       return css ? container_of(css, struct freezer, css) : NULL;
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+       return css_freezer(task_css(task, freezer_cgrp_id));
+}
+
+static struct freezer *parent_freezer(struct freezer *freezer)
+{
+       return css_freezer(freezer->css.parent);
+}
+
+bool cgroup_freezing(struct task_struct *task)
+{
+       bool ret;
+
+       rcu_read_lock();
+       ret = task_freezer(task)->state & CGROUP_FREEZING;
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static const char *freezer_state_strs(unsigned int state)
+{
+       if (state & CGROUP_FROZEN)
+               return "FROZEN";
+       if (state & CGROUP_FREEZING)
+               return "FREEZING";
+       return "THAWED";
+}
+
+static struct cgroup_subsys_state *
+freezer_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+       struct freezer *freezer;
+
+       freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+       if (!freezer)
+               return ERR_PTR(-ENOMEM);
+
+       return &freezer->css;
+}
+
+/**
+ * freezer_css_online - commit creation of a freezer css
+ * @css: css being created
+ *
+ * We're committing to creation of @css.  Mark it online and inherit
+ * parent's freezing state while holding freezer_mutex.
+ */
+static int freezer_css_online(struct cgroup_subsys_state *css)
+{
+       struct freezer *freezer = css_freezer(css);
+       struct freezer *parent = parent_freezer(freezer);
+
+       mutex_lock(&freezer_mutex);
+
+       freezer->state |= CGROUP_FREEZER_ONLINE;
+
+       if (parent && (parent->state & CGROUP_FREEZING)) {
+               freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
+               atomic_inc(&system_freezing_cnt);
+       }
+
+       mutex_unlock(&freezer_mutex);
+       return 0;
+}
+
+/**
+ * freezer_css_offline - initiate destruction of a freezer css
+ * @css: css being destroyed
+ *
+ * @css is going away.  Mark it dead and decrement system_freezing_cnt if
+ * it was holding one.
+ */
+static void freezer_css_offline(struct cgroup_subsys_state *css)
+{
+       struct freezer *freezer = css_freezer(css);
+
+       mutex_lock(&freezer_mutex);
+
+       if (freezer->state & CGROUP_FREEZING)
+               atomic_dec(&system_freezing_cnt);
+
+       freezer->state = 0;
+
+       mutex_unlock(&freezer_mutex);
+}
+
+static void freezer_css_free(struct cgroup_subsys_state *css)
+{
+       kfree(css_freezer(css));
+}
+
+/*
+ * Tasks can be migrated into a different freezer anytime regardless of the
+ * freezer's current state.  freezer_attach() is responsible for making new
+ * tasks conform to the current state.
+ *
+ * Freezer state changes and task migration are synchronized via
+ * freezer_mutex.  freezer_attach() makes the new tasks conform to the
+ * current state and all following state changes can see the new tasks.
+ */
+static void freezer_attach(struct cgroup_taskset *tset)
+{
+       struct task_struct *task;
+       struct cgroup_subsys_state *new_css;
+
+       mutex_lock(&freezer_mutex);
+
+       /*
+        * Make the new tasks conform to the current state of @new_css.
+        * For simplicity, when migrating any task to a FROZEN cgroup, we
+        * revert it to FREEZING and let update_if_frozen() determine the
+        * correct state later.
+        *
+        * Tasks in @tset are on @new_css but may not conform to its
+        * current state before executing the following - !frozen tasks may
+        * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
+        */
+       cgroup_taskset_for_each(task, new_css, tset) {
+               struct freezer *freezer = css_freezer(new_css);
+
+               if (!(freezer->state & CGROUP_FREEZING)) {
+                       __thaw_task(task);
+               } else {
+                       freeze_task(task);
+                       /* clear FROZEN and propagate upwards */
+                       while (freezer && (freezer->state & CGROUP_FROZEN)) {
+                               freezer->state &= ~CGROUP_FROZEN;
+                               freezer = parent_freezer(freezer);
+                       }
+               }
+       }
+
+       mutex_unlock(&freezer_mutex);
+}
+
+/**
+ * freezer_fork - cgroup post fork callback
+ * @task: a task which has just been forked
+ *
+ * @task has just been created and should conform to the current state of
+ * the cgroup_freezer it belongs to.  This function may race against
+ * freezer_attach().  Losing to freezer_attach() means that we don't have
+ * to do anything as freezer_attach() will put @task into the appropriate
+ * state.
+ */
+static void freezer_fork(struct task_struct *task)
+{
+       struct freezer *freezer;
+
+       /*
+        * The root cgroup is non-freezable, so we can skip locking the
+        * freezer.  This is safe regardless of race with task migration.
+        * If we didn't race or won, skipping is obviously the right thing
+        * to do.  If we lost and root is the new cgroup, noop is still the
+        * right thing to do.
+        */
+       if (task_css_is_root(task, freezer_cgrp_id))
+               return;
+
+       mutex_lock(&freezer_mutex);
+       rcu_read_lock();
+
+       freezer = task_freezer(task);
+       if (freezer->state & CGROUP_FREEZING)
+               freeze_task(task);
+
+       rcu_read_unlock();
+       mutex_unlock(&freezer_mutex);
+}
+
+/**
+ * update_if_frozen - update whether a cgroup finished freezing
+ * @css: css of interest
+ *
+ * Once FREEZING is initiated, transition to FROZEN is lazily updated by
+ * calling this function.  If the current state is FREEZING but not FROZEN,
+ * this function checks whether all tasks of this cgroup and the descendant
+ * cgroups finished freezing and, if so, sets FROZEN.
+ *
+ * The caller is responsible for grabbing RCU read lock and calling
+ * update_if_frozen() on all descendants prior to invoking this function.
+ *
+ * Task states and freezer state might disagree while tasks are being
+ * migrated into or out of @css, so we can't verify task states against
+ * @freezer state here.  See freezer_attach() for details.
+ */
+static void update_if_frozen(struct cgroup_subsys_state *css)
+{
+       struct freezer *freezer = css_freezer(css);
+       struct cgroup_subsys_state *pos;
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       lockdep_assert_held(&freezer_mutex);
+
+       if (!(freezer->state & CGROUP_FREEZING) ||
+           (freezer->state & CGROUP_FROZEN))
+               return;
+
+       /* are all (live) children frozen? */
+       rcu_read_lock();
+       css_for_each_child(pos, css) {
+               struct freezer *child = css_freezer(pos);
+
+               if ((child->state & CGROUP_FREEZER_ONLINE) &&
+                   !(child->state & CGROUP_FROZEN)) {
+                       rcu_read_unlock();
+                       return;
+               }
+       }
+       rcu_read_unlock();
+
+       /* are all tasks frozen? */
+       css_task_iter_start(css, &it);
+
+       while ((task = css_task_iter_next(&it))) {
+               if (freezing(task)) {
+                       /*
+                        * freezer_should_skip() indicates that the task
+                        * should be skipped when determining freezing
+                        * completion.  Consider it frozen in addition to
+                        * the usual frozen condition.
+                        */
+                       if (!frozen(task) && !freezer_should_skip(task))
+                               goto out_iter_end;
+               }
+       }
+
+       freezer->state |= CGROUP_FROZEN;
+out_iter_end:
+       css_task_iter_end(&it);
+}
+
+static int freezer_read(struct seq_file *m, void *v)
+{
+       struct cgroup_subsys_state *css = seq_css(m), *pos;
+
+       mutex_lock(&freezer_mutex);
+       rcu_read_lock();
+
+       /* update states bottom-up */
+       css_for_each_descendant_post(pos, css) {
+               if (!css_tryget_online(pos))
+                       continue;
+               rcu_read_unlock();
+
+               update_if_frozen(pos);
+
+               rcu_read_lock();
+               css_put(pos);
+       }
+
+       rcu_read_unlock();
+       mutex_unlock(&freezer_mutex);
+
+       seq_puts(m, freezer_state_strs(css_freezer(css)->state));
+       seq_putc(m, '\n');
+       return 0;
+}
+
+static void freeze_cgroup(struct freezer *freezer)
+{
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       css_task_iter_start(&freezer->css, &it);
+       while ((task = css_task_iter_next(&it)))
+               freeze_task(task);
+       css_task_iter_end(&it);
+}
+
+static void unfreeze_cgroup(struct freezer *freezer)
+{
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       css_task_iter_start(&freezer->css, &it);
+       while ((task = css_task_iter_next(&it)))
+               __thaw_task(task);
+       css_task_iter_end(&it);
+}
+
+/**
+ * freezer_apply_state - apply state change to a single cgroup_freezer
+ * @freezer: freezer to apply state change to
+ * @freeze: whether to freeze or unfreeze
+ * @state: CGROUP_FREEZING_* flag to set or clear
+ *
+ * Set or clear @state on @freezer according to @freeze, and perform
+ * freezing or thawing as necessary.
+ */
+static void freezer_apply_state(struct freezer *freezer, bool freeze,
+                               unsigned int state)
+{
+       /* also synchronizes against task migration, see freezer_attach() */
+       lockdep_assert_held(&freezer_mutex);
+
+       if (!(freezer->state & CGROUP_FREEZER_ONLINE))
+               return;
+
+       if (freeze) {
+               if (!(freezer->state & CGROUP_FREEZING))
+                       atomic_inc(&system_freezing_cnt);
+               freezer->state |= state;
+               freeze_cgroup(freezer);
+       } else {
+               bool was_freezing = freezer->state & CGROUP_FREEZING;
+
+               freezer->state &= ~state;
+
+               if (!(freezer->state & CGROUP_FREEZING)) {
+                       if (was_freezing)
+                               atomic_dec(&system_freezing_cnt);
+                       freezer->state &= ~CGROUP_FROZEN;
+                       unfreeze_cgroup(freezer);
+               }
+       }
+}
+
+/**
+ * freezer_change_state - change the freezing state of a cgroup_freezer
+ * @freezer: freezer of interest
+ * @freeze: whether to freeze or thaw
+ *
+ * Freeze or thaw @freezer according to @freeze.  The operations are
+ * recursive - all descendants of @freezer will be affected.
+ */
+static void freezer_change_state(struct freezer *freezer, bool freeze)
+{
+       struct cgroup_subsys_state *pos;
+
+       /*
+        * Update all its descendants in pre-order traversal.  Each
+        * descendant will try to inherit its parent's FREEZING state as
+        * CGROUP_FREEZING_PARENT.
+        */
+       mutex_lock(&freezer_mutex);
+       rcu_read_lock();
+       css_for_each_descendant_pre(pos, &freezer->css) {
+               struct freezer *pos_f = css_freezer(pos);
+               struct freezer *parent = parent_freezer(pos_f);
+
+               if (!css_tryget_online(pos))
+                       continue;
+               rcu_read_unlock();
+
+               if (pos_f == freezer)
+                       freezer_apply_state(pos_f, freeze,
+                                           CGROUP_FREEZING_SELF);
+               else
+                       freezer_apply_state(pos_f,
+                                           parent->state & CGROUP_FREEZING,
+                                           CGROUP_FREEZING_PARENT);
+
+               rcu_read_lock();
+               css_put(pos);
+       }
+       rcu_read_unlock();
+       mutex_unlock(&freezer_mutex);
+}
+
+static ssize_t freezer_write(struct kernfs_open_file *of,
+                            char *buf, size_t nbytes, loff_t off)
+{
+       bool freeze;
+
+       buf = strstrip(buf);
+
+       if (strcmp(buf, freezer_state_strs(0)) == 0)
+               freeze = false;
+       else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
+               freeze = true;
+       else
+               return -EINVAL;
+
+       freezer_change_state(css_freezer(of_css(of)), freeze);
+       return nbytes;
+}
+
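+/*
+ * Usage sketch (path assumes a legacy freezer mount, illustrative only):
+ *
+ *      echo FROZEN > /sys/fs/cgroup/freezer/mygrp/freezer.state
+ *      cat /sys/fs/cgroup/freezer/mygrp/freezer.state  # FREEZING, then FROZEN
+ *      echo THAWED > /sys/fs/cgroup/freezer/mygrp/freezer.state
+ *
+ * Only "FROZEN" and "THAWED" are accepted; "FREEZING" is a transient
+ * state reported by reads and is rejected by freezer_write() above
+ * with -EINVAL.
+ */
+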
+static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
+                                     struct cftype *cft)
+{
+       struct freezer *freezer = css_freezer(css);
+
+       return (bool)(freezer->state & CGROUP_FREEZING_SELF);
+}
+
+static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
+                                       struct cftype *cft)
+{
+       struct freezer *freezer = css_freezer(css);
+
+       return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
+}
+
+static struct cftype files[] = {
+       {
+               .name = "state",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = freezer_read,
+               .write = freezer_write,
+       },
+       {
+               .name = "self_freezing",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_u64 = freezer_self_freezing_read,
+       },
+       {
+               .name = "parent_freezing",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_u64 = freezer_parent_freezing_read,
+       },
+       { }     /* terminate */
+};
+
+struct cgroup_subsys freezer_cgrp_subsys = {
+       .css_alloc      = freezer_css_alloc,
+       .css_online     = freezer_css_online,
+       .css_offline    = freezer_css_offline,
+       .css_free       = freezer_css_free,
+       .attach         = freezer_attach,
+       .fork           = freezer_fork,
+       .legacy_cftypes = files,
+};
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
new file mode 100644 (file)
index 0000000..cff7ea6
--- /dev/null
@@ -0,0 +1,155 @@
+#include "cgroup-internal.h"
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/nsproxy.h>
+#include <linux/proc_ns.h>
+
+
+/* cgroup namespaces */
+
+static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
+{
+       return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
+}
+
+static void dec_cgroup_namespaces(struct ucounts *ucounts)
+{
+       dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
+}
+
+static struct cgroup_namespace *alloc_cgroup_ns(void)
+{
+       struct cgroup_namespace *new_ns;
+       int ret;
+
+       new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
+       if (!new_ns)
+               return ERR_PTR(-ENOMEM);
+       ret = ns_alloc_inum(&new_ns->ns);
+       if (ret) {
+               kfree(new_ns);
+               return ERR_PTR(ret);
+       }
+       atomic_set(&new_ns->count, 1);
+       new_ns->ns.ops = &cgroupns_operations;
+       return new_ns;
+}
+
+void free_cgroup_ns(struct cgroup_namespace *ns)
+{
+       put_css_set(ns->root_cset);
+       dec_cgroup_namespaces(ns->ucounts);
+       put_user_ns(ns->user_ns);
+       ns_free_inum(&ns->ns);
+       kfree(ns);
+}
+EXPORT_SYMBOL(free_cgroup_ns);
+
+struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
+                                       struct user_namespace *user_ns,
+                                       struct cgroup_namespace *old_ns)
+{
+       struct cgroup_namespace *new_ns;
+       struct ucounts *ucounts;
+       struct css_set *cset;
+
+       BUG_ON(!old_ns);
+
+       if (!(flags & CLONE_NEWCGROUP)) {
+               get_cgroup_ns(old_ns);
+               return old_ns;
+       }
+
+       /* Allow only sysadmin to create cgroup namespace. */
+       if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+
+       ucounts = inc_cgroup_namespaces(user_ns);
+       if (!ucounts)
+               return ERR_PTR(-ENOSPC);
+
+       /* It is not safe to take cgroup_mutex here */
+       spin_lock_irq(&css_set_lock);
+       cset = task_css_set(current);
+       get_css_set(cset);
+       spin_unlock_irq(&css_set_lock);
+
+       new_ns = alloc_cgroup_ns();
+       if (IS_ERR(new_ns)) {
+               put_css_set(cset);
+               dec_cgroup_namespaces(ucounts);
+               return new_ns;
+       }
+
+       new_ns->user_ns = get_user_ns(user_ns);
+       new_ns->ucounts = ucounts;
+       new_ns->root_cset = cset;
+
+       return new_ns;
+}
+
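+/*
+ * Userspace view (illustrative sketch, not part of this change): the
+ * path above is reached via clone(2) or unshare(2) with CLONE_NEWCGROUP,
+ * which requires CAP_SYS_ADMIN in the target user namespace:
+ *
+ *      if (unshare(CLONE_NEWCGROUP))
+ *              err(1, "unshare");
+ *
+ * The caller's css_set at that moment becomes the namespace root, so
+ * the task's own cgroup appears as "/" in /proc/self/cgroup afterwards.
+ */
+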
+static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
+{
+       return container_of(ns, struct cgroup_namespace, ns);
+}
+
+static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+{
+       struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
+
+       if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
+           !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* Don't need to do anything if we are attaching to our own cgroupns. */
+       if (cgroup_ns == nsproxy->cgroup_ns)
+               return 0;
+
+       get_cgroup_ns(cgroup_ns);
+       put_cgroup_ns(nsproxy->cgroup_ns);
+       nsproxy->cgroup_ns = cgroup_ns;
+
+       return 0;
+}
+
+static struct ns_common *cgroupns_get(struct task_struct *task)
+{
+       struct cgroup_namespace *ns = NULL;
+       struct nsproxy *nsproxy;
+
+       task_lock(task);
+       nsproxy = task->nsproxy;
+       if (nsproxy) {
+               ns = nsproxy->cgroup_ns;
+               get_cgroup_ns(ns);
+       }
+       task_unlock(task);
+
+       return ns ? &ns->ns : NULL;
+}
+
+static void cgroupns_put(struct ns_common *ns)
+{
+       put_cgroup_ns(to_cg_ns(ns));
+}
+
+static struct user_namespace *cgroupns_owner(struct ns_common *ns)
+{
+       return to_cg_ns(ns)->user_ns;
+}
+
+const struct proc_ns_operations cgroupns_operations = {
+       .name           = "cgroup",
+       .type           = CLONE_NEWCGROUP,
+       .get            = cgroupns_get,
+       .put            = cgroupns_put,
+       .install        = cgroupns_install,
+       .owner          = cgroupns_owner,
+};
+
+static __init int cgroup_namespaces_init(void)
+{
+       return 0;
+}
+subsys_initcall(cgroup_namespaces_init);
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
new file mode 100644 (file)
index 0000000..2bd6737
--- /dev/null
@@ -0,0 +1,348 @@
+/*
+ * Process number limiting controller for cgroups.
+ *
+ * Used to allow a cgroup hierarchy to stop any new processes from fork()ing
+ * after a certain limit is reached.
+ *
+ * Since it is trivial to hit the task limit without hitting any kmemcg limits
+ * in place, PIDs are a fundamental resource. As such, PID exhaustion must be
+ * preventable in the scope of a cgroup hierarchy by allowing resource limiting
+ * of the number of tasks in a cgroup.
+ *
+ * In order to use the `pids` controller, set the maximum number of tasks in
+ * pids.max (this is not available in the root cgroup for obvious reasons). The
+ * number of processes currently in the cgroup is given by pids.current.
+ * Organisational operations are not blocked by cgroup policies, so it is
+ * possible to have pids.current > pids.max. However, it is not possible to
+ * violate a cgroup policy through fork(). fork() will return -EAGAIN if forking
+ * would cause a cgroup policy to be violated.
+ *
+ * To set a cgroup to have no limit, set pids.max to "max". This is the default
+ * for all new cgroups (N.B. that PID limits are hierarchical, so the most
+ * stringent limit in the hierarchy is followed).
+ *
+ * pids.current tracks all child cgroup hierarchies, so parent/pids.current is
+ * a superset of parent/child/pids.current.
+ *
+ * Copyright (C) 2015 Aleksa Sarai <cyphar@cyphar.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License.  See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
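+/*
+ * Usage sketch (paths illustrative, assuming the unified hierarchy is
+ * mounted at /sys/fs/cgroup with the pids controller enabled):
+ *
+ *      echo 16 > /sys/fs/cgroup/mygrp/pids.max
+ *      cat /sys/fs/cgroup/mygrp/pids.current
+ *      cat /sys/fs/cgroup/mygrp/pids.events    # "max <n>" fork failures
+ *
+ * Once pids.current reaches 16, further forks by member tasks fail
+ * with EAGAIN and the "max" event counter is bumped.
+ */
+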
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/atomic.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+
+#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
+#define PIDS_MAX_STR "max"
+
+struct pids_cgroup {
+       struct cgroup_subsys_state      css;
+
+       /*
+        * Use 64-bit types so that we can safely represent "max" as
+        * %PIDS_MAX = (%PID_MAX_LIMIT + 1).
+        */
+       atomic64_t                      counter;
+       int64_t                         limit;
+
+       /* Handle for "pids.events" */
+       struct cgroup_file              events_file;
+
+       /* Number of times fork failed because limit was hit. */
+       atomic64_t                      events_limit;
+};
+
+static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css)
+{
+       return container_of(css, struct pids_cgroup, css);
+}
+
+static struct pids_cgroup *parent_pids(struct pids_cgroup *pids)
+{
+       return css_pids(pids->css.parent);
+}
+
+static struct cgroup_subsys_state *
+pids_css_alloc(struct cgroup_subsys_state *parent)
+{
+       struct pids_cgroup *pids;
+
+       pids = kzalloc(sizeof(struct pids_cgroup), GFP_KERNEL);
+       if (!pids)
+               return ERR_PTR(-ENOMEM);
+
+       pids->limit = PIDS_MAX;
+       atomic64_set(&pids->counter, 0);
+       atomic64_set(&pids->events_limit, 0);
+       return &pids->css;
+}
+
+static void pids_css_free(struct cgroup_subsys_state *css)
+{
+       kfree(css_pids(css));
+}
+
+/**
+ * pids_cancel - uncharge the local pid count
+ * @pids: the pid cgroup state
+ * @num: the number of pids to cancel
+ *
+ * This function will WARN if the pid count goes under 0, because such a case is
+ * a bug in the pids controller proper.
+ */
+static void pids_cancel(struct pids_cgroup *pids, int num)
+{
+       /*
+        * A negative count (or overflow for that matter) is invalid,
+        * and indicates a bug in the `pids` controller proper.
+        */
+       WARN_ON_ONCE(atomic64_add_negative(-num, &pids->counter));
+}
+
+/**
+ * pids_uncharge - hierarchically uncharge the pid count
+ * @pids: the pid cgroup state
+ * @num: the number of pids to uncharge
+ */
+static void pids_uncharge(struct pids_cgroup *pids, int num)
+{
+       struct pids_cgroup *p;
+
+       for (p = pids; parent_pids(p); p = parent_pids(p))
+               pids_cancel(p, num);
+}
+
+/**
+ * pids_charge - hierarchically charge the pid count
+ * @pids: the pid cgroup state
+ * @num: the number of pids to charge
+ *
+ * This function does *not* follow the configured pid limit. It cannot fail and the new
+ * pid count may exceed the limit. This is only used for reverting failed
+ * attaches, where there is no other way out than violating the limit.
+ */
+static void pids_charge(struct pids_cgroup *pids, int num)
+{
+       struct pids_cgroup *p;
+
+       for (p = pids; parent_pids(p); p = parent_pids(p))
+               atomic64_add(num, &p->counter);
+}
+
+/**
+ * pids_try_charge - hierarchically try to charge the pid count
+ * @pids: the pid cgroup state
+ * @num: the number of pids to charge
+ *
+ * This function follows the set limit. It will fail if the charge would cause
+ * the new value to exceed the hierarchical limit. Returns 0 if the charge
+ * succeeded, otherwise -EAGAIN.
+ */
+static int pids_try_charge(struct pids_cgroup *pids, int num)
+{
+       struct pids_cgroup *p, *q;
+
+       for (p = pids; parent_pids(p); p = parent_pids(p)) {
+               int64_t new = atomic64_add_return(num, &p->counter);
+
+               /*
+                * Since new is capped to the maximum number of pid_t, if
+                * p->limit is %PIDS_MAX then we know that this test will never
+                * fail.
+                */
+               if (new > p->limit)
+                       goto revert;
+       }
+
+       return 0;
+
+revert:
+       for (q = pids; q != p; q = parent_pids(q))
+               pids_cancel(q, num);
+       pids_cancel(p, num);
+
+       return -EAGAIN;
+}
+
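+/*
+ * Worked example (illustrative): for a task in /a/b with a.limit = 10
+ * and b.limit = max, a fork charges b's counter and then a's.  If a's
+ * new value would exceed 10, the code jumps to revert:, which uncharges
+ * every pool from the leaf up to (but excluding) the failing one and
+ * then cancels the failed charge on the failing pool itself, leaving
+ * all counters as they were before the attempt.
+ */
+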
+static int pids_can_attach(struct cgroup_taskset *tset)
+{
+       struct task_struct *task;
+       struct cgroup_subsys_state *dst_css;
+
+       cgroup_taskset_for_each(task, dst_css, tset) {
+               struct pids_cgroup *pids = css_pids(dst_css);
+               struct cgroup_subsys_state *old_css;
+               struct pids_cgroup *old_pids;
+
+               /*
+                * No need to pin @old_css between here and cancel_attach()
+                * because cgroup core protects it from being freed before
+                * the migration completes or fails.
+                */
+               old_css = task_css(task, pids_cgrp_id);
+               old_pids = css_pids(old_css);
+
+               pids_charge(pids, 1);
+               pids_uncharge(old_pids, 1);
+       }
+
+       return 0;
+}
+
+static void pids_cancel_attach(struct cgroup_taskset *tset)
+{
+       struct task_struct *task;
+       struct cgroup_subsys_state *dst_css;
+
+       cgroup_taskset_for_each(task, dst_css, tset) {
+               struct pids_cgroup *pids = css_pids(dst_css);
+               struct cgroup_subsys_state *old_css;
+               struct pids_cgroup *old_pids;
+
+               old_css = task_css(task, pids_cgrp_id);
+               old_pids = css_pids(old_css);
+
+               pids_charge(old_pids, 1);
+               pids_uncharge(pids, 1);
+       }
+}
+
+/*
+ * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
+ * on threadgroup_change_begin() held by copy_process().
+ */
+static int pids_can_fork(struct task_struct *task)
+{
+       struct cgroup_subsys_state *css;
+       struct pids_cgroup *pids;
+       int err;
+
+       css = task_css_check(current, pids_cgrp_id, true);
+       pids = css_pids(css);
+       err = pids_try_charge(pids, 1);
+       if (err) {
+               /* Only log the first time events_limit is incremented. */
+               if (atomic64_inc_return(&pids->events_limit) == 1) {
+                       pr_info("cgroup: fork rejected by pids controller in ");
+                       pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id));
+                       pr_cont("\n");
+               }
+               cgroup_file_notify(&pids->events_file);
+       }
+       return err;
+}
+
+static void pids_cancel_fork(struct task_struct *task)
+{
+       struct cgroup_subsys_state *css;
+       struct pids_cgroup *pids;
+
+       css = task_css_check(current, pids_cgrp_id, true);
+       pids = css_pids(css);
+       pids_uncharge(pids, 1);
+}
+
+static void pids_free(struct task_struct *task)
+{
+       struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
+
+       pids_uncharge(pids, 1);
+}
+
+static ssize_t pids_max_write(struct kernfs_open_file *of, char *buf,
+                             size_t nbytes, loff_t off)
+{
+       struct cgroup_subsys_state *css = of_css(of);
+       struct pids_cgroup *pids = css_pids(css);
+       int64_t limit;
+       int err;
+
+       buf = strstrip(buf);
+       if (!strcmp(buf, PIDS_MAX_STR)) {
+               limit = PIDS_MAX;
+               goto set_limit;
+       }
+
+       err = kstrtoll(buf, 0, &limit);
+       if (err)
+               return err;
+
+       if (limit < 0 || limit >= PIDS_MAX)
+               return -EINVAL;
+
+set_limit:
+       /*
+        * Limit updates don't need to be mutex'd, since it isn't
+        * critical that any racing fork()s follow the new limit.
+        */
+       pids->limit = limit;
+       return nbytes;
+}
+
+static int pids_max_show(struct seq_file *sf, void *v)
+{
+       struct cgroup_subsys_state *css = seq_css(sf);
+       struct pids_cgroup *pids = css_pids(css);
+       int64_t limit = pids->limit;
+
+       if (limit >= PIDS_MAX)
+               seq_printf(sf, "%s\n", PIDS_MAX_STR);
+       else
+               seq_printf(sf, "%lld\n", limit);
+
+       return 0;
+}
+
+static s64 pids_current_read(struct cgroup_subsys_state *css,
+                            struct cftype *cft)
+{
+       struct pids_cgroup *pids = css_pids(css);
+
+       return atomic64_read(&pids->counter);
+}
+
+static int pids_events_show(struct seq_file *sf, void *v)
+{
+       struct pids_cgroup *pids = css_pids(seq_css(sf));
+
+       seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events_limit));
+       return 0;
+}
+
+static struct cftype pids_files[] = {
+       {
+               .name = "max",
+               .write = pids_max_write,
+               .seq_show = pids_max_show,
+               .flags = CFTYPE_NOT_ON_ROOT,
+       },
+       {
+               .name = "current",
+               .read_s64 = pids_current_read,
+               .flags = CFTYPE_NOT_ON_ROOT,
+       },
+       {
+               .name = "events",
+               .seq_show = pids_events_show,
+               .file_offset = offsetof(struct pids_cgroup, events_file),
+               .flags = CFTYPE_NOT_ON_ROOT,
+       },
+       { }     /* terminate */
+};
+
+struct cgroup_subsys pids_cgrp_subsys = {
+       .css_alloc      = pids_css_alloc,
+       .css_free       = pids_css_free,
+       .can_attach     = pids_can_attach,
+       .cancel_attach  = pids_cancel_attach,
+       .can_fork       = pids_can_fork,
+       .cancel_fork    = pids_cancel_fork,
+       .free           = pids_free,
+       .legacy_cftypes = pids_files,
+       .dfl_cftypes    = pids_files,
+};
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
new file mode 100644 (file)
index 0000000..defad3c
--- /dev/null
@@ -0,0 +1,619 @@
+/*
+ * RDMA resource limiting controller for cgroups.
+ *
+ * Used to allow a cgroup hierarchy to stop processes from consuming
+ * additional RDMA resources after a certain limit is reached.
+ *
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License. See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/seq_file.h>
+#include <linux/cgroup.h>
+#include <linux/parser.h>
+#include <linux/cgroup_rdma.h>
+
+#define RDMACG_MAX_STR "max"
+
+/*
+ * Protects list of resource pools maintained on per cgroup basis
+ * and rdma device list.
+ */
+static DEFINE_MUTEX(rdmacg_mutex);
+static LIST_HEAD(rdmacg_devices);
+
+enum rdmacg_file_type {
+       RDMACG_RESOURCE_TYPE_MAX,
+       RDMACG_RESOURCE_TYPE_STAT,
+};
+
+/*
+ * Resource table definition as seen by the user. Entries need to be
+ * added when more resources are defined at the IB verbs/core layer.
+ */
+static char const *rdmacg_resource_names[] = {
+       [RDMACG_RESOURCE_HCA_HANDLE]    = "hca_handle",
+       [RDMACG_RESOURCE_HCA_OBJECT]    = "hca_object",
+};
+
+/* resource tracker for each resource of rdma cgroup */
+struct rdmacg_resource {
+       int max;
+       int usage;
+};
+
+/*
+ * Resource pool object which represents per-cgroup, per-device
+ * resources. There are multiple instances of this object per cgroup,
+ * so it cannot be embedded within the rdma_cgroup structure; it is
+ * maintained as a list instead.
+ */
+struct rdmacg_resource_pool {
+       struct rdmacg_device    *device;
+       struct rdmacg_resource  resources[RDMACG_RESOURCE_MAX];
+
+       struct list_head        cg_node;
+       struct list_head        dev_node;
+
+       /* sum of all resource usage counts in this pool */
+       u64                     usage_sum;
+       /* number of resource limits that are set to max */
+       int                     num_max_cnt;
+};
+
+static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
+{
+       return container_of(css, struct rdma_cgroup, css);
+}
+
+static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg)
+{
+       return css_rdmacg(cg->css.parent);
+}
+
+static inline struct rdma_cgroup *get_current_rdmacg(void)
+{
+       return css_rdmacg(task_get_css(current, rdma_cgrp_id));
+}
+
+static void set_resource_limit(struct rdmacg_resource_pool *rpool,
+                              int index, int new_max)
+{
+       if (new_max == S32_MAX) {
+               if (rpool->resources[index].max != S32_MAX)
+                       rpool->num_max_cnt++;
+       } else {
+               if (rpool->resources[index].max == S32_MAX)
+                       rpool->num_max_cnt--;
+       }
+       rpool->resources[index].max = new_max;
+}
+
+static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool)
+{
+       int i;
+
+       for (i = 0; i < RDMACG_RESOURCE_MAX; i++)
+               set_resource_limit(rpool, i, S32_MAX);
+}
+
+static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
+{
+       lockdep_assert_held(&rdmacg_mutex);
+
+       list_del(&rpool->cg_node);
+       list_del(&rpool->dev_node);
+       kfree(rpool);
+}
+
+static struct rdmacg_resource_pool *
+find_cg_rpool_locked(struct rdma_cgroup *cg,
+                    struct rdmacg_device *device)
+{
+       struct rdmacg_resource_pool *pool;
+
+       lockdep_assert_held(&rdmacg_mutex);
+
+       list_for_each_entry(pool, &cg->rpools, cg_node)
+               if (pool->device == device)
+                       return pool;
+
+       return NULL;
+}
+
+static struct rdmacg_resource_pool *
+get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device)
+{
+       struct rdmacg_resource_pool *rpool;
+
+       rpool = find_cg_rpool_locked(cg, device);
+       if (rpool)
+               return rpool;
+
+       rpool = kzalloc(sizeof(*rpool), GFP_KERNEL);
+       if (!rpool)
+               return ERR_PTR(-ENOMEM);
+
+       rpool->device = device;
+       set_all_resource_max_limit(rpool);
+
+       INIT_LIST_HEAD(&rpool->cg_node);
+       INIT_LIST_HEAD(&rpool->dev_node);
+       list_add_tail(&rpool->cg_node, &cg->rpools);
+       list_add_tail(&rpool->dev_node, &device->rpools);
+       return rpool;
+}
+
+/**
+ * uncharge_cg_locked - uncharge resource for rdma cgroup
+ * @cg: cgroup to uncharge the resource from
+ * @device: pointer to rdmacg device
+ * @index: index of the resource to uncharge in cg (resource pool)
+ *
+ * It also frees the resource pool, which was created as part of the
+ * charging operation, once no resources remain attached to it.
+ */
+static void
+uncharge_cg_locked(struct rdma_cgroup *cg,
+                  struct rdmacg_device *device,
+                  enum rdmacg_resource_type index)
+{
+       struct rdmacg_resource_pool *rpool;
+
+       rpool = find_cg_rpool_locked(cg, device);
+
+       /*
+        * rpool cannot be NULL at this stage. If there is a bug in the
+        * IB stack or the rdma controller, let the kernel keep operating
+        * instead of crashing the system.
+        */
+       if (unlikely(!rpool)) {
+               pr_warn("Invalid rdma cgroup %p or device %p\n", cg, device);
+               return;
+       }
+
+       rpool->resources[index].usage--;
+
+       /*
+        * A negative count (or overflow) is invalid,
+        * it indicates a bug in the rdma controller.
+        */
+       WARN_ON_ONCE(rpool->resources[index].usage < 0);
+       rpool->usage_sum--;
+       if (rpool->usage_sum == 0 &&
+           rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
+               /*
+                * No user of the rpool and all entries are set to max, so
+                * safe to delete this rpool.
+                */
+               free_cg_rpool_locked(rpool);
+       }
+}
+
+/**
+ * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count
+ * @cg: cgroup at which to start uncharging
+ * @device: pointer to rdmacg device
+ * @stop_cg: cgroup at which to stop while walking up the hierarchy;
+ *           it is not uncharged itself
+ * @index: index of the resource to uncharge in each cgroup's resource pool
+ */
+static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
+                                    struct rdmacg_device *device,
+                                    struct rdma_cgroup *stop_cg,
+                                    enum rdmacg_resource_type index)
+{
+       struct rdma_cgroup *p;
+
+       mutex_lock(&rdmacg_mutex);
+
+       for (p = cg; p != stop_cg; p = parent_rdmacg(p))
+               uncharge_cg_locked(p, device, index);
+
+       mutex_unlock(&rdmacg_mutex);
+
+       css_put(&cg->css);
+}
+
+/**
+ * rdmacg_uncharge - hierarchically uncharge rdma resource count
+ * @cg: cgroup from which to uncharge, up to the root of the hierarchy
+ * @device: pointer to rdmacg device
+ * @index: index of the resource to uncharge in each cgroup's resource pool
+ */
+void rdmacg_uncharge(struct rdma_cgroup *cg,
+                    struct rdmacg_device *device,
+                    enum rdmacg_resource_type index)
+{
+       if (index >= RDMACG_RESOURCE_MAX)
+               return;
+
+       rdmacg_uncharge_hierarchy(cg, device, NULL, index);
+}
+EXPORT_SYMBOL(rdmacg_uncharge);
+
+/**
+ * rdmacg_try_charge - hierarchically try to charge the rdma resource
+ * @rdmacg: pointer to rdma cgroup which will own this resource
+ * @device: pointer to rdmacg device
+ * @index: index of the resource to charge in cgroup (resource pool)
+ *
+ * This function charges the resource hierarchically; it fails if the
+ * charge would cause the new value to exceed the hierarchical limit.
+ * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or
+ * -EINVAL. On success, a pointer to the rdma cgroup that owns the
+ * charge is returned through @rdmacg.
+ *
+ * The charger accounts resources on two criteria: (a) per-cgroup and
+ * (b) per-device resource usage. Per-cgroup accounting ensures that a
+ * cgroup's tasks do not cross the configured limits; per-device
+ * accounting allows granular configuration when multiple devices are
+ * in use. A resource pool is allocated in each parent cgroup the first
+ * time a resource is charged there; later charges and uncharges find
+ * the pool already in place and are therefore much faster.
+ */
+int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
+                     struct rdmacg_device *device,
+                     enum rdmacg_resource_type index)
+{
+       struct rdma_cgroup *cg, *p;
+       struct rdmacg_resource_pool *rpool;
+       s64 new;
+       int ret = 0;
+
+       if (index >= RDMACG_RESOURCE_MAX)
+               return -EINVAL;
+
+       /*
+        * hold on to css, as cgroup can be removed but resource
+        * accounting happens on css.
+        */
+       cg = get_current_rdmacg();
+
+       mutex_lock(&rdmacg_mutex);
+       for (p = cg; p; p = parent_rdmacg(p)) {
+               rpool = get_cg_rpool_locked(p, device);
+               if (IS_ERR(rpool)) {
+                       ret = PTR_ERR(rpool);
+                       goto err;
+               } else {
+                       new = rpool->resources[index].usage + 1;
+                       if (new > rpool->resources[index].max) {
+                               ret = -EAGAIN;
+                               goto err;
+                       } else {
+                               rpool->resources[index].usage = new;
+                               rpool->usage_sum++;
+                       }
+               }
+       }
+       mutex_unlock(&rdmacg_mutex);
+
+       *rdmacg = cg;
+       return 0;
+
+err:
+       mutex_unlock(&rdmacg_mutex);
+       rdmacg_uncharge_hierarchy(cg, device, p, index);
+       return ret;
+}
+EXPORT_SYMBOL(rdmacg_try_charge);
+
+/**
+ * rdmacg_register_device - register rdmacg device to rdma controller.
+ * @device: pointer to rdmacg device whose resources need to be accounted.
+ *
+ * If the IB stack wishes a device to participate in rdma cgroup
+ * resource tracking, it must invoke this API to register the device
+ * with the rdma cgroup before any user space application can start
+ * using its RDMA resources. Returns 0 on success.
+ */
+int rdmacg_register_device(struct rdmacg_device *device)
+{
+       INIT_LIST_HEAD(&device->dev_node);
+       INIT_LIST_HEAD(&device->rpools);
+
+       mutex_lock(&rdmacg_mutex);
+       list_add_tail(&device->dev_node, &rdmacg_devices);
+       mutex_unlock(&rdmacg_mutex);
+       return 0;
+}
+EXPORT_SYMBOL(rdmacg_register_device);
+
+/**
+ * rdmacg_unregister_device - unregister rdmacg device from rdma controller.
+ * @device: pointer to rdmacg device which was previously registered with rdma
+ *          controller using rdmacg_register_device().
+ *
+ * The IB stack must invoke this after all resources of the IB device
+ * have been destroyed, and after ensuring that no new resources will
+ * be created while this API is running.
+ */
+void rdmacg_unregister_device(struct rdmacg_device *device)
+{
+       struct rdmacg_resource_pool *rpool, *tmp;
+
+       /*
+        * Synchronize with any active resource settings and usage
+        * queries happening via the cgroup filesystem.
+        */
+       mutex_lock(&rdmacg_mutex);
+       list_del_init(&device->dev_node);
+
+       /*
+        * Now that this device is off the cgroup list, it's safe to free
+        * all the rpool resources.
+        */
+       list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
+               free_cg_rpool_locked(rpool);
+
+       mutex_unlock(&rdmacg_mutex);
+}
+EXPORT_SYMBOL(rdmacg_unregister_device);
+
+static int parse_resource(char *c, int *intval)
+{
+       substring_t argstr;
+       const char **table = &rdmacg_resource_names[0];
+       char *name, *value = c;
+       size_t len;
+       int ret, i = 0;
+
+       name = strsep(&value, "=");
+       if (!name || !value)
+               return -EINVAL;
+
+       len = strlen(value);
+
+       for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
+               if (strcmp(table[i], name))
+                       continue;
+
+               argstr.from = value;
+               argstr.to = value + len;
+
+               ret = match_int(&argstr, intval);
+               if (ret >= 0) {
+                       if (*intval < 0)
+                               break;
+                       return i;
+               }
+               if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
+                       *intval = S32_MAX;
+                       return i;
+               }
+               break;
+       }
+       return -EINVAL;
+}
+
+static int rdmacg_parse_limits(char *options,
+                              int *new_limits, unsigned long *enables)
+{
+       char *c;
+       int err = -EINVAL;
+
+       /* parse resource options */
+       while ((c = strsep(&options, " ")) != NULL) {
+               int index, intval;
+
+               index = parse_resource(c, &intval);
+               if (index < 0)
+                       goto err;
+
+               new_limits[index] = intval;
+               *enables |= BIT(index);
+       }
+       return 0;
+
+err:
+       return err;
+}
+
+static struct rdmacg_device *rdmacg_get_device_locked(const char *name)
+{
+       struct rdmacg_device *device;
+
+       lockdep_assert_held(&rdmacg_mutex);
+
+       list_for_each_entry(device, &rdmacg_devices, dev_node)
+               if (!strcmp(name, device->name))
+                       return device;
+
+       return NULL;
+}
+
+static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
+                                      char *buf, size_t nbytes, loff_t off)
+{
+       struct rdma_cgroup *cg = css_rdmacg(of_css(of));
+       const char *dev_name;
+       struct rdmacg_resource_pool *rpool;
+       struct rdmacg_device *device;
+       char *options = strstrip(buf);
+       int *new_limits;
+       unsigned long enables = 0;
+       int i = 0, ret = 0;
+
+       /* extract the device name first */
+       dev_name = strsep(&options, " ");
+       if (!dev_name) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL);
+       if (!new_limits) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       ret = rdmacg_parse_limits(options, new_limits, &enables);
+       if (ret)
+               goto parse_err;
+
+       /* acquire lock to synchronize with hot plug devices */
+       mutex_lock(&rdmacg_mutex);
+
+       device = rdmacg_get_device_locked(dev_name);
+       if (!device) {
+               ret = -ENODEV;
+               goto dev_err;
+       }
+
+       rpool = get_cg_rpool_locked(cg, device);
+       if (IS_ERR(rpool)) {
+               ret = PTR_ERR(rpool);
+               goto dev_err;
+       }
+
+       /* now set the new limits of the rpool */
+       for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX)
+               set_resource_limit(rpool, i, new_limits[i]);
+
+       if (rpool->usage_sum == 0 &&
+           rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
+               /*
+                * No user of the rpool and all entries are set to max, so
+                * safe to delete this rpool.
+                */
+               free_cg_rpool_locked(rpool);
+       }
+
+dev_err:
+       mutex_unlock(&rdmacg_mutex);
+
+parse_err:
+       kfree(new_limits);
+
+err:
+       return ret ?: nbytes;
+}
+
+static void print_rpool_values(struct seq_file *sf,
+                              struct rdmacg_resource_pool *rpool)
+{
+       enum rdmacg_file_type sf_type;
+       int i;
+       u32 value;
+
+       sf_type = seq_cft(sf)->private;
+
+       for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
+               seq_puts(sf, rdmacg_resource_names[i]);
+               seq_putc(sf, '=');
+               if (sf_type == RDMACG_RESOURCE_TYPE_MAX) {
+                       if (rpool)
+                               value = rpool->resources[i].max;
+                       else
+                               value = S32_MAX;
+               } else {
+                       if (rpool)
+                               value = rpool->resources[i].usage;
+                       else
+                               value = 0;
+               }
+
+               if (value == S32_MAX)
+                       seq_puts(sf, RDMACG_MAX_STR);
+               else
+                       seq_printf(sf, "%d", value);
+               seq_putc(sf, ' ');
+       }
+}
+
+static int rdmacg_resource_read(struct seq_file *sf, void *v)
+{
+       struct rdmacg_device *device;
+       struct rdmacg_resource_pool *rpool;
+       struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
+
+       mutex_lock(&rdmacg_mutex);
+
+       list_for_each_entry(device, &rdmacg_devices, dev_node) {
+               seq_printf(sf, "%s ", device->name);
+
+               rpool = find_cg_rpool_locked(cg, device);
+               print_rpool_values(sf, rpool);
+
+               seq_putc(sf, '\n');
+       }
+
+       mutex_unlock(&rdmacg_mutex);
+       return 0;
+}
+
+static struct cftype rdmacg_files[] = {
+       {
+               .name = "max",
+               .write = rdmacg_resource_set_max,
+               .seq_show = rdmacg_resource_read,
+               .private = RDMACG_RESOURCE_TYPE_MAX,
+               .flags = CFTYPE_NOT_ON_ROOT,
+       },
+       {
+               .name = "current",
+               .seq_show = rdmacg_resource_read,
+               .private = RDMACG_RESOURCE_TYPE_STAT,
+               .flags = CFTYPE_NOT_ON_ROOT,
+       },
+       { }     /* terminate */
+};
+
+static struct cgroup_subsys_state *
+rdmacg_css_alloc(struct cgroup_subsys_state *parent)
+{
+       struct rdma_cgroup *cg;
+
+       cg = kzalloc(sizeof(*cg), GFP_KERNEL);
+       if (!cg)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&cg->rpools);
+       return &cg->css;
+}
+
+static void rdmacg_css_free(struct cgroup_subsys_state *css)
+{
+       struct rdma_cgroup *cg = css_rdmacg(css);
+
+       kfree(cg);
+}
+
+/**
+ * rdmacg_css_offline - cgroup css_offline callback
+ * @css: css of interest
+ *
+ * This function is called when @css is about to go away and responsible
+ * for shooting down all rdmacg associated with @css. As part of that it
+ * marks all the resource pool entries to max value, so that when resources are
+ * uncharged, associated resource pool can be freed as well.
+ */
+static void rdmacg_css_offline(struct cgroup_subsys_state *css)
+{
+       struct rdma_cgroup *cg = css_rdmacg(css);
+       struct rdmacg_resource_pool *rpool;
+
+       mutex_lock(&rdmacg_mutex);
+
+       list_for_each_entry(rpool, &cg->rpools, cg_node)
+               set_all_resource_max_limit(rpool);
+
+       mutex_unlock(&rdmacg_mutex);
+}
+
+struct cgroup_subsys rdma_cgrp_subsys = {
+       .css_alloc      = rdmacg_css_alloc,
+       .css_free       = rdmacg_css_free,
+       .css_offline    = rdmacg_css_offline,
+       .legacy_cftypes = rdmacg_files,
+       .dfl_cftypes    = rdmacg_files,
+};
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
deleted file mode 100644 (file)
index 1b72d56..0000000
+++ /dev/null
@@ -1,481 +0,0 @@
-/*
- * cgroup_freezer.c -  control group freezer subsystem
- *
- * Copyright IBM Corporation, 2007
- *
- * Author : Cedric Le Goater <clg@fr.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/freezer.h>
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-
-/*
- * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
- * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
- * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING
- * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of
- * its ancestors has FREEZING_SELF set.
- */
-enum freezer_state_flags {
-       CGROUP_FREEZER_ONLINE   = (1 << 0), /* freezer is fully online */
-       CGROUP_FREEZING_SELF    = (1 << 1), /* this freezer is freezing */
-       CGROUP_FREEZING_PARENT  = (1 << 2), /* the parent freezer is freezing */
-       CGROUP_FROZEN           = (1 << 3), /* this and its descendants frozen */
-
-       /* mask for all FREEZING flags */
-       CGROUP_FREEZING         = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
-};
-
-struct freezer {
-       struct cgroup_subsys_state      css;
-       unsigned int                    state;
-};
-
-static DEFINE_MUTEX(freezer_mutex);
-
-static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
-{
-       return css ? container_of(css, struct freezer, css) : NULL;
-}
-
-static inline struct freezer *task_freezer(struct task_struct *task)
-{
-       return css_freezer(task_css(task, freezer_cgrp_id));
-}
-
-static struct freezer *parent_freezer(struct freezer *freezer)
-{
-       return css_freezer(freezer->css.parent);
-}
-
-bool cgroup_freezing(struct task_struct *task)
-{
-       bool ret;
-
-       rcu_read_lock();
-       ret = task_freezer(task)->state & CGROUP_FREEZING;
-       rcu_read_unlock();
-
-       return ret;
-}
-
-static const char *freezer_state_strs(unsigned int state)
-{
-       if (state & CGROUP_FROZEN)
-               return "FROZEN";
-       if (state & CGROUP_FREEZING)
-               return "FREEZING";
-       return "THAWED";
-};
-
-static struct cgroup_subsys_state *
-freezer_css_alloc(struct cgroup_subsys_state *parent_css)
-{
-       struct freezer *freezer;
-
-       freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
-       if (!freezer)
-               return ERR_PTR(-ENOMEM);
-
-       return &freezer->css;
-}
-
-/**
- * freezer_css_online - commit creation of a freezer css
- * @css: css being created
- *
- * We're committing to creation of @css.  Mark it online and inherit
- * parent's freezing state while holding both parent's and our
- * freezer->lock.
- */
-static int freezer_css_online(struct cgroup_subsys_state *css)
-{
-       struct freezer *freezer = css_freezer(css);
-       struct freezer *parent = parent_freezer(freezer);
-
-       mutex_lock(&freezer_mutex);
-
-       freezer->state |= CGROUP_FREEZER_ONLINE;
-
-       if (parent && (parent->state & CGROUP_FREEZING)) {
-               freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
-               atomic_inc(&system_freezing_cnt);
-       }
-
-       mutex_unlock(&freezer_mutex);
-       return 0;
-}
-
-/**
- * freezer_css_offline - initiate destruction of a freezer css
- * @css: css being destroyed
- *
- * @css is going away.  Mark it dead and decrement system_freezing_cnt if
- * it was holding one.
- */
-static void freezer_css_offline(struct cgroup_subsys_state *css)
-{
-       struct freezer *freezer = css_freezer(css);
-
-       mutex_lock(&freezer_mutex);
-
-       if (freezer->state & CGROUP_FREEZING)
-               atomic_dec(&system_freezing_cnt);
-
-       freezer->state = 0;
-
-       mutex_unlock(&freezer_mutex);
-}
-
-static void freezer_css_free(struct cgroup_subsys_state *css)
-{
-       kfree(css_freezer(css));
-}
-
-/*
- * Tasks can be migrated into a different freezer anytime regardless of its
- * current state.  freezer_attach() is responsible for making new tasks
- * conform to the current state.
- *
- * Freezer state changes and task migration are synchronized via
- * @freezer->lock.  freezer_attach() makes the new tasks conform to the
- * current state and all following state changes can see the new tasks.
- */
-static void freezer_attach(struct cgroup_taskset *tset)
-{
-       struct task_struct *task;
-       struct cgroup_subsys_state *new_css;
-
-       mutex_lock(&freezer_mutex);
-
-       /*
-        * Make the new tasks conform to the current state of @new_css.
-        * For simplicity, when migrating any task to a FROZEN cgroup, we
-        * revert it to FREEZING and let update_if_frozen() determine the
-        * correct state later.
-        *
-        * Tasks in @tset are on @new_css but may not conform to its
-        * current state before executing the following - !frozen tasks may
-        * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
-        */
-       cgroup_taskset_for_each(task, new_css, tset) {
-               struct freezer *freezer = css_freezer(new_css);
-
-               if (!(freezer->state & CGROUP_FREEZING)) {
-                       __thaw_task(task);
-               } else {
-                       freeze_task(task);
-                       /* clear FROZEN and propagate upwards */
-                       while (freezer && (freezer->state & CGROUP_FROZEN)) {
-                               freezer->state &= ~CGROUP_FROZEN;
-                               freezer = parent_freezer(freezer);
-                       }
-               }
-       }
-
-       mutex_unlock(&freezer_mutex);
-}
-
-/**
- * freezer_fork - cgroup post fork callback
- * @task: a task which has just been forked
- *
- * @task has just been created and should conform to the current state of
- * the cgroup_freezer it belongs to.  This function may race against
- * freezer_attach().  Losing to freezer_attach() means that we don't have
- * to do anything as freezer_attach() will put @task into the appropriate
- * state.
- */
-static void freezer_fork(struct task_struct *task)
-{
-       struct freezer *freezer;
-
-       /*
-        * The root cgroup is non-freezable, so we can skip locking the
-        * freezer.  This is safe regardless of race with task migration.
-        * If we didn't race or won, skipping is obviously the right thing
-        * to do.  If we lost and root is the new cgroup, noop is still the
-        * right thing to do.
-        */
-       if (task_css_is_root(task, freezer_cgrp_id))
-               return;
-
-       mutex_lock(&freezer_mutex);
-       rcu_read_lock();
-
-       freezer = task_freezer(task);
-       if (freezer->state & CGROUP_FREEZING)
-               freeze_task(task);
-
-       rcu_read_unlock();
-       mutex_unlock(&freezer_mutex);
-}
-
-/**
- * update_if_frozen - update whether a cgroup finished freezing
- * @css: css of interest
- *
- * Once FREEZING is initiated, transition to FROZEN is lazily updated by
- * calling this function.  If the current state is FREEZING but not FROZEN,
- * this function checks whether all tasks of this cgroup and the descendant
- * cgroups finished freezing and, if so, sets FROZEN.
- *
- * The caller is responsible for grabbing RCU read lock and calling
- * update_if_frozen() on all descendants prior to invoking this function.
- *
- * Task states and freezer state might disagree while tasks are being
- * migrated into or out of @css, so we can't verify task states against
- * @freezer state here.  See freezer_attach() for details.
- */
-static void update_if_frozen(struct cgroup_subsys_state *css)
-{
-       struct freezer *freezer = css_freezer(css);
-       struct cgroup_subsys_state *pos;
-       struct css_task_iter it;
-       struct task_struct *task;
-
-       lockdep_assert_held(&freezer_mutex);
-
-       if (!(freezer->state & CGROUP_FREEZING) ||
-           (freezer->state & CGROUP_FROZEN))
-               return;
-
-       /* are all (live) children frozen? */
-       rcu_read_lock();
-       css_for_each_child(pos, css) {
-               struct freezer *child = css_freezer(pos);
-
-               if ((child->state & CGROUP_FREEZER_ONLINE) &&
-                   !(child->state & CGROUP_FROZEN)) {
-                       rcu_read_unlock();
-                       return;
-               }
-       }
-       rcu_read_unlock();
-
-       /* are all tasks frozen? */
-       css_task_iter_start(css, &it);
-
-       while ((task = css_task_iter_next(&it))) {
-               if (freezing(task)) {
-                       /*
-                        * freezer_should_skip() indicates that the task
-                        * should be skipped when determining freezing
-                        * completion.  Consider it frozen in addition to
-                        * the usual frozen condition.
-                        */
-                       if (!frozen(task) && !freezer_should_skip(task))
-                               goto out_iter_end;
-               }
-       }
-
-       freezer->state |= CGROUP_FROZEN;
-out_iter_end:
-       css_task_iter_end(&it);
-}
-
-static int freezer_read(struct seq_file *m, void *v)
-{
-       struct cgroup_subsys_state *css = seq_css(m), *pos;
-
-       mutex_lock(&freezer_mutex);
-       rcu_read_lock();
-
-       /* update states bottom-up */
-       css_for_each_descendant_post(pos, css) {
-               if (!css_tryget_online(pos))
-                       continue;
-               rcu_read_unlock();
-
-               update_if_frozen(pos);
-
-               rcu_read_lock();
-               css_put(pos);
-       }
-
-       rcu_read_unlock();
-       mutex_unlock(&freezer_mutex);
-
-       seq_puts(m, freezer_state_strs(css_freezer(css)->state));
-       seq_putc(m, '\n');
-       return 0;
-}
-
-static void freeze_cgroup(struct freezer *freezer)
-{
-       struct css_task_iter it;
-       struct task_struct *task;
-
-       css_task_iter_start(&freezer->css, &it);
-       while ((task = css_task_iter_next(&it)))
-               freeze_task(task);
-       css_task_iter_end(&it);
-}
-
-static void unfreeze_cgroup(struct freezer *freezer)
-{
-       struct css_task_iter it;
-       struct task_struct *task;
-
-       css_task_iter_start(&freezer->css, &it);
-       while ((task = css_task_iter_next(&it)))
-               __thaw_task(task);
-       css_task_iter_end(&it);
-}
-
-/**
- * freezer_apply_state - apply state change to a single cgroup_freezer
- * @freezer: freezer to apply state change to
- * @freeze: whether to freeze or unfreeze
- * @state: CGROUP_FREEZING_* flag to set or clear
- *
- * Set or clear @state on @cgroup according to @freeze, and perform
- * freezing or thawing as necessary.
- */
-static void freezer_apply_state(struct freezer *freezer, bool freeze,
-                               unsigned int state)
-{
-       /* also synchronizes against task migration, see freezer_attach() */
-       lockdep_assert_held(&freezer_mutex);
-
-       if (!(freezer->state & CGROUP_FREEZER_ONLINE))
-               return;
-
-       if (freeze) {
-               if (!(freezer->state & CGROUP_FREEZING))
-                       atomic_inc(&system_freezing_cnt);
-               freezer->state |= state;
-               freeze_cgroup(freezer);
-       } else {
-               bool was_freezing = freezer->state & CGROUP_FREEZING;
-
-               freezer->state &= ~state;
-
-               if (!(freezer->state & CGROUP_FREEZING)) {
-                       if (was_freezing)
-                               atomic_dec(&system_freezing_cnt);
-                       freezer->state &= ~CGROUP_FROZEN;
-                       unfreeze_cgroup(freezer);
-               }
-       }
-}
-
-/**
- * freezer_change_state - change the freezing state of a cgroup_freezer
- * @freezer: freezer of interest
- * @freeze: whether to freeze or thaw
- *
- * Freeze or thaw @freezer according to @freeze.  The operations are
- * recursive - all descendants of @freezer will be affected.
- */
-static void freezer_change_state(struct freezer *freezer, bool freeze)
-{
-       struct cgroup_subsys_state *pos;
-
-       /*
-        * Update all its descendants in pre-order traversal.  Each
-        * descendant will try to inherit its parent's FREEZING state as
-        * CGROUP_FREEZING_PARENT.
-        */
-       mutex_lock(&freezer_mutex);
-       rcu_read_lock();
-       css_for_each_descendant_pre(pos, &freezer->css) {
-               struct freezer *pos_f = css_freezer(pos);
-               struct freezer *parent = parent_freezer(pos_f);
-
-               if (!css_tryget_online(pos))
-                       continue;
-               rcu_read_unlock();
-
-               if (pos_f == freezer)
-                       freezer_apply_state(pos_f, freeze,
-                                           CGROUP_FREEZING_SELF);
-               else
-                       freezer_apply_state(pos_f,
-                                           parent->state & CGROUP_FREEZING,
-                                           CGROUP_FREEZING_PARENT);
-
-               rcu_read_lock();
-               css_put(pos);
-       }
-       rcu_read_unlock();
-       mutex_unlock(&freezer_mutex);
-}
-
-static ssize_t freezer_write(struct kernfs_open_file *of,
-                            char *buf, size_t nbytes, loff_t off)
-{
-       bool freeze;
-
-       buf = strstrip(buf);
-
-       if (strcmp(buf, freezer_state_strs(0)) == 0)
-               freeze = false;
-       else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
-               freeze = true;
-       else
-               return -EINVAL;
-
-       freezer_change_state(css_freezer(of_css(of)), freeze);
-       return nbytes;
-}
-
-static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
-                                     struct cftype *cft)
-{
-       struct freezer *freezer = css_freezer(css);
-
-       return (bool)(freezer->state & CGROUP_FREEZING_SELF);
-}
-
-static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
-                                       struct cftype *cft)
-{
-       struct freezer *freezer = css_freezer(css);
-
-       return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
-}
-
-static struct cftype files[] = {
-       {
-               .name = "state",
-               .flags = CFTYPE_NOT_ON_ROOT,
-               .seq_show = freezer_read,
-               .write = freezer_write,
-       },
-       {
-               .name = "self_freezing",
-               .flags = CFTYPE_NOT_ON_ROOT,
-               .read_u64 = freezer_self_freezing_read,
-       },
-       {
-               .name = "parent_freezing",
-               .flags = CFTYPE_NOT_ON_ROOT,
-               .read_u64 = freezer_parent_freezing_read,
-       },
-       { }     /* terminate */
-};
-
-struct cgroup_subsys freezer_cgrp_subsys = {
-       .css_alloc      = freezer_css_alloc,
-       .css_online     = freezer_css_online,
-       .css_offline    = freezer_css_offline,
-       .css_free       = freezer_css_free,
-       .attach         = freezer_attach,
-       .fork           = freezer_fork,
-       .legacy_cftypes = files,
-};
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
deleted file mode 100644 (file)
index 2bd6737..0000000
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Process number limiting controller for cgroups.
- *
- * Used to allow a cgroup hierarchy to stop any new processes from fork()ing
- * after a certain limit is reached.
- *
- * Since it is trivial to hit the task limit without hitting any kmemcg limits
- * in place, PIDs are a fundamental resource. As such, PID exhaustion must be
- * preventable in the scope of a cgroup hierarchy by allowing resource limiting
- * of the number of tasks in a cgroup.
- *
- * In order to use the `pids` controller, set the maximum number of tasks in
- * pids.max (this is not available in the root cgroup for obvious reasons). The
- * number of processes currently in the cgroup is given by pids.current.
- * Organisational operations are not blocked by cgroup policies, so it is
- * possible to have pids.current > pids.max. However, it is not possible to
- * violate a cgroup policy through fork(). fork() will return -EAGAIN if forking
- * would cause a cgroup policy to be violated.
- *
- * To set a cgroup to have no limit, set pids.max to "max". This is the default
- * for all new cgroups (N.B. that PID limits are hierarchical, so the most
- * stringent limit in the hierarchy is followed).
- *
- * pids.current tracks all child cgroup hierarchies, so parent/pids.current is
- * a superset of parent/child/pids.current.
- *
- * Copyright (C) 2015 Aleksa Sarai <cyphar@cyphar.com>
- *
- * This file is subject to the terms and conditions of version 2 of the GNU
- * General Public License.  See the file COPYING in the main directory of the
- * Linux distribution for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/atomic.h>
-#include <linux/cgroup.h>
-#include <linux/slab.h>
-
-#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
-#define PIDS_MAX_STR "max"
-
-struct pids_cgroup {
-       struct cgroup_subsys_state      css;
-
-       /*
-        * Use 64-bit types so that we can safely represent "max" as
-        * %PIDS_MAX = (%PID_MAX_LIMIT + 1).
-        */
-       atomic64_t                      counter;
-       int64_t                         limit;
-
-       /* Handle for "pids.events" */
-       struct cgroup_file              events_file;
-
-       /* Number of times fork failed because limit was hit. */
-       atomic64_t                      events_limit;
-};
-
-static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css)
-{
-       return container_of(css, struct pids_cgroup, css);
-}
-
-static struct pids_cgroup *parent_pids(struct pids_cgroup *pids)
-{
-       return css_pids(pids->css.parent);
-}
-
-static struct cgroup_subsys_state *
-pids_css_alloc(struct cgroup_subsys_state *parent)
-{
-       struct pids_cgroup *pids;
-
-       pids = kzalloc(sizeof(struct pids_cgroup), GFP_KERNEL);
-       if (!pids)
-               return ERR_PTR(-ENOMEM);
-
-       pids->limit = PIDS_MAX;
-       atomic64_set(&pids->counter, 0);
-       atomic64_set(&pids->events_limit, 0);
-       return &pids->css;
-}
-
-static void pids_css_free(struct cgroup_subsys_state *css)
-{
-       kfree(css_pids(css));
-}
-
-/**
- * pids_cancel - uncharge the local pid count
- * @pids: the pid cgroup state
- * @num: the number of pids to cancel
- *
- * This function will WARN if the pid count goes under 0, because such a case is
- * a bug in the pids controller proper.
- */
-static void pids_cancel(struct pids_cgroup *pids, int num)
-{
-       /*
-        * A negative count (or overflow for that matter) is invalid,
-        * and indicates a bug in the `pids` controller proper.
-        */
-       WARN_ON_ONCE(atomic64_add_negative(-num, &pids->counter));
-}
-
-/**
- * pids_uncharge - hierarchically uncharge the pid count
- * @pids: the pid cgroup state
- * @num: the number of pids to uncharge
- */
-static void pids_uncharge(struct pids_cgroup *pids, int num)
-{
-       struct pids_cgroup *p;
-
-       for (p = pids; parent_pids(p); p = parent_pids(p))
-               pids_cancel(p, num);
-}
-
-/**
- * pids_charge - hierarchically charge the pid count
- * @pids: the pid cgroup state
- * @num: the number of pids to charge
- *
- * This function does *not* follow the pid limit set. It cannot fail and the new
- * pid count may exceed the limit. This is only used for reverting failed
- * attaches, where there is no other way out than violating the limit.
- */
-static void pids_charge(struct pids_cgroup *pids, int num)
-{
-       struct pids_cgroup *p;
-
-       for (p = pids; parent_pids(p); p = parent_pids(p))
-               atomic64_add(num, &p->counter);
-}
-
-/**
- * pids_try_charge - hierarchically try to charge the pid count
- * @pids: the pid cgroup state
- * @num: the number of pids to charge
- *
- * This function follows the set limit. It will fail if the charge would cause
- * the new value to exceed the hierarchical limit. Returns 0 if the charge
- * succeeded, otherwise -EAGAIN.
- */
-static int pids_try_charge(struct pids_cgroup *pids, int num)
-{
-       struct pids_cgroup *p, *q;
-
-       for (p = pids; parent_pids(p); p = parent_pids(p)) {
-               int64_t new = atomic64_add_return(num, &p->counter);
-
-               /*
-                * Since new is capped to the maximum number of pid_t, if
-                * p->limit is %PIDS_MAX then we know that this test will never
-                * fail.
-                */
-               if (new > p->limit)
-                       goto revert;
-       }
-
-       return 0;
-
-revert:
-       for (q = pids; q != p; q = parent_pids(q))
-               pids_cancel(q, num);
-       pids_cancel(p, num);
-
-       return -EAGAIN;
-}
-
-static int pids_can_attach(struct cgroup_taskset *tset)
-{
-       struct task_struct *task;
-       struct cgroup_subsys_state *dst_css;
-
-       cgroup_taskset_for_each(task, dst_css, tset) {
-               struct pids_cgroup *pids = css_pids(dst_css);
-               struct cgroup_subsys_state *old_css;
-               struct pids_cgroup *old_pids;
-
-               /*
-                * No need to pin @old_css between here and cancel_attach()
-                * because cgroup core protects it from being freed before
-                * the migration completes or fails.
-                */
-               old_css = task_css(task, pids_cgrp_id);
-               old_pids = css_pids(old_css);
-
-               pids_charge(pids, 1);
-               pids_uncharge(old_pids, 1);
-       }
-
-       return 0;
-}
-
-static void pids_cancel_attach(struct cgroup_taskset *tset)
-{
-       struct task_struct *task;
-       struct cgroup_subsys_state *dst_css;
-
-       cgroup_taskset_for_each(task, dst_css, tset) {
-               struct pids_cgroup *pids = css_pids(dst_css);
-               struct cgroup_subsys_state *old_css;
-               struct pids_cgroup *old_pids;
-
-               old_css = task_css(task, pids_cgrp_id);
-               old_pids = css_pids(old_css);
-
-               pids_charge(old_pids, 1);
-               pids_uncharge(pids, 1);
-       }
-}
-
-/*
- * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
- * on threadgroup_change_begin() held by copy_process().
- */
-static int pids_can_fork(struct task_struct *task)
-{
-       struct cgroup_subsys_state *css;
-       struct pids_cgroup *pids;
-       int err;
-
-       css = task_css_check(current, pids_cgrp_id, true);
-       pids = css_pids(css);
-       err = pids_try_charge(pids, 1);
-       if (err) {
-               /* Only log the first time events_limit is incremented. */
-               if (atomic64_inc_return(&pids->events_limit) == 1) {
-                       pr_info("cgroup: fork rejected by pids controller in ");
-                       pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id));
-                       pr_cont("\n");
-               }
-               cgroup_file_notify(&pids->events_file);
-       }
-       return err;
-}
-
-static void pids_cancel_fork(struct task_struct *task)
-{
-       struct cgroup_subsys_state *css;
-       struct pids_cgroup *pids;
-
-       css = task_css_check(current, pids_cgrp_id, true);
-       pids = css_pids(css);
-       pids_uncharge(pids, 1);
-}
-
-static void pids_free(struct task_struct *task)
-{
-       struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
-
-       pids_uncharge(pids, 1);
-}
-
-static ssize_t pids_max_write(struct kernfs_open_file *of, char *buf,
-                             size_t nbytes, loff_t off)
-{
-       struct cgroup_subsys_state *css = of_css(of);
-       struct pids_cgroup *pids = css_pids(css);
-       int64_t limit;
-       int err;
-
-       buf = strstrip(buf);
-       if (!strcmp(buf, PIDS_MAX_STR)) {
-               limit = PIDS_MAX;
-               goto set_limit;
-       }
-
-       err = kstrtoll(buf, 0, &limit);
-       if (err)
-               return err;
-
-       if (limit < 0 || limit >= PIDS_MAX)
-               return -EINVAL;
-
-set_limit:
-       /*
-        * Limit updates don't need to be mutex'd, since it isn't
-        * critical that any racing fork()s follow the new limit.
-        */
-       pids->limit = limit;
-       return nbytes;
-}
-
-static int pids_max_show(struct seq_file *sf, void *v)
-{
-       struct cgroup_subsys_state *css = seq_css(sf);
-       struct pids_cgroup *pids = css_pids(css);
-       int64_t limit = pids->limit;
-
-       if (limit >= PIDS_MAX)
-               seq_printf(sf, "%s\n", PIDS_MAX_STR);
-       else
-               seq_printf(sf, "%lld\n", limit);
-
-       return 0;
-}
-
-static s64 pids_current_read(struct cgroup_subsys_state *css,
-                            struct cftype *cft)
-{
-       struct pids_cgroup *pids = css_pids(css);
-
-       return atomic64_read(&pids->counter);
-}
-
-static int pids_events_show(struct seq_file *sf, void *v)
-{
-       struct pids_cgroup *pids = css_pids(seq_css(sf));
-
-       seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events_limit));
-       return 0;
-}
-
-static struct cftype pids_files[] = {
-       {
-               .name = "max",
-               .write = pids_max_write,
-               .seq_show = pids_max_show,
-               .flags = CFTYPE_NOT_ON_ROOT,
-       },
-       {
-               .name = "current",
-               .read_s64 = pids_current_read,
-               .flags = CFTYPE_NOT_ON_ROOT,
-       },
-       {
-               .name = "events",
-               .seq_show = pids_events_show,
-               .file_offset = offsetof(struct pids_cgroup, events_file),
-               .flags = CFTYPE_NOT_ON_ROOT,
-       },
-       { }     /* terminate */
-};
-
-struct cgroup_subsys pids_cgrp_subsys = {
-       .css_alloc      = pids_css_alloc,
-       .css_free       = pids_css_free,
-       .can_attach     = pids_can_attach,
-       .cancel_attach  = pids_cancel_attach,
-       .can_fork       = pids_can_fork,
-       .cancel_fork    = pids_cancel_fork,
-       .free           = pids_free,
-       .legacy_cftypes = pids_files,
-       .dfl_cftypes    = pids_files,
-};
index 1a8f34f6360112bab3cc122b8027ec479768a7af..26a06e09a5bdec94dca885d62a97808e28d75eb8 100644 (file)
@@ -21,6 +21,7 @@ CONFIG_CP15_BARRIER_EMULATION=y
 CONFIG_DEFAULT_SECURITY_SELINUX=y
 CONFIG_EMBEDDED=y
 CONFIG_FB=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_INET6_AH=y
 CONFIG_INET6_ESP=y
@@ -129,6 +130,7 @@ CONFIG_PPP_DEFLATE=y
 CONFIG_PPP_MPPE=y
 CONFIG_PREEMPT=y
 CONFIG_QUOTA=y
+CONFIG_RANDOMIZE_BASE=y
 CONFIG_RTC_CLASS=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_SECCOMP=y
index 99127edc52043ce74c84536613ed0c3e3044ed7f..28ee064b674443b4afad906f4f26b6c1a839cc6e 100644 (file)
@@ -1,4 +1,5 @@
 #  KEEP ALPHABETICALLY SORTED
+# CONFIG_AIO is not set
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_LEGACY_PTYS is not set
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
deleted file mode 100644 (file)
index b308888..0000000
+++ /dev/null
@@ -1,2752 +0,0 @@
-/*
- *  kernel/cpuset.c
- *
- *  Processor and Memory placement constraints for sets of tasks.
- *
- *  Copyright (C) 2003 BULL SA.
- *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
- *  Copyright (C) 2006 Google, Inc
- *
- *  Portions derived from Patrick Mochel's sysfs code.
- *  sysfs is Copyright (c) 2001-3 Patrick Mochel
- *
- *  2003-10-10 Written by Simon Derr.
- *  2003-10-22 Updates by Stephen Hemminger.
- *  2004 May-July Rework by Paul Jackson.
- *  2006 Rework by Paul Menage to use generic cgroups
- *  2008 Rework of the scheduler domains and CPU hotplug handling
- *       by Max Krasnyansky
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of the Linux
- *  distribution for more details.
- */
-
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/cpuset.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/list.h>
-#include <linux/mempolicy.h>
-#include <linux/mm.h>
-#include <linux/memory.h>
-#include <linux/export.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/proc_fs.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/security.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/time.h>
-#include <linux/time64.h>
-#include <linux/backing-dev.h>
-#include <linux/sort.h>
-
-#include <linux/uaccess.h>
-#include <linux/atomic.h>
-#include <linux/mutex.h>
-#include <linux/cgroup.h>
-#include <linux/wait.h>
-
-DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
-
-/* See "Frequency meter" comments, below. */
-
-struct fmeter {
-       int cnt;                /* unprocessed events count */
-       int val;                /* most recent output value */
-       time64_t time;          /* clock (secs) when val computed */
-       spinlock_t lock;        /* guards read or write of above */
-};
-
-struct cpuset {
-       struct cgroup_subsys_state css;
-
-       unsigned long flags;            /* "unsigned long" so bitops work */
-
-       /*
-        * On default hierarchy:
-        *
-        * The user-configured masks can only be changed by writing to
-        * cpuset.cpus and cpuset.mems, and won't be limited by the
-        * parent masks.
-        *
-        * The effective masks are the real masks that apply to the tasks
-        * in the cpuset. They may be changed if the configured masks are
-        * changed or hotplug happens.
-        *
-        * effective_mask == configured_mask & parent's effective_mask,
-        * and if it ends up empty, it will inherit the parent's mask.
-        *
-        *
-        * On legacy hierarchy:
-        *
-        * The user-configured masks are always the same as the effective masks.
-        */
-
-       /* user-configured CPUs and Memory Nodes allowed to tasks */
-       cpumask_var_t cpus_allowed;
-       nodemask_t mems_allowed;
-
-       /* effective CPUs and Memory Nodes allowed to tasks */
-       cpumask_var_t effective_cpus;
-       nodemask_t effective_mems;
-
-       /*
-        * These are the old Memory Nodes that tasks took on.
-        *
-        * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
-        * - A new cpuset's old_mems_allowed is initialized when some
-        *   task is moved into it.
-        * - old_mems_allowed is used in cpuset_migrate_mm() when we change
-        *   cpuset.mems_allowed and have tasks' nodemask updated, and
-        *   then old_mems_allowed is updated to mems_allowed.
-        */
-       nodemask_t old_mems_allowed;
-
-       struct fmeter fmeter;           /* memory_pressure filter */
-
-       /*
-        * Tasks are being attached to this cpuset.  Used to prevent
-        * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
-        */
-       int attach_in_progress;
-
-       /* partition number for rebuild_sched_domains() */
-       int pn;
-
-       /* for custom sched domain */
-       int relax_domain_level;
-};
-
-static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
-{
-       return css ? container_of(css, struct cpuset, css) : NULL;
-}
-
-/* Retrieve the cpuset for a task */
-static inline struct cpuset *task_cs(struct task_struct *task)
-{
-       return css_cs(task_css(task, cpuset_cgrp_id));
-}
-
-static inline struct cpuset *parent_cs(struct cpuset *cs)
-{
-       return css_cs(cs->css.parent);
-}
-
-#ifdef CONFIG_NUMA
-static inline bool task_has_mempolicy(struct task_struct *task)
-{
-       return task->mempolicy;
-}
-#else
-static inline bool task_has_mempolicy(struct task_struct *task)
-{
-       return false;
-}
-#endif
-
-
-/* bits in struct cpuset flags field */
-typedef enum {
-       CS_ONLINE,
-       CS_CPU_EXCLUSIVE,
-       CS_MEM_EXCLUSIVE,
-       CS_MEM_HARDWALL,
-       CS_MEMORY_MIGRATE,
-       CS_SCHED_LOAD_BALANCE,
-       CS_SPREAD_PAGE,
-       CS_SPREAD_SLAB,
-} cpuset_flagbits_t;
-
-/* convenient tests for these bits */
-static inline bool is_cpuset_online(const struct cpuset *cs)
-{
-       return test_bit(CS_ONLINE, &cs->flags);
-}
-
-static inline int is_cpu_exclusive(const struct cpuset *cs)
-{
-       return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
-}
-
-static inline int is_mem_exclusive(const struct cpuset *cs)
-{
-       return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
-}
-
-static inline int is_mem_hardwall(const struct cpuset *cs)
-{
-       return test_bit(CS_MEM_HARDWALL, &cs->flags);
-}
-
-static inline int is_sched_load_balance(const struct cpuset *cs)
-{
-       return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
-}
-
-static inline int is_memory_migrate(const struct cpuset *cs)
-{
-       return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
-}
-
-static inline int is_spread_page(const struct cpuset *cs)
-{
-       return test_bit(CS_SPREAD_PAGE, &cs->flags);
-}
-
-static inline int is_spread_slab(const struct cpuset *cs)
-{
-       return test_bit(CS_SPREAD_SLAB, &cs->flags);
-}
-
-static struct cpuset top_cpuset = {
-       .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
-                 (1 << CS_MEM_EXCLUSIVE)),
-};
-
-/**
- * cpuset_for_each_child - traverse online children of a cpuset
- * @child_cs: loop cursor pointing to the current child
- * @pos_css: used for iteration
- * @parent_cs: target cpuset to walk children of
- *
- * Walk @child_cs through the online children of @parent_cs.  Must be used
- * with RCU read locked.
- */
-#define cpuset_for_each_child(child_cs, pos_css, parent_cs)            \
-       css_for_each_child((pos_css), &(parent_cs)->css)                \
-               if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
-
-/**
- * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
- * @des_cs: loop cursor pointing to the current descendant
- * @pos_css: used for iteration
- * @root_cs: target cpuset whose descendants to walk
- *
- * Walk @des_cs through the online descendants of @root_cs.  Must be used
- * with RCU read locked.  The caller may modify @pos_css by calling
- * css_rightmost_descendant() to skip subtree.  @root_cs is included in the
- * iteration and the first node to be visited.
- */
-#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)       \
-       css_for_each_descendant_pre((pos_css), &(root_cs)->css)         \
-               if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
-
-/*
- * There are two global locks guarding cpuset structures - cpuset_mutex and
- * callback_lock. We also require taking task_lock() when dereferencing a
- * task's cpuset pointer. See "The task_lock() exception", at the end of this
- * comment.
- *
- * A task must hold both locks to modify cpusets.  If a task holds
- * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
- * is the only task able to also acquire callback_lock and be able to
- * modify cpusets.  It can perform various checks on the cpuset structure
- * first, knowing nothing will change.  It can also allocate memory while
- * just holding cpuset_mutex.  While it is performing these checks, various
- * callback routines can briefly acquire callback_lock to query cpusets.
- * Once it is ready to make the changes, it takes callback_lock, blocking
- * everyone else.
- *
- * Calls to the kernel memory allocator can not be made while holding
- * callback_lock, as that would risk double tripping on callback_lock
- * from one of the callbacks into the cpuset code from within
- * __alloc_pages().
- *
- * If a task is only holding callback_lock, then it has read-only
- * access to cpusets.
- *
- * The task_struct fields mems_allowed and mempolicy may be changed by
- * another task, so we use alloc_lock in the task_struct to protect
- * them.
- *
- * The cpuset_common_file_read() handlers only hold callback_lock across
- * small pieces of code, such as when reading out possibly multi-word
- * cpumasks and nodemasks.
- *
- * Accessing a task's cpuset should be done in accordance with the
- * guidelines for accessing subsystem state in kernel/cgroup.c
- */
-
-static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_SPINLOCK(callback_lock);
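-
-/*
- * Write-side sketch (illustrative only, mirroring the locking rules
- * above): hold cpuset_mutex across the whole update and take
- * callback_lock only around the store itself.
- */
-#if 0 /* example only */
-static void example_modify(struct cpuset *cs, unsigned long newflags)
-{
-       mutex_lock(&cpuset_mutex);              /* excludes other modifiers */
-       /* checks and GFP_KERNEL allocations are safe here */
-       spin_lock_irq(&callback_lock);          /* briefly blocks readers */
-       cs->flags = newflags;                   /* the actual update */
-       spin_unlock_irq(&callback_lock);
-       mutex_unlock(&cpuset_mutex);
-}
-#endif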
-
-static struct workqueue_struct *cpuset_migrate_mm_wq;
-
-/*
- * CPU / memory hotplug is handled asynchronously.
- */
-static void cpuset_hotplug_workfn(struct work_struct *work);
-static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
-
-static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
-
-/*
- * This is ugly, but preserves the userspace API for existing cpuset
- * users. If someone tries to mount the "cpuset" filesystem, we
- * silently switch it to mount "cgroup" instead.
- */
-static struct dentry *cpuset_mount(struct file_system_type *fs_type,
-                        int flags, const char *unused_dev_name, void *data)
-{
-       struct file_system_type *cgroup_fs = get_fs_type("cgroup");
-       struct dentry *ret = ERR_PTR(-ENODEV);
-       if (cgroup_fs) {
-               char mountopts[] =
-                       "cpuset,noprefix,"
-                       "release_agent=/sbin/cpuset_release_agent";
-               ret = cgroup_fs->mount(cgroup_fs, flags,
-                                          unused_dev_name, mountopts);
-               put_filesystem(cgroup_fs);
-       }
-       return ret;
-}
-
-static struct file_system_type cpuset_fs_type = {
-       .name = "cpuset",
-       .mount = cpuset_mount,
-};
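-
-/*
- * Effect (illustrative): "mount -t cpuset none /mnt" behaves as if
- * "mount -t cgroup -o cpuset,noprefix,release_agent=/sbin/cpuset_release_agent"
- * had been requested on /mnt.
- */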
-
-/*
- * Return in pmask the portion of a cpuset's cpus_allowed that
- * are online.  If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus.
- *
- * One way or another, we guarantee to return some non-empty subset
- * of cpu_online_mask.
- *
- * Call with callback_lock or cpuset_mutex held.
- */
-static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
-{
-       while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
-               cs = parent_cs(cs);
-               if (unlikely(!cs)) {
-                       /*
-                        * The top cpuset doesn't have any online cpu as a
-                        * consequence of a race between cpuset_hotplug_work
-                        * and the cpu hotplug notifier.  But we know the top
-                        * cpuset's effective_cpus is on its way to be
-                        * identical to cpu_online_mask.
-                        */
-                       cpumask_copy(pmask, cpu_online_mask);
-                       return;
-               }
-       }
-       cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
-}
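-
-/*
- * Example (illustrative): if cs->effective_cpus is {2,3} and both CPUs
- * have gone offline, the loop above walks up until an ancestor still
- * has an online CPU, so the caller always gets a non-empty mask.
- */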
-
-/*
- * Return in *pmask the portion of a cpuset's mems_allowed that
- * are online, with memory.  If none are online with memory, walk
- * up the cpuset hierarchy until we find one that does have some
- * online mems.  The top cpuset always has some mems online.
- *
- * One way or another, we guarantee to return some non-empty subset
- * of node_states[N_MEMORY].
- *
- * Call with callback_lock or cpuset_mutex held.
- */
-static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
-{
-       while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY]))
-               cs = parent_cs(cs);
-       nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]);
-}
-
-/*
- * update task's spread flag if cpuset's page/slab spread flag is set
- *
- * Call with callback_lock or cpuset_mutex held.
- */
-static void cpuset_update_task_spread_flag(struct cpuset *cs,
-                                       struct task_struct *tsk)
-{
-       if (is_spread_page(cs))
-               task_set_spread_page(tsk);
-       else
-               task_clear_spread_page(tsk);
-
-       if (is_spread_slab(cs))
-               task_set_spread_slab(tsk);
-       else
-               task_clear_spread_slab(tsk);
-}
-
-/*
- * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
- *
- * One cpuset is a subset of another if all its allowed CPUs and
- * Memory Nodes are a subset of the other, and its exclusive flags
- * are only set if the other's are set.  Call holding cpuset_mutex.
- */
-
-static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
-{
-       return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
-               nodes_subset(p->mems_allowed, q->mems_allowed) &&
-               is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
-               is_mem_exclusive(p) <= is_mem_exclusive(q);
-}
-
-/**
- * alloc_trial_cpuset - allocate a trial cpuset
- * @cs: the cpuset that the trial cpuset duplicates
- */
-static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
-{
-       struct cpuset *trial;
-
-       trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
-       if (!trial)
-               return NULL;
-
-       if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL))
-               goto free_cs;
-       if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL))
-               goto free_cpus;
-
-       cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
-       cpumask_copy(trial->effective_cpus, cs->effective_cpus);
-       return trial;
-
-free_cpus:
-       free_cpumask_var(trial->cpus_allowed);
-free_cs:
-       kfree(trial);
-       return NULL;
-}
-
-/**
- * free_trial_cpuset - free the trial cpuset
- * @trial: the trial cpuset to be freed
- */
-static void free_trial_cpuset(struct cpuset *trial)
-{
-       free_cpumask_var(trial->effective_cpus);
-       free_cpumask_var(trial->cpus_allowed);
-       kfree(trial);
-}
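-
-/*
- * Usage sketch of the trial-cpuset pattern (illustrative only; compare
- * update_flag() below): duplicate, mutate, validate, then commit under
- * callback_lock or discard.  The caller must hold cpuset_mutex.
- */
-#if 0 /* example only */
-static int example_set_hardwall(struct cpuset *cs)
-{
-       struct cpuset *trialcs = alloc_trial_cpuset(cs);
-       int err;
-
-       if (!trialcs)
-               return -ENOMEM;
-       set_bit(CS_MEM_HARDWALL, &trialcs->flags);      /* proposed change */
-       err = validate_change(cs, trialcs);
-       if (!err) {
-               spin_lock_irq(&callback_lock);
-               cs->flags = trialcs->flags;             /* commit */
-               spin_unlock_irq(&callback_lock);
-       }
-       free_trial_cpuset(trialcs);
-       return err;
-}
-#endif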
-
-/*
- * validate_change() - Used to validate that any proposed cpuset change
- *                    follows the structural rules for cpusets.
- *
- * If we replaced the flag and mask values of the current cpuset
- * (cur) with those values in the trial cpuset (trial), would
- * our various subset and exclusive rules still be valid?  Presumes
- * cpuset_mutex held.
- *
- * 'cur' is the address of an actual, in-use cpuset.  Operations
- * such as list traversal that depend on the actual address of the
- * cpuset in the list must use cur below, not trial.
- *
- * 'trial' is the address of a bulk structure copy of cur, with
- * perhaps one or more of the fields cpus_allowed, mems_allowed,
- * or flags changed to new, trial values.
- *
- * Return 0 if valid, -errno if not.
- */
-
-static int validate_change(struct cpuset *cur, struct cpuset *trial)
-{
-       struct cgroup_subsys_state *css;
-       struct cpuset *c, *par;
-       int ret;
-
-       rcu_read_lock();
-
-       /* Each of our child cpusets must be a subset of us */
-       ret = -EBUSY;
-       cpuset_for_each_child(c, css, cur)
-               if (!is_cpuset_subset(c, trial))
-                       goto out;
-
-       /* Remaining checks don't apply to root cpuset */
-       ret = 0;
-       if (cur == &top_cpuset)
-               goto out;
-
-       par = parent_cs(cur);
-
-       /* On legacy hierarchy, we must be a subset of our parent cpuset. */
-       ret = -EACCES;
-       if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-           !is_cpuset_subset(trial, par))
-               goto out;
-
-       /*
-        * If either I or some sibling (!= me) is exclusive, we can't
-        * overlap
-        */
-       ret = -EINVAL;
-       cpuset_for_each_child(c, css, par) {
-               if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
-                   c != cur &&
-                   cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
-                       goto out;
-               if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
-                   c != cur &&
-                   nodes_intersects(trial->mems_allowed, c->mems_allowed))
-                       goto out;
-       }
-
-       /*
-        * Cpusets with tasks - existing or newly being attached - can't
-        * be changed to have empty cpus_allowed or mems_allowed.
-        */
-       ret = -ENOSPC;
-       if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
-               if (!cpumask_empty(cur->cpus_allowed) &&
-                   cpumask_empty(trial->cpus_allowed))
-                       goto out;
-               if (!nodes_empty(cur->mems_allowed) &&
-                   nodes_empty(trial->mems_allowed))
-                       goto out;
-       }
-
-       /*
-        * We can't shrink if we won't have enough room for SCHED_DEADLINE
-        * tasks.
-        */
-       ret = -EBUSY;
-       if (is_cpu_exclusive(cur) &&
-           !cpuset_cpumask_can_shrink(cur->cpus_allowed,
-                                      trial->cpus_allowed))
-               goto out;
-
-       ret = 0;
-out:
-       rcu_read_unlock();
-       return ret;
-}
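-
-/*
- * Example (illustrative): if sibling A is cpu_exclusive with cpus 0-1,
- * validate_change() rejects a trial that would give sibling B any of
- * cpus 0-1 (-EINVAL), and rejects emptying the cpus or mems of a
- * populated cpuset (-ENOSPC).
- */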
-
-#ifdef CONFIG_SMP
-/*
- * Helper routine for generate_sched_domains().
- * Do cpusets a, b have overlapping effective cpus_allowed masks?
- */
-static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
-{
-       return cpumask_intersects(a->effective_cpus, b->effective_cpus);
-}
-
-static void
-update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
-{
-       if (dattr->relax_domain_level < c->relax_domain_level)
-               dattr->relax_domain_level = c->relax_domain_level;
-       return;
-}
-
-static void update_domain_attr_tree(struct sched_domain_attr *dattr,
-                                   struct cpuset *root_cs)
-{
-       struct cpuset *cp;
-       struct cgroup_subsys_state *pos_css;
-
-       rcu_read_lock();
-       cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
-               /* skip the whole subtree if @cp doesn't have any CPU */
-               if (cpumask_empty(cp->cpus_allowed)) {
-                       pos_css = css_rightmost_descendant(pos_css);
-                       continue;
-               }
-
-               if (is_sched_load_balance(cp))
-                       update_domain_attr(dattr, cp);
-       }
-       rcu_read_unlock();
-}
-
-/*
- * generate_sched_domains()
- *
- * This function builds a partial partition of the system's CPUs.
- * A 'partial partition' is a set of non-overlapping subsets whose
- * union is a subset of that set.
- * The output of this function needs to be passed to the kernel/sched/core.c
- * routine partition_sched_domains(), which will rebuild the scheduler's
- * load balancing domains (sched domains) as specified by that partial
- * partition.
- *
- * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
- * for a background explanation of this.
- *
- * Does not return errors, on the theory that the callers of this
- * routine would rather not worry about failures to rebuild sched
- * domains when operating in the severe memory shortage situations
- * that could cause allocation failures below.
- *
- * Must be called with cpuset_mutex held.
- *
- * The two key local variables below are:
- *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
- *        that need to be load balanced.  It is filled by a top-down
- *        (pre-order) walk of the cpuset hierarchy below, which loads
- *        a pointer to each cpuset marked is_sched_load_balance; for
- *        our purposes, rebuilding the scheduler's sched domains, we
- *        can ignore !is_sched_load_balance cpusets.  csa provides
- *        convenient iterative access for the subsequent code that
- *        finds the best partition, i.e. the set of domains (subsets)
- *        of CPUs such that the cpus_allowed of every cpuset marked
- *        is_sched_load_balance is a subset of one of these domains,
- *        while there are as many such domains as possible, each as
- *        small as possible.
- * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
- *        the kernel/sched/core.c routine partition_sched_domains() in a
- *        convenient format that can be easily compared to the prior
- *        value to determine what partition elements (sched domains)
- *        were changed (added or removed).
- *
- * Finding the best partition (set of domains):
- *     The triple nested loops below over i, j, k scan over the
- *     load balanced cpusets (using the array of cpuset pointers in
- *     csa[]) looking for pairs of cpusets that have overlapping
- *     cpus_allowed but which don't share the same 'pn' partition
- *     number, merging them into the same partition.  It keeps
- *     looping on the 'restart' label until it can no longer find
- *     any such pairs.
- *
- *     The union of the cpus_allowed masks from the set of
- *     all cpusets having the same 'pn' value then form the one
- *     element of the partition (one sched domain) to be passed to
- *     partition_sched_domains().
- */
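-/*
- * Worked example (illustrative): with load-balanced cpusets A = 0-3,
- * B = 2-5 and C = 6-7, A and B overlap, so the restart loop merges
- * their 'pn' values into one partition while C stays separate; the
- * result is ndoms == 2 with doms[0] = 0-5 and doms[1] = 6-7.
- */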
-static int generate_sched_domains(cpumask_var_t **domains,
-                       struct sched_domain_attr **attributes)
-{
-       struct cpuset *cp;      /* scans q */
-       struct cpuset **csa;    /* array of all cpuset ptrs */
-       int csn;                /* how many cpuset ptrs in csa so far */
-       int i, j, k;            /* indices for partition finding loops */
-       cpumask_var_t *doms;    /* resulting partition; i.e. sched domains */
-       cpumask_var_t non_isolated_cpus;  /* load balanced CPUs */
-       struct sched_domain_attr *dattr;  /* attributes for custom domains */
-       int ndoms = 0;          /* number of sched domains in result */
-       int nslot;              /* next empty doms[] struct cpumask slot */
-       struct cgroup_subsys_state *pos_css;
-
-       doms = NULL;
-       dattr = NULL;
-       csa = NULL;
-
-       if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
-               goto done;
-       cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
-
-       /* Special case for the 99% of systems with one, full, sched domain */
-       if (is_sched_load_balance(&top_cpuset)) {
-               ndoms = 1;
-               doms = alloc_sched_domains(ndoms);
-               if (!doms)
-                       goto done;
-
-               dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
-               if (dattr) {
-                       *dattr = SD_ATTR_INIT;
-                       update_domain_attr_tree(dattr, &top_cpuset);
-               }
-               cpumask_and(doms[0], top_cpuset.effective_cpus,
-                                    non_isolated_cpus);
-
-               goto done;
-       }
-
-       csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL);
-       if (!csa)
-               goto done;
-       csn = 0;
-
-       rcu_read_lock();
-       cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
-               if (cp == &top_cpuset)
-                       continue;
-               /*
-                * Continue traversing beyond @cp iff @cp has some CPUs and
-                * isn't load balancing.  The former is obvious.  The
-                * latter: All child cpusets contain a subset of the
-                * parent's cpus, so just skip them, and then we call
-                * update_domain_attr_tree() to calc relax_domain_level of
-                * the corresponding sched domain.
-                */
-               if (!cpumask_empty(cp->cpus_allowed) &&
-                   !(is_sched_load_balance(cp) &&
-                     cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
-                       continue;
-
-               if (is_sched_load_balance(cp))
-                       csa[csn++] = cp;
-
-               /* skip @cp's subtree */
-               pos_css = css_rightmost_descendant(pos_css);
-       }
-       rcu_read_unlock();
-
-       for (i = 0; i < csn; i++)
-               csa[i]->pn = i;
-       ndoms = csn;
-
-restart:
-       /* Find the best partition (set of sched domains) */
-       for (i = 0; i < csn; i++) {
-               struct cpuset *a = csa[i];
-               int apn = a->pn;
-
-               for (j = 0; j < csn; j++) {
-                       struct cpuset *b = csa[j];
-                       int bpn = b->pn;
-
-                       if (apn != bpn && cpusets_overlap(a, b)) {
-                               for (k = 0; k < csn; k++) {
-                                       struct cpuset *c = csa[k];
-
-                                       if (c->pn == bpn)
-                                               c->pn = apn;
-                               }
-                               ndoms--;        /* one less element */
-                               goto restart;
-                       }
-               }
-       }
-
-       /*
-        * Now we know how many domains to create.
-        * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
-        */
-       doms = alloc_sched_domains(ndoms);
-       if (!doms)
-               goto done;
-
-       /*
-        * The rest of the code, including the scheduler, can deal with
-        * dattr==NULL case. No need to abort if alloc fails.
-        */
-       dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
-
-       for (nslot = 0, i = 0; i < csn; i++) {
-               struct cpuset *a = csa[i];
-               struct cpumask *dp;
-               int apn = a->pn;
-
-               if (apn < 0) {
-                       /* Skip completed partitions */
-                       continue;
-               }
-
-               dp = doms[nslot];
-
-               if (nslot == ndoms) {
-                       static int warnings = 10;
-                       if (warnings) {
-                               pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n",
-                                       nslot, ndoms, csn, i, apn);
-                               warnings--;
-                       }
-                       continue;
-               }
-
-               cpumask_clear(dp);
-               if (dattr)
-                       *(dattr + nslot) = SD_ATTR_INIT;
-               for (j = i; j < csn; j++) {
-                       struct cpuset *b = csa[j];
-
-                       if (apn == b->pn) {
-                               cpumask_or(dp, dp, b->effective_cpus);
-                               cpumask_and(dp, dp, non_isolated_cpus);
-                               if (dattr)
-                                       update_domain_attr_tree(dattr + nslot, b);
-
-                               /* Done with this partition */
-                               b->pn = -1;
-                       }
-               }
-               nslot++;
-       }
-       BUG_ON(nslot != ndoms);
-
-done:
-       free_cpumask_var(non_isolated_cpus);
-       kfree(csa);
-
-       /*
-        * Fallback to the default domain if kmalloc() failed.
-        * See comments in partition_sched_domains().
-        */
-       if (doms == NULL)
-               ndoms = 1;
-
-       *domains    = doms;
-       *attributes = dattr;
-       return ndoms;
-}
-
-/*
- * Rebuild scheduler domains.
- *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
- *
- * Call with cpuset_mutex held.  Takes get_online_cpus().
- */
-static void rebuild_sched_domains_locked(void)
-{
-       struct sched_domain_attr *attr;
-       cpumask_var_t *doms;
-       int ndoms;
-
-       lockdep_assert_held(&cpuset_mutex);
-       get_online_cpus();
-
-       /*
-        * We have raced with CPU hotplug. Don't do anything to avoid
-        * passing doms with offlined cpu to partition_sched_domains().
-        * Anyways, hotplug work item will rebuild sched domains.
-        */
-       if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
-               goto out;
-
-       /* Generate domain masks and attrs */
-       ndoms = generate_sched_domains(&doms, &attr);
-
-       /* Have scheduler rebuild the domains */
-       partition_sched_domains(ndoms, doms, attr);
-out:
-       put_online_cpus();
-}
-#else /* !CONFIG_SMP */
-static void rebuild_sched_domains_locked(void)
-{
-}
-#endif /* CONFIG_SMP */
-
-void rebuild_sched_domains(void)
-{
-       mutex_lock(&cpuset_mutex);
-       rebuild_sched_domains_locked();
-       mutex_unlock(&cpuset_mutex);
-}
-
-/**
- * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
- * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
- *
- * Iterate through each task of @cs updating its cpus_allowed to the
- * effective cpuset's.  As this function is called with cpuset_mutex held,
- * cpuset membership stays stable.
- */
-static void update_tasks_cpumask(struct cpuset *cs)
-{
-       struct css_task_iter it;
-       struct task_struct *task;
-
-       css_task_iter_start(&cs->css, &it);
-       while ((task = css_task_iter_next(&it)))
-               set_cpus_allowed_ptr(task, cs->effective_cpus);
-       css_task_iter_end(&it);
-}
-
-/*
- * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
- * @cs: the cpuset to consider
- * @new_cpus: temp variable for calculating new effective_cpus
- *
- * When the configured cpumask is changed, the effective cpumasks of this
- * cpuset and all its descendants need to be updated.
- *
- * On legacy hierarchy, effective_cpus will be the same as cpus_allowed.
- *
- * Called with cpuset_mutex held
- */
-static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
-{
-       struct cpuset *cp;
-       struct cgroup_subsys_state *pos_css;
-       bool need_rebuild_sched_domains = false;
-
-       rcu_read_lock();
-       cpuset_for_each_descendant_pre(cp, pos_css, cs) {
-               struct cpuset *parent = parent_cs(cp);
-
-               cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus);
-
-               /*
-                * If it becomes empty, inherit the effective mask of the
-                * parent, which is guaranteed to have some CPUs.
-                */
-               if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-                   cpumask_empty(new_cpus))
-                       cpumask_copy(new_cpus, parent->effective_cpus);
-
-               /* Skip the whole subtree if the cpumask remains the same. */
-               if (cpumask_equal(new_cpus, cp->effective_cpus)) {
-                       pos_css = css_rightmost_descendant(pos_css);
-                       continue;
-               }
-
-               if (!css_tryget_online(&cp->css))
-                       continue;
-               rcu_read_unlock();
-
-               spin_lock_irq(&callback_lock);
-               cpumask_copy(cp->effective_cpus, new_cpus);
-               spin_unlock_irq(&callback_lock);
-
-               WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-                       !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
-
-               update_tasks_cpumask(cp);
-
-               /*
-                * If the effective cpumask of any non-empty cpuset is changed,
-                * we need to rebuild sched domains.
-                */
-               if (!cpumask_empty(cp->cpus_allowed) &&
-                   is_sched_load_balance(cp))
-                       need_rebuild_sched_domains = true;
-
-               rcu_read_lock();
-               css_put(&cp->css);
-       }
-       rcu_read_unlock();
-
-       if (need_rebuild_sched_domains)
-               rebuild_sched_domains_locked();
-}
-
-/**
- * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
- * @cs: the cpuset to consider
- * @trialcs: trial cpuset
- * @buf: buffer of cpu numbers written to this cpuset
- */
-static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
-                         const char *buf)
-{
-       int retval;
-
-       /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
-       if (cs == &top_cpuset)
-               return -EACCES;
-
-       /*
-        * An empty cpus_allowed is ok only if the cpuset has no tasks.
-        * Since cpulist_parse() fails on an empty mask, we special case
-        * that parsing.  The validate_change() call ensures that cpusets
-        * with tasks have cpus.
-        */
-       if (!*buf) {
-               cpumask_clear(trialcs->cpus_allowed);
-       } else {
-               retval = cpulist_parse(buf, trialcs->cpus_allowed);
-               if (retval < 0)
-                       return retval;
-
-               if (!cpumask_subset(trialcs->cpus_allowed,
-                                   top_cpuset.cpus_allowed))
-                       return -EINVAL;
-       }
-
-       /* Nothing to do if the cpus didn't change */
-       if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
-               return 0;
-
-       retval = validate_change(cs, trialcs);
-       if (retval < 0)
-               return retval;
-
-       spin_lock_irq(&callback_lock);
-       cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
-       spin_unlock_irq(&callback_lock);
-
-       /* use trialcs->cpus_allowed as a temp variable */
-       update_cpumasks_hier(cs, trialcs->cpus_allowed);
-       return 0;
-}
-
-/*
- * Migrate memory region from one set of nodes to another.  This is
- * performed asynchronously as it can be called from process migration path
- * holding locks involved in process management.  All mm migrations are
- * performed in the queued order and can be waited for by flushing
- * cpuset_migrate_mm_wq.
- */
-
-struct cpuset_migrate_mm_work {
-       struct work_struct      work;
-       struct mm_struct        *mm;
-       nodemask_t              from;
-       nodemask_t              to;
-};
-
-static void cpuset_migrate_mm_workfn(struct work_struct *work)
-{
-       struct cpuset_migrate_mm_work *mwork =
-               container_of(work, struct cpuset_migrate_mm_work, work);
-
-       /* on a wq worker, no need to worry about %current's mems_allowed */
-       do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
-       mmput(mwork->mm);
-       kfree(mwork);
-}
-
-static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
-                                                       const nodemask_t *to)
-{
-       struct cpuset_migrate_mm_work *mwork;
-
-       mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
-       if (mwork) {
-               mwork->mm = mm;
-               mwork->from = *from;
-               mwork->to = *to;
-               INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
-               queue_work(cpuset_migrate_mm_wq, &mwork->work);
-       } else {
-               mmput(mm);
-       }
-}
-
-static void cpuset_post_attach(void)
-{
-       flush_workqueue(cpuset_migrate_mm_wq);
-}
-
-/*
- * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
- * @tsk: the task to change
- * @newmems: new nodes that the task will be set
- *
- * In order to avoid seeing no nodes if the old and new nodes are disjoint,
- * we structure updates as setting all new allowed nodes, then clearing newly
- * disallowed ones.
- */
-static void cpuset_change_task_nodemask(struct task_struct *tsk,
-                                       nodemask_t *newmems)
-{
-       bool need_loop;
-
-       task_lock(tsk);
-       /*
-        * Determine if a loop is necessary if another thread is doing
-        * read_mems_allowed_begin().  If at least one node remains unchanged and
-        * tsk does not have a mempolicy, then an empty nodemask will not be
-        * possible when mems_allowed is larger than a word.
-        */
-       need_loop = task_has_mempolicy(tsk) ||
-                       !nodes_intersects(*newmems, tsk->mems_allowed);
-
-       if (need_loop) {
-               local_irq_disable();
-               write_seqcount_begin(&tsk->mems_allowed_seq);
-       }
-
-       nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-       mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
-
-       mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
-       tsk->mems_allowed = *newmems;
-
-       if (need_loop) {
-               write_seqcount_end(&tsk->mems_allowed_seq);
-               local_irq_enable();
-       }
-
-       task_unlock(tsk);
-}
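-
-/*
- * Reader-side sketch (illustrative only): allocation paths pair with
- * the seqcount above via read_mems_allowed_begin()/_retry() from
- * include/linux/cpuset.h, retrying if an update raced with the read.
- */
-#if 0 /* example only */
-static void example_read_mems(void)
-{
-       nodemask_t mems;
-       unsigned int seq;
-
-       do {
-               seq = read_mems_allowed_begin();
-               mems = current->mems_allowed;
-       } while (read_mems_allowed_retry(seq));
-       pr_info("%*pbl\n", nodemask_pr_args(&mems));
-}
-#endif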
-
-static void *cpuset_being_rebound;
-
-/**
- * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
- * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
- *
- * Iterate through each task of @cs updating its mems_allowed to the
- * effective cpuset's.  As this function is called with cpuset_mutex held,
- * cpuset membership stays stable.
- */
-static void update_tasks_nodemask(struct cpuset *cs)
-{
-       static nodemask_t newmems;      /* protected by cpuset_mutex */
-       struct css_task_iter it;
-       struct task_struct *task;
-
-       cpuset_being_rebound = cs;              /* causes mpol_dup() rebind */
-
-       guarantee_online_mems(cs, &newmems);
-
-       /*
-        * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
-        * take while holding tasklist_lock.  Forks can happen - the
-        * mpol_dup() cpuset_being_rebound check will catch such forks,
-        * and rebind their vma mempolicies too.  Because we still hold
-        * the global cpuset_mutex, we know that no other rebind effort
-        * will be contending for the global variable cpuset_being_rebound.
-        * It's ok if we rebind the same mm twice; mpol_rebind_mm()
-        * is idempotent.  Also migrate pages in each mm to new nodes.
-        */
-       css_task_iter_start(&cs->css, &it);
-       while ((task = css_task_iter_next(&it))) {
-               struct mm_struct *mm;
-               bool migrate;
-
-               cpuset_change_task_nodemask(task, &newmems);
-
-               mm = get_task_mm(task);
-               if (!mm)
-                       continue;
-
-               migrate = is_memory_migrate(cs);
-
-               mpol_rebind_mm(mm, &cs->mems_allowed);
-               if (migrate)
-                       cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
-               else
-                       mmput(mm);
-       }
-       css_task_iter_end(&it);
-
-       /*
-        * All the tasks' nodemasks have been updated, update
-        * cs->old_mems_allowed.
-        */
-       cs->old_mems_allowed = newmems;
-
-       /* We're done rebinding vmas to this cpuset's new mems_allowed. */
-       cpuset_being_rebound = NULL;
-}
-
-/*
- * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
- * @cs: the cpuset to consider
- * @new_mems: a temp variable for calculating new effective_mems
- *
- * When configured nodemask is changed, the effective nodemasks of this cpuset
- * and all its descendants need to be updated.
- *
- * On legacy hierarchy, effective_mems will be the same as mems_allowed.
- *
- * Called with cpuset_mutex held
- */
-static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
-{
-       struct cpuset *cp;
-       struct cgroup_subsys_state *pos_css;
-
-       rcu_read_lock();
-       cpuset_for_each_descendant_pre(cp, pos_css, cs) {
-               struct cpuset *parent = parent_cs(cp);
-
-               nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems);
-
-               /*
-                * If it becomes empty, inherit the effective mask of the
-                * parent, which is guaranteed to have some MEMs.
-                */
-               if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-                   nodes_empty(*new_mems))
-                       *new_mems = parent->effective_mems;
-
-               /* Skip the whole subtree if the nodemask remains the same. */
-               if (nodes_equal(*new_mems, cp->effective_mems)) {
-                       pos_css = css_rightmost_descendant(pos_css);
-                       continue;
-               }
-
-               if (!css_tryget_online(&cp->css))
-                       continue;
-               rcu_read_unlock();
-
-               spin_lock_irq(&callback_lock);
-               cp->effective_mems = *new_mems;
-               spin_unlock_irq(&callback_lock);
-
-               WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-                       !nodes_equal(cp->mems_allowed, cp->effective_mems));
-
-               update_tasks_nodemask(cp);
-
-               rcu_read_lock();
-               css_put(&cp->css);
-       }
-       rcu_read_unlock();
-}
-
-/*
- * Handle user request to change the 'mems' memory placement
- * of a cpuset.  Needs to validate the request, update the
- * cpuset's mems_allowed, and for each task in the cpuset,
- * update mems_allowed, rebind the task's mempolicy and any vma
- * mempolicies, and, if the cpuset is marked 'memory_migrate',
- * migrate the task's pages to the new memory.
- *
- * Call with cpuset_mutex held. May take callback_lock during call.
- * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
- * lock each such task's mm->mmap_sem, scan its vma's and rebind
- * their mempolicies to the cpuset's new mems_allowed.
- */
-static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
-                          const char *buf)
-{
-       int retval;
-
-       /*
-        * top_cpuset.mems_allowed tracks node_states[N_MEMORY];
-        * it's read-only
-        */
-       if (cs == &top_cpuset) {
-               retval = -EACCES;
-               goto done;
-       }
-
-       /*
-        * An empty mems_allowed is ok iff there are no tasks in the cpuset.
-        * Since nodelist_parse() fails on an empty mask, we special case
-        * that parsing.  The validate_change() call ensures that cpusets
-        * with tasks have memory.
-        */
-       if (!*buf) {
-               nodes_clear(trialcs->mems_allowed);
-       } else {
-               retval = nodelist_parse(buf, trialcs->mems_allowed);
-               if (retval < 0)
-                       goto done;
-
-               if (!nodes_subset(trialcs->mems_allowed,
-                                 top_cpuset.mems_allowed)) {
-                       retval = -EINVAL;
-                       goto done;
-               }
-       }
-
-       if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
-               retval = 0;             /* Too easy - nothing to do */
-               goto done;
-       }
-       retval = validate_change(cs, trialcs);
-       if (retval < 0)
-               goto done;
-
-       spin_lock_irq(&callback_lock);
-       cs->mems_allowed = trialcs->mems_allowed;
-       spin_unlock_irq(&callback_lock);
-
-       /* use trialcs->mems_allowed as a temp variable */
-       update_nodemasks_hier(cs, &trialcs->mems_allowed);
-done:
-       return retval;
-}
-
-int current_cpuset_is_being_rebound(void)
-{
-       int ret;
-
-       rcu_read_lock();
-       ret = task_cs(current) == cpuset_being_rebound;
-       rcu_read_unlock();
-
-       return ret;
-}
-
-static int update_relax_domain_level(struct cpuset *cs, s64 val)
-{
-#ifdef CONFIG_SMP
-       if (val < -1 || val >= sched_domain_level_max)
-               return -EINVAL;
-#endif
-
-       if (val != cs->relax_domain_level) {
-               cs->relax_domain_level = val;
-               if (!cpumask_empty(cs->cpus_allowed) &&
-                   is_sched_load_balance(cs))
-                       rebuild_sched_domains_locked();
-       }
-
-       return 0;
-}
-
-/**
- * update_tasks_flags - update the spread flags of tasks in the cpuset.
- * @cs: the cpuset in which each task's spread flags needs to be changed
- *
- * Iterate through each task of @cs updating its spread flags.  As this
- * function is called with cpuset_mutex held, cpuset membership stays
- * stable.
- */
-static void update_tasks_flags(struct cpuset *cs)
-{
-       struct css_task_iter it;
-       struct task_struct *task;
-
-       css_task_iter_start(&cs->css, &it);
-       while ((task = css_task_iter_next(&it)))
-               cpuset_update_task_spread_flag(cs, task);
-       css_task_iter_end(&it);
-}
-
-/*
- * update_flag - read a 0 or a 1 in a file and update associated flag
- * bit:                the bit to update (see cpuset_flagbits_t)
- * cs:         the cpuset to update
- * turning_on:         whether the flag is being set or cleared
- *
- * Call with cpuset_mutex held.
- */
-
-static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
-                      int turning_on)
-{
-       struct cpuset *trialcs;
-       int balance_flag_changed;
-       int spread_flag_changed;
-       int err;
-
-       trialcs = alloc_trial_cpuset(cs);
-       if (!trialcs)
-               return -ENOMEM;
-
-       if (turning_on)
-               set_bit(bit, &trialcs->flags);
-       else
-               clear_bit(bit, &trialcs->flags);
-
-       err = validate_change(cs, trialcs);
-       if (err < 0)
-               goto out;
-
-       balance_flag_changed = (is_sched_load_balance(cs) !=
-                               is_sched_load_balance(trialcs));
-
-       spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
-                       || (is_spread_page(cs) != is_spread_page(trialcs)));
-
-       spin_lock_irq(&callback_lock);
-       cs->flags = trialcs->flags;
-       spin_unlock_irq(&callback_lock);
-
-       if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
-               rebuild_sched_domains_locked();
-
-       if (spread_flag_changed)
-               update_tasks_flags(cs);
-out:
-       free_trial_cpuset(trialcs);
-       return err;
-}
-
-/*
- * Frequency meter - How fast is some event occurring?
- *
- * These routines manage a digitally filtered, constant time based,
- * event frequency meter.  There are four routines:
- *   fmeter_init() - initialize a frequency meter.
- *   fmeter_markevent() - called each time the event happens.
- *   fmeter_getrate() - returns the recent rate of such events.
- *   fmeter_update() - internal routine used to update fmeter.
- *
- * A common data structure is passed to each of these routines,
- * which is used to keep track of the state required to manage the
- * frequency meter and its digital filter.
- *
- * The filter works on the number of events marked per unit time.
- * The filter is single-pole low-pass recursive (IIR).  The time unit
- * is 1 second.  Arithmetic is done using 32-bit integers scaled to
- * simulate 3 decimal digits of precision (multiplied by 1000).
- *
- * With an FM_COEF of 933, and a time base of 1 second, the filter
- * has a half-life of 10 seconds, meaning that if the events quit
- * happening, then the rate returned from the fmeter_getrate()
- * will be cut in half each 10 seconds, until it converges to zero.
- *
- * It is not worth doing a real infinitely recursive filter.  If more
- * than FM_MAXTICKS ticks have elapsed since the last filter event,
- * just compute FM_MAXTICKS ticks worth, by which point the level
- * will be stable.
- *
- * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
- * arithmetic overflow in the fmeter_update() routine.
- *
- * Given the simple 32 bit integer arithmetic used, this meter works
- * best for reporting rates between one per millisecond (msec) and
- * one per 32 (approx) seconds.  At constant rates faster than one
- * per msec it maxes out at values just under 1,000,000.  At constant
- * rates between one per msec, and one per second it will stabilize
- * to a value N*1000, where N is the rate of events per second.
- * At constant rates between one per second and one per 32 seconds,
- * it will be choppy, moving up on the seconds that have an event,
- * and then decaying until the next event.  At rates slower than
- * about one in 32 seconds, it decays all the way back to zero between
- * each event.
- */
-
-#define FM_COEF 933            /* coefficient for half-life of 10 secs */
-#define FM_MAXTICKS ((u32)99)   /* useless computing more ticks than this */
-#define FM_MAXCNT 1000000      /* limit cnt to avoid overflow */
-#define FM_SCALE 1000          /* faux fixed point scale */
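-
-/*
- * Sanity check on the constants (illustrative arithmetic): each tick
- * scales val by FM_COEF/FM_SCALE = 0.933, and 0.933^10 ~= 0.50, which
- * gives the 10 second half-life quoted above.  At a steady N events/sec
- * the fixed point of v = (FM_COEF*v + (FM_SCALE-FM_COEF)*N*FM_SCALE)/FM_SCALE
- * is v = N*1000, matching "stabilize to a value N*1000".
- */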
-
-/* Initialize a frequency meter */
-static void fmeter_init(struct fmeter *fmp)
-{
-       fmp->cnt = 0;
-       fmp->val = 0;
-       fmp->time = 0;
-       spin_lock_init(&fmp->lock);
-}
-
-/* Internal meter update - process cnt events and update value */
-static void fmeter_update(struct fmeter *fmp)
-{
-       time64_t now;
-       u32 ticks;
-
-       now = ktime_get_seconds();
-       ticks = now - fmp->time;
-
-       if (ticks == 0)
-               return;
-
-       ticks = min(FM_MAXTICKS, ticks);
-       while (ticks-- > 0)
-               fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
-       fmp->time = now;
-
-       fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
-       fmp->cnt = 0;
-}
-
-/* Process any previous ticks, then bump cnt by one (times scale). */
-static void fmeter_markevent(struct fmeter *fmp)
-{
-       spin_lock(&fmp->lock);
-       fmeter_update(fmp);
-       fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
-       spin_unlock(&fmp->lock);
-}
-
-/* Process any previous ticks, then return current value. */
-static int fmeter_getrate(struct fmeter *fmp)
-{
-       int val;
-
-       spin_lock(&fmp->lock);
-       fmeter_update(fmp);
-       val = fmp->val;
-       spin_unlock(&fmp->lock);
-       return val;
-}
-
-static struct cpuset *cpuset_attach_old_cs;
-
-/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-static int cpuset_can_attach(struct cgroup_taskset *tset)
-{
-       struct cgroup_subsys_state *css;
-       struct cpuset *cs;
-       struct task_struct *task;
-       int ret;
-
-       /* used later by cpuset_attach() */
-       cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
-       cs = css_cs(css);
-
-       mutex_lock(&cpuset_mutex);
-
-       /* allow moving tasks into an empty cpuset if on default hierarchy */
-       ret = -ENOSPC;
-       if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-           (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
-               goto out_unlock;
-
-       cgroup_taskset_for_each(task, css, tset) {
-               ret = task_can_attach(task, cs->cpus_allowed);
-               if (ret)
-                       goto out_unlock;
-               ret = security_task_setscheduler(task);
-               if (ret)
-                       goto out_unlock;
-       }
-
-       /*
-        * Mark that an attach is in progress.  This makes validate_change()
-        * fail changes which would zero cpus/mems_allowed.
-        */
-       cs->attach_in_progress++;
-       ret = 0;
-out_unlock:
-       mutex_unlock(&cpuset_mutex);
-       return ret;
-}
-
-static void cpuset_cancel_attach(struct cgroup_taskset *tset)
-{
-       struct cgroup_subsys_state *css;
-       struct cpuset *cs;
-
-       cgroup_taskset_first(tset, &css);
-       cs = css_cs(css);
-
-       mutex_lock(&cpuset_mutex);
-       css_cs(css)->attach_in_progress--;
-       mutex_unlock(&cpuset_mutex);
-}
-
-/*
- * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
- * but we can't allocate it dynamically there.  Define it global and
- * allocate from cpuset_init().
- */
-static cpumask_var_t cpus_attach;
-
-static void cpuset_attach(struct cgroup_taskset *tset)
-{
-       /* static buf protected by cpuset_mutex */
-       static nodemask_t cpuset_attach_nodemask_to;
-       struct task_struct *task;
-       struct task_struct *leader;
-       struct cgroup_subsys_state *css;
-       struct cpuset *cs;
-       struct cpuset *oldcs = cpuset_attach_old_cs;
-
-       cgroup_taskset_first(tset, &css);
-       cs = css_cs(css);
-
-       mutex_lock(&cpuset_mutex);
-
-       /* prepare for attach */
-       if (cs == &top_cpuset)
-               cpumask_copy(cpus_attach, cpu_possible_mask);
-       else
-               guarantee_online_cpus(cs, cpus_attach);
-
-       guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
-
-       cgroup_taskset_for_each(task, css, tset) {
-               /*
-                * can_attach beforehand should guarantee that this doesn't
-                * fail.  TODO: have a better way to handle failure here
-                */
-               WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
-               cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-               cpuset_update_task_spread_flag(cs, task);
-       }
-
-       /*
-        * Change mm for all threadgroup leaders. This is expensive and may
-        * sleep and should be moved outside migration path proper.
-        */
-       cpuset_attach_nodemask_to = cs->effective_mems;
-       cgroup_taskset_for_each_leader(leader, css, tset) {
-               struct mm_struct *mm = get_task_mm(leader);
-
-               if (mm) {
-                       mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
-
-                       /*
-                        * old_mems_allowed is the same as mems_allowed
-                        * here, except if this task is being moved
-                        * automatically due to hotplug.  In that case
-                        * @mems_allowed has been updated and is empty, so
-                        * @old_mems_allowed is the right nodeset to
-                        * migrate the mm from.
-                        */
-                       if (is_memory_migrate(cs))
-                               cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
-                                                 &cpuset_attach_nodemask_to);
-                       else
-                               mmput(mm);
-               }
-       }
-
-       cs->old_mems_allowed = cpuset_attach_nodemask_to;
-
-       cs->attach_in_progress--;
-       if (!cs->attach_in_progress)
-               wake_up(&cpuset_attach_wq);
-
-       mutex_unlock(&cpuset_mutex);
-}
-
-/* The various types of files and directories in a cpuset file system */
-
-typedef enum {
-       FILE_MEMORY_MIGRATE,
-       FILE_CPULIST,
-       FILE_MEMLIST,
-       FILE_EFFECTIVE_CPULIST,
-       FILE_EFFECTIVE_MEMLIST,
-       FILE_CPU_EXCLUSIVE,
-       FILE_MEM_EXCLUSIVE,
-       FILE_MEM_HARDWALL,
-       FILE_SCHED_LOAD_BALANCE,
-       FILE_SCHED_RELAX_DOMAIN_LEVEL,
-       FILE_MEMORY_PRESSURE_ENABLED,
-       FILE_MEMORY_PRESSURE,
-       FILE_SPREAD_PAGE,
-       FILE_SPREAD_SLAB,
-} cpuset_filetype_t;
-
-static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
-                           u64 val)
-{
-       struct cpuset *cs = css_cs(css);
-       cpuset_filetype_t type = cft->private;
-       int retval = 0;
-
-       mutex_lock(&cpuset_mutex);
-       if (!is_cpuset_online(cs)) {
-               retval = -ENODEV;
-               goto out_unlock;
-       }
-
-       switch (type) {
-       case FILE_CPU_EXCLUSIVE:
-               retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
-               break;
-       case FILE_MEM_EXCLUSIVE:
-               retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
-               break;
-       case FILE_MEM_HARDWALL:
-               retval = update_flag(CS_MEM_HARDWALL, cs, val);
-               break;
-       case FILE_SCHED_LOAD_BALANCE:
-               retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
-               break;
-       case FILE_MEMORY_MIGRATE:
-               retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
-               break;
-       case FILE_MEMORY_PRESSURE_ENABLED:
-               cpuset_memory_pressure_enabled = !!val;
-               break;
-       case FILE_SPREAD_PAGE:
-               retval = update_flag(CS_SPREAD_PAGE, cs, val);
-               break;
-       case FILE_SPREAD_SLAB:
-               retval = update_flag(CS_SPREAD_SLAB, cs, val);
-               break;
-       default:
-               retval = -EINVAL;
-               break;
-       }
-out_unlock:
-       mutex_unlock(&cpuset_mutex);
-       return retval;
-}
-
-static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
-                           s64 val)
-{
-       struct cpuset *cs = css_cs(css);
-       cpuset_filetype_t type = cft->private;
-       int retval = -ENODEV;
-
-       mutex_lock(&cpuset_mutex);
-       if (!is_cpuset_online(cs))
-               goto out_unlock;
-
-       switch (type) {
-       case FILE_SCHED_RELAX_DOMAIN_LEVEL:
-               retval = update_relax_domain_level(cs, val);
-               break;
-       default:
-               retval = -EINVAL;
-               break;
-       }
-out_unlock:
-       mutex_unlock(&cpuset_mutex);
-       return retval;
-}
-
-/*
- * Common handling for a write to a "cpus" or "mems" file.
- */
-static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
-                                   char *buf, size_t nbytes, loff_t off)
-{
-       struct cpuset *cs = css_cs(of_css(of));
-       struct cpuset *trialcs;
-       int retval = -ENODEV;
-
-       buf = strstrip(buf);
-
-       /*
-        * CPU or memory hotunplug may leave @cs w/o any execution
-        * resources, in which case the hotplug code asynchronously updates
-        * configuration and transfers all tasks to the nearest ancestor
-        * which can execute.
-        *
-        * As writes to "cpus" or "mems" may restore @cs's execution
-        * resources, wait for the previously scheduled operations before
-        * proceeding, so that we don't end up repeatedly removing tasks
-        * added after execution capability is restored.
-        *
-        * cpuset_hotplug_work calls back into cgroup core via
-        * cgroup_transfer_tasks() and waiting for it from a cgroupfs
-        * operation like this one can lead to a deadlock through kernfs
-        * active_ref protection.  Let's break the protection.  Losing the
-        * protection is okay as we check whether @cs is online after
-        * grabbing cpuset_mutex anyway.  This only happens on the legacy
-        * hierarchies.
-        */
-       css_get(&cs->css);
-       kernfs_break_active_protection(of->kn);
-       flush_work(&cpuset_hotplug_work);
-
-       mutex_lock(&cpuset_mutex);
-       if (!is_cpuset_online(cs))
-               goto out_unlock;
-
-       trialcs = alloc_trial_cpuset(cs);
-       if (!trialcs) {
-               retval = -ENOMEM;
-               goto out_unlock;
-       }
-
-       switch (of_cft(of)->private) {
-       case FILE_CPULIST:
-               retval = update_cpumask(cs, trialcs, buf);
-               break;
-       case FILE_MEMLIST:
-               retval = update_nodemask(cs, trialcs, buf);
-               break;
-       default:
-               retval = -EINVAL;
-               break;
-       }
-
-       free_trial_cpuset(trialcs);
-out_unlock:
-       mutex_unlock(&cpuset_mutex);
-       kernfs_unbreak_active_protection(of->kn);
-       css_put(&cs->css);
-       flush_workqueue(cpuset_migrate_mm_wq);
-       return retval ?: nbytes;
-}
-
-/*
- * These ascii lists should be read in a single call, by using a user
- * buffer large enough to hold the entire map.  If read in smaller
- * chunks, there is no guarantee of atomicity.  Since the display format
- * used (a list of ranges of sequential numbers) is variable length,
- * and since these maps can change value dynamically, one could read
- * gibberish by doing partial reads while a list was changing.
- */
-static int cpuset_common_seq_show(struct seq_file *sf, void *v)
-{
-       struct cpuset *cs = css_cs(seq_css(sf));
-       cpuset_filetype_t type = seq_cft(sf)->private;
-       int ret = 0;
-
-       spin_lock_irq(&callback_lock);
-
-       switch (type) {
-       case FILE_CPULIST:
-               seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
-               break;
-       case FILE_MEMLIST:
-               seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
-               break;
-       case FILE_EFFECTIVE_CPULIST:
-               seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus));
-               break;
-       case FILE_EFFECTIVE_MEMLIST:
-               seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
-               break;
-       default:
-               ret = -EINVAL;
-       }
-
-       spin_unlock_irq(&callback_lock);
-       return ret;
-}
-
-static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
-{
-       struct cpuset *cs = css_cs(css);
-       cpuset_filetype_t type = cft->private;
-       switch (type) {
-       case FILE_CPU_EXCLUSIVE:
-               return is_cpu_exclusive(cs);
-       case FILE_MEM_EXCLUSIVE:
-               return is_mem_exclusive(cs);
-       case FILE_MEM_HARDWALL:
-               return is_mem_hardwall(cs);
-       case FILE_SCHED_LOAD_BALANCE:
-               return is_sched_load_balance(cs);
-       case FILE_MEMORY_MIGRATE:
-               return is_memory_migrate(cs);
-       case FILE_MEMORY_PRESSURE_ENABLED:
-               return cpuset_memory_pressure_enabled;
-       case FILE_MEMORY_PRESSURE:
-               return fmeter_getrate(&cs->fmeter);
-       case FILE_SPREAD_PAGE:
-               return is_spread_page(cs);
-       case FILE_SPREAD_SLAB:
-               return is_spread_slab(cs);
-       default:
-               BUG();
-       }
-
-       /* Unreachable but makes gcc happy */
-       return 0;
-}
-
-static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
-{
-       struct cpuset *cs = css_cs(css);
-       cpuset_filetype_t type = cft->private;
-       switch (type) {
-       case FILE_SCHED_RELAX_DOMAIN_LEVEL:
-               return cs->relax_domain_level;
-       default:
-               BUG();
-       }
-
-       /* Unreachable but makes gcc happy */
-       return 0;
-}
-
-
-/*
- * for the common functions, 'private' gives the type of file
- */
-
-static struct cftype files[] = {
-       {
-               .name = "cpus",
-               .seq_show = cpuset_common_seq_show,
-               .write = cpuset_write_resmask,
-               .max_write_len = (100U + 6 * NR_CPUS),
-               .private = FILE_CPULIST,
-       },
-
-       {
-               .name = "mems",
-               .seq_show = cpuset_common_seq_show,
-               .write = cpuset_write_resmask,
-               .max_write_len = (100U + 6 * MAX_NUMNODES),
-               .private = FILE_MEMLIST,
-       },
-
-       {
-               .name = "effective_cpus",
-               .seq_show = cpuset_common_seq_show,
-               .private = FILE_EFFECTIVE_CPULIST,
-       },
-
-       {
-               .name = "effective_mems",
-               .seq_show = cpuset_common_seq_show,
-               .private = FILE_EFFECTIVE_MEMLIST,
-       },
-
-       {
-               .name = "cpu_exclusive",
-               .read_u64 = cpuset_read_u64,
-               .write_u64 = cpuset_write_u64,
-               .private = FILE_CPU_EXCLUSIVE,
-       },
-
-       {
-               .name = "mem_exclusive",
-               .read_u64 = cpuset_read_u64,
-               .write_u64 = cpuset_write_u64,
-               .private = FILE_MEM_EXCLUSIVE,
-       },
-
-       {
-               .name = "mem_hardwall",
-               .read_u64 = cpuset_read_u64,
-               .write_u64 = cpuset_write_u64,
-               .private = FILE_MEM_HARDWALL,
-       },
-
-       {
-               .name = "sched_load_balance",
-               .read_u64 = cpuset_read_u64,
-               .write_u64 = cpuset_write_u64,
-               .private = FILE_SCHED_LOAD_BALANCE,
-       },
-
-       {
-               .name = "sched_relax_domain_level",
-               .read_s64 = cpuset_read_s64,
-               .write_s64 = cpuset_write_s64,
-               .private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
-       },
-
-       {
-               .name = "memory_migrate",
-               .read_u64 = cpuset_read_u64,
-               .write_u64 = cpuset_write_u64,
-               .private = FILE_MEMORY_MIGRATE,
-       },
-
-       {
-               .name = "memory_pressure",
-               .read_u64 = cpuset_read_u64,
-       },
-
-       {
-               .name = "memory_spread_page",
-               .read_u64 = cpuset_read_u64,
-               .write_u64 = cpuset_write_u64,
-               .private = FILE_SPREAD_PAGE,
-       },
-
-       {
-               .name = "memory_spread_slab",
-               .read_u64 = cpuset_read_u64,
-               .write_u64 = cpuset_write_u64,
-               .private = FILE_SPREAD_SLAB,
-       },
-
-       {
-               .name = "memory_pressure_enabled",
-               .flags = CFTYPE_ONLY_ON_ROOT,
-               .read_u64 = cpuset_read_u64,
-               .write_u64 = cpuset_write_u64,
-               .private = FILE_MEMORY_PRESSURE_ENABLED,
-       },
-
-       { }     /* terminate */
-};
-
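The cftype table above is what materializes as the per-cpuset control files in the cgroup filesystem (under a v1 mount of the named "cpuset" controller they appear with a "cpuset." prefix). A rough userspace sketch of driving them — the mount point /sys/fs/cgroup/cpuset and the child group "demo" are assumptions, not part of this patch:

/* Minimal sketch: exercise the cpuset files declared above from
 * userspace.  Mount point and child-group name are assumptions;
 * adjust for your system. */
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);

        if (fd < 0)
                return -1;
        if (write(fd, val, strlen(val)) < 0) {
                close(fd);
                return -1;
        }
        return close(fd);
}

int main(void)
{
        /* mkdir is what triggers cpuset_css_alloc()/cpuset_css_online() */
        mkdir("/sys/fs/cgroup/cpuset/demo", 0755);
        write_str("/sys/fs/cgroup/cpuset/demo/cpuset.cpus", "0-1");
        write_str("/sys/fs/cgroup/cpuset/demo/cpuset.mems", "0");
        write_str("/sys/fs/cgroup/cpuset/demo/cpuset.sched_load_balance", "0");
        return 0;
}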
-/*
- *     cpuset_css_alloc - allocate a cpuset css
- *     cgrp:   control group that the new cpuset will be part of
- */
-
-static struct cgroup_subsys_state *
-cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
-{
-       struct cpuset *cs;
-
-       if (!parent_css)
-               return &top_cpuset.css;
-
-       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
-       if (!cs)
-               return ERR_PTR(-ENOMEM);
-       if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
-               goto free_cs;
-       if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
-               goto free_cpus;
-
-       set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
-       cpumask_clear(cs->cpus_allowed);
-       nodes_clear(cs->mems_allowed);
-       cpumask_clear(cs->effective_cpus);
-       nodes_clear(cs->effective_mems);
-       fmeter_init(&cs->fmeter);
-       cs->relax_domain_level = -1;
-
-       return &cs->css;
-
-free_cpus:
-       free_cpumask_var(cs->cpus_allowed);
-free_cs:
-       kfree(cs);
-       return ERR_PTR(-ENOMEM);
-}
-
-static int cpuset_css_online(struct cgroup_subsys_state *css)
-{
-       struct cpuset *cs = css_cs(css);
-       struct cpuset *parent = parent_cs(cs);
-       struct cpuset *tmp_cs;
-       struct cgroup_subsys_state *pos_css;
-
-       if (!parent)
-               return 0;
-
-       mutex_lock(&cpuset_mutex);
-
-       set_bit(CS_ONLINE, &cs->flags);
-       if (is_spread_page(parent))
-               set_bit(CS_SPREAD_PAGE, &cs->flags);
-       if (is_spread_slab(parent))
-               set_bit(CS_SPREAD_SLAB, &cs->flags);
-
-       cpuset_inc();
-
-       spin_lock_irq(&callback_lock);
-       if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
-               cpumask_copy(cs->effective_cpus, parent->effective_cpus);
-               cs->effective_mems = parent->effective_mems;
-       }
-       spin_unlock_irq(&callback_lock);
-
-       if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
-               goto out_unlock;
-
-       /*
-        * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is
-        * set.  This flag handling is implemented in cgroup core for
-        * historical reasons - the flag may be specified during mount.
-        *
-        * Currently, if any sibling cpusets have exclusive cpus or mem, we
-        * refuse to clone the configuration - thereby refusing the task to
-        * be entered, and as a result refusing the sys_unshare() or
-        * clone() which initiated it.  If this becomes a problem for some
-        * users who wish to allow that scenario, then this could be
-        * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
-        * (and likewise for mems) to the new cgroup.
-        */
-       rcu_read_lock();
-       cpuset_for_each_child(tmp_cs, pos_css, parent) {
-               if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
-                       rcu_read_unlock();
-                       goto out_unlock;
-               }
-       }
-       rcu_read_unlock();
-
-       spin_lock_irq(&callback_lock);
-       cs->mems_allowed = parent->mems_allowed;
-       cs->effective_mems = parent->mems_allowed;
-       cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
-       cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
-       spin_unlock_irq(&callback_lock);
-out_unlock:
-       mutex_unlock(&cpuset_mutex);
-       return 0;
-}
-
-/*
- * If the cpuset being removed has its flag 'sched_load_balance'
- * enabled, then simulate turning sched_load_balance off, which
- * will call rebuild_sched_domains_locked().
- */
-
-static void cpuset_css_offline(struct cgroup_subsys_state *css)
-{
-       struct cpuset *cs = css_cs(css);
-
-       mutex_lock(&cpuset_mutex);
-
-       if (is_sched_load_balance(cs))
-               update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
-
-       cpuset_dec();
-       clear_bit(CS_ONLINE, &cs->flags);
-
-       mutex_unlock(&cpuset_mutex);
-}
-
-static void cpuset_css_free(struct cgroup_subsys_state *css)
-{
-       struct cpuset *cs = css_cs(css);
-
-       free_cpumask_var(cs->effective_cpus);
-       free_cpumask_var(cs->cpus_allowed);
-       kfree(cs);
-}
-
-static void cpuset_bind(struct cgroup_subsys_state *root_css)
-{
-       mutex_lock(&cpuset_mutex);
-       spin_lock_irq(&callback_lock);
-
-       if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
-               cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
-               top_cpuset.mems_allowed = node_possible_map;
-       } else {
-               cpumask_copy(top_cpuset.cpus_allowed,
-                            top_cpuset.effective_cpus);
-               top_cpuset.mems_allowed = top_cpuset.effective_mems;
-       }
-
-       spin_unlock_irq(&callback_lock);
-       mutex_unlock(&cpuset_mutex);
-}
-
-/*
- * Make sure the new task conforms to the current state of its parent,
- * which could have been changed by cpuset just after it inherits the
- * state from the parent and before it sits on the cgroup's task list.
- */
-static void cpuset_fork(struct task_struct *task)
-{
-       if (task_css_is_root(task, cpuset_cgrp_id))
-               return;
-
-       set_cpus_allowed_ptr(task, &current->cpus_allowed);
-       task->mems_allowed = current->mems_allowed;
-}
-
-struct cgroup_subsys cpuset_cgrp_subsys = {
-       .css_alloc      = cpuset_css_alloc,
-       .css_online     = cpuset_css_online,
-       .css_offline    = cpuset_css_offline,
-       .css_free       = cpuset_css_free,
-       .can_attach     = cpuset_can_attach,
-       .cancel_attach  = cpuset_cancel_attach,
-       .attach         = cpuset_attach,
-       .post_attach    = cpuset_post_attach,
-       .bind           = cpuset_bind,
-       .fork           = cpuset_fork,
-       .legacy_cftypes = files,
-       .early_init     = true,
-};
-
-/**
- * cpuset_init - initialize cpusets at system boot
- *
- * Description: Initialize top_cpuset and the cpuset internal file system.
- **/
-
-int __init cpuset_init(void)
-{
-       int err = 0;
-
-       if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
-               BUG();
-       if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
-               BUG();
-
-       cpumask_setall(top_cpuset.cpus_allowed);
-       nodes_setall(top_cpuset.mems_allowed);
-       cpumask_setall(top_cpuset.effective_cpus);
-       nodes_setall(top_cpuset.effective_mems);
-
-       fmeter_init(&top_cpuset.fmeter);
-       set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
-       top_cpuset.relax_domain_level = -1;
-
-       err = register_filesystem(&cpuset_fs_type);
-       if (err < 0)
-               return err;
-
-       if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
-               BUG();
-
-       return 0;
-}
-
-/*
- * If CPU and/or memory hotplug handlers, below, unplug any CPUs
- * or memory nodes, we need to walk over the cpuset hierarchy,
- * removing that CPU or node from all cpusets.  If this removes the
- * last CPU or node from a cpuset, then move the tasks in the empty
- * cpuset to its next-highest non-empty parent.
- */
-static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
-{
-       struct cpuset *parent;
-
-       /*
-        * Find its next-highest non-empty parent (the top cpuset
-        * has online cpus, so it can't be empty).
-        */
-       parent = parent_cs(cs);
-       while (cpumask_empty(parent->cpus_allowed) ||
-                       nodes_empty(parent->mems_allowed))
-               parent = parent_cs(parent);
-
-       if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
-               pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
-               pr_cont_cgroup_name(cs->css.cgroup);
-               pr_cont("\n");
-       }
-}
-
-static void
-hotplug_update_tasks_legacy(struct cpuset *cs,
-                           struct cpumask *new_cpus, nodemask_t *new_mems,
-                           bool cpus_updated, bool mems_updated)
-{
-       bool is_empty;
-
-       spin_lock_irq(&callback_lock);
-       cpumask_copy(cs->cpus_allowed, new_cpus);
-       cpumask_copy(cs->effective_cpus, new_cpus);
-       cs->mems_allowed = *new_mems;
-       cs->effective_mems = *new_mems;
-       spin_unlock_irq(&callback_lock);
-
-       /*
-        * Don't call update_tasks_cpumask() if the cpuset becomes empty,
-        * as the tasks will be migrated to an ancestor.
-        */
-       if (cpus_updated && !cpumask_empty(cs->cpus_allowed))
-               update_tasks_cpumask(cs);
-       if (mems_updated && !nodes_empty(cs->mems_allowed))
-               update_tasks_nodemask(cs);
-
-       is_empty = cpumask_empty(cs->cpus_allowed) ||
-                  nodes_empty(cs->mems_allowed);
-
-       mutex_unlock(&cpuset_mutex);
-
-       /*
-        * Move tasks to the nearest ancestor with execution resources.
-        * This is a full cgroup operation which will also call back into
-        * cpuset, so it should be done outside any lock.
-        */
-       if (is_empty)
-               remove_tasks_in_empty_cpuset(cs);
-
-       mutex_lock(&cpuset_mutex);
-}
-
-static void
-hotplug_update_tasks(struct cpuset *cs,
-                    struct cpumask *new_cpus, nodemask_t *new_mems,
-                    bool cpus_updated, bool mems_updated)
-{
-       if (cpumask_empty(new_cpus))
-               cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus);
-       if (nodes_empty(*new_mems))
-               *new_mems = parent_cs(cs)->effective_mems;
-
-       spin_lock_irq(&callback_lock);
-       cpumask_copy(cs->effective_cpus, new_cpus);
-       cs->effective_mems = *new_mems;
-       spin_unlock_irq(&callback_lock);
-
-       if (cpus_updated)
-               update_tasks_cpumask(cs);
-       if (mems_updated)
-               update_tasks_nodemask(cs);
-}
-
-/**
- * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
- * @cs: cpuset in interest
- *
- * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
- * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
- * all its tasks are moved to the nearest ancestor with both resources.
- */
-static void cpuset_hotplug_update_tasks(struct cpuset *cs)
-{
-       static cpumask_t new_cpus;
-       static nodemask_t new_mems;
-       bool cpus_updated;
-       bool mems_updated;
-retry:
-       wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);
-
-       mutex_lock(&cpuset_mutex);
-
-       /*
-        * We have raced with task attaching. We wait until attaching
-        * is finished, so we won't attach a task to an empty cpuset.
-        */
-       if (cs->attach_in_progress) {
-               mutex_unlock(&cpuset_mutex);
-               goto retry;
-       }
-
-       cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
-       nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
-
-       cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
-       mems_updated = !nodes_equal(new_mems, cs->effective_mems);
-
-       if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
-               hotplug_update_tasks(cs, &new_cpus, &new_mems,
-                                    cpus_updated, mems_updated);
-       else
-               hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
-                                           cpus_updated, mems_updated);
-
-       mutex_unlock(&cpuset_mutex);
-}
-
-/**
- * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
- *
- * This function is called after either CPU or memory configuration has
- * changed and updates cpuset accordingly.  The top_cpuset is always
- * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
- * order to make cpusets transparent (of no effect) on systems that are
- * actively using CPU hotplug but making no active use of cpusets.
- *
- * Non-root cpusets are only affected by offlining.  If any CPUs or memory
- * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on
- * all descendants.
- *
- * Note that CPU offlining during suspend is ignored.  We don't modify
- * cpusets across suspend/resume cycles at all.
- */
-static void cpuset_hotplug_workfn(struct work_struct *work)
-{
-       static cpumask_t new_cpus;
-       static nodemask_t new_mems;
-       bool cpus_updated, mems_updated;
-       bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
-
-       mutex_lock(&cpuset_mutex);
-
-       /* fetch the available cpus/mems and find out which changed how */
-       cpumask_copy(&new_cpus, cpu_active_mask);
-       new_mems = node_states[N_MEMORY];
-
-       cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
-       mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
-
-       /* synchronize cpus_allowed to cpu_active_mask */
-       if (cpus_updated) {
-               spin_lock_irq(&callback_lock);
-               if (!on_dfl)
-                       cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
-               cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-               spin_unlock_irq(&callback_lock);
-               /* we don't mess with cpumasks of tasks in top_cpuset */
-       }
-
-       /* synchronize mems_allowed to N_MEMORY */
-       if (mems_updated) {
-               spin_lock_irq(&callback_lock);
-               if (!on_dfl)
-                       top_cpuset.mems_allowed = new_mems;
-               top_cpuset.effective_mems = new_mems;
-               spin_unlock_irq(&callback_lock);
-               update_tasks_nodemask(&top_cpuset);
-       }
-
-       mutex_unlock(&cpuset_mutex);
-
-       /* if cpus or mems changed, we need to propagate to descendants */
-       if (cpus_updated || mems_updated) {
-               struct cpuset *cs;
-               struct cgroup_subsys_state *pos_css;
-
-               rcu_read_lock();
-               cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
-                       if (cs == &top_cpuset || !css_tryget_online(&cs->css))
-                               continue;
-                       rcu_read_unlock();
-
-                       cpuset_hotplug_update_tasks(cs);
-
-                       rcu_read_lock();
-                       css_put(&cs->css);
-               }
-               rcu_read_unlock();
-       }
-
-       /* rebuild sched domains if cpus_allowed has changed */
-       if (cpus_updated)
-               rebuild_sched_domains();
-}
-
-void cpuset_update_active_cpus(bool cpu_online)
-{
-       /*
-        * We're inside a CPU hotplug critical region which usually nests
-        * inside cgroup synchronization.  Bounce actual hotplug processing
-        * to a work item to avoid reverse locking order.
-        *
-        * We still need to do partition_sched_domains() synchronously;
-        * otherwise, the scheduler will get confused and put tasks on the
-        * dead CPU.  Fall back to the default single domain.
-        * cpuset_hotplug_workfn() will rebuild it as necessary.
-        */
-       partition_sched_domains(1, NULL, NULL);
-       schedule_work(&cpuset_hotplug_work);
-}
-
-/*
- * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
- * Call this routine anytime after node_states[N_MEMORY] changes.
- * See cpuset_update_active_cpus() for CPU hotplug handling.
- */
-static int cpuset_track_online_nodes(struct notifier_block *self,
-                               unsigned long action, void *arg)
-{
-       schedule_work(&cpuset_hotplug_work);
-       return NOTIFY_OK;
-}
-
-static struct notifier_block cpuset_track_online_nodes_nb = {
-       .notifier_call = cpuset_track_online_nodes,
-       .priority = 10,         /* ??! */
-};
-
-/**
- * cpuset_init_smp - initialize cpus_allowed
- *
- * Description: Finish top cpuset after cpu, node maps are initialized
- */
-void __init cpuset_init_smp(void)
-{
-       cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
-       top_cpuset.mems_allowed = node_states[N_MEMORY];
-       top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
-
-       cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
-       top_cpuset.effective_mems = node_states[N_MEMORY];
-
-       register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
-
-       cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
-       BUG_ON(!cpuset_migrate_mm_wq);
-}
-
-/**
- * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
- * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
- * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
- *
- * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
- * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of cpu_online_mask, even if this means going outside the
- * task's cpuset.
- **/
-
-void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&callback_lock, flags);
-       rcu_read_lock();
-       guarantee_online_cpus(task_cs(tsk), pmask);
-       rcu_read_unlock();
-       spin_unlock_irqrestore(&callback_lock, flags);
-}
-
-void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
-{
-       rcu_read_lock();
-       do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
-       rcu_read_unlock();
-
-       /*
-        * We own tsk->cpus_allowed, nobody can change it under us.
-        *
-        * But we used cs && cs->cpus_allowed locklessly and thus can
-        * race with cgroup_attach_task() or update_cpumask() and get
-        * the wrong tsk->cpus_allowed. However, both cases imply the
-        * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
-        * which takes task_rq_lock().
-        *
-        * If we are called after it dropped the lock we must see all
-        * changes in tsk_cs()->cpus_allowed. Otherwise we can temporarily
-        * set any mask even if it is not right from task_cs() pov,
-        * the pending set_cpus_allowed_ptr() will fix things.
-        *
-        * select_fallback_rq() will fix things up and set cpu_possible_mask
-        * if required.
-        */
-}
-
-void __init cpuset_init_current_mems_allowed(void)
-{
-       nodes_setall(current->mems_allowed);
-}
-
-/**
- * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
- * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
- *
- * Description: Returns the nodemask_t mems_allowed of the cpuset
- * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of node_states[N_MEMORY], even if this means going outside the
- * task's cpuset.
- **/
-
-nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
-{
-       nodemask_t mask;
-       unsigned long flags;
-
-       spin_lock_irqsave(&callback_lock, flags);
-       rcu_read_lock();
-       guarantee_online_mems(task_cs(tsk), &mask);
-       rcu_read_unlock();
-       spin_unlock_irqrestore(&callback_lock, flags);
-
-       return mask;
-}
-
-/**
- * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
- * @nodemask: the nodemask to be checked
- *
- * Are any of the nodes in the nodemask allowed in current->mems_allowed?
- */
-int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
-{
-       return nodes_intersects(*nodemask, current->mems_allowed);
-}
-
-/*
- * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
- * mem_hardwall ancestor to the specified cpuset.  Call holding
- * callback_lock.  If no ancestor is mem_exclusive or mem_hardwall
- * (an unusual configuration), then returns the root cpuset.
- */
-static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
-{
-       while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs))
-               cs = parent_cs(cs);
-       return cs;
-}
-
-/**
- * cpuset_node_allowed - Can we allocate on a memory node?
- * @node: is this an allowed node?
- * @gfp_mask: memory allocation flags
- *
- * If we're in interrupt, yes, we can always allocate.  If @node is set in
- * current's mems_allowed, yes.  If it's not a __GFP_HARDWALL request and this
- * node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
- * yes.  If current has access to memory reserves due to TIF_MEMDIE, yes.
- * Otherwise, no.
- *
- * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
- * and do not allow allocations outside the current task's cpuset
- * unless the task has been OOM killed and is marked TIF_MEMDIE.
- * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest enclosing hardwalled ancestor cpuset.
- *
- * Scanning up parent cpusets requires callback_lock.  The
- * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
- * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
- * current task's mems_allowed came up empty on the first pass over
- * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
- * cpuset are short of memory, might require taking the callback_lock.
- *
- * The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
- * so no allocation on a node outside the cpuset is allowed (unless
- * in interrupt, of course).
- *
- * The second pass through get_page_from_freelist() doesn't even call
- * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
- * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
- * in alloc_flags.  That logic and the checks below have the combined
- * effect that:
- *     in_interrupt - any node ok (current task context irrelevant)
- *     GFP_ATOMIC   - any node ok
- *     TIF_MEMDIE   - any node ok
- *     GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
- *     GFP_USER     - only nodes in the current task's mems_allowed ok.
- */
-bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
-{
-       struct cpuset *cs;              /* current cpuset ancestors */
-       int allowed;                    /* is allocation on @node allowed? */
-       unsigned long flags;
-
-       if (in_interrupt())
-               return true;
-       if (node_isset(node, current->mems_allowed))
-               return true;
-       /*
-        * Allow tasks that have access to memory reserves because they have
-        * been OOM killed to get memory anywhere.
-        */
-       if (unlikely(test_thread_flag(TIF_MEMDIE)))
-               return true;
-       if (gfp_mask & __GFP_HARDWALL)  /* If hardwall request, stop here */
-               return false;
-
-       if (current->flags & PF_EXITING) /* Let dying task have memory */
-               return true;
-
-       /* Not hardwall and node outside mems_allowed: scan up cpusets */
-       spin_lock_irqsave(&callback_lock, flags);
-
-       rcu_read_lock();
-       cs = nearest_hardwall_ancestor(task_cs(current));
-       allowed = node_isset(node, cs->mems_allowed);
-       rcu_read_unlock();
-
-       spin_unlock_irqrestore(&callback_lock, flags);
-       return allowed;
-}
-
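Condensed, the ladder described above checks, in order: interrupt context, @node in mems_allowed, OOM-victim status, __GFP_HARDWALL, PF_EXITING, then the hardwalled-ancestor scan. A freestanding sketch of that ordering — every helper below is a stub standing in for the corresponding kernel predicate, not real API:

/* Illustrative decision ladder only; all helpers are stand-in stubs. */
#include <stdbool.h>

typedef unsigned int gfp_t;
#define __GFP_HARDWALL 0x1u                     /* stand-in flag value */

static bool in_irq_context(void)                { return false; }
static bool node_in_mems_allowed(int node)      { return node == 0; }
static bool task_is_oom_victim(void)            { return false; }
static bool task_is_exiting(void)               { return false; }
static bool node_in_hardwall_ancestor(int node) { return node >= 0; }

static bool node_allowed(int node, gfp_t gfp_mask)
{
        if (in_irq_context())
                return true;            /* interrupts may allocate anywhere */
        if (node_in_mems_allowed(node))
                return true;
        if (task_is_oom_victim())
                return true;            /* OOM victims may use reserves */
        if (gfp_mask & __GFP_HARDWALL)
                return false;           /* hardwall requests stop here */
        if (task_is_exiting())
                return true;            /* let a dying task have memory */
        /* GFP_KERNEL may escape to the nearest hardwalled ancestor */
        return node_in_hardwall_ancestor(node);
}

int main(void)
{
        return node_allowed(1, 0) ? 0 : 1;      /* GFP_KERNEL-like request */
}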
-/**
- * cpuset_mem_spread_node() - On which node to begin search for a file page
- * cpuset_slab_spread_node() - On which node to begin search for a slab page
- *
- * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
- * tasks in a cpuset with is_spread_page or is_spread_slab set),
- * and if the memory allocation used cpuset_mem_spread_node()
- * to determine on which node to start looking, as it will for
- * certain page cache or slab cache pages such as used for file
- * system buffers and inode caches, then instead of starting the
- * search for a free page on the local node, spread the starting
- * node around the task's mems_allowed nodes.
- *
- * We don't have to worry about the returned node being offline
- * because "it can't happen", and even if it did, it would be ok.
- *
- * The routines calling guarantee_online_mems() are careful to
- * only set nodes in task->mems_allowed that are online.  So it
- * should not be possible for the following code to return an
- * offline node.  But if it did, that would be ok, as this routine
- * is not returning the node where the allocation must be, only
- * the node where the search should start.  The zonelist passed to
- * __alloc_pages() will include all nodes.  If the slab allocator
- * is passed an offline node, it will fall back to the local node.
- * See kmem_cache_alloc_node().
- */
-
-static int cpuset_spread_node(int *rotor)
-{
-       return *rotor = next_node_in(*rotor, current->mems_allowed);
-}
-
-int cpuset_mem_spread_node(void)
-{
-       if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
-               current->cpuset_mem_spread_rotor =
-                       node_random(&current->mems_allowed);
-
-       return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
-}
-
-int cpuset_slab_spread_node(void)
-{
-       if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
-               current->cpuset_slab_spread_rotor =
-                       node_random(&current->mems_allowed);
-
-       return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
-}
-
-EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
-
-/**
- * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
- * @tsk1: pointer to task_struct of some task.
- * @tsk2: pointer to task_struct of some other task.
- *
- * Description: Return true if @tsk1's mems_allowed intersects the
- * mems_allowed of @tsk2.  Used by the OOM killer to determine if
- * one of the task's memory usage might impact the memory available
- * to the other.
- **/
-
-int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
-                                  const struct task_struct *tsk2)
-{
-       return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
-}
-
-/**
- * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
- *
- * Description: Prints current's name, cpuset name, and cached copy of its
- * mems_allowed to the kernel log.
- */
-void cpuset_print_current_mems_allowed(void)
-{
-       struct cgroup *cgrp;
-
-       rcu_read_lock();
-
-       cgrp = task_cs(current)->css.cgroup;
-       pr_info("%s cpuset=", current->comm);
-       pr_cont_cgroup_name(cgrp);
-       pr_cont(" mems_allowed=%*pbl\n",
-               nodemask_pr_args(&current->mems_allowed));
-
-       rcu_read_unlock();
-}
-
-/*
- * Collection of memory_pressure is suppressed unless
- * this flag is enabled by writing "1" to the special
- * cpuset file 'memory_pressure_enabled' in the root cpuset.
- */
-
-int cpuset_memory_pressure_enabled __read_mostly;
-
-/**
- * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
- *
- * Keep a running average of the rate of synchronous (direct)
- * page reclaim efforts initiated by tasks in each cpuset.
- *
- * This represents the rate at which some task in the cpuset
- * ran low on memory on all nodes it was allowed to use, and
-        * had to enter the kernel's page reclaim code in an effort to
- * create more free memory by tossing clean pages or swapping
- * or writing dirty pages.
- *
- * Display to user space in the per-cpuset read-only file
- * "memory_pressure".  Value displayed is an integer
- * representing the recent rate of entry into the synchronous
- * (direct) page reclaim by any task attached to the cpuset.
- **/
-
-void __cpuset_memory_pressure_bump(void)
-{
-       rcu_read_lock();
-       fmeter_markevent(&task_cs(current)->fmeter);
-       rcu_read_unlock();
-}
-
-#ifdef CONFIG_PROC_PID_CPUSET
-/*
- * proc_cpuset_show()
- *  - Print task's cpuset path into seq_file.
- *  - Used for /proc/<pid>/cpuset.
- *  - No need to task_lock(tsk) on this tsk->cpuset reference, as it
- *    doesn't really matter if tsk->cpuset changes after we read it,
- *    and we take cpuset_mutex, keeping cpuset_attach() from changing it
- *    anyway.
- */
-int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
-                    struct pid *pid, struct task_struct *tsk)
-{
-       char *buf;
-       struct cgroup_subsys_state *css;
-       int retval;
-
-       retval = -ENOMEM;
-       buf = kmalloc(PATH_MAX, GFP_KERNEL);
-       if (!buf)
-               goto out;
-
-       css = task_get_css(tsk, cpuset_cgrp_id);
-       retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
-                               current->nsproxy->cgroup_ns);
-       css_put(css);
-       if (retval >= PATH_MAX)
-               retval = -ENAMETOOLONG;
-       if (retval < 0)
-               goto out_free;
-       seq_puts(m, buf);
-       seq_putc(m, '\n');
-       retval = 0;
-out_free:
-       kfree(buf);
-out:
-       return retval;
-}
-#endif /* CONFIG_PROC_PID_CPUSET */
-
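The file implemented above is trivially consumable from userspace; a minimal reader for the calling task's own entry might look like this:

/* Minimal sketch: print the current task's cpuset path, as produced
 * by proc_cpuset_show() above. */
#include <stdio.h>

int main(void)
{
        char line[4096];
        FILE *f = fopen("/proc/self/cpuset", "r");

        if (!f)
                return 1;
        if (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* e.g. "/demo\n" */
        fclose(f);
        return 0;
}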
-/* Display task mems_allowed in /proc/<pid>/status file. */
-void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
-{
-       seq_printf(m, "Mems_allowed:\t%*pb\n",
-                  nodemask_pr_args(&task->mems_allowed));
-       seq_printf(m, "Mems_allowed_list:\t%*pbl\n",
-                  nodemask_pr_args(&task->mems_allowed));
-}
index 77a932b54a64fbeb2640b35c1cc4c096994bf1d7..1031bdf9f0125110088f0880699349c485d9bfcf 100644 (file)
@@ -455,7 +455,7 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp,
                                loff_t *ppos)
 {
-       int ret = proc_dointvec(table, write, buffer, lenp, ppos);
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
        if (ret || !write)
                return ret;
@@ -3522,6 +3522,8 @@ static void perf_event_enable_on_exec(int ctxn)
        if (enabled) {
                clone_ctx = unclone_ctx(ctx);
                ctx_resched(cpuctx, ctx, event_type);
+       } else {
+               ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
        }
        perf_ctx_unlock(cpuctx, ctx);
 
@@ -4925,9 +4927,9 @@ unlock:
        rcu_read_unlock();
 }
 
-static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int perf_mmap_fault(struct vm_fault *vmf)
 {
-       struct perf_event *event = vma->vm_file->private_data;
+       struct perf_event *event = vmf->vma->vm_file->private_data;
        struct ring_buffer *rb;
        int ret = VM_FAULT_SIGBUS;
 
@@ -4950,7 +4952,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                goto unlock;
 
        get_page(vmf->page);
-       vmf->page->mapping = vma->vm_file->f_mapping;
+       vmf->page->mapping = vmf->vma->vm_file->f_mapping;
        vmf->page->index   = vmf->pgoff;
 
        ret = 0;
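These perf hunks are part of a tree-wide conversion: ->fault() handlers drop their explicit vm_area_struct argument and recover the VMA through the fault descriptor instead. A compilable mock of the shape change, with the structures reduced to stand-ins holding only the fields the example touches:

/* Mock types: reduced stand-ins for the kernel structures, only to
 * show the signature change in isolation. */
struct file { void *private_data; };
struct vm_area_struct { struct file *vm_file; };
struct vm_fault { struct vm_area_struct *vma; unsigned long pgoff; };

/* Old style: the VMA arrived as a separate parameter. */
static int fault_old(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        return vma->vm_file->private_data != 0 && vmf->pgoff == 0;
}

/* New style: the VMA is reached through the fault descriptor. */
static int fault_new(struct vm_fault *vmf)
{
        return vmf->vma->vm_file->private_data != 0 && vmf->pgoff == 0;
}

int main(void)
{
        struct file f = { .private_data = &f };
        struct vm_area_struct vma = { .vm_file = &f };
        struct vm_fault vmf = { .vma = &vma, .pgoff = 0 };

        return fault_old(&vma, &vmf) == fault_new(&vmf) ? 0 : 1;
}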
@@ -9955,6 +9957,7 @@ SYSCALL_DEFINE5(perf_event_open,
                 * of swizzling perf_event::ctx.
                 */
                perf_remove_from_context(group_leader, 0);
+               put_ctx(gctx);
 
                list_for_each_entry(sibling, &group_leader->sibling_list,
                                    group_entry) {
@@ -9993,13 +9996,6 @@ SYSCALL_DEFINE5(perf_event_open,
                perf_event__state_init(group_leader);
                perf_install_in_context(ctx, group_leader, group_leader->cpu);
                get_ctx(ctx);
-
-               /*
-                * Now that all events are installed in @ctx, nothing
-                * references @gctx anymore, so drop the last reference we have
-                * on it.
-                */
-               put_ctx(gctx);
        }
 
        /*
@@ -10959,5 +10955,11 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
        .css_alloc      = perf_cgroup_css_alloc,
        .css_free       = perf_cgroup_css_free,
        .attach         = perf_cgroup_attach,
+       /*
+        * Implicitly enable on dfl hierarchy so that perf events can
+        * always be filtered by cgroup2 path as long as perf_event
+        * controller is not mounted on a legacy hierarchy.
+        */
+       .implicit_on_dfl = true,
 };
 #endif /* CONFIG_CGROUP_PERF */
index d416f3baf3924d8093cf275fb1d290bf9d3564e4..d630f8ac4d2f2163292a38112b1272b5afc5358b 100644 (file)
@@ -153,14 +153,19 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                                struct page *old_page, struct page *new_page)
 {
        struct mm_struct *mm = vma->vm_mm;
-       spinlock_t *ptl;
-       pte_t *ptep;
+       struct page_vma_mapped_walk pvmw = {
+               .page = old_page,
+               .vma = vma,
+               .address = addr,
+       };
        int err;
        /* For mmu_notifiers */
        const unsigned long mmun_start = addr;
        const unsigned long mmun_end   = addr + PAGE_SIZE;
        struct mem_cgroup *memcg;
 
+       VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
+
        err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
                        false);
        if (err)
@@ -171,11 +176,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        err = -EAGAIN;
-       ptep = page_check_address(old_page, mm, addr, &ptl, 0);
-       if (!ptep) {
+       if (!page_vma_mapped_walk(&pvmw)) {
                mem_cgroup_cancel_charge(new_page, memcg, false);
                goto unlock;
        }
+       VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
        get_page(new_page);
        page_add_new_anon_rmap(new_page, vma, addr, false);
@@ -187,14 +192,15 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                inc_mm_counter(mm, MM_ANONPAGES);
        }
 
-       flush_cache_page(vma, addr, pte_pfn(*ptep));
-       ptep_clear_flush_notify(vma, addr, ptep);
-       set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));
+       flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
+       ptep_clear_flush_notify(vma, addr, pvmw.pte);
+       set_pte_at_notify(mm, addr, pvmw.pte,
+                       mk_pte(new_page, vma->vm_page_prot));
 
        page_remove_rmap(old_page, false);
        if (!page_mapped(old_page))
                try_to_free_swap(old_page);
-       pte_unmap_unlock(ptep, ptl);
+       page_vma_mapped_walk_done(&pvmw);
 
        if (vma->vm_flags & VM_LOCKED)
                munlock_vma_page(old_page);
@@ -300,8 +306,8 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 
 retry:
        /* Read the page with vaddr into memory */
-       ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
-                       &vma, NULL);
+       ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+                       FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
        if (ret <= 0)
                return ret;
 
@@ -741,7 +747,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
                        continue;
                }
 
-               if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+               if (!mmget_not_zero(vma->vm_mm))
                        continue;
 
                info = prev;
index 9960accbf2ab0863643a5ee38b0040043f424d20..8a768a3672a555e6e22f89eb81582e0d5b2aa97e 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
+#include <linux/userfaultfd_k.h>
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
@@ -538,7 +539,7 @@ static void exit_mm(void)
                __set_current_state(TASK_RUNNING);
                down_read(&mm->mmap_sem);
        }
-       atomic_inc(&mm->mm_count);
+       mmgrab(mm);
        BUG_ON(mm != current->active_mm);
        /* more a memory barrier than a real lock */
        task_lock(current);
@@ -547,6 +548,7 @@ static void exit_mm(void)
        enter_lazy_tlb(mm, current);
        task_unlock(current);
        mm_update_next_owner(mm);
+       userfaultfd_exit(mm);
        mmput(mm);
        if (test_thread_flag(TIF_MEMDIE))
                exit_oom_victim();
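The atomic_inc() conversions in this and the following hunks are another tree-wide cleanup: mm_users (address-space users, taken with mmget()) and mm_count (references pinning struct mm_struct itself, taken with mmgrab()) are distinct counts. A userspace analogue of the split — the names mirror the kernel helpers purely for illustration, this is not kernel code:

/* Userspace analogue of the mm_users/mm_count split. */
#include <stdatomic.h>
#include <stdlib.h>

struct mm {
        atomic_int mm_users;    /* tasks actively using the address space */
        atomic_int mm_count;    /* references keeping struct mm itself alive */
};

static void mmgrab(struct mm *mm)       /* pin the structure */
{
        atomic_fetch_add(&mm->mm_count, 1);
}

static void mmget(struct mm *mm)        /* take an address-space user ref */
{
        atomic_fetch_add(&mm->mm_users, 1);
}

static void mmdrop(struct mm *mm)
{
        if (atomic_fetch_sub(&mm->mm_count, 1) == 1)
                free(mm);               /* last structural reference */
}

static void mmput(struct mm *mm)
{
        if (atomic_fetch_sub(&mm->mm_users, 1) == 1) {
                /* last user: the address space would be torn down here */
                mmdrop(mm);     /* the users as a whole hold one mm_count */
        }
}

int main(void)
{
        struct mm *mm = calloc(1, sizeof(*mm));

        atomic_store(&mm->mm_users, 1);
        atomic_store(&mm->mm_count, 1);
        mmget(mm);      /* e.g. a clone(CLONE_VM) sharing the mm */
        mmput(mm);
        mmput(mm);      /* final user: drops the structural ref too */
        return 0;
}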
index 348fe73155bc280123d7c961455187b6b640fb67..246bf9aaf9dfddf4632f6994ab6e2bcdd8a02434 100644 (file)
@@ -1000,7 +1000,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
                if (task->flags & PF_KTHREAD)
                        mm = NULL;
                else
-                       atomic_inc(&mm->mm_users);
+                       mmget(mm);
        }
        task_unlock(task);
        return mm;
@@ -1188,7 +1188,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
        vmacache_flush(tsk);
 
        if (clone_flags & CLONE_VM) {
-               atomic_inc(&oldmm->mm_users);
+               mmget(oldmm);
                mm = oldmm;
                goto good_mm;
        }
index cdf3650361414e39e97018012871579c1eda76f6..b687cb22301ce0dab9307156651ef69172170982 100644 (file)
@@ -338,7 +338,7 @@ static inline bool should_fail_futex(bool fshared)
 
 static inline void futex_get_mm(union futex_key *key)
 {
-       atomic_inc(&key->private.mm->mm_count);
+       mmgrab(key->private.mm);
        /*
         * Ensure futex_get_mm() implies a full barrier such that
         * get_futex_key() implies a full barrier. This is relied upon
index 6b669593e7eb18b18743a771415976a8c17b6920..944d068b6c4887f0b07df10ac42825d78fed4db6 100644 (file)
@@ -353,7 +353,7 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
                return 0;
 
        /*
-        * Preserve the managed affinity setting and an userspace affinity
+        * Preserve the managed affinity setting and a userspace affinity
         * setup, but make sure that one of the targets is online.
         */
        if (irqd_affinity_is_managed(&desc->irq_data) ||
index a9b8cf50059151c17f63d35cf4c622ae8b72f131..6c9cb208ac4827ea6141d3d04864fbd4560ec8b9 100644 (file)
@@ -236,12 +236,28 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry
 
 static inline struct jump_entry *static_key_entries(struct static_key *key)
 {
-       return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
+       WARN_ON_ONCE(key->type & JUMP_TYPE_LINKED);
+       return (struct jump_entry *)(key->type & ~JUMP_TYPE_MASK);
 }
 
 static inline bool static_key_type(struct static_key *key)
 {
-       return (unsigned long)key->entries & JUMP_TYPE_MASK;
+       return key->type & JUMP_TYPE_TRUE;
+}
+
+static inline bool static_key_linked(struct static_key *key)
+{
+       return key->type & JUMP_TYPE_LINKED;
+}
+
+static inline void static_key_clear_linked(struct static_key *key)
+{
+       key->type &= ~JUMP_TYPE_LINKED;
+}
+
+static inline void static_key_set_linked(struct static_key *key)
+{
+       key->type |= JUMP_TYPE_LINKED;
 }
 
 static inline struct static_key *jump_entry_key(struct jump_entry *entry)
@@ -254,6 +270,26 @@ static bool jump_entry_branch(struct jump_entry *entry)
        return (unsigned long)entry->key & 1UL;
 }
 
+/***
+ * A 'struct static_key' uses a union such that it either points directly
+ * to a table of 'struct jump_entry' or to a linked list of modules which in
+ * turn point to 'struct jump_entry' tables.
+ *
+ * The two lower bits of the pointer are used to keep track of which pointer
+ * type is in use and to store the initial branch direction; we use access
+ * functions which preserve these bits.
+ */
+static void static_key_set_entries(struct static_key *key,
+                                  struct jump_entry *entries)
+{
+       unsigned long type;
+
+       WARN_ON_ONCE((unsigned long)entries & JUMP_TYPE_MASK);
+       type = key->type & JUMP_TYPE_MASK;
+       key->entries = entries;
+       key->type |= type;
+}
+
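The low-bit trick those accessors rely on is easy to show in isolation: a pointer to a sufficiently aligned object has zero low bits, which can carry flags as long as every reader masks them off. A freestanding demo — names are illustrative, not the kernel implementation:

/* Freestanding demo of low-bit pointer tagging. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TYPE_MASK   3UL         /* two low bits reserved for flags */
#define FLAG_TRUE   1UL
#define FLAG_LINKED 2UL

struct entry { long payload; };

static uintptr_t set_ptr(struct entry *e, uintptr_t flags)
{
        uintptr_t p = (uintptr_t)e;

        assert((p & TYPE_MASK) == 0);   /* alignment provides free bits */
        return p | flags;
}

static struct entry *get_ptr(uintptr_t tagged)
{
        return (struct entry *)(tagged & ~TYPE_MASK);
}

int main(void)
{
        struct entry e = { .payload = 42 };
        uintptr_t tagged = set_ptr(&e, FLAG_TRUE);

        printf("payload=%ld linked=%lu\n",
               get_ptr(tagged)->payload,
               (unsigned long)(tagged & FLAG_LINKED));
        return 0;
}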
 static enum jump_label_type jump_label_type(struct jump_entry *entry)
 {
        struct static_key *key = jump_entry_key(entry);
@@ -313,13 +349,7 @@ void __init jump_label_init(void)
                        continue;
 
                key = iterk;
-               /*
-                * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
-                */
-               *((unsigned long *)&key->entries) += (unsigned long)iter;
-#ifdef CONFIG_MODULES
-               key->next = NULL;
-#endif
+               static_key_set_entries(key, iter);
        }
        static_key_initialized = true;
        jump_label_unlock();
@@ -343,6 +373,29 @@ struct static_key_mod {
        struct module *mod;
 };
 
+static inline struct static_key_mod *static_key_mod(struct static_key *key)
+{
+       WARN_ON_ONCE(!(key->type & JUMP_TYPE_LINKED));
+       return (struct static_key_mod *)(key->type & ~JUMP_TYPE_MASK);
+}
+
+/***
+ * key->type and key->next are the same via union.
+ * This sets key->next and preserves the type bits.
+ *
+ * See additional comments above static_key_set_entries().
+ */
+static void static_key_set_mod(struct static_key *key,
+                              struct static_key_mod *mod)
+{
+       unsigned long type;
+
+       WARN_ON_ONCE((unsigned long)mod & JUMP_TYPE_MASK);
+       type = key->type & JUMP_TYPE_MASK;
+       key->next = mod;
+       key->type |= type;
+}
+
 static int __jump_label_mod_text_reserved(void *start, void *end)
 {
        struct module *mod;
@@ -365,11 +418,23 @@ static void __jump_label_mod_update(struct static_key *key)
 {
        struct static_key_mod *mod;
 
-       for (mod = key->next; mod; mod = mod->next) {
-               struct module *m = mod->mod;
+       for (mod = static_key_mod(key); mod; mod = mod->next) {
+               struct jump_entry *stop;
+               struct module *m;
+
+               /*
+                * NULL if the static_key is defined in a module
+                * that does not use it
+                */
+               if (!mod->entries)
+                       continue;
 
-               __jump_label_update(key, mod->entries,
-                                   m->jump_entries + m->num_jump_entries);
+               m = mod->mod;
+               if (!m)
+                       stop = __stop___jump_table;
+               else
+                       stop = m->jump_entries + m->num_jump_entries;
+               __jump_label_update(key, mod->entries, stop);
        }
 }
 
@@ -404,7 +469,7 @@ static int jump_label_add_module(struct module *mod)
        struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
        struct jump_entry *iter;
        struct static_key *key = NULL;
-       struct static_key_mod *jlm;
+       struct static_key_mod *jlm, *jlm2;
 
        /* if the module doesn't have jump label entries, just return */
        if (iter_start == iter_stop)
@@ -421,20 +486,32 @@ static int jump_label_add_module(struct module *mod)
 
                key = iterk;
                if (within_module(iter->key, mod)) {
-                       /*
-                        * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
-                        */
-                       *((unsigned long *)&key->entries) += (unsigned long)iter;
-                       key->next = NULL;
+                       static_key_set_entries(key, iter);
                        continue;
                }
                jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL);
                if (!jlm)
                        return -ENOMEM;
+               if (!static_key_linked(key)) {
+                       jlm2 = kzalloc(sizeof(struct static_key_mod),
+                                      GFP_KERNEL);
+                       if (!jlm2) {
+                               kfree(jlm);
+                               return -ENOMEM;
+                       }
+                       preempt_disable();
+                       jlm2->mod = __module_address((unsigned long)key);
+                       preempt_enable();
+                       jlm2->entries = static_key_entries(key);
+                       jlm2->next = NULL;
+                       static_key_set_mod(key, jlm2);
+                       static_key_set_linked(key);
+               }
                jlm->mod = mod;
                jlm->entries = iter;
-               jlm->next = key->next;
-               key->next = jlm;
+               jlm->next = static_key_mod(key);
+               static_key_set_mod(key, jlm);
+               static_key_set_linked(key);
 
                /* Only update if we've changed from our initial state */
                if (jump_label_type(iter) != jump_label_init_type(iter))
@@ -461,16 +538,34 @@ static void jump_label_del_module(struct module *mod)
                if (within_module(iter->key, mod))
                        continue;
 
+               /* No memory during module load */
+               if (WARN_ON(!static_key_linked(key)))
+                       continue;
+
                prev = &key->next;
-               jlm = key->next;
+               jlm = static_key_mod(key);
 
                while (jlm && jlm->mod != mod) {
                        prev = &jlm->next;
                        jlm = jlm->next;
                }
 
-               if (jlm) {
+               /* No memory during module load */
+               if (WARN_ON(!jlm))
+                       continue;
+
+               if (prev == &key->next)
+                       static_key_set_mod(key, jlm->next);
+               else
                        *prev = jlm->next;
+
+               kfree(jlm);
+
+               jlm = static_key_mod(key);
+               /* if only one entry is left, fold it back into the static_key */
+               if (jlm->next == NULL) {
+                       static_key_set_entries(key, jlm->entries);
+                       static_key_clear_linked(key);
                        kfree(jlm);
                }
        }
@@ -499,8 +594,10 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
        case MODULE_STATE_COMING:
                jump_label_lock();
                ret = jump_label_add_module(mod);
-               if (ret)
+               if (ret) {
+                       WARN(1, "Failed to allocatote memory: jump_label may not work properly.\n");
                        jump_label_del_module(mod);
+               }
                jump_label_unlock();
                break;
        case MODULE_STATE_GOING:
@@ -561,11 +658,14 @@ int jump_label_text_reserved(void *start, void *end)
 static void jump_label_update(struct static_key *key)
 {
        struct jump_entry *stop = __stop___jump_table;
-       struct jump_entry *entry = static_key_entries(key);
+       struct jump_entry *entry;
 #ifdef CONFIG_MODULES
        struct module *mod;
 
-       __jump_label_mod_update(key);
+       if (static_key_linked(key)) {
+               __jump_label_mod_update(key);
+               return;
+       }
 
        preempt_disable();
        mod = __module_address((unsigned long)key);
@@ -573,6 +673,7 @@ static void jump_label_update(struct static_key *key)
                stop = mod->jump_entries + mod->num_jump_entries;
        preempt_enable();
 #endif
+       entry = static_key_entries(key);
        /* if there are no users, entry can be NULL */
        if (entry)
                __jump_label_update(key, entry, stop);
index ee1bc1bb8feb82f4873eb38106805c2823bed7b2..0999679d6f26706350cca88f42b970a1306f7037 100644 (file)
@@ -195,7 +195,7 @@ static ssize_t notes_read(struct file *filp, struct kobject *kobj,
        return count;
 }
 
-static struct bin_attribute notes_attr = {
+static struct bin_attribute notes_attr __ro_after_init  = {
        .attr = {
                .name = "notes",
                .mode = S_IRUGO,
index 9ecedc28b928debb6a5988a5db8b76833133d9e4..06123234f1189c86ee42dffdc2d14873b6b16895 100644 (file)
@@ -246,9 +246,13 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
        /* pages are dead and unused, undo the arch mapping */
        align_start = res->start & ~(SECTION_SIZE - 1);
        align_size = ALIGN(resource_size(res), SECTION_SIZE);
+
+       lock_device_hotplug();
        mem_hotplug_begin();
        arch_remove_memory(align_start, align_size);
        mem_hotplug_done();
+       unlock_device_hotplug();
+
        untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
        pgmap_radix_release(res);
        dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
@@ -360,9 +364,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        if (error)
                goto err_pfn_remap;
 
+       lock_device_hotplug();
        mem_hotplug_begin();
        error = arch_add_memory(nid, align_start, align_size, true);
        mem_hotplug_done();
+       unlock_device_hotplug();
        if (error)
                goto err_add_memory;
 
index fd2c9acbcc19856f4b53e28d1acd9f5d1cdae165..6196af8a82230024ba98647c7fe946bf842bd102 100644 (file)
@@ -95,7 +95,7 @@ static int notifier_call_chain(struct notifier_block **nl,
                if (nr_calls)
                        (*nr_calls)++;
 
-               if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
+               if (ret & NOTIFY_STOP_MASK)
                        break;
                nb = next_nb;
                nr_to_call--;
index b95959733ce08a8723e98535153fbb74e495ad7b..3ec16e603e88281eb3e8596a99f10a30af1b7990 100644 (file)
@@ -273,7 +273,8 @@ void panic(const char *fmt, ...)
                extern int stop_a_enabled;
                /* Make sure the user can actually press Stop-A (L1-A) */
                stop_a_enabled = 1;
-               pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
+               pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
+                        "twice on console to return to the boot prom\n");
        }
 #endif
 #if defined(CONFIG_S390)
index 8f18d314a96a49a1e9fc6c4cf1c5bead5c96a787..0e413d9eec8af484517300e1c142325356865771 100644 (file)
@@ -39,10 +39,10 @@ static void relay_file_mmap_close(struct vm_area_struct *vma)
 /*
  * fault() vm_op implementation for relay file mapping.
  */
-static int relay_buf_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int relay_buf_fault(struct vm_fault *vmf)
 {
        struct page *page;
-       struct rchan_buf *buf = vma->vm_private_data;
+       struct rchan_buf *buf = vmf->vma->vm_private_data;
        pgoff_t pgoff = vmf->pgoff;
 
        if (!buf)
@@ -847,7 +847,7 @@ void relay_close(struct rchan *chan)
 
        if (chan->last_toobig)
                printk(KERN_WARNING "relay: one or more items not logged "
-                      "[item size (%Zd) > sub-buffer size (%Zd)]\n",
+                      "[item size (%zd) > sub-buffer size (%zd)]\n",
                       chan->last_toobig, chan->subbuf_size);
 
        list_del(&chan->list);
index e1ae6ac15eac94bb6562cb8d206190ec60a9cd5f..bbfb917a9b4998f8254b4cae92f2dce129761bbb 100644 (file)
@@ -1090,6 +1090,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
        int ret = 0;
 
        rq = task_rq_lock(p, &rf);
+       update_rq_clock(rq);
 
        if (p->flags & PF_KTHREAD) {
                /*
@@ -2847,7 +2848,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 
        if (!mm) {
                next->active_mm = oldmm;
-               atomic_inc(&oldmm->mm_count);
+               mmgrab(oldmm);
                enter_lazy_tlb(oldmm, next);
        } else
                switch_mm_irqs_off(oldmm, mm, next);
@@ -5560,7 +5561,7 @@ static void migrate_tasks(struct rq *dead_rq)
 {
        struct rq *rq = dead_rq;
        struct task_struct *next, *stop = rq->stop;
-       struct rq_flags rf, old_rf;
+       struct rq_flags rf;
        int dest_cpu;
 
        /*
@@ -5579,7 +5580,9 @@ static void migrate_tasks(struct rq *dead_rq)
         * class method both need to have an up-to-date
         * value of rq->clock[_task]
         */
+       rq_pin_lock(rq, &rf);
        update_rq_clock(rq);
+       rq_unpin_lock(rq, &rf);
 
        for (;;) {
                /*
@@ -5592,7 +5595,7 @@ static void migrate_tasks(struct rq *dead_rq)
                /*
                 * pick_next_task() assumes pinned rq->lock:
                 */
-               rq_pin_lock(rq, &rf);
+               rq_repin_lock(rq, &rf);
                next = pick_next_task(rq, &fake_task, &rf);
                BUG_ON(!next);
                next->sched_class->put_prev_task(rq, next);
@@ -5621,13 +5624,6 @@ static void migrate_tasks(struct rq *dead_rq)
                        continue;
                }
 
-               /*
-                * __migrate_task() may return with a different
-                * rq->lock held and a new cookie in 'rf', but we need
-                * to preserve rf::clock_update_flags for 'dead_rq'.
-                */
-               old_rf = rf;
-
                /* Find suitable destination for @next, with force if needed. */
                dest_cpu = select_fallback_rq(dead_rq->cpu, next);
 
@@ -5636,7 +5632,6 @@ static void migrate_tasks(struct rq *dead_rq)
                        raw_spin_unlock(&rq->lock);
                        rq = dead_rq;
                        raw_spin_lock(&rq->lock);
-                       rf = old_rf;
                }
                raw_spin_unlock(&next->pi_lock);
        }
@@ -6098,7 +6093,7 @@ void __init sched_init(void)
        /*
         * The boot idle thread does lazy MMU switching as well:
         */
-       atomic_inc(&init_mm.mm_count);
+       mmgrab(&init_mm);
        enter_lazy_tlb(&init_mm, current);
 
        /*
@@ -6819,11 +6814,20 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
        if (IS_ERR(tg))
                return ERR_PTR(-ENOMEM);
 
-       sched_online_group(tg, parent);
-
        return &tg->css;
 }
 
+/* Expose task group only after completing cgroup initialization */
+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+{
+       struct task_group *tg = css_tg(css);
+       struct task_group *parent = css_tg(css->parent);
+
+       if (parent)
+               sched_online_group(tg, parent);
+       return 0;
+}
+
 static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
        struct task_group *tg = css_tg(css);
@@ -7229,6 +7233,7 @@ static struct cftype cpu_files[] = {
 
 struct cgroup_subsys cpu_cgrp_subsys = {
        .css_alloc      = cpu_cgroup_css_alloc,
+       .css_online     = cpu_cgroup_css_online,
        .css_released   = cpu_cgroup_css_released,
        .css_free       = cpu_cgroup_css_free,
        .fork           = cpu_cgroup_fork,
index 13f9def8b24aecf662dd12095e1d505e909785e4..214a8feeb77124c69b976d3021044953d212fd8b 100644 (file)
@@ -3239,10 +3239,17 @@ int compat_restore_altstack(const compat_stack_t __user *uss)
 
 int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
 {
+       int err;
        struct task_struct *t = current;
-       return  __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) |
-               __put_user(sas_ss_flags(sp), &uss->ss_flags) |
+       err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp),
+                        &uss->ss_sp) |
+               __put_user(t->sas_ss_flags, &uss->ss_flags) |
                __put_user(t->sas_ss_size, &uss->ss_size);
+       if (err)
+               return err;
+       if (t->sas_ss_flags & SS_AUTODISARM)
+               sas_ss_reset(t);
+       return 0;
 }
 #endif
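The compat fix above both reports the live sas_ss_flags and honours SS_AUTODISARM, the flag that disarms the alternate stack while a handler runs on it. Typical native userspace usage, assuming a kernel >= 4.7 and a libc recent enough to expose the flag:

/* Minimal SS_AUTODISARM usage sketch (flag availability may depend
 * on the libc version). */
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void handler(int sig)
{
        /* While this runs, the kernel has disarmed the alt stack, so a
         * nested siglongjmp()/swapcontext() cannot corrupt it. */
        (void)sig;
}

int main(void)
{
        stack_t ss = {
                .ss_sp    = malloc(SIGSTKSZ),
                .ss_size  = SIGSTKSZ,
                .ss_flags = SS_AUTODISARM,
        };
        struct sigaction sa = { .sa_handler = handler, .sa_flags = SA_ONSTACK };

        if (sigaltstack(&ss, NULL) || sigaction(SIGUSR1, &sa, NULL))
                return 1;
        raise(SIGUSR1);
        puts("handler returned; alt stack re-armed");
        return 0;
}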
 
index 0d887eb62856e38761aa840ab22347cdc98c1c90..01a99976f072e56e562e7ccaaf7f50eb4f82ae57 100644 (file)
@@ -311,7 +311,7 @@ EXPORT_SYMBOL_GPL(torture_random);
 /*
  * Variables for shuffling.  The idea is to ensure that each CPU stays
  * idle for an extended period to test interactions with dyntick idle,
- * as well as interactions with any per-CPU varibles.
+ * as well as interactions with any per-CPU variables.
  */
 struct shuffle_task {
        struct list_head st_l;
index eb230f06ba4123a1dc75dafb508789f6cd7afd25..0c060932639140dab517ff505230c2dedfd42855 100644 (file)
@@ -1110,13 +1110,6 @@ struct ftrace_func_entry {
        unsigned long ip;
 };
 
-struct ftrace_hash {
-       unsigned long           size_bits;
-       struct hlist_head       *buckets;
-       unsigned long           count;
-       struct rcu_head         rcu;
-};
-
 /*
  * We make these constant because no one should touch them,
  * but they are used as the default "empty hash", to avoid allocating
@@ -1192,26 +1185,24 @@ struct ftrace_page {
 static struct ftrace_page      *ftrace_pages_start;
 static struct ftrace_page      *ftrace_pages;
 
-static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash)
+static __always_inline unsigned long
+ftrace_hash_key(struct ftrace_hash *hash, unsigned long ip)
 {
-       return !hash || !hash->count;
+       if (hash->size_bits > 0)
+               return hash_long(ip, hash->size_bits);
+
+       return 0;
 }
 
-static struct ftrace_func_entry *
-ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
+/* Only use this function if ftrace_hash_empty() has already been tested */
+static __always_inline struct ftrace_func_entry *
+__ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 {
        unsigned long key;
        struct ftrace_func_entry *entry;
        struct hlist_head *hhd;
 
-       if (ftrace_hash_empty(hash))
-               return NULL;
-
-       if (hash->size_bits > 0)
-               key = hash_long(ip, hash->size_bits);
-       else
-               key = 0;
-
+       key = ftrace_hash_key(hash, ip);
        hhd = &hash->buckets[key];
 
        hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) {
@@ -1221,17 +1212,32 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
        return NULL;
 }
 
+/**
+ * ftrace_lookup_ip - Test to see if an ip exists in an ftrace_hash
+ * @hash: The hash to look at
+ * @ip: The instruction pointer to test
+ *
+ * Search a given @hash to see if a given instruction pointer (@ip)
+ * exists in it.
+ *
+ * Returns the entry that holds the @ip if found. NULL otherwise.
+ */
+struct ftrace_func_entry *
+ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
+{
+       if (ftrace_hash_empty(hash))
+               return NULL;
+
+       return __ftrace_lookup_ip(hash, ip);
+}
+
 static void __add_hash_entry(struct ftrace_hash *hash,
                             struct ftrace_func_entry *entry)
 {
        struct hlist_head *hhd;
        unsigned long key;
 
-       if (hash->size_bits)
-               key = hash_long(entry->ip, hash->size_bits);
-       else
-               key = 0;
-
+       key = ftrace_hash_key(hash, entry->ip);
        hhd = &hash->buckets[key];
        hlist_add_head(&entry->hlist, hhd);
        hash->count++;
@@ -1383,9 +1389,8 @@ ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash);
 static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
                                       struct ftrace_hash *new_hash);
 
-static int
-ftrace_hash_move(struct ftrace_ops *ops, int enable,
-                struct ftrace_hash **dst, struct ftrace_hash *src)
+static struct ftrace_hash *
+__ftrace_hash_move(struct ftrace_hash *src)
 {
        struct ftrace_func_entry *entry;
        struct hlist_node *tn;
@@ -1393,21 +1398,13 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
        struct ftrace_hash *new_hash;
        int size = src->count;
        int bits = 0;
-       int ret;
        int i;
 
-       /* Reject setting notrace hash on IPMODIFY ftrace_ops */
-       if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable)
-               return -EINVAL;
-
        /*
-        * If the new source is empty, just free dst and assign it
-        * the empty_hash.
+        * If the new source is empty, just return the empty_hash.
         */
-       if (!src->count) {
-               new_hash = EMPTY_HASH;
-               goto update;
-       }
+       if (!src->count)
+               return EMPTY_HASH;
 
        /*
         * Make the hash size about 1/2 the # found
@@ -1421,7 +1418,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
 
        new_hash = alloc_ftrace_hash(bits);
        if (!new_hash)
-               return -ENOMEM;
+               return NULL;
 
        size = 1 << src->size_bits;
        for (i = 0; i < size; i++) {
@@ -1432,7 +1429,24 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
                }
        }
 
-update:
+       return new_hash;
+}
+
+static int
+ftrace_hash_move(struct ftrace_ops *ops, int enable,
+                struct ftrace_hash **dst, struct ftrace_hash *src)
+{
+       struct ftrace_hash *new_hash;
+       int ret;
+
+       /* Reject setting notrace hash on IPMODIFY ftrace_ops */
+       if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable)
+               return -EINVAL;
+
+       new_hash = __ftrace_hash_move(src);
+       if (!new_hash)
+               return -ENOMEM;
+
        /* Make sure this can be applied if it is IPMODIFY ftrace_ops */
        if (enable) {
                /* IPMODIFY should be updated only when filter_hash updating */
@@ -1466,9 +1480,9 @@ static bool hash_contains_ip(unsigned long ip,
         * notrace hash is considered not in the notrace hash.
         */
        return (ftrace_hash_empty(hash->filter_hash) ||
-               ftrace_lookup_ip(hash->filter_hash, ip)) &&
+               __ftrace_lookup_ip(hash->filter_hash, ip)) &&
                (ftrace_hash_empty(hash->notrace_hash) ||
-                !ftrace_lookup_ip(hash->notrace_hash, ip));
+                !__ftrace_lookup_ip(hash->notrace_hash, ip));
 }
 
 /*
@@ -2880,7 +2894,7 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
 
        /* The function must be in the filter */
        if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
-           !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
+           !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
                return 0;
 
        /* If in notrace hash, we ignore it too */
@@ -4382,7 +4396,7 @@ __setup("ftrace_filter=", set_ftrace_filter);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
 static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
-static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer);
+static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer);
 
 static unsigned long save_global_trampoline;
 static unsigned long save_global_flags;
@@ -4405,18 +4419,17 @@ static void __init set_ftrace_early_graph(char *buf, int enable)
 {
        int ret;
        char *func;
-       unsigned long *table = ftrace_graph_funcs;
-       int *count = &ftrace_graph_count;
+       struct ftrace_hash *hash;
 
-       if (!enable) {
-               table = ftrace_graph_notrace_funcs;
-               count = &ftrace_graph_notrace_count;
-       }
+       if (enable)
+               hash = ftrace_graph_hash;
+       else
+               hash = ftrace_graph_notrace_hash;
 
        while (buf) {
                func = strsep(&buf, ",");
                /* we allow only one expression at a time */
-               ret = ftrace_set_func(table, count, FTRACE_GRAPH_MAX_FUNCS, func);
+               ret = ftrace_graph_set_hash(hash, func);
                if (ret)
                        printk(KERN_DEBUG "ftrace: function %s not "
                                          "traceable\n", func);
@@ -4540,26 +4553,55 @@ static const struct file_operations ftrace_notrace_fops = {
 
 static DEFINE_MUTEX(graph_lock);
 
-int ftrace_graph_count;
-int ftrace_graph_notrace_count;
-unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
-unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH;
+struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH;
+
+enum graph_filter_type {
+       GRAPH_FILTER_NOTRACE    = 0,
+       GRAPH_FILTER_FUNCTION,
+};
+
+#define FTRACE_GRAPH_EMPTY     ((void *)1)
 
 struct ftrace_graph_data {
-       unsigned long *table;
-       size_t size;
-       int *count;
-       const struct seq_operations *seq_ops;
+       struct ftrace_hash              *hash;
+       struct ftrace_func_entry        *entry;
+       int                             idx;   /* for hash table iteration */
+       enum graph_filter_type          type;
+       struct ftrace_hash              *new_hash;
+       const struct seq_operations     *seq_ops;
+       struct trace_parser             parser;
 };
 
 static void *
 __g_next(struct seq_file *m, loff_t *pos)
 {
        struct ftrace_graph_data *fgd = m->private;
+       struct ftrace_func_entry *entry = fgd->entry;
+       struct hlist_head *head;
+       int i, idx = fgd->idx;
 
-       if (*pos >= *fgd->count)
+       if (*pos >= fgd->hash->count)
                return NULL;
-       return &fgd->table[*pos];
+
+       if (entry) {
+               hlist_for_each_entry_continue(entry, hlist) {
+                       fgd->entry = entry;
+                       return entry;
+               }
+
+               idx++;
+       }
+
+       for (i = idx; i < 1 << fgd->hash->size_bits; i++) {
+               head = &fgd->hash->buckets[i];
+               hlist_for_each_entry(entry, head, hlist) {
+                       fgd->entry = entry;
+                       fgd->idx = i;
+                       return entry;
+               }
+       }
+       return NULL;
 }
 
 static void *
@@ -4575,10 +4617,19 @@ static void *g_start(struct seq_file *m, loff_t *pos)
 
        mutex_lock(&graph_lock);
 
+       if (fgd->type == GRAPH_FILTER_FUNCTION)
+               fgd->hash = rcu_dereference_protected(ftrace_graph_hash,
+                                       lockdep_is_held(&graph_lock));
+       else
+               fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
+                                       lockdep_is_held(&graph_lock));
+
        /* Nothing, tell g_show to print all functions are enabled */
-       if (!*fgd->count && !*pos)
-               return (void *)1;
+       if (ftrace_hash_empty(fgd->hash) && !*pos)
+               return FTRACE_GRAPH_EMPTY;
 
+       fgd->idx = 0;
+       fgd->entry = NULL;
        return __g_next(m, pos);
 }
 
@@ -4589,22 +4640,22 @@ static void g_stop(struct seq_file *m, void *p)
 
 static int g_show(struct seq_file *m, void *v)
 {
-       unsigned long *ptr = v;
+       struct ftrace_func_entry *entry = v;
 
-       if (!ptr)
+       if (!entry)
                return 0;
 
-       if (ptr == (unsigned long *)1) {
+       if (entry == FTRACE_GRAPH_EMPTY) {
                struct ftrace_graph_data *fgd = m->private;
 
-               if (fgd->table == ftrace_graph_funcs)
+               if (fgd->type == GRAPH_FILTER_FUNCTION)
                        seq_puts(m, "#### all functions enabled ####\n");
                else
                        seq_puts(m, "#### no functions disabled ####\n");
                return 0;
        }
 
-       seq_printf(m, "%ps\n", (void *)*ptr);
+       seq_printf(m, "%ps\n", (void *)entry->ip);
 
        return 0;
 }
@@ -4621,24 +4672,51 @@ __ftrace_graph_open(struct inode *inode, struct file *file,
                    struct ftrace_graph_data *fgd)
 {
        int ret = 0;
+       struct ftrace_hash *new_hash = NULL;
 
-       mutex_lock(&graph_lock);
-       if ((file->f_mode & FMODE_WRITE) &&
-           (file->f_flags & O_TRUNC)) {
-               *fgd->count = 0;
-               memset(fgd->table, 0, fgd->size * sizeof(*fgd->table));
+       if (file->f_mode & FMODE_WRITE) {
+               const int size_bits = FTRACE_HASH_DEFAULT_BITS;
+
+               if (trace_parser_get_init(&fgd->parser, FTRACE_BUFF_MAX))
+                       return -ENOMEM;
+
+               if (file->f_flags & O_TRUNC)
+                       new_hash = alloc_ftrace_hash(size_bits);
+               else
+                       new_hash = alloc_and_copy_ftrace_hash(size_bits,
+                                                             fgd->hash);
+               if (!new_hash) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
        }
-       mutex_unlock(&graph_lock);
 
        if (file->f_mode & FMODE_READ) {
-               ret = seq_open(file, fgd->seq_ops);
+               ret = seq_open(file, &ftrace_graph_seq_ops);
                if (!ret) {
                        struct seq_file *m = file->private_data;
                        m->private = fgd;
+               } else {
+                       /* Failed */
+                       free_ftrace_hash(new_hash);
+                       new_hash = NULL;
                }
        } else
                file->private_data = fgd;
 
+out:
+       if (ret < 0 && file->f_mode & FMODE_WRITE)
+               trace_parser_put(&fgd->parser);
+
+       fgd->new_hash = new_hash;
+
+       /*
+        * All uses of fgd->hash must be taken with the graph_lock
+        * held. The graph_lock is going to be released, so force
+        * fgd->hash to be reinitialized when it is taken again.
+        */
+       fgd->hash = NULL;
+
        return ret;
 }
 
@@ -4646,6 +4724,7 @@ static int
 ftrace_graph_open(struct inode *inode, struct file *file)
 {
        struct ftrace_graph_data *fgd;
+       int ret;
 
        if (unlikely(ftrace_disabled))
                return -ENODEV;
@@ -4654,18 +4733,26 @@ ftrace_graph_open(struct inode *inode, struct file *file)
        if (fgd == NULL)
                return -ENOMEM;
 
-       fgd->table = ftrace_graph_funcs;
-       fgd->size = FTRACE_GRAPH_MAX_FUNCS;
-       fgd->count = &ftrace_graph_count;
+       mutex_lock(&graph_lock);
+
+       fgd->hash = rcu_dereference_protected(ftrace_graph_hash,
+                                       lockdep_is_held(&graph_lock));
+       fgd->type = GRAPH_FILTER_FUNCTION;
        fgd->seq_ops = &ftrace_graph_seq_ops;
 
-       return __ftrace_graph_open(inode, file, fgd);
+       ret = __ftrace_graph_open(inode, file, fgd);
+       if (ret < 0)
+               kfree(fgd);
+
+       mutex_unlock(&graph_lock);
+       return ret;
 }
 
 static int
 ftrace_graph_notrace_open(struct inode *inode, struct file *file)
 {
        struct ftrace_graph_data *fgd;
+       int ret;
 
        if (unlikely(ftrace_disabled))
                return -ENODEV;
@@ -4674,45 +4761,97 @@ ftrace_graph_notrace_open(struct inode *inode, struct file *file)
        if (fgd == NULL)
                return -ENOMEM;
 
-       fgd->table = ftrace_graph_notrace_funcs;
-       fgd->size = FTRACE_GRAPH_MAX_FUNCS;
-       fgd->count = &ftrace_graph_notrace_count;
+       mutex_lock(&graph_lock);
+
+       fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
+                                       lockdep_is_held(&graph_lock));
+       fgd->type = GRAPH_FILTER_NOTRACE;
        fgd->seq_ops = &ftrace_graph_seq_ops;
 
-       return __ftrace_graph_open(inode, file, fgd);
+       ret = __ftrace_graph_open(inode, file, fgd);
+       if (ret < 0)
+               kfree(fgd);
+
+       mutex_unlock(&graph_lock);
+       return ret;
 }
 
 static int
 ftrace_graph_release(struct inode *inode, struct file *file)
 {
+       struct ftrace_graph_data *fgd;
+       struct ftrace_hash *old_hash, *new_hash;
+       struct trace_parser *parser;
+       int ret = 0;
+
        if (file->f_mode & FMODE_READ) {
                struct seq_file *m = file->private_data;
 
-               kfree(m->private);
+               fgd = m->private;
                seq_release(inode, file);
        } else {
-               kfree(file->private_data);
+               fgd = file->private_data;
        }
 
-       return 0;
+
+       if (file->f_mode & FMODE_WRITE) {
+
+               parser = &fgd->parser;
+
+               if (trace_parser_loaded((parser))) {
+                       parser->buffer[parser->idx] = 0;
+                       ret = ftrace_graph_set_hash(fgd->new_hash,
+                                                   parser->buffer);
+               }
+
+               trace_parser_put(parser);
+
+               new_hash = __ftrace_hash_move(fgd->new_hash);
+               if (!new_hash) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               mutex_lock(&graph_lock);
+
+               if (fgd->type == GRAPH_FILTER_FUNCTION) {
+                       old_hash = rcu_dereference_protected(ftrace_graph_hash,
+                                       lockdep_is_held(&graph_lock));
+                       rcu_assign_pointer(ftrace_graph_hash, new_hash);
+               } else {
+                       old_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
+                                       lockdep_is_held(&graph_lock));
+                       rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash);
+               }
+
+               mutex_unlock(&graph_lock);
+
+               /* Wait till all users are no longer using the old hash */
+               synchronize_sched();
+
+               free_ftrace_hash(old_hash);
+       }
+
+ out:
+       kfree(fgd->new_hash);
+       kfree(fgd);
+
+       return ret;
 }
 
 static int
-ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)
+ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer)
 {
        struct ftrace_glob func_g;
        struct dyn_ftrace *rec;
        struct ftrace_page *pg;
+       struct ftrace_func_entry *entry;
        int fail = 1;
        int not;
-       bool exists;
-       int i;
 
        /* decode regex */
        func_g.type = filter_parse_regex(buffer, strlen(buffer),
                                         &func_g.search, &not);
-       if (!not && *idx >= size)
-               return -EBUSY;
 
        func_g.len = strlen(func_g.search);
 
@@ -4729,26 +4868,18 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)
                        continue;
 
                if (ftrace_match_record(rec, &func_g, NULL, 0)) {
-                       /* if it is in the array */
-                       exists = false;
-                       for (i = 0; i < *idx; i++) {
-                               if (array[i] == rec->ip) {
-                                       exists = true;
-                                       break;
-                               }
-                       }
+                       entry = ftrace_lookup_ip(hash, rec->ip);
 
                        if (!not) {
                                fail = 0;
-                               if (!exists) {
-                                       array[(*idx)++] = rec->ip;
-                                       if (*idx >= size)
-                                               goto out;
-                               }
+
+                               if (entry)
+                                       continue;
+                               if (add_hash_entry(hash, rec->ip) < 0)
+                                       goto out;
                        } else {
-                               if (exists) {
-                                       array[i] = array[--(*idx)];
-                                       array[*idx] = 0;
+                               if (entry) {
+                                       free_hash_entry(hash, entry);
                                        fail = 0;
                                }
                        }
@@ -4767,35 +4898,34 @@ static ssize_t
 ftrace_graph_write(struct file *file, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
 {
-       struct trace_parser parser;
        ssize_t read, ret = 0;
        struct ftrace_graph_data *fgd = file->private_data;
+       struct trace_parser *parser;
 
        if (!cnt)
                return 0;
 
-       if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX))
-               return -ENOMEM;
-
-       read = trace_get_user(&parser, ubuf, cnt, ppos);
+       /* Read mode uses seq functions */
+       if (file->f_mode & FMODE_READ) {
+               struct seq_file *m = file->private_data;
+               fgd = m->private;
+       }
 
-       if (read >= 0 && trace_parser_loaded((&parser))) {
-               parser.buffer[parser.idx] = 0;
+       parser = &fgd->parser;
 
-               mutex_lock(&graph_lock);
+       read = trace_get_user(parser, ubuf, cnt, ppos);
 
-               /* we allow only one expression at a time */
-               ret = ftrace_set_func(fgd->table, fgd->count, fgd->size,
-                                     parser.buffer);
+       if (read >= 0 && trace_parser_loaded(parser) &&
+           !trace_parser_cont(parser)) {
 
-               mutex_unlock(&graph_lock);
+               ret = ftrace_graph_set_hash(fgd->new_hash,
+                                           parser->buffer);
+               trace_parser_clear(parser);
        }
 
        if (!ret)
                ret = read;
 
-       trace_parser_put(&parser);
-
        return ret;
 }
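
The release path above is the classic RCU publish-then-reclaim shape; condensed, using only names from the hunks above:

	mutex_lock(&graph_lock);
	old_hash = rcu_dereference_protected(ftrace_graph_hash,
					lockdep_is_held(&graph_lock));
	rcu_assign_pointer(ftrace_graph_hash, new_hash);
	mutex_unlock(&graph_lock);

	synchronize_sched();		/* wait out preempt-disabled readers */
	free_ftrace_hash(old_hash);

synchronize_sched() is what allows the lock-free readers (see the trace.h hunk below) to protect themselves with nothing more than preempt_disable_notrace().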
 
index 310f0ea0d1a2e63ec534b300dab74359206cf74f..707445ceb7efd4e098ba3ad5a129bb19a03122f9 100644 (file)
@@ -260,16 +260,8 @@ unsigned long long ns2usecs(u64 nsec)
        TRACE_ITER_EVENT_FORK
 
 /*
- * The global_trace is the descriptor that holds the tracing
- * buffers for the live tracing. For each CPU, it contains
- * a link list of pages that will store trace entries. The
- * page descriptor of the pages in the memory is used to hold
- * the link list by linking the lru item in the page descriptor
- * to each of the pages in the buffer per CPU.
- *
- * For each active CPU there is a data field that holds the
- * pages for the buffer for that CPU. Each CPU has the same number
- * of pages allocated for its buffer.
+ * The global_trace is the descriptor that holds the top-level tracing
+ * buffers for the live tracing.
  */
 static struct trace_array global_trace = {
        .trace_flags = TRACE_DEFAULT_FLAGS,
@@ -1193,6 +1185,7 @@ int trace_parser_get_init(struct trace_parser *parser, int size)
 void trace_parser_put(struct trace_parser *parser)
 {
        kfree(parser->buffer);
+       parser->buffer = NULL;
 }
 
 /*
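
Resetting parser->buffer to NULL makes trace_parser_put() safe to call more than once, since a repeated put degenerates to kfree(NULL), a no-op; the reworked set_graph_function open/release paths can reach it twice:

	trace_parser_put(&fgd->parser);	/* frees buffer, buffer = NULL */
	trace_parser_put(&fgd->parser);	/* harmless: kfree(NULL) */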
index 1ea51ab53edf372924692e78c692f52302280bc1..ae1cce91fead25a065899109e426a6cc1e597d28 100644 (file)
@@ -753,6 +753,21 @@ enum print_line_t print_trace_line(struct trace_iterator *iter);
 
 extern char trace_find_mark(unsigned long long duration);
 
+struct ftrace_hash {
+       unsigned long           size_bits;
+       struct hlist_head       *buckets;
+       unsigned long           count;
+       struct rcu_head         rcu;
+};
+
+struct ftrace_func_entry *
+ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip);
+
+static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
+{
+       return !hash || !hash->count;
+}
+
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
@@ -787,53 +802,50 @@ extern void __trace_graph_return(struct trace_array *tr,
                                 struct ftrace_graph_ret *trace,
                                 unsigned long flags, int pc);
 
-
 #ifdef CONFIG_DYNAMIC_FTRACE
-/* TODO: make this variable */
-#define FTRACE_GRAPH_MAX_FUNCS         32
-extern int ftrace_graph_count;
-extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
-extern int ftrace_graph_notrace_count;
-extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
+extern struct ftrace_hash *ftrace_graph_hash;
+extern struct ftrace_hash *ftrace_graph_notrace_hash;
 
 static inline int ftrace_graph_addr(unsigned long addr)
 {
-       int i;
-
-       if (!ftrace_graph_count)
-               return 1;
-
-       for (i = 0; i < ftrace_graph_count; i++) {
-               if (addr == ftrace_graph_funcs[i]) {
-                       /*
-                        * If no irqs are to be traced, but a set_graph_function
-                        * is set, and called by an interrupt handler, we still
-                        * want to trace it.
-                        */
-                       if (in_irq())
-                               trace_recursion_set(TRACE_IRQ_BIT);
-                       else
-                               trace_recursion_clear(TRACE_IRQ_BIT);
-                       return 1;
-               }
+       int ret = 0;
+
+       preempt_disable_notrace();
+
+       if (ftrace_hash_empty(ftrace_graph_hash)) {
+               ret = 1;
+               goto out;
        }
 
-       return 0;
+       if (ftrace_lookup_ip(ftrace_graph_hash, addr)) {
+               /*
+                * If no irqs are to be traced, but a set_graph_function
+                * is set, and called by an interrupt handler, we still
+                * want to trace it.
+                */
+               if (in_irq())
+                       trace_recursion_set(TRACE_IRQ_BIT);
+               else
+                       trace_recursion_clear(TRACE_IRQ_BIT);
+               ret = 1;
+       }
+
+out:
+       preempt_enable_notrace();
+       return ret;
 }
 
 static inline int ftrace_graph_notrace_addr(unsigned long addr)
 {
-       int i;
+       int ret = 0;
 
-       if (!ftrace_graph_notrace_count)
-               return 0;
+       preempt_disable_notrace();
 
-       for (i = 0; i < ftrace_graph_notrace_count; i++) {
-               if (addr == ftrace_graph_notrace_funcs[i])
-                       return 1;
-       }
+       if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr))
+               ret = 1;
 
-       return 0;
+       preempt_enable_notrace();
+       return ret;
 }
 #else
 static inline int ftrace_graph_addr(unsigned long addr)
@@ -1300,7 +1312,8 @@ static inline bool is_string_field(struct ftrace_event_field *field)
 {
        return field->filter_type == FILTER_DYN_STRING ||
               field->filter_type == FILTER_STATIC_STRING ||
-              field->filter_type == FILTER_PTR_STRING;
+              field->filter_type == FILTER_PTR_STRING ||
+              field->filter_type == FILTER_COMM;
 }
 
 static inline bool is_function_field(struct ftrace_event_field *field)
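
On the reader side, preempt_disable_notrace() is all the protection ftrace_graph_addr() needs: an updater frees a replaced hash only after synchronize_sched(), which waits for every preempt-disabled region to finish. A minimal sketch of the pairing, reusing the names above:

	preempt_disable_notrace();	/* read-side for synchronize_sched() */
	if (ftrace_lookup_ip(ftrace_graph_hash, addr))
		ret = 1;
	preempt_enable_notrace();	/* a replaced hash may be freed now */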
index e3b488825ae3770239639cebf1e322807c48cff5..e49fbe901cfc64c60734d52a0a7c8db320606b00 100644 (file)
@@ -175,9 +175,9 @@ int trace_benchmark_reg(void)
 
        bm_event_thread = kthread_run(benchmark_event_kthread,
                                      NULL, "event_benchmark");
-       if (!bm_event_thread) {
+       if (IS_ERR(bm_event_thread)) {
                pr_warning("trace benchmark failed to create kernel thread\n");
-               return -ENOMEM;
+               return PTR_ERR(bm_event_thread);
        }
 
        return 0;
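
kthread_run() reports failure with ERR_PTR(), never NULL, so the old !bm_event_thread test could not fire; the corrected pattern, as used above:

	struct task_struct *t;

	t = kthread_run(benchmark_event_kthread, NULL, "event_benchmark");
	if (IS_ERR(t))			/* e.g. ERR_PTR(-ENOMEM) */
		return PTR_ERR(t);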
index 75489de546b6092c2485d150480f83ab313df615..4d8fdf3184dce4a897565b6fb4b3bfeba1a8152d 100644 (file)
@@ -27,7 +27,7 @@ static DEFINE_MUTEX(branch_tracing_mutex);
 static struct trace_array *branch_tracer;
 
 static void
-probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+probe_likely_condition(struct ftrace_likely_data *f, int val, int expect)
 {
        struct trace_event_call *call = &event_branch;
        struct trace_array *tr = branch_tracer;
@@ -68,16 +68,17 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
        entry   = ring_buffer_event_data(event);
 
        /* Strip off the path, only save the file */
-       p = f->file + strlen(f->file);
-       while (p >= f->file && *p != '/')
+       p = f->data.file + strlen(f->data.file);
+       while (p >= f->data.file && *p != '/')
                p--;
        p++;
 
-       strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
+       strncpy(entry->func, f->data.func, TRACE_FUNC_SIZE);
        strncpy(entry->file, p, TRACE_FILE_SIZE);
        entry->func[TRACE_FUNC_SIZE] = 0;
        entry->file[TRACE_FILE_SIZE] = 0;
-       entry->line = f->line;
+       entry->constant = f->constant;
+       entry->line = f->data.line;
        entry->correct = val == expect;
 
        if (!call_filter_check_discard(call, entry, buffer, event))
@@ -89,7 +90,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 }
 
 static inline
-void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
 {
        if (!branch_tracing_enabled)
                return;
@@ -195,13 +196,19 @@ core_initcall(init_branch_tracer);
 
 #else
 static inline
-void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
 {
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
-void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
+void ftrace_likely_update(struct ftrace_likely_data *f, int val,
+                         int expect, int is_constant)
 {
+       /* A constant is always correct */
+       if (is_constant) {
+               f->constant++;
+               val = expect;
+       }
        /*
         * I would love to have a trace point here instead, but the
         * trace point code is so inundated with unlikely and likely
@@ -212,9 +219,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
 
        /* FIXME: Make this atomic! */
        if (val == expect)
-               f->correct++;
+               f->data.correct++;
        else
-               f->incorrect++;
+               f->data.incorrect++;
 }
 EXPORT_SYMBOL(ftrace_likely_update);
 
@@ -245,29 +252,60 @@ static inline long get_incorrect_percent(struct ftrace_branch_data *p)
        return percent;
 }
 
-static int branch_stat_show(struct seq_file *m, void *v)
+static const char *branch_stat_process_file(struct ftrace_branch_data *p)
 {
-       struct ftrace_branch_data *p = v;
        const char *f;
-       long percent;
 
        /* Only print the file, not the path */
        f = p->file + strlen(p->file);
        while (f >= p->file && *f != '/')
                f--;
-       f++;
+       return ++f;
+}
+
+static void branch_stat_show(struct seq_file *m,
+                            struct ftrace_branch_data *p, const char *f)
+{
+       long percent;
 
        /*
         * The miss is overlayed on correct, and hit on incorrect.
         */
        percent = get_incorrect_percent(p);
 
-       seq_printf(m, "%8lu %8lu ",  p->correct, p->incorrect);
        if (percent < 0)
                seq_puts(m, "  X ");
        else
                seq_printf(m, "%3ld ", percent);
+
        seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
+}
+
+static int branch_stat_show_normal(struct seq_file *m,
+                                  struct ftrace_branch_data *p, const char *f)
+{
+       seq_printf(m, "%8lu %8lu ",  p->correct, p->incorrect);
+       branch_stat_show(m, p, f);
+       return 0;
+}
+
+static int annotate_branch_stat_show(struct seq_file *m, void *v)
+{
+       struct ftrace_likely_data *p = v;
+       const char *f;
+       int l;
+
+       f = branch_stat_process_file(&p->data);
+
+       if (!p->constant)
+               return branch_stat_show_normal(m, &p->data, f);
+
+       l = snprintf(NULL, 0, "/%lu", p->constant);
+       l = l > 8 ? 0 : 8 - l;
+
+       seq_printf(m, "%8lu/%lu %*lu ",
+                  p->data.correct, p->constant, l, p->data.incorrect);
+       branch_stat_show(m, &p->data, f);
        return 0;
 }
 
@@ -279,7 +317,7 @@ static void *annotated_branch_stat_start(struct tracer_stat *trace)
 static void *
 annotated_branch_stat_next(void *v, int idx)
 {
-       struct ftrace_branch_data *p = v;
+       struct ftrace_likely_data *p = v;
 
        ++p;
 
@@ -328,7 +366,7 @@ static struct tracer_stat annotated_branch_stats = {
        .stat_next = annotated_branch_stat_next,
        .stat_cmp = annotated_branch_stat_cmp,
        .stat_headers = annotated_branch_stat_headers,
-       .stat_show = branch_stat_show
+       .stat_show = annotate_branch_stat_show
 };
 
 __init static int init_annotated_branch_stats(void)
@@ -379,12 +417,21 @@ all_branch_stat_next(void *v, int idx)
        return p;
 }
 
+static int all_branch_stat_show(struct seq_file *m, void *v)
+{
+       struct ftrace_branch_data *p = v;
+       const char *f;
+
+       f = branch_stat_process_file(p);
+       return branch_stat_show_normal(m, p, f);
+}
+
 static struct tracer_stat all_branch_stats = {
        .name = "branch_all",
        .stat_start = all_branch_stat_start,
        .stat_next = all_branch_stat_next,
        .stat_headers = all_branch_stat_headers,
-       .stat_show = branch_stat_show
+       .stat_show = all_branch_stat_show
 };
 
 __init static int all_annotated_branch_stats(void)
index eb7396b7e7c3073586b03d7cc68e6273cc5da994..c203ac4df791f28e074d387ce573cec74bc40cf6 100644 (file)
@@ -328,11 +328,13 @@ FTRACE_ENTRY(branch, trace_branch,
                __array(        char,           func,   TRACE_FUNC_SIZE+1       )
                __array(        char,           file,   TRACE_FILE_SIZE+1       )
                __field(        char,           correct                         )
+               __field(        char,           constant                        )
        ),
 
-       F_printk("%u:%s:%s (%u)",
+       F_printk("%u:%s:%s (%u)%s",
                 __entry->line,
-                __entry->func, __entry->file, __entry->correct),
+                __entry->func, __entry->file, __entry->correct,
+                __entry->constant ? " CONSTANT" : ""),
 
        FILTER_OTHER
 );
index af344a1bf0d0e6270e5e659ffa160753e148cdd9..edfacd954e1bb54a454f9345276d7a91e65fd7da 100644 (file)
@@ -266,24 +266,13 @@ out:
 static struct cpumask save_cpumask;
 static bool disable_migrate;
 
-static void move_to_next_cpu(bool initmask)
+static void move_to_next_cpu(void)
 {
-       static struct cpumask *current_mask;
+       struct cpumask *current_mask = &save_cpumask;
        int next_cpu;
 
        if (disable_migrate)
                return;
-
-       /* Just pick the first CPU on first iteration */
-       if (initmask) {
-               current_mask = &save_cpumask;
-               get_online_cpus();
-               cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask);
-               put_online_cpus();
-               next_cpu = cpumask_first(current_mask);
-               goto set_affinity;
-       }
-
        /*
         * If for some reason the user modifies the CPU affinity
         * of this thread, than stop migrating for the duration
@@ -300,7 +289,6 @@ static void move_to_next_cpu(bool initmask)
        if (next_cpu >= nr_cpu_ids)
                next_cpu = cpumask_first(current_mask);
 
- set_affinity:
        if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */
                goto disable;
 
@@ -322,20 +310,15 @@ static void move_to_next_cpu(bool initmask)
  * need to ensure nothing else might be running (and thus preempting).
  * Obviously this should never be used in production environments.
  *
- * Currently this runs on which ever CPU it was scheduled on, but most
- * real-world hardware latency situations occur across several CPUs,
- * but we might later generalize this if we find there are any actualy
- * systems with alternate SMI delivery or other hardware latencies.
+ * Executes one loop iteration on each CPU in the tracing_cpumask sysfs file.
  */
 static int kthread_fn(void *data)
 {
        u64 interval;
-       bool initmask = true;
 
        while (!kthread_should_stop()) {
 
-               move_to_next_cpu(initmask);
-               initmask = false;
+               move_to_next_cpu();
 
                local_irq_disable();
                get_sample();
@@ -366,13 +349,27 @@ static int kthread_fn(void *data)
  */
 static int start_kthread(struct trace_array *tr)
 {
+       struct cpumask *current_mask = &save_cpumask;
        struct task_struct *kthread;
+       int next_cpu;
+
+       /* Just pick the first CPU on first iteration */
+       current_mask = &save_cpumask;
+       get_online_cpus();
+       cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask);
+       put_online_cpus();
+       next_cpu = cpumask_first(current_mask);
 
        kthread = kthread_create(kthread_fn, NULL, "hwlatd");
        if (IS_ERR(kthread)) {
                pr_err(BANNER "could not start sampling thread\n");
                return -ENOMEM;
        }
+
+       cpumask_clear(current_mask);
+       cpumask_set_cpu(next_cpu, current_mask);
+       sched_setaffinity(kthread->pid, current_mask);
+
        hwlat_kthread = kthread;
        wake_up_process(kthread);
 
index 7ad9e53ad174bc6cdb0f99490f87e419e9381b02..eadd96ef772f783b7e7d91a33db52522e7a53ade 100644 (file)
@@ -16,6 +16,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+#define pr_fmt(fmt)    "trace_kprobe: " fmt
 
 #include <linux/module.h>
 #include <linux/uaccess.h>
index 8c0553d9afd3f2563756641cfa0e659a4ab7ff99..52478f033f88f2d38315c887406608554c391a32 100644 (file)
@@ -21,6 +21,7 @@
  * Copyright (C) IBM Corporation, 2010-2011
  * Author:     Srikar Dronamraju
  */
+#define pr_fmt(fmt)    "trace_probe: " fmt
 
 #include "trace_probe.h"
 
@@ -647,7 +648,7 @@ ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
                                size_t count, loff_t *ppos,
                                int (*createfn)(int, char **))
 {
-       char *kbuf, *tmp;
+       char *kbuf, *buf, *tmp;
        int ret = 0;
        size_t done = 0;
        size_t size;
@@ -667,27 +668,38 @@ ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
                        goto out;
                }
                kbuf[size] = '\0';
-               tmp = strchr(kbuf, '\n');
+               buf = kbuf;
+               do {
+                       tmp = strchr(buf, '\n');
+                       if (tmp) {
+                               *tmp = '\0';
+                               size = tmp - buf + 1;
+                       } else {
+                               size = strlen(buf);
+                               if (done + size < count) {
+                                       if (buf != kbuf)
+                                               break;
+                                       /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
+                                       pr_warn("Line length is too long: Should be less than %d\n",
+                                               WRITE_BUFSIZE - 2);
+                                       ret = -EINVAL;
+                                       goto out;
+                               }
+                       }
+                       done += size;
 
-               if (tmp) {
-                       *tmp = '\0';
-                       size = tmp - kbuf + 1;
-               } else if (done + size < count) {
-                       pr_warn("Line length is too long: Should be less than %d\n",
-                               WRITE_BUFSIZE);
-                       ret = -EINVAL;
-                       goto out;
-               }
-               done += size;
-               /* Remove comments */
-               tmp = strchr(kbuf, '#');
+                       /* Remove comments */
+                       tmp = strchr(buf, '#');
 
-               if (tmp)
-                       *tmp = '\0';
+                       if (tmp)
+                               *tmp = '\0';
 
-               ret = traceprobe_command(kbuf, createfn);
-               if (ret)
-                       goto out;
+                       ret = traceprobe_command(buf, createfn);
+                       if (ret)
+                               goto out;
+                       buf += size;
+
+               } while (done < count);
        }
        ret = done;
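
With the loop above, a single write() may now carry several newline-separated commands, each handed to createfn separately. A hypothetical user-space use (event names and probed symbols are illustrative only):

	/* fd is open on <tracefs>/kprobe_events */
	const char cmds[] =
		"p:myopen do_sys_open\n"
		"p:myclose do_sys_close\n";
	write(fd, cmds, sizeof(cmds) - 1);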
 
index 0913693caf6e037f2cd20edc0b1a3c22276d00bf..f4379e772171dc283c7b492a08c4e49bf41f479b 100644 (file)
@@ -17,6 +17,7 @@
  * Copyright (C) IBM Corporation, 2010-2012
  * Author:     Srikar Dronamraju <srikar@linux.vnet.ibm.com>
  */
+#define pr_fmt(fmt)    "trace_kprobe: " fmt
 
 #include <linux/module.h>
 #include <linux/uaccess.h>
@@ -431,7 +432,8 @@ static int create_trace_uprobe(int argc, char **argv)
                pr_info("Probe point is not specified.\n");
                return -EINVAL;
        }
-       arg = strchr(argv[1], ':');
+       /* Find the last occurrence, in case the path contains ':' too. */
+       arg = strrchr(argv[1], ':');
        if (!arg) {
                ret = -EINVAL;
                goto fail_address_parse;
index 44d1a1181fb5664ac7ba4d1a2c1f1a3f14fc330c..0c8b78a9ae2ef97a1e83753146959b2d7b39d16d 100644 (file)
@@ -103,8 +103,7 @@ config CRC32
          functions require M here.
 
 config CRC32_SELFTEST
-       bool "CRC32 perform self test on init"
-       default n
+       tristate "CRC32 perform self test on init"
        depends on CRC32
        help
          This option enables the CRC32 library functions to perform a
@@ -395,6 +394,16 @@ config HAS_DMA
        depends on !NO_DMA
        default y
 
+config DMA_NOOP_OPS
+       bool
+       depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
+       default n
+
+config DMA_VIRT_OPS
+       bool
+       depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
+       default n
+
 config CHECK_SIGNATURE
        bool
 
@@ -432,8 +441,7 @@ config GLOB
          depends on this.
 
 config GLOB_SELFTEST
-       bool "glob self-test on init"
-       default n
+       tristate "glob self-test on init"
        depends on GLOB
        help
          This option enables a simple self-test of the glob_match
index 66fb4389f05c96959cf8b8f226964e4a3a5bcaf7..97d62c2da6c25dd5721f8c1c75264c83201f7247 100644 (file)
@@ -729,19 +729,6 @@ source "lib/Kconfig.kmemcheck"
 
 source "lib/Kconfig.kasan"
 
-config DEBUG_REFCOUNT
-       bool "Verbose refcount checks"
-       help
-         Say Y here if you want reference counters (refcount_t and kref) to
-         generate WARNs on dubious usage. Without this refcount_t will still
-         be a saturating counter and avoid Use-After-Free by turning it into
-         a resource leak Denial-Of-Service.
-
-         Use of this option will increase kernel text size but will alert the
-         admin of potential abuse.
-
-         If in doubt, say "N".
-
 endmenu # "Memory Debugging"
 
 config ARCH_HAS_KCOV
@@ -1739,6 +1726,14 @@ config TEST_LIST_SORT
 
          If unsure, say N.
 
+config TEST_SORT
+       bool "Array-based sort test"
+       depends on DEBUG_KERNEL
+       help
+         This option enables the self-test function of 'sort()' at boot.
+
+         If unsure, say N.
+
 config KPROBES_SANITY_TEST
        bool "Kprobes sanity tests"
        depends on DEBUG_KERNEL
@@ -1790,9 +1785,10 @@ config PERCPU_TEST
          If unsure, say N.
 
 config ATOMIC64_SELFTEST
-       bool "Perform an atomic64_t self-test at boot"
+       tristate "Perform an atomic64_t self-test"
        help
-         Enable this option to test the atomic64_t functions at boot.
+         Enable this option to test the atomic64_t functions at boot or
+         at module load time.
 
          If unsure, say N.
 
index f1a0364af3774460567b10d0acf9a3ceb8aecae8..320ac46a8725b6f9dac0726767a4976a2bd8907c 100644 (file)
@@ -25,9 +25,13 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
         earlycpio.o seq_buf.o siphash.o \
         nmi_backtrace.o nodemask.o win_minmax.o
 
+CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER
+CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER
+
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
-lib-$(CONFIG_HAS_DMA) += dma-noop.o
+lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o
+lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y  += kobject.o klist.o
 obj-y  += lockref.o
@@ -37,7 +41,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
         gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
         bsearch.o find_bit.o llist.o memweight.o kfifo.o \
         percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
-        once.o
+        once.o refcount.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += hexdump.o
@@ -50,6 +54,7 @@ obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
 obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
+obj-$(CONFIG_TEST_SORT) += test_sort.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
@@ -92,6 +97,7 @@ obj-$(CONFIG_CRC16)   += crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
 obj-$(CONFIG_CRC_ITU_T)        += crc-itu-t.o
 obj-$(CONFIG_CRC32)    += crc32.o
+obj-$(CONFIG_CRC32_SELFTEST)   += crc32test.o
 obj-$(CONFIG_CRC7)     += crc7.o
 obj-$(CONFIG_LIBCRC32C)        += libcrc32c.o
 obj-$(CONFIG_CRC8)     += crc8.o
@@ -161,6 +167,7 @@ obj-$(CONFIG_CORDIC) += cordic.o
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
 obj-$(CONFIG_GLOB) += glob.o
+obj-$(CONFIG_GLOB_SELFTEST) += globtest.o
 
 obj-$(CONFIG_MPILIB) += mpi/
 obj-$(CONFIG_SIGNATURE) += digsig.o
index 46042901130f107b4b943487337cca4b764b50f6..fd70c0e0e67314999fcab5c69c9c06dcec527c03 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/atomic.h>
+#include <linux/module.h>
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>    /* for boot_cpu_has below */
@@ -241,7 +242,7 @@ static __init void test_atomic64(void)
        BUG_ON(v.counter != r);
 }
 
-static __init int test_atomics(void)
+static __init int test_atomics_init(void)
 {
        test_atomic();
        test_atomic64();
@@ -264,4 +265,9 @@ static __init int test_atomics(void)
        return 0;
 }
 
-core_initcall(test_atomics);
+static __exit void test_atomics_exit(void) {}
+
+module_init(test_atomics_init);
+module_exit(test_atomics_exit);
+
+MODULE_LICENSE("GPL");
index 7fbd1a112b9d2474c9fb02c7a959d546448935c2..6ddc92bc1460936f170e17e4f5b6fc633d11784a 100644 (file)
@@ -340,827 +340,3 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 }
 #endif
 EXPORT_SYMBOL(crc32_be);
-
-#ifdef CONFIG_CRC32_SELFTEST
-
-/* 4096 random bytes */
-static u8 const __aligned(8) test_buf[] __initconst =
-{
-       0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
-       0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
-       0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60,
-       0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c,
-       0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4,
-       0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a,
-       0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a,
-       0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4,
-       0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9,
-       0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4,
-       0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca,
-       0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61,
-       0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e,
-       0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a,
-       0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f,
-       0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd,
-       0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c,
-       0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88,
-       0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53,
-       0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f,
-       0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4,
-       0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74,
-       0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60,
-       0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09,
-       0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07,
-       0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1,
-       0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f,
-       0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2,
-       0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0,
-       0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95,
-       0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22,
-       0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93,
-       0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86,
-       0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d,
-       0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40,
-       0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b,
-       0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35,
-       0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40,
-       0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63,
-       0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b,
-       0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8,
-       0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72,
-       0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86,
-       0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff,
-       0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed,
-       0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c,
-       0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed,
-       0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30,
-       0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99,
-       0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4,
-       0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80,
-       0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37,
-       0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04,
-       0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e,
-       0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd,
-       0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c,
-       0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09,
-       0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb,
-       0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b,
-       0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53,
-       0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b,
-       0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f,
-       0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff,
-       0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40,
-       0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6,
-       0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb,
-       0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73,
-       0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f,
-       0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4,
-       0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66,
-       0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1,
-       0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80,
-       0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f,
-       0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5,
-       0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7,
-       0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce,
-       0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff,
-       0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48,
-       0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26,
-       0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72,
-       0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88,
-       0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9,
-       0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc,
-       0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8,
-       0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09,
-       0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8,
-       0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c,
-       0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48,
-       0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d,
-       0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f,
-       0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae,
-       0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97,
-       0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8,
-       0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75,
-       0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc,
-       0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27,
-       0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf,
-       0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7,
-       0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0,
-       0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8,
-       0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c,
-       0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44,
-       0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54,
-       0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38,
-       0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f,
-       0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b,
-       0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7,
-       0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef,
-       0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e,
-       0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c,
-       0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c,
-       0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0,
-       0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37,
-       0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf,
-       0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e,
-       0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4,
-       0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60,
-       0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe,
-       0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61,
-       0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3,
-       0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe,
-       0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40,
-       0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec,
-       0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f,
-       0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7,
-       0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79,
-       0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c,
-       0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f,
-       0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21,
-       0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9,
-       0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30,
-       0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b,
-       0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee,
-       0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6,
-       0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3,
-       0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09,
-       0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd,
-       0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f,
-       0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9,
-       0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc,
-       0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59,
-       0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60,
-       0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5,
-       0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1,
-       0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8,
-       0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9,
-       0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab,
-       0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80,
-       0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01,
-       0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e,
-       0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d,
-       0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35,
-       0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38,
-       0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a,
-       0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac,
-       0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca,
-       0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57,
-       0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed,
-       0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20,
-       0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef,
-       0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c,
-       0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a,
-       0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64,
-       0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4,
-       0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54,
-       0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16,
-       0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26,
-       0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc,
-       0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87,
-       0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60,
-       0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d,
-       0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54,
-       0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13,
-       0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59,
-       0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb,
-       0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f,
-       0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15,
-       0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78,
-       0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93,
-       0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e,
-       0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31,
-       0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1,
-       0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37,
-       0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15,
-       0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78,
-       0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f,
-       0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31,
-       0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f,
-       0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc,
-       0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9,
-       0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3,
-       0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe,
-       0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4,
-       0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24,
-       0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1,
-       0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85,
-       0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8,
-       0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09,
-       0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c,
-       0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46,
-       0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5,
-       0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39,
-       0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2,
-       0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc,
-       0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35,
-       0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde,
-       0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80,
-       0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15,
-       0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63,
-       0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58,
-       0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d,
-       0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf,
-       0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12,
-       0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c,
-       0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b,
-       0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1,
-       0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6,
-       0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73,
-       0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9,
-       0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e,
-       0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22,
-       0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb,
-       0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2,
-       0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c,
-       0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c,
-       0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93,
-       0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f,
-       0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38,
-       0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57,
-       0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03,
-       0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90,
-       0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8,
-       0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4,
-       0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36,
-       0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7,
-       0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47,
-       0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46,
-       0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73,
-       0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72,
-       0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23,
-       0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a,
-       0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58,
-       0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f,
-       0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96,
-       0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9,
-       0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b,
-       0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c,
-       0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef,
-       0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3,
-       0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4,
-       0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f,
-       0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17,
-       0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18,
-       0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8,
-       0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98,
-       0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42,
-       0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97,
-       0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97,
-       0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1,
-       0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77,
-       0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb,
-       0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c,
-       0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb,
-       0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56,
-       0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04,
-       0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48,
-       0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe,
-       0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d,
-       0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97,
-       0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8,
-       0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f,
-       0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e,
-       0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca,
-       0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44,
-       0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f,
-       0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6,
-       0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63,
-       0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19,
-       0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58,
-       0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b,
-       0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28,
-       0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf,
-       0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6,
-       0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3,
-       0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe,
-       0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f,
-       0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf,
-       0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9,
-       0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e,
-       0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7,
-       0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70,
-       0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0,
-       0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d,
-       0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4,
-       0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5,
-       0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85,
-       0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc,
-       0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f,
-       0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56,
-       0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb,
-       0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b,
-       0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5,
-       0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03,
-       0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23,
-       0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03,
-       0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87,
-       0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4,
-       0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43,
-       0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11,
-       0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40,
-       0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59,
-       0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9,
-       0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30,
-       0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd,
-       0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45,
-       0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83,
-       0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b,
-       0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5,
-       0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3,
-       0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84,
-       0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8,
-       0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34,
-       0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b,
-       0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31,
-       0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b,
-       0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40,
-       0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b,
-       0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e,
-       0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38,
-       0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb,
-       0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2,
-       0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c,
-       0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1,
-       0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc,
-       0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec,
-       0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34,
-       0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95,
-       0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92,
-       0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f,
-       0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c,
-       0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b,
-       0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c,
-       0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5,
-       0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb,
-       0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4,
-       0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9,
-       0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4,
-       0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41,
-       0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a,
-       0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8,
-       0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06,
-       0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62,
-       0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47,
-       0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4,
-       0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00,
-       0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67,
-       0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81,
-       0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0,
-       0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10,
-       0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79,
-       0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19,
-       0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8,
-       0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1,
-       0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83,
-       0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86,
-       0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55,
-       0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66,
-       0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0,
-       0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49,
-       0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea,
-       0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24,
-       0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e,
-       0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88,
-       0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87,
-       0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34,
-       0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f,
-       0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a,
-       0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a,
-       0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93,
-       0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37,
-       0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38,
-       0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4,
-       0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48,
-       0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65,
-       0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09,
-       0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e,
-       0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5,
-       0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b,
-       0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4,
-       0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e,
-       0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d,
-       0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0,
-       0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5,
-       0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48,
-       0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e,
-       0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f,
-       0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a,
-       0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d,
-       0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14,
-       0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69,
-       0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53,
-       0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56,
-       0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48,
-       0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4,
-       0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26,
-       0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e,
-       0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40,
-       0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7,
-       0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62,
-       0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe,
-       0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf,
-       0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2,
-       0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d,
-       0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32,
-       0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa,
-       0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45,
-       0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04,
-       0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33,
-       0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad,
-       0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4,
-       0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c,
-       0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b,
-       0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36,
-       0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa,
-       0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9,
-       0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28,
-       0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b,
-       0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03,
-       0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d,
-       0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff,
-       0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39,
-       0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b,
-       0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2,
-       0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34,
-       0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe,
-       0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0,
-       0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27,
-       0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86,
-       0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90,
-       0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03,
-       0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb,
-       0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57,
-       0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9,
-       0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5,
-       0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16,
-       0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5,
-       0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a,
-       0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d,
-       0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0,
-       0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f,
-       0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48,
-       0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1,
-       0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09,
-       0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51,
-       0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b,
-       0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf,
-       0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe,
-       0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad,
-       0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e,
-       0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57,
-       0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f,
-       0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef,
-       0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8,
-       0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69,
-       0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d,
-       0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59,
-       0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9,
-       0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d,
-       0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea,
-       0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56,
-       0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4,
-       0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8,
-       0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78,
-       0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f,
-       0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4,
-       0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91,
-       0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f,
-       0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c,
-       0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57,
-       0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4,
-       0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23,
-       0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17,
-       0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66,
-       0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39,
-       0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36,
-       0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00,
-       0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7,
-       0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60,
-       0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c,
-       0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e,
-       0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7,
-       0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a,
-       0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d,
-       0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37,
-       0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82,
-       0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8,
-       0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e,
-       0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85,
-       0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98,
-       0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22,
-       0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7,
-       0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49,
-       0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33,
-       0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc,
-       0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8,
-       0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f,
-       0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3,
-       0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98,
-       0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c,
-       0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6,
-       0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc,
-       0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d,
-};
-
-/* 100 test cases */
-static struct crc_test {
-       u32 crc;        /* random starting crc */
-       u32 start;      /* random 6 bit offset in buf */
-       u32 length;     /* random 11 bit length of test */
-       u32 crc_le;     /* expected crc32_le result */
-       u32 crc_be;     /* expected crc32_be result */
-       u32 crc32c_le;  /* expected crc32c_le result */
-} const test[] __initconst =
-{
-       {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
-       {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
-       {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8},
-       {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a},
-       {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152},
-       {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7},
-       {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc},
-       {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2},
-       {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d},
-       {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5},
-       {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f},
-       {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a},
-       {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8},
-       {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa},
-       {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801},
-       {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597},
-       {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b},
-       {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a},
-       {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d},
-       {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982},
-       {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18},
-       {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7},
-       {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3},
-       {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5},
-       {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59},
-       {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e},
-       {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603},
-       {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060},
-       {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072},
-       {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59},
-       {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213},
-       {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41},
-       {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5},
-       {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2},
-       {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a},
-       {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2},
-       {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b},
-       {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1},
-       {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba},
-       {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62},
-       {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe},
-       {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988},
-       {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be},
-       {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546},
-       {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc},
-       {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69},
-       {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a},
-       {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2},
-       {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd},
-       {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb},
-       {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b},
-       {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76},
-       {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339},
-       {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9},
-       {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548},
-       {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de},
-       {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59},
-       {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b},
-       {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73},
-       {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11},
-       {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c},
-       {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b},
-       {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb},
-       {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc},
-       {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196},
-       {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a},
-       {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de},
-       {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9},
-       {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0},
-       {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60},
-       {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6},
-       {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c},
-       {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73},
-       {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7},
-       {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf},
-       {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83},
-       {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867},
-       {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211},
-       {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2},
-       {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874},
-       {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f},
-       {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff},
-       {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95},
-       {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd},
-       {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06},
-       {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784},
-       {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616},
-       {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c},
-       {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c},
-       {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d},
-       {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d},
-       {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272},
-       {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb},
-       {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b},
-       {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e},
-       {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23},
-       {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672},
-       {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86},
-       {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd},
-       {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48},
-};
-
-#include <linux/time.h>
-
-static int __init crc32c_test(void)
-{
-       int i;
-       int errors = 0;
-       int bytes = 0;
-       u64 nsec;
-       unsigned long flags;
-
-       /* keep static to prevent cache warming code from
-        * getting eliminated by the compiler */
-       static u32 crc;
-
-       /* pre-warm the cache */
-       for (i = 0; i < 100; i++) {
-               bytes += 2*test[i].length;
-
-               crc ^= __crc32c_le(test[i].crc, test_buf +
-                   test[i].start, test[i].length);
-       }
-
-       /* reduce OS noise */
-       local_irq_save(flags);
-       local_irq_disable();
-
-       nsec = ktime_get_ns();
-       for (i = 0; i < 100; i++) {
-               if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
-                   test[i].start, test[i].length))
-                       errors++;
-       }
-       nsec = ktime_get_ns() - nsec;
-
-       local_irq_restore(flags);
-       local_irq_enable();
-
-       pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
-
-       if (errors)
-               pr_warn("crc32c: %d self tests failed\n", errors);
-       else {
-               pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n",
-                       bytes, nsec);
-       }
-
-       return 0;
-}
-
-static int __init crc32c_combine_test(void)
-{
-       int i, j;
-       int errors = 0, runs = 0;
-
-       for (i = 0; i < 10; i++) {
-               u32 crc_full;
-
-               crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start,
-                                      test[i].length);
-               for (j = 0; j <= test[i].length; ++j) {
-                       u32 crc1, crc2;
-                       u32 len1 = j, len2 = test[i].length - j;
-
-                       crc1 = __crc32c_le(test[i].crc, test_buf +
-                                          test[i].start, len1);
-                       crc2 = __crc32c_le(0, test_buf + test[i].start +
-                                          len1, len2);
-
-                       if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) &&
-                             crc_full == test[i].crc32c_le))
-                               errors++;
-                       runs++;
-                       cond_resched();
-               }
-       }
-
-       if (errors)
-               pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs);
-       else
-               pr_info("crc32c_combine: %d self tests passed\n", runs);
-
-       return 0;
-}
-
-static int __init crc32_test(void)
-{
-       int i;
-       int errors = 0;
-       int bytes = 0;
-       u64 nsec;
-       unsigned long flags;
-
-       /* keep static to prevent cache warming code from
-        * getting eliminated by the compiler */
-       static u32 crc;
-
-       /* pre-warm the cache */
-       for (i = 0; i < 100; i++) {
-               bytes += 2*test[i].length;
-
-               crc ^= crc32_le(test[i].crc, test_buf +
-                   test[i].start, test[i].length);
-
-               crc ^= crc32_be(test[i].crc, test_buf +
-                   test[i].start, test[i].length);
-       }
-
-       /* reduce OS noise */
-       local_irq_save(flags);
-       local_irq_disable();
-
-       nsec = ktime_get_ns();
-       for (i = 0; i < 100; i++) {
-               if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
-                   test[i].start, test[i].length))
-                       errors++;
-
-               if (test[i].crc_be != crc32_be(test[i].crc, test_buf +
-                   test[i].start, test[i].length))
-                       errors++;
-       }
-       nsec = ktime_get_ns() - nsec;
-
-       local_irq_restore(flags);
-       local_irq_enable();
-
-       pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
-                CRC_LE_BITS, CRC_BE_BITS);
-
-       if (errors)
-               pr_warn("crc32: %d self tests failed\n", errors);
-       else {
-               pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n",
-                       bytes, nsec);
-       }
-
-       return 0;
-}
-
-static int __init crc32_combine_test(void)
-{
-       int i, j;
-       int errors = 0, runs = 0;
-
-       for (i = 0; i < 10; i++) {
-               u32 crc_full;
-
-               crc_full = crc32_le(test[i].crc, test_buf + test[i].start,
-                                   test[i].length);
-               for (j = 0; j <= test[i].length; ++j) {
-                       u32 crc1, crc2;
-                       u32 len1 = j, len2 = test[i].length - j;
-
-                       crc1 = crc32_le(test[i].crc, test_buf +
-                                       test[i].start, len1);
-                       crc2 = crc32_le(0, test_buf + test[i].start +
-                                       len1, len2);
-
-                       if (!(crc_full == crc32_le_combine(crc1, crc2, len2) &&
-                             crc_full == test[i].crc_le))
-                               errors++;
-                       runs++;
-                       cond_resched();
-               }
-       }
-
-       if (errors)
-               pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs);
-       else
-               pr_info("crc32_combine: %d self tests passed\n", runs);
-
-       return 0;
-}
-
-static int __init crc32test_init(void)
-{
-       crc32_test();
-       crc32c_test();
-
-       crc32_combine_test();
-       crc32c_combine_test();
-
-       return 0;
-}
-
-static void __exit crc32_exit(void)
-{
-}
-
-module_init(crc32test_init);
-module_exit(crc32_exit);
-#endif /* CONFIG_CRC32_SELFTEST */
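The combine self-tests removed above (and re-added in the new lib/crc32test.c below) exercise the identity that the CRC of a concatenation A||B is fully determined by crc(A), crc(B) and len(B). A minimal user-space sketch of the same identity, assuming zlib's crc32()/crc32_combine() rather than the kernel's crc32_le()/crc32_le_combine() (compile with -lz; the buffer contents and split point are arbitrary illustration values):

#include <assert.h>
#include <zlib.h>

int main(void)
{
	const unsigned char buf[] = "example payload for crc32 combining";
	size_t len = sizeof(buf) - 1;
	size_t split = 10;	/* arbitrary split point, 0 <= split <= len */

	/* one-pass CRC over the whole buffer */
	uLong full = crc32(crc32(0L, Z_NULL, 0), buf, len);

	/* CRCs of the two halves, each started from the initial seed */
	uLong crc1 = crc32(crc32(0L, Z_NULL, 0), buf, split);
	uLong crc2 = crc32(crc32(0L, Z_NULL, 0), buf + split, len - split);

	/* combining the partial CRCs must reproduce the one-pass result */
	assert(crc32_combine(crc1, crc2, len - split) == full);
	return 0;
}

This property is what makes CRC computation parallelizable: independent workers can checksum disjoint chunks and fold the results together afterwards, which is what crc32_combine_test() verifies for every possible split point of its first ten test vectors.
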
diff --git a/lib/crc32test.c b/lib/crc32test.c
new file mode 100644 (file)
index 0000000..97d6a57
--- /dev/null
@@ -0,0 +1,856 @@
+/*
+ * Aug 8, 2011 Bob Pearson, with help from Joakim Tjernlund and George Spelvin,
+ * cleaned up the code for the current version of sparse and added the
+ * slicing-by-8 algorithm alongside the closely similar slicing-by-4 one.
+ *
+ * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com>
+ * Nicer crc32 functions/docs submitted by linux@horizon.com.  Thanks!
+ * Code was from the public domain, copyright abandoned.  Code was
+ * subsequently included in the kernel, thus was re-licensed under the
+ * GNU GPL v2.
+ *
+ * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com>
+ * Same crc32 function was used in 5 other places in the kernel.
+ * I made one version, and deleted the others.
+ * There are various incantations of crc32().  Some use a seed of 0 or ~0.
+ * Some xor at the end with ~0.  The generic crc32() function takes
+ * seed as an argument, and doesn't xor at the end.  Then individual
+ * users can do whatever they need.
+ *   drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0.
+ *   fs/jffs2 uses seed 0, doesn't xor with ~0.
+ *   fs/partitions/efi.c uses seed ~0, xor's with ~0.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/crc32.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include "crc32defs.h"
+
+/* 4096 random bytes */
+static u8 const __aligned(8) test_buf[] __initconst =
+{
+       0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
+       0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
+       0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60,
+       0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c,
+       0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4,
+       0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a,
+       0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a,
+       0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4,
+       0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9,
+       0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4,
+       0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca,
+       0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61,
+       0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e,
+       0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a,
+       0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f,
+       0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd,
+       0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c,
+       0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88,
+       0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53,
+       0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f,
+       0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4,
+       0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74,
+       0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60,
+       0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09,
+       0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07,
+       0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1,
+       0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f,
+       0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2,
+       0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0,
+       0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95,
+       0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22,
+       0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93,
+       0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86,
+       0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d,
+       0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40,
+       0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b,
+       0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35,
+       0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40,
+       0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63,
+       0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b,
+       0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8,
+       0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72,
+       0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86,
+       0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff,
+       0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed,
+       0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c,
+       0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed,
+       0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30,
+       0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99,
+       0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4,
+       0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80,
+       0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37,
+       0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04,
+       0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e,
+       0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd,
+       0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c,
+       0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09,
+       0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb,
+       0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b,
+       0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53,
+       0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b,
+       0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f,
+       0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff,
+       0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40,
+       0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6,
+       0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb,
+       0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73,
+       0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f,
+       0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4,
+       0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66,
+       0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1,
+       0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80,
+       0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f,
+       0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5,
+       0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7,
+       0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce,
+       0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff,
+       0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48,
+       0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26,
+       0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72,
+       0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88,
+       0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9,
+       0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc,
+       0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8,
+       0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09,
+       0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8,
+       0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c,
+       0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48,
+       0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d,
+       0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f,
+       0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae,
+       0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97,
+       0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8,
+       0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75,
+       0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc,
+       0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27,
+       0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf,
+       0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7,
+       0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0,
+       0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8,
+       0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c,
+       0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44,
+       0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54,
+       0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38,
+       0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f,
+       0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b,
+       0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7,
+       0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef,
+       0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e,
+       0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c,
+       0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c,
+       0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0,
+       0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37,
+       0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf,
+       0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e,
+       0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4,
+       0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60,
+       0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe,
+       0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61,
+       0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3,
+       0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe,
+       0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40,
+       0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec,
+       0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f,
+       0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7,
+       0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79,
+       0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c,
+       0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f,
+       0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21,
+       0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9,
+       0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30,
+       0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b,
+       0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee,
+       0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6,
+       0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3,
+       0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09,
+       0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd,
+       0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f,
+       0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9,
+       0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc,
+       0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59,
+       0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60,
+       0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5,
+       0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1,
+       0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8,
+       0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9,
+       0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab,
+       0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80,
+       0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01,
+       0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e,
+       0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d,
+       0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35,
+       0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38,
+       0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a,
+       0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac,
+       0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca,
+       0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57,
+       0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed,
+       0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20,
+       0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef,
+       0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c,
+       0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a,
+       0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64,
+       0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4,
+       0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54,
+       0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16,
+       0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26,
+       0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc,
+       0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87,
+       0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60,
+       0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d,
+       0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54,
+       0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13,
+       0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59,
+       0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb,
+       0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f,
+       0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15,
+       0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78,
+       0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93,
+       0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e,
+       0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31,
+       0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1,
+       0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37,
+       0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15,
+       0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78,
+       0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f,
+       0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31,
+       0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f,
+       0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc,
+       0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9,
+       0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3,
+       0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe,
+       0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4,
+       0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24,
+       0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1,
+       0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85,
+       0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8,
+       0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09,
+       0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c,
+       0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46,
+       0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5,
+       0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39,
+       0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2,
+       0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc,
+       0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35,
+       0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde,
+       0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80,
+       0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15,
+       0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63,
+       0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58,
+       0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d,
+       0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf,
+       0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12,
+       0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c,
+       0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b,
+       0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1,
+       0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6,
+       0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73,
+       0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9,
+       0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e,
+       0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22,
+       0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb,
+       0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2,
+       0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c,
+       0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c,
+       0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93,
+       0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f,
+       0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38,
+       0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57,
+       0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03,
+       0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90,
+       0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8,
+       0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4,
+       0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36,
+       0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7,
+       0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47,
+       0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46,
+       0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73,
+       0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72,
+       0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23,
+       0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a,
+       0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58,
+       0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f,
+       0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96,
+       0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9,
+       0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b,
+       0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c,
+       0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef,
+       0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3,
+       0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4,
+       0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f,
+       0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17,
+       0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18,
+       0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8,
+       0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98,
+       0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42,
+       0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97,
+       0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97,
+       0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1,
+       0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77,
+       0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb,
+       0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c,
+       0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb,
+       0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56,
+       0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04,
+       0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48,
+       0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe,
+       0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d,
+       0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97,
+       0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8,
+       0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f,
+       0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e,
+       0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca,
+       0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44,
+       0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f,
+       0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6,
+       0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63,
+       0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19,
+       0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58,
+       0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b,
+       0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28,
+       0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf,
+       0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6,
+       0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3,
+       0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe,
+       0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f,
+       0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf,
+       0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9,
+       0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e,
+       0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7,
+       0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70,
+       0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0,
+       0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d,
+       0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4,
+       0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5,
+       0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85,
+       0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc,
+       0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f,
+       0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56,
+       0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb,
+       0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b,
+       0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5,
+       0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03,
+       0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23,
+       0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03,
+       0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87,
+       0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4,
+       0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43,
+       0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11,
+       0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40,
+       0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59,
+       0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9,
+       0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30,
+       0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd,
+       0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45,
+       0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83,
+       0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b,
+       0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5,
+       0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3,
+       0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84,
+       0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8,
+       0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34,
+       0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b,
+       0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31,
+       0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b,
+       0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40,
+       0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b,
+       0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e,
+       0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38,
+       0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb,
+       0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2,
+       0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c,
+       0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1,
+       0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc,
+       0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec,
+       0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34,
+       0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95,
+       0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92,
+       0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f,
+       0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c,
+       0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b,
+       0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c,
+       0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5,
+       0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb,
+       0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4,
+       0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9,
+       0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4,
+       0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41,
+       0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a,
+       0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8,
+       0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06,
+       0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62,
+       0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47,
+       0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4,
+       0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00,
+       0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67,
+       0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81,
+       0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0,
+       0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10,
+       0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79,
+       0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19,
+       0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8,
+       0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1,
+       0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83,
+       0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86,
+       0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55,
+       0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66,
+       0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0,
+       0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49,
+       0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea,
+       0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24,
+       0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e,
+       0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88,
+       0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87,
+       0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34,
+       0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f,
+       0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a,
+       0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a,
+       0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93,
+       0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37,
+       0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38,
+       0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4,
+       0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48,
+       0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65,
+       0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09,
+       0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e,
+       0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5,
+       0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b,
+       0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4,
+       0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e,
+       0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d,
+       0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0,
+       0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5,
+       0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48,
+       0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e,
+       0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f,
+       0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a,
+       0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d,
+       0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14,
+       0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69,
+       0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53,
+       0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56,
+       0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48,
+       0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4,
+       0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26,
+       0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e,
+       0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40,
+       0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7,
+       0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62,
+       0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe,
+       0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf,
+       0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2,
+       0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d,
+       0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32,
+       0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa,
+       0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45,
+       0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04,
+       0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33,
+       0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad,
+       0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4,
+       0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c,
+       0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b,
+       0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36,
+       0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa,
+       0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9,
+       0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28,
+       0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b,
+       0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03,
+       0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d,
+       0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff,
+       0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39,
+       0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b,
+       0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2,
+       0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34,
+       0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe,
+       0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0,
+       0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27,
+       0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86,
+       0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90,
+       0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03,
+       0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb,
+       0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57,
+       0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9,
+       0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5,
+       0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16,
+       0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5,
+       0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a,
+       0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d,
+       0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0,
+       0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f,
+       0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48,
+       0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1,
+       0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09,
+       0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51,
+       0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b,
+       0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf,
+       0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe,
+       0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad,
+       0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e,
+       0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57,
+       0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f,
+       0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef,
+       0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8,
+       0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69,
+       0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d,
+       0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59,
+       0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9,
+       0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d,
+       0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea,
+       0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56,
+       0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4,
+       0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8,
+       0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78,
+       0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f,
+       0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4,
+       0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91,
+       0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f,
+       0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c,
+       0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57,
+       0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4,
+       0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23,
+       0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17,
+       0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66,
+       0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39,
+       0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36,
+       0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00,
+       0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7,
+       0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60,
+       0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c,
+       0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e,
+       0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7,
+       0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a,
+       0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d,
+       0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37,
+       0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82,
+       0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8,
+       0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e,
+       0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85,
+       0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98,
+       0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22,
+       0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7,
+       0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49,
+       0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33,
+       0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc,
+       0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8,
+       0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f,
+       0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3,
+       0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98,
+       0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c,
+       0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6,
+       0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc,
+       0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d,
+};
+
+/* 100 test cases */
+static struct crc_test {
+       u32 crc;        /* random starting crc */
+       u32 start;      /* random 6-bit offset into buf */
+       u32 length;     /* random 11-bit length of test */
+       u32 crc_le;     /* expected crc32_le result */
+       u32 crc_be;     /* expected crc32_be result */
+       u32 crc32c_le;  /* expected crc32c_le result */
+} const test[] __initconst =
+{
+       {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
+       {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
+       {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8},
+       {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a},
+       {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152},
+       {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7},
+       {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc},
+       {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2},
+       {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d},
+       {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5},
+       {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f},
+       {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a},
+       {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8},
+       {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa},
+       {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801},
+       {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597},
+       {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b},
+       {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a},
+       {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d},
+       {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982},
+       {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18},
+       {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7},
+       {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3},
+       {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5},
+       {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59},
+       {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e},
+       {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603},
+       {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060},
+       {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072},
+       {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59},
+       {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213},
+       {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41},
+       {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5},
+       {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2},
+       {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a},
+       {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2},
+       {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b},
+       {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1},
+       {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba},
+       {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62},
+       {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe},
+       {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988},
+       {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be},
+       {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546},
+       {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc},
+       {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69},
+       {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a},
+       {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2},
+       {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd},
+       {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb},
+       {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b},
+       {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76},
+       {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339},
+       {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9},
+       {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548},
+       {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de},
+       {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59},
+       {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b},
+       {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73},
+       {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11},
+       {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c},
+       {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b},
+       {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb},
+       {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc},
+       {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196},
+       {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a},
+       {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de},
+       {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9},
+       {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0},
+       {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60},
+       {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6},
+       {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c},
+       {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73},
+       {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7},
+       {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf},
+       {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83},
+       {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867},
+       {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211},
+       {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2},
+       {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874},
+       {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f},
+       {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff},
+       {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95},
+       {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd},
+       {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06},
+       {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784},
+       {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616},
+       {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c},
+       {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c},
+       {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d},
+       {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d},
+       {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272},
+       {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb},
+       {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b},
+       {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e},
+       {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23},
+       {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672},
+       {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86},
+       {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd},
+       {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48},
+};
+
+#include <linux/time.h>
+
+static int __init crc32c_test(void)
+{
+       int i;
+       int errors = 0;
+       int bytes = 0;
+       u64 nsec;
+       unsigned long flags;
+
+       /* keep static to prevent cache warming code from
+        * getting eliminated by the compiler */
+       static u32 crc;
+
+       /* pre-warm the cache */
+       for (i = 0; i < 100; i++) {
+               bytes += test[i].length;
+
+               crc ^= __crc32c_le(test[i].crc, test_buf +
+                   test[i].start, test[i].length);
+       }
+
+       /* reduce OS noise */
+       local_irq_save(flags);
+       local_irq_disable();
+
+       nsec = ktime_get_ns();
+       for (i = 0; i < 100; i++) {
+               if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
+                   test[i].start, test[i].length))
+                       errors++;
+       }
+       nsec = ktime_get_ns() - nsec;
+
+       local_irq_restore(flags);
+       local_irq_enable();
+
+       pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
+
+       if (errors)
+               pr_warn("crc32c: %d self tests failed\n", errors);
+       else {
+               pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n",
+                       bytes, nsec);
+       }
+
+       return 0;
+}
+
+static int __init crc32c_combine_test(void)
+{
+       int i, j;
+       int errors = 0, runs = 0;
+
+       for (i = 0; i < 10; i++) {
+               u32 crc_full;
+
+               crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start,
+                                      test[i].length);
+               for (j = 0; j <= test[i].length; ++j) {
+                       u32 crc1, crc2;
+                       u32 len1 = j, len2 = test[i].length - j;
+
+                       crc1 = __crc32c_le(test[i].crc, test_buf +
+                                          test[i].start, len1);
+                       crc2 = __crc32c_le(0, test_buf + test[i].start +
+                                          len1, len2);
+
+                       if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) &&
+                             crc_full == test[i].crc32c_le))
+                               errors++;
+                       runs++;
+                       cond_resched();
+               }
+       }
+
+       if (errors)
+               pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs);
+       else
+               pr_info("crc32c_combine: %d self tests passed\n", runs);
+
+       return 0;
+}
+
+static int __init crc32_test(void)
+{
+       int i;
+       int errors = 0;
+       int bytes = 0;
+       u64 nsec;
+       unsigned long flags;
+
+       /* keep static to prevent cache warming code from
+        * getting eliminated by the compiler */
+       static u32 crc;
+
+       /* pre-warm the cache */
+       for (i = 0; i < 100; i++) {
+               bytes += 2*test[i].length;
+
+               crc ^= crc32_le(test[i].crc, test_buf +
+                   test[i].start, test[i].length);
+
+               crc ^= crc32_be(test[i].crc, test_buf +
+                   test[i].start, test[i].length);
+       }
+
+       /* reduce OS noise */
+       local_irq_save(flags);
+       local_irq_disable();
+
+       nsec = ktime_get_ns();
+       for (i = 0; i < 100; i++) {
+               if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
+                   test[i].start, test[i].length))
+                       errors++;
+
+               if (test[i].crc_be != crc32_be(test[i].crc, test_buf +
+                   test[i].start, test[i].length))
+                       errors++;
+       }
+       nsec = ktime_get_ns() - nsec;
+
+       local_irq_restore(flags);
+       local_irq_enable();
+
+       pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
+                CRC_LE_BITS, CRC_BE_BITS);
+
+       if (errors)
+               pr_warn("crc32: %d self tests failed\n", errors);
+       else {
+               pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n",
+                       bytes, nsec);
+       }
+
+       return 0;
+}
+
+static int __init crc32_combine_test(void)
+{
+       int i, j;
+       int errors = 0, runs = 0;
+
+       for (i = 0; i < 10; i++) {
+               u32 crc_full;
+
+               crc_full = crc32_le(test[i].crc, test_buf + test[i].start,
+                                   test[i].length);
+               for (j = 0; j <= test[i].length; ++j) {
+                       u32 crc1, crc2;
+                       u32 len1 = j, len2 = test[i].length - j;
+
+                       crc1 = crc32_le(test[i].crc, test_buf +
+                                       test[i].start, len1);
+                       crc2 = crc32_le(0, test_buf + test[i].start +
+                                       len1, len2);
+
+                       if (!(crc_full == crc32_le_combine(crc1, crc2, len2) &&
+                             crc_full == test[i].crc_le))
+                               errors++;
+                       runs++;
+                       cond_resched();
+               }
+       }
+
+       if (errors)
+               pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs);
+       else
+               pr_info("crc32_combine: %d self tests passed\n", runs);
+
+       return 0;
+}
+
+static int __init crc32test_init(void)
+{
+       crc32_test();
+       crc32c_test();
+
+       crc32_combine_test();
+       crc32c_combine_test();
+
+       return 0;
+}
+
+static void __exit crc32_exit(void)
+{
+}
+
+module_init(crc32test_init);
+module_exit(crc32_exit);
+
+MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
+MODULE_DESCRIPTION("CRC32 selftest");
+MODULE_LICENSE("GPL");
index 036fc882cd72561a2a96b39314078676fe94ff90..1b0baf3008ea3611bc257c7b4dd814c8645b8b02 100644 (file)
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
                error("NULL input pointer and missing fill function");
                goto exit_1;
        } else {
-               inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+               inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
                if (!inp) {
                        error("Could not allocate input buffer");
                        goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
                        inp += 4;
                        size -= 4;
                } else {
-                       if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+                       if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
                                error("chunk length is longer than allocated");
                                goto exit_2;
                        }
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
                        out_len -= dest_len;
                } else
                        dest_len = out_len;
-               ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+               ret = LZ4_decompress_fast(inp, outp, dest_len);
+               chunksize = ret;
 #else
                dest_len = uncomp_chunksize;
-               ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-                               &dest_len);
+
+               ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+               dest_len = ret;
 #endif
                if (ret < 0) {
                        error("Decoding failed");
index 3d766e78fbe26663e0de166fbe26d9b21b19ca89..de26c8b68f344857a4a066b03ba937825c03b6ca 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     lib/dma-noop.c
  *
- * Simple DMA noop-ops that map 1:1 with memory
+ * DMA operations that map to physical addresses without flushing memory.
  */
 #include <linux/export.h>
 #include <linux/mm.h>
@@ -64,7 +64,7 @@ static int dma_noop_supported(struct device *dev, u64 mask)
        return 1;
 }
 
-struct dma_map_ops dma_noop_ops = {
+const struct dma_map_ops dma_noop_ops = {
        .alloc                  = dma_noop_alloc,
        .free                   = dma_noop_free,
        .map_page               = dma_noop_map_page,
diff --git a/lib/dma-virt.c b/lib/dma-virt.c
new file mode 100644 (file)
index 0000000..dcd4df1
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ *     lib/dma-virt.c
+ *
+ * DMA operations that map to virtual addresses without flushing memory.
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+
+static void *dma_virt_alloc(struct device *dev, size_t size,
+                           dma_addr_t *dma_handle, gfp_t gfp,
+                           unsigned long attrs)
+{
+       void *ret;
+
+       ret = (void *)__get_free_pages(gfp, get_order(size));
+       if (ret)
+               *dma_handle = (uintptr_t)ret;
+       return ret;
+}
+
+static void dma_virt_free(struct device *dev, size_t size,
+                         void *cpu_addr, dma_addr_t dma_addr,
+                         unsigned long attrs)
+{
+       free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
+                                   unsigned long offset, size_t size,
+                                   enum dma_data_direction dir,
+                                   unsigned long attrs)
+{
+       return (uintptr_t)(page_address(page) + offset);
+}
+
+static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
+                          int nents, enum dma_data_direction dir,
+                          unsigned long attrs)
+{
+       int i;
+       struct scatterlist *sg;
+
+       for_each_sg(sgl, sg, nents, i) {
+               BUG_ON(!sg_page(sg));
+               sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
+               sg_dma_len(sg) = sg->length;
+       }
+
+       return nents;
+}
+
+static int dma_virt_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+       return false;
+}
+
+static int dma_virt_supported(struct device *dev, u64 mask)
+{
+       return true;
+}
+
+const struct dma_map_ops dma_virt_ops = {
+       .alloc                  = dma_virt_alloc,
+       .free                   = dma_virt_free,
+       .map_page               = dma_virt_map_page,
+       .map_sg                 = dma_virt_map_sg,
+       .mapping_error          = dma_virt_mapping_error,
+       .dma_supported          = dma_virt_supported,
+};
+EXPORT_SYMBOL(dma_virt_ops);
index 18072ea9c20ebba59aaa3fdf69ee6bfc1db395b6..6ed74f78380ce1f6e69568fad0c28d685a5f2048 100644 (file)
@@ -33,7 +33,7 @@ static unsigned long _find_next_bit(const unsigned long *addr,
 {
        unsigned long tmp;
 
-       if (!nbits || start >= nbits)
+       if (unlikely(start >= nbits))
                return nbits;
 
        tmp = addr[start / BITS_PER_LONG] ^ invert;
@@ -151,7 +151,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr,
 {
        unsigned long tmp;
 
-       if (!nbits || start >= nbits)
+       if (unlikely(start >= nbits))
                return nbits;
 
        tmp = addr[start / BITS_PER_LONG] ^ invert;
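Dropping the !nbits test is a simplification, not a behavior change: start and nbits are both unsigned long, so nbits == 0 makes start >= nbits hold for every start, and the unlikely() hint records that the guard almost never fires. A trivial check of that reasoning:

    #include <assert.h>

    int main(void)
    {
            unsigned long nbits = 0;

            /* unsigned compare: nbits == 0 implies start >= nbits */
            for (unsigned long start = 0; start < 8; start++)
                    assert(start >= nbits);
            return 0;
    }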
index e77dfe00de363cd31d258a34855e88bb1c2a2fde..8fa0791e8a1e54a5e886228c1ea84572dd3f5e3d 100644 (file)
@@ -87,6 +87,14 @@ config FONT_6x10
          embedded devices with a 320x240 screen, to get a reasonable number
          of characters (53x24) that are still at a readable size.
 
+config FONT_10x18
+       bool "console 10x18 font (not supported by all drivers)" if FONTS
+       depends on FRAMEBUFFER_CONSOLE
+       help
+         This is a high resolution console font for machines with very
+         big letters. It fits between the sun 12x22 and the normal 8x16 font.
+         If other fonts are too big or too small for you, say Y, otherwise say N.
+
 config FONT_SUN8x16
        bool "Sparc console 8x16 font"
        depends on FRAMEBUFFER_CONSOLE && (!SPARC && FONTS || SPARC)
@@ -101,14 +109,6 @@ config FONT_SUN12x22
          big letters (like the letters used in the SPARC PROM). If the
          standard font is unreadable for you, say Y, otherwise say N.
 
-config FONT_10x18
-       bool "console 10x18 font (not supported by all drivers)" if FONTS
-       depends on FRAMEBUFFER_CONSOLE
-       help
-         This is a high resolution console font for machines with very
-         big letters. It fits between the sun 12x22 and the normal 8x16 font.
-         If other fonts are too big or too small for you, say Y, otherwise say N.
-
 config FONT_AUTOSELECT
        def_bool y
        depends on !FONT_8x8
index 500fc80d23e107651d8075603a343e4c13629a64..0ba3ea86b5466c102d588327dad3e7e8df092f5d 100644 (file)
@@ -121,167 +121,3 @@ backtrack:
        }
 }
 EXPORT_SYMBOL(glob_match);
-
-
-#ifdef CONFIG_GLOB_SELFTEST
-
-#include <linux/printk.h>
-#include <linux/moduleparam.h>
-
-/* Boot with "glob.verbose=1" to show successful tests, too */
-static bool verbose = false;
-module_param(verbose, bool, 0);
-
-struct glob_test {
-       char const *pat, *str;
-       bool expected;
-};
-
-static bool __pure __init test(char const *pat, char const *str, bool expected)
-{
-       bool match = glob_match(pat, str);
-       bool success = match == expected;
-
-       /* Can't get string literals into a particular section, so... */
-       static char const msg_error[] __initconst =
-               KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
-       static char const msg_ok[] __initconst =
-               KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
-       static char const mismatch[] __initconst = "mismatch";
-       char const *message;
-
-       if (!success)
-               message = msg_error;
-       else if (verbose)
-               message = msg_ok;
-       else
-               return success;
-
-       printk(message, pat, str, mismatch + 3*match);
-       return success;
-}
-
-/*
- * The tests are all jammed together in one array to make it simpler
- * to place that array in the .init.rodata section.  The obvious
- * "array of structures containing char *" has no way to force the
- * pointed-to strings to be in a particular section.
- *
- * Anyway, a test consists of:
- * 1. Expected glob_match result: '1' or '0'.
- * 2. Pattern to match: null-terminated string
- * 3. String to match against: null-terminated string
- *
- * The list of tests is terminated with a final '\0' instead of
- * a glob_match result character.
- */
-static char const glob_tests[] __initconst =
-       /* Some basic tests */
-       "1" "a\0" "a\0"
-       "0" "a\0" "b\0"
-       "0" "a\0" "aa\0"
-       "0" "a\0" "\0"
-       "1" "\0" "\0"
-       "0" "\0" "a\0"
-       /* Simple character class tests */
-       "1" "[a]\0" "a\0"
-       "0" "[a]\0" "b\0"
-       "0" "[!a]\0" "a\0"
-       "1" "[!a]\0" "b\0"
-       "1" "[ab]\0" "a\0"
-       "1" "[ab]\0" "b\0"
-       "0" "[ab]\0" "c\0"
-       "1" "[!ab]\0" "c\0"
-       "1" "[a-c]\0" "b\0"
-       "0" "[a-c]\0" "d\0"
-       /* Corner cases in character class parsing */
-       "1" "[a-c-e-g]\0" "-\0"
-       "0" "[a-c-e-g]\0" "d\0"
-       "1" "[a-c-e-g]\0" "f\0"
-       "1" "[]a-ceg-ik[]\0" "a\0"
-       "1" "[]a-ceg-ik[]\0" "]\0"
-       "1" "[]a-ceg-ik[]\0" "[\0"
-       "1" "[]a-ceg-ik[]\0" "h\0"
-       "0" "[]a-ceg-ik[]\0" "f\0"
-       "0" "[!]a-ceg-ik[]\0" "h\0"
-       "0" "[!]a-ceg-ik[]\0" "]\0"
-       "1" "[!]a-ceg-ik[]\0" "f\0"
-       /* Simple wild cards */
-       "1" "?\0" "a\0"
-       "0" "?\0" "aa\0"
-       "0" "??\0" "a\0"
-       "1" "?x?\0" "axb\0"
-       "0" "?x?\0" "abx\0"
-       "0" "?x?\0" "xab\0"
-       /* Asterisk wild cards (backtracking) */
-       "0" "*??\0" "a\0"
-       "1" "*??\0" "ab\0"
-       "1" "*??\0" "abc\0"
-       "1" "*??\0" "abcd\0"
-       "0" "??*\0" "a\0"
-       "1" "??*\0" "ab\0"
-       "1" "??*\0" "abc\0"
-       "1" "??*\0" "abcd\0"
-       "0" "?*?\0" "a\0"
-       "1" "?*?\0" "ab\0"
-       "1" "?*?\0" "abc\0"
-       "1" "?*?\0" "abcd\0"
-       "1" "*b\0" "b\0"
-       "1" "*b\0" "ab\0"
-       "0" "*b\0" "ba\0"
-       "1" "*b\0" "bb\0"
-       "1" "*b\0" "abb\0"
-       "1" "*b\0" "bab\0"
-       "1" "*bc\0" "abbc\0"
-       "1" "*bc\0" "bc\0"
-       "1" "*bc\0" "bbc\0"
-       "1" "*bc\0" "bcbc\0"
-       /* Multiple asterisks (complex backtracking) */
-       "1" "*ac*\0" "abacadaeafag\0"
-       "1" "*ac*ae*ag*\0" "abacadaeafag\0"
-       "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
-       "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
-       "1" "*abcd*\0" "abcabcabcabcdefg\0"
-       "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
-       "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
-       "0" "*abcd*\0" "abcabcabcabcefg\0"
-       "0" "*ab*cd*\0" "abcabcabcabcefg\0";
-
-static int __init glob_init(void)
-{
-       unsigned successes = 0;
-       unsigned n = 0;
-       char const *p = glob_tests;
-       static char const message[] __initconst =
-               KERN_INFO "glob: %u self-tests passed, %u failed\n";
-
-       /*
-        * Tests are jammed together in a string.  The first byte is '1'
-        * or '0' to indicate the expected outcome, or '\0' to indicate the
-        * end of the tests.  Then come two null-terminated strings: the
-        * pattern and the string to match it against.
-        */
-       while (*p) {
-               bool expected = *p++ & 1;
-               char const *pat = p;
-
-               p += strlen(p) + 1;
-               successes += test(pat, p, expected);
-               p += strlen(p) + 1;
-               n++;
-       }
-
-       n -= successes;
-       printk(message, successes, n);
-
-       /* What's the errno for "kernel bug detected"?  Guess... */
-       return n ? -ECANCELED : 0;
-}
-
-/* We need a dummy exit function to allow unload */
-static void __exit glob_fini(void) { }
-
-module_init(glob_init);
-module_exit(glob_fini);
-
-#endif /* CONFIG_GLOB_SELFTEST */
diff --git a/lib/globtest.c b/lib/globtest.c
new file mode 100644 (file)
index 0000000..d8e97d4
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * Extracted from glob.c
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/glob.h>
+#include <linux/printk.h>
+
+/* Boot with "glob.verbose=1" to show successful tests, too */
+static bool verbose = false;
+module_param(verbose, bool, 0);
+
+struct glob_test {
+       char const *pat, *str;
+       bool expected;
+};
+
+static bool __pure __init test(char const *pat, char const *str, bool expected)
+{
+       bool match = glob_match(pat, str);
+       bool success = match == expected;
+
+       /* Can't get string literals into a particular section, so... */
+       static char const msg_error[] __initconst =
+               KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
+       static char const msg_ok[] __initconst =
+               KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
+       static char const mismatch[] __initconst = "mismatch";
+       char const *message;
+
+       if (!success)
+               message = msg_error;
+       else if (verbose)
+               message = msg_ok;
+       else
+               return success;
+
+       printk(message, pat, str, mismatch + 3*match);
+       return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section.  The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static char const glob_tests[] __initconst =
+       /* Some basic tests */
+       "1" "a\0" "a\0"
+       "0" "a\0" "b\0"
+       "0" "a\0" "aa\0"
+       "0" "a\0" "\0"
+       "1" "\0" "\0"
+       "0" "\0" "a\0"
+       /* Simple character class tests */
+       "1" "[a]\0" "a\0"
+       "0" "[a]\0" "b\0"
+       "0" "[!a]\0" "a\0"
+       "1" "[!a]\0" "b\0"
+       "1" "[ab]\0" "a\0"
+       "1" "[ab]\0" "b\0"
+       "0" "[ab]\0" "c\0"
+       "1" "[!ab]\0" "c\0"
+       "1" "[a-c]\0" "b\0"
+       "0" "[a-c]\0" "d\0"
+       /* Corner cases in character class parsing */
+       "1" "[a-c-e-g]\0" "-\0"
+       "0" "[a-c-e-g]\0" "d\0"
+       "1" "[a-c-e-g]\0" "f\0"
+       "1" "[]a-ceg-ik[]\0" "a\0"
+       "1" "[]a-ceg-ik[]\0" "]\0"
+       "1" "[]a-ceg-ik[]\0" "[\0"
+       "1" "[]a-ceg-ik[]\0" "h\0"
+       "0" "[]a-ceg-ik[]\0" "f\0"
+       "0" "[!]a-ceg-ik[]\0" "h\0"
+       "0" "[!]a-ceg-ik[]\0" "]\0"
+       "1" "[!]a-ceg-ik[]\0" "f\0"
+       /* Simple wild cards */
+       "1" "?\0" "a\0"
+       "0" "?\0" "aa\0"
+       "0" "??\0" "a\0"
+       "1" "?x?\0" "axb\0"
+       "0" "?x?\0" "abx\0"
+       "0" "?x?\0" "xab\0"
+       /* Asterisk wild cards (backtracking) */
+       "0" "*??\0" "a\0"
+       "1" "*??\0" "ab\0"
+       "1" "*??\0" "abc\0"
+       "1" "*??\0" "abcd\0"
+       "0" "??*\0" "a\0"
+       "1" "??*\0" "ab\0"
+       "1" "??*\0" "abc\0"
+       "1" "??*\0" "abcd\0"
+       "0" "?*?\0" "a\0"
+       "1" "?*?\0" "ab\0"
+       "1" "?*?\0" "abc\0"
+       "1" "?*?\0" "abcd\0"
+       "1" "*b\0" "b\0"
+       "1" "*b\0" "ab\0"
+       "0" "*b\0" "ba\0"
+       "1" "*b\0" "bb\0"
+       "1" "*b\0" "abb\0"
+       "1" "*b\0" "bab\0"
+       "1" "*bc\0" "abbc\0"
+       "1" "*bc\0" "bc\0"
+       "1" "*bc\0" "bbc\0"
+       "1" "*bc\0" "bcbc\0"
+       /* Multiple asterisks (complex backtracking) */
+       "1" "*ac*\0" "abacadaeafag\0"
+       "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+       "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+       "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+       "1" "*abcd*\0" "abcabcabcabcdefg\0"
+       "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+       "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+       "0" "*abcd*\0" "abcabcabcabcefg\0"
+       "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+static int __init glob_init(void)
+{
+       unsigned successes = 0;
+       unsigned n = 0;
+       char const *p = glob_tests;
+       static char const message[] __initconst =
+               KERN_INFO "glob: %u self-tests passed, %u failed\n";
+
+       /*
+        * Tests are jammed together in a string.  The first byte is '1'
+        * or '0' to indicate the expected outcome, or '\0' to indicate the
+        * end of the tests.  Then come two null-terminated strings: the
+        * pattern and the string to match it against.
+        */
+       while (*p) {
+               bool expected = *p++ & 1;
+               char const *pat = p;
+
+               p += strlen(p) + 1;
+               successes += test(pat, p, expected);
+               p += strlen(p) + 1;
+               n++;
+       }
+
+       n -= successes;
+       printk(message, successes, n);
+
+       /* What's the errno for "kernel bug detected"?  Guess... */
+       return n ? -ECANCELED : 0;
+}
+
+/* We need a dummy exit function to allow unload */
+static void __exit glob_fini(void) { }
+
+module_init(glob_init);
+module_exit(glob_fini);
+
+MODULE_DESCRIPTION("glob(7) matching tests");
+MODULE_LICENSE("Dual MIT/GPL");
index 52d2979a05e808432389f73fd77f180a8e5415a5..b13682bb0a1c67432092ffb6f3f23071e9cd15dd 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
-/*
- * 2002-10-18  written by Jim Houston jim.houston@ccur.com
- *     Copyright (C) 2002 by Concurrent Computer Corporation
- *     Distributed under the GNU GPL license version 2.
- *
- * Modified by George Anzinger to reuse immediately and to use
- * find bit instructions.  Also removed _irq on spinlocks.
- *
- * Modified by Nadia Derbey to make it RCU safe.
- *
- * Small id to pointer translation service.
- *
- * It uses a radix tree like structure as a sparse array indexed
- * by the id to obtain the pointer.  The bitmap makes allocating
- * a new id quick.
- *
- * You call it to allocate an id (an int) an associate with that id a
- * pointer or what ever, we treat it as a (void *).  You can pass this
- * id to a user for him to pass back at a later time.  You then pass
- * that id to this code and it returns your pointer.
- */
-
-#ifndef TEST                        // to test in user space...
-#include <linux/slab.h>
-#include <linux/init.h>
+#include <linux/bitmap.h>
 #include <linux/export.h>
-#endif
-#include <linux/err.h>
-#include <linux/string.h>
 #include <linux/idr.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/percpu.h>
-
-#define MAX_IDR_SHIFT          (sizeof(int) * 8 - 1)
-#define MAX_IDR_BIT            (1U << MAX_IDR_SHIFT)
-
-/* Leave the possibility of an incomplete final layer */
-#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)
 
-/* Number of id_layer structs to leave in free list */
-#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2)
-
-static struct kmem_cache *idr_layer_cache;
-static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
-static DEFINE_PER_CPU(int, idr_preload_cnt);
+DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
 static DEFINE_SPINLOCK(simple_ida_lock);
 
-/* the maximum ID which can be allocated given idr->layers */
-static int idr_max(int layers)
-{
-       int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT);
-
-       return (1 << bits) - 1;
-}
-
-/*
- * Prefix mask for an idr_layer at @layer.  For layer 0, the prefix mask is
- * all bits except for the lower IDR_BITS.  For layer 1, 2 * IDR_BITS, and
- * so on.
- */
-static int idr_layer_prefix_mask(int layer)
-{
-       return ~idr_max(layer + 1);
-}
-
-static struct idr_layer *get_from_free_list(struct idr *idp)
-{
-       struct idr_layer *p;
-       unsigned long flags;
-
-       spin_lock_irqsave(&idp->lock, flags);
-       if ((p = idp->id_free)) {
-               idp->id_free = p->ary[0];
-               idp->id_free_cnt--;
-               p->ary[0] = NULL;
-       }
-       spin_unlock_irqrestore(&idp->lock, flags);
-       return(p);
-}
-
 /**
- * idr_layer_alloc - allocate a new idr_layer
- * @gfp_mask: allocation mask
- * @layer_idr: optional idr to allocate from
- *
- * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch
- * one from the per-cpu preload buffer.  If @layer_idr is not %NULL, fetch
- * an idr_layer from @idr->id_free.
- *
- * @layer_idr is to maintain backward compatibility with the old alloc
- * interface - idr_pre_get() and idr_get_new*() - and will be removed
- * together with per-pool preload buffer.
- */
-static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr)
-{
-       struct idr_layer *new;
-
-       /* this is the old path, bypass to get_from_free_list() */
-       if (layer_idr)
-               return get_from_free_list(layer_idr);
-
-       /*
-        * Try to allocate directly from kmem_cache.  We want to try this
-        * before preload buffer; otherwise, non-preloading idr_alloc()
-        * users will end up taking advantage of preloading ones.  As the
-        * following is allowed to fail for preloaded cases, suppress
-        * warning this time.
-        */
-       new = kmem_cache_zalloc(idr_layer_cache, gfp_mask | __GFP_NOWARN);
-       if (new)
-               return new;
-
-       /*
-        * Try to fetch one from the per-cpu preload buffer if in process
-        * context.  See idr_preload() for details.
-        */
-       if (!in_interrupt()) {
-               preempt_disable();
-               new = __this_cpu_read(idr_preload_head);
-               if (new) {
-                       __this_cpu_write(idr_preload_head, new->ary[0]);
-                       __this_cpu_dec(idr_preload_cnt);
-                       new->ary[0] = NULL;
-               }
-               preempt_enable();
-               if (new)
-                       return new;
-       }
-
-       /*
-        * Both failed.  Try kmem_cache again w/o adding __GFP_NOWARN so
-        * that memory allocation failure warning is printed as intended.
-        */
-       return kmem_cache_zalloc(idr_layer_cache, gfp_mask);
-}
-
-static void idr_layer_rcu_free(struct rcu_head *head)
-{
-       struct idr_layer *layer;
-
-       layer = container_of(head, struct idr_layer, rcu_head);
-       kmem_cache_free(idr_layer_cache, layer);
-}
-
-static inline void free_layer(struct idr *idr, struct idr_layer *p)
-{
-       if (idr->hint == p)
-               RCU_INIT_POINTER(idr->hint, NULL);
-       call_rcu(&p->rcu_head, idr_layer_rcu_free);
-}
-
-/* only called when idp->lock is held */
-static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
-{
-       p->ary[0] = idp->id_free;
-       idp->id_free = p;
-       idp->id_free_cnt++;
-}
-
-static void move_to_free_list(struct idr *idp, struct idr_layer *p)
-{
-       unsigned long flags;
-
-       /*
-        * Depends on the return element being zeroed.
-        */
-       spin_lock_irqsave(&idp->lock, flags);
-       __move_to_free_list(idp, p);
-       spin_unlock_irqrestore(&idp->lock, flags);
-}
-
-static void idr_mark_full(struct idr_layer **pa, int id)
-{
-       struct idr_layer *p = pa[0];
-       int l = 0;
-
-       __set_bit(id & IDR_MASK, p->bitmap);
-       /*
-        * If this layer is full mark the bit in the layer above to
-        * show that this part of the radix tree is full.  This may
-        * complete the layer above and require walking up the radix
-        * tree.
-        */
-       while (bitmap_full(p->bitmap, IDR_SIZE)) {
-               if (!(p = pa[++l]))
-                       break;
-               id = id >> IDR_BITS;
-               __set_bit((id & IDR_MASK), p->bitmap);
-       }
-}
-
-static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
-{
-       while (idp->id_free_cnt < MAX_IDR_FREE) {
-               struct idr_layer *new;
-               new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
-               if (new == NULL)
-                       return (0);
-               move_to_free_list(idp, new);
-       }
-       return 1;
-}
-
-/**
- * sub_alloc - try to allocate an id without growing the tree depth
- * @idp: idr handle
- * @starting_id: id to start search at
- * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer
- * @gfp_mask: allocation mask for idr_layer_alloc()
- * @layer_idr: optional idr passed to idr_layer_alloc()
- *
- * Allocate an id in range [@starting_id, INT_MAX] from @idp without
- * growing its depth.  Returns
- *
- *  the allocated id >= 0 if successful,
- *  -EAGAIN if the tree needs to grow for allocation to succeed,
- *  -ENOSPC if the id space is exhausted,
- *  -ENOMEM if more idr_layers need to be allocated.
- */
-static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
-                    gfp_t gfp_mask, struct idr *layer_idr)
-{
-       int n, m, sh;
-       struct idr_layer *p, *new;
-       int l, id, oid;
-
-       id = *starting_id;
- restart:
-       p = idp->top;
-       l = idp->layers;
-       pa[l--] = NULL;
-       while (1) {
-               /*
-                * We run around this while until we reach the leaf node...
-                */
-               n = (id >> (IDR_BITS*l)) & IDR_MASK;
-               m = find_next_zero_bit(p->bitmap, IDR_SIZE, n);
-               if (m == IDR_SIZE) {
-                       /* no space available go back to previous layer. */
-                       l++;
-                       oid = id;
-                       id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
-
-                       /* if already at the top layer, we need to grow */
-                       if (id > idr_max(idp->layers)) {
-                               *starting_id = id;
-                               return -EAGAIN;
-                       }
-                       p = pa[l];
-                       BUG_ON(!p);
-
-                       /* If we need to go up one layer, continue the
-                        * loop; otherwise, restart from the top.
-                        */
-                       sh = IDR_BITS * (l + 1);
-                       if (oid >> sh == id >> sh)
-                               continue;
-                       else
-                               goto restart;
-               }
-               if (m != n) {
-                       sh = IDR_BITS*l;
-                       id = ((id >> sh) ^ n ^ m) << sh;
-               }
-               if ((id >= MAX_IDR_BIT) || (id < 0))
-                       return -ENOSPC;
-               if (l == 0)
-                       break;
-               /*
-                * Create the layer below if it is missing.
-                */
-               if (!p->ary[m]) {
-                       new = idr_layer_alloc(gfp_mask, layer_idr);
-                       if (!new)
-                               return -ENOMEM;
-                       new->layer = l-1;
-                       new->prefix = id & idr_layer_prefix_mask(new->layer);
-                       rcu_assign_pointer(p->ary[m], new);
-                       p->count++;
-               }
-               pa[l--] = p;
-               p = p->ary[m];
-       }
-
-       pa[l] = p;
-       return id;
-}
-
-static int idr_get_empty_slot(struct idr *idp, int starting_id,
-                             struct idr_layer **pa, gfp_t gfp_mask,
-                             struct idr *layer_idr)
-{
-       struct idr_layer *p, *new;
-       int layers, v, id;
-       unsigned long flags;
-
-       id = starting_id;
-build_up:
-       p = idp->top;
-       layers = idp->layers;
-       if (unlikely(!p)) {
-               if (!(p = idr_layer_alloc(gfp_mask, layer_idr)))
-                       return -ENOMEM;
-               p->layer = 0;
-               layers = 1;
-       }
-       /*
-        * Add a new layer to the top of the tree if the requested
-        * id is larger than the currently allocated space.
-        */
-       while (id > idr_max(layers)) {
-               layers++;
-               if (!p->count) {
-                       /* special case: if the tree is currently empty,
-                        * then we grow the tree by moving the top node
-                        * upwards.
-                        */
-                       p->layer++;
-                       WARN_ON_ONCE(p->prefix);
-                       continue;
-               }
-               if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) {
-                       /*
-                        * The allocation failed.  If we built part of
-                        * the structure tear it down.
-                        */
-                       spin_lock_irqsave(&idp->lock, flags);
-                       for (new = p; p && p != idp->top; new = p) {
-                               p = p->ary[0];
-                               new->ary[0] = NULL;
-                               new->count = 0;
-                               bitmap_clear(new->bitmap, 0, IDR_SIZE);
-                               __move_to_free_list(idp, new);
-                       }
-                       spin_unlock_irqrestore(&idp->lock, flags);
-                       return -ENOMEM;
-               }
-               new->ary[0] = p;
-               new->count = 1;
-               new->layer = layers-1;
-               new->prefix = id & idr_layer_prefix_mask(new->layer);
-               if (bitmap_full(p->bitmap, IDR_SIZE))
-                       __set_bit(0, new->bitmap);
-               p = new;
-       }
-       rcu_assign_pointer(idp->top, p);
-       idp->layers = layers;
-       v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr);
-       if (v == -EAGAIN)
-               goto build_up;
-       return(v);
-}
-
-/*
- * @id and @pa are from a successful allocation from idr_get_empty_slot().
- * Install the user pointer @ptr and mark the slot full.
- */
-static void idr_fill_slot(struct idr *idr, void *ptr, int id,
-                         struct idr_layer **pa)
-{
-       /* update hint used for lookup, cleared from free_layer() */
-       rcu_assign_pointer(idr->hint, pa[0]);
-
-       rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr);
-       pa[0]->count++;
-       idr_mark_full(pa, id);
-}
-
-
-/**
- * idr_preload - preload for idr_alloc()
- * @gfp_mask: allocation mask to use for preloading
- *
- * Preload per-cpu layer buffer for idr_alloc().  Can only be used from
- * process context and each idr_preload() invocation should be matched with
- * idr_preload_end().  Note that preemption is disabled while preloaded.
- *
- * The first idr_alloc() in the preloaded section can be treated as if it
- * were invoked with @gfp_mask used for preloading.  This allows using more
- * permissive allocation masks for idrs protected by spinlocks.
- *
- * For example, if idr_alloc() below fails, the failure can be treated as
- * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT.
- *
- *     idr_preload(GFP_KERNEL);
- *     spin_lock(lock);
- *
- *     id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT);
- *
- *     spin_unlock(lock);
- *     idr_preload_end();
- *     if (id < 0)
- *             error;
- */
-void idr_preload(gfp_t gfp_mask)
-{
-       /*
-        * Consuming preload buffer from non-process context breaks preload
-        * allocation guarantee.  Disallow usage from those contexts.
-        */
-       WARN_ON_ONCE(in_interrupt());
-       might_sleep_if(gfpflags_allow_blocking(gfp_mask));
-
-       preempt_disable();
-
-       /*
-        * idr_alloc() is likely to succeed w/o full idr_layer buffer and
-        * return value from idr_alloc() needs to be checked for failure
-        * anyway.  Silently give up if allocation fails.  The caller can
-        * treat failures from idr_alloc() as if idr_alloc() were called
-        * with @gfp_mask which should be enough.
-        */
-       while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
-               struct idr_layer *new;
-
-               preempt_enable();
-               new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
-               preempt_disable();
-               if (!new)
-                       break;
-
-               /* link the new one to per-cpu preload list */
-               new->ary[0] = __this_cpu_read(idr_preload_head);
-               __this_cpu_write(idr_preload_head, new);
-               __this_cpu_inc(idr_preload_cnt);
-       }
-}
-EXPORT_SYMBOL(idr_preload);
-
-/**
- * idr_alloc - allocate new idr entry
- * @idr: the (initialized) idr
+ * idr_alloc - allocate an id
+ * @idr: idr handle
  * @ptr: pointer to be associated with the new id
  * @start: the minimum id (inclusive)
- * @end: the maximum id (exclusive, <= 0 for max)
- * @gfp_mask: memory allocation flags
+ * @end: the maximum id (exclusive)
+ * @gfp: memory allocation flags
  *
- * Allocate an id in [start, end) and associate it with @ptr.  If no ID is
- * available in the specified range, returns -ENOSPC.  On memory allocation
- * failure, returns -ENOMEM.
+ * Allocates an unused ID in the range [start, end).  Returns -ENOSPC
+ * if there are no unused IDs in that range.
  *
  * Note that @end is treated as max when <= 0.  This is to always allow
  * using @start + N as @end as long as N is inside integer range.
  *
- * The user is responsible for exclusively synchronizing all operations
- * which may modify @idr.  However, read-only accesses such as idr_find()
- * or iteration can be performed under RCU read lock provided the user
- * destroys @ptr in RCU-safe way after removal from idr.
+ * Simultaneous modifications to the @idr are not allowed and should be
+ * prevented by the user, usually with a lock.  idr_alloc() may be called
+ * concurrently with read-only accesses to the @idr, such as idr_find() and
+ * idr_for_each_entry().
  */
-int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
+int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
 {
-       int max = end > 0 ? end - 1 : INT_MAX;  /* inclusive upper limit */
-       struct idr_layer *pa[MAX_IDR_LEVEL + 1];
-       int id;
+       void __rcu **slot;
+       struct radix_tree_iter iter;
 
-       might_sleep_if(gfpflags_allow_blocking(gfp_mask));
-
-       /* sanity checks */
        if (WARN_ON_ONCE(start < 0))
                return -EINVAL;
-       if (unlikely(max < start))
-               return -ENOSPC;
+       if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
+               return -EINVAL;
 
-       /* allocate id */
-       id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL);
-       if (unlikely(id < 0))
-               return id;
-       if (unlikely(id > max))
-               return -ENOSPC;
+       radix_tree_iter_init(&iter, start);
+       slot = idr_get_free(&idr->idr_rt, &iter, gfp, end);
+       if (IS_ERR(slot))
+               return PTR_ERR(slot);
 
-       idr_fill_slot(idr, ptr, id, pa);
-       return id;
+       radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
+       radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
+       return iter.index;
 }
 EXPORT_SYMBOL_GPL(idr_alloc);
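The idr_preload() kernel-doc removed above carried a usage template that still applies after this conversion, assuming idr_preload()/idr_preload_end() remain available on the radix-tree side rather than disappearing: preload with a permissive mask, then allocate with GFP_NOWAIT under the caller's lock. A sketch of that pattern, with invented names:

    static DEFINE_IDR(obj_idr);            /* hypothetical caller state */
    static DEFINE_SPINLOCK(obj_lock);

    static int obj_register(void *obj)
    {
            int id;

            idr_preload(GFP_KERNEL);
            spin_lock(&obj_lock);
            /* end <= 0 means "no upper bound" per the comment above */
            id = idr_alloc(&obj_idr, obj, 0, 0, GFP_NOWAIT);
            spin_unlock(&obj_lock);
            idr_preload_end();

            return id;     /* >= 0 on success, -ENOSPC/-ENOMEM on failure */
    }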
 
 /**
  * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion
- * @idr: the (initialized) idr
+ * @idr: idr handle
  * @ptr: pointer to be associated with the new id
  * @start: the minimum id (inclusive)
- * @end: the maximum id (exclusive, <= 0 for max)
- * @gfp_mask: memory allocation flags
- *
- * Essentially the same as idr_alloc, but prefers to allocate progressively
- * higher ids if it can. If the "cur" counter wraps, then it will start again
- * at the "start" end of the range and allocate one that has already been used.
- */
-int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end,
-                       gfp_t gfp_mask)
-{
-       int id;
-
-       id = idr_alloc(idr, ptr, max(start, idr->cur), end, gfp_mask);
-       if (id == -ENOSPC)
-               id = idr_alloc(idr, ptr, start, end, gfp_mask);
-
-       if (likely(id >= 0))
-               idr->cur = id + 1;
-       return id;
-}
-EXPORT_SYMBOL(idr_alloc_cyclic);
-
-static void idr_remove_warning(int id)
-{
-       WARN(1, "idr_remove called for id=%d which is not allocated.\n", id);
-}
-
-static void sub_remove(struct idr *idp, int shift, int id)
-{
-       struct idr_layer *p = idp->top;
-       struct idr_layer **pa[MAX_IDR_LEVEL + 1];
-       struct idr_layer ***paa = &pa[0];
-       struct idr_layer *to_free;
-       int n;
-
-       *paa = NULL;
-       *++paa = &idp->top;
-
-       while ((shift > 0) && p) {
-               n = (id >> shift) & IDR_MASK;
-               __clear_bit(n, p->bitmap);
-               *++paa = &p->ary[n];
-               p = p->ary[n];
-               shift -= IDR_BITS;
-       }
-       n = id & IDR_MASK;
-       if (likely(p != NULL && test_bit(n, p->bitmap))) {
-               __clear_bit(n, p->bitmap);
-               RCU_INIT_POINTER(p->ary[n], NULL);
-               to_free = NULL;
-               while(*paa && ! --((**paa)->count)){
-                       if (to_free)
-                               free_layer(idp, to_free);
-                       to_free = **paa;
-                       **paa-- = NULL;
-               }
-               if (!*paa)
-                       idp->layers = 0;
-               if (to_free)
-                       free_layer(idp, to_free);
-       } else
-               idr_remove_warning(id);
-}
-
-/**
- * idr_remove - remove the given id and free its slot
- * @idp: idr handle
- * @id: unique key
- */
-void idr_remove(struct idr *idp, int id)
-{
-       struct idr_layer *p;
-       struct idr_layer *to_free;
-
-       if (id < 0)
-               return;
-
-       if (id > idr_max(idp->layers)) {
-               idr_remove_warning(id);
-               return;
-       }
-
-       sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
-       if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
-           idp->top->ary[0]) {
-               /*
-                * Single child at leftmost slot: we can shrink the tree.
-                * This level is not needed anymore since when layers are
-                * inserted, they are inserted at the top of the existing
-                * tree.
-                */
-               to_free = idp->top;
-               p = idp->top->ary[0];
-               rcu_assign_pointer(idp->top, p);
-               --idp->layers;
-               to_free->count = 0;
-               bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
-               free_layer(idp, to_free);
-       }
-}
-EXPORT_SYMBOL(idr_remove);
-
-static void __idr_remove_all(struct idr *idp)
-{
-       int n, id, max;
-       int bt_mask;
-       struct idr_layer *p;
-       struct idr_layer *pa[MAX_IDR_LEVEL + 1];
-       struct idr_layer **paa = &pa[0];
-
-       n = idp->layers * IDR_BITS;
-       *paa = idp->top;
-       RCU_INIT_POINTER(idp->top, NULL);
-       max = idr_max(idp->layers);
-
-       id = 0;
-       while (id >= 0 && id <= max) {
-               p = *paa;
-               while (n > IDR_BITS && p) {
-                       n -= IDR_BITS;
-                       p = p->ary[(id >> n) & IDR_MASK];
-                       *++paa = p;
-               }
-
-               bt_mask = id;
-               id += 1 << n;
-               /* Get the highest bit that the above add changed from 0->1. */
-               while (n < fls(id ^ bt_mask)) {
-                       if (*paa)
-                               free_layer(idp, *paa);
-                       n += IDR_BITS;
-                       --paa;
-               }
-       }
-       idp->layers = 0;
-}
-
-/**
- * idr_destroy - release all cached layers within an idr tree
- * @idp: idr handle
- *
- * Free all id mappings and all idp_layers.  After this function, @idp is
- * completely unused and can be freed / recycled.  The caller is
- * responsible for ensuring that no one else accesses @idp during or after
- * idr_destroy().
+ * @end: the maximum id (exclusive)
+ * @gfp: memory allocation flags
  *
- * A typical clean-up sequence for objects stored in an idr tree will use
- * idr_for_each() to free all objects, if necessary, then idr_destroy() to
- * free up the id mappings and cached idr_layers.
+ * Allocates an ID larger than the last ID allocated if one is available.
+ * If not, it will attempt to allocate the smallest ID that is larger
+ * than or equal to @start.
  */
-void idr_destroy(struct idr *idp)
+int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
 {
-       __idr_remove_all(idp);
+       int id, curr = idr->idr_next;
 
-       while (idp->id_free_cnt) {
-               struct idr_layer *p = get_from_free_list(idp);
-               kmem_cache_free(idr_layer_cache, p);
-       }
-}
-EXPORT_SYMBOL(idr_destroy);
+       if (curr < start)
+               curr = start;
 
-void *idr_find_slowpath(struct idr *idp, int id)
-{
-       int n;
-       struct idr_layer *p;
-
-       if (id < 0)
-               return NULL;
-
-       p = rcu_dereference_raw(idp->top);
-       if (!p)
-               return NULL;
-       n = (p->layer+1) * IDR_BITS;
+       id = idr_alloc(idr, ptr, curr, end, gfp);
+       if ((id == -ENOSPC) && (curr > start))
+               id = idr_alloc(idr, ptr, start, curr, gfp);
 
-       if (id > idr_max(p->layer + 1))
-               return NULL;
-       BUG_ON(n == 0);
+       if (id >= 0)
+               idr->idr_next = id + 1U;
 
-       while (n > 0 && p) {
-               n -= IDR_BITS;
-               BUG_ON(n != p->layer*IDR_BITS);
-               p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
-       }
-       return((void *)p);
+       return id;
 }
-EXPORT_SYMBOL(idr_find_slowpath);
+EXPORT_SYMBOL(idr_alloc_cyclic);
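
As a hedged usage note: the cyclic variant follows the same locking rules as
idr_alloc() and is typically chosen where immediate reuse of a freed ID would
be confusing, e.g.:

    /* prefers an ID above the last one handed out; wraps back to @start */
    id = idr_alloc_cyclic(&my_idr, obj, 0, 0, GFP_KERNEL);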
 
 /**
  * idr_for_each - iterate through all stored pointers
- * @idp: idr handle
+ * @idr: idr handle
  * @fn: function to be called for each pointer
- * @data: data passed back to callback function
+ * @data: data passed to callback function
  *
- * Iterate over the pointers registered with the given idr.  The
- * callback function will be called for each pointer currently
- * registered, passing the id, the pointer and the data pointer passed
- * to this function.  It is not safe to modify the idr tree while in
- * the callback, so functions such as idr_get_new and idr_remove are
- * not allowed.
+ * The callback function will be called for each entry in @idr, passing
+ * the id, the pointer and the data pointer passed to this function.
  *
- * We check the return of @fn each time. If it returns anything other
- * than %0, we break out and return that value.
+ * If @fn returns anything other than %0, the iteration stops and that
+ * value is returned from this function.
  *
- * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove().
+ * idr_for_each() can be called concurrently with idr_alloc() and
+ * idr_remove() if protected by RCU.  Newly added entries may not be
+ * seen and deleted entries may be seen, but adding and removing entries
+ * will not cause other entries to be skipped, nor spurious ones to be seen.
  */
-int idr_for_each(struct idr *idp,
-                int (*fn)(int id, void *p, void *data), void *data)
+int idr_for_each(const struct idr *idr,
+               int (*fn)(int id, void *p, void *data), void *data)
 {
-       int n, id, max, error = 0;
-       struct idr_layer *p;
-       struct idr_layer *pa[MAX_IDR_LEVEL + 1];
-       struct idr_layer **paa = &pa[0];
-
-       n = idp->layers * IDR_BITS;
-       *paa = rcu_dereference_raw(idp->top);
-       max = idr_max(idp->layers);
+       struct radix_tree_iter iter;
+       void __rcu **slot;
 
-       id = 0;
-       while (id >= 0 && id <= max) {
-               p = *paa;
-               while (n > 0 && p) {
-                       n -= IDR_BITS;
-                       p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
-                       *++paa = p;
-               }
-
-               if (p) {
-                       error = fn(id, (void *)p, data);
-                       if (error)
-                               break;
-               }
-
-               id += 1 << n;
-               while (n < fls(id)) {
-                       n += IDR_BITS;
-                       --paa;
-               }
+       radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
+               int ret = fn(iter.index, rcu_dereference_raw(*slot), data);
+               if (ret)
+                       return ret;
        }
 
-       return error;
+       return 0;
 }
 EXPORT_SYMBOL(idr_for_each);
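
A minimal callback sketch matching the signature above (print_entry is a
made-up name for illustration):

    static int print_entry(int id, void *p, void *data)
    {
            pr_info("id %d -> %p\n", id, p);
            return 0;       /* returning non-zero would stop the iteration */
    }

    /* under rcu_read_lock() or whatever excludes modification */
    idr_for_each(&my_idr, print_entry, NULL);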
 
 /**
- * idr_get_next - lookup next object of id to given id.
- * @idp: idr handle
- * @nextidp:  pointer to lookup key
- *
- * Returns pointer to registered object with id, which is next number to
- * given id. After being looked up, *@nextidp will be updated for the next
- * iteration.
- *
- * This function can be called under rcu_read_lock(), given that the leaf
- * pointers lifetimes are correctly managed.
+ * idr_get_next - Find next populated entry
+ * @idr: idr handle
+ * @nextid: Pointer to lowest possible ID to return
+ *
+ * Returns the next populated entry in the tree with an ID greater than
+ * or equal to the value pointed to by @nextid.  On exit, @nextid is updated
+ * to the ID of the found value.  To use in a loop, the value pointed to
+ * by @nextid must be incremented by the user.
  */
-void *idr_get_next(struct idr *idp, int *nextidp)
+void *idr_get_next(struct idr *idr, int *nextid)
 {
-       struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1];
-       struct idr_layer **paa = &pa[0];
-       int id = *nextidp;
-       int n, max;
+       struct radix_tree_iter iter;
+       void __rcu **slot;
 
-       /* find first ent */
-       p = *paa = rcu_dereference_raw(idp->top);
-       if (!p)
+       slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
+       if (!slot)
                return NULL;
-       n = (p->layer + 1) * IDR_BITS;
-       max = idr_max(p->layer + 1);
-
-       while (id >= 0 && id <= max) {
-               p = *paa;
-               while (n > 0 && p) {
-                       n -= IDR_BITS;
-                       p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
-                       *++paa = p;
-               }
-
-               if (p) {
-                       *nextidp = id;
-                       return p;
-               }
 
-               /*
-                * Proceed to the next layer at the current level.  Unlike
-                * idr_for_each(), @id isn't guaranteed to be aligned to
-                * layer boundary at this point and adding 1 << n may
-                * incorrectly skip IDs.  Make sure we jump to the
-                * beginning of the next layer using round_up().
-                */
-               id = round_up(id + 1, 1 << n);
-               while (n < fls(id)) {
-                       n += IDR_BITS;
-                       --paa;
-               }
-       }
-       return NULL;
+       *nextid = iter.index;
+       return rcu_dereference_raw(*slot);
 }
 EXPORT_SYMBOL(idr_get_next);
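
The increment-by-the-user convention described above yields the usual loop
shape (a sketch; this is essentially what the idr_for_each_entry() macro
expands to):

    int id = 0;
    void *entry;

    while ((entry = idr_get_next(&my_idr, &id)) != NULL) {
            /* ... use entry ... */
            id++;           /* step past the ID just returned */
    }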
 
-
 /**
  * idr_replace - replace pointer for given id
- * @idp: idr handle
- * @ptr: pointer you want associated with the id
- * @id: lookup key
+ * @idr: idr handle
+ * @ptr: New pointer to associate with the ID
+ * @id: Lookup key
  *
- * Replace the pointer registered with an id and return the old value.
- * A %-ENOENT return indicates that @id was not found.
- * A %-EINVAL return indicates that @id was not within valid constraints.
+ * Replace the pointer registered with an ID and return the old value.
+ * This function can be called under the RCU read lock concurrently with
+ * idr_alloc() and idr_remove() (as long as the ID being removed is not
+ * the one being replaced!).
  *
- * The caller must serialize with writers.
+ * Returns: the old value on success.  %-ENOENT indicates that @id was
+ * not found.  %-EINVAL indicates that @id or @ptr were not valid.
  */
-void *idr_replace(struct idr *idp, void *ptr, int id)
+void *idr_replace(struct idr *idr, void *ptr, int id)
 {
-       int n;
-       struct idr_layer *p, *old_p;
+       struct radix_tree_node *node;
+       void __rcu **slot = NULL;
+       void *entry;
 
-       if (id < 0)
+       if (WARN_ON_ONCE(id < 0))
+               return ERR_PTR(-EINVAL);
+       if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
                return ERR_PTR(-EINVAL);
 
-       p = idp->top;
-       if (!p)
-               return ERR_PTR(-ENOENT);
-
-       if (id > idr_max(p->layer + 1))
-               return ERR_PTR(-ENOENT);
-
-       n = p->layer * IDR_BITS;
-       while ((n > 0) && p) {
-               p = p->ary[(id >> n) & IDR_MASK];
-               n -= IDR_BITS;
-       }
-
-       n = id & IDR_MASK;
-       if (unlikely(p == NULL || !test_bit(n, p->bitmap)))
+       entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
+       if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
                return ERR_PTR(-ENOENT);
 
-       old_p = p->ary[n];
-       rcu_assign_pointer(p->ary[n], ptr);
+       __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL);
 
-       return old_p;
+       return entry;
 }
 EXPORT_SYMBOL(idr_replace);
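
Because errors are encoded in the returned pointer, callers are expected to
test it with IS_ERR(); a brief sketch:

    old = idr_replace(&my_idr, new_obj, id);
    if (IS_ERR(old))
            return PTR_ERR(old);    /* -ENOENT or -EINVAL */
    /* free 'old' only once no RCU reader can still see it */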
 
-void __init idr_init_cache(void)
-{
-       idr_layer_cache = kmem_cache_create("idr_layer_cache",
-                               sizeof(struct idr_layer), 0, SLAB_PANIC, NULL);
-}
-
-/**
- * idr_init - initialize idr handle
- * @idp:       idr handle
- *
- * This function is use to set up the handle (@idp) that you will pass
- * to the rest of the functions.
- */
-void idr_init(struct idr *idp)
-{
-       memset(idp, 0, sizeof(struct idr));
-       spin_lock_init(&idp->lock);
-}
-EXPORT_SYMBOL(idr_init);
-
-static int idr_has_entry(int id, void *p, void *data)
-{
-       return 1;
-}
-
-bool idr_is_empty(struct idr *idp)
-{
-       return !idr_for_each(idp, idr_has_entry, NULL);
-}
-EXPORT_SYMBOL(idr_is_empty);
-
 /**
  * DOC: IDA description
- * IDA - IDR based ID allocator
  *
- * This is id allocator without id -> pointer translation.  Memory
- * usage is much lower than full blown idr because each id only
- * occupies a bit.  ida uses a custom leaf node which contains
- * IDA_BITMAP_BITS slots.
- *
- * 2007-04-25  written by Tejun Heo <htejun@gmail.com>
+ * The IDA is an ID allocator which does not provide the ability to
+ * associate an ID with a pointer.  As such, it only needs to store one
+ * bit per ID, and so is more space efficient than an IDR.  To use an IDA,
+ * define it using DEFINE_IDA() (or embed a &struct ida in a data structure,
+ * then initialise it using ida_init()).  To allocate a new ID, call
+ * ida_simple_get().  To free an ID, call ida_simple_remove().
+ *
+ * If you have more complex locking requirements, use a loop around
+ * ida_pre_get() and ida_get_new() to allocate a new ID.  Then use
+ * ida_remove() to free an ID.  You must make sure that ida_get_new() and
+ * ida_remove() cannot be called at the same time as each other for the
+ * same IDA.
+ *
+ * You can also use ida_get_new_above() if you need an ID to be allocated
+ * above a particular number.  ida_destroy() can be used to dispose of an
+ * IDA without needing to free the individual IDs in it.  You can use
+ * ida_is_empty() to find out whether the IDA has any IDs currently allocated.
+ *
+ * IDs are currently limited to the range [0, INT_MAX].  If this is an awkward
+ * limitation, it should be quite straightforward to raise the maximum.
  */
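
Following the recommendation above, a minimal sketch using the simple API
(my_ida is a hypothetical instance):

    static DEFINE_IDA(my_ida);

    int id = ida_simple_get(&my_ida, 0, 0, GFP_KERNEL); /* end == 0: no upper limit */
    if (id < 0)
            return id;
    /* ... use id ... */
    ida_simple_remove(&my_ida, id);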
 
-static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
-{
-       unsigned long flags;
-
-       if (!ida->free_bitmap) {
-               spin_lock_irqsave(&ida->idr.lock, flags);
-               if (!ida->free_bitmap) {
-                       ida->free_bitmap = bitmap;
-                       bitmap = NULL;
-               }
-               spin_unlock_irqrestore(&ida->idr.lock, flags);
-       }
-
-       kfree(bitmap);
-}
-
-/**
- * ida_pre_get - reserve resources for ida allocation
- * @ida:       ida handle
- * @gfp_mask:  memory allocation flag
- *
- * This function should be called prior to locking and calling the
- * following function.  It preallocates enough memory to satisfy the
- * worst possible allocation.
- *
- * If the system is REALLY out of memory this function returns %0,
- * otherwise %1.
+/*
+ * Developer's notes:
+ *
+ * The IDA uses the functionality provided by the IDR & radix tree to store
+ * bitmaps in each entry.  The IDR_FREE tag means there is at least one bit
+ * free, unlike the IDR where it means at least one entry is free.
+ *
+ * I considered telling the radix tree that each slot is an order-10 node
+ * and storing the bit numbers in the radix tree, but the radix tree can't
+ * allow a single multiorder entry at index 0, which would significantly
+ * increase memory consumption for the IDA.  So instead we divide the index
+ * by the number of bits in the leaf bitmap before doing a radix tree lookup.
+ *
+ * As an optimisation, if there are only a few low bits set in any given
+ * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional
+ * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits
+ * directly in the entry.  By being really tricksy, we could store
+ * BITS_PER_LONG - 1 bits, but there are diminishing returns after optimising
+ * for 0-3 allocated IDs.
+ *
+ * We allow the radix tree 'exceptional' count to get out of date.  Nothing
+ * in the IDA nor the radix tree code checks it.  If it becomes important
+ * to maintain an accurate exceptional count, switch the rcu_assign_pointer()
+ * calls to radix_tree_iter_replace() which will correct the exceptional
+ * count.
+ *
+ * The IDA always requires a lock to alloc/free.  If we add a 'test_bit'
+ * equivalent, it will still need locking.  Going to RCU lookup would require
+ * using RCU to free bitmaps, and that's not trivial without embedding an
+ * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
+ * bitmap, which is excessive.
  */
-int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
-{
-       /* allocate idr_layers */
-       if (!__idr_pre_get(&ida->idr, gfp_mask))
-               return 0;
 
-       /* allocate free_bitmap */
-       if (!ida->free_bitmap) {
-               struct ida_bitmap *bitmap;
-
-               bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask);
-               if (!bitmap)
-                       return 0;
-
-               free_bitmap(ida, bitmap);
-       }
-
-       return 1;
-}
-EXPORT_SYMBOL(ida_pre_get);
+#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS)
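
To make the notes above concrete: an ID is split into a radix tree index and
a bit offset by plain division, which is also the calculation IDA_MAX performs
on the top of the ID space (a restatement of the code below, not new
behaviour):

    unsigned long index = id / IDA_BITMAP_BITS;     /* radix tree slot */
    unsigned bit = id % IDA_BITMAP_BITS;            /* bit within the leaf bitmap */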
 
 /**
  * ida_get_new_above - allocate new ID above or equal to a start id
- * @ida:       ida handle
- * @starting_id: id to start search at
- * @p_id:      pointer to the allocated handle
+ * @ida: ida handle
+ * @start: id to start search at
+ * @id: pointer to the allocated handle
  *
- * Allocate new ID above or equal to @starting_id.  It should be called
- * with any required locks.
+ * Allocate new ID above or equal to @start.  It should be called
+ * with any required locks held so that ida_get_new_above(),
+ * ida_get_new() and ida_remove() cannot run concurrently on the same IDA.
+ * Consider using ida_simple_get() if you do not have complex locking
+ * requirements.
  *
  * If memory is required, it will return %-EAGAIN, you should unlock
  * and go back to the ida_pre_get() call.  If the ida is full, it will
- * return %-ENOSPC.
- *
- * Note that callers must ensure that concurrent access to @ida is not possible.
- * See ida_simple_get() for a varaint which takes care of locking.
+ * return %-ENOSPC.  On success, it will return 0.
  *
- * @p_id returns a value in the range @starting_id ... %0x7fffffff.
+ * @id returns a value in the range @start ... %0x7fffffff.
  */
-int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
+int ida_get_new_above(struct ida *ida, int start, int *id)
 {
-       struct idr_layer *pa[MAX_IDR_LEVEL + 1];
+       struct radix_tree_root *root = &ida->ida_rt;
+       void __rcu **slot;
+       struct radix_tree_iter iter;
        struct ida_bitmap *bitmap;
-       unsigned long flags;
-       int idr_id = starting_id / IDA_BITMAP_BITS;
-       int offset = starting_id % IDA_BITMAP_BITS;
-       int t, id;
-
- restart:
-       /* get vacant slot */
-       t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr);
-       if (t < 0)
-               return t == -ENOMEM ? -EAGAIN : t;
-
-       if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT)
-               return -ENOSPC;
-
-       if (t != idr_id)
-               offset = 0;
-       idr_id = t;
-
-       /* if bitmap isn't there, create a new one */
-       bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK];
-       if (!bitmap) {
-               spin_lock_irqsave(&ida->idr.lock, flags);
-               bitmap = ida->free_bitmap;
-               ida->free_bitmap = NULL;
-               spin_unlock_irqrestore(&ida->idr.lock, flags);
-
-               if (!bitmap)
-                       return -EAGAIN;
-
-               memset(bitmap, 0, sizeof(struct ida_bitmap));
-               rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
-                               (void *)bitmap);
-               pa[0]->count++;
-       }
-
-       /* lookup for empty slot */
-       t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset);
-       if (t == IDA_BITMAP_BITS) {
-               /* no empty slot after offset, continue to the next chunk */
-               idr_id++;
-               offset = 0;
-               goto restart;
-       }
-
-       id = idr_id * IDA_BITMAP_BITS + t;
-       if (id >= MAX_IDR_BIT)
-               return -ENOSPC;
+       unsigned long index;
+       unsigned bit, ebit;
+       int new;
+
+       index = start / IDA_BITMAP_BITS;
+       bit = start % IDA_BITMAP_BITS;
+       ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;
+
+       slot = radix_tree_iter_init(&iter, index);
+       for (;;) {
+               if (slot)
+                       slot = radix_tree_next_slot(slot, &iter,
+                                               RADIX_TREE_ITER_TAGGED);
+               if (!slot) {
+                       slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
+                       if (IS_ERR(slot)) {
+                               if (slot == ERR_PTR(-ENOMEM))
+                                       return -EAGAIN;
+                               return PTR_ERR(slot);
+                       }
+               }
+               if (iter.index > index) {
+                       bit = 0;
+                       ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;
+               }
+               new = iter.index * IDA_BITMAP_BITS;
+               bitmap = rcu_dereference_raw(*slot);
+               if (radix_tree_exception(bitmap)) {
+                       unsigned long tmp = (unsigned long)bitmap;
+                       ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit);
+                       if (ebit < BITS_PER_LONG) {
+                               tmp |= 1UL << ebit;
+                               rcu_assign_pointer(*slot, (void *)tmp);
+                               *id = new + ebit - RADIX_TREE_EXCEPTIONAL_SHIFT;
+                               return 0;
+                       }
+                       bitmap = this_cpu_xchg(ida_bitmap, NULL);
+                       if (!bitmap)
+                               return -EAGAIN;
+                       memset(bitmap, 0, sizeof(*bitmap));
+                       bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+                       rcu_assign_pointer(*slot, bitmap);
+               }
 
-       __set_bit(t, bitmap->bitmap);
-       if (++bitmap->nr_busy == IDA_BITMAP_BITS)
-               idr_mark_full(pa, idr_id);
+               if (bitmap) {
+                       bit = find_next_zero_bit(bitmap->bitmap,
+                                                       IDA_BITMAP_BITS, bit);
+                       new += bit;
+                       if (new < 0)
+                               return -ENOSPC;
+                       if (bit == IDA_BITMAP_BITS)
+                               continue;
 
-       *p_id = id;
+                       __set_bit(bit, bitmap->bitmap);
+                       if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+                               radix_tree_iter_tag_clear(root, &iter,
+                                                               IDR_FREE);
+               } else {
+                       new += bit;
+                       if (new < 0)
+                               return -ENOSPC;
+                       if (ebit < BITS_PER_LONG) {
+                               bitmap = (void *)((1UL << ebit) |
+                                               RADIX_TREE_EXCEPTIONAL_ENTRY);
+                               radix_tree_iter_replace(root, &iter, slot,
+                                               bitmap);
+                               *id = new;
+                               return 0;
+                       }
+                       bitmap = this_cpu_xchg(ida_bitmap, NULL);
+                       if (!bitmap)
+                               return -EAGAIN;
+                       memset(bitmap, 0, sizeof(*bitmap));
+                       __set_bit(bit, bitmap->bitmap);
+                       radix_tree_iter_replace(root, &iter, slot, bitmap);
+               }
 
-       /* Each leaf node can handle nearly a thousand slots and the
-        * whole idea of ida is to have small memory foot print.
-        * Throw away extra resources one by one after each successful
-        * allocation.
-        */
-       if (ida->idr.id_free_cnt || ida->free_bitmap) {
-               struct idr_layer *p = get_from_free_list(&ida->idr);
-               if (p)
-                       kmem_cache_free(idr_layer_cache, p);
+               *id = new;
+               return 0;
        }
-
-       return 0;
 }
 EXPORT_SYMBOL(ida_get_new_above);
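
The -EAGAIN protocol documented above gives the classic retry loop (a sketch;
my_lock stands in for whatever lock the caller already holds around the IDA):

    int ret, id;

    do {
            if (!ida_pre_get(&my_ida, GFP_KERNEL))
                    return -ENOMEM;
            spin_lock(&my_lock);
            ret = ida_get_new_above(&my_ida, 0, &id);
            spin_unlock(&my_lock);
    } while (ret == -EAGAIN);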
 
 /**
- * ida_remove - remove the given ID
- * @ida:       ida handle
- * @id:                ID to free
+ * ida_remove - Free the given ID
+ * @ida: ida handle
+ * @id: ID to free
+ *
+ * This function should not be called at the same time as ida_get_new_above().
  */
 void ida_remove(struct ida *ida, int id)
 {
-       struct idr_layer *p = ida->idr.top;
-       int shift = (ida->idr.layers - 1) * IDR_BITS;
-       int idr_id = id / IDA_BITMAP_BITS;
-       int offset = id % IDA_BITMAP_BITS;
-       int n;
+       unsigned long index = id / IDA_BITMAP_BITS;
+       unsigned offset = id % IDA_BITMAP_BITS;
        struct ida_bitmap *bitmap;
+       unsigned long *btmp;
+       struct radix_tree_iter iter;
+       void __rcu **slot;
 
-       if (idr_id > idr_max(ida->idr.layers))
+       slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);
+       if (!slot)
                goto err;
 
-       /* clear full bits while looking up the leaf idr_layer */
-       while ((shift > 0) && p) {
-               n = (idr_id >> shift) & IDR_MASK;
-               __clear_bit(n, p->bitmap);
-               p = p->ary[n];
-               shift -= IDR_BITS;
+       bitmap = rcu_dereference_raw(*slot);
+       if (radix_tree_exception(bitmap)) {
+               btmp = (unsigned long *)slot;
+               offset += RADIX_TREE_EXCEPTIONAL_SHIFT;
+               if (offset >= BITS_PER_LONG)
+                       goto err;
+       } else {
+               btmp = bitmap->bitmap;
        }
-
-       if (p == NULL)
-               goto err;
-
-       n = idr_id & IDR_MASK;
-       __clear_bit(n, p->bitmap);
-
-       bitmap = (void *)p->ary[n];
-       if (!bitmap || !test_bit(offset, bitmap->bitmap))
+       if (!test_bit(offset, btmp))
                goto err;
 
-       /* update bitmap and remove it if empty */
-       __clear_bit(offset, bitmap->bitmap);
-       if (--bitmap->nr_busy == 0) {
-               __set_bit(n, p->bitmap);        /* to please idr_remove() */
-               idr_remove(&ida->idr, idr_id);
-               free_bitmap(ida, bitmap);
+       __clear_bit(offset, btmp);
+       radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
+       if (radix_tree_exception(bitmap)) {
+               if (rcu_dereference_raw(*slot) ==
+                                       (void *)RADIX_TREE_EXCEPTIONAL_ENTRY)
+                       radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+       } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
+               kfree(bitmap);
+               radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
        }
-
        return;
-
  err:
        WARN(1, "ida_remove called for id=%d which is not allocated.\n", id);
 }
 EXPORT_SYMBOL(ida_remove);
 
 /**
- * ida_destroy - release all cached layers within an ida tree
- * @ida:               ida handle
+ * ida_destroy - Free the contents of an ida
+ * @ida: ida handle
+ *
+ * Calling this function releases all resources associated with an IDA.  When
+ * this call returns, the IDA is empty and can be reused or freed.  The caller
+ * should not allow ida_remove() or ida_get_new_above() to be called at the
+ * same time.
  */
 void ida_destroy(struct ida *ida)
 {
-       idr_destroy(&ida->idr);
-       kfree(ida->free_bitmap);
+       struct radix_tree_iter iter;
+       void __rcu **slot;
+
+       radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
+               struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
+               if (!radix_tree_exception(bitmap))
+                       kfree(bitmap);
+               radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+       }
 }
 EXPORT_SYMBOL(ida_destroy);
 
@@ -1141,18 +482,3 @@ void ida_simple_remove(struct ida *ida, unsigned int id)
        spin_unlock_irqrestore(&simple_ida_lock, flags);
 }
 EXPORT_SYMBOL(ida_simple_remove);
-
-/**
- * ida_init - initialize ida handle
- * @ida:       ida handle
- *
- * This function is use to set up the handle (@ida) that you will pass
- * to the rest of the functions.
- */
-void ida_init(struct ida *ida)
-{
-       memset(ida, 0, sizeof(struct ida));
-       idr_init(&ida->idr);
-
-}
-EXPORT_SYMBOL(ida_init);
index 7f7bfa55eb6df3e9c9c3c2a244ff02d475197e9d..a34db8d276676782ca8d45f286827e7074e26a37 100644 (file)
 bool __list_add_valid(struct list_head *new, struct list_head *prev,
                      struct list_head *next)
 {
-       CHECK_DATA_CORRUPTION(next->prev != prev,
-               "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
-               prev, next->prev, next);
-       CHECK_DATA_CORRUPTION(prev->next != next,
-               "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
-               next, prev->next, prev);
-       CHECK_DATA_CORRUPTION(new == prev || new == next,
-               "list_add double add: new=%p, prev=%p, next=%p.\n",
-               new, prev, next);
+       if (CHECK_DATA_CORRUPTION(next->prev != prev,
+                       "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+                       prev, next->prev, next) ||
+           CHECK_DATA_CORRUPTION(prev->next != next,
+                       "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+                       next, prev->next, prev) ||
+           CHECK_DATA_CORRUPTION(new == prev || new == next,
+                       "list_add double add: new=%p, prev=%p, next=%p.\n",
+                       new, prev, next))
+               return false;
 
        return true;
 }
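
The value of returning false rather than falling through is that the caller
can now skip the corrupting write entirely; a sketch of the expected caller,
mirroring __list_add() in include/linux/list.h:

    static inline void __list_add(struct list_head *new,
                                  struct list_head *prev,
                                  struct list_head *next)
    {
            if (!__list_add_valid(new, prev, next))
                    return;         /* leave the list untouched on corruption */

            next->prev = new;
            new->next = next;
            new->prev = prev;
            WRITE_ONCE(prev->next, new);
    }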
@@ -41,18 +42,20 @@ bool __list_del_entry_valid(struct list_head *entry)
        prev = entry->prev;
        next = entry->next;
 
-       CHECK_DATA_CORRUPTION(next == LIST_POISON1,
-               "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
-               entry, LIST_POISON1);
-       CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
-               "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
-               entry, LIST_POISON2);
-       CHECK_DATA_CORRUPTION(prev->next != entry,
-               "list_del corruption. prev->next should be %p, but was %p\n",
-               entry, prev->next);
-       CHECK_DATA_CORRUPTION(next->prev != entry,
-               "list_del corruption. next->prev should be %p, but was %p\n",
-               entry, next->prev);
+       if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+                       "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+                       entry, LIST_POISON1) ||
+           CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
+                       "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+                       entry, LIST_POISON2) ||
+           CHECK_DATA_CORRUPTION(prev->next != entry,
+                       "list_del corruption. prev->next should be %p, but was %p\n",
+                       entry, prev->next) ||
+           CHECK_DATA_CORRUPTION(next->prev != entry,
+                       "list_del corruption. next->prev should be %p, but was %p\n",
+                       entry, next->prev))
+               return false;
+
        return true;
 
 }
index 8085d04e9309145200d26cfc673e32eae63d3518..f7b113271d136d62e28ede163ebc7f86d275b6e3 100644 (file)
@@ -1,3 +1,5 @@
+ccflags-y += -O3
+
 obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o
 obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o
 obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o
index 28321d8f75eff530ca6832f67f0418cf0abad936..cc7b6d4cc7c7cf4dbb62e8c7940429e58dfce0f6 100644 (file)
@@ -1,19 +1,16 @@
 /*
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *     Changed for kernel usage by:
+ *     Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+/*-************************************
+ *     Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
-               const char *source,
-               char *dest,
-               int isize,
-               int maxoutputsize)
+static const int LZ4_minLength = (MFLIMIT + 1);
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1));
+
+/*-******************************
+ *     Compression functions
+ ********************************/
+static FORCE_INLINE U32 LZ4_hash4(
+       U32 sequence,
+       tableType_t const tableType)
 {
-       HTYPE *hashtable = (HTYPE *)ctx;
-       const u8 *ip = (u8 *)source;
-#if LZ4_ARCH64
-       const BYTE * const base = ip;
+       if (tableType == byU16)
+               return ((sequence * 2654435761U)
+                       >> ((MINMATCH * 8) - (LZ4_HASHLOG + 1)));
+       else
+               return ((sequence * 2654435761U)
+                       >> ((MINMATCH * 8) - LZ4_HASHLOG));
+}
+
+static FORCE_INLINE U32 LZ4_hash5(
+       U64 sequence,
+       tableType_t const tableType)
+{
+       const U32 hashLog = (tableType == byU16)
+               ? LZ4_HASHLOG + 1
+               : LZ4_HASHLOG;
+
+#if LZ4_LITTLE_ENDIAN
+       static const U64 prime5bytes = 889523592379ULL;
+
+       return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
 #else
-       const int base = 0;
+       static const U64 prime8bytes = 11400714785074694791ULL;
+
+       return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
-       const u8 *anchor = ip;
-       const u8 *const iend = ip + isize;
-       const u8 *const mflimit = iend - MFLIMIT;
-       #define MATCHLIMIT (iend - LASTLITERALS)
-
-       u8 *op = (u8 *) dest;
-       u8 *const oend = op + maxoutputsize;
-       int length;
-       const int skipstrength = SKIPSTRENGTH;
-       u32 forwardh;
-       int lastrun;
-
-       /* Init */
-       if (isize < MINLENGTH)
-               goto _last_literals;
+}
+
+static FORCE_INLINE U32 LZ4_hashPosition(
+       const void *p,
+       tableType_t const tableType)
+{
+#if LZ4_ARCH64
+       if (tableType == byU32)
+               return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+       return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(
+       const BYTE *p,
+       U32 h,
+       void *tableBase,
+       tableType_t const tableType,
+       const BYTE *srcBase)
+{
+       switch (tableType) {
+       case byPtr:
+       {
+               const BYTE **hashTable = (const BYTE **)tableBase;
+
+               hashTable[h] = p;
+               return;
+       }
+       case byU32:
+       {
+               U32 *hashTable = (U32 *) tableBase;
+
+               hashTable[h] = (U32)(p - srcBase);
+               return;
+       }
+       case byU16:
+       {
+               U16 *hashTable = (U16 *) tableBase;
+
+               hashTable[h] = (U16)(p - srcBase);
+               return;
+       }
+       }
+}
+
+static FORCE_INLINE void LZ4_putPosition(
+       const BYTE *p,
+       void *tableBase,
+       tableType_t tableType,
+       const BYTE *srcBase)
+{
+       U32 const h = LZ4_hashPosition(p, tableType);
+
+       LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(
+       U32 h,
+       void *tableBase,
+       tableType_t tableType,
+       const BYTE *srcBase)
+{
+       if (tableType == byPtr) {
+               const BYTE **hashTable = (const BYTE **) tableBase;
+
+               return hashTable[h];
+       }
+
+       if (tableType == byU32) {
+               const U32 * const hashTable = (U32 *) tableBase;
+
+               return hashTable[h] + srcBase;
+       }
+
+       {
+               /* default, to ensure a return */
+               const U16 * const hashTable = (U16 *) tableBase;
+
+               return hashTable[h] + srcBase;
+       }
+}
+
+static FORCE_INLINE const BYTE *LZ4_getPosition(
+       const BYTE *p,
+       void *tableBase,
+       tableType_t tableType,
+       const BYTE *srcBase)
+{
+       U32 const h = LZ4_hashPosition(p, tableType);
+
+       return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
 
-       memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static FORCE_INLINE int LZ4_compress_generic(
+       LZ4_stream_t_internal * const dictPtr,
+       const char * const source,
+       char * const dest,
+       const int inputSize,
+       const int maxOutputSize,
+       const limitedOutput_directive outputLimited,
+       const tableType_t tableType,
+       const dict_directive dict,
+       const dictIssue_directive dictIssue,
+       const U32 acceleration)
+{
+       const BYTE *ip = (const BYTE *) source;
+       const BYTE *base;
+       const BYTE *lowLimit;
+       const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+       const BYTE * const dictionary = dictPtr->dictionary;
+       const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+       const size_t dictDelta = dictEnd - (const BYTE *)source;
+       const BYTE *anchor = (const BYTE *) source;
+       const BYTE * const iend = ip + inputSize;
+       const BYTE * const mflimit = iend - MFLIMIT;
+       const BYTE * const matchlimit = iend - LASTLITERALS;
+
+       BYTE *op = (BYTE *) dest;
+       BYTE * const olimit = op + maxOutputSize;
+
+       U32 forwardH;
+       size_t refDelta = 0;
+
+       /* Init conditions */
+       if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+               /* Unsupported inputSize, too large (or negative) */
+               return 0;
+       }
+
+       switch (dict) {
+       case noDict:
+       default:
+               base = (const BYTE *)source;
+               lowLimit = (const BYTE *)source;
+               break;
+       case withPrefix64k:
+               base = (const BYTE *)source - dictPtr->currentOffset;
+               lowLimit = (const BYTE *)source - dictPtr->dictSize;
+               break;
+       case usingExtDict:
+               base = (const BYTE *)source - dictPtr->currentOffset;
+               lowLimit = (const BYTE *)source;
+               break;
+       }
+
+       if ((tableType == byU16)
+               && (inputSize >= LZ4_64Klimit)) {
+               /* Size too large (not within 64K limit) */
+               return 0;
+       }
+
+       if (inputSize < LZ4_minLength) {
+               /* Input too small, no compression (all literals) */
+               goto _last_literals;
+       }
 
        /* First Byte */
-       hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
+       LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
        ip++;
-       forwardh = LZ4_HASH_VALUE(ip);
+       forwardH = LZ4_hashPosition(ip, tableType);
 
        /* Main Loop */
-       for (;;) {
-               int findmatchattempts = (1U << skipstrength) + 3;
-               const u8 *forwardip = ip;
-               const u8 *ref;
-               u8 *token;
+       for ( ; ; ) {
+               const BYTE *match;
+               BYTE *token;
 
                /* Find a match */
-               do {
-                       u32 h = forwardh;
-                       int step = findmatchattempts++ >> skipstrength;
-                       ip = forwardip;
-                       forwardip = ip + step;
-
-                       if (unlikely(forwardip > mflimit))
-                               goto _last_literals;
-
-                       forwardh = LZ4_HASH_VALUE(forwardip);
-                       ref = base + hashtable[h];
-                       hashtable[h] = ip - base;
-               } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+               {
+                       const BYTE *forwardIp = ip;
+                       unsigned int step = 1;
+                       unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER;
+
+                       do {
+                               U32 const h = forwardH;
+
+                               ip = forwardIp;
+                               forwardIp += step;
+                               step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);
+
+                               if (unlikely(forwardIp > mflimit))
+                                       goto _last_literals;
+
+                               match = LZ4_getPositionOnHash(h,
+                                       dictPtr->hashTable,
+                                       tableType, base);
+
+                               if (dict == usingExtDict) {
+                                       if (match < (const BYTE *)source) {
+                                               refDelta = dictDelta;
+                                               lowLimit = dictionary;
+                                       } else {
+                                               refDelta = 0;
+                                               lowLimit = (const BYTE *)source;
+                                       }
+                               }
+
+                               forwardH = LZ4_hashPosition(forwardIp,
+                                       tableType);
+
+                               LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+                                       tableType, base);
+                       } while (((dictIssue == dictSmall)
+                                       ? (match < lowRefLimit)
+                                       : 0)
+                               || ((tableType == byU16)
+                                       ? 0
+                                       : (match + MAX_DISTANCE < ip))
+                               || (LZ4_read32(match + refDelta)
+                                       != LZ4_read32(ip)));
+               }
 
                /* Catch up */
-               while ((ip > anchor) && (ref > (u8 *)source) &&
-                       unlikely(ip[-1] == ref[-1])) {
+               while (((ip > anchor) & (match + refDelta > lowLimit))
+                               && (unlikely(ip[-1] == match[refDelta - 1]))) {
                        ip--;
-                       ref--;
+                       match--;
                }
 
-               /* Encode Literal length */
-               length = (int)(ip - anchor);
-               token = op++;
-               /* check output limit */
-               if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-                       (length >> 8) > oend))
-                       return 0;
+               /* Encode Literals */
+               {
+                       unsigned const int litLength = (unsigned int)(ip - anchor);
 
-               if (length >= (int)RUN_MASK) {
-                       int len;
-                       *token = (RUN_MASK << ML_BITS);
-                       len = length - RUN_MASK;
-                       for (; len > 254 ; len -= 255)
-                               *op++ = 255;
-                       *op++ = (u8)len;
-               } else
-                       *token = (length << ML_BITS);
+                       token = op++;
+
+                       if ((outputLimited) &&
+                               /* Check output buffer overflow */
+                               (unlikely(op + litLength +
+                                       (2 + 1 + LASTLITERALS) +
+                                       (litLength / 255) > olimit)))
+                               return 0;
+
+                       if (litLength >= RUN_MASK) {
+                               int len = (int)litLength - RUN_MASK;
+
+                               *token = (RUN_MASK << ML_BITS);
+
+                               for (; len >= 255; len -= 255)
+                                       *op++ = 255;
+                               *op++ = (BYTE)len;
+                       } else
+                               *token = (BYTE)(litLength << ML_BITS);
+
+                       /* Copy Literals */
+                       LZ4_wildCopy(op, anchor, op + litLength);
+                       op += litLength;
+               }
 
-               /* Copy Literals */
-               LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
                /* Encode Offset */
-               LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+               LZ4_writeLE16(op, (U16)(ip - match));
+               op += 2;
 
-               /* Start Counting */
-               ip += MINMATCH;
-               /* MinMatch verified */
-               ref += MINMATCH;
-               anchor = ip;
-               while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-                       #if LZ4_ARCH64
-                       u64 diff = A64(ref) ^ A64(ip);
-                       #else
-                       u32 diff = A32(ref) ^ A32(ip);
-                       #endif
-                       if (!diff) {
-                               ip += STEPSIZE;
-                               ref += STEPSIZE;
-                               continue;
-                       }
-                       ip += LZ4_NBCOMMONBYTES(diff);
-                       goto _endcount;
-               }
-               #if LZ4_ARCH64
-               if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-                       ip += 4;
-                       ref += 4;
-               }
-               #endif
-               if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-                       ip += 2;
-                       ref += 2;
-               }
-               if ((ip < MATCHLIMIT) && (*ref == *ip))
-                       ip++;
-_endcount:
                /* Encode MatchLength */
-               length = (int)(ip - anchor);
-               /* Check output limit */
-               if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-                       return 0;
-               if (length >= (int)ML_MASK) {
-                       *token += ML_MASK;
-                       length -= ML_MASK;
-                       for (; length > 509 ; length -= 510) {
-                               *op++ = 255;
-                               *op++ = 255;
-                       }
-                       if (length > 254) {
-                               length -= 255;
-                               *op++ = 255;
+               {
+                       unsigned int matchCode;
+
+                       if ((dict == usingExtDict)
+                               && (lowLimit == dictionary)) {
+                               const BYTE *limit;
+
+                               match += refDelta;
+                               limit = ip + (dictEnd - match);
+
+                               if (limit > matchlimit)
+                                       limit = matchlimit;
+
+                               matchCode = LZ4_count(ip + MINMATCH,
+                                       match + MINMATCH, limit);
+
+                               ip += MINMATCH + matchCode;
+
+                               if (ip == limit) {
+                                       unsigned const int more = LZ4_count(ip,
+                                               (const BYTE *)source,
+                                               matchlimit);
+
+                                       matchCode += more;
+                                       ip += more;
+                               }
+                       } else {
+                               matchCode = LZ4_count(ip + MINMATCH,
+                                       match + MINMATCH, matchlimit);
+                               ip += MINMATCH + matchCode;
                        }
-                       *op++ = (u8)length;
-               } else
-                       *token += length;
+
+                       if (outputLimited &&
+                               /* Check output buffer overflow */
+                               (unlikely(op +
+                                       (1 + LASTLITERALS) +
+                                       (matchCode >> 8) > olimit)))
+                               return 0;
+
+                       if (matchCode >= ML_MASK) {
+                               *token += ML_MASK;
+                               matchCode -= ML_MASK;
+                               LZ4_write32(op, 0xFFFFFFFF);
+
+                               while (matchCode >= 4 * 255) {
+                                       op += 4;
+                                       LZ4_write32(op, 0xFFFFFFFF);
+                                       matchCode -= 4 * 255;
+                               }
+
+                               op += matchCode / 255;
+                               *op++ = (BYTE)(matchCode % 255);
+                       } else
+                               *token += (BYTE)(matchCode);
+               }
+
+               anchor = ip;
 
                /* Test end of chunk */
-               if (ip > mflimit) {
-                       anchor = ip;
+               if (ip > mflimit)
                        break;
-               }
 
                /* Fill table */
-               hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+               LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
 
                /* Test next position */
-               ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-               hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-               if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+               match = LZ4_getPosition(ip, dictPtr->hashTable,
+                       tableType, base);
+
+               if (dict == usingExtDict) {
+                       if (match < (const BYTE *)source) {
+                               refDelta = dictDelta;
+                               lowLimit = dictionary;
+                       } else {
+                               refDelta = 0;
+                               lowLimit = (const BYTE *)source;
+                       }
+               }
+
+               LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+
+               if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+                       && (match + MAX_DISTANCE >= ip)
+                       && (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
                        token = op++;
                        *token = 0;
                        goto _next_match;
                }
 
                /* Prepare next loop */
-               anchor = ip++;
-               forwardh = LZ4_HASH_VALUE(ip);
+               forwardH = LZ4_hashPosition(++ip, tableType);
        }
 
 _last_literals:
        /* Encode Last Literals */
-       lastrun = (int)(iend - anchor);
-       if (((char *)op - dest) + lastrun + 1
-               + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-               return 0;
+       {
+               size_t const lastRun = (size_t)(iend - anchor);
+
+               if ((outputLimited) &&
+                       /* Check output buffer overflow */
+                       ((op - (BYTE *)dest) + lastRun + 1 +
+                       ((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize))
+                       return 0;
+
+               if (lastRun >= RUN_MASK) {
+                       size_t accumulator = lastRun - RUN_MASK;
+                       *op++ = RUN_MASK << ML_BITS;
+                       for (; accumulator >= 255; accumulator -= 255)
+                               *op++ = 255;
+                       *op++ = (BYTE) accumulator;
+               } else {
+                       *op++ = (BYTE)(lastRun << ML_BITS);
+               }
 
-       if (lastrun >= (int)RUN_MASK) {
-               *op++ = (RUN_MASK << ML_BITS);
-               lastrun -= RUN_MASK;
-               for (; lastrun > 254 ; lastrun -= 255)
-                       *op++ = 255;
-               *op++ = (u8)lastrun;
-       } else
-               *op++ = (lastrun << ML_BITS);
-       memcpy(op, anchor, iend - anchor);
-       op += iend - anchor;
+               memcpy(op, anchor, lastRun);
+
+               op += lastRun;
+       }
 
        /* End */
-       return (int)(((char *)op) - dest);
+       return (int) (((char *)op) - dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-               const char *source,
-               char *dest,
-               int isize,
-               int maxoutputsize)
+static int LZ4_compress_fast_extState(
+       void *state,
+       const char *source,
+       char *dest,
+       int inputSize,
+       int maxOutputSize,
+       int acceleration)
 {
-       u16 *hashtable = (u16 *)ctx;
-       const u8 *ip = (u8 *) source;
-       const u8 *anchor = ip;
-       const u8 *const base = ip;
-       const u8 *const iend = ip + isize;
-       const u8 *const mflimit = iend - MFLIMIT;
-       #define MATCHLIMIT (iend - LASTLITERALS)
-
-       u8 *op = (u8 *) dest;
-       u8 *const oend = op + maxoutputsize;
-       int len, length;
-       const int skipstrength = SKIPSTRENGTH;
-       u32 forwardh;
-       int lastrun;
-
-       /* Init */
-       if (isize < MINLENGTH)
-               goto _last_literals;
+       LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+#if LZ4_ARCH64
+       const tableType_t tableType = byU32;
+#else
+       const tableType_t tableType = byPtr;
+#endif
+
+       LZ4_resetStream((LZ4_stream_t *)state);
+
+       if (acceleration < 1)
+               acceleration = LZ4_ACCELERATION_DEFAULT;
+
+       if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) {
+               if (inputSize < LZ4_64Klimit)
+                       return LZ4_compress_generic(ctx, source,
+                               dest, inputSize, 0,
+                               noLimit, byU16, noDict,
+                               noDictIssue, acceleration);
+               else
+                       return LZ4_compress_generic(ctx, source,
+                               dest, inputSize, 0,
+                               noLimit, tableType, noDict,
+                               noDictIssue, acceleration);
+       } else {
+               if (inputSize < LZ4_64Klimit)
+                       return LZ4_compress_generic(ctx, source,
+                               dest, inputSize,
+                               maxOutputSize, limitedOutput, byU16, noDict,
+                               noDictIssue, acceleration);
+               else
+                       return LZ4_compress_generic(ctx, source,
+                               dest, inputSize,
+                               maxOutputSize, limitedOutput, tableType, noDict,
+                               noDictIssue, acceleration);
+       }
+}
+
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+       int maxOutputSize, int acceleration, void *wrkmem)
+{
+       return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+               maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
 
-       memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+       int maxOutputSize, void *wrkmem)
+{
+       return LZ4_compress_fast(source, dest, inputSize,
+               maxOutputSize, LZ4_ACCELERATION_DEFAULT, wrkmem);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
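
For orientation, a minimal sketch of how a kernel caller might drive this new one-shot API; the helper name and error mapping are illustrative, only LZ4_compress_default() and LZ4_MEM_COMPRESS come from <linux/lz4.h>:

#include <linux/errno.h>
#include <linux/lz4.h>
#include <linux/vmalloc.h>

static int example_compress(const char *src, int src_len,
			    char *dst, int dst_capacity)
{
	void *wrkmem = vmalloc(LZ4_MEM_COMPRESS);	/* scratch hash table */
	int out_len;

	if (!wrkmem)
		return -ENOMEM;

	/* Returns the compressed size, or 0 if dst was too small. */
	out_len = LZ4_compress_default(src, dst, src_len,
				       dst_capacity, wrkmem);

	vfree(wrkmem);
	return out_len > 0 ? out_len : -E2BIG;
}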
+
+/*-******************************
+ *     *_destSize() variant
+ ********************************/
+static int LZ4_compress_destSize_generic(
+       LZ4_stream_t_internal * const ctx,
+       const char * const src,
+       char * const dst,
+       int * const srcSizePtr,
+       const int targetDstSize,
+       const tableType_t tableType)
+{
+       const BYTE *ip = (const BYTE *) src;
+       const BYTE *base = (const BYTE *) src;
+       const BYTE *lowLimit = (const BYTE *) src;
+       const BYTE *anchor = ip;
+       const BYTE * const iend = ip + *srcSizePtr;
+       const BYTE * const mflimit = iend - MFLIMIT;
+       const BYTE * const matchlimit = iend - LASTLITERALS;
+
+       BYTE *op = (BYTE *) dst;
+       BYTE * const oend = op + targetDstSize;
+       BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+               - 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+       BYTE * const oMaxMatch = op + targetDstSize
+               - (LASTLITERALS + 1 /* token */);
+       BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+       U32 forwardH;
+
+       /* Init conditions */
+       /* Impossible to store anything */
+       if (targetDstSize < 1)
+               return 0;
+       /* Unsupported input size, too large (or negative) */
+       if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+               return 0;
+       /* Size too large (not within 64K limit) */
+       if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+               return 0;
+       /* Input too small, no compression (all literals) */
+       if (*srcSizePtr < LZ4_minLength)
+               goto _last_literals;
 
        /* First Byte */
-       ip++;
-       forwardh = LZ4_HASH64K_VALUE(ip);
+       *srcSizePtr = 0;
+       LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+       ip++;
+       forwardH = LZ4_hashPosition(ip, tableType);
 
        /* Main Loop */
-       for (;;) {
-               int findmatchattempts = (1U << skipstrength) + 3;
-               const u8 *forwardip = ip;
-               const u8 *ref;
-               u8 *token;
+       for (;;) {
+               const BYTE *match;
+               BYTE *token;
 
                /* Find a match */
-               do {
-                       u32 h = forwardh;
-                       int step = findmatchattempts++ >> skipstrength;
-                       ip = forwardip;
-                       forwardip = ip + step;
-
-                       if (forwardip > mflimit)
-                               goto _last_literals;
-
-                       forwardh = LZ4_HASH64K_VALUE(forwardip);
-                       ref = base + hashtable[h];
-                       hashtable[h] = (u16)(ip - base);
-               } while (A32(ref) != A32(ip));
+               {
+                       const BYTE *forwardIp = ip;
+                       unsigned int step = 1;
+                       unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER;
+
+                       do {
+                               U32 h = forwardH;
+
+                               ip = forwardIp;
+                               forwardIp += step;
+                               step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);
+
+                               if (unlikely(forwardIp > mflimit))
+                                       goto _last_literals;
+
+                               match = LZ4_getPositionOnHash(h, ctx->hashTable,
+                                       tableType, base);
+                               forwardH = LZ4_hashPosition(forwardIp,
+                                       tableType);
+                               LZ4_putPositionOnHash(ip, h,
+                                       ctx->hashTable, tableType,
+                                       base);
+
+                       } while (((tableType == byU16)
+                               ? 0
+                               : (match + MAX_DISTANCE < ip))
+                               || (LZ4_read32(match) != LZ4_read32(ip)));
+               }
 
                /* Catch up */
-               while ((ip > anchor) && (ref > (u8 *)source)
-                       && (ip[-1] == ref[-1])) {
+               while ((ip > anchor)
+                       && (match > lowLimit)
+                       && (unlikely(ip[-1] == match[-1]))) {
                        ip--;
-                       ref--;
+                       match--;
                }
 
                /* Encode Literal length */
-               length = (int)(ip - anchor);
-               token = op++;
-               /* Check output limit */
-               if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-                       + (length >> 8) > oend))
-                       return 0;
-               if (length >= (int)RUN_MASK) {
-                       *token = (RUN_MASK << ML_BITS);
-                       len = length - RUN_MASK;
-                       for (; len > 254 ; len -= 255)
-                               *op++ = 255;
-                       *op++ = (u8)len;
-               } else
-                       *token = (length << ML_BITS);
+               {
+                       unsigned int litLength = (unsigned int)(ip - anchor);
 
-               /* Copy Literals */
-               LZ4_BLINDCOPY(anchor, op, length);
+                       token = op++;
+                       if (op + ((litLength + 240) / 255)
+                               + litLength > oMaxLit) {
+                               /* Not enough space for a last match */
+                               op--;
+                               goto _last_literals;
+                       }
+                       if (litLength >= RUN_MASK) {
+                               unsigned int len = litLength - RUN_MASK;
+                               *token = (RUN_MASK << ML_BITS);
+                               for (; len >= 255; len -= 255)
+                                       *op++ = 255;
+                               *op++ = (BYTE)len;
+                       } else
+                               *token = (BYTE)(litLength << ML_BITS);
+
+                       /* Copy Literals */
+                       LZ4_wildCopy(op, anchor, op + litLength);
+                       op += litLength;
+               }
 
 _next_match:
                /* Encode Offset */
-               LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+               LZ4_writeLE16(op, (U16)(ip - match));
+               op += 2;
 
-               /* Start Counting */
-               ip += MINMATCH;
-               /* MinMatch verified */
-               ref += MINMATCH;
-               anchor = ip;
+               /* Encode MatchLength */
+               {
+                       size_t matchLength = LZ4_count(ip + MINMATCH,
+                               match + MINMATCH, matchlimit);
 
-               while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-                       #if LZ4_ARCH64
-                       u64 diff = A64(ref) ^ A64(ip);
-                       #else
-                       u32 diff = A32(ref) ^ A32(ip);
-                       #endif
-
-                       if (!diff) {
-                               ip += STEPSIZE;
-                               ref += STEPSIZE;
-                               continue;
+                       if (op + ((matchLength + 240)/255) > oMaxMatch) {
+                               /* Match description too long : reduce it */
+                               matchLength = (15 - 1) + (oMaxMatch - op) * 255;
                        }
-                       ip += LZ4_NBCOMMONBYTES(diff);
-                       goto _endcount;
-               }
-               #if LZ4_ARCH64
-               if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-                       ip += 4;
-                       ref += 4;
+                       ip += MINMATCH + matchLength;
+
+                       if (matchLength >= ML_MASK) {
+                               *token += ML_MASK;
+                               matchLength -= ML_MASK;
+                               while (matchLength >= 255) {
+                                       matchLength -= 255;
+                                       *op++ = 255;
+                               }
+                               *op++ = (BYTE)matchLength;
+                       } else
+                               *token += (BYTE)(matchLength);
                }
-               #endif
-               if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-                       ip += 2;
-                       ref += 2;
-               }
-               if ((ip < MATCHLIMIT) && (*ref == *ip))
-                       ip++;
-_endcount:
 
-               /* Encode MatchLength */
-               len = (int)(ip - anchor);
-               /* Check output limit */
-               if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-                       return 0;
-               if (len >= (int)ML_MASK) {
-                       *token += ML_MASK;
-                       len -= ML_MASK;
-                       for (; len > 509 ; len -= 510) {
-                               *op++ = 255;
-                               *op++ = 255;
-                       }
-                       if (len > 254) {
-                               len -= 255;
-                               *op++ = 255;
-                       }
-                       *op++ = (u8)len;
-               } else
-                       *token += len;
+               anchor = ip;
 
-               /* Test end of chunk */
-               if (ip > mflimit) {
-                       anchor = ip;
+               /* Test end of block */
+               if (ip > mflimit)
+                       break;
+               if (op > oMaxSeq)
                        break;
-               }
 
                /* Fill table */
-               hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+               LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
 
                /* Test next position */
-               ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-               hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-               if (A32(ref) == A32(ip)) {
-                       token = op++;
-                       *token = 0;
+               match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+               LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+               if ((match + MAX_DISTANCE >= ip)
+                       && (LZ4_read32(match) == LZ4_read32(ip))) {
+                       token = op++;
+                       *token = 0;
                        goto _next_match;
                }
 
                /* Prepare next loop */
-               anchor = ip++;
-               forwardh = LZ4_HASH64K_VALUE(ip);
+               forwardH = LZ4_hashPosition(++ip, tableType);
        }
 
 _last_literals:
        /* Encode Last Literals */
-       lastrun = (int)(iend - anchor);
-       if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-               return 0;
-       if (lastrun >= (int)RUN_MASK) {
-               *op++ = (RUN_MASK << ML_BITS);
-               lastrun -= RUN_MASK;
-               for (; lastrun > 254 ; lastrun -= 255)
-                       *op++ = 255;
-               *op++ = (u8)lastrun;
-       } else
-               *op++ = (lastrun << ML_BITS);
-       memcpy(op, anchor, iend - anchor);
-       op += iend - anchor;
+       {
+               size_t lastRunSize = (size_t)(iend - anchor);
+
+               if (op + 1 /* token */
+                       + ((lastRunSize + 240) / 255) /* litLength */
+                       + lastRunSize /* literals */ > oend) {
+                       /* adapt lastRunSize to fill 'dst' */
+                       lastRunSize = (oend - op) - 1;
+                       lastRunSize -= (lastRunSize + 240) / 255;
+               }
+               ip = anchor + lastRunSize;
+
+               if (lastRunSize >= RUN_MASK) {
+                       size_t accumulator = lastRunSize - RUN_MASK;
+
+                       *op++ = RUN_MASK << ML_BITS;
+                       for (; accumulator >= 255; accumulator -= 255)
+                               *op++ = 255;
+                       *op++ = (BYTE) accumulator;
+               } else {
+                       *op++ = (BYTE)(lastRunSize << ML_BITS);
+               }
+               memcpy(op, anchor, lastRunSize);
+               op += lastRunSize;
+       }
+
        /* End */
-       return (int)(((char *)op) - dest);
+       *srcSizePtr = (int) (((const char *)ip) - src);
+       return (int) (((char *)op) - dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-                       unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(
+       LZ4_stream_t *state,
+       const char *src,
+       char *dst,
+       int *srcSizePtr,
+       int targetDstSize)
 {
-       int ret = -1;
-       int out_len = 0;
+#if LZ4_ARCH64
+       const tableType_t tableType = byU32;
+#else
+       const tableType_t tableType = byPtr;
+#endif
 
-       if (src_len < LZ4_64KLIMIT)
-               out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-                               lz4_compressbound(src_len));
-       else
-               out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-                               lz4_compressbound(src_len));
+       LZ4_resetStream(state);
+
+       if (targetDstSize >= LZ4_COMPRESSBOUND(*srcSizePtr)) {
+               /* compression success is guaranteed */
+               return LZ4_compress_fast_extState(
+                       state, src, dst, *srcSizePtr,
+                       targetDstSize, 1);
+       } else {
+               if (*srcSizePtr < LZ4_64Klimit)
+                       return LZ4_compress_destSize_generic(
+                               &state->internal_donotuse,
+                               src, dst, srcSizePtr,
+                               targetDstSize, byU16);
+               else
+                       return LZ4_compress_destSize_generic(
+                               &state->internal_donotuse,
+                               src, dst, srcSizePtr,
+                               targetDstSize, tableType);
+       }
+}
+
+
+int LZ4_compress_destSize(
+       const char *src,
+       char *dst,
+       int *srcSizePtr,
+       int targetDstSize,
+       void *wrkmem)
+{
+       return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+               targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
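
The *_destSize() variant inverts the usual contract: the output size is fixed, and the function reports through *srcSizePtr how much input it managed to consume. A hedged usage sketch (the helper and the 4 KiB target are illustrative):

#include <linux/lz4.h>
#include <linux/printk.h>

static int fill_page(const char *src, int src_len, char *page, void *wrkmem)
{
	int consumed = src_len;	/* in: bytes available; out: bytes consumed */
	int written = LZ4_compress_destSize(src, page, &consumed,
					    4096, wrkmem);

	if (written > 0)
		pr_debug("packed %d of %d input bytes into %d\n",
			 consumed, src_len, written);
	return written;
}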
+
+/*-******************************
+ *     Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+       memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+       const char *dictionary, int dictSize)
+{
+       LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+       const BYTE *p = (const BYTE *)dictionary;
+       const BYTE * const dictEnd = p + dictSize;
+       const BYTE *base;
+
+       if ((dict->initCheck)
+               || (dict->currentOffset > 1 * GB)) {
+               /* Uninitialized structure, or reuse overflow */
+               LZ4_resetStream(LZ4_dict);
+       }
+
+       if (dictSize < (int)HASH_UNIT) {
+               dict->dictionary = NULL;
+               dict->dictSize = 0;
+               return 0;
+       }
+
+       if ((dictEnd - p) > 64 * KB)
+               p = dictEnd - 64 * KB;
+       dict->currentOffset += 64 * KB;
+       base = p - dict->currentOffset;
+       dict->dictionary = p;
+       dict->dictSize = (U32)(dictEnd - p);
+       dict->currentOffset += dict->dictSize;
+
+       while (p <= dictEnd - HASH_UNIT) {
+               LZ4_putPosition(p, dict->hashTable, byU32, base);
+               p += 3;
+       }
+
+       return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
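
LZ4_loadDict() primes a compression stream with up to the trailing 64 KB of a dictionary so the first LZ4_compress_fast_continue() call can already reference it. A minimal sketch, assuming caller-owned buffers:

#include <linux/lz4.h>

static void prime_stream(LZ4_stream_t *s, const char *dict, int dict_len)
{
	LZ4_resetStream(s);
	/* Only the last 64 KB of dict are retained. */
	LZ4_loadDict(s, dict, dict_len);
}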
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+       const BYTE *src)
+{
+       if ((LZ4_dict->currentOffset > 0x80000000) ||
+               ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+               /* address space overflow */
+               /* rescale hash table */
+               U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+               const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+               int i;
+
+               for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+                       if (LZ4_dict->hashTable[i] < delta)
+                               LZ4_dict->hashTable[i] = 0;
+                       else
+                               LZ4_dict->hashTable[i] -= delta;
+               }
+               LZ4_dict->currentOffset = 64 * KB;
+               if (LZ4_dict->dictSize > 64 * KB)
+                       LZ4_dict->dictSize = 64 * KB;
+               LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+       }
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+       LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+       const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+       if ((U32)dictSize > 64 * KB) {
+               /* useless to define a dictionary > 64 * KB */
+               dictSize = 64 * KB;
+       }
+       if ((U32)dictSize > dict->dictSize)
+               dictSize = dict->dictSize;
+
+       memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+       dict->dictionary = (const BYTE *)safeBuffer;
+       dict->dictSize = (U32)dictSize;
+
+       return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+       char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+       LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+       const BYTE * const dictEnd = streamPtr->dictionary
+               + streamPtr->dictSize;
 
-       if (out_len < 0)
-               goto exit;
+       const BYTE *smallest = (const BYTE *) source;
 
-       *dst_len = out_len;
+       if (streamPtr->initCheck) {
+               /* Uninitialized structure detected */
+               return 0;
+       }
+
+       if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+               smallest = dictEnd;
+
+       LZ4_renormDictT(streamPtr, smallest);
+
+       if (acceleration < 1)
+               acceleration = LZ4_ACCELERATION_DEFAULT;
+
+       /* Check overlapping input/dictionary space */
+       {
+               const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+               if ((sourceEnd > streamPtr->dictionary)
+                       && (sourceEnd < dictEnd)) {
+                       streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+                       if (streamPtr->dictSize > 64 * KB)
+                               streamPtr->dictSize = 64 * KB;
+                       if (streamPtr->dictSize < 4)
+                               streamPtr->dictSize = 0;
+                       streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+               }
+       }
 
-       return 0;
-exit:
-       return ret;
+       /* prefix mode : source data follows dictionary */
+       if (dictEnd == (const BYTE *)source) {
+               int result;
+
+               if ((streamPtr->dictSize < 64 * KB) &&
+                       (streamPtr->dictSize < streamPtr->currentOffset)) {
+                       result = LZ4_compress_generic(
+                               streamPtr, source, dest, inputSize,
+                               maxOutputSize, limitedOutput, byU32,
+                               withPrefix64k, dictSmall, acceleration);
+               } else {
+                       result = LZ4_compress_generic(
+                               streamPtr, source, dest, inputSize,
+                               maxOutputSize, limitedOutput, byU32,
+                               withPrefix64k, noDictIssue, acceleration);
+               }
+               streamPtr->dictSize += (U32)inputSize;
+               streamPtr->currentOffset += (U32)inputSize;
+               return result;
+       }
+
+       /* external dictionary mode */
+       {
+               int result;
+
+               if ((streamPtr->dictSize < 64 * KB) &&
+                       (streamPtr->dictSize < streamPtr->currentOffset)) {
+                       result = LZ4_compress_generic(
+                               streamPtr, source, dest, inputSize,
+                               maxOutputSize, limitedOutput, byU32,
+                               usingExtDict, dictSmall, acceleration);
+               } else {
+                       result = LZ4_compress_generic(
+                               streamPtr, source, dest, inputSize,
+                               maxOutputSize, limitedOutput, byU32,
+                               usingExtDict, noDictIssue, acceleration);
+               }
+               streamPtr->dictionary = (const BYTE *)source;
+               streamPtr->dictSize = (U32)inputSize;
+               streamPtr->currentOffset += (U32)inputSize;
+               return result;
+       }
 }
-EXPORT_SYMBOL(lz4_compress);
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
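
Putting the streaming pieces together: each block is compressed against the history of the previous ones, and LZ4_saveDict() copies that history into a caller-owned buffer before the source memory is reused. A sketch under those assumptions (ring-buffer management elided; names illustrative):

#include <linux/errno.h>
#include <linux/lz4.h>

static int stream_one_block(LZ4_stream_t *s, char *dict_buf,
			    const char *blk, int blk_len,
			    char *out, int out_cap)
{
	int n = LZ4_compress_fast_continue(s, blk, out, blk_len, out_cap,
					   LZ4_ACCELERATION_DEFAULT);

	if (n == 0)
		return -EINVAL;	/* out too small, or uninitialized stream */

	/* Keep up to 64 KB of history before blk's memory is recycled. */
	LZ4_saveDict(s, dict_buf, 64 * 1024);
	return n;
}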
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 compressor");
index 6d940c72b5fc28c4b3d431f44a9c198fc6ea7c36..bd3574312b827c606fe93c8d0a8a8c0ca0c6542d 100644 (file)
@@ -1,25 +1,16 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
  *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
+ *     Changed for kernel usage by:
+ *     Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#ifndef STATIC
+/*-************************************
+ *     Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ *     Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function covers all use cases.
+ * It shall be instantiated several times, using different sets of directives.
+ * Note that it is important that this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static FORCE_INLINE int LZ4_decompress_generic(
+        const char * const source,
+        char * const dest,
+        int inputSize,
+        /*
+         * If endOnInput == endOnInputSize,
+         * this value is the max size of Output Buffer.
+         */
+        int outputSize,
+        /* endOnOutputSize, endOnInputSize */
+        int endOnInput,
+        /* full, partial */
+        int partialDecoding,
+        /* only used if partialDecoding == partial */
+        int targetOutputSize,
+        /* noDict, withPrefix64k, usingExtDict */
+        int dict,
+        /* == dest when no prefix */
+        const BYTE * const lowPrefix,
+        /* only if dict == usingExtDict */
+        const BYTE * const dictStart,
+        /* note : = 0 if noDict */
+        const size_t dictSize
+        )
 {
+       /* Local Variables */
        const BYTE *ip = (const BYTE *) source;
-       const BYTE *ref;
+       const BYTE * const iend = ip + inputSize;
+
        BYTE *op = (BYTE *) dest;
-       BYTE * const oend = op + osize;
+       BYTE * const oend = op + outputSize;
        BYTE *cpy;
-       unsigned token;
-       size_t length;
+       BYTE *oexit = op + targetOutputSize;
+       const BYTE * const lowLimit = lowPrefix - dictSize;
 
+       const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+       const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };
+       const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
+
+       const int safeDecode = (endOnInput == endOnInputSize);
+       const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
+
+       /* Special cases */
+       /* targetOutputSize too high => decode everything */
+       if ((partialDecoding) && (oexit > oend - MFLIMIT))
+               oexit = oend - MFLIMIT;
+
+       /* Empty output buffer */
+       if ((endOnInput) && (unlikely(outputSize == 0)))
+               return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+       if ((!endOnInput) && (unlikely(outputSize == 0)))
+               return (*ip == 0 ? 1 : -1);
+
+       /* Main Loop : decode sequences */
        while (1) {
+               size_t length;
+               const BYTE *match;
+               size_t offset;
+
+               /* get literal length */
+               unsigned int const token = *ip++;
+
+               length = token >> ML_BITS;
 
-               /* get runlength */
-               token = *ip++;
-               length = (token >> ML_BITS);
                if (length == RUN_MASK) {
-                       size_t len;
+                       unsigned int s;
 
-                       len = *ip++;
-                       for (; len == 255; length += 255)
-                               len = *ip++;
-                       if (unlikely(length > (size_t)(length + len)))
+                       do {
+                               s = *ip++;
+                               length += s;
+                       } while (likely(endOnInput
+                               ? ip < iend - RUN_MASK
+                               : 1) & (s == 255));
+
+                       if ((safeDecode)
+                               && unlikely(
+                                       (size_t)(op + length) < (size_t)(op))) {
+                               /* overflow detection */
+                               goto _output_error;
+                       }
+                       if ((safeDecode)
+                               && unlikely(
+                                       (size_t)(ip + length) < (size_t)(ip))) {
+                               /* overflow detection */
                                goto _output_error;
-                       length += len;
+                       }
                }
 
                /* copy literals */
                cpy = op + length;
-               if (unlikely(cpy > oend - COPYLENGTH)) {
-                       /*
-                        * Error: not enough place for another match
-                        * (min 4) + 5 literals
-                        */
-                       if (cpy != oend)
-                               goto _output_error;
+               if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+                       || (ip + length > iend - (2 + 1 + LASTLITERALS))))
+                       || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+                       if (partialDecoding) {
+                               if (cpy > oend) {
+                                       /*
+                                        * Error :
+                                        * write attempt beyond end of output buffer
+                                        */
+                                       goto _output_error;
+                               }
+                               if ((endOnInput)
+                                       && (ip + length > iend)) {
+                                       /*
+                                        * Error :
+                                        * read attempt beyond
+                                        * end of input buffer
+                                        */
+                                       goto _output_error;
+                               }
+                       } else {
+                               if ((!endOnInput)
+                                       && (cpy != oend)) {
+                                       /*
+                                        * Error :
+                                        * block decoding must
+                                        * stop exactly there
+                                        */
+                                       goto _output_error;
+                               }
+                               if ((endOnInput)
+                                       && ((ip + length != iend)
+                                       || (cpy > oend))) {
+                                       /*
+                                        * Error :
+                                        * input must be consumed
+                                        */
+                                       goto _output_error;
+                               }
+                       }
 
                        memcpy(op, ip, length);
                        ip += length;
-                       break; /* EOF */
+                       op += length;
+                       /* Necessarily EOF, due to parsing restrictions */
+                       break;
                }
-               LZ4_WILDCOPY(ip, op, cpy);
-               ip -= (op - cpy);
+
+               LZ4_wildCopy(op, ip, cpy);
+               ip += length;
                op = cpy;
 
                /* get offset */
-               LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
+               offset = LZ4_readLE16(ip);
                ip += 2;
+               match = op - offset;
 
-               /* Error: offset create reference outside destination buffer */
-               if (unlikely(ref < (BYTE *const) dest))
+               if ((checkOffset) && (unlikely(match < lowLimit))) {
+                       /* Error : offset outside buffers */
                        goto _output_error;
+               }
+
+               /* costs ~1%; silence an msan warning when offset == 0 */
+               LZ4_write32(op, (U32)offset);
 
                /* get matchlength */
                length = token & ML_MASK;
                if (length == ML_MASK) {
-                       for (; *ip == 255; length += 255)
-                               ip++;
-                       if (unlikely(length > (size_t)(length + *ip)))
+                       unsigned int s;
+
+                       do {
+                               s = *ip++;
+
+                               if ((endOnInput) && (ip > iend - LASTLITERALS))
+                                       goto _output_error;
+
+                               length += s;
+                       } while (s == 255);
+
+                       if ((safeDecode)
+                               && unlikely(
+                                       (size_t)(op + length) < (size_t)op)) {
+                               /* overflow detection */
                                goto _output_error;
-                       length += *ip++;
+                       }
                }
 
-               /* copy repeated sequence */
-               if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-                       int dec64 = dec64table[op - ref];
-#else
-                       const int dec64 = 0;
-#endif
-                       op[0] = ref[0];
-                       op[1] = ref[1];
-                       op[2] = ref[2];
-                       op[3] = ref[3];
-                       op += 4;
-                       ref += 4;
-                       ref -= dec32table[op-ref];
-                       PUT4(ref, op);
-                       op += STEPSIZE - 4;
-                       ref -= dec64;
+               length += MINMATCH;
+
+               /* check external dictionary */
+               if ((dict == usingExtDict) && (match < lowPrefix)) {
+                       if (unlikely(op + length > oend - LASTLITERALS)) {
+                               /* doesn't respect parsing restriction */
+                               goto _output_error;
+                       }
+
+                       if (length <= (size_t)(lowPrefix - match)) {
+                               /*
+                                * match can be copied as a single segment
+                                * from external dictionary
+                                */
+                               memmove(op, dictEnd - (lowPrefix - match),
+                                       length);
+                               op += length;
+                       } else {
+                               /*
+                                * match encompasses both the external
+                                * dictionary and the current block
+                                */
+                               size_t const copySize = (size_t)(lowPrefix - match);
+                               size_t const restSize = length - copySize;
+
+                               memcpy(op, dictEnd - copySize, copySize);
+                               op += copySize;
+
+                               if (restSize > (size_t)(op - lowPrefix)) {
+                                       /* overlap copy */
+                                       BYTE * const endOfMatch = op + restSize;
+                                       const BYTE *copyFrom = lowPrefix;
+
+                                       while (op < endOfMatch)
+                                               *op++ = *copyFrom++;
+                               } else {
+                                       memcpy(op, lowPrefix, restSize);
+                                       op += restSize;
+                               }
+                       }
+
+                       continue;
+               }
+
+               /* copy match within block */
+               cpy = op + length;
+
+               if (unlikely(offset < 8)) {
+                       const int dec64 = dec64table[offset];
+
+                       op[0] = match[0];
+                       op[1] = match[1];
+                       op[2] = match[2];
+                       op[3] = match[3];
+                       match += dec32table[offset];
+                       memcpy(op + 4, match, 4);
+                       match -= dec64;
                } else {
-                       LZ4_COPYSTEP(ref, op);
+                       LZ4_copy8(op, match);
+                       match += 8;
                }
-               cpy = op + length - (STEPSIZE - 4);
-               if (cpy > (oend - COPYLENGTH)) {
 
-                       /* Error: request to write beyond destination buffer */
-                       if (cpy > oend)
-                               goto _output_error;
-#if LZ4_ARCH64
-                       if ((ref + COPYLENGTH) > oend)
-#else
-                       if ((ref + COPYLENGTH) > oend ||
-                                       (op + COPYLENGTH) > oend)
-#endif
+               op += 8;
+
+               if (unlikely(cpy > oend - 12)) {
+                       BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+                       if (cpy > oend - LASTLITERALS) {
+                               /*
+                                * Error : last LASTLITERALS bytes
+                                * must be literals (uncompressed)
+                                */
                                goto _output_error;
-                       LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+                       }
+
+                       if (op < oCopyLimit) {
+                               LZ4_wildCopy(op, match, oCopyLimit);
+                               match += oCopyLimit - op;
+                               op = oCopyLimit;
+                       }
+
                        while (op < cpy)
-                               *op++ = *ref++;
-                       op = cpy;
-                       /*
-                        * Check EOF (should never happen, since last 5 bytes
-                        * are supposed to be literals)
-                        */
-                       if (op == oend)
-                               goto _output_error;
-                       continue;
+                               *op++ = *match++;
+               } else {
+                       LZ4_copy8(op, match);
+
+                       if (length > 16)
+                               LZ4_wildCopy(op + 8, match + 8, cpy);
                }
-               LZ4_SECURECOPY(ref, op, cpy);
+
                op = cpy; /* correction */
        }
+
        /* end of decoding */
-       return (int) (((char *)ip) - source);
+       if (endOnInput) {
+               /* Nb of output bytes decoded */
+               return (int) (((char *)op) - dest);
+       } else {
+               /* Nb of input bytes read */
+               return (int) (((const char *)ip) - source);
+       }
 
-       /* write overflow error detected */
+       /* Overflow error detected */
 _output_error:
        return -1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-                               int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+       int compressedSize, int maxDecompressedSize)
 {
-       const BYTE *ip = (const BYTE *) source;
-       const BYTE *const iend = ip + isize;
-       const BYTE *ref;
-
+       return LZ4_decompress_generic(source, dest, compressedSize,
+               maxDecompressedSize, endOnInputSize, full, 0,
+               noDict, (BYTE *)dest, NULL, 0);
+}
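
A hedged caller sketch: LZ4_decompress_safe() bounds-checks both buffers and returns the decoded size, or a negative value on malformed input (the helper is illustrative):

#include <linux/errno.h>
#include <linux/lz4.h>

static int example_decompress(const char *comp, int comp_len,
			      char *out, int out_cap)
{
	int n = LZ4_decompress_safe(comp, out, comp_len, out_cap);

	return n < 0 ? -EINVAL : n;
}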
 
-       BYTE *op = (BYTE *) dest;
-       BYTE * const oend = op + maxoutputsize;
-       BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+       int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+       return LZ4_decompress_generic(source, dest, compressedSize,
+               maxDecompressedSize, endOnInputSize, partial,
+               targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
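
The partial variant stops once targetOutputSize bytes have been produced (it may write somewhat more, never beyond maxDecompressedSize), which is handy to peek at a header without decoding the whole block. Illustrative sketch:

static int peek_header(const char *comp, int comp_len,
		       char *out, int out_cap)
{
	/* Ask for the first 64 bytes; out may receive slightly more. */
	return LZ4_decompress_safe_partial(comp, out, comp_len, 64, out_cap);
}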
 
-       /* Main Loop */
-       while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+       return LZ4_decompress_generic(source, dest, 0, originalSize,
+               endOnOutputSize, full, 0, withPrefix64k,
+               (BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
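
LZ4_decompress_fast() trades input validation for speed: the caller must know the exact decompressed size and trust the producer of the data; the return value is the number of compressed bytes consumed. A sketch:

static int example_decompress_fast(const char *comp, char *out, int orig_sz)
{
	/* No bounds checks on comp: only for trusted, well-formed input. */
	return LZ4_decompress_fast(comp, out, orig_sz);
}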
 
-               unsigned token;
-               size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *dictionary, int dictSize)
+{
+       LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
 
-               /* get runlength */
-               token = *ip++;
-               length = (token >> ML_BITS);
-               if (length == RUN_MASK) {
-                       int s = 255;
-                       while ((ip < iend) && (s == 255)) {
-                               s = *ip++;
-                               if (unlikely(length > (size_t)(length + s)))
-                                       goto _output_error;
-                               length += s;
-                       }
-               }
-               /* copy literals */
-               cpy = op + length;
-               if ((cpy > oend - COPYLENGTH) ||
-                       (ip + length > iend - COPYLENGTH)) {
-
-                       if (cpy > oend)
-                               goto _output_error;/* writes beyond buffer */
-
-                       if (ip + length != iend)
-                               goto _output_error;/*
-                                                   * Error: LZ4 format requires
-                                                   * to consume all input
-                                                   * at this stage
-                                                   */
-                       memcpy(op, ip, length);
-                       op += length;
-                       break;/* Necessarily EOF, due to parsing restrictions */
-               }
-               LZ4_WILDCOPY(ip, op, cpy);
-               ip -= (op - cpy);
-               op = cpy;
+       lz4sd->prefixSize = (size_t) dictSize;
+       lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+       lz4sd->externalDict = NULL;
+       lz4sd->extDictSize = 0;
+       return 1;
+}
 
-               /* get offset */
-               LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-               ip += 2;
-               if (ref < (BYTE * const) dest)
-                       goto _output_error;
-                       /*
-                        * Error : offset creates reference
-                        * outside of destination buffer
-                        */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If that is not possible, save the relevant part of the
+ * decoded data into a safe buffer and indicate where it is
+ * using LZ4_setStreamDecode().
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+       LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+       int result;
+
+       if (lz4sd->prefixEnd == (BYTE *)dest) {
+               result = LZ4_decompress_generic(source, dest,
+                       compressedSize,
+                       maxOutputSize,
+                       endOnInputSize, full, 0,
+                       usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+                       lz4sd->externalDict,
+                       lz4sd->extDictSize);
+
+               if (result <= 0)
+                       return result;
+
+               lz4sd->prefixSize += result;
+               lz4sd->prefixEnd += result;
+       } else {
+               lz4sd->extDictSize = lz4sd->prefixSize;
+               lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+               result = LZ4_decompress_generic(source, dest,
+                       compressedSize, maxOutputSize,
+                       endOnInputSize, full, 0,
+                       usingExtDict, (BYTE *)dest,
+                       lz4sd->externalDict, lz4sd->extDictSize);
+               if (result <= 0)
+                       return result;
+               lz4sd->prefixSize = result;
+               lz4sd->prefixEnd = (BYTE *)dest + result;
+       }
 
-               /* get matchlength */
-               length = (token & ML_MASK);
-               if (length == ML_MASK) {
-                       while (ip < iend) {
-                               int s = *ip++;
-                               if (unlikely(length > (size_t)(length + s)))
-                                       goto _output_error;
-                               length += s;
-                               if (s == 255)
-                                       continue;
-                               break;
-                       }
-               }
+       return result;
+}
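
A matching sketch for the streaming decode side, under the rule above that previously decoded blocks stay in place (ring-buffer handling elided; names illustrative):

static int decode_one_block(LZ4_streamDecode_t *sd,
			    const char *blk, int blk_len,
			    char *dst, int dst_cap)
{
	/* Once, before the first block: LZ4_setStreamDecode(sd, NULL, 0); */
	return LZ4_decompress_safe_continue(sd, blk, dst, blk_len, dst_cap);
}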
 
-               /* copy repeated sequence */
-               if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-                       int dec64 = dec64table[op - ref];
-#else
-                       const int dec64 = 0;
-#endif
-                               op[0] = ref[0];
-                               op[1] = ref[1];
-                               op[2] = ref[2];
-                               op[3] = ref[3];
-                               op += 4;
-                               ref += 4;
-                               ref -= dec32table[op - ref];
-                               PUT4(ref, op);
-                               op += STEPSIZE - 4;
-                               ref -= dec64;
-               } else {
-                       LZ4_COPYSTEP(ref, op);
-               }
-               cpy = op + length - (STEPSIZE-4);
-               if (cpy > oend - COPYLENGTH) {
-                       if (cpy > oend)
-                               goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-                       if ((ref + COPYLENGTH) > oend)
-#else
-                       if ((ref + COPYLENGTH) > oend ||
-                                       (op + COPYLENGTH) > oend)
-#endif
-                               goto _output_error;
-                       LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-                       while (op < cpy)
-                               *op++ = *ref++;
-                       op = cpy;
-                       /*
-                        * Check EOF (should never happen, since last 5 bytes
-                        * are supposed to be literals)
-                        */
-                       if (op == oend)
-                               goto _output_error;
-                       continue;
-               }
-               LZ4_SECURECOPY(ref, op, cpy);
-               op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *source, char *dest, int originalSize)
+{
+       LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+       int result;
+
+       if (lz4sd->prefixEnd == (BYTE *)dest) {
+               result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                       endOnOutputSize, full, 0,
+                       usingExtDict,
+                       lz4sd->prefixEnd - lz4sd->prefixSize,
+                       lz4sd->externalDict, lz4sd->extDictSize);
+
+               if (result <= 0)
+                       return result;
+
+               lz4sd->prefixSize += originalSize;
+               lz4sd->prefixEnd += originalSize;
+       } else {
+               lz4sd->extDictSize = lz4sd->prefixSize;
+               lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+               result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                       endOnOutputSize, full, 0,
+                       usingExtDict, (BYTE *)dest,
+                       lz4sd->externalDict, lz4sd->extDictSize);
+               if (result <= 0)
+                       return result;
+               lz4sd->prefixSize = originalSize;
+               lz4sd->prefixEnd = (BYTE *)dest + originalSize;
        }
-       /* end of decoding */
-       return (int) (((char *) op) - dest);
 
-       /* write overflow error detected */
-_output_error:
-       return -1;
+       return result;
 }
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-               unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
+       char *dest, int compressedSize, int maxOutputSize, int safe,
+       const char *dictStart, int dictSize)
 {
-       int ret = -1;
-       int input_len = 0;
-
-       input_len = lz4_uncompress(src, dest, actual_dest_len);
-       if (input_len < 0)
-               goto exit_0;
-       *src_len = input_len;
+       if (dictSize == 0)
+               return LZ4_decompress_generic(source, dest,
+                       compressedSize, maxOutputSize, safe, full, 0,
+                       noDict, (BYTE *)dest, NULL, 0);
+       if (dictStart + dictSize == dest) {
+               if (dictSize >= (int)(64 * KB - 1))
+                       return LZ4_decompress_generic(source, dest,
+                               compressedSize, maxOutputSize, safe, full, 0,
+                               withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+               return LZ4_decompress_generic(source, dest, compressedSize,
+                       maxOutputSize, safe, full, 0, noDict,
+                       (BYTE *)dest - dictSize, NULL, 0);
+       }
+       return LZ4_decompress_generic(source, dest, compressedSize,
+               maxOutputSize, safe, full, 0, usingExtDict,
+               (BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
 
-       return 0;
-exit_0:
-       return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+       int compressedSize, int maxOutputSize,
+       const char *dictStart, int dictSize)
+{
+       return LZ4_decompress_usingDict_generic(source, dest,
+               compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-               unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+       int originalSize, const char *dictStart, int dictSize)
 {
-       int ret = -1;
-       int out_len = 0;
-
-       out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-                                       *dest_len);
-       if (out_len < 0)
-               goto exit_0;
-       *dest_len = out_len;
-
-       return 0;
-exit_0:
-       return ret;
+       return LZ4_decompress_usingDict_generic(source, dest, 0,
+               originalSize, 0, dictStart, dictSize);
 }
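
The *_usingDict() wrappers are the stateless counterpart of *_continue(): the dictionary is passed explicitly on every call instead of being carried in an LZ4_streamDecode_t. Illustrative sketch:

static int decode_with_dict(const char *comp, int comp_len,
			    char *out, int out_cap,
			    const char *dict, int dict_len)
{
	return LZ4_decompress_safe_usingDict(comp, out, comp_len, out_cap,
					     dict, dict_len);
}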
+
 #ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
+EXPORT_SYMBOL(LZ4_decompress_safe);
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
+EXPORT_SYMBOL(LZ4_decompress_fast);
+EXPORT_SYMBOL(LZ4_setStreamDecode);
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
+MODULE_DESCRIPTION("LZ4 decompressor");
 #endif
index c79d7ea8a38e47b8292d9f9a23bb0744a0efe7c8..00a0b58a0871bc3c1f0afca8aa9bbd03aab09edd 100644 (file)
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *     Changed for kernel usage by:
+ *     Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-/*
- * Detects 64 bits mode
- */
+#include <asm/unaligned.h>
+#include <linux/string.h>       /* memset, memcpy */
+
+#define FORCE_INLINE __always_inline
+
+/*-************************************
+ *     Basic Types
+ **************************************/
+#include <linux/types.h>
+
+typedef uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+/*-************************************
+ *     Architecture specifics
+ **************************************/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
-/*
- * Architecture-specific macros
- */
-#define BYTE   u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)      \
-       (d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)        \
-       do {    \
-               A16(p) = v; \
-               p += 2; \
-       } while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-       put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-       put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)      \
-       (d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)                        \
-       do {                                            \
-               put_unaligned_le16(v, (u16 *)(p));      \
-               p += 2;                                 \
-       } while (0)
+#if defined(__LITTLE_ENDIAN)
+#define LZ4_LITTLE_ENDIAN 1
+#else
+#define LZ4_LITTLE_ENDIAN 0
 #endif
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
+/*-************************************
+ *     Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
+
+/* Increase this value ==> compression runs slower on incompressible data */
+#define LZ4_SKIPTRIGGER 6
+
+#define HASH_UNIT sizeof(size_t)
+
+#define KB (1 << 10)
+#define MB (1 << 20)
+#define GB (1U << 30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS        4
+#define ML_MASK        ((1U << ML_BITS) - 1)
 #define RUN_BITS (8 - ML_BITS)
 #define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE   14
-#define MINMATCH       4
-#define SKIPSTRENGTH   6
-#define LASTLITERALS   5
-#define MFLIMIT                (COPYLENGTH + MINMATCH)
-#define MINLENGTH      (MFLIMIT + 1)
-#define MAXD_LOG       16
-#define MAXD           (1 << MAXD_LOG)
-#define MAXD_MASK      (u32)(MAXD - 1)
-#define MAX_DISTANCE   (MAXD - 1)
-#define HASH_LOG       (MAXD_LOG - 1)
-#define HASHTABLESIZE  (1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS        256
-#define OPTIMAL_ML     (int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT   ((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K     ((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE       (1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)      (((A32(p)) * 2654435761U) >> \
-                               ((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)   (((A32(p)) * 2654435761U) >> \
-                               ((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)          (((A32(p)) * 2654435761U) >> \
-                               ((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)     \
-       do {                    \
-               PUT8(s, d);     \
-               d += 8;         \
-               s += 8;         \
-       } while (0)
-
-#define LZ4_COPYPACKET(s, d)   LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)                        \
-       do {                                    \
-               if (d < e) {                    \
-                       LZ4_WILDCOPY(s, d, e);  \
-               }                               \
-       } while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+
+/*-************************************
+ *     Reading and writing into memory
+ **************************************/
+static FORCE_INLINE U16 LZ4_read16(const void *ptr)
+{
+       return get_unaligned((const U16 *)ptr);
+}
+
+static FORCE_INLINE U32 LZ4_read32(const void *ptr)
+{
+       return get_unaligned((const U32 *)ptr);
+}
+
+static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
+{
+       return get_unaligned((const size_t *)ptr);
+}
+
+static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
+{
+       put_unaligned(value, (U16 *)memPtr);
+}
+
+static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
+{
+       put_unaligned(value, (U32 *)memPtr);
+}
+
+static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
+{
+       return get_unaligned_le16(memPtr);
+}
+
+static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
+{
+       return put_unaligned_le16(value, memPtr);
+}
+
+static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
+{
+#if LZ4_ARCH64
+       U64 a = get_unaligned((const U64 *)src);
+
+       put_unaligned(a, (U64 *)dst);
+#else
+       U32 a = get_unaligned((const U32 *)src);
+       U32 b = get_unaligned((const U32 *)src + 1);
+
+       put_unaligned(a, (U32 *)dst);
+       put_unaligned(b, (U32 *)dst + 1);
+#endif
+}
+
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
+       const void *srcPtr, void *dstEnd)
+{
+       BYTE *d = (BYTE *)dstPtr;
+       const BYTE *s = (const BYTE *)srcPtr;
+       BYTE *const e = (BYTE *)dstEnd;
+
+       do {
+               LZ4_copy8(d, s);
+               d += 8;
+               s += 8;
+       } while (d < e);
+}
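LZ4_wildCopy() trades precision for speed: it always copies whole 8-byte words, so a copy of length L can write up to 7 bytes past dstEnd. That is why the format reserves slack at the end of every buffer (the LASTLITERALS and MFLIMIT margins above). A minimal user-space sketch of the idea, with plain memcpy() standing in for LZ4_copy8():

#include <stdio.h>
#include <string.h>

/* stand-in for LZ4_wildCopy(): copies whole 8-byte words, so it
 * may write up to 7 bytes past dst + len -- callers must leave slack */
static void wild_copy(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	unsigned char *const e = d + len;

	do {
		memcpy(d, s, 8);	/* stands in for LZ4_copy8() */
		d += 8;
		s += 8;
	} while (d < e);
}

int main(void)
{
	char src[24] = "0123456789abcdef";
	char dst[24];			/* enough slack after byte 9 */

	wild_copy(dst, src, 10);	/* actually writes 16 bytes */
	printf("%.10s\n", dst);		/* only the first 10 are meaningful */
	return 0;
}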
+
+static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
+{
+#if LZ4_LITTLE_ENDIAN
+       return __ffs(val) >> 3;
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+       return (BITS_PER_LONG - 1 - __fls(val)) >> 3;
+#endif
+}
+
+static FORCE_INLINE unsigned int LZ4_count(
+       const BYTE *pIn,
+       const BYTE *pMatch,
+       const BYTE *pInLimit)
+{
+       const BYTE *const pStart = pIn;
+
+       while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
+               size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+               if (!diff) {
+                       pIn += STEPSIZE;
+                       pMatch += STEPSIZE;
+                       continue;
+               }
+
+               pIn += LZ4_NbCommonBytes(diff);
+
+               return (unsigned int)(pIn - pStart);
+       }
+
+#if LZ4_ARCH64
+       if ((pIn < (pInLimit - 3))
+               && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+               pIn += 4;
+               pMatch += 4;
+       }
 #endif
 
-#else  /* 32-bit */
-#define STEPSIZE 4
+       if ((pIn < (pInLimit - 1))
+               && (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+               pIn += 2;
+               pMatch += 2;
+       }
 
-#define LZ4_COPYSTEP(s, d)     \
-       do {                    \
-               PUT4(s, d);     \
-               d += 4;         \
-               s += 4;         \
-       } while (0)
+       if ((pIn < pInLimit) && (*pMatch == *pIn))
+               pIn++;
 
-#define LZ4_COPYPACKET(s, d)           \
-       do {                            \
-               LZ4_COPYSTEP(s, d);     \
-               LZ4_COPYSTEP(s, d);     \
-       } while (0)
+       return (unsigned int)(pIn - pStart);
+}
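LZ4_count() measures a match one word at a time: XOR the two 8-byte words, and if the result is zero all eight bytes match; otherwise, on a little-endian machine, the index of the lowest set bit divided by 8 is the number of matching leading bytes, which is exactly what __ffs(val) >> 3 computes in LZ4_NbCommonBytes(). A user-space sketch of that idea, with __builtin_ctzll() standing in for the kernel's __ffs():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* little-endian assumed, as in the LZ4_LITTLE_ENDIAN branch above */
static unsigned match_length(const uint8_t *a, const uint8_t *b, size_t limit)
{
	unsigned n = 0;

	while (n + 8 <= limit) {
		uint64_t wa, wb;

		memcpy(&wa, a + n, 8);
		memcpy(&wb, b + n, 8);
		if (wa != wb)	/* first mismatch: trailing zero bits / 8 */
			return n + (__builtin_ctzll(wa ^ wb) >> 3);
		n += 8;
	}
	while (n < limit && a[n] == b[n])
		n++;
	return n;
}

int main(void)
{
	const uint8_t x[] = "abcdefgh12345678";
	const uint8_t y[] = "abcdefgh1234X678";

	printf("%u\n", match_length(x, y, 16));	/* prints 12 */
	return 0;
}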
 
-#define LZ4_SECURECOPY LZ4_WILDCOPY
-#define HTYPE const u8*
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
 
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
-#else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
-#endif
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
-#endif
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
 
-#define LZ4_WILDCOPY(s, d, e)          \
-       do {                            \
-               LZ4_COPYPACKET(s, d);   \
-       } while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l) \
-       do {    \
-               u8 *e = (d) + l;        \
-               LZ4_WILDCOPY(s, d, e);  \
-               d = e;  \
-       } while (0)
+#endif
index f344f76b6559620bf7ae3bdaaeb6ab25265a344e..176f03b83e560a84bbc0e0819ef3cb7d1bf2dd35 100644 (file)
@@ -1,19 +1,17 @@
 /*
  * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
  *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *     Changed for kernel usage by:
+ *     Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ *     Dependencies
+ **************************************/
 #include <linux/lz4.h>
-#include <asm/unaligned.h>
 #include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
 
-struct lz4hc_data {
-       const u8 *base;
-       HTYPE hashtable[HASHTABLESIZE];
-       u16 chaintable[MAXD];
-       const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+ *     Local Constants and types
+ ***************************************/
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
+
+#define HASH_FUNCTION(i)       (((i) * 2654435761U) \
+       >> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)        chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
 {
-       memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-       memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-       hc4->nexttoupdate = base + 1;
-#else
-       hc4->nexttoupdate = base;
-#endif
-       hc4->base = base;
-       return 1;
+       return HASH_FUNCTION(LZ4_read32(ptr));
+}
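LZ4HC_hashPtr() is a classic Fibonacci hash: multiply the first four input bytes by 2654435761 (2^32 divided by the golden ratio) and keep the top LZ4HC_HASH_LOG bits, so similar 4-byte patterns still scatter across the table. A standalone sketch; the value 15 for LZ4HC_HASH_LOG is an assumption here, the real constant lives in <linux/lz4.h>:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HASH_LOG 15	/* assumption, standing in for LZ4HC_HASH_LOG */

static uint32_t hash_ptr(const void *p)
{
	uint32_t v;

	memcpy(&v, p, 4);	/* stands in for LZ4_read32() */
	return (v * 2654435761U) >> (32 - HASH_LOG);	/* MINMATCH*8 == 32 */
}

int main(void)
{
	printf("%u\n", hash_ptr("abcd"));	/* a bucket in [0, 2^15) */
	return 0;
}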
+
+/**************************************
+ *     HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
+{
+       memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+       memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+       hc4->nextToUpdate = 64 * KB;
+       hc4->base = start - 64 * KB;
+       hc4->end = start;
+       hc4->dictBase = start - 64 * KB;
+       hc4->dictLimit = 64 * KB;
+       hc4->lowLimit = 64 * KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+       const BYTE *ip)
 {
-       u16 *chaintable = hc4->chaintable;
-       HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
+       U16 * const chainTable = hc4->chainTable;
+       U32 * const hashTable   = hc4->hashTable;
        const BYTE * const base = hc4->base;
-#else
-       const int base = 0;
-#endif
+       U32 const target = (U32)(ip - base);
+       U32 idx = hc4->nextToUpdate;
+
+       while (idx < target) {
+               U32 const h = LZ4HC_hashPtr(base + idx);
+               size_t delta = idx - hashTable[h];
 
-       while (hc4->nexttoupdate < ip) {
-               const u8 *p = hc4->nexttoupdate;
-               size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
                if (delta > MAX_DISTANCE)
                        delta = MAX_DISTANCE;
-               chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-               hashtable[HASH_VALUE(p)] = (p) - base;
-               hc4->nexttoupdate++;
-       }
-}
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-               const u8 *const matchlimit)
-{
-       const u8 *p1t = p1;
-
-       while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-               u64 diff = A64(p2) ^ A64(p1t);
-#else
-               u32 diff = A32(p2) ^ A32(p1t);
-#endif
-               if (!diff) {
-                       p1t += STEPSIZE;
-                       p2 += STEPSIZE;
-                       continue;
-               }
-               p1t += LZ4_NBCOMMONBYTES(diff);
-               return p1t - p1;
-       }
-#if LZ4_ARCH64
-       if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-               p1t += 4;
-               p2 += 4;
-       }
-#endif
+               DELTANEXTU16(idx) = (U16)delta;
 
-       if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-               p1t += 2;
-               p2 += 2;
+               hashTable[h] = idx;
+               idx++;
        }
-       if ((p1t < matchlimit) && (*p2 == *p1t))
-               p1t++;
-       return p1t - p1;
+
+       hc4->nextToUpdate = target;
 }
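LZ4HC_Insert() maintains the two match-finder structures: hashTable[h] holds the most recent position whose first four bytes hash to h, and chainTable[idx & 0xffff] holds the 16-bit backward delta to the previous position with the same hash, capped at MAX_DISTANCE. Note the 64 KB bias applied by LZ4HC_init() above (base = start - 64 * KB), which keeps every real index above the empty-table value of 0. A user-space sketch without that bias, with table sizes assumed:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HASH_LOG	15			/* assumed table size */
#define MAX_DIST	65535u			/* MAX_DISTANCE */

static uint32_t hash_table[1u << HASH_LOG];	/* newest position per hash */
static uint16_t chain_table[65536];		/* delta to previous match */

static uint32_t hash4(const uint8_t *p)
{
	uint32_t v;

	memcpy(&v, p, 4);
	return (v * 2654435761U) >> (32 - HASH_LOG);
}

static void insert_up_to(const uint8_t *base, uint32_t *next, uint32_t target)
{
	while (*next < target) {
		uint32_t idx = (*next)++;
		uint32_t h = hash4(base + idx);
		size_t delta = idx - hash_table[h];

		if (delta > MAX_DIST)
			delta = MAX_DIST;	/* too far back to reference */
		chain_table[idx & 0xffff] = (uint16_t)delta; /* DELTANEXTU16 */
		hash_table[h] = idx;
	}
}

int main(void)
{
	const uint8_t buf[] = "abcdXXXXabcdYYYYabcd????";
	uint32_t next = 0;

	insert_up_to(buf, &next, 20);
	/* "abcd" occurs at 0, 8 and 16; walking the chain back from 16
	 * should yield 8, then 0 (barring a toy-input bucket collision) */
	printf("%u %u\n", 16 - chain_table[16], 8 - chain_table[8]);
	return 0;
}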
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-               const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(
+       LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+       const BYTE *ip,
+       const BYTE * const iLimit,
+       const BYTE **matchpos,
+       const int maxNbAttempts)
 {
-       u16 *const chaintable = hc4->chaintable;
-       HTYPE *const hashtable = hc4->hashtable;
-       const u8 *ref;
-#if LZ4_ARCH64
+       U16 * const chainTable = hc4->chainTable;
+       U32 * const HashTable = hc4->hashTable;
        const BYTE * const base = hc4->base;
-#else
-       const int base = 0;
-#endif
-       int nbattempts = MAX_NB_ATTEMPTS;
-       size_t repl = 0, ml = 0;
-       u16 delta;
+       const BYTE * const dictBase = hc4->dictBase;
+       const U32 dictLimit = hc4->dictLimit;
+       const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+               ? hc4->lowLimit
+               : (U32)(ip - base) - (64 * KB - 1);
+       U32 matchIndex;
+       int nbAttempts = maxNbAttempts;
+       size_t ml = 0;
 
        /* HC4 match finder */
-       lz4hc_insert(hc4, ip);
-       ref = hashtable[HASH_VALUE(ip)] + base;
-
-       /* potential repetition */
-       if (ref >= ip-4) {
-               /* confirmed */
-               if (A32(ref) == A32(ip)) {
-                       delta = (u16)(ip-ref);
-                       repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-                                       ref + MINMATCH, matchlimit) + MINMATCH;
-                       *matchpos = ref;
-               }
-               ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-       }
+       LZ4HC_Insert(hc4, ip);
+       matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+       while ((matchIndex >= lowLimit)
+               && (nbAttempts)) {
+               nbAttempts--;
+               if (matchIndex >= dictLimit) {
+                       const BYTE * const match = base + matchIndex;
+
+                       if (*(match + ml) == *(ip + ml)
+                               && (LZ4_read32(match) == LZ4_read32(ip))) {
+                               size_t const mlt = LZ4_count(ip + MINMATCH,
+                                       match + MINMATCH, iLimit) + MINMATCH;
 
-       while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-               nbattempts--;
-               if (*(ref + ml) == *(ip + ml)) {
-                       if (A32(ref) == A32(ip)) {
-                               size_t mlt =
-                                       lz4hc_commonlength(ip + MINMATCH,
-                                       ref + MINMATCH, matchlimit) + MINMATCH;
                                if (mlt > ml) {
                                        ml = mlt;
-                                       *matchpos = ref;
+                                       *matchpos = match;
+                               }
+                       }
+               } else {
+                       const BYTE * const match = dictBase + matchIndex;
+
+                       if (LZ4_read32(match) == LZ4_read32(ip)) {
+                               size_t mlt;
+                               const BYTE *vLimit = ip
+                                       + (dictLimit - matchIndex);
+
+                               if (vLimit > iLimit)
+                                       vLimit = iLimit;
+                               mlt = LZ4_count(ip + MINMATCH,
+                                       match + MINMATCH, vLimit) + MINMATCH;
+                               if ((ip + mlt == vLimit)
+                                       && (vLimit < iLimit))
+                                       mlt += LZ4_count(ip + mlt,
+                                               base + dictLimit,
+                                               iLimit);
+                               if (mlt > ml) {
+                                       /* virtual matchpos */
+                                       ml = mlt;
+                                       *matchpos = base + matchIndex;
                                }
                        }
                }
-               ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-       }
-
-       /* Complete table */
-       if (repl) {
-               const BYTE *ptr = ip;
-               const BYTE *end;
-               end = ip + repl - (MINMATCH-1);
-               /* Pre-Load */
-               while (ptr < end - delta) {
-                       chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-                       ptr++;
-               }
-               do {
-                       chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-                       /* Head of chain */
-                       hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-                       ptr++;
-               } while (ptr < end);
-               hc4->nexttoupdate = end;
+               matchIndex -= DELTANEXTU16(matchIndex);
        }
 
        return (int)ml;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-       const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-       const u8 **matchpos, const u8 **startpos)
+static FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch(
+       LZ4HC_CCtx_internal *hc4,
+       const BYTE * const ip,
+       const BYTE * const iLowLimit,
+       const BYTE * const iHighLimit,
+       int longest,
+       const BYTE **matchpos,
+       const BYTE **startpos,
+       const int maxNbAttempts)
 {
-       u16 *const chaintable = hc4->chaintable;
-       HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+       U16 * const chainTable = hc4->chainTable;
+       U32 * const HashTable = hc4->hashTable;
        const BYTE * const base = hc4->base;
-#else
-       const int base = 0;
-#endif
-       const u8 *ref;
-       int nbattempts = MAX_NB_ATTEMPTS;
-       int delta = (int)(ip - startlimit);
+       const U32 dictLimit = hc4->dictLimit;
+       const BYTE * const lowPrefixPtr = base + dictLimit;
+       const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+               ? hc4->lowLimit
+               : (U32)(ip - base) - (64 * KB - 1);
+       const BYTE * const dictBase = hc4->dictBase;
+       U32 matchIndex;
+       int nbAttempts = maxNbAttempts;
+       int delta = (int)(ip - iLowLimit);
 
        /* First Match */
-       lz4hc_insert(hc4, ip);
-       ref = hashtable[HASH_VALUE(ip)] + base;
-
-       while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-               && (nbattempts)) {
-               nbattempts--;
-               if (*(startlimit + longest) == *(ref - delta + longest)) {
-                       if (A32(ref) == A32(ip)) {
-                               const u8 *reft = ref + MINMATCH;
-                               const u8 *ipt = ip + MINMATCH;
-                               const u8 *startt = ip;
-
-                               while (ipt < matchlimit-(STEPSIZE - 1)) {
-                                       #if LZ4_ARCH64
-                                       u64 diff = A64(reft) ^ A64(ipt);
-                                       #else
-                                       u32 diff = A32(reft) ^ A32(ipt);
-                                       #endif
-
-                                       if (!diff) {
-                                               ipt += STEPSIZE;
-                                               reft += STEPSIZE;
-                                               continue;
+       LZ4HC_Insert(hc4, ip);
+       matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+       while ((matchIndex >= lowLimit)
+               && (nbAttempts)) {
+               nbAttempts--;
+               if (matchIndex >= dictLimit) {
+                       const BYTE *matchPtr = base + matchIndex;
+
+                       if (*(iLowLimit + longest)
+                               == *(matchPtr - delta + longest)) {
+                               if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                                       int mlt = MINMATCH + LZ4_count(
+                                               ip + MINMATCH,
+                                               matchPtr + MINMATCH,
+                                               iHighLimit);
+                                       int back = 0;
+
+                                       while ((ip + back > iLowLimit)
+                                               && (matchPtr + back > lowPrefixPtr)
+                                               && (ip[back - 1] == matchPtr[back - 1]))
+                                               back--;
+
+                                       mlt -= back;
+
+                                       if (mlt > longest) {
+                                               longest = (int)mlt;
+                                               *matchpos = matchPtr + back;
+                                               *startpos = ip + back;
                                        }
-                                       ipt += LZ4_NBCOMMONBYTES(diff);
-                                       goto _endcount;
-                               }
-                               #if LZ4_ARCH64
-                               if ((ipt < (matchlimit - 3))
-                                       && (A32(reft) == A32(ipt))) {
-                                       ipt += 4;
-                                       reft += 4;
-                               }
-                               ipt += 2;
-                               #endif
-                               if ((ipt < (matchlimit - 1))
-                                       && (A16(reft) == A16(ipt))) {
-                                       reft += 2;
                                }
-                               if ((ipt < matchlimit) && (*reft == *ipt))
-                                       ipt++;
-_endcount:
-                               reft = ref;
-
-                               while ((startt > startlimit)
-                                       && (reft > hc4->base)
-                                       && (startt[-1] == reft[-1])) {
-                                       startt--;
-                                       reft--;
-                               }
-
-                               if ((ipt - startt) > longest) {
-                                       longest = (int)(ipt - startt);
-                                       *matchpos = reft;
-                                       *startpos = startt;
+                       }
+               } else {
+                       const BYTE * const matchPtr = dictBase + matchIndex;
+
+                       if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                               size_t mlt;
+                               int back = 0;
+                               const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+                               if (vLimit > iHighLimit)
+                                       vLimit = iHighLimit;
+
+                               mlt = LZ4_count(ip + MINMATCH,
+                                       matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+                               if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+                                       mlt += LZ4_count(ip + mlt, base + dictLimit,
+                                               iHighLimit);
+                               while ((ip + back > iLowLimit)
+                                       && (matchIndex + back > lowLimit)
+                                       && (ip[back - 1] == matchPtr[back - 1]))
+                                       back--;
+
+                               mlt -= back;
+
+                               if ((int)mlt > longest) {
+                                       longest = (int)mlt;
+                                       *matchpos = base + matchIndex + back;
+                                       *startpos = ip + back;
                                }
                        }
                }
-               ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+
+               matchIndex -= DELTANEXTU16(matchIndex);
        }
+
        return longest;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-               int ml, const u8 *ref)
+static FORCE_INLINE int LZ4HC_encodeSequence(
+       const BYTE **ip,
+       BYTE **op,
+       const BYTE **anchor,
+       int matchLength,
+       const BYTE * const match,
+       limitedOutput_directive limitedOutputBuffer,
+       BYTE *oend)
 {
-       int length, len;
-       u8 *token;
+       int length;
+       BYTE *token;
 
        /* Encode Literal length */
        length = (int)(*ip - *anchor);
        token = (*op)++;
+
+       if ((limitedOutputBuffer)
+               && ((*op + (length>>8)
+                       + length + (2 + 1 + LASTLITERALS)) > oend)) {
+               /* Check output limit */
+               return 1;
+       }
        if (length >= (int)RUN_MASK) {
-               *token = (RUN_MASK << ML_BITS);
+               int len;
+
+               *token = (RUN_MASK<<ML_BITS);
                len = length - RUN_MASK;
                for (; len > 254 ; len -= 255)
                        *(*op)++ = 255;
-               *(*op)++ = (u8)len;
+               *(*op)++ = (BYTE)len;
        } else
-               *token = (length << ML_BITS);
+               *token = (BYTE)(length<<ML_BITS);
 
        /* Copy Literals */
-       LZ4_BLINDCOPY(*anchor, *op, length);
+       LZ4_wildCopy(*op, *anchor, (*op) + length);
+       *op += length;
 
        /* Encode Offset */
-       LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+       LZ4_writeLE16(*op, (U16)(*ip - match));
+       *op += 2;
 
        /* Encode MatchLength */
-       len = (int)(ml - MINMATCH);
-       if (len >= (int)ML_MASK) {
+       length = (int)(matchLength - MINMATCH);
+
+       if ((limitedOutputBuffer)
+               && (*op + (length>>8)
+                       + (1 + LASTLITERALS) > oend)) {
+               /* Check output limit */
+               return 1;
+       }
+
+       if (length >= (int)ML_MASK) {
                *token += ML_MASK;
-               len -= ML_MASK;
-               for (; len > 509 ; len -= 510) {
+               length -= ML_MASK;
+
+               for (; length > 509 ; length -= 510) {
                        *(*op)++ = 255;
                        *(*op)++ = 255;
                }
-               if (len > 254) {
-                       len -= 255;
+
+               if (length > 254) {
+                       length -= 255;
                        *(*op)++ = 255;
                }
-               *(*op)++ = (u8)len;
+
+               *(*op)++ = (BYTE)length;
        } else
-               *token += len;
+               *token += (BYTE)(length);
 
        /* Prepare next loop */
-       *ip += ml;
+       *ip += matchLength;
        *anchor = *ip;
 
        return 0;
 }
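LZ4HC_encodeSequence() emits the standard LZ4 sequence layout: one token byte carrying a 4-bit literal length and a 4-bit (matchLength - MINMATCH), the literal bytes, a 16-bit little-endian offset, and 255-valued extension bytes whenever a length overflows its 4-bit field. A worked user-space example of that layout (a sketch of the wire format, not the kernel function itself):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static size_t emit_sequence(uint8_t *out, const uint8_t *literals,
			    int lit_len, uint16_t offset, int match_len)
{
	uint8_t *op = out;
	int ml = match_len - 4;			/* MINMATCH == 4 */
	uint8_t *token = op++;

	*token = (uint8_t)((lit_len >= 15 ? 15 : lit_len) << 4);
	if (lit_len >= 15) {			/* length spills past the token */
		int len = lit_len - 15;

		for (; len >= 255; len -= 255)
			*op++ = 255;
		*op++ = (uint8_t)len;
	}
	memcpy(op, literals, lit_len);		/* the literal run */
	op += lit_len;
	*op++ = offset & 0xff;			/* LZ4_writeLE16() by hand */
	*op++ = offset >> 8;
	if (ml >= 15) {				/* match length spills too */
		*token |= 15;
		ml -= 15;
		for (; ml >= 255; ml -= 255)
			*op++ = 255;
		*op++ = (uint8_t)ml;
	} else {
		*token |= (uint8_t)ml;
	}
	return op - out;
}

int main(void)
{
	uint8_t buf[32];
	/* 5 literals, then copy 20 bytes from 7 bytes back:
	 * token 0x5f, "hello", offset 07 00, extension byte 01 */
	size_t i, n = emit_sequence(buf, (const uint8_t *)"hello", 5, 7, 20);

	for (i = 0; i < n; i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}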
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-               const char *source,
-               char *dest,
-               int isize)
+static int LZ4HC_compress_generic(
+       LZ4HC_CCtx_internal *const ctx,
+       const char * const source,
+       char * const dest,
+       int const inputSize,
+       int const maxOutputSize,
+       int compressionLevel,
+       limitedOutput_directive limit
+       )
 {
-       const u8 *ip = (const u8 *)source;
-       const u8 *anchor = ip;
-       const u8 *const iend = ip + isize;
-       const u8 *const mflimit = iend - MFLIMIT;
-       const u8 *const matchlimit = (iend - LASTLITERALS);
+       const BYTE *ip = (const BYTE *) source;
+       const BYTE *anchor = ip;
+       const BYTE * const iend = ip + inputSize;
+       const BYTE * const mflimit = iend - MFLIMIT;
+       const BYTE * const matchlimit = (iend - LASTLITERALS);
 
-       u8 *op = (u8 *)dest;
+       BYTE *op = (BYTE *) dest;
+       BYTE * const oend = op + maxOutputSize;
 
+       unsigned int maxNbAttempts;
        int ml, ml2, ml3, ml0;
-       const u8 *ref = NULL;
-       const u8 *start2 = NULL;
-       const u8 *ref2 = NULL;
-       const u8 *start3 = NULL;
-       const u8 *ref3 = NULL;
-       const u8 *start0;
-       const u8 *ref0;
-       int lastrun;
+       const BYTE *ref = NULL;
+       const BYTE *start2 = NULL;
+       const BYTE *ref2 = NULL;
+       const BYTE *start3 = NULL;
+       const BYTE *ref3 = NULL;
+       const BYTE *start0;
+       const BYTE *ref0;
+
+       /* init */
+       if (compressionLevel > LZ4HC_MAX_CLEVEL)
+               compressionLevel = LZ4HC_MAX_CLEVEL;
+       if (compressionLevel < 1)
+               compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+       maxNbAttempts = 1 << (compressionLevel - 1);
+       ctx->end += inputSize;
 
        ip++;
 
        /* Main Loop */
        while (ip < mflimit) {
-               ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+               ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+                       matchlimit, (&ref), maxNbAttempts);
                if (!ml) {
                        ip++;
                        continue;
@@ -351,51 +387,59 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
                start0 = ip;
                ref0 = ref;
                ml0 = ml;
-_search2:
-               if (ip+ml < mflimit)
-                       ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-                               ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+               if (ip + ml < mflimit)
+                       ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                               ip + ml - 2, ip + 0,
+                               matchlimit, ml, &ref2,
+                               &start2, maxNbAttempts);
                else
                        ml2 = ml;
-               /* No better match */
+
                if (ml2 == ml) {
-                       lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+                       /* No better match */
+                       if (LZ4HC_encodeSequence(&ip, &op,
+                               &anchor, ml, ref, limit, oend))
+                               return 0;
                        continue;
                }
 
                if (start0 < ip) {
-                       /* empirical */
                        if (start2 < ip + ml0) {
+                               /* empirical */
                                ip = start0;
                                ref = ref0;
                                ml = ml0;
                        }
                }
-               /*
-                * Here, start0==ip
-                * First Match too small : removed
-                */
+
+               /* Here, start0 == ip */
                if ((start2 - ip) < 3) {
+                       /* First Match too small : removed */
                        ml = ml2;
                        ip = start2;
                        ref = ref2;
-                       goto _search2;
+                       goto _Search2;
                }
 
-_search3:
+_Search3:
                /*
-                * Currently we have :
-                * ml2 > ml1, and
-                * ip1+3 <= ip2 (usually < ip1+ml1)
-                */
+                * Currently we have:
+                * ml2 > ml1, and
+                * ip1 + 3 <= ip2 (usually < ip1 + ml1)
+                */
                if ((start2 - ip) < OPTIMAL_ML) {
                        int correction;
                        int new_ml = ml;
+
                        if (new_ml > OPTIMAL_ML)
                                new_ml = OPTIMAL_ML;
                        if (ip + new_ml > start2 + ml2 - MINMATCH)
                                new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+
                        correction = new_ml - (int)(start2 - ip);
+
                        if (correction > 0) {
                                start2 += correction;
                                ref2 += correction;
@@ -403,39 +447,44 @@ _search3:
                        }
                }
                /*
-                * Now, we have start2 = ip+new_ml,
-                * with new_ml=min(ml, OPTIMAL_ML=18)
+                * Now, we have start2 = ip + new_ml,
+                * with new_ml = min(ml, OPTIMAL_ML = 18)
                 */
+
                if (start2 + ml2 < mflimit)
-                       ml3 = lz4hc_insertandgetwidermatch(ctx,
-                               start2 + ml2 - 3, start2, matchlimit,
-                               ml2, &ref3, &start3);
+                       ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                               start2 + ml2 - 3, start2,
+                               matchlimit, ml2, &ref3, &start3,
+                               maxNbAttempts);
                else
                        ml3 = ml2;
 
-               /* No better match : 2 sequences to encode */
                if (ml3 == ml2) {
+                       /* No better match : 2 sequences to encode */
                        /* ip & ref are known; Now for ml */
-                       if (start2 < ip+ml)
+                       if (start2 < ip + ml)
                                ml = (int)(start2 - ip);
-
                        /* Now, encode 2 sequences */
-                       lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+                       if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+                               ml, ref, limit, oend))
+                               return 0;
                        ip = start2;
-                       lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+                       if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+                               ml2, ref2, limit, oend))
+                               return 0;
                        continue;
                }
 
-               /* Not enough space for match 2 : remove it */
                if (start3 < ip + ml + 3) {
-                       /*
-                        * can write Seq1 immediately ==> Seq2 is removed,
-                        * so Seq3 becomes Seq1
-                        */
+                       /* Not enough space for match 2 : remove it */
                        if (start3 >= (ip + ml)) {
+                               /* can write Seq1 immediately
+                                * ==> Seq2 is removed,
+                                * so Seq3 becomes Seq1
+                                */
                                if (start2 < ip + ml) {
-                                       int correction =
-                                               (int)(ip + ml - start2);
+                                       int correction = (int)(ip + ml - start2);
+
                                        start2 += correction;
                                        ref2 += correction;
                                        ml2 -= correction;
@@ -446,35 +495,38 @@ _search3:
                                        }
                                }
 
-                               lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-                               ip  = start3;
+                               if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+                                       ml, ref, limit, oend))
+                                       return 0;
+                               ip = start3;
                                ref = ref3;
-                               ml  = ml3;
+                               ml = ml3;
 
                                start0 = start2;
                                ref0 = ref2;
                                ml0 = ml2;
-                               goto _search2;
+                               goto _Search2;
                        }
 
                        start2 = start3;
                        ref2 = ref3;
                        ml2 = ml3;
-                       goto _search3;
+                       goto _Search3;
                }
 
                /*
-                * OK, now we have 3 ascending matches; let's write at least
-                * the first one ip & ref are known; Now for ml
-                */
+                * OK, now we have 3 ascending matches;
+                * let's write at least the first one.
+                * ip & ref are known; now for ml
+                */
                if (start2 < ip + ml) {
                        if ((start2 - ip) < (int)ML_MASK) {
                                int correction;
+
                                if (ml > OPTIMAL_ML)
                                        ml = OPTIMAL_ML;
                                if (ip + ml > start2 + ml2 - MINMATCH)
-                                       ml = (int)(start2 - ip) + ml2
-                                               - MINMATCH;
+                                       ml = (int)(start2 - ip) + ml2 - MINMATCH;
                                correction = ml - (int)(start2 - ip);
                                if (correction > 0) {
                                        start2 += correction;
@@ -484,7 +536,9 @@ _search3:
                        } else
                                ml = (int)(start2 - ip);
                }
-               lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+               if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+                       ref, limit, oend))
+                       return 0;
 
                ip = start2;
                ref = ref2;
@@ -494,46 +548,222 @@ _search3:
                ref2 = ref3;
                ml2 = ml3;
 
-               goto _search3;
+               goto _Search3;
        }
 
        /* Encode Last Literals */
-       lastrun = (int)(iend - anchor);
-       if (lastrun >= (int)RUN_MASK) {
-               *op++ = (RUN_MASK << ML_BITS);
-               lastrun -= RUN_MASK;
-               for (; lastrun > 254 ; lastrun -= 255)
-                       *op++ = 255;
-               *op++ = (u8) lastrun;
-       } else
-               *op++ = (lastrun << ML_BITS);
-       memcpy(op, anchor, iend - anchor);
-       op += iend - anchor;
+       {
+               int lastRun = (int)(iend - anchor);
+
+               if ((limit)
+                       && (((char *)op - dest) + lastRun + 1
+                               + ((lastRun + 255 - RUN_MASK)/255)
+                                       > (U32)maxOutputSize)) {
+                       /* Check output limit */
+                       return 0;
+               }
+               if (lastRun >= (int)RUN_MASK) {
+                       *op++ = (RUN_MASK<<ML_BITS);
+                       lastRun -= RUN_MASK;
+                       for (; lastRun > 254 ; lastRun -= 255)
+                               *op++ = 255;
+                       *op++ = (BYTE) lastRun;
+               } else
+                       *op++ = (BYTE)(lastRun<<ML_BITS);
+               memcpy(op, anchor, iend - anchor);
+               op += iend - anchor;
+       }
+
        /* End */
        return (int) (((char *)op) - dest);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-                       unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_HC_extStateHC(
+       void *state,
+       const char *src,
+       char *dst,
+       int srcSize,
+       int maxDstSize,
+       int compressionLevel)
 {
-       int ret = -1;
-       int out_len = 0;
+       LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
 
-       struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-       lz4hc_init(hc4, (const u8 *)src);
-       out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-               (char *)dst, (int)src_len);
+       if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+               /* Error : state is not aligned
+                * for pointers (32 or 64 bits)
+                */
+               return 0;
+       }
 
-       if (out_len < 0)
-               goto exit;
+       LZ4HC_init(ctx, (const BYTE *)src);
 
-       *dst_len = out_len;
-       return 0;
+       if (maxDstSize < LZ4_compressBound(srcSize))
+               return LZ4HC_compress_generic(ctx, src, dst,
+                       srcSize, maxDstSize, compressionLevel, limitedOutput);
+       else
+               return LZ4HC_compress_generic(ctx, src, dst,
+                       srcSize, maxDstSize, compressionLevel, noLimit);
+}
+
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+       int maxDstSize, int compressionLevel, void *wrkmem)
+{
+       return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+               srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
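For a one-shot caller the contract is simple: hand in LZ4HC_MEM_COMPRESS bytes of scratch memory and check the return value, where 0 means the output did not fit (the limitedOutput path above returns 1 from the encoder and 0 from LZ4HC_compress_generic()). A hedged kernel-side sketch, assuming the LZ4HC_MEM_COMPRESS and LZ4HC_DEFAULT_CLEVEL constants from the updated <linux/lz4.h>; try_hc_compress() is a hypothetical helper:

#include <linux/errno.h>
#include <linux/lz4.h>
#include <linux/vmalloc.h>

/* hypothetical helper: compress one buffer, or fail cleanly */
static int try_hc_compress(const char *src, int src_len,
			   char *dst, int dst_capacity)
{
	void *wrkmem = vmalloc(LZ4HC_MEM_COMPRESS);
	int out_len;

	if (!wrkmem)
		return -ENOMEM;
	out_len = LZ4_compress_HC(src, dst, src_len, dst_capacity,
				  LZ4HC_DEFAULT_CLEVEL, wrkmem);
	vfree(wrkmem);
	return out_len ? out_len : -E2BIG;	/* 0 means it did not fit */
}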
+
+/**************************************
+ *     Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+       LZ4_streamHCPtr->internal_donotuse.base = NULL;
+       LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+       const char *dictionary,
+       int dictSize)
+{
+       LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+       if (dictSize > 64 * KB) {
+               dictionary += dictSize - 64 * KB;
+               dictSize = 64 * KB;
+       }
+       LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+       if (dictSize >= 4)
+               LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+       ctxPtr->end = (const BYTE *)dictionary + dictSize;
+       return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
 
-exit:
-       return ret;
+/* compression */
+
+static void LZ4HC_setExternalDict(
+       LZ4HC_CCtx_internal *ctxPtr,
+       const BYTE *newBlock)
+{
+       if (ctxPtr->end >= ctxPtr->base + 4) {
+               /* Referencing remaining dictionary content */
+               LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+       }
+
+       /*
+        * Only one memory segment for extDict,
+        * so any previous extDict is lost at this stage
+        */
+       ctxPtr->lowLimit        = ctxPtr->dictLimit;
+       ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+       ctxPtr->dictBase        = ctxPtr->base;
+       ctxPtr->base = newBlock - ctxPtr->dictLimit;
+       ctxPtr->end     = newBlock;
+       /* match referencing will resume from there */
+       ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+       LZ4_streamHC_t *LZ4_streamHCPtr,
+       const char *source,
+       char *dest,
+       int inputSize,
+       int maxOutputSize,
+       limitedOutput_directive limit)
+{
+       LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+       /* auto-init if forgotten */
+       if (ctxPtr->base == NULL)
+               LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+       /* Check overflow */
+       if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+               size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+                       - ctxPtr->dictLimit;
+               if (dictSize > 64 * KB)
+                       dictSize = 64 * KB;
+               LZ4_loadDictHC(LZ4_streamHCPtr,
+                       (const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+       }
+
+       /* Check if blocks follow each other */
+       if ((const BYTE *)source != ctxPtr->end)
+               LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+       /* Check overlapping input/dictionary space */
+       {
+               const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+               const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+               const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+               if ((sourceEnd > dictBegin)
+                       && ((const BYTE *)source < dictEnd)) {
+                       if (sourceEnd > dictEnd)
+                               sourceEnd = dictEnd;
+                       ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+                       if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+                               ctxPtr->lowLimit = ctxPtr->dictLimit;
+               }
+       }
+
+       return LZ4HC_compress_generic(ctxPtr, source, dest,
+               inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+       LZ4_streamHC_t *LZ4_streamHCPtr,
+       const char *source,
+       char *dest,
+       int inputSize,
+       int maxOutputSize)
+{
+       if (maxOutputSize < LZ4_compressBound(inputSize))
+               return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+                       source, dest, inputSize, maxOutputSize, limitedOutput);
+       else
+               return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+                       source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
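The streaming entry points compose in the usual way: reset the stream once, feed dependent blocks through LZ4_compress_HC_continue(), and use LZ4_loadDictHC()/LZ4_saveDictHC() when the dictionary must survive buffer reuse. A minimal kernel-style sketch of that call sequence; the helper names are hypothetical:

#include <linux/lz4.h>

static void hc_stream_setup(LZ4_streamHC_t *stream)
{
	LZ4_resetStreamHC(stream, LZ4HC_DEFAULT_CLEVEL);
}

static int hc_stream_block(LZ4_streamHC_t *stream,
			   const char *block, int block_len,
			   char *dst, int dst_capacity)
{
	/* 0 means the compressed block did not fit in dst_capacity */
	return LZ4_compress_HC_continue(stream, block, dst,
					block_len, dst_capacity);
}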
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+       LZ4_streamHC_t *LZ4_streamHCPtr,
+       char *safeBuffer,
+       int dictSize)
+{
+       LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+       int const prefixSize = (int)(streamPtr->end
+               - (streamPtr->base + streamPtr->dictLimit));
+
+       if (dictSize > 64 * KB)
+               dictSize = 64 * KB;
+       if (dictSize < 4)
+               dictSize = 0;
+       if (dictSize > prefixSize)
+               dictSize = prefixSize;
+
+       memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+
+       {
+               U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+               streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+               streamPtr->base = streamPtr->end - endIndex;
+               streamPtr->dictLimit = endIndex - dictSize;
+               streamPtr->lowLimit = endIndex - dictSize;
+
+               if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+                       streamPtr->nextToUpdate = streamPtr->dictLimit;
+       }
+       return dictSize;
 }
-EXPORT_SYMBOL(lz4hc_compress);
+EXPORT_SYMBOL(LZ4_saveDictHC);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
index c8cebb1370765fac92170cd0d2f8ed1ede0a01da..9c21000df0b5ea1b99a83fd73a338073cb7fd016 100644 (file)
@@ -176,13 +176,12 @@ static int percpu_counter_cpu_dead(unsigned int cpu)
        spin_lock_irq(&percpu_counters_lock);
        list_for_each_entry(fbc, &percpu_counters, list) {
                s32 *pcount;
-               unsigned long flags;
 
-               raw_spin_lock_irqsave(&fbc->lock, flags);
+               raw_spin_lock(&fbc->lock);
                pcount = per_cpu_ptr(fbc->counters, cpu);
                fbc->count += *pcount;
                *pcount = 0;
-               raw_spin_unlock_irqrestore(&fbc->lock, flags);
+               raw_spin_unlock(&fbc->lock);
        }
        spin_unlock_irq(&percpu_counters_lock);
 #endif
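This percpu_counter hunk removes redundant flag juggling: the whole walk already runs under spin_lock_irq(&percpu_counters_lock), so interrupts stay disabled for its duration and the per-counter lock only needs the plain raw_spin_lock()/raw_spin_unlock() pair. The resulting locking shape, in sketch form (kernel context assumed, identifiers as in the hunk above):

spin_lock_irq(&percpu_counters_lock);		/* IRQs off from here on */
list_for_each_entry(fbc, &percpu_counters, list) {
	raw_spin_lock(&fbc->lock);		/* no flags to save/restore */
	/* fold the dead CPU's partial count into fbc->count */
	raw_spin_unlock(&fbc->lock);
}
spin_unlock_irq(&percpu_counters_lock);		/* IRQs back on */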
index 84812a9fb16fbbd1409315ea3752fb9a1e3e39ef..5ed506d648c4e53ee955e9c19b942fd0d666eee1 100644 (file)
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/idr.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/radix-tree.h>
+#include <linux/kmemleak.h>
 #include <linux/percpu.h>
+#include <linux/preempt.h>             /* in_interrupt() */
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
 #include <linux/slab.h>
-#include <linux/kmemleak.h>
-#include <linux/cpu.h>
 #include <linux/string.h>
-#include <linux/bitops.h>
-#include <linux/rcupdate.h>
-#include <linux/preempt.h>             /* in_interrupt() */
 
 
 /* Number of nodes in fully populated tree of given height */
@@ -59,12 +60,29 @@ static struct kmem_cache *radix_tree_node_cachep;
  */
 #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
 
+/*
+ * The IDR does not have to be as high as the radix tree since it uses
+ * signed integers, not unsigned longs.
+ */
+#define IDR_INDEX_BITS         (8 /* CHAR_BIT */ * sizeof(int) - 1)
+#define IDR_MAX_PATH           (DIV_ROUND_UP(IDR_INDEX_BITS, \
+                                               RADIX_TREE_MAP_SHIFT))
+#define IDR_PRELOAD_SIZE       (IDR_MAX_PATH * 2 - 1)
+
+/*
+ * The IDA is even shorter since it uses a bitmap at the last level.
+ */
+#define IDA_INDEX_BITS         (8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
+#define IDA_MAX_PATH           (DIV_ROUND_UP(IDA_INDEX_BITS, \
+                                               RADIX_TREE_MAP_SHIFT))
+#define IDA_PRELOAD_SIZE       (IDA_MAX_PATH * 2 - 1)
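The IDR arithmetic is easy to check by hand: an int contributes 31 usable index bits, each radix-tree level consumes RADIX_TREE_MAP_SHIFT of them (typically 6), so an IDR path is at most ceil(31 / 6) = 6 nodes deep and the preload pool needs 2 * 6 - 1 = 11 nodes; the IDA path is shorter still because its leaves are whole bitmaps. A worked instance of that calculation under the shift-of-6 assumption:

#include <stdio.h>

#define MAP_SHIFT	6	/* assumed RADIX_TREE_MAP_SHIFT */
#define IDR_BITS	(8 * (int)sizeof(int) - 1)
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int path = DIV_ROUND_UP(IDR_BITS, MAP_SHIFT);

	printf("IDR_MAX_PATH = %d, IDR_PRELOAD_SIZE = %d\n",
	       path, 2 * path - 1);	/* prints 6 and 11 */
	return 0;
}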
+
 /*
  * Per-cpu pool of preloaded nodes
  */
 struct radix_tree_preload {
        unsigned nr;
-       /* nodes->private_data points to next preallocated node */
+       /* nodes->parent points to next preallocated node */
        struct radix_tree_node *nodes;
 };
 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
@@ -83,35 +101,38 @@ static inline void *node_to_entry(void *ptr)
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 /* Sibling slots point directly to another slot in the same node */
-static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node)
+static inline
+bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
 {
-       void **ptr = node;
+       void __rcu **ptr = node;
        return (parent->slots <= ptr) &&
                        (ptr < parent->slots + RADIX_TREE_MAP_SIZE);
 }
 #else
-static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node)
+static inline
+bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
 {
        return false;
 }
 #endif
 
-static inline unsigned long get_slot_offset(struct radix_tree_node *parent,
-                                                void **slot)
+static inline unsigned long
+get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
 {
        return slot - parent->slots;
 }
 
-static unsigned int radix_tree_descend(struct radix_tree_node *parent,
+static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
                        struct radix_tree_node **nodep, unsigned long index)
 {
        unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
-       void **entry = rcu_dereference_raw(parent->slots[offset]);
+       void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
        if (radix_tree_is_internal_node(entry)) {
                if (is_sibling_entry(parent, entry)) {
-                       void **sibentry = (void **) entry_to_node(entry);
+                       void __rcu **sibentry;
+                       sibentry = (void __rcu **) entry_to_node(entry);
                        offset = get_slot_offset(parent, sibentry);
                        entry = rcu_dereference_raw(*sibentry);
                }
@@ -122,7 +143,7 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent,
        return offset;
 }
 
-static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
+static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
 {
        return root->gfp_mask & __GFP_BITS_MASK;
 }
@@ -139,42 +160,48 @@ static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
        __clear_bit(offset, node->tags[tag]);
 }
 
-static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
+static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
                int offset)
 {
        return test_bit(offset, node->tags[tag]);
 }
 
-static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
+static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
 {
-       root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
+       root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
 }
 
 static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
 {
-       root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
+       root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
 }
 
 static inline void root_tag_clear_all(struct radix_tree_root *root)
 {
-       root->gfp_mask &= __GFP_BITS_MASK;
+       root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
+}
+
+static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
+{
+       return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
 }
 
-static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
+static inline unsigned root_tags_get(const struct radix_tree_root *root)
 {
-       return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
+       return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
 }
 
-static inline unsigned root_tags_get(struct radix_tree_root *root)
+static inline bool is_idr(const struct radix_tree_root *root)
 {
-       return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT;
+       return !!(root->gfp_mask & ROOT_IS_IDR);
 }
 
 /*
  * Returns 1 if any slot in the node has this tag set.
  * Otherwise returns 0.
  */
-static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
+static inline int any_tag_set(const struct radix_tree_node *node,
+                                                       unsigned int tag)
 {
        unsigned idx;
        for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
@@ -184,6 +211,11 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
        return 0;
 }
 
+static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
+{
+       bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
+}
+
 /**
  * radix_tree_find_next_bit - find the next set bit in a memory region
  *
@@ -232,11 +264,18 @@ static inline unsigned long shift_maxindex(unsigned int shift)
        return (RADIX_TREE_MAP_SIZE << shift) - 1;
 }
 
-static inline unsigned long node_maxindex(struct radix_tree_node *node)
+static inline unsigned long node_maxindex(const struct radix_tree_node *node)
 {
        return shift_maxindex(node->shift);
 }
 
+static unsigned long next_index(unsigned long index,
+                               const struct radix_tree_node *node,
+                               unsigned long offset)
+{
+       return (index & ~node_maxindex(node)) + (offset << node->shift);
+}
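next_index() computes the first index covered by the slot at `offset` within `node`: masking with ~node_maxindex(node) clears the bits the node spans, and offset << node->shift re-adds the slot's base. A worked instance, assuming the common RADIX_TREE_MAP_SHIFT of 6 (64 slots per node):

#include <stdio.h>

#define MAP_SIZE 64UL	/* assumed RADIX_TREE_MAP_SIZE */

static unsigned long node_max(unsigned int shift)
{
	return (MAP_SIZE << shift) - 1;		/* shift_maxindex() */
}

static unsigned long next_idx(unsigned long index, unsigned int shift,
			      unsigned long offset)
{
	return (index & ~node_max(shift)) + (offset << shift);
}

int main(void)
{
	/* node with shift 6 spans 4096 indices; slot 2 starts at
	 * (5000 & ~4095) + (2 << 6) = 4096 + 128 = 4224 */
	printf("%lu\n", next_idx(5000, 6, 2));	/* prints 4224 */
	return 0;
}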
+
 #ifndef __KERNEL__
 static void dump_node(struct radix_tree_node *node, unsigned long index)
 {
@@ -275,11 +314,59 @@ static void radix_tree_dump(struct radix_tree_root *root)
 {
        pr_debug("radix root: %p rnode %p tags %x\n",
                        root, root->rnode,
-                       root->gfp_mask >> __GFP_BITS_SHIFT);
+                       root->gfp_mask >> ROOT_TAG_SHIFT);
        if (!radix_tree_is_internal_node(root->rnode))
                return;
        dump_node(entry_to_node(root->rnode), 0);
 }
+
+static void dump_ida_node(void *entry, unsigned long index)
+{
+       unsigned long i;
+
+       if (!entry)
+               return;
+
+       if (radix_tree_is_internal_node(entry)) {
+               struct radix_tree_node *node = entry_to_node(entry);
+
+               pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
+                       node, node->offset, index * IDA_BITMAP_BITS,
+                       ((index | node_maxindex(node)) + 1) *
+                               IDA_BITMAP_BITS - 1,
+                       node->parent, node->tags[0][0], node->shift,
+                       node->count);
+               for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
+                       dump_ida_node(node->slots[i],
+                                       index | (i << node->shift));
+       } else if (radix_tree_exceptional_entry(entry)) {
+               pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
+                               entry, (int)(index & RADIX_TREE_MAP_MASK),
+                               index * IDA_BITMAP_BITS,
+                               index * IDA_BITMAP_BITS + BITS_PER_LONG -
+                                       RADIX_TREE_EXCEPTIONAL_SHIFT,
+                               (unsigned long)entry >>
+                                       RADIX_TREE_EXCEPTIONAL_SHIFT);
+       } else {
+               struct ida_bitmap *bitmap = entry;
+
+               pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
+                               (int)(index & RADIX_TREE_MAP_MASK),
+                               index * IDA_BITMAP_BITS,
+                               (index + 1) * IDA_BITMAP_BITS - 1);
+               for (i = 0; i < IDA_BITMAP_LONGS; i++)
+                       pr_cont(" %lx", bitmap->bitmap[i]);
+               pr_cont("\n");
+       }
+}
+
+static void ida_dump(struct ida *ida)
+{
+       struct radix_tree_root *root = &ida->ida_rt;
+       pr_debug("ida: %p node %p free %d\n", ida, root->rnode,
+                               root->gfp_mask >> ROOT_TAG_SHIFT);
+       dump_ida_node(root->rnode, 0);
+}
 #endif
 
 /*
@@ -287,13 +374,12 @@ static void radix_tree_dump(struct radix_tree_root *root)
  * that the caller has pinned this thread of control to the current CPU.
  */
 static struct radix_tree_node *
-radix_tree_node_alloc(struct radix_tree_root *root,
-                       struct radix_tree_node *parent,
+radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
+                       struct radix_tree_root *root,
                        unsigned int shift, unsigned int offset,
                        unsigned int count, unsigned int exceptional)
 {
        struct radix_tree_node *ret = NULL;
-       gfp_t gfp_mask = root_gfp_mask(root);
 
        /*
         * Preload code isn't irq safe and it doesn't make sense to use
@@ -321,8 +407,7 @@ radix_tree_node_alloc(struct radix_tree_root *root,
                rtp = this_cpu_ptr(&radix_tree_preloads);
                if (rtp->nr) {
                        ret = rtp->nodes;
-                       rtp->nodes = ret->private_data;
-                       ret->private_data = NULL;
+                       rtp->nodes = ret->parent;
                        rtp->nr--;
                }
                /*
@@ -336,11 +421,12 @@ radix_tree_node_alloc(struct radix_tree_root *root,
 out:
        BUG_ON(radix_tree_is_internal_node(ret));
        if (ret) {
-               ret->parent = parent;
                ret->shift = shift;
                ret->offset = offset;
                ret->count = count;
                ret->exceptional = exceptional;
+               ret->parent = parent;
+               ret->root = root;
        }
        return ret;
 }
@@ -399,7 +485,7 @@ static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
                preempt_disable();
                rtp = this_cpu_ptr(&radix_tree_preloads);
                if (rtp->nr < nr) {
-                       node->private_data = rtp->nodes;
+                       node->parent = rtp->nodes;
                        rtp->nodes = node;
                        rtp->nr++;
                } else {
@@ -510,7 +596,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
        return __radix_tree_preload(gfp_mask, nr_nodes);
 }
 
-static unsigned radix_tree_load_root(struct radix_tree_root *root,
+static unsigned radix_tree_load_root(const struct radix_tree_root *root,
                struct radix_tree_node **nodep, unsigned long *maxindex)
 {
        struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
@@ -530,10 +616,10 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root,
 /*
  *     Extend a radix tree so it can store key @index.
  */
-static int radix_tree_extend(struct radix_tree_root *root,
+static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
                                unsigned long index, unsigned int shift)
 {
-       struct radix_tree_node *slot;
+       void *entry;
        unsigned int maxshift;
        int tag;
 
@@ -542,32 +628,44 @@ static int radix_tree_extend(struct radix_tree_root *root,
        while (index > shift_maxindex(maxshift))
                maxshift += RADIX_TREE_MAP_SHIFT;
 
-       slot = root->rnode;
-       if (!slot)
+       entry = rcu_dereference_raw(root->rnode);
+       if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
                goto out;
 
        do {
-               struct radix_tree_node *node = radix_tree_node_alloc(root,
-                                                       NULL, shift, 0, 1, 0);
+               struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
+                                                       root, shift, 0, 1, 0);
                if (!node)
                        return -ENOMEM;
 
-               /* Propagate the aggregated tag info into the new root */
-               for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
-                       if (root_tag_get(root, tag))
-                               tag_set(node, tag, 0);
+               if (is_idr(root)) {
+                       all_tag_set(node, IDR_FREE);
+                       if (!root_tag_get(root, IDR_FREE)) {
+                               tag_clear(node, IDR_FREE, 0);
+                               root_tag_set(root, IDR_FREE);
+                       }
+               } else {
+                       /* Propagate the aggregated tag info to the new child */
+                       for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+                               if (root_tag_get(root, tag))
+                                       tag_set(node, tag, 0);
+                       }
                }
 
                BUG_ON(shift > BITS_PER_LONG);
-               if (radix_tree_is_internal_node(slot)) {
-                       entry_to_node(slot)->parent = node;
-               } else if (radix_tree_exceptional_entry(slot)) {
+               if (radix_tree_is_internal_node(entry)) {
+                       entry_to_node(entry)->parent = node;
+               } else if (radix_tree_exceptional_entry(entry)) {
                        /* Moving an exceptional root->rnode to a node */
                        node->exceptional = 1;
                }
-               node->slots[0] = slot;
-               slot = node_to_entry(node);
-               rcu_assign_pointer(root->rnode, slot);
+               /*
+                * entry was already in the radix tree, so we do not need
+                * rcu_assign_pointer here
+                */
+               node->slots[0] = (void __rcu *)entry;
+               entry = node_to_entry(node);
+               rcu_assign_pointer(root->rnode, entry);
                shift += RADIX_TREE_MAP_SHIFT;
        } while (shift <= maxshift);
 out:
@@ -578,12 +676,14 @@ out:
  *     radix_tree_shrink    -    shrink radix tree to minimum height
  *     @root           radix tree root
  */
-static inline void radix_tree_shrink(struct radix_tree_root *root,
+static inline bool radix_tree_shrink(struct radix_tree_root *root,
                                     radix_tree_update_node_t update_node,
                                     void *private)
 {
+       bool shrunk = false;
+
        for (;;) {
-               struct radix_tree_node *node = root->rnode;
+               struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
                struct radix_tree_node *child;
 
                if (!radix_tree_is_internal_node(node))
@@ -597,7 +697,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root,
                 */
                if (node->count != 1)
                        break;
-               child = node->slots[0];
+               child = rcu_dereference_raw(node->slots[0]);
                if (!child)
                        break;
                if (!radix_tree_is_internal_node(child) && node->shift)
@@ -613,7 +713,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root,
                 * (node->slots[0]), it will be safe to dereference the new
                 * one (root->rnode) as far as dependent read barriers go.
                 */
-               root->rnode = child;
+               root->rnode = (void __rcu *)child;
+               if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
+                       root_tag_clear(root, IDR_FREE);
 
                /*
                 * We have a dilemma here. The node's slot[0] must not be
@@ -635,27 +737,34 @@ static inline void radix_tree_shrink(struct radix_tree_root *root,
                 */
                node->count = 0;
                if (!radix_tree_is_internal_node(child)) {
-                       node->slots[0] = RADIX_TREE_RETRY;
+                       node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
                        if (update_node)
                                update_node(node, private);
                }
 
                WARN_ON_ONCE(!list_empty(&node->private_list));
                radix_tree_node_free(node);
+               shrunk = true;
        }
+
+       return shrunk;
 }
 
-static void delete_node(struct radix_tree_root *root,
+static bool delete_node(struct radix_tree_root *root,
                        struct radix_tree_node *node,
                        radix_tree_update_node_t update_node, void *private)
 {
+       bool deleted = false;
+
        do {
                struct radix_tree_node *parent;
 
                if (node->count) {
-                       if (node == entry_to_node(root->rnode))
-                               radix_tree_shrink(root, update_node, private);
-                       return;
+                       if (node_to_entry(node) ==
+                                       rcu_dereference_raw(root->rnode))
+                               deleted |= radix_tree_shrink(root, update_node,
+                                                               private);
+                       return deleted;
                }
 
                parent = node->parent;
@@ -663,15 +772,23 @@ static void delete_node(struct radix_tree_root *root,
                        parent->slots[node->offset] = NULL;
                        parent->count--;
                } else {
-                       root_tag_clear_all(root);
+                       /*
+                        * Shouldn't the tags already have all been cleared
+                        * by the caller?
+                        */
+                       if (!is_idr(root))
+                               root_tag_clear_all(root);
                        root->rnode = NULL;
                }
 
                WARN_ON_ONCE(!list_empty(&node->private_list));
                radix_tree_node_free(node);
+               deleted = true;
 
                node = parent;
        } while (node);
+
+       return deleted;
 }
 
 /**
@@ -693,13 +810,14 @@ static void delete_node(struct radix_tree_root *root,
  */
 int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
                        unsigned order, struct radix_tree_node **nodep,
-                       void ***slotp)
+                       void __rcu ***slotp)
 {
        struct radix_tree_node *node = NULL, *child;
-       void **slot = (void **)&root->rnode;
+       void __rcu **slot = (void __rcu **)&root->rnode;
        unsigned long maxindex;
        unsigned int shift, offset = 0;
        unsigned long max = index | ((1UL << order) - 1);
+       gfp_t gfp = root_gfp_mask(root);
 
        shift = radix_tree_load_root(root, &child, &maxindex);
 
@@ -707,18 +825,18 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
        if (order > 0 && max == ((1UL << order) - 1))
                max++;
        if (max > maxindex) {
-               int error = radix_tree_extend(root, max, shift);
+               int error = radix_tree_extend(root, gfp, max, shift);
                if (error < 0)
                        return error;
                shift = error;
-               child = root->rnode;
+               child = rcu_dereference_raw(root->rnode);
        }
 
        while (shift > order) {
                shift -= RADIX_TREE_MAP_SHIFT;
                if (child == NULL) {
                        /* Have to add a child node.  */
-                       child = radix_tree_node_alloc(root, node, shift,
+                       child = radix_tree_node_alloc(gfp, node, root, shift,
                                                        offset, 0, 0);
                        if (!child)
                                return -ENOMEM;
@@ -741,7 +859,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
        return 0;
 }
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
 /*
  * Free any nodes below this node.  The tree is presumed to not need
  * shrinking, and any user data in the tree is presumed to not need a
@@ -757,7 +874,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
        struct radix_tree_node *child = entry_to_node(node);
 
        for (;;) {
-               void *entry = child->slots[offset];
+               void *entry = rcu_dereference_raw(child->slots[offset]);
                if (radix_tree_is_internal_node(entry) &&
                                        !is_sibling_entry(child, entry)) {
                        child = entry_to_node(entry);
@@ -777,8 +894,9 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
        }
 }
 
-static inline int insert_entries(struct radix_tree_node *node, void **slot,
-                               void *item, unsigned order, bool replace)
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+static inline int insert_entries(struct radix_tree_node *node,
+               void __rcu **slot, void *item, unsigned order, bool replace)
 {
        struct radix_tree_node *child;
        unsigned i, n, tag, offset, tags = 0;
@@ -813,7 +931,7 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
        }
 
        for (i = 0; i < n; i++) {
-               struct radix_tree_node *old = slot[i];
+               struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
                if (i) {
                        rcu_assign_pointer(slot[i], child);
                        for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
@@ -840,8 +958,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
        return n;
 }
 #else
-static inline int insert_entries(struct radix_tree_node *node, void **slot,
-                               void *item, unsigned order, bool replace)
+static inline int insert_entries(struct radix_tree_node *node,
+               void __rcu **slot, void *item, unsigned order, bool replace)
 {
        if (*slot)
                return -EEXIST;
@@ -868,7 +986,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
                        unsigned order, void *item)
 {
        struct radix_tree_node *node;
-       void **slot;
+       void __rcu **slot;
        int error;
 
        BUG_ON(radix_tree_is_internal_node(item));
@@ -908,16 +1026,17 @@ EXPORT_SYMBOL(__radix_tree_insert);
  *     allocated and @root->rnode is used as a direct slot instead of
  *     pointing to a node, in which case *@nodep will be NULL.
  */
-void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
-                         struct radix_tree_node **nodep, void ***slotp)
+void *__radix_tree_lookup(const struct radix_tree_root *root,
+                         unsigned long index, struct radix_tree_node **nodep,
+                         void __rcu ***slotp)
 {
        struct radix_tree_node *node, *parent;
        unsigned long maxindex;
-       void **slot;
+       void __rcu **slot;
 
  restart:
        parent = NULL;
-       slot = (void **)&root->rnode;
+       slot = (void __rcu **)&root->rnode;
        radix_tree_load_root(root, &node, &maxindex);
        if (index > maxindex)
                return NULL;
@@ -952,9 +1071,10 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
  *     exclusive from other writers. Any dereference of the slot must be done
  *     using radix_tree_deref_slot.
  */
-void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
+                               unsigned long index)
 {
-       void **slot;
+       void __rcu **slot;
 
        if (!__radix_tree_lookup(root, index, NULL, &slot))
                return NULL;
@@ -974,75 +1094,76 @@ EXPORT_SYMBOL(radix_tree_lookup_slot);
  *     them safely). No RCU barriers are required to access or modify the
  *     returned item, however.
  */
-void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
 {
        return __radix_tree_lookup(root, index, NULL, NULL);
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
-static inline int slot_count(struct radix_tree_node *node,
-                                               void **slot)
+static inline void replace_sibling_entries(struct radix_tree_node *node,
+                               void __rcu **slot, int count, int exceptional)
 {
-       int n = 1;
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
        void *ptr = node_to_entry(slot);
-       unsigned offset = get_slot_offset(node, slot);
-       int i;
+       unsigned offset = get_slot_offset(node, slot) + 1;
 
-       for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
-               if (node->slots[offset + i] != ptr)
+       while (offset < RADIX_TREE_MAP_SIZE) {
+               if (rcu_dereference_raw(node->slots[offset]) != ptr)
                        break;
-               n++;
+               if (count < 0) {
+                       node->slots[offset] = NULL;
+                       node->count--;
+               }
+               node->exceptional += exceptional;
+               offset++;
        }
 #endif
-       return n;
 }
 
-static void replace_slot(struct radix_tree_root *root,
-                        struct radix_tree_node *node,
-                        void **slot, void *item,
-                        bool warn_typeswitch)
+static void replace_slot(void __rcu **slot, void *item,
+               struct radix_tree_node *node, int count, int exceptional)
 {
-       void *old = rcu_dereference_raw(*slot);
-       int count, exceptional;
-
-       WARN_ON_ONCE(radix_tree_is_internal_node(item));
-
-       count = !!item - !!old;
-       exceptional = !!radix_tree_exceptional_entry(item) -
-                     !!radix_tree_exceptional_entry(old);
-
-       WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
+       if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))
+               return;
 
-       if (node) {
+       if (node && (count || exceptional)) {
                node->count += count;
-               if (exceptional) {
-                       exceptional *= slot_count(node, slot);
-                       node->exceptional += exceptional;
-               }
+               node->exceptional += exceptional;
+               replace_sibling_entries(node, slot, count, exceptional);
        }
 
        rcu_assign_pointer(*slot, item);
 }
 
-static inline void delete_sibling_entries(struct radix_tree_node *node,
-                                               void **slot)
+static bool node_tag_get(const struct radix_tree_root *root,
+                               const struct radix_tree_node *node,
+                               unsigned int tag, unsigned int offset)
 {
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-       bool exceptional = radix_tree_exceptional_entry(*slot);
-       void *ptr = node_to_entry(slot);
-       unsigned offset = get_slot_offset(node, slot);
-       int i;
+       if (node)
+               return tag_get(node, tag, offset);
+       return root_tag_get(root, tag);
+}
 
-       for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
-               if (node->slots[offset + i] != ptr)
-                       break;
-               node->slots[offset + i] = NULL;
-               node->count--;
-               if (exceptional)
-                       node->exceptional--;
+/*
+ * IDR users want to be able to store NULL in the tree, so if the slot isn't
+ * free, don't adjust the count, even if it's transitioning between NULL and
+ * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
+ * have empty bits, but the IDA only stores NULL in slots when they're
+ * being deleted.
+ */
+static int calculate_count(struct radix_tree_root *root,
+                               struct radix_tree_node *node, void __rcu **slot,
+                               void *item, void *old)
+{
+       if (is_idr(root)) {
+               unsigned offset = get_slot_offset(node, slot);
+               bool free = node_tag_get(root, node, IDR_FREE, offset);
+               if (!free)
+                       return 0;
+               if (!old)
+                       return 1;
        }
-#endif
+       return !!item - !!old;
 }
 
 /**
@@ -1059,18 +1180,22 @@ static inline void delete_sibling_entries(struct radix_tree_node *node,
  */
 void __radix_tree_replace(struct radix_tree_root *root,
                          struct radix_tree_node *node,
-                         void **slot, void *item,
+                         void __rcu **slot, void *item,
                          radix_tree_update_node_t update_node, void *private)
 {
-       if (!item)
-               delete_sibling_entries(node, slot);
+       void *old = rcu_dereference_raw(*slot);
+       int exceptional = !!radix_tree_exceptional_entry(item) -
+                               !!radix_tree_exceptional_entry(old);
+       int count = calculate_count(root, node, slot, item, old);
+
        /*
         * This function supports replacing exceptional entries and
         * deleting entries, but that needs accounting against the
         * node unless the slot is root->rnode.
         */
-       replace_slot(root, node, slot, item,
-                    !node && slot != (void **)&root->rnode);
+       WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) &&
+                       (count || exceptional));
+       replace_slot(slot, item, node, count, exceptional);
 
        if (!node)
                return;
@@ -1098,10 +1223,11 @@ void __radix_tree_replace(struct radix_tree_root *root,
  * radix_tree_iter_replace().
  */
 void radix_tree_replace_slot(struct radix_tree_root *root,
-                            void **slot, void *item)
+                            void __rcu **slot, void *item)
 {
-       replace_slot(root, NULL, slot, item, true);
+       __radix_tree_replace(root, NULL, slot, item, NULL, NULL);
 }
+EXPORT_SYMBOL(radix_tree_replace_slot);
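(For illustration: with radix_tree_replace_slot() now routed through __radix_tree_replace(), a caller replacing an entry under its own tree lock might look like the hedged sketch below; the spinlock and helper name are assumptions, not part of this patch.)

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);	/* assumed to protect the tree */

/* Sketch: swap the entry at @index for @item, returning the old entry. */
static void *example_swap(struct radix_tree_root *root, unsigned long index,
			  void *item)
{
	void __rcu **slot;
	void *old = NULL;

	spin_lock(&example_lock);
	slot = radix_tree_lookup_slot(root, index);
	if (slot) {
		old = radix_tree_deref_slot_protected(slot, &example_lock);
		radix_tree_replace_slot(root, slot, item);
	}
	spin_unlock(&example_lock);
	return old;
}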
 
 /**
  * radix_tree_iter_replace - replace item in a slot
@@ -1113,7 +1239,8 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
  * Caller must hold tree write locked across split and replacement.
  */
 void radix_tree_iter_replace(struct radix_tree_root *root,
-               const struct radix_tree_iter *iter, void **slot, void *item)
+                               const struct radix_tree_iter *iter,
+                               void __rcu **slot, void *item)
 {
        __radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
 }
@@ -1137,7 +1264,7 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index,
                        unsigned order, void *item)
 {
        struct radix_tree_node *node;
-       void **slot;
+       void __rcu **slot;
        int error;
 
        BUG_ON(radix_tree_is_internal_node(item));
@@ -1172,9 +1299,10 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
                                unsigned order)
 {
        struct radix_tree_node *parent, *node, *child;
-       void **slot;
+       void __rcu **slot;
        unsigned int offset, end;
        unsigned n, tag, tags = 0;
+       gfp_t gfp = root_gfp_mask(root);
 
        if (!__radix_tree_lookup(root, index, &parent, &slot))
                return -ENOENT;
@@ -1188,7 +1316,8 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
                        tags |= 1 << tag;
 
        for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
-               if (!is_sibling_entry(parent, parent->slots[end]))
+               if (!is_sibling_entry(parent,
+                               rcu_dereference_raw(parent->slots[end])))
                        break;
                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
                        if (tags & (1 << tag))
@@ -1212,14 +1341,15 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
 
        for (;;) {
                if (node->shift > order) {
-                       child = radix_tree_node_alloc(root, node,
+                       child = radix_tree_node_alloc(gfp, node, root,
                                        node->shift - RADIX_TREE_MAP_SHIFT,
                                        offset, 0, 0);
                        if (!child)
                                goto nomem;
                        if (node != parent) {
                                node->count++;
-                               node->slots[offset] = node_to_entry(child);
+                               rcu_assign_pointer(node->slots[offset],
+                                                       node_to_entry(child));
                                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
                                        if (tags & (1 << tag))
                                                tag_set(node, tag, offset);
@@ -1261,6 +1391,22 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
 }
 #endif
 
+static void node_tag_set(struct radix_tree_root *root,
+                               struct radix_tree_node *node,
+                               unsigned int tag, unsigned int offset)
+{
+       while (node) {
+               if (tag_get(node, tag, offset))
+                       return;
+               tag_set(node, tag, offset);
+               offset = node->offset;
+               node = node->parent;
+       }
+
+       if (!root_tag_get(root, tag))
+               root_tag_set(root, tag);
+}
+
 /**
  *     radix_tree_tag_set - set a tag on a radix tree node
  *     @root:          radix tree root
@@ -1302,6 +1448,18 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_tag_set);
 
+/**
+ * radix_tree_iter_tag_set - set a tag on the current iterator entry
+ * @root:      radix tree root
+ * @iter:      iterator state
+ * @tag:       tag to set
+ */
+void radix_tree_iter_tag_set(struct radix_tree_root *root,
+                       const struct radix_tree_iter *iter, unsigned int tag)
+{
+       node_tag_set(root, iter->node, tag, iter_offset(iter));
+}
+
 static void node_tag_clear(struct radix_tree_root *root,
                                struct radix_tree_node *node,
                                unsigned int tag, unsigned int offset)
@@ -1322,34 +1480,6 @@ static void node_tag_clear(struct radix_tree_root *root,
                root_tag_clear(root, tag);
 }
 
-static void node_tag_set(struct radix_tree_root *root,
-                               struct radix_tree_node *node,
-                               unsigned int tag, unsigned int offset)
-{
-       while (node) {
-               if (tag_get(node, tag, offset))
-                       return;
-               tag_set(node, tag, offset);
-               offset = node->offset;
-               node = node->parent;
-       }
-
-       if (!root_tag_get(root, tag))
-               root_tag_set(root, tag);
-}
-
-/**
- * radix_tree_iter_tag_set - set a tag on the current iterator entry
- * @root:      radix tree root
- * @iter:      iterator state
- * @tag:       tag to set
- */
-void radix_tree_iter_tag_set(struct radix_tree_root *root,
-                       const struct radix_tree_iter *iter, unsigned int tag)
-{
-       node_tag_set(root, iter->node, tag, iter_offset(iter));
-}
-
 /**
  *     radix_tree_tag_clear - clear a tag on a radix tree node
  *     @root:          radix tree root
@@ -1389,6 +1519,18 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_tag_clear);
 
+/**
+ * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
+ * @root: radix tree root
+ * @iter: iterator state
+ * @tag: tag to clear
+ */
+void radix_tree_iter_tag_clear(struct radix_tree_root *root,
+                       const struct radix_tree_iter *iter, unsigned int tag)
+{
+       node_tag_clear(root, iter->node, tag, iter_offset(iter));
+}
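(For illustration: radix_tree_iter_tag_clear() and its counterpart radix_tree_iter_tag_set() let a tagged walk adjust tags without a second lookup. A hedged sketch, assuming the caller already excludes concurrent modifiers and that tag 0 is free for this use:)

#include <linux/radix-tree.h>

#define EXAMPLE_TAG	0	/* illustrative tag index, an assumption */

/* Sketch: clear EXAMPLE_TAG on every entry currently tagged with it. */
static void example_clear_all_tags(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_tagged(slot, root, &iter, 0, EXAMPLE_TAG)
		radix_tree_iter_tag_clear(root, &iter, EXAMPLE_TAG);
}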
+
 /**
  * radix_tree_tag_get - get a tag on a radix tree node
  * @root:              radix tree root
@@ -1404,7 +1546,7 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
  * the RCU lock is held, unless tag modification and node deletion are excluded
  * from concurrency.
  */
-int radix_tree_tag_get(struct radix_tree_root *root,
+int radix_tree_tag_get(const struct radix_tree_root *root,
                        unsigned long index, unsigned int tag)
 {
        struct radix_tree_node *node, *parent;
@@ -1416,8 +1558,6 @@ int radix_tree_tag_get(struct radix_tree_root *root,
        radix_tree_load_root(root, &node, &maxindex);
        if (index > maxindex)
                return 0;
-       if (node == NULL)
-               return 0;
 
        while (radix_tree_is_internal_node(node)) {
                unsigned offset;
@@ -1425,8 +1565,6 @@ int radix_tree_tag_get(struct radix_tree_root *root,
                parent = entry_to_node(node);
                offset = radix_tree_descend(parent, &node, index);
 
-               if (!node)
-                       return 0;
                if (!tag_get(parent, tag, offset))
                        return 0;
                if (node == RADIX_TREE_RETRY)
@@ -1453,6 +1591,11 @@ static void set_iter_tags(struct radix_tree_iter *iter,
        unsigned tag_long = offset / BITS_PER_LONG;
        unsigned tag_bit  = offset % BITS_PER_LONG;
 
+       if (!node) {
+               iter->tags = 1;
+               return;
+       }
+
        iter->tags = node->tags[tag][tag_long] >> tag_bit;
 
        /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
@@ -1467,8 +1610,8 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 }
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-static void **skip_siblings(struct radix_tree_node **nodep,
-                       void **slot, struct radix_tree_iter *iter)
+static void __rcu **skip_siblings(struct radix_tree_node **nodep,
+                       void __rcu **slot, struct radix_tree_iter *iter)
 {
        void *sib = node_to_entry(slot - 1);
 
@@ -1485,8 +1628,8 @@ static void **skip_siblings(struct radix_tree_node **nodep,
        return NULL;
 }
 
-void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
-                                       unsigned flags)
+void __rcu **__radix_tree_next_slot(void __rcu **slot,
+                               struct radix_tree_iter *iter, unsigned flags)
 {
        unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
        struct radix_tree_node *node = rcu_dereference_raw(*slot);
@@ -1539,20 +1682,20 @@ void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
 }
 EXPORT_SYMBOL(__radix_tree_next_slot);
 #else
-static void **skip_siblings(struct radix_tree_node **nodep,
-                       void **slot, struct radix_tree_iter *iter)
+static void __rcu **skip_siblings(struct radix_tree_node **nodep,
+                       void __rcu **slot, struct radix_tree_iter *iter)
 {
        return slot;
 }
 #endif
 
-void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter)
+void __rcu **radix_tree_iter_resume(void __rcu **slot,
+                                       struct radix_tree_iter *iter)
 {
        struct radix_tree_node *node;
 
        slot++;
        iter->index = __radix_tree_iter_add(iter, 1);
-       node = rcu_dereference_raw(*slot);
        skip_siblings(&node, slot, iter);
        iter->next_index = iter->index;
        iter->tags = 0;
@@ -1568,7 +1711,7 @@ EXPORT_SYMBOL(radix_tree_iter_resume);
  * @flags:     RADIX_TREE_ITER_* flags and tag index
  * Returns:    pointer to chunk first slot, or NULL if iteration is over
  */
-void **radix_tree_next_chunk(struct radix_tree_root *root,
+void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
                             struct radix_tree_iter *iter, unsigned flags)
 {
        unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
@@ -1605,7 +1748,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
                iter->tags = 1;
                iter->node = NULL;
                __set_iter_shift(iter, 0);
-               return (void **)&root->rnode;
+               return (void __rcu **)&root->rnode;
        }
 
        do {
@@ -1623,7 +1766,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
                                                offset + 1);
                        else
                                while (++offset < RADIX_TREE_MAP_SIZE) {
-                                       void *slot = node->slots[offset];
+                                       void *slot = rcu_dereference_raw(
+                                                       node->slots[offset]);
                                        if (is_sibling_entry(node, slot))
                                                continue;
                                        if (slot)
@@ -1679,11 +1823,11 @@ EXPORT_SYMBOL(radix_tree_next_chunk);
  *     stored in 'results'.
  */
 unsigned int
-radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
                        unsigned long first_index, unsigned int max_items)
 {
        struct radix_tree_iter iter;
-       void **slot;
+       void __rcu **slot;
        unsigned int ret = 0;
 
        if (unlikely(!max_items))
@@ -1724,12 +1868,12 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
  *     protection, radix_tree_deref_slot may fail requiring a retry.
  */
 unsigned int
-radix_tree_gang_lookup_slot(struct radix_tree_root *root,
-                       void ***results, unsigned long *indices,
+radix_tree_gang_lookup_slot(const struct radix_tree_root *root,
+                       void __rcu ***results, unsigned long *indices,
                        unsigned long first_index, unsigned int max_items)
 {
        struct radix_tree_iter iter;
-       void **slot;
+       void __rcu **slot;
        unsigned int ret = 0;
 
        if (unlikely(!max_items))
@@ -1761,12 +1905,12 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
  *     returns the number of items which were placed at *@results.
  */
 unsigned int
-radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
                unsigned long first_index, unsigned int max_items,
                unsigned int tag)
 {
        struct radix_tree_iter iter;
-       void **slot;
+       void __rcu **slot;
        unsigned int ret = 0;
 
        if (unlikely(!max_items))
@@ -1802,12 +1946,12 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
  *     returns the number of slots which were placed at *@results.
  */
 unsigned int
-radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
-               unsigned long first_index, unsigned int max_items,
-               unsigned int tag)
+radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
+               void __rcu ***results, unsigned long first_index,
+               unsigned int max_items, unsigned int tag)
 {
        struct radix_tree_iter iter;
-       void **slot;
+       void __rcu **slot;
        unsigned int ret = 0;
 
        if (unlikely(!max_items))
@@ -1842,59 +1986,83 @@ void __radix_tree_delete_node(struct radix_tree_root *root,
        delete_node(root, node, update_node, private);
 }
 
+static bool __radix_tree_delete(struct radix_tree_root *root,
+                               struct radix_tree_node *node, void __rcu **slot)
+{
+       void *old = rcu_dereference_raw(*slot);
+       int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
+       unsigned offset = get_slot_offset(node, slot);
+       int tag;
+
+       if (is_idr(root))
+               node_tag_set(root, node, IDR_FREE, offset);
+       else
+               for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+                       node_tag_clear(root, node, tag, offset);
+
+       replace_slot(slot, NULL, node, -1, exceptional);
+       return node && delete_node(root, node, NULL, NULL);
+}
+
 /**
- *     radix_tree_delete_item    -    delete an item from a radix tree
- *     @root:          radix tree root
- *     @index:         index key
- *     @item:          expected item
+ * radix_tree_iter_delete - delete the entry at this iterator position
+ * @root: radix tree root
+ * @iter: iterator state
+ * @slot: pointer to slot
  *
- *     Remove @item at @index from the radix tree rooted at @root.
+ * Delete the entry at the position currently pointed to by the iterator.
+ * This may result in the current node being freed; if it is, the iterator
+ * is advanced so that it will not reference the freed memory.  This
+ * function may be called without any locking if there are no other threads
+ * which can access this tree.
+ */
+void radix_tree_iter_delete(struct radix_tree_root *root,
+                               struct radix_tree_iter *iter, void __rcu **slot)
+{
+       if (__radix_tree_delete(root, iter->node, slot))
+               iter->index = iter->next_index;
+}
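(For illustration: because radix_tree_iter_delete() keeps the iterator valid even when the underlying node is freed, a single-threaded caller can empty a tree in one pass. A hedged sketch; freeing of the entries themselves is assumed to happen elsewhere:)

#include <linux/radix-tree.h>

/* Sketch: remove every entry; assumes no concurrent users of @root. */
static void example_drain(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, root, &iter, 0)
		radix_tree_iter_delete(root, &iter, slot);
}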
+
+/**
+ * radix_tree_delete_item - delete an item from a radix tree
+ * @root: radix tree root
+ * @index: index key
+ * @item: expected item
  *
- *     Returns the address of the deleted item, or NULL if it was not present
- *     or the entry at the given @index was not @item.
+ * Remove @item at @index from the radix tree rooted at @root.
+ *
+ * Return: the deleted entry, or %NULL if it was not present
+ * or the entry at the given @index was not @item.
  */
 void *radix_tree_delete_item(struct radix_tree_root *root,
                             unsigned long index, void *item)
 {
-       struct radix_tree_node *node;
-       unsigned int offset;
-       void **slot;
+       struct radix_tree_node *node = NULL;
+       void __rcu **slot;
        void *entry;
-       int tag;
 
        entry = __radix_tree_lookup(root, index, &node, &slot);
-       if (!entry)
+       if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
+                                               get_slot_offset(node, slot))))
                return NULL;
 
        if (item && entry != item)
                return NULL;
 
-       if (!node) {
-               root_tag_clear_all(root);
-               root->rnode = NULL;
-               return entry;
-       }
-
-       offset = get_slot_offset(node, slot);
-
-       /* Clear all tags associated with the item to be deleted.  */
-       for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-               node_tag_clear(root, node, tag, offset);
-
-       __radix_tree_replace(root, node, slot, NULL, NULL, NULL);
+       __radix_tree_delete(root, node, slot);
 
        return entry;
 }
 EXPORT_SYMBOL(radix_tree_delete_item);
 
 /**
- *     radix_tree_delete    -    delete an item from a radix tree
- *     @root:          radix tree root
- *     @index:         index key
+ * radix_tree_delete - delete an entry from a radix tree
+ * @root: radix tree root
+ * @index: index key
  *
- *     Remove the item at @index from the radix tree rooted at @root.
+ * Remove the entry at @index from the radix tree rooted at @root.
  *
- *     Returns the address of the deleted item, or NULL if it was not present.
+ * Return: The deleted entry, or %NULL if it was not present.
  */
 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 {
@@ -1904,15 +2072,14 @@ EXPORT_SYMBOL(radix_tree_delete);
 
 void radix_tree_clear_tags(struct radix_tree_root *root,
                           struct radix_tree_node *node,
-                          void **slot)
+                          void __rcu **slot)
 {
        if (node) {
                unsigned int tag, offset = get_slot_offset(node, slot);
                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
                        node_tag_clear(root, node, tag, offset);
        } else {
-               /* Clear root node tags */
-               root->gfp_mask &= __GFP_BITS_MASK;
+               root_tag_clear_all(root);
        }
 }
 
@@ -1921,12 +2088,147 @@ void radix_tree_clear_tags(struct radix_tree_root *root,
  *     @root:          radix tree root
  *     @tag:           tag to test
  */
-int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
+int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
 {
        return root_tag_get(root, tag);
 }
 EXPORT_SYMBOL(radix_tree_tagged);
 
+/**
+ * idr_preload - preload for idr_alloc()
+ * @gfp_mask: allocation mask to use for preloading
+ *
+ * Preallocate memory to use for the next call to idr_alloc().  This function
+ * returns with preemption disabled.  It will be enabled by idr_preload_end().
+ */
+void idr_preload(gfp_t gfp_mask)
+{
+       __radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE);
+}
+EXPORT_SYMBOL(idr_preload);
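(For illustration: the usual calling pattern pairs idr_preload() with idr_alloc() under the caller's lock, followed by idr_preload_end(). A hedged sketch; the IDR, the lock and the GFP choices are assumptions:)

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);
static DEFINE_SPINLOCK(example_idr_lock);	/* assumed to protect the IDR */

/* Sketch: allocate an ID for @ptr; returns the ID or a negative errno. */
static int example_alloc_id(void *ptr)
{
	int id;

	idr_preload(GFP_KERNEL);		/* disables preemption */
	spin_lock(&example_idr_lock);
	id = idr_alloc(&example_idr, ptr, 0, 0, GFP_NOWAIT);
	spin_unlock(&example_idr_lock);
	idr_preload_end();			/* re-enables preemption */

	return id;
}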
+
+/**
+ * ida_pre_get - reserve resources for ida allocation
+ * @ida: ida handle
+ * @gfp: memory allocation flags
+ *
+ * This function should be called before calling ida_get_new_above().  If it
+ * is unable to allocate memory, it will return %0.  On success, it returns %1.
+ */
+int ida_pre_get(struct ida *ida, gfp_t gfp)
+{
+       __radix_tree_preload(gfp, IDA_PRELOAD_SIZE);
+       /*
+        * The IDA API has no preload_end() equivalent.  Instead,
+        * ida_get_new() can return -EAGAIN, prompting the caller
+        * to return to the ida_pre_get() step.
+        */
+       preempt_enable();
+
+       if (!this_cpu_read(ida_bitmap)) {
+               struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+               if (!bitmap)
+                       return 0;
+               bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
+               kfree(bitmap);
+       }
+
+       return 1;
+}
+EXPORT_SYMBOL(ida_pre_get);
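(For illustration: as the comment above notes, ida_pre_get() has no preload_end() pairing; callers traditionally loop while ida_get_new_above() returns -EAGAIN. A hedged sketch of that classic retry loop, with the IDA itself an assumption:)

#include <linux/idr.h>

static DEFINE_IDA(example_ida);

/* Sketch: allocate the lowest free ID, retrying on -EAGAIN. */
static int example_get_id(void)
{
	int id, err;

	do {
		if (!ida_pre_get(&example_ida, GFP_KERNEL))
			return -ENOMEM;
		err = ida_get_new_above(&example_ida, 0, &id);
	} while (err == -EAGAIN);

	return err ? err : id;
}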
+
+void __rcu **idr_get_free(struct radix_tree_root *root,
+                       struct radix_tree_iter *iter, gfp_t gfp, int end)
+{
+       struct radix_tree_node *node = NULL, *child;
+       void __rcu **slot = (void __rcu **)&root->rnode;
+       unsigned long maxindex, start = iter->next_index;
+       unsigned long max = end > 0 ? end - 1 : INT_MAX;
+       unsigned int shift, offset = 0;
+
+ grow:
+       shift = radix_tree_load_root(root, &child, &maxindex);
+       if (!radix_tree_tagged(root, IDR_FREE))
+               start = max(start, maxindex + 1);
+       if (start > max)
+               return ERR_PTR(-ENOSPC);
+
+       if (start > maxindex) {
+               int error = radix_tree_extend(root, gfp, start, shift);
+               if (error < 0)
+                       return ERR_PTR(error);
+               shift = error;
+               child = rcu_dereference_raw(root->rnode);
+       }
+
+       while (shift) {
+               shift -= RADIX_TREE_MAP_SHIFT;
+               if (child == NULL) {
+                       /* Have to add a child node.  */
+                       child = radix_tree_node_alloc(gfp, node, root, shift,
+                                                       offset, 0, 0);
+                       if (!child)
+                               return ERR_PTR(-ENOMEM);
+                       all_tag_set(child, IDR_FREE);
+                       rcu_assign_pointer(*slot, node_to_entry(child));
+                       if (node)
+                               node->count++;
+               } else if (!radix_tree_is_internal_node(child))
+                       break;
+
+               node = entry_to_node(child);
+               offset = radix_tree_descend(node, &child, start);
+               if (!tag_get(node, IDR_FREE, offset)) {
+                       offset = radix_tree_find_next_bit(node, IDR_FREE,
+                                                       offset + 1);
+                       start = next_index(start, node, offset);
+                       if (start > max)
+                               return ERR_PTR(-ENOSPC);
+                       while (offset == RADIX_TREE_MAP_SIZE) {
+                               offset = node->offset + 1;
+                               node = node->parent;
+                               if (!node)
+                                       goto grow;
+                               shift = node->shift;
+                       }
+                       child = rcu_dereference_raw(node->slots[offset]);
+               }
+               slot = &node->slots[offset];
+       }
+
+       iter->index = start;
+       if (node)
+               iter->next_index = 1 + min(max, (start | node_maxindex(node)));
+       else
+               iter->next_index = 1;
+       iter->node = node;
+       __set_iter_shift(iter, shift);
+       set_iter_tags(iter, node, offset, IDR_FREE);
+
+       return slot;
+}
+
+/**
+ * idr_destroy - release all internal memory from an IDR
+ * @idr: idr handle
+ *
+ * After this function is called, the IDR is empty, and may be reused or
+ * the data structure containing it may be freed.
+ *
+ * A typical clean-up sequence for objects stored in an idr tree will use
+ * idr_for_each() to free all objects, if necessary, then idr_destroy() to
+ * free the memory used to keep track of those objects.
+ */
+void idr_destroy(struct idr *idr)
+{
+       struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
+       if (radix_tree_is_internal_node(node))
+               radix_tree_free_nodes(node);
+       idr->idr_rt.rnode = NULL;
+       root_tag_set(&idr->idr_rt, IDR_FREE);
+}
+EXPORT_SYMBOL(idr_destroy);
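(For illustration: the clean-up sequence described in the comment above, sketched under the assumption that every stored pointer was obtained from kmalloc():)

#include <linux/idr.h>
#include <linux/slab.h>

/* Hypothetical callback: free one stored object; return 0 to continue. */
static int example_free_one(int id, void *p, void *data)
{
	kfree(p);
	return 0;
}

/* Sketch: free all objects, then release the IDR's internal memory. */
static void example_teardown(struct idr *idr)
{
	idr_for_each(idr, example_free_one, NULL);
	idr_destroy(idr);
}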
+
 static void
 radix_tree_node_ctor(void *arg)
 {
@@ -1970,10 +2272,12 @@ static int radix_tree_cpu_dead(unsigned int cpu)
        rtp = &per_cpu(radix_tree_preloads, cpu);
        while (rtp->nr) {
                node = rtp->nodes;
-               rtp->nodes = node->private_data;
+               rtp->nodes = node->parent;
                kmem_cache_free(radix_tree_node_cachep, node);
                rtp->nr--;
        }
+       kfree(per_cpu(ida_bitmap, cpu));
+       per_cpu(ida_bitmap, cpu) = NULL;
        return 0;
 }
 
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1f8b112a7c359152260cddf15448826dda98bb91..4ba2828a67c0401b371b04cce7c52d566b5c0e74 100644 (file)
@@ -427,7 +427,9 @@ static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
 static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
 
 static const struct rb_augment_callbacks dummy_callbacks = {
-       dummy_propagate, dummy_copy, dummy_rotate
+       .propagate = dummy_propagate,
+       .copy = dummy_copy,
+       .rotate = dummy_rotate
 };
 
 void rb_insert_color(struct rb_node *node, struct rb_root *root)
diff --git a/lib/refcount.c b/lib/refcount.c
new file mode 100644 (file)
index 0000000..1d33366
--- /dev/null
@@ -0,0 +1,267 @@
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * It differs in that the counter saturates at UINT_MAX and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free issues.
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * it's the lock acquire; for RCU/lockless data structures it's the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the increment; this ensures we'll never modify the
+ * object if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before; they also provide a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ */
+
+#include <linux/refcount.h>
+#include <linux/bug.h>
+
+bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+       unsigned int old, new, val = atomic_read(&r->refs);
+
+       for (;;) {
+               if (!val)
+                       return false;
+
+               if (unlikely(val == UINT_MAX))
+                       return true;
+
+               new = val + i;
+               if (new < val)
+                       new = UINT_MAX;
+               old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+
+       WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(refcount_add_not_zero);
+
+void refcount_add(unsigned int i, refcount_t *r)
+{
+       WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
+}
+EXPORT_SYMBOL_GPL(refcount_add);
+
+/*
+ * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ */
+bool refcount_inc_not_zero(refcount_t *r)
+{
+       unsigned int old, new, val = atomic_read(&r->refs);
+
+       for (;;) {
+               new = val + 1;
+
+               if (!val)
+                       return false;
+
+               if (unlikely(!new))
+                       return true;
+
+               old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+
+       WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(refcount_inc_not_zero);
+
+/*
+ * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object, will WARN when this is not so.
+ */
+void refcount_inc(refcount_t *r)
+{
+       WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+}
+EXPORT_SYMBOL_GPL(refcount_inc);
+
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+       unsigned int old, new, val = atomic_read(&r->refs);
+
+       for (;;) {
+               if (unlikely(val == UINT_MAX))
+                       return false;
+
+               new = val - i;
+               if (WARN(new > val, "refcount_t: underflow; use-after-free.\n"))
+                       return false;
+
+               old = atomic_cmpxchg_release(&r->refs, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+
+       return !new;
+}
+EXPORT_SYMBOL_GPL(refcount_sub_and_test);
+
+/*
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+bool refcount_dec_and_test(refcount_t *r)
+{
+       return refcount_sub_and_test(1, r);
+}
+EXPORT_SYMBOL_GPL(refcount_dec_and_test);
+
+/*
+ * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
+ * when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before.
+ */
+
+void refcount_dec(refcount_t *r)
+{
+       WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
+}
+EXPORT_SYMBOL_GPL(refcount_dec);
+
+/*
+ * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the
+ * success thereof.
+ *
+ * Like all decrement operations, it provides release memory order and provides
+ * a control dependency.
+ *
+ * It can be used like a try-delete operator; this explicit case is provided
+ * rather than a generic cmpxchg, because that would allow implementing
+ * unsafe operations.
+ */
+bool refcount_dec_if_one(refcount_t *r)
+{
+       return atomic_cmpxchg_release(&r->refs, 1, 0) == 1;
+}
+EXPORT_SYMBOL_GPL(refcount_dec_if_one);
+
+/*
+ * No atomic_t counterpart, it decrements unless the value is 1, in which case
+ * it will return false.
+ *
+ * Was often done like: atomic_add_unless(&var, -1, 1)
+ */
+bool refcount_dec_not_one(refcount_t *r)
+{
+       unsigned int old, new, val = atomic_read(&r->refs);
+
+       for (;;) {
+               if (unlikely(val == UINT_MAX))
+                       return true;
+
+               if (val == 1)
+                       return false;
+
+               new = val - 1;
+               if (WARN(new > val, "refcount_t: underflow; use-after-free.\n"))
+                       return true;
+
+               old = atomic_cmpxchg_release(&r->refs, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(refcount_dec_not_one);
+
+/*
+ * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail
+ * to decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock)
+{
+       if (refcount_dec_not_one(r))
+               return false;
+
+       mutex_lock(lock);
+       if (!refcount_dec_and_test(r)) {
+               mutex_unlock(lock);
+               return false;
+       }
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(refcount_dec_and_mutex_lock);
+
+/*
+ * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
+{
+       if (refcount_dec_not_one(r))
+               return false;
+
+       spin_lock(lock);
+       if (!refcount_dec_and_test(r)) {
+               spin_unlock(lock);
+               return false;
+       }
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(refcount_dec_and_lock);
+
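Taken together, these helpers support the canonical get/put object lifetime pattern. A minimal sketch of a release path built on refcount_dec_and_test() (struct foo and foo_put() are illustrative names, not part of this patch; kfree() assumes <linux/slab.h>):

struct foo {
        refcount_t refs;
        /* ... payload ... */
};

static void foo_put(struct foo *f)
{
        /*
         * The release ordering and control dependency described in the
         * comments above make the free safe against earlier loads and
         * stores through the object.
         */
        if (refcount_dec_and_test(&f->refs))
                kfree(f);
}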
index 004fc70fc56a3d06947f9e89c60e40e272ce551c..c6cf82242d655d929a63e2a5d517510af6a657ad 100644 (file)
@@ -651,7 +651,6 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
 {
        unsigned int offset = 0;
        struct sg_mapping_iter miter;
-       unsigned long flags;
        unsigned int sg_flags = SG_MITER_ATOMIC;
 
        if (to_buffer)
@@ -664,9 +663,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
        if (!sg_miter_skip(&miter, skip))
                return false;
 
-       local_irq_save(flags);
-
-       while (sg_miter_next(&miter) && offset < buflen) {
+       while ((offset < buflen) && sg_miter_next(&miter)) {
                unsigned int len;
 
                len = min(miter.length, buflen - offset);
@@ -681,7 +678,6 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
 
        sg_miter_stop(&miter);
 
-       local_irq_restore(flags);
        return offset;
 }
 EXPORT_SYMBOL(sg_copy_buffer);
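Two independent cleanups are folded into this hunk: the explicit local_irq_save()/local_irq_restore() around the copy loop is dropped (with SG_MITER_ATOMIC, the kmap_atomic() done inside sg_miter_next()/sg_miter_stop() no longer appears to require interrupts to be disabled), and the loop condition is reordered so that sg_miter_next() is not called again once the destination buffer is already full, which previously could map one more scatterlist entry than necessary.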
index fc20df42aa6f2945aae8586c3c4d3599ca6d67f0..975c6ef6fec753164de34ee7ce5a508105a41794 100644 (file)
@@ -4,6 +4,8 @@
  * Jan 23 2005  Matt Mackall <mpm@selenic.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/export.h>
 #include <linux/sort.h>
@@ -101,42 +103,3 @@ void sort(void *base, size_t num, size_t size,
 }
 
 EXPORT_SYMBOL(sort);
-
-#if 0
-#include <linux/slab.h>
-/* a simple boot-time regression test */
-
-int cmpint(const void *a, const void *b)
-{
-       return *(int *)a - *(int *)b;
-}
-
-static int sort_test(void)
-{
-       int *a, i, r = 1;
-
-       a = kmalloc(1000 * sizeof(int), GFP_KERNEL);
-       BUG_ON(!a);
-
-       printk("testing sort()\n");
-
-       for (i = 0; i < 1000; i++) {
-               r = (r * 725861) % 6599;
-               a[i] = r;
-       }
-
-       sort(a, 1000, sizeof(int), cmpint, NULL);
-
-       for (i = 0; i < 999; i++)
-               if (a[i] > a[i+1]) {
-                       printk("sort() failed!\n");
-                       break;
-               }
-
-       kfree(a);
-
-       return 0;
-}
-
-module_init(sort_test);
-#endif
index fbdf87920093b9b1dbd94e4b370a24e43d6bb8f7..0b1d3140fbb87738ec37031997075e50c5670575 100644 (file)
@@ -11,6 +11,7 @@
 
 #define pr_fmt(fmt) "kasan test: %s " fmt, __func__
 
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
@@ -331,6 +332,38 @@ static noinline void __init kmem_cache_oob(void)
        kmem_cache_destroy(cache);
 }
 
+static noinline void __init memcg_accounted_kmem_cache(void)
+{
+       int i;
+       char *p;
+       size_t size = 200;
+       struct kmem_cache *cache;
+
+       cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL);
+       if (!cache) {
+               pr_err("Cache allocation failed\n");
+               return;
+       }
+
+       pr_info("allocate memcg accounted object\n");
+       /*
+        * Several allocations with a delay to allow for lazy per memcg kmem
+        * cache creation.
+        */
+       for (i = 0; i < 5; i++) {
+               p = kmem_cache_alloc(cache, GFP_KERNEL);
+               if (!p) {
+                       pr_err("Allocation failed\n");
+                       goto free_cache;
+               }
+               kmem_cache_free(cache, p);
+               msleep(100);
+       }
+
+free_cache:
+       kmem_cache_destroy(cache);
+}
+
 static char global_array[10];
 
 static noinline void __init kasan_global_oob(void)
@@ -460,6 +493,7 @@ static int __init kmalloc_tests_init(void)
        kmalloc_uaf_memset();
        kmalloc_uaf2();
        kmem_cache_oob();
+       memcg_accounted_kmem_cache();
        kasan_stack_oob();
        kasan_global_oob();
        ksize_unpoisons_memory();
diff --git a/lib/test_sort.c b/lib/test_sort.c
new file mode 100644 (file)
index 0000000..4db3911
--- /dev/null
@@ -0,0 +1,44 @@
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+
+/*
+ * A simple boot-time regression test
+ * License: GPL
+ */
+
+#define TEST_LEN 1000
+
+static int __init cmpint(const void *a, const void *b)
+{
+       return *(int *)a - *(int *)b;
+}
+
+static int __init test_sort_init(void)
+{
+       int *a, i, r = 1, err = -ENOMEM;
+
+       a = kmalloc_array(TEST_LEN, sizeof(*a), GFP_KERNEL);
+       if (!a)
+               return err;
+
+       for (i = 0; i < TEST_LEN; i++) {
+               r = (r * 725861) % 6599;
+               a[i] = r;
+       }
+
+       sort(a, TEST_LEN, sizeof(*a), cmpint, NULL);
+
+       err = -EINVAL;
+       for (i = 0; i < TEST_LEN-1; i++)
+               if (a[i] > a[i+1]) {
+                       pr_err("test has failed\n");
+                       goto exit;
+               }
+       err = 0;
+       pr_info("test passed\n");
+exit:
+       kfree(a);
+       return err;
+}
+subsys_initcall(test_sort_init);
index 0967771d8f7fd725972c74097bd324a8b39b5609..e3bf4e0f10b568ba3d74674642adeff013cb62c8 100644 (file)
@@ -1739,6 +1739,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
  * 'h', 'l', or 'L' for integer fields
  * 'z' support added 23/7/1999 S.H.
  * 'z' changed to 'Z' --davidm 1/25/99
+ * 'Z' changed to 'z' --adobriyan 2017-01-25
  * 't' added for ptrdiff_t
  *
  * @fmt: the format string
@@ -1838,7 +1839,7 @@ qualifier:
        /* get the conversion qualifier */
        qualifier = 0;
        if (*fmt == 'h' || _tolower(*fmt) == 'l' ||
-           _tolower(*fmt) == 'z' || *fmt == 't') {
+           *fmt == 'z' || *fmt == 't') {
                qualifier = *fmt++;
                if (unlikely(qualifier == *fmt)) {
                        if (qualifier == 'l') {
@@ -1907,7 +1908,7 @@ qualifier:
        else if (qualifier == 'l') {
                BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG);
                spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN);
-       } else if (_tolower(qualifier) == 'z') {
+       } else if (qualifier == 'z') {
                spec->type = FORMAT_TYPE_SIZE_T;
        } else if (qualifier == 't') {
                spec->type = FORMAT_TYPE_PTRDIFF;
@@ -2657,7 +2658,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
                /* get conversion qualifier */
                qualifier = -1;
                if (*fmt == 'h' || _tolower(*fmt) == 'l' ||
-                   _tolower(*fmt) == 'z') {
+                   *fmt == 'z') {
                        qualifier = *fmt++;
                        if (unlikely(qualifier == *fmt)) {
                                if (qualifier == 'h') {
@@ -2851,7 +2852,6 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
                        else
                                *va_arg(args, unsigned long long *) = val.u;
                        break;
-               case 'Z':
                case 'z':
                        *va_arg(args, size_t *) = val.u;
                        break;
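After this change only the standard C99 'z' length modifier is accepted for size_t; the old capital 'Z' (a historical glibc extension) is no longer recognized by format_decode() or vsscanf(). An illustrative use (values hypothetical):

size_t n = sizeof(struct page);

pr_info("object size: %zu bytes\n", n); /* 'z' is the C99 form */
/* A "%Zu" conversion would no longer be parsed as a size_t conversion. */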
index afcc550877fff9d81ef68b10fc28b1a21108f2c5..79d0fd13b5b3c1a826f472398fd60a1ae1cb5da6 100644 (file)
@@ -90,3 +90,9 @@ config DEBUG_PAGE_REF
          careful when enabling this feature because it adds about 30 KB to the
          kernel code.  However the runtime performance overhead is virtually
          nil until the tracepoints are actually enabled.
+
+config DEBUG_RODATA_TEST
+    bool "Testcase for marking rodata read-only"
+    depends on STRICT_KERNEL_RWX
+    ---help---
+      This option enables a testcase for setting rodata read-only.
index 433eaf9a876ed71b06a588a7fb08b28ce4254717..026f6a828a5023cdfa1b3239075a77c904409463 100644 (file)
@@ -23,8 +23,10 @@ KCOV_INSTRUMENT_vmstat.o := n
 
 mmu-y                  := nommu.o
 mmu-$(CONFIG_MMU)      := gup.o highmem.o memory.o mincore.o \
-                          mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
-                          vmalloc.o pagewalk.o pgtable-generic.o
+                          mlock.o mmap.o mprotect.o mremap.o msync.o \
+                          page_vma_mapped.o pagewalk.o pgtable-generic.o \
+                          rmap.o vmalloc.o
 
 ifdef CONFIG_CROSS_MEMORY_ATTACH
 mmu-$(CONFIG_MMU)      += process_vm_access.o
@@ -83,6 +85,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
 obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
+obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o
 obj-$(CONFIG_PAGE_OWNER) += page_owner.o
 obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
index 94b3460cd608aa26163ef84f99123d74c4a2d657..a6033e3444304c95adb7c392984cc6600684c7ec 100644 (file)
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -348,6 +348,32 @@ err:
        return ret;
 }
 
+#ifdef CONFIG_CMA_DEBUG
+static void cma_debug_show_areas(struct cma *cma)
+{
+       unsigned long next_zero_bit, next_set_bit;
+       unsigned long start = 0;
+       unsigned int nr_zero, nr_total = 0;
+
+       mutex_lock(&cma->lock);
+       pr_info("number of available pages: ");
+       for (;;) {
+               next_zero_bit = find_next_zero_bit(cma->bitmap, cma->count, start);
+               if (next_zero_bit >= cma->count)
+                       break;
+               next_set_bit = find_next_bit(cma->bitmap, cma->count, next_zero_bit);
+               nr_zero = next_set_bit - next_zero_bit;
+               pr_cont("%s%u@%lu", nr_total ? "+" : "", nr_zero, next_zero_bit);
+               nr_total += nr_zero;
+               start = next_zero_bit + nr_zero;
+       }
+       pr_cont("=> %u free of %lu total pages\n", nr_total, cma->count);
+       mutex_unlock(&cma->lock);
+}
+#else
+static inline void cma_debug_show_areas(struct cma *cma) { }
+#endif
+
 /**
  * cma_alloc() - allocate pages from contiguous area
  * @cma:   Contiguous memory region for which the allocation is performed.
@@ -357,14 +383,15 @@ err:
  * This function allocates part of contiguous memory on specific
  * contiguous memory area.
  */
-struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
+struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
+                      gfp_t gfp_mask)
 {
        unsigned long mask, offset;
        unsigned long pfn = -1;
        unsigned long start = 0;
        unsigned long bitmap_maxno, bitmap_no, bitmap_count;
        struct page *page = NULL;
-       int ret;
+       int ret = -ENOMEM;
 
        if (!cma || !cma->count)
                return NULL;
@@ -402,7 +429,8 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
 
                pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
                mutex_lock(&cma_mutex);
-               ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+               ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
+                                        gfp_mask);
                mutex_unlock(&cma_mutex);
                if (ret == 0) {
                        page = pfn_to_page(pfn);
@@ -421,6 +449,12 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
 
        trace_cma_alloc(pfn, page, count, align);
 
+       if (ret) {
+               pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n",
+                       __func__, count, ret);
+               cma_debug_show_areas(cma);
+       }
+
        pr_debug("%s(): returned %p\n", __func__, page);
        return page;
 }
index f8e4b60db167215862824637d856ffb34332f071..ffc0c3d0ae64a610409d85a5ac5704c2a660ab0c 100644 (file)
@@ -138,7 +138,7 @@ static int cma_alloc_mem(struct cma *cma, int count)
        if (!mem)
                return -ENOMEM;
 
-       p = cma_alloc(cma, count, 0);
+       p = cma_alloc(cma, count, 0, GFP_KERNEL);
        if (!p) {
                kfree(mem);
                return -ENOMEM;
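The dma-contiguous call site above shows the new cma_alloc() signature with its explicit gfp_t argument. A minimal hedged sketch of another caller (the cma pointer and count are assumed to come from the surrounding driver):

struct page *pages;

/* The allocation context is now explicit rather than implied GFP_KERNEL. */
pages = cma_alloc(cma, count, 0 /* align */, GFP_KERNEL);
if (!pages)
        return -ENOMEM;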
index 0aa2757399ee00ace11a0492df09517fdea99f9e..0fdfde016ee283279b9a81c9b3e8f72f2ce3b353 100644 (file)
@@ -802,7 +802,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                                        locked = false;
                                }
 
-                               if (isolate_movable_page(page, isolate_mode))
+                               if (!isolate_movable_page(page, isolate_mode))
                                        goto isolate_success;
                        }
 
index abcbfe86c25ae12271d96418c5dfb4a592f334d8..4d90a64b2fdc8d50af4ef60278901a5b089b5735 100644 (file)
@@ -93,7 +93,7 @@ show_pools(struct device *dev, struct device_attribute *attr, char *buf)
                spin_unlock_irq(&pool->lock);
 
                /* per-pool info, no real statistics yet */
-               temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n",
+               temp = scnprintf(next, size, "%-16s %4u %4zu %4zu %2u\n",
                                 pool->name, blocks,
                                 pages * (pool->allocation / pool->size),
                                 pool->size, pages);
@@ -434,11 +434,11 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
                spin_unlock_irqrestore(&pool->lock, flags);
                if (pool->dev)
                        dev_err(pool->dev,
-                               "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
-                               pool->name, vaddr, (unsigned long long)dma);
+                               "dma_pool_free %s, %p (bad vaddr)/%pad\n",
+                               pool->name, vaddr, &dma);
                else
-                       pr_err("dma_pool_free %s, %p (bad vaddr)/%Lx\n",
-                              pool->name, vaddr, (unsigned long long)dma);
+                       pr_err("dma_pool_free %s, %p (bad vaddr)/%pad\n",
+                              pool->name, vaddr, &dma);
                return;
        }
        {
@@ -450,11 +450,11 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
                        }
                        spin_unlock_irqrestore(&pool->lock, flags);
                        if (pool->dev)
-                               dev_err(pool->dev, "dma_pool_free %s, dma %Lx already free\n",
-                                       pool->name, (unsigned long long)dma);
+                               dev_err(pool->dev, "dma_pool_free %s, dma %pad already free\n",
+                                       pool->name, &dma);
                        else
-                               pr_err("dma_pool_free %s, dma %Lx already free\n",
-                                      pool->name, (unsigned long long)dma);
+                               pr_err("dma_pool_free %s, dma %pad already free\n",
+                                      pool->name, &dma);
                        return;
                }
        }
index 416d563468a3abfa6649fe48717d073dede899ac..1944c631e3e660d6d06d72e8b23096d44d0183ea 100644 (file)
@@ -1008,9 +1008,12 @@ void page_endio(struct page *page, bool is_write, int err)
                unlock_page(page);
        } else {
                if (err) {
+                       struct address_space *mapping;
+
                        SetPageError(page);
-                       if (page->mapping)
-                               mapping_set_error(page->mapping, err);
+                       mapping = page_mapping(page);
+                       if (mapping)
+                               mapping_set_error(mapping, err);
                }
                end_page_writeback(page);
        }
@@ -2169,7 +2172,6 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
 
 /**
  * filemap_fault - read in file data for page fault handling
- * @vma:       vma in which the fault was taken
  * @vmf:       struct vm_fault containing details of the fault
  *
  * filemap_fault() is invoked via the vma operations vector for a
@@ -2191,10 +2193,10 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
  *
  * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
  */
-int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int filemap_fault(struct vm_fault *vmf)
 {
        int error;
-       struct file *file = vma->vm_file;
+       struct file *file = vmf->vma->vm_file;
        struct address_space *mapping = file->f_mapping;
        struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
@@ -2216,12 +2218,12 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                 * We found the page, so try async readahead before
                 * waiting for the lock.
                 */
-               do_async_mmap_readahead(vma, ra, file, page, offset);
+               do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
        } else if (!page) {
                /* No page in the page cache at all */
-               do_sync_mmap_readahead(vma, ra, file, offset);
+               do_sync_mmap_readahead(vmf->vma, ra, file, offset);
                count_vm_event(PGMAJFAULT);
-               mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+               mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
                ret = VM_FAULT_MAJOR;
 retry_find:
                page = find_get_page(mapping, offset);
@@ -2229,7 +2231,7 @@ retry_find:
                        goto no_cached_page;
        }
 
-       if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
+       if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
                put_page(page);
                return ret | VM_FAULT_RETRY;
        }
@@ -2396,14 +2398,14 @@ next:
 }
 EXPORT_SYMBOL(filemap_map_pages);
 
-int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int filemap_page_mkwrite(struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
-       struct inode *inode = file_inode(vma->vm_file);
+       struct inode *inode = file_inode(vmf->vma->vm_file);
        int ret = VM_FAULT_LOCKED;
 
        sb_start_pagefault(inode->i_sb);
-       file_update_time(vma->vm_file);
+       file_update_time(vmf->vma->vm_file);
        lock_page(page);
        if (page->mapping != inode->i_mapping) {
                unlock_page(page);
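These hunks reflect the tree-wide change of ->fault() and ->page_mkwrite() to take only a struct vm_fault *, with the VMA now reachable through vmf->vma. A hypothetical out-of-tree handler would be converted along these lines (the mydrv_* names are illustrative, not from this patch):

static int mydrv_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;  /* was a separate argument */

        /* ... look up the backing page for vmf->pgoff ... */
        return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct mydrv_vm_ops = {
        .fault          = mydrv_fault,
        .page_mkwrite   = filemap_page_mkwrite,
};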
index 40abe4c903834573d3249326b957553b72bd8eb9..94fab8fa432b2f46ccb0c0238a17cb750489896e 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -253,6 +253,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
                        return page;
                return no_page_table(vma, flags);
        }
+       if (pud_devmap(*pud)) {
+               ptl = pud_lock(mm, pud);
+               page = follow_devmap_pud(vma, address, pud, flags);
+               spin_unlock(ptl);
+               if (page)
+                       return page;
+       }
        if (unlikely(pud_bad(*pud)))
                return no_page_table(vma, flags);
 
@@ -265,8 +272,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
                        return page;
                return no_page_table(vma, flags);
        }
-       if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
-               return no_page_table(vma, flags);
        if (pmd_devmap(*pmd)) {
                ptl = pmd_lock(mm, pmd);
                page = follow_devmap_pmd(vma, address, pmd, flags);
@@ -277,6 +282,9 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
        if (likely(!pmd_trans_huge(*pmd)))
                return follow_page_pte(vma, address, pmd, flags);
 
+       if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
+               return no_page_table(vma, flags);
+
        ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_trans_huge(*pmd))) {
                spin_unlock(ptl);
index f9ecc2aeadfc5f202f29ad5d13acbf3e4d1a6617..71e3dede95b424fb57c5e5c44be3dd5b133fd71f 100644 (file)
@@ -757,6 +757,60 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
+{
+       if (likely(vma->vm_flags & VM_WRITE))
+               pud = pud_mkwrite(pud);
+       return pud;
+}
+
+static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+               pud_t *pud, pfn_t pfn, pgprot_t prot, bool write)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       pud_t entry;
+       spinlock_t *ptl;
+
+       ptl = pud_lock(mm, pud);
+       entry = pud_mkhuge(pfn_t_pud(pfn, prot));
+       if (pfn_t_devmap(pfn))
+               entry = pud_mkdevmap(entry);
+       if (write) {
+               entry = pud_mkyoung(pud_mkdirty(entry));
+               entry = maybe_pud_mkwrite(entry, vma);
+       }
+       set_pud_at(mm, addr, pud, entry);
+       update_mmu_cache_pud(vma, addr, pud);
+       spin_unlock(ptl);
+}
+
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+                       pud_t *pud, pfn_t pfn, bool write)
+{
+       pgprot_t pgprot = vma->vm_page_prot;
+       /*
+        * If we had pud_special, we could avoid all these restrictions,
+        * but we need to be consistent with PTEs and architectures that
+        * can't support a 'special' bit.
+        */
+       BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
+       BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
+                                               (VM_PFNMAP|VM_MIXEDMAP));
+       BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
+       BUG_ON(!pfn_t_devmap(pfn));
+
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
+       track_pfn_insert(vma, &pgprot, pfn);
+
+       insert_pfn_pud(vma, addr, pud, pfn, pgprot, write);
+       return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+
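A device-DAX style user would call vmf_insert_pfn_pud() from its huge-fault path once it has resolved a device pfn. A hedged sketch, assuming a hypothetical my_dev_phys() lookup (vmf->pud and vmf->address are supplied by the fault path, as used elsewhere in this patch):

static int mydax_huge_fault(struct vm_fault *vmf)
{
        /* my_dev_phys(): hypothetical physical-address lookup */
        pfn_t pfn = phys_to_pfn_t(my_dev_phys(vmf), PFN_DEV | PFN_MAP);

        return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud,
                                  pfn, vmf->flags & FAULT_FLAG_WRITE);
}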
 static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
                pmd_t *pmd)
 {
@@ -887,6 +941,123 @@ out:
        return ret;
 }
 
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
+               pud_t *pud)
+{
+       pud_t _pud;
+
+       /*
+        * We should set the dirty bit only for FOLL_WRITE but for now
+        * the dirty bit in the pud is meaningless.  If the dirty bit
+        * ever becomes meaningful and is only set for FOLL_WRITE, an
+        * atomic set_bit will be required on the pud to set the young
+        * bit, instead of the current set_pud_at.
+        */
+       _pud = pud_mkyoung(pud_mkdirty(*pud));
+       if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK,
+                               pud, _pud,  1))
+               update_mmu_cache_pud(vma, addr, pud);
+}
+
+struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+               pud_t *pud, int flags)
+{
+       unsigned long pfn = pud_pfn(*pud);
+       struct mm_struct *mm = vma->vm_mm;
+       struct dev_pagemap *pgmap;
+       struct page *page;
+
+       assert_spin_locked(pud_lockptr(mm, pud));
+
+       if (flags & FOLL_WRITE && !pud_write(*pud))
+               return NULL;
+
+       if (pud_present(*pud) && pud_devmap(*pud))
+               /* pass */;
+       else
+               return NULL;
+
+       if (flags & FOLL_TOUCH)
+               touch_pud(vma, addr, pud);
+
+       /*
+        * device mapped pages can only be returned if the
+        * caller will manage the page reference count.
+        */
+       if (!(flags & FOLL_GET))
+               return ERR_PTR(-EEXIST);
+
+       pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
+       pgmap = get_dev_pagemap(pfn, NULL);
+       if (!pgmap)
+               return ERR_PTR(-EFAULT);
+       page = pfn_to_page(pfn);
+       get_page(page);
+       put_dev_pagemap(pgmap);
+
+       return page;
+}
+
+int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+                 pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+                 struct vm_area_struct *vma)
+{
+       spinlock_t *dst_ptl, *src_ptl;
+       pud_t pud;
+       int ret;
+
+       dst_ptl = pud_lock(dst_mm, dst_pud);
+       src_ptl = pud_lockptr(src_mm, src_pud);
+       spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+
+       ret = -EAGAIN;
+       pud = *src_pud;
+       if (unlikely(!pud_trans_huge(pud) && !pud_devmap(pud)))
+               goto out_unlock;
+
+       /*
+        * While the page table lock is held, the huge zero pud cannot be
+        * under splitting, since we never split the page itself, only the
+        * pud into a page table.
+        */
+       if (is_huge_zero_pud(pud)) {
+               /* No huge zero pud yet */
+       }
+
+       pudp_set_wrprotect(src_mm, addr, src_pud);
+       pud = pud_mkold(pud_wrprotect(pud));
+       set_pud_at(dst_mm, addr, dst_pud, pud);
+
+       ret = 0;
+out_unlock:
+       spin_unlock(src_ptl);
+       spin_unlock(dst_ptl);
+       return ret;
+}
+
+void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
+{
+       pud_t entry;
+       unsigned long haddr;
+       bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+       vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud);
+       if (unlikely(!pud_same(*vmf->pud, orig_pud)))
+               goto unlock;
+
+       entry = pud_mkyoung(orig_pud);
+       if (write)
+               entry = pud_mkdirty(entry);
+       haddr = vmf->address & HPAGE_PUD_MASK;
+       if (pudp_set_access_flags(vmf->vma, haddr, vmf->pud, entry, write))
+               update_mmu_cache_pud(vmf->vma, vmf->address, vmf->pud);
+
+unlock:
+       spin_unlock(vmf->ptl);
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+
 void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
 {
        pmd_t entry;
@@ -1255,7 +1426,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
        }
 
        /* See similar comment in do_numa_page for explanation */
-       if (!pmd_write(pmd))
+       if (!pmd_savedwrite(pmd))
                flags |= TNF_NO_GROUP;
 
        /*
@@ -1318,7 +1489,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
        goto out;
 clear_pmdnuma:
        BUG_ON(!PageLocked(page));
-       was_writable = pmd_write(pmd);
+       was_writable = pmd_savedwrite(pmd);
        pmd = pmd_modify(pmd, vma->vm_page_prot);
        pmd = pmd_mkyoung(pmd);
        if (was_writable)
@@ -1335,7 +1506,7 @@ out:
 
        if (page_nid != -1)
                task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
-                               vmf->flags);
+                               flags);
 
        return 0;
 }
@@ -1573,7 +1744,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                        entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
                        entry = pmd_modify(entry, newprot);
                        if (preserve_write)
-                               entry = pmd_mkwrite(entry);
+                               entry = pmd_mk_savedwrite(entry);
                        ret = HPAGE_PMD_NR;
                        set_pmd_at(mm, addr, pmd, entry);
                        BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
@@ -1601,6 +1772,84 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
        return NULL;
 }
 
+/*
+ * Returns the page table lock pointer if a given pud maps a thp,
+ * NULL otherwise.
+ *
+ * Note that if it returns the lock pointer, this routine returns without
+ * unlocking the page table lock, so callers must unlock it.
+ */
+spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma)
+{
+       spinlock_t *ptl;
+
+       ptl = pud_lock(vma->vm_mm, pud);
+       if (likely(pud_trans_huge(*pud) || pud_devmap(*pud)))
+               return ptl;
+       spin_unlock(ptl);
+       return NULL;
+}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
+                pud_t *pud, unsigned long addr)
+{
+       pud_t orig_pud;
+       spinlock_t *ptl;
+
+       ptl = __pud_trans_huge_lock(pud, vma);
+       if (!ptl)
+               return 0;
+       /*
+        * For architectures like ppc64 we look at deposited pgtable
+        * when calling pudp_huge_get_and_clear. So do the
+        * pgtable_trans_huge_withdraw after finishing pudp related
+        * operations.
+        */
+       orig_pud = pudp_huge_get_and_clear_full(tlb->mm, addr, pud,
+                       tlb->fullmm);
+       tlb_remove_pud_tlb_entry(tlb, pud, addr);
+       if (vma_is_dax(vma)) {
+               spin_unlock(ptl);
+               /* No zero page support yet */
+       } else {
+               /* No support for anonymous PUD pages yet */
+               BUG();
+       }
+       return 1;
+}
+
+static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud,
+               unsigned long haddr)
+{
+       VM_BUG_ON(haddr & ~HPAGE_PUD_MASK);
+       VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
+       VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma);
+       VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud));
+
+       count_vm_event(THP_SPLIT_PUD);
+
+       pudp_huge_clear_flush_notify(vma, haddr, pud);
+}
+
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+               unsigned long address)
+{
+       spinlock_t *ptl;
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long haddr = address & HPAGE_PUD_MASK;
+
+       mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PUD_SIZE);
+       ptl = pud_lock(mm, pud);
+       if (unlikely(!pud_trans_huge(*pud) && !pud_devmap(*pud)))
+               goto out;
+       __split_huge_pud_locked(vma, pud, haddr);
+
+out:
+       spin_unlock(ptl);
+       mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE);
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+
 static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
                unsigned long haddr, pmd_t *pmd)
 {
@@ -1857,32 +2106,27 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 static void freeze_page(struct page *page)
 {
        enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
-               TTU_RMAP_LOCKED;
-       int i, ret;
+               TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+       int ret;
 
        VM_BUG_ON_PAGE(!PageHead(page), page);
 
        if (PageAnon(page))
                ttu_flags |= TTU_MIGRATION;
 
-       /* We only need TTU_SPLIT_HUGE_PMD once */
-       ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);
-       for (i = 1; !ret && i < HPAGE_PMD_NR; i++) {
-               /* Cut short if the page is unmapped */
-               if (page_count(page) == 1)
-                       return;
-
-               ret = try_to_unmap(page + i, ttu_flags);
-       }
-       VM_BUG_ON_PAGE(ret, page + i - 1);
+       ret = try_to_unmap(page, ttu_flags);
+       VM_BUG_ON_PAGE(ret, page);
 }
 
 static void unfreeze_page(struct page *page)
 {
        int i;
-
-       for (i = 0; i < HPAGE_PMD_NR; i++)
-               remove_migration_ptes(page + i, page + i, true);
+       if (PageTransHuge(page)) {
+               remove_migration_ptes(page, page, true);
+       } else {
+               for (i = 0; i < HPAGE_PMD_NR; i++)
+                       remove_migration_ptes(page + i, page + i, true);
+       }
 }
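Both simplifications appear to rely on the page_vma_mapped_walk() based rmap rework elsewhere in this merge (page_vma_mapped.o is added to the mm Makefile above): try_to_unmap() can now unmap the whole compound page in a single call, and remove_migration_ptes() likewise handles a still-huge page without iterating over all HPAGE_PMD_NR tail pages.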
 
 static void __split_huge_page_tail(struct page *head, int tail,
index 30e7709a5121981c6792dfe2294e8137adc1ece5..2e0e8159ce8e06652f43890ffe3d7f2e0a5665f9 100644 (file)
@@ -1052,7 +1052,8 @@ static int __alloc_gigantic_page(unsigned long start_pfn,
                                unsigned long nr_pages)
 {
        unsigned long end_pfn = start_pfn + nr_pages;
-       return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+       return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
+                                 GFP_KERNEL);
 }
 
 static bool pfn_range_valid_gigantic(struct zone *z,
@@ -3142,7 +3143,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
  * this far.
  */
-static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int hugetlb_vm_op_fault(struct vm_fault *vmf)
 {
        BUG();
        return 0;
index 8ab72f4374e046e604cc7c5f15049f960f1b864e..ccfc2a2969f4402bdbfb27e0b48df151f4da68b7 100644 (file)
@@ -335,12 +335,15 @@ __vma_address(struct page *page, struct vm_area_struct *vma)
 static inline unsigned long
 vma_address(struct page *page, struct vm_area_struct *vma)
 {
-       unsigned long address = __vma_address(page, vma);
+       unsigned long start, end;
+
+       start = __vma_address(page, vma);
+       end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
 
        /* page should be within @vma mapping range */
-       VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
+       VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
 
-       return address;
+       return max(start, vma->vm_start);
 }
 
 #else /* !CONFIG_MMU */
index b2a0cff2bb351ae94e4115c0c3920cd2ae0fbbca..25f0e6521f36c66e349804355aac536662e11257 100644 (file)
@@ -435,7 +435,7 @@ void kasan_cache_shrink(struct kmem_cache *cache)
        quarantine_remove_cache(cache);
 }
 
-void kasan_cache_destroy(struct kmem_cache *cache)
+void kasan_cache_shutdown(struct kmem_cache *cache)
 {
        quarantine_remove_cache(cache);
 }
index dae929c02bbb991cd0f50493a431af2ea6c36c29..6f1ed16308736918730ea836b5fecafc908e487b 100644 (file)
@@ -274,6 +274,7 @@ static void per_cpu_remove_cache(void *arg)
        qlist_free_all(&to_free, cache);
 }
 
+/* Free all quarantined objects belonging to cache. */
 void quarantine_remove_cache(struct kmem_cache *cache)
 {
        unsigned long flags, i;
index 77ae3239c3de17bfbf7ba29b56a5cb270611cfd8..34bce5c308e3b1ff005fb482fd73b10337c324a8 100644 (file)
@@ -420,7 +420,7 @@ int __khugepaged_enter(struct mm_struct *mm)
        list_add_tail(&mm_slot->mm_node, &khugepaged_scan.mm_head);
        spin_unlock(&khugepaged_mm_lock);
 
-       atomic_inc(&mm->mm_count);
+       mmgrab(mm);
        if (wakeup)
                wake_up_interruptible(&khugepaged_wait);
 
index 9ae6011a41f895d56942175814d1fe0158a591b7..520e4c37fec738c7cd72215486fa524fbee9e056 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -223,6 +223,12 @@ static unsigned int ksm_thread_pages_to_scan = 100;
 /* Milliseconds ksmd should sleep between batches */
 static unsigned int ksm_thread_sleep_millisecs = 20;
 
+/* Checksum of an empty (zeroed) page */
+static unsigned int zero_checksum __read_mostly;
+
+/* Whether to merge empty (zeroed) pages with actual zero pages */
+static bool ksm_use_zero_pages __read_mostly;
+
 #ifdef CONFIG_NUMA
 /* Zeroed when merging across nodes is not allowed */
 static unsigned int ksm_merge_across_nodes = 1;
@@ -850,33 +856,36 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
                              pte_t *orig_pte)
 {
        struct mm_struct *mm = vma->vm_mm;
-       unsigned long addr;
-       pte_t *ptep;
-       spinlock_t *ptl;
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+       };
        int swapped;
        int err = -EFAULT;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
 
-       addr = page_address_in_vma(page, vma);
-       if (addr == -EFAULT)
+       pvmw.address = page_address_in_vma(page, vma);
+       if (pvmw.address == -EFAULT)
                goto out;
 
        BUG_ON(PageTransCompound(page));
 
-       mmun_start = addr;
-       mmun_end   = addr + PAGE_SIZE;
+       mmun_start = pvmw.address;
+       mmun_end   = pvmw.address + PAGE_SIZE;
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-       ptep = page_check_address(page, mm, addr, &ptl, 0);
-       if (!ptep)
+       if (!page_vma_mapped_walk(&pvmw))
                goto out_mn;
+       if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
+               goto out_unlock;
 
-       if (pte_write(*ptep) || pte_dirty(*ptep)) {
+       if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
+           (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte))) {
                pte_t entry;
 
                swapped = PageSwapCache(page);
-               flush_cache_page(vma, addr, page_to_pfn(page));
+               flush_cache_page(vma, pvmw.address, page_to_pfn(page));
                /*
                 * Ok this is tricky, when get_user_pages_fast() run it doesn't
                 * take any lock, therefore the check that we are going to make
@@ -886,25 +895,29 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
                 * this assure us that no O_DIRECT can happen after the check
                 * or in the middle of the check.
                 */
-               entry = ptep_clear_flush_notify(vma, addr, ptep);
+               entry = ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
                /*
                 * Check that no O_DIRECT or similar I/O is in progress on the
                 * page
                 */
                if (page_mapcount(page) + 1 + swapped != page_count(page)) {
-                       set_pte_at(mm, addr, ptep, entry);
+                       set_pte_at(mm, pvmw.address, pvmw.pte, entry);
                        goto out_unlock;
                }
                if (pte_dirty(entry))
                        set_page_dirty(page);
-               entry = pte_mkclean(pte_wrprotect(entry));
-               set_pte_at_notify(mm, addr, ptep, entry);
+
+               if (pte_protnone(entry))
+                       entry = pte_mkclean(pte_clear_savedwrite(entry));
+               else
+                       entry = pte_mkclean(pte_wrprotect(entry));
+               set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
        }
-       *orig_pte = *ptep;
+       *orig_pte = *pvmw.pte;
        err = 0;
 
 out_unlock:
-       pte_unmap_unlock(ptep, ptl);
+       page_vma_mapped_walk_done(&pvmw);
 out_mn:
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out:
@@ -926,6 +939,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
        struct mm_struct *mm = vma->vm_mm;
        pmd_t *pmd;
        pte_t *ptep;
+       pte_t newpte;
        spinlock_t *ptl;
        unsigned long addr;
        int err = -EFAULT;
@@ -950,12 +964,22 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
                goto out_mn;
        }
 
-       get_page(kpage);
-       page_add_anon_rmap(kpage, vma, addr, false);
+       /*
+        * No need to check ksm_use_zero_pages here: we can only have a
+        * zero_page here if ksm_use_zero_pages was enabled already.
+        */
+       if (!is_zero_pfn(page_to_pfn(kpage))) {
+               get_page(kpage);
+               page_add_anon_rmap(kpage, vma, addr, false);
+               newpte = mk_pte(kpage, vma->vm_page_prot);
+       } else {
+               newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
+                                              vma->vm_page_prot));
+       }
 
        flush_cache_page(vma, addr, pte_pfn(*ptep));
        ptep_clear_flush_notify(vma, addr, ptep);
-       set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+       set_pte_at_notify(mm, addr, ptep, newpte);
 
        page_remove_rmap(page, false);
        if (!page_mapped(page))
@@ -1467,6 +1491,23 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
                return;
        }
 
+       /*
+        * Same checksum as an empty page. We attempt to merge it with the
+        * appropriate zero page if the user enabled this via sysfs.
+        */
+       if (ksm_use_zero_pages && (checksum == zero_checksum)) {
+               struct vm_area_struct *vma;
+
+               vma = find_mergeable_vma(rmap_item->mm, rmap_item->address);
+               err = try_to_merge_one_page(vma, page,
+                                           ZERO_PAGE(rmap_item->address));
+               /*
+                * In case of failure, the page was not really empty, so we
+                * need to continue. Otherwise we're done.
+                */
+               if (!err)
+                       return;
+       }
        tree_rmap_item =
                unstable_tree_search_insert(rmap_item, page, &tree_page);
        if (tree_rmap_item) {
@@ -1813,7 +1854,7 @@ int __ksm_enter(struct mm_struct *mm)
        spin_unlock(&ksm_mmlist_lock);
 
        set_bit(MMF_VM_MERGEABLE, &mm->flags);
-       atomic_inc(&mm->mm_count);
+       mmgrab(mm);
 
        if (needs_wakeup)
                wake_up_interruptible(&ksm_thread_wait);
@@ -2233,6 +2274,28 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 KSM_ATTR(merge_across_nodes);
 #endif
 
+static ssize_t use_zero_pages_show(struct kobject *kobj,
+                               struct kobj_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%u\n", ksm_use_zero_pages);
+}
+static ssize_t use_zero_pages_store(struct kobject *kobj,
+                                  struct kobj_attribute *attr,
+                                  const char *buf, size_t count)
+{
+       int err;
+       bool value;
+
+       err = kstrtobool(buf, &value);
+       if (err)
+               return -EINVAL;
+
+       ksm_use_zero_pages = value;
+
+       return count;
+}
+KSM_ATTR(use_zero_pages);
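Once the attribute is wired into ksm_attrs below, this knob should surface as /sys/kernel/mm/ksm/use_zero_pages; writing any kstrtobool()-accepted true value enables merging of zero-filled pages with the kernel zero page, and ksm_init() below leaves it disabled by default for backwards compatibility.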
+
 static ssize_t pages_shared_show(struct kobject *kobj,
                                 struct kobj_attribute *attr, char *buf)
 {
@@ -2290,6 +2353,7 @@ static struct attribute *ksm_attrs[] = {
 #ifdef CONFIG_NUMA
        &merge_across_nodes_attr.attr,
 #endif
+       &use_zero_pages_attr.attr,
        NULL,
 };
 
@@ -2304,6 +2368,11 @@ static int __init ksm_init(void)
        struct task_struct *ksm_thread;
        int err;
 
+       /* The correct value depends on page size and endianness */
+       zero_checksum = calc_checksum(ZERO_PAGE(0));
+       /* Default to false for backwards compatibility */
+       ksm_use_zero_pages = false;
+
        err = ksm_slab_init();
        if (err)
                goto out;
index b530a4986035bac08c12c996600f19fddb597acb..dc5927c812d3d1f9a209fbdbea3a36a61cbde17d 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/backing-dev.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/shmem_fs.h>
 #include <linux/mmu_notifier.h>
 
 #include <asm/tlb.h>
@@ -92,14 +93,28 @@ static long madvise_behavior(struct vm_area_struct *vma,
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE:
                error = ksm_madvise(vma, start, end, behavior, &new_flags);
-               if (error)
+               if (error) {
+                       /*
+                        * madvise() returns EAGAIN if kernel resources, such as
+                        * slab, are temporarily unavailable.
+                        */
+                       if (error == -ENOMEM)
+                               error = -EAGAIN;
                        goto out;
+               }
                break;
        case MADV_HUGEPAGE:
        case MADV_NOHUGEPAGE:
                error = hugepage_madvise(vma, &new_flags, behavior);
-               if (error)
+               if (error) {
+                       /*
+                        * madvise() returns EAGAIN if kernel resources, such as
+                        * slab, are temporarily unavailable.
+                        */
+                       if (error == -ENOMEM)
+                               error = -EAGAIN;
                        goto out;
+               }
                break;
        }
 
@@ -120,15 +135,37 @@ static long madvise_behavior(struct vm_area_struct *vma,
        *prev = vma;
 
        if (start != vma->vm_start) {
-               error = split_vma(mm, vma, start, 1);
-               if (error)
+               if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+                       error = -ENOMEM;
                        goto out;
+               }
+               error = __split_vma(mm, vma, start, 1);
+               if (error) {
+                       /*
+                        * madvise() returns EAGAIN if kernel resources, such as
+                        * slab, are temporarily unavailable.
+                        */
+                       if (error == -ENOMEM)
+                               error = -EAGAIN;
+                       goto out;
+               }
        }
 
        if (end != vma->vm_end) {
-               error = split_vma(mm, vma, end, 0);
-               if (error)
+               if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+                       error = -ENOMEM;
+                       goto out;
+               }
+               error = __split_vma(mm, vma, end, 0);
+               if (error) {
+                       /*
+                        * madvise() returns EAGAIN if kernel resources, such as
+                        * slab, are temporarily unavailable.
+                        */
+                       if (error == -ENOMEM)
+                               error = -EAGAIN;
                        goto out;
+               }
        }
 
 success:
@@ -136,10 +173,7 @@ success:
         * vm_flags is protected by the mmap_sem held in write mode.
         */
        vma->vm_flags = new_flags;
-
 out:
-       if (error == -ENOMEM)
-               error = -EAGAIN;
        return error;
 }
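Net effect of this hunk: the blanket -ENOMEM to -EAGAIN rewrite at the out label is replaced by per-call-site conversion, so the new map_count limit checks can report a genuine -ENOMEM to userspace while transient resource failures from ksm_madvise(), hugepage_madvise() and __split_vma() still surface as -EAGAIN.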
 
@@ -479,7 +513,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
        if (!can_madv_dontneed_vma(vma))
                return -EINVAL;
 
-       madvise_userfault_dontneed(vma, prev, start, end);
+       userfaultfd_remove(vma, prev, start, end);
        zap_page_range(vma, start, end - start);
        return 0;
 }
@@ -520,6 +554,7 @@ static long madvise_remove(struct vm_area_struct *vma,
         * mmap_sem.
         */
        get_file(f);
+       userfaultfd_remove(vma, prev, start, end);
        up_read(&current->mm->mmap_sem);
        error = vfs_fallocate(f,
                                FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
index c004f52be419be1c53ab08c14313906001c19aca..b64b47803e529a87d87f3e3f022e97f17ff606be 100644 (file)
@@ -35,15 +35,18 @@ struct memblock memblock __initdata_memblock = {
        .memory.regions         = memblock_memory_init_regions,
        .memory.cnt             = 1,    /* empty dummy entry */
        .memory.max             = INIT_MEMBLOCK_REGIONS,
+       .memory.name            = "memory",
 
        .reserved.regions       = memblock_reserved_init_regions,
        .reserved.cnt           = 1,    /* empty dummy entry */
        .reserved.max           = INIT_MEMBLOCK_REGIONS,
+       .reserved.name          = "reserved",
 
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
        .physmem.regions        = memblock_physmem_init_regions,
        .physmem.cnt            = 1,    /* empty dummy entry */
        .physmem.max            = INIT_PHYSMEM_REGIONS,
+       .physmem.name           = "physmem",
 #endif
 
        .bottom_up              = false,
@@ -64,18 +67,6 @@ ulong __init_memblock choose_memblock_flags(void)
        return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
 }
 
-/* inline so we don't get a warning when pr_debug is compiled out */
-static __init_memblock const char *
-memblock_type_name(struct memblock_type *type)
-{
-       if (type == &memblock.memory)
-               return "memory";
-       else if (type == &memblock.reserved)
-               return "reserved";
-       else
-               return "unknown";
-}
-
 /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
 static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
 {
@@ -402,12 +393,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
        }
        if (!addr) {
                pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
-                      memblock_type_name(type), type->max, type->max * 2);
+                      type->name, type->max, type->max * 2);
                return -1;
        }
 
        memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]",
-                       memblock_type_name(type), type->max * 2, (u64)addr,
+                       type->name, type->max * 2, (u64)addr,
                        (u64)addr + new_size - 1);
 
        /*
@@ -1693,14 +1684,14 @@ phys_addr_t __init_memblock memblock_get_current_limit(void)
        return memblock.current_limit;
 }
 
-static void __init_memblock memblock_dump(struct memblock_type *type, char *name)
+static void __init_memblock memblock_dump(struct memblock_type *type)
 {
        phys_addr_t base, end, size;
        unsigned long flags;
        int idx;
        struct memblock_region *rgn;
 
-       pr_info(" %s.cnt  = 0x%lx\n", name, type->cnt);
+       pr_info(" %s.cnt  = 0x%lx\n", type->name, type->cnt);
 
        for_each_memblock_type(type, rgn) {
                char nid_buf[32] = "";
@@ -1715,7 +1706,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name
                                 memblock_get_region_node(rgn));
 #endif
                pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#lx\n",
-                       name, idx, &base, &end, &size, nid_buf, flags);
+                       type->name, idx, &base, &end, &size, nid_buf, flags);
        }
 }
 
@@ -1726,8 +1717,11 @@ void __init_memblock __memblock_dump_all(void)
                &memblock.memory.total_size,
                &memblock.reserved.total_size);
 
-       memblock_dump(&memblock.memory, "memory");
-       memblock_dump(&memblock.reserved, "reserved");
+       memblock_dump(&memblock.memory);
+       memblock_dump(&memblock.reserved);
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+       memblock_dump(&memblock.physmem);
+#endif
 }
 
 void __init memblock_allow_resize(void)
index 1fd6affcdde7cfdff2c768f99ead14c62624371f..45867e439d31d7f9836769463cad10b170180558 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
 #include <linux/mm.h>
+#include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
 #include <linux/smp.h>
index f283c7e0a2a302c617c03a3aebf60e262b94a895..3d0f2fd4bf73fee7b5cf320cd1d901e8932bb7ab 100644 (file)
@@ -1527,7 +1527,8 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
 {
        int ret = __get_any_page(page, pfn, flags);
 
-       if (ret == 1 && !PageHuge(page) && !PageLRU(page)) {
+       if (ret == 1 && !PageHuge(page) &&
+           !PageLRU(page) && !__PageMovable(page)) {
                /*
                 * Try to free it.
                 */
@@ -1649,7 +1650,10 @@ static int __soft_offline_page(struct page *page, int flags)
         * Try to migrate to a new page instead. migrate.c
         * handles a large number of cases for us.
         */
-       ret = isolate_lru_page(page);
+       if (PageLRU(page))
+               ret = isolate_lru_page(page);
+       else
+               ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
        /*
         * Drop the page reference which came from get_any_page();
         * a successful isolate_lru_page() already took another one.
@@ -1657,18 +1661,20 @@ static int __soft_offline_page(struct page *page, int flags)
        put_hwpoison_page(page);
        if (!ret) {
                LIST_HEAD(pagelist);
-               inc_node_page_state(page, NR_ISOLATED_ANON +
-                                       page_is_file_cache(page));
+               /*
+                * After the page is isolated from the LRU, PageLRU is
+                * cleared, so test !__PageMovable instead: an LRU page's
+                * mapping cannot have PAGE_MAPPING_MOVABLE set.
+                */
+               if (!__PageMovable(page))
+                       inc_node_page_state(page, NR_ISOLATED_ANON +
+                                               page_is_file_cache(page));
                list_add(&page->lru, &pagelist);
                ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
                                        MIGRATE_SYNC, MR_MEMORY_FAILURE);
                if (ret) {
-                       if (!list_empty(&pagelist)) {
-                               list_del(&page->lru);
-                               dec_node_page_state(page, NR_ISOLATED_ANON +
-                                               page_is_file_cache(page));
-                               putback_lru_page(page);
-                       }
+                       if (!list_empty(&pagelist))
+                               putback_movable_pages(&pagelist);
 
                        pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
                                pfn, ret, page->flags);
index 7663068a33c6674a4e9ea1de34f02dc936316c24..14fc0b40f0bb6cf3ee50cfff8e7db865ad442cdd 100644 (file)
@@ -30,7 +30,7 @@
 
 /*
  * 05.04.94  -  Multi-page memory management added for v1.1.
- *             Idea by Alex Bligh (alex@cconcepts.co.uk)
+ *              Idea by Alex Bligh (alex@cconcepts.co.uk)
  *
  * 16.07.99  -  Support of BIGMEM added by Gerhard Wichert, Siemens AG
  *             (Gerhard.Wichert@pdb.siemens.de)
@@ -82,9 +82,9 @@
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
-struct page *mem_map;
-
 EXPORT_SYMBOL(max_mapnr);
+
+struct page *mem_map;
 EXPORT_SYMBOL(mem_map);
 #endif
 
@@ -95,8 +95,7 @@ EXPORT_SYMBOL(mem_map);
  * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
  * and ZONE_HIGHMEM.
  */
-void * high_memory;
-
+void *high_memory;
 EXPORT_SYMBOL(high_memory);
 
 /*
@@ -120,10 +119,10 @@ static int __init disable_randmaps(char *s)
 __setup("norandmaps", disable_randmaps);
 
 unsigned long zero_pfn __read_mostly;
-unsigned long highest_memmap_pfn __read_mostly;
-
 EXPORT_SYMBOL(zero_pfn);
 
+unsigned long highest_memmap_pfn __read_mostly;
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -556,7 +555,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
                if (is_vm_hugetlb_page(vma)) {
                        hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
-                               floor, next? next->vm_start: ceiling);
+                               floor, next ? next->vm_start : ceiling);
                } else {
                        /*
                         * Optimization: gather nearby vmas into one call down
@@ -569,7 +568,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                                unlink_file_vma(vma);
                        }
                        free_pgd_range(tlb, addr, vma->vm_end,
-                               floor, next? next->vm_start: ceiling);
+                               floor, next ? next->vm_start : ceiling);
                }
                vma = next;
        }
@@ -1001,7 +1000,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
                next = pmd_addr_end(addr, end);
                if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
                        int err;
-                       VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
+                       VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, vma);
                        err = copy_huge_pmd(dst_mm, src_mm,
                                            dst_pmd, src_pmd, addr, vma);
                        if (err == -ENOMEM)
@@ -1032,6 +1031,18 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
        src_pud = pud_offset(src_pgd, addr);
        do {
                next = pud_addr_end(addr, end);
+               if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
+                       int err;
+
+                       VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, vma);
+                       err = copy_huge_pud(dst_mm, src_mm,
+                                           dst_pud, src_pud, addr, vma);
+                       if (err == -ENOMEM)
+                               return -ENOMEM;
+                       if (!err)
+                               continue;
+                       /* fall through */
+               }
                if (pud_none_or_clear_bad(src_pud))
                        continue;
                if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
@@ -1129,9 +1140,8 @@ again:
        arch_enter_lazy_mmu_mode();
        do {
                pte_t ptent = *pte;
-               if (pte_none(ptent)) {
+               if (pte_none(ptent))
                        continue;
-               }
 
                if (pte_present(ptent)) {
                        struct page *page;
@@ -1263,9 +1273,19 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
+               if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
+                       if (next - addr != HPAGE_PUD_SIZE) {
+                               VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+                               split_huge_pud(vma, pud, addr);
+                       } else if (zap_huge_pud(tlb, vma, pud, addr))
+                               goto next;
+                       /* fall through */
+               }
                if (pud_none_or_clear_bad(pud))
                        continue;
                next = zap_pmd_range(tlb, vma, pud, addr, next, details);
+next:
+               cond_resched();
        } while (pud++, addr = next, addr != end);
 
        return addr;
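
The teardown side makes the matching decision: a range that only partially covers a huge PUD forces a split first (valid only with mmap_sem held, which the VM_BUG_ON_VMA asserts), while a fully covered entry is zapped in one step and the loop skips ahead; the new cond_resched() keeps latency bounded when unmapping very large regions. Roughly:

        if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
                if (next - addr != HPAGE_PUD_SIZE)
                        split_huge_pud(vma, pud, addr); /* partial cover: split */
                else if (zap_huge_pud(tlb, vma, pud, addr))
                        goto next;                      /* whole entry zapped */
        }
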
@@ -1441,10 +1461,10 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
 pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
                        spinlock_t **ptl)
 {
-       pgd_t * pgd = pgd_offset(mm, addr);
-       pud_t * pud = pud_alloc(mm, pgd, addr);
+       pgd_t *pgd = pgd_offset(mm, addr);
+       pud_t *pud = pud_alloc(mm, pgd, addr);
        if (pud) {
-               pmd_t * pmd = pmd_alloc(mm, pud, addr);
+               pmd_t *pmd = pmd_alloc(mm, pud, addr);
                if (pmd) {
                        VM_BUG_ON(pmd_trans_huge(*pmd));
                        return pte_alloc_map_lock(mm, pmd, addr, ptl);
@@ -2035,7 +2055,7 @@ static int do_page_mkwrite(struct vm_fault *vmf)
 
        vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 
-       ret = vmf->vma->vm_ops->page_mkwrite(vmf->vma, vmf);
+       ret = vmf->vma->vm_ops->page_mkwrite(vmf);
        /* Restore original flags so that caller is not surprised */
        vmf->flags = old_flags;
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
@@ -2307,7 +2327,7 @@ static int wp_pfn_shared(struct vm_fault *vmf)
 
                pte_unmap_unlock(vmf->pte, vmf->ptl);
                vmf->flags |= FAULT_FLAG_MKWRITE;
-               ret = vma->vm_ops->pfn_mkwrite(vma, vmf);
+               ret = vma->vm_ops->pfn_mkwrite(vmf);
                if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))
                        return ret;
                return finish_mkwrite_fault(vmf);
@@ -2503,7 +2523,7 @@ void unmap_mapping_range(struct address_space *mapping,
                        hlen = ULONG_MAX - hba + 1;
        }
 
-       details.check_mapping = even_cows? NULL: mapping;
+       details.check_mapping = even_cows ? NULL : mapping;
        details.first_index = hba;
        details.last_index = hba + hlen - 1;
        if (details.last_index < details.first_index)
@@ -2861,7 +2881,7 @@ static int __do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        int ret;
 
-       ret = vma->vm_ops->fault(vma, vmf);
+       ret = vma->vm_ops->fault(vmf);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
                            VM_FAULT_DONE_COW)))
                return ret;
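
These hunks belong to a tree-wide API change: struct vm_fault now carries the faulting VMA in vmf->vma, so ->fault, ->page_mkwrite and ->pfn_mkwrite lose their separate vma argument. A hedged sketch of a driver-side handler after the conversion (the lookup helper is hypothetical):

        static int demo_fault(struct vm_fault *vmf)
        {
                struct vm_area_struct *vma = vmf->vma;  /* vma now comes from vmf */

                vmf->page = demo_lookup_page(vma, vmf->pgoff);  /* hypothetical */
                return vmf->page ? 0 : VM_FAULT_SIGBUS;
        }

        static const struct vm_operations_struct demo_vm_ops = {
                .fault = demo_fault,
        };
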
@@ -2898,7 +2918,7 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
                atomic_long_inc(&vma->vm_mm->nr_ptes);
                pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
                spin_unlock(vmf->ptl);
-               vmf->prealloc_pte = 0;
+               vmf->prealloc_pte = NULL;
        } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
                return VM_FAULT_OOM;
        }
@@ -2946,7 +2966,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
         * count that as nr_ptes.
         */
        atomic_long_inc(&vma->vm_mm->nr_ptes);
-       vmf->prealloc_pte = 0;
+       vmf->prealloc_pte = NULL;
 }
 
 static int do_set_pmd(struct vm_fault *vmf, struct page *page)
@@ -3352,7 +3372,7 @@ static int do_fault(struct vm_fault *vmf)
        /* preallocated pagetable is unused: free it */
        if (vmf->prealloc_pte) {
                pte_free(vma->vm_mm, vmf->prealloc_pte);
-               vmf->prealloc_pte = 0;
+               vmf->prealloc_pte = NULL;
        }
        return ret;
 }
@@ -3380,32 +3400,32 @@ static int do_numa_page(struct vm_fault *vmf)
        int last_cpupid;
        int target_nid;
        bool migrated = false;
-       pte_t pte = vmf->orig_pte;
-       bool was_writable = pte_write(pte);
+       pte_t pte;
+       bool was_writable = pte_savedwrite(vmf->orig_pte);
        int flags = 0;
 
        /*
-       * The "pte" at this point cannot be used safely without
-       * validation through pte_unmap_same(). It's of NUMA type but
-       * the pfn may be screwed if the read is non atomic.
-       *
-       * We can safely just do a "set_pte_at()", because the old
-       * page table entry is not accessible, so there would be no
-       * concurrent hardware modifications to the PTE.
-       */
+        * The "pte" at this point cannot be used safely without
+        * validation through pte_unmap_same(). It's of NUMA type but
+        * the pfn may be screwed if the read is non-atomic.
+        */
        vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd);
        spin_lock(vmf->ptl);
-       if (unlikely(!pte_same(*vmf->pte, pte))) {
+       if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
                pte_unmap_unlock(vmf->pte, vmf->ptl);
                goto out;
        }
 
-       /* Make it present again */
+       /*
+        * Make it present again. Depending on how the arch implements
+        * non-accessible ptes, some can allow access by kernel mode.
+        */
+       pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte);
        pte = pte_modify(pte, vma->vm_page_prot);
        pte = pte_mkyoung(pte);
        if (was_writable)
                pte = pte_mkwrite(pte);
-       set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+       ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
        update_mmu_cache(vma, vmf->address, vmf->pte);
 
        page = vm_normal_page(vma, vmf->address, pte);
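
The rewritten NUMA-hinting path also replaces the bare set_pte_at() with the transactional ptep_modify_prot_start()/ptep_modify_prot_commit() pair: start reads and logically clears the entry, the new bits are applied to the returned value, and commit installs the result, letting architectures optimize or batch the update. The shape of the transaction as used above:

        pte = ptep_modify_prot_start(mm, addr, ptep);   /* read + logically clear */
        pte = pte_modify(pte, prot);                    /* apply new protections */
        pte = pte_mkyoung(pte);
        ptep_modify_prot_commit(mm, addr, ptep, pte);   /* install the new entry */
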
@@ -3466,8 +3486,8 @@ static int create_huge_pmd(struct vm_fault *vmf)
 {
        if (vma_is_anonymous(vmf->vma))
                return do_huge_pmd_anonymous_page(vmf);
-       if (vmf->vma->vm_ops->pmd_fault)
-               return vmf->vma->vm_ops->pmd_fault(vmf);
+       if (vmf->vma->vm_ops->huge_fault)
+               return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
        return VM_FAULT_FALLBACK;
 }
 
@@ -3475,8 +3495,8 @@ static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
        if (vma_is_anonymous(vmf->vma))
                return do_huge_pmd_wp_page(vmf, orig_pmd);
-       if (vmf->vma->vm_ops->pmd_fault)
-               return vmf->vma->vm_ops->pmd_fault(vmf);
+       if (vmf->vma->vm_ops->huge_fault)
+               return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
 
        /* COW handled on pte level: split pmd */
        VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
@@ -3490,6 +3510,30 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
        return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
 }
 
+static int create_huge_pud(struct vm_fault *vmf)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       /* No support for anonymous transparent PUD pages yet */
+       if (vma_is_anonymous(vmf->vma))
+               return VM_FAULT_FALLBACK;
+       if (vmf->vma->vm_ops->huge_fault)
+               return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+       return VM_FAULT_FALLBACK;
+}
+
+static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       /* No support for anonymous transparent PUD pages yet */
+       if (vma_is_anonymous(vmf->vma))
+               return VM_FAULT_FALLBACK;
+       if (vmf->vma->vm_ops->huge_fault)
+               return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+       return VM_FAULT_FALLBACK;
+}
+
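
Both helpers route huge faults through a single ->huge_fault handler that takes an enum page_entry_size, replacing the PMD-only ->pmd_fault seen in the earlier hunks. A sketch of a handler dispatching on the entry size (the per-size helpers are hypothetical):

        static int demo_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size)
        {
                switch (pe_size) {
                case PE_SIZE_PMD:
                        return demo_fault_pmd(vmf);     /* hypothetical PMD path */
                case PE_SIZE_PUD:
                        return demo_fault_pud(vmf);     /* hypothetical PUD path */
                default:
                        return VM_FAULT_FALLBACK;       /* fall back to PTEs */
                }
        }
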
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -3605,22 +3649,46 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
        };
        struct mm_struct *mm = vma->vm_mm;
        pgd_t *pgd;
-       pud_t *pud;
+       int ret;
 
        pgd = pgd_offset(mm, address);
-       pud = pud_alloc(mm, pgd, address);
-       if (!pud)
+
+       vmf.pud = pud_alloc(mm, pgd, address);
+       if (!vmf.pud)
                return VM_FAULT_OOM;
-       vmf.pmd = pmd_alloc(mm, pud, address);
+       if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
+               ret = create_huge_pud(&vmf);
+               if (!(ret & VM_FAULT_FALLBACK))
+                       return ret;
+       } else {
+               pud_t orig_pud = *vmf.pud;
+
+               barrier();
+               if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) {
+                       unsigned int dirty = flags & FAULT_FLAG_WRITE;
+
+                       /* NUMA case for anonymous PUDs would go here */
+
+                       if (dirty && !pud_write(orig_pud)) {
+                               ret = wp_huge_pud(&vmf, orig_pud);
+                               if (!(ret & VM_FAULT_FALLBACK))
+                                       return ret;
+                       } else {
+                               huge_pud_set_accessed(&vmf, orig_pud);
+                               return 0;
+                       }
+               }
+       }
+
+       vmf.pmd = pmd_alloc(mm, vmf.pud, address);
        if (!vmf.pmd)
                return VM_FAULT_OOM;
        if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) {
-               int ret = create_huge_pmd(&vmf);
+               ret = create_huge_pmd(&vmf);
                if (!(ret & VM_FAULT_FALLBACK))
                        return ret;
        } else {
                pmd_t orig_pmd = *vmf.pmd;
-               int ret;
 
                barrier();
                if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
@@ -3680,14 +3748,14 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 
        if (flags & FAULT_FLAG_USER) {
                mem_cgroup_oom_disable();
-                /*
-                 * The task may have entered a memcg OOM situation but
-                 * if the allocation error was handled gracefully (no
-                 * VM_FAULT_OOM), there is no need to kill anything.
-                 * Just clean up the OOM state peacefully.
-                 */
-                if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
-                        mem_cgroup_oom_synchronize(false);
+               /*
+                * The task may have entered a memcg OOM situation but
+                * if the allocation error was handled gracefully (no
+                * VM_FAULT_OOM), there is no need to kill anything.
+                * Just clean up the OOM state peacefully.
+                */
+               if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+                       mem_cgroup_oom_synchronize(false);
        }
 
        /*
@@ -3737,13 +3805,14 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
  */
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
+       spinlock_t *ptl;
        pmd_t *new = pmd_alloc_one(mm, address);
        if (!new)
                return -ENOMEM;
 
        smp_wmb(); /* See comment in __pte_alloc */
 
-       spin_lock(&mm->page_table_lock);
+       ptl = pud_lock(mm, pud);
 #ifndef __ARCH_HAS_4LEVEL_HACK
        if (!pud_present(*pud)) {
                mm_inc_nr_pmds(mm);
@@ -3757,7 +3826,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
        } else /* Another has populated it */
                pmd_free(mm, new);
 #endif /* __ARCH_HAS_4LEVEL_HACK */
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
        return 0;
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
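
Switching __pmd_alloc() from the mm-wide page_table_lock to pud_lock() means that, with split page-table locks configured, only the lock covering this particular PUD page is taken, so concurrent PMD allocations in distant parts of the address space no longer serialize on one spinlock. The idiom in sketch form:

        spinlock_t *ptl = pud_lock(mm, pud);    /* per-PUD lock, or the mm-wide
                                                 * lock on small configurations */
        if (!pud_present(*pud))
                pud_populate(mm, pud, new);     /* publish the new PMD page */
        else
                pmd_free(mm, new);              /* another thread won the race */
        spin_unlock(ptl);
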
index d67787d10ff0e9c4e068beb819daf4761947be04..1d3ed58f92abe199644399d0fe59da3f7906dcfb 100644 (file)
@@ -126,6 +126,8 @@ void put_online_mems(void)
 
 void mem_hotplug_begin(void)
 {
+       assert_held_device_hotplug();
+
        mem_hotplug.active_writer = current;
 
        memhp_lock_acquire();
@@ -862,7 +864,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(__remove_pages);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 int set_online_page_callback(online_page_callback_t callback)
@@ -1336,7 +1337,7 @@ int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
 
 static int online_memory_block(struct memory_block *mem, void *arg)
 {
-       return memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
+       return device_online(&mem->dev);
 }
 
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
@@ -1508,7 +1509,7 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
                        while ((i < MAX_ORDER_NR_PAGES) &&
                                !pfn_valid_within(pfn + i))
                                i++;
-                       if (i == MAX_ORDER_NR_PAGES)
+                       if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
                                continue;
                        page = pfn_to_page(pfn + i);
                        if (zone && page_zone(page) != zone)
@@ -1522,7 +1523,7 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 
        if (zone) {
                *valid_start = start;
-               *valid_end = end;
+               *valid_end = min(end, end_pfn);
                return 1;
        } else {
                return 0;
@@ -1530,10 +1531,10 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 }
 
 /*
- * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
- * and hugepages). We scan pfn because it's much easier than scanning over
- * linked list. This function returns the pfn of the first found movable
- * page if it's found, otherwise 0.
+ * Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
+ * non-lru movable pages and hugepages). We scan pfn because it's much
+ * easier than scanning over linked list. This function returns the pfn
+ * of the first found movable page if it's found, otherwise 0.
  */
 static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
@@ -1544,6 +1545,8 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
                        page = pfn_to_page(pfn);
                        if (PageLRU(page))
                                return pfn;
+                       if (__PageMovable(page))
+                               return pfn;
                        if (PageHuge(page)) {
                                if (page_huge_active(page))
                                        return pfn;
@@ -1620,21 +1623,25 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                if (!get_page_unless_zero(page))
                        continue;
                /*
-                * We can skip free pages. And we can only deal with pages on
-                * LRU.
+                * We can skip free pages. And we can deal with LRU pages and
+                * non-lru movable pages.
                 */
-               ret = isolate_lru_page(page);
+               if (PageLRU(page))
+                       ret = isolate_lru_page(page);
+               else
+                       ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
                if (!ret) { /* Success */
                        put_page(page);
                        list_add_tail(&page->lru, &source);
                        move_pages--;
-                       inc_node_page_state(page, NR_ISOLATED_ANON +
-                                           page_is_file_cache(page));
+                       if (!__PageMovable(page))
+                               inc_node_page_state(page, NR_ISOLATED_ANON +
+                                                   page_is_file_cache(page));
 
                } else {
 #ifdef CONFIG_DEBUG_VM
-                       pr_alert("removing pfn %lx from LRU failed\n", pfn);
-                       dump_page(page, "failed to remove from LRU");
+                       pr_alert("failed to isolate pfn %lx\n", pfn);
+                       dump_page(page, "isolation failed");
 #endif
                        put_page(page);
                        /* Because we don't have big zone->lock. we should
index 87f4d0f818194ea68cd323b1bcf84dd430b826a3..2c63ac06791bbdf0e382e669812fcca67bf3cc55 100644 (file)
@@ -74,7 +74,7 @@ int migrate_prep_local(void)
        return 0;
 }
 
-bool isolate_movable_page(struct page *page, isolate_mode_t mode)
+int isolate_movable_page(struct page *page, isolate_mode_t mode)
 {
        struct address_space *mapping;
 
@@ -125,14 +125,14 @@ bool isolate_movable_page(struct page *page, isolate_mode_t mode)
        __SetPageIsolated(page);
        unlock_page(page);
 
-       return true;
+       return 0;
 
 out_no_isolated:
        unlock_page(page);
 out_putpage:
        put_page(page);
 out:
-       return false;
+       return -EBUSY;
 }
 
 /* It should be called on page which is PG_movable */
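
Returning 0/-errno instead of a bool aligns isolate_movable_page() with isolate_lru_page(), so callers such as the do_migrate_range() hunk earlier can funnel both page classes through one error path:

        if (PageLRU(page))
                ret = isolate_lru_page(page);
        else
                ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
        /* both now return 0 on success and -errno on failure */
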
@@ -193,82 +193,62 @@ void putback_movable_pages(struct list_head *l)
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
+static int remove_migration_pte(struct page *page, struct vm_area_struct *vma,
                                 unsigned long addr, void *old)
 {
-       struct mm_struct *mm = vma->vm_mm;
+       struct page_vma_mapped_walk pvmw = {
+               .page = old,
+               .vma = vma,
+               .address = addr,
+               .flags = PVMW_SYNC | PVMW_MIGRATION,
+       };
+       struct page *new;
+       pte_t pte;
        swp_entry_t entry;
-       pmd_t *pmd;
-       pte_t *ptep, pte;
-       spinlock_t *ptl;
 
-       if (unlikely(PageHuge(new))) {
-               ptep = huge_pte_offset(mm, addr);
-               if (!ptep)
-                       goto out;
-               ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
-       } else {
-               pmd = mm_find_pmd(mm, addr);
-               if (!pmd)
-                       goto out;
+       VM_BUG_ON_PAGE(PageTail(page), page);
+       while (page_vma_mapped_walk(&pvmw)) {
+               new = page - pvmw.page->index +
+                       linear_page_index(vma, pvmw.address);
 
-               ptep = pte_offset_map(pmd, addr);
+               get_page(new);
+               pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
+               if (pte_swp_soft_dirty(*pvmw.pte))
+                       pte = pte_mksoft_dirty(pte);
 
                /*
-                * Peek to check is_swap_pte() before taking ptlock?  No, we
-                * can race mremap's move_ptes(), which skips anon_vma lock.
+                * Recheck VMA as permissions can change since migration started
                 */
-
-               ptl = pte_lockptr(mm, pmd);
-       }
-
-       spin_lock(ptl);
-       pte = *ptep;
-       if (!is_swap_pte(pte))
-               goto unlock;
-
-       entry = pte_to_swp_entry(pte);
-
-       if (!is_migration_entry(entry) ||
-           migration_entry_to_page(entry) != old)
-               goto unlock;
-
-       get_page(new);
-       pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
-       if (pte_swp_soft_dirty(*ptep))
-               pte = pte_mksoft_dirty(pte);
-
-       /* Recheck VMA as permissions can change since migration started  */
-       if (is_write_migration_entry(entry))
-               pte = maybe_mkwrite(pte, vma);
+               entry = pte_to_swp_entry(*pvmw.pte);
+               if (is_write_migration_entry(entry))
+                       pte = maybe_mkwrite(pte, vma);
 
 #ifdef CONFIG_HUGETLB_PAGE
-       if (PageHuge(new)) {
-               pte = pte_mkhuge(pte);
-               pte = arch_make_huge_pte(pte, vma, new, 0);
-       }
+               if (PageHuge(new)) {
+                       pte = pte_mkhuge(pte);
+                       pte = arch_make_huge_pte(pte, vma, new, 0);
+               }
 #endif
-       flush_dcache_page(new);
-       set_pte_at(mm, addr, ptep, pte);
+               flush_dcache_page(new);
+               set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
 
-       if (PageHuge(new)) {
-               if (PageAnon(new))
-                       hugepage_add_anon_rmap(new, vma, addr);
+               if (PageHuge(new)) {
+                       if (PageAnon(new))
+                               hugepage_add_anon_rmap(new, vma, pvmw.address);
+                       else
+                               page_dup_rmap(new, true);
+               } else if (PageAnon(new))
+                       page_add_anon_rmap(new, vma, pvmw.address, false);
                else
-                       page_dup_rmap(new, true);
-       } else if (PageAnon(new))
-               page_add_anon_rmap(new, vma, addr, false);
-       else
-               page_add_file_rmap(new, false);
+                       page_add_file_rmap(new, false);
 
-       if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
-               mlock_vma_page(new);
+               if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
+                       mlock_vma_page(new);
+
+               /* No need to invalidate - it was non-present before */
+               update_mmu_cache(vma, pvmw.address, pvmw.pte);
+       }
 
-       /* No need to invalidate - it was non-present before */
-       update_mmu_cache(vma, addr, ptep);
-unlock:
-       pte_unmap_unlock(ptep, ptl);
-out:
        return SWAP_AGAIN;
 }
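
remove_migration_pte() is rebuilt around the page_vma_mapped_walk() iterator, which replaces the hand-rolled pgd/pud/pmd/pte descent: the walker locates every slot in the VMA that maps the page, takes the appropriate page-table lock itself, and exposes the current slot via pvmw.pte and pvmw.address. The basic usage pattern, with the per-slot work elided:

        struct page_vma_mapped_walk pvmw = {
                .page = old_page,
                .vma = vma,
                .address = addr,
                .flags = PVMW_SYNC | PVMW_MIGRATION,
        };

        while (page_vma_mapped_walk(&pvmw)) {
                /* pvmw.pte is valid here and the matching page-table
                 * lock is held for the body of the loop */
        }
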
 
index ddb872da3f5b065e11937ed5b236643731d24a3c..c5687c45c326b3280c7bb7147a5796a8cd80b68d 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/syscalls.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
 
 #include <linux/uaccess.h>
index b729084eea901d3917fcbd2ae474eb8f5d7c57ba..499b988b1639ac1a905c6faaf349cb2427bd1907 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -176,7 +176,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
        return next;
 }
 
-static int do_brk(unsigned long addr, unsigned long len);
+static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
 
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
@@ -185,6 +185,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        struct mm_struct *mm = current->mm;
        unsigned long min_brk;
        bool populate;
+       LIST_HEAD(uf);
 
        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;
@@ -222,7 +223,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
        /* Always allow shrinking brk. */
        if (brk <= mm->brk) {
-               if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+               if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
                        goto set_brk;
                goto out;
        }
@@ -232,13 +233,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
                goto out;
 
        /* Ok, looks good - let it rip. */
-       if (do_brk(oldbrk, newbrk-oldbrk) < 0)
+       if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
                goto out;
 
 set_brk:
        mm->brk = brk;
        populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
        up_write(&mm->mmap_sem);
+       userfaultfd_unmap_complete(mm, &uf);
        if (populate)
                mm_populate(oldbrk, newbrk - oldbrk);
        return brk;
@@ -1304,7 +1306,8 @@ static inline int mlock_future_check(struct mm_struct *mm,
 unsigned long do_mmap(struct file *file, unsigned long addr,
                        unsigned long len, unsigned long prot,
                        unsigned long flags, vm_flags_t vm_flags,
-                       unsigned long pgoff, unsigned long *populate)
+                       unsigned long pgoff, unsigned long *populate,
+                       struct list_head *uf)
 {
        struct mm_struct *mm = current->mm;
        int pkey = 0;
@@ -1447,7 +1450,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
                        vm_flags |= VM_NORESERVE;
        }
 
-       addr = mmap_region(file, addr, len, vm_flags, pgoff);
+       addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
        if (!IS_ERR_VALUE(addr) &&
            ((vm_flags & VM_LOCKED) ||
             (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -1583,7 +1586,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 }
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
-               unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
+               unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+               struct list_head *uf)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
@@ -1609,7 +1613,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        /* Clear old maps */
        while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
                              &rb_parent)) {
-               if (do_munmap(mm, addr, len))
+               if (do_munmap(mm, addr, len, uf))
                        return -ENOMEM;
        }
 
@@ -2495,11 +2499,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
- * munmap path where it doesn't make sense to fail.
+ * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
+ * has already been checked or doesn't make sense to fail.
  */
-static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
-             unsigned long addr, int new_below)
+int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long addr, int new_below)
 {
        struct vm_area_struct *new;
        int err;
@@ -2579,7 +2583,8 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  * work.  This now handles partial unmappings.
  * Jeremy Fitzhardinge <jeremy@goop.org>
  */
-int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
+             struct list_head *uf)
 {
        unsigned long end;
        struct vm_area_struct *vma, *prev, *last;
@@ -2603,6 +2608,13 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        if (vma->vm_start >= end)
                return 0;
 
+       if (uf) {
+               int error = userfaultfd_unmap_prep(vma, start, end, uf);
+
+               if (error)
+                       return error;
+       }
+
        /*
         * If we need to split any vma, do it now to save pain later.
         *
@@ -2668,27 +2680,22 @@ int vm_munmap(unsigned long start, size_t len)
 {
        int ret;
        struct mm_struct *mm = current->mm;
+       LIST_HEAD(uf);
 
        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;
 
-       ret = do_munmap(mm, start, len);
+       ret = do_munmap(mm, start, len, &uf);
        up_write(&mm->mmap_sem);
+       userfaultfd_unmap_complete(mm, &uf);
        return ret;
 }
 EXPORT_SYMBOL(vm_munmap);
 
 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
-       int ret;
-       struct mm_struct *mm = current->mm;
-
        profile_munmap(addr);
-       if (down_write_killable(&mm->mmap_sem))
-               return -EINTR;
-       ret = do_munmap(mm, addr, len);
-       up_write(&mm->mmap_sem);
-       return ret;
+       return vm_munmap(addr, len);
 }
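
The new struct list_head *uf parameter threads a userfaultfd notification list through every unmap path: userfaultfd_unmap_prep() records affected contexts while mmap_sem is still held, and userfaultfd_unmap_complete() delivers the corresponding UFFD_EVENT_UNMAP events only after the lock is dropped, so monitors are never woken under the semaphore. The caller pattern, as vm_munmap() now implements it:

        LIST_HEAD(uf);                          /* collects userfaultfd contexts */

        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;
        ret = do_munmap(mm, start, len, &uf);   /* may call userfaultfd_unmap_prep() */
        up_write(&mm->mmap_sem);
        userfaultfd_unmap_complete(mm, &uf);    /* notify after dropping the lock */
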
 
 
@@ -2780,7 +2787,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 
        file = get_file(vma->vm_file);
        ret = do_mmap_pgoff(vma->vm_file, start, size,
-                       prot, flags, pgoff, &populate);
+                       prot, flags, pgoff, &populate, NULL);
        fput(file);
 out:
        up_write(&mm->mmap_sem);
@@ -2806,7 +2813,7 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
  */
-static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
+static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
@@ -2845,7 +2852,7 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
         */
        while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
                              &rb_parent)) {
-               if (do_munmap(mm, addr, len))
+               if (do_munmap(mm, addr, len, uf))
                        return -ENOMEM;
        }
 
@@ -2892,9 +2899,9 @@ out:
        return 0;
 }
 
-static int do_brk(unsigned long addr, unsigned long len)
+static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
 {
-       return do_brk_flags(addr, len, 0);
+       return do_brk_flags(addr, len, 0, uf);
 }
 
 int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
@@ -2902,13 +2909,15 @@ int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
        struct mm_struct *mm = current->mm;
        int ret;
        bool populate;
+       LIST_HEAD(uf);
 
        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;
 
-       ret = do_brk_flags(addr, len, flags);
+       ret = do_brk_flags(addr, len, flags, &uf);
        populate = ((mm->def_flags & VM_LOCKED) != 0);
        up_write(&mm->mmap_sem);
+       userfaultfd_unmap_complete(mm, &uf);
        if (populate && !ret)
                mm_populate(addr, len);
        return ret;
@@ -3125,8 +3134,7 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
                mm->data_vm += npages;
 }
 
-static int special_mapping_fault(struct vm_area_struct *vma,
-                                struct vm_fault *vmf);
+static int special_mapping_fault(struct vm_fault *vmf);
 
 /*
  * Having a close hook prevents vma merging regardless of flags.
@@ -3161,9 +3169,9 @@ static const struct vm_operations_struct legacy_special_mapping_vmops = {
        .fault = special_mapping_fault,
 };
 
-static int special_mapping_fault(struct vm_area_struct *vma,
-                               struct vm_fault *vmf)
+static int special_mapping_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        pgoff_t pgoff;
        struct page **pages;
 
@@ -3173,7 +3181,7 @@ static int special_mapping_fault(struct vm_area_struct *vma,
                struct vm_special_mapping *sm = vma->vm_private_data;
 
                if (sm->fault)
-                       return sm->fault(sm, vma, vmf);
+                       return sm->fault(sm, vmf->vma, vmf);
 
                pages = sm->pages;
        }
@@ -3447,7 +3455,7 @@ void mm_drop_all_locks(struct mm_struct *mm)
 }
 
 /*
- * initialise the VMA slab
+ * initialise the percpu counter for VM
  */
 void __init mmap_init(void)
 {
index 6f4d27c5bb325f6468461b17cd8800cc1e473308..daf67bb02b4af8471cb64d9296899da5c9b116c7 100644 (file)
@@ -25,7 +25,7 @@ void use_mm(struct mm_struct *mm)
        task_lock(tsk);
        active_mm = tsk->active_mm;
        if (active_mm != mm) {
-               atomic_inc(&mm->mm_count);
+               mmgrab(mm);
                tsk->active_mm = mm;
        }
        tsk->mm = mm;
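
This and the following atomic_inc(&mm->mm_count) conversions use the then-new mmgrab() helper, which makes explicit that the caller pins the mm_struct itself (mm_count) rather than its address space (mm_users, taken via mmget()). Upstream defines it essentially as:

        static inline void mmgrab(struct mm_struct *mm)
        {
                atomic_inc(&mm->mm_count);      /* pin the mm_struct, not the
                                                 * address space; cf. mmget() */
        }
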
index f4259e496f83a6d0465bba0a95e918980395030f..32bc9f2ff7eb9340c099e29479a52996bb0f6ff4 100644 (file)
@@ -275,7 +275,7 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
                mm->mmu_notifier_mm = mmu_notifier_mm;
                mmu_notifier_mm = NULL;
        }
-       atomic_inc(&mm->mm_count);
+       mmgrab(mm);
 
        /*
         * Serialize the update against mmu_notifier_unregister. A
index a45b4dc6a7f5d7577b56193d2dd2115bee2a7d59..848e946b08e58e31bf6482bd091338a43bb66fe1 100644 (file)
@@ -99,7 +99,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                        ptent = ptep_modify_prot_start(mm, addr, pte);
                        ptent = pte_modify(ptent, newprot);
                        if (preserve_write)
-                               ptent = pte_mkwrite(ptent);
+                               ptent = pte_mk_savedwrite(ptent);
 
                        /* Avoid taking write faults for known dirty pages */
                        if (dirty_accountable && pte_dirty(ptent) &&
index 8779928d6a70258c55155861ab3dbd1f1020a4b6..8233b0105c8258ec5757c42c0a65e34b2908272c 100644 (file)
@@ -252,7 +252,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 static unsigned long move_vma(struct vm_area_struct *vma,
                unsigned long old_addr, unsigned long old_len,
                unsigned long new_len, unsigned long new_addr,
-               bool *locked, struct vm_userfaultfd_ctx *uf)
+               bool *locked, struct vm_userfaultfd_ctx *uf,
+               struct list_head *uf_unmap)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct vm_area_struct *new_vma;
@@ -341,7 +342,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
        if (unlikely(vma->vm_flags & VM_PFNMAP))
                untrack_pfn_moved(vma);
 
-       if (do_munmap(mm, old_addr, old_len) < 0) {
+       if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
                /* OOM: unable to split vma, just get accounts right */
                vm_unacct_memory(excess >> PAGE_SHIFT);
                excess = 0;
@@ -417,7 +418,8 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
                unsigned long new_addr, unsigned long new_len, bool *locked,
-               struct vm_userfaultfd_ctx *uf)
+               struct vm_userfaultfd_ctx *uf,
+               struct list_head *uf_unmap)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
@@ -435,12 +437,12 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
        if (addr + old_len > new_addr && new_addr + new_len > addr)
                goto out;
 
-       ret = do_munmap(mm, new_addr, new_len);
+       ret = do_munmap(mm, new_addr, new_len, NULL);
        if (ret)
                goto out;
 
        if (old_len >= new_len) {
-               ret = do_munmap(mm, addr+new_len, old_len - new_len);
+               ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
                if (ret && old_len != new_len)
                        goto out;
                old_len = new_len;
@@ -462,7 +464,8 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
        if (offset_in_page(ret))
                goto out1;
 
-       ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf);
+       ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+                      uf_unmap);
        if (!(offset_in_page(ret)))
                goto out;
 out1:
@@ -502,6 +505,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
        unsigned long charged = 0;
        bool locked = false;
        struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
+       LIST_HEAD(uf_unmap);
 
        if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
                return ret;
@@ -528,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 
        if (flags & MREMAP_FIXED) {
                ret = mremap_to(addr, old_len, new_addr, new_len,
-                               &locked, &uf);
+                               &locked, &uf, &uf_unmap);
                goto out;
        }
 
@@ -538,7 +542,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
         * do_munmap does all the needed commit accounting
         */
        if (old_len >= new_len) {
-               ret = do_munmap(mm, addr+new_len, old_len - new_len);
+               ret = do_munmap(mm, addr+new_len, old_len - new_len, &uf_unmap);
                if (ret && old_len != new_len)
                        goto out;
                ret = addr;
@@ -598,7 +602,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
                }
 
                ret = move_vma(vma, addr, old_len, new_len, new_addr,
-                              &locked, &uf);
+                              &locked, &uf, &uf_unmap);
        }
 out:
        if (offset_in_page(ret)) {
@@ -609,5 +613,6 @@ out:
        if (locked && new_len > old_len)
                mm_populate(new_addr + old_len, new_len - old_len);
        mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
+       userfaultfd_unmap_complete(mm, &uf_unmap);
        return ret;
 }
index bc964c26be8c1a47f70ed3c639a5bb58e698d8e1..fe9f4fa4a7a7415df8dd750aeec99c52fd53830e 100644 (file)
@@ -517,7 +517,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 }
 
 /*
- * initialise the VMA and region record slabs
+ * initialise the percpu counter for VM and region record slabs
  */
 void __init mmap_init(void)
 {
@@ -1205,7 +1205,8 @@ unsigned long do_mmap(struct file *file,
                        unsigned long flags,
                        vm_flags_t vm_flags,
                        unsigned long pgoff,
-                       unsigned long *populate)
+                       unsigned long *populate,
+                       struct list_head *uf)
 {
        struct vm_area_struct *vma;
        struct vm_region *region;
@@ -1577,7 +1578,7 @@ static int shrink_vma(struct mm_struct *mm,
  * - under NOMMU conditions the chunk to be unmapped must be backed by a single
  *   VMA, though it need not cover the whole VMA
  */
-int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf)
 {
        struct vm_area_struct *vma;
        unsigned long end;
@@ -1643,7 +1644,7 @@ int vm_munmap(unsigned long addr, size_t len)
        int ret;
 
        down_write(&mm->mmap_sem);
-       ret = do_munmap(mm, addr, len);
+       ret = do_munmap(mm, addr, len, NULL);
        up_write(&mm->mmap_sem);
        return ret;
 }
@@ -1794,7 +1795,7 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int filemap_fault(struct vm_fault *vmf)
 {
        BUG();
        return 0;
index 8256788ac119679cb66d3b5b63c6797ba384d506..51c091849dcb65057d2e7443e0d01fc6856202c0 100644 (file)
@@ -403,12 +403,14 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 
 static void dump_header(struct oom_control *oc, struct task_struct *p)
 {
-       nodemask_t *nm = (oc->nodemask) ? oc->nodemask : &cpuset_current_mems_allowed;
-
-       pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n",
-               current->comm, oc->gfp_mask, &oc->gfp_mask,
-               nodemask_pr_args(nm), oc->order,
-               current->signal->oom_score_adj);
+       pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=",
+               current->comm, oc->gfp_mask, &oc->gfp_mask);
+       if (oc->nodemask)
+               pr_cont("%*pbl", nodemask_pr_args(oc->nodemask));
+       else
+               pr_cont("(null)");
+       pr_cont(",  order=%d, oom_score_adj=%hd\n",
+               oc->order, current->signal->oom_score_adj);
        if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
                pr_warn("COMPACTION is disabled!!!\n");
 
@@ -417,7 +419,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
        if (oc->memcg)
                mem_cgroup_print_oom_info(oc->memcg, p);
        else
-               show_mem(SHOW_MEM_FILTER_NODES, nm);
+               show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
        if (sysctl_oom_dump_tasks)
                dump_tasks(oc->memcg, oc->nodemask);
 }
@@ -651,7 +653,7 @@ static void mark_oom_victim(struct task_struct *tsk)
 
        /* oom_mm is bound to the signal struct life time. */
        if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
-               atomic_inc(&tsk->signal->oom_mm->mm_count);
+               mmgrab(tsk->signal->oom_mm);
 
        /*
         * Make sure that the task is woken up from uninterruptible sleep
@@ -868,7 +870,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 
        /* Get a reference to safely compare mm after task_unlock(victim) */
        mm = victim->mm;
-       atomic_inc(&mm->mm_count);
+       mmgrab(mm);
        /*
         * We should send SIGKILL before setting TIF_MEMDIE in order to prevent
         * the OOM victim from depleting the memory reserves from the user
index 2164498258594aff4a72e1263798e1cdec47e3d4..26a60818a8fcf769a051d6af675e4ab37513852a 100644 (file)
@@ -580,7 +580,7 @@ static void wb_domain_writeout_inc(struct wb_domain *dom,
        __fprop_inc_percpu_max(&dom->completions, completions,
                               max_prop_frac);
        /* First event after period switching was turned off? */
-       if (!unlikely(dom->period_time)) {
+       if (unlikely(!dom->period_time)) {
                /*
                 * We can race with other __bdi_writeout_inc calls here but
                 * it does not cause any harm since the resulting time when
@@ -1797,7 +1797,7 @@ pause:
                 * pages exceeds dirty_thresh, give the other good wb's a pipe
                 * to go through, so that tasks on them still remain responsive.
                 *
-                * In theory 1 page is enough to keep the comsumer-producer
+                * In theory 1 page is enough to keep the consumer-producer
                 * pipe going: the flusher cleans 1 page => the task dirties 1
                 * more page. However wb_dirty has accounting errors.  So use
                 * the larger and more IO friendly wb_stat_error.
index c21b336681334f33f13cc6a203f30e0a7ca389bf..a7a6aac95a6d158690e1ca0981b1b652820248d4 100644 (file)
@@ -59,7 +59,6 @@
 #include <linux/prefetch.h>
 #include <linux/mm_inline.h>
 #include <linux/migrate.h>
-#include <linux/page_ext.h>
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/page_owner.h>
@@ -92,6 +91,10 @@ EXPORT_PER_CPU_SYMBOL(_numa_mem_);
 int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
+/* work_structs for global per-cpu drains */
+DEFINE_MUTEX(pcpu_drain_mutex);
+DEFINE_PER_CPU(struct work_struct, pcpu_drain);
+
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 volatile unsigned long latent_entropy __latent_entropy;
 EXPORT_SYMBOL(latent_entropy);
@@ -1085,10 +1088,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
        int migratetype = 0;
        int batch_free = 0;
-       unsigned long nr_scanned;
+       unsigned long nr_scanned, flags;
        bool isolated_pageblocks;
 
-       spin_lock(&zone->lock);
+       spin_lock_irqsave(&zone->lock, flags);
        isolated_pageblocks = has_isolate_pageblock(zone);
        nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
        if (nr_scanned)
@@ -1137,7 +1140,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                        trace_mm_page_pcpu_drain(page, 0, mt);
                } while (--count && --batch_free && !list_empty(list));
        }
-       spin_unlock(&zone->lock);
+       spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1145,8 +1148,9 @@ static void free_one_page(struct zone *zone,
                                unsigned int order,
                                int migratetype)
 {
-       unsigned long nr_scanned;
-       spin_lock(&zone->lock);
+       unsigned long nr_scanned, flags;
+       spin_lock_irqsave(&zone->lock, flags);
+       __count_vm_events(PGFREE, 1 << order);
        nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
        if (nr_scanned)
                __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
@@ -1156,7 +1160,7 @@ static void free_one_page(struct zone *zone,
                migratetype = get_pfnblock_migratetype(page, pfn);
        }
        __free_one_page(page, pfn, zone, order, migratetype);
-       spin_unlock(&zone->lock);
+       spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1234,7 +1238,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
-       unsigned long flags;
        int migratetype;
        unsigned long pfn = page_to_pfn(page);
 
@@ -1242,10 +1245,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
                return;
 
        migratetype = get_pfnblock_migratetype(page, pfn);
-       local_irq_save(flags);
-       __count_vm_events(PGFREE, 1 << order);
        free_one_page(page_zone(page), page, pfn, order, migratetype);
-       local_irq_restore(flags);
 }
 
 static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -2217,8 +2217,9 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                        int migratetype, bool cold)
 {
        int i, alloced = 0;
+       unsigned long flags;
 
-       spin_lock(&zone->lock);
+       spin_lock_irqsave(&zone->lock, flags);
        for (i = 0; i < count; ++i) {
                struct page *page = __rmqueue(zone, order, migratetype);
                if (unlikely(page == NULL))
@@ -2254,7 +2255,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
         * pages added to the pcp list.
         */
        __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-       spin_unlock(&zone->lock);
+       spin_unlock_irqrestore(&zone->lock, flags);
        return alloced;
 }
 
@@ -2339,16 +2340,26 @@ void drain_local_pages(struct zone *zone)
                drain_pages(cpu);
 }
 
+static void drain_local_pages_wq(struct work_struct *work)
+{
+       /*
+        * drain_all_pages doesn't use proper cpu hotplug protection so
+        * we can race with cpu offline when the WQ can move this from
+        * a cpu pinned worker to an unbound one. We can operate on a different
+        * cpu, which is all right, but we also have to make sure not to move
+        * to a different one.
+        */
+       preempt_disable();
+       drain_local_pages(NULL);
+       preempt_enable();
+}
+
 /*
  * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
  *
  * When zone parameter is non-NULL, spill just the single zone's pages.
  *
- * Note that this code is protected against sending an IPI to an offline
- * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
- * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
- * nothing keeps CPUs from showing up after we populated the cpumask and
- * before the call to on_each_cpu_mask().
+ * Note that this can be extremely slow as the draining happens in a workqueue.
  */
 void drain_all_pages(struct zone *zone)
 {
@@ -2360,6 +2371,21 @@ void drain_all_pages(struct zone *zone)
         */
        static cpumask_t cpus_with_pcps;
 
+       /* Workqueues cannot recurse */
+       if (current->flags & PF_WQ_WORKER)
+               return;
+
+       /*
+        * Do not drain if one is already in progress unless it's specific to
+        * a zone. Such callers are primarily CMA and memory hotplug and need
+        * the drain to be complete when the call returns.
+        */
+       if (unlikely(!mutex_trylock(&pcpu_drain_mutex))) {
+               if (!zone)
+                       return;
+               mutex_lock(&pcpu_drain_mutex);
+       }
+
        /*
         * We don't care about racing with CPU hotplug event
         * as offline notification will cause the notified
@@ -2390,8 +2416,16 @@ void drain_all_pages(struct zone *zone)
                else
                        cpumask_clear_cpu(cpu, &cpus_with_pcps);
        }
-       on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
-                                                               zone, 1);
+
+       for_each_cpu(cpu, &cpus_with_pcps) {
+               struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
+               INIT_WORK(work, drain_local_pages_wq);
+               schedule_work_on(cpu, work);
+       }
+       for_each_cpu(cpu, &cpus_with_pcps)
+               flush_work(per_cpu_ptr(&pcpu_drain, cpu));
+
+       mutex_unlock(&pcpu_drain_mutex);
 }
 
 #ifdef CONFIG_HIBERNATION
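
drain_all_pages() therefore stops sending IPIs and instead queues a pinned work item on every CPU that owns per-cpu pages, then waits for all of them to finish; pcpu_drain_mutex keeps concurrent callers from stepping on the shared work_structs. The fan-out reduces to this pattern:

        for_each_cpu(cpu, &cpus_with_pcps) {
                struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);

                INIT_WORK(work, drain_local_pages_wq);
                schedule_work_on(cpu, work);            /* drain on that CPU */
        }
        for_each_cpu(cpu, &cpus_with_pcps)
                flush_work(per_cpu_ptr(&pcpu_drain, cpu));  /* wait for completion */
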
@@ -2442,17 +2476,20 @@ void free_hot_cold_page(struct page *page, bool cold)
 {
        struct zone *zone = page_zone(page);
        struct per_cpu_pages *pcp;
-       unsigned long flags;
        unsigned long pfn = page_to_pfn(page);
        int migratetype;
 
+       if (in_interrupt()) {
+               __free_pages_ok(page, 0);
+               return;
+       }
+
        if (!free_pcp_prepare(page))
                return;
 
        migratetype = get_pfnblock_migratetype(page, pfn);
        set_pcppage_migratetype(page, migratetype);
-       local_irq_save(flags);
-       __count_vm_event(PGFREE);
+       preempt_disable();
 
        /*
         * We only track unmovable, reclaimable and movable on pcp lists.
@@ -2469,6 +2506,7 @@ void free_hot_cold_page(struct page *page, bool cold)
                migratetype = MIGRATE_MOVABLE;
        }
 
+       __count_vm_event(PGFREE);
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        if (!cold)
                list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2482,7 +2520,7 @@ void free_hot_cold_page(struct page *page, bool cold)
        }
 
 out:
-       local_irq_restore(flags);
+       preempt_enable();
 }
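
Because the in_interrupt() check above reroutes interrupt-context frees straight to __free_pages_ok(), the per-cpu lists are now touched only from process context, and preempt_disable() is enough to keep the task on one CPU's pcp list where local_irq_save()/restore used to be required. In sketch form:

        if (in_interrupt()) {           /* IRQ context bypasses the pcp lists */
                __free_pages_ok(page, 0);
                return;
        }

        preempt_disable();              /* pin to this CPU's pcp list */
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        list_add(&page->lru, &pcp->lists[migratetype]);
        pcp->count++;
        preempt_enable();
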
 
 /*
@@ -2600,74 +2638,105 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 #endif
 }
 
+/* Remove page from the per-cpu list, caller must protect the list */
+static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+                       bool cold, struct per_cpu_pages *pcp,
+                       struct list_head *list)
+{
+       struct page *page;
+
+       VM_BUG_ON(in_interrupt());
+
+       do {
+               if (list_empty(list)) {
+                       pcp->count += rmqueue_bulk(zone, 0,
+                                       pcp->batch, list,
+                                       migratetype, cold);
+                       if (unlikely(list_empty(list)))
+                               return NULL;
+               }
+
+               if (cold)
+                       page = list_last_entry(list, struct page, lru);
+               else
+                       page = list_first_entry(list, struct page, lru);
+
+               list_del(&page->lru);
+               pcp->count--;
+       } while (check_new_pcp(page));
+
+       return page;
+}
+
+/* Lock and remove page from the per-cpu list */
+static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+                       struct zone *zone, unsigned int order,
+                       gfp_t gfp_flags, int migratetype)
+{
+       struct per_cpu_pages *pcp;
+       struct list_head *list;
+       bool cold = ((gfp_flags & __GFP_COLD) != 0);
+       struct page *page;
+
+       preempt_disable();
+       pcp = &this_cpu_ptr(zone->pageset)->pcp;
+       list = &pcp->lists[migratetype];
+       page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
+       if (page) {
+               __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+               zone_statistics(preferred_zone, zone);
+       }
+       preempt_enable();
+       return page;
+}
+
 /*
  * Allocate a page from the given zone. Use pcplists for order-0 allocations.
  */
 static inline
-struct page *buffered_rmqueue(struct zone *preferred_zone,
+struct page *rmqueue(struct zone *preferred_zone,
                        struct zone *zone, unsigned int order,
                        gfp_t gfp_flags, unsigned int alloc_flags,
                        int migratetype)
 {
        unsigned long flags;
        struct page *page;
-       bool cold = ((gfp_flags & __GFP_COLD) != 0);
 
-       if (likely(order == 0)) {
-               struct per_cpu_pages *pcp;
-               struct list_head *list;
-
-               local_irq_save(flags);
-               do {
-                       pcp = &this_cpu_ptr(zone->pageset)->pcp;
-                       list = &pcp->lists[migratetype];
-                       if (list_empty(list)) {
-                               pcp->count += rmqueue_bulk(zone, 0,
-                                               pcp->batch, list,
-                                               migratetype, cold);
-                               if (unlikely(list_empty(list)))
-                                       goto failed;
-                       }
-
-                       if (cold)
-                               page = list_last_entry(list, struct page, lru);
-                       else
-                               page = list_first_entry(list, struct page, lru);
-
-                       list_del(&page->lru);
-                       pcp->count--;
+       if (likely(order == 0) && !in_interrupt()) {
+               page = rmqueue_pcplist(preferred_zone, zone, order,
+                               gfp_flags, migratetype);
+               goto out;
+       }
 
-               } while (check_new_pcp(page));
-       } else {
-               /*
-                * We most definitely don't want callers attempting to
-                * allocate greater than order-1 page units with __GFP_NOFAIL.
-                */
-               WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-               spin_lock_irqsave(&zone->lock, flags);
+       /*
+        * We most definitely don't want callers attempting to
+        * allocate greater than order-1 page units with __GFP_NOFAIL.
+        */
+       WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+       spin_lock_irqsave(&zone->lock, flags);
 
-               do {
-                       page = NULL;
-                       if (alloc_flags & ALLOC_HARDER) {
-                               page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
-                               if (page)
-                                       trace_mm_page_alloc_zone_locked(page, order, migratetype);
-                       }
-                       if (!page)
-                               page = __rmqueue(zone, order, migratetype);
-               } while (page && check_new_pages(page, order));
-               spin_unlock(&zone->lock);
+       do {
+               page = NULL;
+               if (alloc_flags & ALLOC_HARDER) {
+                       page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+                       if (page)
+                               trace_mm_page_alloc_zone_locked(page, order, migratetype);
+               }
                if (!page)
-                       goto failed;
-               __mod_zone_freepage_state(zone, -(1 << order),
-                                         get_pcppage_migratetype(page));
-       }
+                       page = __rmqueue(zone, order, migratetype);
+       } while (page && check_new_pages(page, order));
+       spin_unlock(&zone->lock);
+       if (!page)
+               goto failed;
+       __mod_zone_freepage_state(zone, -(1 << order),
+                                 get_pcppage_migratetype(page));
 
        __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
        zone_statistics(preferred_zone, zone);
        local_irq_restore(flags);
 
-       VM_BUG_ON_PAGE(bad_range(zone, page), page);
+out:
+       VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
        return page;
 
 failed:
@@ -2875,7 +2944,7 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
 #ifdef CONFIG_NUMA
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
-       return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
+       return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
                                RECLAIM_DISTANCE;
 }
 #else  /* CONFIG_NUMA */
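
The '<' to '<=' change above means a node at exactly RECLAIM_DISTANCE in the
SLIT now qualifies for reclaim instead of being skipped. A sketch with assumed
values (RECLAIM_DISTANCE defaults to 30):

    int dist = node_distance(0, 1);         /* e.g. exactly 30 in the SLIT */
    bool allow = dist <= RECLAIM_DISTANCE;  /* was false with '<', now true */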
@@ -2972,7 +3041,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                }
 
 try_this_zone:
-               page = buffered_rmqueue(ac->preferred_zoneref->zone, zone, order,
+               page = rmqueue(ac->preferred_zoneref->zone, zone, order,
                                gfp_mask, alloc_flags, ac->migratetype);
                if (page) {
                        prep_new_page(page, order, gfp_mask, alloc_flags);
@@ -3825,76 +3894,76 @@ got_pg:
        return page;
 }
 
-/*
- * This is the 'heart' of the zoned buddy allocator.
- */
-struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-                       struct zonelist *zonelist, nodemask_t *nodemask)
+static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist, nodemask_t *nodemask,
+               struct alloc_context *ac, gfp_t *alloc_mask,
+               unsigned int *alloc_flags)
 {
-       struct page *page;
-       unsigned int alloc_flags = ALLOC_WMARK_LOW;
-       gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
-       struct alloc_context ac = {
-               .high_zoneidx = gfp_zone(gfp_mask),
-               .zonelist = zonelist,
-               .nodemask = nodemask,
-               .migratetype = gfpflags_to_migratetype(gfp_mask),
-       };
+       ac->high_zoneidx = gfp_zone(gfp_mask);
+       ac->zonelist = zonelist;
+       ac->nodemask = nodemask;
+       ac->migratetype = gfpflags_to_migratetype(gfp_mask);
 
        if (cpusets_enabled()) {
-               alloc_mask |= __GFP_HARDWALL;
-               alloc_flags |= ALLOC_CPUSET;
-               if (!ac.nodemask)
-                       ac.nodemask = &cpuset_current_mems_allowed;
+               *alloc_mask |= __GFP_HARDWALL;
+               if (!ac->nodemask)
+                       ac->nodemask = &cpuset_current_mems_allowed;
+               else
+                       *alloc_flags |= ALLOC_CPUSET;
        }
 
-       gfp_mask &= gfp_allowed_mask;
-
        lockdep_trace_alloc(gfp_mask);
 
        might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
 
        if (should_fail_alloc_page(gfp_mask, order))
-               return NULL;
+               return false;
 
-       /*
-        * Check the zones suitable for the gfp_mask contain at least one
-        * valid zone. It's possible to have an empty zonelist as a result
-        * of __GFP_THISNODE and a memoryless node
-        */
-       if (unlikely(!zonelist->_zonerefs->zone))
-               return NULL;
+       if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
+               *alloc_flags |= ALLOC_CMA;
 
-       if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
-               alloc_flags |= ALLOC_CMA;
+       return true;
+}
 
+/* Determine whether to spread dirty pages and the first usable zone */
+static inline void finalise_ac(gfp_t gfp_mask,
+               unsigned int order, struct alloc_context *ac)
+{
        /* Dirty zone balancing only done in the fast path */
-       ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
+       ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
        /*
         * The preferred zone is used for statistics but crucially it is
         * also used as the starting point for the zonelist iterator. It
         * may get reset for allocations that ignore memory policies.
         */
-       ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
-                                       ac.high_zoneidx, ac.nodemask);
-       if (!ac.preferred_zoneref->zone) {
-               page = NULL;
-               /*
-                * This might be due to race with cpuset_current_mems_allowed
-                * update, so make sure we retry with original nodemask in the
-                * slow path.
-                */
-               goto no_zone;
-       }
+       ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
+                                       ac->high_zoneidx, ac->nodemask);
+}
+
+/*
+ * This is the 'heart' of the zoned buddy allocator.
+ */
+struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+                       struct zonelist *zonelist, nodemask_t *nodemask)
+{
+       struct page *page;
+       unsigned int alloc_flags = ALLOC_WMARK_LOW;
+       gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
+       struct alloc_context ac = { };
+
+       gfp_mask &= gfp_allowed_mask;
+       if (!prepare_alloc_pages(gfp_mask, order, zonelist, nodemask, &ac, &alloc_mask, &alloc_flags))
+               return NULL;
+
+       finalise_ac(gfp_mask, order, &ac);
 
        /* First allocation attempt */
        page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
        if (likely(page))
                goto out;
 
-no_zone:
        /*
         * Runtime PM, block IO and its error handling path can deadlock
         * because I/O on the device might not complete.
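
With this refactor the entry point reduces to prepare_alloc_pages(),
finalise_ac(), a first get_page_from_freelist() attempt, then the slow path.
A minimal sketch of reaching it (the node_zonelist() caller below is assumed,
not part of the patch):

    struct zonelist *zl = node_zonelist(numa_node_id(), GFP_KERNEL);
    struct page *page = __alloc_pages_nodemask(GFP_KERNEL, 0, zl, NULL);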
@@ -5856,7 +5925,7 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
         * the zone and SPARSEMEM is in use. If there are holes within the
         * zone, each populated memory region may cost us one or two extra
         * memmap pages due to alignment because memmap pages for each
-        * populated regions may not naturally algined on page boundary.
+        * populated regions may not be naturally aligned on page boundary.
         * So the (present_pages >> 4) heuristic is a tradeoff for that.
         */
        if (spanned_pages > present_pages + (present_pages >> 4) &&
@@ -6420,8 +6489,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
                start_pfn = end_pfn;
        }
-       arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
-       arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
 
        /* Find the PFNs that ZONE_MOVABLE begins at in each node */
        memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
@@ -7157,8 +7224,9 @@ void *__init alloc_large_system_hash(const char *tablename,
  * If @count is not zero, it is okay to include up to @count unmovable pages
  *
  * PageLRU check without isolation or lru_lock could race so that
- * MIGRATE_MOVABLE block might include unmovable pages. It means you can't
- * expect this function should be exact.
+ * MIGRATE_MOVABLE block might include unmovable pages. Likewise, a
+ * __PageMovable check without lock_page may miss some movable non-LRU
+ * pages under race conditions. So you can't expect this function to be exact.
  */
 bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                         bool skip_hwpoisoned_pages)
@@ -7214,6 +7282,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                if (skip_hwpoisoned_pages && PageHWPoison(page))
                        continue;
 
+               if (__PageMovable(page))
+                       continue;
+
                if (!PageLRU(page))
                        found++;
                /*
@@ -7325,6 +7396,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
  *                     #MIGRATE_MOVABLE or #MIGRATE_CMA).  All pageblocks
  *                     in range must have the same migratetype and it must
  *                     be either of the two.
+ * @gfp_mask:  GFP mask to use during compaction
  *
  * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
  * aligned, however it's the caller's responsibility to guarantee that
@@ -7338,7 +7410,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
  * need to be freed with free_contig_range().
  */
 int alloc_contig_range(unsigned long start, unsigned long end,
-                      unsigned migratetype)
+                      unsigned migratetype, gfp_t gfp_mask)
 {
        unsigned long outer_start, outer_end;
        unsigned int order;
@@ -7350,7 +7422,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
                .zone = page_zone(pfn_to_page(start)),
                .mode = MIGRATE_SYNC,
                .ignore_skip_hint = true,
-               .gfp_mask = GFP_KERNEL,
+               .gfp_mask = memalloc_noio_flags(gfp_mask),
        };
        INIT_LIST_HEAD(&cc.migratepages);
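
A hedged sketch of the new @gfp_mask argument from the caller's side (the PFN
range is hypothetical; a CMA-style user would pass GFP_KERNEL here):

    int ret = alloc_contig_range(start_pfn, end_pfn, MIGRATE_CMA, GFP_KERNEL);
    if (!ret)
            free_contig_range(start_pfn, end_pfn - start_pfn);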
 
index ae11aa914e5569afbd5ee67f5349b81912121d02..b0ee56c56b5850f32f5b3157f9b142788840f50f 100644 (file)
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -54,27 +54,27 @@ static int page_idle_clear_pte_refs_one(struct page *page,
                                        struct vm_area_struct *vma,
                                        unsigned long addr, void *arg)
 {
-       struct mm_struct *mm = vma->vm_mm;
-       pmd_t *pmd;
-       pte_t *pte;
-       spinlock_t *ptl;
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .address = addr,
+       };
        bool referenced = false;
 
-       if (!page_check_address_transhuge(page, mm, addr, &pmd, &pte, &ptl))
-               return SWAP_AGAIN;
-
-       if (pte) {
-               referenced = ptep_clear_young_notify(vma, addr, pte);
-               pte_unmap(pte);
-       } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
-               referenced = pmdp_clear_young_notify(vma, addr, pmd);
-       } else {
-               /* unexpected pmd-mapped page? */
-               WARN_ON_ONCE(1);
+       while (page_vma_mapped_walk(&pvmw)) {
+               addr = pvmw.address;
+               if (pvmw.pte) {
+                       referenced = ptep_clear_young_notify(vma, addr,
+                                       pvmw.pte);
+               } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+                       referenced = pmdp_clear_young_notify(vma, addr,
+                                       pvmw.pmd);
+               } else {
+                       /* unexpected pmd-mapped page? */
+                       WARN_ON_ONCE(1);
+               }
        }
 
-       spin_unlock(ptl);
-
        if (referenced) {
                clear_page_idle(page);
                /*
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
new file mode 100644 (file)
index 0000000..a23001a
--- /dev/null
+++ b/mm/page_vma_mapped.c
@@ -0,0 +1,218 @@
+#include <linux/mm.h>
+#include <linux/rmap.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
+#include "internal.h"
+
+static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
+{
+       pmd_t pmde;
+       /*
+        * Make sure we don't re-load pmd between present and !trans_huge check.
+        * We need a consistent view.
+        */
+       pmde = READ_ONCE(*pvmw->pmd);
+       return pmd_present(pmde) && !pmd_trans_huge(pmde);
+}
+
+static inline bool not_found(struct page_vma_mapped_walk *pvmw)
+{
+       page_vma_mapped_walk_done(pvmw);
+       return false;
+}
+
+static bool map_pte(struct page_vma_mapped_walk *pvmw)
+{
+       pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
+       if (!(pvmw->flags & PVMW_SYNC)) {
+               if (pvmw->flags & PVMW_MIGRATION) {
+                       if (!is_swap_pte(*pvmw->pte))
+                               return false;
+               } else {
+                       if (!pte_present(*pvmw->pte))
+                               return false;
+               }
+       }
+       pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
+       spin_lock(pvmw->ptl);
+       return true;
+}
+
+static bool check_pte(struct page_vma_mapped_walk *pvmw)
+{
+       if (pvmw->flags & PVMW_MIGRATION) {
+#ifdef CONFIG_MIGRATION
+               swp_entry_t entry;
+               if (!is_swap_pte(*pvmw->pte))
+                       return false;
+               entry = pte_to_swp_entry(*pvmw->pte);
+               if (!is_migration_entry(entry))
+                       return false;
+               if (migration_entry_to_page(entry) - pvmw->page >=
+                               hpage_nr_pages(pvmw->page)) {
+                       return false;
+               }
+               if (migration_entry_to_page(entry) < pvmw->page)
+                       return false;
+#else
+               WARN_ON_ONCE(1);
+#endif
+       } else {
+               if (!pte_present(*pvmw->pte))
+                       return false;
+
+               /* THP can be referenced by any subpage */
+               if (pte_page(*pvmw->pte) - pvmw->page >=
+                               hpage_nr_pages(pvmw->page)) {
+                       return false;
+               }
+               if (pte_page(*pvmw->pte) < pvmw->page)
+                       return false;
+       }
+
+       return true;
+}
+
+/**
+ * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+ * @pvmw->address
+ * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
+ * must be set. pmd, pte and ptl must be NULL.
+ *
+ * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
+ * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
+ * adjusted if needed (for PTE-mapped THPs).
+ *
+ * If @pvmw->pmd is set but @pvmw->pte is not, you have found a PMD-mapped page
+ * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
+ * a loop to find all PTEs that map the THP.
+ *
+ * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
+ * regardless of which page table level the page is mapped at. @pvmw->pmd is
+ * NULL.
+ *
+ * Returns false if there are no more page table entries for the page in
+ * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
+ *
+ * If you need to stop the walk before page_vma_mapped_walk() returns false,
+ * use page_vma_mapped_walk_done(). It will do the housekeeping.
+ */
+bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+{
+       struct mm_struct *mm = pvmw->vma->vm_mm;
+       struct page *page = pvmw->page;
+       pgd_t *pgd;
+       pud_t *pud;
+
+       /* The only possible pmd mapping has been handled on last iteration */
+       if (pvmw->pmd && !pvmw->pte)
+               return not_found(pvmw);
+
+       /* Seeking to the next pte entry only makes sense for THP */
+       if (pvmw->pte) {
+               if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+                       return not_found(pvmw);
+               goto next_pte;
+       }
+
+       if (unlikely(PageHuge(pvmw->page))) {
+               /* when pud is not present, pte will be NULL */
+               pvmw->pte = huge_pte_offset(mm, pvmw->address);
+               if (!pvmw->pte)
+                       return false;
+
+               pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
+               spin_lock(pvmw->ptl);
+               if (!check_pte(pvmw))
+                       return not_found(pvmw);
+               return true;
+       }
+restart:
+       pgd = pgd_offset(mm, pvmw->address);
+       if (!pgd_present(*pgd))
+               return false;
+       pud = pud_offset(pgd, pvmw->address);
+       if (!pud_present(*pud))
+               return false;
+       pvmw->pmd = pmd_offset(pud, pvmw->address);
+       if (pmd_trans_huge(*pvmw->pmd)) {
+               pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+               if (!pmd_present(*pvmw->pmd))
+                       return not_found(pvmw);
+               if (likely(pmd_trans_huge(*pvmw->pmd))) {
+                       if (pvmw->flags & PVMW_MIGRATION)
+                               return not_found(pvmw);
+                       if (pmd_page(*pvmw->pmd) != page)
+                               return not_found(pvmw);
+                       return true;
+               } else {
+                       /* THP pmd was split under us: handle on pte level */
+                       spin_unlock(pvmw->ptl);
+                       pvmw->ptl = NULL;
+               }
+       } else {
+               if (!check_pmd(pvmw))
+                       return false;
+       }
+       if (!map_pte(pvmw))
+               goto next_pte;
+       while (1) {
+               if (check_pte(pvmw))
+                       return true;
+next_pte:      do {
+                       pvmw->address += PAGE_SIZE;
+                       if (pvmw->address >=
+                                       __vma_address(pvmw->page, pvmw->vma) +
+                                       hpage_nr_pages(pvmw->page) * PAGE_SIZE)
+                               return not_found(pvmw);
+                       /* Did we cross page table boundary? */
+                       if (pvmw->address % PMD_SIZE == 0) {
+                               pte_unmap(pvmw->pte);
+                               if (pvmw->ptl) {
+                                       spin_unlock(pvmw->ptl);
+                                       pvmw->ptl = NULL;
+                               }
+                               goto restart;
+                       } else {
+                               pvmw->pte++;
+                       }
+               } while (pte_none(*pvmw->pte));
+
+               if (!pvmw->ptl) {
+                       pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+                       spin_lock(pvmw->ptl);
+               }
+       }
+}
+
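
As the kernel-doc above describes, callers run the walk in a loop; a minimal
sketch of the pattern the converted rmap users follow (page and vma come from
the caller; handle_pte() is a hypothetical helper):

    struct page_vma_mapped_walk pvmw = {
            .page = page,
            .vma = vma,
            .address = vma_address(page, vma),
    };

    while (page_vma_mapped_walk(&pvmw)) {
            /* pvmw.ptl is held here; pvmw.pte is set for a PTE mapping,
             * NULL (with pvmw.pmd set) for a PMD-mapped THP */
            if (pvmw.pte)
                    handle_pte(pvmw.pte);   /* hypothetical helper */
    }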
+/**
+ * page_mapped_in_vma - check whether a page is really mapped in a VMA
+ * @page: the page to test
+ * @vma: the VMA to test
+ *
+ * Returns 1 if the page is mapped into the page tables of the VMA, 0
+ * if the page is not mapped into the page tables of this VMA.  Only
+ * valid for normal file or anonymous VMAs.
+ */
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+{
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .flags = PVMW_SYNC,
+       };
+       unsigned long start, end;
+
+       start = __vma_address(page, vma);
+       end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
+
+       if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+               return 0;
+       pvmw.address = max(start, vma->vm_start);
+       if (!page_vma_mapped_walk(&pvmw))
+               return 0;
+       page_vma_mapped_walk_done(&pvmw);
+       return 1;
+}
index 207244489a681d10f16c318c0a6ff0423b5fe30b..03761577ae86e462cf2a7218892be7a28ae5877d 100644 (file)
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -78,14 +78,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 
        pud = pud_offset(pgd, addr);
        do {
+ again:
                next = pud_addr_end(addr, end);
-               if (pud_none_or_clear_bad(pud)) {
+               if (pud_none(*pud) || !walk->vma) {
                        if (walk->pte_hole)
                                err = walk->pte_hole(addr, next, walk);
                        if (err)
                                break;
                        continue;
                }
+
+               if (walk->pud_entry) {
+                       spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
+
+                       if (ptl) {
+                               err = walk->pud_entry(pud, addr, next, walk);
+                               spin_unlock(ptl);
+                               if (err)
+                                       break;
+                               continue;
+                       }
+               }
+
+               split_huge_pud(walk->vma, pud, addr);
+               if (pud_none(*pud))
+                       goto again;
+
                if (walk->pmd_entry || walk->pte_entry)
                        err = walk_pmd_range(pud, addr, next, walk);
                if (err)
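
The new pud_entry hook above mirrors the existing pmd_entry; a sketch of a
walker opting in (all names below are hypothetical):

    static int huge_pud_entry(pud_t *pud, unsigned long addr,
                              unsigned long next, struct mm_walk *walk)
    {
            /* entered with the PUD lock held when *pud is huge */
            return 0;
    }

    static int walk_one_range(struct mm_struct *mm, unsigned long start,
                              unsigned long end)
    {
            struct mm_walk walk = {
                    .pud_entry = huge_pud_entry,
                    .mm = mm,
            };

            return walk_page_range(start, end, &walk);
    }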
index 0686f566d3470bb25cbe539ea19aeaec67bd4e72..5696039b5c0707eddcb652bc120a8556ca3cc58b 100644 (file)
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -43,7 +43,7 @@
  * Chunks can be determined from the address using the index field
  * in the page struct. The index field contains a pointer to the chunk.
  *
- * To use this allocator, arch code should do the followings.
+ * To use this allocator, arch code should do the following:
  *
  * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
  *   regular address to percpu pointer and back if they need to be
index 71c5f9109f2a84fce41b0fcfce07f9e7d8ba16d1..4ed5908c65b0f17d29f128f0f0b6e21ca9a53c36 100644 (file)
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -123,6 +123,20 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
        flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
        return pmd;
 }
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
+                           pud_t *pudp)
+{
+       pud_t pud;
+
+       VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+       VM_BUG_ON(!pud_trans_huge(*pudp) && !pud_devmap(*pudp));
+       pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
+       flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+       return pud;
+}
+#endif
 #endif
 
 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
index 91619fd709399a428a5a65fff6367d14cbef3db3..8774791e28099be82b7f4b7062c45600e8aed4ff 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -607,8 +607,7 @@ void try_to_unmap_flush_dirty(void)
                try_to_unmap_flush();
 }
 
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-               struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
        struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
@@ -643,8 +642,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
        return should_defer;
 }
 #else
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-               struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 }
 
@@ -710,170 +708,6 @@ out:
        return pmd;
 }
 
-/*
- * Check that @page is mapped at @address into @mm.
- *
- * If @sync is false, page_check_address may perform a racy check to avoid
- * the page table lock when the pte is not present (helpful when reclaiming
- * highly shared pages).
- *
- * On success returns with pte mapped and locked.
- */
-pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
-                         unsigned long address, spinlock_t **ptlp, int sync)
-{
-       pmd_t *pmd;
-       pte_t *pte;
-       spinlock_t *ptl;
-
-       if (unlikely(PageHuge(page))) {
-               /* when pud is not present, pte will be NULL */
-               pte = huge_pte_offset(mm, address);
-               if (!pte)
-                       return NULL;
-
-               ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
-               goto check;
-       }
-
-       pmd = mm_find_pmd(mm, address);
-       if (!pmd)
-               return NULL;
-
-       pte = pte_offset_map(pmd, address);
-       /* Make a quick check before getting the lock */
-       if (!sync && !pte_present(*pte)) {
-               pte_unmap(pte);
-               return NULL;
-       }
-
-       ptl = pte_lockptr(mm, pmd);
-check:
-       spin_lock(ptl);
-       if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
-               *ptlp = ptl;
-               return pte;
-       }
-       pte_unmap_unlock(pte, ptl);
-       return NULL;
-}
-
-/**
- * page_mapped_in_vma - check whether a page is really mapped in a VMA
- * @page: the page to test
- * @vma: the VMA to test
- *
- * Returns 1 if the page is mapped into the page tables of the VMA, 0
- * if the page is not mapped into the page tables of this VMA.  Only
- * valid for normal file or anonymous VMAs.
- */
-int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
-{
-       unsigned long address;
-       pte_t *pte;
-       spinlock_t *ptl;
-
-       address = __vma_address(page, vma);
-       if (unlikely(address < vma->vm_start || address >= vma->vm_end))
-               return 0;
-       pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
-       if (!pte)                       /* the page is not in this mm */
-               return 0;
-       pte_unmap_unlock(pte, ptl);
-
-       return 1;
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/*
- * Check that @page is mapped at @address into @mm. In contrast to
- * page_check_address(), this function can handle transparent huge pages.
- *
- * On success returns true with pte mapped and locked. For PMD-mapped
- * transparent huge pages *@ptep is set to NULL.
- */
-bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
-                                 unsigned long address, pmd_t **pmdp,
-                                 pte_t **ptep, spinlock_t **ptlp)
-{
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-       spinlock_t *ptl;
-
-       if (unlikely(PageHuge(page))) {
-               /* when pud is not present, pte will be NULL */
-               pte = huge_pte_offset(mm, address);
-               if (!pte)
-                       return false;
-
-               ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
-               pmd = NULL;
-               goto check_pte;
-       }
-
-       pgd = pgd_offset(mm, address);
-       if (!pgd_present(*pgd))
-               return false;
-       pud = pud_offset(pgd, address);
-       if (!pud_present(*pud))
-               return false;
-       pmd = pmd_offset(pud, address);
-
-       if (pmd_trans_huge(*pmd)) {
-               ptl = pmd_lock(mm, pmd);
-               if (!pmd_present(*pmd))
-                       goto unlock_pmd;
-               if (unlikely(!pmd_trans_huge(*pmd))) {
-                       spin_unlock(ptl);
-                       goto map_pte;
-               }
-
-               if (pmd_page(*pmd) != page)
-                       goto unlock_pmd;
-
-               pte = NULL;
-               goto found;
-unlock_pmd:
-               spin_unlock(ptl);
-               return false;
-       } else {
-               pmd_t pmde = *pmd;
-
-               barrier();
-               if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-                       return false;
-       }
-map_pte:
-       pte = pte_offset_map(pmd, address);
-       if (!pte_present(*pte)) {
-               pte_unmap(pte);
-               return false;
-       }
-
-       ptl = pte_lockptr(mm, pmd);
-check_pte:
-       spin_lock(ptl);
-
-       if (!pte_present(*pte)) {
-               pte_unmap_unlock(pte, ptl);
-               return false;
-       }
-
-       /* THP can be referenced by any subpage */
-       if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
-               pte_unmap_unlock(pte, ptl);
-               return false;
-       }
-found:
-       *ptep = pte;
-       *pmdp = pmd;
-       *ptlp = ptl;
-       return true;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
 struct page_referenced_arg {
        int mapcount;
        int referenced;
@@ -886,45 +720,48 @@ struct page_referenced_arg {
 static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                        unsigned long address, void *arg)
 {
-       struct mm_struct *mm = vma->vm_mm;
        struct page_referenced_arg *pra = arg;
-       pmd_t *pmd;
-       pte_t *pte;
-       spinlock_t *ptl;
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .address = address,
+       };
        int referenced = 0;
 
-       if (!page_check_address_transhuge(page, mm, address, &pmd, &pte, &ptl))
-               return SWAP_AGAIN;
+       while (page_vma_mapped_walk(&pvmw)) {
+               address = pvmw.address;
 
-       if (vma->vm_flags & VM_LOCKED) {
-               if (pte)
-                       pte_unmap(pte);
-               spin_unlock(ptl);
-               pra->vm_flags |= VM_LOCKED;
-               return SWAP_FAIL; /* To break the loop */
-       }
+               if (vma->vm_flags & VM_LOCKED) {
+                       page_vma_mapped_walk_done(&pvmw);
+                       pra->vm_flags |= VM_LOCKED;
+                       return SWAP_FAIL; /* To break the loop */
+               }
 
-       if (pte) {
-               if (ptep_clear_flush_young_notify(vma, address, pte)) {
-                       /*
-                        * Don't treat a reference through a sequentially read
-                        * mapping as such.  If the page has been used in
-                        * another mapping, we will catch it; if this other
-                        * mapping is already gone, the unmap path will have
-                        * set PG_referenced or activated the page.
-                        */
-                       if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+               if (pvmw.pte) {
+                       if (ptep_clear_flush_young_notify(vma, address,
+                                               pvmw.pte)) {
+                               /*
+                                * Don't treat a reference through
+                                * a sequentially read mapping as such.
+                                * If the page has been used in another mapping,
+                                * we will catch it; if this other mapping is
+                                * already gone, the unmap path will have set
+                                * PG_referenced or activated the page.
+                                */
+                               if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+                                       referenced++;
+                       }
+               } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+                       if (pmdp_clear_flush_young_notify(vma, address,
+                                               pvmw.pmd))
                                referenced++;
+               } else {
+                       /* unexpected pmd-mapped page? */
+                       WARN_ON_ONCE(1);
                }
-               pte_unmap(pte);
-       } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
-               if (pmdp_clear_flush_young_notify(vma, address, pmd))
-                       referenced++;
-       } else {
-               /* unexpected pmd-mapped page? */
-               WARN_ON_ONCE(1);
+
+               pra->mapcount--;
        }
-       spin_unlock(ptl);
 
        if (referenced)
                clear_page_idle(page);
@@ -936,7 +773,6 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                pra->vm_flags |= vma->vm_flags;
        }
 
-       pra->mapcount--;
        if (!pra->mapcount)
                return SWAP_SUCCESS; /* To break the loop */
 
@@ -1015,34 +851,56 @@ int page_referenced(struct page *page,
 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                            unsigned long address, void *arg)
 {
-       struct mm_struct *mm = vma->vm_mm;
-       pte_t *pte;
-       spinlock_t *ptl;
-       int ret = 0;
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .address = address,
+               .flags = PVMW_SYNC,
+       };
        int *cleaned = arg;
 
-       pte = page_check_address(page, mm, address, &ptl, 1);
-       if (!pte)
-               goto out;
-
-       if (pte_dirty(*pte) || pte_write(*pte)) {
-               pte_t entry;
+       while (page_vma_mapped_walk(&pvmw)) {
+               int ret = 0;
+               address = pvmw.address;
+               if (pvmw.pte) {
+                       pte_t entry;
+                       pte_t *pte = pvmw.pte;
+
+                       if (!pte_dirty(*pte) && !pte_write(*pte))
+                               continue;
+
+                       flush_cache_page(vma, address, pte_pfn(*pte));
+                       entry = ptep_clear_flush(vma, address, pte);
+                       entry = pte_wrprotect(entry);
+                       entry = pte_mkclean(entry);
+                       set_pte_at(vma->vm_mm, address, pte, entry);
+                       ret = 1;
+               } else {
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+                       pmd_t *pmd = pvmw.pmd;
+                       pmd_t entry;
+
+                       if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
+                               continue;
+
+                       flush_cache_page(vma, address, page_to_pfn(page));
+                       entry = pmdp_huge_clear_flush(vma, address, pmd);
+                       entry = pmd_wrprotect(entry);
+                       entry = pmd_mkclean(entry);
+                       set_pmd_at(vma->vm_mm, address, pmd, entry);
+                       ret = 1;
+#else
+                       /* unexpected pmd-mapped page? */
+                       WARN_ON_ONCE(1);
+#endif
+               }
 
-               flush_cache_page(vma, address, pte_pfn(*pte));
-               entry = ptep_clear_flush(vma, address, pte);
-               entry = pte_wrprotect(entry);
-               entry = pte_mkclean(entry);
-               set_pte_at(mm, address, pte, entry);
-               ret = 1;
+               if (ret) {
+                       mmu_notifier_invalidate_page(vma->vm_mm, address);
+                       (*cleaned)++;
+               }
        }
 
-       pte_unmap_unlock(pte, ptl);
-
-       if (ret) {
-               mmu_notifier_invalidate_page(mm, address);
-               (*cleaned)++;
-       }
-out:
        return SWAP_AGAIN;
 }
 
@@ -1435,155 +1293,163 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                     unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
-       pte_t *pte;
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .address = address,
+       };
        pte_t pteval;
-       spinlock_t *ptl;
+       struct page *subpage;
        int ret = SWAP_AGAIN;
        struct rmap_private *rp = arg;
        enum ttu_flags flags = rp->flags;
 
        /* munlock has nothing to gain from examining un-locked vmas */
        if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
-               goto out;
+               return SWAP_AGAIN;
 
        if (flags & TTU_SPLIT_HUGE_PMD) {
                split_huge_pmd_address(vma, address,
                                flags & TTU_MIGRATION, page);
-               /* check if we have anything to do after split */
-               if (page_mapcount(page) == 0)
-                       goto out;
        }
 
-       pte = page_check_address(page, mm, address, &ptl,
-                                PageTransCompound(page));
-       if (!pte)
-               goto out;
+       while (page_vma_mapped_walk(&pvmw)) {
+               subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+               address = pvmw.address;
 
-       /*
-        * If the page is mlock()d, we cannot swap it out.
-        * If it's recently referenced (perhaps page_referenced
-        * skipped over this mm) then we should reactivate it.
-        */
-       if (!(flags & TTU_IGNORE_MLOCK)) {
-               if (vma->vm_flags & VM_LOCKED) {
-                       /* PTE-mapped THP are never mlocked */
-                       if (!PageTransCompound(page)) {
-                               /*
-                                * Holding pte lock, we do *not* need
-                                * mmap_sem here
-                                */
-                               mlock_vma_page(page);
-                       }
-                       ret = SWAP_MLOCK;
-                       goto out_unmap;
-               }
-               if (flags & TTU_MUNLOCK)
-                       goto out_unmap;
-       }
-       if (!(flags & TTU_IGNORE_ACCESS)) {
-               if (ptep_clear_flush_young_notify(vma, address, pte)) {
-                       ret = SWAP_FAIL;
-                       goto out_unmap;
-               }
-       }
+               /* Unexpected PMD-mapped THP? */
+               VM_BUG_ON_PAGE(!pvmw.pte, page);
 
-       /* Nuke the page table entry. */
-       flush_cache_page(vma, address, page_to_pfn(page));
-       if (should_defer_flush(mm, flags)) {
                /*
-                * We clear the PTE but do not flush so potentially a remote
-                * CPU could still be writing to the page. If the entry was
-                * previously clean then the architecture must guarantee that
-                * a clear->dirty transition on a cached TLB entry is written
-                * through and traps if the PTE is unmapped.
+                * If the page is mlock()d, we cannot swap it out.
+                * If it's recently referenced (perhaps page_referenced
+                * skipped over this mm) then we should reactivate it.
                 */
-               pteval = ptep_get_and_clear(mm, address, pte);
-
-               set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
-       } else {
-               pteval = ptep_clear_flush(vma, address, pte);
-       }
+               if (!(flags & TTU_IGNORE_MLOCK)) {
+                       if (vma->vm_flags & VM_LOCKED) {
+                               /* PTE-mapped THP are never mlocked */
+                               if (!PageTransCompound(page)) {
+                                       /*
+                                        * Holding pte lock, we do *not* need
+                                        * mmap_sem here
+                                        */
+                                       mlock_vma_page(page);
+                               }
+                               ret = SWAP_MLOCK;
+                               page_vma_mapped_walk_done(&pvmw);
+                               break;
+                       }
+                       if (flags & TTU_MUNLOCK)
+                               continue;
+               }
 
-       /* Move the dirty bit to the physical page now the pte is gone. */
-       if (pte_dirty(pteval))
-               set_page_dirty(page);
+               if (!(flags & TTU_IGNORE_ACCESS)) {
+                       if (ptep_clear_flush_young_notify(vma, address,
+                                               pvmw.pte)) {
+                               ret = SWAP_FAIL;
+                               page_vma_mapped_walk_done(&pvmw);
+                               break;
+                       }
+               }
 
-       /* Update high watermark before we lower rss */
-       update_hiwater_rss(mm);
+               /* Nuke the page table entry. */
+               flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+               if (should_defer_flush(mm, flags)) {
+                       /*
+                        * We clear the PTE but do not flush so potentially
+                        * a remote CPU could still be writing to the page.
+                        * If the entry was previously clean then the
+                        * architecture must guarantee that a clear->dirty
+                        * transition on a cached TLB entry is written through
+                        * and traps if the PTE is unmapped.
+                        */
+                       pteval = ptep_get_and_clear(mm, address, pvmw.pte);
 
-       if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
-               if (PageHuge(page)) {
-                       hugetlb_count_sub(1 << compound_order(page), mm);
+                       set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
                } else {
-                       dec_mm_counter(mm, mm_counter(page));
+                       pteval = ptep_clear_flush(vma, address, pvmw.pte);
                }
-               set_pte_at(mm, address, pte,
-                          swp_entry_to_pte(make_hwpoison_entry(page)));
-       } else if (pte_unused(pteval)) {
-               /*
-                * The guest indicated that the page content is of no
-                * interest anymore. Simply discard the pte, vmscan
-                * will take care of the rest.
-                */
-               dec_mm_counter(mm, mm_counter(page));
-       } else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) {
-               swp_entry_t entry;
-               pte_t swp_pte;
-               /*
-                * Store the pfn of the page in a special migration
-                * pte. do_swap_page() will wait until the migration
-                * pte is removed and then restart fault handling.
-                */
-               entry = make_migration_entry(page, pte_write(pteval));
-               swp_pte = swp_entry_to_pte(entry);
-               if (pte_soft_dirty(pteval))
-                       swp_pte = pte_swp_mksoft_dirty(swp_pte);
-               set_pte_at(mm, address, pte, swp_pte);
-       } else if (PageAnon(page)) {
-               swp_entry_t entry = { .val = page_private(page) };
-               pte_t swp_pte;
-               /*
-                * Store the swap location in the pte.
-                * See handle_pte_fault() ...
-                */
-               VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 
-               if (!PageDirty(page) && (flags & TTU_LZFREE)) {
-                       /* It's a freeable page by MADV_FREE */
-                       dec_mm_counter(mm, MM_ANONPAGES);
-                       rp->lazyfreed++;
-                       goto discard;
-               }
+               /* Move the dirty bit to the page. Now the pte is gone. */
+               if (pte_dirty(pteval))
+                       set_page_dirty(page);
 
-               if (swap_duplicate(entry) < 0) {
-                       set_pte_at(mm, address, pte, pteval);
-                       ret = SWAP_FAIL;
-                       goto out_unmap;
-               }
-               if (list_empty(&mm->mmlist)) {
-                       spin_lock(&mmlist_lock);
-                       if (list_empty(&mm->mmlist))
-                               list_add(&mm->mmlist, &init_mm.mmlist);
-                       spin_unlock(&mmlist_lock);
-               }
-               dec_mm_counter(mm, MM_ANONPAGES);
-               inc_mm_counter(mm, MM_SWAPENTS);
-               swp_pte = swp_entry_to_pte(entry);
-               if (pte_soft_dirty(pteval))
-                       swp_pte = pte_swp_mksoft_dirty(swp_pte);
-               set_pte_at(mm, address, pte, swp_pte);
-       } else
-               dec_mm_counter(mm, mm_counter_file(page));
+               /* Update high watermark before we lower rss */
+               update_hiwater_rss(mm);
 
-discard:
-       page_remove_rmap(page, PageHuge(page));
-       put_page(page);
+               if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+                       if (PageHuge(page)) {
+                               int nr = 1 << compound_order(page);
+                               hugetlb_count_sub(nr, mm);
+                       } else {
+                               dec_mm_counter(mm, mm_counter(page));
+                       }
+
+                       pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+                       set_pte_at(mm, address, pvmw.pte, pteval);
+               } else if (pte_unused(pteval)) {
+                       /*
+                        * The guest indicated that the page content is of no
+                        * interest anymore. Simply discard the pte, vmscan
+                        * will take care of the rest.
+                        */
+                       dec_mm_counter(mm, mm_counter(page));
+               } else if (IS_ENABLED(CONFIG_MIGRATION) &&
+                               (flags & TTU_MIGRATION)) {
+                       swp_entry_t entry;
+                       pte_t swp_pte;
+                       /*
+                        * Store the pfn of the page in a special migration
+                        * pte. do_swap_page() will wait until the migration
+                        * pte is removed and then restart fault handling.
+                        */
+                       entry = make_migration_entry(subpage,
+                                       pte_write(pteval));
+                       swp_pte = swp_entry_to_pte(entry);
+                       if (pte_soft_dirty(pteval))
+                               swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       set_pte_at(mm, address, pvmw.pte, swp_pte);
+               } else if (PageAnon(page)) {
+                       swp_entry_t entry = { .val = page_private(subpage) };
+                       pte_t swp_pte;
+                       /*
+                        * Store the swap location in the pte.
+                        * See handle_pte_fault() ...
+                        */
+                       VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+                       if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+                               /* It's a freeable page by MADV_FREE */
+                               dec_mm_counter(mm, MM_ANONPAGES);
+                               rp->lazyfreed++;
+                               goto discard;
+                       }
 
-out_unmap:
-       pte_unmap_unlock(pte, ptl);
-       if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
+                       if (swap_duplicate(entry) < 0) {
+                               set_pte_at(mm, address, pvmw.pte, pteval);
+                               ret = SWAP_FAIL;
+                               page_vma_mapped_walk_done(&pvmw);
+                               break;
+                       }
+                       if (list_empty(&mm->mmlist)) {
+                               spin_lock(&mmlist_lock);
+                               if (list_empty(&mm->mmlist))
+                                       list_add(&mm->mmlist, &init_mm.mmlist);
+                               spin_unlock(&mmlist_lock);
+                       }
+                       dec_mm_counter(mm, MM_ANONPAGES);
+                       inc_mm_counter(mm, MM_SWAPENTS);
+                       swp_pte = swp_entry_to_pte(entry);
+                       if (pte_soft_dirty(pteval))
+                               swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       set_pte_at(mm, address, pvmw.pte, swp_pte);
+               } else
+                       dec_mm_counter(mm, mm_counter_file(page));
+discard:
+               page_remove_rmap(subpage, PageHuge(page));
+               put_page(page);
                mmu_notifier_invalidate_page(mm, address);
-out:
+       }
        return ret;
 }
 
@@ -1608,7 +1474,7 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
 
 static int page_mapcount_is_zero(struct page *page)
 {
-       return !page_mapcount(page);
+       return !total_mapcount(page);
 }
 
 /**
@@ -1755,7 +1621,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
                bool locked)
 {
        struct anon_vma *anon_vma;
-       pgoff_t pgoff;
+       pgoff_t pgoff_start, pgoff_end;
        struct anon_vma_chain *avc;
        int ret = SWAP_AGAIN;
 
@@ -1769,8 +1635,10 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
        if (!anon_vma)
                return ret;
 
-       pgoff = page_to_pgoff(page);
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
+       pgoff_start = page_to_pgoff(page);
+       pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
+       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
+                       pgoff_start, pgoff_end) {
                struct vm_area_struct *vma = avc->vma;
                unsigned long address = vma_address(page, vma);
 
@@ -1808,7 +1676,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
                bool locked)
 {
        struct address_space *mapping = page_mapping(page);
-       pgoff_t pgoff;
+       pgoff_t pgoff_start, pgoff_end;
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
 
@@ -1823,10 +1691,12 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
        if (!mapping)
                return ret;
 
-       pgoff = page_to_pgoff(page);
+       pgoff_start = page_to_pgoff(page);
+       pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
        if (!locked)
                i_mmap_lock_read(mapping);
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
+       vma_interval_tree_foreach(vma, &mapping->i_mmap,
+                       pgoff_start, pgoff_end) {
                unsigned long address = vma_address(page, vma);
 
                cond_resched();
diff --git a/mm/rodata_test.c b/mm/rodata_test.c
new file mode 100644 (file)
index 0000000..0fd2167
--- /dev/null
+++ b/mm/rodata_test.c
@@ -0,0 +1,56 @@
+/*
+ * rodata_test.c: functional test for mark_rodata_ro function
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/uaccess.h>
+#include <asm/sections.h>
+
+const int rodata_test_data = 0xC3;
+EXPORT_SYMBOL_GPL(rodata_test_data);
+
+void rodata_test(void)
+{
+       unsigned long start, end;
+       int zero = 0;
+
+       /* test 1: read the value */
+       /* If this test fails, some previous testrun has clobbered the state */
+       if (!rodata_test_data) {
+               pr_err("rodata_test: test 1 fails (start data)\n");
+               return;
+       }
+
+       /* test 2: write to the variable; this should fault */
+       if (!probe_kernel_write((void *)&rodata_test_data,
+                                               (void *)&zero, sizeof(zero))) {
+               pr_err("rodata_test: test data was not read only\n");
+               return;
+       }
+
+       /* test 3: check the value hasn't changed */
+       if (rodata_test_data == zero) {
+               pr_err("rodata_test: test data was changed\n");
+               return;
+       }
+
+       /* test 4: check if the rodata section is PAGE_SIZE aligned */
+       start = (unsigned long)__start_rodata;
+       end = (unsigned long)__end_rodata;
+       if (start & (PAGE_SIZE - 1)) {
+               pr_err("rodata_test: start of .rodata is not page size aligned\n");
+               return;
+       }
+       if (end & (PAGE_SIZE - 1)) {
+               pr_err("rodata_test: end of .rodata is not page size aligned\n");
+               return;
+       }
+
+       pr_info("rodata_test: all tests were successful\n");
+}
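
The test is meant to run once .rodata has been sealed, i.e. right after
mark_rodata_ro() on the init path; roughly (sketch, wrapper name assumed):

    static void seal_and_check_rodata(void)
    {
            mark_rodata_ro();       /* write-protect .rodata */
            rodata_test();          /* then verify a write faults */
    }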
index 9c6d22ff44e26280505729463acaa96e8bf88d6a..a26649a6633fbfd8e81296d27bdfe8c812079cf1 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1908,8 +1908,9 @@ static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync
        return ret;
 }
 
-static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int shmem_fault(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        struct inode *inode = file_inode(vma->vm_file);
        gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
        enum sgp_type sgp;
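
shmem_fault() now takes only the vm_fault and derives the VMA from it; any
.fault implementation follows the same shape after this conversion (sketch,
hypothetical handler):

    static int my_fault(struct vm_fault *vmf)
    {
            struct vm_area_struct *vma = vmf->vma;  /* vma now lives in vmf */

            return VM_FAULT_SIGBUS;                 /* placeholder result */
    }

    static const struct vm_operations_struct my_vm_ops = {
            .fault = my_fault,
    };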
@@ -2330,7 +2331,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
        pgoff_t index = pos >> PAGE_SHIFT;
 
        /* i_mutex is held by caller */
-       if (unlikely(info->seals)) {
+       if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
                if (info->seals & F_SEAL_WRITE)
                        return -EPERM;
                if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
index 23ff74e618388508e359a5317d93ae284778f779..09d0e849b07f47d82f5d9a5cda4862517d297cb2 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -528,6 +528,9 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 
 static int shutdown_cache(struct kmem_cache *s)
 {
+       /* free asan quarantined objects */
+       kasan_cache_shutdown(s);
+
        if (__kmem_cache_shutdown(s) != 0)
                return -EBUSY;
 
@@ -816,7 +819,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
        get_online_cpus();
        get_online_mems();
 
-       kasan_cache_destroy(s);
        mutex_lock(&slab_mutex);
 
        s->refcount--;
index aabf2e90fe32d14b73554c314db20f6d6a554e12..c4910f14f9579ef1d8b165355f9294715968bf2d 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -209,9 +209,10 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
 {
        int *pgmoved = arg;
 
-       if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-               enum lru_list lru = page_lru_base_type(page);
-               list_move_tail(&page->lru, &lruvec->lists[lru]);
+       if (PageLRU(page) && !PageUnevictable(page)) {
+               del_page_from_lru_list(page, lruvec, page_lru(page));
+               ClearPageActive(page);
+               add_page_to_lru_list_tail(page, lruvec, page_lru(page));
                (*pgmoved)++;
        }
 }
@@ -235,7 +236,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
  */
 void rotate_reclaimable_page(struct page *page)
 {
-       if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
+       if (!PageLocked(page) && !PageDirty(page) &&
            !PageUnevictable(page) && PageLRU(page)) {
                struct pagevec *pvec;
                unsigned long flags;
index 2cac12cc9abe2dbbd7e5e385de91cf93a4ab61d1..fadc6a1c0da0b28e45b786ae5466ef60c3ec88f1 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1671,7 +1671,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
         * that.
         */
        start_mm = &init_mm;
-       atomic_inc(&init_mm.mm_users);
+       mmget(&init_mm);
 
        /*
         * Keep on scanning until all entries have gone.  Usually,
@@ -1720,7 +1720,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
                if (atomic_read(&start_mm->mm_users) == 1) {
                        mmput(start_mm);
                        start_mm = &init_mm;
-                       atomic_inc(&init_mm.mm_users);
+                       mmget(&init_mm);
                }
 
                /*
@@ -1757,13 +1757,13 @@ int try_to_unuse(unsigned int type, bool frontswap,
                        struct mm_struct *prev_mm = start_mm;
                        struct mm_struct *mm;
 
-                       atomic_inc(&new_start_mm->mm_users);
-                       atomic_inc(&prev_mm->mm_users);
+                       mmget(new_start_mm);
+                       mmget(prev_mm);
                        spin_lock(&mmlist_lock);
                        while (swap_count(*swap_map) && !retval &&
                                        (p = p->next) != &start_mm->mmlist) {
                                mm = list_entry(p, struct mm_struct, mmlist);
-                               if (!atomic_inc_not_zero(&mm->mm_users))
+                               if (!mmget_not_zero(mm))
                                        continue;
                                spin_unlock(&mmlist_lock);
                                mmput(prev_mm);
@@ -1781,7 +1781,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
 
                                if (set_start_mm && *swap_map < swcount) {
                                        mmput(new_start_mm);
-                                       atomic_inc(&mm->mm_users);
+                                       mmget(mm);
                                        new_start_mm = mm;
                                        set_start_mm = 0;
                                }
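
These hunks replace open-coded atomic_inc(&mm->mm_users) with the dedicated
helpers; the idiom, for reference (sketch):

    if (mmget_not_zero(mm)) {       /* fails once mm_users has hit zero */
            /* ... operate on mm's address space ... */
            mmput(mm);              /* drop the reference taken above */
    }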
index dd7b24e083c5b1f76851eb0b5e3359dde92d910e..6263affdef8866135f28256b5b1f5e598515d204 100644 (file)
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/buffer_head.h> /* grr. try_to_release_page,
                                   do_invalidatepage */
+#include <linux/shmem_fs.h>
 #include <linux/cleancache.h>
 #include <linux/rmap.h>
 #include "internal.h"
@@ -785,7 +786,7 @@ EXPORT_SYMBOL(truncate_setsize);
  */
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
 {
-       int bsize = 1 << inode->i_blkbits;
+       int bsize = i_blocksize(inode);
        loff_t rounded_from;
        struct page *page;
        pgoff_t index;
index 1e5c2f94e8a3299a47c3ba5bda9e6f59ea8afbf0..9f0ad2a4f10244c1f8aed11128cde5e3cc9eb0bb 100644 (file)
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -197,22 +197,25 @@ retry:
         * retry, dst_vma will be set to NULL and we must lookup again.
         */
        if (!dst_vma) {
-               err = -EINVAL;
+               err = -ENOENT;
                dst_vma = find_vma(dst_mm, dst_start);
                if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
                        goto out_unlock;
-
-               if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
-                       goto out_unlock;
-
                /*
-                * Make sure the remaining dst range is both valid and
-                * fully within a single existing vma.
+                * Only allow __mcopy_atomic_hugetlb on userfaultfd
+                * registered ranges.
                 */
+               if (!dst_vma->vm_userfaultfd_ctx.ctx)
+                       goto out_unlock;
+
                if (dst_start < dst_vma->vm_start ||
                    dst_start + len > dst_vma->vm_end)
                        goto out_unlock;
 
+               err = -EINVAL;
+               if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
+                       goto out_unlock;
+
                vm_shared = dst_vma->vm_flags & VM_SHARED;
        }
 
@@ -220,12 +223,6 @@ retry:
                    (len - copied) & (vma_hpagesize - 1)))
                goto out_unlock;
 
-       /*
-        * Only allow __mcopy_atomic_hugetlb on userfaultfd registered ranges.
-        */
-       if (!dst_vma->vm_userfaultfd_ctx.ctx)
-               goto out_unlock;
-
        /*
         * If not shared, ensure the dst_vma has an anon_vma.
         */
@@ -404,22 +401,35 @@ retry:
         * Make sure the vma is not shared, that the dst range is
         * both valid and fully within a single existing vma.
         */
-       err = -EINVAL;
+       err = -ENOENT;
        dst_vma = find_vma(dst_mm, dst_start);
        if (!dst_vma)
                goto out_unlock;
        /*
-        * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
-        * it will overwrite vm_ops, so vma_is_anonymous must return false.
+        * Be strict and only allow __mcopy_atomic on userfaultfd
+        * registered ranges to prevent userland errors going
+        * unnoticed. As far as the VM consistency is concerned, it
+        * would be perfectly safe to remove this check, but there's
+                * no useful usage for __mcopy_atomic outside of userfaultfd
+        * registered ranges. This is after all why these are ioctls
+        * belonging to the userfaultfd and not syscalls.
         */
-       if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
-           dst_vma->vm_flags & VM_SHARED))
+       if (!dst_vma->vm_userfaultfd_ctx.ctx)
                goto out_unlock;
 
        if (dst_start < dst_vma->vm_start ||
            dst_start + len > dst_vma->vm_end)
                goto out_unlock;
 
+       err = -EINVAL;
+       /*
+        * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
+        * it will overwrite vm_ops, so vma_is_anonymous must return false.
+        */
+       if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
+           dst_vma->vm_flags & VM_SHARED))
+               goto out_unlock;
+
        /*
         * If this is a HUGETLB vma, pass off to appropriate routine
         */
@@ -427,18 +437,6 @@ retry:
                return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
                                                src_start, len, zeropage);
 
-       /*
-        * Be strict and only allow __mcopy_atomic on userfaultfd
-        * registered ranges to prevent userland errors going
-        * unnoticed. As far as the VM consistency is concerned, it
-        * would be perfectly safe to remove this check, but there's
-        * no useful usage for __mcopy_atomic ouside of userfaultfd
-        * registered ranges. This is after all why these are ioctls
-        * belonging to the userfaultfd and not syscalls.
-        */
-       if (!dst_vma->vm_userfaultfd_ctx.ctx)
-               goto out_unlock;
-
        if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
                goto out_unlock;
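
Both functions touched in this file apply the same discipline: err holds the failure class for the group of checks that follows it, so -ENOENT (no suitable vma yet, the caller may retry the lookup) is kept distinct from -EINVAL (a genuinely bad request). A userspace sketch of that staging pattern, with a made-up vma struct standing in for vm_area_struct:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

/* made-up stand-in for the kernel's vm_area_struct */
struct vma {
        unsigned long start, end, pagesize;
        int registered;         /* models vm_userfaultfd_ctx.ctx != NULL */
};

static int validate(const struct vma *vma, unsigned long dst, size_t len)
{
        int err = -ENOENT;      /* lookup-class failures first */

        if (!vma || !vma->registered)
                goto out;
        if (dst < vma->start || dst + len > vma->end)
                goto out;

        err = -EINVAL;          /* then argument-class failures */
        if (len & (vma->pagesize - 1))
                goto out;

        err = 0;
out:
        return err;
}

int main(void)
{
        struct vma v = { 0x1000, 0x9000, 0x1000, 1 };

        printf("%d\n", validate(&v, 0x2000, 0x2000));   /* 0 */
        printf("%d\n", validate(&v, 0x2000, 0x2fff));   /* -EINVAL */
        printf("%d\n", validate(NULL, 0x2000, 0x2000)); /* -ENOENT */
        return 0;
}
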
 
index 3cb2164f40993818cdf141e3414e89c2a734ec90..b8f538863b5a19a83332f962ab3766004b0ef4bb 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -11,6 +11,7 @@
 #include <linux/mman.h>
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
+#include <linux/userfaultfd_k.h>
 
 #include <asm/sections.h>
 #include <linux/uaccess.h>
@@ -297,14 +298,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long ret;
        struct mm_struct *mm = current->mm;
        unsigned long populate;
+       LIST_HEAD(uf);
 
        ret = security_mmap_file(file, prot, flag);
        if (!ret) {
                if (down_write_killable(&mm->mmap_sem))
                        return -EINTR;
                ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
-                                   &populate);
+                                   &populate, &uf);
                up_write(&mm->mmap_sem);
+               userfaultfd_unmap_complete(mm, &uf);
                if (populate)
                        mm_populate(ret, populate);
        }
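
do_mmap_pgoff() grows a list parameter here so unmap events can be queued while mmap_sem is held and delivered by userfaultfd_unmap_complete() only after the lock is dropped. A minimal userspace rendering of that "queue under the lock, deliver after unlock" idiom (pthreads and a single-slot queue standing in for the kernel primitives and list_head):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;

/* one queued notification; the kernel gathers a whole list of them */
struct deferred {
        void (*fn)(unsigned long);
        unsigned long arg;
};

static void notify_unmap(unsigned long addr)
{
        printf("unmap complete at %#lx\n", addr);
}

static void do_unmap(unsigned long addr, struct deferred *uf)
{
        pthread_mutex_lock(&mmap_lock);
        /* ... the real unmap work would happen here ... */
        uf->fn = notify_unmap;  /* queue only: callbacks that may sleep
                                 * must not run while the lock is held */
        uf->arg = addr;
        pthread_mutex_unlock(&mmap_lock);

        if (uf->fn)
                uf->fn(uf->arg);        /* delivered outside the lock */
}

int main(void)
{
        struct deferred uf = { 0 };

        do_unmap(0x7f0000000000UL, &uf);
        return 0;
}
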
index d89034a393f276457118b0696613c92c83ae8fcc..be93949b4885991e81a5e0aa0e5ba4d919579f69 100644 (file)
@@ -1642,6 +1642,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        for (i = 0; i < area->nr_pages; i++) {
                struct page *page;
 
+               if (fatal_signal_pending(current)) {
+                       area->nr_pages = i;
+                       goto fail;
+               }
+
                if (node == NUMA_NO_NODE)
                        page = alloc_page(alloc_mask);
                else
@@ -2654,7 +2659,7 @@ static int s_show(struct seq_file *m, void *p)
                seq_printf(m, " pages=%d", v->nr_pages);
 
        if (v->phys_addr)
-               seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
+               seq_printf(m, " phys=%pa", &v->phys_addr);
 
        if (v->flags & VM_IOREMAP)
                seq_puts(m, " ioremap");
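
The first hunk lets a large vmalloc stop early when the task has a fatal signal pending; setting area->nr_pages to the loop index makes the failure path free exactly the pages allocated so far. The second hunk is cosmetic: %pa prints a phys_addr_t correctly regardless of its width. A userspace sketch of the bail-out bookkeeping, where cancel_requested stands in for fatal_signal_pending():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_PAGES 16

static bool cancel_requested;   /* stands in for fatal_signal_pending() */
static void *pages[NR_PAGES];

static int populate(int nr_pages)
{
        int i;

        for (i = 0; i < nr_pages; i++) {
                if (cancel_requested) {
                        nr_pages = i;   /* record how far we got */
                        goto fail;
                }
                pages[i] = malloc(4096);
                if (!pages[i]) {
                        nr_pages = i;
                        goto fail;
                }
        }
        return 0;
fail:
        while (nr_pages--)              /* free only what was allocated */
                free(pages[nr_pages]);
        return -1;
}

int main(void)
{
        printf("%d\n", populate(NR_PAGES));     /* 0: all pages allocated */
        return 0;
}
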
index 149fdf6c5c56f927f3613538c61b3c5831c80af9..6063581f705c48b97a78a087d8c67127af9c8d68 100644 (file)
@@ -112,8 +112,15 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
                                                    unsigned long reclaimed)
 {
        unsigned long scale = scanned + reclaimed;
-       unsigned long pressure;
+       unsigned long pressure = 0;
 
+       /*
+        * reclaimed can be greater than scanned in cases
+        * like THP, where the scanned is 1 and reclaimed
+        * could be 512
+        */
+       if (reclaimed >= scanned)
+               goto out;
        /*
         * We calculate the ratio (in percents) of how many pages were
         * scanned vs. reclaimed in a given time frame (window). Note that
@@ -124,6 +131,7 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
        pressure = scale - (reclaimed * scale / scanned);
        pressure = pressure * 100 / scale;
 
+out:
        pr_debug("%s: %3lu  (s: %lu  r: %lu)\n", __func__, pressure,
                 scanned, reclaimed);
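
The guard covers the THP case called out in the new comment: scanned can be 1 while reclaimed is 512, and without the early-out the unsigned arithmetic below would underflow and report absurd pressure. The same computation, lifted into a standalone program with worked numbers:

#include <stdio.h>

/* pressure = 100 * (scanned - reclaimed) / scanned, computed exactly as
 * in the patch; reclaimed >= scanned short-circuits to 0 (THP case) */
static unsigned long calc_pressure(unsigned long scanned,
                                   unsigned long reclaimed)
{
        unsigned long scale = scanned + reclaimed;
        unsigned long pressure = 0;

        if (reclaimed >= scanned)
                goto out;
        pressure = scale - (reclaimed * scale / scanned);
        pressure = pressure * 100 / scale;
out:
        return pressure;
}

int main(void)
{
        printf("%lu\n", calc_pressure(120, 30)); /* 75: most scans failed */
        printf("%lu\n", calc_pressure(1, 512));  /* 0: THP, not pressure */
        return 0;
}
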
 
index 7bb23ff229b6677aa3afde74ac48641dfbad7ada..70aa739c6b68beeca628dde8d947023ce4b18ca3 100644 (file)
@@ -87,6 +87,7 @@ struct scan_control {
        /* The highest zone to isolate pages for reclaim from */
        enum zone_type reclaim_idx;
 
+       /* Writepage batching in laptop mode; RECLAIM_WRITE */
        unsigned int may_writepage:1;
 
        /* Can mapped pages be reclaimed? */
@@ -1055,6 +1056,15 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 *    throttling so we could easily OOM just because too many
                 *    pages are in writeback and there is nothing else to
                 *    reclaim. Wait for the writeback to complete.
+                *
+                * In cases 1) and 2) we activate the pages to get them out of
+                * the way while we continue scanning for clean pages on the
+                * inactive list and refilling from the active list. The
+                * observation here is that waiting for disk writes is more
+                * expensive than potentially causing reloads down the line.
+                * Since they're marked for immediate reclaim, they won't put
+                * memory pressure on the cache working set any longer than it
+                * takes to write them to disk.
                 */
                if (PageWriteback(page)) {
                        /* Case 1 above */
@@ -1062,7 +1072,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                            PageReclaim(page) &&
                            test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
                                nr_immediate++;
-                               goto keep_locked;
+                               goto activate_locked;
 
                        /* Case 2 above */
                        } else if (sane_reclaim(sc) ||
@@ -1080,7 +1090,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                 */
                                SetPageReclaim(page);
                                nr_writeback++;
-                               goto keep_locked;
+                               goto activate_locked;
 
                        /* Case 3 above */
                        } else {
@@ -1152,13 +1162,18 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
                if (PageDirty(page)) {
                        /*
-                        * Only kswapd can writeback filesystem pages to
-                        * avoid risk of stack overflow but only writeback
-                        * if many dirty pages have been encountered.
+                        * Only kswapd can writeback filesystem pages
+                        * to avoid risk of stack overflow. But avoid
+                        * injecting inefficient single-page IO into
+                        * flusher writeback as much as possible: only
+                        * write pages when we've encountered many
+                        * dirty pages, and when we've already scanned
+                        * the rest of the LRU for clean pages and see
+                        * the same dirty pages again (PageReclaim).
                         */
                        if (page_is_file_cache(page) &&
-                                       (!current_is_kswapd() ||
-                                        !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
+                           (!current_is_kswapd() || !PageReclaim(page) ||
+                            !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
                                /*
                                 * Immediately reclaim when written back.
                                 * Similar in principle to deactivate_page()
@@ -1168,7 +1183,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
                                SetPageReclaim(page);
 
-                               goto keep_locked;
+                               goto activate_locked;
                        }
 
                        if (references == PAGEREF_RECLAIM_CLEAN)
@@ -1373,13 +1388,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
         * wants to isolate pages it will be able to operate on without
         * blocking - clean pages for the most part.
         *
-        * ISOLATE_CLEAN means that only clean pages should be isolated. This
-        * is used by reclaim when it is cannot write to backing storage
-        *
         * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants pages
         * that it is possible to migrate without blocking
         */
-       if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+       if (mode & ISOLATE_ASYNC_MIGRATE) {
                /* All the caller can do on PageWriteback is block */
                if (PageWriteback(page))
                        return ret;
@@ -1387,10 +1399,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
                if (PageDirty(page)) {
                        struct address_space *mapping;
 
-                       /* ISOLATE_CLEAN means only clean pages */
-                       if (mode & ISOLATE_CLEAN)
-                               return ret;
-
                        /*
                         * Only pages without mappings or that have a
                         * ->migratepage callback are possible to migrate
@@ -1731,8 +1739,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
        if (!sc->may_unmap)
                isolate_mode |= ISOLATE_UNMAPPED;
-       if (!sc->may_writepage)
-               isolate_mode |= ISOLATE_CLEAN;
 
        spin_lock_irq(&pgdat->lru_lock);
 
@@ -1806,12 +1812,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
                /*
                 * If dirty pages are scanned that are not queued for IO, it
-                * implies that flushers are not keeping up. In this case, flag
-                * the pgdat PGDAT_DIRTY and kswapd will start writing pages from
-                * reclaim context.
+                * implies that flushers are not doing their job. This can
+                * happen when memory pressure pushes dirty pages to the end of
+                * the LRU before the dirty limits are breached and the dirty
+                * data has expired. It can also happen when the proportion of
+                * dirty pages grows not through writes but through memory
+                * pressure reclaiming all the clean cache. And in some cases,
+                * the flushers simply cannot keep up with the allocation
+                * rate. Nudge the flusher threads in case they are asleep, but
+                * also allow kswapd to start writing pages during reclaim.
                 */
-               if (stat.nr_unqueued_dirty == nr_taken)
+               if (stat.nr_unqueued_dirty == nr_taken) {
+                       wakeup_flusher_threads(0, WB_REASON_VMSCAN);
                        set_bit(PGDAT_DIRTY, &pgdat->flags);
+               }
 
                /*
                 * If kswapd scans pages marked for immediate
@@ -1929,8 +1943,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
        if (!sc->may_unmap)
                isolate_mode |= ISOLATE_UNMAPPED;
-       if (!sc->may_writepage)
-               isolate_mode |= ISOLATE_CLEAN;
 
        spin_lock_irq(&pgdat->lru_lock);
 
@@ -2759,8 +2771,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                          struct scan_control *sc)
 {
        int initial_priority = sc->priority;
-       unsigned long total_scanned = 0;
-       unsigned long writeback_threshold;
 retry:
        delayacct_freepages_start();
 
@@ -2773,7 +2783,6 @@ retry:
                sc->nr_scanned = 0;
                shrink_zones(zonelist, sc);
 
-               total_scanned += sc->nr_scanned;
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
                        break;
 
@@ -2786,20 +2795,6 @@ retry:
                 */
                if (sc->priority < DEF_PRIORITY - 2)
                        sc->may_writepage = 1;
-
-               /*
-                * Try to write back as many pages as we just scanned.  This
-                * tends to cause slow streaming writers to write data to the
-                * disk smoothly, at the dirtying rate, which is nice.   But
-                * that's undesirable in laptop mode, where we *want* lumpy
-                * writeout.  So in laptop mode, write out the whole world.
-                */
-               writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
-               if (total_scanned > writeback_threshold) {
-                       wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
-                                               WB_REASON_TRY_TO_FREE_PAGES);
-                       sc->may_writepage = 1;
-               }
        } while (--sc->priority >= 0);
 
        delayacct_freepages_end();
@@ -3101,6 +3096,7 @@ static bool zone_balanced(struct zone *zone, int order, int classzone_idx)
         */
        clear_bit(PGDAT_CONGESTED, &zone->zone_pgdat->flags);
        clear_bit(PGDAT_DIRTY, &zone->zone_pgdat->flags);
+       clear_bit(PGDAT_WRITEBACK, &zone->zone_pgdat->flags);
 
        return true;
 }
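
Taken together, the vmscan.c hunks move dirty and writeback pages that reclaim keeps tripping over onto the active list (the goto activate_locked conversions), retire ISOLATE_CLEAN entirely, and replace the old proportional flusher wakeup in do_try_to_free_pages() with a targeted one: shrink_inactive_list() nudges the flushers only when an entire isolated batch turns out to be dirty but not yet queued for IO. A toy rendering of that trigger condition (the guard against an empty batch is an addition of this sketch):

#include <stdbool.h>
#include <stdio.h>

/* condensed model of the new flusher-wakeup trigger: a whole isolated
 * batch being dirty-but-unqueued means writeback is falling behind */
static bool need_flusher_nudge(unsigned int nr_unqueued_dirty,
                               unsigned int nr_taken)
{
        return nr_taken && nr_unqueued_dirty == nr_taken;
}

int main(void)
{
        printf("%d\n", need_flusher_nudge(32, 32)); /* 1: wake flushers */
        printf("%d\n", need_flusher_nudge(3, 32));  /* 0: keeping up */
        return 0;
}
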
index a67f5796b9951b617334a54adf06869bdc537f47..ac839fca0e76ae3cc5a025684cb1516301922d92 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/memcontrol.h>
 #include <linux/writeback.h>
+#include <linux/shmem_fs.h>
 #include <linux/pagemap.h>
 #include <linux/atomic.h>
 #include <linux/module.h>
@@ -354,10 +355,8 @@ void workingset_update_node(struct radix_tree_node *node, void *private)
         * as node->private_list is protected by &mapping->tree_lock.
         */
        if (node->count && node->count == node->exceptional) {
-               if (list_empty(&node->private_list)) {
-                       node->private_data = mapping;
+               if (list_empty(&node->private_list))
                        list_lru_add(&shadow_nodes, &node->private_list);
-               }
        } else {
                if (!list_empty(&node->private_list))
                        list_lru_del(&shadow_nodes, &node->private_list);
@@ -435,7 +434,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
         */
 
        node = container_of(item, struct radix_tree_node, private_list);
-       mapping = node->private_data;
+       mapping = container_of(node->root, struct address_space, page_tree);
 
        /* Coming from the list, invert the lock order */
        if (!spin_trylock(&mapping->tree_lock)) {
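
Dropping node->private_data works because a radix tree node can reach its root, and the root here is always embedded in a struct address_space, so the mapping is recoverable with container_of() pointer arithmetic. A self-contained demonstration with both structures cut down to the relevant members:

#include <assert.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct radix_tree_root { int dummy; };
struct address_space {
        int                    other_state;
        struct radix_tree_root page_tree;    /* embedded, as in the kernel */
};

int main(void)
{
        struct address_space mapping = { 0 };
        struct radix_tree_root *root = &mapping.page_tree;

        /* given only the root pointer, recover the enclosing mapping */
        assert(container_of(root, struct address_space, page_tree) == &mapping);
        return 0;
}
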
index 207e5ddc87a2c4790c72d9133ad192c747e259f1..8970a2fd3b1a5354fb4bc843292a1c7358eed51c 100644 (file)
 /*****************
  * Structures
 *****************/
+struct z3fold_pool;
+struct z3fold_ops {
+       int (*evict)(struct z3fold_pool *pool, unsigned long handle);
+};
+
+enum buddy {
+       HEADLESS = 0,
+       FIRST,
+       MIDDLE,
+       LAST,
+       BUDDIES_MAX
+};
+
+/*
+ * struct z3fold_header - z3fold page metadata occupying the first chunk of each
+ *                     z3fold page, except for HEADLESS pages
+ * @buddy:     links the z3fold page into the relevant list in the pool
+ * @page_lock:         per-page lock
+ * @refcount:          reference count for the z3fold page
+ * @first_chunks:      the size of the first buddy in chunks, 0 if free
+ * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
+ * @last_chunks:       the size of the last buddy in chunks, 0 if free
+ * @first_num:         the starting number (for the first handle)
+ */
+struct z3fold_header {
+       struct list_head buddy;
+       spinlock_t page_lock;
+       struct kref refcount;
+       unsigned short first_chunks;
+       unsigned short middle_chunks;
+       unsigned short last_chunks;
+       unsigned short start_middle;
+       unsigned short first_num:2;
+};
+
 /*
  * NCHUNKS_ORDER determines the internal allocation granularity, effectively
  * adjusting internal fragmentation.  It also determines the number of
  * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
- * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk
- * in allocated page is occupied by z3fold header, NCHUNKS will be calculated
- * to 63 which shows the max number of free chunks in z3fold page, also there
- * will be 63 freelists per pool.
+ * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
+ * in the beginning of an allocated page are occupied by z3fold header, so
+ * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
+ * which shows the max number of free chunks in z3fold page, also there will
+ * be 63, or 62, respectively, freelists per pool.
  */
 #define NCHUNKS_ORDER  6
 
 #define CHUNK_SHIFT    (PAGE_SHIFT - NCHUNKS_ORDER)
 #define CHUNK_SIZE     (1 << CHUNK_SHIFT)
-#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
+#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
+#define ZHDR_CHUNKS    (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
+#define TOTAL_CHUNKS   (PAGE_SIZE >> CHUNK_SHIFT)
 #define NCHUNKS                ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
 
 #define BUDDY_MASK     (0x3)
 
-struct z3fold_pool;
-struct z3fold_ops {
-       int (*evict)(struct z3fold_pool *pool, unsigned long handle);
-};
-
 /**
  * struct z3fold_pool - stores metadata for each z3fold pool
  * @lock:      protects all pool fields and first|last_chunk fields of any
@@ -64,8 +97,6 @@ struct z3fold_ops {
  * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
  *             the lists each z3fold page is added to depends on the size of
  *             its free region.
- * @buddied:   list tracking the z3fold pages that contain 3 buddies;
- *             these z3fold pages are full
  * @lru:       list tracking the z3fold pages in LRU order by most recently
  *             added buddy.
  * @pages_nr:  number of z3fold pages in the pool.
@@ -78,49 +109,22 @@ struct z3fold_ops {
 struct z3fold_pool {
        spinlock_t lock;
        struct list_head unbuddied[NCHUNKS];
-       struct list_head buddied;
        struct list_head lru;
-       u64 pages_nr;
+       atomic64_t pages_nr;
        const struct z3fold_ops *ops;
        struct zpool *zpool;
        const struct zpool_ops *zpool_ops;
 };
 
-enum buddy {
-       HEADLESS = 0,
-       FIRST,
-       MIDDLE,
-       LAST,
-       BUDDIES_MAX
-};
-
-/*
- * struct z3fold_header - z3fold page metadata occupying the first chunk of each
- *                     z3fold page, except for HEADLESS pages
- * @buddy:     links the z3fold page into the relevant list in the pool
- * @first_chunks:      the size of the first buddy in chunks, 0 if free
- * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
- * @last_chunks:       the size of the last buddy in chunks, 0 if free
- * @first_num:         the starting number (for the first handle)
- */
-struct z3fold_header {
-       struct list_head buddy;
-       unsigned short first_chunks;
-       unsigned short middle_chunks;
-       unsigned short last_chunks;
-       unsigned short start_middle;
-       unsigned short first_num:2;
-};
-
 /*
  * Internal z3fold page flags
  */
 enum z3fold_page_flags {
-       UNDER_RECLAIM = 0,
-       PAGE_HEADLESS,
+       PAGE_HEADLESS = 0,
        MIDDLE_CHUNK_MAPPED,
 };
 
+
 /*****************
  * Helpers
 *****************/
@@ -140,10 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
        struct z3fold_header *zhdr = page_address(page);
 
        INIT_LIST_HEAD(&page->lru);
-       clear_bit(UNDER_RECLAIM, &page->private);
        clear_bit(PAGE_HEADLESS, &page->private);
        clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 
+       spin_lock_init(&zhdr->page_lock);
+       kref_init(&zhdr->refcount);
        zhdr->first_chunks = 0;
        zhdr->middle_chunks = 0;
        zhdr->last_chunks = 0;
@@ -154,9 +159,36 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 }
 
 /* Resets the struct page fields and frees the page */
-static void free_z3fold_page(struct z3fold_header *zhdr)
+static void free_z3fold_page(struct page *page)
+{
+       __free_page(page);
+}
+
+static void release_z3fold_page(struct kref *ref)
+{
+       struct z3fold_header *zhdr;
+       struct page *page;
+
+       zhdr = container_of(ref, struct z3fold_header, refcount);
+       page = virt_to_page(zhdr);
+
+       if (!list_empty(&zhdr->buddy))
+               list_del(&zhdr->buddy);
+       if (!list_empty(&page->lru))
+               list_del(&page->lru);
+       free_z3fold_page(page);
+}
+
+/* Lock a z3fold page */
+static inline void z3fold_page_lock(struct z3fold_header *zhdr)
+{
+       spin_lock(&zhdr->page_lock);
+}
+
+/* Unlock a z3fold page */
+static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
 {
-       __free_page(virt_to_page(zhdr));
+       spin_unlock(&zhdr->page_lock);
 }
 
 /*
@@ -204,9 +236,10 @@ static int num_free_chunks(struct z3fold_header *zhdr)
         */
        if (zhdr->middle_chunks != 0) {
                int nfree_before = zhdr->first_chunks ?
-                       0 : zhdr->start_middle - 1;
+                       0 : zhdr->start_middle - ZHDR_CHUNKS;
                int nfree_after = zhdr->last_chunks ?
-                       0 : NCHUNKS - zhdr->start_middle - zhdr->middle_chunks;
+                       0 : TOTAL_CHUNKS -
+                               (zhdr->start_middle + zhdr->middle_chunks);
                nfree = max(nfree_before, nfree_after);
        } else
                nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
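
With the header grown by a spinlock and a kref it may no longer fit in a single chunk, so the free-space math switches from the hard-coded 1 to ZHDR_CHUNKS and counts from TOTAL_CHUNKS rather than NCHUNKS. Plugging in illustrative numbers for a 4K page with 64-byte chunks and a one-chunk header:

#include <stdio.h>

/* illustrative values for PAGE_SIZE=4096, NCHUNKS_ORDER=6 and a header
 * that fits in one chunk; with CONFIG_DEBUG_SPINLOCK it could be two */
#define TOTAL_CHUNKS 64
#define ZHDR_CHUNKS  1

int main(void)
{
        unsigned short start_middle = 20, middle_chunks = 10;

        /* free space before the middle buddy, when FIRST is empty */
        int nfree_before = start_middle - ZHDR_CHUNKS;                  /* 19 */
        /* free space after it, when LAST is empty */
        int nfree_after = TOTAL_CHUNKS - (start_middle + middle_chunks); /* 34 */

        printf("before=%d after=%d\n", nfree_before, nfree_after);
        return 0;
}
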
@@ -236,9 +269,8 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
        spin_lock_init(&pool->lock);
        for_each_unbuddied_list(i, 0)
                INIT_LIST_HEAD(&pool->unbuddied[i]);
-       INIT_LIST_HEAD(&pool->buddied);
        INIT_LIST_HEAD(&pool->lru);
-       pool->pages_nr = 0;
+       atomic64_set(&pool->pages_nr, 0);
        pool->ops = ops;
        return pool;
 }
@@ -254,25 +286,58 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
        kfree(pool);
 }
 
+static inline void *mchunk_memmove(struct z3fold_header *zhdr,
+                               unsigned short dst_chunk)
+{
+       void *beg = zhdr;
+       return memmove(beg + (dst_chunk << CHUNK_SHIFT),
+                      beg + (zhdr->start_middle << CHUNK_SHIFT),
+                      zhdr->middle_chunks << CHUNK_SHIFT);
+}
+
+#define BIG_CHUNK_GAP  3
 /* Has to be called with lock held */
 static int z3fold_compact_page(struct z3fold_header *zhdr)
 {
        struct page *page = virt_to_page(zhdr);
-       void *beg = zhdr;
 
+       if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
+               return 0; /* can't move middle chunk, it's used */
+
+       if (zhdr->middle_chunks == 0)
+               return 0; /* nothing to compact */
 
-       if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
-           zhdr->middle_chunks != 0 &&
-           zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
-               memmove(beg + ZHDR_SIZE_ALIGNED,
-                       beg + (zhdr->start_middle << CHUNK_SHIFT),
-                       zhdr->middle_chunks << CHUNK_SHIFT);
+       if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+               /* move to the beginning */
+               mchunk_memmove(zhdr, ZHDR_CHUNKS);
                zhdr->first_chunks = zhdr->middle_chunks;
                zhdr->middle_chunks = 0;
                zhdr->start_middle = 0;
                zhdr->first_num++;
                return 1;
        }
+
+       /*
+        * moving data is expensive, so let's only do that if
+        * there's substantial gain (at least BIG_CHUNK_GAP chunks)
+        */
+       if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
+           zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
+                       BIG_CHUNK_GAP) {
+               mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
+               zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
+               return 1;
+       } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
+                  TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
+                                       + zhdr->middle_chunks) >=
+                       BIG_CHUNK_GAP) {
+               unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
+                       zhdr->middle_chunks;
+               mchunk_memmove(zhdr, new_start);
+               zhdr->start_middle = new_start;
+               return 1;
+       }
+
        return 0;
 }
 
@@ -313,50 +378,63 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
                bud = HEADLESS;
        else {
                chunks = size_to_chunks(size);
-               spin_lock(&pool->lock);
 
                /* First, try to find an unbuddied z3fold page. */
                zhdr = NULL;
                for_each_unbuddied_list(i, chunks) {
-                       if (!list_empty(&pool->unbuddied[i])) {
-                               zhdr = list_first_entry(&pool->unbuddied[i],
+                       spin_lock(&pool->lock);
+                       zhdr = list_first_entry_or_null(&pool->unbuddied[i],
                                                struct z3fold_header, buddy);
-                               page = virt_to_page(zhdr);
-                               if (zhdr->first_chunks == 0) {
-                                       if (zhdr->middle_chunks != 0 &&
-                                           chunks >= zhdr->start_middle)
-                                               bud = LAST;
-                                       else
-                                               bud = FIRST;
-                               } else if (zhdr->last_chunks == 0)
+                       if (!zhdr) {
+                               spin_unlock(&pool->lock);
+                               continue;
+                       }
+                       kref_get(&zhdr->refcount);
+                       list_del_init(&zhdr->buddy);
+                       spin_unlock(&pool->lock);
+
+                       page = virt_to_page(zhdr);
+                       z3fold_page_lock(zhdr);
+                       if (zhdr->first_chunks == 0) {
+                               if (zhdr->middle_chunks != 0 &&
+                                   chunks >= zhdr->start_middle)
                                        bud = LAST;
-                               else if (zhdr->middle_chunks == 0)
-                                       bud = MIDDLE;
-                               else {
-                                       pr_err("No free chunks in unbuddied\n");
-                                       WARN_ON(1);
-                                       continue;
-                               }
-                               list_del(&zhdr->buddy);
-                               goto found;
+                               else
+                                       bud = FIRST;
+                       } else if (zhdr->last_chunks == 0)
+                               bud = LAST;
+                       else if (zhdr->middle_chunks == 0)
+                               bud = MIDDLE;
+                       else {
+                               z3fold_page_unlock(zhdr);
+                               spin_lock(&pool->lock);
+                               if (kref_put(&zhdr->refcount,
+                                            release_z3fold_page))
+                                       atomic64_dec(&pool->pages_nr);
+                               spin_unlock(&pool->lock);
+                               pr_err("No free chunks in unbuddied\n");
+                               WARN_ON(1);
+                               continue;
                        }
+                       goto found;
                }
                bud = FIRST;
-               spin_unlock(&pool->lock);
        }
 
        /* Couldn't find unbuddied z3fold page, create new one */
        page = alloc_page(gfp);
        if (!page)
                return -ENOMEM;
-       spin_lock(&pool->lock);
-       pool->pages_nr++;
+
+       atomic64_inc(&pool->pages_nr);
        zhdr = init_z3fold_page(page);
 
        if (bud == HEADLESS) {
                set_bit(PAGE_HEADLESS, &page->private);
+               spin_lock(&pool->lock);
                goto headless;
        }
+       z3fold_page_lock(zhdr);
 
 found:
        if (bud == FIRST)
@@ -365,17 +443,15 @@ found:
                zhdr->last_chunks = chunks;
        else {
                zhdr->middle_chunks = chunks;
-               zhdr->start_middle = zhdr->first_chunks + 1;
+               zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
        }
 
+       spin_lock(&pool->lock);
        if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
                        zhdr->middle_chunks == 0) {
                /* Add to unbuddied list */
                freechunks = num_free_chunks(zhdr);
                list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
-       } else {
-               /* Add to buddied list */
-               list_add(&zhdr->buddy, &pool->buddied);
        }
 
 headless:
@@ -387,6 +463,8 @@ headless:
 
        *handle = encode_handle(zhdr, bud);
        spin_unlock(&pool->lock);
+       if (bud != HEADLESS)
+               z3fold_page_unlock(zhdr);
 
        return 0;
 }
@@ -408,7 +486,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
        struct page *page;
        enum buddy bud;
 
-       spin_lock(&pool->lock);
        zhdr = handle_to_z3fold_header(handle);
        page = virt_to_page(zhdr);
 
@@ -416,6 +493,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
                /* HEADLESS page stored */
                bud = HEADLESS;
        } else {
+               z3fold_page_lock(zhdr);
                bud = handle_to_buddy(handle);
 
                switch (bud) {
@@ -432,38 +510,36 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
                default:
                        pr_err("%s: unknown bud %d\n", __func__, bud);
                        WARN_ON(1);
-                       spin_unlock(&pool->lock);
+                       z3fold_page_unlock(zhdr);
                        return;
                }
        }
 
-       if (test_bit(UNDER_RECLAIM, &page->private)) {
-               /* z3fold page is under reclaim, reclaim will free */
-               spin_unlock(&pool->lock);
-               return;
-       }
-
-       if (bud != HEADLESS) {
-               /* Remove from existing buddy list */
-               list_del(&zhdr->buddy);
-       }
-
-       if (bud == HEADLESS ||
-           (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
-                       zhdr->last_chunks == 0)) {
-               /* z3fold page is empty, free */
+       if (bud == HEADLESS) {
+               spin_lock(&pool->lock);
                list_del(&page->lru);
-               clear_bit(PAGE_HEADLESS, &page->private);
-               free_z3fold_page(zhdr);
-               pool->pages_nr--;
+               spin_unlock(&pool->lock);
+               free_z3fold_page(page);
+               atomic64_dec(&pool->pages_nr);
        } else {
-               z3fold_compact_page(zhdr);
-               /* Add to the unbuddied list */
-               freechunks = num_free_chunks(zhdr);
-               list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+               if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
+                   zhdr->last_chunks != 0) {
+                       z3fold_compact_page(zhdr);
+                       /* Add to the unbuddied list */
+                       spin_lock(&pool->lock);
+                       if (!list_empty(&zhdr->buddy))
+                               list_del(&zhdr->buddy);
+                       freechunks = num_free_chunks(zhdr);
+                       list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+                       spin_unlock(&pool->lock);
+               }
+               z3fold_page_unlock(zhdr);
+               spin_lock(&pool->lock);
+               if (kref_put(&zhdr->refcount, release_z3fold_page))
+                       atomic64_dec(&pool->pages_nr);
+               spin_unlock(&pool->lock);
        }
 
-       spin_unlock(&pool->lock);
 }
 
 /**
@@ -510,20 +586,25 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
        unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
 
        spin_lock(&pool->lock);
-       if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
-                       retries == 0) {
+       if (!pool->ops || !pool->ops->evict || retries == 0) {
                spin_unlock(&pool->lock);
                return -EINVAL;
        }
        for (i = 0; i < retries; i++) {
+               if (list_empty(&pool->lru)) {
+                       spin_unlock(&pool->lock);
+                       return -EINVAL;
+               }
                page = list_last_entry(&pool->lru, struct page, lru);
-               list_del(&page->lru);
+               list_del_init(&page->lru);
 
-               /* Protect z3fold page against free */
-               set_bit(UNDER_RECLAIM, &page->private);
                zhdr = page_address(page);
                if (!test_bit(PAGE_HEADLESS, &page->private)) {
-                       list_del(&zhdr->buddy);
+                       if (!list_empty(&zhdr->buddy))
+                               list_del_init(&zhdr->buddy);
+                       kref_get(&zhdr->refcount);
+                       spin_unlock(&pool->lock);
+                       z3fold_page_lock(zhdr);
                        /*
                         * We need to encode the handles before unlocking, since
                         * we can race with free that will set
@@ -538,13 +619,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                                middle_handle = encode_handle(zhdr, MIDDLE);
                        if (zhdr->last_chunks)
                                last_handle = encode_handle(zhdr, LAST);
+                       z3fold_page_unlock(zhdr);
                } else {
                        first_handle = encode_handle(zhdr, HEADLESS);
                        last_handle = middle_handle = 0;
+                       spin_unlock(&pool->lock);
                }
 
-               spin_unlock(&pool->lock);
-
                /* Issue the eviction callback(s) */
                if (middle_handle) {
                        ret = pool->ops->evict(pool, middle_handle);
@@ -562,36 +643,40 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                                goto next;
                }
 next:
-               spin_lock(&pool->lock);
-               clear_bit(UNDER_RECLAIM, &page->private);
-               if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
-                   (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
-                    zhdr->middle_chunks == 0)) {
-                       /*
-                        * All buddies are now free, free the z3fold page and
-                        * return success.
-                        */
-                       clear_bit(PAGE_HEADLESS, &page->private);
-                       free_z3fold_page(zhdr);
-                       pool->pages_nr--;
-                       spin_unlock(&pool->lock);
-                       return 0;
-               }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
-                       if (zhdr->first_chunks != 0 &&
-                           zhdr->last_chunks != 0 &&
-                           zhdr->middle_chunks != 0) {
-                               /* Full, add to buddied list */
-                               list_add(&zhdr->buddy, &pool->buddied);
+               if (test_bit(PAGE_HEADLESS, &page->private)) {
+                       if (ret == 0) {
+                               free_z3fold_page(page);
+                               return 0;
                        } else {
+                               spin_lock(&pool->lock);
+                       }
+               } else {
+                       z3fold_page_lock(zhdr);
+                       if ((zhdr->first_chunks || zhdr->last_chunks ||
+                            zhdr->middle_chunks) &&
+                           !(zhdr->first_chunks && zhdr->last_chunks &&
+                             zhdr->middle_chunks)) {
                                z3fold_compact_page(zhdr);
                                /* add to unbuddied list */
+                               spin_lock(&pool->lock);
                                freechunks = num_free_chunks(zhdr);
                                list_add(&zhdr->buddy,
                                         &pool->unbuddied[freechunks]);
+                               spin_unlock(&pool->lock);
+                       }
+                       z3fold_page_unlock(zhdr);
+                       spin_lock(&pool->lock);
+                       if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+                               atomic64_dec(&pool->pages_nr);
+                               return 0;
                        }
                }
 
-               /* add to beginning of LRU */
+               /*
+                * Add to the beginning of LRU.
+                * Pool lock has to be kept here to ensure the page has
+                * not already been released
+                */
                list_add(&page->lru, &pool->lru);
        }
        spin_unlock(&pool->lock);
@@ -615,7 +700,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
        void *addr;
        enum buddy buddy;
 
-       spin_lock(&pool->lock);
        zhdr = handle_to_z3fold_header(handle);
        addr = zhdr;
        page = virt_to_page(zhdr);
@@ -623,6 +707,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
        if (test_bit(PAGE_HEADLESS, &page->private))
                goto out;
 
+       z3fold_page_lock(zhdr);
        buddy = handle_to_buddy(handle);
        switch (buddy) {
        case FIRST:
@@ -641,8 +726,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
                addr = NULL;
                break;
        }
+
+       z3fold_page_unlock(zhdr);
 out:
-       spin_unlock(&pool->lock);
        return addr;
 }
 
@@ -657,31 +743,28 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
        struct page *page;
        enum buddy buddy;
 
-       spin_lock(&pool->lock);
        zhdr = handle_to_z3fold_header(handle);
        page = virt_to_page(zhdr);
 
-       if (test_bit(PAGE_HEADLESS, &page->private)) {
-               spin_unlock(&pool->lock);
+       if (test_bit(PAGE_HEADLESS, &page->private))
                return;
-       }
 
+       z3fold_page_lock(zhdr);
        buddy = handle_to_buddy(handle);
        if (buddy == MIDDLE)
                clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
-       spin_unlock(&pool->lock);
+       z3fold_page_unlock(zhdr);
 }
 
 /**
  * z3fold_get_pool_size() - gets the z3fold pool size in pages
  * @pool:      pool whose size is being queried
  *
- * Returns: size in pages of the given pool.  The pool lock need not be
- * taken to access pages_nr.
+ * Returns: size in pages of the given pool.
  */
 static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 {
-       return pool->pages_nr;
+       return atomic64_read(&pool->pages_nr);
 }
 
 /*****************
@@ -780,8 +863,8 @@ MODULE_ALIAS("zpool-z3fold");
 
 static int __init init_z3fold(void)
 {
-       /* Make sure the z3fold header will fit in one chunk */
-       BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
+       /* Make sure the z3fold header is not larger than the page size */
+       BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
        zpool_register_driver(&z3fold_zpool_driver);
 
        return 0;
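
The broader z3fold rework trades the single pool lock plus the UNDER_RECLAIM bit for a per-page spinlock and kref-style reference counting: free and reclaim each hold a reference, and whichever kref_put() drops the last one runs release_z3fold_page(). A compact userspace model of that lifetime scheme, with C11 atomics in place of the kernel's kref and a pthread mutex as the per-page lock:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* per-object lock + refcount, released on the last put (a model of the
 * pattern the patch introduces; fields simplified) */
struct zpage {
        pthread_mutex_t lock;   /* the patch's per-page lock; idle here */
        atomic_int      refcount;
};

static void zpage_release(struct zpage *p)
{
        printf("last reference dropped, freeing page\n");
        free(p);
}

static void zpage_put(struct zpage *p)
{
        /* fetch_sub returns the old value: 1 means we were the last */
        if (atomic_fetch_sub(&p->refcount, 1) == 1)
                zpage_release(p);
}

int main(void)
{
        struct zpage *p = malloc(sizeof(*p));

        pthread_mutex_init(&p->lock, NULL);
        atomic_init(&p->refcount, 1);

        atomic_fetch_add(&p->refcount, 1);  /* reclaim takes a reference */
        zpage_put(p);                       /* free path drops its ref   */
        zpage_put(p);                       /* reclaim's put frees it    */
        return 0;
}
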
index a1f24989ac23a71aab531f57c5f80140d2967860..b7b1fb6c8c21d4d4d1c6b7cc17f179e062dce94b 100644 (file)
@@ -24,7 +24,6 @@
  *
  * Usage of struct page flags:
  *     PG_private: identifies the first component page
- *     PG_private2: identifies the last component page
  *     PG_owner_priv_1: identifies the huge component page
  *
  */
@@ -268,10 +267,6 @@ struct zs_pool {
 #endif
 };
 
-/*
- * A zspage's class index and fullness group
- * are encoded in its (first)page->mapping
- */
 #define FULLNESS_BITS  2
 #define CLASS_BITS     8
 #define ISOLATED_BITS  3
@@ -938,7 +933,6 @@ static void reset_page(struct page *page)
 {
        __ClearPageMovable(page);
        ClearPagePrivate(page);
-       ClearPagePrivate2(page);
        set_page_private(page, 0);
        page_mapcount_reset(page);
        ClearPageHugeObject(page);
@@ -1085,7 +1079,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
         * 2. each sub-page point to zspage using page->private
         *
         * we set PG_private to identify the first page (i.e. no other sub-page
-        * has this flag set) and PG_private_2 to identify the last page.
+        * has this flag set).
         */
        for (i = 0; i < nr_pages; i++) {
                page = pages[i];
@@ -1100,8 +1094,6 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
                } else {
                        prev_page->freelist = page;
                }
-               if (i == nr_pages - 1)
-                       SetPagePrivate2(page);
                prev_page = page;
        }
 }
index cabf09e0128beebdee2b8a959361fe6464fb3469..eedc27894b1013187cc81831e956e4c8344b5176 100644 (file)
@@ -76,6 +76,8 @@ static u64 zswap_duplicate_entry;
 * tunables
 **********************************/
 
+#define ZSWAP_PARAM_UNSET ""
+
 /* Enable/disable zswap (disabled by default) */
 static bool zswap_enabled;
 static int zswap_enabled_param_set(const char *,
@@ -185,6 +187,9 @@ static bool zswap_init_started;
 /* fatal error during init */
 static bool zswap_init_failed;
 
+/* init completed, but couldn't create the initial pool */
+static bool zswap_has_pool;
+
 /*********************************
 * helpers and fwd declarations
 **********************************/
@@ -424,7 +429,8 @@ static struct zswap_pool *__zswap_pool_current(void)
        struct zswap_pool *pool;
 
        pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
-       WARN_ON(!pool);
+       WARN_ONCE(!pool && zswap_has_pool,
+                 "%s: no page storage pool!\n", __func__);
 
        return pool;
 }
@@ -443,7 +449,7 @@ static struct zswap_pool *zswap_pool_current_get(void)
        rcu_read_lock();
 
        pool = __zswap_pool_current();
-       if (!pool || !zswap_pool_get(pool))
+       if (!zswap_pool_get(pool))
                pool = NULL;
 
        rcu_read_unlock();
@@ -459,7 +465,9 @@ static struct zswap_pool *zswap_pool_last_get(void)
 
        list_for_each_entry_rcu(pool, &zswap_pools, list)
                last = pool;
-       if (!WARN_ON(!last) && !zswap_pool_get(last))
+       WARN_ONCE(!last && zswap_has_pool,
+                 "%s: no page storage pool!\n", __func__);
+       if (!zswap_pool_get(last))
                last = NULL;
 
        rcu_read_unlock();
@@ -495,6 +503,17 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
        gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
        int ret;
 
+       if (!zswap_has_pool) {
+               /* if either are unset, pool initialization failed, and we
+               /* if either is unset, pool initialization failed, and we
+                * create a pool.
+                */
+               if (!strcmp(type, ZSWAP_PARAM_UNSET))
+                       return NULL;
+               if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
+                       return NULL;
+       }
+
        pool = kzalloc(sizeof(*pool), GFP_KERNEL);
        if (!pool) {
                pr_err("pool alloc failed\n");
@@ -544,29 +563,41 @@ error:
 
 static __init struct zswap_pool *__zswap_pool_create_fallback(void)
 {
-       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
-               if (!strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
-                       pr_err("default compressor %s not available\n",
-                              zswap_compressor);
-                       return NULL;
-               }
+       bool has_comp, has_zpool;
+
+       has_comp = crypto_has_comp(zswap_compressor, 0, 0);
+       if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
                pr_err("compressor %s not available, using default %s\n",
                       zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
                param_free_charp(&zswap_compressor);
                zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
+               has_comp = crypto_has_comp(zswap_compressor, 0, 0);
        }
-       if (!zpool_has_pool(zswap_zpool_type)) {
-               if (!strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
-                       pr_err("default zpool %s not available\n",
-                              zswap_zpool_type);
-                       return NULL;
-               }
+       if (!has_comp) {
+               pr_err("default compressor %s not available\n",
+                      zswap_compressor);
+               param_free_charp(&zswap_compressor);
+               zswap_compressor = ZSWAP_PARAM_UNSET;
+       }
+
+       has_zpool = zpool_has_pool(zswap_zpool_type);
+       if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
                pr_err("zpool %s not available, using default %s\n",
                       zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
                param_free_charp(&zswap_zpool_type);
                zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+               has_zpool = zpool_has_pool(zswap_zpool_type);
+       }
+       if (!has_zpool) {
+               pr_err("default zpool %s not available\n",
+                      zswap_zpool_type);
+               param_free_charp(&zswap_zpool_type);
+               zswap_zpool_type = ZSWAP_PARAM_UNSET;
        }
 
+       if (!has_comp || !has_zpool)
+               return NULL;
+
        return zswap_pool_create(zswap_zpool_type, zswap_compressor);
 }
 
@@ -582,6 +613,9 @@ static void zswap_pool_destroy(struct zswap_pool *pool)
 
 static int __must_check zswap_pool_get(struct zswap_pool *pool)
 {
+       if (!pool)
+               return 0;
+
        return kref_get_unless_zero(&pool->kref);
 }
 
@@ -639,7 +673,7 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
        }
 
        /* no change required */
-       if (!strcmp(s, *(char **)kp->arg))
+       if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
                return 0;
 
        /* if this is load-time (pre-init) param setting,
@@ -670,21 +704,26 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
        pool = zswap_pool_find_get(type, compressor);
        if (pool) {
                zswap_pool_debug("using existing", pool);
+               WARN_ON(pool == zswap_pool_current());
                list_del_rcu(&pool->list);
-       } else {
-               spin_unlock(&zswap_pools_lock);
-               pool = zswap_pool_create(type, compressor);
-               spin_lock(&zswap_pools_lock);
        }
 
+       spin_unlock(&zswap_pools_lock);
+
+       if (!pool)
+               pool = zswap_pool_create(type, compressor);
+
        if (pool)
                ret = param_set_charp(s, kp);
        else
                ret = -EINVAL;
 
+       spin_lock(&zswap_pools_lock);
+
        if (!ret) {
                put_pool = zswap_pool_current();
                list_add_rcu(&pool->list, &zswap_pools);
+               zswap_has_pool = true;
        } else if (pool) {
                /* add the possibly pre-existing pool to the end of the pools
                 * list; if it's new (and empty) then it'll be removed and
@@ -696,6 +735,17 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
 
        spin_unlock(&zswap_pools_lock);
 
+       if (!zswap_has_pool && !pool) {
+               /* if initial pool creation failed, and this pool creation also
+                * failed, maybe both compressor and zpool params were bad.
+                * Allow changing this param, so pool creation will succeed
+                * when the other param is changed. We already verified this
+                * param is ok in the zpool_has_pool() or crypto_has_comp()
+                * checks above.
+                */
+               ret = param_set_charp(s, kp);
+       }
+
        /* drop the ref from either the old current pool,
         * or the new pool we failed to add
         */
@@ -724,6 +774,10 @@ static int zswap_enabled_param_set(const char *val,
                pr_err("can't enable, initialization failed\n");
                return -ENODEV;
        }
+       if (!zswap_has_pool && zswap_init_started) {
+               pr_err("can't enable, no pool configured\n");
+               return -ENODEV;
+       }
 
        return param_set_bool(val, kp);
 }
@@ -1205,22 +1259,21 @@ static int __init init_zswap(void)
                goto hp_fail;
 
        pool = __zswap_pool_create_fallback();
-       if (!pool) {
+       if (pool) {
+               pr_info("loaded using pool %s/%s\n", pool->tfm_name,
+                       zpool_get_type(pool->zpool));
+               list_add(&pool->list, &zswap_pools);
+               zswap_has_pool = true;
+       } else {
                pr_err("pool creation failed\n");
-               goto pool_fail;
+               zswap_enabled = false;
        }
-       pr_info("loaded using pool %s/%s\n", pool->tfm_name,
-               zpool_get_type(pool->zpool));
-
-       list_add(&pool->list, &zswap_pools);
 
        frontswap_register_ops(&zswap_frontswap_ops);
        if (zswap_debugfs_init())
                pr_warn("debugfs initialization failed\n");
        return 0;
 
-pool_fail:
-       cpuhp_remove_state_nocalls(CPUHP_MM_ZSWP_POOL_PREPARE);
 hp_fail:
        cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
 dstmem_fail:
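
The reworked fallback no longer aborts init when a pool can't be built: each parameter independently falls back to its default, and if even the default is unavailable it is parked as ZSWAP_PARAM_UNSET so a later sysfs write can complete the configuration (zswap_has_pool and the extra checks in __zswap_param_set handle that late completion). A condensed model of the per-parameter decision, assuming for the sketch that only the defaults are actually available:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define PARAM_UNSET ""

/* stand-in for crypto_has_comp()/zpool_has_pool(); in this sketch only
 * the default backend exists */
static bool have(const char *name, const char *def)
{
        return strcmp(name, def) == 0;
}

static const char *pick(const char *want, const char *def)
{
        if (have(want, def))
                return want;
        if (strcmp(want, def) != 0 && have(def, def))
                return def;          /* fall back to the default */
        return PARAM_UNSET;          /* leave unset, fix up later */
}

int main(void)
{
        printf("compressor: '%s'\n", pick("lzo", "lzo"));     /* lzo  */
        printf("zpool:      '%s'\n", pick("z3fold", "zbud")); /* zbud */
        return 0;
}
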
index 10d2bdce686e68e08d2624592c210fb0302096d0..465cc24b41e5b46e31421d350a893eea22f02169 100644 (file)
@@ -1656,7 +1656,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        ddp->deh_dport = usat->sat_port;
        ddp->deh_sport = at->src_port;
 
-       SOCK_DEBUG(sk, "SK %p: Copy user data (%Zd bytes).\n", sk, len);
+       SOCK_DEBUG(sk, "SK %p: Copy user data (%zd bytes).\n", sk, len);
 
        err = memcpy_from_msg(skb_put(skb, len), msg, len);
        if (err) {
@@ -1720,7 +1720,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
                 */
                aarp_send_ddp(dev, skb, &usat->sat_addr, NULL);
        }
-       SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len);
+       SOCK_DEBUG(sk, "SK %p: Done write (%zd).\n", sk, len);
 
 out:
        release_sock(sk);
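
Several hunks in the networking half of this merge are the same mechanical substitution: %Zd was the pre-C99 GNU spelling, while C99's %zd is the portable conversion for size_t/ssize_t values. For instance:

#include <stdio.h>
#include <sys/types.h>

int main(void)
{
        ssize_t len = 1500;

        printf("Copy user data (%zd bytes).\n", len);   /* %zd, not %Zd */
        return 0;
}
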
index 3b3b1a292ec8e2e862d7fde7b5b258a2f72c5a6a..a190800572bdfe3473d9b49f19ac20354f6f3539 100644 (file)
@@ -451,7 +451,7 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr,
                        return;
        }
        if (end_of_tlvs - tlvs != 0)
-               pr_info("(%s) ignoring %Zd bytes of trailing TLV garbage\n",
+               pr_info("(%s) ignoring %zd bytes of trailing TLV garbage\n",
                        dev->name, end_of_tlvs - tlvs);
 }
 
index 48f9471e7c85f810494e9964ede5a84bb0227cfc..f64d6566021fccab9208518a1c53934b2da7f708 100644 (file)
@@ -851,7 +851,7 @@ static int hci_sock_release(struct socket *sock)
 
        if (hdev) {
                if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
-                       /* When releasing an user channel exclusive access,
+                       /* When releasing a user channel exclusive access,
                         * call hci_dev_do_close directly instead of calling
                         * hci_dev_close to ensure the exclusive access will
                         * be released and the controller brought back down.
@@ -1172,7 +1172,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
                                /* In case the transport is already up and
                                 * running, clear the error here.
                                 *
-                                * This can happen when opening an user
+                                * This can happen when opening a user
                                 * channel and HCI_AUTO_OFF grace period
                                 * is still active.
                                 */
@@ -1190,7 +1190,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
                if (!hci_sock_gen_cookie(sk)) {
                        /* In the case when a cookie has already been assigned,
                         * this socket will transition from a raw socket into
-                        * an user channel socket. For a clean transition, send
+                        * a user channel socket. For a clean transition, send
                         * the close notification first.
                         */
                        skb = create_monitor_ctrl_close(sk);
index 62e68c0dc68740bc1364204902ea7e97e44a7e92..b838213c408e24eab36adea29819d98d145dff15 100644 (file)
@@ -997,10 +997,10 @@ err_vlan_add:
        RCU_INIT_POINTER(p->vlgrp, NULL);
        synchronize_rcu();
        vlan_tunnel_deinit(vg);
-err_vlan_enabled:
 err_tunnel_init:
        rhashtable_destroy(&vg->vlan_hash);
 err_rhtbl:
+err_vlan_enabled:
        kfree(vg);
 
        goto out;
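The label reordering above leans on C's fall-through cleanup idiom: error labels run top to bottom, undoing initialization in reverse, so each goto must land below every teardown for a step that never ran. With err_vlan_enabled moved past err_rhtbl, that jump now only frees vg instead of also destroying the hash table. A minimal standalone sketch of the idiom (names hypothetical):

	#include <stdlib.h>

	struct ctx { void *a, *b; };

	static int ctx_init(struct ctx *c)
	{
		c->a = malloc(16);
		if (!c->a)
			goto err_a;		/* nothing to undo yet */
		c->b = malloc(16);
		if (!c->b)
			goto err_b;		/* undo step 1 only */
		return 0;

	err_b:
		free(c->a);			/* falls through to err_a */
	err_a:
		return -1;
	}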
index 9024283d2bca8206d6976bb7bb72523b514bd68d..279527f8b1fe74f30d75da640da8ef23f789ba3c 100644 (file)
@@ -187,7 +187,7 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par)
        expected_length += ebt_mac_wormhash_size(wh_src);
 
        if (em->match_size != EBT_ALIGN(expected_length)) {
-               pr_info("wrong size: %d against expected %d, rounded to %Zd\n",
+               pr_info("wrong size: %d against expected %d, rounded to %zd\n",
                        em->match_size, expected_length,
                        EBT_ALIGN(expected_length));
                return -EINVAL;
index 50f040fdb2a97f4278fce4130a0cf4bbe3c87052..b9233b9903990bd38721213ce287a8045debd8c7 100644 (file)
@@ -69,8 +69,8 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
        dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n",
             __func__, lock_name, type, cookie, tag, desc, flags);
        ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock",
-                            CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-                            lock_op_page, lock_op_buf_size, NULL, NULL);
+                            CEPH_OSD_FLAG_WRITE, lock_op_page,
+                            lock_op_buf_size, NULL, NULL);
 
        dout("%s: status %d\n", __func__, ret);
        __free_page(lock_op_page);
@@ -117,8 +117,8 @@ int ceph_cls_unlock(struct ceph_osd_client *osdc,
 
        dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie);
        ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock",
-                            CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-                            unlock_op_page, unlock_op_buf_size, NULL, NULL);
+                            CEPH_OSD_FLAG_WRITE, unlock_op_page,
+                            unlock_op_buf_size, NULL, NULL);
 
        dout("%s: status %d\n", __func__, ret);
        __free_page(unlock_op_page);
@@ -170,8 +170,8 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc,
        dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name,
             cookie, ENTITY_NAME(*locker));
        ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock",
-                            CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-                            break_op_page, break_op_buf_size, NULL, NULL);
+                            CEPH_OSD_FLAG_WRITE, break_op_page,
+                            break_op_buf_size, NULL, NULL);
 
        dout("%s: status %d\n", __func__, ret);
        __free_page(break_op_page);
@@ -278,7 +278,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
        int get_info_op_buf_size;
        int name_len = strlen(lock_name);
        struct page *get_info_op_page, *reply_page;
-       size_t reply_len;
+       size_t reply_len = PAGE_SIZE;
        void *p, *end;
        int ret;
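Initializing reply_len to the page size matters because ceph_osdc_call(), changed later in this patch, now treats resp_len as an in/out parameter: the caller passes the reply buffer's capacity and gets back the number of bytes actually returned. A sketch of the calling convention (variable names follow the surrounding function; error handling elided):

	struct page *reply_page = alloc_page(GFP_NOIO);
	size_t reply_len = PAGE_SIZE;		/* in: buffer capacity */

	ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
			     CEPH_OSD_FLAG_READ, get_info_op_page,
			     get_info_op_buf_size, reply_page, &reply_len);
	/* on success, reply_len now holds the reply's actual size */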
 
index 80d7c3a97cb84355e82e9d8f4c83fbf5b0d82893..5bf94c04f64547e2cfff79c0655bcc68944f4e12 100644 (file)
@@ -45,7 +45,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
 
 void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
 {
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b);
 }
@@ -54,14 +53,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
 {
        kfree(b->item_weights);
        kfree(b->sum_weights);
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b);
 }
 
 void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
 {
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b->node_weights);
        kfree(b);
@@ -71,7 +68,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
 {
        kfree(b->straws);
        kfree(b->item_weights);
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b);
 }
@@ -79,7 +75,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
 void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
 {
        kfree(b->item_weights);
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b);
 }
index 130ab407c5ecf8ca5c0943759efff91c7bf258e8..b5cd8c21bfdfbf4d85bd93993f1f02807c5200be 100644 (file)
@@ -54,7 +54,6 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size
        return -1;
 }
 
-
 /*
  * bucket choose methods
  *
@@ -72,59 +71,60 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size
  * Since this is expensive, we optimize for the r=0 case, which
  * captures the vast majority of calls.
  */
-static int bucket_perm_choose(struct crush_bucket *bucket,
+static int bucket_perm_choose(const struct crush_bucket *bucket,
+                             struct crush_work_bucket *work,
                              int x, int r)
 {
        unsigned int pr = r % bucket->size;
        unsigned int i, s;
 
        /* start a new permutation if @x has changed */
-       if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
+       if (work->perm_x != (__u32)x || work->perm_n == 0) {
                dprintk("bucket %d new x=%d\n", bucket->id, x);
-               bucket->perm_x = x;
+               work->perm_x = x;
 
                /* optimize common r=0 case */
                if (pr == 0) {
                        s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
                                bucket->size;
-                       bucket->perm[0] = s;
-                       bucket->perm_n = 0xffff;   /* magic value, see below */
+                       work->perm[0] = s;
+                       work->perm_n = 0xffff;   /* magic value, see below */
                        goto out;
                }
 
                for (i = 0; i < bucket->size; i++)
-                       bucket->perm[i] = i;
-               bucket->perm_n = 0;
-       } else if (bucket->perm_n == 0xffff) {
+                       work->perm[i] = i;
+               work->perm_n = 0;
+       } else if (work->perm_n == 0xffff) {
                /* clean up after the r=0 case above */
                for (i = 1; i < bucket->size; i++)
-                       bucket->perm[i] = i;
-               bucket->perm[bucket->perm[0]] = 0;
-               bucket->perm_n = 1;
+                       work->perm[i] = i;
+               work->perm[work->perm[0]] = 0;
+               work->perm_n = 1;
        }
 
        /* calculate permutation up to pr */
-       for (i = 0; i < bucket->perm_n; i++)
-               dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]);
-       while (bucket->perm_n <= pr) {
-               unsigned int p = bucket->perm_n;
+       for (i = 0; i < work->perm_n; i++)
+               dprintk(" perm_choose have %d: %d\n", i, work->perm[i]);
+       while (work->perm_n <= pr) {
+               unsigned int p = work->perm_n;
                /* no point in swapping the final entry */
                if (p < bucket->size - 1) {
                        i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
                                (bucket->size - p);
                        if (i) {
-                               unsigned int t = bucket->perm[p + i];
-                               bucket->perm[p + i] = bucket->perm[p];
-                               bucket->perm[p] = t;
+                               unsigned int t = work->perm[p + i];
+                               work->perm[p + i] = work->perm[p];
+                               work->perm[p] = t;
                        }
                        dprintk(" perm_choose swap %d with %d\n", p, p+i);
                }
-               bucket->perm_n++;
+               work->perm_n++;
        }
        for (i = 0; i < bucket->size; i++)
-               dprintk(" perm_choose  %d: %d\n", i, bucket->perm[i]);
+               dprintk(" perm_choose  %d: %d\n", i, work->perm[i]);
 
-       s = bucket->perm[pr];
+       s = work->perm[pr];
 out:
        dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
                bucket->size, x, r, pr, s);
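bucket_perm_choose() builds its pseudorandom permutation lazily: perm[] is a Fisher-Yates shuffle driven by crush_hash32_3(), extended one swap at a time until position pr is fixed, with the r=0 fast path storing only the first element plus the 0xffff sentinel in perm_n. A worked trace for a hypothetical bucket of size 4 with pr = 2 and made-up hash values h(p):

	start:  perm = [0 1 2 3], perm_n = 0
	p = 0:  i = h(0) % 4 = 2  -> swap perm[0], perm[2] -> [2 1 0 3]
	p = 1:  i = h(1) % 3 = 0  -> no swap               -> [2 1 0 3]
	p = 2:  i = h(2) % 2 = 1  -> swap perm[2], perm[3] -> [2 1 3 0]
	result: s = perm[2] = 3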
@@ -132,14 +132,14 @@ out:
 }
 
 /* uniform */
-static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
-                                int x, int r)
+static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket,
+                                struct crush_work_bucket *work, int x, int r)
 {
-       return bucket_perm_choose(&bucket->h, x, r);
+       return bucket_perm_choose(&bucket->h, work, x, r);
 }
 
 /* list */
-static int bucket_list_choose(struct crush_bucket_list *bucket,
+static int bucket_list_choose(const struct crush_bucket_list *bucket,
                              int x, int r)
 {
        int i;
@@ -155,8 +155,9 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
                w *= bucket->sum_weights[i];
                w = w >> 16;
                /*dprintk(" scaled %llx\n", w);*/
-               if (w < bucket->item_weights[i])
+               if (w < bucket->item_weights[i]) {
                        return bucket->h.items[i];
+               }
        }
 
        dprintk("bad list sums for bucket %d\n", bucket->h.id);
@@ -192,7 +193,7 @@ static int terminal(int x)
        return x & 1;
 }
 
-static int bucket_tree_choose(struct crush_bucket_tree *bucket,
+static int bucket_tree_choose(const struct crush_bucket_tree *bucket,
                              int x, int r)
 {
        int n;
@@ -224,7 +225,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
 
 /* straw */
 
-static int bucket_straw_choose(struct crush_bucket_straw *bucket,
+static int bucket_straw_choose(const struct crush_bucket_straw *bucket,
                               int x, int r)
 {
        __u32 i;
@@ -301,7 +302,7 @@ static __u64 crush_ln(unsigned int xin)
  *
  */
 
-static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
                                int x, int r)
 {
        unsigned int i, high = 0;
@@ -344,37 +345,42 @@ static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
                        high_draw = draw;
                }
        }
+
        return bucket->h.items[high];
 }
 
 
-static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
+static int crush_bucket_choose(const struct crush_bucket *in,
+                              struct crush_work_bucket *work,
+                              int x, int r)
 {
        dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
        BUG_ON(in->size == 0);
        switch (in->alg) {
        case CRUSH_BUCKET_UNIFORM:
-               return bucket_uniform_choose((struct crush_bucket_uniform *)in,
-                                         x, r);
+               return bucket_uniform_choose(
+                       (const struct crush_bucket_uniform *)in,
+                       work, x, r);
        case CRUSH_BUCKET_LIST:
-               return bucket_list_choose((struct crush_bucket_list *)in,
+               return bucket_list_choose((const struct crush_bucket_list *)in,
                                          x, r);
        case CRUSH_BUCKET_TREE:
-               return bucket_tree_choose((struct crush_bucket_tree *)in,
+               return bucket_tree_choose((const struct crush_bucket_tree *)in,
                                          x, r);
        case CRUSH_BUCKET_STRAW:
-               return bucket_straw_choose((struct crush_bucket_straw *)in,
-                                          x, r);
+               return bucket_straw_choose(
+                       (const struct crush_bucket_straw *)in,
+                       x, r);
        case CRUSH_BUCKET_STRAW2:
-               return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
-                                           x, r);
+               return bucket_straw2_choose(
+                       (const struct crush_bucket_straw2 *)in,
+                       x, r);
        default:
                dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
                return in->items[0];
        }
 }
 
-
 /*
  * true if device is marked "out" (failed, fully offloaded)
  * of the cluster
@@ -416,7 +422,8 @@ static int is_out(const struct crush_map *map,
  * @parent_r: r value passed from the parent
  */
 static int crush_choose_firstn(const struct crush_map *map,
-                              struct crush_bucket *bucket,
+                              struct crush_work *work,
+                              const struct crush_bucket *bucket,
                               const __u32 *weight, int weight_max,
                               int x, int numrep, int type,
                               int *out, int outpos,
@@ -434,7 +441,7 @@ static int crush_choose_firstn(const struct crush_map *map,
        int rep;
        unsigned int ftotal, flocal;
        int retry_descent, retry_bucket, skip_rep;
-       struct crush_bucket *in = bucket;
+       const struct crush_bucket *in = bucket;
        int r;
        int i;
        int item = 0;
@@ -473,9 +480,13 @@ static int crush_choose_firstn(const struct crush_map *map,
                                if (local_fallback_retries > 0 &&
                                    flocal >= (in->size>>1) &&
                                    flocal > local_fallback_retries)
-                                       item = bucket_perm_choose(in, x, r);
+                                       item = bucket_perm_choose(
+                                               in, work->work[-1-in->id],
+                                               x, r);
                                else
-                                       item = crush_bucket_choose(in, x, r);
+                                       item = crush_bucket_choose(
+                                               in, work->work[-1-in->id],
+                                               x, r);
                                if (item >= map->max_devices) {
                                        dprintk("   bad item %d\n", item);
                                        skip_rep = 1;
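The work->work[-1-in->id] lookups above rely on the CRUSH id convention: devices have non-negative ids while buckets have negative ones, so bucket id -1 lands in workspace slot 0, -2 in slot 1, and so on; the same arithmetic appears in the map->buckets[-1-item] dereferences. For instance:

	int slot = -1 - in->id;			/* id -1 -> 0, id -7 -> 6 */
	struct crush_work_bucket *wb = work->work[slot];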
@@ -518,19 +529,21 @@ static int crush_choose_firstn(const struct crush_map *map,
                                                        sub_r = r >> (vary_r-1);
                                                else
                                                        sub_r = 0;
-                                               if (crush_choose_firstn(map,
-                                                        map->buckets[-1-item],
-                                                        weight, weight_max,
-                                                        x, stable ? 1 : outpos+1, 0,
-                                                        out2, outpos, count,
-                                                        recurse_tries, 0,
-                                                        local_retries,
-                                                        local_fallback_retries,
-                                                        0,
-                                                        vary_r,
-                                                        stable,
-                                                        NULL,
-                                                        sub_r) <= outpos)
+                                               if (crush_choose_firstn(
+                                                           map,
+                                                           work,
+                                                           map->buckets[-1-item],
+                                                           weight, weight_max,
+                                                           x, stable ? 1 : outpos+1, 0,
+                                                           out2, outpos, count,
+                                                           recurse_tries, 0,
+                                                           local_retries,
+                                                           local_fallback_retries,
+                                                           0,
+                                                           vary_r,
+                                                           stable,
+                                                           NULL,
+                                                           sub_r) <= outpos)
                                                        /* didn't get leaf */
                                                        reject = 1;
                                        } else {
@@ -539,14 +552,12 @@ static int crush_choose_firstn(const struct crush_map *map,
                                        }
                                }
 
-                               if (!reject) {
+                               if (!reject && !collide) {
                                        /* out? */
                                        if (itemtype == 0)
                                                reject = is_out(map, weight,
                                                                weight_max,
                                                                item, x);
-                                       else
-                                               reject = 0;
                                }
 
 reject:
@@ -600,7 +611,8 @@ reject:
  *
  */
 static void crush_choose_indep(const struct crush_map *map,
-                              struct crush_bucket *bucket,
+                              struct crush_work *work,
+                              const struct crush_bucket *bucket,
                               const __u32 *weight, int weight_max,
                               int x, int left, int numrep, int type,
                               int *out, int outpos,
@@ -610,7 +622,7 @@ static void crush_choose_indep(const struct crush_map *map,
                               int *out2,
                               int parent_r)
 {
-       struct crush_bucket *in = bucket;
+       const struct crush_bucket *in = bucket;
        int endpos = outpos + left;
        int rep;
        unsigned int ftotal;
@@ -678,7 +690,9 @@ static void crush_choose_indep(const struct crush_map *map,
                                        break;
                                }
 
-                               item = crush_bucket_choose(in, x, r);
+                               item = crush_bucket_choose(
+                                       in, work->work[-1-in->id],
+                                       x, r);
                                if (item >= map->max_devices) {
                                        dprintk("   bad item %d\n", item);
                                        out[rep] = CRUSH_ITEM_NONE;
@@ -724,13 +738,15 @@ static void crush_choose_indep(const struct crush_map *map,
 
                                if (recurse_to_leaf) {
                                        if (item < 0) {
-                                               crush_choose_indep(map,
-                                                  map->buckets[-1-item],
-                                                  weight, weight_max,
-                                                  x, 1, numrep, 0,
-                                                  out2, rep,
-                                                  recurse_tries, 0,
-                                                  0, NULL, r);
+                                               crush_choose_indep(
+                                                       map,
+                                                       work,
+                                                       map->buckets[-1-item],
+                                                       weight, weight_max,
+                                                       x, 1, numrep, 0,
+                                                       out2, rep,
+                                                       recurse_tries, 0,
+                                                       0, NULL, r);
                                                if (out2[rep] == CRUSH_ITEM_NONE) {
                                                        /* placed nothing; no leaf */
                                                        break;
@@ -781,6 +797,53 @@ static void crush_choose_indep(const struct crush_map *map,
 #endif
 }
 
+
+/*
+ * This takes a chunk of memory and sets it up to be a shiny new
+ * working area for a CRUSH placement computation. It must be called
+ * on any newly allocated memory before passing it in to
+ * crush_do_rule. It may be used repeatedly after that, so long as the
+ * map has not changed. If the map /has/ changed, you must make sure
+ * the working size is no smaller than what was allocated and re-run
+ * crush_init_workspace.
+ *
+ * If you do retain the working space between calls to crush, make it
+ * thread-local.
+ */
+void crush_init_workspace(const struct crush_map *map, void *v)
+{
+       struct crush_work *w = v;
+       __s32 b;
+
+       /*
+        * We work by moving through the available space and setting
+        * values and pointers as we go.
+        *
+        * It's a bit like Forth's use of the 'allot' word since we
+        * set the pointer first and then reserve the space for it to
+        * point to by incrementing the pointer.
+        */
+       v += sizeof(struct crush_work *);
+       w->work = v;
+       v += map->max_buckets * sizeof(struct crush_work_bucket *);
+       for (b = 0; b < map->max_buckets; ++b) {
+               if (!map->buckets[b])
+                       continue;
+
+               w->work[b] = v;
+               switch (map->buckets[b]->alg) {
+               default:
+                       v += sizeof(struct crush_work_bucket);
+                       break;
+               }
+               w->work[b]->perm_x = 0;
+               w->work[b]->perm_n = 0;
+               w->work[b]->perm = v;
+               v += map->buckets[b]->size * sizeof(__u32);
+       }
+       BUG_ON(v - (void *)w != map->working_size);
+}
+
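A sketch of the intended calling sequence, mirroring how the osdmap code later in this patch sizes, initializes, and serializes the workspace (mutex name illustrative; error handling elided):

	size_t size = crush_work_size(map, result_max);
	void *work = kmalloc(size, GFP_NOIO);

	crush_init_workspace(map, work);	/* once per (map, buffer) pair */

	mutex_lock(&workspace_mutex);		/* or keep the buffer thread-local */
	len = crush_do_rule(map, ruleno, x, result, result_max,
			    weight, weight_max, work);
	mutex_unlock(&workspace_mutex);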
 /**
  * crush_do_rule - calculate a mapping with the given input and rule
  * @map: the crush_map
@@ -790,24 +853,25 @@ static void crush_choose_indep(const struct crush_map *map,
  * @result_max: maximum result size
  * @weight: weight vector (for map leaves)
  * @weight_max: size of weight vector
- * @scratch: scratch vector for private use; must be >= 3 * result_max
+ * @cwin: pointer to at least crush_work_size() bytes of memory
  */
 int crush_do_rule(const struct crush_map *map,
                  int ruleno, int x, int *result, int result_max,
                  const __u32 *weight, int weight_max,
-                 int *scratch)
+                 void *cwin)
 {
        int result_len;
-       int *a = scratch;
-       int *b = scratch + result_max;
-       int *c = scratch + result_max*2;
+       struct crush_work *cw = cwin;
+       int *a = cwin + map->working_size;
+       int *b = a + result_max;
+       int *c = b + result_max;
+       int *w = a;
+       int *o = b;
        int recurse_to_leaf;
-       int *w;
        int wsize = 0;
-       int *o;
        int osize;
        int *tmp;
-       struct crush_rule *rule;
+       const struct crush_rule *rule;
        __u32 step;
        int i, j;
        int numrep;
@@ -835,12 +899,10 @@ int crush_do_rule(const struct crush_map *map,
 
        rule = map->rules[ruleno];
        result_len = 0;
-       w = a;
-       o = b;
 
        for (step = 0; step < rule->len; step++) {
                int firstn = 0;
-               struct crush_rule_step *curstep = &rule->steps[step];
+               const struct crush_rule_step *curstep = &rule->steps[step];
 
                switch (curstep->op) {
                case CRUSH_RULE_TAKE:
@@ -936,6 +998,7 @@ int crush_do_rule(const struct crush_map *map,
                                                recurse_tries = choose_tries;
                                        osize += crush_choose_firstn(
                                                map,
+                                               cw,
                                                map->buckets[bno],
                                                weight, weight_max,
                                                x, numrep,
@@ -956,6 +1019,7 @@ int crush_do_rule(const struct crush_map *map,
                                                    numrep : (result_max-osize));
                                        crush_choose_indep(
                                                map,
+                                               cw,
                                                map->buckets[bno],
                                                weight, weight_max,
                                                x, out_size, numrep,
@@ -997,5 +1061,6 @@ int crush_do_rule(const struct crush_map *map,
                        break;
                }
        }
+
        return result_len;
 }
index 292e33bd916e650c0317ab630a0c60a400d21c7d..85747b7f91a91894d4902636d5145dc957184df3 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <linux/err.h>
 #include <linux/scatterlist.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <crypto/aes.h>
 #include <crypto/skcipher.h>
index f3378ba1a82893024b9012c5421099bce87f1824..b65bbf9f45ebb22c8ac51af34c6b1c29ef7ed17c 100644 (file)
@@ -460,7 +460,6 @@ static void request_init(struct ceph_osd_request *req)
 
        kref_init(&req->r_kref);
        init_completion(&req->r_completion);
-       init_completion(&req->r_done_completion);
        RB_CLEAR_NODE(&req->r_node);
        RB_CLEAR_NODE(&req->r_mc_node);
        INIT_LIST_HEAD(&req->r_unsafe_item);
@@ -672,7 +671,8 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
        BUG_ON(length > previous);
 
        op->extent.length = length;
-       op->indata_len -= previous - length;
+       if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL)
+               op->indata_len -= previous - length;
 }
 EXPORT_SYMBOL(osd_req_op_extent_update);
 
@@ -1636,7 +1636,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
        bool need_send = false;
        bool promoted = false;
 
-       WARN_ON(req->r_tid || req->r_got_reply);
+       WARN_ON(req->r_tid);
        dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
@@ -1704,17 +1704,10 @@ promote:
 
 static void account_request(struct ceph_osd_request *req)
 {
-       unsigned int mask = CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK;
+       WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK));
+       WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE)));
 
-       if (req->r_flags & CEPH_OSD_FLAG_READ) {
-               WARN_ON(req->r_flags & mask);
-               req->r_flags |= CEPH_OSD_FLAG_ACK;
-       } else if (req->r_flags & CEPH_OSD_FLAG_WRITE)
-               WARN_ON(!(req->r_flags & mask));
-       else
-               WARN_ON(1);
-
-       WARN_ON(req->r_unsafe_callback && (req->r_flags & mask) != mask);
+       req->r_flags |= CEPH_OSD_FLAG_ONDISK;
        atomic_inc(&req->r_osdc->num_requests);
 }
 
@@ -1749,15 +1742,15 @@ static void finish_request(struct ceph_osd_request *req)
 
 static void __complete_request(struct ceph_osd_request *req)
 {
-       if (req->r_callback)
+       if (req->r_callback) {
+               dout("%s req %p tid %llu cb %pf result %d\n", __func__, req,
+                    req->r_tid, req->r_callback, req->r_result);
                req->r_callback(req);
-       else
-               complete_all(&req->r_completion);
+       }
 }
 
 /*
- * Note that this is open-coded in handle_reply(), which has to deal
- * with ack vs commit, dup acks, etc.
+ * This is open-coded in handle_reply().
  */
 static void complete_request(struct ceph_osd_request *req, int err)
 {
@@ -1766,7 +1759,7 @@ static void complete_request(struct ceph_osd_request *req, int err)
        req->r_result = err;
        finish_request(req);
        __complete_request(req);
-       complete_all(&req->r_done_completion);
+       complete_all(&req->r_completion);
        ceph_osdc_put_request(req);
 }
 
@@ -1792,7 +1785,7 @@ static void cancel_request(struct ceph_osd_request *req)
 
        cancel_map_check(req);
        finish_request(req);
-       complete_all(&req->r_done_completion);
+       complete_all(&req->r_completion);
        ceph_osdc_put_request(req);
 }
 
@@ -2169,7 +2162,6 @@ static void linger_commit_cb(struct ceph_osd_request *req)
        mutex_lock(&lreq->lock);
        dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
             lreq->linger_id, req->r_result);
-       WARN_ON(!__linger_registered(lreq));
        linger_reg_commit_complete(lreq, req->r_result);
        lreq->committed = true;
 
@@ -2785,31 +2777,8 @@ e_inval:
 }
 
 /*
- * We are done with @req if
- *   - @m is a safe reply, or
- *   - @m is an unsafe reply and we didn't want a safe one
- */
-static bool done_request(const struct ceph_osd_request *req,
-                        const struct MOSDOpReply *m)
-{
-       return (m->result < 0 ||
-               (m->flags & CEPH_OSD_FLAG_ONDISK) ||
-               !(req->r_flags & CEPH_OSD_FLAG_ONDISK));
-}
-
-/*
- * handle osd op reply.  either call the callback if it is specified,
- * or do the completion to wake up the waiting thread.
- *
- * ->r_unsafe_callback is set? yes                     no
- *
- * first reply is OK (needed   r_cb/r_completion,      r_cb/r_completion,
- * any or needed/got safe)     r_done_completion       r_done_completion
- *
- * first reply is unsafe       r_unsafe_cb(true)       (nothing)
- *
- * when we get the safe reply  r_unsafe_cb(false),     r_cb/r_completion,
- *                             r_done_completion       r_done_completion
+ * Handle MOSDOpReply.  Set ->r_result and call the callback if it is
+ * specified.
  */
 static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
 {
@@ -2818,7 +2787,6 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
        struct MOSDOpReply m;
        u64 tid = le64_to_cpu(msg->hdr.tid);
        u32 data_len = 0;
-       bool already_acked;
        int ret;
        int i;
 
@@ -2897,50 +2865,22 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
                       le32_to_cpu(msg->hdr.data_len), req->r_tid);
                goto fail_request;
        }
-       dout("%s req %p tid %llu acked %d result %d data_len %u\n", __func__,
-            req, req->r_tid, req->r_got_reply, m.result, data_len);
-
-       already_acked = req->r_got_reply;
-       if (!already_acked) {
-               req->r_result = m.result ?: data_len;
-               req->r_replay_version = m.replay_version; /* struct */
-               req->r_got_reply = true;
-       } else if (!(m.flags & CEPH_OSD_FLAG_ONDISK)) {
-               dout("req %p tid %llu dup ack\n", req, req->r_tid);
-               goto out_unlock_session;
-       }
-
-       if (done_request(req, &m)) {
-               finish_request(req);
-               if (req->r_linger) {
-                       WARN_ON(req->r_unsafe_callback);
-                       dout("req %p tid %llu cb (locked)\n", req, req->r_tid);
-                       __complete_request(req);
-               }
-       }
+       dout("%s req %p tid %llu result %d data_len %u\n", __func__,
+            req, req->r_tid, m.result, data_len);
 
+       /*
+        * Since we only ever request ONDISK, we should only ever get
+        * one (type of) reply back.
+        */
+       WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
+       req->r_result = m.result ?: data_len;
+       finish_request(req);
        mutex_unlock(&osd->lock);
        up_read(&osdc->lock);
 
-       if (done_request(req, &m)) {
-               if (already_acked && req->r_unsafe_callback) {
-                       dout("req %p tid %llu safe-cb\n", req, req->r_tid);
-                       req->r_unsafe_callback(req, false);
-               } else if (!req->r_linger) {
-                       dout("req %p tid %llu cb\n", req, req->r_tid);
-                       __complete_request(req);
-               }
-               complete_all(&req->r_done_completion);
-               ceph_osdc_put_request(req);
-       } else {
-               if (req->r_unsafe_callback) {
-                       dout("req %p tid %llu unsafe-cb\n", req, req->r_tid);
-                       req->r_unsafe_callback(req, true);
-               } else {
-                       WARN_ON(1);
-               }
-       }
-
+       __complete_request(req);
+       complete_all(&req->r_completion);
+       ceph_osdc_put_request(req);
        return;
 
 fail_request:
@@ -3540,7 +3480,7 @@ again:
                        up_read(&osdc->lock);
                        dout("%s waiting on req %p tid %llu last_tid %llu\n",
                             __func__, req, req->r_tid, last_tid);
-                       wait_for_completion(&req->r_done_completion);
+                       wait_for_completion(&req->r_completion);
                        ceph_osdc_put_request(req);
                        goto again;
                }
@@ -3599,7 +3539,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
 
        ceph_oid_copy(&lreq->t.base_oid, oid);
        ceph_oloc_copy(&lreq->t.base_oloc, oloc);
-       lreq->t.flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+       lreq->t.flags = CEPH_OSD_FLAG_WRITE;
        lreq->mtime = CURRENT_TIME;
 
        lreq->reg_req = alloc_linger_request(lreq);
@@ -3657,7 +3597,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
 
        ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
        ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
-       req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+       req->r_flags = CEPH_OSD_FLAG_WRITE;
        req->r_mtime = CURRENT_TIME;
        osd_req_op_watch_init(req, 0, lreq->linger_id,
                              CEPH_OSD_WATCH_OP_UNWATCH);
@@ -4022,7 +3962,7 @@ EXPORT_SYMBOL(ceph_osdc_maybe_request_map);
  * Execute an OSD class method on an object.
  *
  * @flags: CEPH_OSD_FLAG_*
- * @resp_len: out param for reply length
+ * @resp_len: in/out param for reply length
  */
 int ceph_osdc_call(struct ceph_osd_client *osdc,
                   struct ceph_object_id *oid,
@@ -4035,6 +3975,9 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
        struct ceph_osd_request *req;
        int ret;
 
+       if (req_len > PAGE_SIZE || (resp_page && *resp_len > PAGE_SIZE))
+               return -E2BIG;
+
        req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
        if (!req)
                return -ENOMEM;
@@ -4053,7 +3996,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
                                                  0, false, false);
        if (resp_page)
                osd_req_op_cls_response_data_pages(req, 0, &resp_page,
-                                                  PAGE_SIZE, 0, false, false);
+                                                  *resp_len, 0, false, false);
 
        ceph_osdc_start_request(osdc, req, false);
        ret = ceph_osdc_wait_request(osdc, req);
@@ -4220,8 +4163,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
        int page_align = off & ~PAGE_MASK;
 
        req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
-                                   CEPH_OSD_OP_WRITE,
-                                   CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+                                   CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
                                    snapc, truncate_seq, truncate_size,
                                    true);
        if (IS_ERR(req))
index d2436880b3056da8342845103c23aa59c66f8066..6824c0ec8373e721ac9ca2d837f488ff22233e1f 100644 (file)
@@ -153,6 +153,32 @@ bad:
         return -EINVAL;
 }
 
+static void crush_finalize(struct crush_map *c)
+{
+       __s32 b;
+
+       /* Space for the array of pointers to per-bucket workspace */
+       c->working_size = sizeof(struct crush_work) +
+           c->max_buckets * sizeof(struct crush_work_bucket *);
+
+       for (b = 0; b < c->max_buckets; b++) {
+               if (!c->buckets[b])
+                       continue;
+
+               switch (c->buckets[b]->alg) {
+               default:
+                       /*
+                        * The base case, permutation variables and
+                        * the pointer to the permutation array.
+                        */
+                       c->working_size += sizeof(struct crush_work_bucket);
+                       break;
+               }
+               /* Every bucket has a permutation array. */
+               c->working_size += c->buckets[b]->size * sizeof(__u32);
+       }
+}
+
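So for a hypothetical map with max_buckets = 2 and bucket sizes 3 and 4, the precomputed size works out to:

	working_size = sizeof(struct crush_work)
		     + 2 * sizeof(struct crush_work_bucket *)	/* pointer array */
		     + 2 * sizeof(struct crush_work_bucket)	/* per-bucket state */
		     + (3 + 4) * sizeof(__u32)			/* permutation arrays */

which is exactly the layout crush_init_workspace() walks (and BUG_ONs against); crush_work_size() then adds room for the three result_max-sized scratch arrays that crush_do_rule() carves out past working_size.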
 static struct crush_map *crush_decode(void *pbyval, void *end)
 {
        struct crush_map *c;
@@ -246,10 +272,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
                b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
                if (b->items == NULL)
                        goto badmem;
-               b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS);
-               if (b->perm == NULL)
-                       goto badmem;
-               b->perm_n = 0;
 
                ceph_decode_need(p, end, b->size*sizeof(u32), bad);
                for (j = 0; j < b->size; j++)
@@ -368,6 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
        dout("crush decode tunable chooseleaf_stable = %d\n",
             c->chooseleaf_stable);
 
+       crush_finalize(c);
+
 done:
        dout("crush_decode success\n");
        return c;
@@ -719,7 +743,7 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
        map->pool_max = -1;
        map->pg_temp = RB_ROOT;
        map->primary_temp = RB_ROOT;
-       mutex_init(&map->crush_scratch_mutex);
+       mutex_init(&map->crush_workspace_mutex);
 
        return map;
 }
@@ -753,6 +777,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
        kfree(map->osd_weight);
        kfree(map->osd_addr);
        kfree(map->osd_primary_affinity);
+       kfree(map->crush_workspace);
        kfree(map);
 }
 
@@ -808,6 +833,31 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
        return 0;
 }
 
+static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
+{
+       void *workspace;
+       size_t work_size;
+
+       if (IS_ERR(crush))
+               return PTR_ERR(crush);
+
+       work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE);
+       dout("%s work_size %zu bytes\n", __func__, work_size);
+       workspace = kmalloc(work_size, GFP_NOIO);
+       if (!workspace) {
+               crush_destroy(crush);
+               return -ENOMEM;
+       }
+       crush_init_workspace(crush, workspace);
+
+       if (map->crush)
+               crush_destroy(map->crush);
+       kfree(map->crush_workspace);
+       map->crush = crush;
+       map->crush_workspace = workspace;
+       return 0;
+}
+
 #define OSDMAP_WRAPPER_COMPAT_VER      7
 #define OSDMAP_CLIENT_DATA_COMPAT_VER  1
 
@@ -1214,13 +1264,9 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 
        /* crush */
        ceph_decode_32_safe(p, end, len, e_inval);
-       map->crush = crush_decode(*p, min(*p + len, end));
-       if (IS_ERR(map->crush)) {
-               err = PTR_ERR(map->crush);
-               map->crush = NULL;
+       err = osdmap_set_crush(map, crush_decode(*p, min(*p + len, end)));
+       if (err)
                goto bad;
-       }
-       *p += len;
 
        /* ignore the rest */
        *p = end;
@@ -1375,7 +1421,6 @@ e_inval:
 struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
                                             struct ceph_osdmap *map)
 {
-       struct crush_map *newcrush = NULL;
        struct ceph_fsid fsid;
        u32 epoch = 0;
        struct ceph_timespec modified;
@@ -1414,12 +1459,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
        /* new crush? */
        ceph_decode_32_safe(p, end, len, e_inval);
        if (len > 0) {
-               newcrush = crush_decode(*p, min(*p+len, end));
-               if (IS_ERR(newcrush)) {
-                       err = PTR_ERR(newcrush);
-                       newcrush = NULL;
+               err = osdmap_set_crush(map,
+                                      crush_decode(*p, min(*p + len, end)));
+               if (err)
                        goto bad;
-               }
                *p += len;
        }
 
@@ -1439,12 +1482,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 
        map->epoch++;
        map->modified = modified;
-       if (newcrush) {
-               if (map->crush)
-                       crush_destroy(map->crush);
-               map->crush = newcrush;
-               newcrush = NULL;
-       }
 
        /* new_pools */
        err = decode_new_pools(p, end, map);
@@ -1505,8 +1542,6 @@ bad:
        print_hex_dump(KERN_DEBUG, "osdmap: ",
                       DUMP_PREFIX_OFFSET, 16, 1,
                       start, end - start, true);
-       if (newcrush)
-               crush_destroy(newcrush);
        return ERR_PTR(err);
 }
 
@@ -1942,10 +1977,10 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 
        BUG_ON(result_max > CEPH_PG_MAX_SIZE);
 
-       mutex_lock(&map->crush_scratch_mutex);
+       mutex_lock(&map->crush_workspace_mutex);
        r = crush_do_rule(map->crush, ruleno, x, result, result_max,
-                         weight, weight_max, map->crush_scratch_ary);
-       mutex_unlock(&map->crush_scratch_mutex);
+                         weight, weight_max, map->crush_workspace);
+       mutex_unlock(&map->crush_workspace_mutex);
 
        return r;
 }
@@ -1978,8 +2013,14 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
                return;
        }
 
-       len = do_crush(osdmap, ruleno, pps, raw->osds,
-                      min_t(int, pi->size, ARRAY_SIZE(raw->osds)),
+       if (pi->size > ARRAY_SIZE(raw->osds)) {
+               pr_err_ratelimited("pool %lld ruleset %d type %d too wide: size %d > %zu\n",
+                      pi->id, pi->crush_ruleset, pi->type, pi->size,
+                      ARRAY_SIZE(raw->osds));
+               return;
+       }
+
+       len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
                       osdmap->osd_weight, osdmap->max_osd);
        if (len < 0) {
                pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
index 154683f5f14cdc6e99fa01a4e79e22e69924eef4..705414e78ae0b05d2d1b8d5d8f8e8fbb6007bfb4 100644 (file)
@@ -18,8 +18,6 @@
  * 02110-1301, USA.
  */
 
-#include <stddef.h>
-
 #include <linux/types.h>
 #include <linux/export.h>
 #include <linux/ceph/libceph.h>
index 304f2deae5f9897e60a79ed8b69d6ef208295ded..e63bf61b19be029e30ac40443c0e2edb24de4a73 100644 (file)
@@ -4883,6 +4883,39 @@ void __napi_schedule(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule);
 
+/**
+ *     napi_schedule_prep - check if napi can be scheduled
+ *     @n: napi context
+ *
+ * Test if NAPI routine is already running, and if not mark
+ * it as running.  This is used as a condition variable to
+ * ensure that only one NAPI poll instance runs.  We also make
+ * sure there is no pending NAPI disable.
+ */
+bool napi_schedule_prep(struct napi_struct *n)
+{
+       unsigned long val, new;
+
+       do {
+               val = READ_ONCE(n->state);
+               if (unlikely(val & NAPIF_STATE_DISABLE))
+                       return false;
+               new = val | NAPIF_STATE_SCHED;
+
+               /* Sets STATE_MISSED bit if STATE_SCHED was already set
+                * This was suggested by Alexander Duyck, as compiler
+                * emits better code than:
+                * if (val & NAPIF_STATE_SCHED)
+                *     new |= NAPIF_STATE_MISSED;
+                */
+               new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
+                                                  NAPIF_STATE_MISSED;
+       } while (cmpxchg(&n->state, val, new) != val);
+
+       return !(val & NAPIF_STATE_SCHED);
+}
+EXPORT_SYMBOL(napi_schedule_prep);
+
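The branchless line above works because NAPIF_STATE_SCHED is a single bit: (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED is 1 when the bit is set and 0 otherwise, so the multiply yields NAPIF_STATE_MISSED or 0 with no conditional branch. A standalone check of the identity (bit values are stand-ins, not the kernel's):

	#include <assert.h>

	#define SCHED	(1UL << 0)	/* stand-in for NAPIF_STATE_SCHED */
	#define MISSED	(1UL << 1)	/* stand-in for NAPIF_STATE_MISSED */

	int main(void)
	{
		for (unsigned long val = 0; val < 4; val++)
			assert(((val & SCHED) ? MISSED : 0UL) ==
			       (val & SCHED) / SCHED * MISSED);
		return 0;
	}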
 /**
  * __napi_schedule_irqoff - schedule for receive
  * @n: entry to schedule
@@ -4897,7 +4930,7 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
 
 bool napi_complete_done(struct napi_struct *n, int work_done)
 {
-       unsigned long flags;
+       unsigned long flags, val, new;
 
        /*
         * 1) Don't let napi dequeue from the cpu poll list
@@ -4927,7 +4960,27 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
                list_del_init(&n->poll_list);
                local_irq_restore(flags);
        }
-       WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+
+       do {
+               val = READ_ONCE(n->state);
+
+               WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
+
+               new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
+
+               /* If STATE_MISSED was set, leave STATE_SCHED set,
+                * because we will call napi->poll() one more time.
+                * This C code was suggested by Alexander Duyck to help gcc.
+                */
+               new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
+                                                   NAPIF_STATE_SCHED;
+       } while (cmpxchg(&n->state, val, new) != val);
+
+       if (unlikely(val & NAPIF_STATE_MISSED)) {
+               __napi_schedule(n);
+               return false;
+       }
+
        return true;
 }
 EXPORT_SYMBOL(napi_complete_done);
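Taken together, the two cmpxchg() loops close a wakeup race: an interrupt arriving while a poll is in flight records NAPIF_STATE_MISSED instead of being lost, and napi_complete_done() then reschedules the poll rather than clearing SCHED. A hedged sketch of the driver-side pattern this protects (the mydrv_* names and helpers are hypothetical):

	static irqreturn_t mydrv_irq(int irq, void *data)
	{
		struct mydrv_priv *priv = data;

		mydrv_mask_irqs(priv);			/* hypothetical helper */
		if (napi_schedule_prep(&priv->napi))
			__napi_schedule(&priv->napi);
		/* else: a poll already holds SCHED; MISSED is now set, so
		 * napi_complete_done() will re-run the poll for us.
		 */
		return IRQ_HANDLED;
	}

	static int mydrv_poll(struct napi_struct *napi, int budget)
	{
		struct mydrv_priv *priv = container_of(napi, struct mydrv_priv, napi);
		int done = mydrv_process_rx(priv, budget);	/* hypothetical helper */

		if (done < budget && napi_complete_done(napi, done))
			mydrv_unmask_irqs(priv);	/* poll really finished */
		return done;
	}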
@@ -4953,6 +5006,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 {
        int rc;
 
+       /* Busy polling means there is a high chance the device driver hard irq
+        * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
+        * set in napi_schedule_prep().
+        * Since we are about to call napi->poll() once more, we can safely
+        * clear NAPI_STATE_MISSED.
+        *
+        * Note: x86 could use a single "lock and ..." instruction
+        * to perform these two clear_bit()
+        */
+       clear_bit(NAPI_STATE_MISSED, &napi->state);
        clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
 
        local_bh_disable();
@@ -5088,8 +5151,13 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
        struct napi_struct *napi;
 
        napi = container_of(timer, struct napi_struct, timer);
-       if (napi->gro_list)
-               napi_schedule_irqoff(napi);
+
+       /* Note: we use a relaxed variant of napi_schedule_prep() not setting
+        * NAPI_STATE_MISSED, since we do not react to a device IRQ.
+        */
+       if (napi->gro_list && !napi_disable_pending(napi) &&
+           !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
+               __napi_schedule_irqoff(napi);
 
        return HRTIMER_NORESTART;
 }
index 8fedc2d497709b3dea9202894f45bf5cab043361..4a05d78768502df69275b4f91cb03bb2ada9f4c3 100644 (file)
@@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
        const int old_state = sk->sk_state;
+       bool acceptable;
        int queued = 0;
 
        /*
@@ -603,8 +604,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
         */
        if (sk->sk_state == DCCP_LISTEN) {
                if (dh->dccph_type == DCCP_PKT_REQUEST) {
-                       if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
-                                                                   skb) < 0)
+                       /* It is possible that we process SYN packets from backlog,
+                        * so we need to make sure to disable BH right there.
+                        */
+                       local_bh_disable();
+                       acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0;
+                       local_bh_enable();
+                       if (!acceptable)
                                return 1;
                        consume_skb(skb);
                        return 0;
index e0bd013a1e5ed9fc8fbd16329a877f0231e13271..eedba7670b517fc1b44cbf8b3e085fb81b6a713e 100644 (file)
@@ -279,7 +279,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
        pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
 
        if (size > mtu) {
-               pr_debug("size = %Zu, mtu = %u\n", size, mtu);
+               pr_debug("size = %zu, mtu = %u\n", size, mtu);
                err = -EMSGSIZE;
                goto out_dev;
        }
@@ -645,7 +645,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
        pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
 
        if (size > mtu) {
-               pr_debug("size = %Zu, mtu = %u\n", size, mtu);
+               pr_debug("size = %zu, mtu = %u\n", size, mtu);
                err = -EMSGSIZE;
                goto out_dev;
        }
index b39a791f6756fc831857774b984febc71e37fae3..42bfd08109dd78ab509493e8d2205d72845bb3eb 100644 (file)
@@ -622,6 +622,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
        [RTA_ENCAP]             = { .type = NLA_NESTED },
        [RTA_UID]               = { .type = NLA_U32 },
+       [RTA_MARK]              = { .type = NLA_U32 },
 };
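With the policy entry in place, nla_parse() length-checks RTA_MARK as a 32-bit attribute instead of accepting it unvalidated; consumers can then read it with the usual accessor. A minimal sketch (assuming an attribute table tb[] filled in against this policy):

	u32 mark = 0;

	if (tb[RTA_MARK])
		mark = nla_get_u32(tb[RTA_MARK]);	/* validated as NLA_U32 */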
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
index d8cea210af0ed939a30c345fc2169a53b9098976..2f0d8233950faeac91f287644bc9476f19f74578 100644 (file)
@@ -2388,7 +2388,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 
        seq_printf(seq,
                   "Basic info: size of leaf:"
-                  " %Zd bytes, size of tnode: %Zd bytes.\n",
+                  " %zd bytes, size of tnode: %zd bytes.\n",
                   LEAF_SIZE, TNODE_SIZE(0));
 
        for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
index beacd028848c903e1d58e8d7b180d7814ca2f871..c0317c940bcdc303015f500b52198e0862440e17 100644 (file)
@@ -2596,7 +2596,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
                const char *name =  vif->dev ? vif->dev->name : "none";
 
                seq_printf(seq,
-                          "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
+                          "%2zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
                           vif - mrt->vif_table,
                           name, vif->bytes_in, vif->pkt_in,
                           vif->bytes_out, vif->pkt_out,
index fcfd071f470576281f425f7f3f145db12643e7eb..bc1486f2c0643355ddac067cb79f075cafd788d1 100644 (file)
@@ -235,7 +235,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
        }
 
        if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
-               pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n",
+               pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
                         *len, sizeof(struct sockaddr_in));
                return -EINVAL;
        }
index f6f713376e6e429610b070ba65f685cbfd6ae6ae..2f3895ddc275d2e72b2f93d816a27f41b292384a 100644 (file)
@@ -69,7 +69,7 @@ static void dump_arp_packet(struct nf_log_buf *m,
 
        ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
        if (ap == NULL) {
-               nf_log_buf_add(m, " INCOMPLETE [%Zu bytes]",
+               nf_log_buf_add(m, " INCOMPLETE [%zu bytes]",
                               skb->len - sizeof(_arph));
                return;
        }
index 2c0ff327b6dfe6919f22bf52687816e19c2c0444..39c393cc0fd3c17130cd5d8d8b37f31ad3aeafd9 100644 (file)
@@ -5886,9 +5886,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                if (th->syn) {
                        if (th->fin)
                                goto discard;
-                       if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
-                               return 1;
+                       /* It is possible that we process SYN packets from backlog,
+                        * so we need to make sure to disable BH right there.
+                        */
+                       local_bh_disable();
+                       acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+                       local_bh_enable();
 
+                       if (!acceptable)
+                               return 1;
                        consume_skb(skb);
                        return 0;
                }
index 3a2025f5bf2c333a37d18329cdec88fdc1827870..cfc485a8e1c0286ae98d723e3858171090673ddd 100644 (file)
@@ -5692,13 +5692,18 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
        struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
        struct net *net = (struct net *)ctl->extra2;
 
+       if (!rtnl_trylock())
+               return restart_syscall();
+
        ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write) {
                new_val = *((int *)ctl->data);
 
-               if (check_addr_gen_mode(new_val) < 0)
-                       return -EINVAL;
+               if (check_addr_gen_mode(new_val) < 0) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
                /* request for default */
                if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) {
@@ -5707,20 +5712,23 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
                /* request for individual net device */
                } else {
                        if (!idev)
-                               return ret;
+                               goto out;
 
-                       if (check_stable_privacy(idev, net, new_val) < 0)
-                               return -EINVAL;
+                       if (check_stable_privacy(idev, net, new_val) < 0) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
 
                        if (idev->cnf.addr_gen_mode != new_val) {
                                idev->cnf.addr_gen_mode = new_val;
-                               rtnl_lock();
                                addrconf_dev_config(idev->dev);
-                               rtnl_unlock();
                        }
                }
        }
 
+out:
+       rtnl_unlock();
+
        return ret;
 }
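The hunk above takes the RTNL for the whole handler and converts the early returns into goto out so the lock is always dropped; rtnl_trylock() plus restart_syscall() is the standard idiom for sysctl handlers that must not block on the RTNL, avoiding lock-order deadlocks with paths that already hold it. A condensed sketch of the idiom (the validation step is hypothetical):

	static int my_handler(struct ctl_table *ctl, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		int ret;

		if (!rtnl_trylock())
			return restart_syscall();	/* retry from syscall entry */

		ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
		if (write && ret == 0 && !my_value_ok(ctl->data)) {	/* hypothetical */
			ret = -EINVAL;
			goto out;
		}
	out:
		rtnl_unlock();
		return ret;
	}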
 
index 055c51b80f5dd1fa37873a665bbe3810d3f17c3e..97c724224da7b2b12e4dea38c3ff6dcf218f1a3a 100644 (file)
@@ -64,7 +64,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
        nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
 
        /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
-       nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
+       nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
               ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
               (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
               ih->hop_limit,
index f54f4265b37f291ea10c8f67a45a243d2095074c..d94f1dfa54c84234dc7dc2ea1edfe29942ca66e8 100644 (file)
@@ -2891,6 +2891,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
        [RTA_ENCAP]             = { .type = NLA_NESTED },
        [RTA_EXPIRES]           = { .type = NLA_U32 },
        [RTA_UID]               = { .type = NLA_U32 },
+       [RTA_MARK]              = { .type = NLA_U32 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
index 1215693fdd22897b5217b878763d56d087a25372..35dbf3dc3d28314178a1ae2ab52f46ab27cb93ef 100644 (file)
@@ -51,7 +51,7 @@ irnet_ctrl_write(irnet_socket *       ap,
   char *       next;           /* Next command to process */
   int          length;         /* Length of current command */
 
-  DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count);
+  DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count);
 
   /* Check for overflow... */
   DABORT(count >= IRNET_MAX_COMMAND, -ENOMEM,
@@ -66,7 +66,7 @@ irnet_ctrl_write(irnet_socket *       ap,
 
   /* Safe terminate the string */
   command[count] = '\0';
-  DEBUG(CTRL_INFO, "Command line received is ``%s'' (%Zd).\n",
+  DEBUG(CTRL_INFO, "Command line received is ``%s'' (%zd).\n",
        command, count);
 
   /* Check every commands in the command line */
@@ -285,7 +285,7 @@ irnet_ctrl_read(irnet_socket *      ap,
   char         event[75];
   ssize_t      ret = 0;
 
-  DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count);
+  DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count);
 
 #ifdef INITIAL_DISCOVERY
   /* Check if we have read the log */
@@ -328,7 +328,7 @@ irnet_ctrl_read(irnet_socket *      ap,
   if(ret != 0)
     {
       /* No, return the error code */
-      DEXIT(CTRL_TRACE, " - ret %Zd\n", ret);
+      DEXIT(CTRL_TRACE, " - ret %zd\n", ret);
       return ret;
     }
 
@@ -568,7 +568,7 @@ dev_irnet_write(struct file *       file,
 {
   irnet_socket *       ap = file->private_data;
 
-  DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
+  DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n",
        file, ap, count);
   DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n");
 
@@ -592,7 +592,7 @@ dev_irnet_read(struct file *        file,
 {
   irnet_socket *       ap = file->private_data;
 
-  DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
+  DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n",
        file, ap, count);
   DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n");
 
index b58000efee7377011c9607b09e1d8106b3632f15..8adab6335ced9f1018318094be20c132a70f8475 100644 (file)
@@ -1058,10 +1058,10 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 
        /* Debug */
        if (session->send_seq)
-               l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %Zd bytes, ns=%u\n",
+               l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %zd bytes, ns=%u\n",
                         session->name, data_len, session->ns - 1);
        else
-               l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %Zd bytes\n",
+               l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %zd bytes\n",
                         session->name, data_len);
 
        if (session->debug & L2TP_MSG_DATA) {
index c28b0af9c1f21735915433aecd632165a4a82580..6e7b6a07b7d536a64f7e9ec296adf2bbba8d961c 100644 (file)
@@ -681,7 +681,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
                   2 + /* NULL SSID */
                   /* Channel Switch Announcement */
                   2 + sizeof(struct ieee80211_channel_sw_ie) +
-                  /* Mesh Channel Swith Parameters */
+                  /* Mesh Channel Switch Parameters */
                   2 + sizeof(struct ieee80211_mesh_chansw_params_ie) +
                   2 + 8 + /* supported rates */
                   2 + 3; /* DS params */
index 05ccd55b5d83d4737347be7c9b9ada5cbb43a89f..83b8b11f24ea1dadc0501bd8a4c15598195524a2 100644 (file)
@@ -463,9 +463,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
        unsigned long flags;
 
        spin_lock_irqsave(&local->ack_status_lock, flags);
-       skb = idr_find(&local->ack_status_frames, info->ack_frame_id);
-       if (skb)
-               idr_remove(&local->ack_status_frames, info->ack_frame_id);
+       skb = idr_remove(&local->ack_status_frames, info->ack_frame_id);
        spin_unlock_irqrestore(&local->ack_status_lock, flags);
 
        if (!skb)
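
This hunk leans on the 4.11 IDR rework, in which idr_remove() returns the pointer that was stored under the ID (or NULL if the ID was vacant), so the old idr_find() + idr_remove() pair collapses into a single operation under the lock. A generic sketch:

    static DEFINE_SPINLOCK(example_lock);
    static DEFINE_IDR(example_idr);

    static void *example_take(int id)
    {
            void *old;

            spin_lock(&example_lock);
            old = idr_remove(&example_idr, id);  /* stored pointer, or NULL */
            spin_unlock(&example_lock);

            return old;  /* the caller owns whatever was stored */
    }
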
index 096a45103f14cbbaccc07b7574b9fcfc7e55ff92..e6a2753dff9e91dac406e657ad0d49875f052503 100644 (file)
@@ -1429,7 +1429,7 @@ int __init ip_vs_conn_init(void)
                "(size=%d, memory=%ldKbytes)\n",
                ip_vs_conn_tab_size,
                (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
-       IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
+       IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
                  sizeof(struct ip_vs_conn));
 
        for (idx = 0; idx < ip_vs_conn_tab_size; idx++)
index 6be5c538b71e6fda1085bf01849af5d9df169895..75f798f8e83b706701787706c6fad2facad35155 100644 (file)
@@ -163,7 +163,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
                return -ENOMEM;
 
        svc->sched_data = s;
-       IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
+       IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for "
                  "current service\n",
                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 
@@ -183,7 +183,7 @@ static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
 
        /* release the table itself */
        kfree_rcu(s, rcu_head);
-       IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
+       IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n",
                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 }
 
index cccf4d637412ea076d3150ca5f80afbb6ac1d880..5824927cf8e02b7fa02f319177d96219c9427033 100644 (file)
@@ -356,7 +356,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
                return -ENOMEM;
 
        svc->sched_data = tbl;
-       IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
+       IP_VS_DBG(6, "LBLC hash table (memory=%zdbytes) allocated for "
                  "current service\n", sizeof(*tbl));
 
        /*
@@ -393,7 +393,7 @@ static void ip_vs_lblc_done_svc(struct ip_vs_service *svc)
 
        /* release the table itself */
        kfree_rcu(tbl, rcu_head);
-       IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
+       IP_VS_DBG(6, "LBLC hash table (memory=%zdbytes) released\n",
                  sizeof(*tbl));
 }
 
index 796d70e47dddfcd7f291df239ddab8270819cbe5..703f11877beece84cb56ec62d4bd13e87c0d67c3 100644 (file)
@@ -519,7 +519,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
                return -ENOMEM;
 
        svc->sched_data = tbl;
-       IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
+       IP_VS_DBG(6, "LBLCR hash table (memory=%zdbytes) allocated for "
                  "current service\n", sizeof(*tbl));
 
        /*
@@ -556,7 +556,7 @@ static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
 
        /* release the table itself */
        kfree_rcu(tbl, rcu_head);
-       IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
+       IP_VS_DBG(6, "LBLCR hash table (memory=%zdbytes) released\n",
                  sizeof(*tbl));
 }
 
index 1e373a5e44e34cf172f3843b1fcc700ed3a57cbc..16aaac6eedc963dee56f088c8933dc962bbd27c6 100644 (file)
@@ -239,7 +239,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
                return -ENOMEM;
 
        svc->sched_data = s;
-       IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
+       IP_VS_DBG(6, "SH hash table (memory=%zdbytes) allocated for "
                  "current service\n",
                  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
 
@@ -259,7 +259,7 @@ static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
 
        /* release the table itself */
        kfree_rcu(s, rcu_head);
-       IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
+       IP_VS_DBG(6, "SH hash table (memory=%zdbytes) released\n",
                  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
 }
 
index 9350530c16c1b0e9524591945214fc8775e5a9e6..b03c28084f814a9f0b805357e5164ef7b00b0985 100644 (file)
@@ -1791,7 +1791,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
        u16 mtu, min_mtu;
 
        IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
-       IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+       IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
                  sizeof(struct ip_vs_sync_conn_v0));
 
        if (!ipvs->sync_state) {
index e19a69787d994a506ed7e237598aa2cd6c4014ef..4b2e1fb28bb438d695715fc492f52bf7809ade5d 100644 (file)
@@ -410,7 +410,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
        struct net *net = nf_ct_exp_net(expect);
        struct hlist_node *next;
        unsigned int h;
-       int ret = 1;
+       int ret = 0;
 
        if (!master_help) {
                ret = -ESHUTDOWN;
@@ -460,14 +460,14 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
 
        spin_lock_bh(&nf_conntrack_expect_lock);
        ret = __nf_ct_expect_check(expect);
-       if (ret <= 0)
+       if (ret < 0)
                goto out;
 
        nf_ct_expect_insert(expect);
 
        spin_unlock_bh(&nf_conntrack_expect_lock);
        nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
-       return ret;
+       return 0;
 out:
        spin_unlock_bh(&nf_conntrack_expect_lock);
        return ret;
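
The __nf_ct_expect_check() change is a return-convention cleanup: it used to return 1 to mean "OK to insert", forcing the caller to test ret <= 0; it now follows the common 0/-errno convention, and nf_ct_expect_related_report() reports success as a plain 0. The convention in miniature (hypothetical helper name):

    /* 0 on success, negative errno on failure; no magic positive values */
    static int example_check(const struct nf_conntrack_expect *exp)
    {
            if (!exp->master)
                    return -ESHUTDOWN;
            return 0;
    }
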
index e3ed200608788bc5e8dc96a14b8674d0fbaa7d4b..4aecef4a89fb135e5b50ac382f19645aafaaff94 100644 (file)
@@ -300,7 +300,7 @@ static int find_pattern(const char *data, size_t dlen,
 {
        size_t i = plen;
 
-       pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
+       pr_debug("find_pattern `%s': dlen = %zu\n", pattern, dlen);
 
        if (dlen <= plen) {
                /* Short packet: try for partial? */
index 3b79f34b5095eff787cffa727bd286b341821a3d..de8782345c863777c8cedf95a5ccf60504e9586f 100644 (file)
@@ -48,7 +48,7 @@ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
        if (helper == NULL)
                return NF_DROP;
 
-       /* This is an user-space helper not yet configured, skip. */
+       /* This is a user-space helper not yet configured, skip. */
        if ((helper->flags &
            (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) ==
             NF_CT_HELPER_F_USERSPACE)
index c6b8022c0e47d43e11a7f2f351f3f0650c9ba604..bf548a7a71ec9b49cf308af041811d2eb5f33c8c 100644 (file)
@@ -528,6 +528,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
                if (!nft_ct_tmpl_alloc_pcpu())
                        return -ENOMEM;
                nft_ct_pcpu_template_refcnt++;
+               len = sizeof(u16);
                break;
 #endif
        default:
index 97f9649bcc7e8b26969e29dab5b00ad28d5db6c3..152d226552c174929fd8973f023eaac888e4b0a9 100644 (file)
@@ -258,7 +258,7 @@ static int nft_bitmap_init(const struct nft_set *set,
 {
        struct nft_bitmap *priv = nft_set_priv(set);
 
-       priv->bitmap_size = nft_bitmap_total_size(set->klen);
+       priv->bitmap_size = nft_bitmap_size(set->klen);
 
        return 0;
 }
index 016db6be94b996c797c9c4de0fd324c737706393..14857afc9937d30cae604fe839029593c1006944 100644 (file)
@@ -667,7 +667,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems,
            COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
                return -EINVAL;
 
-       /* compat_xt_entry match has less strict aligment requirements,
+       /* compat_xt_entry match has less strict alignment requirements,
         * otherwise they are identical.  In case of padding differences
         * we need to add compat version of xt_check_entry_match.
         */
index 91fe46f1e4ccf018a554c149a5ce3e804dc9991a..0f557b2433112190d21c57ff8078dd4a8d719c0a 100644 (file)
@@ -45,8 +45,8 @@
 #include "ib.h"
 #include "ib_mr.h"
 
-unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
-unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
+static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
+static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
 unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
 
 module_param(rds_ib_mr_1m_pool_size, int, 0444);
index 540458928f3c8eab4529b4072826b882563c941e..ec550626e221cb80fa5aaec7a00116dc3bd55c5d 100644 (file)
@@ -136,7 +136,7 @@ struct rds_ib_connection {
        struct rds_ib_work_ring i_send_ring;
        struct rm_data_op       *i_data_op;
        struct rds_header       *i_send_hdrs;
-       u64                     i_send_hdrs_dma;
+       dma_addr_t              i_send_hdrs_dma;
        struct rds_ib_send_work *i_sends;
        atomic_t                i_signaled_sends;
 
@@ -146,7 +146,7 @@ struct rds_ib_connection {
        struct rds_ib_incoming  *i_ibinc;
        u32                     i_recv_data_rem;
        struct rds_header       *i_recv_hdrs;
-       u64                     i_recv_hdrs_dma;
+       dma_addr_t              i_recv_hdrs_dma;
        struct rds_ib_recv_work *i_recvs;
        u64                     i_ack_recv;     /* last ACK received */
        struct rds_ib_refill_cache i_cache_incs;
@@ -164,7 +164,7 @@ struct rds_ib_connection {
        struct rds_header       *i_ack;
        struct ib_send_wr       i_ack_wr;
        struct ib_sge           i_ack_sge;
-       u64                     i_ack_dma;
+       dma_addr_t              i_ack_dma;
        unsigned long           i_ack_queued;
 
        /* Flow control related information
@@ -235,7 +235,7 @@ struct rds_ib_device {
        int                     *vector_load;
 };
 
-#define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device)
+#define ibdev_to_node(ibdev) dev_to_node((ibdev)->dev.parent)
 #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
 
 /* bits for i_ack_flags */
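
The u64 -> dma_addr_t conversions are a type-correctness fix: the DMA API traffics in dma_addr_t, which is 32 bits wide on platforms without CONFIG_ARCH_DMA_ADDR_T_64BIT, so stashing mappings in u64 fields happens to work on 64-bit builds but defeats type checking. A sketch with hypothetical names:

    struct example_conn {
            struct rds_header *hdrs;
            dma_addr_t         hdrs_dma;  /* was u64 */
    };

    static int example_map(struct ib_device *dev, struct example_conn *c)
    {
            c->hdrs_dma = ib_dma_map_single(dev, c->hdrs, sizeof(*c->hdrs),
                                            DMA_BIDIRECTIONAL);
            if (ib_dma_mapping_error(dev, c->hdrs_dma))
                    return -ENOMEM;
            return 0;
    }
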
index 1c754f4acbe5dade44b2a6ad09ec5c534345815c..5d6e98a79a5e4b3de1f472c5fc513fce545bf6f9 100644 (file)
@@ -45,7 +45,6 @@
 
 struct rds_ib_fmr {
        struct ib_fmr           *fmr;
-       u64                     *dma;
 };
 
 enum rds_ib_fr_state {
@@ -108,8 +107,6 @@ struct rds_ib_mr_pool {
 };
 
 extern struct workqueue_struct *rds_ib_mr_wq;
-extern unsigned int rds_ib_mr_1m_pool_size;
-extern unsigned int rds_ib_mr_8k_pool_size;
 extern bool prefer_frmr;
 
 struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
index 199b46e93e64ee7786e8a8d441ba5eb5b02bf31f..7fb59c3f1542af319b882399b4a0f563dc0b8a0d 100644 (file)
@@ -290,10 +290,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
        cp.exclusive            = false;
        cp.service_id           = srx->srx_service;
        call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
+       /* The socket has been unlocked. */
        if (!IS_ERR(call))
                call->notify_rx = notify_rx;
 
-       release_sock(&rx->sk);
+       mutex_unlock(&call->user_mutex);
        _leave(" = %p", call);
        return call;
 }
@@ -310,7 +311,10 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call);
 void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
 {
        _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+
+       mutex_lock(&call->user_mutex);
        rxrpc_release_call(rxrpc_sk(sock->sk), call);
+       mutex_unlock(&call->user_mutex);
        rxrpc_put_call(call, rxrpc_call_put_kernel);
 }
 EXPORT_SYMBOL(rxrpc_kernel_end_call);
@@ -450,14 +454,16 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
        case RXRPC_SERVER_BOUND:
        case RXRPC_SERVER_LISTENING:
                ret = rxrpc_do_sendmsg(rx, m, len);
-               break;
+               /* The socket has been unlocked */
+               goto out;
        default:
                ret = -EINVAL;
-               break;
+               goto error_unlock;
        }
 
 error_unlock:
        release_sock(&rx->sk);
+out:
        _leave(" = %d", ret);
        return ret;
 }
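
The rxrpc hunks from here on all implement one locking rule: the socket lock is held only long enough to look up or publish a call, after which the call's new user_mutex serialises user access, so a long sendmsg() or recvmsg() on one call no longer blocks every other call on the same socket. The handover in outline (hypothetical wrapper; error mapping trimmed):

    static int example_sendmsg(struct rxrpc_sock *rx, unsigned long id)
    {
            struct rxrpc_call *call;
            int ret;

            lock_sock(&rx->sk);
            call = rxrpc_find_call_by_user_ID(rx, id);  /* takes a ref */
            if (!call) {
                    release_sock(&rx->sk);
                    return -EBADSLT;
            }

            /* Hand over: take the call mutex, then drop the socket lock. */
            ret = mutex_lock_interruptible(&call->user_mutex);
            release_sock(&rx->sk);
            if (ret < 0)
                    goto put;

            /* ... long-running copy, only call->user_mutex held; set ret ... */

            mutex_unlock(&call->user_mutex);
    put:
            rxrpc_put_call(call, rxrpc_call_put);
            return ret;
    }
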
index 12be432be9b2feb5dc9a85716c676888599b5624..26a7b1db1361e554733b0ff40a54d8e68e59af09 100644 (file)
@@ -467,6 +467,7 @@ struct rxrpc_call {
        struct rxrpc_connection *conn;          /* connection carrying call */
        struct rxrpc_peer       *peer;          /* Peer record for remote address */
        struct rxrpc_sock __rcu *socket;        /* socket responsible */
+       struct mutex            user_mutex;     /* User access mutex */
        ktime_t                 ack_at;         /* When deferred ACK needs to happen */
        ktime_t                 resend_at;      /* When next resend needs to happen */
        ktime_t                 ping_at;        /* When next to send a ping */
index 7c4c64ab8da2e241d63ee16a0ae3521c98dc2e5c..0ed181f53f32a0145c03b0006b92de5c7a0101aa 100644 (file)
@@ -323,6 +323,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
  *
  * If we want to report an error, we mark the skb with the packet type and
  * abort code and return NULL.
+ *
+ * The call is returned with the user access mutex held.
  */
 struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
                                           struct rxrpc_connection *conn,
@@ -371,6 +373,18 @@ found_service:
        trace_rxrpc_receive(call, rxrpc_receive_incoming,
                            sp->hdr.serial, sp->hdr.seq);
 
+       /* Lock the call to prevent rxrpc_kernel_send/recv_data() and
+        * sendmsg()/recvmsg() inconveniently stealing the mutex once the
+        * notification is generated.
+        *
+        * The BUG should never happen because the kernel should be well
+        * behaved enough not to access the call before the first notification
+        * event and userspace is prevented from doing so until the state is
+        * appropriate.
+        */
+       if (!mutex_trylock(&call->user_mutex))
+               BUG();
+
        /* Make the call live. */
        rxrpc_incoming_call(rx, call, skb);
        conn = call->conn;
@@ -429,10 +443,12 @@ out:
 /*
  * handle acceptance of a call by userspace
  * - assign the user call ID to the call at the front of the queue
+ * - called with the socket locked.
  */
 struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
                                     unsigned long user_call_ID,
                                     rxrpc_notify_rx_t notify_rx)
+       __releases(&rx->sk.sk_lock.slock)
 {
        struct rxrpc_call *call;
        struct rb_node *parent, **pp;
@@ -446,6 +462,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
 
        if (list_empty(&rx->to_be_accepted)) {
                write_unlock(&rx->call_lock);
+               release_sock(&rx->sk);
                kleave(" = -ENODATA [empty]");
                return ERR_PTR(-ENODATA);
        }
@@ -470,10 +487,39 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
         */
        call = list_entry(rx->to_be_accepted.next,
                          struct rxrpc_call, accept_link);
+       write_unlock(&rx->call_lock);
+
+       /* We need to gain the mutex from the interrupt handler without
+        * upsetting lockdep, so we have to release it there and take it here.
+        * We are, however, still holding the socket lock, so other accepts
+        * must wait for us and no one can add the user ID behind our backs.
+        */
+       if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+               release_sock(&rx->sk);
+               kleave(" = -ERESTARTSYS");
+               return ERR_PTR(-ERESTARTSYS);
+       }
+
+       write_lock(&rx->call_lock);
        list_del_init(&call->accept_link);
        sk_acceptq_removed(&rx->sk);
        rxrpc_see_call(call);
 
+       /* Find the user ID insertion point. */
+       pp = &rx->calls.rb_node;
+       parent = NULL;
+       while (*pp) {
+               parent = *pp;
+               call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+               if (user_call_ID < call->user_call_ID)
+                       pp = &(*pp)->rb_left;
+               else if (user_call_ID > call->user_call_ID)
+                       pp = &(*pp)->rb_right;
+               else
+                       BUG();
+       }
+
        write_lock_bh(&call->state_lock);
        switch (call->state) {
        case RXRPC_CALL_SERVER_ACCEPTING:
@@ -499,6 +545,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
        write_unlock(&rx->call_lock);
        rxrpc_notify_socket(call);
        rxrpc_service_prealloc(rx, GFP_KERNEL);
+       release_sock(&rx->sk);
        _leave(" = %p{%d}", call, call->debug_id);
        return call;
 
@@ -515,6 +562,7 @@ id_in_use:
        write_unlock(&rx->call_lock);
 out:
        rxrpc_service_prealloc(rx, GFP_KERNEL);
+       release_sock(&rx->sk);
        _leave(" = %d", ret);
        return ERR_PTR(ret);
 }
index 8b94db3c9b2ecb5f093798eeae0e8630ac0114ab..d79cd36987a95b86f2af9fac4688ab86e20f41d5 100644 (file)
@@ -115,6 +115,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
        if (!call->rxtx_annotations)
                goto nomem_2;
 
+       mutex_init(&call->user_mutex);
        setup_timer(&call->timer, rxrpc_call_timer_expired,
                    (unsigned long)call);
        INIT_WORK(&call->processor, &rxrpc_process_call);
@@ -194,14 +195,16 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
 }
 
 /*
- * set up a call for the given data
- * - called in process context with IRQs enabled
+ * Set up a call for the given parameters.
+ * - Called with the socket lock held, which it must release.
+ * - If it returns a call, the call's lock will need releasing by the caller.
  */
 struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
                                         struct rxrpc_conn_parameters *cp,
                                         struct sockaddr_rxrpc *srx,
                                         unsigned long user_call_ID,
                                         gfp_t gfp)
+       __releases(&rx->sk.sk_lock.slock)
 {
        struct rxrpc_call *call, *xcall;
        struct rb_node *parent, **pp;
@@ -212,6 +215,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 
        call = rxrpc_alloc_client_call(srx, gfp);
        if (IS_ERR(call)) {
+               release_sock(&rx->sk);
                _leave(" = %ld", PTR_ERR(call));
                return call;
        }
@@ -219,6 +223,11 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
        trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
                         here, (const void *)user_call_ID);
 
+       /* We need to protect a partially set up call against the user as we
+        * will be acting outside the socket lock.
+        */
+       mutex_lock(&call->user_mutex);
+
        /* Publish the call, even though it is incompletely set up as yet */
        write_lock(&rx->call_lock);
 
@@ -250,6 +259,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
        list_add_tail(&call->link, &rxrpc_calls);
        write_unlock(&rxrpc_call_lock);
 
+       /* From this point on, the call is protected by its own lock. */
+       release_sock(&rx->sk);
+
        /* Set up or get a connection record and set the protocol parameters,
         * including channel number and call ID.
         */
@@ -279,6 +291,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
         */
 error_dup_user_ID:
        write_unlock(&rx->call_lock);
+       release_sock(&rx->sk);
        ret = -EEXIST;
 
 error:
@@ -287,6 +300,7 @@ error:
        trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
                         here, ERR_PTR(ret));
        rxrpc_release_call(rx, call);
+       mutex_unlock(&call->user_mutex);
        rxrpc_put_call(call, rxrpc_call_put);
        _leave(" = %d", ret);
        return ERR_PTR(ret);
index 78ec33477adf6c516fc26fd3c4991280164a6666..9f4cfa25af7c92c406e81d8003b8aa07c7892a04 100644 (file)
@@ -1194,6 +1194,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
                        goto reject_packet;
                }
                rxrpc_send_ping(call, skb, skew);
+               mutex_unlock(&call->user_mutex);
        }
 
        rxrpc_input_call_packet(call, skb, skew);
index f3a688e108430a9e9d32e822e54df0700940aacf..22447dbcc380211f1bd1eb84ad9fc2b48137ff0c 100644 (file)
@@ -487,6 +487,20 @@ try_again:
 
        trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0);
 
+       /* We're going to drop the socket lock, so we need to lock the call
+        * against interference by sendmsg.
+        */
+       if (!mutex_trylock(&call->user_mutex)) {
+               ret = -EWOULDBLOCK;
+               if (flags & MSG_DONTWAIT)
+                       goto error_requeue_call;
+               ret = -ERESTARTSYS;
+               if (mutex_lock_interruptible(&call->user_mutex) < 0)
+                       goto error_requeue_call;
+       }
+
+       release_sock(&rx->sk);
+
        if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
                BUG();
 
@@ -502,7 +516,7 @@ try_again:
                                       &call->user_call_ID);
                }
                if (ret < 0)
-                       goto error;
+                       goto error_unlock_call;
        }
 
        if (msg->msg_name) {
@@ -533,12 +547,12 @@ try_again:
        }
 
        if (ret < 0)
-               goto error;
+               goto error_unlock_call;
 
        if (call->state == RXRPC_CALL_COMPLETE) {
                ret = rxrpc_recvmsg_term(call, msg);
                if (ret < 0)
-                       goto error;
+                       goto error_unlock_call;
                if (!(flags & MSG_PEEK))
                        rxrpc_release_call(rx, call);
                msg->msg_flags |= MSG_EOR;
@@ -551,8 +565,21 @@ try_again:
                msg->msg_flags &= ~MSG_MORE;
        ret = copied;
 
-error:
+error_unlock_call:
+       mutex_unlock(&call->user_mutex);
        rxrpc_put_call(call, rxrpc_call_put);
+       trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
+       return ret;
+
+error_requeue_call:
+       if (!(flags & MSG_PEEK)) {
+               write_lock_bh(&rx->recvmsg_lock);
+               list_add(&call->recvmsg_link, &rx->recvmsg_q);
+               write_unlock_bh(&rx->recvmsg_lock);
+               trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0);
+       } else {
+               rxrpc_put_call(call, rxrpc_call_put);
+       }
 error_no_call:
        release_sock(&rx->sk);
        trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
@@ -609,7 +636,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
        iov.iov_len = size - *_offset;
        iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
 
-       lock_sock(sock->sk);
+       mutex_lock(&call->user_mutex);
 
        switch (call->state) {
        case RXRPC_CALL_CLIENT_RECV_REPLY:
@@ -648,7 +675,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 read_phase_complete:
        ret = 1;
 out:
-       release_sock(sock->sk);
+       mutex_unlock(&call->user_mutex);
        _leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
        return ret;
 
index 0a6ef217aa8ada693f570ae03e9bede1e261e687..31c1538c1a8de69b8afe4335b9e5b263700ce980 100644 (file)
@@ -59,9 +59,12 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
                }
 
                trace_rxrpc_transmit(call, rxrpc_transmit_wait);
-               release_sock(&rx->sk);
+               mutex_unlock(&call->user_mutex);
                *timeo = schedule_timeout(*timeo);
-               lock_sock(&rx->sk);
+               if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+                       ret = sock_intr_errno(*timeo);
+                       break;
+               }
        }
 
        remove_wait_queue(&call->waitq, &myself);
@@ -171,7 +174,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
 /*
  * send data through a socket
  * - must be called in process context
- * - caller holds the socket locked
+ * - The caller holds the call user access mutex, but not the socket lock.
  */
 static int rxrpc_send_data(struct rxrpc_sock *rx,
                           struct rxrpc_call *call,
@@ -437,10 +440,13 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
 
 /*
  * Create a new client call for sendmsg().
+ * - Called with the socket lock held, which it must release.
+ * - If it returns a call, the call's lock will need releasing by the caller.
  */
 static struct rxrpc_call *
 rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
                                  unsigned long user_call_ID, bool exclusive)
+       __releases(&rx->sk.sk_lock.slock)
 {
        struct rxrpc_conn_parameters cp;
        struct rxrpc_call *call;
@@ -450,8 +456,10 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 
        _enter("");
 
-       if (!msg->msg_name)
+       if (!msg->msg_name) {
+               release_sock(&rx->sk);
                return ERR_PTR(-EDESTADDRREQ);
+       }
 
        key = rx->key;
        if (key && !rx->key->payload.data[0])
@@ -464,6 +472,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
        cp.exclusive            = rx->exclusive | exclusive;
        cp.service_id           = srx->srx_service;
        call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
+       /* The socket is now unlocked */
 
        _leave(" = %p\n", call);
        return call;
@@ -475,6 +484,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
  * - the socket may be either a client socket or a server socket
  */
 int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+       __releases(&rx->sk.sk_lock.slock)
 {
        enum rxrpc_command cmd;
        struct rxrpc_call *call;
@@ -488,12 +498,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
        ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
                                 &exclusive);
        if (ret < 0)
-               return ret;
+               goto error_release_sock;
 
        if (cmd == RXRPC_CMD_ACCEPT) {
+               ret = -EINVAL;
                if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
-                       return -EINVAL;
+                       goto error_release_sock;
                call = rxrpc_accept_call(rx, user_call_ID, NULL);
+               /* The socket is now unlocked. */
                if (IS_ERR(call))
                        return PTR_ERR(call);
                rxrpc_put_call(call, rxrpc_call_put);
@@ -502,12 +514,29 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 
        call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
        if (!call) {
+               ret = -EBADSLT;
                if (cmd != RXRPC_CMD_SEND_DATA)
-                       return -EBADSLT;
+                       goto error_release_sock;
+               ret = -EBUSY;
+               if (call->state == RXRPC_CALL_UNINITIALISED ||
+                   call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
+                   call->state == RXRPC_CALL_SERVER_PREALLOC ||
+                   call->state == RXRPC_CALL_SERVER_SECURING ||
+                   call->state == RXRPC_CALL_SERVER_ACCEPTING)
+                       goto error_release_sock;
                call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
                                                         exclusive);
+               /* The socket is now unlocked... */
                if (IS_ERR(call))
                        return PTR_ERR(call);
+               /* ... and we have the call lock. */
+       } else {
+               ret = mutex_lock_interruptible(&call->user_mutex);
+               release_sock(&rx->sk);
+               if (ret < 0) {
+                       ret = -ERESTARTSYS;
+                       goto error_put;
+               }
        }
 
        _debug("CALL %d USR %lx ST %d on CONN %p",
@@ -535,9 +564,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
                ret = rxrpc_send_data(rx, call, msg, len);
        }
 
+       mutex_unlock(&call->user_mutex);
+error_put:
        rxrpc_put_call(call, rxrpc_call_put);
        _leave(" = %d", ret);
        return ret;
+
+error_release_sock:
+       release_sock(&rx->sk);
+       return ret;
 }
 
 /**
@@ -562,7 +597,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
        ASSERTCMP(msg->msg_name, ==, NULL);
        ASSERTCMP(msg->msg_control, ==, NULL);
 
-       lock_sock(sock->sk);
+       mutex_lock(&call->user_mutex);
 
        _debug("CALL %d USR %lx ST %d on CONN %p",
               call->debug_id, call->user_call_ID, call->state, call->conn);
@@ -577,7 +612,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
                ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
        }
 
-       release_sock(sock->sk);
+       mutex_unlock(&call->user_mutex);
        _leave(" = %d", ret);
        return ret;
 }
@@ -598,12 +633,12 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
 {
        _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
 
-       lock_sock(sock->sk);
+       mutex_lock(&call->user_mutex);
 
        if (rxrpc_abort_call(why, call, 0, abort_code, error))
                rxrpc_send_abort_packet(call);
 
-       release_sock(sock->sk);
+       mutex_unlock(&call->user_mutex);
        _leave("");
 }
 
index fc458968fe4bd818dc9b6475ffa0db07b830d43a..2a28ab20487f03f61ed8d74cb511bce2973ce242 100644 (file)
@@ -884,14 +884,17 @@ int sctp_hash_transport(struct sctp_transport *t)
        arg.paddr = &t->ipaddr;
        arg.lport = htons(t->asoc->base.bind_addr.port);
 
+       rcu_read_lock();
        list = rhltable_lookup(&sctp_transport_hashtable, &arg,
                               sctp_hash_params);
 
        rhl_for_each_entry_rcu(transport, tmp, list, node)
                if (transport->asoc->ep == t->asoc->ep) {
+                       rcu_read_unlock();
                        err = -EEXIST;
                        goto out;
                }
+       rcu_read_unlock();
 
        err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
                                  &t->node, sctp_hash_params);
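
rhltable_lookup() hands back an RCU-protected bucket list, and rhl_for_each_entry_rcu() walks it under that protection, so the hunk brackets both, including the early-exit path, with rcu_read_lock()/rcu_read_unlock(). Schematically (the entry type and match helper are placeholders; the entry embeds a struct rhlist_head named 'node'):

    static bool example_lookup(struct rhltable *tbl, const void *key,
                               const struct rhashtable_params params)
    {
            struct rhlist_head *list, *tmp;
            struct example_entry *pos;
            bool found = false;

            rcu_read_lock();
            list = rhltable_lookup(tbl, key, params);
            rhl_for_each_entry_rcu(pos, tmp, list, node) {
                    if (example_match(pos, key)) {
                            found = true;
                            break;
                    }
            }
            rcu_read_unlock();
            return found;
    }
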
index 85406d5f8f41e739738c51988e4a093f1859b000..71ce6b945dcb54d831425bdb02e315a14dae69ef 100644 (file)
@@ -177,7 +177,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
 {
        sctp_xmit_t retval;
 
-       pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__,
+       pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__,
                 packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
 
        switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
index 5b63ceb3bf3758f441a3240d7b516c5e543bfc98..3379668af3686de2ec14db980b1ef527a6d1045f 100644 (file)
@@ -643,9 +643,7 @@ void sctp_transport_reset(struct sctp_transport *t)
        t->srtt = 0;
        t->rttvar = 0;
 
-       /* Reset these additional varibles so that we have a clean
-        * slate.
-        */
+       /* Reset these additional variables so that we have a clean slate. */
        t->partial_bytes_acked = 0;
        t->flight_size = 0;
        t->error_count = 0;
index cdeb1d81483350549fc558c0331ee2aa45a5c9c2..4f16953e495436d9f97ac8471650e11224fe0744 100644 (file)
@@ -763,7 +763,7 @@ err_put_ctx:
 err:
        kfree(buf);
 out:
-       dprintk("RPC:       %s returning %Zd\n", __func__, err);
+       dprintk("RPC:       %s returning %zd\n", __func__, err);
        return err;
 }
 
index 1530825985221a1aeb5f77ee81f4251acdef9d96..a54a7a3d28f5300e7940769b1b3bc0b5daa7cfbb 100644 (file)
@@ -1489,8 +1489,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
        case RPC_GSS_PROC_DESTROY:
                if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
                        goto auth_err;
-               rsci->h.expiry_time = seconds_since_boot();
-               set_bit(CACHE_NEGATIVE, &rsci->h.flags);
+               /* Delete the entry from the cache_list and call cache_put */
+               sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
                if (resv->iov_len + 4 > PAGE_SIZE)
                        goto drop;
                svc_putnl(resv, RPC_SUCCESS);
index f39e3e11f9aa283698ced6a8ca92fed5f68140e5..d8639da06d9cd4815a407ef4dec4340bee68caf4 100644 (file)
@@ -362,11 +362,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
        cache_purge(cd);
        spin_lock(&cache_list_lock);
        write_lock(&cd->hash_lock);
-       if (cd->entries) {
-               write_unlock(&cd->hash_lock);
-               spin_unlock(&cache_list_lock);
-               goto out;
-       }
        if (current_detail == cd)
                current_detail = NULL;
        list_del_init(&cd->others);
@@ -376,9 +371,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
                /* module must be being unloaded so it's safe to kill the worker */
                cancel_delayed_work_sync(&cache_cleaner);
        }
-       return;
-out:
-       printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
 }
 EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
 
@@ -497,13 +489,32 @@ EXPORT_SYMBOL_GPL(cache_flush);
 
 void cache_purge(struct cache_detail *detail)
 {
-       time_t now = seconds_since_boot();
-       if (detail->flush_time >= now)
-               now = detail->flush_time + 1;
-       /* 'now' is the maximum value any 'last_refresh' can have */
-       detail->flush_time = now;
-       detail->nextcheck = seconds_since_boot();
-       cache_flush();
+       struct cache_head *ch = NULL;
+       struct hlist_head *head = NULL;
+       struct hlist_node *tmp = NULL;
+       int i = 0;
+
+       write_lock(&detail->hash_lock);
+       if (!detail->entries) {
+               write_unlock(&detail->hash_lock);
+               return;
+       }
+
+       dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
+       for (i = 0; i < detail->hash_size; i++) {
+               head = &detail->hash_table[i];
+               hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
+                       hlist_del_init(&ch->cache_list);
+                       detail->entries--;
+
+                       set_bit(CACHE_CLEANED, &ch->flags);
+                       write_unlock(&detail->hash_lock);
+                       cache_fresh_unlocked(ch, detail);
+                       cache_put(ch, detail);
+                       write_lock(&detail->hash_lock);
+               }
+       }
+       write_unlock(&detail->hash_lock);
 }
 EXPORT_SYMBOL_GPL(cache_purge);
 
@@ -1855,3 +1866,15 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
 
+void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
+{
+       write_lock(&cd->hash_lock);
+       if (!hlist_unhashed(&h->cache_list)){
+               hlist_del_init(&h->cache_list);
+               cd->entries--;
+               write_unlock(&cd->hash_lock);
+               cache_put(h, cd);
+       } else
+               write_unlock(&cd->hash_lock);
+}
+EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
index 75f290bddca1bbfd5c8a9db9bab1ac3d231f9ada..b94efd93d3e498a94bec4fee5eec8b9748052bdb 100644 (file)
@@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
                for (i = 0; i < progp->pg_nvers; i++) {
                        if (progp->pg_vers[i] == NULL)
                                continue;
-                       if (progp->pg_vers[i]->vs_hidden == 0)
+                       if (!progp->pg_vers[i]->vs_hidden)
                                return 1;
                }
        }
@@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net,
                        if (vers->vs_hidden)
                                continue;
 
+                       /*
+                        * Don't register a UDP port if we need congestion
+                        * control.
+                        */
+                       if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
+                               continue;
+
                        error = __svc_register(net, progp->pg_name, progp->pg_prog,
                                                i, family, proto, port);
 
@@ -1169,6 +1176,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
          !(versp = progp->pg_vers[vers]))
                goto err_bad_vers;
 
+       /*
+        * Some protocol versions (namely NFSv4) require some form of
+        * congestion control.  (See RFC 7530 section 3.1 paragraph 2)
+        * In other words, UDP is not allowed. We mark those when setting
+        * up the svc_xprt, and verify that here.
+        *
+        * The spec is not very clear about what error should be returned
+        * when someone tries to access a server that is listening on UDP
+        * for lower versions. RPC_PROG_MISMATCH seems to be the closest
+        * fit.
+        */
+       if (versp->vs_need_cong_ctrl &&
+           !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
+               goto err_bad_vers;
+
        procp = versp->vs_proc + proc;
        if (proc >= versp->vs_nproc || !procp->pc_func)
                goto err_bad_proc;
@@ -1260,7 +1282,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
        return 0;
 
 err_short_len:
-       svc_printk(rqstp, "short len %Zd, dropping request\n",
+       svc_printk(rqstp, "short len %zd, dropping request\n",
                        argv->iov_len);
        goto close;
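
vs_need_cong_ctrl is matched at transport-setup time by an XPT_CONG_CTRL flag (set for TCP in the svcsock hunk below; the RDMA transport sets it elsewhere in this series), and svc_process_common() above rejects a congestion-requiring version on a transport lacking the flag. A hypothetical version-table entry opting in (field set abbreviated):

    static struct svc_version example_version4 = {
            .vs_vers           = 4,
            .vs_nproc          = ARRAY_SIZE(example_procs4),
            .vs_proc           = example_procs4,
            .vs_xdrsize        = EXAMPLE_XDRSIZE,
            .vs_need_cong_ctrl = true,  /* no UDP, cf. RFC 7530 sec 3.1 */
    };
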
 
index de066acdb34e322fd0ffa78da3e65d380c2f4fc2..8931e33b65412d7b8bbe8b3872e5f7d7b27d92d5 100644 (file)
@@ -278,7 +278,7 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
                               rqstp->rq_respages[0], tailoff);
 
 out:
-       dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
+       dprintk("svc: socket %p sendto([%p %zu... ], %d) = %d (addr %s)\n",
                svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
                xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
 
@@ -346,7 +346,7 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
        if (len == buflen)
                set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
-       dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
+       dprintk("svc: socket %p recvfrom(%p, %zu) = %d\n",
                svsk, iov[0].iov_base, iov[0].iov_len, len);
        return len;
 }
@@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
        svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
                      &svsk->sk_xprt, serv);
        set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
+       set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
        if (sk->sk_state == TCP_LISTEN) {
                dprintk("setting up TCP socket for listening\n");
                set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
index cb1e48e54eb1440181976a352229783f202f896d..ff1df40f0d261bc956f1af3410d8780f4c582b83 100644 (file)
@@ -201,19 +201,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
 {
        struct rpc_xprt *xprt = rqst->rq_xprt;
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-       struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+       __be32 *p;
        int rc;
 
        /* Space in the send buffer for an RPC/RDMA header is reserved
         * via xprt->tsh_size.
         */
-       headerp->rm_xid = rqst->rq_xid;
-       headerp->rm_vers = rpcrdma_version;
-       headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
-       headerp->rm_type = rdma_msg;
-       headerp->rm_body.rm_chunks[0] = xdr_zero;
-       headerp->rm_body.rm_chunks[1] = xdr_zero;
-       headerp->rm_body.rm_chunks[2] = xdr_zero;
+       p = rqst->rq_buffer;
+       *p++ = rqst->rq_xid;
+       *p++ = rpcrdma_version;
+       *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+       *p++ = rdma_msg;
+       *p++ = xdr_zero;
+       *p++ = xdr_zero;
+       *p   = xdr_zero;
 
 #ifdef SVCRDMA_BACKCHANNEL_DEBUG
        pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
index 0ba9887f3e22bab9a1e3e809df5c4e2c23a510fe..1c4aabf0f65772c13265421262feb030ab4a58ca 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2016 Oracle. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
 
 #define RPCDBG_FACILITY        RPCDBG_SVCXPRT
 
-/*
- * Decodes a read chunk list. The expected format is as follows:
- *    descrim  : xdr_one
- *    position : __be32 offset into XDR stream
- *    handle   : __be32 RKEY
- *    . . .
- *  end-of-list: xdr_zero
- */
-static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
 {
-       struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
+       __be32 *next;
 
-       while (ch->rc_discrim != xdr_zero) {
-               if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
-                   (unsigned long)vaend) {
-                       dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
+       while (*p++ != xdr_zero) {
+               next = p + rpcrdma_readchunk_maxsz - 1;
+               if (next > end)
                        return NULL;
-               }
-               ch++;
+               p = next;
        }
-       return &ch->rc_position;
+       return p;
 }
 
-/*
- * Decodes a write chunk list. The expected format is as follows:
- *    descrim  : xdr_one
- *    nchunks  : <count>
- *       handle   : __be32 RKEY           ---+
- *       length   : __be32 <len of segment>  |
- *       offset   : remove va                + <count>
- *       . . .                               |
- *                                        ---+
- */
-static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
 {
-       unsigned long start, end;
-       int nchunks;
-
-       struct rpcrdma_write_array *ary =
-               (struct rpcrdma_write_array *)va;
+       __be32 *next;
 
-       /* Check for not write-array */
-       if (ary->wc_discrim == xdr_zero)
-               return &ary->wc_nchunks;
-
-       if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
-           (unsigned long)vaend) {
-               dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
-               return NULL;
-       }
-       nchunks = be32_to_cpu(ary->wc_nchunks);
-
-       start = (unsigned long)&ary->wc_array[0];
-       end = (unsigned long)vaend;
-       if (nchunks < 0 ||
-           nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
-           (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
-               dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
-                       ary, nchunks, vaend);
-               return NULL;
+       while (*p++ != xdr_zero) {
+               next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+               if (next > end)
+                       return NULL;
+               p = next;
        }
-       /*
-        * rs_length is the 2nd 4B field in wc_target and taking its
-        * address skips the list terminator
-        */
-       return &ary->wc_array[nchunks].wc_target.rs_length;
+       return p;
 }
 
-static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
 {
-       unsigned long start, end;
-       int nchunks;
-       struct rpcrdma_write_array *ary =
-               (struct rpcrdma_write_array *)va;
-
-       /* Check for no reply-array */
-       if (ary->wc_discrim == xdr_zero)
-               return &ary->wc_nchunks;
-
-       if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
-           (unsigned long)vaend) {
-               dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
-               return NULL;
-       }
-       nchunks = be32_to_cpu(ary->wc_nchunks);
-
-       start = (unsigned long)&ary->wc_array[0];
-       end = (unsigned long)vaend;
-       if (nchunks < 0 ||
-           nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
-           (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
-               dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
-                       ary, nchunks, vaend);
-               return NULL;
+       __be32 *next;
+
+       if (*p++ != xdr_zero) {
+               next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+               if (next > end)
+                       return NULL;
+               p = next;
        }
-       return (__be32 *)&ary->wc_array[nchunks];
+       return p;
 }
 
 /**
@@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
  */
 int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
 {
-       struct rpcrdma_msg *rmsgp;
-       __be32 *va, *vaend;
-       unsigned int len;
-       u32 hdr_len;
+       __be32 *p, *end, *rdma_argp;
+       unsigned int hdr_len;
 
        /* Verify that there's enough bytes for header + something */
-       if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) {
-               dprintk("svcrdma: header too short = %d\n",
-                       rq_arg->len);
-               return -EINVAL;
-       }
+       if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
+               goto out_short;
 
-       rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base;
-       if (rmsgp->rm_vers != rpcrdma_version) {
-               dprintk("%s: bad version %u\n", __func__,
-                       be32_to_cpu(rmsgp->rm_vers));
-               return -EPROTONOSUPPORT;
-       }
+       rdma_argp = rq_arg->head[0].iov_base;
+       if (*(rdma_argp + 1) != rpcrdma_version)
+               goto out_version;
 
-       switch (be32_to_cpu(rmsgp->rm_type)) {
-       case RDMA_MSG:
-       case RDMA_NOMSG:
+       switch (*(rdma_argp + 3)) {
+       case rdma_msg:
+       case rdma_nomsg:
                break;
 
-       case RDMA_DONE:
-               /* Just drop it */
-               dprintk("svcrdma: dropping RDMA_DONE message\n");
-               return 0;
-
-       case RDMA_ERROR:
-               /* Possible if this is a backchannel reply.
-                * XXX: We should cancel this XID, though.
-                */
-               dprintk("svcrdma: dropping RDMA_ERROR message\n");
-               return 0;
-
-       case RDMA_MSGP:
-               /* Pull in the extra for the padded case, bump our pointer */
-               rmsgp->rm_body.rm_padded.rm_align =
-                       be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
-               rmsgp->rm_body.rm_padded.rm_thresh =
-                       be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
-
-               va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
-               rq_arg->head[0].iov_base = va;
-               len = (u32)((unsigned long)va - (unsigned long)rmsgp);
-               rq_arg->head[0].iov_len -= len;
-               if (len > rq_arg->len)
-                       return -EINVAL;
-               return len;
-       default:
-               dprintk("svcrdma: bad rdma procedure (%u)\n",
-                       be32_to_cpu(rmsgp->rm_type));
-               return -EINVAL;
-       }
+       case rdma_done:
+               goto out_drop;
 
-       /* The chunk list may contain either a read chunk list or a write
-        * chunk list and a reply chunk list.
-        */
-       va = &rmsgp->rm_body.rm_chunks[0];
-       vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len);
-       va = decode_read_list(va, vaend);
-       if (!va) {
-               dprintk("svcrdma: failed to decode read list\n");
-               return -EINVAL;
-       }
-       va = decode_write_list(va, vaend);
-       if (!va) {
-               dprintk("svcrdma: failed to decode write list\n");
-               return -EINVAL;
-       }
-       va = decode_reply_array(va, vaend);
-       if (!va) {
-               dprintk("svcrdma: failed to decode reply chunk\n");
-               return -EINVAL;
+       case rdma_error:
+               goto out_drop;
+
+       default:
+               goto out_proc;
        }
 
-       rq_arg->head[0].iov_base = va;
-       hdr_len = (unsigned long)va - (unsigned long)rmsgp;
+       end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
+       p = xdr_check_read_list(rdma_argp + 4, end);
+       if (!p)
+               goto out_inval;
+       p = xdr_check_write_list(p, end);
+       if (!p)
+               goto out_inval;
+       p = xdr_check_reply_chunk(p, end);
+       if (!p)
+               goto out_inval;
+       if (p > end)
+               goto out_inval;
+
+       rq_arg->head[0].iov_base = p;
+       hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
        rq_arg->head[0].iov_len -= hdr_len;
        return hdr_len;
+
+out_short:
+       dprintk("svcrdma: header too short = %d\n", rq_arg->len);
+       return -EINVAL;
+
+out_version:
+       dprintk("svcrdma: bad xprt version: %u\n",
+               be32_to_cpup(rdma_argp + 1));
+       return -EPROTONOSUPPORT;
+
+out_drop:
+       dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
+       return 0;
+
+out_proc:
+       dprintk("svcrdma: bad rdma procedure (%u)\n",
+               be32_to_cpup(rdma_argp + 3));
+       return -EINVAL;
+
+out_inval:
+       dprintk("svcrdma: failed to parse transport header\n");
+       return -EINVAL;
 }
 
 int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
@@ -249,7 +175,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
 
        *va++ = rmsgp->rm_xid;
        *va++ = rmsgp->rm_vers;
-       *va++ = cpu_to_be32(xprt->sc_max_requests);
+       *va++ = xprt->sc_fc_credits;
        *va++ = rdma_error;
        *va++ = cpu_to_be32(err);
        if (err == ERR_VERS) {
@@ -260,32 +186,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
        return (int)((unsigned long)va - (unsigned long)startp);
 }
 
-int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
+/**
+ * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header
+ * @rdma_resp: buffer containing Reply transport header
+ *
+ * Returns length of transport header, in bytes.
+ */
+unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
 {
-       struct rpcrdma_write_array *wr_ary;
+       unsigned int nsegs;
+       __be32 *p;
 
-       /* There is no read-list in a reply */
+       p = rdma_resp;
 
-       /* skip write list */
-       wr_ary = (struct rpcrdma_write_array *)
-               &rmsgp->rm_body.rm_chunks[1];
-       if (wr_ary->wc_discrim)
-               wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
-                       wc_target.rs_length;
-       else
-               wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_nchunks;
-
-       /* skip reply array */
-       if (wr_ary->wc_discrim)
-               wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
-       else
-               wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_nchunks;
-
-       return (unsigned long) wr_ary - (unsigned long) rmsgp;
+       /* RPC-over-RDMA V1 replies never have a Read list. */
+       p += rpcrdma_fixed_maxsz + 1;
+
+       /* Skip Write list. */
+       while (*p++ != xdr_zero) {
+               nsegs = be32_to_cpup(p++);
+               p += nsegs * rpcrdma_segment_maxsz;
+       }
+
+       /* Skip Reply chunk. */
+       if (*p++ != xdr_zero) {
+               nsegs = be32_to_cpup(p++);
+               p += nsegs * rpcrdma_segment_maxsz;
+       }
+
+       return (unsigned long)p - (unsigned long)rdma_resp;
 }
 
 void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
@@ -326,19 +255,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
        seg->rs_offset = rs_offset;
        seg->rs_length = cpu_to_be32(write_len);
 }
-
-void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
-                                 struct rpcrdma_msg *rdma_argp,
-                                 struct rpcrdma_msg *rdma_resp,
-                                 enum rpcrdma_proc rdma_type)
-{
-       rdma_resp->rm_xid = rdma_argp->rm_xid;
-       rdma_resp->rm_vers = rdma_argp->rm_vers;
-       rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
-       rdma_resp->rm_type = cpu_to_be32(rdma_type);
-
-       /* Encode <nul> chunks lists */
-       rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
-       rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
-       rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
-}
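
The rewritten xdr_check_* helpers above walk the RPC-over-RDMA v1 transport header as a flat stream of 32-bit XDR words instead of casting through overlay structs. As a rough map of what they step over (my summary, not text from the patch; each list is terminated by an xdr_zero discriminator):

    /*
     *   xid, vers, credits, proc                         (4 fixed words)
     *   Read list:  { 1, position, handle, length, offset(2) }*, 0
     *   Write list: { 1, n, n * { handle, length, offset(2) } }*, 0
     *   Reply chunk:  0  |  1, n, n * { handle, length, offset(2) }
     */
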
index 172b537f8cfc942ef62574b74cff7ac5f421fba9..f7b2daf72a86582807798379ac3be336b061a958 100644 (file)
@@ -606,26 +606,24 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 
        dprintk("svcrdma: rqstp=%p\n", rqstp);
 
-       spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
+       spin_lock(&rdma_xprt->sc_rq_dto_lock);
        if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
-               ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
-                                 struct svc_rdma_op_ctxt,
-                                 dto_q);
-               list_del_init(&ctxt->dto_q);
-               spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
+               ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
+               spin_unlock(&rdma_xprt->sc_rq_dto_lock);
                rdma_read_complete(rqstp, ctxt);
                goto complete;
        } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
-               ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
-                                 struct svc_rdma_op_ctxt,
-                                 dto_q);
-               list_del_init(&ctxt->dto_q);
+               ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q,
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
        } else {
                atomic_inc(&rdma_stat_rq_starve);
                clear_bit(XPT_DATA, &xprt->xpt_flags);
                ctxt = NULL;
        }
-       spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
+       spin_unlock(&rdma_xprt->sc_rq_dto_lock);
        if (!ctxt) {
                /* This is the EAGAIN path. The svc_recv routine will
                 * return -EAGAIN, the nfsd thread will go to call into
index ad4d286a83c5195fe663dd581cd49a5c9f9a6166..515221b16d0956ea027e91985c89606c403d5109 100644 (file)
@@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 
        /* Prepare the SGE for the RPCRDMA Header */
        ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
-       ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
+       ctxt->sge[0].length =
+           svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
        ctxt->sge[0].addr =
            ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
                            ctxt->sge[0].length, DMA_TO_DEVICE);
@@ -559,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        struct rpcrdma_msg *rdma_argp;
        struct rpcrdma_msg *rdma_resp;
        struct rpcrdma_write_array *wr_ary, *rp_ary;
-       enum rpcrdma_proc reply_type;
        int ret;
        int inline_bytes;
        struct page *res_page;
        struct svc_rdma_req_map *vec;
        u32 inv_rkey;
+       __be32 *p;
 
        dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
 
@@ -596,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        if (!res_page)
                goto err0;
        rdma_resp = page_address(res_page);
-       if (rp_ary)
-               reply_type = RDMA_NOMSG;
-       else
-               reply_type = RDMA_MSG;
-       svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
-                                        rdma_resp, reply_type);
+
+       p = &rdma_resp->rm_xid;
+       *p++ = rdma_argp->rm_xid;
+       *p++ = rdma_argp->rm_vers;
+       *p++ = rdma->sc_fc_credits;
+       *p++ = rp_ary ? rdma_nomsg : rdma_msg;
+
+       /* Start with empty chunks */
+       *p++ = xdr_zero;
+       *p++ = xdr_zero;
+       *p   = xdr_zero;
 
        /* Send any write-chunk data and build resp write-list */
        if (wr_ary) {
index 39652d390a9c60bc026199a7dcb5ef996bcd65ab..c13a5c35ce14d992515fa99e456976ed0cd1c382 100644 (file)
@@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
        ctxt = kmalloc(sizeof(*ctxt), flags);
        if (ctxt) {
                ctxt->xprt = xprt;
-               INIT_LIST_HEAD(&ctxt->free);
-               INIT_LIST_HEAD(&ctxt->dto_q);
+               INIT_LIST_HEAD(&ctxt->list);
        }
        return ctxt;
 }
@@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
                        dprintk("svcrdma: No memory for RDMA ctxt\n");
                        return false;
                }
-               list_add(&ctxt->free, &xprt->sc_ctxts);
+               list_add(&ctxt->list, &xprt->sc_ctxts);
        }
        return true;
 }
@@ -189,15 +188,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 {
        struct svc_rdma_op_ctxt *ctxt = NULL;
 
-       spin_lock_bh(&xprt->sc_ctxt_lock);
+       spin_lock(&xprt->sc_ctxt_lock);
        xprt->sc_ctxt_used++;
        if (list_empty(&xprt->sc_ctxts))
                goto out_empty;
 
        ctxt = list_first_entry(&xprt->sc_ctxts,
-                               struct svc_rdma_op_ctxt, free);
-       list_del_init(&ctxt->free);
-       spin_unlock_bh(&xprt->sc_ctxt_lock);
+                               struct svc_rdma_op_ctxt, list);
+       list_del(&ctxt->list);
+       spin_unlock(&xprt->sc_ctxt_lock);
 
 out:
        ctxt->count = 0;
@@ -209,15 +208,15 @@ out_empty:
        /* Either pre-allocation missed the mark, or send
         * queue accounting is broken.
         */
-       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       spin_unlock(&xprt->sc_ctxt_lock);
 
        ctxt = alloc_ctxt(xprt, GFP_NOIO);
        if (ctxt)
                goto out;
 
-       spin_lock_bh(&xprt->sc_ctxt_lock);
+       spin_lock(&xprt->sc_ctxt_lock);
        xprt->sc_ctxt_used--;
-       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       spin_unlock(&xprt->sc_ctxt_lock);
        WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
        return NULL;
 }
@@ -254,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
                for (i = 0; i < ctxt->count; i++)
                        put_page(ctxt->pages[i]);
 
-       spin_lock_bh(&xprt->sc_ctxt_lock);
+       spin_lock(&xprt->sc_ctxt_lock);
        xprt->sc_ctxt_used--;
-       list_add(&ctxt->free, &xprt->sc_ctxts);
-       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       list_add(&ctxt->list, &xprt->sc_ctxts);
+       spin_unlock(&xprt->sc_ctxt_lock);
 }
 
 static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
@@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
                struct svc_rdma_op_ctxt *ctxt;
 
                ctxt = list_first_entry(&xprt->sc_ctxts,
-                                       struct svc_rdma_op_ctxt, free);
-               list_del(&ctxt->free);
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
                kfree(ctxt);
        }
 }
@@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
        /* All wc fields are now known to be valid */
        ctxt->byte_len = wc->byte_len;
        spin_lock(&xprt->sc_rq_dto_lock);
-       list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
+       list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
        spin_unlock(&xprt->sc_rq_dto_lock);
 
        set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
@@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
 
                read_hdr = ctxt->read_hdr;
                spin_lock(&xprt->sc_rq_dto_lock);
-               list_add_tail(&read_hdr->dto_q,
+               list_add_tail(&read_hdr->list,
                              &xprt->sc_read_complete_q);
                spin_unlock(&xprt->sc_rq_dto_lock);
 
@@ -557,7 +556,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
                return NULL;
        svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
        INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
-       INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
@@ -571,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        spin_lock_init(&cma_xprt->sc_ctxt_lock);
        spin_lock_init(&cma_xprt->sc_map_lock);
 
+       /*
+        * Note that this implies that the underlying transport
+        * supports some form of congestion control (see RFC 7530 section 3.1
+        * paragraph 2). For now, we assume that all supported RDMA
+        * transports are suitable here.
+        */
+       set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
+
        if (listener)
                set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
 
@@ -923,14 +929,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_fastreg_mr *frmr = NULL;
 
-       spin_lock_bh(&rdma->sc_frmr_q_lock);
+       spin_lock(&rdma->sc_frmr_q_lock);
        if (!list_empty(&rdma->sc_frmr_q)) {
                frmr = list_entry(rdma->sc_frmr_q.next,
                                  struct svc_rdma_fastreg_mr, frmr_list);
                list_del_init(&frmr->frmr_list);
                frmr->sg_nents = 0;
        }
-       spin_unlock_bh(&rdma->sc_frmr_q_lock);
+       spin_unlock(&rdma->sc_frmr_q_lock);
        if (frmr)
                return frmr;
 
@@ -943,10 +949,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
        if (frmr) {
                ib_dma_unmap_sg(rdma->sc_cm_id->device,
                                frmr->sg, frmr->sg_nents, frmr->direction);
-               spin_lock_bh(&rdma->sc_frmr_q_lock);
+               spin_lock(&rdma->sc_frmr_q_lock);
                WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
                list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
-               spin_unlock_bh(&rdma->sc_frmr_q_lock);
+               spin_unlock(&rdma->sc_frmr_q_lock);
        }
 }
 
@@ -1002,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        newxprt->sc_max_req_size = svcrdma_max_req_size;
        newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
                                         svcrdma_max_requests);
+       newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
        newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
                                            svcrdma_max_bc_requests);
        newxprt->sc_rq_depth = newxprt->sc_max_requests +
@@ -1027,13 +1034,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                goto errout;
        }
        newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-                                       0, IB_POLL_SOFTIRQ);
+                                       0, IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_sq_cq)) {
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
        newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
-                                       0, IB_POLL_SOFTIRQ);
+                                       0, IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_rq_cq)) {
                dprintk("svcrdma: error creating RQ CQ for connect request\n");
                goto errout;
@@ -1213,20 +1220,18 @@ static void __svc_rdma_free(struct work_struct *work)
         */
        while (!list_empty(&rdma->sc_read_complete_q)) {
                struct svc_rdma_op_ctxt *ctxt;
-               ctxt = list_entry(rdma->sc_read_complete_q.next,
-                                 struct svc_rdma_op_ctxt,
-                                 dto_q);
-               list_del_init(&ctxt->dto_q);
+               ctxt = list_first_entry(&rdma->sc_read_complete_q,
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
                svc_rdma_put_context(ctxt, 1);
        }
 
        /* Destroy queued, but not processed recv completions */
        while (!list_empty(&rdma->sc_rq_dto_q)) {
                struct svc_rdma_op_ctxt *ctxt;
-               ctxt = list_entry(rdma->sc_rq_dto_q.next,
-                                 struct svc_rdma_op_ctxt,
-                                 dto_q);
-               list_del_init(&ctxt->dto_q);
+               ctxt = list_first_entry(&rdma->sc_rq_dto_q,
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
                svc_rdma_put_context(ctxt, 1);
        }
 
index af392d9b9ceca5beb848175ec811388e706a09a7..956c7bce80d1b2184c6a61708052d33b7e9bea2d 100644 (file)
@@ -1188,7 +1188,7 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r
        char *p;
 
        len = sizeof(transport->tcp_xid) - transport->tcp_offset;
-       dprintk("RPC:       reading XID (%Zu bytes)\n", len);
+       dprintk("RPC:       reading XID (%zu bytes)\n", len);
        p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
        used = xdr_skb_read_bits(desc, p, len);
        transport->tcp_offset += used;
@@ -1219,7 +1219,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
         */
        offset = transport->tcp_offset - sizeof(transport->tcp_xid);
        len = sizeof(transport->tcp_calldir) - offset;
-       dprintk("RPC:       reading CALL/REPLY flag (%Zu bytes)\n", len);
+       dprintk("RPC:       reading CALL/REPLY flag (%zu bytes)\n", len);
        p = ((char *) &transport->tcp_calldir) + offset;
        used = xdr_skb_read_bits(desc, p, len);
        transport->tcp_offset += used;
@@ -1310,7 +1310,7 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
                return;
        }
 
-       dprintk("RPC:       XID %08x read %Zd bytes\n",
+       dprintk("RPC:       XID %08x read %zd bytes\n",
                        ntohl(transport->tcp_xid), r);
        dprintk("RPC:       xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
                        "tcp_reclen = %u\n", xprt, transport->tcp_copied,
@@ -1456,7 +1456,7 @@ static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_s
        desc->count -= len;
        desc->offset += len;
        transport->tcp_offset += len;
-       dprintk("RPC:       discarded %Zu bytes\n", len);
+       dprintk("RPC:       discarded %zu bytes\n", len);
        xs_tcp_check_fraghdr(transport);
 }
 
index 982c52ca6473569148dbffb5585a46888149bf48..baa3c7be04ad8897761f92abd2b5d428d24f8471 100755 (executable)
@@ -424,7 +424,7 @@ our $typeTypedefs = qr{(?x:
 our $zero_initializer = qr{(?:(?:0[xX])?0+$Int_type?|NULL|false)\b};
 
 our $logFunctions = qr{(?x:
-       printk(?:_ratelimited|_once|)|
+       printk(?:_ratelimited|_once|_deferred_once|_deferred|)|
        (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)|
        WARN(?:_RATELIMIT|_ONCE|)|
        panic|
@@ -1848,6 +1848,8 @@ my $prefix = '';
 sub show_type {
        my ($type) = @_;
 
+       $type =~ tr/[a-z]/[A-Z]/;
+
        return defined $use_type{$type} if (scalar keys %use_type > 0);
 
        return !defined $ignore_type{$type};
@@ -2134,7 +2136,7 @@ sub process {
        my $in_header_lines = $file ? 0 : 1;
        my $in_commit_log = 0;          #Scanning lines before patch
        my $has_commit_log = 0;         #Encountered lines before patch
-       my $commit_log_possible_stack_dump = 0;
+       my $commit_log_possible_stack_dump = 0;
        my $commit_log_long_line = 0;
        my $commit_log_has_diff = 0;
        my $reported_maintainer_file = 0;
@@ -2154,6 +2156,7 @@ sub process {
        my $realline = 0;
        my $realcnt = 0;
        my $here = '';
+       my $context_function;           #undef'd unless there's a known function
        my $in_comment = 0;
        my $comment_edge = 0;
        my $first_line = 0;
@@ -2192,7 +2195,8 @@ sub process {
                        }
                        #next;
                }
-               if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
+               if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) {
+                       my $context = $4;
                        $realline=$1-1;
                        if (defined $2) {
                                $realcnt=$3+1;
@@ -2201,6 +2205,12 @@ sub process {
                        }
                        $in_comment = 0;
 
+                       if ($context =~ /\b(\w+)\s*\(/) {
+                               $context_function = $1;
+                       } else {
+                               undef $context_function;
+                       }
+
                        # Guestimate if this is a continuing comment.  Run
                        # the context looking for a comment "edge".  If this
                        # edge is a close comment then we must be in a comment
@@ -2695,6 +2705,7 @@ sub process {
 
 # Check for FSF mailing addresses.
                if ($rawline =~ /\bwrite to the Free/i ||
+                   $rawline =~ /\b675\s+Mass\s+Ave/i ||
                    $rawline =~ /\b59\s+Temple\s+Pl/i ||
                    $rawline =~ /\b51\s+Franklin\s+St/i) {
                        my $herevet = "$here\n" . cat_vet($rawline) . "\n";
@@ -5095,6 +5106,12 @@ sub process {
                        }
                }
 
+# check for single line unbalanced braces
+               if ($sline =~ /^.\s*\}\s*else\s*$/ ||
+                   $sline =~ /^.\s*else\s*\{\s*$/) {
+                       CHK("BRACES", "Unbalanced braces around else statement\n" . $herecurr);
+               }
+
 # check for unnecessary blank lines around braces
                if (($line =~ /^.\s*}\s*$/ && $prevrawline =~ /^.\s*$/)) {
                        if (CHK("BRACES",
@@ -5157,6 +5174,16 @@ sub process {
                             "break quoted strings at a space character\n" . $hereprev);
                }
 
+#check for an embedded function name in a string when the function is known
+# as part of a diff.  This does not work for -f --file checking as it
+#depends on patch context providing the function name
+               if ($line =~ /^\+.*$String/ &&
+                   defined($context_function) &&
+                   get_quoted_string($line, $rawline) =~ /\b$context_function\b/) {
+                       WARN("EMBEDDED_FUNCTION_NAME",
+                            "Prefer using \"%s\", __func__ to embedded function names\n" . $herecurr);
+               }
+
 # check for spaces before a quoted newline
                if ($rawline =~ /^.*\".*\s\\n/) {
                        if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
@@ -5179,18 +5206,27 @@ sub process {
                             "Consecutive strings are generally better as a single string\n" . $herecurr);
                }
 
-# check for %L{u,d,i} and 0x%[udi] in strings
-               my $string;
+# check for non-standard and hex prefixed decimal printf formats
+               my $show_L = 1; #don't show the same defect twice
+               my $show_Z = 1;
                while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) {
-                       $string = substr($rawline, $-[1], $+[1] - $-[1]);
+                       my $string = substr($rawline, $-[1], $+[1] - $-[1]);
                        $string =~ s/%%/__/g;
-                       if ($string =~ /(?<!%)%[\*\d\.\$]*L[udi]/) {
+                       # check for %L
+                       if ($show_L && $string =~ /%[\*\d\.\$]*L([diouxX])/) {
                                WARN("PRINTF_L",
-                                    "\%Ld/%Lu are not-standard C, use %lld/%llu\n" . $herecurr);
-                               last;
-                       }
-                       if ($string =~ /0x%[\*\d\.\$\Llzth]*[udi]/) {
-                               ERROR("PRINTF_0xDECIMAL",
+                                    "\%L$1 is non-standard C, use %ll$1\n" . $herecurr);
+                               $show_L = 0;
+                       }
+                       # check for %Z
+                       if ($show_Z && $string =~ /%[\*\d\.\$]*Z([diouxX])/) {
+                               WARN("PRINTF_Z",
+                                    "%Z$1 is non-standard C, use %z$1\n" . $herecurr);
+                               $show_Z = 0;
+                       }
+                       # check for 0x<decimal>
+                       if ($string =~ /0x%[\*\d\.\$\Llzth]*[diou]/) {
+                               ERROR("PRINTF_0XDECIMAL",
                                      "Prefixing 0x with decimal output is defective\n" . $herecurr);
                        }
                }
@@ -5269,6 +5305,12 @@ sub process {
                        }
                }
 
+# check for logging continuations
+               if ($line =~ /\bprintk\s*\(\s*KERN_CONT\b|\bpr_cont\s*\(/) {
+                       WARN("LOGGING_CONTINUATION",
+                            "Avoid logging continuation uses where feasible\n" . $herecurr);
+               }
+
 # check for mask then right shift without a parentheses
                if ($^V && $^V ge 5.10.0 &&
                    $line =~ /$LvalOrFunc\s*\&\s*($LvalOrFunc)\s*>>/ &&
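
These tightened printf checks are what the many %Zd/%Zu to %zd/%zu conversions elsewhere in this merge (xprtsock.c, SELinux, ALSA, ASoC) satisfy. A hedged example of kernel code that would trip each new message type, with illustrative variables:

	size_t len = 64;
	long long delta = -1;

	printk(KERN_DEBUG "read %Zu bytes\n", len);   /* PRINTF_Z: use %zu */
	printk(KERN_DEBUG "delta is %Ld\n", delta);   /* PRINTF_L: use %lld */
	printk(KERN_DEBUG "flags 0x%d\n", 8);         /* PRINTF_0XDECIMAL */

	printk(KERN_DEBUG "read %zu bytes\n", len);   /* clean: C99 %z */
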
index 694a075381b0dc781ad76062594bb21464da6bed..3033be701e9a24c5c8c090ce6f453df070e8cb23 100755 (executable)
@@ -81,6 +81,9 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
        } elsif ($arch eq 'nios2') {
                #25a8:  defffb04        addi    sp,sp,-20
                $re = qr/.*addi.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
+       } elsif ($arch eq 'openrisc') {
+               # c000043c:       9c 21 fe f0     l.addi r1,r1,-272
+               $re = qr/.*l\.addi.*r1,r1,-(([0-9]{2}|[3-9])[0-9]{2})/o;
        } elsif ($arch eq 'parisc' || $arch eq 'parisc64') {
                $re = qr/.*ldo ($x{1,8})\(sp\),sp/o;
        } elsif ($arch eq 'ppc') {
index faac4b10d8eaf9ddd472f2bd6dcba98a49310710..0b6002b36f204f89fa0b0f1c23390fb54307fd21 100755 (executable)
@@ -318,7 +318,7 @@ if ($arch eq "x86_64") {
     # instruction or the addiu one. herein, we record the address of the
     # first one, and then we can replace this instruction by a branch
     # instruction to jump over the profiling function to filter the
-    # indicated functions, or swith back to the lui instruction to trace
+    # indicated functions, or switch back to the lui instruction to trace
     # them, which means dynamic tracing.
     #
     #       c: 3c030000        lui     v1,0x0
index b3a1994b5df758064f974455c5809e4e8beac261..0458b037c8a137daa0f0fc205cabc188b18ae513 100644 (file)
@@ -62,15 +62,19 @@ adress||address
 adresses||addresses
 adviced||advised
 afecting||affecting
+againt||against
 agaist||against
 albumns||albums
 alegorical||allegorical
+algined||aligned
 algorith||algorithm
 algorithmical||algorithmically
 algoritm||algorithm
 algoritms||algorithms
 algorrithm||algorithm
 algorritm||algorithm
+aligment||alignment
+alignement||alignment
 allign||align
 allocatrd||allocated
 allocte||allocate
@@ -86,6 +90,10 @@ alue||value
 ambigious||ambiguous
 amoung||among
 amout||amount
+an union||a union
+an user||a user
+an userspace||a userspace
+an one||a one
 analysator||analyzer
 ang||and
 anniversery||anniversary
@@ -98,6 +106,7 @@ appearence||appearance
 applicaion||application
 appliction||application
 applictions||applications
+applys||applies
 appplications||applications
 appropiate||appropriate
 appropriatly||appropriately
@@ -237,6 +246,9 @@ commited||committed
 commiting||committing
 committ||commit
 commoditiy||commodity
+comsume||consume
+comsumer||consumer
+comsuming||consuming
 compability||compatibility
 compaibility||compatibility
 compatability||compatibility
@@ -258,6 +270,7 @@ comunication||communication
 conbination||combination
 conditionaly||conditionally
 conected||connected
+configuartion||configuration
 configuratoin||configuration
 configuraton||configuration
 configuretion||configuration
@@ -310,6 +323,9 @@ defintion||definition
 defintions||definitions
 defualt||default
 defult||default
+deintializing||deinitializing
+deintialize||deinitialize
+deintialized||deinitialized
 deivce||device
 delared||declared
 delare||declare
@@ -352,6 +368,7 @@ differrence||difference
 difinition||definition
 diplay||display
 direectly||directly
+disassocation||disassociation
 disapear||disappear
 disapeared||disappeared
 disappared||disappeared
@@ -375,10 +392,12 @@ easilly||easily
 ecspecially||especially
 edditable||editable
 editting||editing
+efective||effective
 efficently||efficiently
 ehther||ether
 eigth||eight
 eletronic||electronic
+embeded||embedded
 enabledi||enabled
 enchanced||enhanced
 encorporating||incorporating
@@ -414,6 +433,7 @@ expecially||especially
 explicite||explicit
 explicitely||explicitly
 explict||explicit
+explictely||explicitly
 explictly||explicitly
 expresion||expression
 exprimental||experimental
@@ -445,6 +465,7 @@ finsih||finish
 flusing||flushing
 folloing||following
 followign||following
+followings||following
 follwing||following
 forseeable||foreseeable
 forse||force
@@ -537,6 +558,7 @@ initalise||initialize
 initalize||initialize
 initation||initiation
 initators||initiators
+initialiazation||initialization
 initializiation||initialization
 initialzed||initialized
 initilization||initialization
@@ -566,6 +588,7 @@ interruptted||interrupted
 interupted||interrupted
 interupt||interrupt
 intial||initial
+intialization||initialization
 intialized||initialized
 intialize||initialize
 intregral||integral
@@ -666,6 +689,7 @@ neccecary||necessary
 neccesary||necessary
 neccessary||necessary
 necesary||necessary
+neded||needed
 negaive||negative
 negoitation||negotiation
 negotation||negotiation
@@ -688,6 +712,8 @@ occure||occurred
 occured||occurred
 occuring||occurring
 offet||offset
+omited||omitted
+omiting||omitting
 omitt||omit
 ommiting||omitting
 ommitted||omitted
@@ -706,8 +732,11 @@ oustanding||outstanding
 overaall||overall
 overhread||overhead
 overlaping||overlapping
+overrided||overridden
 overriden||overridden
 overun||overrun
+overwritting||overwriting
+overwriten||overwritten
 pacakge||package
 pachage||package
 packacge||package
@@ -718,6 +747,7 @@ pakage||package
 pallette||palette
 paln||plan
 paramameters||parameters
+paramaters||parameters
 paramater||parameter
 parametes||parameters
 parametised||parametrised
@@ -962,6 +992,7 @@ straming||streaming
 struc||struct
 structres||structures
 stuct||struct
+strucuture||structure
 stucture||structure
 sturcture||structure
 subdirectoires||subdirectories
@@ -991,6 +1022,13 @@ suspeneded||suspended
 suspicously||suspiciously
 swaping||swapping
 switchs||switches
+swith||switch
+swithable||switchable
+swithc||switch
+swithced||switched
+swithcing||switching
+swithed||switched
+swithing||switching
 symetric||symmetric
 synax||syntax
 synchonized||synchronized
@@ -1007,6 +1045,7 @@ targetting||targeting
 teh||the
 temorary||temporary
 temproarily||temporarily
+therfore||therefore
 thier||their
 threds||threads
 threshhold||threshold
@@ -1050,6 +1089,7 @@ unkmown||unknown
 unknonw||unknown
 unknow||unknown
 unkown||unknown
+unneded||unneeded
 unneedingly||unnecessarily
 unnsupported||unsupported
 unmached||unmatched
@@ -1078,6 +1118,7 @@ vaid||valid
 vaild||valid
 valide||valid
 variantions||variations
+varible||variable
 varient||variant
 vaule||value
 verbse||verbose
index c354807381c11949604a458b5286989385395a7b..c9e8a9898ce48111af344584534c19f9cdc483a9 100644 (file)
@@ -424,10 +424,9 @@ out:
        return ret;
 }
 
-static int sel_mmap_policy_fault(struct vm_area_struct *vma,
-                                struct vm_fault *vmf)
+static int sel_mmap_policy_fault(struct vm_fault *vmf)
 {
-       struct policy_load_memory *plm = vma->vm_file->private_data;
+       struct policy_load_memory *plm = vmf->vma->vm_file->private_data;
        unsigned long offset;
        struct page *page;
 
index 7d10e5d418bb00978d7f30aadc6d86d3b2533bd7..9db4709a687771923f21da25baf1aa788f018db2 100644 (file)
@@ -360,7 +360,7 @@ int ebitmap_read(struct ebitmap *e, void *fp)
 
        if (mapunit != BITS_PER_U64) {
                printk(KERN_ERR "SELinux: ebitmap: map size %u does not "
-                      "match my size %Zd (high bit was %d)\n",
+                      "match my size %zd (high bit was %d)\n",
                       mapunit, BITS_PER_U64, e->highbit);
                goto bad;
        }
index d719db4219cd2222aacc2d75a3bc4f64925ba435..9c92f29a38ea4fa9b481f0e06a0e640b5538751a 100644 (file)
@@ -2266,7 +2266,7 @@ int policydb_read(struct policydb *p, void *fp)
        len = le32_to_cpu(buf[1]);
        if (len != strlen(POLICYDB_STRING)) {
                printk(KERN_ERR "SELinux:  policydb string length %d does not "
-                      "match expected length %Zu\n",
+                      "match expected length %zu\n",
                       len, strlen(POLICYDB_STRING));
                goto bad;
        }
index 9d33c1e85c79b1f6b66f74e6117a198e60244b28..aec9c92250fd72b46ac14c65a6efef1bacc7589c 100644 (file)
@@ -3245,10 +3245,9 @@ static unsigned int snd_pcm_capture_poll(struct file *file, poll_table * wait)
 /*
  * mmap status record
  */
-static int snd_pcm_mmap_status_fault(struct vm_area_struct *area,
-                                               struct vm_fault *vmf)
+static int snd_pcm_mmap_status_fault(struct vm_fault *vmf)
 {
-       struct snd_pcm_substream *substream = area->vm_private_data;
+       struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
        struct snd_pcm_runtime *runtime;
        
        if (substream == NULL)
@@ -3282,10 +3281,9 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file
 /*
  * mmap control record
  */
-static int snd_pcm_mmap_control_fault(struct vm_area_struct *area,
-                                               struct vm_fault *vmf)
+static int snd_pcm_mmap_control_fault(struct vm_fault *vmf)
 {
-       struct snd_pcm_substream *substream = area->vm_private_data;
+       struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
        struct snd_pcm_runtime *runtime;
        
        if (substream == NULL)
@@ -3341,10 +3339,9 @@ snd_pcm_default_page_ops(struct snd_pcm_substream *substream, unsigned long ofs)
 /*
  * fault callback for mmapping a RAM page
  */
-static int snd_pcm_mmap_data_fault(struct vm_area_struct *area,
-                                               struct vm_fault *vmf)
+static int snd_pcm_mmap_data_fault(struct vm_fault *vmf)
 {
-       struct snd_pcm_substream *substream = area->vm_private_data;
+       struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
        struct snd_pcm_runtime *runtime;
        unsigned long offset;
        struct page * page;
index f4234edb878c7a01ebbc296bf52cb0debb24b087..8cf0dc7a07a4ce90a2114ddaa1ed0ca5c797bc22 100644 (file)
@@ -3093,7 +3093,7 @@ static int patch_cm9739(struct snd_ac97 * ac97)
        /* set-up multi channel */
        /* bit 14: 0 = SPDIF, 1 = EAPD */
        /* bit 13: enable internal vref output for mic */
-       /* bit 12: disable center/lfe (swithable) */
+       /* bit 12: disable center/lfe (switchable) */
        /* bit 10: disable surround/line (switchable) */
        /* bit 9: mix 2 surround off */
        /* bit 4: undocumented; 0 mutes the CM9739A, which defaults to 1 */
index 5cf920bfda2728a27509e14406c93b801f7382e1..be5694718546eca59afeccc588d325a4414d98f3 100644 (file)
@@ -203,7 +203,7 @@ struct dsp_task_tree_context_block {
 
        u32       saverfe;                                      
 
-       /* Value may be overwriten by stack save algorithm.
+       /* Value may be overwritten by stack save algorithm.
           Retain the size of the stack data saved here if used */
        ___DSP_DUAL_16BIT_ALLOC(
              reserved1,        
index 9ec4dba8a793bf04294a61423b94d07fedf4f29f..07a9deb1747779245b42bd1f50e8e423c5f2a8f1 100644 (file)
@@ -2866,7 +2866,7 @@ static unsigned int ca0132_capture_pcm_delay(struct hda_pcm_stream *info,
 #define CA0132_CODEC_MUTE(xname, nid, dir) \
        CA0132_CODEC_MUTE_MONO(xname, nid, 3, dir)
 
-/* The followings are for tuning of products */
+/* The following are for tuning of products */
 #ifdef ENABLE_TUNING_CONTROLS
 
 static unsigned int voice_focus_vals_lookup[] = {
index f7ac8d5e862cf9b0e861ad790095c2d2295dc807..27c03e40c9b1b96eab2c6c94ba9a0d3789bafd93 100644 (file)
@@ -254,7 +254,7 @@ static int snd_wm8766_ctl_put(struct snd_kcontrol *kcontrol,
        int n = kcontrol->private_value;
        u16 val, regval1, regval2;
 
-       /* this also works for enum because value is an union */
+       /* this also works for enum because value is a union */
        regval1 = ucontrol->value.integer.value[0];
        regval2 = ucontrol->value.integer.value[1];
        if (wm->ctl[n].flags & WM8766_FLAG_INVERT) {
index ebd2fe4b4a57c4e25b5905d741602ba8ef58ca9b..553669b103c24d75516666169522b65e582d465f 100644 (file)
@@ -528,7 +528,7 @@ static int snd_wm8776_ctl_put(struct snd_kcontrol *kcontrol,
        int n = kcontrol->private_value;
        u16 val, regval1, regval2;
 
-       /* this also works for enum because value is an union */
+       /* this also works for enum because value is a union */
        regval1 = ucontrol->value.integer.value[0];
        regval2 = ucontrol->value.integer.value[1];
        if (wm->ctl[n].flags & WM8776_FLAG_INVERT) {
index 565f7f55c3ca4d44bf0156e7da0c7bafe3863271..1e25095fd144383930fb837806c4b91d147039ca 100644 (file)
@@ -2051,7 +2051,7 @@ static void snd_korg1212_proc_read(struct snd_info_entry *entry,
        snd_iprintf(buffer, korg1212->card->longname);
        snd_iprintf(buffer, " (index #%d)\n", korg1212->card->number + 1);
        snd_iprintf(buffer, "\nGeneral settings\n");
-       snd_iprintf(buffer, "    period size: %Zd bytes\n", K1212_PERIOD_BYTES);
+       snd_iprintf(buffer, "    period size: %zd bytes\n", K1212_PERIOD_BYTES);
        snd_iprintf(buffer, "     clock mode: %s\n", clockSourceName[korg1212->clkSrcRate] );
        snd_iprintf(buffer, "  left ADC Sens: %d\n", korg1212->leftADCInSens );
        snd_iprintf(buffer, " right ADC Sens: %d\n", korg1212->rightADCInSens );
@@ -2276,7 +2276,7 @@ static int snd_korg1212_create(struct snd_card *card, struct pci_dev *pci,
 
        if (snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(pci),
                                sizeof(struct KorgSharedBuffer), &korg1212->dma_shared) < 0) {
-               snd_printk(KERN_ERR "korg1212: can not allocate shared buffer memory (%Zd bytes)\n", sizeof(struct KorgSharedBuffer));
+               snd_printk(KERN_ERR "korg1212: can not allocate shared buffer memory (%zd bytes)\n", sizeof(struct KorgSharedBuffer));
                 snd_korg1212_free(korg1212);
                 return -ENOMEM;
         }
index 80633055e17e35c56131a24e1b373cd6d63d3989..a99808ab01fe58718787865984ffd5cbb50c841c 100644 (file)
@@ -292,7 +292,7 @@ static int pcxhr_dsp_load(struct pcxhr_mgr *mgr, int index,
        int err, card_index;
 
        dev_dbg(&mgr->pci->dev,
-               "loading dsp [%d] size = %Zd\n", index, dsp->size);
+               "loading dsp [%d] size = %zd\n", index, dsp->size);
 
        switch (index) {
        case PCXHR_FIRMWARE_XLX_INT_INDEX:
index 56aa1ba73ccc1ccb488bc662aa7ad459213eb696..5f97791f00d7255241e25b619b4401b7e3a53ef5 100644 (file)
@@ -201,7 +201,7 @@ static int vxp_load_xilinx_binary(struct vx_core *_chip, const struct firmware *
        c |= (int)vx_inb(chip, RXM) << 8;
        c |= vx_inb(chip, RXL);
 
-       snd_printdd(KERN_DEBUG "xilinx: dsp size received 0x%x, orig 0x%Zx\n", c, fw->size);
+       snd_printdd(KERN_DEBUG "xilinx: dsp size received 0x%x, orig 0x%zx\n", c, fw->size);
 
        vx_outb(chip, ICR, ICR_HF0);
 
index b84d7d34f1886b6c72f669f17b4a3cef5a196ef0..cdd44abfc9e0f13d0e4131dc11bb05522f7663b6 100644 (file)
@@ -883,7 +883,7 @@ static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 static void snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
 {
        /*
-        * avsetting driver seems to never change the followings
+        * avsetting driver seems to never change the following
         * so, init them here once
         */
 
index 818b052377f3f2e656d77ad12e835df299663872..ec1067a679da406019bd4c98e6b6cf22fd5a4432 100644 (file)
@@ -506,7 +506,7 @@ static int acp_init(void __iomem *acp_mmio)
        return 0;
 }
 
-/* Deintialize ACP */
+/* Deinitialize ACP */
 static int acp_deinit(void __iomem *acp_mmio)
 {
        u32 val;
index 624b3b9cb079f772f96fd473a9dbd5b186b63bb6..63b2745f8169ae2ed39a195b2a71577958f2360b 100644 (file)
@@ -1269,7 +1269,7 @@ void wm_hubs_set_bias_level(struct snd_soc_codec *codec,
                break;
 
        case SND_SOC_BIAS_ON:
-               /* Turn off any unneded single ended outputs */
+               /* Turn off any unneeded single ended outputs */
                val = 0;
                mask = 0;
 
index 1d82f68305c3e2b1783c5415e2a589073b20ca20..8cfffa70c144aff3c8f8157d3e7c000c85236620 100644 (file)
@@ -368,7 +368,7 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair)
        fsl_asrc_set_watermarks(pair, ASRC_INPUTFIFO_THRESHOLD,
                                ASRC_INPUTFIFO_THRESHOLD);
 
-       /* Configure the followings only for Ideal Ratio mode */
+       /* Configure the following only for Ideal Ratio mode */
        if (!ideal)
                return 0;
 
index 924971b6ded54f52ef56de0c84910258531bf1b9..9b031352ea3c24708a46e188813169393c998866 100644 (file)
@@ -82,7 +82,7 @@ struct lpass_variant {
         **/
        u32     dmactl_audif_start;
        u32     wrdma_channel_start;
-       /* SOC specific intialization like clocks */
+       /* SOC specific initialization like clocks */
        int (*init)(struct platform_device *pdev);
        int (*exit)(struct platform_device *pdev);
        int (*alloc_dma_channel)(struct lpass_data *data, int direction);
index a110d3987d4ae912b46c57f2f36f258b985b56e5..6dca408faae334d223494c33e14d503518b497d7 100644 (file)
@@ -3041,7 +3041,7 @@ static int snd_soc_register_dais(struct snd_soc_component *component,
        unsigned int i;
        int ret;
 
-       dev_dbg(dev, "ASoC: dai register %s #%Zu\n", dev_name(dev), count);
+       dev_dbg(dev, "ASoC: dai register %s #%zu\n", dev_name(dev), count);
 
        component->dai_drv = dai_drv;
 
index aff3d8129ac965376deea8618ae81727db41cbfa..3e9b1c0bb1ce3cb1864e1825ade56720803df289 100644 (file)
@@ -344,7 +344,7 @@ static int soc_tplg_widget_load(struct soc_tplg *tplg,
        return 0;
 }
 
-/* pass DAI configurations to component driver for extra intialization */
+/* pass DAI configurations to component driver for extra initialization */
 static int soc_tplg_dai_load(struct soc_tplg *tplg,
        struct snd_soc_dai_driver *dai_drv)
 {
@@ -354,7 +354,7 @@ static int soc_tplg_dai_load(struct soc_tplg *tplg,
        return 0;
 }
 
-/* pass link configurations to component driver for extra intialization */
+/* pass link configurations to component driver for extra initialization */
 static int soc_tplg_dai_link_load(struct soc_tplg *tplg,
        struct snd_soc_dai_link *link)
 {
index cf5dc33f4a6d6d582bea62d239f23b4e08bf750a..cf45bf1f7ee0b0a441f5a9258deaf203983f0f84 100644 (file)
@@ -137,13 +137,12 @@ static void usb_stream_hwdep_vm_open(struct vm_area_struct *area)
        snd_printdd(KERN_DEBUG "%i\n", atomic_read(&us122l->mmap_count));
 }
 
-static int usb_stream_hwdep_vm_fault(struct vm_area_struct *area,
-                                    struct vm_fault *vmf)
+static int usb_stream_hwdep_vm_fault(struct vm_fault *vmf)
 {
        unsigned long offset;
        struct page *page;
        void *vaddr;
-       struct us122l *us122l = area->vm_private_data;
+       struct us122l *us122l = vmf->vma->vm_private_data;
        struct usb_stream *s;
 
        mutex_lock(&us122l->mutex);
index 0b34dbc8f3020436d3e740bb61dea9f648a745ab..605e1047c01dfbc2dd025b9033f133aaef5fc5f4 100644 (file)
 #include "usbusx2y.h"
 #include "usX2Yhwdep.h"
 
-static int snd_us428ctls_vm_fault(struct vm_area_struct *area,
-                                 struct vm_fault *vmf)
+static int snd_us428ctls_vm_fault(struct vm_fault *vmf)
 {
        unsigned long offset;
        struct page * page;
        void *vaddr;
 
        snd_printdd("ENTER, start %lXh, pgoff %ld\n",
-                  area->vm_start,
+                  vmf->vma->vm_start,
                   vmf->pgoff);
        
        offset = vmf->pgoff << PAGE_SHIFT;
-       vaddr = (char*)((struct usX2Ydev *)area->vm_private_data)->us428ctls_sharedmem + offset;
+       vaddr = (char *)((struct usX2Ydev *)vmf->vma->vm_private_data)->us428ctls_sharedmem + offset;
        page = virt_to_page(vaddr);
        get_page(page);
        vmf->page = page;
index 90766a92e7fdf471e7f109def88bd21ebb50048f..f95164b91152da9e137594698bb90ca657b0c08f 100644 (file)
@@ -652,14 +652,13 @@ static void snd_usX2Y_hwdep_pcm_vm_close(struct vm_area_struct *area)
 }
 
 
-static int snd_usX2Y_hwdep_pcm_vm_fault(struct vm_area_struct *area,
-                                       struct vm_fault *vmf)
+static int snd_usX2Y_hwdep_pcm_vm_fault(struct vm_fault *vmf)
 {
        unsigned long offset;
        void *vaddr;
 
        offset = vmf->pgoff << PAGE_SHIFT;
-       vaddr = (char*)((struct usX2Ydev *)area->vm_private_data)->hwdep_pcm_shm + offset;
+       vaddr = (char *)((struct usX2Ydev *)vmf->vma->vm_private_data)->hwdep_pcm_shm + offset;
        vmf->page = virt_to_page(vaddr);
        get_page(vmf->page);
        return 0;
index aaf7ed329a453a87b91a38f73b6cc25c27764c82..477f00eda59184ce9e844310fc04027706838cae 100644 (file)
@@ -35,8 +35,8 @@ all: $(OUTPUT)fixdep
 
 clean:
        $(call QUIET_CLEAN, fixdep)
-       $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
-       $(Q)rm -f fixdep
+       $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+       $(Q)rm -f $(OUTPUT)fixdep
 
 $(OUTPUT)fixdep-in.o: FORCE
        $(Q)$(MAKE) $(build)=fixdep
index ad22e4e7bc593960dcd2eab055b1658227b7bf56..d360f39a445b0ef8edde51a4b2171b437b09d368 100644 (file)
@@ -3,4 +3,7 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 fixdep:
        $(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep
 
+fixdep-clean:
+       $(Q)$(MAKE) -C $(srctree)/tools/build clean
+
 .PHONY: fixdep
index 18663f59d72f7c221286da025be45ac08b590165..68b8c1516c5a719b9fd422849e4431bafdaf8065 100644 (file)
@@ -20,4 +20,7 @@ static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
                (((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;
 }
 
+#define __set_bit(nr, addr)    set_bit(nr, addr)
+#define __clear_bit(nr, addr)  clear_bit(nr, addr)
+
 #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
index beda1a884b50f0465ace7e3f01ca6014e5d3e755..4790f047a89c476624975d51514698f210900a19 100644 (file)
        unlikely(__ret_warn_on);                \
 })
 
+#define WARN_ON(condition) ({                                  \
+       int __ret_warn_on = !!(condition);                      \
+       if (unlikely(__ret_warn_on))                            \
+               __WARN_printf("assertion failed at %s:%d\n",    \
+                               __FILE__, __LINE__);            \
+       unlikely(__ret_warn_on);                                \
+})
+
 #define WARN_ON_ONCE(condition) ({                     \
        static int __warned;                            \
        int __ret_warn_once = !!(condition);            \
index eef41d500e9e5497b21532980bc1be180a02eefe..e8b9f518e36b2cddfbd415db8cde2f9a8581d876 100644 (file)
@@ -4,6 +4,7 @@
 #include <string.h>
 #include <linux/bitops.h>
 #include <stdlib.h>
+#include <linux/kernel.h>
 
 #define DECLARE_BITMAP(name,bits) \
        unsigned long name[BITS_TO_LONGS(bits)]
index fc446343ff417760dc8e3d98ebd0e33e5deb3dad..1aecad369af57f07cfc32db53dfd414cc6f5433c 100644 (file)
@@ -2,7 +2,6 @@
 #define _TOOLS_LINUX_BITOPS_H_
 
 #include <asm/types.h>
-#include <linux/kernel.h>
 #include <linux/compiler.h>
 
 #ifndef __WORDSIZE
index 6326ede9aecef7f6b417f67990894032e79b3f4a..8de163b17c0d00011d33083d247936d8265465e7 100644 (file)
@@ -25,6 +25,8 @@
 #endif
 
 #define __user
+#define __rcu
+#define __read_mostly
 
 #ifndef __attribute_const__
 # define __attribute_const__
@@ -54,6 +56,8 @@
 # define unlikely(x)           __builtin_expect(!!(x), 0)
 #endif
 
+#define uninitialized_var(x) x = *(&(x))
+
 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
 
 #include <linux/types.h>
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
new file mode 100644 (file)
index 0000000..58397dc
--- /dev/null
+++ b/tools/include/linux/spinlock.h
@@ -0,0 +1,5 @@
+#define spinlock_t             pthread_mutex_t
+#define DEFINE_SPINLOCK(x)     pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+
+#define spin_lock_irqsave(x, f)                (void)f, pthread_mutex_lock(x)
+#define spin_unlock_irqrestore(x, f)   (void)f, pthread_mutex_unlock(x)
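
A hedged usage sketch for the new pthread-backed shim; the function and variable names are illustrative. The macros expand directly to pthread calls, so callers must link against pthreads, and the flags argument of the irqsave variants is evaluated but otherwise discarded:

#include <pthread.h>
#include <linux/spinlock.h>

/* Expands to: pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
 * (the DEFINE_SPINLOCK() macro supplies its own trailing semicolon)
 */
DEFINE_SPINLOCK(stats_lock)

static unsigned long nr_events;

static void count_event(void)
{
	unsigned long flags;

	spin_lock_irqsave(&stats_lock, flags);	/* pthread_mutex_lock() */
	nr_events++;
	spin_unlock_irqrestore(&stats_lock, flags);
}
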
index d48b70ceb25a9b0f33ee12478c5fde78de001c40..207c2eeddab064d7c304efed09653ba6e227d6e9 100644 (file)
@@ -27,7 +27,7 @@
 #include "bpf.h"
 
 /*
- * When building perf, unistd.h is overrided. __NR_bpf is
+ * When building perf, unistd.h is overridden. __NR_bpf is
  * required to be defined explicitly.
  */
 #ifndef __NR_bpf
index 6d8b8f22cf55d8a825d6dd4d56f992dd5e1d63b8..42c15f906aac5baf9debab89e881003b60c889c4 100644 (file)
@@ -34,7 +34,7 @@ static unsigned long _find_next_bit(const unsigned long *addr,
 {
        unsigned long tmp;
 
-       if (!nbits || start >= nbits)
+       if (unlikely(start >= nbits))
                return nbits;
 
        tmp = addr[start / BITS_PER_LONG] ^ invert;
index f2ea78021450a53390b6754a78d25851317a1d2a..7ce724fc054478c923605385fd00d8471de831e6 100644 (file)
@@ -5225,13 +5225,13 @@ int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec)
 }
 
 /**
- * pevent_data_prempt_count - parse the preempt count from the record
+ * pevent_data_preempt_count - parse the preempt count from the record
  * @pevent: a handle to the pevent
  * @rec: the record to parse
  *
  * This returns the preempt count from a record.
  */
-int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec)
+int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec)
 {
        return parse_common_pc(pevent, rec->data);
 }
index 74cecba87daaa654504a7c66154b70490c23e8bd..66342804161c80ea611b3dfa554a602fadc4213e 100644 (file)
@@ -710,7 +710,7 @@ void pevent_data_lat_fmt(struct pevent *pevent,
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
 struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type);
 int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec);
-int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec);
+int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec);
 int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec);
 const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid);
 struct cmdline;
index f7350fcedc70dc6c0ef04159d9fa6e33cc76bcdc..a59e061c0b4a0abcf3e9fc7a76e462fd2c41954b 100644 (file)
@@ -31,9 +31,8 @@
 #define INSN_CALL_DYNAMIC      8
 #define INSN_RETURN            9
 #define INSN_CONTEXT_SWITCH    10
-#define INSN_BUG               11
-#define INSN_NOP               12
-#define INSN_OTHER             13
+#define INSN_NOP               11
+#define INSN_OTHER             12
 #define INSN_LAST              INSN_OTHER
 
 int arch_decode_instruction(struct elf *elf, struct section *sec,
index 039636ffb6c8a3edb6c14fd9a2b3a854ab84f982..6ac99e3266eb8218aa485081501e9e0da17c4c77 100644 (file)
@@ -118,9 +118,6 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
                         op2 == 0x35)
                        /* sysenter, sysret */
                        *type = INSN_CONTEXT_SWITCH;
-               else if (op2 == 0x0b || op2 == 0xb9)
-                       /* ud2 */
-                       *type = INSN_BUG;
                else if (op2 == 0x0d || op2 == 0x1f)
                        /* nopl/nopw */
                        *type = INSN_NOP;
index e8a1f699058a29ba695bfbf24781562c665e4525..5fc52ee3264c1ad9b191c454c404b5df21486beb 100644 (file)
@@ -51,7 +51,7 @@ struct instruction {
        unsigned int len, state;
        unsigned char type;
        unsigned long immediate;
-       bool alt_group, visited;
+       bool alt_group, visited, dead_end;
        struct symbol *call_dest;
        struct instruction *jump_dest;
        struct list_head alts;
@@ -329,6 +329,54 @@ static int decode_instructions(struct objtool_file *file)
        return 0;
 }
 
+/*
+ * Find all uses of the unreachable() macro, which are code path dead ends.
+ */
+static int add_dead_ends(struct objtool_file *file)
+{
+       struct section *sec;
+       struct rela *rela;
+       struct instruction *insn;
+       bool found;
+
+       sec = find_section_by_name(file->elf, ".rela__unreachable");
+       if (!sec)
+               return 0;
+
+       list_for_each_entry(rela, &sec->rela_list, list) {
+               if (rela->sym->type != STT_SECTION) {
+                       WARN("unexpected relocation symbol type in .rela__unreachable");
+                       return -1;
+               }
+               insn = find_insn(file, rela->sym->sec, rela->addend);
+               if (insn)
+                       insn = list_prev_entry(insn, list);
+               else if (rela->addend == rela->sym->sec->len) {
+                       found = false;
+                       list_for_each_entry_reverse(insn, &file->insn_list, list) {
+                               if (insn->sec == rela->sym->sec) {
+                                       found = true;
+                                       break;
+                               }
+                       }
+
+                       if (!found) {
+                               WARN("can't find unreachable insn at %s+0x%x",
+                                    rela->sym->sec->name, rela->addend);
+                               return -1;
+                       }
+               } else {
+                       WARN("can't find unreachable insn at %s+0x%x",
+                            rela->sym->sec->name, rela->addend);
+                       return -1;
+               }
+
+               insn->dead_end = true;
+       }
+
+       return 0;
+}
+
 /*
  * Warnings shouldn't be reported for ignored functions.
  */
@@ -843,6 +891,10 @@ static int decode_sections(struct objtool_file *file)
        if (ret)
                return ret;
 
+       ret = add_dead_ends(file);
+       if (ret)
+               return ret;
+
        add_ignores(file);
 
        ret = add_jump_destinations(file);
@@ -1037,13 +1089,13 @@ static int validate_branch(struct objtool_file *file,
 
                        return 0;
 
-               case INSN_BUG:
-                       return 0;
-
                default:
                        break;
                }
 
+               if (insn->dead_end)
+                       return 0;
+
                insn = next_insn_same_sec(file, insn);
                if (!insn) {
                        WARN("%s: unexpected end of section", sec->name);
index 8ffbd272952d2e57d21451408579c684a551aa65..a89273d8e74417b8d7feb4b96e7fab6a52513339 100644 (file)
@@ -39,6 +39,10 @@ OPTIONS
 --verbose::
         Be more verbose. (Show symbol address, etc)
 
+-q::
+--quiet::
+       Do not show any message.  (Suppress -v)
+
 -D::
 --dump-raw-trace::
         Dump raw trace in ASCII.
index 66dbe3dee74bcaae5b523bf3f80741b8608c87e2..a79c84ae61aaf616b931385c31969911d1d530bd 100644 (file)
@@ -73,6 +73,10 @@ OPTIONS
        Be verbose, for instance, show the raw counts in addition to the
        diff.
 
+-q::
+--quiet::
+       Do not show any message.  (Suppress -v)
+
 -f::
 --force::
         Don't do ownership validation.
index 27256bc68eda0268fc79d9d9eca06bb349a8212e..b16003ec14a743bcdcc8473798388b0849aa3523 100644 (file)
@@ -157,7 +157,7 @@ OPTIONS
 
 -a::
 --all-cpus::
-        System-wide collection from all CPUs.
+        System-wide collection from all CPUs (default if no target is specified).
 
 -p::
 --pid=::
index f2914f03ae7bb83f11a49ab194b224566e676967..c04cc0647c16e6d8bbb458d655ab8ddbec3262c9 100644 (file)
@@ -25,6 +25,10 @@ OPTIONS
 --verbose::
         Be more verbose. (show symbol address, etc)
 
+-q::
+--quiet::
+       Do not show any message.  (Suppress -v)
+
 -n::
 --show-nr-samples::
        Show the number of samples for each symbol
index d96ccd4844df9a49f33b05c6c0384b5f8e6eef05..aecf2a87e7d60bf4a759f47d7c60aac1b12aec07 100644 (file)
@@ -63,7 +63,7 @@ report::
 
 -a::
 --all-cpus::
-        system-wide collection from all CPUs
+        system-wide collection from all CPUs (default if no target is specified)
 
 -c::
 --scale::
index 8a6479c0eac9522394c0ac4fa2cdb860f142db31..170b0289a7bcd3682b3c67249bb0eac2384b70d1 100644 (file)
@@ -22,7 +22,7 @@ If you have debuginfo enabled, try: perf report -s sym,srcline
 For memory address profiling, try: perf mem record / perf mem report
 For tracepoint events, try: perf report -s trace_fields
 To record callchains for each sample: perf record -g
-To record every process run by an user: perf record -u <user>
+To record every process run by a user: perf record -u <user>
 Skip collecing build-id when recording: perf record -B
 To change sampling frequency to 100 Hz: perf record -F 100
 See assembly instructions with percentage: perf annotate <symbol>
index 2b941efadb04e34779f00c4b6348262da0fb3dd6..27c9fbca7bd9c79eb703ad2d37d4280f9d286cc4 100644 (file)
@@ -175,6 +175,10 @@ PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
 PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
 PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
 
+ifeq ($(CC), clang)
+  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+endif
+
 FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
 FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
 FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
@@ -601,6 +605,9 @@ else
       PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
       PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
       PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+      ifeq ($(CC), clang)
+        PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+      endif
       FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 
       ifneq ($(feature-libpython), 1)
index 4da19b6ba94acd03c9764733f84d089def765dbf..79fe31f20a17644e416642bb9d3a213c97286479 100644 (file)
@@ -726,13 +726,13 @@ config-clean:
        $(call QUIET_CLEAN, config)
        $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
 
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean
        $(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
        $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
        $(Q)$(RM) $(OUTPUT).config-detected
        $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
        $(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
-               $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
+               $(OUTPUT)util/intel-pt-decoder/inat-tables.c \
                $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
                $(OUTPUT)pmu-events/pmu-events.c
        $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
index ebb628332a6e59a938347eca53c3da81c859aee5..4f52d85f5ebc574daa91f29b1a3d24758c3d276d 100644 (file)
@@ -410,6 +410,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
+       OPT_BOOLEAN('q', "quiet", &quiet, "do not show any message"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
        OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
@@ -463,6 +464,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
                annotate.sym_hist_filter = argv[0];
        }
 
+       if (quiet)
+               perf_quiet_option();
+
        file.path  = input_name;
 
        annotate.session = perf_session__new(&file, false, &annotate.tool);
index 70a2893475912e3eee09f5b8787136329dec2116..1b96a3122228f913af671d315ad0a71cedb6c11d 100644 (file)
@@ -691,7 +691,7 @@ static void hists__process(struct hists *hists)
        hists__precompute(hists);
        hists__output_resort(hists, NULL);
 
-       hists__fprintf(hists, true, 0, 0, 0, stdout,
+       hists__fprintf(hists, !quiet, 0, 0, 0, stdout,
                       symbol_conf.use_callchain);
 }
 
@@ -739,12 +739,14 @@ static void data_process(void)
                                hists__link(hists_base, hists);
                }
 
-               fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
-                       perf_evsel__name(evsel_base));
+               if (!quiet) {
+                       fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
+                               perf_evsel__name(evsel_base));
+               }
 
                first = false;
 
-               if (verbose || data__files_cnt > 2)
+               if (verbose > 0 || ((data__files_cnt > 2) && !quiet))
                        data__fprintf();
 
                /* Don't sort callchain for perf diff */
@@ -807,6 +809,7 @@ static const char * const diff_usage[] = {
 static const struct option options[] = {
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
+       OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
        OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
                    "Show only items with match in baseline"),
        OPT_CALLBACK('c', "compute", &compute,
@@ -1328,6 +1331,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
 
        argc = parse_options(argc, argv, options, diff_usage, 0);
 
+       if (quiet)
+               perf_quiet_option();
+
        if (symbol__init(NULL) < 0)
                return -1;
 
index cd7bc4d104e27e878e1ed694bf6be1d9b89d0a9d..6114e07ca6131ca94ed1a6107fb69ae3ccbab145 100644 (file)
@@ -42,8 +42,8 @@ static int parse_record_events(const struct option *opt,
 
                fprintf(stderr, "%-13s%-*s%s\n",
                        e->tag,
-                       verbose ? 25 : 0,
-                       verbose ? perf_mem_events__name(j) : "",
+                       verbose > 0 ? 25 : 0,
+                       verbose > 0 ? perf_mem_events__name(j) : "",
                        e->supported ? ": available" : "");
        }
        exit(0);
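
The verbose -> verbose > 0 conversions here and throughout this set are not cosmetic: the new quiet mode stores -1 in the shared verbose variable, so a plain truthiness test would fire exactly when the user asked for silence. A minimal standalone sketch (not perf code) of the pitfall:

#include <stdio.h>

static int verbose;     /* 0 = normal, >0 = verbose, -1 = quiet */

int main(void)
{
        verbose = -1;                   /* what the quiet option effectively does */

        if (verbose)                    /* old test: also true for -1 */
                puts("BUG: verbose output in quiet mode");
        if (verbose > 0)                /* patched test: genuinely verbose only */
                puts("only printed when -v was given");
        return 0;
}
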
index 6cd6776052e7a940f78c78d43e623709256a0711..bc84a375295d7cb4920df6a4be1f91403bcdc04b 100644 (file)
@@ -432,7 +432,7 @@ static int record__open(struct record *rec)
 try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
-                               if (verbose)
+                               if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }
@@ -1677,8 +1677,12 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
+       if (quiet)
+               perf_quiet_option();
+
+       /* Make system wide (-a) the default target. */
        if (!argc && target__none(&rec->opts.target))
-               usage_with_options(record_usage, record_options);
+               rec->opts.target.system_wide = true;
 
        if (nr_cgroups && !rec->opts.target.system_wide) {
                usage_with_options_msg(record_usage, record_options,
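
As the new comment says, a bare perf record with no workload and no explicit target no longer aborts with the usage text; it falls back to system-wide profiling, so plain "perf record" now behaves like "perf record -a". The same default is applied to perf stat later in this set.
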
index dbd7fa0288616e3c29003d9de62e8a62bf068051..0a88670e56f35f6d8c5397e80264001acaf35e94 100644 (file)
@@ -320,6 +320,9 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
        size_t size = sizeof(buf);
        int socked_id = hists->socket_filter;
 
+       if (quiet)
+               return 0;
+
        if (symbol_conf.filter_relative) {
                nr_samples = hists->stats.nr_non_filtered_samples;
                nr_events = hists->stats.total_non_filtered_period;
@@ -372,7 +375,11 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 {
        struct perf_evsel *pos;
 
-       fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples);
+       if (!quiet) {
+               fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n",
+                       evlist->stats.total_lost_samples);
+       }
+
        evlist__for_each_entry(evlist, pos) {
                struct hists *hists = evsel__hists(pos);
                const char *evname = perf_evsel__name(pos);
@@ -382,7 +389,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
                        continue;
 
                hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
-               hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout,
+               hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
                               symbol_conf.use_callchain);
                fprintf(stdout, "\n\n");
        }
@@ -716,6 +723,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                    "input file name"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
+       OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
        OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
@@ -863,6 +871,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                report.symbol_filter_str = argv[0];
        }
 
+       if (quiet)
+               perf_quiet_option();
+
        if (symbol_conf.vmlinux_name &&
            access(symbol_conf.vmlinux_name, R_OK)) {
                pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
@@ -983,14 +994,14 @@ repeat:
                goto error;
        }
 
-       if (report.header || report.header_only) {
+       if ((report.header || report.header_only) && !quiet) {
                perf_session__fprintf_info(session, stdout,
                                           report.show_full_info);
                if (report.header_only) {
                        ret = 0;
                        goto error;
                }
-       } else if (use_browser == 0) {
+       } else if (use_browser == 0 && !quiet) {
                fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n",
                      stdout);
        }
@@ -1009,7 +1020,7 @@ repeat:
                 * providing it only in verbose mode not to bloat too
                 * much struct symbol.
                 */
-               if (verbose) {
+               if (verbose > 0) {
                        /*
                         * XXX: Need to provide a less kludgy way to ask for
                         * more space per symbol, the u32 is for the index on
index 270eb2d8ca6b24bb6b7c74ff0b068417e655d8e7..b94cf0de715ab9a2d6205c12053916c31d276a13 100644 (file)
@@ -460,7 +460,7 @@ static struct task_desc *register_pid(struct perf_sched *sched,
        BUG_ON(!sched->tasks);
        sched->tasks[task->nr] = task;
 
-       if (verbose)
+       if (verbose > 0)
                printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);
 
        return task;
@@ -794,7 +794,7 @@ replay_wakeup_event(struct perf_sched *sched,
        const u32 pid    = perf_evsel__intval(evsel, sample, "pid");
        struct task_desc *waker, *wakee;
 
-       if (verbose) {
+       if (verbose > 0) {
                printf("sched_wakeup event %p\n", evsel);
 
                printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid);
@@ -822,7 +822,7 @@ static int replay_switch_event(struct perf_sched *sched,
        int cpu = sample->cpu;
        s64 delta;
 
-       if (verbose)
+       if (verbose > 0)
                printf("sched_switch event %p\n", evsel);
 
        if (cpu >= MAX_CPUS || cpu < 0)
@@ -870,7 +870,7 @@ static int replay_fork_event(struct perf_sched *sched,
                goto out_put;
        }
 
-       if (verbose) {
+       if (verbose > 0) {
                printf("fork event\n");
                printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
                printf("...  child: %s/%d\n", thread__comm_str(child), child->tid);
@@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 
        timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
        color_fprintf(stdout, color, "  %12s secs ", stimestamp);
-       if (new_shortname || (verbose && sched_in->tid)) {
+       if (new_shortname || (verbose > 0 && sched_in->tid)) {
                const char *pid_color = color;
 
                if (thread__has_color(sched_in))
@@ -2050,7 +2050,7 @@ static void save_task_callchain(struct perf_sched *sched,
 
        if (thread__resolve_callchain(thread, cursor, evsel, sample,
                                      NULL, NULL, sched->max_stack + 2) != 0) {
-               if (verbose)
+               if (verbose > 0)
                        error("Failed to resolve callchain. Skipping\n");
 
                return;
index f28719178b519b92be214b2fe0405205e6776652..13b54999ad79ecd4f765d557bf57764486f1fac8 100644 (file)
@@ -573,7 +573,7 @@ try_again:
                        if (errno == EINVAL || errno == ENOSYS ||
                            errno == ENOENT || errno == EOPNOTSUPP ||
                            errno == ENXIO) {
-                               if (verbose)
+                               if (verbose > 0)
                                        ui__warning("%s event is not supported by the kernel.\n",
                                                    perf_evsel__name(counter));
                                counter->supported = false;
@@ -582,7 +582,7 @@ try_again:
                                    !(counter->leader->nr_members > 1))
                                        continue;
                        } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
-                                if (verbose)
+                                if (verbose > 0)
                                         ui__warning("%s\n", msg);
                                 goto try_again;
                         }
@@ -1765,7 +1765,7 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, i
 
        cpu = map->map[idx];
 
-       if (cpu >= env->nr_cpus_online)
+       if (cpu >= env->nr_cpus_avail)
                return -1;
 
        return cpu;
@@ -2445,8 +2445,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        } else if (big_num_opt == 0) /* User passed --no-big-num */
                big_num = false;
 
+       /* Make system wide (-a) the default target. */
        if (!argc && target__none(&target))
-               usage_with_options(stat_usage, stat_options);
+               target.system_wide = true;
 
        if (run_count < 0) {
                pr_err("Run count must be a positive number\n");
@@ -2538,7 +2539,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
        status = 0;
        for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
-               if (run_count != 1 && verbose)
+               if (run_count != 1 && verbose > 0)
                        fprintf(output, "[ perf stat: executing run #%d ... ]\n",
                                run_idx + 1);
 
index 5a7fd7af3a6de39d4a0d5c5ed5a758c92aeb88f3..ab9077915763f19a71b1b36a8b50135e6a0ffbb3 100644 (file)
@@ -871,7 +871,7 @@ try_again:
                if (perf_evsel__open(counter, top->evlist->cpus,
                                     top->evlist->threads) < 0) {
                        if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
-                               if (verbose)
+                               if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }
index 40ef9b293d1b4ffa0213108ca3ec391621c1267d..256f1fac6f7e0069ebb047cf080fa55999dd0ae1 100644 (file)
@@ -1399,7 +1399,7 @@ static struct syscall *trace__syscall_info(struct trace *trace,
        return &trace->syscalls.table[id];
 
 out_cant_read:
-       if (verbose) {
+       if (verbose > 0) {
                fprintf(trace->output, "Problems reading syscall %d", id);
                if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
                        fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
@@ -1801,10 +1801,10 @@ static void print_location(FILE *f, struct perf_sample *sample,
                           bool print_dso, bool print_sym)
 {
 
-       if ((verbose || print_dso) && al->map)
+       if ((verbose > 0 || print_dso) && al->map)
                fprintf(f, "%s@", al->map->dso->long_name);
 
-       if ((verbose || print_sym) && al->sym)
+       if ((verbose > 0 || print_sym) && al->sym)
                fprintf(f, "%s+0x%" PRIx64, al->sym->name,
                        al->addr - al->sym->start);
        else if (al->map)
index f67bbb0aa36e572c46b5847736cc3d3efa188f8f..0544398d6e2dd599e92a3771a72a52138a72d2f7 100644 (file)
@@ -49,7 +49,7 @@ static char *mapfile(const char *fn, size_t *size)
        int err;
        int fd = open(fn, O_RDONLY);
 
-       if (fd < 0 && verbose && fn) {
+       if (fd < 0 && verbose > 0 && fn) {
                pr_err("Error opening events file '%s': %s\n", fn,
                                strerror(errno));
        }
index 28d1605b033896aa4b87844eeffa6d9dc1646e6b..88dc51f4c27b2df8c5d8dc482a189b9831cb2832 100644 (file)
@@ -144,7 +144,7 @@ static int run_dir(const char *d, const char *perf)
        int vcnt = min(verbose, (int) sizeof(v) - 1);
        char cmd[3*PATH_MAX];
 
-       if (verbose)
+       if (verbose > 0)
                vcnt++;
 
        snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
index 37e326bfd2dc3a273032eeac68de3c8d4383104f..83c4669cbc5b9e30576321026dd38d68aa117fde 100644 (file)
@@ -299,7 +299,7 @@ static int run_test(struct test *test, int subtest)
                if (!dont_fork) {
                        pr_debug("test child forked, pid %d\n", getpid());
 
-                       if (!verbose) {
+                       if (verbose <= 0) {
                                int nullfd = open("/dev/null", O_WRONLY);
 
                                if (nullfd >= 0) {
index ff5bc6363a79de05084aca11ec856f468ce9903d..d1f693041324a8a6670ff56081537c3ed3f528f0 100644 (file)
@@ -599,7 +599,7 @@ static int do_test_code_reading(bool try_kcore)
                                continue;
                        }
 
-                       if (verbose) {
+                       if (verbose > 0) {
                                char errbuf[512];
                                perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
                                pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
index a2b5ff9bf83d615b67d97a00ec38e04e3f8f6291..bc5982f42dc3a173d81e5100172578ac4bdb280b 100644 (file)
@@ -19,7 +19,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
 {
        int printed = 0;
 
-       if (!verbose)
+       if (verbose <= 0)
                return 0;
 
        printed += fprintf(fp, "\n%s: ", prefix);
index d357dab72e68862e90d916092755a12f535ec1a4..482b5365e68d85b3ea4915631f398873e575d72b 100644 (file)
@@ -76,7 +76,7 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
         * Skip this test if user's .perfconfig doesn't set [llvm] section
         * and clang is not found in $PATH, and this is not perf test -v
         */
-       if (!force && (verbose == 0 &&
+       if (!force && (verbose <= 0 &&
                       !llvm_param.user_set_param &&
                       llvm__search_clang())) {
                pr_debug("No clang and no verbosive, skip this test\n");
index aa9276bfe3e9b7b6ed1def3889142c645c1e8e30..1dc8380144220bd2c37f5c6338ad9a38e1c641bf 100644 (file)
@@ -1808,7 +1808,7 @@ static void debug_warn(const char *warn, va_list params)
 {
        char msg[1024];
 
-       if (!verbose)
+       if (verbose <= 0)
                return;
 
        vsnprintf(msg, sizeof(msg), warn, params);
index 541da7a68f91fc4631c0c20f313a9b58f720771b..87893f3ba5f1766cffa093ce44db50de45526279 100644 (file)
@@ -172,13 +172,13 @@ int test__PERF_RECORD(int subtest __maybe_unused)
 
                                err = perf_evlist__parse_sample(evlist, event, &sample);
                                if (err < 0) {
-                                       if (verbose)
+                                       if (verbose > 0)
                                                perf_event__fprintf(event, stderr);
                                        pr_debug("Couldn't parse sample\n");
                                        goto out_delete_evlist;
                                }
 
-                               if (verbose) {
+                               if (verbose > 0) {
                                        pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
                                        perf_event__fprintf(event, stderr);
                                }
index 7a52834ee0d0e848802b104719ee7726e3b97aae..fa79509da535403888c41cfa33a3ca3ff89033fd 100644 (file)
@@ -15,7 +15,7 @@ int test__python_use(int subtest __maybe_unused)
        int ret;
 
        if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s",
-                    PYTHONPATH, PYTHON, verbose ? "" : "2> /dev/null") < 0)
+                    PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0)
                return -1;
 
        ret = system(cmd) ? -1 : 0;
index a4a4b4625ac3d8864531b781c747945b53f1f849..f2d2e542d0ee77a8abf7bfc6074f0934da85b5c4 100644 (file)
@@ -109,7 +109,7 @@ int test__thread_map_remove(int subtest __maybe_unused)
        TEST_ASSERT_VAL("failed to allocate thread_map",
                        threads);
 
-       if (verbose)
+       if (verbose > 0)
                thread_map__fprintf(threads, stderr);
 
        TEST_ASSERT_VAL("failed to remove thread",
@@ -117,7 +117,7 @@ int test__thread_map_remove(int subtest __maybe_unused)
 
        TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1);
 
-       if (verbose)
+       if (verbose > 0)
                thread_map__fprintf(threads, stderr);
 
        TEST_ASSERT_VAL("failed to remove thread",
@@ -125,7 +125,7 @@ int test__thread_map_remove(int subtest __maybe_unused)
 
        TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0);
 
-       if (verbose)
+       if (verbose > 0)
                thread_map__fprintf(threads, stderr);
 
        TEST_ASSERT_VAL("failed to not remove thread",
index 98fe69ac553c8462f4fe1dcbf610ff67e3c54948..803f893550d64c03c6354acf88045821cc7fe171 100644 (file)
@@ -65,7 +65,9 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
        session = perf_session__new(&file, false, NULL);
        TEST_ASSERT_VAL("can't get session", session);
 
-       for (i = 0; i < session->header.env.nr_cpus_online; i++) {
+       for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
+               if (!cpu_map__has(map, i))
+                       continue;
                pr_debug("CPU %d, core %d, socket %d\n", i,
                         session->header.env.cpu[i].core_id,
                         session->header.env.cpu[i].socket_id);
index a5082331f2464929ed4ff6d3cae9869c41b1772e..862b043e59243588671c75298b337d8cd4e29c3b 100644 (file)
@@ -168,7 +168,7 @@ next_pair:
                err = -1;
        }
 
-       if (!verbose)
+       if (verbose <= 0)
                goto out;
 
        header_printed = false;
index 98a34664bb7eb16667ee7169129e7e5b5b8022db..9ce142de536d0dcbb4017d2d80a40a9a668775c4 100644 (file)
@@ -73,7 +73,7 @@ static int map_browser__run(struct map_browser *browser)
 
        if (ui_browser__show(&browser->b, browser->map->dso->long_name,
                             "Press ESC to exit, %s / to search",
-                            verbose ? "" : "restart with -v to use") < 0)
+                            verbose > 0 ? "" : "restart with -v to use") < 0)
                return -1;
 
        while (1) {
@@ -81,7 +81,7 @@ static int map_browser__run(struct map_browser *browser)
 
                switch (key) {
                case '/':
-                       if (verbose)
+                       if (verbose > 0)
                                map_browser__search(browser);
                default:
                        break;
@@ -117,7 +117,7 @@ int map__browse(struct map *map)
 
                if (maxaddr < pos->end)
                        maxaddr = pos->end;
-               if (verbose) {
+               if (verbose > 0) {
                        u32 *idx = symbol__browser_index(pos);
                        *idx = mb.b.nr_entries;
                }
index 18cfcdc90356f89f75b328870cc5fcc621b2005f..5d632dca672aef2851778fb50be672e39c99bf50 100644 (file)
@@ -648,7 +648,7 @@ unsigned int hists__sort_list_width(struct hists *hists)
                ret += fmt->width(fmt, &dummy_hpp, hists);
        }
 
-       if (verbose && hists__has(hists, sym)) /* Addr + origin */
+       if (verbose > 0 && hists__has(hists, sym)) /* Addr + origin */
                ret += 3 + BITS_PER_LONG / 4;
 
        return ret;
index 06cc04e5806a2692fffabbc2c038b82380dfcafd..273f21fa32b55999ab1271e6d3cc316c3a257a41 100644 (file)
@@ -1768,7 +1768,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
        printf("%-*.*s----\n",
               graph_dotted_len, graph_dotted_len, graph_dotted_line);
 
-       if (verbose)
+       if (verbose > 0)
                symbol__annotate_hits(sym, evsel);
 
        list_for_each_entry(pos, &notes->src->source, node) {
index 8fdee24725a7f59febb37eeb85b3a22d8dd1f82b..eafbf11442b224f90ad9c5d704df75d86985f917 100644 (file)
@@ -12,8 +12,8 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
 {
        FILE *fp;
        char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
+       char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
        char *token, *saved_ptr = NULL;
-       int found = 0;
 
        fp = fopen("/proc/mounts", "r");
        if (!fp)
@@ -24,31 +24,43 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
         * and inspect every cgroupfs mount point to find one that has
         * perf_event subsystem
         */
+       path_v1[0] = '\0';
+       path_v2[0] = '\0';
+
        while (fscanf(fp, "%*s %"STR(PATH_MAX)"s %"STR(PATH_MAX)"s %"
                                STR(PATH_MAX)"s %*d %*d\n",
                                mountpoint, type, tokens) == 3) {
 
-               if (!strcmp(type, "cgroup")) {
+               if (!path_v1[0] && !strcmp(type, "cgroup")) {
 
                        token = strtok_r(tokens, ",", &saved_ptr);
 
                        while (token != NULL) {
                                if (!strcmp(token, "perf_event")) {
-                                       found = 1;
+                                       strcpy(path_v1, mountpoint);
                                        break;
                                }
                                token = strtok_r(NULL, ",", &saved_ptr);
                        }
                }
-               if (found)
+
+               if (!path_v2[0] && !strcmp(type, "cgroup2"))
+                       strcpy(path_v2, mountpoint);
+
+               if (path_v1[0] && path_v2[0])
                        break;
        }
        fclose(fp);
-       if (!found)
+
+       if (path_v1[0])
+               path = path_v1;
+       else if (path_v2[0])
+               path = path_v2;
+       else
                return -1;
 
-       if (strlen(mountpoint) < maxlen) {
-               strcpy(buf, mountpoint);
+       if (strlen(path) < maxlen) {
+               strcpy(buf, path);
                return 0;
        }
        return -1;
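
A usage sketch of the rewritten lookup; the /proc/mounts lines in the comment are hypothetical, and the snippet assumes the function as patched above rather than being standalone:

/*
 * Hypothetical /proc/mounts excerpt:
 *   cgroup2 /sys/fs/cgroup cgroup2 rw,nosuid,nodev 0 0
 *   cgroup /sys/fs/cgroup/perf_event cgroup rw,perf_event 0 0
 *
 * Both candidates are remembered during the single pass, but v1 wins:
 * buf receives "/sys/fs/cgroup/perf_event".  Only when no v1 mount
 * carries the perf_event controller is the cgroup2 path used.
 */
char buf[PATH_MAX + 1];

if (cgroupfs_find_mountpoint(buf, sizeof(buf)) == 0)
        printf("cgroup mountpoint: %s\n", buf);
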
index 2c0b52264a468103e9bbd32b8631d76fae585ff4..8c750493911369976d0fa171e74f3b678f62c054 100644 (file)
@@ -9,6 +9,7 @@
 #include "asm/bug.h"
 
 static int max_cpu_num;
+static int max_present_cpu_num;
 static int max_node_num;
 static int *cpunode_map;
 
@@ -442,6 +443,7 @@ static void set_max_cpu_num(void)
 
        /* set up default */
        max_cpu_num = 4096;
+       max_present_cpu_num = 4096;
 
        mnt = sysfs__mountpoint();
        if (!mnt)
@@ -455,6 +457,17 @@ static void set_max_cpu_num(void)
        }
 
        ret = get_max_num(path, &max_cpu_num);
+       if (ret)
+               goto out;
+
+       /* get the highest present cpu number for a sparse allocation */
+       ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
+       if (ret == PATH_MAX) {
+               pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+               goto out;
+       }
+
+       ret = get_max_num(path, &max_present_cpu_num);
 
 out:
        if (ret)
@@ -505,6 +518,15 @@ int cpu__max_cpu(void)
        return max_cpu_num;
 }
 
+int cpu__max_present_cpu(void)
+{
+       if (unlikely(!max_present_cpu_num))
+               set_max_cpu_num();
+
+       return max_present_cpu_num;
+}
+
+
 int cpu__get_node(int cpu)
 {
        if (unlikely(cpunode_map == NULL)) {
index 06bd689f598972fa4e57add5487ad62ac685a22f..1a0549af8f5c944b4fc2b8b619164a107dcf613d 100644 (file)
@@ -62,6 +62,7 @@ int cpu__setup_cpunode_map(void);
 
 int cpu__max_node(void);
 int cpu__max_cpu(void);
+int cpu__max_present_cpu(void);
 int cpu__get_node(int cpu);
 
 int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
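
The new accessor pairs naturally with cpu_map__has(), as the topology test above and the header.c changes below use it. A condensed sketch of that walk, iterating up to the highest present CPU and skipping holes via the online map:

struct cpu_map *map = cpu_map__new(NULL);       /* NULL => online CPUs */
int cpu;

if (map) {
        for (cpu = 0; cpu < cpu__max_present_cpu(); cpu++) {
                if (!cpu_map__has(map, cpu))
                        continue;               /* absent or offline CPU */
                /* ... per-CPU work, e.g. read its sysfs topology ... */
        }
        cpu_map__put(map);
}
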
index c1838b643108bda4d6fc536ae232f20e643a9ce2..03eb81f30d0d0d471fdff09e8779ed02fd73eb36 100644 (file)
@@ -203,11 +203,28 @@ int perf_debug_option(const char *str)
                v = (v < 0) || (v > 10) ? 0 : v;
        }
 
+       if (quiet)
+               v = -1;
+
        *var->ptr = v;
        free(s);
        return 0;
 }
 
+int perf_quiet_option(void)
+{
+       struct debug_variable *var = &debug_variables[0];
+
+       /* disable all debug messages */
+       while (var->name) {
+               *var->ptr = -1;
+               var++;
+       }
+
+       quiet = true;
+       return 0;
+}
+
 #define DEBUG_WRAPPER(__n, __l)                                \
 static int pr_ ## __n ## _wrapper(const char *fmt, ...)        \
 {                                                      \
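
perf_quiet_option() relies on debug_variables[] being a table of {name, pointer} entries terminated by a NULL name; the table is defined elsewhere in util/debug.c, so the shape below is a hypothetical sketch and the entry names are illustrative:

struct debug_variable {
        const char *name;
        int *ptr;
};

static int verbose;                     /* the level forced to -1 when quiet */

static struct debug_variable debug_variables[] = {
        { .name = "verbose", .ptr = &verbose },
        { .name = NULL }        /* sentinel: stops the while (var->name) walk */
};
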
index d242adc3d5a2ac7217879a37d0f5ed26b344351a..98832f5531d3d0931b8a21560e56330bf1db5cdc 100644 (file)
@@ -54,5 +54,6 @@ int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
 void perf_debug_setup(void);
+int perf_quiet_option(void);
 
 #endif /* __PERF_DEBUG_H */
index 28d41e709128f51414817ad21a16ea636c007709..d38b62a700ca126c293756baa83dbc27df61e53a 100644 (file)
@@ -951,7 +951,7 @@ static struct dso *__dso__findlink_by_longname(struct rb_root *root,
                if (rc == 0) {
                        /*
                         * In case the new DSO is a duplicate of an existing
-                        * one, print an one-time warning & put the new entry
+                        * one, print a one-time warning & put the new entry
                         * at the end of the list of duplicates.
                         */
                        if (!dso || (dso == this))
@@ -1058,7 +1058,7 @@ int dso__name_len(const struct dso *dso)
 {
        if (!dso)
                return strlen("[unknown]");
-       if (verbose)
+       if (verbose > 0)
                return dso->long_name_len;
 
        return dso->short_name_len;
index bb964e86b09de18bb1eafe3765e5fccb1e87e448..075fc77286bf05feb5cca14fc3825f34d0fdaed1 100644 (file)
@@ -66,7 +66,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
                return 0;
 
        if (env->nr_cpus_avail == 0)
-               env->nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF);
+               env->nr_cpus_avail = cpu__max_present_cpu();
 
        nr_cpus = env->nr_cpus_avail;
        if (nr_cpus == -1)
index 3d12c16e51034a8591eba97221d20b37603dd40a..05714d548584b30297892854469c584d280f3a8b 100644 (file)
@@ -295,11 +295,7 @@ static int write_nrcpus(int fd, struct perf_header *h __maybe_unused,
        u32 nrc, nra;
        int ret;
 
-       nr = sysconf(_SC_NPROCESSORS_CONF);
-       if (nr < 0)
-               return -1;
-
-       nrc = (u32)(nr & UINT_MAX);
+       nrc = cpu__max_present_cpu();
 
        nr = sysconf(_SC_NPROCESSORS_ONLN);
        if (nr < 0)
@@ -505,24 +501,29 @@ static void free_cpu_topo(struct cpu_topo *tp)
 
 static struct cpu_topo *build_cpu_topology(void)
 {
-       struct cpu_topo *tp;
+       struct cpu_topo *tp = NULL;
        void *addr;
        u32 nr, i;
        size_t sz;
        long ncpus;
        int ret = -1;
+       struct cpu_map *map;
 
-       ncpus = sysconf(_SC_NPROCESSORS_CONF);
-       if (ncpus < 0)
+       ncpus = cpu__max_present_cpu();
+
+       /* build online CPU map */
+       map = cpu_map__new(NULL);
+       if (map == NULL) {
+               pr_debug("failed to get system cpumap\n");
                return NULL;
+       }
 
        nr = (u32)(ncpus & UINT_MAX);
 
        sz = nr * sizeof(char *);
-
        addr = calloc(1, sizeof(*tp) + 2 * sz);
        if (!addr)
-               return NULL;
+               goto out_free;
 
        tp = addr;
        tp->cpu_nr = nr;
@@ -532,10 +533,16 @@ static struct cpu_topo *build_cpu_topology(void)
        tp->thread_siblings = addr;
 
        for (i = 0; i < nr; i++) {
+               if (!cpu_map__has(map, i))
+                       continue;
+
                ret = build_cpu_topo(tp, i);
                if (ret < 0)
                        break;
        }
+
+out_free:
+       cpu_map__put(map);
        if (ret) {
                free_cpu_topo(tp);
                tp = NULL;
@@ -1126,7 +1133,7 @@ static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused,
 {
        int nr, i;
        char *str;
-       int cpu_nr = ph->env.nr_cpus_online;
+       int cpu_nr = ph->env.nr_cpus_avail;
 
        nr = ph->env.nr_sibling_cores;
        str = ph->env.sibling_cores;
@@ -1781,7 +1788,7 @@ static int process_cpu_topology(struct perf_file_section *section,
        u32 nr, i;
        char *str;
        struct strbuf sb;
-       int cpu_nr = ph->env.nr_cpus_online;
+       int cpu_nr = ph->env.nr_cpus_avail;
        u64 size = 0;
 
        ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
@@ -1862,7 +1869,7 @@ static int process_cpu_topology(struct perf_file_section *section,
                if (ph->needs_swap)
                        nr = bswap_32(nr);
 
-               if (nr > (u32)cpu_nr) {
+               if (nr != (u32)-1 && nr > (u32)cpu_nr) {
                        pr_debug("socket_id number is too big."
                                 "You may need to upgrade the perf tool.\n");
                        goto free_cpu;
index 32c6a939e4cc6879d872574e27b7dab28970cb2c..eaf72a938fb423ed4ba46982c69324d2341839bc 100644 (file)
@@ -69,7 +69,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
         */
        if (h->ms.sym) {
                symlen = h->ms.sym->namelen + 4;
-               if (verbose)
+               if (verbose > 0)
                        symlen += BITS_PER_LONG / 4 + 2 + 3;
                hists__new_col_len(hists, HISTC_SYMBOL, symlen);
        } else {
@@ -93,7 +93,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
        if (h->branch_info) {
                if (h->branch_info->from.sym) {
                        symlen = (int)h->branch_info->from.sym->namelen + 4;
-                       if (verbose)
+                       if (verbose > 0)
                                symlen += BITS_PER_LONG / 4 + 2 + 3;
                        hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
 
@@ -107,7 +107,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 
                if (h->branch_info->to.sym) {
                        symlen = (int)h->branch_info->to.sym->namelen + 4;
-                       if (verbose)
+                       if (verbose > 0)
                                symlen += BITS_PER_LONG / 4 + 2 + 3;
                        hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
 
index 281e44af31e2fb0b2e32928290f3c4cdfd03cccf..67a8aebc67ab492a9936ad56ae9655526f9d9781 100644 (file)
@@ -2318,24 +2318,20 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term)
        return term->type_term != PARSE_EVENTS__TERM_TYPE_USER;
 }
 
-static int new_term(struct parse_events_term **_term, int type_val,
-                   int type_term, char *config,
-                   char *str, u64 num, int err_term, int err_val)
+static int new_term(struct parse_events_term **_term,
+                   struct parse_events_term *temp,
+                   char *str, u64 num)
 {
        struct parse_events_term *term;
 
-       term = zalloc(sizeof(*term));
+       term = malloc(sizeof(*term));
        if (!term)
                return -ENOMEM;
 
+       *term = *temp;
        INIT_LIST_HEAD(&term->list);
-       term->type_val  = type_val;
-       term->type_term = type_term;
-       term->config = config;
-       term->err_term = err_term;
-       term->err_val  = err_val;
 
-       switch (type_val) {
+       switch (term->type_val) {
        case PARSE_EVENTS__TERM_TYPE_NUM:
                term->val.num = num;
                break;
@@ -2353,15 +2349,22 @@ static int new_term(struct parse_events_term **_term, int type_val,
 
 int parse_events_term__num(struct parse_events_term **term,
                           int type_term, char *config, u64 num,
+                          bool no_value,
                           void *loc_term_, void *loc_val_)
 {
        YYLTYPE *loc_term = loc_term_;
        YYLTYPE *loc_val = loc_val_;
 
-       return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
-                       config, NULL, num,
-                       loc_term ? loc_term->first_column : 0,
-                       loc_val ? loc_val->first_column : 0);
+       struct parse_events_term temp = {
+               .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+               .type_term = type_term,
+               .config    = config,
+               .no_value  = no_value,
+               .err_term  = loc_term ? loc_term->first_column : 0,
+               .err_val   = loc_val  ? loc_val->first_column  : 0,
+       };
+
+       return new_term(term, &temp, NULL, num);
 }
 
 int parse_events_term__str(struct parse_events_term **term,
@@ -2371,37 +2374,45 @@ int parse_events_term__str(struct parse_events_term **term,
        YYLTYPE *loc_term = loc_term_;
        YYLTYPE *loc_val = loc_val_;
 
-       return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
-                       config, str, 0,
-                       loc_term ? loc_term->first_column : 0,
-                       loc_val ? loc_val->first_column : 0);
+       struct parse_events_term temp = {
+               .type_val  = PARSE_EVENTS__TERM_TYPE_STR,
+               .type_term = type_term,
+               .config    = config,
+               .err_term  = loc_term ? loc_term->first_column : 0,
+               .err_val   = loc_val  ? loc_val->first_column  : 0,
+       };
+
+       return new_term(term, &temp, str, 0);
 }
 
 int parse_events_term__sym_hw(struct parse_events_term **term,
                              char *config, unsigned idx)
 {
        struct event_symbol *sym;
+       struct parse_events_term temp = {
+               .type_val  = PARSE_EVENTS__TERM_TYPE_STR,
+               .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+               .config    = config ?: (char *) "event",
+       };
 
        BUG_ON(idx >= PERF_COUNT_HW_MAX);
        sym = &event_symbols_hw[idx];
 
-       if (config)
-               return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
-                               PARSE_EVENTS__TERM_TYPE_USER, config,
-                               (char *) sym->symbol, 0, 0, 0);
-       else
-               return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
-                               PARSE_EVENTS__TERM_TYPE_USER,
-                               (char *) "event", (char *) sym->symbol,
-                               0, 0, 0);
+       return new_term(term, &temp, (char *) sym->symbol, 0);
 }
 
 int parse_events_term__clone(struct parse_events_term **new,
                             struct parse_events_term *term)
 {
-       return new_term(new, term->type_val, term->type_term, term->config,
-                       term->val.str, term->val.num,
-                       term->err_term, term->err_val);
+       struct parse_events_term temp = {
+               .type_val  = term->type_val,
+               .type_term = term->type_term,
+               .config    = term->config,
+               .err_term  = term->err_term,
+               .err_val   = term->err_val,
+       };
+
+       return new_term(new, &temp, term->val.str, term->val.num);
 }
 
 void parse_events_terms__purge(struct list_head *terms)
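
The refactor above replaces new_term()'s long scalar parameter list with a caller-built template that is copied wholesale; because every call site now builds the template with designated initializers, unnamed fields are zeroed on the stack, which is why zalloc() can safely become malloc() followed by a struct copy. A self-contained illustration of the idiom:

#include <stdlib.h>

struct term {
        int type;
        const char *config;
        int err_col;            /* fields left out of an initializer are 0 */
};

/* Constructor takes one template instead of a growing argument list. */
static int term_new(struct term **out, const struct term *temp)
{
        struct term *t = malloc(sizeof(*t));

        if (!t)
                return -1;
        *t = *temp;             /* one struct copy replaces per-field stores */
        *out = t;
        return 0;
}

int main(void)
{
        struct term temp = { .type = 1, .config = "event" };
        struct term *t;

        if (term_new(&t, &temp))
                return 1;
        free(t);
        return 0;
}
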
index da246a3ddb69f7700316dedf0b00beb054046f15..1af6a267c21bfd3a437e3bed1f2122f96c4dced7 100644 (file)
@@ -94,6 +94,7 @@ struct parse_events_term {
        int type_term;
        struct list_head list;
        bool used;
+       bool no_value;
 
        /* error string indexes for within parsed string */
        int err_term;
@@ -122,6 +123,7 @@ void parse_events__shrink_config_terms(void);
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
 int parse_events_term__num(struct parse_events_term **term,
                           int type_term, char *config, u64 num,
+                          bool novalue,
                           void *loc_term, void *loc_val);
 int parse_events_term__str(struct parse_events_term **term,
                           int type_term, char *config, char *str,
index a14b47ab3879bd67db8854d495ba2e195c4f9068..30f018ea137096c22d6798e4bcd7723373db26af 100644 (file)
@@ -252,7 +252,7 @@ PE_KERNEL_PMU_EVENT sep_dc
                        if (!strcasecmp(alias->name, $1)) {
                                ALLOC_LIST(head);
                                ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, 1, &@1, NULL));
+                                       $1, 1, false, &@1, NULL));
                                list_add_tail(&term->list, head);
 
                                if (!parse_events_add_pmu(data, list,
@@ -282,7 +282,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
 
        ALLOC_LIST(head);
        ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       &pmu_name, 1, &@1, NULL));
+                                       &pmu_name, 1, false, &@1, NULL));
        list_add_tail(&term->list, head);
 
        ALLOC_LIST(list);
@@ -548,7 +548,7 @@ PE_NAME '=' PE_VALUE
        struct parse_events_term *term;
 
        ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3, &@1, &@3));
+                                       $1, $3, false, &@1, &@3));
        $$ = term;
 }
 |
@@ -566,7 +566,7 @@ PE_NAME
        struct parse_events_term *term;
 
        ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, 1, &@1, NULL));
+                                       $1, 1, true, &@1, NULL));
        $$ = term;
 }
 |
@@ -591,7 +591,7 @@ PE_TERM '=' PE_VALUE
 {
        struct parse_events_term *term;
 
-       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, &@1, &@3));
+       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
        $$ = term;
 }
 |
@@ -599,7 +599,7 @@ PE_TERM
 {
        struct parse_events_term *term;
 
-       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
+       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
        $$ = term;
 }
 |
@@ -620,7 +620,7 @@ PE_NAME array '=' PE_VALUE
        struct parse_events_term *term;
 
        ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $4, &@1, &@4));
+                                       $1, $4, false, &@1, &@4));
        term->array = $2;
        $$ = term;
 }
index 49bfee0e3d9ed0b483180a74b7bfd96954f4968b..12f84dd2ac5dfddc9f7e124fa25b5da4bb2be021 100644 (file)
@@ -745,7 +745,7 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
                }
        }
 
-       if (verbose)
+       if (verbose > 0)
                printf("Required parameter '%s' not specified\n", term->config);
 
        return -1;
@@ -803,7 +803,7 @@ static int pmu_config_term(struct list_head *formats,
 
        format = pmu_find_format(formats, term->config);
        if (!format) {
-               if (verbose)
+               if (verbose > 0)
                        printf("Invalid event/parameter '%s'\n", term->config);
                if (err) {
                        char *pmu_term = pmu_formats_string(formats);
@@ -834,11 +834,20 @@ static int pmu_config_term(struct list_head *formats,
         * Either directly use a numeric term, or try to translate string terms
         * using event parameters.
         */
-       if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+       if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+               if (term->no_value &&
+                   bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) {
+                       if (err) {
+                               err->idx = term->err_val;
+                               err->str = strdup("no value assigned for term");
+                       }
+                       return -EINVAL;
+               }
+
                val = term->val.num;
-       else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+       } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
                if (strcmp(term->val.str, "?")) {
-                       if (verbose) {
+                       if (verbose > 0) {
                                pr_info("Invalid sysfs entry %s=%s\n",
                                                term->config, term->val.str);
                        }
@@ -1223,7 +1232,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
                        printf("%*s", 8, "[");
                        wordwrap(aliases[j].desc, 8, columns, 0);
                        printf("]\n");
-                       if (verbose)
+                       if (verbose > 0)
                                printf("%*s%s/%s/\n", 8, "", aliases[j].pmu, aliases[j].str);
                } else
                        printf("  %-50s [Kernel PMU event]\n", aliases[j].name);
index 35f5b7b7715c39e054591644d88b369bbf35de5e..28fb62c32678483cd6d54a9c88e620022e1f9d08 100644 (file)
@@ -594,7 +594,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
        pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
                 tp->module ? : "kernel");
 
-       dinfo = debuginfo_cache__open(tp->module, verbose == 0);
+       dinfo = debuginfo_cache__open(tp->module, verbose <= 0);
        if (dinfo)
                ret = debuginfo__find_probe_point(dinfo,
                                                 (unsigned long)addr, pp);
index 0d9d6e0803b88b6fe3909c0b8b83f24fa47580f6..57cd268d4275bd2c8a3f8f1e43f29fab1fe1a835 100644 (file)
@@ -464,7 +464,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                /* Verify it is a data structure  */
                tag = dwarf_tag(&type);
                if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
-                       pr_warning("%s is not a data structure nor an union.\n",
+                       pr_warning("%s is not a data structure nor a union.\n",
                                   varname);
                        return -EINVAL;
                }
@@ -479,7 +479,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
        } else {
                /* Verify it is a data structure  */
                if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
-                       pr_warning("%s is not a data structure nor an union.\n",
+                       pr_warning("%s is not a data structure nor a union.\n",
                                   varname);
                        return -EINVAL;
                }
index 581e0efd6356839567a9ae5303a3fa6581b8ee3b..783326cfbaa6bfdaeef277dc9545f55213ffa942 100644 (file)
@@ -369,10 +369,10 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
                if (node->map) {
                        struct map *map = node->map;
                        const char *dsoname = "[unknown]";
-                       if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+                       if (map && map->dso) {
                                if (symbol_conf.show_kernel_path && map->dso->long_name)
                                        dsoname = map->dso->long_name;
-                               else if (map->dso->name)
+                               else
                                        dsoname = map->dso->name;
                        }
                        pydict_set_item_string_decref(pyelem, "dso",
index 4cdbc8f5f14dbf0dff51304ffb59812ce7273ea6..1dd617d116b5d844f23c88592f9dac9f061a7ff6 100644 (file)
@@ -932,7 +932,7 @@ static void branch_stack__printf(struct perf_sample *sample)
 
                printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
                        i, e->from, e->to,
-                       e->flags.cycles,
+                       (unsigned short)e->flags.cycles,
                        e->flags.mispred ? "M" : " ",
                        e->flags.predicted ? "P" : " ",
                        e->flags.abort ? "A" : " ",
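
The new cast matters because flags.cycles is a bit-field declared on a u64 base: compilers such as clang treat the varargs argument as 64 bits wide, which mismatches the int-promoted unsigned short that "%hu" expects. A standalone sketch of the pattern:

/* Sketch: a 16-bit field on a 64-bit base can still travel through
 * varargs as a 64-bit value, so "%hu" wants the explicit cast. */
#include <stdio.h>
#include <stdint.h>

struct flags {
        uint64_t mispred:1, cycles:16, reserved:47;
};

int main(void)
{
        struct flags f = { .cycles = 42 };

        printf("%hu\n", (unsigned short)f.cycles);      /* well-defined */
        return 0;
}
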
index c8680984d2d6680f56a974a1b54056a5e74263e1..af415febbc46e65fd0bdda5cbf1c93f7fa3f9d76 100644 (file)
@@ -1,8 +1,15 @@
 #!/usr/bin/python2
 
-from distutils.core import setup, Extension
 from os import getenv
 
+cc = getenv("CC")
+if cc == "clang":
+    from _sysconfigdata import build_time_vars
+    from re import sub
+    build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"])
+
+from distutils.core import setup, Extension
+
 from distutils.command.build_ext   import build_ext   as _build_ext
 from distutils.command.install_lib import install_lib as _install_lib
 
index df622f4e301e2a2284ee5e728b09ade055680796..0ff622288d243c4edad03a904b6da3a52cf4da50 100644 (file)
@@ -151,7 +151,7 @@ static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
        if (!dso_l || !dso_r)
                return cmp_null(dso_r, dso_l);
 
-       if (verbose) {
+       if (verbose > 0) {
                dso_name_l = dso_l->long_name;
                dso_name_r = dso_r->long_name;
        } else {
@@ -172,8 +172,8 @@ static int _hist_entry__dso_snprintf(struct map *map, char *bf,
                                     size_t size, unsigned int width)
 {
        if (map && map->dso) {
-               const char *dso_name = !verbose ? map->dso->short_name :
-                       map->dso->long_name;
+               const char *dso_name = verbose > 0 ? map->dso->long_name :
+                       map->dso->short_name;
                return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name);
        }
 
@@ -261,7 +261,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
 {
        size_t ret = 0;
 
-       if (verbose) {
+       if (verbose > 0) {
                char o = map ? dso__symtab_origin(map->dso) : '!';
                ret += repsep_snprintf(bf, size, "%-#*llx %c ",
                                       BITS_PER_LONG / 4 + 2, ip, o);
index 7aff317fc7c4827b53986a4a7085ca8d8e2ef58f..796c847e2f000d6722ed2cb35321e7f3b8b0489e 100644 (file)
@@ -108,7 +108,7 @@ struct hist_entry {
                /*
                 * Since perf diff only supports the stdio output, TUI
                 * fields are only accessed from perf report (or perf
-                * top).  So make it an union to reduce memory usage.
+                * top).  So make it a union to reduce memory usage.
                 */
                struct hist_entry_diff  diff;
                struct /* for TUI */ {
index 39345c2ddfc22edcfde844e5eb95b72930f4eeda..0d51334a9b4628090f35ffe4da921b4e0ecb15b0 100644 (file)
@@ -344,7 +344,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);
 
-       if (verbose) {
+       if (verbose > 0) {
                fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
                        perf_evsel__name(counter), count[0], count[1], count[2]);
        }
index adbc6c02c3aaac757028e6bbe1ae63cd11ee437a..4e59ddeb4eda7cd8e75ec2c0c42e1fec4de0432a 100644 (file)
@@ -213,7 +213,7 @@ static bool want_demangle(bool is_kernel_sym)
 
 static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
 {
-       int demangle_flags = verbose ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
+       int demangle_flags = verbose > 0 ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
        char *demangled = NULL;
 
        /*
index 590d12a25f6e0b8c625011e72d1aa2189e9e1c43..3e701f0e9c1438f781b6dd0248212d250da800a5 100644 (file)
@@ -285,20 +285,24 @@ static int get_freq_hardware(unsigned int cpu, unsigned int human)
 
 /* --hwlimits / -l */
 
-static int get_hardware_limits(unsigned int cpu)
+static int get_hardware_limits(unsigned int cpu, unsigned int human)
 {
        unsigned long min, max;
 
-       printf(_("  hardware limits: "));
        if (cpufreq_get_hardware_limits(cpu, &min, &max)) {
                printf(_("Not Available\n"));
                return -EINVAL;
        }
 
-       print_speed(min);
-       printf(" - ");
-       print_speed(max);
-       printf("\n");
+       if (human) {
+               printf(_("  hardware limits: "));
+               print_speed(min);
+               printf(" - ");
+               print_speed(max);
+               printf("\n");
+       } else {
+               printf("%lu %lu\n", min, max);
+       }
        return 0;
 }
 
@@ -456,7 +460,7 @@ static void debug_output_one(unsigned int cpu)
        get_related_cpus(cpu);
        get_affected_cpus(cpu);
        get_latency(cpu, 1);
-       get_hardware_limits(cpu);
+       get_hardware_limits(cpu, 1);
 
        freqs = cpufreq_get_available_frequencies(cpu);
        if (freqs) {
@@ -622,7 +626,7 @@ int cmd_freq_info(int argc, char **argv)
                        ret = get_driver(cpu);
                        break;
                case 'l':
-                       ret = get_hardware_limits(cpu);
+                       ret = get_hardware_limits(cpu, human);
                        break;
                case 'w':
                        ret = get_freq_hardware(cpu, human);
@@ -639,7 +643,6 @@ int cmd_freq_info(int argc, char **argv)
                }
                if (ret)
                        return ret;
-               printf("\n");
        }
        return ret;
 }
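
The split gives -l a machine-readable mode: without human formatting, the hardware limits are printed as two raw numbers on one line (the cpufreq sysfs interface reports frequencies in kHz), while the pretty "  hardware limits: ..." rendering is kept for the human paths such as debug_output_one(); the unconditional trailing newline in the option loop is dropped to keep that one-line output clean.
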
index be93ab02b4903d54fcf1a3787392146a2d75472b..6e4eb2fc2d1e78edc356692dcadfe3bfaebd65ec 100755 (executable)
@@ -179,6 +179,7 @@ my $localversion;
 my $iteration = 0;
 my $successes = 0;
 my $stty_orig;
+my $run_command_status = 0;
 
 my $bisect_good;
 my $bisect_bad;
@@ -1325,26 +1326,44 @@ sub wait_for_monitor;
 
 sub reboot {
     my ($time) = @_;
+    my $powercycle = 0;
 
-    # Make sure everything has been written to disk
-    run_ssh("sync");
+    # test if the machine can be connected to within 5 seconds
+    my $stat = run_ssh("echo check machine status", 5);
+    if (!$stat) {
+       doprint("power cycle\n");
+       $powercycle = 1;
+    }
+
+    if ($powercycle) {
+       run_command "$power_cycle";
 
-    if (defined($time)) {
        start_monitor;
        # flush out current monitor
        # May contain the reboot success line
        wait_for_monitor 1;
-    }
 
-    # try to reboot normally
-    if (run_command $reboot) {
-       if (defined($powercycle_after_reboot)) {
-           sleep $powercycle_after_reboot;
+    } else {
+       # Make sure everything has been written to disk
+       run_ssh("sync");
+
+       if (defined($time)) {
+           start_monitor;
+           # flush out current monitor
+           # May contain the reboot success line
+           wait_for_monitor 1;
+       }
+
+       # try to reboot normally
+       if (run_command $reboot) {
+           if (defined($powercycle_after_reboot)) {
+               sleep $powercycle_after_reboot;
+               run_command "$power_cycle";
+           }
+       } else {
+           # nope? power cycle it.
            run_command "$power_cycle";
        }
-    } else {
-       # nope? power cycle it.
-       run_command "$power_cycle";
     }
 
     if (defined($time)) {
@@ -1412,6 +1431,10 @@ sub dodie {
            system("stty $stty_orig");
     }
 
+    if (defined($post_test)) {
+       run_command $post_test;
+    }
+
     die @_, "\n";
 }
 
@@ -1624,10 +1647,6 @@ sub save_logs {
 
 sub fail {
 
-       if (defined($post_test)) {
-               run_command $post_test;
-       }
-
        if ($die_on_failure) {
                dodie @_;
        }
@@ -1660,23 +1679,26 @@ sub fail {
            save_logs "fail", $store_failures;
         }
 
+       if (defined($post_test)) {
+               run_command $post_test;
+       }
+
        return 1;
 }
 
 sub run_command {
-    my ($command, $redirect) = @_;
+    my ($command, $redirect, $timeout) = @_;
     my $start_time;
     my $end_time;
     my $dolog = 0;
     my $dord = 0;
     my $pid;
 
-    $start_time = time;
-
     $command =~ s/\$SSH_USER/$ssh_user/g;
     $command =~ s/\$MACHINE/$machine/g;
 
     doprint("$command ... ");
+    $start_time = time;
 
     $pid = open(CMD, "$command 2>&1 |") or
        (fail "unable to exec $command" and return 0);
@@ -1693,13 +1715,30 @@ sub run_command {
        $dord = 1;
     }
 
-    while (<CMD>) {
-       print LOG if ($dolog);
-       print RD  if ($dord);
+    my $hit_timeout = 0;
+
+    while (1) {
+       my $fp = \*CMD;
+       if (defined($timeout)) {
+           doprint "timeout = $timeout\n";
+       }
+       my $line = wait_for_input($fp, $timeout);
+       if (!defined($line)) {
+           my $now = time;
+           if (defined($timeout) && (($now - $start_time) >= $timeout)) {
+               doprint "Hit timeout of $timeout, killing process\n";
+               $hit_timeout = 1;
+               kill 9, $pid;
+           }
+           last;
+       }
+       print LOG $line if ($dolog);
+       print RD $line if ($dord);
     }
 
     waitpid($pid, 0);
-    my $failed = $?;
+    # shift 8 for real exit status
+    $run_command_status = $? >> 8;
 
     close(CMD);
     close(LOG) if ($dolog);
@@ -1714,21 +1753,25 @@ sub run_command {
        doprint "[$delta seconds] ";
     }
 
-    if ($failed) {
+    if ($hit_timeout) {
+       $run_command_status = 1;
+    }
+
+    if ($run_command_status) {
        doprint "FAILED!\n";
     } else {
        doprint "SUCCESS\n";
     }
 
-    return !$failed;
+    return !$run_command_status;
 }
 
 sub run_ssh {
-    my ($cmd) = @_;
+    my ($cmd, $timeout) = @_;
     my $cp_exec = $ssh_exec;
 
     $cp_exec =~ s/\$SSH_COMMAND/$cmd/g;
-    return run_command "$cp_exec";
+    return run_command "$cp_exec", undef , $timeout;
 }
 
 sub run_scp {
@@ -2489,10 +2532,6 @@ sub halt {
 sub success {
     my ($i) = @_;
 
-    if (defined($post_test)) {
-       run_command $post_test;
-    }
-
     $successes++;
 
     my $name = "";
@@ -2517,6 +2556,10 @@ sub success {
        doprint "Reboot and wait $sleep_time seconds\n";
        reboot_to_good $sleep_time;
     }
+
+    if (defined($post_test)) {
+       run_command $post_test;
+    }
 }
 
 sub answer_bisect {
@@ -2537,16 +2580,15 @@ sub answer_bisect {
 }
 
 sub child_run_test {
-    my $failed = 0;
 
     # child should have no power
     $reboot_on_error = 0;
     $poweroff_on_error = 0;
     $die_on_failure = 1;
 
-    run_command $run_test, $testlog or $failed = 1;
+    run_command $run_test, $testlog;
 
-    exit $failed;
+    exit $run_command_status;
 }
 
 my $child_done;
@@ -2629,7 +2671,7 @@ sub do_run_test {
     }
 
     waitpid $child_pid, 0;
-    $child_exit = $?;
+    $child_exit = $? >> 8;
 
     my $end_time = time;
     $test_time = $end_time - $start_time;
@@ -3330,7 +3372,6 @@ sub config_bisect {
     save_config \%good_configs, $good_config;
     save_config \%bad_configs, $bad_config;
 
-
     if (defined($config_bisect_check) && $config_bisect_check ne "0") {
        if ($config_bisect_check ne "good") {
            doprint "Testing bad config\n";
index 11d888ca6a9237392607cf99e3e767878c5716ec..d4706c0ffcebdead866851aa98fcb9323e394fe5 100644 (file)
@@ -1,2 +1,6 @@
+generated/map-shift.h
+idr.c
+idr-test
 main
+multiorder
 radix-tree.c
index 3635e4d3eca7fcd8dae99f740c587b7ac02162b9..f11315bedefc3d68152bef22da0f8ab60f8c5be6 100644 (file)
@@ -1,29 +1,47 @@
 
-CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE
+CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
 LDFLAGS += -lpthread -lurcu
-TARGETS = main
-OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \
-        regression1.o regression2.o regression3.o multiorder.o \
-        iteration_check.o benchmark.o
+TARGETS = main idr-test multiorder
+CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
+OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
+        tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
 
-ifdef BENCHMARK
-       CFLAGS += -DBENCHMARK=1
+ifndef SHIFT
+       SHIFT=3
 endif
 
-targets: $(TARGETS)
+targets: mapshift $(TARGETS)
 
 main:  $(OFILES)
-       $(CC) $(CFLAGS) $(LDFLAGS) $(OFILES) -o main
+       $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main
+
+idr-test: idr-test.o $(CORE_OFILES)
+       $(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test
+
+multiorder: multiorder.o $(CORE_OFILES)
+       $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder
 
 clean:
-       $(RM) -f $(TARGETS) *.o radix-tree.c
+       $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
 
-find_next_bit.o: ../../lib/find_bit.c
-       $(CC) $(CFLAGS) -c -o $@ $<
+vpath %.c ../../lib
 
-$(OFILES): *.h */*.h \
+$(OFILES): *.h */*.h generated/map-shift.h \
        ../../include/linux/*.h \
-       ../../../include/linux/radix-tree.h
+       ../../include/asm/*.h \
+       ../../../include/linux/radix-tree.h \
+       ../../../include/linux/idr.h
 
 radix-tree.c: ../../../lib/radix-tree.c
        sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+idr.c: ../../../lib/idr.c
+       sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+.PHONY: mapshift
+
+mapshift:
+       @if ! grep -qw $(SHIFT) generated/map-shift.h; then             \
+               echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >          \
+                               generated/map-shift.h;                  \
+       fi
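
The mapshift rule above replaces the old BENCHMARK=1 ifdef: RADIX_TREE_MAP_SHIFT now comes from generated/map-shift.h, and the grep guard rewrites that header only when SHIFT actually changes, so the header dependency added to $(OFILES) triggers rebuilds exactly when needed. The default of SHIFT=3 is deliberate: each radix-tree node holds 1 << SHIFT slots, so a small shift deepens the tree and exercises far more internal-node handling than the kernel's usual shift of 6. A stand-alone sketch of that relationship (illustrative arithmetic, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned int shift = 3, levels = 0;
            unsigned long index = 10000;

            /* Count how many levels of (1 << shift)-way fanout are needed
             * before the remaining index bits run out. */
            while (index >> (levels * shift))
                    levels++;
            printf("shift %u -> %u levels for index %lu\n",
                   shift, levels, index);      /* shift 3 -> 5 levels */
            return 0;
    }
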
index 215ca86c7605527d59a208cb8b34f686a5ac4ba4..9b09ddfe462fd3b2ea782805560c349e720a4637 100644 (file)
@@ -71,7 +71,7 @@ static void benchmark_size(unsigned long size, unsigned long step, int order)
        tagged = benchmark_iter(&tree, true);
        normal = benchmark_iter(&tree, false);
 
-       printf("Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n",
+       printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n",
                size, step, order, tagged, normal);
 
        item_kill_tree(&tree);
@@ -85,8 +85,8 @@ void benchmark(void)
                                128, 256, 512, 12345, 0};
        int c, s;
 
-       printf("starting benchmarks\n");
-       printf("RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT);
+       printv(1, "starting benchmarks\n");
+       printv(1, "RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT);
 
        for (c = 0; size[c]; c++)
                for (s = 0; step[s]; s++)
index ad18cf5a2a3a1df8dfc34ec8ed7d0c18471e24ff..cf88dc5b8832a20b89e2130055f12e179233187f 100644 (file)
@@ -1,3 +1 @@
 #define CONFIG_RADIX_TREE_MULTIORDER 1
-#define CONFIG_SHMEM 1
-#define CONFIG_SWAP 1
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
new file mode 100644 (file)
index 0000000..a26098c
--- /dev/null
@@ -0,0 +1,444 @@
+/*
+ * idr-test.c: Test the IDR API
+ * Copyright (c) 2016 Matthew Wilcox <willy@infradead.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/bitmap.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include "test.h"
+
+#define DUMMY_PTR      ((void *)0x12)
+
+int item_idr_free(int id, void *p, void *data)
+{
+       struct item *item = p;
+       assert(item->index == id);
+       free(p);
+
+       return 0;
+}
+
+void item_idr_remove(struct idr *idr, int id)
+{
+       struct item *item = idr_find(idr, id);
+       assert(item->index == id);
+       idr_remove(idr, id);
+       free(item);
+}
+
+void idr_alloc_test(void)
+{
+       unsigned long i;
+       DEFINE_IDR(idr);
+
+       assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0, 0x4000, GFP_KERNEL) == 0);
+       assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0x3ffd, 0x4000, GFP_KERNEL) == 0x3ffd);
+       idr_remove(&idr, 0x3ffd);
+       idr_remove(&idr, 0);
+
+       for (i = 0x3ffe; i < 0x4003; i++) {
+               int id;
+               struct item *item;
+
+               if (i < 0x4000)
+                       item = item_create(i, 0);
+               else
+                       item = item_create(i - 0x3fff, 0);
+
+               id = idr_alloc_cyclic(&idr, item, 1, 0x4000, GFP_KERNEL);
+               assert(id == item->index);
+       }
+
+       idr_for_each(&idr, item_idr_free, &idr);
+       idr_destroy(&idr);
+}
+
+void idr_replace_test(void)
+{
+       DEFINE_IDR(idr);
+
+       idr_alloc(&idr, (void *)-1, 10, 11, GFP_KERNEL);
+       idr_replace(&idr, &idr, 10);
+
+       idr_destroy(&idr);
+}
+
+/*
+ * Unlike the radix tree, you can put a NULL pointer -- with care -- into
+ * the IDR.  Some interfaces, like idr_find() do not distinguish between
+ * "present, value is NULL" and "not present", but that's exactly what some
+ * users want.
+ */
+void idr_null_test(void)
+{
+       int i;
+       DEFINE_IDR(idr);
+
+       assert(idr_is_empty(&idr));
+
+       assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+       assert(!idr_is_empty(&idr));
+       idr_remove(&idr, 0);
+       assert(idr_is_empty(&idr));
+
+       assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+       assert(!idr_is_empty(&idr));
+       idr_destroy(&idr);
+       assert(idr_is_empty(&idr));
+
+       for (i = 0; i < 10; i++) {
+               assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == i);
+       }
+
+       assert(idr_replace(&idr, DUMMY_PTR, 3) == NULL);
+       assert(idr_replace(&idr, DUMMY_PTR, 4) == NULL);
+       assert(idr_replace(&idr, NULL, 4) == DUMMY_PTR);
+       assert(idr_replace(&idr, DUMMY_PTR, 11) == ERR_PTR(-ENOENT));
+       idr_remove(&idr, 5);
+       assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 5);
+       idr_remove(&idr, 5);
+
+       for (i = 0; i < 9; i++) {
+               idr_remove(&idr, i);
+               assert(!idr_is_empty(&idr));
+       }
+       idr_remove(&idr, 8);
+       assert(!idr_is_empty(&idr));
+       idr_remove(&idr, 9);
+       assert(idr_is_empty(&idr));
+
+       assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+       assert(idr_replace(&idr, DUMMY_PTR, 3) == ERR_PTR(-ENOENT));
+       assert(idr_replace(&idr, DUMMY_PTR, 0) == NULL);
+       assert(idr_replace(&idr, NULL, 0) == DUMMY_PTR);
+
+       idr_destroy(&idr);
+       assert(idr_is_empty(&idr));
+
+       for (i = 1; i < 10; i++) {
+               assert(idr_alloc(&idr, NULL, 1, 0, GFP_KERNEL) == i);
+       }
+
+       idr_destroy(&idr);
+       assert(idr_is_empty(&idr));
+}
+
+void idr_nowait_test(void)
+{
+       unsigned int i;
+       DEFINE_IDR(idr);
+
+       idr_preload(GFP_KERNEL);
+
+       for (i = 0; i < 3; i++) {
+               struct item *item = item_create(i, 0);
+               assert(idr_alloc(&idr, item, i, i + 1, GFP_NOWAIT) == i);
+       }
+
+       idr_preload_end();
+
+       idr_for_each(&idr, item_idr_free, &idr);
+       idr_destroy(&idr);
+}
+
+void idr_checks(void)
+{
+       unsigned long i;
+       DEFINE_IDR(idr);
+
+       for (i = 0; i < 10000; i++) {
+               struct item *item = item_create(i, 0);
+               assert(idr_alloc(&idr, item, 0, 20000, GFP_KERNEL) == i);
+       }
+
+       assert(idr_alloc(&idr, DUMMY_PTR, 5, 30, GFP_KERNEL) < 0);
+
+       for (i = 0; i < 5000; i++)
+               item_idr_remove(&idr, i);
+
+       idr_remove(&idr, 3);
+
+       idr_for_each(&idr, item_idr_free, &idr);
+       idr_destroy(&idr);
+
+       assert(idr_is_empty(&idr));
+
+       idr_remove(&idr, 3);
+       idr_remove(&idr, 0);
+
+       for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
+               struct item *item = item_create(i, 0);
+               assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
+       }
+       assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC);
+
+       idr_for_each(&idr, item_idr_free, &idr);
+       idr_destroy(&idr);
+       idr_destroy(&idr);
+
+       assert(idr_is_empty(&idr));
+
+       for (i = 1; i < 10000; i++) {
+               struct item *item = item_create(i, 0);
+               assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i);
+       }
+
+       idr_for_each(&idr, item_idr_free, &idr);
+       idr_destroy(&idr);
+
+       idr_replace_test();
+       idr_alloc_test();
+       idr_null_test();
+       idr_nowait_test();
+}
+
+/*
+ * Check that we get the correct error when we run out of memory doing
+ * allocations.  To ensure we run out of memory, just "forget" to preload.
+ * The first test is for not having a bitmap available, and the second test
+ * is for not being able to allocate a level of the radix tree.
+ */
+void ida_check_nomem(void)
+{
+       DEFINE_IDA(ida);
+       int id, err;
+
+       err = ida_get_new_above(&ida, 256, &id);
+       assert(err == -EAGAIN);
+       err = ida_get_new_above(&ida, 1UL << 30, &id);
+       assert(err == -EAGAIN);
+}
+
+/*
+ * Check what happens when we fill a leaf and then delete it.  This may
+ * discover mishandling of IDR_FREE.
+ */
+void ida_check_leaf(void)
+{
+       DEFINE_IDA(ida);
+       int id;
+       unsigned long i;
+
+       for (i = 0; i < IDA_BITMAP_BITS; i++) {
+               assert(ida_pre_get(&ida, GFP_KERNEL));
+               assert(!ida_get_new(&ida, &id));
+               assert(id == i);
+       }
+
+       ida_destroy(&ida);
+       assert(ida_is_empty(&ida));
+
+       assert(ida_pre_get(&ida, GFP_KERNEL));
+       assert(!ida_get_new(&ida, &id));
+       assert(id == 0);
+       ida_destroy(&ida);
+       assert(ida_is_empty(&ida));
+}
+
+/*
+ * Check handling of conversions between exceptional entries and full bitmaps.
+ */
+void ida_check_conv(void)
+{
+       DEFINE_IDA(ida);
+       int id;
+       unsigned long i;
+
+       for (i = 0; i < IDA_BITMAP_BITS * 2; i += IDA_BITMAP_BITS) {
+               assert(ida_pre_get(&ida, GFP_KERNEL));
+               assert(!ida_get_new_above(&ida, i + 1, &id));
+               assert(id == i + 1);
+               assert(!ida_get_new_above(&ida, i + BITS_PER_LONG, &id));
+               assert(id == i + BITS_PER_LONG);
+               ida_remove(&ida, i + 1);
+               ida_remove(&ida, i + BITS_PER_LONG);
+               assert(ida_is_empty(&ida));
+       }
+
+       assert(ida_pre_get(&ida, GFP_KERNEL));
+
+       for (i = 0; i < IDA_BITMAP_BITS * 2; i++) {
+               assert(ida_pre_get(&ida, GFP_KERNEL));
+               assert(!ida_get_new(&ida, &id));
+               assert(id == i);
+       }
+
+       for (i = IDA_BITMAP_BITS * 2; i > 0; i--) {
+               ida_remove(&ida, i - 1);
+       }
+       assert(ida_is_empty(&ida));
+
+       for (i = 0; i < IDA_BITMAP_BITS + BITS_PER_LONG - 4; i++) {
+               assert(ida_pre_get(&ida, GFP_KERNEL));
+               assert(!ida_get_new(&ida, &id));
+               assert(id == i);
+       }
+
+       for (i = IDA_BITMAP_BITS + BITS_PER_LONG - 4; i > 0; i--) {
+               ida_remove(&ida, i - 1);
+       }
+       assert(ida_is_empty(&ida));
+
+       radix_tree_cpu_dead(1);
+       for (i = 0; i < 1000000; i++) {
+               int err = ida_get_new(&ida, &id);
+               if (err == -EAGAIN) {
+                       assert((i % IDA_BITMAP_BITS) == (BITS_PER_LONG - 2));
+                       assert(ida_pre_get(&ida, GFP_KERNEL));
+                       err = ida_get_new(&ida, &id);
+               } else {
+                       assert((i % IDA_BITMAP_BITS) != (BITS_PER_LONG - 2));
+               }
+               assert(!err);
+               assert(id == i);
+       }
+       ida_destroy(&ida);
+}
+
+/*
+ * Check allocations up to and slightly above the maximum allowed (2^31-1) ID.
+ * Allocating up to 2^31-1 should succeed, and then allocating the next one
+ * should fail.
+ */
+void ida_check_max(void)
+{
+       DEFINE_IDA(ida);
+       int id, err;
+       unsigned long i, j;
+
+       for (j = 1; j < 65537; j *= 2) {
+               unsigned long base = (1UL << 31) - j;
+               for (i = 0; i < j; i++) {
+                       assert(ida_pre_get(&ida, GFP_KERNEL));
+                       assert(!ida_get_new_above(&ida, base, &id));
+                       assert(id == base + i);
+               }
+               assert(ida_pre_get(&ida, GFP_KERNEL));
+               err = ida_get_new_above(&ida, base, &id);
+               assert(err == -ENOSPC);
+               ida_destroy(&ida);
+               assert(ida_is_empty(&ida));
+               rcu_barrier();
+       }
+}
+
+void ida_check_random(void)
+{
+       DEFINE_IDA(ida);
+       DECLARE_BITMAP(bitmap, 2048);
+       int id;
+       unsigned int i;
+       time_t s = time(NULL);
+
+ repeat:
+       memset(bitmap, 0, sizeof(bitmap));
+       for (i = 0; i < 100000; i++) {
+               int r = rand();
+               int bit = r & 2047;
+               if (test_bit(bit, bitmap)) {
+                       __clear_bit(bit, bitmap);
+                       ida_remove(&ida, bit);
+               } else {
+                       __set_bit(bit, bitmap);
+                       ida_pre_get(&ida, GFP_KERNEL);
+                       assert(!ida_get_new_above(&ida, bit, &id));
+                       assert(id == bit);
+               }
+       }
+       ida_destroy(&ida);
+       if (time(NULL) < s + 10)
+               goto repeat;
+}
+
+void ida_checks(void)
+{
+       DEFINE_IDA(ida);
+       int id;
+       unsigned long i;
+
+       radix_tree_cpu_dead(1);
+       ida_check_nomem();
+
+       for (i = 0; i < 10000; i++) {
+               assert(ida_pre_get(&ida, GFP_KERNEL));
+               assert(!ida_get_new(&ida, &id));
+               assert(id == i);
+       }
+
+       ida_remove(&ida, 20);
+       ida_remove(&ida, 21);
+       for (i = 0; i < 3; i++) {
+               assert(ida_pre_get(&ida, GFP_KERNEL));
+               assert(!ida_get_new(&ida, &id));
+               if (i == 2)
+                       assert(id == 10000);
+       }
+
+       for (i = 0; i < 5000; i++)
+               ida_remove(&ida, i);
+
+       assert(ida_pre_get(&ida, GFP_KERNEL));
+       assert(!ida_get_new_above(&ida, 5000, &id));
+       assert(id == 10001);
+
+       ida_destroy(&ida);
+
+       assert(ida_is_empty(&ida));
+
+       assert(ida_pre_get(&ida, GFP_KERNEL));
+       assert(!ida_get_new_above(&ida, 1, &id));
+       assert(id == 1);
+
+       ida_remove(&ida, id);
+       assert(ida_is_empty(&ida));
+       ida_destroy(&ida);
+       assert(ida_is_empty(&ida));
+
+       assert(ida_pre_get(&ida, GFP_KERNEL));
+       assert(!ida_get_new_above(&ida, 1, &id));
+       ida_destroy(&ida);
+       assert(ida_is_empty(&ida));
+
+       assert(ida_pre_get(&ida, GFP_KERNEL));
+       assert(!ida_get_new_above(&ida, 1, &id));
+       assert(id == 1);
+       assert(ida_pre_get(&ida, GFP_KERNEL));
+       assert(!ida_get_new_above(&ida, 1025, &id));
+       assert(id == 1025);
+       assert(ida_pre_get(&ida, GFP_KERNEL));
+       assert(!ida_get_new_above(&ida, 10000, &id));
+       assert(id == 10000);
+       ida_remove(&ida, 1025);
+       ida_destroy(&ida);
+       assert(ida_is_empty(&ida));
+
+       ida_check_leaf();
+       ida_check_max();
+       ida_check_conv();
+       ida_check_random();
+
+       radix_tree_cpu_dead(1);
+}
+
+int __weak main(void)
+{
+       radix_tree_init();
+       idr_checks();
+       ida_checks();
+       rcu_barrier();
+       if (nr_allocated)
+               printf("nr_allocated = %d\n", nr_allocated);
+       return 0;
+}
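
For readers new to the interface this file tests: idr_alloc() returns the lowest free ID at or above `start` (an `end` of 0 means no upper bound) and binds it to a pointer, idr_find() maps an ID back to its pointer, and idr_remove()/idr_destroy() release IDs. A minimal kernel-style sketch of that lifecycle -- the function names below are illustrative, not from this patch, and real in-tree users supply their own locking:

    #include <linux/idr.h>

    static DEFINE_IDR(obj_idr);             /* illustrative IDR instance */

    static int obj_track(void *obj)
    {
            /* New ID in [0, INT_MAX], or a negative errno on failure. */
            return idr_alloc(&obj_idr, obj, 0, 0, GFP_KERNEL);
    }

    static void *obj_lookup(int id)
    {
            return idr_find(&obj_idr, id);  /* NULL if absent or stored NULL */
    }

    static void obj_untrack(int id)
    {
            idr_remove(&obj_idr, id);
    }

idr_alloc_cyclic(), exercised by idr_alloc_test() above, differs only in searching from a per-IDR hint that advances past each allocation and wraps back to `start` at `end`, which is why the loop around 0x4000 expects the IDs 0x3ffe, 0x3fff, 1, 2, 3.
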
index 7572b7ed930ecccbf043b6f9989f64b6cd27d0e7..a92bab51370139f673001cef8bf193463498bd98 100644 (file)
@@ -177,7 +177,7 @@ void iteration_test(unsigned order, unsigned test_duration)
 {
        int i;
 
-       printf("Running %siteration tests for %d seconds\n",
+       printv(1, "Running %siteration tests for %d seconds\n",
                        order > 0 ? "multiorder " : "", test_duration);
 
        max_order = order;
index d31ea7c9abec8231b37e6e61efbd56e7e7f09706..cf48c8473f4869a8205a9eaa94b6f423079b9be5 100644 (file)
@@ -5,7 +5,7 @@
 #include <unistd.h>
 #include <assert.h>
 
-#include <linux/mempool.h>
+#include <linux/gfp.h>
 #include <linux/poison.h>
 #include <linux/slab.h>
 #include <linux/radix-tree.h>
@@ -13,6 +13,8 @@
 
 int nr_allocated;
 int preempt_count;
+int kmalloc_verbose;
+int test_verbose;
 
 struct kmem_cache {
        pthread_mutex_t lock;
@@ -22,27 +24,6 @@ struct kmem_cache {
        void (*ctor)(void *);
 };
 
-void *mempool_alloc(mempool_t *pool, int gfp_mask)
-{
-       return pool->alloc(gfp_mask, pool->data);
-}
-
-void mempool_free(void *element, mempool_t *pool)
-{
-       pool->free(element, pool->data);
-}
-
-mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
-                       mempool_free_t *free_fn, void *pool_data)
-{
-       mempool_t *ret = malloc(sizeof(*ret));
-
-       ret->alloc = alloc_fn;
-       ret->free = free_fn;
-       ret->data = pool_data;
-       return ret;
-}
-
 void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
 {
        struct radix_tree_node *node;
@@ -54,9 +35,9 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
        if (cachep->nr_objs) {
                cachep->nr_objs--;
                node = cachep->objs;
-               cachep->objs = node->private_data;
+               cachep->objs = node->parent;
                pthread_mutex_unlock(&cachep->lock);
-               node->private_data = NULL;
+               node->parent = NULL;
        } else {
                pthread_mutex_unlock(&cachep->lock);
                node = malloc(cachep->size);
@@ -65,6 +46,8 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
        }
 
        uatomic_inc(&nr_allocated);
+       if (kmalloc_verbose)
+               printf("Allocating %p from slab\n", node);
        return node;
 }
 
@@ -72,6 +55,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 {
        assert(objp);
        uatomic_dec(&nr_allocated);
+       if (kmalloc_verbose)
+               printf("Freeing %p to slab\n", objp);
        pthread_mutex_lock(&cachep->lock);
        if (cachep->nr_objs > 10) {
                memset(objp, POISON_FREE, cachep->size);
@@ -79,7 +64,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
        } else {
                struct radix_tree_node *node = objp;
                cachep->nr_objs++;
-               node->private_data = cachep->objs;
+               node->parent = cachep->objs;
                cachep->objs = node;
        }
        pthread_mutex_unlock(&cachep->lock);
@@ -89,6 +74,8 @@ void *kmalloc(size_t size, gfp_t gfp)
 {
        void *ret = malloc(size);
        uatomic_inc(&nr_allocated);
+       if (kmalloc_verbose)
+               printf("Allocating %p from malloc\n", ret);
        return ret;
 }
 
@@ -97,6 +84,8 @@ void kfree(void *p)
        if (!p)
                return;
        uatomic_dec(&nr_allocated);
+       if (kmalloc_verbose)
+               printf("Freeing %p to malloc\n", p);
        free(p);
 }
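
The kmem_cache shim above keeps a short per-cache freelist threaded through a pointer field of the freed object itself -- now node->parent, since the rework drops the private_data field -- and caps it at ten objects before falling back to free(). The same trick in stand-alone form (illustrative names; assumes every object is at least pointer-sized):

    #include <stdlib.h>

    struct freeobj { struct freeobj *next; };  /* overlays a freed object */

    static struct freeobj *cache_head;
    static int nr_cached;

    static void *cache_get(size_t size)
    {
            if (cache_head) {
                    struct freeobj *obj = cache_head;

                    cache_head = obj->next;
                    nr_cached--;
                    return obj;
            }
            return malloc(size);
    }

    static void cache_put(void *p)
    {
            struct freeobj *obj = p;

            if (nr_cached > 10) {       /* same bound the shim uses */
                    free(p);
                    return;
            }
            obj->next = cache_head;     /* thread the freelist through p */
            cache_head = obj;
            nr_cached++;
    }
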
 
diff --git a/tools/testing/radix-tree/linux/bitops.h b/tools/testing/radix-tree/linux/bitops.h
deleted file mode 100644 (file)
index a13e9bc..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
-#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
-
-#include <linux/types.h>
-#include <linux/bitops/find.h>
-#include <linux/bitops/hweight.h>
-#include <linux/kernel.h>
-
-#define BIT_MASK(nr)           (1UL << ((nr) % BITS_PER_LONG))
-#define BIT_WORD(nr)           ((nr) / BITS_PER_LONG)
-#define BITS_PER_BYTE          8
-#define BITS_TO_LONGS(nr)      DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-
-       *p  |= mask;
-}
-
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-
-       *p &= ~mask;
-}
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-
-       *p ^= mask;
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-       unsigned long old = *p;
-
-       *p = old | mask;
-       return (old & mask) != 0;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-       unsigned long old = *p;
-
-       *p = old & ~mask;
-       return (old & mask) != 0;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr,
-                                           volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-       unsigned long old = *p;
-
-       *p = old ^ mask;
-       return (old & mask) != 0;
-}
-
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static inline int test_bit(int nr, const volatile unsigned long *addr)
-{
-       return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
-}
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
-       int num = 0;
-
-       if ((word & 0xffffffff) == 0) {
-               num += 32;
-               word >>= 32;
-       }
-       if ((word & 0xffff) == 0) {
-               num += 16;
-               word >>= 16;
-       }
-       if ((word & 0xff) == 0) {
-               num += 8;
-               word >>= 8;
-       }
-       if ((word & 0xf) == 0) {
-               num += 4;
-               word >>= 4;
-       }
-       if ((word & 0x3) == 0) {
-               num += 2;
-               word >>= 2;
-       }
-       if ((word & 0x1) == 0)
-               num += 1;
-       return num;
-}
-
-unsigned long find_next_bit(const unsigned long *addr,
-                           unsigned long size,
-                           unsigned long offset);
-
-static inline unsigned long hweight_long(unsigned long w)
-{
-       return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
-}
-
-#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/__ffs.h b/tools/testing/radix-tree/linux/bitops/__ffs.h
deleted file mode 100644 (file)
index 9a3274a..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS___FFS_H_
-#define _ASM_GENERIC_BITOPS___FFS_H_
-
-#include <asm/types.h>
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
-       int num = 0;
-
-#if BITS_PER_LONG == 64
-       if ((word & 0xffffffff) == 0) {
-               num += 32;
-               word >>= 32;
-       }
-#endif
-       if ((word & 0xffff) == 0) {
-               num += 16;
-               word >>= 16;
-       }
-       if ((word & 0xff) == 0) {
-               num += 8;
-               word >>= 8;
-       }
-       if ((word & 0xf) == 0) {
-               num += 4;
-               word >>= 4;
-       }
-       if ((word & 0x3) == 0) {
-               num += 2;
-               word >>= 2;
-       }
-       if ((word & 0x1) == 0)
-               num += 1;
-       return num;
-}
-
-#endif /* _ASM_GENERIC_BITOPS___FFS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/ffs.h b/tools/testing/radix-tree/linux/bitops/ffs.h
deleted file mode 100644 (file)
index fbbb43a..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FFS_H_
-#define _ASM_GENERIC_BITOPS_FFS_H_
-
-/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static inline int ffs(int x)
-{
-       int r = 1;
-
-       if (!x)
-               return 0;
-       if (!(x & 0xffff)) {
-               x >>= 16;
-               r += 16;
-       }
-       if (!(x & 0xff)) {
-               x >>= 8;
-               r += 8;
-       }
-       if (!(x & 0xf)) {
-               x >>= 4;
-               r += 4;
-       }
-       if (!(x & 3)) {
-               x >>= 2;
-               r += 2;
-       }
-       if (!(x & 1)) {
-               x >>= 1;
-               r += 1;
-       }
-       return r;
-}
-
-#endif /* _ASM_GENERIC_BITOPS_FFS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/ffz.h b/tools/testing/radix-tree/linux/bitops/ffz.h
deleted file mode 100644 (file)
index 6744bd4..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FFZ_H_
-#define _ASM_GENERIC_BITOPS_FFZ_H_
-
-/*
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-#define ffz(x)  __ffs(~(x))
-
-#endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/find.h b/tools/testing/radix-tree/linux/bitops/find.h
deleted file mode 100644 (file)
index 72a51e5..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FIND_H_
-#define _ASM_GENERIC_BITOPS_FIND_H_
-
-extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
-               size, unsigned long offset);
-
-extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned
-               long size, unsigned long offset);
-
-#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
-#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
-
-#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/fls.h b/tools/testing/radix-tree/linux/bitops/fls.h
deleted file mode 100644 (file)
index 850859b..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FLS_H_
-#define _ASM_GENERIC_BITOPS_FLS_H_
-
-/**
- * fls - find last (most-significant) bit set
- * @x: the word to search
- *
- * This is defined the same way as ffs.
- * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
- */
-
-static inline int fls(int x)
-{
-       int r = 32;
-
-       if (!x)
-               return 0;
-       if (!(x & 0xffff0000u)) {
-               x <<= 16;
-               r -= 16;
-       }
-       if (!(x & 0xff000000u)) {
-               x <<= 8;
-               r -= 8;
-       }
-       if (!(x & 0xf0000000u)) {
-               x <<= 4;
-               r -= 4;
-       }
-       if (!(x & 0xc0000000u)) {
-               x <<= 2;
-               r -= 2;
-       }
-       if (!(x & 0x80000000u)) {
-               x <<= 1;
-               r -= 1;
-       }
-       return r;
-}
-
-#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/fls64.h b/tools/testing/radix-tree/linux/bitops/fls64.h
deleted file mode 100644 (file)
index 1b6b17c..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FLS64_H_
-#define _ASM_GENERIC_BITOPS_FLS64_H_
-
-#include <asm/types.h>
-
-static inline int fls64(__u64 x)
-{
-       __u32 h = x >> 32;
-       if (h)
-               return fls(h) + 32;
-       return fls(x);
-}
-
-#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/hweight.h b/tools/testing/radix-tree/linux/bitops/hweight.h
deleted file mode 100644 (file)
index fbbc383..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
-#define _ASM_GENERIC_BITOPS_HWEIGHT_H_
-
-#include <asm/types.h>
-
-extern unsigned int hweight32(unsigned int w);
-extern unsigned int hweight16(unsigned int w);
-extern unsigned int hweight8(unsigned int w);
-extern unsigned long hweight64(__u64 w);
-
-#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/le.h b/tools/testing/radix-tree/linux/bitops/le.h
deleted file mode 100644 (file)
index b9c7e5d..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_LE_H_
-#define _ASM_GENERIC_BITOPS_LE_H_
-
-#include <asm/types.h>
-#include <asm/byteorder.h>
-
-#define BITOP_WORD(nr)         ((nr) / BITS_PER_LONG)
-#define BITOP_LE_SWIZZLE       ((BITS_PER_LONG-1) & ~0x7)
-
-#if defined(__LITTLE_ENDIAN)
-
-#define generic_test_le_bit(nr, addr) test_bit(nr, addr)
-#define generic___set_le_bit(nr, addr) __set_bit(nr, addr)
-#define generic___clear_le_bit(nr, addr) __clear_bit(nr, addr)
-
-#define generic_test_and_set_le_bit(nr, addr) test_and_set_bit(nr, addr)
-#define generic_test_and_clear_le_bit(nr, addr) test_and_clear_bit(nr, addr)
-
-#define generic___test_and_set_le_bit(nr, addr) __test_and_set_bit(nr, addr)
-#define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr)
-
-#define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset)
-
-#elif defined(__BIG_ENDIAN)
-
-#define generic_test_le_bit(nr, addr) \
-       test_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-#define generic___set_le_bit(nr, addr) \
-       __set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-#define generic___clear_le_bit(nr, addr) \
-       __clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-
-#define generic_test_and_set_le_bit(nr, addr) \
-       test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-#define generic_test_and_clear_le_bit(nr, addr) \
-       test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-
-#define generic___test_and_set_le_bit(nr, addr) \
-       __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-#define generic___test_and_clear_le_bit(nr, addr) \
-       __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-
-extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
-               unsigned long size, unsigned long offset);
-
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-
-#define generic_find_first_zero_le_bit(addr, size) \
-        generic_find_next_zero_le_bit((addr), (size), 0)
-
-#endif /* _ASM_GENERIC_BITOPS_LE_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/non-atomic.h b/tools/testing/radix-tree/linux/bitops/non-atomic.h
deleted file mode 100644 (file)
index 6a1bcb9..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
-#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
-
-#include <asm/types.h>
-
-#define BITOP_WORD(nr)         ((nr) / BITS_PER_LONG)
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
-       *p  |= mask;
-}
-
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
-       *p &= ~mask;
-}
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
-       *p ^= mask;
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-       unsigned long old = *p;
-
-       *p = old | mask;
-       return (old & mask) != 0;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-       unsigned long old = *p;
-
-       *p = old & ~mask;
-       return (old & mask) != 0;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr,
-                                           volatile unsigned long *addr)
-{
-       unsigned long mask = BIT_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-       unsigned long old = *p;
-
-       *p = old ^ mask;
-       return (old & mask) != 0;
-}
-
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static inline int test_bit(int nr, const volatile unsigned long *addr)
-{
-       return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
-}
-
-#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/testing/radix-tree/linux/export.h b/tools/testing/radix-tree/linux/export.h
deleted file mode 100644 (file)
index b6afd13..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-
-#define EXPORT_SYMBOL(sym)
index 5b09b2ce6c33ad0e732a70e65347258e00f5a741..39a0dcb9475a3c4e261db2b195b7691d0b39c491 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _GFP_H
 #define _GFP_H
 
+#include <linux/types.h>
+
 #define __GFP_BITS_SHIFT 26
 #define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 #define __GFP_DIRECT_RECLAIM   0x400000u
 #define __GFP_KSWAPD_RECLAIM   0x2000000u
 
-#define __GFP_RECLAIM          (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+#define __GFP_RECLAIM  (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+
+#define GFP_ATOMIC     (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL     (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_NOWAIT     (__GFP_KSWAPD_RECLAIM)
 
-#define GFP_ATOMIC             (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
-#define GFP_KERNEL             (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 
 static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 {
diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/radix-tree/linux/idr.h
new file mode 100644 (file)
index 0000000..4e342f2
--- /dev/null
@@ -0,0 +1 @@
+#include "../../../../include/linux/idr.h"
index 360cabb3c4e7631e5c4f4cdd14747e776e132fd9..1bb0afc213099feb547052670d820eb9842e649b 100644 (file)
@@ -1 +1 @@
-/* An empty file stub that allows radix-tree.c to compile. */
+#define __init
index 9b43b4975d832b8c1b682fd5ef0ac0f4d5b5cf6d..b21a77fddcf7357cf4b9cffc7f1746e0e2053d05 100644 (file)
@@ -1,64 +1,21 @@
 #ifndef _KERNEL_H
 #define _KERNEL_H
 
-#include <assert.h>
+#include "../../include/linux/kernel.h"
 #include <string.h>
 #include <stdio.h>
-#include <stddef.h>
 #include <limits.h>
 
-#include "../../include/linux/compiler.h"
-#include "../../include/linux/err.h"
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
 #include "../../../include/linux/kconfig.h"
 
-#ifdef BENCHMARK
-#define RADIX_TREE_MAP_SHIFT   6
-#else
-#define RADIX_TREE_MAP_SHIFT   3
-#endif
-
-#ifndef NULL
-#define NULL   0
-#endif
-
-#define BUG_ON(expr)   assert(!(expr))
-#define WARN_ON(expr)  assert(!(expr))
-#define __init
-#define __must_check
-#define panic(expr)
 #define printk printf
-#define __force
-#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define pr_debug printk
-
-#define smp_rmb()      barrier()
-#define smp_wmb()      barrier()
-#define cpu_relax()    barrier()
+#define pr_cont printk
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
-#define container_of(ptr, type, member) ({                      \
-       const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
-       (type *)( (char *)__mptr - offsetof(type, member) );})
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
-#define cond_resched() sched_yield()
-
-static inline int in_interrupt(void)
-{
-       return 0;
-}
-
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
-#define round_down(x, y) ((x) & ~__round_mask(x, y))
-
-#define xchg(ptr, x)   uatomic_xchg(ptr, x)
-
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/mempool.h b/tools/testing/radix-tree/linux/mempool.h
deleted file mode 100644 (file)
index 6a2dc55..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-
-#include <linux/slab.h>
-
-typedef void *(mempool_alloc_t)(int gfp_mask, void *pool_data);
-typedef void (mempool_free_t)(void *element, void *pool_data);
-
-typedef struct {
-       mempool_alloc_t *alloc;
-       mempool_free_t *free;
-       void *data;
-} mempool_t;
-
-void *mempool_alloc(mempool_t *pool, int gfp_mask);
-void mempool_free(void *element, mempool_t *pool);
-mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
-                       mempool_free_t *free_fn, void *pool_data);
index 5837f1d56f1788c7b113d9ffc89fe32a545c3317..3ea01a1a88c2b2adcbb475ca3f609586c2758831 100644 (file)
@@ -1,7 +1,10 @@
-
+#define DECLARE_PER_CPU(type, val) extern type val
 #define DEFINE_PER_CPU(type, val) type val
 
 #define __get_cpu_var(var)     var
 #define this_cpu_ptr(var)      var
+#define this_cpu_read(var)     var
+#define this_cpu_xchg(var, val)                uatomic_xchg(&var, val)
+#define this_cpu_cmpxchg(var, old, new)        uatomic_cmpxchg(&var, old, new)
 #define per_cpu_ptr(ptr, cpu)   ({ (void)(cpu); (ptr); })
 #define per_cpu(var, cpu)      (*per_cpu_ptr(&(var), cpu))
index 65c04c226965d9a0816f889103f42a24957f3fc8..35c5ac81529fa9d9f6c0ab526db47d793dd209f3 100644 (file)
@@ -1,4 +1,14 @@
+#ifndef __LINUX_PREEMPT_H
+#define __LINUX_PREEMPT_H
+
 extern int preempt_count;
 
 #define preempt_disable()      uatomic_inc(&preempt_count)
 #define preempt_enable()       uatomic_dec(&preempt_count)
+
+static inline int in_interrupt(void)
+{
+       return 0;
+}
+
+#endif /* __LINUX_PREEMPT_H */
index ce694ddd4aea7d42b0d0c88d6807a244678c150b..bf1bb231f9b5cd98a4c0451eaf1eb8744ee89cf5 100644 (file)
@@ -1 +1,26 @@
+#ifndef _TEST_RADIX_TREE_H
+#define _TEST_RADIX_TREE_H
+
+#include "generated/map-shift.h"
 #include "../../../../include/linux/radix-tree.h"
+
+extern int kmalloc_verbose;
+extern int test_verbose;
+
+static inline void trace_call_rcu(struct rcu_head *head,
+               void (*func)(struct rcu_head *head))
+{
+       if (kmalloc_verbose)
+               printf("Delaying free of %p to slab\n", (char *)head -
+                               offsetof(struct radix_tree_node, rcu_head));
+       call_rcu(head, func);
+}
+
+#define printv(verbosity_level, fmt, ...) \
+       do { if (test_verbose >= verbosity_level) \
+               printf(fmt, ##__VA_ARGS__); } while (0)
+
+#undef call_rcu
+#define call_rcu(x, y) trace_call_rcu(x, y)
+
+#endif /* _TEST_RADIX_TREE_H */
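
Two tricks are packed into this header. trace_call_rcu() steps back from the embedded rcu_head to the enclosing radix_tree_node with offsetof() before logging, and the closing #undef/#define pair interposes the wrapper on every call_rcu() call site that includes the header. The pointer recovery in stand-alone form (illustrative structs, not the kernel's):

    #include <stddef.h>
    #include <stdio.h>

    struct head { void *next; };            /* stand-in for rcu_head */
    struct node {
            long payload;
            struct head rcu_head;           /* embedded, not first */
    };

    int main(void)
    {
            struct node n;
            struct head *h = &n.rcu_head;
            /* Subtract the member offset to recover the container. */
            struct node *back = (struct node *)
                    ((char *)h - offsetof(struct node, rcu_head));

            printf("recovered %p, original %p\n",
                   (void *)back, (void *)&n); /* prints the same address */
            return 0;
    }
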
diff --git a/tools/testing/radix-tree/linux/types.h b/tools/testing/radix-tree/linux/types.h
deleted file mode 100644 (file)
index 8491d89..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _TYPES_H
-#define _TYPES_H
-
-#include "../../include/linux/types.h"
-
-#define __rcu
-#define __read_mostly
-
-static inline void INIT_LIST_HEAD(struct list_head *list)
-{
-       list->next = list;
-       list->prev = list;
-}
-
-typedef struct {
-       unsigned int x;
-} spinlock_t;
-
-#define uninitialized_var(x) x = x
-
-#include <linux/gfp.h>
-
-#endif
index f7e9801a6754f884fe86ff932e9a38ab29b18747..b829127d56705747a0a74c73e8b11b8cae8ecc27 100644 (file)
@@ -3,6 +3,7 @@
 #include <unistd.h>
 #include <time.h>
 #include <assert.h>
+#include <limits.h>
 
 #include <linux/slab.h>
 #include <linux/radix-tree.h>
@@ -67,7 +68,7 @@ void big_gang_check(bool long_run)
 
        for (i = 0; i < (long_run ? 1000 : 3); i++) {
                __big_gang_check();
-               printf("%d ", i);
+               printv(2, "%d ", i);
                fflush(stdout);
        }
 }
@@ -128,14 +129,19 @@ void check_copied_tags(struct radix_tree_root *tree, unsigned long start, unsign
                        putchar('.'); */
                if (idx[i] < start || idx[i] > end) {
                        if (item_tag_get(tree, idx[i], totag)) {
-                               printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag));
+                               printv(2, "%lu-%lu: %lu, tags %d-%d\n", start,
+                                      end, idx[i], item_tag_get(tree, idx[i],
+                                                                fromtag),
+                                      item_tag_get(tree, idx[i], totag));
                        }
                        assert(!item_tag_get(tree, idx[i], totag));
                        continue;
                }
                if (item_tag_get(tree, idx[i], fromtag) ^
                        item_tag_get(tree, idx[i], totag)) {
-                       printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag));
+                       printv(2, "%lu-%lu: %lu, tags %d-%d\n", start, end,
+                              idx[i], item_tag_get(tree, idx[i], fromtag),
+                              item_tag_get(tree, idx[i], totag));
                }
                assert(!(item_tag_get(tree, idx[i], fromtag) ^
                         item_tag_get(tree, idx[i], totag)));
@@ -237,7 +243,7 @@ static void __locate_check(struct radix_tree_root *tree, unsigned long index,
        item = item_lookup(tree, index);
        index2 = find_item(tree, item);
        if (index != index2) {
-               printf("index %ld order %d inserted; found %ld\n",
+               printv(2, "index %ld order %d inserted; found %ld\n",
                        index, order, index2);
                abort();
        }
@@ -288,43 +294,48 @@ static void single_thread_tests(bool long_run)
 {
        int i;
 
-       printf("starting single_thread_tests: %d allocated, preempt %d\n",
+       printv(1, "starting single_thread_tests: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        multiorder_checks();
        rcu_barrier();
-       printf("after multiorder_check: %d allocated, preempt %d\n",
+       printv(2, "after multiorder_check: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        locate_check();
        rcu_barrier();
-       printf("after locate_check: %d allocated, preempt %d\n",
+       printv(2, "after locate_check: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        tag_check();
        rcu_barrier();
-       printf("after tag_check: %d allocated, preempt %d\n",
+       printv(2, "after tag_check: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        gang_check();
        rcu_barrier();
-       printf("after gang_check: %d allocated, preempt %d\n",
+       printv(2, "after gang_check: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        add_and_check();
        rcu_barrier();
-       printf("after add_and_check: %d allocated, preempt %d\n",
+       printv(2, "after add_and_check: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        dynamic_height_check();
        rcu_barrier();
-       printf("after dynamic_height_check: %d allocated, preempt %d\n",
+       printv(2, "after dynamic_height_check: %d allocated, preempt %d\n",
+               nr_allocated, preempt_count);
+       idr_checks();
+       ida_checks();
+       rcu_barrier();
+       printv(2, "after idr_checks: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        big_gang_check(long_run);
        rcu_barrier();
-       printf("after big_gang_check: %d allocated, preempt %d\n",
+       printv(2, "after big_gang_check: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        for (i = 0; i < (long_run ? 2000 : 3); i++) {
                copy_tag_check();
-               printf("%d ", i);
+               printv(2, "%d ", i);
                fflush(stdout);
        }
        rcu_barrier();
-       printf("after copy_tag_check: %d allocated, preempt %d\n",
+       printv(2, "after copy_tag_check: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
 }
 
@@ -334,24 +345,28 @@ int main(int argc, char **argv)
        int opt;
        unsigned int seed = time(NULL);
 
-       while ((opt = getopt(argc, argv, "ls:")) != -1) {
+       while ((opt = getopt(argc, argv, "ls:v")) != -1) {
                if (opt == 'l')
                        long_run = true;
                else if (opt == 's')
                        seed = strtoul(optarg, NULL, 0);
+               else if (opt == 'v')
+                       test_verbose++;
        }
 
        printf("random seed %u\n", seed);
        srand(seed);
 
+       printf("running tests\n");
+
        rcu_register_thread();
        radix_tree_init();
 
        regression1_test();
        regression2_test();
        regression3_test();
-       iteration_test(0, 10);
-       iteration_test(7, 20);
+       iteration_test(0, 10 + 90 * long_run);
+       iteration_test(7, 10 + 90 * long_run);
        single_thread_tests(long_run);
 
        /* Free any remaining preallocated nodes */
@@ -360,9 +375,11 @@ int main(int argc, char **argv)
        benchmark();
 
        rcu_barrier();
-       printf("after rcu_barrier: %d allocated, preempt %d\n",
+       printv(2, "after rcu_barrier: %d allocated, preempt %d\n",
                nr_allocated, preempt_count);
        rcu_unregister_thread();
 
+       printf("tests completed\n");
+
        exit(0);
 }
index f79812a5e0708b9146c2dcb479d804637262b1b8..06c71178d07d98ab03bbf1575a3d98fa5299f7b1 100644 (file)
@@ -30,7 +30,7 @@ static void __multiorder_tag_test(int index, int order)
        /* our canonical entry */
        base = index & ~((1 << order) - 1);
 
-       printf("Multiorder tag test with index %d, canonical entry %d\n",
+       printv(2, "Multiorder tag test with index %d, canonical entry %d\n",
                        index, base);
 
        err = item_insert_order(&tree, index, order);
@@ -150,7 +150,7 @@ static void multiorder_check(unsigned long index, int order)
        struct item *item2 = item_create(min, order);
        RADIX_TREE(tree, GFP_KERNEL);
 
-       printf("Multiorder index %ld, order %d\n", index, order);
+       printv(2, "Multiorder index %ld, order %d\n", index, order);
 
        assert(item_insert_order(&tree, index, order) == 0);
 
@@ -188,7 +188,7 @@ static void multiorder_shrink(unsigned long index, int order)
        RADIX_TREE(tree, GFP_KERNEL);
        struct radix_tree_node *node;
 
-       printf("Multiorder shrink index %ld, order %d\n", index, order);
+       printv(2, "Multiorder shrink index %ld, order %d\n", index, order);
 
        assert(item_insert_order(&tree, 0, order) == 0);
 
@@ -209,7 +209,8 @@ static void multiorder_shrink(unsigned long index, int order)
                item_check_absent(&tree, i);
 
        if (!item_delete(&tree, 0)) {
-               printf("failed to delete index %ld (order %d)\n", index, order);                abort();
+               printv(2, "failed to delete index %ld (order %d)\n", index, order);
+               abort();
        }
 
        for (i = 0; i < 2*max; i++)
@@ -234,7 +235,7 @@ void multiorder_iteration(void)
        void **slot;
        int i, j, err;
 
-       printf("Multiorder iteration test\n");
+       printv(1, "Multiorder iteration test\n");
 
 #define NUM_ENTRIES 11
        int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128};
@@ -275,7 +276,7 @@ void multiorder_tagged_iteration(void)
        void **slot;
        int i, j;
 
-       printf("Multiorder tagged iteration test\n");
+       printv(1, "Multiorder tagged iteration test\n");
 
 #define MT_NUM_ENTRIES 9
        int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128};
@@ -355,6 +356,10 @@ void multiorder_tagged_iteration(void)
        item_kill_tree(&tree);
 }
 
+/*
+ * Basic join checks: make sure we can't find an entry in the tree after
+ * a larger entry has replaced it
+ */
 static void multiorder_join1(unsigned long index,
                                unsigned order1, unsigned order2)
 {
@@ -373,6 +378,10 @@ static void multiorder_join1(unsigned long index,
        item_kill_tree(&tree);
 }
 
+/*
+ * Check that the accounting of exceptional entries is handled correctly
+ * by joining an exceptional entry to a normal pointer.
+ */
 static void multiorder_join2(unsigned order1, unsigned order2)
 {
        RADIX_TREE(tree, GFP_KERNEL);
@@ -386,6 +395,9 @@ static void multiorder_join2(unsigned order1, unsigned order2)
        assert(item2 == (void *)0x12UL);
        assert(node->exceptional == 1);
 
+       item2 = radix_tree_lookup(&tree, 0);
+       free(item2);
+
        radix_tree_join(&tree, 0, order1, item1);
        item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
        assert(item2 == item1);
@@ -453,7 +465,7 @@ static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
 {
        struct radix_tree_preload *rtp = &radix_tree_preloads;
        if (rtp->nr != 0)
-               printf("split(%u %u) remaining %u\n", old_order, new_order,
+               printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
                                                        rtp->nr);
        /*
         * Can't check for equality here as some nodes may have been
@@ -461,7 +473,7 @@ static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
         * nodes allocated since they should have all been preloaded.
         */
        if (nr_allocated > alloc)
-               printf("split(%u %u) allocated %u %u\n", old_order, new_order,
+               printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
                                                        alloc, nr_allocated);
 }
 
@@ -471,6 +483,7 @@ static void __multiorder_split(int old_order, int new_order)
        void **slot;
        struct radix_tree_iter iter;
        unsigned alloc;
+       struct item *item;
 
        radix_tree_preload(GFP_KERNEL);
        assert(item_insert_order(&tree, 0, old_order) == 0);
@@ -479,7 +492,7 @@ static void __multiorder_split(int old_order, int new_order)
        /* Wipe out the preloaded cache or it'll confuse check_mem() */
        radix_tree_cpu_dead(0);
 
-       radix_tree_tag_set(&tree, 0, 2);
+       item = radix_tree_tag_set(&tree, 0, 2);
 
        radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
        alloc = nr_allocated;
@@ -492,6 +505,7 @@ static void __multiorder_split(int old_order, int new_order)
        radix_tree_preload_end();
 
        item_kill_tree(&tree);
+       free(item);
 }
 
 static void __multiorder_split2(int old_order, int new_order)
@@ -633,3 +647,10 @@ void multiorder_checks(void)
 
        radix_tree_cpu_dead(0);
 }
+
+int __weak main(void)
+{
+       radix_tree_init();
+       multiorder_checks();
+       return 0;
+}
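
The __weak main() added here, like the one in idr-test.c, is what lets the Makefile link multiorder.o both into its own stand-alone binary and into the combined `main` runner: alone, the weak definition is the entry point; together, main.c's ordinary definition wins without a duplicate-symbol error. The mechanism in miniature (illustrative example, not from the patch):

    #include <stdio.h>

    /* Used only if no strong definition of answer() is linked in. */
    int __attribute__((weak)) answer(void)
    {
            return 0;
    }

    int main(void)
    {
            printf("%d\n", answer());
            return 0;
    }

    /* In a separate strong.c:  int answer(void) { return 42; }
     *   cc demo.c            -> prints 0 (weak definition used)
     *   cc demo.c strong.c   -> prints 42, with no link error   */
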
index 0d6813a61b37f904b436e74448c910a350c10bb3..bf97742fc18c1cc62e923acc448a5db64b5578fb 100644 (file)
@@ -193,7 +193,7 @@ void regression1_test(void)
        long arg;
 
        /* Regression #1 */
-       printf("running regression test 1, should finish in under a minute\n");
+       printv(1, "running regression test 1, should finish in under a minute\n");
        nr_threads = 2;
        pthread_barrier_init(&worker_barrier, NULL, nr_threads);
 
@@ -216,5 +216,5 @@ void regression1_test(void)
 
        free(threads);
 
-       printf("regression test 1, done\n");
+       printv(1, "regression test 1, done\n");
 }
index a41325d7a170eff26cca5f789f718492209eb1d5..42dd2a33ed249080560d18ad99585553da843a29 100644 (file)
@@ -80,7 +80,7 @@ void regression2_test(void)
        unsigned long int start, end;
        struct page *pages[1];
 
-       printf("running regression test 2 (should take milliseconds)\n");
+       printv(1, "running regression test 2 (should take milliseconds)\n");
        /* 0. */
        for (i = 0; i <= max_slots - 1; i++) {
                p = page_alloc();
@@ -103,7 +103,7 @@ void regression2_test(void)
 
        /* 4. */
        for (i = max_slots - 1; i >= 0; i--)
-               radix_tree_delete(&mt_tree, i);
+               free(radix_tree_delete(&mt_tree, i));
 
        /* 5. */
        // NOTE: start should not be 0 because radix_tree_gang_lookup_tag_slot
@@ -114,7 +114,9 @@ void regression2_test(void)
                PAGECACHE_TAG_TOWRITE);
 
        /* We remove all the remaining nodes */
-       radix_tree_delete(&mt_tree, max_slots);
+       free(radix_tree_delete(&mt_tree, max_slots));
 
-       printf("regression test 2, done\n");
+       BUG_ON(!radix_tree_empty(&mt_tree));
+
+       printv(1, "regression test 2, done\n");
 }
index b594841fae8594c52a530b39cca7c2bdc772fb20..670c3d2ae7b189f9405d1ac536013a32ba9db188 100644 (file)
@@ -34,21 +34,21 @@ void regression3_test(void)
        void **slot;
        bool first;
 
-       printf("running regression test 3 (should take milliseconds)\n");
+       printv(1, "running regression test 3 (should take milliseconds)\n");
 
        radix_tree_insert(&root, 0, ptr0);
        radix_tree_tag_set(&root, 0, 0);
 
        first = true;
        radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
-               printf("tagged %ld %p\n", iter.index, *slot);
+               printv(2, "tagged %ld %p\n", iter.index, *slot);
                if (first) {
                        radix_tree_insert(&root, 1, ptr);
                        radix_tree_tag_set(&root, 1, 0);
                        first = false;
                }
                if (radix_tree_deref_retry(*slot)) {
-                       printf("retry at %ld\n", iter.index);
+                       printv(2, "retry at %ld\n", iter.index);
                        slot = radix_tree_iter_retry(&iter);
                        continue;
                }
@@ -57,13 +57,13 @@ void regression3_test(void)
 
        first = true;
        radix_tree_for_each_slot(slot, &root, &iter, 0) {
-               printf("slot %ld %p\n", iter.index, *slot);
+               printv(2, "slot %ld %p\n", iter.index, *slot);
                if (first) {
                        radix_tree_insert(&root, 1, ptr);
                        first = false;
                }
                if (radix_tree_deref_retry(*slot)) {
-                       printk("retry at %ld\n", iter.index);
+                       printv(2, "retry at %ld\n", iter.index);
                        slot = radix_tree_iter_retry(&iter);
                        continue;
                }
@@ -72,30 +72,30 @@ void regression3_test(void)
 
        first = true;
        radix_tree_for_each_contig(slot, &root, &iter, 0) {
-               printk("contig %ld %p\n", iter.index, *slot);
+               printv(2, "contig %ld %p\n", iter.index, *slot);
                if (first) {
                        radix_tree_insert(&root, 1, ptr);
                        first = false;
                }
                if (radix_tree_deref_retry(*slot)) {
-                       printk("retry at %ld\n", iter.index);
+                       printv(2, "retry at %ld\n", iter.index);
                        slot = radix_tree_iter_retry(&iter);
                        continue;
                }
        }
 
        radix_tree_for_each_slot(slot, &root, &iter, 0) {
-               printf("slot %ld %p\n", iter.index, *slot);
+               printv(2, "slot %ld %p\n", iter.index, *slot);
                if (!iter.index) {
-                       printf("next at %ld\n", iter.index);
+                       printv(2, "next at %ld\n", iter.index);
                        slot = radix_tree_iter_resume(slot, &iter);
                }
        }
 
        radix_tree_for_each_contig(slot, &root, &iter, 0) {
-               printf("contig %ld %p\n", iter.index, *slot);
+               printv(2, "contig %ld %p\n", iter.index, *slot);
                if (!iter.index) {
-                       printf("next at %ld\n", iter.index);
+                       printv(2, "next at %ld\n", iter.index);
                        slot = radix_tree_iter_resume(slot, &iter);
                }
        }
@@ -103,9 +103,9 @@ void regression3_test(void)
        radix_tree_tag_set(&root, 0, 0);
        radix_tree_tag_set(&root, 1, 0);
        radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
-               printf("tagged %ld %p\n", iter.index, *slot);
+               printv(2, "tagged %ld %p\n", iter.index, *slot);
                if (!iter.index) {
-                       printf("next at %ld\n", iter.index);
+                       printv(2, "next at %ld\n", iter.index);
                        slot = radix_tree_iter_resume(slot, &iter);
                }
        }
@@ -113,5 +113,5 @@ void regression3_test(void)
        radix_tree_delete(&root, 0);
        radix_tree_delete(&root, 1);
 
-       printf("regression test 3 passed\n");
+       printv(1, "regression test 3 passed\n");
 }
index fd98c132207aa353f4acce5413b4bf7c23c1cfda..d4ff009892456a3b588df788488027da45209ecf 100644 (file)
@@ -49,10 +49,10 @@ void simple_checks(void)
        }
        verify_tag_consistency(&tree, 0);
        verify_tag_consistency(&tree, 1);
-       printf("before item_kill_tree: %d allocated\n", nr_allocated);
+       printv(2, "before item_kill_tree: %d allocated\n", nr_allocated);
        item_kill_tree(&tree);
        rcu_barrier();
-       printf("after item_kill_tree: %d allocated\n", nr_allocated);
+       printv(2, "after item_kill_tree: %d allocated\n", nr_allocated);
 }
 
 /*
@@ -257,7 +257,7 @@ static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag)
 
                gang_check(tree, thrash_state, tag);
 
-               printf("%d(%d) %d(%d) %d(%d) %d(%d) / "
+               printv(2, "%d(%d) %d(%d) %d(%d) %d(%d) / "
                                "%d(%d) present, %d(%d) tagged\n",
                        insert_chunk, nr_inserted,
                        delete_chunk, nr_deleted,
@@ -296,13 +296,13 @@ static void __leak_check(void)
 {
        RADIX_TREE(tree, GFP_KERNEL);
 
-       printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+       printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
        item_insert(&tree, 1000000);
-       printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+       printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
        item_delete(&tree, 1000000);
-       printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+       printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
        item_kill_tree(&tree);
-       printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+       printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
 }
 
 static void single_check(void)
@@ -336,15 +336,15 @@ void tag_check(void)
        extend_checks();
        contract_checks();
        rcu_barrier();
-       printf("after extend_checks: %d allocated\n", nr_allocated);
+       printv(2, "after extend_checks: %d allocated\n", nr_allocated);
        __leak_check();
        leak_check();
        rcu_barrier();
-       printf("after leak_check: %d allocated\n", nr_allocated);
+       printv(2, "after leak_check: %d allocated\n", nr_allocated);
        simple_checks();
        rcu_barrier();
-       printf("after simple_checks: %d allocated\n", nr_allocated);
+       printv(2, "after simple_checks: %d allocated\n", nr_allocated);
        thrash_tags();
        rcu_barrier();
-       printf("after thrash_tags: %d allocated\n", nr_allocated);
+       printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
 }
index e5726e3736468a7003f2b9529a36d84c659ace58..1a257d738a1e4e618563de835d55fc847c5daac1 100644 (file)
@@ -29,15 +29,28 @@ int __item_insert(struct radix_tree_root *root, struct item *item)
        return __radix_tree_insert(root, item->index, item->order, item);
 }
 
-int item_insert(struct radix_tree_root *root, unsigned long index)
+struct item *item_create(unsigned long index, unsigned int order)
 {
-       return __item_insert(root, item_create(index, 0));
+       struct item *ret = malloc(sizeof(*ret));
+
+       ret->index = index;
+       ret->order = order;
+       return ret;
 }
 
 int item_insert_order(struct radix_tree_root *root, unsigned long index,
                        unsigned order)
 {
-       return __item_insert(root, item_create(index, order));
+       struct item *item = item_create(index, order);
+       int err = __item_insert(root, item);
+       if (err)
+               free(item);
+       return err;
+}
+
+int item_insert(struct radix_tree_root *root, unsigned long index)
+{
+       return item_insert_order(root, index, 0);
 }
 
 void item_sanity(struct item *item, unsigned long index)
@@ -61,15 +74,6 @@ int item_delete(struct radix_tree_root *root, unsigned long index)
        return 0;
 }
 
-struct item *item_create(unsigned long index, unsigned int order)
-{
-       struct item *ret = malloc(sizeof(*ret));
-
-       ret->index = index;
-       ret->order = order;
-       return ret;
-}
-
 void item_check_present(struct radix_tree_root *root, unsigned long index)
 {
        struct item *item;
index 056a23b56467ca54b07b5104da54eb4c5083e984..b30e11d9d271c39ccb284019938876ae2785f7ca 100644 (file)
@@ -34,6 +34,8 @@ void tag_check(void);
 void multiorder_checks(void);
 void iteration_test(unsigned order, unsigned duration);
 void benchmark(void);
+void idr_checks(void);
+void ida_checks(void);
 
 struct item *
 item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
index 831022b12848de68695065040b38bd7ec55f7577..e8b79a7b50bd52322a60541146f39ebbe4a36057 100644 (file)
@@ -1,6 +1,7 @@
 TARGETS =  bpf
 TARGETS += breakpoints
 TARGETS += capabilities
+TARGETS += cpufreq
 TARGETS += cpu-hotplug
 TARGETS += efivarfs
 TARGETS += exec
@@ -8,6 +9,7 @@ TARGETS += firmware
 TARGETS += ftrace
 TARGETS += futex
 TARGETS += gpio
+TARGETS += intel_pstate
 TARGETS += ipc
 TARGETS += kcmp
 TARGETS += lib
@@ -49,29 +51,44 @@ override LDFLAGS =
 override MAKEFLAGS =
 endif
 
+BUILD := $(O)
+ifndef BUILD
+  BUILD := $(KBUILD_OUTPUT)
+endif
+ifndef BUILD
+  BUILD := $(shell pwd)
+endif
+
+export BUILD
 all:
-       for TARGET in $(TARGETS); do \
-               make -C $$TARGET; \
+       for TARGET in $(TARGETS); do            \
+               BUILD_TARGET=$$BUILD/$$TARGET;  \
+               mkdir $$BUILD_TARGET  -p;       \
+               make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
        done;
 
 run_tests: all
        for TARGET in $(TARGETS); do \
-               make -C $$TARGET run_tests; \
+               BUILD_TARGET=$$BUILD/$$TARGET;  \
+               make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
        done;
 
 hotplug:
        for TARGET in $(TARGETS_HOTPLUG); do \
-               make -C $$TARGET; \
+               BUILD_TARGET=$$BUILD/$$TARGET;  \
+               make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
        done;
 
 run_hotplug: hotplug
        for TARGET in $(TARGETS_HOTPLUG); do \
-               make -C $$TARGET run_full_test; \
+               BUILD_TARGET=$$BUILD/$$TARGET;  \
+               make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
        done;
 
 clean_hotplug:
        for TARGET in $(TARGETS_HOTPLUG); do \
-               make -C $$TARGET clean; \
+               BUILD_TARGET=$$BUILD/$$TARGET;  \
+               make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
        done;
 
 run_pstore_crash:
@@ -86,7 +103,8 @@ ifdef INSTALL_PATH
        @# Ask all targets to install their files
        mkdir -p $(INSTALL_PATH)
        for TARGET in $(TARGETS); do \
-               make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+               BUILD_TARGET=$$BUILD/$$TARGET;  \
+               make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
        done;
 
        @# Ask all targets to emit their test scripts
@@ -95,10 +113,11 @@ ifdef INSTALL_PATH
        echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
 
        for TARGET in $(TARGETS); do \
+               BUILD_TARGET=$$BUILD/$$TARGET;  \
                echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
                echo "echo ========================================" >> $(ALL_SCRIPT); \
                echo "cd $$TARGET" >> $(ALL_SCRIPT); \
-               make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+               make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
                echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
        done;
 
@@ -109,7 +128,8 @@ endif
 
 clean:
        for TARGET in $(TARGETS); do \
-               make -C $$TARGET clean; \
+               BUILD_TARGET=$$BUILD/$$TARGET;  \
+               make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
        done;
 
 .PHONY: install
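
With the BUILD plumbing above, the top-level selftests Makefile honours the usual kbuild output conventions: O= on the command line wins, then KBUILD_OUTPUT from the environment, then the source directory itself. A minimal usage sketch, assuming a kernel tree at ~/linux (the path is illustrative):

    cd ~/linux/tools/testing/selftests
    make O=/tmp/ksft              # per-target objects land in /tmp/ksft/<target>
    make O=/tmp/ksft run_tests    # run what was just built

    # equivalently, via the environment:
    export KBUILD_OUTPUT=/tmp/ksft
    make && make run_tests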
index c7816fe60feb92b2e53ab7f1c1e2a560de34c866..4b498265dae6dc3b52b35818453f2c895809b323 100644 (file)
@@ -3,15 +3,12 @@ BPFOBJ := $(LIBDIR)/bpf/bpf.o
 
 CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
 
-test_objs = test_verifier test_tag test_maps test_lru_map test_lpm_map
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
-TEST_PROGS := $(test_objs) test_kmod.sh
-TEST_FILES := $(test_objs)
+TEST_PROGS := test_kmod.sh
 
 .PHONY: all clean force
 
-all: $(test_objs)
-
 # force a rebuild of BPFOBJ when its dependencies are updated
 force:
 
@@ -21,6 +18,3 @@ $(BPFOBJ): force
 $(test_objs): $(BPFOBJ)
 
 include ../lib.mk
-
-clean:
-       $(RM) $(test_objs)
index 61b79e8df1f4587c655fd3c440575241196e3a92..72aa103e4141d3b252c00a8bcf680edc4cd2dfbb 100644 (file)
@@ -3,17 +3,13 @@ uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
 ifeq ($(ARCH),x86)
-TEST_PROGS := breakpoint_test
+TEST_GEN_PROGS := breakpoint_test
 endif
 ifeq ($(ARCH),aarch64)
-TEST_PROGS := breakpoint_test_arm64
+TEST_GEN_PROGS := breakpoint_test_arm64
 endif
 
-TEST_PROGS += step_after_suspend_test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS += step_after_suspend_test
 
 include ../lib.mk
 
-clean:
-       rm -fr breakpoint_test breakpoint_test_arm64 step_after_suspend_test
index 008602aed9209237254ca334a0a745f06c0888c4..29b8adfdac717d76cd32def4aaf61e94744b733f 100644 (file)
@@ -1,15 +1,8 @@
-TEST_FILES := validate_cap
-TEST_PROGS := test_execve
-
-BINARIES := $(TEST_FILES) $(TEST_PROGS)
+TEST_GEN_FILES := validate_cap
+TEST_GEN_PROGS := test_execve
 
 CFLAGS += -O2 -g -std=gnu99 -Wall
 LDLIBS += -lcap-ng -lrt -ldl
 
-all: $(BINARIES)
-
-clean:
-       $(RM) $(BINARIES)
-
 include ../lib.mk
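
The same conversion repeats across the per-directory Makefiles in this series: compiled binaries move from TEST_PROGS/TEST_FILES to TEST_GEN_PROGS/TEST_GEN_FILES, and the hand-written all:/clean: rules drop out because lib.mk now supplies them. A sketch of building a single directory into a scratch location (paths illustrative):

    make -C tools/testing/selftests/capabilities OUTPUT=/tmp/caps
    make -C tools/testing/selftests/capabilities OUTPUT=/tmp/caps run_tests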
 
diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
new file mode 100644 (file)
index 0000000..3955cd9
--- /dev/null
@@ -0,0 +1,8 @@
+all:
+
+TEST_PROGS := main.sh
+TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh
+
+include ../lib.mk
+
+clean:
diff --git a/tools/testing/selftests/cpufreq/cpu.sh b/tools/testing/selftests/cpufreq/cpu.sh
new file mode 100755 (executable)
index 0000000..8e08a83
--- /dev/null
@@ -0,0 +1,84 @@
+#!/bin/bash
+#
+# CPU helpers
+
+# protect against multiple inclusion
+if [ $FILE_CPU ]; then
+       return 0
+else
+       FILE_CPU=DONE
+fi
+
+source cpufreq.sh
+
+for_each_cpu()
+{
+       cpus=$(ls $CPUROOT | grep "cpu[0-9].*")
+       for cpu in $cpus; do
+               $@ $cpu
+       done
+}
+
+for_each_non_boot_cpu()
+{
+       cpus=$(ls $CPUROOT | grep "cpu[1-9].*")
+       for cpu in $cpus; do
+               $@ $cpu
+       done
+}
+
+#$1: cpu
+offline_cpu()
+{
+       printf "Offline $1\n"
+       echo 0 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+online_cpu()
+{
+       printf "Online $1\n"
+       echo 1 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+reboot_cpu()
+{
+       offline_cpu $1
+       online_cpu $1
+}
+
+# Reboot CPUs
+# param: number of times we want to run the loop
+reboot_cpus()
+{
+       printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+       for i in `seq 1 $1`; do
+               for_each_non_boot_cpu offline_cpu
+               for_each_non_boot_cpu online_cpu
+               printf "\n"
+       done
+
+       printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Prints warning for all CPUs with missing cpufreq directory
+print_unmanaged_cpus()
+{
+       for_each_cpu cpu_should_have_cpufreq_directory
+}
+
+# Counts CPUs with cpufreq directories
+count_cpufreq_managed_cpus()
+{
+       count=0;
+
+       for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do
+               if [ -d $CPUROOT/$cpu/cpufreq ]; then
+                       let count=count+1;
+               fi
+       done
+
+       echo $count;
+}
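
These helpers are meant to be composed: for_each_cpu and for_each_non_boot_cpu take a function name and call it once per CPU. A sketch of interactive use, assuming CPUROOT has already been initialised by main.sh's prerequisite():

    source cpu.sh
    for_each_non_boot_cpu offline_cpu   # take every non-boot CPU down
    for_each_non_boot_cpu online_cpu    # bring them back
    reboot_cpus 2                       # or do both, twice, in one call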
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
new file mode 100755 (executable)
index 0000000..1ed3832
--- /dev/null
@@ -0,0 +1,241 @@
+#!/bin/bash
+
+# protect against multiple inclusion
+if [ $FILE_CPUFREQ ]; then
+       return 0
+else
+       FILE_CPUFREQ=DONE
+fi
+
+source cpu.sh
+
+
+# $1: cpu
+cpu_should_have_cpufreq_directory()
+{
+       if [ ! -d $CPUROOT/$1/cpufreq ]; then
+               printf "Warning: No cpufreq directory present for $1\n"
+       fi
+}
+
+cpu_should_not_have_cpufreq_directory()
+{
+       if [ -d $CPUROOT/$1/cpufreq ]; then
+               printf "Warning: cpufreq directory present for $1\n"
+       fi
+}
+
+for_each_policy()
+{
+       policies=$(ls $CPUFREQROOT | grep "policy[0-9].*")
+       for policy in $policies; do
+               $@ $policy
+       done
+}
+
+for_each_policy_concurrent()
+{
+       policies=$(ls $CPUFREQROOT | grep "policy[0-9].*")
+       for policy in $policies; do
+               $@ $policy &
+       done
+}
+
+# $1: Path
+read_cpufreq_files_in_dir()
+{
+       local files=`ls $1`
+
+       printf "Printing directory: $1\n\n"
+
+       for file in $files; do
+               if [ -f $1/$file ]; then
+                       printf "$file:"
+                       cat $1/$file
+               else
+                       printf "\n"
+                       read_cpufreq_files_in_dir "$1/$file"
+               fi
+       done
+       printf "\n"
+}
+
+
+read_all_cpufreq_files()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+       read_cpufreq_files_in_dir $CPUFREQROOT
+
+       printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# UPDATE CPUFREQ FILES
+
+# $1: directory path
+update_cpufreq_files_in_dir()
+{
+       local files=`ls $1`
+
+       printf "Updating directory: $1\n\n"
+
+       for file in $files; do
+               if [ -f $1/$file ]; then
+                       # is the file writable?
+                       local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+
+                       if [ ! -z $wfile ]; then
+                               # scaling_setspeed is a special file and we
+                               # should skip updating it
+                               if [ $file != "scaling_setspeed" ]; then
+                                       local val=$(cat $1/$file)
+                                       printf "Writing $val to: $file\n"
+                                       echo $val > $1/$file
+                               fi
+                       fi
+               else
+                       printf "\n"
+                       update_cpufreq_files_in_dir "$1/$file"
+               fi
+       done
+
+       printf "\n"
+}
+
+# Update all writable files with their existing values
+update_all_cpufreq_files()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+       update_cpufreq_files_in_dir $CPUFREQROOT
+
+       printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# CHANGE CPU FREQUENCIES
+
+# $1: policy
+find_current_freq()
+{
+       cat $CPUFREQROOT/$1/scaling_cur_freq
+}
+
+# $1: policy
+# $2: frequency
+set_cpu_frequency()
+{
+       printf "Change frequency for $1 to $2\n"
+       echo $2 > $CPUFREQROOT/$1/scaling_setspeed
+}
+
+# $1: policy
+test_all_frequencies()
+{
+       local filepath="$CPUFREQROOT/$1"
+
+       backup_governor $1
+
+       local found=$(switch_governor $1 "userspace")
+       if [ $found = 1 ]; then
+               printf "${FUNCNAME[0]}: userspace governor not available for: $1\n"
+               return;
+       fi
+
+       printf "Switched governor for $1 to userspace\n\n"
+
+       local freqs=$(cat $filepath/scaling_available_frequencies)
+       printf "Available frequencies for $1: $freqs\n\n"
+
+       # Set all frequencies one-by-one
+       for freq in $freqs; do
+               set_cpu_frequency $1 $freq
+       done
+
+       printf "\n"
+
+       restore_governor $1
+}
+
+# $1: loop count
+shuffle_frequency_for_all_cpus()
+{
+       printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+       for i in `seq 1 $1`; do
+               for_each_policy test_all_frequencies
+       done
+       printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Basic cpufreq tests
+cpufreq_basic_tests()
+{
+       printf "*** RUNNING CPUFREQ SANITY TESTS ***\n"
+       printf "====================================\n\n"
+
+       count=$(count_cpufreq_managed_cpus)
+       if [ $count = 0 ]; then
+               printf "No cpu is managed by cpufreq core, exiting\n"
+               exit;
+       else
+               printf "CPUFreq manages: $count CPUs\n\n"
+       fi
+
+       # Detect & print which CPUs are not managed by cpufreq
+       print_unmanaged_cpus
+
+       # read/update all cpufreq files
+       read_all_cpufreq_files
+       update_all_cpufreq_files
+
+       # hotplug cpus
+       reboot_cpus 5
+
+       # Test all frequencies
+       shuffle_frequency_for_all_cpus 2
+
+       # Test all governors
+       shuffle_governors_for_all_cpus 1
+}
+
+# Suspend/resume
+# $1: "suspend" or "hibernate", $2: loop count
+do_suspend()
+{
+       printf "** Test: Running ${FUNCNAME[0]}: Trying $1 for $2 loops **\n\n"
+
+       # Is the power-state interface available?
+       if [ ! -d $SYSFS/power/ -o ! -f $SYSFS/power/state ]; then
+               printf "$SYSFS/power/state not available\n"
+               return 1
+       fi
+
+       if [ $1 = "suspend" ]; then
+               filename="mem"
+       elif [ $1 = "hibernate" ]; then
+               filename="disk"
+       else
+               printf "$1 is not a valid option\n"
+               return 1
+       fi
+
+       if [ -n "$filename" ]; then
+               present=$(cat $SYSFS/power/state | grep $filename)
+
+               if [ -z "$present" ]; then
+                       printf "Tried to $1 but $filename isn't present in $SYSFS/power/state\n"
+                       return 1;
+               fi
+
+               for i in `seq 1 $2`; do
+                       printf "Starting $1\n"
+                       echo $filename > $SYSFS/power/state
+                       printf "Came out of $1\n"
+
+                       printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
+                       cpufreq_basic_tests
+               done
+       fi
+}
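
do_suspend() drives the whole cycle: it checks that $SYSFS/power/state exists and advertises the requested state, writes "mem" or "disk" to it, and re-runs cpufreq_basic_tests() after every resume. A sketch, assuming the helpers are sourced and the sysfs paths initialised:

    do_suspend "suspend" 1      # one suspend/resume cycle plus sanity tests
    do_suspend "hibernate" 2    # two hibernate/resume cycles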
diff --git a/tools/testing/selftests/cpufreq/governor.sh b/tools/testing/selftests/cpufreq/governor.sh
new file mode 100755 (executable)
index 0000000..def6451
--- /dev/null
@@ -0,0 +1,153 @@
+#!/bin/bash
+#
+# Test governors
+
+# protect against multiple inclusion
+if [ $FILE_GOVERNOR ]; then
+       return 0
+else
+       FILE_GOVERNOR=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+
+CUR_GOV=
+CUR_FREQ=
+
+# Find governor's directory path
+# $1: policy, $2: governor
+find_gov_directory()
+{
+       if [ -d $CPUFREQROOT/$2 ]; then
+               printf "$CPUFREQROOT/$2\n"
+       elif [ -d $CPUFREQROOT/$1/$2 ]; then
+               printf "$CPUFREQROOT/$1/$2\n"
+       else
+               printf "INVALID\n"
+       fi
+}
+
+# $1: policy
+find_current_governor()
+{
+       cat $CPUFREQROOT/$1/scaling_governor
+}
+
+# $1: policy
+backup_governor()
+{
+       CUR_GOV=$(find_current_governor $1)
+
+       printf "Governor backup done for $1: $CUR_GOV\n"
+
+       if [ $CUR_GOV == "userspace" ]; then
+               CUR_FREQ=$(find_current_freq $1)
+               printf "Governor frequency backup done for $1: $CUR_FREQ\n"
+       fi
+
+       printf "\n"
+}
+
+# $1: policy
+restore_governor()
+{
+       __switch_governor $1 $CUR_GOV
+
+       printf "Governor restored for $1 to $CUR_GOV\n"
+
+       if [ $CUR_GOV == "userspace" ]; then
+               set_cpu_frequency $1 $CUR_FREQ
+               printf "Governor frequency restored for $1: $CUR_FREQ\n"
+       fi
+
+       printf "\n"
+}
+
+# param:
+# $1: policy, $2: governor
+__switch_governor()
+{
+       echo $2 > $CPUFREQROOT/$1/scaling_governor
+}
+
+# param:
+# $1: cpu, $2: governor
+__switch_governor_for_cpu()
+{
+       echo $2 > $CPUROOT/$1/cpufreq/scaling_governor
+}
+
+# SWITCH GOVERNORS
+
+# $1: policy, $2: governor
+switch_governor()
+{
+       local filepath=$CPUFREQROOT/$1/scaling_available_governors
+
+       # check if governor is available
+       local found=$(cat $filepath | grep $2 | wc -l)
+       if [ $found = 0 ]; then
+               echo 1;
+               return
+       fi
+
+       __switch_governor $1 $2
+       echo 0;
+}
+
+# $1: policy, $2: governor
+switch_show_governor()
+{
+       cur_gov=$(find_current_governor $1)
+       if [ $cur_gov == "userspace" ]; then
+               cur_freq=$(find_current_freq $1)
+       fi
+
+       # switch governor
+       __switch_governor $1 $2
+
+       printf "\nSwitched governor for $1 to $2\n\n"
+
+       if [ $2 == "userspace" -o $2 == "powersave" -o $2 == "performance" ]; then
+               printf "No files to read for $2 governor\n\n"
+               return
+       fi
+
+       # show governor files
+       local govpath=$(find_gov_directory $1 $2)
+       read_cpufreq_files_in_dir $govpath
+}
+
+# $1: function to be called, $2: policy
+call_for_each_governor()
+{
+       local filepath=$CPUFREQROOT/$2/scaling_available_governors
+
+       # Exit if cpu isn't managed by cpufreq core
+       if [ ! -f $filepath ]; then
+               return;
+       fi
+
+       backup_governor $2
+
+       local governors=$(cat $filepath)
+       printf "Available governors for $2: $governors\n"
+
+       for governor in $governors; do
+               $1 $2 $governor
+       done
+
+       restore_governor $2
+}
+
+# $1: loop count
+shuffle_governors_for_all_cpus()
+{
+       printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+       for i in `seq 1 $1`; do
+               for_each_policy call_for_each_governor switch_show_governor
+       done
+       printf "%s\n\n" "------------------------------------------------"
+}
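
Note that switch_governor() reports failure by echoing 1 (governor missing from scaling_available_governors) and success by echoing 0, so callers capture its output rather than its exit status. A sketch, assuming a policy0 directory exists:

    ret=$(switch_governor policy0 ondemand)
    if [ "$ret" = 0 ]; then
        echo "policy0 now runs ondemand"
    else
        echo "ondemand not available for policy0"
    fi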
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
new file mode 100755 (executable)
index 0000000..01bac76
--- /dev/null
@@ -0,0 +1,194 @@
+#!/bin/bash
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+source module.sh
+source special-tests.sh
+
+FUNC=basic     # do basic tests by default
+OUTFILE=cpufreq_selftest
+SYSFS=
+CPUROOT=
+CPUFREQROOT=
+
+helpme()
+{
+       printf "Usage: $0 [-h] [-todg args]
+       [-h <help>]
+       [-o <output-file-for-dump>]
+       [-t <basic: Basic cpufreq testing
+            suspend: suspend/resume,
+            hibernate: hibernate/resume,
+            modtest: test driver or governor modules. Only to be used with -d or -g options,
+            sptest1: Simple governor switch to produce lockdep.
+            sptest2: Concurrent governor switch to produce lockdep.
+            sptest3: Governor races, shuffle between governors quickly.
+            sptest4: CPU hotplugs with updates to cpufreq files.>]
+       [-d <driver's module name: only with \"-t modtest\">]
+       [-g <governor's module name: only with \"-t modtest\">]
+       \n"
+       exit 2
+}
+
+prerequisite()
+{
+       msg="skip all tests:"
+
+       if [ $UID != 0 ]; then
+               echo $msg must be run as root >&2
+               exit 2
+       fi
+
+       taskset -p 01 $$
+
+       SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+       if [ ! -d "$SYSFS" ]; then
+               echo $msg sysfs is not mounted >&2
+               exit 2
+       fi
+
+       CPUROOT=$SYSFS/devices/system/cpu
+       CPUFREQROOT="$CPUROOT/cpufreq"
+
+       if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
+               echo $msg cpus not available in sysfs >&2
+               exit 2
+       fi
+
+       if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
+               echo $msg cpufreq directory not available in sysfs >&2
+               exit 2
+       fi
+}
+
+parse_arguments()
+{
+       while getopts ht:o:d:g: arg
+       do
+               case $arg in
+                       h) # --help
+                               helpme
+                               ;;
+
+                       t) # --func_type (Function to perform: basic, suspend, hibernate, modtest, sptest1/2/3/4 (default: basic))
+                               FUNC=$OPTARG
+                               ;;
+
+                       o) # --output-file (Output file to store dumps)
+                               OUTFILE=$OPTARG
+                               ;;
+
+                       d) # --driver-mod-name (Name of the driver module)
+                               DRIVER_MOD=$OPTARG
+                               ;;
+
+                       g) # --governor-mod-name (Name of the governor module)
+                               GOVERNOR_MOD=$OPTARG
+                               ;;
+
+                       \?)
+                               helpme
+                               ;;
+               esac
+       done
+}
+
+do_test()
+{
+       # Check if CPUs are managed by cpufreq or not
+       count=$(count_cpufreq_managed_cpus)
+
+       if [ $count = 0 -a $FUNC != "modtest" ]; then
+               echo "No cpu is managed by cpufreq core, exiting"
+               exit 2;
+       fi
+
+       case "$FUNC" in
+               "basic")
+               cpufreq_basic_tests
+               ;;
+
+               "suspend")
+               do_suspend "suspend" 1
+               ;;
+
+               "hibernate")
+               do_suspend "hibernate" 1
+               ;;
+
+               "modtest")
+               # Do we have modules in place?
+               if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
+                       echo "No driver or governor module passed with -d or -g"
+                       exit 2;
+               fi
+
+               if [ $DRIVER_MOD ]; then
+                       if [ $GOVERNOR_MOD ]; then
+                               module_test $DRIVER_MOD $GOVERNOR_MOD
+                       else
+                               module_driver_test $DRIVER_MOD
+                       fi
+               else
+                       if [ $count = 0 ]; then
+                               echo "No cpu is managed by cpufreq core, exiting"
+                               exit 2;
+                       fi
+
+                       module_governor_test $GOVERNOR_MOD
+               fi
+               ;;
+
+               "sptest1")
+               simple_lockdep
+               ;;
+
+               "sptest2")
+               concurrent_lockdep
+               ;;
+
+               "sptest3")
+               governor_race
+               ;;
+
+               "sptest4")
+               hotplug_with_updates
+               ;;
+
+               *)
+               echo "Invalid [-f] function type"
+               helpme
+               ;;
+       esac
+}
+
+# clear dumps
+# $1: file name
+clear_dumps()
+{
+       echo "" > $1.txt
+       echo "" > $1.dmesg_cpufreq.txt
+       echo "" > $1.dmesg_full.txt
+}
+
+# $1: output file name
+dmesg_dumps()
+{
+       dmesg | grep cpufreq >> $1.dmesg_cpufreq.txt
+
+       # We may need the full logs as well
+       dmesg >> $1.dmesg_full.txt
+}
+
+# Parse arguments
+parse_arguments $@
+
+# Make sure all requirements are met
+prerequisite
+
+# Run requested functions
+clear_dumps $OUTFILE
+do_test >> $OUTFILE.txt
+dmesg_dumps $OUTFILE
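
Typical invocations of the entry point (must run as root; output is appended to $OUTFILE.txt plus the two dmesg dumps). The driver module name below is hypothetical; cpufreq_ondemand.ko is one of the governor modules the suite recognises:

    ./main.sh                          # basic sanity tests (the default)
    ./main.sh -t suspend               # one suspend/resume cycle
    ./main.sh -t modtest -d my-cpufreq-driver.ko -g cpufreq_ondemand.ko
    ./main.sh -o /tmp/cpufreq_run      # use a custom dump-file prefix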
diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh
new file mode 100755 (executable)
index 0000000..8ff2244
--- /dev/null
@@ -0,0 +1,243 @@
+#!/bin/bash
+#
+# Module-specific test cases
+
+# protect against multiple inclusion
+if [ $FILE_MODULE ]; then
+       return 0
+else
+       FILE_MODULE=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Check basic insmod/rmmod
+# $1: module
+test_basic_insmod_rmmod()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+       printf "Inserting $1 module\n"
+       # insert module
+       insmod $1
+       if [ $? != 0 ]; then
+               printf "Insmod $1 failed\n"
+               exit;
+       fi
+
+       printf "Removing $1 module\n"
+       # remove module
+       rmmod $1
+       if [ $? != 0 ]; then
+               printf "rmmod $1 failed\n"
+               exit;
+       fi
+
+       printf "\n"
+}
+
+# Insert cpufreq driver module and perform basic tests
+# $1: cpufreq-driver module to insert
+# $2: If we want to play with CPUs (1) or not (0)
+module_driver_test_single()
+{
+       printf "** Test: Running ${FUNCNAME[0]} for driver $1 and cpus_hotplug=$2 **\n\n"
+
+       if [ $2 -eq 1 ]; then
+               # offline all non-boot CPUs
+               for_each_non_boot_cpu offline_cpu
+               printf "\n"
+       fi
+
+       # insert module
+       printf "Inserting $1 module\n\n"
+       insmod $1
+       if [ $? != 0 ]; then
+               printf "Insmod $1 failed\n"
+               return;
+       fi
+
+       if [ $2 -eq 1 ]; then
+               # online all non-boot CPUs
+               for_each_non_boot_cpu online_cpu
+               printf "\n"
+       fi
+
+       # run basic tests
+       cpufreq_basic_tests
+
+       # remove module
+       printf "Removing $1 module\n\n"
+       rmmod $1
+       if [ $? != 0 ]; then
+               printf "rmmod $1 failed\n"
+               return;
+       fi
+
+       # There shouldn't be any cpufreq directories now.
+       for_each_cpu cpu_should_not_have_cpufreq_directory
+       printf "\n"
+}
+
+# $1: cpufreq-driver module to insert
+module_driver_test()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+       # check if module is present or not
+       ls $1 > /dev/null
+       if [ $? != 0 ]; then
+               printf "$1: not present in `pwd` folder\n"
+               return;
+       fi
+
+       # test basic module tests
+       test_basic_insmod_rmmod $1
+
+       # Do simple module test
+       module_driver_test_single $1 0
+
+       # Remove CPUs before inserting module and then bring them back
+       module_driver_test_single $1 1
+       printf "\n"
+}
+
+# find governor name based on governor module name
+# $1: governor module name
+find_gov_name()
+{
+       if [ $1 = "cpufreq_ondemand.ko" ]; then
+               printf "ondemand"
+       elif [ $1 = "cpufreq_conservative.ko" ]; then
+               printf "conservative"
+       elif [ $1 = "cpufreq_userspace.ko" ]; then
+               printf "userspace"
+       elif [ $1 = "cpufreq_performance.ko" ]; then
+               printf "performance"
+       elif [ $1 = "cpufreq_powersave.ko" ]; then
+               printf "powersave"
+       elif [ $1 = "cpufreq_schedutil.ko" ]; then
+               printf "schedutil"
+       fi
+}
+
+# $1: governor string, $2: governor module, $3: policy
+# example: module_governor_test_single "ondemand" "cpufreq_ondemand.ko" 2
+module_governor_test_single()
+{
+       printf "** Test: Running ${FUNCNAME[0]} for $3 **\n\n"
+
+       backup_governor $3
+
+       # switch to new governor
+       printf "Switch from $CUR_GOV to $1\n"
+       switch_show_governor $3 $1
+
+       # try removing the module; it should fail while the governor is in use
+       printf "Removing $2 module\n\n"
+       rmmod $2
+       if [ $? = 0 ]; then
+               printf "WARN: rmmod $2 succeeded even if governor is used\n"
+               insmod $2
+       else
+               printf "Pass: unable to remove $2 while it is being used\n\n"
+       fi
+
+       # switch back to old governor
+       printf "Switchback to $CUR_GOV from $1\n"
+       restore_governor $3
+       printf "\n"
+}
+
+# Insert cpufreq governor module and perform basic tests
+# $1: cpufreq-governor module to insert
+module_governor_test()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+       # check if module is present or not
+       ls $1 > /dev/null
+       if [ $? != 0 ]; then
+               printf "$1: not present in `pwd` folder\n"
+               return;
+       fi
+
+       # test basic module tests
+       test_basic_insmod_rmmod $1
+
+       # insert module
+       printf "Inserting $1 module\n\n"
+       insmod $1
+       if [ $? != 0 ]; then
+               printf "Insmod $1 failed\n"
+               return;
+       fi
+
+       # switch to the new governor for each policy
+       for_each_policy module_governor_test_single $(find_gov_name $1) $1
+
+       # remove module
+       printf "Removing $1 module\n\n"
+       rmmod $1
+       if [ $? != 0 ]; then
+               printf "rmmod $1 failed\n"
+               return;
+       fi
+       printf "\n"
+}
+
+# test modules: driver and governor
+# $1: driver module, $2: governor module
+module_test()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+       # check if modules are present or not
+       ls $1 $2 > /dev/null
+       if [ $? != 0 ]; then
+               printf "$1 or $2: is not present in `pwd` folder\n"
+               return;
+       fi
+
+       # TEST1: Insert gov after driver
+       # insert driver module
+       printf "Inserting $1 module\n\n"
+       insmod $1
+       if [ $? != 0 ]; then
+               printf "Insmod $1 failed\n"
+               return;
+       fi
+
+       # run governor tests
+       module_governor_test $2
+
+       # remove driver module
+       printf "Removing $1 module\n\n"
+       rmmod $1
+       if [ $? != 0 ]; then
+               printf "rmmod $1 failed\n"
+               return;
+       fi
+
+       # TEST2: Insert driver after governor
+       # insert governor module
+       printf "Inserting $2 module\n\n"
+       insmod $2
+       if [ $? != 0 ]; then
+               printf "Insmod $2 failed\n"
+               return;
+       fi
+
+       # run driver tests
+       module_driver_test $1
+
+       # remove governor module
+       printf "Removing $2 module\n\n"
+       rmmod $2
+       if [ $? != 0 ]; then
+               printf "rmmod $2 failed\n"
+               return;
+       fi
+}
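
module_test() exercises both insertion orders: driver first with the governor tests layered on top, then governor first with the driver tests layered on top. Called directly it looks like the sketch below; the driver .ko name is hypothetical, and both files must sit in the current directory since the `ls` checks above look there:

    module_test ./my-cpufreq-driver.ko ./cpufreq_ondemand.ko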
diff --git a/tools/testing/selftests/cpufreq/special-tests.sh b/tools/testing/selftests/cpufreq/special-tests.sh
new file mode 100755 (executable)
index 0000000..58b730f
--- /dev/null
@@ -0,0 +1,115 @@
+#!/bin/bash
+#
+# Special test cases reported by people
+
+# Testcase 1: Reported here: http://marc.info/?l=linux-pm&m=140618592709858&w=2
+
+# protect against multiple inclusion
+if [ $FILE_SPECIAL ]; then
+       return 0
+else
+       FILE_SPECIAL=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Test 1
+# $1: policy
+__simple_lockdep()
+{
+       # switch to ondemand
+       __switch_governor $1 "ondemand"
+
+       # cat ondemand files
+       local ondir=$(find_gov_directory $1 "ondemand")
+       if [ -z $ondir ]; then
+               printf "${FUNCNAME[0]}Ondemand directory not created, quit"
+               return
+       fi
+
+       cat $ondir/*
+
+       # switch to conservative
+       __switch_governor $1 "conservative"
+}
+
+simple_lockdep()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+       for_each_policy __simple_lockdep
+}
+
+# Test 2
+# $1: policy
+__concurrent_lockdep()
+{
+       for i in `seq 0 100`; do
+               __simple_lockdep $1
+       done
+}
+
+concurrent_lockdep()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+       for_each_policy_concurrent __concurrent_lockdep
+}
+
+# Test 3
+quick_shuffle()
+{
+       # this is called concurrently from governor_race
+       for I in `seq 1000`
+       do
+               echo ondemand | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+               echo userspace | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+       done
+}
+
+governor_race()
+{
+       printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+       # run 8 concurrent instances
+       for I in `seq 8`
+       do
+               quick_shuffle &
+       done
+}
+
+# Test 4
+# $1: cpu
+hotplug_with_updates_cpu()
+{
+       local filepath="$CPUROOT/$1/cpufreq"
+
+       # switch to ondemand
+       __switch_governor_for_cpu $1 "ondemand"
+
+       for i in `seq 1 5000`
+       do
+               reboot_cpu $1
+       done &
+
+       local freqs=$(cat $filepath/scaling_available_frequencies)
+       local oldfreq=$(cat $filepath/scaling_min_freq)
+
+       for j in `seq 1 5000`
+       do
+               # Set all frequencies one-by-one
+               for freq in $freqs; do
+                       echo $freq > $filepath/scaling_min_freq
+               done
+       done
+
+       # restore old freq
+       echo $oldfreq > $filepath/scaling_min_freq
+}
+
+hotplug_with_updates()
+{
+       for_each_non_boot_cpu hotplug_with_updates_cpu
+}
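
Each of these special cases maps to one of the sptest options parsed by main.sh, so the usual entry points are:

    ./main.sh -t sptest1    # simple_lockdep
    ./main.sh -t sptest2    # concurrent_lockdep
    ./main.sh -t sptest3    # governor_race
    ./main.sh -t sptest4    # hotplug_with_updates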
index 736c3ddfc787499a3a114ab87ad173054e5468c2..c49dcea69319a36e98eabb9a44bdc5b75ff94575 100644 (file)
@@ -1,13 +1,7 @@
 CFLAGS = -Wall
 
-test_objs = open-unlink create-read
-
-all: $(test_objs)
-
+TEST_GEN_FILES := open-unlink create-read
 TEST_PROGS := efivarfs.sh
-TEST_FILES := $(test_objs)
 
 include ../lib.mk
 
-clean:
-       rm -f $(test_objs)
index d4300602bf376a5f6e87b1eb17620d3652dd9795..2e13035dff7f6f4bfcf28d59e38bd1d7231bacce 100644 (file)
@@ -1,27 +1,23 @@
 CFLAGS = -Wall
-BINARIES = execveat
-DEPS = execveat.symlink execveat.denatured script subdir
-all: $(BINARIES) $(DEPS)
 
-subdir:
+TEST_GEN_PROGS := execveat
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+# Makefile is a run-time dependency, since it's accessed by the execveat test
+TEST_FILES := Makefile
+
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
+
+include ../lib.mk
+
+$(OUTPUT)/subdir:
        mkdir -p $@
-script:
+$(OUTPUT)/script:
        echo '#!/bin/sh' > $@
        echo 'exit $$*' >> $@
        chmod +x $@
-execveat.symlink: execveat
-       ln -s -f $< $@
-execveat.denatured: execveat
+$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
+       cd $(OUTPUT) && ln -s -f $(shell basename $<) $(shell basename $@)
+$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
        cp $< $@
        chmod -x $@
-%: %.c
-       $(CC) $(CFLAGS) -o $@ $^
-
-TEST_PROGS := execveat
-# Makefile is a run-time dependency, since it's accessed by the execveat test
-TEST_FILES := $(DEPS) Makefile
-
-include ../lib.mk
 
-clean:
-       rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx*
index 4e6ed13e7f66150de10a8f645eff508fc0565cff..a8a5e21850e740a099228963db130961e542b838 100644 (file)
@@ -1,9 +1,7 @@
 all:
 
 TEST_PROGS := ftracetest
-TEST_DIRS := test.d
+TEST_FILES := test.d
+EXTRA_CLEAN := $(OUTPUT)/logs/*
 
 include ../lib.mk
-
-clean:
-       rm -rf logs/*
index 6a1752956283cbef8f70ae83c103fa73a1dae4ab..653c5cd9e44d6bc4fba89b79ae8d1d707993e542 100644 (file)
@@ -3,13 +3,18 @@ SUBDIRS := functional
 TEST_PROGS := run.sh
 
 .PHONY: all clean
-all:
-       for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
 
 include ../lib.mk
 
+all:
+       for DIR in $(SUBDIRS); do               \
+               BUILD_TARGET=$$OUTPUT/$$DIR;    \
+               mkdir $$BUILD_TARGET  -p;       \
+               make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+       done
+
 override define RUN_TESTS
-       ./run.sh
+       @if [ `dirname $(OUTPUT)` = $(PWD) ]; then ./run.sh; fi
 endef
 
 override define INSTALL_RULE
@@ -17,7 +22,9 @@ override define INSTALL_RULE
        install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
 
        @for SUBDIR in $(SUBDIRS); do \
-               $(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+               BUILD_TARGET=$$OUTPUT/$$SUBDIR; \
+               mkdir $$BUILD_TARGET  -p;       \
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
        done;
 endef
 
@@ -26,4 +33,8 @@ override define EMIT_TESTS
 endef
 
 clean:
-       for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
+       for DIR in $(SUBDIRS); do               \
+               BUILD_TARGET=$$OUTPUT/$$DIR;    \
+               mkdir $$BUILD_TARGET  -p;       \
+               make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+       done
index 9d6b75ef7b5d679a698862b2406b619e633e8672..a648e7a6cbc3d3d5fc3ea4a1d254e9246950721e 100644 (file)
@@ -2,8 +2,11 @@ INCLUDES := -I../include -I../../
 CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
 LDFLAGS := $(LDFLAGS) -pthread -lrt
 
-HEADERS := ../include/futextest.h
-TARGETS := \
+HEADERS := \
+       ../include/futextest.h \
+       ../include/atomic.h \
+       ../include/logging.h
+TEST_GEN_FILES := \
        futex_wait_timeout \
        futex_wait_wouldblock \
        futex_requeue_pi \
@@ -12,14 +15,8 @@ TARGETS := \
        futex_wait_uninitialized_heap \
        futex_wait_private_mapped_file
 
-TEST_PROGS := $(TARGETS) run.sh
-
-.PHONY: all clean
-all: $(TARGETS)
-
-$(TARGETS): $(HEADERS)
+TEST_PROGS := run.sh
 
 include ../../lib.mk
 
-clean:
-       rm -f $(TARGETS)
+$(TEST_GEN_FILES): $(HEADERS)
index 014aa01197af5084c097de3e5a425894ad630140..e14469103f073d071f5e4b43c845440865f72287 100644 (file)
@@ -21,6 +21,7 @@
 #ifndef _LOGGING_H
 #define _LOGGING_H
 
+#include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 #include <linux/futex.h>
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
new file mode 100644 (file)
index 0000000..7d14f74
--- /dev/null
@@ -0,0 +1 @@
+gpio-mockup-chardev
index f5f1a28715ffbf75fc098c9f194fe8e0a53eca57..19678e90efb25d1231e6bdfd97f8b6ea99a6f6bf 100644 (file)
@@ -1,15 +1,10 @@
-CC := $(CROSS_COMPILE)gcc
 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
 LDFLAGS := $(LDFLAGS) -lm
 
-TARGETS := msr aperf
+TEST_GEN_FILES := msr aperf
 
-TEST_PROGS := $(TARGETS) run.sh
+TEST_PROGS := run.sh
 
-.PHONY: all clean
-all: $(TARGETS)
+include ../lib.mk
 
-$(TARGETS): $(HEADERS)
-
-clean:
-       rm -f $(TARGETS)
+$(TEST_GEN_FILES): $(HEADERS)
index 6046e183f4add3276adbbf4722f65435548f8a96..cd72f3dc83e9dec7ba51c38c674447c0174ff5b4 100644 (file)
@@ -14,7 +14,7 @@ void usage(char *name) {
 }
 
 int main(int argc, char **argv) {
-       int i, cpu, fd;
+       unsigned int i, cpu, fd;
        char msr_file_name[64];
        long long tsc, old_tsc, new_tsc;
        long long aperf, old_aperf, new_aperf;
index 84b66a3c1f740cc846e2ab3755eaf26ec148ab86..9af04c9353c0f89f966b7b642d5d586633ce8f3e 100644 (file)
@@ -1 +1,2 @@
 msgque_test
+msgque
index 25d2e702c68a53f7021d47404fd59d69241f430c..30ef4c7f53eaa3d29baaa47b69fd9090e328b8a9 100644 (file)
@@ -11,12 +11,7 @@ endif
 
 CFLAGS += -I../../../../usr/include/
 
-all:
-       $(CC) $(CFLAGS) msgque.c -o msgque_test
-
-TEST_PROGS := msgque_test
+TEST_GEN_PROGS := msgque
 
 include ../lib.mk
 
-clean:
-       rm -fr ./msgque_test
index 2ae7450a9a8984e2f0ba86a96842010375076757..47aa9887f9d4bfcf53907cafff36b947469cbb80 100644 (file)
@@ -1,10 +1,8 @@
 CFLAGS += -I../../../../usr/include/
 
-all: kcmp_test
+TEST_GEN_PROGS := kcmp_test
 
-TEST_PROGS := kcmp_test
+EXTRA_CLEAN := $(OUTPUT)/kcmp-test-file
 
 include ../lib.mk
 
-clean:
-       $(RM) kcmp_test kcmp-test-file
index 50a93f5f13d64d5b0f9b4851d03f955da9b6e1d9..ce96d80ad64f4da000ecec590365582d399fadf3 100644 (file)
@@ -2,9 +2,15 @@
 # Makefile can operate with or without the kbuild infrastructure.
 CC := $(CROSS_COMPILE)gcc
 
+TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
+
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+
 define RUN_TESTS
-       @for TEST in $(TEST_PROGS); do \
-               (./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \
+       @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+               BASENAME_TEST=`basename $$TEST`;        \
+               cd `dirname $$TEST`; (./$$BASENAME_TEST && echo "selftests: $$BASENAME_TEST [PASS]") || echo "selftests:  $$BASENAME_TEST [FAIL]"; cd -;\
        done;
 endef
 
@@ -14,8 +20,13 @@ run_tests: all
 define INSTALL_RULE
        @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then                                        \
                mkdir -p ${INSTALL_PATH};                                                                               \
-               echo "rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";       \
-               rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;              \
+               echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";    \
+               rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;           \
+       fi
+       @if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then                                    \
+               mkdir -p ${INSTALL_PATH};                                                                               \
+               echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";        \
+               rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;               \
        fi
 endef
 
@@ -27,12 +38,25 @@ else
 endif
 
 define EMIT_TESTS
-       @for TEST in $(TEST_PROGS); do \
-               echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \
+       @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+               BASENAME_TEST=`basename $$TEST`;        \
+               echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
        done;
 endef
 
 emit_tests:
        $(EMIT_TESTS)
 
+clean:
+       $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+
+$(OUTPUT)/%:%.c
+       $(LINK.c) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/%.o:%.S
+       $(COMPILE.S) $^ -o $@
+
+$(OUTPUT)/%:%.S
+       $(LINK.S) $^ $(LDLIBS) -o $@
+
 .PHONY: run_tests all clean install emit_tests
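
With the shared all/clean targets and the generic $(OUTPUT)/%:%.c rule above, a converted directory Makefile reduces to a TEST_GEN_* list plus "include ../lib.mk". A sketch of building and running one such test out of tree (the path is illustrative):

    make -C tools/testing/selftests/membarrier OUTPUT=/tmp/mb
    /tmp/mb/membarrier_test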
index a1a97085847d2e173f836864f35dfccc38229c2a..02845532b0592315399ed384c98efb25a79c1034 100644 (file)
@@ -1,10 +1,6 @@
 CFLAGS += -g -I../../../../usr/include/
 
-TEST_PROGS := membarrier_test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := membarrier_test
 
 include ../lib.mk
 
-clean:
-       $(RM) $(TEST_PROGS)
index fd396ac811b6a73f086e605ef93e0d2c92401ad6..79891d033de162cdcc921f3de7802738fa0e0a50 100644 (file)
@@ -1,22 +1,13 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS += -D_FILE_OFFSET_BITS=64
 CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
 CFLAGS += -I../../../../usr/include/
 
-TEST_PROGS := memfd_test
-
-all: $(TEST_PROGS)
-
-include ../lib.mk
-
-build_fuse: fuse_mnt fuse_test
+TEST_PROGS := run_fuse_test.sh
+TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
 
 fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
 fuse_mnt: LDFLAGS += $(shell pkg-config fuse --libs)
 
-run_fuse: build_fuse
-       @./run_fuse_test.sh || echo "fuse_test: [FAIL]"
+include ../lib.mk
 
-clean:
-       $(RM) memfd_test fuse_test
index 5e35c9c50b72bbebcfb9393928bc10a03fb44479..9093d7ffe87f158e2f1f72d68e791e23afdcac40 100644 (file)
@@ -1,14 +1,11 @@
 # Makefile for mount selftests.
 CFLAGS = -Wall \
          -O2
-all: unprivileged-remount-test
 
-unprivileged-remount-test: unprivileged-remount-test.c
-       $(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test
+TEST_GEN_PROGS := unprivileged-remount-test
 
 include ../lib.mk
 
-TEST_PROGS := unprivileged-remount-test
 override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
                      then      \
                                ./unprivileged-remount-test ; \
@@ -17,5 +14,3 @@ override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
                      fi
 override EMIT_TESTS := echo "$(RUN_TESTS)"
 
-clean:
-       rm -f unprivileged-remount-test
index eebac29acbd91fc6d10b6258f006834834e4b243..79a664aeb8d76509a2f8e46aadf742571765a26a 100644 (file)
@@ -1,8 +1,6 @@
 CFLAGS += -O2
 LDLIBS = -lrt -lpthread -lpopt
-TEST_PROGS := mq_open_tests mq_perf_tests
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := mq_open_tests mq_perf_tests
 
 include ../lib.mk
 
@@ -16,5 +14,3 @@ override define EMIT_TESTS
        echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
 endef
 
-clean:
-       rm -f mq_open_tests mq_perf_tests
index e24e4c82542e076856c0b99463e9e8cbb1363211..fbfe5d0d5c2e05028e6af86d8db76d95e41c4fc5 100644 (file)
@@ -3,20 +3,13 @@
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
-NET_PROGS =  socket
-NET_PROGS += psock_fanout psock_tpacket
-NET_PROGS += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-NET_PROGS += reuseport_dualstack
-
-all: $(NET_PROGS)
 reuseport_bpf_numa: LDFLAGS += -lnuma
-%: %.c
-       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh
-TEST_FILES := $(NET_PROGS)
+TEST_GEN_FILES =  socket
+TEST_GEN_FILES += psock_fanout psock_tpacket
+TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_FILES += reuseport_dualstack
 
 include ../lib.mk
 
-clean:
-       $(RM) $(NET_PROGS)
index 2306054a901a4059102eecf7d47bbab6fe1de630..9ff7c7f80625c18b97edc0854193c6ab48f936a4 100644 (file)
@@ -1,12 +1,5 @@
-TEST_PROGS := owner pidns
+TEST_GEN_PROGS := owner pidns
 
 CFLAGS := -Wall -Werror
 
-all: owner pidns
-owner: owner.c
-pidns: pidns.c
-
-clean:
-       $(RM) owner pidns
-
 include ../lib.mk
index c2c4211ba58b83facb996ceab3ecac7f9077a31e..1c5d0575802e47113b49b4b0facfd7bff47a6620 100644 (file)
@@ -34,31 +34,35 @@ endif
 all: $(SUB_DIRS)
 
 $(SUB_DIRS):
-       $(MAKE) -k -C $@ all
+       BUILD_TARGET=$$OUTPUT/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
 
 include ../lib.mk
 
 override define RUN_TESTS
        @for TARGET in $(SUB_DIRS); do \
-               $(MAKE) -C $$TARGET run_tests; \
+               BUILD_TARGET=$$OUTPUT/$$TARGET; \
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
        done;
 endef
 
 override define INSTALL_RULE
        @for TARGET in $(SUB_DIRS); do \
-               $(MAKE) -C $$TARGET install; \
+               BUILD_TARGET=$$OUTPUT/$$TARGET; \
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
        done;
 endef
 
 override define EMIT_TESTS
        @for TARGET in $(SUB_DIRS); do \
-               $(MAKE) -s -C $$TARGET emit_tests; \
+               BUILD_TARGET=$$OUTPUT/$$TARGET; \
+               $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
        done;
 endef
 
 clean:
        @for TARGET in $(SUB_DIRS); do \
-               $(MAKE) -C $$TARGET clean; \
+               BUILD_TARGET=$$OUTPUT/$$TARGET; \
+               $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
        done;
        rm -f tags
 
index ad6a4e49da916e0da10d68b0311efb2c7d2d95b8..16b22004e75f4526e3b527f410fa78a9ce4bcc44 100644 (file)
@@ -1,10 +1,5 @@
-TEST_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c copy_paste_unaligned_common.c
+TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned
 
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS)
+$(TEST_GEN_PROGS): ../harness.c ../utils.c copy_paste_unaligned_common.c
index 545077f98f72d72ea5fce37892812295c0ebe32b..fb96a89bd953bf471fce6c6766ab00e0664ddfbe 100644 (file)
@@ -1,16 +1,11 @@
-TEST_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
+TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
 
 CFLAGS += -O2
 
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
-
-context_switch: ../utils.c
-context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
-context_switch: LDLIBS += -lpthread
-
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c
+
+$(OUTPUT)/context_switch: ../utils.c
+$(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
+$(OUTPUT)/context_switch: LDLIBS += -lpthread
index e164d14664661a72b43f4d11df41c08d5c5b54f2..e9351bb4285d086c67677c47193d9a3be8251b56 100644 (file)
@@ -1,10 +1,5 @@
-TEST_PROGS := cp_abort
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c
+TEST_GEN_PROGS := cp_abort
 
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS)
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
index 384843ea0d40b66f06e96fde05a7a2b6abb53751..681ab19d0a8486b5dafe7b25e5148e4c4d4e0e0b 100644 (file)
@@ -7,19 +7,14 @@ CFLAGS += -maltivec
 # Use our CFLAGS for the implicit .S rule
 ASFLAGS = $(CFLAGS)
 
-TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
+TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
 EXTRA_SOURCES := validate.c ../harness.c
 
-all: $(TEST_PROGS)
-
-copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
-copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
-memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
-memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
-
-$(TEST_PROGS): $(EXTRA_SOURCES)
-
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(OUTPUT)/copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
+$(OUTPUT)/copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
+$(OUTPUT)/memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
+$(OUTPUT)/memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
+
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
index 49327ee84e3a32fc26415afe04f8075e52a8872e..c5639deb8887ed49acd3e3238c0384a903b6fbf2 100644 (file)
@@ -1,14 +1,9 @@
-TEST_PROGS := dscr_default_test dscr_explicit_test dscr_user_test      \
+TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test  \
              dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test  \
              dscr_sysfs_thread_test
 
-dscr_default_test: LDLIBS += -lpthread
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
-
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
+
+$(TEST_GEN_PROGS): ../harness.c
index a505b66d408a67d4392a7efda885b436d0456587..fa8bae920c911f81f97c26f9f6e72157f7595379 100644 (file)
@@ -1,22 +1,17 @@
-TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
 
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
-$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
-
-fpu_syscall: fpu_asm.S
-fpu_preempt: fpu_asm.S
-fpu_signal:  fpu_asm.S
+include ../../lib.mk
 
-vmx_syscall: vmx_asm.S
-vmx_preempt: vmx_asm.S
-vmx_signal: vmx_asm.S
+$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
 
-vsx_preempt: CFLAGS += -mvsx
-vsx_preempt: vsx_asm.S
+$(OUTPUT)/fpu_syscall: fpu_asm.S
+$(OUTPUT)/fpu_preempt: fpu_asm.S
+$(OUTPUT)/fpu_signal:  fpu_asm.S
 
-include ../../lib.mk
+$(OUTPUT)/vmx_syscall: vmx_asm.S
+$(OUTPUT)/vmx_preempt: vmx_asm.S
+$(OUTPUT)/vmx_signal: vmx_asm.S
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
+$(OUTPUT)/vsx_preempt: vsx_asm.S
index 3bdb96eae55869e04a988f0d6cc30ef9d01fc6ce..1cffe54dccfb8a31f4b704756ce0e82b4393e38c 100644 (file)
@@ -1,19 +1,15 @@
 noarg:
        $(MAKE) -C ../
 
-TEST_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao
-TEST_FILES := tempfile
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao
+TEST_GEN_FILES := tempfile
 
-all: $(TEST_PROGS) $(TEST_FILES)
-
-$(TEST_PROGS): ../harness.c
+include ../../lib.mk
 
-prot_sao: ../utils.c
+$(TEST_GEN_PROGS): ../harness.c
 
-include ../../lib.mk
+$(OUTPUT)/prot_sao: ../utils.c
 
-tempfile:
-       dd if=/dev/zero of=tempfile bs=64k count=1
+$(OUTPUT)/tempfile:
+       dd if=/dev/zero of=$@ bs=64k count=1
 
-clean:
-       rm -f $(TEST_PROGS) tempfile
index ac41a7177f2e2cd659ca89c9a56c8473a502dde3..e4e55d1d3e0fae00a3c91d39ba35242e57f06b22 100644 (file)
@@ -1,44 +1,44 @@
 noarg:
        $(MAKE) -C ../
 
-TEST_PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
 
-all: $(TEST_PROGS) ebb
+include ../../lib.mk
+
+all: $(TEST_GEN_PROGS) ebb
 
-$(TEST_PROGS): $(EXTRA_SOURCES)
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
 
 # loop.S can only be built 64-bit
-count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
+$(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
        $(CC) $(CFLAGS) -m64 -o $@ $^
 
-per_event_excludes: ../utils.c
-
-include ../../lib.mk
+$(OUTPUT)/per_event_excludes: ../utils.c
 
 DEFAULT_RUN_TESTS := $(RUN_TESTS)
 override define RUN_TESTS
        $(DEFAULT_RUN_TESTS)
-       $(MAKE) -C ebb run_tests
+       TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
 endef
 
 DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
 override define EMIT_TESTS
        $(DEFAULT_EMIT_TESTS)
-       $(MAKE) -s -C ebb emit_tests
+       TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
 endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
        $(DEFAULT_INSTALL_RULE)
-       $(MAKE) -C ebb install
+       TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
 endef
 
 clean:
-       rm -f $(TEST_PROGS) loop.o
-       $(MAKE) -C ebb clean
+       $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
+       TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
 
 ebb:
-       $(MAKE) -k -C $@ all
+       TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
 
 .PHONY: all run_tests clean ebb
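
The pmu Makefile also shows the idiom for layering onto lib.mk's canned recipes: snapshot the definition lib.mk installed, then override it with a version that replays the snapshot and adds the ebb recursion. The shape of it, condensed:

DEFAULT_RUN_TESTS := $(RUN_TESTS)     # keep lib.mk's recipe body
override define RUN_TESTS
	$(DEFAULT_RUN_TESTS)
	$(MAKE) OUTPUT=$$OUTPUT/ebb -C ebb run_tests   # then descend into ebb
endef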
index 8d2279c4bb4b6a81cb5713f6ba3bf92b72f2e3e7..6001fb0a377a70f9ee1a57acd19424ec44bda6ae 100644 (file)
@@ -4,7 +4,7 @@ noarg:
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
-TEST_PROGS := reg_access_test event_attributes_test cycles_test        \
+TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test    \
         cycles_with_freeze_test pmc56_overflow_test            \
         ebb_vs_cpu_event_test cpu_event_vs_ebb_test            \
         cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test    \
@@ -16,16 +16,11 @@ TEST_PROGS := reg_access_test event_attributes_test cycles_test     \
         lost_exception_test no_handler_test                    \
         cycles_with_mmcr2_test
 
-all: $(TEST_PROGS)
+include ../../../lib.mk
 
-$(TEST_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
               ebb.c ebb_handler.S trace.c busy_loop.S
 
-instruction_count_test: ../loop.S
-
-lost_exception_test: ../lib.c
-
-include ../../../lib.mk
+$(OUTPUT)/instruction_count_test: ../loop.S
 
-clean:
-       rm -f $(TEST_PROGS)
+$(OUTPUT)/lost_exception_test: ../lib.c
index b68c6221d3d1bdad82a50356fa5c73083611d948..175366db7be8bc1261194ac80d009cd33e0c9c59 100644 (file)
@@ -1,12 +1,7 @@
 CFLAGS += -I$(CURDIR)
 
-TEST_PROGS := load_unaligned_zeropad
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
+TEST_GEN_PROGS := load_unaligned_zeropad
 
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c
index 2a728f4d2873de636379c277feae38f004c60934..557b9379f3bb9c83ebc1326c27a77e21f855119e 100644 (file)
@@ -2,14 +2,9 @@
 CFLAGS += -m64
 CFLAGS += -I$(CURDIR)
 
-TEST_PROGS := memcmp
+TEST_GEN_PROGS := memcmp
 EXTRA_SOURCES := memcmp_64.S ../harness.c
 
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): $(EXTRA_SOURCES)
-
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
index e21d10674e54135637b540ae8b0457e0ea3075cd..b92c2a132c4f5d038465ef190584da31de3a79a9 100644 (file)
@@ -1,18 +1,15 @@
-TEST_PROGS := switch_endian_test
+TEST_GEN_PROGS := switch_endian_test
 
 ASFLAGS += -O2 -Wall -g -nostdlib -m64
 
-all: $(TEST_PROGS)
+EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
 
-switch_endian_test: check-reversed.S
+include ../../lib.mk
+
+$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
 
-check-reversed.o: check.o
+$(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
        $(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@
 
-check-reversed.S: check-reversed.o
+$(OUTPUT)/check-reversed.S: $(OUTPUT)/check-reversed.o
        hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
-
-include ../../lib.mk
-
-clean:
-       rm -f $(TEST_PROGS) *.o check-reversed.S
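
The check-reversed.S pipeline reads more easily by hand: the test byte-swaps the .text of the checking object with objcopy, then re-emits the swapped bytes as assembler so the wrong-endian code can be linked straight back into the test. Equivalent commands, with file names as in the rules above:

$(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary check.o check-reversed.o
hexdump -v -e '/1 ".byte 0x%02X\n"' check-reversed.o > check-reversed.S

EXTRA_CLEAN then hands the generated intermediates to lib.mk's clean rule.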
index b35c7945bec550210c36269308352e7f25d0c2e4..da22ca7c38c185a5bb4df90a81fa71a9fbccc6c1 100644 (file)
@@ -1,12 +1,7 @@
-TEST_PROGS := ipc_unmuxed
+TEST_GEN_PROGS := ipc_unmuxed
 
 CFLAGS += -I../../../../../usr/include
 
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
-
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c
index c6c53c82fdd64fddb16aef156870e0f1656a4f81..5576ee6a51f21bb32a0ede4337d9da049e37fd42 100644 (file)
@@ -1,23 +1,19 @@
 SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu \
        tm-signal-context-chk-vmx tm-signal-context-chk-vsx
 
-TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
+TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
        tm-vmxcopy tm-fork tm-tar tm-tmspr $(SIGNAL_CONTEXT_CHK_TESTS)
 
-all: $(TEST_PROGS)
+include ../../lib.mk
 
-$(TEST_PROGS): ../harness.c ../utils.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 CFLAGS += -mhtm
 
-tm-syscall: tm-syscall-asm.S
-tm-syscall: CFLAGS += -I../../../../../usr/include
-tm-tmspr: CFLAGS += -pthread
+$(OUTPUT)/tm-syscall: tm-syscall-asm.S
+$(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
+$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
 
+SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
 $(SIGNAL_CONTEXT_CHK_TESTS): CFLAGS += -mhtm -m64 -mvsx
-
-include ../../lib.mk
-
-clean:
-       rm -f $(TEST_PROGS) *.o
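
One wrinkle here: SIGNAL_CONTEXT_CHK_TESTS is folded into TEST_GEN_PROGS before the include, so lib.mk prefixes those copies, but the helper list itself must be re-prefixed by hand before its own dependency lines apply. With a hypothetical OUTPUT=/tmp/tm:

SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
# tm-signal-context-chk-gpr -> /tmp/tm/tm-signal-context-chk-gpr
$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S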
index a485f2e286ae22cae0706c7cd0fc735d4c7206e3..f8ced26748f84408d902cc4ce58012874c545a58 100644 (file)
@@ -1,12 +1,8 @@
-TEST_PROGS := test-vphn
+TEST_GEN_PROGS := test-vphn
 
 CFLAGS += -m64
 
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
-
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS)
+$(TEST_GEN_PROGS): ../harness.c
+
index bd7abe24ea081442850c88d1b559fe9926a2580c..c5f2440ba1f707fe3c447cedcd00756c1c735496 100644 (file)
@@ -5,11 +5,9 @@ all:
 
 TEST_PROGS := pstore_tests pstore_post_reboot_tests
 TEST_FILES := common_tests pstore_crash_test
+EXTRA_CLEAN := logs/* *uuid
 
 include ../lib.mk
 
 run_crash:
        @sh pstore_crash_test || { echo "pstore_crash_test: [FAIL]"; exit 1; }
-
-clean:
-       rm -rf logs/* *uuid
index 453927fea90cae7b65005fb661bf7b8254686d32..8a2bc5562179b4c2c489976b1861aac6968c0fee 100644 (file)
@@ -1,11 +1,5 @@
 CFLAGS += -iquote../../../../include/uapi -Wall
-peeksiginfo: peeksiginfo.c
 
-all: peeksiginfo
-
-clean:
-       rm -f peeksiginfo
-
-TEST_PROGS := peeksiginfo
+TEST_GEN_PROGS := peeksiginfo
 
 include ../lib.mk
index 8401e87e34e17e8103a6d02925c57e486f017f38..5fa6fd2246b18a45290efef20789a4948d9425d9 100644 (file)
@@ -1,10 +1,6 @@
-TEST_PROGS := seccomp_bpf
+TEST_GEN_PROGS := seccomp_bpf
 CFLAGS += -Wl,-no-as-needed -Wall
 LDFLAGS += -lpthread
 
-all: $(TEST_PROGS)
-
 include ../lib.mk
 
-clean:
-       $(RM) $(TEST_PROGS)
index 56af56eda6fa50fc259716dd397de5cb15cfc07c..f68fbf80d8bea52a94ac38b3c80b185ad138dc1c 100644 (file)
@@ -1,8 +1,5 @@
 CFLAGS = -Wall
-BINARIES = sas
-all: $(BINARIES)
+TEST_GEN_PROGS = sas
 
 include ../lib.mk
 
-clean:
-       rm -rf $(BINARIES)
index 1bb01258e5596f5d2216588cbef04b8d0297819e..ccd07343d4188f4b4bf1b9d94514876ade60249b 100644 (file)
@@ -57,7 +57,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
                exit(EXIT_FAILURE);
        }
        if (stk.ss_flags != SS_DISABLE)
-               printf("[FAIL]\tss_flags=%i, should be SS_DISABLE\n",
+               printf("[FAIL]\tss_flags=%x, should be SS_DISABLE\n",
                                stk.ss_flags);
        else
                printf("[OK]\tsigaltstack is disabled in sighandler\n");
@@ -122,7 +122,8 @@ int main(void)
        if (stk.ss_flags == SS_DISABLE) {
                printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n");
        } else {
-               printf("[FAIL]\tInitial sigaltstack state was %i; should have been SS_DISABLE\n", stk.ss_flags);
+               printf("[FAIL]\tInitial sigaltstack state was %x; "
+                      "should have been SS_DISABLE\n", stk.ss_flags);
                return EXIT_FAILURE;
        }
 
@@ -165,7 +166,7 @@ int main(void)
                exit(EXIT_FAILURE);
        }
        if (stk.ss_flags != SS_AUTODISARM) {
-               printf("[FAIL]\tss_flags=%i, should be SS_AUTODISARM\n",
+               printf("[FAIL]\tss_flags=%x, should be SS_AUTODISARM\n",
                                stk.ss_flags);
                exit(EXIT_FAILURE);
        }
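
The %i to %x switch is not cosmetic: ss_flags is a bit mask, and SS_AUTODISARM in particular is defined as (1U << 31), which decimal formatting renders as -2147483648. A standalone illustration:

#include <stdio.h>

#define SS_AUTODISARM (1U << 31)	/* value from the uapi headers */

int main(void)
{
	int flags = SS_AUTODISARM;

	printf("ss_flags=%i\n", flags);	/* -2147483648: the bit is unreadable */
	printf("ss_flags=%x\n", flags);	/* 80000000: obviously bit 31 */
	return 0;
}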
index bbd0b5398b613c08ee77eb4fbf9740383d874c0d..4685b3e421fcd48f261d32a54edc6fa1658abc70 100644 (file)
@@ -1,11 +1,5 @@
-all: get_size
+CFLAGS := -static -ffreestanding -nostartfiles -s
 
-get_size: get_size.c
-       $(CC) -static -ffreestanding -nostartfiles -s $< -o $@
-
-TEST_PROGS := get_size
+TEST_GEN_PROGS := get_size
 
 include ../lib.mk
-
-clean:
-       $(RM) get_size
index 1d5556869137929564c490473c35386ca3be3692..b90e50c36f9f99a49c414fc351cf0d6f3f1ebf16 100644 (file)
@@ -1,20 +1,16 @@
-CC = $(CROSS_COMPILE)gcc
 BUILD_FLAGS = -DKTEST
 CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS)
 LDFLAGS += -lrt -lpthread
 
 # these are all "safe" tests that don't modify
 # system time or require escalated privileges
-TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
             inconsistency-check raw_skew threadtest rtctest
 
-TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
+TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
                      skew_consistency clocksource-switch leap-a-day \
                      leapcrash set-tai set-2038 set-tz
 
-bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED)
-
-all: ${bins}
 
 include ../lib.mk
 
@@ -34,5 +30,3 @@ run_destructive_tests: run_tests
        ./set-tai
        ./set-2038
 
-clean:
-       rm -f ${bins}
index 900dfaf810510d6578a01d287b010c6f61b0b1c4..4cff7e7ddcc47b80ef30a06a779ea45dae5a5f3e 100644 (file)
@@ -1,41 +1,33 @@
 # Makefile for vm selftests
 
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
-BINARIES = compaction_test
-BINARIES += hugepage-mmap
-BINARIES += hugepage-shm
-BINARIES += map_hugetlb
-BINARIES += mlock2-tests
-BINARIES += on-fault-limit
-BINARIES += thuge-gen
-BINARIES += transhuge-stress
-BINARIES += userfaultfd
-BINARIES += userfaultfd_hugetlb
-BINARIES += userfaultfd_shmem
-BINARIES += mlock-random-test
-
-all: $(BINARIES)
-%: %.c
-       $(CC) $(CFLAGS) -o $@ $^ -lrt
-userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h
-       $(CC) $(CFLAGS) -O2 -o $@ $< -lpthread
-
-userfaultfd_hugetlb: userfaultfd.c ../../../../usr/include/linux/kernel.h
+LDLIBS = -lrt
+TEST_GEN_FILES = compaction_test
+TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += on-fault-limit
+TEST_GEN_FILES += thuge-gen
+TEST_GEN_FILES += transhuge-stress
+TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += userfaultfd_hugetlb
+TEST_GEN_FILES += userfaultfd_shmem
+TEST_GEN_FILES += mlock-random-test
+
+TEST_PROGS := run_vmtests
+
+include ../lib.mk
+
+$(OUTPUT)/userfaultfd: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h
+
+$(OUTPUT)/userfaultfd_hugetlb: userfaultfd.c ../../../../usr/include/linux/kernel.h
        $(CC) $(CFLAGS) -DHUGETLB_TEST -O2 -o $@ $< -lpthread
 
-userfaultfd_shmem: userfaultfd.c ../../../../usr/include/linux/kernel.h
+$(OUTPUT)/userfaultfd_shmem: userfaultfd.c  ../../../../usr/include/linux/kernel.h
        $(CC) $(CFLAGS) -DSHMEM_TEST -O2 -o $@ $< -lpthread
 
-mlock-random-test: mlock-random-test.c
-       $(CC) $(CFLAGS) -o $@ $< -lcap
+$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
 
 ../../../../usr/include/linux/kernel.h:
        make -C ../../../.. headers_install
-
-TEST_PROGS := run_vmtests
-TEST_FILES := $(BINARIES)
-
-include ../lib.mk
-
-clean:
-       $(RM) $(BINARIES)
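
With the explicit recipes gone, each plain test in this file is linked by make's built-in rule, roughly $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) prog.c $(LDLIBS) -o prog, so library dependencies move into LDLIBS instead of hand-written commands:

LDLIBS = -lrt                                  # every test links librt
$(OUTPUT)/mlock-random-test: LDLIBS += -lcap   # one test adds libcap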
index 5a840a605a162be7657920277621d50486961a5d..e9449c8018887785e9ab17219019423244e1b2b0 100644 (file)
@@ -398,12 +398,12 @@ static void *uffd_poll_thread(void *arg)
                        uffd = msg.arg.fork.ufd;
                        pollfd[0].fd = uffd;
                        break;
-               case UFFD_EVENT_MADVDONTNEED:
-                       uffd_reg.range.start = msg.arg.madv_dn.start;
-                       uffd_reg.range.len = msg.arg.madv_dn.end -
-                               msg.arg.madv_dn.start;
+               case UFFD_EVENT_REMOVE:
+                       uffd_reg.range.start = msg.arg.remove.start;
+                       uffd_reg.range.len = msg.arg.remove.end -
+                               msg.arg.remove.start;
                        if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
-                               fprintf(stderr, "madv_dn failure\n"), exit(1);
+                               fprintf(stderr, "remove failure\n"), exit(1);
                        break;
                case UFFD_EVENT_REMAP:
                        area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -569,9 +569,9 @@ static int userfaultfd_open(int features)
  * part is accessed after mremap. Since hugetlbfs does not support
  * mremap, the entire monitored area is accessed in a single pass for
  * HUGETLB_TEST.
- * The release of the pages currently generates event only for
- * anonymous memory (UFFD_EVENT_MADVDONTNEED), hence it is not checked
- * for hugetlb and shmem.
+ * The release of the pages currently generates an event for shmem and
+ * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
+ * for hugetlb.
  */
 static int faulting_process(void)
 {
@@ -610,7 +610,6 @@ static int faulting_process(void)
                }
        }
 
-#ifndef SHMEM_TEST
        if (release_pages(area_dst))
                return 1;
 
@@ -618,7 +617,6 @@ static int faulting_process(void)
                if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
                        fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
        }
-#endif /* SHMEM_TEST */
 
 #endif /* HUGETLB_TEST */
 
@@ -715,14 +713,14 @@ static int userfaultfd_events_test(void)
        pid_t pid;
        char c;
 
-       printf("testing events (fork, remap, madv_dn): ");
+       printf("testing events (fork, remap, remove): ");
        fflush(stdout);
 
        if (release_pages(area_dst))
                return 1;
 
        features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
-               UFFD_FEATURE_EVENT_MADVDONTNEED;
+               UFFD_FEATURE_EVENT_REMOVE;
        if (userfaultfd_open(features) < 0)
                return 1;
        fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
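
Beyond the rename, the semantic change is that the release notification (now UFFD_EVENT_REMOVE) fires for shmem as well as anonymous memory, which is why the SHMEM_TEST guard around release_pages() could go. A hedged sketch of consuming the renamed event; handle_remove() is a hypothetical callback, not part of the selftest:

#include <err.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

static void handle_remove(unsigned long start, unsigned long end)
{
	printf("monitored range [0x%lx, 0x%lx) was removed\n", start, end);
}

static void drain_one_event(int uffd)
{
	struct uffd_msg msg;

	if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
		err(1, "short read on userfaultfd");

	if (msg.event == UFFD_EVENT_REMOVE)	/* was UFFD_EVENT_MADVDONTNEED */
		handle_remove(msg.arg.remove.start, msg.arg.remove.end);
}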
index 83d8b1c6cb0e54d03b9f59403e7b72acee089de8..3a5ebae5303e26a0cff0f3a906027ea35c638fbf 100644 (file)
@@ -17,6 +17,9 @@ TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
 BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
 
 UNAME_M := $(shell uname -m)
@@ -40,10 +43,10 @@ all_64: $(BINARIES_64)
 clean:
        $(RM) $(BINARIES_32) $(BINARIES_64)
 
-$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
        $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
 
-$(TARGETS_C_64BIT_ALL:%=%_64): %_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
        $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
 
 # x86_64 users should be encouraged to install 32-bit libraries
@@ -65,12 +68,12 @@ warn_32bit_failure:
 endif
 
 # Some tests have additional dependencies.
-sysret_ss_attrs_64: thunks.S
-ptrace_syscall_32: raw_syscall_helper_32.S
-test_syscall_vdso_32: thunks_32.S
+$(OUTPUT)/sysret_ss_attrs_64: thunks.S
+$(OUTPUT)/ptrace_syscall_32: raw_syscall_helper_32.S
+$(OUTPUT)/test_syscall_vdso_32: thunks_32.S
 
 # check_initial_reg_state is special: it needs a custom entry, and it
 # needs to be static so that its interpreter doesn't destroy its initial
 # state.
-check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
-check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
+$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
+$(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
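
The x86 Makefile uses static pattern rules, so the OUTPUT move only needed the target side of each pattern re-anchored: every word of BINARIES_32 is matched against $(OUTPUT)/%_32 and the stem names the source file. With a hypothetical OUTPUT=/tmp/x86:

# /tmp/x86/ptrace_syscall_32 matches $(OUTPUT)/%_32 with stem
# ptrace_syscall, so its prerequisite is ptrace_syscall.c.
$(BINARIES_32): $(OUTPUT)/%_32: %.c
	$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm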
index df9e0a0cdf294e1e334c6558c9c4f87a38387e1d..3237bc010e1c32b6cf0742a901a5d28b74391c5b 100644 (file)
@@ -192,7 +192,7 @@ void lots_o_noops_around_write(int *write_to_me)
 #define SYS_pkey_alloc  381
 #define SYS_pkey_free   382
 #define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x18
+#define si_pkey_offset 0x14
 #else
 #define SYS_mprotect_key 329
 #define SYS_pkey_alloc  330
@@ -462,7 +462,7 @@ void pkey_disable_set(int pkey, int flags)
        unsigned long syscall_flags = 0;
        int ret;
        int pkey_rights;
-       u32 orig_pkru;
+       u32 orig_pkru = rdpkru();
 
        dprintf1("START->%s(%d, 0x%x)\n", __func__,
                pkey, flags);
@@ -812,8 +812,6 @@ void setup_hugetlbfs(void)
 {
        int err;
        int fd;
-       int validated_nr_pages;
-       int i;
        char buf[] = "123";
 
        if (geteuid() != 0) {
@@ -1116,11 +1114,6 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
                err = sys_pkey_free(i);
                pkey_assert(err);
 
-               /* not enforced when pkey_get() is not a syscall
-               err = pkey_get(i, 0);
-               pkey_assert(err < 0);
-               */
-
                err = sys_pkey_free(i);
                pkey_assert(err);
 
@@ -1133,14 +1126,8 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
 void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
 {
        int err;
-       int bad_flag = (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) + 1;
        int bad_pkey = NR_PKEYS+99;
 
-       /* not enforced when pkey_get() is not a syscall
-       err = pkey_get(bad_pkey, bad_flag);
-       pkey_assert(err < 0);
-       */
-
        /* pass a known-invalid pkey in: */
        err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
        pkey_assert(err);
@@ -1149,8 +1136,6 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
 /* Assumes that all pkeys other than 'pkey' are unallocated */
 void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
 {
-       unsigned long flags;
-       unsigned long init_val;
        int err;
        int allocated_pkeys[NR_PKEYS] = {0};
        int nr_allocated_pkeys = 0;
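
Among the protection_keys fixes, the orig_pkru change is the behavioral one: the variable was declared without an initializer and not provably assigned on every path before being read, so it now snapshots the register at declaration. The shape of the problem and the fix in miniature; check_pkru() is a hypothetical stand-in for whatever later reads the value:

{	/* before */
	u32 orig_pkru;			/* indeterminate on some paths */
	check_pkru(orig_pkru);		/* may compare against garbage */
}
{	/* after */
	u32 orig_pkru = rdpkru();	/* always a real snapshot of PKRU */
	check_pkru(orig_pkru);		/* well defined */
}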
index 29d80346e3eb5a14108e3e3f514182806ab584e1..c3a87e5f9d36482c4a606afe9c6e82bfff8fe90c 100644 (file)
@@ -2,8 +2,7 @@ all:
 
 TEST_PROGS := zram.sh
 TEST_FILES := zram01.sh zram02.sh zram_lib.sh
+EXTRA_CLEAN := err.log
 
 include ../lib.mk
 
-clean:
-       $(RM) err.log
index 3815e940fbeacb444bffc22c581f127d98730a59..2366177172f67cd3480dd5f11b62ce412d9bf567 100644 (file)
@@ -204,7 +204,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
        work->addr = hva;
        work->arch = *arch;
        work->mm = current->mm;
-       atomic_inc(&work->mm->mm_users);
+       mmget(work->mm);
        kvm_get_kvm(work->vcpu->kvm);
 
        /* this can't really happen otherwise gfn_to_pfn_async
index cc4d6e0dd2a2333b260947f54dfadec3f23edcf8..35f71409d9ee4dc1510e82ad2bff8726e0664c95 100644 (file)
@@ -611,7 +611,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
                return ERR_PTR(-ENOMEM);
 
        spin_lock_init(&kvm->mmu_lock);
-       atomic_inc(&current->mm->mm_count);
+       mmgrab(current->mm);
        kvm->mm = current->mm;
        kvm_eventfd_init(kvm);
        mutex_init(&kvm->lock);
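
These KVM hunks adopt the new mm refcounting helpers: mmget() replaces the open-coded increment of mm_users (async page faults need the whole address space kept alive), while mmgrab() bumps mm_count (the VM only needs the mm_struct itself to stay around). Simplified sketch of the helpers as introduced in include/linux/sched.h around this time; the real definitions carry documentation:

static inline void mmgrab(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_count);	/* pin struct mm_struct only */
}

static inline void mmget(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_users);	/* pin the address space too */
}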
@@ -2350,9 +2350,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
-static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kvm_vcpu_fault(struct vm_fault *vmf)
 {
-       struct kvm_vcpu *vcpu = vma->vm_file->private_data;
+       struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
        struct page *page;
 
        if (vmf->pgoff == 0)