git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 14 Dec 2016 17:17:42 +0000 (09:17 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 14 Dec 2016 17:17:42 +0000 (09:17 -0800)
Pull ext4 updates from Ted Ts'o:
 "This merge request includes the dax-4.0-iomap-pmd branch which is
  needed for both ext4 and xfs dax changes to use iomap for DAX. It also
  includes the fscrypt branch which is needed for ubifs encryption work
  as well as ext4 encryption and fscrypt cleanups.

  Lots of cleanups and bug fixes, especially making sure ext4 is robust
  against maliciously corrupted file systems --- especially maliciously
  corrupted xattr blocks and a maliciously corrupted superblock. Also
  fix ext4 support for 64k block sizes so it works well on ppcle. Fixed
  mbcache so we don't miss some common xattr blocks that can be merged"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (86 commits)
  dax: Fix sleep in atomic contex in grab_mapping_entry()
  fscrypt: Rename FS_WRITE_PATH_FL to FS_CTX_HAS_BOUNCE_BUFFER_FL
  fscrypt: Delay bounce page pool allocation until needed
  fscrypt: Cleanup page locking requirements for fscrypt_{decrypt,encrypt}_page()
  fscrypt: Cleanup fscrypt_{decrypt,encrypt}_page()
  fscrypt: Never allocate fscrypt_ctx on in-place encryption
  fscrypt: Use correct index in decrypt path.
  fscrypt: move the policy flags and encryption mode definitions to uapi header
  fscrypt: move non-public structures and constants to fscrypt_private.h
  fscrypt: unexport fscrypt_initialize()
  fscrypt: rename get_crypt_info() to fscrypt_get_crypt_info()
  fscrypto: move ioctl processing more fully into common code
  fscrypto: remove unneeded Kconfig dependencies
  MAINTAINERS: fscrypto: recommend linux-fsdevel for fscrypto patches
  ext4: do not perform data journaling when data is encrypted
  ext4: return -ENOMEM instead of success
  ext4: reject inodes with negative size
  ext4: remove another test in ext4_alloc_file_blocks()
  Documentation: fix description of ext4's block_validity mount option
  ext4: fix checks for data=ordered and journal_async_commit options
  ...

MAINTAINERS
fs/dax.c
fs/ext4/page-io.c
fs/ext4/super.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/xfs/xfs_aops.c
include/uapi/linux/fs.h
mm/filemap.c

diff --combined MAINTAINERS
index 1174508ee59755896d62df095a6bbc787b629b7a,0774714c4e67c74e69ccc62214ccc9862ff5141c..8007e2811264205bd76ad7b39f632cae74ea90c1
@@@ -35,13 -35,13 +35,13 @@@ trivial patch so apply some common sens
  
        PLEASE check your patch with the automated style checker
        (scripts/checkpatch.pl) to catch trivial style violations.
 -      See Documentation/CodingStyle for guidance here.
 +      See Documentation/process/coding-style.rst for guidance here.
  
        PLEASE CC: the maintainers and mailing lists that are generated
        by scripts/get_maintainer.pl.  The results returned by the
        script will be best if you have git installed and are making
        your changes in a branch derived from Linus' latest git tree.
 -      See Documentation/SubmittingPatches for details.
 +      See Documentation/process/submitting-patches.rst for details.
  
        PLEASE try to include any credit lines you want added with the
        patch. It avoids people being missed off by mistake and makes
@@@ -54,7 -54,7 +54,7 @@@
        of the Linux Foundation certificate of contribution and should
        include a Signed-off-by: line.  The current version of this
        "Developer's Certificate of Origin" (DCO) is listed in the file
 -      Documentation/SubmittingPatches.
 +      Documentation/process/submitting-patches.rst.
  
  6.    Make sure you have the right to send any changes you make. If you
        do changes at work you may find your employer owns the patch
@@@ -74,14 -74,9 +74,14 @@@ Descriptions of section entries
           These reviewers should be CCed on patches.
        L: Mailing list that is relevant to this area
        W: Web-page with status/info
 +      B: URI for where to file bugs. A web-page with detailed bug
 +         filing info, a direct bug tracker link, or a mailto: URI.
 +      C: URI for chat protocol, server and channel where developers
 +         usually hang out, for example irc://server/channel.
        Q: Patchwork web based patch tracking system site
        T: SCM tree type and location.
           Type is one of: git, hg, quilt, stgit, topgit
 +      B: Bug tracking system location.
        S: Status, one of the following:
           Supported:   Someone is actually paid to look after this.
           Maintained:  Someone actually looks after it.
@@@ -260,12 -255,6 +260,12 @@@ L:       linux-gpio@vger.kernel.or
  S:    Maintained
  F:    drivers/gpio/gpio-104-idio-16.c
  
 +ACCES 104-QUAD-8 IIO DRIVER
 +M:    William Breathitt Gray <vilhelm.gray@gmail.com>
 +L:    linux-iio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/iio/counter/104-quad-8.c
 +
  ACENIC DRIVER
  M:    Jes Sorensen <jes@trained-monkey.org>
  L:    linux-acenic@sunsite.dk
@@@ -292,7 -281,6 +292,7 @@@ L: linux-acpi@vger.kernel.or
  W:    https://01.org/linux-acpi
  Q:    https://patchwork.kernel.org/project/linux-acpi/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
 +B:    https://bugzilla.kernel.org
  S:    Supported
  F:    drivers/acpi/
  F:    drivers/pnp/pnpacpi/
@@@ -316,8 -304,6 +316,8 @@@ W: https://acpica.org
  W:    https://github.com/acpica/acpica/
  Q:    https://patchwork.kernel.org/project/linux-acpi/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
 +B:    https://bugzilla.kernel.org
 +B:    https://bugs.acpica.org
  S:    Supported
  F:    drivers/acpi/acpica/
  F:    include/acpi/
@@@ -327,7 -313,6 +327,7 @@@ ACPI FAN DRIVE
  M:    Zhang Rui <rui.zhang@intel.com>
  L:    linux-acpi@vger.kernel.org
  W:    https://01.org/linux-acpi
 +B:    https://bugzilla.kernel.org
  S:    Supported
  F:    drivers/acpi/fan.c
  
@@@ -343,7 -328,6 +343,7 @@@ ACPI THERMAL DRIVE
  M:    Zhang Rui <rui.zhang@intel.com>
  L:    linux-acpi@vger.kernel.org
  W:    https://01.org/linux-acpi
 +B:    https://bugzilla.kernel.org
  S:    Supported
  F:    drivers/acpi/*thermal*
  
@@@ -351,7 -335,6 +351,7 @@@ ACPI VIDEO DRIVE
  M:    Zhang Rui <rui.zhang@intel.com>
  L:    linux-acpi@vger.kernel.org
  W:    https://01.org/linux-acpi
 +B:    https://bugzilla.kernel.org
  S:    Supported
  F:    drivers/acpi/acpi_video.c
  
@@@ -587,11 -570,6 +587,11 @@@ T:       git git://linuxtv.org/anttip/media_t
  S:    Maintained
  F:    drivers/media/usb/airspy/
  
 +ALACRITECH GIGABIT ETHERNET DRIVER
 +M:    Lino Sanfilippo <LinoSanfilippo@gmx.de>
 +S:    Maintained
 +F:    drivers/net/ethernet/alacritech/*
 +
  ALCATEL SPEEDTOUCH USB DRIVER
  M:    Duncan Sands <duncan.sands@free.fr>
  L:    linux-usb@vger.kernel.org
@@@ -809,7 -787,7 +809,7 @@@ S: Supporte
  F:    drivers/iio/*/ad*
  X:    drivers/iio/*/adjd*
  F:    drivers/staging/iio/*/ad*
 -F:    staging/iio/trigger/iio-trig-bfin-timer.c
 +F:    drivers/staging/iio/trigger/iio-trig-bfin-timer.c
  
  ANALOG DEVICES INC DMA DRIVERS
  M:    Lars-Peter Clausen <lars@metafoo.de>
@@@ -1058,7 -1036,6 +1058,7 @@@ F:      arch/arm/mach-meson
  F:    arch/arm/boot/dts/meson*
  F:    arch/arm64/boot/dts/amlogic/
  F:    drivers/pinctrl/meson/
 +F:    drivers/mmc/host/meson*
  N:    meson
  
  ARM/Annapurna Labs ALPINE ARCHITECTURE
@@@ -1798,7 -1775,6 +1798,7 @@@ F:      drivers/char/hw_random/st-rng.
  F:    drivers/clocksource/arm_global_timer.c
  F:    drivers/clocksource/clksrc_st_lpc.c
  F:    drivers/cpufreq/sti-cpufreq.c
 +F:    drivers/dma/st_fdma*
  F:    drivers/i2c/busses/i2c-st.c
  F:    drivers/media/rc/st_rc.c
  F:    drivers/media/platform/sti/c8sectpfe/
@@@ -1809,7 -1785,6 +1809,7 @@@ F:      drivers/phy/phy-stih407-usb.
  F:    drivers/phy/phy-stih41x-usb.c
  F:    drivers/pinctrl/pinctrl-st.c
  F:    drivers/remoteproc/st_remoteproc.c
 +F:    drivers/remoteproc/st_slim_rproc.c
  F:    drivers/reset/sti/
  F:    drivers/rtc/rtc-st-lpc.c
  F:    drivers/tty/serial/st-asc.c
@@@ -1818,7 -1793,6 +1818,7 @@@ F:      drivers/usb/host/ehci-st.
  F:    drivers/usb/host/ohci-st.c
  F:    drivers/watchdog/st_lpc_wdt.c
  F:    drivers/ata/ahci_st.c
 +F:    include/linux/remoteproc/st_slim_rproc.h
  
  ARM/STM32 ARCHITECTURE
  M:    Maxime Coquelin <mcoquelin.stm32@gmail.com>
@@@ -2556,8 -2530,6 +2556,8 @@@ L:      netdev@vger.kernel.or
  L:    linux-kernel@vger.kernel.org
  S:    Supported
  F:    kernel/bpf/
 +F:    tools/testing/selftests/bpf/
 +F:    lib/test_bpf.c
  
  BROADCOM B44 10/100 ETHERNET DRIVER
  M:    Michael Chan <michael.chan@broadcom.com>
@@@ -2618,7 -2590,6 +2618,7 @@@ L:      linux-arm-kernel@lists.infradead.or
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rpi/linux-rpi.git
  S:    Maintained
  N:    bcm2835
 +F:    drivers/staging/vc04_services
  
  BROADCOM BCM47XX MIPS ARCHITECTURE
  M:    Hauke Mehrtens <hauke@hauke-m.de>
@@@ -2771,14 -2742,6 +2771,14 @@@ L:    bcm-kernel-feedback-list@broadcom.co
  S:    Maintained
  F:    drivers/mtd/nand/brcmnand/
  
 +BROADCOM STB AVS CPUFREQ DRIVER
 +M:    Markus Mayer <mmayer@broadcom.com>
 +M:    bcm-kernel-feedback-list@broadcom.com
 +L:    linux-pm@vger.kernel.org
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt
 +F:    drivers/cpufreq/brcmstb*
 +
  BROADCOM SPECIFIC AMBA DRIVER (BCMA)
  M:    Rafał Miłecki <zajec5@gmail.com>
  L:    linux-wireless@vger.kernel.org
@@@ -2967,7 -2930,7 +2967,7 @@@ CAPELLA MICROSYSTEMS LIGHT SENSOR DRIVE
  M:    Kevin Tsai <ktsai@capellamicro.com>
  S:    Maintained
  F:    drivers/iio/light/cm*
 -F:    Documentation/devicetree/bindings/i2c/trivial-devices.txt
 +F:    Documentation/devicetree/bindings/i2c/trivial-admin-guide/devices.rst
  
  CAVIUM I2C DRIVER
  M:    Jan Glauber <jglauber@cavium.com>
@@@ -3067,12 -3030,6 +3067,12 @@@ F:    drivers/usb/host/whci
  F:    drivers/usb/wusbcore/
  F:    include/linux/usb/wusb*
  
 +HT16K33 LED CONTROLLER DRIVER
 +M:    Robin van der Gracht <robin@protonic.nl>
 +S:    Maintained
 +F:    drivers/auxdisplay/ht16k33.c
 +F:    Documentation/devicetree/bindings/display/ht16k33.txt
 +
  CFAG12864B LCD DRIVER
  M:    Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
  W:    http://miguelojeda.es/auxdisplay.htm
@@@ -3121,7 -3078,7 +3121,7 @@@ M:      Harry Wei <harryxiyou@gmail.com
  L:    xiyoulinuxkernelgroup@googlegroups.com (subscribers-only)
  L:    linux-kernel@zh-kernel.org (moderated for non-subscribers)
  S:    Maintained
 -F:    Documentation/zh_CN/
 +F:    Documentation/translations/zh_CN/
  
  CHIPIDEA USB HIGH SPEED DUAL ROLE CONTROLLER
  M:    Peter Chen <Peter.Chen@nxp.com>
@@@ -3377,7 -3334,6 +3377,7 @@@ L:      linux-pm@vger.kernel.or
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
  T:    git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
 +B:    https://bugzilla.kernel.org
  F:    Documentation/cpu-freq/
  F:    drivers/cpufreq/
  F:    include/linux/cpufreq.h
@@@ -3417,7 -3373,6 +3417,7 @@@ M:      Daniel Lezcano <daniel.lezcano@linar
  L:    linux-pm@vger.kernel.org
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 +B:    https://bugzilla.kernel.org
  F:    drivers/cpuidle/*
  F:    include/linux/cpuidle.h
  
@@@ -3956,7 -3911,7 +3956,7 @@@ F:      include/linux/dma-buf
  F:    include/linux/reservation.h
  F:    include/linux/*fence.h
  F:    Documentation/dma-buf-sharing.txt
 -T:    git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
  
  SYNC FILE FRAMEWORK
  M:    Sumit Semwal <sumit.semwal@linaro.org>
@@@ -3964,12 -3919,10 +3964,12 @@@ R:   Gustavo Padovan <gustavo@padovan.org
  S:    Maintained
  L:    linux-media@vger.kernel.org
  L:    dri-devel@lists.freedesktop.org
 -F:    drivers/dma-buf/sync_file.c
 +F:    drivers/dma-buf/sync_*
 +F:    drivers/dma-buf/sw_sync.c
  F:    include/linux/sync_file.h
 +F:    include/uapi/linux/sync_file.h
  F:    Documentation/sync_file.txt
 -T:    git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
  
  DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
  M:    Vinod Koul <vinod.koul@intel.com>
@@@ -4057,8 -4010,6 +4057,8 @@@ DRM DRIVER
  M:    David Airlie <airlied@linux.ie>
  L:    dri-devel@lists.freedesktop.org
  T:    git git://people.freedesktop.org/~airlied/linux
 +B:    https://bugs.freedesktop.org/
 +C:    irc://chat.freenode.net/dri-devel
  S:    Maintained
  F:    drivers/gpu/drm/
  F:    drivers/gpu/vga/
@@@ -4069,30 -4020,11 +4069,30 @@@ F:   Documentation/gpu
  F:    include/drm/
  F:    include/uapi/drm/
  
 +DRM DRIVERS AND MISC GPU PATCHES
 +M:    Daniel Vetter <daniel.vetter@intel.com>
 +M:    Jani Nikula <jani.nikula@linux.intel.com>
 +M:    Sean Paul <seanpaul@chromium.org>
 +W:    https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
 +S:    Maintained
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
 +F:    Documentation/gpu/
 +F:    drivers/gpu/vga/
 +F:    drivers/gpu/drm/*
 +F:    include/drm/drm*
 +F:    include/uapi/drm/drm*
 +
  DRM DRIVER FOR AST SERVER GRAPHICS CHIPS
  M:    Dave Airlie <airlied@redhat.com>
  S:    Odd Fixes
  F:    drivers/gpu/drm/ast/
  
 +DRM DRIVERS FOR BRIDGE CHIPS
 +M:    Archit Taneja <architt@codeaurora.org>
 +S:    Maintained
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
 +F:    drivers/gpu/drm/bridge/
 +
  DRM DRIVER FOR BOCHS VIRTUAL GPU
  M:    Gerd Hoffmann <kraxel@redhat.com>
  S:    Odd Fixes
@@@ -4128,9 -4060,8 +4128,9 @@@ INTEL DRM DRIVERS (excluding Poulsbo, M
  M:    Daniel Vetter <daniel.vetter@intel.com>
  M:    Jani Nikula <jani.nikula@linux.intel.com>
  L:    intel-gfx@lists.freedesktop.org
 -L:    dri-devel@lists.freedesktop.org
  W:    https://01.org/linuxgraphics/
 +B:    https://01.org/linuxgraphics/documentation/how-report-bugs
 +C:    irc://chat.freenode.net/intel-gfx
  Q:    http://patchwork.freedesktop.org/project/intel-gfx/
  T:    git git://anongit.freedesktop.org/drm-intel
  S:    Supported
@@@ -4139,16 -4070,6 +4139,16 @@@ F:    include/drm/i915
  F:    include/uapi/drm/i915_drm.h
  F:    Documentation/gpu/i915.rst
  
 +INTEL GVT-g DRIVERS (Intel GPU Virtualization)
 +M:      Zhenyu Wang <zhenyuw@linux.intel.com>
 +M:      Zhi Wang <zhi.a.wang@intel.com>
 +L:      igvt-g-dev@lists.01.org
 +L:      intel-gfx@lists.freedesktop.org
 +W:      https://01.org/igvt-g
 +T:      git https://github.com/01org/gvt-linux.git
 +S:      Supported
 +F:      drivers/gpu/drm/i915/gvt/
 +
  DRM DRIVERS FOR ATMEL HLCDC
  M:    Boris Brezillon <boris.brezillon@free-electrons.com>
  L:    dri-devel@lists.freedesktop.org
@@@ -4163,15 -4084,6 +4163,15 @@@ S:    Supporte
  F:    drivers/gpu/drm/sun4i/
  F:    Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
  
 +DRM DRIVERS FOR AMLOGIC SOCS
 +M:    Neil Armstrong <narmstrong@baylibre.com>
 +L:    dri-devel@lists.freedesktop.org
 +L:    linux-amlogic@lists.infradead.org
 +W:    http://linux-meson.com/
 +S:    Supported
 +F:    drivers/gpu/drm/meson/
 +F:    Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt
 +
  DRM DRIVERS FOR EXYNOS
  M:    Inki Dae <inki.dae@samsung.com>
  M:    Joonyoung Shim <jy0922.shim@samsung.com>
@@@ -4211,7 -4123,6 +4211,7 @@@ F:      drivers/gpu/drm/gma500
  
  DRM DRIVERS FOR HISILICON
  M:    Xinliang Liu <z.liuxinliang@hisilicon.com>
 +M:    Rongrong Zou <zourongrong@gmail.com>
  R:    Xinwei Kong <kong.kongxinwei@hisilicon.com>
  R:    Chen Feng <puck.chen@hisilicon.com>
  L:    dri-devel@lists.freedesktop.org
@@@ -4336,7 -4247,6 +4336,7 @@@ DRM DRIVERS FOR VIVANTE GPU I
  M:    Lucas Stach <l.stach@pengutronix.de>
  R:    Russell King <linux+etnaviv@armlinux.org.uk>
  R:    Christian Gmeiner <christian.gmeiner@gmail.com>
 +L:    etnaviv@lists.freedesktop.org
  L:    dri-devel@lists.freedesktop.org
  S:    Maintained
  F:    drivers/gpu/drm/etnaviv/
@@@ -4377,13 -4287,6 +4377,13 @@@ S:    Maintaine
  F:    drivers/gpu/drm/tilcdc/
  F:    Documentation/devicetree/bindings/display/tilcdc/
  
 +DRM DRIVERS FOR ZTE ZX
 +M:    Shawn Guo <shawnguo@kernel.org>
 +L:    dri-devel@lists.freedesktop.org
 +S:    Maintained
 +F:    drivers/gpu/drm/zte/
 +F:    Documentation/devicetree/bindings/display/zte,vou.txt
 +
  DSBR100 USB FM RADIO DRIVER
  M:    Alexey Klimov <klimov.linux@gmail.com>
  L:    linux-media@vger.kernel.org
@@@ -4728,14 -4631,12 +4728,14 @@@ L:   linux-efi@vger.kernel.or
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
  S:    Maintained
  F:    Documentation/efi-stub.txt
 -F:    arch/ia64/kernel/efi.c
 +F:    arch/*/kernel/efi.c
  F:    arch/x86/boot/compressed/eboot.[ch]
 -F:    arch/x86/include/asm/efi.h
 +F:    arch/*/include/asm/efi.h
  F:    arch/x86/platform/efi/
  F:    drivers/firmware/efi/
  F:    include/linux/efi*.h
 +F:    arch/arm/boot/compressed/efi-header.S
 +F:    arch/arm64/kernel/efi-entry.S
  
  EFI VARIABLE FILESYSTEM
  M:    Matthew Garrett <matthew.garrett@nebula.com>
@@@ -5049,9 -4950,7 +5049,9 @@@ K:      fmc_d.*registe
  FPGA MANAGER FRAMEWORK
  M:    Alan Tull <atull@opensource.altera.com>
  R:    Moritz Fischer <moritz.fischer@ettus.com>
 +L:    linux-fpga@vger.kernel.org
  S:    Maintained
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git
  F:    drivers/fpga/
  F:    include/linux/fpga/fpga-mgr.h
  W:    http://www.rocketboards.org
@@@ -5069,9 -4968,10 +5069,9 @@@ F:     drivers/net/wan/dlci.
  F:    drivers/net/wan/sdla.c
  
  FRAMEBUFFER LAYER
 -M:    Tomi Valkeinen <tomi.valkeinen@ti.com>
  L:    linux-fbdev@vger.kernel.org
  Q:    http://patchwork.kernel.org/project/linux-fbdev/list/
 -S:    Maintained
 +S:    Orphan
  F:    Documentation/fb/
  F:    drivers/video/
  F:    include/video/
@@@ -5144,18 -5044,9 +5144,18 @@@ S:    Maintaine
  F:    drivers/net/ethernet/freescale/fman
  F:    Documentation/devicetree/bindings/powerpc/fsl/fman.txt
  
 +FREESCALE SOC DRIVERS
 +M:    Scott Wood <oss@buserror.net>
 +L:    linuxppc-dev@lists.ozlabs.org
 +L:    linux-arm-kernel@lists.infradead.org
 +S:    Maintained
 +F:    drivers/soc/fsl/
 +F:    include/linux/fsl/
 +
  FREESCALE QUICC ENGINE LIBRARY
 +M:    Qiang Zhao <qiang.zhao@nxp.com>
  L:    linuxppc-dev@lists.ozlabs.org
 -S:    Orphan
 +S:    Maintained
  F:    drivers/soc/fsl/qe/
  F:    include/soc/fsl/*qe*.h
  F:    include/soc/fsl/*ucc*.h
@@@ -5207,6 -5098,13 +5207,6 @@@ F:     sound/soc/fsl/fsl
  F:    sound/soc/fsl/imx*
  F:    sound/soc/fsl/mpc8610_hpcd.c
  
 -FREESCALE QORIQ MANAGEMENT COMPLEX DRIVER
 -M:    "J. German Rivera" <German.Rivera@freescale.com>
 -M:    Stuart Yoder <stuart.yoder@nxp.com>
 -L:    linux-kernel@vger.kernel.org
 -S:    Maintained
 -F:    drivers/staging/fsl-mc/
 -
  FREEVXFS FILESYSTEM
  M:    Christoph Hellwig <hch@infradead.org>
  W:    ftp://ftp.openlinux.org/pub/people/hch/vxfs
@@@ -5240,6 -5138,7 +5240,7 @@@ F:      include/linux/fscache*.
  FS-CRYPTO: FILE SYSTEM LEVEL ENCRYPTION SUPPORT
  M:    Theodore Y. Ts'o <tytso@mit.edu>
  M:    Jaegeuk Kim <jaegeuk@kernel.org>
+ L:    linux-fsdevel@vger.kernel.org
  S:    Supported
  F:    fs/crypto/
  F:    include/linux/fscrypto.h
@@@ -5304,7 -5203,6 +5305,7 @@@ L:      kernel-hardening@lists.openwall.co
  S:    Maintained
  F:    scripts/gcc-plugins/
  F:    scripts/gcc-plugin.sh
 +F:    scripts/Makefile.gcc-plugins
  F:    Documentation/gcc-plugins.txt
  
  GCOV BASED KERNEL PROFILING
@@@ -5766,7 -5664,6 +5767,7 @@@ HIBERNATION (aka Software Suspend, aka 
  M:    "Rafael J. Wysocki" <rjw@rjwysocki.net>
  M:    Pavel Machek <pavel@ucw.cz>
  L:    linux-pm@vger.kernel.org
 +B:    https://bugzilla.kernel.org
  S:    Supported
  F:    arch/x86/power/
  F:    drivers/base/power/
@@@ -5948,7 -5845,6 +5949,7 @@@ F:      drivers/input/serio/hyperv-keyboard.
  F:    drivers/pci/host/pci-hyperv.c
  F:    drivers/net/hyperv/
  F:    drivers/scsi/storvsc_drv.c
 +F:    drivers/uio/uio_hv_generic.c
  F:    drivers/video/fbdev/hyperv_fb.c
  F:    include/linux/hyperv.h
  F:    tools/hv/
@@@ -6192,9 -6088,14 +6193,9 @@@ S:     Maintaine
  F:    Documentation/cdrom/ide-cd
  F:    drivers/ide/ide-cd*
  
 -IDLE-I7300
 -M:    Andy Henroid <andrew.d.henroid@intel.com>
 -L:    linux-pm@vger.kernel.org
 -S:    Supported
 -F:    drivers/idle/i7300_idle.c
 -
  IEEE 802.15.4 SUBSYSTEM
  M:    Alexander Aring <aar@pengutronix.de>
 +M:    Stefan Schmidt <stefan@osg.samsung.com>
  L:    linux-wpan@vger.kernel.org
  W:    http://wpan.cakelab.org/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git
@@@ -6224,22 -6125,6 +6225,22 @@@ L:    linux-media@vger.kernel.or
  S:    Maintained
  F:    drivers/media/rc/iguanair.c
  
 +IIO DIGITAL POTENTIOMETER DAC
 +M:    Peter Rosin <peda@axentia.se>
 +L:    linux-iio@vger.kernel.org
 +S:    Maintained
 +F:    Documentation/ABI/testing/sysfs-bus-iio-dac-dpot-dac
 +F:    Documentation/devicetree/bindings/iio/dac/dpot-dac.txt
 +F:    drivers/iio/dac/dpot-dac.c
 +
 +IIO ENVELOPE DETECTOR
 +M:    Peter Rosin <peda@axentia.se>
 +L:    linux-iio@vger.kernel.org
 +S:    Maintained
 +F:    Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector
 +F:    Documentation/devicetree/bindings/iio/adc/envelope-detector.txt
 +F:    drivers/iio/adc/envelope-detector.c
 +
  IIO SUBSYSTEM AND DRIVERS
  M:    Jonathan Cameron <jic23@kernel.org>
  R:    Hartmut Knaack <knaack.h@gmx.de>
@@@ -6397,11 -6282,9 +6398,11 @@@ S:    Maintaine
  F:    drivers/platform/x86/intel-vbtn.c
  
  INTEL IDLE DRIVER
 +M:    Jacob Pan <jacob.jun.pan@linux.intel.com>
  M:    Len Brown <lenb@kernel.org>
  L:    linux-pm@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
 +B:    https://bugzilla.kernel.org
  S:    Supported
  F:    drivers/idle/intel_idle.c
  
@@@ -6621,13 -6504,6 +6622,13 @@@ S:    Maintaine
  F:    arch/x86/include/asm/pmc_core.h
  F:    drivers/platform/x86/intel_pmc_core*
  
 +INVENSENSE MPU-3050 GYROSCOPE DRIVER
 +M:    Linus Walleij <linus.walleij@linaro.org>
 +L:    linux-iio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/iio/gyro/mpu3050*
 +F:    Documentation/devicetree/bindings/iio/gyroscope/inv,mpu3050.txt
 +
  IOC3 ETHERNET DRIVER
  M:    Ralf Baechle <ralf@linux-mips.org>
  L:    linux-mips@linux-mips.org
@@@ -7209,7 -7085,6 +7210,7 @@@ F:      drivers/scsi/53c700
  LED SUBSYSTEM
  M:    Richard Purdie <rpurdie@rpsys.net>
  M:    Jacek Anaszewski <j.anaszewski@samsung.com>
 +M:    Pavel Machek <pavel@ucw.cz>
  L:    linux-leds@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
  S:    Maintained
@@@ -7682,10 -7557,8 +7683,10 @@@ S:    Maintaine
  MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER
  M:    Andrew Lunn <andrew@lunn.ch>
  M:    Vivien Didelot <vivien.didelot@savoirfairelinux.com>
 +L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/dsa/mv88e6xxx/
 +F:    Documentation/devicetree/bindings/net/dsa/marvell.txt
  
  MARVELL ARMADA DRM SUPPORT
  M:    Russell King <rmk+kernel@armlinux.org.uk>
@@@ -7835,7 -7708,6 +7836,7 @@@ MCP4531 MICROCHIP DIGITAL POTENTIOMETE
  M:    Peter Rosin <peda@axentia.se>
  L:    linux-iio@vger.kernel.org
  S:    Maintained
 +F:    Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
  F:    drivers/iio/potentiometer/mcp4531.c
  
  MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
@@@ -8186,7 -8058,6 +8187,7 @@@ F:      drivers/infiniband/hw/mlx4
  F:    include/linux/mlx4/
  
  MELLANOX MLX5 core VPI driver
 +M:    Saeed Mahameed <saeedm@mellanox.com>
  M:    Matan Barak <matanb@mellanox.com>
  M:    Leon Romanovsky <leonro@mellanox.com>
  L:    netdev@vger.kernel.org
@@@ -8400,12 -8271,6 +8401,12 @@@ T:    git git://linuxtv.org/mkrufky/tuners
  S:    Maintained
  F:    drivers/media/tuners/mxl5007t.*
  
 +MXSFB DRM DRIVER
 +M:    Marek Vasut <marex@denx.de>
 +S:    Supported
 +F:    drivers/gpu/drm/mxsfb/
 +F:    Documentation/devicetree/bindings/display/mxsfb-drm.txt
 +
  MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE)
  M:    Hyong-Youb Kim <hykim@myri.com>
  L:    netdev@vger.kernel.org
@@@ -8583,6 -8448,7 +8584,6 @@@ F:      include/uapi/linux/net_namespace.
  F:    tools/net/
  F:    tools/testing/selftests/net/
  F:    lib/random32.c
 -F:    lib/test_bpf.c
  
  NETWORKING [IPv4/IPv6]
  M:    "David S. Miller" <davem@davemloft.net>
@@@ -8811,16 -8677,6 +8812,16 @@@ L:    linux-nvme@lists.infradead.or
  S:    Supported
  F:    drivers/nvme/target/
  
 +NVM EXPRESS FC TRANSPORT DRIVERS
 +M:    James Smart <james.smart@broadcom.com>
 +L:    linux-nvme@lists.infradead.org
 +S:    Supported
 +F:    include/linux/nvme-fc.h
 +F:    include/linux/nvme-fc-driver.h
 +F:    drivers/nvme/host/fc.c
 +F:    drivers/nvme/target/fc.c
 +F:    drivers/nvme/target/fcloop.c
 +
  NVMEM FRAMEWORK
  M:    Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
  M:    Maxime Ripard <maxime.ripard@free-electrons.com>
@@@ -8883,7 -8739,6 +8884,7 @@@ F:      drivers/regulator/tps65217-regulator
  F:    drivers/regulator/tps65218-regulator.c
  F:    drivers/regulator/tps65910-regulator.c
  F:    drivers/regulator/twl-regulator.c
 +F:    drivers/regulator/twl6030-regulator.c
  F:    include/linux/i2c-omap.h
  
  OMAP DEVICE TREE SUPPORT
@@@ -9104,11 -8959,9 +9105,11 @@@ F:    drivers/of/resolver.
  
  OPENRISC ARCHITECTURE
  M:    Jonas Bonn <jonas@southpole.se>
 -W:    http://openrisc.net
 +M:    Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
 +M:    Stafford Horne <shorne@gmail.com>
 +L:    openrisc@lists.librecores.org
 +W:    http://openrisc.io
  S:    Maintained
 -T:    git git://openrisc.net/~jonas/linux
  F:    arch/openrisc/
  
  OPENVSWITCH
@@@ -9240,7 -9093,7 +9241,7 @@@ F:      drivers/misc/panel.
  
  PARALLEL PORT SUBSYSTEM
  M:    Sudip Mukherjee <sudipm.mukherjee@gmail.com>
 -M:    Sudip Mukherjee <sudip@vectorindia.org>
 +M:    Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
  L:    linux-parport@lists.infradead.org (subscribers-only)
  S:    Maintained
  F:    drivers/parport/
@@@ -9395,12 -9248,11 +9396,12 @@@ S:   Maintaine
  F:    drivers/pci/host/*layerscape*
  
  PCI DRIVER FOR IMX6
 -M:    Richard Zhu <Richard.Zhu@freescale.com>
 +M:    Richard Zhu <hongxing.zhu@nxp.com>
  M:    Lucas Stach <l.stach@pengutronix.de>
  L:    linux-pci@vger.kernel.org
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 +F:    Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
  F:    drivers/pci/host/*imx6*
  
  PCI DRIVER FOR TI KEYSTONE
@@@ -9459,11 -9311,17 +9460,11 @@@ F:   drivers/pci/host/pci-exynos.
  
  PCI DRIVER FOR SYNOPSIS DESIGNWARE
  M:    Jingoo Han <jingoohan1@gmail.com>
 -M:    Pratyush Anand <pratyush.anand@gmail.com>
 -L:    linux-pci@vger.kernel.org
 -S:    Maintained
 -F:    drivers/pci/host/*designware*
 -
 -PCI DRIVER FOR SYNOPSYS PROTOTYPING DEVICE
 -M:    Jose Abreu <Jose.Abreu@synopsys.com>
 +M:    Joao Pinto <Joao.Pinto@synopsys.com>
  L:    linux-pci@vger.kernel.org
  S:    Maintained
  F:    Documentation/devicetree/bindings/pci/designware-pcie.txt
 -F:    drivers/pci/host/pcie-designware-plat.c
 +F:    drivers/pci/host/*designware*
  
  PCI DRIVER FOR GENERIC OF HOSTS
  M:    Will Deacon <will.deacon@arm.com>
@@@ -9478,7 -9336,7 +9479,7 @@@ PCI DRIVER FOR INTEL VOLUME MANAGEMENT 
  M:    Keith Busch <keith.busch@intel.com>
  L:    linux-pci@vger.kernel.org
  S:    Supported
 -F:    arch/x86/pci/vmd.c
 +F:    drivers/pci/host/vmd.c
  
  PCIE DRIVER FOR ST SPEAR13XX
  M:    Pratyush Anand <pratyush.anand@gmail.com>
@@@ -9711,8 -9569,8 +9712,8 @@@ F:      arch/mips/boot/dts/pistachio
  F:      arch/mips/configs/pistachio*_defconfig
  
  PKTCDVD DRIVER
 -M:    Jiri Kosina <jikos@kernel.org>
 -S:    Maintained
 +S:    Orphan
 +M:    linux-block@vger.kernel.org
  F:    drivers/block/pktcdvd.c
  F:    include/linux/pktcdvd.h
  F:    include/uapi/linux/pktcdvd.h
@@@ -9765,7 -9623,6 +9766,7 @@@ POWER MANAGEMENT COR
  M:    "Rafael J. Wysocki" <rjw@rjwysocki.net>
  L:    linux-pm@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
 +B:    https://bugzilla.kernel.org
  S:    Supported
  F:    drivers/base/power/
  F:    include/linux/pm.h
@@@ -9947,7 -9804,7 +9948,7 @@@ F:      drivers/media/usb/pwc/
  
  PWM FAN DRIVER
  M:    Kamil Debski <kamil@wypas.org>
 -M:    Lukasz Majewski <l.majewski@samsung.com>
 +M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-hwmon@vger.kernel.org
  S:    Supported
  F:    Documentation/devicetree/bindings/hwmon/pwm-fan.txt
@@@ -10089,12 -9946,6 +10090,12 @@@ F:  fs/qnx4
  F:    include/uapi/linux/qnx4_fs.h
  F:    include/uapi/linux/qnxtypes.h
  
 +QORIQ DPAA2 FSL-MC BUS DRIVER
 +M:    Stuart Yoder <stuart.yoder@nxp.com>
 +L:    linux-kernel@vger.kernel.org
 +S:    Maintained
 +F:    drivers/staging/fsl-mc/
 +
  QT1010 MEDIA DRIVER
  M:    Antti Palosaari <crope@iki.fi>
  L:    linux-media@vger.kernel.org
@@@ -10557,7 -10408,7 +10558,7 @@@ F:   arch/s390/pci
  F:    drivers/pci/hotplug/s390_pci_hpc.c
  
  S390 ZCRYPT DRIVER
 -M:    Ingo Tuchscherer <ingo.tuchscherer@de.ibm.com>
 +M:    Harald Freudenberger <freude@de.ibm.com>
  L:    linux-s390@vger.kernel.org
  W:    http://www.ibm.com/developerworks/linux/linux390/
  S:    Supported
@@@ -10724,7 -10575,7 +10725,7 @@@ L:   netdev@vger.kernel.or
  F:    drivers/net/ethernet/samsung/sxgbe/
  
  SAMSUNG THERMAL DRIVER
 -M:    Lukasz Majewski <l.majewski@samsung.com>
 +M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-pm@vger.kernel.org
  L:    linux-samsung-soc@vger.kernel.org
  S:    Supported
@@@ -10850,11 -10701,6 +10851,11 @@@ W: http://www.sunplus.co
  S:    Supported
  F:    arch/score/
  
 +SCR24X CHIP CARD INTERFACE DRIVER
 +M:    Lubomir Rintel <lkundrak@v3.sk>
 +S:    Supported
 +F:    drivers/char/pcmcia/scr24x_cs.c
 +
  SYSTEM CONTROL & POWER INTERFACE (SCPI) Message Protocol drivers
  M:    Sudeep Holla <sudeep.holla@arm.com>
  L:    linux-arm-kernel@lists.infradead.org
@@@ -11258,7 -11104,7 +11259,7 @@@ F:   include/media/i2c/ov2659.
  SILICON MOTION SM712 FRAME BUFFER DRIVER
  M:    Sudip Mukherjee <sudipm.mukherjee@gmail.com>
  M:    Teddy Wang <teddy.wang@siliconmotion.com>
 -M:    Sudip Mukherjee <sudip@vectorindia.org>
 +M:    Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
  L:    linux-fbdev@vger.kernel.org
  S:    Maintained
  F:    drivers/video/fbdev/sm712*
@@@ -11620,7 -11466,7 +11621,7 @@@ STABLE BRANC
  M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  L:    stable@vger.kernel.org
  S:    Supported
 -F:    Documentation/stable_kernel_rules.txt
 +F:    Documentation/process/stable-kernel-rules.rst
  
  STAGING SUBSYSTEM
  M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@@ -11686,11 -11532,17 +11687,11 @@@ F:        drivers/staging/rtl8712
  STAGING - SILICON MOTION SM750 FRAME BUFFER DRIVER
  M:    Sudip Mukherjee <sudipm.mukherjee@gmail.com>
  M:    Teddy Wang <teddy.wang@siliconmotion.com>
 -M:    Sudip Mukherjee <sudip@vectorindia.org>
 +M:    Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
  L:    linux-fbdev@vger.kernel.org
  S:    Maintained
  F:    drivers/staging/sm750fb/
  
 -STAGING - SLICOSS
 -M:    Lior Dotan <liodot@gmail.com>
 -M:    Christopher Harrer <charrer@alacritech.com>
 -S:    Odd Fixes
 -F:    drivers/staging/slicoss/
 -
  STAGING - SPEAKUP CONSOLE SPEECH DRIVER
  M:    William Hubbs <w.d.hubbs@gmail.com>
  M:    Chris Brannon <chris@the-brannons.com>
@@@ -11760,7 -11612,6 +11761,7 @@@ M:   "Rafael J. Wysocki" <rjw@rjwysocki.n
  M:    Len Brown <len.brown@intel.com>
  M:    Pavel Machek <pavel@ucw.cz>
  L:    linux-pm@vger.kernel.org
 +B:    https://bugzilla.kernel.org
  S:    Supported
  F:    Documentation/power/
  F:    arch/x86/kernel/acpi/
@@@ -12904,15 -12755,6 +12905,15 @@@ F: drivers/vfio
  F:    include/linux/vfio.h
  F:    include/uapi/linux/vfio.h
  
 +VFIO MEDIATED DEVICE DRIVERS
 +M:    Kirti Wankhede <kwankhede@nvidia.com>
 +L:    kvm@vger.kernel.org
 +S:    Maintained
 +F:    Documentation/vfio-mediated-device.txt
 +F:    drivers/vfio/mdev/
 +F:    include/linux/mdev.h
 +F:    samples/vfio-mdev/
 +
  VFIO PLATFORM DRIVER
  M:    Baptiste Reynal <b.reynal@virtualopensystems.com>
  L:    kvm@vger.kernel.org
@@@ -13065,7 -12907,7 +13066,7 @@@ M:   Greg Kroah-Hartman <gregkh@linuxfoun
  L:    devel@driverdev.osuosl.org
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
 -F:    Documentation/vme_api.txt
 +F:    Documentation/driver-api/vme.rst
  F:    drivers/staging/vme/
  F:    drivers/vme/
  F:    include/linux/vme*
@@@ -13289,7 -13131,7 +13290,7 @@@ T:   git git://git.kernel.org/pub/scm/lin
  S:    Maintained
  F:    include/linux/workqueue.h
  F:    kernel/workqueue.c
 -F:    Documentation/workqueue.txt
 +F:    Documentation/core-api/workqueue.rst
  
  X-POWERS MULTIFUNCTION PMIC DEVICE DRIVERS
  M:    Chen-Yu Tsai <wens@csie.org>
@@@ -13354,6 -13196,7 +13355,6 @@@ F:   drivers/media/tuners/tuner-xc2028.
  
  XEN HYPERVISOR INTERFACE
  M:    Boris Ostrovsky <boris.ostrovsky@oracle.com>
 -M:    David Vrabel <david.vrabel@citrix.com>
  M:    Juergen Gross <jgross@suse.com>
  L:    xen-devel@lists.xenproject.org (moderated for non-subscribers)
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git
diff --combined fs/dax.c
index 6916ed37d4631846a7478cfa3e3dd14c8808d373,5bfd27b4a69c684346a7cd9d755178b7cbdf6228..5ae8e11ad78677ef3103fc569d0959742a380370
+++ b/fs/dax.c
  #include <linux/iomap.h>
  #include "internal.h"
  
- /*
-  * We use lowest available bit in exceptional entry for locking, other two
-  * bits to determine entry type. In total 3 special bits.
-  */
- #define RADIX_DAX_SHIFT       (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)
- #define RADIX_DAX_PTE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
- #define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
- #define RADIX_DAX_TYPE_MASK (RADIX_DAX_PTE | RADIX_DAX_PMD)
- #define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_TYPE_MASK)
- #define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
- #define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
-               RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE) | \
-               RADIX_TREE_EXCEPTIONAL_ENTRY))
  /* We choose 4096 entries - same as per-zone page wait tables */
  #define DAX_WAIT_TABLE_BITS 12
  #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
  
- wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
+ static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
  
  static int __init init_dax_wait_table(void)
  {
  }
  fs_initcall(init_dax_wait_table);
  
- static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
-                                             pgoff_t index)
- {
-       unsigned long hash = hash_long((unsigned long)mapping ^ index,
-                                      DAX_WAIT_TABLE_BITS);
-       return wait_table + hash;
- }
  static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
  {
        struct request_queue *q = bdev->bd_queue;
@@@ -98,209 -76,52 +76,52 @@@ static void dax_unmap_atomic(struct blo
        blk_queue_exit(bdev->bd_queue);
  }
  
- struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+ static int dax_is_pmd_entry(void *entry)
  {
-       struct page *page = alloc_pages(GFP_KERNEL, 0);
-       struct blk_dax_ctl dax = {
-               .size = PAGE_SIZE,
-               .sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
-       };
-       long rc;
-       if (!page)
-               return ERR_PTR(-ENOMEM);
-       rc = dax_map_atomic(bdev, &dax);
-       if (rc < 0)
-               return ERR_PTR(rc);
-       memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
-       dax_unmap_atomic(bdev, &dax);
-       return page;
+       return (unsigned long)entry & RADIX_DAX_PMD;
  }
  
- static bool buffer_written(struct buffer_head *bh)
+ static int dax_is_pte_entry(void *entry)
  {
-       return buffer_mapped(bh) && !buffer_unwritten(bh);
+       return !((unsigned long)entry & RADIX_DAX_PMD);
  }
  
- /*
-  * When ext4 encounters a hole, it returns without modifying the buffer_head
-  * which means that we can't trust b_size.  To cope with this, we set b_state
-  * to 0 before calling get_block and, if any bit is set, we know we can trust
-  * b_size.  Unfortunate, really, since ext4 knows precisely how long a hole is
-  * and would save us time calling get_block repeatedly.
-  */
- static bool buffer_size_valid(struct buffer_head *bh)
+ static int dax_is_zero_entry(void *entry)
  {
-       return bh->b_state != 0;
+       return (unsigned long)entry & RADIX_DAX_HZP;
  }
  
- static sector_t to_sector(const struct buffer_head *bh,
-               const struct inode *inode)
+ static int dax_is_empty_entry(void *entry)
  {
-       sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
-       return sector;
+       return (unsigned long)entry & RADIX_DAX_EMPTY;
  }
  
- static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
-                     loff_t start, loff_t end, get_block_t get_block,
-                     struct buffer_head *bh)
+ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
  {
-       loff_t pos = start, max = start, bh_max = start;
-       bool hole = false;
-       struct block_device *bdev = NULL;
-       int rw = iov_iter_rw(iter), rc;
-       long map_len = 0;
+       struct page *page = alloc_pages(GFP_KERNEL, 0);
        struct blk_dax_ctl dax = {
-               .addr = ERR_PTR(-EIO),
+               .size = PAGE_SIZE,
+               .sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
        };
-       unsigned blkbits = inode->i_blkbits;
-       sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
-                                                               >> blkbits;
-       if (rw == READ)
-               end = min(end, i_size_read(inode));
-       while (pos < end) {
-               size_t len;
-               if (pos == max) {
-                       long page = pos >> PAGE_SHIFT;
-                       sector_t block = page << (PAGE_SHIFT - blkbits);
-                       unsigned first = pos - (block << blkbits);
-                       long size;
-                       if (pos == bh_max) {
-                               bh->b_size = PAGE_ALIGN(end - pos);
-                               bh->b_state = 0;
-                               rc = get_block(inode, block, bh, rw == WRITE);
-                               if (rc)
-                                       break;
-                               if (!buffer_size_valid(bh))
-                                       bh->b_size = 1 << blkbits;
-                               bh_max = pos - first + bh->b_size;
-                               bdev = bh->b_bdev;
-                               /*
-                                * We allow uninitialized buffers for writes
-                                * beyond EOF as those cannot race with faults
-                                */
-                               WARN_ON_ONCE(
-                                       (buffer_new(bh) && block < file_blks) ||
-                                       (rw == WRITE && buffer_unwritten(bh)));
-                       } else {
-                               unsigned done = bh->b_size -
-                                               (bh_max - (pos - first));
-                               bh->b_blocknr += done >> blkbits;
-                               bh->b_size -= done;
-                       }
-                       hole = rw == READ && !buffer_written(bh);
-                       if (hole) {
-                               size = bh->b_size - first;
-                       } else {
-                               dax_unmap_atomic(bdev, &dax);
-                               dax.sector = to_sector(bh, inode);
-                               dax.size = bh->b_size;
-                               map_len = dax_map_atomic(bdev, &dax);
-                               if (map_len < 0) {
-                                       rc = map_len;
-                                       break;
-                               }
-                               dax.addr += first;
-                               size = map_len - first;
-                       }
-                       /*
-                        * pos + size is one past the last offset for IO,
-                        * so pos + size can overflow loff_t at extreme offsets.
-                        * Cast to u64 to catch this and get the true minimum.
-                        */
-                       max = min_t(u64, pos + size, end);
-               }
-               if (iov_iter_rw(iter) == WRITE) {
-                       len = copy_from_iter_pmem(dax.addr, max - pos, iter);
-               } else if (!hole)
-                       len = copy_to_iter((void __force *) dax.addr, max - pos,
-                                       iter);
-               else
-                       len = iov_iter_zero(max - pos, iter);
-               if (!len) {
-                       rc = -EFAULT;
-                       break;
-               }
+       long rc;
  
-               pos += len;
-               if (!IS_ERR(dax.addr))
-                       dax.addr += len;
-       }
+       if (!page)
+               return ERR_PTR(-ENOMEM);
  
+       rc = dax_map_atomic(bdev, &dax);
+       if (rc < 0)
+               return ERR_PTR(rc);
+       memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
        dax_unmap_atomic(bdev, &dax);
-       return (pos == start) ? rc : pos - start;
- }
- /**
-  * dax_do_io - Perform I/O to a DAX file
-  * @iocb: The control block for this I/O
-  * @inode: The file which the I/O is directed at
-  * @iter: The addresses to do I/O from or to
-  * @get_block: The filesystem method used to translate file offsets to blocks
-  * @end_io: A filesystem callback for I/O completion
-  * @flags: See below
-  *
-  * This function uses the same locking scheme as do_blockdev_direct_IO:
-  * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
-  * caller for writes.  For reads, we take and release the i_mutex ourselves.
-  * If DIO_LOCKING is not set, the filesystem takes care of its own locking.
-  * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
-  * is in progress.
-  */
- ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
-                 struct iov_iter *iter, get_block_t get_block,
-                 dio_iodone_t end_io, int flags)
- {
-       struct buffer_head bh;
-       ssize_t retval = -EINVAL;
-       loff_t pos = iocb->ki_pos;
-       loff_t end = pos + iov_iter_count(iter);
-       memset(&bh, 0, sizeof(bh));
-       bh.b_bdev = inode->i_sb->s_bdev;
-       if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
-               inode_lock(inode);
-       /* Protects against truncate */
-       if (!(flags & DIO_SKIP_DIO_COUNT))
-               inode_dio_begin(inode);
-       retval = dax_io(inode, iter, pos, end, get_block, &bh);
-       if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
-               inode_unlock(inode);
-       if (end_io) {
-               int err;
-               err = end_io(iocb, pos, retval, bh.b_private);
-               if (err)
-                       retval = err;
-       }
-       if (!(flags & DIO_SKIP_DIO_COUNT))
-               inode_dio_end(inode);
-       return retval;
+       return page;
  }
- EXPORT_SYMBOL_GPL(dax_do_io);
  
  /*
   * DAX radix tree locking
   */
  struct exceptional_entry_key {
        struct address_space *mapping;
-       unsigned long index;
+       pgoff_t entry_start;
  };
  
  struct wait_exceptional_entry_queue {
        struct exceptional_entry_key key;
  };
  
+ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
+               pgoff_t index, void *entry, struct exceptional_entry_key *key)
+ {
+       unsigned long hash;
+       /*
+        * If 'entry' is a PMD, align the 'index' that we use for the wait
+        * queue to the start of that PMD.  This ensures that all offsets in
+        * the range covered by the PMD map to the same bit lock.
+        */
+       if (dax_is_pmd_entry(entry))
+               index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
+       key->mapping = mapping;
+       key->entry_start = index;
+       hash = hash_long((unsigned long)mapping ^ index, DAX_WAIT_TABLE_BITS);
+       return wait_table + hash;
+ }
  static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
                                       int sync, void *keyp)
  {
                container_of(wait, struct wait_exceptional_entry_queue, wait);
  
        if (key->mapping != ewait->key.mapping ||
-           key->index != ewait->key.index)
+           key->entry_start != ewait->key.entry_start)
                return 0;
        return autoremove_wake_function(wait, mode, sync, NULL);
  }
@@@ -342,7 -183,7 +183,7 @@@ static inline void *lock_slot(struct ad
                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
  
        entry |= RADIX_DAX_ENTRY_LOCK;
 -      radix_tree_replace_slot(slot, (void *)entry);
 +      radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
        return (void *)entry;
  }
  
@@@ -356,7 -197,7 +197,7 @@@ static inline void *unlock_slot(struct 
                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
  
        entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
 -      radix_tree_replace_slot(slot, (void *)entry);
 +      radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
        return (void *)entry;
  }
  
  static void *get_unlocked_mapping_entry(struct address_space *mapping,
                                        pgoff_t index, void ***slotp)
  {
-       void *ret, **slot;
+       void *entry, **slot;
        struct wait_exceptional_entry_queue ewait;
-       wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+       wait_queue_head_t *wq;
  
        init_wait(&ewait.wait);
        ewait.wait.func = wake_exceptional_entry_func;
-       ewait.key.mapping = mapping;
-       ewait.key.index = index;
  
        for (;;) {
-               ret = __radix_tree_lookup(&mapping->page_tree, index, NULL,
+               entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
                                          &slot);
-               if (!ret || !radix_tree_exceptional_entry(ret) ||
+               if (!entry || !radix_tree_exceptional_entry(entry) ||
                    !slot_locked(mapping, slot)) {
                        if (slotp)
                                *slotp = slot;
-                       return ret;
+                       return entry;
                }
+               wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
                prepare_to_wait_exclusive(wq, &ewait.wait,
                                          TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(&mapping->tree_lock);
        }
  }
  
+ static void put_locked_mapping_entry(struct address_space *mapping,
+                                    pgoff_t index, void *entry)
+ {
+       if (!radix_tree_exceptional_entry(entry)) {
+               unlock_page(entry);
+               put_page(entry);
+       } else {
+               dax_unlock_mapping_entry(mapping, index);
+       }
+ }
+ /*
+  * Called when we are done with radix tree entry we looked up via
+  * get_unlocked_mapping_entry() and which we didn't lock in the end.
+  */
+ static void put_unlocked_mapping_entry(struct address_space *mapping,
+                                      pgoff_t index, void *entry)
+ {
+       if (!radix_tree_exceptional_entry(entry))
+               return;
+       /* We have to wake up next waiter for the radix tree entry lock */
+       dax_wake_mapping_entry_waiter(mapping, index, entry, false);
+ }
  /*
   * Find radix tree entry at given index. If it points to a page, return with
   * the page locked. If it points to the exceptional entry, return with the
   * radix tree entry locked. If the radix tree doesn't contain given index,
   * create empty exceptional entry for the index and return with it locked.
   *
+  * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
+  * either return that locked entry or will return an error.  This error will
+  * happen if there are any 4k entries (either zero pages or DAX entries)
+  * within the 2MiB range that we are requesting.
+  *
+  * We always favor 4k entries over 2MiB entries. There isn't a flow where we
+  * evict 4k entries in order to 'upgrade' them to a 2MiB entry.  A 2MiB
+  * insertion will fail if it finds any 4k entries already in the tree, and a
+  * 4k insertion will cause an existing 2MiB entry to be unmapped and
+  * downgraded to 4k entries.  This happens for both 2MiB huge zero pages as
+  * well as 2MiB empty entries.
+  *
+  * The exception to this downgrade path is for 2MiB DAX PMD entries that have
+  * real storage backing them.  We will leave these real 2MiB DAX entries in
+  * the tree, and PTE writes will simply dirty the entire 2MiB DAX entry.
+  *
   * Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
   * persistent memory the benefit is doubtful. We can add that later if we can
   * show it helps.
   */
- static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index)
+ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
+               unsigned long size_flag)
  {
-       void *ret, **slot;
+       bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
+       void *entry, **slot;
  
  restart:
        spin_lock_irq(&mapping->tree_lock);
-       ret = get_unlocked_mapping_entry(mapping, index, &slot);
+       entry = get_unlocked_mapping_entry(mapping, index, &slot);
+       if (entry) {
+               if (size_flag & RADIX_DAX_PMD) {
+                       if (!radix_tree_exceptional_entry(entry) ||
+                           dax_is_pte_entry(entry)) {
+                               put_unlocked_mapping_entry(mapping, index,
+                                               entry);
+                               entry = ERR_PTR(-EEXIST);
+                               goto out_unlock;
+                       }
+               } else { /* trying to grab a PTE entry */
+                       if (radix_tree_exceptional_entry(entry) &&
+                           dax_is_pmd_entry(entry) &&
+                           (dax_is_zero_entry(entry) ||
+                            dax_is_empty_entry(entry))) {
+                               pmd_downgrade = true;
+                       }
+               }
+       }
        /* No entry for given index? Make sure radix tree is big enough. */
-       if (!ret) {
+       if (!entry || pmd_downgrade) {
                int err;
  
+               if (pmd_downgrade) {
+                       /*
+                        * Make sure 'entry' remains valid while we drop
+                        * mapping->tree_lock.
+                        */
+                       entry = lock_slot(mapping, slot);
+               }
                spin_unlock_irq(&mapping->tree_lock);
+               /*
+                * Besides huge zero pages the only other thing that gets
+                * downgraded are empty entries which don't need to be
+                * unmapped.
+                */
+               if (pmd_downgrade && dax_is_zero_entry(entry))
+                       unmap_mapping_range(mapping,
+                               (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
                err = radix_tree_preload(
                                mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
-               if (err)
+               if (err) {
+                       if (pmd_downgrade)
+                               put_locked_mapping_entry(mapping, index, entry);
                        return ERR_PTR(err);
-               ret = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
-                              RADIX_DAX_ENTRY_LOCK);
+               }
                spin_lock_irq(&mapping->tree_lock);
-               err = radix_tree_insert(&mapping->page_tree, index, ret);
+               if (pmd_downgrade) {
+                       radix_tree_delete(&mapping->page_tree, index);
+                       mapping->nrexceptional--;
+                       dax_wake_mapping_entry_waiter(mapping, index, entry,
+                                       true);
+               }
+               entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
+               err = __radix_tree_insert(&mapping->page_tree, index,
+                               dax_radix_order(entry), entry);
                radix_tree_preload_end();
                if (err) {
                        spin_unlock_irq(&mapping->tree_lock);
-                       /* Someone already created the entry? */
-                       if (err == -EEXIST)
+                       /*
+                        * Someone already created the entry?  This is a
+                        * normal failure when inserting PMDs in a range
+                        * that already contains PTEs.  In that case we want
+                        * to return -EEXIST immediately.
+                        */
+                       if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
                                goto restart;
+                       /*
+                        * Our insertion of a DAX PMD entry failed, most
+                        * likely because it collided with a PTE sized entry
+                        * at a different index in the PMD range.  We haven't
+                        * inserted anything into the radix tree and have no
+                        * waiters to wake.
+                        */
                        return ERR_PTR(err);
                }
                /* Good, we have inserted empty locked entry into the tree. */
                mapping->nrexceptional++;
                spin_unlock_irq(&mapping->tree_lock);
-               return ret;
+               return entry;
        }
        /* Normal page in radix tree? */
-       if (!radix_tree_exceptional_entry(ret)) {
-               struct page *page = ret;
+       if (!radix_tree_exceptional_entry(entry)) {
+               struct page *page = entry;
  
                get_page(page);
                spin_unlock_irq(&mapping->tree_lock);
                }
                return page;
        }
-       ret = lock_slot(mapping, slot);
+       entry = lock_slot(mapping, slot);
+  out_unlock:
        spin_unlock_irq(&mapping->tree_lock);
-       return ret;
+       return entry;
  }
  
+ /*
+  * We do not necessarily hold the mapping->tree_lock when we call this
+  * function so it is possible that 'entry' is no longer a valid item in the
+  * radix tree.  This is okay because all we really need to do is to find the
+  * correct waitqueue where tasks might be waiting for that old 'entry' and
+  * wake them.
+  */
  void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-                                  pgoff_t index, bool wake_all)
+               pgoff_t index, void *entry, bool wake_all)
  {
-       wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+       struct exceptional_entry_key key;
+       wait_queue_head_t *wq;
+       wq = dax_entry_waitqueue(mapping, index, entry, &key);
  
        /*
         * Checking for locked entry and prepare_to_wait_exclusive() happens
         * So at this point all tasks that could have seen our entry locked
         * must be in the waitqueue and the following check will see them.
         */
-       if (waitqueue_active(wq)) {
-               struct exceptional_entry_key key;
-               key.mapping = mapping;
-               key.index = index;
+       if (waitqueue_active(wq))
                __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
-       }
  }
  
  void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
  {
-       void *ret, **slot;
+       void *entry, **slot;
  
        spin_lock_irq(&mapping->tree_lock);
-       ret = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
-       if (WARN_ON_ONCE(!ret || !radix_tree_exceptional_entry(ret) ||
+       entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
+       if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
                         !slot_locked(mapping, slot))) {
                spin_unlock_irq(&mapping->tree_lock);
                return;
        }
        unlock_slot(mapping, slot);
        spin_unlock_irq(&mapping->tree_lock);
-       dax_wake_mapping_entry_waiter(mapping, index, false);
- }
- static void put_locked_mapping_entry(struct address_space *mapping,
-                                    pgoff_t index, void *entry)
- {
-       if (!radix_tree_exceptional_entry(entry)) {
-               unlock_page(entry);
-               put_page(entry);
-       } else {
-               dax_unlock_mapping_entry(mapping, index);
-       }
- }
- /*
-  * Called when we are done with radix tree entry we looked up via
-  * get_unlocked_mapping_entry() and which we didn't lock in the end.
-  */
- static void put_unlocked_mapping_entry(struct address_space *mapping,
-                                      pgoff_t index, void *entry)
- {
-       if (!radix_tree_exceptional_entry(entry))
-               return;
-       /* We have to wake up next waiter for the radix tree entry lock */
-       dax_wake_mapping_entry_waiter(mapping, index, false);
+       dax_wake_mapping_entry_waiter(mapping, index, entry, false);
  }
  
  /*
@@@ -547,7 -473,7 +473,7 @@@ int dax_delete_mapping_entry(struct add
        radix_tree_delete(&mapping->page_tree, index);
        mapping->nrexceptional--;
        spin_unlock_irq(&mapping->tree_lock);
-       dax_wake_mapping_entry_waiter(mapping, index, true);
+       dax_wake_mapping_entry_waiter(mapping, index, entry, true);
  
        return 1;
  }
@@@ -600,11 -526,17 +526,17 @@@ static int copy_user_dax(struct block_d
        return 0;
  }
  
- #define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
+ /*
+  * By this point grab_mapping_entry() has ensured that we have a locked entry
+  * of the appropriate size so we don't have to worry about downgrading PMDs to
+  * PTEs.  If we happen to be trying to insert a PTE and there is a PMD
+  * already in the tree, we will skip the insertion and just dirty the PMD as
+  * appropriate.
+  */
  static void *dax_insert_mapping_entry(struct address_space *mapping,
                                      struct vm_fault *vmf,
-                                     void *entry, sector_t sector)
+                                     void *entry, sector_t sector,
+                                     unsigned long flags)
  {
        struct radix_tree_root *page_tree = &mapping->page_tree;
        int error = 0;
                error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
                if (error)
                        return ERR_PTR(error);
+       } else if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_HZP)) {
+               /* replacing huge zero page with PMD block mapping */
+               unmap_mapping_range(mapping,
+                       (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
        }
  
        spin_lock_irq(&mapping->tree_lock);
-       new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) |
-                      RADIX_DAX_ENTRY_LOCK);
+       new_entry = dax_radix_locked_entry(sector, flags);
        if (hole_fill) {
                __delete_from_page_cache(entry, NULL);
                /* Drop pagecache reference */
                put_page(entry);
-               error = radix_tree_insert(page_tree, index, new_entry);
+               error = __radix_tree_insert(page_tree, index,
+                               dax_radix_order(new_entry), new_entry);
                if (error) {
                        new_entry = ERR_PTR(error);
                        goto unlock;
                }
                mapping->nrexceptional++;
-       } else {
+       } else if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+               /*
+                * Only swap our new entry into the radix tree if the current
+                * entry is a zero page or an empty entry.  If a normal PTE or
+                * PMD entry is already in the tree, we leave it alone.  This
+                * means that if we are trying to insert a PTE and the
+                * existing entry is a PMD, we will just leave the PMD in the
+                * tree and dirty it if necessary.
+                */
 +              struct radix_tree_node *node;
                void **slot;
                void *ret;
  
 -              ret = __radix_tree_lookup(page_tree, index, NULL, &slot);
 +              ret = __radix_tree_lookup(page_tree, index, &node, &slot);
                WARN_ON_ONCE(ret != entry);
 -              radix_tree_replace_slot(slot, new_entry);
 +              __radix_tree_replace(page_tree, node, slot,
 +                                   new_entry, NULL, NULL);
        }
        if (vmf->flags & FAULT_FLAG_WRITE)
                radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
@@@ -674,7 -617,6 +619,6 @@@ static int dax_writeback_one(struct blo
                struct address_space *mapping, pgoff_t index, void *entry)
  {
        struct radix_tree_root *page_tree = &mapping->page_tree;
-       int type = RADIX_DAX_TYPE(entry);
        struct radix_tree_node *node;
        struct blk_dax_ctl dax;
        void **slot;
        if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
                goto unlock;
  
-       if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+       if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
+                               dax_is_zero_entry(entry))) {
                ret = -EIO;
                goto unlock;
        }
  
-       dax.sector = RADIX_DAX_SECTOR(entry);
-       dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+       /*
+        * Even if dax_writeback_mapping_range() was given a wbc->range_start
+        * in the middle of a PMD, the 'index' we are given will be aligned to
+        * the start index of the PMD, as will the sector we pull from
+        * 'entry'.  This allows us to flush for PMD_SIZE and not have to
+        * worry about partial PMD writebacks.
+        */
+       dax.sector = dax_radix_sector(entry);
+       dax.size = PAGE_SIZE << dax_radix_order(entry);
        spin_unlock_irq(&mapping->tree_lock);
  
        /*
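
Illustration only, not part of the commit: the new dax.size computation scales the flush size with the entry's order, so a PMD entry is flushed as one 2 MiB range. A stand-alone check of the arithmetic, assuming the usual 4 KiB page / 2 MiB PMD geometry (an assumption, not stated in the diff):

/* dax.size = PAGE_SIZE << order, for order 0 (PTE) and order 9 (PMD). */
#include <stdio.h>

#define PAGE_SHIFT	12UL			/* assumed 4 KiB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PMD_SHIFT	21UL			/* assumed 2 MiB PMDs */
#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)

int main(void)
{
	printf("PTE flush size: %lu bytes\n", PAGE_SIZE << 0);		/* 4096 */
	printf("PMD flush size: %lu bytes\n", PAGE_SIZE << PMD_ORDER);	/* 2097152 */
	return 0;
}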
@@@ -740,12 -690,11 +692,11 @@@ int dax_writeback_mapping_range(struct 
                struct block_device *bdev, struct writeback_control *wbc)
  {
        struct inode *inode = mapping->host;
-       pgoff_t start_index, end_index, pmd_index;
+       pgoff_t start_index, end_index;
        pgoff_t indices[PAGEVEC_SIZE];
        struct pagevec pvec;
        bool done = false;
        int i, ret = 0;
-       void *entry;
  
        if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
                return -EIO;
  
        start_index = wbc->range_start >> PAGE_SHIFT;
        end_index = wbc->range_end >> PAGE_SHIFT;
-       pmd_index = DAX_PMD_INDEX(start_index);
-       rcu_read_lock();
-       entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
-       rcu_read_unlock();
-       /* see if the start of our range is covered by a PMD entry */
-       if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
-               start_index = pmd_index;
  
        tag_pages_for_writeback(mapping, start_index, end_index);
  
@@@ -808,7 -748,7 +750,7 @@@ static int dax_insert_mapping(struct ad
                return PTR_ERR(dax.addr);
        dax_unmap_atomic(bdev, &dax);
  
-       ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector);
+       ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector, 0);
        if (IS_ERR(ret))
                return PTR_ERR(ret);
        *entryp = ret;
        return vm_insert_mixed(vma, vaddr, dax.pfn);
  }
  
- /**
-  * dax_fault - handle a page fault on a DAX file
-  * @vma: The virtual memory area where the fault occurred
-  * @vmf: The description of the fault
-  * @get_block: The filesystem method used to translate file offsets to blocks
-  *
-  * When a page fault occurs, filesystems may call this helper in their
-  * fault handler for DAX files. dax_fault() assumes the caller has done all
-  * the necessary locking for the page fault to proceed successfully.
-  */
- int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-                       get_block_t get_block)
- {
-       struct file *file = vma->vm_file;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       void *entry;
-       struct buffer_head bh;
-       unsigned long vaddr = (unsigned long)vmf->virtual_address;
-       unsigned blkbits = inode->i_blkbits;
-       sector_t block;
-       pgoff_t size;
-       int error;
-       int major = 0;
-       /*
-        * Check whether offset isn't beyond end of file now. Caller is supposed
-        * to hold locks serializing us with truncate / punch hole so this is
-        * a reliable test.
-        */
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (vmf->pgoff >= size)
-               return VM_FAULT_SIGBUS;
-       memset(&bh, 0, sizeof(bh));
-       block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
-       bh.b_bdev = inode->i_sb->s_bdev;
-       bh.b_size = PAGE_SIZE;
-       entry = grab_mapping_entry(mapping, vmf->pgoff);
-       if (IS_ERR(entry)) {
-               error = PTR_ERR(entry);
-               goto out;
-       }
-       error = get_block(inode, block, &bh, 0);
-       if (!error && (bh.b_size < PAGE_SIZE))
-               error = -EIO;           /* fs corruption? */
-       if (error)
-               goto unlock_entry;
-       if (vmf->cow_page) {
-               struct page *new_page = vmf->cow_page;
-               if (buffer_written(&bh))
-                       error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
-                                       bh.b_size, new_page, vaddr);
-               else
-                       clear_user_highpage(new_page, vaddr);
-               if (error)
-                       goto unlock_entry;
-               if (!radix_tree_exceptional_entry(entry)) {
-                       vmf->page = entry;
-                       return VM_FAULT_LOCKED;
-               }
-               vmf->entry = entry;
-               return VM_FAULT_DAX_LOCKED;
-       }
-       if (!buffer_mapped(&bh)) {
-               if (vmf->flags & FAULT_FLAG_WRITE) {
-                       error = get_block(inode, block, &bh, 1);
-                       count_vm_event(PGMAJFAULT);
-                       mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-                       major = VM_FAULT_MAJOR;
-                       if (!error && (bh.b_size < PAGE_SIZE))
-                               error = -EIO;
-                       if (error)
-                               goto unlock_entry;
-               } else {
-                       return dax_load_hole(mapping, entry, vmf);
-               }
-       }
-       /* Filesystem should not return unwritten buffers to us! */
-       WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-       error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
-                       bh.b_size, &entry, vma, vmf);
-  unlock_entry:
-       put_locked_mapping_entry(mapping, vmf->pgoff, entry);
-  out:
-       if (error == -ENOMEM)
-               return VM_FAULT_OOM | major;
-       /* -EBUSY is fine, somebody else faulted on the same PTE */
-       if ((error < 0) && (error != -EBUSY))
-               return VM_FAULT_SIGBUS | major;
-       return VM_FAULT_NOPAGE | major;
- }
- EXPORT_SYMBOL_GPL(dax_fault);
- #if defined(CONFIG_TRANSPARENT_HUGEPAGE)
- /*
-  * The 'colour' (ie low bits) within a PMD of a page offset.  This comes up
-  * more often than one might expect in the below function.
-  */
- #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
- static void __dax_dbg(struct buffer_head *bh, unsigned long address,
-               const char *reason, const char *fn)
- {
-       if (bh) {
-               char bname[BDEVNAME_SIZE];
-               bdevname(bh->b_bdev, bname);
-               pr_debug("%s: %s addr: %lx dev %s state %lx start %lld "
-                       "length %zd fallback: %s\n", fn, current->comm,
-                       address, bname, bh->b_state, (u64)bh->b_blocknr,
-                       bh->b_size, reason);
-       } else {
-               pr_debug("%s: %s addr: %lx fallback: %s\n", fn,
-                       current->comm, address, reason);
-       }
- }
- #define dax_pmd_dbg(bh, address, reason)      __dax_dbg(bh, address, reason, "dax_pmd")
- /**
-  * dax_pmd_fault - handle a PMD fault on a DAX file
-  * @vma: The virtual memory area where the fault occurred
-  * @vmf: The description of the fault
-  * @get_block: The filesystem method used to translate file offsets to blocks
-  *
-  * When a page fault occurs, filesystems may call this helper in their
-  * pmd_fault handler for DAX files.
-  */
- int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
-               pmd_t *pmd, unsigned int flags, get_block_t get_block)
- {
-       struct file *file = vma->vm_file;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       struct buffer_head bh;
-       unsigned blkbits = inode->i_blkbits;
-       unsigned long pmd_addr = address & PMD_MASK;
-       bool write = flags & FAULT_FLAG_WRITE;
-       struct block_device *bdev;
-       pgoff_t size, pgoff;
-       sector_t block;
-       int result = 0;
-       bool alloc = false;
-       /* dax pmd mappings require pfn_t_devmap() */
-       if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
-               return VM_FAULT_FALLBACK;
-       /* Fall back to PTEs if we're going to COW */
-       if (write && !(vma->vm_flags & VM_SHARED)) {
-               split_huge_pmd(vma, pmd, address);
-               dax_pmd_dbg(NULL, address, "cow write");
-               return VM_FAULT_FALLBACK;
-       }
-       /* If the PMD would extend outside the VMA */
-       if (pmd_addr < vma->vm_start) {
-               dax_pmd_dbg(NULL, address, "vma start unaligned");
-               return VM_FAULT_FALLBACK;
-       }
-       if ((pmd_addr + PMD_SIZE) > vma->vm_end) {
-               dax_pmd_dbg(NULL, address, "vma end unaligned");
-               return VM_FAULT_FALLBACK;
-       }
-       pgoff = linear_page_index(vma, pmd_addr);
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (pgoff >= size)
-               return VM_FAULT_SIGBUS;
-       /* If the PMD would cover blocks out of the file */
-       if ((pgoff | PG_PMD_COLOUR) >= size) {
-               dax_pmd_dbg(NULL, address,
-                               "offset + huge page size > file size");
-               return VM_FAULT_FALLBACK;
-       }
-       memset(&bh, 0, sizeof(bh));
-       bh.b_bdev = inode->i_sb->s_bdev;
-       block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
-       bh.b_size = PMD_SIZE;
-       if (get_block(inode, block, &bh, 0) != 0)
-               return VM_FAULT_SIGBUS;
-       if (!buffer_mapped(&bh) && write) {
-               if (get_block(inode, block, &bh, 1) != 0)
-                       return VM_FAULT_SIGBUS;
-               alloc = true;
-               WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-       }
-       bdev = bh.b_bdev;
-       /*
-        * If the filesystem isn't willing to tell us the length of a hole,
-        * just fall back to PTEs.  Calling get_block 512 times in a loop
-        * would be silly.
-        */
-       if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
-               dax_pmd_dbg(&bh, address, "allocated block too small");
-               return VM_FAULT_FALLBACK;
-       }
-       /*
-        * If we allocated new storage, make sure no process has any
-        * zero pages covering this hole
-        */
-       if (alloc) {
-               loff_t lstart = pgoff << PAGE_SHIFT;
-               loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
-               truncate_pagecache_range(inode, lstart, lend);
-       }
-       if (!write && !buffer_mapped(&bh)) {
-               spinlock_t *ptl;
-               pmd_t entry;
-               struct page *zero_page = mm_get_huge_zero_page(vma->vm_mm);
-               if (unlikely(!zero_page)) {
-                       dax_pmd_dbg(&bh, address, "no zero page");
-                       goto fallback;
-               }
-               ptl = pmd_lock(vma->vm_mm, pmd);
-               if (!pmd_none(*pmd)) {
-                       spin_unlock(ptl);
-                       dax_pmd_dbg(&bh, address, "pmd already present");
-                       goto fallback;
-               }
-               dev_dbg(part_to_dev(bdev->bd_part),
-                               "%s: %s addr: %lx pfn: <zero> sect: %llx\n",
-                               __func__, current->comm, address,
-                               (unsigned long long) to_sector(&bh, inode));
-               entry = mk_pmd(zero_page, vma->vm_page_prot);
-               entry = pmd_mkhuge(entry);
-               set_pmd_at(vma->vm_mm, pmd_addr, pmd, entry);
-               result = VM_FAULT_NOPAGE;
-               spin_unlock(ptl);
-       } else {
-               struct blk_dax_ctl dax = {
-                       .sector = to_sector(&bh, inode),
-                       .size = PMD_SIZE,
-               };
-               long length = dax_map_atomic(bdev, &dax);
-               if (length < 0) {
-                       dax_pmd_dbg(&bh, address, "dax-error fallback");
-                       goto fallback;
-               }
-               if (length < PMD_SIZE) {
-                       dax_pmd_dbg(&bh, address, "dax-length too small");
-                       dax_unmap_atomic(bdev, &dax);
-                       goto fallback;
-               }
-               if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) {
-                       dax_pmd_dbg(&bh, address, "pfn unaligned");
-                       dax_unmap_atomic(bdev, &dax);
-                       goto fallback;
-               }
-               if (!pfn_t_devmap(dax.pfn)) {
-                       dax_unmap_atomic(bdev, &dax);
-                       dax_pmd_dbg(&bh, address, "pfn not in memmap");
-                       goto fallback;
-               }
-               dax_unmap_atomic(bdev, &dax);
-               /*
-                * For PTE faults we insert a radix tree entry for reads, and
-                * leave it clean.  Then on the first write we dirty the radix
-                * tree entry via the dax_pfn_mkwrite() path.  This sequence
-                * allows the dax_pfn_mkwrite() call to be simpler and avoid a
-                * call into get_block() to translate the pgoff to a sector in
-                * order to be able to create a new radix tree entry.
-                *
-                * The PMD path doesn't have an equivalent to
-                * dax_pfn_mkwrite(), though, so for a read followed by a
-                * write we traverse all the way through dax_pmd_fault()
-                * twice.  This means we can just skip inserting a radix tree
-                * entry completely on the initial read and just wait until
-                * the write to insert a dirty entry.
-                */
-               if (write) {
-                       /*
-                        * We should insert radix-tree entry and dirty it here.
-                        * For now this is broken...
-                        */
-               }
-               dev_dbg(part_to_dev(bdev->bd_part),
-                               "%s: %s addr: %lx pfn: %lx sect: %llx\n",
-                               __func__, current->comm, address,
-                               pfn_t_to_pfn(dax.pfn),
-                               (unsigned long long) dax.sector);
-               result |= vmf_insert_pfn_pmd(vma, address, pmd,
-                               dax.pfn, write);
-       }
-  out:
-       return result;
-  fallback:
-       count_vm_event(THP_FAULT_FALLBACK);
-       result = VM_FAULT_FALLBACK;
-       goto out;
- }
- EXPORT_SYMBOL_GPL(dax_pmd_fault);
- #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  /**
   * dax_pfn_mkwrite - handle first write to DAX page
   * @vma: The virtual memory area where the fault occurred
@@@ -1193,62 -816,14 +818,14 @@@ int __dax_zero_page_range(struct block_
  }
  EXPORT_SYMBOL_GPL(__dax_zero_page_range);
  
- /**
-  * dax_zero_page_range - zero a range within a page of a DAX file
-  * @inode: The file being truncated
-  * @from: The file offset that is being truncated to
-  * @length: The number of bytes to zero
-  * @get_block: The filesystem method used to translate file offsets to blocks
-  *
-  * This function can be called by a filesystem when it is zeroing part of a
-  * page in a DAX file.  This is intended for hole-punch operations.  If
-  * you are truncating a file, the helper function dax_truncate_page() may be
-  * more convenient.
-  */
- int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
-                                                       get_block_t get_block)
- {
-       struct buffer_head bh;
-       pgoff_t index = from >> PAGE_SHIFT;
-       unsigned offset = from & (PAGE_SIZE-1);
-       int err;
-       /* Block boundary? Nothing to do */
-       if (!length)
-               return 0;
-       BUG_ON((offset + length) > PAGE_SIZE);
-       memset(&bh, 0, sizeof(bh));
-       bh.b_bdev = inode->i_sb->s_bdev;
-       bh.b_size = PAGE_SIZE;
-       err = get_block(inode, index, &bh, 0);
-       if (err < 0 || !buffer_written(&bh))
-               return err;
-       return __dax_zero_page_range(bh.b_bdev, to_sector(&bh, inode),
-                       offset, length);
- }
- EXPORT_SYMBOL_GPL(dax_zero_page_range);
- /**
-  * dax_truncate_page - handle a partial page being truncated in a DAX file
-  * @inode: The file being truncated
-  * @from: The file offset that is being truncated to
-  * @get_block: The filesystem method used to translate file offsets to blocks
-  *
-  * Similar to block_truncate_page(), this function can be called by a
-  * filesystem when it is truncating a DAX file to handle the partial page.
-  */
- int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
+ #ifdef CONFIG_FS_IOMAP
+ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
  {
-       unsigned length = PAGE_ALIGN(from) - from;
-       return dax_zero_page_range(inode, from, length, get_block);
+       return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
  }
- EXPORT_SYMBOL_GPL(dax_truncate_page);
  
- #ifdef CONFIG_FS_IOMAP
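
Illustration only, not part of the commit: dax_iomap_sector() turns a file position into a 512-byte sector using the extent the filesystem returned. A stand-alone sketch of that arithmetic with made-up numbers:

/* sector = blkno + (page-aligned offset into the extent, in 512-byte units) */
#include <stdio.h>

#define PAGE_MASK	(~((1ULL << 12) - 1))	/* assuming 4 KiB pages */

struct extent {
	unsigned long long blkno;	/* first 512-byte sector of the extent */
	unsigned long long offset;	/* file offset where the extent starts */
};

static unsigned long long pos_to_sector(const struct extent *ex,
					unsigned long long pos)
{
	return ex->blkno + (((pos & PAGE_MASK) - ex->offset) >> 9);
}

int main(void)
{
	/* Hypothetical extent: file offset 1 MiB starts at sector 20480. */
	struct extent ex = { .blkno = 20480, .offset = 1ULL << 20 };
	unsigned long long pos = ex.offset + 3 * 4096 + 123;

	/* 3 pages past the extent start = 24 sectors -> 20504 */
	printf("sector = %llu\n", pos_to_sector(&ex, pos));
	return 0;
}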
  static loff_t
- iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
                struct iomap *iomap)
  {
        struct iov_iter *iter = data;
                struct blk_dax_ctl dax = { 0 };
                ssize_t map_len;
  
-               dax.sector = iomap->blkno +
-                       (((pos & PAGE_MASK) - iomap->offset) >> 9);
+               dax.sector = dax_iomap_sector(iomap, pos);
                dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
                map_len = dax_map_atomic(iomap->bdev, &dax);
                if (map_len < 0) {
  }
  
  /**
-  * iomap_dax_rw - Perform I/O to a DAX file
+  * dax_iomap_rw - Perform I/O to a DAX file
   * @iocb:     The control block for this I/O
   * @iter:     The addresses to do I/O from or to
   * @ops:      iomap ops passed from the file system
   * and evicting any page cache pages in the region under I/O.
   */
  ssize_t
- iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
                struct iomap_ops *ops)
  {
        struct address_space *mapping = iocb->ki_filp->f_mapping;
  
        while (iov_iter_count(iter)) {
                ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
-                               iter, iomap_dax_actor);
+                               iter, dax_iomap_actor);
                if (ret <= 0)
                        break;
                pos += ret;
        iocb->ki_pos += done;
        return done ? done : ret;
  }
- EXPORT_SYMBOL_GPL(iomap_dax_rw);
+ EXPORT_SYMBOL_GPL(dax_iomap_rw);
  
  /**
-  * iomap_dax_fault - handle a page fault on a DAX file
+  * dax_iomap_fault - handle a page fault on a DAX file
   * @vma: The virtual memory area where the fault occurred
   * @vmf: The description of the fault
   * @ops: iomap ops passed from the file system
   * or mkwrite handler for DAX files. Assumes the caller has done all the
   * necessary locking for the page fault to proceed successfully.
   */
- int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                        struct iomap_ops *ops)
  {
        struct address_space *mapping = vma->vm_file->f_mapping;
        loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
        sector_t sector;
        struct iomap iomap = { 0 };
-       unsigned flags = 0;
+       unsigned flags = IOMAP_FAULT;
        int error, major = 0;
+       int locked_status = 0;
        void *entry;
  
        /*
        if (pos >= i_size_read(inode))
                return VM_FAULT_SIGBUS;
  
-       entry = grab_mapping_entry(mapping, vmf->pgoff);
+       entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
        if (IS_ERR(entry)) {
                error = PTR_ERR(entry);
                goto out;
                goto unlock_entry;
        if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
                error = -EIO;           /* fs corruption? */
-               goto unlock_entry;
+               goto finish_iomap;
        }
  
-       sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+       sector = dax_iomap_sector(&iomap, pos);
  
        if (vmf->cow_page) {
                switch (iomap.type) {
                }
  
                if (error)
-                       goto unlock_entry;
+                       goto finish_iomap;
                if (!radix_tree_exceptional_entry(entry)) {
                        vmf->page = entry;
-                       return VM_FAULT_LOCKED;
+                       locked_status = VM_FAULT_LOCKED;
+               } else {
+                       vmf->entry = entry;
+                       locked_status = VM_FAULT_DAX_LOCKED;
                }
-               vmf->entry = entry;
-               return VM_FAULT_DAX_LOCKED;
+               goto finish_iomap;
        }
  
        switch (iomap.type) {
                break;
        case IOMAP_UNWRITTEN:
        case IOMAP_HOLE:
-               if (!(vmf->flags & FAULT_FLAG_WRITE))
-                       return dax_load_hole(mapping, entry, vmf);
+               if (!(vmf->flags & FAULT_FLAG_WRITE)) {
+                       locked_status = dax_load_hole(mapping, entry, vmf);
+                       break;
+               }
                /*FALLTHRU*/
        default:
                WARN_ON_ONCE(1);
                break;
        }
  
+  finish_iomap:
+       if (ops->iomap_end) {
+               if (error) {
+                       /* keep previous error */
+                       ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
+                                       &iomap);
+               } else {
+                       error = ops->iomap_end(inode, pos, PAGE_SIZE,
+                                       PAGE_SIZE, flags, &iomap);
+               }
+       }
   unlock_entry:
-       put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+       if (!locked_status || error)
+               put_locked_mapping_entry(mapping, vmf->pgoff, entry);
   out:
        if (error == -ENOMEM)
                return VM_FAULT_OOM | major;
        /* -EBUSY is fine, somebody else faulted on the same PTE */
        if (error < 0 && error != -EBUSY)
                return VM_FAULT_SIGBUS | major;
+       if (locked_status) {
+               WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */
+               return locked_status;
+       }
        return VM_FAULT_NOPAGE | major;
  }
- EXPORT_SYMBOL_GPL(iomap_dax_fault);
+ EXPORT_SYMBOL_GPL(dax_iomap_fault);
+ #ifdef CONFIG_FS_DAX_PMD
+ /*
+  * The 'colour' (ie low bits) within a PMD of a page offset.  This comes up
+  * more often than one might expect in the below functions.
+  */
+ #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
+ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
+               struct vm_fault *vmf, unsigned long address,
+               struct iomap *iomap, loff_t pos, bool write, void **entryp)
+ {
+       struct address_space *mapping = vma->vm_file->f_mapping;
+       struct block_device *bdev = iomap->bdev;
+       struct blk_dax_ctl dax = {
+               .sector = dax_iomap_sector(iomap, pos),
+               .size = PMD_SIZE,
+       };
+       long length = dax_map_atomic(bdev, &dax);
+       void *ret;
+       if (length < 0) /* dax_map_atomic() failed */
+               return VM_FAULT_FALLBACK;
+       if (length < PMD_SIZE)
+               goto unmap_fallback;
+       if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
+               goto unmap_fallback;
+       if (!pfn_t_devmap(dax.pfn))
+               goto unmap_fallback;
+       dax_unmap_atomic(bdev, &dax);
+       ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
+                       RADIX_DAX_PMD);
+       if (IS_ERR(ret))
+               return VM_FAULT_FALLBACK;
+       *entryp = ret;
+       return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
+  unmap_fallback:
+       dax_unmap_atomic(bdev, &dax);
+       return VM_FAULT_FALLBACK;
+ }
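
Illustration only, not part of the commit: PG_PMD_COLOUR is the low-bit mask of a page offset within a PMD (511 with 4 KiB pages and 2 MiB PMDs), and the fallback checks above use it to reject misaligned pfns and PMDs that would run past EOF. A stand-alone sketch with invented numbers:

/* The two PG_PMD_COLOUR checks, assuming 4 KiB pages and 2 MiB PMDs. */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT	12UL
#define PMD_SIZE	(1UL << 21)
#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)	/* 511 */

/* A PMD mapping needs a pfn that starts on a 512-page boundary... */
static bool pfn_pmd_aligned(unsigned long pfn)
{
	return (pfn & PG_PMD_COLOUR) == 0;
}

/* ...and the last page the PMD covers must still be inside the file. */
static bool pmd_fits_in_file(unsigned long pgoff, unsigned long max_pgoff)
{
	return (pgoff | PG_PMD_COLOUR) <= max_pgoff;
}

int main(void)
{
	printf("colour mask: %lu\n", PG_PMD_COLOUR);
	printf("pfn 0x200000: %d, pfn 0x200001: %d\n",
	       pfn_pmd_aligned(0x200000), pfn_pmd_aligned(0x200001));
	/* pgoff 512 covers pages 512..1023, too big for a 1000-page file. */
	printf("fits in 1000 pages: %d, in 1024 pages: %d\n",
	       pmd_fits_in_file(512, 999), pmd_fits_in_file(512, 1023));
	return 0;
}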
+ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
+               struct vm_fault *vmf, unsigned long address,
+               struct iomap *iomap, void **entryp)
+ {
+       struct address_space *mapping = vma->vm_file->f_mapping;
+       unsigned long pmd_addr = address & PMD_MASK;
+       struct page *zero_page;
+       spinlock_t *ptl;
+       pmd_t pmd_entry;
+       void *ret;
+       zero_page = mm_get_huge_zero_page(vma->vm_mm);
+       if (unlikely(!zero_page))
+               return VM_FAULT_FALLBACK;
+       ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
+                       RADIX_DAX_PMD | RADIX_DAX_HZP);
+       if (IS_ERR(ret))
+               return VM_FAULT_FALLBACK;
+       *entryp = ret;
+       ptl = pmd_lock(vma->vm_mm, pmd);
+       if (!pmd_none(*pmd)) {
+               spin_unlock(ptl);
+               return VM_FAULT_FALLBACK;
+       }
+       pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
+       pmd_entry = pmd_mkhuge(pmd_entry);
+       set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
+       spin_unlock(ptl);
+       return VM_FAULT_NOPAGE;
+ }
+ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
+               pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
+ {
+       struct address_space *mapping = vma->vm_file->f_mapping;
+       unsigned long pmd_addr = address & PMD_MASK;
+       bool write = flags & FAULT_FLAG_WRITE;
+       unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
+       struct inode *inode = mapping->host;
+       int result = VM_FAULT_FALLBACK;
+       struct iomap iomap = { 0 };
+       pgoff_t max_pgoff, pgoff;
+       struct vm_fault vmf;
+       void *entry;
+       loff_t pos;
+       int error;
+       /* Fall back to PTEs if we're going to COW */
+       if (write && !(vma->vm_flags & VM_SHARED))
+               goto fallback;
+       /* If the PMD would extend outside the VMA */
+       if (pmd_addr < vma->vm_start)
+               goto fallback;
+       if ((pmd_addr + PMD_SIZE) > vma->vm_end)
+               goto fallback;
+       /*
+        * Check whether offset isn't beyond end of file now. Caller is
+        * supposed to hold locks serializing us with truncate / punch hole so
+        * this is a reliable test.
+        */
+       pgoff = linear_page_index(vma, pmd_addr);
+       max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+       if (pgoff > max_pgoff)
+               return VM_FAULT_SIGBUS;
+       /* If the PMD would extend beyond the file size */
+       if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
+               goto fallback;
+       /*
+        * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+        * PMD or a HZP entry.  If it can't (because a 4k page is already in
+        * the tree, for instance), it will return -EEXIST and we just fall
+        * back to 4k entries.
+        */
+       entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+       if (IS_ERR(entry))
+               goto fallback;
+       /*
+        * Note that we don't use iomap_apply here.  We aren't doing I/O, only
+        * setting up a mapping, so really we're using iomap_begin() as a way
+        * to look up our filesystem block.
+        */
+       pos = (loff_t)pgoff << PAGE_SHIFT;
+       error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
+       if (error)
+               goto unlock_entry;
+       if (iomap.offset + iomap.length < pos + PMD_SIZE)
+               goto finish_iomap;
+       vmf.pgoff = pgoff;
+       vmf.flags = flags;
+       vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
+       switch (iomap.type) {
+       case IOMAP_MAPPED:
+               result = dax_pmd_insert_mapping(vma, pmd, &vmf, address,
+                               &iomap, pos, write, &entry);
+               break;
+       case IOMAP_UNWRITTEN:
+       case IOMAP_HOLE:
+               if (WARN_ON_ONCE(write))
+                       goto finish_iomap;
+               result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
+                               &entry);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               break;
+       }
+  finish_iomap:
+       if (ops->iomap_end) {
+               if (result == VM_FAULT_FALLBACK) {
+                       ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags,
+                                       &iomap);
+               } else {
+                       error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE,
+                                       iomap_flags, &iomap);
+                       if (error)
+                               result = VM_FAULT_FALLBACK;
+               }
+       }
+  unlock_entry:
+       put_locked_mapping_entry(mapping, pgoff, entry);
+  fallback:
+       if (result == VM_FAULT_FALLBACK) {
+               split_huge_pmd(vma, pmd, address);
+               count_vm_event(THP_FAULT_FALLBACK);
+       }
+       return result;
+ }
+ EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
+ #endif /* CONFIG_FS_DAX_PMD */
  #endif /* CONFIG_FS_IOMAP */
diff --combined fs/ext4/page-io.c
index e0b3b54cdef32651d32685bc6cfe56ae23602dcf,902a3e3059b3442b1d47ffb939f68ab5df8950f8..e2332a65e8fbb0d12ef754f8f70c5ee453013525
@@@ -340,7 -340,7 +340,7 @@@ void ext4_io_submit(struct ext4_io_subm
  
        if (bio) {
                int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
 -                                WRITE_SYNC : 0;
 +                                REQ_SYNC : 0;
                bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
                submit_bio(io->io_bio);
        }
@@@ -470,7 -470,8 +470,8 @@@ int ext4_bio_write_page(struct ext4_io_
                gfp_t gfp_flags = GFP_NOFS;
  
        retry_encrypt:
-               data_page = fscrypt_encrypt_page(inode, page, gfp_flags);
+               data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
+                                               page->index, gfp_flags);
                if (IS_ERR(data_page)) {
                        ret = PTR_ERR(data_page);
                        if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
diff --combined fs/ext4/super.c
index caa4147cda47b599e84a301f0acc13aa006f519c,79af71d4fccd8870f866706777d4d66034df1795..dfc8309d7755d55a6e7e73814ede13f204d60cae
@@@ -863,7 -863,6 +863,6 @@@ static void ext4_put_super(struct super
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
-       brelse(sbi->s_sbh);
  #ifdef CONFIG_QUOTA
        for (i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
        }
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
+       brelse(sbi->s_sbh);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
@@@ -1114,37 -1114,55 +1114,55 @@@ static int ext4_prepare_context(struct 
  static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
                                                        void *fs_data)
  {
-       handle_t *handle;
-       int res, res2;
+       handle_t *handle = fs_data;
+       int res, res2, retries = 0;
+       /*
+        * If a journal handle was specified, then the encryption context is
+        * being set on a new inode via inheritance and is part of a larger
+        * transaction to create the inode.  Otherwise the encryption context is
+        * being set on an existing inode in its own transaction.  Only in the
+        * latter case should the "retry on ENOSPC" logic be used.
+        */
  
-       /* fs_data is null when internally used. */
-       if (fs_data) {
-               res  = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
-                               EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx,
-                               len, 0);
+       if (handle) {
+               res = ext4_xattr_set_handle(handle, inode,
+                                           EXT4_XATTR_INDEX_ENCRYPTION,
+                                           EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+                                           ctx, len, 0);
                if (!res) {
                        ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
                        ext4_clear_inode_state(inode,
                                        EXT4_STATE_MAY_INLINE_DATA);
+                       /*
+                        * Update inode->i_flags - e.g. S_DAX may get disabled
+                        */
+                       ext4_set_inode_flags(inode);
                }
                return res;
        }
  
+ retry:
        handle = ext4_journal_start(inode, EXT4_HT_MISC,
                        ext4_jbd2_credits_xattr(inode));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
  
-       res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
-                       EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx,
-                       len, 0);
+       res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
+                                   EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+                                   ctx, len, 0);
        if (!res) {
                ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+               /* Update inode->i_flags - e.g. S_DAX may get disabled */
+               ext4_set_inode_flags(inode);
                res = ext4_mark_inode_dirty(handle, inode);
                if (res)
                        EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
        }
        res2 = ext4_journal_stop(handle);
+       if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+               goto retry;
        if (!res)
                res = res2;
        return res;
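
Illustration only, not part of the commit: the new retry logic follows the usual ext4 pattern of running the xattr update in its own transaction and retrying while ext4_should_retry_alloc() reports that committing the journal may free space. A user-space caricature of that loop (everything here is simulated):

/* Caricature of "retry on ENOSPC after a journal commit frees space". */
#include <errno.h>
#include <stdio.h>

#define MAX_RETRIES	3

static int free_blocks;		/* pretend free-space counter */

/* Pretend transactional operation: needs one free block. */
static int set_xattr_in_transaction(void)
{
	if (free_blocks <= 0)
		return -ENOSPC;
	free_blocks--;
	return 0;
}

/* Stand-in for ext4_should_retry_alloc(): a commit frees space, up to a limit. */
static int should_retry_alloc(int *retries)
{
	if (*retries >= MAX_RETRIES)
		return 0;
	(*retries)++;
	free_blocks++;
	return 1;
}

int main(void)
{
	int res, retries = 0;

retry:
	res = set_xattr_in_transaction();
	if (res == -ENOSPC && should_retry_alloc(&retries))
		goto retry;

	printf("result %d after %d retries\n", res, retries);
	return 0;
}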
@@@ -1883,12 -1901,6 +1901,6 @@@ static int parse_options(char *options
                        return 0;
                }
        }
-       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
-           test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
-               ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
-                        "in data=ordered mode");
-               return 0;
-       }
        return 1;
  }
  
@@@ -2330,7 -2342,7 +2342,7 @@@ static void ext4_orphan_cleanup(struct 
                                struct ext4_super_block *es)
  {
        unsigned int s_flags = sb->s_flags;
-       int nr_orphans = 0, nr_truncates = 0;
+       int ret, nr_orphans = 0, nr_truncates = 0;
  #ifdef CONFIG_QUOTA
        int i;
  #endif
                                  inode->i_ino, inode->i_size);
                        inode_lock(inode);
                        truncate_inode_pages(inode->i_mapping, inode->i_size);
-                       ext4_truncate(inode);
+                       ret = ext4_truncate(inode);
+                       if (ret)
+                               ext4_std_error(inode->i_sb, ret);
                        inode_unlock(inode);
                        nr_truncates++;
                } else {
@@@ -3193,10 -3207,15 +3207,15 @@@ static int count_overhead(struct super_
                        ext4_set_bit(s++, buf);
                        count++;
                }
-               for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
-                       ext4_set_bit(EXT4_B2C(sbi, s++), buf);
-                       count++;
+               j = ext4_bg_num_gdb(sb, grp);
+               if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
+                       ext4_error(sb, "Invalid number of block group "
+                                  "descriptor blocks: %d", j);
+                       j = EXT4_BLOCKS_PER_GROUP(sb) - s;
                }
+               count += j;
+               for (; j > 0; j--)
+                       ext4_set_bit(EXT4_B2C(sbi, s++), buf);
        }
        if (!count)
                return 0;
@@@ -3301,7 -3320,7 +3320,7 @@@ static int ext4_fill_super(struct super
        char *orig_data = kstrdup(data, GFP_KERNEL);
        struct buffer_head *bh;
        struct ext4_super_block *es = NULL;
-       struct ext4_sb_info *sbi;
+       struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        ext4_fsblk_t block;
        ext4_fsblk_t sb_block = get_sb_block(&data);
        ext4_fsblk_t logical_sb_block;
        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
        ext4_group_t first_not_zeroed;
  
-       sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
-       if (!sbi)
-               goto out_free_orig;
+       if ((data && !orig_data) || !sbi)
+               goto out_free_base;
  
        sbi->s_blockgroup_lock =
                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
-       if (!sbi->s_blockgroup_lock) {
-               kfree(sbi);
-               goto out_free_orig;
-       }
+       if (!sbi->s_blockgroup_lock)
+               goto out_free_base;
        sb->s_fs_info = sbi;
        sbi->s_sb = sb;
        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
         */
        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
  
-       if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
-                          &journal_devnum, &journal_ioprio, 0)) {
-               ext4_msg(sb, KERN_WARNING,
-                        "failed to parse options in superblock: %s",
-                        sbi->s_es->s_mount_opts);
+       if (sbi->s_es->s_mount_opts[0]) {
+               char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
+                                             sizeof(sbi->s_es->s_mount_opts),
+                                             GFP_KERNEL);
+               if (!s_mount_opts)
+                       goto failed_mount;
+               if (!parse_options(s_mount_opts, sb, &journal_devnum,
+                                  &journal_ioprio, 0)) {
+                       ext4_msg(sb, KERN_WARNING,
+                                "failed to parse options in superblock: %s",
+                                s_mount_opts);
+               }
+               kfree(s_mount_opts);
        }
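
Illustration only, not part of the commit: s_mount_opts is a fixed-size on-disk field, so a corrupted superblock can leave it without a terminating NUL; kstrndup() (and the "%.*s" print further down) bound every access to sizeof(). A user-space sketch of the same defensive copy:

/* Safely copying a fixed-size field that may lack a terminating NUL. */
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct fake_super {		/* invented layout, for illustration only */
	char s_mount_opts[8];	/* fixed-size field, possibly unterminated */
};

int main(void)
{
	struct fake_super es;
	char *opts;

	/* Fill the whole field: no NUL anywhere inside it. */
	memset(es.s_mount_opts, 'x', sizeof(es.s_mount_opts));

	/* strndup() stops at sizeof() even without a NUL, like kstrndup() above. */
	opts = strndup(es.s_mount_opts, sizeof(es.s_mount_opts));
	if (!opts)
		return 1;
	printf("copied %zu bytes safely\n", strlen(opts));
	free(opts);
	return 0;
}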
        sbi->s_def_mount_opt = sbi->s_mount_opt;
        if (!parse_options((char *) data, sb, &journal_devnum,
                                 "both data=journal and dax");
                        goto failed_mount;
                }
+               if (ext4_has_feature_encrypt(sb)) {
+                       ext4_msg(sb, KERN_WARNING,
+                                "encrypted files will use data=ordered "
+                                "instead of data journaling mode");
+               }
                if (test_opt(sb, DELALLOC))
                        clear_opt(sb, DELALLOC);
        } else {
  
        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
-       if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
-               goto cantfind_ext4;
  
        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
        if (sbi->s_inodes_per_block == 0)
                goto cantfind_ext4;
+       if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
+           sbi->s_inodes_per_group > blocksize * 8) {
+               ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
+                        sbi->s_inodes_per_group);
+               goto failed_mount;
+       }
        sbi->s_itb_per_group = sbi->s_inodes_per_group /
                                        sbi->s_inodes_per_block;
        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
        }
        sbi->s_cluster_ratio = clustersize / blocksize;
  
-       if (sbi->s_inodes_per_group > blocksize * 8) {
-               ext4_msg(sb, KERN_ERR,
-                      "#inodes per group too big: %lu",
-                      sbi->s_inodes_per_group);
-               goto failed_mount;
-       }
        /* Do we have standard group size of clustersize * 8 blocks ? */
        if (sbi->s_blocks_per_group == clustersize << 3)
                set_opt2(sb, STD_GROUP_SIZE);
                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
                   EXT4_DESC_PER_BLOCK(sb);
+       if (ext4_has_feature_meta_bg(sb)) {
+               if (le32_to_cpu(es->s_first_meta_bg) >= db_count) {
+                       ext4_msg(sb, KERN_WARNING,
+                                "first meta block group too large: %u "
+                                "(group descriptor block count %u)",
+                                le32_to_cpu(es->s_first_meta_bg), db_count);
+                       goto failed_mount;
+               }
+       }
        sbi->s_group_desc = ext4_kvmalloc(db_count *
                                          sizeof(struct buffer_head *),
                                          GFP_KERNEL);
        default:
                break;
        }
+       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
+           test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+               ext4_msg(sb, KERN_ERR, "can't mount with "
+                       "journal_async_commit in data=ordered mode");
+               goto failed_mount_wq;
+       }
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
  
        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
@@@ -4160,7 -4204,9 +4204,9 @@@ no_journal
  
        if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
                ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
-                        "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+                        "Opts: %.*s%s%s", descr,
+                        (int) sizeof(sbi->s_es->s_mount_opts),
+                        sbi->s_es->s_mount_opts,
                         *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
  
        if (es->s_error_count)
@@@ -4239,8 -4285,8 +4285,8 @@@ failed_mount
  out_fail:
        sb->s_fs_info = NULL;
        kfree(sbi->s_blockgroup_lock);
+ out_free_base:
        kfree(sbi);
- out_free_orig:
        kfree(orig_data);
        return err ? err : ret;
  }
@@@ -4550,7 -4596,8 +4596,8 @@@ static int ext4_commit_super(struct sup
                                &EXT4_SB(sb)->s_freeinodes_counter));
        BUFFER_TRACE(sbh, "marking dirty");
        ext4_superblock_csum_set(sb);
-       lock_buffer(sbh);
+       if (sync)
+               lock_buffer(sbh);
        if (buffer_write_io_error(sbh)) {
                /*
                 * Oh, dear.  A previous attempt to write the
                set_buffer_uptodate(sbh);
        }
        mark_buffer_dirty(sbh);
-       unlock_buffer(sbh);
        if (sync) {
+               unlock_buffer(sbh);
                error = __sync_dirty_buffer(sbh,
 -                      test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
 +                      test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC);
                if (error)
                        return error;
  
@@@ -4857,6 -4904,13 +4904,13 @@@ static int ext4_remount(struct super_bl
                        err = -EINVAL;
                        goto restore_opts;
                }
+       } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
+               if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                               "journal_async_commit in data=ordered mode");
+                       err = -EINVAL;
+                       goto restore_opts;
+               }
        }
  
        if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
@@@ -5366,7 -5420,7 +5420,7 @@@ static int ext4_quota_off(struct super_
        handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
        if (IS_ERR(handle))
                goto out;
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+       inode->i_mtime = inode->i_ctime = current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
  
diff --combined fs/f2fs/data.c
index 7c344b3ad70faf87b65acdfb5f4c915f96176f03,9f0ba90b92e4aee60622322973ef80d14b24e1e6..9ac262564fa6b5934b8ec4987425ea3205bba1cf
  #include "trace.h"
  #include <trace/events/f2fs.h>
  
 +static bool __is_cp_guaranteed(struct page *page)
 +{
 +      struct address_space *mapping = page->mapping;
 +      struct inode *inode;
 +      struct f2fs_sb_info *sbi;
 +
 +      if (!mapping)
 +              return false;
 +
 +      inode = mapping->host;
 +      sbi = F2FS_I_SB(inode);
 +
 +      if (inode->i_ino == F2FS_META_INO(sbi) ||
 +                      inode->i_ino ==  F2FS_NODE_INO(sbi) ||
 +                      S_ISDIR(inode->i_mode) ||
 +                      is_cold_data(page))
 +              return true;
 +      return false;
 +}
 +
  static void f2fs_read_end_io(struct bio *bio)
  {
        struct bio_vec *bvec;
@@@ -91,7 -71,6 +91,7 @@@ static void f2fs_write_end_io(struct bi
  
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
 +              enum count_type type = WB_DATA_TYPE(page);
  
                fscrypt_pullback_bio_page(&page, true);
  
                        mapping_set_error(page->mapping, -EIO);
                        f2fs_stop_checkpoint(sbi, true);
                }
 +              dec_page_count(sbi, type);
 +              clear_cold_data(page);
                end_page_writeback(page);
        }
 -      if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
 +      if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
                                wq_has_sleeper(&sbi->cp_wait))
                wake_up(&sbi->cp_wait);
  
        bio_put(bio);
  }
  
 +/*
 + * Return the block device backing @blk_addr and, if @bio is given,
 + * point the bio at that device with @blk_addr rebased to it.
 + */
 +struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 +                              block_t blk_addr, struct bio *bio)
 +{
 +      struct block_device *bdev = sbi->sb->s_bdev;
 +      int i;
 +
 +      for (i = 0; i < sbi->s_ndevs; i++) {
 +              if (FDEV(i).start_blk <= blk_addr &&
 +                                      FDEV(i).end_blk >= blk_addr) {
 +                      blk_addr -= FDEV(i).start_blk;
 +                      bdev = FDEV(i).bdev;
 +                      break;
 +              }
 +      }
 +      if (bio) {
 +              bio->bi_bdev = bdev;
 +              bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
 +      }
 +      return bdev;
 +}
 +
 +int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 +{
 +      int i;
 +
 +      for (i = 0; i < sbi->s_ndevs; i++)
 +              if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
 +                      return i;
 +      return 0;
 +}
 +
 +static bool __same_bdev(struct f2fs_sb_info *sbi,
 +                              block_t blk_addr, struct bio *bio)
 +{
 +      return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
 +}
 +
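
Illustration only, not part of the commit: f2fs_target_device() maps a filesystem-global block address onto one of several devices using per-device [start_blk, end_blk] ranges and rebases the address for that device. A stand-alone sketch of the lookup (the device table is invented):

/* Map a global block address to (device, local block) via per-device ranges. */
#include <stdio.h>

struct dev_range {
	const char *name;	/* invented device names */
	unsigned long start_blk;
	unsigned long end_blk;
};

static const struct dev_range devs[] = {
	{ "dev0",     0,  9999 },
	{ "dev1", 10000, 19999 },
	{ "dev2", 20000, 29999 },
};

/* Find the device whose range contains blk and rebase blk to that device. */
static int target_device(unsigned long blk, unsigned long *local_blk)
{
	for (unsigned int i = 0; i < sizeof(devs) / sizeof(devs[0]); i++) {
		if (devs[i].start_blk <= blk && blk <= devs[i].end_blk) {
			*local_blk = blk - devs[i].start_blk;
			return i;
		}
	}
	*local_blk = blk;
	return 0;		/* fall back to the first device */
}

int main(void)
{
	unsigned long local;
	int i = target_device(23456, &local);

	printf("block 23456 -> %s, local block %lu\n", devs[i].name, local);
	return 0;
}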
  /*
   * Low-level block read/write IO operations.
   */
@@@ -160,7 -97,8 +160,7 @@@ static struct bio *__bio_alloc(struct f
  
        bio = f2fs_bio_alloc(npages);
  
 -      bio->bi_bdev = sbi->sb->s_bdev;
 -      bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
 +      f2fs_target_device(sbi, blk_addr, bio);
        bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
        bio->bi_private = is_read ? NULL : sbi;
  
@@@ -171,7 -109,8 +171,7 @@@ static inline void __submit_bio(struct 
                                struct bio *bio, enum page_type type)
  {
        if (!is_read_io(bio_op(bio))) {
 -              atomic_inc(&sbi->nr_wb_bios);
 -              if (f2fs_sb_mounted_hmsmr(sbi->sb) &&
 +              if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
                        current->plug && (type == DATA || type == NODE))
                        blk_finish_plug(current->plug);
        }
@@@ -259,9 -198,11 +259,9 @@@ static void __f2fs_submit_merged_bio(st
        if (type >= META_FLUSH) {
                io->fio.type = META_FLUSH;
                io->fio.op = REQ_OP_WRITE;
 -              if (test_opt(sbi, NOBARRIER))
 -                      io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO;
 -              else
 -                      io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META |
 -                                                              REQ_PRIO;
 +              io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO;
 +              if (!test_opt(sbi, NOBARRIER))
 +                      io->fio.op_flags |= REQ_FUA;
        }
        __submit_merged_bio(io);
  out:
@@@ -329,24 -270,22 +329,24 @@@ void f2fs_submit_page_mbio(struct f2fs_
                verify_block_addr(sbi, fio->old_blkaddr);
        verify_block_addr(sbi, fio->new_blkaddr);
  
 +      bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
 +
 +      if (!is_read)
 +              inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 +
        down_write(&io->io_rwsem);
  
        if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
 -          (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
 +          (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
 +                      !__same_bdev(sbi, fio->new_blkaddr, io->bio)))
                __submit_merged_bio(io);
  alloc_new:
        if (io->bio == NULL) {
 -              int bio_blocks = MAX_BIO_BLOCKS(sbi);
 -
                io->bio = __bio_alloc(sbi, fio->new_blkaddr,
 -                                              bio_blocks, is_read);
 +                                              BIO_MAX_PAGES, is_read);
                io->fio = *fio;
        }
  
 -      bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
 -
        if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
                                                        PAGE_SIZE) {
                __submit_merged_bio(io);
@@@ -544,7 -483,7 +544,7 @@@ struct page *find_data_page(struct inod
                return page;
        f2fs_put_page(page, 0);
  
 -      page = get_read_data_page(inode, index, READ_SYNC, false);
 +      page = get_read_data_page(inode, index, 0, false);
        if (IS_ERR(page))
                return page;
  
@@@ -570,7 -509,7 +570,7 @@@ struct page *get_lock_data_page(struct 
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
  repeat:
 -      page = get_read_data_page(inode, index, READ_SYNC, for_write);
 +      page = get_read_data_page(inode, index, 0, for_write);
        if (IS_ERR(page))
                return page;
  
@@@ -651,6 -590,7 +651,6 @@@ static int __allocate_data_block(struc
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
        struct f2fs_summary sum;
        struct node_info ni;
 -      int seg = CURSEG_WARM_DATA;
        pgoff_t fofs;
        blkcnt_t count = 1;
  
@@@ -668,8 -608,11 +668,8 @@@ alloc
        get_node_info(sbi, dn->nid, &ni);
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
  
 -      if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
 -              seg = CURSEG_DIRECT_IO;
 -
        allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
 -                                                              &sum, seg);
 +                                              &sum, CURSEG_WARM_DATA);
        set_data_blkaddr(dn);
  
        /* update i_size */
        return 0;
  }
  
 -ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 +static inline bool __force_buffered_io(struct inode *inode, int rw)
 +{
 +      return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) ||
 +                      (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
 +                      F2FS_I_SB(inode)->s_ndevs);
 +}
 +
 +int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
  {
        struct inode *inode = file_inode(iocb->ki_filp);
        struct f2fs_map_blocks map;
 -      ssize_t ret = 0;
 +      int err = 0;
  
        map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
        map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
        map.m_next_pgofs = NULL;
  
        if (iocb->ki_flags & IOCB_DIRECT) {
 -              ret = f2fs_convert_inline_inode(inode);
 -              if (ret)
 -                      return ret;
 -              return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
 +              err = f2fs_convert_inline_inode(inode);
 +              if (err)
 +                      return err;
 +              return f2fs_map_blocks(inode, &map, 1,
 +                      __force_buffered_io(inode, WRITE) ?
 +                              F2FS_GET_BLOCK_PRE_AIO :
 +                              F2FS_GET_BLOCK_PRE_DIO);
        }
        if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
 -              ret = f2fs_convert_inline_inode(inode);
 -              if (ret)
 -                      return ret;
 +              err = f2fs_convert_inline_inode(inode);
 +              if (err)
 +                      return err;
        }
        if (!f2fs_has_inline_data(inode))
                return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
 -      return ret;
 +      return err;
  }
  
  /*
@@@ -743,6 -676,7 +743,6 @@@ int f2fs_map_blocks(struct inode *inode
        unsigned int ofs_in_node, last_ofs_in_node;
        blkcnt_t prealloc;
        struct extent_info ei;
 -      bool allocated = false;
        block_t blkaddr;
  
        if (!maxblocks)
@@@ -782,7 -716,7 +782,7 @@@ next_dnode
        }
  
        prealloc = 0;
 -      ofs_in_node = dn.ofs_in_node;
 +      last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
        end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
  
  next_block:
                                }
                        } else {
                                err = __allocate_data_block(&dn);
 -                              if (!err) {
 +                              if (!err)
                                        set_inode_flag(inode, FI_APPEND_WRITE);
 -                                      allocated = true;
 -                              }
                        }
                        if (err)
                                goto sync_out;
@@@ -857,6 -793,7 +857,6 @@@ skip
                err = reserve_new_blocks(&dn, prealloc);
                if (err)
                        goto sync_out;
 -              allocated = dn.node_changed;
  
                map->m_len += dn.ofs_in_node - ofs_in_node;
                if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
  
        if (create) {
                f2fs_unlock_op(sbi);
 -              f2fs_balance_fs(sbi, allocated);
 +              f2fs_balance_fs(sbi, dn.node_changed);
        }
 -      allocated = false;
        goto next_dnode;
  
  sync_out:
  unlock_out:
        if (create) {
                f2fs_unlock_op(sbi);
 -              f2fs_balance_fs(sbi, allocated);
 +              f2fs_balance_fs(sbi, dn.node_changed);
        }
  out:
        trace_f2fs_map_blocks(inode, map, err);
@@@ -896,19 -834,19 +896,19 @@@ static int __get_data_block(struct inod
                        pgoff_t *next_pgofs)
  {
        struct f2fs_map_blocks map;
 -      int ret;
 +      int err;
  
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
        map.m_next_pgofs = next_pgofs;
  
 -      ret = f2fs_map_blocks(inode, &map, create, flag);
 -      if (!ret) {
 +      err = f2fs_map_blocks(inode, &map, create, flag);
 +      if (!err) {
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
                bh->b_size = map.m_len << inode->i_blkbits;
        }
 -      return ret;
 +      return err;
  }
  
  static int get_data_block(struct inode *inode, sector_t iblock,
@@@ -953,6 -891,7 +953,6 @@@ int f2fs_fiemap(struct inode *inode, st
        struct buffer_head map_bh;
        sector_t start_blk, last_blk;
        pgoff_t next_pgofs;
 -      loff_t isize;
        u64 logical = 0, phys = 0, size = 0;
        u32 flags = 0;
        int ret = 0;
  
        inode_lock(inode);
  
 -      isize = i_size_read(inode);
 -      if (start >= isize)
 -              goto out;
 -
 -      if (start + len > isize)
 -              len = isize - start;
 -
        if (logical_to_blk(inode, len) == 0)
                len = blk_to_logical(inode, 1);
  
@@@ -987,11 -933,13 +987,11 @@@ next
        /* HOLE */
        if (!buffer_mapped(&map_bh)) {
                start_blk = next_pgofs;
 -              /* Go through holes util pass the EOF */
 -              if (blk_to_logical(inode, start_blk) < isize)
 +
 +              if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
 +                                      F2FS_I_SB(inode)->max_file_blocks))
                        goto prep_next;
 -              /* Found a hole beyond isize means no more extents.
 -               * Note that the premise is that filesystems don't
 -               * punch holes beyond isize and keep size unchanged.
 -               */
 +
                flags |= FIEMAP_EXTENT_LAST;
        }
  
@@@ -1034,6 -982,7 +1034,6 @@@ static struct bio *f2fs_grab_bio(struc
  {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct fscrypt_ctx *ctx = NULL;
 -      struct block_device *bdev = sbi->sb->s_bdev;
        struct bio *bio;
  
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
                        fscrypt_release_ctx(ctx);
                return ERR_PTR(-ENOMEM);
        }
 -      bio->bi_bdev = bdev;
 -      bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
 +      f2fs_target_device(sbi, blkaddr, bio);
        bio->bi_end_io = f2fs_read_end_io;
        bio->bi_private = ctx;
  
@@@ -1146,8 -1096,7 +1146,8 @@@ got_it
                 * This page will go to BIO.  Do we need to send this
                 * BIO off first?
                 */
 -              if (bio && (last_block_in_bio != block_nr - 1)) {
 +              if (bio && (last_block_in_bio != block_nr - 1 ||
 +                      !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
  submit_and_realloc:
                        __submit_bio(F2FS_I_SB(inode), bio, DATA);
                        bio = NULL;
@@@ -1246,7 -1195,9 +1246,9 @@@ int do_write_data_page(struct f2fs_io_i
                                                        fio->old_blkaddr);
  retry_encrypt:
                fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
-                                                               gfp_flags);
+                                                       PAGE_SIZE, 0,
+                                                       fio->page->index,
+                                                       gfp_flags);
                if (IS_ERR(fio->encrypted_page)) {
                        err = PTR_ERR(fio->encrypted_page);
                        if (err == -ENOMEM) {
@@@ -1302,7 -1253,7 +1304,7 @@@ static int f2fs_write_data_page(struct 
                .sbi = sbi,
                .type = DATA,
                .op = REQ_OP_WRITE,
 -              .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
 +              .op_flags = wbc_to_write_flags(wbc),
                .page = page,
                .encrypted_page = NULL,
        };
@@@ -1362,6 -1313,7 +1364,6 @@@ done
        if (err && err != -ENOENT)
                goto redirty_out;
  
 -      clear_cold_data(page);
  out:
        inode_dec_dirty_pages(inode);
        if (err)
  
  redirty_out:
        redirty_page_for_writepage(wbc, page);
 +      if (!err)
 +              return AOP_WRITEPAGE_ACTIVATE;
        unlock_page(page);
        return err;
  }
@@@ -1479,15 -1429,6 +1481,15 @@@ continue_unlock
  
                        ret = mapping->a_ops->writepage(page, wbc);
                        if (unlikely(ret)) {
 +                              /*
 +                               * keep nr_to_write, since vfs uses this to
 +                               * get # of written pages.
 +                               */
 +                              if (ret == AOP_WRITEPAGE_ACTIVATE) {
 +                                      unlock_page(page);
 +                                      ret = 0;
 +                                      continue;
 +                              }
                                done_index = page->index + 1;
                                done = 1;
                                break;
@@@ -1724,7 -1665,7 +1726,7 @@@ repeat
                        err = PTR_ERR(bio);
                        goto fail;
                }
 -              bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC);
 +              bio->bi_opf = REQ_OP_READ;
                if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
                        bio_put(bio);
                        err = -EFAULT;
@@@ -1775,6 -1716,7 +1777,6 @@@ static int f2fs_write_end(struct file *
                goto unlock_out;
  
        set_page_dirty(page);
 -      clear_cold_data(page);
  
        if (pos + copied > i_size_read(inode))
                f2fs_i_size_write(inode, pos + copied);
@@@ -1811,7 -1753,9 +1813,7 @@@ static ssize_t f2fs_direct_IO(struct ki
        if (err)
                return err;
  
 -      if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
 -              return 0;
 -      if (test_opt(F2FS_I_SB(inode), LFS))
 +      if (__force_buffered_io(inode, rw))
                return 0;
  
        trace_f2fs_direct_IO_enter(inode, offset, count, rw);
@@@ -1843,14 -1787,12 +1845,14 @@@ void f2fs_invalidate_page(struct page *
                return;
  
        if (PageDirty(page)) {
 -              if (inode->i_ino == F2FS_META_INO(sbi))
 +              if (inode->i_ino == F2FS_META_INO(sbi)) {
                        dec_page_count(sbi, F2FS_DIRTY_META);
 -              else if (inode->i_ino == F2FS_NODE_INO(sbi))
 +              } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
                        dec_page_count(sbi, F2FS_DIRTY_NODES);
 -              else
 +              } else {
                        inode_dec_dirty_pages(inode);
 +                      remove_dirty_inode(inode);
 +              }
        }
  
        /* This is atomic written page, keep Private */
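
Taken together, the data.c hunks above converge the direct-I/O policy into one helper: __force_buffered_io() falls back to buffered I/O when the inode is an encrypted regular file, when a write targets an LFS-mode mount, or when the filesystem spans multiple devices, and both f2fs_preallocate_blocks() and f2fs_direct_IO() now consult it. A minimal standalone sketch of that decision (plain booleans stand in for the in-kernel inode and superblock state; this is illustrative, not f2fs code):

    /* Illustrative only: mirrors the __force_buffered_io() policy above. */
    #include <stdbool.h>
    #include <stdio.h>

    struct io_ctx {
            bool encrypted_regular; /* f2fs_encrypted_inode() && S_ISREG() */
            bool lfs_mode;          /* test_opt(sbi, LFS)                  */
            int  ndevs;             /* sbi->s_ndevs (multi-device mount)   */
            bool is_write;
    };

    static bool force_buffered_io(const struct io_ctx *c)
    {
            return c->encrypted_regular ||
                   (c->is_write && c->lfs_mode) ||
                   c->ndevs;
    }

    int main(void)
    {
            struct io_ctx c = { .lfs_mode = true, .is_write = true };

            printf("fall back to buffered I/O: %d\n", force_buffered_io(&c));
            return 0;
    }
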
diff --combined fs/f2fs/f2fs.h
index 23c86e8cf5237e55bd375cb57ec13de10f264dbb,8e94b7bda42b0ba9a4628b92c598ba77a5e722dd..2da8c3aa0ce5db222ed1c60aa6d394140d564aae
@@@ -103,7 -103,7 +103,7 @@@ struct f2fs_mount_info 
  };
  
  #define F2FS_FEATURE_ENCRYPT  0x0001
 -#define F2FS_FEATURE_HMSMR    0x0002
 +#define F2FS_FEATURE_BLKZONED 0x0002
  
  #define F2FS_HAS_FEATURE(sb, mask)                                    \
        ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@@ -401,7 -401,6 +401,7 @@@ struct f2fs_map_blocks 
  #define FADVISE_LOST_PINO_BIT 0x02
  #define FADVISE_ENCRYPT_BIT   0x04
  #define FADVISE_ENC_NAME_BIT  0x08
 +#define FADVISE_KEEP_SIZE_BIT 0x10
  
  #define file_is_cold(inode)   is_file(inode, FADVISE_COLD_BIT)
  #define file_wrong_pino(inode)        is_file(inode, FADVISE_LOST_PINO_BIT)
  #define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT)
  #define file_enc_name(inode)  is_file(inode, FADVISE_ENC_NAME_BIT)
  #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
 +#define file_keep_isize(inode)        is_file(inode, FADVISE_KEEP_SIZE_BIT)
 +#define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT)
  
  #define DEF_DIR_LEVEL         0
  
@@@ -431,7 -428,7 +431,7 @@@ struct f2fs_inode_info 
        /* Use below internally in f2fs*/
        unsigned long flags;            /* use to pass per-file flags */
        struct rw_semaphore i_sem;      /* protect fi info */
 -      struct percpu_counter dirty_pages;      /* # of dirty pages */
 +      atomic_t dirty_pages;           /* # of dirty pages */
        f2fs_hash_t chash;              /* hash value of given file name */
        unsigned int clevel;            /* maximum level of given file name */
        nid_t i_xattr_nid;              /* node id that contains xattrs */
@@@ -496,26 -493,20 +496,26 @@@ static inline bool __is_front_mergeable
        return __is_extent_mergeable(cur, front);
  }
  
 -extern void f2fs_mark_inode_dirty_sync(struct inode *);
 +extern void f2fs_mark_inode_dirty_sync(struct inode *, bool);
  static inline void __try_update_largest_extent(struct inode *inode,
                        struct extent_tree *et, struct extent_node *en)
  {
        if (en->ei.len > et->largest.len) {
                et->largest = en->ei;
 -              f2fs_mark_inode_dirty_sync(inode);
 +              f2fs_mark_inode_dirty_sync(inode, true);
        }
  }
  
 +enum nid_list {
 +      FREE_NID_LIST,
 +      ALLOC_NID_LIST,
 +      MAX_NID_LIST,
 +};
 +
  struct f2fs_nm_info {
        block_t nat_blkaddr;            /* base disk address of NAT */
        nid_t max_nid;                  /* maximum possible node ids */
 -      nid_t available_nids;           /* maximum available node ids */
 +      nid_t available_nids;           /* # of available node ids */
        nid_t next_scan_nid;            /* the next nid to be scanned */
        unsigned int ram_thresh;        /* control the memory footprint */
        unsigned int ra_nid_pages;      /* # of nid pages to be readaheaded */
  
        /* free node ids management */
        struct radix_tree_root free_nid_root;/* root of the free_nid cache */
 -      struct list_head free_nid_list; /* a list for free nids */
 -      spinlock_t free_nid_list_lock;  /* protect free nid list */
 -      unsigned int fcnt;              /* the number of free node id */
 +      struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */
 +      unsigned int nid_cnt[MAX_NID_LIST];     /* the number of free node id */
 +      spinlock_t nid_list_lock;       /* protect nid lists ops */
        struct mutex build_lock;        /* lock for build free nids */
  
        /* for checkpoint */
@@@ -594,6 -585,7 +594,6 @@@ enum 
        CURSEG_WARM_NODE,       /* direct node blocks of normal files */
        CURSEG_COLD_NODE,       /* indirect node blocks */
        NO_CHECK_TYPE,
 -      CURSEG_DIRECT_IO,       /* to use for the direct IO path */
  };
  
  struct flush_cmd {
@@@ -657,7 -649,6 +657,7 @@@ struct f2fs_sm_info 
   * f2fs monitors the number of several block types such as on-writeback,
   * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
   */
 +#define WB_DATA_TYPE(p)       (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
  enum count_type {
        F2FS_DIRTY_DENTS,
        F2FS_DIRTY_DATA,
        F2FS_DIRTY_META,
        F2FS_INMEM_PAGES,
        F2FS_DIRTY_IMETA,
 +      F2FS_WB_CP_DATA,
 +      F2FS_WB_DATA,
        NR_COUNT_TYPE,
  };
  
@@@ -699,7 -688,7 +699,7 @@@ struct f2fs_io_info 
        struct f2fs_sb_info *sbi;       /* f2fs_sb_info pointer */
        enum page_type type;    /* contains DATA/NODE/META/META_FLUSH */
        int op;                 /* contains REQ_OP_ */
 -      int op_flags;           /* rq_flag_bits */
 +      int op_flags;           /* req_flag_bits */
        block_t new_blkaddr;    /* new block address to be written */
        block_t old_blkaddr;    /* old block address before Cow */
        struct page *page;      /* page to be written */
@@@ -715,20 -704,6 +715,20 @@@ struct f2fs_bio_info 
        struct rw_semaphore io_rwsem;   /* blocking op for bio */
  };
  
 +#define FDEV(i)                               (sbi->devs[i])
 +#define RDEV(i)                               (raw_super->devs[i])
 +struct f2fs_dev_info {
 +      struct block_device *bdev;
 +      char path[MAX_PATH_LEN];
 +      unsigned int total_segments;
 +      block_t start_blk;
 +      block_t end_blk;
 +#ifdef CONFIG_BLK_DEV_ZONED
 +      unsigned int nr_blkz;                   /* Total number of zones */
 +      u8 *blkz_type;                          /* Array of zones type */
 +#endif
 +};
 +
  enum inode_type {
        DIR_INODE,                      /* for dirty dir inode */
        FILE_INODE,                     /* for dirty regular/symlink inode */
@@@ -775,12 -750,6 +775,12 @@@ struct f2fs_sb_info 
        u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
        u8 key_prefix_size;
  #endif
 +
 +#ifdef CONFIG_BLK_DEV_ZONED
 +      unsigned int blocks_per_blkz;           /* F2FS blocks per zone */
 +      unsigned int log_blocks_per_blkz;       /* log2 F2FS blocks per zone */
 +#endif
 +
        /* for node-related operations */
        struct f2fs_nm_info *nm_info;           /* node manager */
        struct inode *node_inode;               /* cache node blocks */
  
        /* for checkpoint */
        struct f2fs_checkpoint *ckpt;           /* raw checkpoint pointer */
 +      int cur_cp_pack;                        /* remain current cp pack */
        spinlock_t cp_lock;                     /* for flag in ckpt */
        struct inode *meta_inode;               /* cache meta blocks */
        struct mutex cp_mutex;                  /* checkpoint procedure lock */
        block_t discard_blks;                   /* discard command candidates */
        block_t last_valid_block_count;         /* for recovery */
        u32 s_next_generation;                  /* for NFS support */
 -      atomic_t nr_wb_bios;                    /* # of writeback bios */
  
        /* # of pages, see count_type */
 -      struct percpu_counter nr_pages[NR_COUNT_TYPE];
 +      atomic_t nr_pages[NR_COUNT_TYPE];
        /* # of allocated blocks */
        struct percpu_counter alloc_valid_block_count;
  
  
        /* For shrinker support */
        struct list_head s_list;
 +      int s_ndevs;                            /* number of devices */
 +      struct f2fs_dev_info *devs;             /* for device list */
        struct mutex umount_mutex;
        unsigned int shrinker_run_no;
  
@@@ -1138,6 -1105,13 +1138,6 @@@ static inline void clear_ckpt_flags(str
        spin_unlock(&sbi->cp_lock);
  }
  
 -static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
 -{
 -      struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
 -
 -      return blk_queue_discard(q);
 -}
 -
  static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
  {
        down_read(&sbi->cp_rwsem);
@@@ -1258,10 -1232,9 +1258,10 @@@ static inline void dec_valid_block_coun
  
  static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
  {
 -      percpu_counter_inc(&sbi->nr_pages[count_type]);
 +      atomic_inc(&sbi->nr_pages[count_type]);
  
 -      if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES)
 +      if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
 +              count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
                return;
  
        set_sbi_flag(sbi, SBI_IS_DIRTY);
  
  static inline void inode_inc_dirty_pages(struct inode *inode)
  {
 -      percpu_counter_inc(&F2FS_I(inode)->dirty_pages);
 +      atomic_inc(&F2FS_I(inode)->dirty_pages);
        inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
                                F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
  }
  
  static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
  {
 -      percpu_counter_dec(&sbi->nr_pages[count_type]);
 +      atomic_dec(&sbi->nr_pages[count_type]);
  }
  
  static inline void inode_dec_dirty_pages(struct inode *inode)
                        !S_ISLNK(inode->i_mode))
                return;
  
 -      percpu_counter_dec(&F2FS_I(inode)->dirty_pages);
 +      atomic_dec(&F2FS_I(inode)->dirty_pages);
        dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
                                F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
  }
  
  static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
  {
 -      return percpu_counter_sum_positive(&sbi->nr_pages[count_type]);
 +      return atomic_read(&sbi->nr_pages[count_type]);
  }
  
 -static inline s64 get_dirty_pages(struct inode *inode)
 +static inline int get_dirty_pages(struct inode *inode)
  {
 -      return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages);
 +      return atomic_read(&F2FS_I(inode)->dirty_pages);
  }
  
  static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
@@@ -1356,27 -1329,22 +1356,27 @@@ static inline void *__bitmap_ptr(struc
  
  static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
  {
 -      block_t start_addr;
 -      struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 -      unsigned long long ckpt_version = cur_cp_version(ckpt);
 -
 -      start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
 +      block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
  
 -      /*
 -       * odd numbered checkpoint should at cp segment 0
 -       * and even segment must be at cp segment 1
 -       */
 -      if (!(ckpt_version & 1))
 +      if (sbi->cur_cp_pack == 2)
                start_addr += sbi->blocks_per_seg;
 +      return start_addr;
 +}
 +
 +static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi)
 +{
 +      block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
  
 +      if (sbi->cur_cp_pack == 1)
 +              start_addr += sbi->blocks_per_seg;
        return start_addr;
  }
  
 +static inline void __set_cp_next_pack(struct f2fs_sb_info *sbi)
 +{
 +      sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1;
 +}
 +
  static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
  {
        return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
@@@ -1653,7 -1621,7 +1653,7 @@@ static inline void __mark_inode_dirty_f
                        return;
        case FI_DATA_EXIST:
        case FI_INLINE_DOTS:
 -              f2fs_mark_inode_dirty_sync(inode);
 +              f2fs_mark_inode_dirty_sync(inode, true);
        }
  }
  
@@@ -1680,7 -1648,7 +1680,7 @@@ static inline void set_acl_inode(struc
  {
        F2FS_I(inode)->i_acl_mode = mode;
        set_inode_flag(inode, FI_ACL_MODE);
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, false);
  }
  
  static inline void f2fs_i_links_write(struct inode *inode, bool inc)
                inc_nlink(inode);
        else
                drop_nlink(inode);
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, true);
  }
  
  static inline void f2fs_i_blocks_write(struct inode *inode,
  
        inode->i_blocks = add ? inode->i_blocks + diff :
                                inode->i_blocks - diff;
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, true);
        if (clean || recover)
                set_inode_flag(inode, FI_AUTO_RECOVER);
  }
@@@ -1714,27 -1682,34 +1714,27 @@@ static inline void f2fs_i_size_write(st
                return;
  
        i_size_write(inode, i_size);
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, true);
        if (clean || recover)
                set_inode_flag(inode, FI_AUTO_RECOVER);
  }
  
 -static inline bool f2fs_skip_inode_update(struct inode *inode)
 -{
 -      if (!is_inode_flag_set(inode, FI_AUTO_RECOVER))
 -              return false;
 -      return F2FS_I(inode)->last_disk_size == i_size_read(inode);
 -}
 -
  static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth)
  {
        F2FS_I(inode)->i_current_depth = depth;
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, true);
  }
  
  static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid)
  {
        F2FS_I(inode)->i_xattr_nid = xnid;
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, true);
  }
  
  static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino)
  {
        F2FS_I(inode)->i_pino = pino;
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, true);
  }
  
  static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri)
@@@ -1862,31 -1837,13 +1862,31 @@@ static inline int is_file(struct inode 
  static inline void set_file(struct inode *inode, int type)
  {
        F2FS_I(inode)->i_advise |= type;
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, true);
  }
  
  static inline void clear_file(struct inode *inode, int type)
  {
        F2FS_I(inode)->i_advise &= ~type;
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, true);
 +}
 +
 +static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
 +{
 +      if (dsync) {
 +              struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 +              bool ret;
 +
 +              spin_lock(&sbi->inode_lock[DIRTY_META]);
 +              ret = list_empty(&F2FS_I(inode)->gdirty_list);
 +              spin_unlock(&sbi->inode_lock[DIRTY_META]);
 +              return ret;
 +      }
 +      if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) ||
 +                      file_keep_isize(inode) ||
 +                      i_size_read(inode) & PAGE_MASK)
 +              return false;
 +      return F2FS_I(inode)->last_disk_size == i_size_read(inode);
  }
  
  static inline int f2fs_readonly(struct super_block *sb)
@@@ -1998,7 -1955,7 +1998,7 @@@ void set_de_type(struct f2fs_dir_entry 
  unsigned char get_de_type(struct f2fs_dir_entry *);
  struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
                        f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
 -bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
 +int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
                        unsigned int, struct fscrypt_str *);
  void do_make_empty_dir(struct inode *, struct inode *,
                        struct f2fs_dentry_ptr *);
@@@ -2038,7 -1995,7 +2038,7 @@@ static inline int f2fs_add_link(struct 
  /*
   * super.c
   */
 -int f2fs_inode_dirtied(struct inode *);
 +int f2fs_inode_dirtied(struct inode *, bool);
  void f2fs_inode_synced(struct inode *);
  int f2fs_commit_super(struct f2fs_sb_info *, bool);
  int f2fs_sync_fs(struct super_block *, int);
@@@ -2077,7 -2034,7 +2077,7 @@@ void move_node_page(struct page *, int)
  int fsync_node_pages(struct f2fs_sb_info *, struct inode *,
                        struct writeback_control *, bool);
  int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
 -void build_free_nids(struct f2fs_sb_info *);
 +void build_free_nids(struct f2fs_sb_info *, bool);
  bool alloc_nid(struct f2fs_sb_info *, nid_t *);
  void alloc_nid_done(struct f2fs_sb_info *, nid_t);
  void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
@@@ -2103,7 -2060,7 +2103,7 @@@ void f2fs_balance_fs(struct f2fs_sb_inf
  void f2fs_balance_fs_bg(struct f2fs_sb_info *);
  int f2fs_issue_flush(struct f2fs_sb_info *);
  int create_flush_cmd_control(struct f2fs_sb_info *);
 -void destroy_flush_cmd_control(struct f2fs_sb_info *);
 +void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
  void invalidate_blocks(struct f2fs_sb_info *, block_t);
  bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
  void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
@@@ -2175,15 -2132,12 +2175,15 @@@ void f2fs_submit_merged_bio_cond(struc
  void f2fs_flush_merged_bios(struct f2fs_sb_info *);
  int f2fs_submit_page_bio(struct f2fs_io_info *);
  void f2fs_submit_page_mbio(struct f2fs_io_info *);
 +struct block_device *f2fs_target_device(struct f2fs_sb_info *,
 +                              block_t, struct bio *);
 +int f2fs_target_device_index(struct f2fs_sb_info *, block_t);
  void set_data_blkaddr(struct dnode_of_data *);
  void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
  int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
  int reserve_new_block(struct dnode_of_data *);
  int f2fs_get_block(struct dnode_of_data *, pgoff_t);
 -ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
 +int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
  int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
  struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
  struct page *find_data_page(struct inode *, pgoff_t);
@@@ -2206,7 -2160,7 +2206,7 @@@ int f2fs_migrate_page(struct address_sp
  int start_gc_thread(struct f2fs_sb_info *);
  void stop_gc_thread(struct f2fs_sb_info *);
  block_t start_bidx_of_node(unsigned int, struct inode *);
 -int f2fs_gc(struct f2fs_sb_info *, bool);
 +int f2fs_gc(struct f2fs_sb_info *, bool, bool);
  void build_gc_manager(struct f2fs_sb_info *);
  
  /*
@@@ -2227,12 -2181,12 +2227,12 @@@ struct f2fs_stat_info 
        unsigned long long hit_largest, hit_cached, hit_rbtree;
        unsigned long long hit_total, total_ext;
        int ext_tree, zombie_tree, ext_node;
 -      s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
 -      s64 inmem_pages;
 +      int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
 +      int inmem_pages;
        unsigned int ndirty_dirs, ndirty_files, ndirty_all;
 -      int nats, dirty_nats, sits, dirty_sits, fnids;
 +      int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
        int total_count, utilization;
 -      int bg_gc, wb_bios;
 +      int bg_gc, nr_wb_cp_data, nr_wb_data;
        int inline_xattr, inline_inode, inline_dir, orphans;
        unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
        unsigned int bimodal, avg_vblocks;
@@@ -2458,30 -2412,9 +2458,30 @@@ static inline int f2fs_sb_has_crypto(st
        return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT);
  }
  
 -static inline int f2fs_sb_mounted_hmsmr(struct super_block *sb)
 +static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb)
 +{
 +      return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED);
 +}
 +
 +#ifdef CONFIG_BLK_DEV_ZONED
 +static inline int get_blkz_type(struct f2fs_sb_info *sbi,
 +                      struct block_device *bdev, block_t blkaddr)
 +{
 +      unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
 +      int i;
 +
 +      for (i = 0; i < sbi->s_ndevs; i++)
 +              if (FDEV(i).bdev == bdev)
 +                      return FDEV(i).blkz_type[zno];
 +      return -EINVAL;
 +}
 +#endif
 +
 +static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
  {
 -      return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_HMSMR);
 +      struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
 +
 +      return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb);
  }
  
  static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
@@@ -2520,8 -2453,8 +2520,8 @@@ static inline bool f2fs_may_encrypt(str
  #define fscrypt_pullback_bio_page     fscrypt_notsupp_pullback_bio_page
  #define fscrypt_restore_control_page  fscrypt_notsupp_restore_control_page
  #define fscrypt_zeroout_range         fscrypt_notsupp_zeroout_range
- #define fscrypt_process_policy                fscrypt_notsupp_process_policy
- #define fscrypt_get_policy            fscrypt_notsupp_get_policy
+ #define fscrypt_ioctl_set_policy      fscrypt_notsupp_ioctl_set_policy
+ #define fscrypt_ioctl_get_policy      fscrypt_notsupp_ioctl_get_policy
  #define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context
  #define fscrypt_inherit_context               fscrypt_notsupp_inherit_context
  #define fscrypt_get_encryption_info   fscrypt_notsupp_get_encryption_info
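
Among the f2fs.h changes, the checkpoint start-address helpers stop deriving the active pack from the parity of the checkpoint version and instead track an explicit sbi->cur_cp_pack that __set_cp_next_pack() toggles between pack 1 and pack 2. A toy standalone sketch of that alternation (the struct and field names here are illustrative, not the real f2fs_sb_info layout):

    /* Two checkpoint packs laid out back to back; the "current" one moves
     * by one segment's worth of blocks whenever the packs are swapped. */
    #include <stdio.h>

    struct cp_state {
            unsigned int cur_pack;          /* 1 or 2 */
            unsigned long cp_blkaddr;       /* base address of pack 1 */
            unsigned long blocks_per_seg;
    };

    static unsigned long cp_cur_addr(const struct cp_state *s)
    {
            return s->cp_blkaddr + (s->cur_pack == 2 ? s->blocks_per_seg : 0);
    }

    static unsigned long cp_next_addr(const struct cp_state *s)
    {
            return s->cp_blkaddr + (s->cur_pack == 1 ? s->blocks_per_seg : 0);
    }

    static void cp_switch_pack(struct cp_state *s)
    {
            s->cur_pack = (s->cur_pack == 1) ? 2 : 1;
    }

    int main(void)
    {
            struct cp_state s = { .cur_pack = 1, .cp_blkaddr = 512,
                                  .blocks_per_seg = 512 };

            printf("cur=%lu next=%lu\n", cp_cur_addr(&s), cp_next_addr(&s));
            cp_switch_pack(&s);
            printf("cur=%lu next=%lu\n", cp_cur_addr(&s), cp_next_addr(&s));
            return 0;
    }
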
diff --combined fs/f2fs/file.c
index 383b5c29f46b7718393ec0011b956a4208300940,f0c83f74557d04498b41c93b67adac59a3aa703b..49f10dce817dc9e4806b6a417b96391a1c794fd1
@@@ -94,6 -94,8 +94,6 @@@ mapped
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
  
 -      /* if gced page is attached, don't write to cold segment */
 -      clear_cold_data(page);
  out:
        sb_end_pagefault(inode->i_sb);
        f2fs_update_time(sbi, REQ_TIME);
@@@ -208,7 -210,7 +208,7 @@@ static int f2fs_do_sync_file(struct fil
        }
  
        /* if the inode is dirty, let's recover all the time */
 -      if (!datasync && !f2fs_skip_inode_update(inode)) {
 +      if (!f2fs_skip_inode_update(inode, datasync)) {
                f2fs_write_inode(inode, NULL);
                goto go_write;
        }
@@@ -262,7 -264,7 +262,7 @@@ sync_nodes
        }
  
        if (need_inode_block_update(sbi, ino)) {
 -              f2fs_mark_inode_dirty_sync(inode);
 +              f2fs_mark_inode_dirty_sync(inode, true);
                f2fs_write_inode(inode, NULL);
                goto sync_nodes;
        }
@@@ -630,7 -632,7 +630,7 @@@ int f2fs_truncate(struct inode *inode
                return err;
  
        inode->i_mtime = inode->i_ctime = current_time(inode);
 -      f2fs_mark_inode_dirty_sync(inode);
 +      f2fs_mark_inode_dirty_sync(inode, false);
        return 0;
  }
  
@@@ -677,7 -679,6 +677,7 @@@ int f2fs_setattr(struct dentry *dentry
  {
        struct inode *inode = d_inode(dentry);
        int err;
 +      bool size_changed = false;
  
        err = setattr_prepare(dentry, attr);
        if (err)
                        err = f2fs_truncate(inode);
                        if (err)
                                return err;
 -                      f2fs_balance_fs(F2FS_I_SB(inode), true);
                } else {
                        /*
                         * do not trim all blocks after i_size if target size is
                        }
                        inode->i_mtime = inode->i_ctime = current_time(inode);
                }
 +
 +              size_changed = true;
        }
  
        __setattr_copy(inode, attr);
                }
        }
  
 -      f2fs_mark_inode_dirty_sync(inode);
 +      /* file size may have changed here */
 +      f2fs_mark_inode_dirty_sync(inode, size_changed);
 +
 +      /* inode change will produce dirty node pages flushed by checkpoint */
 +      f2fs_balance_fs(F2FS_I_SB(inode), true);
 +
        return err;
  }
  
@@@ -972,7 -967,7 +972,7 @@@ static int __clone_blkaddrs(struct inod
                                new_size = (dst + i) << PAGE_SHIFT;
                                if (dst_inode->i_size < new_size)
                                        f2fs_i_size_write(dst_inode, new_size);
 -                      } while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen);
 +                      } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
  
                        f2fs_put_dnode(&dn);
                } else {
@@@ -1223,9 -1218,6 +1223,9 @@@ static int f2fs_zero_range(struct inod
                        ret = f2fs_do_zero_range(&dn, index, end);
                        f2fs_put_dnode(&dn);
                        f2fs_unlock_op(sbi);
 +
 +                      f2fs_balance_fs(sbi, dn.node_changed);
 +
                        if (ret)
                                goto out;
  
@@@ -1321,15 -1313,15 +1321,15 @@@ static int expand_inode_data(struct ino
        pgoff_t pg_end;
        loff_t new_size = i_size_read(inode);
        loff_t off_end;
 -      int ret;
 +      int err;
  
 -      ret = inode_newsize_ok(inode, (len + offset));
 -      if (ret)
 -              return ret;
 +      err = inode_newsize_ok(inode, (len + offset));
 +      if (err)
 +              return err;
  
 -      ret = f2fs_convert_inline_inode(inode);
 -      if (ret)
 -              return ret;
 +      err = f2fs_convert_inline_inode(inode);
 +      if (err)
 +              return err;
  
        f2fs_balance_fs(sbi, true);
  
        if (off_end)
                map.m_len++;
  
 -      ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
 -      if (ret) {
 +      err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
 +      if (err) {
                pgoff_t last_off;
  
                if (!map.m_len)
 -                      return ret;
 +                      return err;
  
                last_off = map.m_lblk + map.m_len - 1;
  
        if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
                f2fs_i_size_write(inode, new_size);
  
 -      return ret;
 +      return err;
  }
  
  static long f2fs_fallocate(struct file *file, int mode,
  
        if (!ret) {
                inode->i_mtime = inode->i_ctime = current_time(inode);
 -              f2fs_mark_inode_dirty_sync(inode);
 +              f2fs_mark_inode_dirty_sync(inode, false);
 +              if (mode & FALLOC_FL_KEEP_SIZE)
 +                      file_set_keep_isize(inode);
                f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
        }
  
@@@ -1536,7 -1526,7 +1536,7 @@@ static int f2fs_ioc_start_atomic_write(
                goto out;
  
        f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
 -              "Unexpected flush for atomic writes: ino=%lu, npages=%lld",
 +              "Unexpected flush for atomic writes: ino=%lu, npages=%u",
                                        inode->i_ino, get_dirty_pages(inode));
        ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
        if (ret)
@@@ -1762,31 -1752,16 +1762,16 @@@ static bool uuid_is_nonzero(__u8 u[16]
  
  static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
  {
-       struct fscrypt_policy policy;
        struct inode *inode = file_inode(filp);
  
-       if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
-                                                       sizeof(policy)))
-               return -EFAULT;
        f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
  
-       return fscrypt_process_policy(filp, &policy);
+       return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
  }
  
  static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
  {
-       struct fscrypt_policy policy;
-       struct inode *inode = file_inode(filp);
-       int err;
-       err = fscrypt_get_policy(inode, &policy);
-       if (err)
-               return err;
-       if (copy_to_user((struct fscrypt_policy __user *)arg, &policy, sizeof(policy)))
-               return -EFAULT;
-       return 0;
+       return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
  }
  
  static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
@@@ -1852,7 -1827,7 +1837,7 @@@ static int f2fs_ioc_gc(struct file *fil
                mutex_lock(&sbi->gc_mutex);
        }
  
 -      ret = f2fs_gc(sbi, sync);
 +      ret = f2fs_gc(sbi, sync, true);
  out:
        mnt_drop_write_file(filp);
        return ret;
@@@ -2266,15 -2241,12 +2251,15 @@@ static ssize_t f2fs_file_write_iter(str
        inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret > 0) {
 -              ret = f2fs_preallocate_blocks(iocb, from);
 -              if (!ret) {
 -                      blk_start_plug(&plug);
 -                      ret = __generic_file_write_iter(iocb, from);
 -                      blk_finish_plug(&plug);
 +              int err = f2fs_preallocate_blocks(iocb, from);
 +
 +              if (err) {
 +                      inode_unlock(inode);
 +                      return err;
                }
 +              blk_start_plug(&plug);
 +              ret = __generic_file_write_iter(iocb, from);
 +              blk_finish_plug(&plug);
        }
        inode_unlock(inode);
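
The fallocate path above now records FALLOC_FL_KEEP_SIZE in the inode's fadvise bits via file_set_keep_isize(), and the reworked f2fs_skip_inode_update() earlier checks that bit, so a preallocation past EOF no longer relies on i_size alone to decide whether the inode must be rewritten. The userspace contract for the flag is the usual one: the reported size stays put while block usage grows. A small sketch of that contract (error handling trimmed; the path is a placeholder, any fallocate-capable mount will do):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
            /* Placeholder path; assumes a writable f2fs (or similar) mount. */
            int fd = open("/mnt/f2fs/prealloc.dat", O_CREAT | O_RDWR, 0644);
            struct stat st;

            if (fd < 0) {
                    perror("open");
                    return 1;
            }

            /* Reserve 1 MiB beyond EOF without changing i_size. */
            if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20))
                    perror("fallocate");

            fstat(fd, &st);
            printf("st_size=%lld st_blocks=%lld\n",
                   (long long)st.st_size, (long long)st.st_blocks);
            close(fd);
            return 0;
    }
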
  
diff --combined fs/xfs/xfs_aops.c
index 6be5204a06d3ac1fc8da7e486b92ac00cf0a45bb,561cf1456c6ca1ed07484e5daf455c6f40015959..38755ca96c7a6d884c0c13421ab1d0b08fbc1f4b
@@@ -495,8 -495,8 +495,8 @@@ xfs_submit_ioend
  
        ioend->io_bio->bi_private = ioend;
        ioend->io_bio->bi_end_io = xfs_end_bio;
 -      bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
 -                       (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
 +      ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 +
        /*
         * If we are failing the IO now, just mark the ioend with an
         * error and finish it. This will run IO completion immediately
@@@ -567,7 -567,8 +567,7 @@@ xfs_chain_bio
  
        bio_chain(ioend->io_bio, new);
        bio_get(ioend->io_bio);         /* for xfs_destroy_ioend */
 -      bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
 -                        (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
 +      ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
        submit_bio(ioend->io_bio);
        ioend->io_bio = new;
  }
@@@ -1297,8 -1298,7 +1297,7 @@@ __xfs_get_blocks
        sector_t                iblock,
        struct buffer_head      *bh_result,
        int                     create,
-       bool                    direct,
-       bool                    dax_fault)
+       bool                    direct)
  {
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
                if (ISUNWRITTEN(&imap))
                        set_buffer_unwritten(bh_result);
                /* direct IO needs special help */
-               if (create) {
-                       if (dax_fault)
-                               ASSERT(!ISUNWRITTEN(&imap));
-                       else
-                               xfs_map_direct(inode, bh_result, &imap, offset,
-                                               is_cow);
-               }
+               if (create)
+                       xfs_map_direct(inode, bh_result, &imap, offset, is_cow);
        }
  
        /*
@@@ -1465,7 -1460,7 +1459,7 @@@ xfs_get_blocks
        struct buffer_head      *bh_result,
        int                     create)
  {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
+       return __xfs_get_blocks(inode, iblock, bh_result, create, false);
  }
  
  int
@@@ -1475,17 -1470,7 +1469,7 @@@ xfs_get_blocks_direct
        struct buffer_head      *bh_result,
        int                     create)
  {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
- }
- int
- xfs_get_blocks_dax_fault(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
- {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
+       return __xfs_get_blocks(inode, iblock, bh_result, create, true);
  }
  
  /*
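
The xfs_aops.c hunks (like the f2fs_write_data_page hunk earlier) drop the open-coded WRITE_SYNC/WRITE_FLUSH_FUA selection in favour of wbc_to_write_flags(), which derives the request flags from the writeback_control. For these call sites the mapping that matters is that data-integrity writeback (WB_SYNC_ALL) is marked synchronous. A rough standalone sketch of that mapping (the real helper lives in include/linux/writeback.h and may handle more cases; the flag value below is a placeholder):

    #include <stdio.h>

    enum wb_sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

    #define REQ_SYNC_SKETCH (1u << 3)   /* placeholder bit, not the real REQ_SYNC */

    /* Rough approximation: integrity writeback gets the sync flag,
     * everything else carries no extra flag. */
    static unsigned int sketch_wbc_to_write_flags(enum wb_sync_mode mode)
    {
            return mode == WB_SYNC_ALL ? REQ_SYNC_SKETCH : 0;
    }

    int main(void)
    {
            printf("WB_SYNC_ALL  -> %#x\n", sketch_wbc_to_write_flags(WB_SYNC_ALL));
            printf("WB_SYNC_NONE -> %#x\n", sketch_wbc_to_write_flags(WB_SYNC_NONE));
            return 0;
    }
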
diff --combined include/uapi/linux/fs.h
index c1d11df07b289fe47af000de9cf94e7e84cc120f,0496d37abe289884310add3545a2ede4d7de04a8..36da93fbf18860a08e75590e54d34caa6967d9ec
@@@ -225,10 -225,6 +225,10 @@@ struct fsxattr 
  #define BLKSECDISCARD _IO(0x12,125)
  #define BLKROTATIONAL _IO(0x12,126)
  #define BLKZEROOUT _IO(0x12,127)
 +/*
 + * A jump here: 130-131 are reserved for zoned block devices
 + * (see uapi/linux/blkzoned.h)
 + */
  
  #define BMAP_IOCTL 1          /* obsolete - kept for compatibility */
  #define FIBMAP           _IO(0x00,1)  /* bmap access */
  /* Policy provided via an ioctl on the topmost directory */
  #define FS_KEY_DESCRIPTOR_SIZE        8
  
+ #define FS_POLICY_FLAGS_PAD_4         0x00
+ #define FS_POLICY_FLAGS_PAD_8         0x01
+ #define FS_POLICY_FLAGS_PAD_16                0x02
+ #define FS_POLICY_FLAGS_PAD_32                0x03
+ #define FS_POLICY_FLAGS_PAD_MASK      0x03
+ #define FS_POLICY_FLAGS_VALID         0x03
+ /* Encryption algorithms */
+ #define FS_ENCRYPTION_MODE_INVALID            0
+ #define FS_ENCRYPTION_MODE_AES_256_XTS                1
+ #define FS_ENCRYPTION_MODE_AES_256_GCM                2
+ #define FS_ENCRYPTION_MODE_AES_256_CBC                3
+ #define FS_ENCRYPTION_MODE_AES_256_CTS                4
  struct fscrypt_policy {
        __u8 version;
        __u8 contents_encryption_mode;
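
The FS_POLICY_FLAGS_PAD_* values moved into this uapi header control how fscrypt pads encrypted filenames: the two low bits select the padding amount, which fscrypt derives as 4 << (flags & FS_POLICY_FLAGS_PAD_MASK), so PAD_4 through PAD_32 map to 4, 8, 16 and 32 bytes. A quick sketch of that mapping (illustrative; only the uapi constant reused below is taken from the header):

    #include <stdio.h>

    #define FS_POLICY_FLAGS_PAD_MASK        0x03

    /* PAD_4..PAD_32 (values 0..3) select the filename padding in bytes. */
    static unsigned int fname_padding(unsigned int policy_flags)
    {
            return 4u << (policy_flags & FS_POLICY_FLAGS_PAD_MASK);
    }

    int main(void)
    {
            for (unsigned int f = 0; f <= FS_POLICY_FLAGS_PAD_MASK; f++)
                    printf("flags %#x -> %u-byte padding\n", f, fname_padding(f));
            return 0;
    }
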
diff --combined mm/filemap.c
index 5b4dd03130da33b91cca705e6d202fffed177b55,db26ebc6c62f0c9a966f5ab8033f6442527f0eb5..69568388c699493ac694a960ca7c24b90b13e080
@@@ -132,29 -132,43 +132,28 @@@ static int page_cache_tree_insert(struc
                if (!dax_mapping(mapping)) {
                        if (shadowp)
                                *shadowp = p;
 -                      if (node)
 -                              workingset_node_shadows_dec(node);
                } else {
                        /* DAX can replace empty locked entry with a hole */
                        WARN_ON_ONCE(p !=
-                               (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
-                                        RADIX_DAX_ENTRY_LOCK));
+                               dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
 -                      /* DAX accounts exceptional entries as normal pages */
 -                      if (node)
 -                              workingset_node_pages_dec(node);
                        /* Wakeup waiters for exceptional entry lock */
-                       dax_wake_mapping_entry_waiter(mapping, page->index,
+                       dax_wake_mapping_entry_waiter(mapping, page->index, p,
                                                      false);
                }
        }
 -      radix_tree_replace_slot(slot, page);
 +      __radix_tree_replace(&mapping->page_tree, node, slot, page,
 +                           workingset_update_node, mapping);
        mapping->nrpages++;
 -      if (node) {
 -              workingset_node_pages_inc(node);
 -              /*
 -               * Don't track node that contains actual pages.
 -               *
 -               * Avoid acquiring the list_lru lock if already
 -               * untracked.  The list_empty() test is safe as
 -               * node->private_list is protected by
 -               * mapping->tree_lock.
 -               */
 -              if (!list_empty(&node->private_list))
 -                      list_lru_del(&workingset_shadow_nodes,
 -                                   &node->private_list);
 -      }
        return 0;
  }
  
  static void page_cache_tree_delete(struct address_space *mapping,
                                   struct page *page, void *shadow)
  {
 -      int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
 +      int i, nr;
 +
 +      /* hugetlb pages are represented by one entry in the radix tree */
 +      nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
  
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageTail(page), page);
                __radix_tree_lookup(&mapping->page_tree, page->index + i,
                                    &node, &slot);
  
 -              radix_tree_clear_tags(&mapping->page_tree, node, slot);
 -
 -              if (!node) {
 -                      VM_BUG_ON_PAGE(nr != 1, page);
 -                      /*
 -                       * We need a node to properly account shadow
 -                       * entries. Don't plant any without. XXX
 -                       */
 -                      shadow = NULL;
 -              }
 -
 -              radix_tree_replace_slot(slot, shadow);
 +              VM_BUG_ON_PAGE(!node && nr != 1, page);
  
 -              if (!node)
 -                      break;
 -
 -              workingset_node_pages_dec(node);
 -              if (shadow)
 -                      workingset_node_shadows_inc(node);
 -              else
 -                      if (__radix_tree_delete_node(&mapping->page_tree, node))
 -                              continue;
 -
 -              /*
 -               * Track node that only contains shadow entries. DAX mappings
 -               * contain no shadow entries and may contain other exceptional
 -               * entries so skip those.
 -               *
 -               * Avoid acquiring the list_lru lock if already tracked.
 -               * The list_empty() test is safe as node->private_list is
 -               * protected by mapping->tree_lock.
 -               */
 -              if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
 -                              list_empty(&node->private_list)) {
 -                      node->private_data = mapping;
 -                      list_lru_add(&workingset_shadow_nodes,
 -                                      &node->private_list);
 -              }
 +              radix_tree_clear_tags(&mapping->page_tree, node, slot);
 +              __radix_tree_replace(&mapping->page_tree, node, slot, shadow,
 +                                   workingset_update_node, mapping);
        }
  
        if (shadow) {
@@@ -1684,9 -1731,6 +1683,9 @@@ find_page
                        if (inode->i_blkbits == PAGE_SHIFT ||
                                        !mapping->a_ops->is_partially_uptodate)
                                goto page_not_up_to_date;
 +                      /* pipes can't handle partially uptodate pages */
 +                      if (unlikely(iter->type & ITER_PIPE))
 +                              goto page_not_up_to_date;
                        if (!trylock_page(page))
                                goto page_not_up_to_date;
                        /* Did it get truncated before we got the lock? */