Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/linux-arm-soc
author     Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 29 Aug 2011 23:33:32 +0000 (16:33 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 29 Aug 2011 23:33:32 +0000 (16:33 -0700)
* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/linux-arm-soc:
  ARM: mach-footbridge: add missing header file <video/vga.h>
  ARM: mach-orion5x: add missing header file <linux/vga.h>
  arm: fix compile failure in orion5x/dns323-setup.c
  at91: at91sam9261.c: fix typo in t2_clk alias for atmel_tcb.0
  ARM: S5P: fix bug in spdif_clk_get_rate
  ARM: EXYNOS4: Add restart hook for proper reboot
  ARM: EXYNOS4: Increase reset delay for USB HOST PHY
  ARM: S5P: add required chained_irq_enter/exit to gpio-int code
  ARM: EXYNOS4: add required chained_irq_enter/exit to eint code
  ARM: SAMSUNG: Add chained entry/exit call to timer interrupt handler
  ARM: S3C64XX: Fix build break in PM debug
  ARM: S5PV210: Fix build warning
  ARM: EXYNOS4: Fix the IRQ definitions for MIPI CSIS device
  ARM: EXYNOS4: remove duplicated inclusion
  ARM: EXYNOS4: Fix wrong devname to support clkdev
  ARM: EXYNOS4: Use the correct regulator names on universal_c210
  ARM: SAMSUNG: Fix Section mismatch in samsung_bl_set()
  ARM: S5P64X0: Replace irq_gc_ack() with irq_gc_ack_set_bit()

446 files changed:
Documentation/networking/00-INDEX
Documentation/networking/ip-sysctl.txt
Documentation/networking/scaling.txt
Documentation/power/runtime_pm.txt
MAINTAINERS
Makefile
arch/alpha/include/asm/sysinfo.h
arch/alpha/include/asm/thread_info.h
arch/alpha/kernel/osf_sys.c
arch/alpha/kernel/systbls.S
arch/arm/boot/compressed/mmcif-sh7372.c
arch/arm/boot/compressed/sdhi-sh7372.c
arch/arm/kernel/calls.S
arch/arm/mach-shmobile/board-ag5evm.c
arch/arm/mach-shmobile/board-ap4evb.c
arch/arm/mach-shmobile/board-mackerel.c
arch/arm/mach-shmobile/clock-sh7372.c
arch/arm/mach-shmobile/clock-sh73a0.c
arch/arm/mach-shmobile/include/mach/sh7372.h
arch/arm/mach-shmobile/intc-sh7372.c
arch/arm/mach-shmobile/setup-sh7372.c
arch/arm/plat-omap/omap_device.c
arch/avr32/kernel/syscall_table.S
arch/blackfin/mach-common/entry.S
arch/cris/arch-v10/kernel/entry.S
arch/cris/arch-v32/kernel/entry.S
arch/cris/include/asm/serial.h [new file with mode: 0644]
arch/frv/kernel/entry.S
arch/h8300/kernel/syscalls.S
arch/ia64/kernel/entry.S
arch/m32r/kernel/syscall_table.S
arch/m68k/include/asm/page_mm.h
arch/m68k/kernel/syscalltable.S
arch/microblaze/kernel/syscall_table.S
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-64.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-o32.S
arch/mn10300/kernel/entry.S
arch/parisc/kernel/syscall_table.S
arch/powerpc/include/asm/systbl.h
arch/powerpc/sysdev/fsl_rio.c
arch/s390/kernel/compat_wrapper.S
arch/s390/kernel/early.c
arch/s390/kernel/ipl.c
arch/s390/kernel/syscalls.S
arch/sh/include/asm/ptrace.h
arch/sh/kernel/cpu/sh4a/setup-sh7757.c
arch/sh/kernel/idle.c
arch/sh/kernel/syscalls_32.S
arch/sh/kernel/syscalls_64.S
arch/sh/kernel/traps_32.c
arch/sparc/include/asm/sigcontext.h
arch/sparc/kernel/Makefile
arch/sparc/kernel/irq.h
arch/sparc/kernel/signal32.c
arch/sparc/kernel/signal_32.c
arch/sparc/kernel/signal_64.c
arch/sparc/kernel/sigutil.h [new file with mode: 0644]
arch/sparc/kernel/sigutil_32.c [new file with mode: 0644]
arch/sparc/kernel/sigutil_64.c [new file with mode: 0644]
arch/sparc/kernel/sys32.S
arch/sparc/kernel/systbls_32.S
arch/sparc/kernel/systbls_64.S
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/unistd_64.h
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/syscall_table_32.S
arch/x86/platform/mrst/mrst.c
arch/x86/platform/olpc/olpc.c
arch/x86/vdso/vdso32/sysenter.S
arch/xtensa/include/asm/unistd.h
drivers/base/devres.c
drivers/base/devtmpfs.c
drivers/base/firmware_class.c
drivers/base/platform.c
drivers/base/power/clock_ops.c
drivers/bcma/main.c
drivers/bluetooth/ath3k.c
drivers/bluetooth/btusb.c
drivers/char/msm_smd_pkt.c
drivers/clocksource/sh_cmt.c
drivers/firewire/sbp2.c
drivers/firmware/google/gsmi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_test.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_util.c
drivers/hid/Kconfig
drivers/hid/hid-apple.c
drivers/hid/hid-core.c
drivers/hid/hid-ids.h
drivers/hid/hid-wiimote.c
drivers/hid/usbhid/hid-quirks.c
drivers/hwmon/i5k_amb.c
drivers/hwmon/ntc_thermistor.c
drivers/i2c/busses/i2c-nomadik.c
drivers/i2c/busses/i2c-omap.c
drivers/input/joystick/analog.c
drivers/input/keyboard/ep93xx_keypad.c
drivers/input/keyboard/tegra-kbc.c
drivers/input/misc/ad714x-i2c.c
drivers/input/misc/ad714x-spi.c
drivers/input/misc/ad714x.c
drivers/input/misc/ad714x.h
drivers/input/misc/mma8450.c
drivers/input/misc/mpu3050.c
drivers/input/mouse/bcm5974.c
drivers/input/tablet/wacom_sys.c
drivers/input/tablet/wacom_wac.c
drivers/input/touchscreen/atmel_mxt_ts.c
drivers/input/touchscreen/max11801_ts.c
drivers/input/touchscreen/tnetv107x-ts.c
drivers/leds/leds-ams-delta.c
drivers/leds/leds-bd2802.c
drivers/leds/leds-hp6xx.c
drivers/misc/Kconfig
drivers/misc/ab8500-pwm.c
drivers/misc/fsa9480.c
drivers/misc/pti.c
drivers/misc/ti-st/st_core.c
drivers/misc/ti-st/st_kim.c
drivers/misc/ti-st/st_ll.c
drivers/net/bonding/bond_main.c
drivers/net/can/sja1000/plx_pci.c
drivers/net/can/ti_hecc.c
drivers/net/cassini.c
drivers/net/e1000e/e1000.h
drivers/net/e1000e/ich8lan.c
drivers/net/e1000e/netdev.c
drivers/net/forcedeth.c
drivers/net/gianfar.c
drivers/net/gianfar_ethtool.c
drivers/net/ibmveth.c
drivers/net/ixgbe/ixgbe_main.c
drivers/net/phy/national.c
drivers/net/rionet.c
drivers/net/sh_eth.c
drivers/net/usb/cdc_ncm.c
drivers/net/via-velocity.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
drivers/net/wireless/ath/ath9k/main.c
drivers/net/wireless/ath/carl9170/main.c
drivers/net/wireless/iwlwifi/iwl-pci.c
drivers/net/wireless/rt2x00/rt2800usb.c
drivers/net/wireless/rt2x00/rt2x00usb.c
drivers/net/wireless/wl12xx/acx.c
drivers/net/wireless/wl12xx/main.c
drivers/net/wireless/wl12xx/sdio.c
drivers/net/wireless/wl12xx/testmode.c
drivers/power/max8997_charger.c
drivers/power/max8998_charger.c
drivers/power/s3c_adc_battery.c
drivers/rapidio/rio-scan.c
drivers/rtc/rtc-s3c.c
drivers/s390/block/dasd_ioctl.c
drivers/s390/char/sclp_cmd.c
drivers/sh/intc/chip.c
drivers/staging/brcm80211/brcmsmac/otp.c
drivers/staging/brcm80211/brcmsmac/types.h
drivers/staging/octeon/ethernet-rgmii.c
drivers/staging/octeon/ethernet-spi.c
drivers/staging/tidspbridge/core/dsp-clock.c
drivers/staging/zcache/tmem.c
drivers/staging/zcache/zcache-main.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_configfs.c
drivers/target/iscsi/iscsi_target_erl1.c
drivers/target/iscsi/iscsi_target_login.c
drivers/target/iscsi/iscsi_target_parameters.c
drivers/target/iscsi/iscsi_target_util.c
drivers/target/target_core_cdb.c
drivers/target/target_core_device.c
drivers/target/target_core_fabric_configfs.c
drivers/target/target_core_pr.c
drivers/target/target_core_rd.c
drivers/target/target_core_tpg.c
drivers/target/target_core_transport.c
drivers/target/tcm_fc/tfc_conf.c
drivers/tty/pty.c
drivers/tty/serial/8250.c
drivers/tty/serial/8250_pci.c
drivers/tty/serial/8250_pnp.c
drivers/tty/serial/atmel_serial.c
drivers/tty/serial/max3107-aava.c
drivers/tty/serial/max3107.c
drivers/tty/serial/mrst_max3110.c
drivers/tty/serial/omap-serial.c
drivers/tty/serial/pch_uart.c
drivers/tty/serial/samsung.c
drivers/tty/serial/serial_core.c
drivers/tty/serial/sh-sci.c
drivers/tty/serial/ucc_uart.c
drivers/tty/tty_io.c
drivers/usb/core/hcd.c
drivers/usb/gadget/f_phonet.c
drivers/usb/host/ehci-hub.c
drivers/usb/host/ehci-s5p.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/musb/blackfin.c
drivers/usb/musb/cppi_dma.c
drivers/usb/musb/musb_core.h
drivers/usb/musb/musb_gadget.c
drivers/usb/musb/musb_regs.h
drivers/usb/musb/tusb6010.c
drivers/usb/musb/tusb6010_omap.c
drivers/usb/musb/ux500_dma.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/option.c
drivers/video/backlight/adp8870_bl.c
drivers/video/backlight/ep93xx_bl.c
drivers/video/backlight/pwm_bl.c
drivers/w1/masters/ds2490.c
drivers/w1/masters/matrox_w1.c
drivers/w1/slaves/w1_ds2408.c
drivers/w1/slaves/w1_smem.c
drivers/w1/slaves/w1_therm.c
drivers/w1/w1.c
drivers/w1/w1.h
drivers/w1/w1_family.c
drivers/w1/w1_family.h
drivers/w1/w1_int.c
drivers/w1/w1_int.h
drivers/w1/w1_io.c
drivers/w1/w1_log.h
drivers/w1/w1_netlink.c
drivers/w1/w1_netlink.h
fs/compat.c
fs/fuse/dev.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/xfs/Makefile
fs/xfs/kmem.c [new file with mode: 0644]
fs/xfs/kmem.h [new file with mode: 0644]
fs/xfs/linux-2.6/kmem.c [deleted file]
fs/xfs/linux-2.6/kmem.h [deleted file]
fs/xfs/linux-2.6/mrlock.h [deleted file]
fs/xfs/linux-2.6/time.h [deleted file]
fs/xfs/linux-2.6/xfs_acl.c [deleted file]
fs/xfs/linux-2.6/xfs_aops.c [deleted file]
fs/xfs/linux-2.6/xfs_aops.h [deleted file]
fs/xfs/linux-2.6/xfs_buf.c [deleted file]
fs/xfs/linux-2.6/xfs_buf.h [deleted file]
fs/xfs/linux-2.6/xfs_discard.c [deleted file]
fs/xfs/linux-2.6/xfs_discard.h [deleted file]
fs/xfs/linux-2.6/xfs_export.c [deleted file]
fs/xfs/linux-2.6/xfs_export.h [deleted file]
fs/xfs/linux-2.6/xfs_file.c [deleted file]
fs/xfs/linux-2.6/xfs_fs_subr.c [deleted file]
fs/xfs/linux-2.6/xfs_globals.c [deleted file]
fs/xfs/linux-2.6/xfs_ioctl.c [deleted file]
fs/xfs/linux-2.6/xfs_ioctl.h [deleted file]
fs/xfs/linux-2.6/xfs_ioctl32.c [deleted file]
fs/xfs/linux-2.6/xfs_ioctl32.h [deleted file]
fs/xfs/linux-2.6/xfs_iops.c [deleted file]
fs/xfs/linux-2.6/xfs_iops.h [deleted file]
fs/xfs/linux-2.6/xfs_linux.h [deleted file]
fs/xfs/linux-2.6/xfs_message.c [deleted file]
fs/xfs/linux-2.6/xfs_message.h [deleted file]
fs/xfs/linux-2.6/xfs_quotaops.c [deleted file]
fs/xfs/linux-2.6/xfs_stats.c [deleted file]
fs/xfs/linux-2.6/xfs_stats.h [deleted file]
fs/xfs/linux-2.6/xfs_super.c [deleted file]
fs/xfs/linux-2.6/xfs_super.h [deleted file]
fs/xfs/linux-2.6/xfs_sync.c [deleted file]
fs/xfs/linux-2.6/xfs_sync.h [deleted file]
fs/xfs/linux-2.6/xfs_sysctl.c [deleted file]
fs/xfs/linux-2.6/xfs_sysctl.h [deleted file]
fs/xfs/linux-2.6/xfs_trace.c [deleted file]
fs/xfs/linux-2.6/xfs_trace.h [deleted file]
fs/xfs/linux-2.6/xfs_vnode.h [deleted file]
fs/xfs/linux-2.6/xfs_xattr.c [deleted file]
fs/xfs/mrlock.h [new file with mode: 0644]
fs/xfs/quota/xfs_dquot.c [deleted file]
fs/xfs/quota/xfs_dquot.h [deleted file]
fs/xfs/quota/xfs_dquot_item.c [deleted file]
fs/xfs/quota/xfs_dquot_item.h [deleted file]
fs/xfs/quota/xfs_qm.c [deleted file]
fs/xfs/quota/xfs_qm.h [deleted file]
fs/xfs/quota/xfs_qm_bhv.c [deleted file]
fs/xfs/quota/xfs_qm_stats.c [deleted file]
fs/xfs/quota/xfs_qm_stats.h [deleted file]
fs/xfs/quota/xfs_qm_syscalls.c [deleted file]
fs/xfs/quota/xfs_quota_priv.h [deleted file]
fs/xfs/quota/xfs_trans_dquot.c [deleted file]
fs/xfs/support/uuid.c [deleted file]
fs/xfs/support/uuid.h [deleted file]
fs/xfs/time.h [new file with mode: 0644]
fs/xfs/uuid.c [new file with mode: 0644]
fs/xfs/uuid.h [new file with mode: 0644]
fs/xfs/xfs.h
fs/xfs/xfs_acl.c [new file with mode: 0644]
fs/xfs/xfs_aops.c [new file with mode: 0644]
fs/xfs/xfs_aops.h [new file with mode: 0644]
fs/xfs/xfs_buf.c [new file with mode: 0644]
fs/xfs/xfs_buf.h [new file with mode: 0644]
fs/xfs/xfs_discard.c [new file with mode: 0644]
fs/xfs/xfs_discard.h [new file with mode: 0644]
fs/xfs/xfs_dquot.c [new file with mode: 0644]
fs/xfs/xfs_dquot.h [new file with mode: 0644]
fs/xfs/xfs_dquot_item.c [new file with mode: 0644]
fs/xfs/xfs_dquot_item.h [new file with mode: 0644]
fs/xfs/xfs_export.c [new file with mode: 0644]
fs/xfs/xfs_export.h [new file with mode: 0644]
fs/xfs/xfs_file.c [new file with mode: 0644]
fs/xfs/xfs_fs_subr.c [new file with mode: 0644]
fs/xfs/xfs_globals.c [new file with mode: 0644]
fs/xfs/xfs_ioctl.c [new file with mode: 0644]
fs/xfs/xfs_ioctl.h [new file with mode: 0644]
fs/xfs/xfs_ioctl32.c [new file with mode: 0644]
fs/xfs/xfs_ioctl32.h [new file with mode: 0644]
fs/xfs/xfs_iops.c [new file with mode: 0644]
fs/xfs/xfs_iops.h [new file with mode: 0644]
fs/xfs/xfs_linux.h [new file with mode: 0644]
fs/xfs/xfs_message.c [new file with mode: 0644]
fs/xfs/xfs_message.h [new file with mode: 0644]
fs/xfs/xfs_qm.c [new file with mode: 0644]
fs/xfs/xfs_qm.h [new file with mode: 0644]
fs/xfs/xfs_qm_bhv.c [new file with mode: 0644]
fs/xfs/xfs_qm_stats.c [new file with mode: 0644]
fs/xfs/xfs_qm_stats.h [new file with mode: 0644]
fs/xfs/xfs_qm_syscalls.c [new file with mode: 0644]
fs/xfs/xfs_quota_priv.h [new file with mode: 0644]
fs/xfs/xfs_quotaops.c [new file with mode: 0644]
fs/xfs/xfs_stats.c [new file with mode: 0644]
fs/xfs/xfs_stats.h [new file with mode: 0644]
fs/xfs/xfs_super.c [new file with mode: 0644]
fs/xfs/xfs_super.h [new file with mode: 0644]
fs/xfs/xfs_sync.c [new file with mode: 0644]
fs/xfs/xfs_sync.h [new file with mode: 0644]
fs/xfs/xfs_sysctl.c [new file with mode: 0644]
fs/xfs/xfs_sysctl.h [new file with mode: 0644]
fs/xfs/xfs_trace.c [new file with mode: 0644]
fs/xfs/xfs_trace.h [new file with mode: 0644]
fs/xfs/xfs_trans_dquot.c [new file with mode: 0644]
fs/xfs/xfs_vnode.h [new file with mode: 0644]
fs/xfs/xfs_xattr.c [new file with mode: 0644]
include/asm-generic/unistd.h
include/linux/compat.h
include/linux/connector.h
include/linux/fs.h
include/linux/fuse.h
include/linux/personality.h
include/linux/pwm_backlight.h
include/linux/rio_regs.h
include/linux/syscalls.h
include/linux/ti_wilink_st.h
include/linux/tty.h
include/linux/tty_driver.h
include/linux/writeback.h
include/net/cfg80211.h
include/target/target_core_fabric_ops.h
kernel/irq/manage.c
kernel/printk.c
kernel/sys.c
kernel/sys_ni.c
kernel/sysctl_binary.c
kernel/sysctl_check.c
mm/memcontrol.c
mm/page-writeback.c
mm/vmscan.c
net/8021q/vlan_core.c
net/atm/br2684.c
net/bluetooth/af_bluetooth.c
net/bluetooth/bnep/bnep.h
net/bluetooth/bnep/core.c
net/bluetooth/cmtp/capi.c
net/bluetooth/cmtp/cmtp.h
net/bluetooth/cmtp/core.c
net/bluetooth/hci_core.c
net/bluetooth/hidp/core.c
net/bluetooth/l2cap_core.c
net/bluetooth/l2cap_sock.c
net/bluetooth/rfcomm/core.c
net/bluetooth/rfcomm/sock.c
net/bluetooth/sco.c
net/bridge/br_if.c
net/bridge/br_multicast.c
net/core/neighbour.c
net/core/netpoll.c
net/ipv4/igmp.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/sit.c
net/mac80211/main.c
net/sched/act_mirred.c
net/socket.c
net/wireless/core.c
net/wireless/sysfs.c
scripts/checkpatch.pl
scripts/get_maintainer.pl
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_realtek.c
sound/soc/blackfin/bf5xx-ad193x.c
sound/soc/codecs/ad193x.c
sound/soc/codecs/ad193x.h
sound/soc/codecs/sta32x.c
sound/soc/codecs/wm8962.c
sound/soc/codecs/wm8996.c
sound/soc/ep93xx/ep93xx-i2s.c
sound/soc/fsl/fsl_dma.c
sound/soc/fsl/mpc8610_hpcd.c
sound/soc/fsl/p1022_ds.c
sound/soc/kirkwood/kirkwood-i2s.c
sound/soc/omap/ams-delta.c
sound/soc/samsung/Kconfig
sound/soc/samsung/h1940_uda1380.c
sound/soc/samsung/rx1950_uda1380.c
sound/soc/samsung/speyside_wm8962.c
sound/soc/soc-core.c
sound/soc/soc-io.c
sound/soc/soc-jack.c
sound/soc/soc-pcm.c
sound/soc/tegra/tegra_wm8903.c
tools/power/cpupower/Makefile
tools/power/cpupower/debug/x86_64/Makefile
tools/power/cpupower/debug/x86_64/centrino-decode.c [deleted symlink]
tools/power/cpupower/debug/x86_64/powernow-k8-decode.c [deleted symlink]
tools/power/cpupower/man/cpupower-frequency-info.1
tools/power/cpupower/man/cpupower-frequency-set.1
tools/power/cpupower/man/cpupower.1
tools/power/cpupower/utils/builtin.h
tools/power/cpupower/utils/cpufreq-info.c
tools/power/cpupower/utils/cpufreq-set.c
tools/power/cpupower/utils/cpuidle-info.c
tools/power/cpupower/utils/cpupower-info.c
tools/power/cpupower/utils/cpupower-set.c
tools/power/cpupower/utils/cpupower.c
tools/power/cpupower/utils/helpers/helpers.h
tools/power/cpupower/utils/helpers/sysfs.c
tools/power/cpupower/utils/helpers/sysfs.h
tools/power/cpupower/utils/helpers/topology.c
tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
tools/power/cpupower/utils/idle_monitor/mperf_monitor.c

index 4edd78dfb3622f195a4b05d5948a8e39ceb43070..bbce1215434a9e70797ce679bb6a15c5c2aa29ab 100644 (file)
@@ -1,13 +1,21 @@
 00-INDEX
        - this file
+3c359.txt
+       - information on the 3Com TokenLink Velocity XL (3c359) driver.
 3c505.txt
        - information on the 3Com EtherLink Plus (3c505) driver.
+3c509.txt
+       - information on the 3Com Etherlink III Series Ethernet cards.
 6pack.txt
        - info on the 6pack protocol, an alternative to KISS for AX.25
 DLINK.txt
        - info on the D-Link DE-600/DE-620 parallel port pocket adapters
 PLIP.txt
        - PLIP: The Parallel Line Internet Protocol device driver
+README.ipw2100
+       - README for the Intel PRO/Wireless 2100 driver.
+README.ipw2200
+       - README for the Intel PRO/Wireless 2915ABG and 2200BG driver.
 README.sb1000
        - info on General Instrument/NextLevel SURFboard1000 cable modem.
 alias.txt
@@ -20,8 +28,12 @@ atm.txt
        - info on where to get ATM programs and support for Linux.
 ax25.txt
        - info on using AX.25 and NET/ROM code for Linux
+batman-adv.txt
+       - B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames.
 baycom.txt
        - info on the driver for Baycom style amateur radio modems
+bonding.txt
+       - Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux.
 bridge.txt
        - where to get user space programs for ethernet bridging with Linux.
 can.txt
@@ -34,32 +46,60 @@ cxacru.txt
        - Conexant AccessRunner USB ADSL Modem
 cxacru-cf.py
        - Conexant AccessRunner USB ADSL Modem configuration file parser
+cxgb.txt
+       - Release Notes for the Chelsio N210 Linux device driver.
+dccp.txt
+       - the Datagram Congestion Control Protocol (DCCP) (RFC 4340..42).
 de4x5.txt
        - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver
 decnet.txt
        - info on using the DECnet networking layer in Linux.
 depca.txt
        - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver
+dl2k.txt
+       - README for D-Link DL2000-based Gigabit Ethernet Adapters (dl2k.ko).
+dm9000.txt
+       - README for the Simtec DM9000 Network driver.
 dmfe.txt
        - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver.
+dns_resolver.txt
+       - The DNS resolver module allows kernel services to make DNS queries.
+driver.txt
+       - Softnet driver issues.
 e100.txt
        - info on Intel's EtherExpress PRO/100 line of 10/100 boards
 e1000.txt
        - info on Intel's E1000 line of gigabit ethernet boards
+e1000e.txt
+       - README for the Intel Gigabit Ethernet Driver (e1000e).
 eql.txt
        - serial IP load balancing
 ewrk3.txt
        - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver
+fib_trie.txt
+       - Level Compressed Trie (LC-trie) notes: a structure for routing.
 filter.txt
        - Linux Socket Filtering
 fore200e.txt
        - FORE Systems PCA-200E/SBA-200E ATM NIC driver info.
 framerelay.txt
        - info on using Frame Relay/Data Link Connection Identifier (DLCI).
+gen_stats.txt
+       - Generic networking statistics for netlink users.
+generic_hdlc.txt
+       - The generic High Level Data Link Control (HDLC) layer.
 generic_netlink.txt
        - info on Generic Netlink
+gianfar.txt
+       - Gianfar Ethernet Driver.
 ieee802154.txt
        - Linux IEEE 802.15.4 implementation, API and drivers
+ifenslave.c
+       - Configure network interfaces for parallel routing (bonding).
+igb.txt
+       - README for the Intel Gigabit Ethernet Driver (igb).
+igbvf.txt
+       - README for the Intel Gigabit Ethernet Driver (igbvf).
 ip-sysctl.txt
        - /proc/sys/net/ipv4/* variables
 ip_dynaddr.txt
@@ -68,41 +108,117 @@ ipddp.txt
        - AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation
 iphase.txt
        - Interphase PCI ATM (i)Chip IA Linux driver info.
+ipv6.txt
+       - Options to the ipv6 kernel module.
+ipvs-sysctl.txt
+       - Per-inode explanation of the /proc/sys/net/ipv4/vs interface.
 irda.txt
        - where to get IrDA (infrared) utilities and info for Linux.
+ixgb.txt
+       - README for the Intel 10 Gigabit Ethernet Driver (ixgb).
+ixgbe.txt
+       - README for the Intel 10 Gigabit Ethernet Driver (ixgbe).
+ixgbevf.txt
+       - README for the Intel Virtual Function (VF) Driver (ixgbevf).
+l2tp.txt
+       - User guide to the L2TP tunnel protocol.
 lapb-module.txt
        - programming information of the LAPB module.
 ltpc.txt
        - the Apple or Farallon LocalTalk PC card driver
+mac80211-injection.txt
+       - HOWTO use packet injection with mac80211
 multicast.txt
        - Behaviour of cards under Multicast
+multiqueue.txt
+       - HOWTO for multiqueue network device support.
+netconsole.txt
+       - The network console module netconsole.ko: configuration and notes.
+netdev-features.txt
+       - Network interface features API description.
 netdevices.txt
        - info on network device driver functions exported to the kernel.
+netif-msg.txt
+       - Design of the network interface message level setting (NETIF_MSG_*).
+nfc.txt
+       - The Linux Near Field Communication (NFC) subsystem.
 olympic.txt
        - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info.
+operstates.txt
+       - Overview of network interface operational states.
+packet_mmap.txt
+       - User guide to memory mapped packet socket rings (PACKET_[RT]X_RING).
+phonet.txt
+       - The Phonet packet protocol used in Nokia cellular modems.
+phy.txt
+       - The PHY abstraction layer.
+pktgen.txt
+       - User guide to the kernel packet generator (pktgen.ko).
 policy-routing.txt
        - IP policy-based routing
+ppp_generic.txt
+       - Information about the generic PPP driver.
+proc_net_tcp.txt
+       - Per inode overview of the /proc/net/tcp and /proc/net/tcp6 interfaces.
+radiotap-headers.txt
+       - Background on radiotap headers.
 ray_cs.txt
        - Raylink Wireless LAN card driver info.
+rds.txt
+       - Background on the reliable, ordered datagram delivery method RDS.
+regulatory.txt
+       - Overview of the Linux wireless regulatory infrastructure.
+rxrpc.txt
+       - Guide to the RxRPC protocol.
+s2io.txt
+       - Release notes for Neterion Xframe I/II 10GbE driver.
+scaling.txt
+       - Explanation of network scaling techniques: RSS, RPS, RFS, aRFS, XPS.
+sctp.txt
+       - Notes on the Linux kernel implementation of the SCTP protocol.
+secid.txt
+       - Explanation of the secid member in flow structures.
 skfp.txt
        - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
 smc9.txt
        - the driver for SMC's 9000 series of Ethernet cards
 smctr.txt
        - SMC TokenCard TokenRing Linux driver info.
+spider-net.txt
+       - README for the Spidernet Driver (as found in PS3 / Cell BE).
+stmmac.txt
+       - README for the STMicro Synopsys Ethernet driver.
+tc-actions-env-rules.txt
+       - rules for traffic control (tc) actions.
+timestamping.txt
+       - overview of network packet timestamping variants.
 tcp.txt
        - short blurb on how TCP output takes place.
+tcp-thin.txt
+       - kernel tuning options for low rate 'thin' TCP streams.
 tlan.txt
        - ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info.
 tms380tr.txt
        - SysKonnect Token Ring ISA/PCI adapter driver info.
+tproxy.txt
+       - Transparent proxy support user guide.
 tuntap.txt
        - TUN/TAP device driver, allowing user space Rx/Tx of packets.
+udplite.txt
+       - UDP-Lite protocol (RFC 3828) introduction.
 vortex.txt
        - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
+vxge.txt
+       - README for the Neterion X3100 PCIe Server Adapter.
 x25.txt
        - general info on X.25 development.
 x25-iface.txt
        - description of the X.25 Packet Layer to LAPB device interface.
+xfrm_proc.txt
+       - description of the statistics package for XFRM.
+xfrm_sync.txt
+       - sync patches for XFRM enable migration of an SA between hosts.
+xfrm_sysctl.txt
+       - description of the XFRM configuration options.
 z8530drv.txt
        - info about Linux driver for Z8530 based HDLC cards for AX.25
index db2a4067013c2e25deabc419ee3e9cab14c2b282..81546990f41ca16b2c1c022dac487aea92fbc20b 100644 (file)
@@ -992,7 +992,7 @@ bindv6only - BOOLEAN
                TRUE: disable IPv4-mapped address feature
                FALSE: enable IPv4-mapped address feature
 
-       Default: FALSE (as specified in RFC2553bis)
+       Default: FALSE (as specified in RFC3493)
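
RFC 3493 also defines a per-socket override of this sysctl, the IPV6_V6ONLY
socket option. A minimal userspace sketch (illustrative, not part of this
patch) of requesting the dual-stack behaviour explicitly:

#include <sys/socket.h>
#include <netinet/in.h>

/* Open a TCP socket that accepts IPv4-mapped addresses, i.e. the
 * bindv6only=FALSE behaviour, independent of the sysctl default. */
int open_dual_stack_socket(void)
{
	int fd = socket(AF_INET6, SOCK_STREAM, 0);
	int off = 0;

	if (fd >= 0)
		setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));
	return fd;
}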
 
 IPv6 Fragmentation:
 
index 7254b4b5910e45b422028e3e3215fac73f5e9f8e..58fd7414e6c08120c14bf13dd300f45af0438b0e 100644 (file)
@@ -52,7 +52,8 @@ module parameter for specifying the number of hardware queues to
 configure. In the bnx2x driver, for instance, this parameter is called
 num_queues. A typical RSS configuration would be to have one receive queue
 for each CPU if the device supports enough queues, or otherwise at least
-one for each cache domain at a particular cache level (L1, L2, etc.).
+one for each memory domain, where a memory domain is a set of CPUs that
+share a particular memory level (L1, L2, NUMA node, etc.).
 
 The indirection table of an RSS device, which resolves a queue by masked
 hash, is usually programmed by the driver at initialization. The
@@ -82,11 +83,17 @@ RSS should be enabled when latency is a concern or whenever receive
 interrupt processing forms a bottleneck. Spreading load between CPUs
 decreases queue length. For low latency networking, the optimal setting
 is to allocate as many queues as there are CPUs in the system (or the
-NIC maximum, if lower). Because the aggregate number of interrupts grows
-with each additional queue, the most efficient high-rate configuration
+NIC maximum, if lower). The most efficient high-rate configuration
 is likely the one with the smallest number of receive queues where no
-CPU that processes receive interrupts reaches 100% utilization. Per-cpu
-load can be observed using the mpstat utility.
+receive queue overflows due to a saturated CPU, because in default
+mode with interrupt coalescing enabled, the aggregate number of
+interrupts (and thus work) grows with each additional queue.
+
+Per-cpu load can be observed using the mpstat utility, but note that on
+processors with hyperthreading (HT), each hyperthread is represented as
+a separate CPU. For interrupt handling, HT has shown no benefit in
+initial tests, so limit the number of queues to the number of CPU cores
+in the system.
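
The indirection-table lookup described earlier in this section is simple
enough to state in code; a minimal model (names and table size are
illustrative, not taken from any particular driver):

/* RSS queue selection: the NIC masks the low-order bits of the flow
 * hash and indexes the driver-programmed indirection table. */
#define RSS_INDIR_SIZE 128	/* a typical power-of-two table size */

static unsigned int rss_select_queue(const unsigned int indir[RSS_INDIR_SIZE],
				     unsigned int flow_hash)
{
	return indir[flow_hash & (RSS_INDIR_SIZE - 1)];
}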
 
 
 RPS: Receive Packet Steering
@@ -145,7 +152,7 @@ the bitmap.
 == Suggested Configuration
 
 For a single queue device, a typical RPS configuration would be to set
-the rps_cpus to the CPUs in the same cache domain of the interrupting
+the rps_cpus to the CPUs in the same memory domain of the interrupting
 CPU. If NUMA locality is not an issue, this could also be all CPUs in
 the system. At high interrupt rate, it might be wise to exclude the
 interrupting CPU from the map since that already performs much work.
@@ -154,7 +161,7 @@ For a multi-queue system, if RSS is configured so that a hardware
 receive queue is mapped to each CPU, then RPS is probably redundant
 and unnecessary. If there are fewer hardware queues than CPUs, then
 RPS might be beneficial if the rps_cpus for each queue are the ones that
-share the same cache domain as the interrupting CPU for that queue.
+share the same memory domain as the interrupting CPU for that queue.
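
The rps_cpus mask referred to here lives at
/sys/class/net/<dev>/queues/rx-<n>/rps_cpus. A small helper for setting it
from C (a sketch with minimal error handling, illustrative only):

#include <stdio.h>

/* Write a CPU bitmap, in the hex form the kernel expects, to a
 * receive queue's rps_cpus file. */
static int set_rps_cpus(const char *dev, int rxq, unsigned long mask)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/class/net/%s/queues/rx-%d/rps_cpus", dev, rxq);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%lx\n", mask);
	return fclose(f);
}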
 
 
 RFS: Receive Flow Steering
@@ -326,7 +333,7 @@ The queue chosen for transmitting a particular flow is saved in the
 corresponding socket structure for the flow (e.g. a TCP connection).
 This transmit queue is used for subsequent packets sent on the flow to
 prevent out of order (ooo) packets. The choice also amortizes the cost
-of calling get_xps_queues() over all packets in the connection. To avoid
+of calling get_xps_queues() over all packets in the flow. To avoid
 ooo packets, the queue for a flow can subsequently only be changed if
 skb->ooo_okay is set for a packet in the flow. This flag indicates that
 there are no outstanding packets in the flow, so the transmit queue can
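
The queue-stickiness rule this hunk describes can be modelled in a few lines
(a sketch with hypothetical names, not kernel code): the flow keeps its
transmit queue until a packet with ooo_okay set proves nothing is in flight.

/* Illustrative model of XPS queue stickiness. */
struct flow_state {
	int tx_queue;		/* -1 until the first packet */
};

static int pick_tx_queue(struct flow_state *fs, int preferred_queue,
			 int ooo_okay)
{
	/* Rebind only when no packets are outstanding, so the new
	 * queue cannot reorder the flow. */
	if (fs->tx_queue < 0 || ooo_okay)
		fs->tx_queue = preferred_queue;
	return fs->tx_queue;
}
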
index 4ce5450ab6e833cef372ef2755993353cae50935..6066e3a6b9a98c0f499059d1342444afb43be781 100644 (file)
@@ -431,8 +431,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
 
   void pm_runtime_irq_safe(struct device *dev);
     - set the power.irq_safe flag for the device, causing the runtime-PM
-      suspend and resume callbacks (but not the idle callback) to be invoked
-      with interrupts disabled
+      callbacks to be invoked with interrupts off
 
   void pm_runtime_mark_last_busy(struct device *dev);
     - set the power.last_busy field to the current time
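
A hedged sketch of how a driver opts into the behaviour this hunk documents
(the probe function and driver are hypothetical; pm_runtime_irq_safe() and
pm_runtime_enable() are the real API):

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

/* Mark the runtime-PM callbacks IRQ-safe during probe, after which
 * pm_runtime_get()/pm_runtime_put() may be used in atomic context. */
static int example_probe(struct platform_device *pdev)
{
	pm_runtime_irq_safe(&pdev->dev);
	pm_runtime_enable(&pdev->dev);
	return 0;
}
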
index 069ee3b5c651ff31c7237bd514aff3bb40322873..28f65c249b97b396e976ec2fd85946dce6e48023 100644 (file)
@@ -1883,7 +1883,7 @@ S:        Maintained
 F:     drivers/connector/
 
 CONTROL GROUPS (CGROUPS)
-M:     Paul Menage <menage@google.com>
+M:     Paul Menage <paul@paulmenage.org>
 M:     Li Zefan <lizf@cn.fujitsu.com>
 L:     containers@lists.linux-foundation.org
 S:     Maintained
@@ -1932,7 +1932,7 @@ S:        Maintained
 F:     tools/power/cpupower
 
 CPUSETS
-M:     Paul Menage <menage@google.com>
+M:     Paul Menage <paul@paulmenage.org>
 W:     http://www.bullopensource.org/cpuset/
 W:     http://oss.sgi.com/projects/cpusets/
 S:     Supported
@@ -2649,11 +2649,11 @@ F:      drivers/net/wan/dlci.c
 F:     drivers/net/wan/sdla.c
 
 FRAMEBUFFER LAYER
-M:     Paul Mundt <lethal@linux-sh.org>
+M:     Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
 L:     linux-fbdev@vger.kernel.org
 W:     http://linux-fbdev.sourceforge.net/
 Q:     http://patchwork.kernel.org/project/linux-fbdev/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/fbdev-2.6.git
+T:     git git://github.com/schandinat/linux-2.6.git fbdev-next
 S:     Maintained
 F:     Documentation/fb/
 F:     Documentation/devicetree/bindings/fb/
@@ -4450,8 +4450,8 @@ M:        "David S. Miller" <davem@davemloft.net>
 L:     netdev@vger.kernel.org
 W:     http://www.linuxfoundation.org/en/Net
 W:     http://patchwork.ozlabs.org/project/netdev/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
 S:     Maintained
 F:     net/
 F:     include/net/
@@ -5532,6 +5532,7 @@ F:        include/media/*7146*
 
 SAMSUNG AUDIO (ASoC) DRIVERS
 M:     Jassi Brar <jassisinghbrar@gmail.com>
+M:     Sangbeom Kim <sbkim73@samsung.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     sound/soc/samsung
@@ -7087,7 +7088,7 @@ S:        Supported
 F:     drivers/mmc/host/vub300.c
 
 W1 DALLAS'S 1-WIRE BUS
-M:     Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+M:     Evgeniy Polyakov <zbr@ioremap.net>
 S:     Maintained
 F:     Documentation/w1/
 F:     drivers/w1/
index 788511f86a6233ff6a24a4704a556dd4a7fba14f..c3e90c530a654e4fe55f880ce47c513a9dd439e0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = "Divemaster Edition"
 
 # *DOCUMENTATION*
index 086aba284df25ba314e246eb2e975291b4191dfa..e77d77cd07b8b8bfc69277d32ae5782344c9320a 100644 (file)
 #define UAC_NOFIX                      2
 #define UAC_SIGBUS                     4
 
-
-#ifdef __KERNEL__
-
-/* This is the shift that is applied to the UAC bits as stored in the
-   per-thread flags.  See thread_info.h.  */
-#define UAC_SHIFT                      6
-
-#endif
-
 #endif /* __ASM_ALPHA_SYSINFO_H */
index 6f32f9c84a2ddde12a85826a4339f70c857ee2f0..ff73db022342f789124b484618251e8e6235c1c6 100644 (file)
@@ -74,9 +74,9 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_NEED_RESCHED       3       /* rescheduling necessary */
 #define TIF_POLLING_NRFLAG     8       /* poll_idle is polling NEED_RESCHED */
 #define TIF_DIE_IF_KERNEL      9       /* dik recursion lock */
-#define TIF_UAC_NOPRINT                10      /* see sysinfo.h */
-#define TIF_UAC_NOFIX          11
-#define TIF_UAC_SIGBUS         12
+#define TIF_UAC_NOPRINT                10      /* ! Preserve sequence of following */
+#define TIF_UAC_NOFIX          11      /* ! flags as they match            */
+#define TIF_UAC_SIGBUS         12      /* ! userspace part of 'osf_sysinfo' */
 #define TIF_MEMDIE             13      /* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK    14      /* restore signal mask in do_signal */
 #define TIF_FREEZE             16      /* is freezing for suspend */
@@ -97,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define _TIF_ALLWORK_MASK      (_TIF_WORK_MASK         \
                                 | _TIF_SYSCALL_TRACE)
 
-#define ALPHA_UAC_SHIFT                10
+#define ALPHA_UAC_SHIFT                TIF_UAC_NOPRINT
 #define ALPHA_UAC_MASK         (1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \
                                 1 << TIF_UAC_SIGBUS)
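
The renamed constant makes the dependency explicit: with TIF_UAC_NOPRINT = 10,
TIF_UAC_NOFIX = 11 and TIF_UAC_SIGBUS = 12 kept consecutive,
(flags >> ALPHA_UAC_SHIFT) & UAC_BITMASK shifts them down to bits 0-2, i.e.
the userspace UAC_NOPRINT (1), UAC_NOFIX (2) and UAC_SIGBUS (4) values from
sysinfo.h - which is why the new comments insist the sequence be preserved.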
 
index 326f0a2d56e52922920aad724dfec0e78487986a..01e8715e26d9306148a8706d046cf863015c1721 100644 (file)
@@ -42,6 +42,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/sysinfo.h>
+#include <asm/thread_info.h>
 #include <asm/hwrpb.h>
 #include <asm/processor.h>
 
@@ -633,9 +634,10 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,
        case GSI_UACPROC:
                if (nbytes < sizeof(unsigned int))
                        return -EINVAL;
-               w = (current_thread_info()->flags >> UAC_SHIFT) & UAC_BITMASK;
-               if (put_user(w, (unsigned int __user *)buffer))
-                       return -EFAULT;
+               w = (current_thread_info()->flags >> ALPHA_UAC_SHIFT) &
+                       UAC_BITMASK;
+               if (put_user(w, (unsigned int __user *)buffer))
+                       return -EFAULT;
                return 1;
 
        case GSI_PROC_TYPE:
@@ -756,8 +758,8 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
                        case SSIN_UACPROC:
                        again:
                                old = current_thread_info()->flags;
-                               new = old & ~(UAC_BITMASK << UAC_SHIFT);
-                               new = new | (w & UAC_BITMASK) << UAC_SHIFT;
+                               new = old & ~(UAC_BITMASK << ALPHA_UAC_SHIFT);
+                               new = new | (w & UAC_BITMASK) << ALPHA_UAC_SHIFT;
                                if (cmpxchg(&current_thread_info()->flags,
                                            old, new) != old)
                                        goto again;
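
The again: loop above is the standard lock-free read-modify-write pattern;
a minimal userspace analogue with C11 atomics (purely illustrative, names
are hypothetical):

#include <stdatomic.h>

/* Atomically replace a bitfield inside *flags, retrying if another
 * thread modified it in between - the same shape as the cmpxchg
 * loop in osf_setsysinfo(). */
static void set_bits(_Atomic unsigned long *flags, unsigned long mask,
		     unsigned int shift, unsigned long value)
{
	unsigned long old = atomic_load(flags);
	unsigned long new;

	do {
		new = (old & ~(mask << shift)) | ((value & mask) << shift);
	} while (!atomic_compare_exchange_weak(flags, &old, new));
}
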
index b9c28f3f19560340fdbc343d5ca90875ffc82661..6acea1f96de394d05ef0e13dbea2a29d85a7e0ee 100644 (file)
@@ -360,7 +360,7 @@ sys_call_table:
        .quad sys_newuname
        .quad sys_nanosleep                     /* 340 */
        .quad sys_mremap
-       .quad sys_nfsservctl
+       .quad sys_ni_syscall                    /* old nfsservctl */
        .quad sys_setresuid
        .quad sys_getresuid
        .quad sys_pciconfig_read                /* 345 */
index b6f61d9a5a1b5279bf8576267b5a788f9f1cfe40..672ae95db5c3177aedc1dfdb3825a490b31e9fc2 100644 (file)
@@ -82,7 +82,7 @@ asmlinkage void mmc_loader(unsigned char *buf, unsigned long len)
 
 
        /* Disable clock to MMC hardware block */
-       __raw_writel(__raw_readl(SMSTPCR3) & (1 << 12), SMSTPCR3);
+       __raw_writel(__raw_readl(SMSTPCR3) | (1 << 12), SMSTPCR3);
 
        mmc_update_progress(MMC_PROGRESS_DONE);
 }
index d403a8b24d7f322ad8b53c58ce047a06dc2bc0f8..d279294f238116fa4fbd3c71931149528516b483 100644 (file)
@@ -85,7 +85,7 @@ asmlinkage void mmc_loader(unsigned short *buf, unsigned long len)
                goto err;
 
         /* Disable clock to SDHI1 hardware block */
-        __raw_writel(__raw_readl(SMSTPCR3) & (1 << 13), SMSTPCR3);
+        __raw_writel(__raw_readl(SMSTPCR3) | (1 << 13), SMSTPCR3);
 
        mmc_update_progress(MMC_PROGRESS_DONE);
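
Both hunks above fix the same inverted read-modify-write: a set bit in
SMSTPCR3 stops that module's clock, so disabling the clock must OR the bit
in. The buggy AND could never set a clear bit - it returned either 0 or the
lone bit - and it wiped every other module's stop bit as a side effect. A
minimal model (illustrative, not kernel code):

/* Module-stop register semantics behind both fixes. */
static unsigned int smstpcr_stop(unsigned int reg_val, int bit)
{
	return reg_val | (1u << bit);	/* correct: set only this stop bit */
}

static unsigned int smstpcr_stop_buggy(unsigned int reg_val, int bit)
{
	return reg_val & (1u << bit);	/* wrong: clears all other bits */
}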
 
index 80f7896cc0164ad6f74469f1e777b90c1f306443..9943e9e74a1bda0b17bc6e1ee93ca3ab2b80f970 100644 (file)
                CALL(sys_ni_syscall)            /* vm86 */
                CALL(sys_ni_syscall)            /* was sys_query_module */
                CALL(sys_poll)
-               CALL(sys_nfsservctl)
+               CALL(sys_ni_syscall)            /* was nfsservctl */
 /* 170 */      CALL(sys_setresgid16)
                CALL(sys_getresgid16)
                CALL(sys_prctl)
index ce5c2513c6ce93f62f37356d55f2835fbf3a6332..cdfdd624d21dd27719c156639588c06e95be1b9d 100644 (file)
@@ -341,6 +341,7 @@ static struct platform_device mipidsi0_device = {
 static struct sh_mobile_sdhi_info sdhi0_info = {
        .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
        .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
+       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT,
        .tmio_caps      = MMC_CAP_SD_HIGHSPEED,
        .tmio_ocr_mask  = MMC_VDD_27_28 | MMC_VDD_28_29,
 };
@@ -382,7 +383,7 @@ void ag5evm_sdhi1_set_pwr(struct platform_device *pdev, int state)
 }
 
 static struct sh_mobile_sdhi_info sh_sdhi1_info = {
-       .tmio_flags     = TMIO_MMC_WRPROTECT_DISABLE,
+       .tmio_flags     = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
        .tmio_caps      = MMC_CAP_NONREMOVABLE | MMC_CAP_SDIO_IRQ,
        .tmio_ocr_mask  = MMC_VDD_32_33 | MMC_VDD_33_34,
        .set_pwr        = ag5evm_sdhi1_set_pwr,
index 9e0856b2f9e9fcc8adb4455267e0ec4964efa56d..523f608eb8cf0109609188b4589f56e03bdc4a36 100644 (file)
@@ -1412,6 +1412,7 @@ static void __init ap4evb_init(void)
        fsi_init_pm_clock();
        sh7372_pm_init();
        pm_clk_add(&fsi_device.dev, "spu2");
+       pm_clk_add(&lcdc1_device.dev, "hdmi");
 }
 
 static void __init ap4evb_timer_init(void)
index d41c01f83f152f75589cc228a186a6f98da59a5e..17c19dc2560431b99699e28569f52f68f9202cc8 100644 (file)
@@ -641,6 +641,8 @@ static struct usbhs_private usbhs0_private = {
                },
                .driver_param = {
                        .buswait_bwait  = 4,
+                       .d0_tx_id       = SHDMA_SLAVE_USB0_TX,
+                       .d1_rx_id       = SHDMA_SLAVE_USB0_RX,
                },
        },
 };
@@ -810,6 +812,8 @@ static struct usbhs_private usbhs1_private = {
                        .buswait_bwait  = 4,
                        .pipe_type      = usbhs1_pipe_cfg,
                        .pipe_size      = ARRAY_SIZE(usbhs1_pipe_cfg),
+                       .d0_tx_id       = SHDMA_SLAVE_USB1_TX,
+                       .d1_rx_id       = SHDMA_SLAVE_USB1_RX,
                },
        },
 };
@@ -1588,6 +1592,7 @@ static void __init mackerel_init(void)
        hdmi_init_pm_clock();
        sh7372_pm_init();
        pm_clk_add(&fsi_device.dev, "spu2");
+       pm_clk_add(&hdmi_lcdc_device.dev, "hdmi");
 }
 
 static void __init mackerel_timer_init(void)
index 6b1619a65dbac16bf4535f106ab3b2c03016ed9f..66975921e6467b363e037ca355c82952a17a300a 100644 (file)
@@ -503,16 +503,17 @@ static struct clk *late_main_clks[] = {
        &sh7372_fsidivb_clk,
 };
 
-enum { MSTP001,
+enum { MSTP001, MSTP000,
        MSTP131, MSTP130,
        MSTP129, MSTP128, MSTP127, MSTP126, MSTP125,
        MSTP118, MSTP117, MSTP116, MSTP113,
        MSTP106, MSTP101, MSTP100,
        MSTP223,
-       MSTP218, MSTP217, MSTP216,
-       MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
-       MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312,
-       MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403,
+       MSTP218, MSTP217, MSTP216, MSTP214, MSTP208, MSTP207,
+       MSTP206, MSTP205, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
+       MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312,
+       MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP407, MSTP406,
+       MSTP405, MSTP404, MSTP403, MSTP400,
        MSTP_NR };
 
 #define MSTP(_parent, _reg, _bit, _flags) \
@@ -520,6 +521,7 @@ enum { MSTP001,
 
 static struct clk mstp_clks[MSTP_NR] = {
        [MSTP001] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR0, 1, 0), /* IIC2 */
+       [MSTP000] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR0, 0, 0), /* MSIOF0 */
        [MSTP131] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 31, 0), /* VEU3 */
        [MSTP130] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 30, 0), /* VEU2 */
        [MSTP129] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 29, 0), /* VEU1 */
@@ -538,14 +540,16 @@ static struct clk mstp_clks[MSTP_NR] = {
        [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */
        [MSTP217] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */
        [MSTP216] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 16, 0), /* DMAC3 */
+       [MSTP214] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 14, 0), /* USBDMAC */
+       [MSTP208] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 8, 0), /* MSIOF1 */
        [MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */
        [MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */
+       [MSTP205] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 5, 0), /* MSIOF2 */
        [MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */
        [MSTP203] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 3, 0), /* SCIFA1 */
        [MSTP202] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 2, 0), /* SCIFA2 */
        [MSTP201] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 1, 0), /* SCIFA3 */
        [MSTP200] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 0, 0), /* SCIFA4 */
-       [MSTP329] = MSTP(&r_clk, SMSTPCR3, 29, 0), /* CMT10 */
        [MSTP328] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR3, 28, 0), /* FSI2 */
        [MSTP323] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 23, 0), /* IIC1 */
        [MSTP322] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 22, 0), /* USB0 */
@@ -557,8 +561,12 @@ static struct clk mstp_clks[MSTP_NR] = {
        [MSTP413] = MSTP(&pllc1_div2_clk, SMSTPCR4, 13, 0), /* HDMI */
        [MSTP411] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 11, 0), /* IIC3 */
        [MSTP410] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 10, 0), /* IIC4 */
+       [MSTP407] = MSTP(&div4_clks[DIV4_HP], SMSTPCR4, 7, 0), /* USB-DMAC1 */
        [MSTP406] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 6, 0), /* USB1 */
+       [MSTP405] = MSTP(&r_clk, SMSTPCR4, 5, 0), /* CMT4 */
+       [MSTP404] = MSTP(&r_clk, SMSTPCR4, 4, 0), /* CMT3 */
        [MSTP403] = MSTP(&r_clk, SMSTPCR4, 3, 0), /* KEYSC */
+       [MSTP400] = MSTP(&r_clk, SMSTPCR4, 0, 0), /* CMT2 */
 };
 
 static struct clk_lookup lookups[] = {
@@ -609,6 +617,7 @@ static struct clk_lookup lookups[] = {
 
        /* MSTP32 clocks */
        CLKDEV_DEV_ID("i2c-sh_mobile.2", &mstp_clks[MSTP001]), /* IIC2 */
+       CLKDEV_DEV_ID("spi_sh_msiof.0", &mstp_clks[MSTP000]), /* MSIOF0 */
        CLKDEV_DEV_ID("uio_pdrv_genirq.4", &mstp_clks[MSTP131]), /* VEU3 */
        CLKDEV_DEV_ID("uio_pdrv_genirq.3", &mstp_clks[MSTP130]), /* VEU2 */
        CLKDEV_DEV_ID("uio_pdrv_genirq.2", &mstp_clks[MSTP129]), /* VEU1 */
@@ -629,14 +638,16 @@ static struct clk_lookup lookups[] = {
        CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* DMAC1 */
        CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[MSTP217]), /* DMAC2 */
        CLKDEV_DEV_ID("sh-dma-engine.2", &mstp_clks[MSTP216]), /* DMAC3 */
+       CLKDEV_DEV_ID("sh-dma-engine.3", &mstp_clks[MSTP214]), /* USB-DMAC0 */
+       CLKDEV_DEV_ID("spi_sh_msiof.1", &mstp_clks[MSTP208]), /* MSIOF1 */
        CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */
        CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP206]), /* SCIFB */
+       CLKDEV_DEV_ID("spi_sh_msiof.2", &mstp_clks[MSTP205]), /* MSIOF2 */
        CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */
        CLKDEV_DEV_ID("sh-sci.1", &mstp_clks[MSTP203]), /* SCIFA1 */
        CLKDEV_DEV_ID("sh-sci.2", &mstp_clks[MSTP202]), /* SCIFA2 */
        CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP201]), /* SCIFA3 */
        CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP200]), /* SCIFA4 */
-       CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */
        CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */
        CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */
        CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */
@@ -650,11 +661,17 @@ static struct clk_lookup lookups[] = {
        CLKDEV_DEV_ID("sh-mobile-hdmi", &mstp_clks[MSTP413]), /* HDMI */
        CLKDEV_DEV_ID("i2c-sh_mobile.3", &mstp_clks[MSTP411]), /* IIC3 */
        CLKDEV_DEV_ID("i2c-sh_mobile.4", &mstp_clks[MSTP410]), /* IIC4 */
+       CLKDEV_DEV_ID("sh-dma-engine.4", &mstp_clks[MSTP407]), /* USB-DMAC1 */
        CLKDEV_DEV_ID("r8a66597_hcd.1", &mstp_clks[MSTP406]), /* USB1 */
        CLKDEV_DEV_ID("r8a66597_udc.1", &mstp_clks[MSTP406]), /* USB1 */
        CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[MSTP406]), /* USB1 */
+       CLKDEV_DEV_ID("sh_cmt.4", &mstp_clks[MSTP405]), /* CMT4 */
+       CLKDEV_DEV_ID("sh_cmt.3", &mstp_clks[MSTP404]), /* CMT3 */
        CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */
+       CLKDEV_DEV_ID("sh_cmt.2", &mstp_clks[MSTP400]), /* CMT2 */
 
+       CLKDEV_ICK_ID("hdmi", "sh_mobile_lcdc_fb.1",
+                     &div6_reparent_clks[DIV6_HDMI]),
        CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]),
        CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]),
        CLKDEV_ICK_ID("ickb", "sh_fsi2", &div6_reparent_clks[DIV6_FSIB]),
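
The CLKDEV_ICK_ID("hdmi", "sh_mobile_lcdc_fb.1", ...) entry added above is
what lets the board files' pm_clk_add(..., "hdmi") calls resolve. A hedged
driver-side sketch of the same lookup through the clk API (this fragment is
illustrative, not part of the patch):

#include <linux/clk.h>
#include <linux/err.h>

/* From a device named "sh_mobile_lcdc_fb.1", claim the HDMI clock by
 * its connection id, as matched by the clkdev entry above. */
static int lcdc1_claim_hdmi_clock(struct device *dev)
{
	struct clk *hdmi_ick = clk_get(dev, "hdmi");

	if (IS_ERR(hdmi_ick))
		return PTR_ERR(hdmi_ick);
	return clk_enable(hdmi_ick);
}
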
index 6db2ccabc2bf95fe0cf4d99a90b382d0271d116c..61a846bb30f2034ec3ae69253aea2d2d6d695aa8 100644 (file)
@@ -365,7 +365,7 @@ void __init sh73a0_clock_init(void)
        __raw_writel(0x108, SD2CKCR);
 
        /* detect main clock parent */
-       switch ((__raw_readl(CKSCR) >> 24) & 0x03) {
+       switch ((__raw_readl(CKSCR) >> 28) & 0x03) {
        case 0:
                main_clk.parent = &sh73a0_extal1_clk;
                break;
index ce595cee86cd50e313984131ab5329578b98612d..24e63a85e6699a51660352601ca8d378e0570f36 100644 (file)
@@ -459,6 +459,10 @@ enum {
        SHDMA_SLAVE_SDHI2_TX,
        SHDMA_SLAVE_MMCIF_RX,
        SHDMA_SLAVE_MMCIF_TX,
+       SHDMA_SLAVE_USB0_TX,
+       SHDMA_SLAVE_USB0_RX,
+       SHDMA_SLAVE_USB1_TX,
+       SHDMA_SLAVE_USB1_RX,
 };
 
 extern struct clk sh7372_extal1_clk;
index 3b28743c77eb738f502737f434f0aee4c0f7cdf3..739315e30eb9f5c9f5b06d474df30602157dec3c 100644 (file)
@@ -379,7 +379,7 @@ enum {
        /* BBIF2 */
        VPU,
        TSIF1,
-       _3DG_SGX530,
+       /* 3DG */
        _2DDMAC,
        IIC2_ALI2, IIC2_TACKI2, IIC2_WAITI2, IIC2_DTEI2,
        IPMMU_IPMMUR, IPMMU_IPMMUR2,
@@ -436,7 +436,7 @@ static struct intc_vect intcs_vectors[] = {
        /* BBIF2 */
        INTCS_VECT(VPU, 0x980),
        INTCS_VECT(TSIF1, 0x9a0),
-       INTCS_VECT(_3DG_SGX530, 0x9e0),
+       /* 3DG */
        INTCS_VECT(_2DDMAC, 0xa00),
        INTCS_VECT(IIC2_ALI2, 0xa80), INTCS_VECT(IIC2_TACKI2, 0xaa0),
        INTCS_VECT(IIC2_WAITI2, 0xac0), INTCS_VECT(IIC2_DTEI2, 0xae0),
@@ -521,7 +521,7 @@ static struct intc_mask_reg intcs_mask_registers[] = {
            RTDMAC_1_DEI3, RTDMAC_1_DEI2, RTDMAC_1_DEI1, RTDMAC_1_DEI0 } },
        { 0xffd20198, 0xffd201d8, 8, /* IMR6SA / IMCR6SA */
          { 0, 0, MSIOF, 0,
-           _3DG_SGX530, 0, 0, 0 } },
+           0, 0, 0, 0 } },
        { 0xffd2019c, 0xffd201dc, 8, /* IMR7SA / IMCR7SA */
          { 0, TMU_TUNI2, TMU_TUNI1, TMU_TUNI0,
            0, 0, 0, 0 } },
@@ -561,7 +561,6 @@ static struct intc_prio_reg intcs_prio_registers[] = {
                                              TMU_TUNI2, TSIF1 } },
        { 0xffd2001c, 0, 16, 4, /* IPRHS */ { 0, 0, VEU, BEU } },
        { 0xffd20020, 0, 16, 4, /* IPRIS */ { 0, MSIOF, TSIF0, IIC0 } },
-       { 0xffd20024, 0, 16, 4, /* IPRJS */ { 0, _3DG_SGX530, 0, 0 } },
        { 0xffd20028, 0, 16, 4, /* IPRKS */ { 0, 0, LMB, 0 } },
        { 0xffd2002c, 0, 16, 4, /* IPRLS */ { IPMMU, 0, 0, 0 } },
        { 0xffd20030, 0, 16, 4, /* IPRMS */ { IIC2, 0, 0, 0 } },
index 79f0413d8725cb8af1bbc5e5ba9d91e7f29feca8..2d9b1b1a25387fe1b4605663a5dc2a9135c6f5a5 100644 (file)
@@ -169,35 +169,35 @@ static struct platform_device scif6_device = {
 };
 
 /* CMT */
-static struct sh_timer_config cmt10_platform_data = {
-       .name = "CMT10",
-       .channel_offset = 0x10,
-       .timer_bit = 0,
+static struct sh_timer_config cmt2_platform_data = {
+       .name = "CMT2",
+       .channel_offset = 0x40,
+       .timer_bit = 5,
        .clockevent_rating = 125,
        .clocksource_rating = 125,
 };
 
-static struct resource cmt10_resources[] = {
+static struct resource cmt2_resources[] = {
        [0] = {
-               .name   = "CMT10",
-               .start  = 0xe6138010,
-               .end    = 0xe613801b,
+               .name   = "CMT2",
+               .start  = 0xe6130040,
+               .end    = 0xe613004b,
                .flags  = IORESOURCE_MEM,
        },
        [1] = {
-               .start  = evt2irq(0x0b00), /* CMT1_CMT10 */
+               .start  = evt2irq(0x0b80), /* CMT2 */
                .flags  = IORESOURCE_IRQ,
        },
 };
 
-static struct platform_device cmt10_device = {
+static struct platform_device cmt2_device = {
        .name           = "sh_cmt",
-       .id             = 10,
+       .id             = 2,
        .dev = {
-               .platform_data  = &cmt10_platform_data,
+               .platform_data  = &cmt2_platform_data,
        },
-       .resource       = cmt10_resources,
-       .num_resources  = ARRAY_SIZE(cmt10_resources),
+       .resource       = cmt2_resources,
+       .num_resources  = ARRAY_SIZE(cmt2_resources),
 };
 
 /* TMU */
@@ -602,6 +602,150 @@ static struct platform_device dma2_device = {
        },
 };
 
+/*
+ * USB-DMAC
+ */
+
+unsigned int usbts_shift[] = {3, 4, 5};
+
+enum {
+       XMIT_SZ_8BYTE           = 0,
+       XMIT_SZ_16BYTE          = 1,
+       XMIT_SZ_32BYTE          = 2,
+};
+
+#define USBTS_INDEX2VAL(i) (((i) & 3) << 6)
+
+static const struct sh_dmae_channel sh7372_usb_dmae_channels[] = {
+       {
+               .offset = 0,
+       }, {
+               .offset = 0x20,
+       },
+};
+
+/* USB DMAC0 */
+static const struct sh_dmae_slave_config sh7372_usb_dmae0_slaves[] = {
+       {
+               .slave_id       = SHDMA_SLAVE_USB0_TX,
+               .chcr           = USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+       }, {
+               .slave_id       = SHDMA_SLAVE_USB0_RX,
+               .chcr           = USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+       },
+};
+
+static struct sh_dmae_pdata usb_dma0_platform_data = {
+       .slave          = sh7372_usb_dmae0_slaves,
+       .slave_num      = ARRAY_SIZE(sh7372_usb_dmae0_slaves),
+       .channel        = sh7372_usb_dmae_channels,
+       .channel_num    = ARRAY_SIZE(sh7372_usb_dmae_channels),
+       .ts_low_shift   = 6,
+       .ts_low_mask    = 0xc0,
+       .ts_high_shift  = 0,
+       .ts_high_mask   = 0,
+       .ts_shift       = usbts_shift,
+       .ts_shift_num   = ARRAY_SIZE(usbts_shift),
+       .dmaor_init     = DMAOR_DME,
+       .chcr_offset    = 0x14,
+       .chcr_ie_bit    = 1 << 5,
+       .dmaor_is_32bit = 1,
+       .needs_tend_set = 1,
+       .no_dmars       = 1,
+};
+
+static struct resource sh7372_usb_dmae0_resources[] = {
+       {
+               /* Channel registers and DMAOR */
+               .start  = 0xe68a0020,
+               .end    = 0xe68a0064 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               /* VCR/SWR/DMICR */
+               .start  = 0xe68a0000,
+               .end    = 0xe68a0014 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               /* IRQ for channels */
+               .start  = evt2irq(0x0a00),
+               .end    = evt2irq(0x0a00),
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device usb_dma0_device = {
+       .name           = "sh-dma-engine",
+       .id             = 3,
+       .resource       = sh7372_usb_dmae0_resources,
+       .num_resources  = ARRAY_SIZE(sh7372_usb_dmae0_resources),
+       .dev            = {
+               .platform_data  = &usb_dma0_platform_data,
+       },
+};
+
+/* USB DMAC1 */
+static const struct sh_dmae_slave_config sh7372_usb_dmae1_slaves[] = {
+       {
+               .slave_id       = SHDMA_SLAVE_USB1_TX,
+               .chcr           = USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+       }, {
+               .slave_id       = SHDMA_SLAVE_USB1_RX,
+               .chcr           = USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+       },
+};
+
+static struct sh_dmae_pdata usb_dma1_platform_data = {
+       .slave          = sh7372_usb_dmae1_slaves,
+       .slave_num      = ARRAY_SIZE(sh7372_usb_dmae1_slaves),
+       .channel        = sh7372_usb_dmae_channels,
+       .channel_num    = ARRAY_SIZE(sh7372_usb_dmae_channels),
+       .ts_low_shift   = 6,
+       .ts_low_mask    = 0xc0,
+       .ts_high_shift  = 0,
+       .ts_high_mask   = 0,
+       .ts_shift       = usbts_shift,
+       .ts_shift_num   = ARRAY_SIZE(usbts_shift),
+       .dmaor_init     = DMAOR_DME,
+       .chcr_offset    = 0x14,
+       .chcr_ie_bit    = 1 << 5,
+       .dmaor_is_32bit = 1,
+       .needs_tend_set = 1,
+       .no_dmars       = 1,
+};
+
+static struct resource sh7372_usb_dmae1_resources[] = {
+       {
+               /* Channel registers and DMAOR */
+               .start  = 0xe68c0020,
+               .end    = 0xe68c0064 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               /* VCR/SWR/DMICR */
+               .start  = 0xe68c0000,
+               .end    = 0xe68c0014 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               /* IRQ for channels */
+               .start  = evt2irq(0x1d00),
+               .end    = evt2irq(0x1d00),
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device usb_dma1_device = {
+       .name           = "sh-dma-engine",
+       .id             = 4,
+       .resource       = sh7372_usb_dmae1_resources,
+       .num_resources  = ARRAY_SIZE(sh7372_usb_dmae1_resources),
+       .dev            = {
+               .platform_data  = &usb_dma1_platform_data,
+       },
+};
+
 /* VPU */
 static struct uio_info vpu_platform_data = {
        .name = "VPU5HG",
@@ -818,7 +962,7 @@ static struct platform_device *sh7372_early_devices[] __initdata = {
        &scif4_device,
        &scif5_device,
        &scif6_device,
-       &cmt10_device,
+       &cmt2_device,
        &tmu00_device,
        &tmu01_device,
 };
@@ -829,6 +973,8 @@ static struct platform_device *sh7372_late_devices[] __initdata = {
        &dma0_device,
        &dma1_device,
        &dma2_device,
+       &usb_dma0_device,
+       &usb_dma1_device,
        &vpu_device,
        &veu0_device,
        &veu1_device,
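
A note on the USB-DMAC encoding added above: USBTS_INDEX2VAL(i) = ((i) & 3) << 6
places the transfer-size index exactly in the CHCR field that ts_low_shift = 6
and ts_low_mask = 0xc0 describe, and usbts_shift[] maps index i to a
2^shift-byte transfer, so XMIT_SZ_8BYTE/16BYTE/32BYTE (0/1/2) select shifts
3/4/5, i.e. 8-, 16- and 32-byte units, matching the enum names.
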
index b6b40974495469314b3bec6faaf2cbeae981ea25..9a6a53854911de58ec87005e5c60442e5702f9d7 100644 (file)
@@ -622,7 +622,8 @@ static struct dev_pm_domain omap_device_pm_domain = {
                SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume,
                                   _od_runtime_idle)
                USE_PLATFORM_PM_SLEEP_OPS
-               SET_SYSTEM_SLEEP_PM_OPS(_od_suspend_noirq, _od_resume_noirq)
+               .suspend_noirq = _od_suspend_noirq,
+               .resume_noirq = _od_resume_noirq,
        }
 };
 
index c7fd394d28a4d4e968addf61f659e0e781d21b60..6eba53530d1c552ea9813a16419a679c27075415 100644 (file)
@@ -158,7 +158,7 @@ sys_call_table:
        .long   sys_sched_rr_get_interval
        .long   sys_nanosleep
        .long   sys_poll
-       .long   sys_nfsservctl          /* 145 */
+       .long   sys_ni_syscall          /* 145 was nfsservctl */
        .long   sys_setresgid
        .long   sys_getresgid
        .long   sys_prctl
index 225d311c97013048a6920b578b1956c57f6d57a2..e4137297b790067b7a803d0c8dabd48dd808700c 100644 (file)
@@ -1543,7 +1543,7 @@ ENTRY(_sys_call_table)
        .long _sys_ni_syscall   /* for vm86 */
        .long _sys_ni_syscall   /* old "query_module" */
        .long _sys_ni_syscall   /* sys_poll */
-       .long _sys_nfsservctl
+       .long _sys_ni_syscall   /* old nfsservctl */
        .long _sys_setresgid    /* setresgid16 */       /* 170 */
        .long _sys_getresgid    /* getresgid16 */
        .long _sys_prctl
index 1161883eb582964fb5ad583e20b5bf327dd83097..592fbe9dfb629cc84bf126855fd80df16aa53bff 100644 (file)
@@ -771,7 +771,7 @@ sys_call_table:
        .long sys_ni_syscall    /* sys_vm86 */
        .long sys_ni_syscall    /* Old sys_query_module */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* old nfsservctl */
        .long sys_setresgid16   /* 170 */
        .long sys_getresgid16
        .long sys_prctl
index 84fed7e91ada221772f5afa8f3e4d9707a899116..c3ea4694fbaf879c5b83e20b286e22587361dd22 100644 (file)
@@ -714,7 +714,7 @@ sys_call_table:
        .long sys_ni_syscall    /* sys_vm86 */
        .long sys_ni_syscall    /* Old sys_query_module */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* Old nfsservctl */
        .long sys_setresgid16   /* 170 */
        .long sys_getresgid16
        .long sys_prctl
diff --git a/arch/cris/include/asm/serial.h b/arch/cris/include/asm/serial.h
new file mode 100644 (file)
index 0000000..af7535a
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _ASM_SERIAL_H
+#define _ASM_SERIAL_H
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ */
+#define BASE_BAUD (1843200 / 16)
+
+#endif /* _ASM_SERIAL_H */
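
BASE_BAUD follows the usual 8250/16550 convention: the UART divides its input clock by 16 per bit, so a 1.8432 MHz clock tops out at 1843200 / 16 = 115200 baud, and the divisor latch for a given rate is BASE_BAUD / rate. A small stand-alone illustration:

    #include <stdio.h>

    #define BASE_BAUD (1843200 / 16)        /* 115200, as in the header above */

    int main(void)
    {
            unsigned int baud = 9600;
            /* Classic 16550 divisor: 115200 / 9600 = 12 */
            printf("divisor for %u baud: %u\n", baud, BASE_BAUD / baud);
            return 0;
    }
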
index 017d6d7b784fa5324f78dd5b4c11d431e8127278..5ba23f715ea5e7f0f3dfcc0cab6a860519e41ec6 100644 (file)
@@ -1358,7 +1358,7 @@ sys_call_table:
        .long sys_ni_syscall    /* for vm86 */
        .long sys_ni_syscall    /* Old sys_query_module */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* Old nfsservctl */
        .long sys_setresgid16   /* 170 */
        .long sys_getresgid16
        .long sys_prctl
index f4b2e67bcc34dcd93340baf8bfcf4cdb35176be7..4be2ea2fbe26a4ab5c823db232f84cf7484e0a6a 100644 (file)
@@ -183,7 +183,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
        .long SYMBOL_NAME(sys_ni_syscall)       /* for vm86 */
        .long SYMBOL_NAME(sys_ni_syscall)       /* sys_query_module */
        .long SYMBOL_NAME(sys_poll)
-       .long SYMBOL_NAME(sys_nfsservctl)
+       .long SYMBOL_NAME(sys_ni_syscall)       /* old nfsservctl */
        .long SYMBOL_NAME(sys_setresgid16)      /* 170 */
        .long SYMBOL_NAME(sys_getresgid16)
        .long SYMBOL_NAME(sys_prctl)
index 97dd2abdeb1a3b2cd66a409d1dc4bf8ec0345b55..198c753d1006a5abf0bc0c1b669ba53ebc754b34 100644 (file)
@@ -1614,7 +1614,7 @@ sys_call_table:
        data8 sys_sched_get_priority_min
        data8 sys_sched_rr_get_interval
        data8 sys_nanosleep
-       data8 sys_nfsservctl
+       data8 sys_ni_syscall                    // old nfsservctl
        data8 sys_prctl                         // 1170
        data8 sys_getpagesize
        data8 sys_mmap2
index 528f2e6ad06421e20d9c6f8d30e5d1c24f726350..f365c19795ef52df3da1429fc83fed3237fc09aa 100644 (file)
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
        .long sys_tas                   /* vm86 syscall holder */
        .long sys_ni_syscall            /* query_module syscall holder */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall            /* was nfsservctl */
        .long sys_setresgid             /* 170 */
        .long sys_getresgid
        .long sys_prctl
index 31d5570d65676784afd92a95665073e9179bb2e2..89f201434b5aa2575a5837bc6730346e06cc7e30 100644 (file)
@@ -162,7 +162,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
        pgdat->node_mem_map + (__pfn - pgdat->node_start_pfn);          \
 })
 #define page_to_pfn(_page) ({                                          \
-       struct page *__p = (_page);                                     \
+       const struct page *__p = (_page);                               \
        struct pglist_data *pgdat;                                      \
        pgdat = &pg_data_map[page_to_nid(__p)];                         \
        ((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn;          \
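
The m68k hunk just const-qualifies the scratch pointer so page_to_pfn() can be handed a const struct page * without a qualifier warning; the arithmetic is unchanged. For instance (demo_pfn is illustrative, kernel context assumed):

    static unsigned long demo_pfn(const struct page *page)
    {
            return page_to_pfn(page);       /* no const-discard warning now */
    }
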
index 00d1452f9571073f98d4c5968fdb870f35daeb03..c468f2edaa85ec0cd2356e392a0f0e7efddb8218 100644 (file)
@@ -189,7 +189,7 @@ ENTRY(sys_call_table)
        .long sys_getpagesize
        .long sys_ni_syscall            /* old "query_module" */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall            /* old nfsservctl */
        .long sys_setresgid16           /* 170 */
        .long sys_getresgid16
        .long sys_prctl
index d915a122c86592fa29b8ebf395c4902881be5539..8789daa2a346683d43e7b42efe7935494b332e14 100644 (file)
@@ -173,7 +173,7 @@ ENTRY(sys_call_table)
        .long sys_ni_syscall            /* sys_vm86 */
        .long sys_ni_syscall            /* Old sys_query_module */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall            /* old nfsservctl */
        .long sys_setresgid             /* 170 */
        .long sys_getresgid
        .long sys_prctl
index e521420a45a54896f16c6caa0e73ebd39718fd4b..865bc7a6f5a19ed32d150886658894b215f3617d 100644 (file)
@@ -424,7 +424,7 @@ einval:     li      v0, -ENOSYS
        sys     sys_getresuid           3
        sys     sys_ni_syscall          0       /* was sys_query_module */
        sys     sys_poll                3
-       sys     sys_nfsservctl          3
+       sys     sys_ni_syscall          0       /* was nfsservctl */
        sys     sys_setresgid           3       /* 4190 */
        sys     sys_getresgid           3
        sys     sys_prctl               5
index 85874d6a8a709e31b8964a9ae65ee4326f4f6143..fb7334bea7316aedd8071ff41d63178539504cc8 100644 (file)
@@ -299,7 +299,7 @@ sys_call_table:
        PTR     sys_ni_syscall                  /* 5170, was get_kernel_syms */
        PTR     sys_ni_syscall                  /* was query_module */
        PTR     sys_quotactl
-       PTR     sys_nfsservctl
+       PTR     sys_ni_syscall                  /* was nfsservctl */
        PTR     sys_ni_syscall                  /* res. for getpmsg */
        PTR     sys_ni_syscall                  /* 5175  for putpmsg */
        PTR     sys_ni_syscall                  /* res. for afs_syscall */
index b85842fc87ae60d65d9f1599a275de3d07c284d1..f9296e894e465f83cccd9195df9baeb5d43dee17 100644 (file)
@@ -294,7 +294,7 @@ EXPORT(sysn32_call_table)
        PTR     sys_ni_syscall                  /* 6170, was get_kernel_syms */
        PTR     sys_ni_syscall                  /* was query_module */
        PTR     sys_quotactl
-       PTR     compat_sys_nfsservctl
+       PTR     sys_ni_syscall                  /* was nfsservctl */
        PTR     sys_ni_syscall                  /* res. for getpmsg */
        PTR     sys_ni_syscall                  /* 6175  for putpmsg */
        PTR     sys_ni_syscall                  /* res. for afs_syscall */
index 46c4763edf211b7d3af7c630ad76dba949e33787..4d7c9827706f3d8dc821cfb258a12c22a384ddc2 100644 (file)
@@ -392,7 +392,7 @@ sys_call_table:
        PTR     sys_getresuid
        PTR     sys_ni_syscall                  /* was query_module */
        PTR     sys_poll
-       PTR     compat_sys_nfsservctl
+       PTR     sys_ni_syscall                  /* was nfsservctl */
        PTR     sys_setresgid                   /* 4190 */
        PTR     sys_getresgid
        PTR     sys_prctl
index ae435e1d56694f71d5471b4000c130d7ea4c30d2..3e3620d9fc45eb2636aaae6e9a1d3f5e8f9762ed 100644 (file)
@@ -589,7 +589,7 @@ ENTRY(sys_call_table)
        .long sys_ni_syscall    /* vm86 */
        .long sys_ni_syscall    /* Old sys_query_module */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* was nfsservctl */
        .long sys_setresgid16   /* 170 */
        .long sys_getresgid16
        .long sys_prctl
index e66366fd2abc8cbf55e74455c9e26f22bd89aedb..3735abd7f8f6f067488b20de9b44ab487ac2dff2 100644 (file)
        ENTRY_SAME(ni_syscall)          /* query_module */
        ENTRY_SAME(poll)
        /* structs contain pointers and an in_addr... */
-       ENTRY_COMP(nfsservctl)
+       ENTRY_SAME(ni_syscall)          /* was nfsservctl */
        ENTRY_SAME(setresgid)           /* 170 */
        ENTRY_SAME(getresgid)
        ENTRY_SAME(prctl)
index f6736b7da463508e2b9777e851d2bdd2c597fec8..fa0d27a400de917ee45a7a85dd75c2dbbaabb745 100644 (file)
@@ -171,7 +171,7 @@ SYSCALL_SPU(setresuid)
 SYSCALL_SPU(getresuid)
 SYSCALL(ni_syscall)
 SYSCALL_SPU(poll)
-COMPAT_SYS(nfsservctl)
+SYSCALL(ni_syscall)
 SYSCALL_SPU(setresgid)
 SYSCALL_SPU(getresgid)
 COMPAT_SYS_SPU(prctl)
index 2de8551df40fc2ab6fc9bd0570a6fe83bf9994bf..c65f75aa7ff7be40f4f16bda3fcfd39d3eb45fd5 100644 (file)
@@ -54,6 +54,7 @@
 #define ODSR_CLEAR             0x1c00
 #define LTLEECSR_ENABLE_ALL    0xFFC000FC
 #define ESCSR_CLEAR            0x07120204
+#define IECSR_CLEAR            0x80000000
 
 #define RIO_PORT1_EDCSR                0x0640
 #define RIO_PORT2_EDCSR                0x0680
@@ -1089,11 +1090,11 @@ static void port_error_handler(struct rio_mport *port, int offset)
 
        if (offset == 0) {
                out_be32((u32 *)(rio_regs_win + RIO_PORT1_EDCSR), 0);
-               out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), 0);
+               out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), IECSR_CLEAR);
                out_be32((u32 *)(rio_regs_win + RIO_ESCSR), ESCSR_CLEAR);
        } else {
                out_be32((u32 *)(rio_regs_win + RIO_PORT2_EDCSR), 0);
-               out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), 0);
+               out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), IECSR_CLEAR);
                out_be32((u32 *)(rio_regs_win + RIO_PORT2_ESCSR), ESCSR_CLEAR);
        }
 }
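
The fsl_rio fix hinges on the IECSR being write-one-to-clear: writing 0, as the old code did, never acknowledges the error bit, while writing IECSR_CLEAR (bit 31) does. The same idiom in miniature, on a hypothetical memory-mapped status register:

    #define DEMO_ERR_BIT 0x80000000u        /* sticky W1C error flag */

    static void demo_ack_error(volatile unsigned int *csr)
    {
            /* Writing the set bit back clears it; writing 0 is a no-op. */
            *csr = DEMO_ERR_BIT;
    }
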
index 08ab9aa6a0d574027f0dcd48ea9840bf2984ee96..7526db6bf501f8a5394f56dfc0cadfc343ef82e6 100644 (file)
@@ -665,12 +665,6 @@ ENTRY(sys32_poll_wrapper)
        lgfr    %r4,%r4                 # long
        jg      sys_poll                # branch to system call
 
-ENTRY(compat_sys_nfsservctl_wrapper)
-       lgfr    %r2,%r2                 # int
-       llgtr   %r3,%r3                 # struct compat_nfsctl_arg*
-       llgtr   %r4,%r4                 # union compat_nfsctl_res*
-       jg      compat_sys_nfsservctl   # branch to system call
-
 ENTRY(sys32_setresgid16_wrapper)
        llgfr   %r2,%r2                 # __kernel_old_gid_emu31_t
        llgfr   %r3,%r3                 # __kernel_old_gid_emu31_t
index 068f8465c4ee07c0156978517f9efe88dcc5e96a..f297456dba7a9eb3282fe0377097e99f97a488d0 100644 (file)
@@ -396,17 +396,19 @@ static __init void detect_machine_facilities(void)
 static __init void rescue_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
+       unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
        /*
-        * Move the initrd right behind the bss section in case it starts
-        * within the bss section. So we don't overwrite it when the bss
-        * section gets cleared.
+        * Just like in the case of IPL from the VM reader, we make sure
+        * there is a 4MB gap between the end of the kernel and the start
+        * of the initrd. That way we can also be sure that saving an NSS
+        * will succeed, which however only requires different segments.
         */
        if (!INITRD_START || !INITRD_SIZE)
                return;
-       if (INITRD_START >= (unsigned long) __bss_stop)
+       if (INITRD_START >= min_initrd_addr)
                return;
-       memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE);
-       INITRD_START = (unsigned long) __bss_stop;
+       memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+       INITRD_START = min_initrd_addr;
 #endif
 }
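
Instead of parking the initrd right behind .bss, the code now enforces a fixed 4MB gap past _end, which both keeps the initrd clear of the bss clearing and puts kernel and initrd in different segments so saving an NSS can succeed. The relocation threshold is simply (sketch with an illustrative helper):

    static unsigned long demo_min_initrd_addr(unsigned long kernel_end)
    {
            return kernel_end + (4UL << 20);        /* keep a 4MB gap */
    }
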
 
index 04361d5a42794524419bd1898ac78ef755491449..48c710206366308270e70a846c268c9e5cc8ea9f 100644 (file)
@@ -1220,7 +1220,7 @@ static int __init reipl_fcp_init(void)
        /* sysfs: create fcp kset for mixing attr group and bin attrs */
        reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
                                             &reipl_kset->kobj);
-       if (!reipl_kset) {
+       if (!reipl_fcp_kset) {
                free_page((unsigned long) reipl_block_fcp);
                return -ENOMEM;
        }
@@ -1618,7 +1618,8 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
 
 static void stop_run(struct shutdown_trigger *trigger)
 {
-       if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+       if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+           strcmp(trigger->name, ON_RESTART_STR) == 0)
                disabled_wait((unsigned long) __builtin_return_address(0));
        while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
                cpu_relax();
@@ -1717,7 +1718,7 @@ static void do_panic(void)
 /* on restart */
 
 static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
-       &reipl_action};
+       &stop_action};
 
 static ssize_t on_restart_show(struct kobject *kobj,
                               struct kobj_attribute *attr, char *page)
index 6ee39ef8fe4add9f0c47dd33ba52aa3050842ead..73eb08c874fb450ef6ba0464d4aa4ca845bb6911 100644 (file)
@@ -177,7 +177,7 @@ SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper)   /* 165 old get
 NI_SYSCALL                                                     /* for vm86 */
 NI_SYSCALL                                                     /* old sys_query_module */
 SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper)
-SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper)
+NI_SYSCALL                                                     /* old nfsservctl */
 SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper)      /* 170 old setresgid16 syscall */
 SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper)      /* old getresgid16 syscall */
 SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper)
index b97baf81a87bb8afda8a4d1558786cd14afa4f0b..2d3679b2447f262c4c83eebe5e6ff89890c2e0a2 100644 (file)
@@ -123,7 +123,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 struct perf_event;
 struct perf_sample_data;
 
-extern void ptrace_triggered(struct perf_event *bp, int nmi,
+extern void ptrace_triggered(struct perf_event *bp,
                      struct perf_sample_data *data, struct pt_regs *regs);
 
 #define task_pt_regs(task) \
index e915deafac89ba8af2774272a3ec04545bae9c1a..05559295d2ca8e8fa184e50bcca0a4b224e9d308 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/serial_sci.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/dma-mapping.h>
 #include <linux/sh_timer.h>
 #include <linux/sh_dma.h>
 
index 32114e0941ae7616aae53de88a9f79b895e973db..db4ecd731a003792783ddbc524fcc60f020d4bf7 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/atomic.h>
 #include <asm/smp.h>
 
-static void (*pm_idle)(void);
+void (*pm_idle)(void);
 
 static int hlt_counter;
 
index 39b051de4c7ca4d6c7d13fb983232bf2be1f26f6..293e39c59c00522c08e99a9877fcafc36836152b 100644 (file)
@@ -185,7 +185,7 @@ ENTRY(sys_call_table)
        .long sys_ni_syscall    /* vm86 */
        .long sys_ni_syscall    /* old "query_module" */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* was nfsservctl */
        .long sys_setresgid16   /* 170 */
        .long sys_getresgid16
        .long sys_prctl
index 089c4d825d087a27585a295ee7509d409a76b786..ceb34b94afa9cb08c6f396cbd29c84d937375c2d 100644 (file)
@@ -189,7 +189,7 @@ sys_call_table:
        .long sys_ni_syscall    /* vm86 */
        .long sys_ni_syscall    /* old "query_module" */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* was nfsservctl */
        .long sys_setresgid16           /* 170 */
        .long sys_getresgid16
        .long sys_prctl
index d9006f8ffc142532d99b0ff539f88831f4027999..7bbef95c9d1b4eb8daa1ffd055d57ea5e8bc6af2 100644 (file)
@@ -316,6 +316,35 @@ static int handle_unaligned_ins(insn_size_t instruction, struct pt_regs *regs,
                        break;
                }
                break;
+
+       case 9: /* mov.w @(disp,PC),Rn */
+               srcu = (unsigned char __user *)regs->pc;
+               srcu += 4;
+               srcu += (instruction & 0x00FF) << 1;
+               dst = (unsigned char *)rn;
+               *(unsigned long *)dst = 0;
+
+#if !defined(__LITTLE_ENDIAN__)
+               dst += 2;
+#endif
+
+               if (ma->from(dst, srcu, 2))
+                       goto fetch_fault;
+               sign_extend(2, dst);
+               ret = 0;
+               break;
+
+       case 0xd: /* mov.l @(disp,PC),Rn */
+               srcu = (unsigned char __user *)(regs->pc & ~0x3);
+               srcu += 4;
+               srcu += (instruction & 0x00FF) << 2;
+               dst = (unsigned char *)rn;
+               *(unsigned long *)dst = 0;
+
+               if (ma->from(dst, srcu, 4))
+                       goto fetch_fault;
+               ret = 0;
+               break;
        }
        return ret;
 
@@ -466,6 +495,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
                case 0x0500: /* mov.w @(disp,Rm),R0 */
                        goto simple;
                case 0x0B00: /* bf   lab - no delayslot*/
+                       ret = 0;
                        break;
                case 0x0F00: /* bf/s lab */
                        ret = handle_delayslot(regs, instruction, ma);
@@ -479,6 +509,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
                        }
                        break;
                case 0x0900: /* bt   lab - no delayslot */
+                       ret = 0;
                        break;
                case 0x0D00: /* bt/s lab */
                        ret = handle_delayslot(regs, instruction, ma);
@@ -494,6 +525,9 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
                }
                break;
 
+       case 0x9000: /* mov.w @(disp,Rm),Rn */
+               goto simple;
+
        case 0xA000: /* bra label */
                ret = handle_delayslot(regs, instruction, ma);
                if (ret==0)
@@ -507,6 +541,9 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
                        regs->pc += SH_PC_12BIT_OFFSET(instruction);
                }
                break;
+
+       case 0xD000: /* mov.l @(disp,Rm),Rn */
+               goto simple;
        }
        return ret;
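
The new cases in handle_unaligned_ins() emulate the SH PC-relative load forms: mov.w @(disp,PC),Rn reads from pc + 4 + disp*2, while mov.l first rounds the PC down to a 4-byte boundary and scales the displacement by 4, which is why the code masks regs->pc with ~0x3. The effective-address rules in plain C:

    #include <stdint.h>

    static uint32_t movw_pc_addr(uint32_t pc, uint32_t insn)
    {
            return pc + 4 + ((insn & 0xff) << 1);           /* mov.w @(disp,PC) */
    }

    static uint32_t movl_pc_addr(uint32_t pc, uint32_t insn)
    {
            return (pc & ~3u) + 4 + ((insn & 0xff) << 2);   /* mov.l @(disp,PC) */
    }
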
 
index a1607d1803547cd075090f6835839e0f3236910e..69914d748130a9b151d4f4c8f17a496fe2d5d39e 100644 (file)
@@ -45,6 +45,19 @@ typedef struct {
        int                     si_mask;
 } __siginfo32_t;
 
+#define __SIGC_MAXWIN  7
+
+typedef struct {
+       unsigned long locals[8];
+       unsigned long ins[8];
+} __siginfo_reg_window;
+
+typedef struct {
+       int                     wsaved;
+       __siginfo_reg_window    reg_window[__SIGC_MAXWIN];
+       unsigned long           rwbuf_stkptrs[__SIGC_MAXWIN];
+} __siginfo_rwin_t;
+
 #ifdef CONFIG_SPARC64
 typedef struct {
        unsigned   int si_float_regs [64];
@@ -73,6 +86,7 @@ struct sigcontext {
                unsigned long   ss_size;
        }                       sigc_stack;
        unsigned long           sigc_mask;
+       __siginfo_rwin_t *      sigc_rwin_save;
 };
 
 #else
index b90b4a1d070ad3e33b6b2ba6e3de59327c59677d..cb85458f89d2c495dd80e6f649e865eea6d71929 100644 (file)
@@ -32,6 +32,7 @@ obj-$(CONFIG_SPARC32)   += sun4m_irq.o sun4c_irq.o sun4d_irq.o
 
 obj-y                   += process_$(BITS).o
 obj-y                   += signal_$(BITS).o
+obj-y                   += sigutil_$(BITS).o
 obj-$(CONFIG_SPARC32)   += ioport.o
 obj-y                   += setup_$(BITS).o
 obj-y                   += idprom.o
index 100b9c204e78f5143c13ae9beb3a6256d28165e5..42851122bbd9c35b184a22454540fbc182e1c7e9 100644 (file)
@@ -88,7 +88,7 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int)
 #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu)
 
 /* All SUN4D IPIs are sent on this IRQ; it may be shared with hard IRQs */
-#define SUN4D_IPI_IRQ 14
+#define SUN4D_IPI_IRQ 13
 
 extern void sun4d_ipi_interrupt(void);
 
index 75fad425e249bc40559f98d14ead5699839bbbb8..1ba95aff5d5958cdeb1142a24f18ea0c63717722 100644 (file)
@@ -29,6 +29,8 @@
 #include <asm/visasm.h>
 #include <asm/compat_signal.h>
 
+#include "sigutil.h"
+
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 /* This magic should be in g_upper[0] for all upper parts
@@ -44,14 +46,14 @@ typedef struct {
 struct signal_frame32 {
        struct sparc_stackf32   ss;
        __siginfo32_t           info;
-       /* __siginfo_fpu32_t * */ u32 fpu_save;
+       /* __siginfo_fpu_t * */ u32 fpu_save;
        unsigned int            insns[2];
        unsigned int            extramask[_COMPAT_NSIG_WORDS - 1];
        unsigned int            extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
        /* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
        siginfo_extra_v8plus_t  v8plus;
-       __siginfo_fpu_t         fpu_state;
-};
+       /* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
 
 typedef struct compat_siginfo{
        int si_signo;
@@ -110,18 +112,14 @@ struct rt_signal_frame32 {
        compat_siginfo_t        info;
        struct pt_regs32        regs;
        compat_sigset_t         mask;
-       /* __siginfo_fpu32_t * */ u32 fpu_save;
+       /* __siginfo_fpu_t * */ u32 fpu_save;
        unsigned int            insns[2];
        stack_t32               stack;
        unsigned int            extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
        /* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
        siginfo_extra_v8plus_t  v8plus;
-       __siginfo_fpu_t         fpu_state;
-};
-
-/* Align macros */
-#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame32) + 15) & (~15)))
-#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame32) + 15) & (~15)))
+       /* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 {
@@ -192,30 +190,13 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
        return 0;
 }
 
-static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err;
-       
-       err = __get_user(fprs, &fpu->si_fprs);
-       fprs_write(0);
-       regs->tstate &= ~TSTATE_PEF;
-       if (fprs & FPRS_DL)
-               err |= copy_from_user(fpregs, &fpu->si_float_regs[0], (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], (sizeof(unsigned int) * 32));
-       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       current_thread_info()->fpsaved[0] |= fprs;
-       return err;
-}
-
 void do_sigreturn32(struct pt_regs *regs)
 {
        struct signal_frame32 __user *sf;
+       compat_uptr_t fpu_save;
+       compat_uptr_t rwin_save;
        unsigned int psr;
-       unsigned pc, npc, fpu_save;
+       unsigned pc, npc;
        sigset_t set;
        unsigned seta[_COMPAT_NSIG_WORDS];
        int err, i;
@@ -273,8 +254,13 @@ void do_sigreturn32(struct pt_regs *regs)
        pt_regs_clear_syscall(regs);
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state32(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, compat_ptr(fpu_save));
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(compat_ptr(rwin_save)))
+                       goto segv;
+       }
        err |= __get_user(seta[0], &sf->info.si_mask);
        err |= copy_from_user(seta+1, &sf->extramask,
                              (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
@@ -300,7 +286,9 @@ segv:
 asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 {
        struct rt_signal_frame32 __user *sf;
-       unsigned int psr, pc, npc, fpu_save, u_ss_sp;
+       unsigned int psr, pc, npc, u_ss_sp;
+       compat_uptr_t fpu_save;
+       compat_uptr_t rwin_save;
        mm_segment_t old_fs;
        sigset_t set;
        compat_sigset_t seta;
@@ -359,8 +347,8 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
        pt_regs_clear_syscall(regs);
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state32(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, compat_ptr(fpu_save));
        err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t));
        err |= __get_user(u_ss_sp, &sf->stack.ss_sp);
        st.ss_sp = compat_ptr(u_ss_sp);
@@ -376,6 +364,12 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
        do_sigaltstack((stack_t __user *) &st, NULL, (unsigned long)sf);
        set_fs(old_fs);
        
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(compat_ptr(rwin_save)))
+                       goto segv;
+       }
+
        switch (_NSIG_WORDS) {
                case 4: set.sig[3] = seta.sig[6] + (((long)seta.sig[7]) << 32);
                case 3: set.sig[2] = seta.sig[4] + (((long)seta.sig[5]) << 32);
@@ -433,26 +427,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
        return (void __user *) sp;
 }
 
-static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err = 0;
-       
-       fprs = current_thread_info()->fpsaved[0];
-       if (fprs & FPRS_DL)
-               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-                                   (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-                                   (sizeof(unsigned int) * 32));
-       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       err |= __put_user(fprs, &fpu->si_fprs);
-
-       return err;
-}
-
 /* The I-cache flush instruction only works in the primary ASI, which
  * right now is the nucleus, aka. kernel space.
  *
@@ -515,18 +489,23 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                         int signo, sigset_t *oldset)
 {
        struct signal_frame32 __user *sf;
+       int i, err, wsaved;
+       void __user *tail;
        int sigframe_size;
        u32 psr;
-       int i, err;
        unsigned int seta[_COMPAT_NSIG_WORDS];
 
        /* 1. Make sure everything is clean */
        synchronize_user_stack();
        save_and_clear_fpu();
        
-       sigframe_size = SF_ALIGNEDSZ;
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
+
+       sigframe_size = sizeof(*sf);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
 
        sf = (struct signal_frame32 __user *)
                get_sigframe(&ka->sa, regs, sigframe_size);
@@ -534,8 +513,7 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
        if (invalid_frame_pointer(sf, sigframe_size))
                goto sigill;
 
-       if (get_thread_wsaved() != 0)
-               goto sigill;
+       tail = (sf + 1);
 
        /* 2. Save the current process state */
        if (test_thread_flag(TIF_32BIT)) {
@@ -560,11 +538,22 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                          &sf->v8plus.asi);
 
        if (psr & PSR_EF) {
-               err |= save_fpu_state32(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user((u64)fp, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user((u64)rwp, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
 
        switch (_NSIG_WORDS) {
        case 4: seta[7] = (oldset->sig[3] >> 32);
@@ -580,10 +569,21 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
        err |= __copy_to_user(sf->extramask, seta + 1,
                              (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
 
-       err |= copy_in_user((u32 __user *)sf,
-                           (u32 __user *)(regs->u_regs[UREG_FP]),
-                           sizeof(struct reg_window32));
-       
+       if (!wsaved) {
+               err |= copy_in_user((u32 __user *)sf,
+                                   (u32 __user *)(regs->u_regs[UREG_FP]),
+                                   sizeof(struct reg_window32));
+       } else {
+               struct reg_window *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               for (i = 0; i < 8; i++)
+                       err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+               for (i = 0; i < 6; i++)
+                       err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+               err |= __put_user(rp->ins[6], &sf->ss.fp);
+               err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+       }       
        if (err)
                goto sigsegv;
 
@@ -613,7 +613,6 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/
                if (err)
                        goto sigsegv;
-
                flush_signal_insns(address);
        }
        return 0;
@@ -632,18 +631,23 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                            siginfo_t *info)
 {
        struct rt_signal_frame32 __user *sf;
+       int i, err, wsaved;
+       void __user *tail;
        int sigframe_size;
        u32 psr;
-       int i, err;
        compat_sigset_t seta;
 
        /* 1. Make sure everything is clean */
        synchronize_user_stack();
        save_and_clear_fpu();
        
-       sigframe_size = RT_ALIGNEDSZ;
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
+
+       sigframe_size = sizeof(*sf);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
 
        sf = (struct rt_signal_frame32 __user *)
                get_sigframe(&ka->sa, regs, sigframe_size);
@@ -651,8 +655,7 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
        if (invalid_frame_pointer(sf, sigframe_size))
                goto sigill;
 
-       if (get_thread_wsaved() != 0)
-               goto sigill;
+       tail = (sf + 1);
 
        /* 2. Save the current process state */
        if (test_thread_flag(TIF_32BIT)) {
@@ -677,11 +680,22 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                          &sf->v8plus.asi);
 
        if (psr & PSR_EF) {
-               err |= save_fpu_state32(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user((u64)fp, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user((u64)rwp, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
 
        /* Update the siginfo structure.  */
        err |= copy_siginfo_to_user32(&sf->info, info);
@@ -703,9 +717,21 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
        }
        err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t));
 
-       err |= copy_in_user((u32 __user *)sf,
-                           (u32 __user *)(regs->u_regs[UREG_FP]),
-                           sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= copy_in_user((u32 __user *)sf,
+                                   (u32 __user *)(regs->u_regs[UREG_FP]),
+                                   sizeof(struct reg_window32));
+       } else {
+               struct reg_window *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               for (i = 0; i < 8; i++)
+                       err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+               for (i = 0; i < 6; i++)
+                       err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+               err |= __put_user(rp->ins[6], &sf->ss.fp);
+               err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+       }
        if (err)
                goto sigsegv;
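
Both setup_frame32() and setup_rt_frame32() now use the same tail-packing scheme: the fixed frame is followed in user memory by an optional FPU save area and an optional register-window save area, and the frame stores user pointers only to the blocks actually emitted. Stripped of error handling, the layout logic is (kernel context assumed):

    void __user *tail = sf + 1;     /* first byte past the fixed frame */

    if (fpu_in_use) {
            __siginfo_fpu_t __user *fp = tail;
            tail += sizeof(*fp);    /* save FPU state at fp, record fp in sf->fpu_save */
    }
    if (wsaved) {
            __siginfo_rwin_t __user *rwp = tail;
            tail += sizeof(*rwp);   /* save windows at rwp, record rwp in sf->rwin_save */
    }
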
        
index 5e5c5fd03783c997f5c344025e8f4784182a0ddc..04ede8f04add3f397ca46c8ebee249d85c92213c 100644 (file)
@@ -26,6 +26,8 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>    /* flush_sig_insns */
 
+#include "sigutil.h"
+
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
@@ -39,8 +41,8 @@ struct signal_frame {
        unsigned long           insns[2] __attribute__ ((aligned (8)));
        unsigned int            extramask[_NSIG_WORDS - 1];
        unsigned int            extra_size; /* Should be 0 */
-       __siginfo_fpu_t         fpu_state;
-};
+       __siginfo_rwin_t __user *rwin_save;
+} __attribute__((aligned(8)));
 
 struct rt_signal_frame {
        struct sparc_stackf     ss;
@@ -51,8 +53,8 @@ struct rt_signal_frame {
        unsigned int            insns[2];
        stack_t                 stack;
        unsigned int            extra_size; /* Should be 0 */
-       __siginfo_fpu_t         fpu_state;
-};
+       __siginfo_rwin_t __user *rwin_save;
+} __attribute__((aligned(8)));
 
 /* Align macros */
 #define SF_ALIGNEDSZ  (((sizeof(struct signal_frame) + 7) & (~7)))
@@ -79,43 +81,13 @@ asmlinkage int sys_sigsuspend(old_sigset_t set)
        return _sigpause_common(set);
 }
 
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       int err;
-#ifdef CONFIG_SMP
-       if (test_tsk_thread_flag(current, TIF_USEDFPU))
-               regs->psr &= ~PSR_EF;
-#else
-       if (current == last_task_used_math) {
-               last_task_used_math = NULL;
-               regs->psr &= ~PSR_EF;
-       }
-#endif
-       set_used_math();
-       clear_tsk_thread_flag(current, TIF_USEDFPU);
-
-       if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
-               return -EFAULT;
-
-       err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
-                              (sizeof(unsigned long) * 32));
-       err |= __get_user(current->thread.fsr, &fpu->si_fsr);
-       err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-       if (current->thread.fpqdepth != 0)
-               err |= __copy_from_user(&current->thread.fpqueue[0],
-                                       &fpu->si_fpqueue[0],
-                                       ((sizeof(unsigned long) +
-                                       (sizeof(unsigned long *)))*16));
-       return err;
-}
-
 asmlinkage void do_sigreturn(struct pt_regs *regs)
 {
        struct signal_frame __user *sf;
        unsigned long up_psr, pc, npc;
        sigset_t set;
        __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
        int err;
 
        /* Always make any pending restarted system calls return -EINTR */
@@ -150,9 +122,11 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
        pt_regs_clear_syscall(regs);
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-
        if (fpu_save)
                err |= restore_fpu_state(regs, fpu_save);
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (rwin_save)
+               err |= restore_rwin_state(rwin_save);
 
        /* This is pretty much atomic; no amount of locking would prevent
         * the races that exist anyway.
@@ -180,6 +154,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
        struct rt_signal_frame __user *sf;
        unsigned int psr, pc, npc;
        __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
        mm_segment_t old_fs;
        sigset_t set;
        stack_t st;
@@ -207,8 +182,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
        pt_regs_clear_syscall(regs);
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-
-       if (fpu_save)
+       if (!err && fpu_save)
                err |= restore_fpu_state(regs, fpu_save);
        err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
        
@@ -228,6 +202,12 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
        do_sigaltstack((const stack_t __user *) &st, NULL, (unsigned long)sf);
        set_fs(old_fs);
 
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(rwin_save))
+                       goto segv;
+       }
+
        sigdelsetmask(&set, ~_BLOCKABLE);
        spin_lock_irq(&current->sighand->siglock);
        current->blocked = set;
@@ -280,53 +260,23 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re
        return (void __user *) sp;
 }
 
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       int err = 0;
-#ifdef CONFIG_SMP
-       if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
-               put_psr(get_psr() | PSR_EF);
-               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
-               regs->psr &= ~(PSR_EF);
-               clear_tsk_thread_flag(current, TIF_USEDFPU);
-       }
-#else
-       if (current == last_task_used_math) {
-               put_psr(get_psr() | PSR_EF);
-               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
-               last_task_used_math = NULL;
-               regs->psr &= ~(PSR_EF);
-       }
-#endif
-       err |= __copy_to_user(&fpu->si_float_regs[0],
-                             &current->thread.float_regs[0],
-                             (sizeof(unsigned long) * 32));
-       err |= __put_user(current->thread.fsr, &fpu->si_fsr);
-       err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-       if (current->thread.fpqdepth != 0)
-               err |= __copy_to_user(&fpu->si_fpqueue[0],
-                                     &current->thread.fpqueue[0],
-                                     ((sizeof(unsigned long) +
-                                     (sizeof(unsigned long *)))*16));
-       clear_used_math();
-       return err;
-}
-
 static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
                       int signo, sigset_t *oldset)
 {
        struct signal_frame __user *sf;
-       int sigframe_size, err;
+       int sigframe_size, err, wsaved;
+       void __user *tail;
 
        /* 1. Make sure everything is clean */
        synchronize_user_stack();
 
-       sigframe_size = SF_ALIGNEDSZ;
-       if (!used_math())
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = current_thread_info()->w_saved;
+
+       sigframe_size = sizeof(*sf);
+       if (used_math())
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
 
        sf = (struct signal_frame __user *)
                get_sigframe(&ka->sa, regs, sigframe_size);
@@ -334,8 +284,7 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
        if (invalid_frame_pointer(sf, sigframe_size))
                goto sigill_and_return;
 
-       if (current_thread_info()->w_saved != 0)
-               goto sigill_and_return;
+       tail = sf + 1;
 
        /* 2. Save the current process state */
        err = __copy_to_user(&sf->info.si_regs, regs, sizeof(struct pt_regs));
@@ -343,17 +292,34 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
        err |= __put_user(0, &sf->extra_size);
 
        if (used_math()) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user(fp, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user(rwp, &sf->rwin_save);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
 
        err |= __put_user(oldset->sig[0], &sf->info.si_mask);
        err |= __copy_to_user(sf->extramask, &oldset->sig[1],
                              (_NSIG_WORDS - 1) * sizeof(unsigned int));
-       err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-                             sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+                                     sizeof(struct reg_window32));
+       } else {
+               struct reg_window32 *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+       }
        if (err)
                goto sigsegv;
        
@@ -399,21 +365,24 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
                          int signo, sigset_t *oldset, siginfo_t *info)
 {
        struct rt_signal_frame __user *sf;
-       int sigframe_size;
+       int sigframe_size, wsaved;
+       void __user *tail;
        unsigned int psr;
        int err;
 
        synchronize_user_stack();
-       sigframe_size = RT_ALIGNEDSZ;
-       if (!used_math())
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = current_thread_info()->w_saved;
+       sigframe_size = sizeof(*sf);
+       if (used_math())
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
        sf = (struct rt_signal_frame __user *)
                get_sigframe(&ka->sa, regs, sigframe_size);
        if (invalid_frame_pointer(sf, sigframe_size))
                goto sigill;
-       if (current_thread_info()->w_saved != 0)
-               goto sigill;
 
+       tail = sf + 1;
        err  = __put_user(regs->pc, &sf->regs.pc);
        err |= __put_user(regs->npc, &sf->regs.npc);
        err |= __put_user(regs->y, &sf->regs.y);
@@ -425,11 +394,21 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
        err |= __put_user(0, &sf->extra_size);
 
        if (psr & PSR_EF) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user(fp, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user(rwp, &sf->rwin_save);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
        err |= __copy_to_user(&sf->mask, &oldset->sig[0], sizeof(sigset_t));
        
        /* Setup sigaltstack */
@@ -437,8 +416,15 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
        err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags);
        err |= __put_user(current->sas_ss_size, &sf->stack.ss_size);
        
-       err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-                             sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+                                     sizeof(struct reg_window32));
+       } else {
+               struct reg_window32 *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+       }
 
        err |= copy_siginfo_to_user(&sf->info, info);
 
index 006fe4515886dc6ae2a7a8e6cc9b6df9c16fda46..47509df3b893acfb365ec503cbe00f2dabeea2ac 100644 (file)
@@ -34,6 +34,7 @@
 
 #include "entry.h"
 #include "systbls.h"
+#include "sigutil.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
@@ -236,7 +237,7 @@ struct rt_signal_frame {
        __siginfo_fpu_t __user  *fpu_save;
        stack_t                 stack;
        sigset_t                mask;
-       __siginfo_fpu_t         fpu_state;
+       __siginfo_rwin_t        *rwin_save;
 };
 
 static long _sigpause_common(old_sigset_t set)
@@ -266,33 +267,12 @@ asmlinkage long sys_sigsuspend(old_sigset_t set)
        return _sigpause_common(set);
 }
 
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err;
-
-       err = __get_user(fprs, &fpu->si_fprs);
-       fprs_write(0);
-       regs->tstate &= ~TSTATE_PEF;
-       if (fprs & FPRS_DL)
-               err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
-                              (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
-                              (sizeof(unsigned int) * 32));
-       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       current_thread_info()->fpsaved[0] |= fprs;
-       return err;
-}
-
 void do_rt_sigreturn(struct pt_regs *regs)
 {
        struct rt_signal_frame __user *sf;
        unsigned long tpc, tnpc, tstate;
        __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
        sigset_t set;
        int err;
 
@@ -325,8 +305,8 @@ void do_rt_sigreturn(struct pt_regs *regs)
        regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
 
        err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, fpu_save);
 
        err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
        err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf);
@@ -334,6 +314,12 @@ void do_rt_sigreturn(struct pt_regs *regs)
        if (err)
                goto segv;
 
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(rwin_save))
+                       goto segv;
+       }
+
        regs->tpc = tpc;
        regs->tnpc = tnpc;
 
@@ -351,34 +337,13 @@ segv:
 }
 
 /* Checks if the fp is valid */
-static int invalid_frame_pointer(void __user *fp, int fplen)
+static int invalid_frame_pointer(void __user *fp)
 {
        if (((unsigned long) fp) & 15)
                return 1;
        return 0;
 }
 
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err = 0;
-       
-       fprs = current_thread_info()->fpsaved[0];
-       if (fprs & FPRS_DL)
-               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-                                   (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-                                   (sizeof(unsigned int) * 32));
-       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       err |= __put_user(fprs, &fpu->si_fprs);
-
-       return err;
-}
-
 static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize)
 {
        unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
@@ -414,34 +379,48 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
               int signo, sigset_t *oldset, siginfo_t *info)
 {
        struct rt_signal_frame __user *sf;
-       int sigframe_size, err;
+       int wsaved, err, sf_size;
+       void __user *tail;
 
        /* 1. Make sure everything is clean */
        synchronize_user_stack();
        save_and_clear_fpu();
        
-       sigframe_size = sizeof(struct rt_signal_frame);
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
 
+       sf_size = sizeof(struct rt_signal_frame);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sf_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sf_size += sizeof(__siginfo_rwin_t);
        sf = (struct rt_signal_frame __user *)
-               get_sigframe(ka, regs, sigframe_size);
-       
-       if (invalid_frame_pointer (sf, sigframe_size))
-               goto sigill;
+               get_sigframe(ka, regs, sf_size);
 
-       if (get_thread_wsaved() != 0)
+       if (invalid_frame_pointer (sf))
                goto sigill;
 
+       tail = (sf + 1);
+
        /* 2. Save the current process state */
        err = copy_to_user(&sf->regs, regs, sizeof (*regs));
 
        if (current_thread_info()->fpsaved[0] & FPRS_FEF) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fpu_save = tail;
+               tail += sizeof(__siginfo_fpu_t);
+               err |= save_fpu_state(regs, fpu_save);
+               err |= __put_user((u64)fpu_save, &sf->fpu_save);
        } else {
                err |= __put_user(0, &sf->fpu_save);
        }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwin_save = tail;
+               tail += sizeof(__siginfo_rwin_t);
+               err |= save_rwin_state(wsaved, rwin_save);
+               err |= __put_user((u64)rwin_save, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
        
        /* Setup sigaltstack */
        err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp);
@@ -450,10 +429,17 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 
        err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t));
 
-       err |= copy_in_user((u64 __user *)sf,
-                           (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS),
-                           sizeof(struct reg_window));
+       if (!wsaved) {
+               err |= copy_in_user((u64 __user *)sf,
+                                   (u64 __user *)(regs->u_regs[UREG_FP] +
+                                                  STACK_BIAS),
+                                   sizeof(struct reg_window));
+       } else {
+               struct reg_window *rp;
 
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= copy_to_user(sf, rp, sizeof(struct reg_window));
+       }
        if (info)
                err |= copy_siginfo_to_user(&sf->info, info);
        else {
diff --git a/arch/sparc/kernel/sigutil.h b/arch/sparc/kernel/sigutil.h
new file mode 100644 (file)
index 0000000..d223aa4
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _SIGUTIL_H
+#define _SIGUTIL_H
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin);
+int restore_rwin_state(__siginfo_rwin_t __user *rp);
+
+#endif /* _SIGUTIL_H */
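
sigutil.h gives the 32-bit and 64-bit signal paths one shared set of save/restore helpers in place of the per-file static copies deleted above. Callers follow the pattern visible in the sigreturn hunks: fetch the user pointer from the frame and only act on it when it is non-NULL and nothing has failed yet. Roughly (kernel context assumed, names as in the patch):

    __siginfo_rwin_t __user *rwin_save;

    err |= __get_user(rwin_save, &sf->rwin_save);
    if (!err && rwin_save) {
            if (restore_rwin_state(rwin_save))
                    goto segv;      /* corrupt window data: force SIGSEGV */
    }
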
diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c
new file mode 100644 (file)
index 0000000..35c7897
--- /dev/null
@@ -0,0 +1,120 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       int err = 0;
+#ifdef CONFIG_SMP
+       if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
+               put_psr(get_psr() | PSR_EF);
+               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
+               regs->psr &= ~(PSR_EF);
+               clear_tsk_thread_flag(current, TIF_USEDFPU);
+       }
+#else
+       if (current == last_task_used_math) {
+               put_psr(get_psr() | PSR_EF);
+               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
+               last_task_used_math = NULL;
+               regs->psr &= ~(PSR_EF);
+       }
+#endif
+       err |= __copy_to_user(&fpu->si_float_regs[0],
+                             &current->thread.float_regs[0],
+                             (sizeof(unsigned long) * 32));
+       err |= __put_user(current->thread.fsr, &fpu->si_fsr);
+       err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+       if (current->thread.fpqdepth != 0)
+               err |= __copy_to_user(&fpu->si_fpqueue[0],
+                                     &current->thread.fpqueue[0],
+                                     ((sizeof(unsigned long) +
+                                     (sizeof(unsigned long *)))*16));
+       clear_used_math();
+       return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       int err;
+#ifdef CONFIG_SMP
+       if (test_tsk_thread_flag(current, TIF_USEDFPU))
+               regs->psr &= ~PSR_EF;
+#else
+       if (current == last_task_used_math) {
+               last_task_used_math = NULL;
+               regs->psr &= ~PSR_EF;
+       }
+#endif
+       set_used_math();
+       clear_tsk_thread_flag(current, TIF_USEDFPU);
+
+       if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
+               return -EFAULT;
+
+       err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
+                              (sizeof(unsigned long) * 32));
+       err |= __get_user(current->thread.fsr, &fpu->si_fsr);
+       err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+       if (current->thread.fpqdepth != 0)
+               err |= __copy_from_user(&current->thread.fpqueue[0],
+                                       &fpu->si_fpqueue[0],
+                                       ((sizeof(unsigned long) +
+                                       (sizeof(unsigned long *)))*16));
+       return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+       int i, err = __put_user(wsaved, &rwin->wsaved);
+
+       for (i = 0; i < wsaved; i++) {
+               struct reg_window32 *rp;
+               unsigned long fp;
+
+               rp = &current_thread_info()->reg_window[i];
+               fp = current_thread_info()->rwbuf_stkptrs[i];
+               err |= copy_to_user(&rwin->reg_window[i], rp,
+                                   sizeof(struct reg_window32));
+               err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+       }
+       return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+       struct thread_info *t = current_thread_info();
+       int i, wsaved, err;
+
+       __get_user(wsaved, &rp->wsaved);
+       if (wsaved > NSWINS)
+               return -EFAULT;
+
+       err = 0;
+       for (i = 0; i < wsaved; i++) {
+               err |= copy_from_user(&t->reg_window[i],
+                                     &rp->reg_window[i],
+                                     sizeof(struct reg_window32));
+               err |= __get_user(t->rwbuf_stkptrs[i],
+                                 &rp->rwbuf_stkptrs[i]);
+       }
+       if (err)
+               return err;
+
+       t->w_saved = wsaved;
+       synchronize_user_stack();
+       if (t->w_saved)
+               return -EFAULT;
+       return 0;
+
+}
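
These new files factor the FPU and register-window save/restore helpers out of the sparc signal-delivery code behind a common sigutil.h interface. They lean on the kernel's usual error-accumulation idiom: every user-space accessor returns nonzero on fault, and the results are OR-ed into one err that is checked once at the end. A minimal sketch of the idiom (demo_frame and demo_state are hypothetical stand-ins for a signal frame and the thread state being saved):

    /* Sketch only: each accessor returns nonzero on fault, so a single
     * check at the end covers every copy made along the way. */
    static int demo_save(struct demo_frame __user *frame,
                         const struct demo_state *st)
    {
            int err = 0;

            err |= __put_user(st->a, &frame->a);
            err |= __put_user(st->b, &frame->b);
            err |= copy_to_user(frame->buf, st->buf, sizeof(st->buf));
            return err;
    }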
diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c
new file mode 100644 (file)
index 0000000..e7dc508
--- /dev/null
@@ -0,0 +1,93 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       unsigned long *fpregs = current_thread_info()->fpregs;
+       unsigned long fprs;
+       int err = 0;
+
+       fprs = current_thread_info()->fpsaved[0];
+       if (fprs & FPRS_DL)
+               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
+                                   (sizeof(unsigned int) * 32));
+       if (fprs & FPRS_DU)
+               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
+                                   (sizeof(unsigned int) * 32));
+       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+       err |= __put_user(fprs, &fpu->si_fprs);
+
+       return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       unsigned long *fpregs = current_thread_info()->fpregs;
+       unsigned long fprs;
+       int err;
+
+       err = __get_user(fprs, &fpu->si_fprs);
+       fprs_write(0);
+       regs->tstate &= ~TSTATE_PEF;
+       if (fprs & FPRS_DL)
+               err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
+                              (sizeof(unsigned int) * 32));
+       if (fprs & FPRS_DU)
+               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
+                              (sizeof(unsigned int) * 32));
+       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+       current_thread_info()->fpsaved[0] |= fprs;
+       return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+       int i, err = __put_user(wsaved, &rwin->wsaved);
+
+       for (i = 0; i < wsaved; i++) {
+               struct reg_window *rp = &current_thread_info()->reg_window[i];
+               unsigned long fp = current_thread_info()->rwbuf_stkptrs[i];
+
+               err |= copy_to_user(&rwin->reg_window[i], rp,
+                                   sizeof(struct reg_window));
+               err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+       }
+       return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+       struct thread_info *t = current_thread_info();
+       int i, wsaved, err;
+
+       __get_user(wsaved, &rp->wsaved);
+       if (wsaved > NSWINS)
+               return -EFAULT;
+
+       err = 0;
+       for (i = 0; i < wsaved; i++) {
+               err |= copy_from_user(&t->reg_window[i],
+                                     &rp->reg_window[i],
+                                     sizeof(struct reg_window));
+               err |= __get_user(t->rwbuf_stkptrs[i],
+                                 &rp->rwbuf_stkptrs[i]);
+       }
+       if (err)
+               return err;
+
+       set_thread_wsaved(wsaved);
+       synchronize_user_stack();
+       if (get_thread_wsaved())
+               return -EFAULT;
+       return 0;
+}
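
Note the bounds check in restore_rwin_state(): wsaved comes straight from user space and is used to index fixed-size per-thread arrays, so anything above NSWINS is rejected before the copy loop runs. A hedged sketch of that validation with hypothetical names (the explicit negative check is an extra precaution, not present in the code above):

    static int demo_restore_rwin(struct demo_rwin __user *rp)
    {
            int wsaved;

            if (__get_user(wsaved, &rp->wsaved))
                    return -EFAULT;
            /* user-controlled count: reject anything outside the arrays */
            if (wsaved < 0 || wsaved > NSWINS)
                    return -EFAULT;
            /* ... wsaved is now a safe loop bound for reg_window[] ... */
            return 0;
    }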
index 44e5faf1ad5f47dbee83ff3b00e77f479d907d0d..d97f3eb72e064d70cf1a6dbb9cbd8d8cac4c2d40 100644 (file)
@@ -81,7 +81,6 @@ SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4)
 SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5)
 SIGN2(sys32_bdflush, sys_bdflush, %o0, %o1)
 SIGN1(sys32_mlockall, sys_mlockall, %o0)
-SIGN1(sys32_nfsservctl, compat_sys_nfsservctl, %o0)
 SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1)
 SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1)
 SIGN1(sys32_io_submit, compat_sys_io_submit, %o1)
index 6e492d59f6b1d7fa2d9f7974527ad56b5d7cc967..09d8ec454450bcfcbfa3ef7e041305c5fe1a7501 100644 (file)
@@ -67,7 +67,7 @@ sys_call_table:
 /*235*/        .long sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
 /*240*/        .long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
 /*245*/        .long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
-/*250*/        .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+/*250*/        .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
 /*255*/        .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
 /*260*/        .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
 /*265*/        .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
index f566518483b5bcda9998d635adabc18a18354373..edbec45d46884c9e1d33ab92be65119f0137369d 100644 (file)
@@ -68,7 +68,7 @@ sys_call_table32:
        .word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys32_mlockall
 /*240*/        .word sys_munlockall, sys32_sched_setparam, sys32_sched_getparam, sys32_sched_setscheduler, sys32_sched_getscheduler
        .word sys_sched_yield, sys32_sched_get_priority_max, sys32_sched_get_priority_min, sys32_sched_rr_get_interval, compat_sys_nanosleep
-/*250*/        .word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
+/*250*/        .word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys_nis_syscall
        .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
 /*260*/        .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
        .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
@@ -145,7 +145,7 @@ sys_call_table:
        .word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
 /*240*/        .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
        .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
-/*250*/        .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+/*250*/        .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
        .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
 /*260*/        .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
        .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
index a0e866d233eeb833ef9740e1247962d298cc5971..54edb207ff3a0cb181811219cdc46500c8ba66d5 100644 (file)
@@ -672,7 +672,7 @@ ia32_sys_call_table:
        .quad sys32_vm86_warning        /* vm86 */ 
        .quad quiet_ni_syscall  /* query_module */
        .quad sys_poll
-       .quad compat_sys_nfsservctl
+       .quad quiet_ni_syscall /* old nfsservctl */
        .quad sys_setresgid16   /* 170 */
        .quad sys_getresgid16
        .quad sys_prctl
index d92641cc7accd79aa30b382314a92cee2a93827d..2010405734442f296432c047e2bbedf70b9614a5 100644 (file)
@@ -414,7 +414,7 @@ __SYSCALL(__NR_query_module, sys_ni_syscall)
 __SYSCALL(__NR_quotactl, sys_quotactl)
 
 #define __NR_nfsservctl                                180
-__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
 
 /* reserved for LiS/STREAMS */
 #define __NR_getpmsg                           181
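
The nfsservctl removals in this and the following syscall tables all apply the same rule: a retired system call keeps its slot so ABI numbering never shifts, and the slot is rewired to the generic stub (or, on sparc, to the historical sys_nis_syscall placeholder). The stub, as defined in kernel/sys_ni.c, is simply:

    asmlinkage long sys_ni_syscall(void)
    {
            return -ENOSYS;    /* "function not implemented" for retired slots */
    }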
index adc66c3a1fef2417be8741d334d5f8460c23ff10..34b18594e72467212f0e430c329e9fa523f3aa09 100644 (file)
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
            ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
            APIC_DM_INIT;
        uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-       mdelay(10);
 
        val = (1UL << UVH_IPI_INT_SEND_SHFT) |
            (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
index 08119a37e53c1f11b044ce5c865c7ad9339c4fd0..6b96110bb0c33078bdc05de2b727fc5bcb03eb71 100644 (file)
@@ -149,7 +149,6 @@ struct set_mtrr_data {
  */
 static int mtrr_rendezvous_handler(void *info)
 {
-#ifdef CONFIG_SMP
        struct set_mtrr_data *data = info;
 
        /*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
        } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
                mtrr_if->set_all();
        }
-#endif
        return 0;
 }
 
index 5c1a91974918d1b6104c9068ed4eef41ff6e30ab..f3f6f5344001ee47b17eb0ff029179751dfd5171 100644 (file)
@@ -54,6 +54,7 @@
 #include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
 #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
 661:   pushl_cfi $do_general_protection
 662:
 .section .altinstructions,"a"
-       .balign 4
-       .long 661b
-       .long 663f
-       .word X86_FEATURE_XMM
-       .byte 662b-661b
-       .byte 664f-663f
+       altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
 .previous
 .section .altinstr_replacement,"ax"
 663:   pushl $do_simd_coprocessor_error
index fbb0a045a1a23bc9bdc2f9dd23c6c9673e2e13f7..bc19be332bc99544ccbe426975b8b0cdb136f293 100644 (file)
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
        .long ptregs_vm86
        .long sys_ni_syscall    /* Old sys_query_module */
        .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* Old nfsservctl */
        .long sys_setresgid16   /* 170 */
        .long sys_getresgid16
        .long sys_prctl
index 7000e74b30877bea018f46946f8d99b1275e7624..58425adc22c6ae156b01b916fe6703428b6391f9 100644 (file)
@@ -689,7 +689,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
                        irq_attr.trigger = 1;
                        irq_attr.polarity = 1;
                        io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
-               }
+               } else
+                       pentry->irq = 0; /* No irq */
+
                switch (pentry->type) {
                case SFI_DEV_TYPE_IPC:
                        /* ID as IRQ is a hack that will go away */
index 8b9940e78e2fa6da6751f115f4d8b0ce87f5b53a..7cce722667b83dd06b506d06b771a572c0843598 100644 (file)
@@ -161,13 +161,13 @@ restart:
        if (inbuf && inlen) {
                /* write data to EC */
                for (i = 0; i < inlen; i++) {
+                       pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
+                       outb(inbuf[i], 0x68);
                        if (wait_on_ibf(0x6c, 0)) {
                                printk(KERN_ERR "olpc-ec:  timeout waiting for"
                                                " EC accept data!\n");
                                goto err;
                        }
-                       pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
-                       outb(inbuf[i], 0x68);
                }
        }
        if (outbuf && outlen) {
index e2800affa754d66d8ac3533f25834f0412ec9aee..e354bceee0e0490d29940736b9716f5b2be32d62 100644 (file)
@@ -43,7 +43,7 @@ __kernel_vsyscall:
        .space 7,0x90
 
        /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
-       jmp .Lenter_kernel
+       int $0x80
        /* 16: System call normal return point is here! */
 VDSO32_SYSENTER_RETURN:        /* Symbol used by sysenter.c via vdso32-syms.h */
        pop %ebp
index a6f934f37f1abe463432eb43644dbce871280690..798ee6d285a10d9b350e11f1a53c212f8f72c803 100644 (file)
@@ -455,7 +455,7 @@ __SYSCALL(203, sys_reboot, 3)
 #define __NR_quotactl                          204
 __SYSCALL(204, sys_quotactl, 4)
 #define __NR_nfsservctl                        205
-__SYSCALL(205, sys_nfsservctl, 3)
+__SYSCALL(205, sys_ni_syscall, 0)
 #define __NR__sysctl                           206
 __SYSCALL(206, sys_sysctl, 1)
 #define __NR_bdflush                           207
index cf7a0c78805278e64f195921b3991c577691da66..65cd74832450507b9f7b1949b6ca85c648109ff1 100644 (file)
@@ -397,6 +397,7 @@ static int remove_nodes(struct device *dev,
 
 static int release_nodes(struct device *dev, struct list_head *first,
                         struct list_head *end, unsigned long flags)
+       __releases(&dev->devres_lock)
 {
        LIST_HEAD(todo);
        int cnt;
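
The added __releases() annotation is for sparse's lock-context checker: it documents that release_nodes() is entered with dev->devres_lock held and drops it internally, so the apparently unbalanced unlock is intentional. A sketch of the annotation on a hypothetical helper:

    /* Hypothetical helper: the annotation tells sparse the lock state
     * deliberately changes across the function boundary. */
    static void demo_unlock_and_flush(struct device *dev, unsigned long flags)
            __releases(&dev->devres_lock)
    {
            spin_unlock_irqrestore(&dev->devres_lock, flags);
            /* ... work that must run outside the lock ... */
    }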
index 33e1bed68fddf771ae9b12a716114215b59b5f68..a4760e095ff51b36a58871cdf9329bf4573f6c1d 100644 (file)
@@ -376,7 +376,7 @@ int devtmpfs_mount(const char *mntdir)
        return err;
 }
 
-static __initdata DECLARE_COMPLETION(setup_done);
+static DECLARE_COMPLETION(setup_done);
 
 static int handle(const char *name, mode_t mode, struct device *dev)
 {
index bbb03e6f7255b715e894080e66a7ccfae34b2c24..06ed6b4e7df5ecc0d236cd73ee2690933f8e8ee7 100644 (file)
@@ -521,11 +521,6 @@ static int _request_firmware(const struct firmware **firmware_p,
        if (!firmware_p)
                return -EINVAL;
 
-       if (WARN_ON(usermodehelper_is_disabled())) {
-               dev_err(device, "firmware: %s will not be loaded\n", name);
-               return -EBUSY;
-       }
-
        *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL);
        if (!firmware) {
                dev_err(device, "%s: kmalloc(struct firmware) failed\n",
@@ -539,6 +534,12 @@ static int _request_firmware(const struct firmware **firmware_p,
                return 0;
        }
 
+       if (WARN_ON(usermodehelper_is_disabled())) {
+               dev_err(device, "firmware: %s will not be loaded\n", name);
+               retval = -EBUSY;
+               goto out;
+       }
+
        if (uevent)
                dev_dbg(device, "firmware: requesting %s\n", name);
 
index 0cad9c7f6bb50f68bdb0a371e44d51530a02ec06..99a5272d7c2fde1d3b148fc0fd2150fc9ffb8121 100644 (file)
@@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(platform_bus);
 
 /**
  * arch_setup_pdev_archdata - Allow manipulation of archdata before its used
- * @dev: platform device
+ * @pdev: platform device
  *
  * This is called before platform_device_add() such that any pdev_archdata may
  * be setup before the platform_notifier is called.  So if a user needs to
index a846b2f95cfbc5bfbf253f5955da6cb8cd07804f..2c18d584066d561cc08f35f864a41d2c9471b890 100644 (file)
@@ -19,7 +19,7 @@
 
 struct pm_clk_data {
        struct list_head clock_list;
-       struct mutex lock;
+       spinlock_t lock;
 };
 
 enum pce_status {
@@ -73,9 +73,9 @@ int pm_clk_add(struct device *dev, const char *con_id)
                }
        }
 
-       mutex_lock(&pcd->lock);
+       spin_lock_irq(&pcd->lock);
        list_add_tail(&ce->node, &pcd->clock_list);
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irq(&pcd->lock);
        return 0;
 }
 
@@ -83,8 +83,8 @@ int pm_clk_add(struct device *dev, const char *con_id)
  * __pm_clk_remove - Destroy PM clock entry.
  * @ce: PM clock entry to destroy.
  *
- * This routine must be called under the mutex protecting the PM list of clocks
- * corresponding the the @ce's device.
+ * This routine must be called under the spinlock protecting the PM list of
+ * clocks corresponding to the @ce's device.
  */
 static void __pm_clk_remove(struct pm_clock_entry *ce)
 {
@@ -123,7 +123,7 @@ void pm_clk_remove(struct device *dev, const char *con_id)
        if (!pcd)
                return;
 
-       mutex_lock(&pcd->lock);
+       spin_lock_irq(&pcd->lock);
 
        list_for_each_entry(ce, &pcd->clock_list, node) {
                if (!con_id && !ce->con_id) {
@@ -137,7 +137,7 @@ void pm_clk_remove(struct device *dev, const char *con_id)
                }
        }
 
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irq(&pcd->lock);
 }
 
 /**
@@ -158,7 +158,7 @@ int pm_clk_init(struct device *dev)
        }
 
        INIT_LIST_HEAD(&pcd->clock_list);
-       mutex_init(&pcd->lock);
+       spin_lock_init(&pcd->lock);
        dev->power.subsys_data = pcd;
        return 0;
 }
@@ -181,12 +181,12 @@ void pm_clk_destroy(struct device *dev)
 
        dev->power.subsys_data = NULL;
 
-       mutex_lock(&pcd->lock);
+       spin_lock_irq(&pcd->lock);
 
        list_for_each_entry_safe_reverse(ce, c, &pcd->clock_list, node)
                __pm_clk_remove(ce);
 
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irq(&pcd->lock);
 
        kfree(pcd);
 }
@@ -220,13 +220,14 @@ int pm_clk_suspend(struct device *dev)
 {
        struct pm_clk_data *pcd = __to_pcd(dev);
        struct pm_clock_entry *ce;
+       unsigned long flags;
 
        dev_dbg(dev, "%s()\n", __func__);
 
        if (!pcd)
                return 0;
 
-       mutex_lock(&pcd->lock);
+       spin_lock_irqsave(&pcd->lock, flags);
 
        list_for_each_entry_reverse(ce, &pcd->clock_list, node) {
                if (ce->status == PCE_STATUS_NONE)
@@ -238,7 +239,7 @@ int pm_clk_suspend(struct device *dev)
                }
        }
 
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irqrestore(&pcd->lock, flags);
 
        return 0;
 }
@@ -251,13 +252,14 @@ int pm_clk_resume(struct device *dev)
 {
        struct pm_clk_data *pcd = __to_pcd(dev);
        struct pm_clock_entry *ce;
+       unsigned long flags;
 
        dev_dbg(dev, "%s()\n", __func__);
 
        if (!pcd)
                return 0;
 
-       mutex_lock(&pcd->lock);
+       spin_lock_irqsave(&pcd->lock, flags);
 
        list_for_each_entry(ce, &pcd->clock_list, node) {
                if (ce->status == PCE_STATUS_NONE)
@@ -269,7 +271,7 @@ int pm_clk_resume(struct device *dev)
                }
        }
 
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irqrestore(&pcd->lock, flags);
 
        return 0;
 }
@@ -344,6 +346,7 @@ int pm_clk_suspend(struct device *dev)
 {
        struct pm_clk_data *pcd = __to_pcd(dev);
        struct pm_clock_entry *ce;
+       unsigned long flags;
 
        dev_dbg(dev, "%s()\n", __func__);
 
@@ -351,12 +354,12 @@ int pm_clk_suspend(struct device *dev)
        if (!pcd || !dev->driver)
                return 0;
 
-       mutex_lock(&pcd->lock);
+       spin_lock_irqsave(&pcd->lock, flags);
 
        list_for_each_entry_reverse(ce, &pcd->clock_list, node)
                clk_disable(ce->clk);
 
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irqrestore(&pcd->lock, flags);
 
        return 0;
 }
@@ -369,6 +372,7 @@ int pm_clk_resume(struct device *dev)
 {
        struct pm_clk_data *pcd = __to_pcd(dev);
        struct pm_clock_entry *ce;
+       unsigned long flags;
 
        dev_dbg(dev, "%s()\n", __func__);
 
@@ -376,12 +380,12 @@ int pm_clk_resume(struct device *dev)
        if (!pcd || !dev->driver)
                return 0;
 
-       mutex_lock(&pcd->lock);
+       spin_lock_irqsave(&pcd->lock, flags);
 
        list_for_each_entry(ce, &pcd->clock_list, node)
                clk_enable(ce->clk);
 
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irqrestore(&pcd->lock, flags);
 
        return 0;
 }
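
The mutex-to-spinlock conversion here is about calling context: the clock list is walked from runtime-PM and system-suspend callbacks that may run with interrupts disabled and must not sleep, which rules out a mutex. Callers known to be in process context with IRQs enabled take the plain _irq variants, while the suspend/resume paths use irqsave/irqrestore because they cannot assume the interrupt state. The two idioms, in a minimal sketch:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    /* process context, interrupts known to be enabled */
    static void demo_add(void)
    {
            spin_lock_irq(&demo_lock);
            /* ... mutate the protected list ... */
            spin_unlock_irq(&demo_lock);
    }

    /* callback context: preserve whatever IRQ state was in effect */
    static void demo_suspend(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&demo_lock, flags);
            /* ... walk the protected list; no sleeping here ... */
            spin_unlock_irqrestore(&demo_lock, flags);
    }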
index 873e2e4ac55f01795a3b174b103ce0960c484bc2..73b7b1a18fab6466db1ca9aa3f53a6bd4a420ca5 100644 (file)
@@ -15,6 +15,7 @@ MODULE_LICENSE("GPL");
 static int bcma_bus_match(struct device *dev, struct device_driver *drv);
 static int bcma_device_probe(struct device *dev);
 static int bcma_device_remove(struct device *dev);
+static int bcma_device_uevent(struct device *dev, struct kobj_uevent_env *env);
 
 static ssize_t manuf_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -49,6 +50,7 @@ static struct bus_type bcma_bus_type = {
        .match          = bcma_bus_match,
        .probe          = bcma_device_probe,
        .remove         = bcma_device_remove,
+       .uevent         = bcma_device_uevent,
        .dev_attrs      = bcma_device_attrs,
 };
 
@@ -227,6 +229,16 @@ static int bcma_device_remove(struct device *dev)
        return 0;
 }
 
+static int bcma_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+       struct bcma_device *core = container_of(dev, struct bcma_device, dev);
+
+       return add_uevent_var(env,
+                             "MODALIAS=bcma:m%04Xid%04Xrev%02Xcl%02X",
+                             core->id.manuf, core->id.id,
+                             core->id.rev, core->id.class);
+}
+
 static int __init bcma_modinit(void)
 {
        int err;
index a5854735bb2e7882124ad1dc2c1b9c9296c93820..db7cb8111fbe58a89f21ee6ccea50467356bd38a 100644 (file)
@@ -63,6 +63,7 @@ static struct usb_device_id ath3k_table[] = {
        /* Atheros AR3011 with sflash firmware*/
        { USB_DEVICE(0x0CF3, 0x3002) },
        { USB_DEVICE(0x13d3, 0x3304) },
+       { USB_DEVICE(0x0930, 0x0215) },
 
        /* Atheros AR9285 Malbec with sflash firmware */
        { USB_DEVICE(0x03F0, 0x311D) },
index 91d13a9e8c657f9b838c56ecbae6608965342bc0..3ef476070bafcfa88505ffc9ed72c8eb3f301092 100644 (file)
@@ -106,6 +106,7 @@ static struct usb_device_id blacklist_table[] = {
        /* Atheros 3011 with sflash firmware */
        { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE },
        { USB_DEVICE(0x13d3, 0x3304), .driver_info = BTUSB_IGNORE },
+       { USB_DEVICE(0x0930, 0x0215), .driver_info = BTUSB_IGNORE },
 
        /* Atheros AR9285 Malbec with sflash firmware */
        { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE },
@@ -256,7 +257,9 @@ static void btusb_intr_complete(struct urb *urb)
 
        err = usb_submit_urb(urb, GFP_ATOMIC);
        if (err < 0) {
-               if (err != -EPERM)
+               /* -EPERM: urb is being killed;
+                * -ENODEV: device got disconnected */
+               if (err != -EPERM && err != -ENODEV)
                        BT_ERR("%s urb %p failed to resubmit (%d)",
                                                hdev->name, urb, -err);
                usb_unanchor_urb(urb);
@@ -341,7 +344,9 @@ static void btusb_bulk_complete(struct urb *urb)
 
        err = usb_submit_urb(urb, GFP_ATOMIC);
        if (err < 0) {
-               if (err != -EPERM)
+               /* -EPERM: urb is being killed;
+                * -ENODEV: device got disconnected */
+               if (err != -EPERM && err != -ENODEV)
                        BT_ERR("%s urb %p failed to resubmit (%d)",
                                                hdev->name, urb, -err);
                usb_unanchor_urb(urb);
@@ -431,7 +436,9 @@ static void btusb_isoc_complete(struct urb *urb)
 
        err = usb_submit_urb(urb, GFP_ATOMIC);
        if (err < 0) {
-               if (err != -EPERM)
+               /* -EPERM: urb is being killed;
+                * -ENODEV: device got disconnected */
+               if (err != -EPERM && err != -ENODEV)
                        BT_ERR("%s urb %p failed to resubmit (%d)",
                                                hdev->name, urb, -err);
                usb_unanchor_urb(urb);
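
All three completion handlers resubmit their URB from the completion itself to keep data flowing; -EPERM (the URB is being killed) and -ENODEV (the device is gone) are expected during teardown, so they are no longer logged as errors. The pattern, reduced to a hypothetical handler:

    static void demo_urb_complete(struct urb *urb)
    {
            int err;

            if (urb->status)
                    return;    /* transfer failed or was cancelled */

            err = usb_submit_urb(urb, GFP_ATOMIC);
            if (err < 0 && err != -EPERM && err != -ENODEV)
                    pr_err("demo: urb %p failed to resubmit (%d)\n", urb, -err);
    }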
index b6f8a65c9960dd9cb5892f27e93db398fe3fd90d..8eca55deb3a35c4a0a6ffc41130e3422d22bd926 100644 (file)
@@ -379,9 +379,8 @@ static int __init smd_pkt_init(void)
        for (i = 0; i < NUM_SMD_PKT_PORTS; ++i) {
                smd_pkt_devp[i] = kzalloc(sizeof(struct smd_pkt_dev),
                                          GFP_KERNEL);
-               if (IS_ERR(smd_pkt_devp[i])) {
-                       r = PTR_ERR(smd_pkt_devp[i]);
-                       pr_err("kmalloc() failed %d\n", r);
+               if (!smd_pkt_devp[i]) {
+                       pr_err("kmalloc() failed\n");
                        goto clean_cdevs;
                }
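
The underlying bug is a mixed-up error convention: kzalloc() signals failure with NULL, never with an ERR_PTR-encoded pointer, so IS_ERR()/PTR_ERR() on its result can never observe the failure. The fix tests for NULL. The two conventions side by side, in a hypothetical helper:

    static int demo_alloc_and_get(struct device *demo_dev)
    {
            void *buf;
            struct clk *clk;

            buf = kzalloc(64, GFP_KERNEL);
            if (!buf)                   /* kzalloc: NULL on failure */
                    return -ENOMEM;

            clk = clk_get(demo_dev, NULL);
            if (IS_ERR(clk)) {          /* clk_get: errno in the pointer */
                    kfree(buf);
                    return PTR_ERR(clk);
            }

            clk_put(clk);
            kfree(buf);
            return 0;
    }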
 
index dc7c033ef587142ce080e33a2b2711a94ece1d89..32a77becc098534c2b0b77a585d48f4974538c65 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/clk.h>
 #include <linux/irq.h>
 #include <linux/err.h>
+#include <linux/delay.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/sh_timer.h>
@@ -150,13 +151,13 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start)
 
 static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate)
 {
-       int ret;
+       int k, ret;
 
        /* enable clock */
        ret = clk_enable(p->clk);
        if (ret) {
                dev_err(&p->pdev->dev, "cannot enable clock\n");
-               return ret;
+               goto err0;
        }
 
        /* make sure channel is disabled */
@@ -174,9 +175,38 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate)
        sh_cmt_write(p, CMCOR, 0xffffffff);
        sh_cmt_write(p, CMCNT, 0);
 
+       /*
+        * According to the sh73a0 user's manual, as CMCNT can be operated
+        * only by the RCLK (pseudo 32 kHz), there is one restriction on
+        * modifying the CMCNT register: two RCLK cycles must elapse before
+        * the register can be read, or before any value written to it is
+        * reflected in the LSI's actual operation.
+        *
+        * While at it, CMCNT is also supposed to be cleared at this point,
+        * so make sure the write has actually taken effect before
+        * continuing.  This will take two RCLK cycles at most.
+        */
+       for (k = 0; k < 100; k++) {
+               if (!sh_cmt_read(p, CMCNT))
+                       break;
+               udelay(1);
+       }
+
+       if (sh_cmt_read(p, CMCNT)) {
+               dev_err(&p->pdev->dev, "cannot clear CMCNT\n");
+               ret = -ETIMEDOUT;
+               goto err1;
+       }
+
        /* enable channel */
        sh_cmt_start_stop_ch(p, 1);
        return 0;
+ err1:
+       /* stop clock */
+       clk_disable(p->clk);
+
+ err0:
+       return ret;
 }
 
 static void sh_cmt_disable(struct sh_cmt_priv *p)
index 41841a3e3f99c9acd4c4fc72c972626d8dffdcee..17cef864506a7b6778b109dd0d0b570514eab6eb 100644 (file)
@@ -1198,6 +1198,10 @@ static int sbp2_remove(struct device *dev)
 {
        struct fw_unit *unit = fw_unit(dev);
        struct sbp2_target *tgt = dev_get_drvdata(&unit->device);
+       struct sbp2_logical_unit *lu;
+
+       list_for_each_entry(lu, &tgt->lu_list, link)
+               cancel_delayed_work_sync(&lu->work);
 
        sbp2_target_put(tgt);
        return 0;
index 68810fd1a59d057da2625e57efd55313523eeadd..aa83de9db1b91ce380ed86ca9e6f4a421491095a 100644 (file)
@@ -420,7 +420,7 @@ static efi_status_t gsmi_get_next_variable(unsigned long *name_size,
 
 static efi_status_t gsmi_set_variable(efi_char16_t *name,
                                      efi_guid_t *vendor,
-                                     unsigned long attr,
+                                     u32 attr,
                                      unsigned long data_size,
                                      void *data)
 {
index ee1d701317f7a14fa4857c0fdf5ad993244a989a..56a8554d9039615b4b7772d474706ce2815a0a78 100644 (file)
@@ -878,7 +878,7 @@ static void assert_panel_unlocked(struct drm_i915_private *dev_priv,
        int pp_reg, lvds_reg;
        u32 val;
        enum pipe panel_pipe = PIPE_A;
-       bool locked = locked;
+       bool locked = true;
 
        if (HAS_PCH_SPLIT(dev_priv->dev)) {
                pp_reg = PCH_PP_CONTROL;
@@ -7238,8 +7238,6 @@ static void intel_setup_outputs(struct drm_device *dev)
                        intel_encoder_clones(dev, encoder->clone_mask);
        }
 
-       intel_panel_setup_backlight(dev);
-
        /* disable all the possible outputs/crtcs before entering KMS mode */
        drm_helper_disable_unused_functions(dev);
 }
index 7f65940f918f9a093002eb68e5b2c2f22722a486..4f0c1ecac72e07ed1138cf8fe59d67e4472050f7 100644 (file)
@@ -466,6 +466,16 @@ static bool radeon_connector_needs_extended_probe(struct radeon_device *dev,
                    (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
                        return true;
        }
+       /* TOSHIBA Satellite L300D with ATI Mobility Radeon x1100
+        * (RS690M) sends data to the i2c bus for an HDMI connector that
+        * is not implemented */
+       if ((dev->pdev->device == 0x791f) &&
+           (dev->pdev->subsystem_vendor == 0x1179) &&
+           (dev->pdev->subsystem_device == 0xff68)) {
+               if ((connector_type == DRM_MODE_CONNECTOR_HDMIA) &&
+                   (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
+                       return true;
+       }
 
        /* Default: no EDID header probe required for DDC probing */
        return false;
index a3b011b494650fcd2786f0db4be6da79952c3491..b51e15725c6e45321b088664749a48101e79be28 100644 (file)
@@ -301,6 +301,8 @@ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64
                mc->mc_vram_size = mc->aper_size;
        }
        mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+       if (radeon_vram_limit && radeon_vram_limit < mc->real_vram_size)
+               mc->real_vram_size = radeon_vram_limit;
        dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
                        mc->mc_vram_size >> 20, mc->vram_start,
                        mc->vram_end, mc->real_vram_size >> 20);
index dee4a0c1b4b2f9e473cc59f1766ffb0e2121ba5f..602fa3541c454f8ac2c058cb5832520fad95612f 100644 (file)
@@ -40,10 +40,14 @@ void radeon_test_moves(struct radeon_device *rdev)
        size = 1024 * 1024;
 
        /* Number of tests =
-        * (Total GTT - IB pool - writeback page - ring buffer) / test size
+        * (Total GTT - IB pool - writeback page - ring buffers) / test size
         */
-       n = ((u32)(rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - RADEON_GPU_PAGE_SIZE -
-            rdev->cp.ring_size)) / size;
+       n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - rdev->cp.ring_size;
+       if (rdev->wb.wb_obj)
+               n -= RADEON_GPU_PAGE_SIZE;
+       if (rdev->ih.ring_obj)
+               n -= rdev->ih.ring_size;
+       n /= size;
 
        gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
        if (!gtt_obj) {
@@ -132,9 +136,15 @@ void radeon_test_moves(struct radeon_device *rdev)
                     gtt_start++, vram_start++) {
                        if (*vram_start != gtt_start) {
                                DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
-                                         "expected 0x%p (GTT map 0x%p-0x%p)\n",
-                                         i, *vram_start, gtt_start, gtt_map,
-                                         gtt_end);
+                                         "expected 0x%p (GTT/VRAM offset "
+                                         "0x%16llx/0x%16llx)\n",
+                                         i, *vram_start, gtt_start,
+                                         (unsigned long long)
+                                         (gtt_addr - rdev->mc.gtt_start +
+                                          (void*)gtt_start - gtt_map),
+                                         (unsigned long long)
+                                         (vram_addr - rdev->mc.vram_start +
+                                          (void*)gtt_start - gtt_map));
                                radeon_bo_kunmap(vram_obj);
                                goto out_cleanup;
                        }
@@ -175,9 +185,15 @@ void radeon_test_moves(struct radeon_device *rdev)
                     gtt_start++, vram_start++) {
                        if (*gtt_start != vram_start) {
                                DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
-                                         "expected 0x%p (VRAM map 0x%p-0x%p)\n",
-                                         i, *gtt_start, vram_start, vram_map,
-                                         vram_end);
+                                         "expected 0x%p (VRAM/GTT offset "
+                                         "0x%16llx/0x%16llx)\n",
+                                         i, *gtt_start, vram_start,
+                                         (unsigned long long)
+                                         (vram_addr - rdev->mc.vram_start +
+                                          (void*)vram_start - vram_map),
+                                         (unsigned long long)
+                                         (gtt_addr - rdev->mc.gtt_start +
+                                          (void*)vram_start - vram_map));
                                radeon_bo_kunmap(gtt_obj[i]);
                                goto out_cleanup;
                        }
index 60125ddba1e93551c41376a3e10e27a72519271e..9b86fb0e4122037056a3d5075ae5a5d1c8663874 100644 (file)
@@ -450,6 +450,29 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
                        return -EINVAL;
                mem->bus.base = rdev->mc.aper_base;
                mem->bus.is_iomem = true;
+#ifdef __alpha__
+               /*
+                * Alpha: use bus.addr to hold the ioremap() return,
+                * so we can modify bus.base below.
+                */
+               if (mem->placement & TTM_PL_FLAG_WC)
+                       mem->bus.addr =
+                               ioremap_wc(mem->bus.base + mem->bus.offset,
+                                          mem->bus.size);
+               else
+                       mem->bus.addr =
+                               ioremap_nocache(mem->bus.base + mem->bus.offset,
+                                               mem->bus.size);
+
+               /*
+                * Alpha: Use just the bus offset plus
+                * the hose/domain memory base for bus.base.
+                * It then can be used to build PTEs for VRAM
+                * access, as done in ttm_bo_vm_fault().
+                */
+               mem->bus.base = (mem->bus.base & 0x0ffffffffUL) +
+                       rdev->ddev->hose->dense_mem_base;
+#endif
                break;
        default:
                return -EINVAL;
index 56619f64b6bfa004c9b1900ac3dd36328593a6cf..a4d38d85909a0254e23fa3a1f0b607814329bf60 100644 (file)
@@ -353,8 +353,10 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
 
                ret = ttm_tt_set_user(bo->ttm, current,
                                      bo->buffer_start, bo->num_pages);
-               if (unlikely(ret != 0))
+               if (unlikely(ret != 0)) {
                        ttm_tt_destroy(bo->ttm);
+                       bo->ttm = NULL;
+               }
                break;
        default:
                printk(KERN_ERR TTM_PFX "Illegal buffer object type\n");
@@ -390,10 +392,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
         * Create and bind a ttm if required.
         */
 
-       if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm == NULL)) {
-               ret = ttm_bo_add_ttm(bo, false);
-               if (ret)
-                       goto out_err;
+       if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
+               if (bo->ttm == NULL) {
+                       ret = ttm_bo_add_ttm(bo, false);
+                       if (ret)
+                               goto out_err;
+               }
 
                ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement);
                if (ret)
index 77dbf408c0d01d86e2fe27a5d3cbc836d17ab46c..ae3c6f5dd2b71acee36f863deafc3896a874010e 100644 (file)
@@ -635,13 +635,13 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
                if (ret)
                        return ret;
 
-               ttm_bo_free_old_node(bo);
                if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
                    (bo->ttm != NULL)) {
                        ttm_tt_unbind(bo->ttm);
                        ttm_tt_destroy(bo->ttm);
                        bo->ttm = NULL;
                }
+               ttm_bo_free_old_node(bo);
        } else {
                /**
                 * This should help pipeline ordinary buffer moves.
index 306b15f39c9c12a8e717699c0d1c25fe396aa714..1130a898712516d2bfc9149bf9c859e401b6ac57 100644 (file)
@@ -589,6 +589,7 @@ config HID_WACOM_POWER_SUPPLY
 config HID_WIIMOTE
        tristate "Nintendo Wii Remote support"
        depends on BT_HIDP
+       depends on LEDS_CLASS
        ---help---
        Support for the Nintendo Wii Remote bluetooth device.
 
index b85744fe846477221ad02221f78bb397c603f54d..18b3bc646bf322ee4a7d96f709bc667dd6a85c06 100644 (file)
@@ -444,6 +444,12 @@ static const struct hid_device_id apple_devices[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS),
                .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
                        APPLE_RDESC_JIS },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
+               .driver_data = APPLE_HAS_FN },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
+               .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS),
+               .driver_data = APPLE_HAS_FN },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI),
                .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO),
index 1a5cf0c9cfcade20b35386e183f36a69155f5a7a..242353df3dc450bbde4186f04740d66b422a970f 100644 (file)
@@ -1340,6 +1340,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) },
index db63ccf21cc867b3d6392c994d241ba7f0585a85..7d27d2b0445ac07f546e818dced2ce7125e189be 100644 (file)
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI   0x0245
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO    0x0246
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS    0x0247
+#define USB_DEVICE_ID_APPLE_ALU_REVB_ANSI      0x024f
+#define USB_DEVICE_ID_APPLE_ALU_REVB_ISO       0x0250
+#define USB_DEVICE_ID_APPLE_ALU_REVB_JIS       0x0251
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI  0x0239
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO   0x023a
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS   0x023b
 #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE        0x0001
 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE       0x0600
 
+#define USB_VENDOR_ID_SIGMA_MICRO      0x1c4f
+#define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD     0x0002
+
 #define USB_VENDOR_ID_SKYCABLE                 0x1223
 #define        USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER       0x3F07
 
index a594383ce03db0458ea13f34638ed7f0eafb8b19..85a02e5f9fe873e5097a55861ec55c77f50ee05f 100644 (file)
  * any later version.
  */
 
-#include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/hid.h>
 #include <linux/input.h>
+#include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include "hid-ids.h"
@@ -33,9 +33,9 @@ struct wiimote_state {
 };
 
 struct wiimote_data {
-       atomic_t ready;
        struct hid_device *hdev;
        struct input_dev *input;
+       struct led_classdev *leds[4];
 
        spinlock_t qlock;
        __u8 head;
@@ -53,8 +53,15 @@ struct wiimote_data {
 #define WIIPROTO_FLAGS_LEDS (WIIPROTO_FLAG_LED1 | WIIPROTO_FLAG_LED2 | \
                                        WIIPROTO_FLAG_LED3 | WIIPROTO_FLAG_LED4)
 
+/* return flag for led \num */
+#define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1))
+
 enum wiiproto_reqs {
+       WIIPROTO_REQ_NULL = 0x0,
        WIIPROTO_REQ_LED = 0x11,
+       WIIPROTO_REQ_DRM = 0x12,
+       WIIPROTO_REQ_STATUS = 0x20,
+       WIIPROTO_REQ_RETURN = 0x22,
        WIIPROTO_REQ_DRM_K = 0x30,
 };
 
@@ -87,9 +94,6 @@ static __u16 wiiproto_keymap[] = {
        BTN_MODE,       /* WIIPROTO_KEY_HOME */
 };
 
-#define dev_to_wii(pdev) hid_get_drvdata(container_of(pdev, struct hid_device, \
-                                                                       dev))
-
 static ssize_t wiimote_hid_send(struct hid_device *hdev, __u8 *buffer,
                                                                size_t count)
 {
@@ -192,66 +196,96 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds)
        wiimote_queue(wdata, cmd, sizeof(cmd));
 }
 
-#define wiifs_led_show_set(num)                                                \
-static ssize_t wiifs_led_show_##num(struct device *dev,                        \
-                       struct device_attribute *attr, char *buf)       \
-{                                                                      \
-       struct wiimote_data *wdata = dev_to_wii(dev);                   \
-       unsigned long flags;                                            \
-       int state;                                                      \
-                                                                       \
-       if (!atomic_read(&wdata->ready))                                \
-               return -EBUSY;                                          \
-                                                                       \
-       spin_lock_irqsave(&wdata->state.lock, flags);                   \
-       state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num);        \
-       spin_unlock_irqrestore(&wdata->state.lock, flags);              \
-                                                                       \
-       return sprintf(buf, "%d\n", state);                             \
-}                                                                      \
-static ssize_t wiifs_led_set_##num(struct device *dev,                 \
-       struct device_attribute *attr, const char *buf, size_t count)   \
-{                                                                      \
-       struct wiimote_data *wdata = dev_to_wii(dev);                   \
-       int tmp = simple_strtoul(buf, NULL, 10);                        \
-       unsigned long flags;                                            \
-       __u8 state;                                                     \
-                                                                       \
-       if (!atomic_read(&wdata->ready))                                \
-               return -EBUSY;                                          \
-                                                                       \
-       spin_lock_irqsave(&wdata->state.lock, flags);                   \
-                                                                       \
-       state = wdata->state.flags;                                     \
-                                                                       \
-       if (tmp)                                                        \
-               wiiproto_req_leds(wdata, state | WIIPROTO_FLAG_LED##num);\
-       else                                                            \
-               wiiproto_req_leds(wdata, state & ~WIIPROTO_FLAG_LED##num);\
-                                                                       \
-       spin_unlock_irqrestore(&wdata->state.lock, flags);              \
-                                                                       \
-       return count;                                                   \
-}                                                                      \
-static DEVICE_ATTR(led##num, S_IRUGO | S_IWUSR, wiifs_led_show_##num,  \
-                                               wiifs_led_set_##num)
-
-wiifs_led_show_set(1);
-wiifs_led_show_set(2);
-wiifs_led_show_set(3);
-wiifs_led_show_set(4);
+/*
+ * Check what peripherals of the wiimote are currently
+ * active and select a proper DRM that supports all of
+ * the requested data inputs.
+ */
+static __u8 select_drm(struct wiimote_data *wdata)
+{
+       return WIIPROTO_REQ_DRM_K;
+}
+
+static void wiiproto_req_drm(struct wiimote_data *wdata, __u8 drm)
+{
+       __u8 cmd[3];
+
+       if (drm == WIIPROTO_REQ_NULL)
+               drm = select_drm(wdata);
+
+       cmd[0] = WIIPROTO_REQ_DRM;
+       cmd[1] = 0;
+       cmd[2] = drm;
+
+       wiimote_queue(wdata, cmd, sizeof(cmd));
+}
+
+static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev)
+{
+       struct wiimote_data *wdata;
+       struct device *dev = led_dev->dev->parent;
+       int i;
+       unsigned long flags;
+       bool value = false;
+
+       wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev));
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i] == led_dev) {
+                       spin_lock_irqsave(&wdata->state.lock, flags);
+                       value = wdata->state.flags & WIIPROTO_FLAG_LED(i + 1);
+                       spin_unlock_irqrestore(&wdata->state.lock, flags);
+                       break;
+               }
+       }
+
+       return value ? LED_FULL : LED_OFF;
+}
+
+static void wiimote_leds_set(struct led_classdev *led_dev,
+                                               enum led_brightness value)
+{
+       struct wiimote_data *wdata;
+       struct device *dev = led_dev->dev->parent;
+       int i;
+       unsigned long flags;
+       __u8 state, flag;
+
+       wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev));
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i] == led_dev) {
+                       flag = WIIPROTO_FLAG_LED(i + 1);
+                       spin_lock_irqsave(&wdata->state.lock, flags);
+                       state = wdata->state.flags;
+                       if (value == LED_OFF)
+                               wiiproto_req_leds(wdata, state & ~flag);
+                       else
+                               wiiproto_req_leds(wdata, state | flag);
+                       spin_unlock_irqrestore(&wdata->state.lock, flags);
+                       break;
+               }
+       }
+}
 
 static int wiimote_input_event(struct input_dev *dev, unsigned int type,
                                                unsigned int code, int value)
+{
+       return 0;
+}
+
+static int wiimote_input_open(struct input_dev *dev)
 {
        struct wiimote_data *wdata = input_get_drvdata(dev);
 
-       if (!atomic_read(&wdata->ready))
-               return -EBUSY;
-       /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */
-       smp_rmb();
+       return hid_hw_open(wdata->hdev);
+}
 
-       return 0;
+static void wiimote_input_close(struct input_dev *dev)
+{
+       struct wiimote_data *wdata = input_get_drvdata(dev);
+
+       hid_hw_close(wdata->hdev);
 }
 
 static void handler_keys(struct wiimote_data *wdata, const __u8 *payload)
@@ -281,6 +315,26 @@ static void handler_keys(struct wiimote_data *wdata, const __u8 *payload)
        input_sync(wdata->input);
 }
 
+static void handler_status(struct wiimote_data *wdata, const __u8 *payload)
+{
+       handler_keys(wdata, payload);
+
+       /* on status reports the drm is reset so we need to resend the drm */
+       wiiproto_req_drm(wdata, WIIPROTO_REQ_NULL);
+}
+
+static void handler_return(struct wiimote_data *wdata, const __u8 *payload)
+{
+       __u8 err = payload[3];
+       __u8 cmd = payload[2];
+
+       handler_keys(wdata, payload);
+
+       if (err)
+               hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err,
+                                                                       cmd);
+}
+
 struct wiiproto_handler {
        __u8 id;
        size_t size;
@@ -288,6 +342,8 @@ struct wiiproto_handler {
 };
 
 static struct wiiproto_handler handlers[] = {
+       { .id = WIIPROTO_REQ_STATUS, .size = 6, .func = handler_status },
+       { .id = WIIPROTO_REQ_RETURN, .size = 4, .func = handler_return },
        { .id = WIIPROTO_REQ_DRM_K, .size = 2, .func = handler_keys },
        { .id = 0 }
 };
@@ -300,11 +356,6 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report,
        int i;
        unsigned long flags;
 
-       if (!atomic_read(&wdata->ready))
-               return -EBUSY;
-       /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */
-       smp_rmb();
-
        if (size < 1)
                return -EINVAL;
 
@@ -321,6 +372,58 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report,
        return 0;
 }
 
+static void wiimote_leds_destroy(struct wiimote_data *wdata)
+{
+       int i;
+       struct led_classdev *led;
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i]) {
+                       led = wdata->leds[i];
+                       wdata->leds[i] = NULL;
+                       led_classdev_unregister(led);
+                       kfree(led);
+               }
+       }
+}
+
+static int wiimote_leds_create(struct wiimote_data *wdata)
+{
+       int i, ret;
+       struct device *dev = &wdata->hdev->dev;
+       size_t namesz = strlen(dev_name(dev)) + 9;
+       struct led_classdev *led;
+       char *name;
+
+       for (i = 0; i < 4; ++i) {
+               led = kzalloc(sizeof(struct led_classdev) + namesz, GFP_KERNEL);
+               if (!led) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+               name = (void*)&led[1];
+               snprintf(name, namesz, "%s:blue:p%d", dev_name(dev), i);
+               led->name = name;
+               led->brightness = 0;
+               led->max_brightness = 1;
+               led->brightness_get = wiimote_leds_get;
+               led->brightness_set = wiimote_leds_set;
+
+               ret = led_classdev_register(dev, led);
+               if (ret) {
+                       kfree(led);
+                       goto err;
+               }
+               wdata->leds[i] = led;
+       }
+
+       return 0;
+
+err:
+       wiimote_leds_destroy(wdata);
+       return ret;
+}
+
 static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 {
        struct wiimote_data *wdata;
@@ -341,6 +444,8 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 
        input_set_drvdata(wdata->input, wdata);
        wdata->input->event = wiimote_input_event;
+       wdata->input->open = wiimote_input_open;
+       wdata->input->close = wiimote_input_close;
        wdata->input->dev.parent = &wdata->hdev->dev;
        wdata->input->id.bustype = wdata->hdev->bus;
        wdata->input->id.vendor = wdata->hdev->vendor;
@@ -362,6 +467,12 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 
 static void wiimote_destroy(struct wiimote_data *wdata)
 {
+       wiimote_leds_destroy(wdata);
+
+       input_unregister_device(wdata->input);
+       cancel_work_sync(&wdata->worker);
+       hid_hw_stop(wdata->hdev);
+
        kfree(wdata);
 }
 
@@ -377,19 +488,6 @@ static int wiimote_hid_probe(struct hid_device *hdev,
                return -ENOMEM;
        }
 
-       ret = device_create_file(&hdev->dev, &dev_attr_led1);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led2);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led3);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led4);
-       if (ret)
-               goto err;
-
        ret = hid_parse(hdev);
        if (ret) {
                hid_err(hdev, "HID parse failed\n");
@@ -408,9 +506,10 @@ static int wiimote_hid_probe(struct hid_device *hdev,
                goto err_stop;
        }
 
-       /* smp_wmb: Write wdata->xy first before wdata->ready is set to 1 */
-       smp_wmb();
-       atomic_set(&wdata->ready, 1);
+       ret = wiimote_leds_create(wdata);
+       if (ret)
+               goto err_free;
+
        hid_info(hdev, "New device registered\n");
 
        /* by default set led1 after device initialization */
@@ -420,15 +519,15 @@ static int wiimote_hid_probe(struct hid_device *hdev,
 
        return 0;
 
+err_free:
+       wiimote_destroy(wdata);
+       return ret;
+
 err_stop:
        hid_hw_stop(hdev);
 err:
        input_free_device(wdata->input);
-       device_remove_file(&hdev->dev, &dev_attr_led1);
-       device_remove_file(&hdev->dev, &dev_attr_led2);
-       device_remove_file(&hdev->dev, &dev_attr_led3);
-       device_remove_file(&hdev->dev, &dev_attr_led4);
-       wiimote_destroy(wdata);
+       kfree(wdata);
        return ret;
 }
 
@@ -437,16 +536,6 @@ static void wiimote_hid_remove(struct hid_device *hdev)
        struct wiimote_data *wdata = hid_get_drvdata(hdev);
 
        hid_info(hdev, "Device removed\n");
-
-       device_remove_file(&hdev->dev, &dev_attr_led1);
-       device_remove_file(&hdev->dev, &dev_attr_led2);
-       device_remove_file(&hdev->dev, &dev_attr_led3);
-       device_remove_file(&hdev->dev, &dev_attr_led4);
-
-       hid_hw_stop(hdev);
-       input_unregister_device(wdata->input);
-
-       cancel_work_sync(&wdata->worker);
        wiimote_destroy(wdata);
 }
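
The wiimote driver drops its ad-hoc led1..led4 sysfs attributes in favour of standard LED class devices, so each LED appears as /sys/class/leds/<hid-device>:blue:pN with the usual brightness file, and the atomic ready flag becomes unnecessary because the input and LED devices are only registered once setup is complete. A minimal sketch of the LED-class pattern (names and callback body illustrative):

    #include <linux/leds.h>
    #include <linux/slab.h>

    static void demo_led_set(struct led_classdev *led, enum led_brightness b)
    {
            /* forward the requested brightness to the hardware */
    }

    static int demo_register_led(struct device *parent)
    {
            struct led_classdev *led;

            led = kzalloc(sizeof(*led), GFP_KERNEL);
            if (!led)
                    return -ENOMEM;

            led->name = "demo:blue:p0";
            led->max_brightness = 1;
            led->brightness_set = demo_led_set;
            /* creates /sys/class/leds/demo:blue:p0/brightness */
            return led_classdev_register(parent, led);
    }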
 
index 621959d5cc42c6b6798328fe32ce32072fa8669a..4bdb5d46c52c2a21d4ead0e592b90391d6cf5ec8 100644 (file)
@@ -89,6 +89,7 @@ static const struct hid_blacklist {
 
        { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH, HID_QUIRK_MULTI_INPUT },
        { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT },
+       { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS },
        { 0, 0 }
 };
 
index c4c40be0edbfd6afb00a7e6dbf6a0b20a5544b3f..d22f241b6a67ea8b1f2030a54a913360db615a62 100644 (file)
@@ -114,7 +114,6 @@ struct i5k_amb_data {
        void __iomem *amb_mmio;
        struct i5k_device_attribute *attrs;
        unsigned int num_attrs;
-       unsigned long chipset_id;
 };
 
 static ssize_t show_name(struct device *dev, struct device_attribute *devattr,
@@ -444,8 +443,6 @@ static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data,
                goto out;
        }
 
-       data->chipset_id = devid;
-
        res = 0;
 out:
        pci_dev_put(pcidev);
@@ -478,23 +475,13 @@ out:
        return res;
 }
 
-static unsigned long i5k_channel_pci_id(struct i5k_amb_data *data,
-                                       unsigned long channel)
-{
-       switch (data->chipset_id) {
-       case PCI_DEVICE_ID_INTEL_5000_ERR:
-               return PCI_DEVICE_ID_INTEL_5000_FBD0 + channel;
-       case PCI_DEVICE_ID_INTEL_5400_ERR:
-               return PCI_DEVICE_ID_INTEL_5400_FBD0 + channel;
-       default:
-               BUG();
-       }
-}
-
-static unsigned long chipset_ids[] = {
-       PCI_DEVICE_ID_INTEL_5000_ERR,
-       PCI_DEVICE_ID_INTEL_5400_ERR,
-       0
+static struct {
+       unsigned long err;
+       unsigned long fbd0;
+} chipset_ids[] __devinitdata = {
+       { PCI_DEVICE_ID_INTEL_5000_ERR, PCI_DEVICE_ID_INTEL_5000_FBD0 },
+       { PCI_DEVICE_ID_INTEL_5400_ERR, PCI_DEVICE_ID_INTEL_5400_FBD0 },
+       { 0, 0 }
 };
 
 #ifdef MODULE
@@ -510,8 +497,7 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
 {
        struct i5k_amb_data *data;
        struct resource *reso;
-       int i;
-       int res = -ENODEV;
+       int i, res;
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
@@ -520,22 +506,22 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
        /* Figure out where the AMB registers live */
        i = 0;
        do {
-               res = i5k_find_amb_registers(data, chipset_ids[i]);
+               res = i5k_find_amb_registers(data, chipset_ids[i].err);
+               if (res == 0)
+                       break;
                i++;
-       } while (res && chipset_ids[i]);
+       } while (chipset_ids[i].err);
 
        if (res)
                goto err;
 
        /* Copy the DIMM presence map for the first two channels */
-       res = i5k_channel_probe(&data->amb_present[0],
-                               i5k_channel_pci_id(data, 0));
+       res = i5k_channel_probe(&data->amb_present[0], chipset_ids[i].fbd0);
        if (res)
                goto err;
 
        /* Copy the DIMM presence map for the optional second two channels */
-       i5k_channel_probe(&data->amb_present[2],
-                         i5k_channel_pci_id(data, 1));
+       i5k_channel_probe(&data->amb_present[2], chipset_ids[i].fbd0 + 1);
 
        /* Set up resource regions */
        reso = request_mem_region(data->amb_base, data->amb_len, DRVNAME);
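
With the cached chipset_id gone, the ERR-to-FBD0 mapping lives in a single sentinel-terminated table, and the probe loop above remembers which entry matched so the FBD0 base can be used directly. A userspace sketch of the same table-walk pattern; the IDs here are placeholders, not real PCI device IDs:

#include <stdio.h>

struct chip_map {
        unsigned long err;      /* device ID probed for */
        unsigned long fbd0;     /* matching channel base ID */
};

static const struct chip_map chipset_ids[] = {
        { 0x25F0, 0x25F5 },
        { 0x4030, 0x4035 },
        { 0, 0 }                /* sentinel terminates the walk */
};

static unsigned long lookup_fbd0(unsigned long err_id)
{
        int i;

        for (i = 0; chipset_ids[i].err; i++)
                if (chipset_ids[i].err == err_id)
                        return chipset_ids[i].fbd0;
        return 0;               /* not found */
}

int main(void)
{
        printf("fbd0 for 0x4030: 0x%lx\n", lookup_fbd0(0x4030));
        return 0;
}
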
index d7926f4336b5f5b8d712e6e74c6118f8e2a24d13..eab11615dced6b54e71996bfd4df60c159bbe570 100644 (file)
@@ -211,8 +211,7 @@ static int lookup_comp(struct ntc_data *data,
        if (data->comp[mid].ohm <= ohm) {
                *i_low = mid;
                *i_high = mid - 1;
-       }
-       if (data->comp[mid].ohm > ohm) {
+       } else {
                *i_low = mid + 1;
                *i_high = mid;
        }
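
Since comp[] is sorted by descending resistance, the two tests were mutually exclusive and exhaustive, so the second if collapses into an else and one redundant comparison per bisection step disappears. A self-contained sketch of such a search over a descending table, with illustrative values:

#include <stdio.h>

struct entry {
        int ohm;
        int temp_c;
};

/* Table sorted by descending resistance, as in the driver. */
static const struct entry comp[] = {
        { 100000, -10 }, { 50000, 5 }, { 25000, 20 }, { 12500, 35 },
};

static void lookup(int ohm, int *i_low, int *i_high)
{
        int low = 0, high = (int)(sizeof(comp) / sizeof(comp[0])) - 1;

        while (low <= high) {
                int mid = low + (high - low) / 2;

                if (comp[mid].ohm <= ohm) {
                        /* bracket found at mid or in the higher-ohm half */
                        *i_low = mid;
                        *i_high = mid - 1;
                        high = mid - 1;
                } else {        /* the two cases are exhaustive */
                        *i_low = mid + 1;
                        *i_high = mid;
                        low = mid + 1;
                }
        }
}

int main(void)
{
        int lo, hi;

        lookup(30000, &lo, &hi);
        printf("bracketed by comp[%d] and comp[%d]\n", hi, lo);
        return 0;
}
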
index 0c731ca69f1506d70b05da8c2d895c6493e4b1f3..b228e09c5d05aca9fbbfb6e639d1ba2fe0d9bcf4 100644 (file)
@@ -146,6 +146,7 @@ struct i2c_nmk_client {
  * @stop: stop condition
  * @xfer_complete: acknowledge completion for an I2C message
  * @result: controller propagated result
+ * @regulator: pointer to i2c regulator
  * @busy: Busy doing transfer
  */
 struct nmk_i2c_dev {
@@ -417,12 +418,12 @@ static int read_i2c(struct nmk_i2c_dev *dev)
        writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask,
                        dev->virtbase + I2C_IMSCR);
 
-       timeout = wait_for_completion_interruptible_timeout(
+       timeout = wait_for_completion_timeout(
                &dev->xfer_complete, dev->adap.timeout);
 
        if (timeout < 0) {
                dev_err(&dev->pdev->dev,
-                       "wait_for_completion_interruptible_timeout"
+                       "wait_for_completion_timeout "
                        "returned %d waiting for event\n", timeout);
                status = timeout;
        }
@@ -504,12 +505,12 @@ static int write_i2c(struct nmk_i2c_dev *dev)
        writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask,
                        dev->virtbase + I2C_IMSCR);
 
-       timeout = wait_for_completion_interruptible_timeout(
+       timeout = wait_for_completion_timeout(
                &dev->xfer_complete, dev->adap.timeout);
 
        if (timeout < 0) {
                dev_err(&dev->pdev->dev,
-                       "wait_for_completion_interruptible_timeout"
+                       "wait_for_completion_timeout "
                        "returned %d waiting for event\n", timeout);
                status = timeout;
        }
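
Dropping the _interruptible_ variant means a pending signal can no longer abort a transfer mid-flight. One caveat, assuming the standard completion API semantics: wait_for_completion_timeout() returns an unsigned long that is 0 on timeout and the remaining jiffies otherwise, so it never goes negative and the timeout < 0 check retained above cannot fire; timeouts are detected by testing for 0. A kernel-context sketch (not a standalone program) of the return convention:

/* Assumes driver context: an IRQ handler calls complete(&ctx->done). */
#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

struct xfer_ctx {
        struct completion done;
};

static int do_xfer(struct xfer_ctx *ctx)
{
        unsigned long left;

        init_completion(&ctx->done);
        /* ... start the transfer here ... */

        left = wait_for_completion_timeout(&ctx->done,
                                           msecs_to_jiffies(200));
        if (!left)
                return -ETIMEDOUT;      /* 0 means the wait timed out */

        return 0;
}
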
index 1a766cf74f6be3cbf05dc7cdd950c1292bce34e8..2dfb6317685613ae3bd4d55c33737658a1fc3bc1 100644 (file)
@@ -1139,41 +1139,12 @@ omap_i2c_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_SUSPEND
-static int omap_i2c_suspend(struct device *dev)
-{
-       if (!pm_runtime_suspended(dev))
-               if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
-                       dev->bus->pm->runtime_suspend(dev);
-
-       return 0;
-}
-
-static int omap_i2c_resume(struct device *dev)
-{
-       if (!pm_runtime_suspended(dev))
-               if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
-                       dev->bus->pm->runtime_resume(dev);
-
-       return 0;
-}
-
-static struct dev_pm_ops omap_i2c_pm_ops = {
-       .suspend = omap_i2c_suspend,
-       .resume = omap_i2c_resume,
-};
-#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops)
-#else
-#define OMAP_I2C_PM_OPS NULL
-#endif
-
 static struct platform_driver omap_i2c_driver = {
        .probe          = omap_i2c_probe,
        .remove         = omap_i2c_remove,
        .driver         = {
                .name   = "omap_i2c",
                .owner  = THIS_MODULE,
-               .pm     = OMAP_I2C_PM_OPS,
        },
 };
 
index 9882971827e6325bd7a424ef83b13b5679cec8d1..358cd7ee905b7ff4f9a7498e277341037437bf19 100644 (file)
@@ -139,7 +139,7 @@ struct analog_port {
 #include <linux/i8253.h>
 
 #define GET_TIME(x)    do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0)
-#define DELTA(x,y)     (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? CLOCK_TICK_RATE / HZ : 0)))
+#define DELTA(x,y)     (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
 #define TIME_NAME      (cpu_has_tsc?"TSC":"PIT")
 static unsigned int get_time_pit(void)
 {
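
The PIT counts down and wraps once per timer period, so when the later sample is numerically smaller the DELTA() macro adds one period back; switching to PIT_TICK_RATE ties that period to the PIT's actual 1193182 Hz input clock rather than the unrelated CLOCK_TICK_RATE. A small sketch of the wrap-around arithmetic, with illustrative sample values:

#include <stdio.h>

#define PIT_TICK_RATE 1193182UL
#define HZ 1000UL

/* Mirror of the non-TSC DELTA() arm: if the later reading x sits
 * numerically below the earlier reading y, the down-counter wrapped
 * and one full period (rate / HZ) is added back. */
static unsigned long pit_delta(unsigned long x, unsigned long y)
{
        return x - y + (x < y ? PIT_TICK_RATE / HZ : 0);
}

int main(void)
{
        /* later sample 100 below earlier sample 900: one wrap occurred */
        printf("delta = %lu PIT ticks\n", pit_delta(100, 900));
        return 0;
}
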
index c8242dd190d0c920f64ff105903fd76c2e5a6be1..aa17e024d80329acffd48223b554d5f2528f513a 100644 (file)
@@ -20,6 +20,7 @@
  * flag.
  */
 
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
index f270447ba9519cb9497eea8a02d9869eef58ba4b..a5a77915c65003fb30eb058765945353f11d6298 100644 (file)
@@ -702,7 +702,7 @@ err_iounmap:
 err_free_mem_region:
        release_mem_region(res->start, resource_size(res));
 err_free_mem:
-       input_free_device(kbc->idev);
+       input_free_device(input_dev);
        kfree(kbc);
 
        return err;
index e21deb1baa8abfb193d46702023540e2d09a41ca..025417d74ca29c37ffa8393878aae2f257892427 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver (I2C bus)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -27,54 +27,49 @@ static int ad714x_i2c_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume);
 
-static int ad714x_i2c_write(struct device *dev, unsigned short reg,
-                               unsigned short data)
+static int ad714x_i2c_write(struct ad714x_chip *chip,
+                           unsigned short reg, unsigned short data)
 {
-       struct i2c_client *client = to_i2c_client(dev);
-       int ret = 0;
-       u8 *_reg = (u8 *)&reg;
-       u8 *_data = (u8 *)&data;
-
-       u8 tx[4] = {
-               _reg[1],
-               _reg[0],
-               _data[1],
-               _data[0]
-       };
-
-       ret = i2c_master_send(client, tx, 4);
-       if (ret < 0)
-               dev_err(&client->dev, "I2C write error\n");
-
-       return ret;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(reg);
+       chip->xfer_buf[1] = cpu_to_be16(data);
+
+       error = i2c_master_send(client, (u8 *)chip->xfer_buf,
+                               2 * sizeof(*chip->xfer_buf));
+       if (unlikely(error < 0)) {
+               dev_err(&client->dev, "I2C write error: %d\n", error);
+               return error;
+       }
+
+       return 0;
 }
 
-static int ad714x_i2c_read(struct device *dev, unsigned short reg,
-                               unsigned short *data)
+static int ad714x_i2c_read(struct ad714x_chip *chip,
+                          unsigned short reg, unsigned short *data, size_t len)
 {
-       struct i2c_client *client = to_i2c_client(dev);
-       int ret = 0;
-       u8 *_reg = (u8 *)&reg;
-       u8 *_data = (u8 *)data;
-
-       u8 tx[2] = {
-               _reg[1],
-               _reg[0]
-       };
-       u8 rx[2];
-
-       ret = i2c_master_send(client, tx, 2);
-       if (ret >= 0)
-               ret = i2c_master_recv(client, rx, 2);
-
-       if (unlikely(ret < 0)) {
-               dev_err(&client->dev, "I2C read error\n");
-       } else {
-               _data[0] = rx[1];
-               _data[1] = rx[0];
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int i;
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(reg);
+
+       error = i2c_master_send(client, (u8 *)chip->xfer_buf,
+                               sizeof(*chip->xfer_buf));
+       if (error >= 0)
+               error = i2c_master_recv(client, (u8 *)chip->xfer_buf,
+                                       len * sizeof(*chip->xfer_buf));
+
+       if (unlikely(error < 0)) {
+               dev_err(&client->dev, "I2C read error: %d\n", error);
+               return error;
        }
 
-       return ret;
+       for (i = 0; i < len; i++)
+               data[i] = be16_to_cpu(chip->xfer_buf[i]);
+
+       return 0;
 }
 
 static int __devinit ad714x_i2c_probe(struct i2c_client *client,
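
The read and write paths now marshal 16-bit registers through the chip's shared xfer_buf using cpu_to_be16()/be16_to_cpu(), replacing the hand-rolled byte shuffling, and a read can fetch several consecutive registers in one transaction. A userspace sketch of the same big-endian packing, with htons()/ntohs() standing in for the kernel helpers; the register values are illustrative:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>  /* htons()/ntohs() stand in for cpu_to_be16()/be16_to_cpu() */

int main(void)
{
        uint16_t xfer_buf[2];
        uint8_t wire[4];

        xfer_buf[0] = htons(0x017D);    /* register address, MSB first */
        xfer_buf[1] = htons(0xA204);    /* data word, MSB first */
        memcpy(wire, xfer_buf, sizeof(wire));

        /* The device sees the bytes most-significant first regardless
         * of host endianness. */
        printf("on the wire: %02x %02x %02x %02x\n",
               wire[0], wire[1], wire[2], wire[3]);

        /* A reply would be unpacked with ntohs(), i.e. be16_to_cpu(). */
        return 0;
}
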
index 4120dd5493059126b272256b47eba121044e396d..875b50811361cef62ca8e1e1b009b120121bfe66 100644 (file)
@@ -1,12 +1,12 @@
 /*
  * AD714X CapTouch Programmable Controller driver (SPI bus)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
 
-#include <linux/input.h>       /* BUS_I2C */
+#include <linux/input.h>       /* BUS_SPI */
 #include <linux/module.h>
 #include <linux/spi/spi.h>
 #include <linux/pm.h>
@@ -30,30 +30,68 @@ static int ad714x_spi_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume);
 
-static int ad714x_spi_read(struct device *dev, unsigned short reg,
-               unsigned short *data)
+static int ad714x_spi_read(struct ad714x_chip *chip,
+                          unsigned short reg, unsigned short *data, size_t len)
 {
-       struct spi_device *spi = to_spi_device(dev);
-       unsigned short tx = AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg;
+       struct spi_device *spi = to_spi_device(chip->dev);
+       struct spi_message message;
+       struct spi_transfer xfer[2];
+       int i;
+       int error;
+
+       spi_message_init(&message);
+       memset(xfer, 0, sizeof(xfer));
+
+       chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX |
+                                       AD714x_SPI_READ | reg);
+       xfer[0].tx_buf = &chip->xfer_buf[0];
+       xfer[0].len = sizeof(chip->xfer_buf[0]);
+       spi_message_add_tail(&xfer[0], &message);
+
+       xfer[1].rx_buf = &chip->xfer_buf[1];
+       xfer[1].len = sizeof(chip->xfer_buf[1]) * len;
+       spi_message_add_tail(&xfer[1], &message);
+
+       error = spi_sync(spi, &message);
+       if (unlikely(error)) {
+               dev_err(chip->dev, "SPI read error: %d\n", error);
+               return error;
+       }
+
+       for (i = 0; i < len; i++)
+               data[i] = be16_to_cpu(chip->xfer_buf[i + 1]);
 
-       return spi_write_then_read(spi, (u8 *)&tx, 2, (u8 *)data, 2);
+       return 0;
 }
 
-static int ad714x_spi_write(struct device *dev, unsigned short reg,
-               unsigned short data)
+static int ad714x_spi_write(struct ad714x_chip *chip,
+                           unsigned short reg, unsigned short data)
 {
-       struct spi_device *spi = to_spi_device(dev);
-       unsigned short tx[2] = {
-               AD714x_SPI_CMD_PREFIX | reg,
-               data
-       };
+       struct spi_device *spi = to_spi_device(chip->dev);
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg);
+       chip->xfer_buf[1] = cpu_to_be16(data);
+
+       error = spi_write(spi, (u8 *)chip->xfer_buf,
+                         2 * sizeof(*chip->xfer_buf));
+       if (unlikely(error)) {
+               dev_err(chip->dev, "SPI write error: %d\n", error);
+               return error;
+       }
 
-       return spi_write(spi, (u8 *)tx, 4);
+       return 0;
 }
 
 static int __devinit ad714x_spi_probe(struct spi_device *spi)
 {
        struct ad714x_chip *chip;
+       int err;
+
+       spi->bits_per_word = 8;
+       err = spi_setup(spi);
+       if (err < 0)
+               return err;
 
        chip = ad714x_probe(&spi->dev, BUS_SPI, spi->irq,
                            ad714x_spi_read, ad714x_spi_write);
index c3a62c42cd28838a64fa4cb432d2ac4e430a0fc0..ca42c7d2a3c79ab247163eafd7e4d4112b6ee097 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver supporting AD7142/3/7/8/7A
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -59,7 +59,6 @@
 #define STAGE11_AMBIENT                0x27D
 
 #define PER_STAGE_REG_NUM      36
-#define STAGE_NUM              12
 #define STAGE_CFGREG_NUM       8
 #define SYS_CFGREG_NUM         8
 
@@ -124,27 +123,6 @@ struct ad714x_driver_data {
  * information to integrate all things which will be private data
  * of spi/i2c device
  */
-struct ad714x_chip {
-       unsigned short h_state;
-       unsigned short l_state;
-       unsigned short c_state;
-       unsigned short adc_reg[STAGE_NUM];
-       unsigned short amb_reg[STAGE_NUM];
-       unsigned short sensor_val[STAGE_NUM];
-
-       struct ad714x_platform_data *hw;
-       struct ad714x_driver_data *sw;
-
-       int irq;
-       struct device *dev;
-       ad714x_read_t read;
-       ad714x_write_t write;
-
-       struct mutex mutex;
-
-       unsigned product;
-       unsigned version;
-};
 
 static void ad714x_use_com_int(struct ad714x_chip *ad714x,
                                int start_stage, int end_stage)
@@ -154,13 +132,13 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x,
 
        mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
 
-       ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1);
        data |= 1 << end_stage;
-       ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
 
-       ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1);
        data &= ~mask;
-       ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
 }
 
 static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
@@ -171,13 +149,13 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
 
        mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
 
-       ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1);
        data &= ~(1 << end_stage);
-       ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
 
-       ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1);
        data |= mask;
-       ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
 }
 
 static int ad714x_cal_highest_stage(struct ad714x_chip *ad714x,
@@ -273,15 +251,16 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
        struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx];
        int i;
 
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage,
+                       &ad714x->adc_reg[hw->start_stage],
+                       hw->end_stage - hw->start_stage + 1);
+
        for (i = hw->start_stage; i <= hw->end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                       &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
-
-               ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
+
+               ad714x->sensor_val[i] =
+                       abs(ad714x->adc_reg[i] - ad714x->amb_reg[i]);
        }
 }
 
@@ -444,15 +423,16 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
        struct ad714x_wheel_plat *hw = &ad714x->hw->wheel[idx];
        int i;
 
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage,
+                       &ad714x->adc_reg[hw->start_stage],
+                       hw->end_stage - hw->start_stage + 1);
+
        for (i = hw->start_stage; i <= hw->end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                       &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
                if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
-                       ad714x->sensor_val[i] = ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i];
+                       ad714x->sensor_val[i] =
+                               ad714x->adc_reg[i] - ad714x->amb_reg[i];
                else
                        ad714x->sensor_val[i] = 0;
        }
@@ -597,15 +577,16 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
        struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx];
        int i;
 
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->x_start_stage,
+                       &ad714x->adc_reg[hw->x_start_stage],
+                       hw->x_end_stage - hw->x_start_stage + 1);
+
        for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                               &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
                if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
-                       ad714x->sensor_val[i] = ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i];
+                       ad714x->sensor_val[i] =
+                               ad714x->adc_reg[i] - ad714x->amb_reg[i];
                else
                        ad714x->sensor_val[i] = 0;
        }
@@ -891,7 +872,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x)
 {
        unsigned short data;
 
-       ad714x->read(ad714x->dev, AD714X_PARTID_REG, &data);
+       ad714x->read(ad714x, AD714X_PARTID_REG, &data, 1);
        switch (data & 0xFFF0) {
        case AD7142_PARTID:
                ad714x->product = 0x7142;
@@ -940,23 +921,20 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x)
        for (i = 0; i < STAGE_NUM; i++) {
                reg_base = AD714X_STAGECFG_REG + i * STAGE_CFGREG_NUM;
                for (j = 0; j < STAGE_CFGREG_NUM; j++)
-                       ad714x->write(ad714x->dev, reg_base + j,
+                       ad714x->write(ad714x, reg_base + j,
                                        ad714x->hw->stage_cfg_reg[i][j]);
        }
 
        for (i = 0; i < SYS_CFGREG_NUM; i++)
-               ad714x->write(ad714x->dev, AD714X_SYSCFG_REG + i,
+               ad714x->write(ad714x, AD714X_SYSCFG_REG + i,
                        ad714x->hw->sys_cfg_reg[i]);
        for (i = 0; i < SYS_CFGREG_NUM; i++)
-               ad714x->read(ad714x->dev, AD714X_SYSCFG_REG + i,
-                       &data);
+               ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data, 1);
 
-       ad714x->write(ad714x->dev, AD714X_STG_CAL_EN_REG, 0xFFF);
+       ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF);
 
        /* clear all interrupts */
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 }
 
 static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
@@ -966,9 +944,7 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
 
        mutex_lock(&ad714x->mutex);
 
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &ad714x->l_state);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &ad714x->h_state);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &ad714x->c_state);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 
        for (i = 0; i < ad714x->hw->button_num; i++)
                ad714x_button_state_machine(ad714x, i);
@@ -1245,7 +1221,7 @@ int ad714x_disable(struct ad714x_chip *ad714x)
        mutex_lock(&ad714x->mutex);
 
        data = ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL] | 0x3;
-       ad714x->write(ad714x->dev, AD714X_PWR_CTRL, data);
+       ad714x->write(ad714x, AD714X_PWR_CTRL, data);
 
        mutex_unlock(&ad714x->mutex);
 
@@ -1255,24 +1231,20 @@ EXPORT_SYMBOL(ad714x_disable);
 
 int ad714x_enable(struct ad714x_chip *ad714x)
 {
-       unsigned short data;
-
        dev_dbg(ad714x->dev, "%s enter\n", __func__);
 
        mutex_lock(&ad714x->mutex);
 
        /* resume to non-shutdown mode */
 
-       ad714x->write(ad714x->dev, AD714X_PWR_CTRL,
+       ad714x->write(ad714x, AD714X_PWR_CTRL,
                        ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL]);
 
        /* make sure the interrupt output line is not driven low after resume,
         * otherwise we never get another chance to see the falling-edge irq
         */
 
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 
        mutex_unlock(&ad714x->mutex);
 
index 45c54fb13f0755007e689d9b549b615d71b4a104..3c85455aa66d23329d3605fb367d400d320e4976 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver (bus interfaces)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
 
 #include <linux/types.h>
 
+#define STAGE_NUM              12
+
 struct device;
+struct ad714x_platform_data;
+struct ad714x_driver_data;
 struct ad714x_chip;
 
-typedef int (*ad714x_read_t)(struct device *, unsigned short, unsigned short *);
-typedef int (*ad714x_write_t)(struct device *, unsigned short, unsigned short);
+typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *, size_t);
+typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short);
+
+struct ad714x_chip {
+       unsigned short l_state;
+       unsigned short h_state;
+       unsigned short c_state;
+       unsigned short adc_reg[STAGE_NUM];
+       unsigned short amb_reg[STAGE_NUM];
+       unsigned short sensor_val[STAGE_NUM];
+
+       struct ad714x_platform_data *hw;
+       struct ad714x_driver_data *sw;
+
+       int irq;
+       struct device *dev;
+       ad714x_read_t read;
+       ad714x_write_t write;
+
+       struct mutex mutex;
+
+       unsigned product;
+       unsigned version;
+
+       __be16 xfer_buf[16] ____cacheline_aligned;
+};
 
 int ad714x_disable(struct ad714x_chip *ad714x);
 int ad714x_enable(struct ad714x_chip *ad714x);
index 6c76cf79299143ce16542af8e734e49e4a21bf12..0794778295fc60619311eb619c5eee2a7a97ef07 100644 (file)
@@ -234,7 +234,7 @@ static const struct of_device_id mma8450_dt_ids[] = {
        { .compatible = "fsl,mma8450", },
        { /* sentinel */ }
 };
-MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids);
+MODULE_DEVICE_TABLE(of, mma8450_dt_ids);
 
 static struct i2c_driver mma8450_driver = {
        .driver = {
index b95fac15b2ea46ec40af163359a71bfb033ecef2..f71dc728da58adcad257da28f053b98fc29fa3fa 100644 (file)
@@ -282,7 +282,7 @@ err_free_irq:
 err_pm_set_suspended:
        pm_runtime_set_suspended(&client->dev);
 err_free_mem:
-       input_unregister_device(idev);
+       input_free_device(idev);
        kfree(sensor);
        return error;
 }
index 3126983c004a96860d4b042295eec0cce7b09ef1..da280189ef07c573e0a2aa05971eb3f8bbba7f64 100644 (file)
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI   0x0245
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO    0x0246
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS    0x0247
+/* MacbookAir4,2 (unibody, July 2011) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI   0x024c
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO    0x024d
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS    0x024e
+/* MacbookPro8,2 (unibody) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI  0x0252
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO   0x0253
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS   0x0254
 
 #define BCM5974_DEVICE(prod) {                                 \
        .match_flags = (USB_DEVICE_ID_MATCH_DEVICE |            \
@@ -104,6 +112,14 @@ static const struct usb_device_id bcm5974_table[] = {
        BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI),
        BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO),
        BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS),
+       /* MacbookAir4,2 */
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS),
+       /* MacbookPro8,2 */
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS),
        /* Terminating entry */
        {}
 };
@@ -294,6 +310,30 @@ static const struct bcm5974_config bcm5974_config_table[] = {
                { DIM_X, DIM_X / SN_COORD, -4415, 5050 },
                { DIM_Y, DIM_Y / SN_COORD, -55, 6680 }
        },
+       {
+               USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI,
+               USB_DEVICE_ID_APPLE_WELLSPRING6_ISO,
+               USB_DEVICE_ID_APPLE_WELLSPRING6_JIS,
+               HAS_INTEGRATED_BUTTON,
+               0x84, sizeof(struct bt_data),
+               0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+               { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+               { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+               { DIM_X, DIM_X / SN_COORD, -4620, 5140 },
+               { DIM_Y, DIM_Y / SN_COORD, -150, 6600 }
+       },
+       {
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI,
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO,
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS,
+               HAS_INTEGRATED_BUTTON,
+               0x84, sizeof(struct bt_data),
+               0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+               { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+               { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+               { DIM_X, DIM_X / SN_COORD, -4750, 5280 },
+               { DIM_Y, DIM_Y / SN_COORD, -150, 6730 }
+       },
        {}
 };
 
index 449c0a46dbac51ba881ba49b8cfff1c3861dfee7..d27c9d91630b8d4fbe676e83353d5ee71d71c771 100644 (file)
@@ -49,6 +49,7 @@ struct hid_descriptor {
 #define USB_REQ_GET_REPORT     0x01
 #define USB_REQ_SET_REPORT     0x09
 #define WAC_HID_FEATURE_REPORT 0x03
+#define WAC_MSG_RETRIES                5
 
 static int usb_get_report(struct usb_interface *intf, unsigned char type,
                                unsigned char id, void *buf, int size)
@@ -165,7 +166,7 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi
                        report,
                        hid_desc->wDescriptorLength,
                        5000); /* 5 secs */
-       } while (result < 0 && limit++ < 5);
+       } while (result < 0 && limit++ < WAC_MSG_RETRIES);
 
        /* No need to parse the Descriptor. It isn't an error though */
        if (result < 0)
@@ -319,24 +320,26 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
        int limit = 0, report_id = 2;
        int error = -ENOMEM;
 
-       rep_data = kmalloc(2, GFP_KERNEL);
+       rep_data = kmalloc(4, GFP_KERNEL);
        if (!rep_data)
                return error;
 
-       /* ask to report tablet data if it is 2FGT Tablet PC or
+       /* ask to report tablet data if it is MT Tablet PC or
         * not a Tablet PC */
        if (features->type == TABLETPC2FG) {
                do {
                        rep_data[0] = 3;
                        rep_data[1] = 4;
+                       rep_data[2] = 0;
+                       rep_data[3] = 0;
                        report_id = 3;
                        error = usb_set_report(intf, WAC_HID_FEATURE_REPORT,
-                               report_id, rep_data, 2);
+                               report_id, rep_data, 4);
                        if (error >= 0)
                                error = usb_get_report(intf,
                                        WAC_HID_FEATURE_REPORT, report_id,
-                                       rep_data, 3);
-               } while ((error < 0 || rep_data[1] != 4) && limit++ < 5);
+                                       rep_data, 4);
+               } while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES);
        } else if (features->type != TABLETPC) {
                do {
                        rep_data[0] = 2;
@@ -347,7 +350,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
                                error = usb_get_report(intf,
                                        WAC_HID_FEATURE_REPORT, report_id,
                                        rep_data, 2);
-               } while ((error < 0 || rep_data[1] != 2) && limit++ < 5);
+               } while ((error < 0 || rep_data[1] != 2) && limit++ < WAC_MSG_RETRIES);
        }
 
        kfree(rep_data);
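
The bare retry count 5 becomes the named WAC_MSG_RETRIES, and the feature-report buffer grows to the four bytes the MT Tablet PC query actually exchanges. A self-contained sketch of the bounded do/while retry idiom, with a stub transfer that fails transiently:

#include <stdio.h>

#define MSG_RETRIES 5   /* named bound, as WAC_MSG_RETRIES above */

/* Stub standing in for a transfer that may fail transiently;
 * here it succeeds on the third call. */
static int fake_set_report(void)
{
        static int calls;

        return ++calls < 3 ? -1 : 0;
}

int main(void)
{
        int limit = 0, error;

        /* Retry on error, but never more than MSG_RETRIES times. */
        do {
                error = fake_set_report();
        } while (error < 0 && limit++ < MSG_RETRIES);

        printf("%s after %d attempt(s)\n",
               error ? "gave up" : "succeeded", limit + 1);
        return error ? 1 : 0;
}
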
index 03ebcc8b24b59bc1ec5efa8f2d5d5465c53e3e56..c1c2f7b28d89ba1c2eae80a354a2bef156c5dbdf 100644 (file)
@@ -1460,6 +1460,9 @@ static const struct wacom_features wacom_features_0xD3 =
 static const struct wacom_features wacom_features_0xD4 =
        { "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200, 1023,
          63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0xD5 =
+       { "Wacom Bamboo Pen 6x8",     WACOM_PKGLEN_BBFUN, 21648, 13530, 1023,
+         63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0xD6 =
        { "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN,   14720,  9200, 1023,
          63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1564,6 +1567,7 @@ const struct usb_device_id wacom_ids[] = {
        { USB_DEVICE_WACOM(0xD2) },
        { USB_DEVICE_WACOM(0xD3) },
        { USB_DEVICE_WACOM(0xD4) },
+       { USB_DEVICE_WACOM(0xD5) },
        { USB_DEVICE_WACOM(0xD6) },
        { USB_DEVICE_WACOM(0xD7) },
        { USB_DEVICE_WACOM(0xD8) },
index ae00604a6a81d8c1cf37d65823d14eb728e36e3a..f5d66859f2322b362b4f2e3c747f62db8b20fe5d 100644 (file)
@@ -244,6 +244,7 @@ struct mxt_finger {
        int x;
        int y;
        int area;
+       int pressure;
 };
 
 /* Each client has this additional data */
@@ -536,6 +537,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id)
                                        finger[id].x);
                        input_report_abs(input_dev, ABS_MT_POSITION_Y,
                                        finger[id].y);
+                       input_report_abs(input_dev, ABS_MT_PRESSURE,
+                                       finger[id].pressure);
                } else {
                        finger[id].status = 0;
                }
@@ -546,6 +549,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id)
        if (status != MXT_RELEASE) {
                input_report_abs(input_dev, ABS_X, finger[single_id].x);
                input_report_abs(input_dev, ABS_Y, finger[single_id].y);
+               input_report_abs(input_dev,
+                                ABS_PRESSURE, finger[single_id].pressure);
        }
 
        input_sync(input_dev);
@@ -560,6 +565,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
        int x;
        int y;
        int area;
+       int pressure;
 
        /* Check the touch is present on the screen */
        if (!(status & MXT_DETECT)) {
@@ -584,6 +590,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
                y = y >> 2;
 
        area = message->message[4];
+       pressure = message->message[5];
 
        dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id,
                status & MXT_MOVE ? "moved" : "pressed",
@@ -594,6 +601,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
        finger[id].x = x;
        finger[id].y = y;
        finger[id].area = area;
+       finger[id].pressure = pressure;
 
        mxt_input_report(data, id);
 }
@@ -1116,6 +1124,8 @@ static int __devinit mxt_probe(struct i2c_client *client,
                             0, data->max_x, 0, 0);
        input_set_abs_params(input_dev, ABS_Y,
                             0, data->max_y, 0, 0);
+       input_set_abs_params(input_dev, ABS_PRESSURE,
+                            0, 255, 0, 0);
 
        /* For multi touch */
        input_mt_init_slots(input_dev, MXT_MAX_FINGER);
@@ -1125,6 +1135,8 @@ static int __devinit mxt_probe(struct i2c_client *client,
                             0, data->max_x, 0, 0);
        input_set_abs_params(input_dev, ABS_MT_POSITION_Y,
                             0, data->max_y, 0, 0);
+       input_set_abs_params(input_dev, ABS_MT_PRESSURE,
+                            0, 255, 0, 0);
 
        input_set_drvdata(input_dev, data);
        i2c_set_clientdata(client, data);
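
With the pressure byte (message[5]) captured per finger, the single-touch ABS_PRESSURE and multi-touch ABS_MT_PRESSURE axes are both declared over 0..255 before any events are reported; an axis reported without a matching input_set_abs_params() declaration is dropped by the input core. A kernel-context sketch (not a standalone module) of declaring and reporting a pressure axis:

/* Assumes driver context with an allocated struct input_dev. */
#include <linux/input.h>

static void declare_axes(struct input_dev *dev, int max_x, int max_y)
{
        input_set_abs_params(dev, ABS_X, 0, max_x, 0, 0);
        input_set_abs_params(dev, ABS_Y, 0, max_y, 0, 0);
        input_set_abs_params(dev, ABS_PRESSURE, 0, 255, 0, 0);
}

static void report_contact(struct input_dev *dev, int x, int y, int pressure)
{
        input_report_abs(dev, ABS_X, x);
        input_report_abs(dev, ABS_Y, y);
        input_report_abs(dev, ABS_PRESSURE, pressure);
        input_sync(dev);        /* closes one event frame */
}
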
index 4f2713d92791352c1fddd806b2252af69ed24fb6..4627fe55b4011b8f40809938f1fc497672c14b5e 100644 (file)
@@ -9,7 +9,8 @@
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
  */
 
 /*
index 089b0a0f3d8c3f51174bb9e31c5326c61e66d679..0e8f63e5b36ffbd3ee99e1a751dff2d4400e06e7 100644 (file)
@@ -13,6 +13,7 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/errno.h>
index b9826032450b4609c955260938a7d0889de40ef3..8c00937bf7e74d02bc1759cbab01ca6eba1bc13e 100644 (file)
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
index 3ebe3824662d6d4da520d3172be1e0789efa0be2..ea2185531f826e064d53437f956fbfb04e5e35f5 100644 (file)
@@ -662,6 +662,11 @@ failed_unregister_led1_R:
 static void bd2802_unregister_led_classdev(struct bd2802_led *led)
 {
        cancel_work_sync(&led->work);
+       led_classdev_unregister(&led->cdev_led2b);
+       led_classdev_unregister(&led->cdev_led2g);
+       led_classdev_unregister(&led->cdev_led2r);
+       led_classdev_unregister(&led->cdev_led1b);
+       led_classdev_unregister(&led->cdev_led1g);
        led_classdev_unregister(&led->cdev_led1r);
 }
 
index e4ce1fd46338122b6e93b39f102a81be4ea621cb..bcfbd3a60eab6b8ee4ae0ecb4cf14efce17a2ac3 100644 (file)
@@ -10,6 +10,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
index 0a4d86c6c4a4d107502bc00ab3f3d25943d9e0f8..2d6423c2d19340015c9f4be263342387db661d9a 100644 (file)
@@ -146,6 +146,7 @@ config PHANTOM
 
 config INTEL_MID_PTI
        tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
+       depends on PCI
        default n
        help
          The PTI (Parallel Trace Interface) driver directs
index 54e3d05b63cc6466549f532de704e5f2d0faf082..35903154ca2ee0e5c2adaa7971c0370aac8f19cc 100644 (file)
@@ -164,5 +164,5 @@ subsys_initcall(ab8500_pwm_init);
 module_exit(ab8500_pwm_exit);
 MODULE_AUTHOR("Arun MURTHY <arun.murthy@stericsson.com>");
 MODULE_DESCRIPTION("AB8500 Pulse Width Modulation Driver");
-MODULE_ALIAS("AB8500 PWM driver");
+MODULE_ALIAS("platform:ab8500-pwm");
 MODULE_LICENSE("GPL v2");
index 5325a7e70dcf47b6a357b59146b5cfd24821455e..27dc0d21aafa36b73955105aa6097eb5c7836352 100644 (file)
@@ -455,7 +455,7 @@ static int __devinit fsa9480_probe(struct i2c_client *client,
 
 fail2:
        if (client->irq)
-               free_irq(client->irq, NULL);
+               free_irq(client->irq, usbsw);
 fail1:
        i2c_set_clientdata(client, NULL);
        kfree(usbsw);
@@ -466,7 +466,7 @@ static int __devexit fsa9480_remove(struct i2c_client *client)
 {
        struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
        if (client->irq)
-               free_irq(client->irq, NULL);
+               free_irq(client->irq, usbsw);
        i2c_set_clientdata(client, NULL);
 
        sysfs_remove_group(&client->dev.kobj, &fsa9480_group);
index 8653bd0b1a33ca68b541c8161338ce8ee9e5357d..06df1877ad0f457091d5430768ec8b9a3c5f7a9f 100644 (file)
@@ -33,6 +33,8 @@
 #include <linux/mutex.h>
 #include <linux/miscdevice.h>
 #include <linux/pti.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #define DRIVERNAME             "pti"
 #define PCINAME                        "pciPTI"
index 54c91ffe4a9154fb7142f5f61a654658e8ab1fd0..ba168a7d54d42318d7d192e4158b482844c85be4 100644 (file)
@@ -338,6 +338,12 @@ void st_int_recv(void *disc_data,
                        /* Unknown packet? */
                default:
                        type = *ptr;
+                       if (st_gdata->list[type] == NULL) {
+                               pr_err("chip/interface misbehavior dropping"
+                                       " frame starting with 0x%02x", type);
+                               goto done;
+                       }
                        st_gdata->rx_skb = alloc_skb(
                                        st_gdata->list[type]->max_frame_size,
                                        GFP_ATOMIC);
@@ -354,6 +360,7 @@ void st_int_recv(void *disc_data,
                ptr++;
                count--;
        }
+done:
        spin_unlock_irqrestore(&st_gdata->lock, flags);
        pr_debug("done %s", __func__);
        return;
@@ -717,9 +724,10 @@ static void st_tty_close(struct tty_struct *tty)
         */
        spin_lock_irqsave(&st_gdata->lock, flags);
        for (i = ST_BT; i < ST_MAX_CHANNELS; i++) {
-               if (st_gdata->list[i] != NULL)
+               if (st_gdata->is_registered[i])
                        pr_err("%d not un-registered", i);
                st_gdata->list[i] = NULL;
+               st_gdata->is_registered[i] = false;
        }
        st_gdata->protos_registered = 0;
        spin_unlock_irqrestore(&st_gdata->lock, flags);
index 38fd2f04c07eed8df424dd7ef6f04ba25d7f4054..3a3580566dfca39fa5e2882f1658418665a9e72d 100644 (file)
@@ -68,6 +68,7 @@ void validate_firmware_response(struct kim_data_s *kim_gdata)
        if (unlikely(skb->data[5] != 0)) {
                pr_err("no proper response during fw download");
                pr_err("data6 %x", skb->data[5]);
+               kfree_skb(skb);
                return;         /* keep waiting for the proper response */
        }
        /* because the entire script is being downloaded */
@@ -210,6 +211,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name)
                pr_err(" waiting for ver info- timed out ");
                return -ETIMEDOUT;
        }
+       INIT_COMPLETION(kim_gdata->kim_rcvd);
 
        version =
                MAKEWORD(kim_gdata->resp_buffer[13],
@@ -298,6 +300,7 @@ static long download_firmware(struct kim_data_s *kim_gdata)
 
                switch (((struct bts_action *)ptr)->type) {
                case ACTION_SEND_COMMAND:       /* action send */
+                       pr_debug("S");
                        action_ptr = &(((struct bts_action *)ptr)->data[0]);
                        if (unlikely
                            (((struct hci_command *)action_ptr)->opcode ==
@@ -335,6 +338,10 @@ static long download_firmware(struct kim_data_s *kim_gdata)
                                release_firmware(kim_gdata->fw_entry);
                                return -ETIMEDOUT;
                        }
+                       /* reinit completion before sending for the
+                        * relevant wait
+                        */
+                       INIT_COMPLETION(kim_gdata->kim_rcvd);
 
                        /*
                         * Free space found in uart buffer, call st_int_write
@@ -361,6 +368,7 @@ static long download_firmware(struct kim_data_s *kim_gdata)
                        }
                        break;
                case ACTION_WAIT_EVENT:  /* wait */
+                       pr_debug("W");
                        if (!wait_for_completion_timeout
                                        (&kim_gdata->kim_rcvd,
                                         msecs_to_jiffies(CMD_RESP_TIME))) {
@@ -434,11 +442,17 @@ long st_kim_start(void *kim_data)
 {
        long err = 0;
        long retry = POR_RETRY_COUNT;
+       struct ti_st_plat_data  *pdata;
        struct kim_data_s       *kim_gdata = (struct kim_data_s *)kim_data;
 
        pr_info(" %s", __func__);
+       pdata = kim_gdata->kim_pdev->dev.platform_data;
 
        do {
+               /* platform specific enabling code here */
+               if (pdata->chip_enable)
+                       pdata->chip_enable(kim_gdata);
+
                /* Configure BT nShutdown to HIGH state */
                gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
                mdelay(5);      /* FIXME: a proper toggle */
@@ -460,6 +474,12 @@ long st_kim_start(void *kim_data)
                        pr_info("ldisc_install = 0");
                        sysfs_notify(&kim_gdata->kim_pdev->dev.kobj,
                                        NULL, "install");
+                       /* the following wait is never going to be completed,
+                        * since the ldisc was never installed, hence serving
+                        * as an mdelay of LDISC_TIME msecs */
+                       err = wait_for_completion_timeout
+                               (&kim_gdata->ldisc_installed,
+                                msecs_to_jiffies(LDISC_TIME));
                        err = -ETIMEDOUT;
                        continue;
                } else {
@@ -472,6 +492,13 @@ long st_kim_start(void *kim_data)
                                pr_info("ldisc_install = 0");
                                sysfs_notify(&kim_gdata->kim_pdev->dev.kobj,
                                                NULL, "install");
+                               /* this wait may already be completed in
+                                * tty_close(), since the ldisc is already
+                                * installed */
+                               err = wait_for_completion_timeout
+                                       (&kim_gdata->ldisc_installed,
+                                        msecs_to_jiffies(LDISC_TIME));
+                               err = -EINVAL;
                                continue;
                        } else {        /* on success don't retry */
                                break;
@@ -489,6 +516,8 @@ long st_kim_stop(void *kim_data)
 {
        long err = 0;
        struct kim_data_s       *kim_gdata = (struct kim_data_s *)kim_data;
+       struct ti_st_plat_data  *pdata =
+               kim_gdata->kim_pdev->dev.platform_data;
 
        INIT_COMPLETION(kim_gdata->ldisc_installed);
 
@@ -515,6 +544,10 @@ long st_kim_stop(void *kim_data)
        gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH);
        mdelay(1);
        gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
+
+       /* platform specific disable */
+       if (pdata->chip_disable)
+               pdata->chip_disable(kim_gdata);
        return err;
 }
 
index 3f2495138855457ac6dfc6cfa5de554ed47ee071..1ff460a8e9c74f67e584d634016fa25332fd351c 100644 (file)
@@ -22,6 +22,7 @@
 #define pr_fmt(fmt) "(stll) :" fmt
 #include <linux/skbuff.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/ti_wilink_st.h>
 
 /**********************************************************************/
@@ -37,6 +38,9 @@ static void send_ll_cmd(struct st_data_s *st_data,
 
 static void ll_device_want_to_sleep(struct st_data_s *st_data)
 {
+       struct kim_data_s       *kim_data;
+       struct ti_st_plat_data  *pdata;
+
        pr_debug("%s", __func__);
        /* sanity check */
        if (st_data->ll_state != ST_LL_AWAKE)
@@ -46,10 +50,19 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data)
        send_ll_cmd(st_data, LL_SLEEP_ACK);
        /* update state */
        st_data->ll_state = ST_LL_ASLEEP;
+
+       /* communicate to platform about chip asleep */
+       kim_data = st_data->kim_data;
+       pdata = kim_data->kim_pdev->dev.platform_data;
+       if (pdata->chip_asleep)
+               pdata->chip_asleep(NULL);
 }
 
 static void ll_device_want_to_wakeup(struct st_data_s *st_data)
 {
+       struct kim_data_s       *kim_data;
+       struct ti_st_plat_data  *pdata;
+
        /* diff actions in diff states */
        switch (st_data->ll_state) {
        case ST_LL_ASLEEP:
@@ -70,6 +83,12 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data)
        }
        /* update state */
        st_data->ll_state = ST_LL_AWAKE;
+
+       /* communicate to platform about chip wakeup */
+       kim_data = st_data->kim_data;
+       pdata = kim_data->kim_pdev->dev.platform_data;
+       if (pdata->chip_awake)
+               pdata->chip_awake(NULL);
 }
 
 /**********************************************************************/
index 38a83acd502e6395f06257cff0fa39cde757fa40..43f2ea5410884ae34e5dd8db0fa2a25caea6d81c 100644 (file)
@@ -3419,9 +3419,27 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
 static int bond_open(struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave;
+       int i;
 
        bond->kill_timers = 0;
 
+       /* reset slave->backup and slave->inactive */
+       read_lock(&bond->lock);
+       if (bond->slave_cnt > 0) {
+               read_lock(&bond->curr_slave_lock);
+               bond_for_each_slave(bond, slave, i) {
+                       if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+                               && (slave != bond->curr_active_slave)) {
+                               bond_set_slave_inactive_flags(slave);
+                       } else {
+                               bond_set_slave_active_flags(slave);
+                       }
+               }
+               read_unlock(&bond->curr_slave_lock);
+       }
+       read_unlock(&bond->lock);
+
        INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
 
        if (bond_is_lb(bond)) {
index 231385b8e08faa38bfab0850080109d256cf6c2d..c7f3d4ea11672761cf7c9241db738e463cbb00ca 100644 (file)
@@ -408,7 +408,7 @@ static void plx_pci_del_card(struct pci_dev *pdev)
        struct sja1000_priv *priv;
        int i = 0;
 
-       for (i = 0; i < card->channels; i++) {
+       for (i = 0; i < PLX_PCI_MAX_CHAN; i++) {
                dev = card->net_dev[i];
                if (!dev)
                        continue;
@@ -536,7 +536,6 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev,
                        if (err) {
                                dev_err(&pdev->dev, "Registering device failed "
                                        "(err=%d)\n", err);
-                               free_sja1000dev(dev);
                                goto failure_cleanup;
                        }
 
@@ -549,6 +548,7 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev,
                        dev_err(&pdev->dev, "Channel #%d not detected\n",
                                i + 1);
                        free_sja1000dev(dev);
+                       card->net_dev[i] = NULL;
                }
        }
 
index f7bbde9eb2cba34db7c27207648fc000320dc690..a81249246eceeefbf385853ad149eb54a0094e10 100644 (file)
@@ -503,9 +503,9 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev)
        spin_unlock_irqrestore(&priv->mbx_lock, flags);
 
        /* Prepare mailbox for transmission */
+       data = cf->can_dlc | (get_tx_head_prio(priv) << 8);
        if (cf->can_id & CAN_RTR_FLAG) /* Remote transmission request */
                data |= HECC_CANMCF_RTR;
-       data |= get_tx_head_prio(priv) << 8;
        hecc_write_mbx(priv, mbxno, HECC_CANMCF, data);
 
        if (cf->can_id & CAN_EFF_FLAG) /* Extended frame format */
@@ -923,6 +923,7 @@ static int ti_hecc_probe(struct platform_device *pdev)
        priv->can.do_get_state = ti_hecc_get_state;
        priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
 
+       spin_lock_init(&priv->mbx_lock);
        ndev->irq = irq->start;
        ndev->flags |= IFF_ECHO;
        platform_set_drvdata(pdev, ndev);
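
priv->mbx_lock was taken in the xmit and interrupt paths without ever being initialized; a spinlock must go through spin_lock_init() (or DEFINE_SPINLOCK() for statics) before its first use, and a lockdep-enabled kernel will warn otherwise. A kernel-context sketch (not a standalone module) of the required ordering, with hypothetical names:

#include <linux/spinlock.h>
#include <linux/types.h>

struct hecc_like_priv {
        spinlock_t mbx_lock;    /* protects mailbox bookkeeping */
        u32 tx_head;
};

static void priv_init(struct hecc_like_priv *priv)
{
        spin_lock_init(&priv->mbx_lock);        /* before any locking */
}

static void tx_path(struct hecc_like_priv *priv)
{
        unsigned long flags;

        spin_lock_irqsave(&priv->mbx_lock, flags);
        priv->tx_head++;
        spin_unlock_irqrestore(&priv->mbx_lock, flags);
}
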
index 646c86bcc545e08835cc07b07ca913b1ba64dbcc..fdb7a175640981186b5e891dc7f7d34fedddb223 100644 (file)
@@ -2452,14 +2452,13 @@ static irqreturn_t cas_interruptN(int irq, void *dev_id)
        struct net_device *dev = dev_id;
        struct cas *cp = netdev_priv(dev);
        unsigned long flags;
-       int ring;
+       int ring = (irq == cp->pci_irq_INTC) ? 2 : 3;
        u32 status = readl(cp->regs + REG_PLUS_INTRN_STATUS(ring));
 
        /* check for shared irq */
        if (status == 0)
                return IRQ_NONE;
 
-       ring = (irq == cp->pci_irq_INTC) ? 2 : 3;
        spin_lock_irqsave(&cp->lock, flags);
        if (status & INTR_RX_DONE_ALT) { /* handle rx separately */
 #ifdef USE_NAPI
index 35916f485028d4617f0e7c86860a1d1b82edf585..8533ad7f3559030444d962d82ce3e1f43439e48e 100644 (file)
@@ -155,6 +155,9 @@ struct e1000_info;
 #define HV_M_STATUS_SPEED_1000            0x0200
 #define HV_M_STATUS_LINK_UP               0x0040
 
+#define E1000_ICH_FWSM_PCIM2PCI                0x01000000 /* ME PCIm-to-PCI active */
+#define E1000_ICH_FWSM_PCIM2PCI_COUNT  2000
+
 /* Time to wait before putting the device into D3 if there's no link (in ms). */
 #define LINK_TIMEOUT           100
 
@@ -454,6 +457,7 @@ struct e1000_info {
 #define FLAG2_DISABLE_AIM                 (1 << 8)
 #define FLAG2_CHECK_PHY_HANG              (1 << 9)
 #define FLAG2_NO_DISABLE_RX               (1 << 10)
+#define FLAG2_PCIM2PCI_ARBITER_WA         (1 << 11)
 
 #define E1000_RX_DESC_PS(R, i)     \
        (&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
index 4e36978b8fd8f11c2042d611165a28b119111ad1..54add27c8f760ae871858f837d6d956ee2387f03 100644 (file)
 #define HV_PM_CTRL             PHY_REG(770, 17)
 
 /* PHY Low Power Idle Control */
-#define I82579_LPI_CTRL                        PHY_REG(772, 20)
-#define I82579_LPI_CTRL_ENABLE_MASK    0x6000
+#define I82579_LPI_CTRL                                PHY_REG(772, 20)
+#define I82579_LPI_CTRL_ENABLE_MASK            0x6000
+#define I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT   0x80
 
 /* EMI Registers */
 #define I82579_EMI_ADDR         0x10
 #define HV_KMRN_MODE_CTRL      PHY_REG(769, 16)
 #define HV_KMRN_MDIO_SLOW      0x0400
 
+/* KMRN FIFO Control and Status */
+#define HV_KMRN_FIFO_CTRLSTA                  PHY_REG(770, 16)
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK    0x7000
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT   12
+
 /* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */
 /* Offset 04h HSFSTS */
 union ich8_hws_flash_status {
@@ -657,6 +663,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
        struct e1000_mac_info *mac = &hw->mac;
        s32 ret_val;
        bool link;
+       u16 phy_reg;
 
        /*
         * We only want to go out to the PHY registers to see if Auto-Neg
@@ -689,16 +696,35 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 
        mac->get_link_status = false;
 
-       if (hw->phy.type == e1000_phy_82578) {
-               ret_val = e1000_link_stall_workaround_hv(hw);
-               if (ret_val)
-                       goto out;
-       }
-
-       if (hw->mac.type == e1000_pch2lan) {
+       switch (hw->mac.type) {
+       case e1000_pch2lan:
                ret_val = e1000_k1_workaround_lv(hw);
                if (ret_val)
                        goto out;
+               /* fall-thru */
+       case e1000_pchlan:
+               if (hw->phy.type == e1000_phy_82578) {
+                       ret_val = e1000_link_stall_workaround_hv(hw);
+                       if (ret_val)
+                               goto out;
+               }
+
+               /*
+                * Workaround for PCHx parts in half-duplex:
+                * Set the number of preambles removed from the packet
+                * when it is passed from the PHY to the MAC to prevent
+                * the MAC from misinterpreting the packet type.
+                */
+               e1e_rphy(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg);
+               phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK;
+
+               if ((er32(STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD)
+                       phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT);
+
+               e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg);
+               break;
+       default:
+               break;
        }
 
        /*
@@ -788,6 +814,11 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
            (adapter->hw.phy.type == e1000_phy_igp_3))
                adapter->flags |= FLAG_LSC_GIG_SPEED_DROP;
 
+       /* Enable workaround for 82579 w/ ME enabled */
+       if ((adapter->hw.mac.type == e1000_pch2lan) &&
+           (er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+               adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA;
+
        /* Disable EEE by default until IEEE802.3az spec is finalized */
        if (adapter->flags2 & FLAG2_HAS_EEE)
                adapter->hw.dev_spec.ich8lan.eee_disable = true;
@@ -1355,7 +1386,7 @@ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw)
                        return ret_val;
 
                /* Preamble tuning for SSC */
-               ret_val = e1e_wphy(hw, PHY_REG(770, 16), 0xA204);
+               ret_val = e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204);
                if (ret_val)
                        return ret_val;
        }
@@ -1645,6 +1676,7 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
        s32 ret_val = 0;
        u16 status_reg = 0;
        u32 mac_reg;
+       u16 phy_reg;
 
        if (hw->mac.type != e1000_pch2lan)
                goto out;
@@ -1659,12 +1691,19 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
                mac_reg = er32(FEXTNVM4);
                mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
 
-               if (status_reg & HV_M_STATUS_SPEED_1000)
+               ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg);
+               if (ret_val)
+                       goto out;
+
+               if (status_reg & HV_M_STATUS_SPEED_1000) {
                        mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
-               else
+                       phy_reg &= ~I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT;
+               } else {
                        mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC;
-
+                       phy_reg |= I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT;
+               }
                ew32(FEXTNVM4, mac_reg);
+               ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg);
        }
 
 out:
index 362f70382cdd411483ad95ec26af24b0a08e198e..2198e615f241c5fc7e640f7c7da078be16ece7f3 100644 (file)
@@ -518,6 +518,63 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
        adapter->hw_csum_good++;
 }
 
+/**
+ * e1000e_update_tail_wa - helper function for e1000e_update_[rt]dt_wa()
+ * @hw: pointer to the HW structure
+ * @tail: address of tail descriptor register
+ * @i: value to write to tail descriptor register
+ *
+ * When updating the tail register, the ME could be accessing Host CSR
+ * registers at the same time.  Normally, this is handled in h/w by an
+ * arbiter, but on some parts a bug causes Host accesses to be acknowledged
+ * later than they should be, which can leave the descriptor register with
+ * an incorrect value.  Work around this by checking the FWSM register,
+ * which has bit 24 set while the ME is accessing Host CSR registers; if
+ * it is set, wait and try again a number of times.
+ **/
+static inline s32 e1000e_update_tail_wa(struct e1000_hw *hw, u8 __iomem * tail,
+                                       unsigned int i)
+{
+       unsigned int j = 0;
+
+       while ((j++ < E1000_ICH_FWSM_PCIM2PCI_COUNT) &&
+              (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI))
+               udelay(50);
+
+       writel(i, tail);
+
+       if ((j >= E1000_ICH_FWSM_PCIM2PCI_COUNT) && (i != readl(tail)))
+               return E1000_ERR_SWFW_SYNC;
+
+       return 0;
+}
+
+static void e1000e_update_rdt_wa(struct e1000_adapter *adapter, unsigned int i)
+{
+       u8 __iomem *tail = (adapter->hw.hw_addr + adapter->rx_ring->tail);
+       struct e1000_hw *hw = &adapter->hw;
+
+       if (e1000e_update_tail_wa(hw, tail, i)) {
+               u32 rctl = er32(RCTL);
+               ew32(RCTL, rctl & ~E1000_RCTL_EN);
+               e_err("ME firmware caused invalid RDT - resetting\n");
+               schedule_work(&adapter->reset_task);
+       }
+}
+
+static void e1000e_update_tdt_wa(struct e1000_adapter *adapter, unsigned int i)
+{
+       u8 __iomem *tail = (adapter->hw.hw_addr + adapter->tx_ring->tail);
+       struct e1000_hw *hw = &adapter->hw;
+
+       if (e1000e_update_tail_wa(hw, tail, i)) {
+               u32 tctl = er32(TCTL);
+               ew32(TCTL, tctl & ~E1000_TCTL_EN);
+               e_err("ME firmware caused invalid TDT - resetting\n");
+               schedule_work(&adapter->reset_task);
+       }
+}
+
 /**
  * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
  * @adapter: address of board private structure
@@ -573,7 +630,10 @@ map_skb:
                         * such as IA-64).
                         */
                        wmb();
-                       writel(i, adapter->hw.hw_addr + rx_ring->tail);
+                       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                               e1000e_update_rdt_wa(adapter, i);
+                       else
+                               writel(i, adapter->hw.hw_addr + rx_ring->tail);
                }
                i++;
                if (i == rx_ring->count)
@@ -673,7 +733,11 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
                         * such as IA-64).
                         */
                        wmb();
-                       writel(i << 1, adapter->hw.hw_addr + rx_ring->tail);
+                       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                               e1000e_update_rdt_wa(adapter, i << 1);
+                       else
+                               writel(i << 1,
+                                      adapter->hw.hw_addr + rx_ring->tail);
                }
 
                i++;
@@ -756,7 +820,10 @@ check_page:
                 * applicable for weak-ordered memory model archs,
                 * such as IA-64). */
                wmb();
-               writel(i, adapter->hw.hw_addr + rx_ring->tail);
+               if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                       e1000e_update_rdt_wa(adapter, i);
+               else
+                       writel(i, adapter->hw.hw_addr + rx_ring->tail);
        }
 }
 
@@ -4689,7 +4756,12 @@ static void e1000_tx_queue(struct e1000_adapter *adapter,
        wmb();
 
        tx_ring->next_to_use = i;
-       writel(i, adapter->hw.hw_addr + tx_ring->tail);
+
+       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+               e1000e_update_tdt_wa(adapter, i);
+       else
+               writel(i, adapter->hw.hw_addr + tx_ring->tail);
+
        /*
         * we need this if more than one processor can write to our tail
         * at a time, it synchronizes IO on IA64/Altix systems
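
Taken together, the hunks above implement a guarded doorbell write: descriptors are filled, wmb() orders those stores ahead of the tail bump, and the tail register is then written either directly or through the arbiter workaround. Below is a minimal, self-contained sketch of the idea; the names (ARBITER_BUSY, ARBITER_TRIES, guarded_tail_write) are illustrative, not the driver's, and only the poll/write/readback shape matches the code above.

    #include <linux/bitops.h>
    #include <linux/delay.h>
    #include <linux/io.h>

    #define ARBITER_BUSY   BIT(24)  /* FWSM bit polled by the hunk above */
    #define ARBITER_TRIES  1000     /* bounded retry count (assumption)  */

    /*
     * Write a tail register while firmware may own the bus: poll the
     * busy bit a bounded number of times, write, then verify by readback.
     */
    static int guarded_tail_write(void __iomem *fwsm, void __iomem *tail,
                                  u32 val)
    {
            unsigned int tries = 0;

            while (tries++ < ARBITER_TRIES && (readl(fwsm) & ARBITER_BUSY))
                    udelay(50);

            writel(val, tail);

            /* If we ran out of retries, the write may have been lost. */
            if (tries >= ARBITER_TRIES && readl(tail) != val)
                    return -EBUSY;

            return 0;
    }

On failure the driver disables the affected ring and schedules a full reset, since a stale tail desynchronizes the hardware and software ring indices.
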
index e55df308a3af219a54beaadc7dfdcd42c9508de8..6d5fbd4d4256ba527ea295030e91c6dd5b7373f6 100644 (file)
@@ -5615,7 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                goto out_error;
        }
 
-       nv_vlan_mode(dev, dev->features);
+       if (id->driver_data & DEV_HAS_VLAN)
+               nv_vlan_mode(dev, dev->features);
 
        netif_carrier_off(dev);
 
index 2659daad783ddb857f5e9597bbb2a2ea95c05f04..31d5c574e5a9aaf923d23c124186dbdfc7defa70 100644 (file)
@@ -2710,8 +2710,13 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
        /* Tell the skb what kind of packet this is */
        skb->protocol = eth_type_trans(skb, dev);
 
-       /* Set vlan tag */
-       if (fcb->flags & RXFCB_VLN)
+       /*
+        * We need to check for NETIF_F_HW_VLAN_RX here:
+        * even if VLAN RX acceleration is disabled, some
+        * chips set RXFCB_VLN pseudo-randomly.
+        */
+       if (dev->features & NETIF_F_HW_VLAN_RX &&
+           fcb->flags & RXFCB_VLN)
                __vlan_hwaccel_put_tag(skb, fcb->vlctl);
 
        /* Send the packet up the stack */
index 6e350692d1184147b0989e19d8be3b8436938d29..25a8c2adb001892acc1c2531de8ed8b62b30d8e0 100644 (file)
@@ -686,10 +686,21 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
 {
        unsigned int last_rule_idx = priv->cur_filer_idx;
        unsigned int cmp_rqfpr;
-       unsigned int local_rqfpr[MAX_FILER_IDX + 1];
-       unsigned int local_rqfcr[MAX_FILER_IDX + 1];
+       unsigned int *local_rqfpr;
+       unsigned int *local_rqfcr;
        int i = 0x0, k = 0x0;
        int j = MAX_FILER_IDX, l = 0x0;
+       int ret = 1;
+
+       local_rqfpr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1),
+               GFP_KERNEL);
+       local_rqfcr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1),
+               GFP_KERNEL);
+       if (!local_rqfpr || !local_rqfcr) {
+               pr_err("Out of memory\n");
+               ret = 0;
+               goto err;
+       }
 
        switch (class) {
        case TCP_V4_FLOW:
@@ -706,7 +717,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
                break;
        default:
                pr_err("Right now this class is not supported\n");
-               return 0;
+               ret = 0;
+               goto err;
        }
 
        for (i = 0; i < MAX_FILER_IDX + 1; i++) {
@@ -721,7 +733,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
 
        if (i == MAX_FILER_IDX + 1) {
                pr_err("No parse rule found, can't create hash rules\n");
-               return 0;
+               ret = 0;
+               goto err;
        }
 
        /* If a match was found, then it begins the starting of a cluster rule
@@ -765,7 +778,10 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
                priv->cur_filer_idx = priv->cur_filer_idx - 1;
        }
 
-       return 1;
+err:
+       kfree(local_rqfcr);
+       kfree(local_rqfpr);
+       return ret;
 }
 
 static int gfar_set_hash_opts(struct gfar_private *priv, struct ethtool_rxnfc *cmd)
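
The conversion above moves two MAX_FILER_IDX-sized arrays off the kernel stack and onto the heap, with a single error label that frees both; kfree(NULL) is a no-op, so one label covers partial allocation failure. A minimal sketch of the pattern, with hypothetical names:

    #include <linux/slab.h>

    /* Sketch: large scratch tables belong on the heap, not the stack. */
    static int build_tables(unsigned int n)
    {
            unsigned int *fpr, *fcr;
            int ret = 0;

            fpr = kmalloc(n * sizeof(*fpr), GFP_KERNEL);
            fcr = kmalloc(n * sizeof(*fcr), GFP_KERNEL);
            if (!fpr || !fcr) {
                    ret = -ENOMEM;
                    goto out;       /* kfree(NULL) is safe */
            }

            /* ... fill and consume fpr[] and fcr[] ... */

    out:
            kfree(fcr);
            kfree(fpr);
            return ret;
    }

Note that the original function returns 1 on success and 0 on failure, an idiosyncratic convention the patch preserves; the sketch uses the usual 0/-errno instead.
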
index ba99af05bf62744148a4d33d008c04ddece97a98..3e6679269400d95aa6c169443f7d55510844b6ce 100644 (file)
@@ -395,7 +395,7 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada
 }
 
 /* recycle the current buffer on the rx queue */
-static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
+static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
 {
        u32 q_index = adapter->rx_queue.index;
        u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator;
@@ -403,6 +403,7 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
        unsigned int index = correlator & 0xffffffffUL;
        union ibmveth_buf_desc desc;
        unsigned long lpar_rc;
+       int ret = 1;
 
        BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
        BUG_ON(index >= adapter->rx_buff_pool[pool].size);
@@ -410,7 +411,7 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
        if (!adapter->rx_buff_pool[pool].active) {
                ibmveth_rxq_harvest_buffer(adapter);
                ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]);
-               return;
+               goto out;
        }
 
        desc.fields.flags_len = IBMVETH_BUF_VALID |
@@ -423,12 +424,16 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
                netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed "
                           "during recycle rc=%ld", lpar_rc);
                ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
+               ret = 0;
        }
 
        if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
                adapter->rx_queue.index = 0;
                adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
        }
+
+out:
+       return ret;
 }
 
 static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
@@ -1084,8 +1089,9 @@ restart_poll:
                                if (rx_flush)
                                        ibmveth_flush_buffer(skb->data,
                                                length + offset);
+                               if (!ibmveth_rxq_recycle_buffer(adapter))
+                                       kfree_skb(skb);
                                skb = new_skb;
-                               ibmveth_rxq_recycle_buffer(adapter);
                        } else {
                                ibmveth_rxq_harvest_buffer(adapter);
                                skb_reserve(skb, offset);
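
Returning a status from the recycle step lets the polling loop know whether the old buffer was handed back to the hypervisor; when the recycle fails, the skb no longer has an owner on the device side, so the caller must free it or it leaks. A hedged sketch of the ownership convention, where recycle_to_device() is a stand-in for ibmveth_rxq_recycle_buffer():

    #include <linux/skbuff.h>

    /* Stand-in for the driver's recycle call; success path only. */
    static bool recycle_to_device(struct sk_buff *skb)
    {
            return true;
    }

    /* Sketch: on recycle failure, ownership stays with the caller. */
    static void rx_replace(struct sk_buff **slot, struct sk_buff *fresh)
    {
            struct sk_buff *old = *slot;

            if (!recycle_to_device(old))
                    kfree_skb(old); /* device refused it; free it here */

            *slot = fresh;
    }
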
index e86297b32733e8a0af34a329a3f839ce2c57205b..22790394318a06085290e179f88647e97480271b 100644 (file)
@@ -1459,8 +1459,10 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                if (ixgbe_rx_is_fcoe(adapter, rx_desc)) {
                        ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb,
                                                   staterr);
-                       if (!ddp_bytes)
+                       if (!ddp_bytes) {
+                               dev_kfree_skb_any(skb);
                                goto next_desc;
+                       }
                }
 #endif /* IXGBE_FCOE */
                ixgbe_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc);
index 0620ba963508e17096dab555ed79495749c68e70..04bb8fcc0cb5dd2c069518270af8e9ca1f7c74e1 100644 (file)
@@ -25,8 +25,9 @@
 /* DP83865 phy identifier values */
 #define DP83865_PHY_ID 0x20005c7a
 
-#define DP83865_INT_MASK_REG 0x15
-#define DP83865_INT_MASK_STATUS 0x14
+#define DP83865_INT_STATUS     0x14
+#define DP83865_INT_MASK       0x15
+#define DP83865_INT_CLEAR      0x17
 
 #define DP83865_INT_REMOTE_FAULT 0x0008
 #define DP83865_INT_ANE_COMPLETED 0x0010
@@ -68,21 +69,25 @@ static int ns_config_intr(struct phy_device *phydev)
        int err;
 
        if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
-               err = phy_write(phydev, DP83865_INT_MASK_REG,
+               err = phy_write(phydev, DP83865_INT_MASK,
                                DP83865_INT_MASK_DEFAULT);
        else
-               err = phy_write(phydev, DP83865_INT_MASK_REG, 0);
+               err = phy_write(phydev, DP83865_INT_MASK, 0);
 
        return err;
 }
 
 static int ns_ack_interrupt(struct phy_device *phydev)
 {
-       int ret = phy_read(phydev, DP83865_INT_MASK_STATUS);
+       int ret = phy_read(phydev, DP83865_INT_STATUS);
        if (ret < 0)
                return ret;
 
-       return 0;
+       /* Clear the interrupt status bit by writing a "1"
+        * to the corresponding bit in INT_CLEAR (2:0 are reserved) */
+       ret = phy_write(phydev, DP83865_INT_CLEAR, ret & ~0x7);
+
+       return ret;
 }
 
 static void ns_giga_speed_fallback(struct phy_device *phydev, int mode)
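
The rename makes the register roles explicit (0x14 is status, 0x15 the mask, 0x17 the clear register), and the ack path now actually clears the latched bits by writing 1s to INT_CLEAR instead of merely reading a register. A sketch of this write-one-to-clear idiom, using the register offsets from the hunk above (ack_phy_irq() is a hypothetical name):

    #include <linux/phy.h>

    #define DP83865_INT_STATUS     0x14
    #define DP83865_INT_CLEAR      0x17

    /* Sketch: read the latched status, then write it back to the
     * clear register; bits 2:0 are reserved per the hunk above. */
    static int ack_phy_irq(struct phy_device *phydev)
    {
            int status = phy_read(phydev, DP83865_INT_STATUS);

            if (status < 0)
                    return status;

            return phy_write(phydev, DP83865_INT_CLEAR, status & ~0x7);
    }
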
index 86ac38c96bcf040b59027332b614925aeee71293..3bb131137033cb9d9e53daaf0ad0ee89310fae65 100644 (file)
@@ -80,13 +80,13 @@ static int rionet_capable = 1;
  */
 static struct rio_dev **rionet_active;
 
-#define is_rionet_capable(pef, src_ops, dst_ops)               \
-                       ((pef & RIO_PEF_INB_MBOX) &&            \
-                        (pef & RIO_PEF_INB_DOORBELL) &&        \
+#define is_rionet_capable(src_ops, dst_ops)                    \
+                       ((src_ops & RIO_SRC_OPS_DATA_MSG) &&    \
+                        (dst_ops & RIO_DST_OPS_DATA_MSG) &&    \
                         (src_ops & RIO_SRC_OPS_DOORBELL) &&    \
                         (dst_ops & RIO_DST_OPS_DOORBELL))
 #define dev_rionet_capable(dev) \
-       is_rionet_capable(dev->pef, dev->src_ops, dev->dst_ops)
+       is_rionet_capable(dev->src_ops, dev->dst_ops)
 
 #define RIONET_MAC_MATCH(x)    (*(u32 *)x == 0x00010001)
 #define RIONET_GET_DESTID(x)   (*(u16 *)(x + 4))
@@ -282,7 +282,6 @@ static int rionet_open(struct net_device *ndev)
 {
        int i, rc = 0;
        struct rionet_peer *peer, *tmp;
-       u32 pwdcsr;
        struct rionet_private *rnet = netdev_priv(ndev);
 
        if (netif_msg_ifup(rnet))
@@ -332,13 +331,8 @@ static int rionet_open(struct net_device *ndev)
                        continue;
                }
 
-               /*
-                * If device has initialized inbound doorbells,
-                * send a join message
-                */
-               rio_read_config_32(peer->rdev, RIO_WRITE_PORT_CSR, &pwdcsr);
-               if (pwdcsr & RIO_DOORBELL_AVAIL)
-                       rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
+               /* Send a join message */
+               rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
        }
 
       out:
@@ -492,7 +486,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
 {
        int rc = -ENODEV;
-       u32 lpef, lsrc_ops, ldst_ops;
+       u32 lsrc_ops, ldst_ops;
        struct rionet_peer *peer;
        struct net_device *ndev = NULL;
 
@@ -515,12 +509,11 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
         * on later probes
         */
        if (!rionet_check) {
-               rio_local_read_config_32(rdev->net->hport, RIO_PEF_CAR, &lpef);
                rio_local_read_config_32(rdev->net->hport, RIO_SRC_OPS_CAR,
                                         &lsrc_ops);
                rio_local_read_config_32(rdev->net->hport, RIO_DST_OPS_CAR,
                                         &ldst_ops);
-               if (!is_rionet_capable(lpef, lsrc_ops, ldst_ops)) {
+               if (!is_rionet_capable(lsrc_ops, ldst_ops)) {
                        printk(KERN_ERR
                               "%s: local device is not network capable\n",
                               DRV_NAME);
index ad35c210b839b950726960144ac1da72a43833cc..1c1666e99106eb8ed09ab7e0f8290abd74cb8727 100644 (file)
@@ -21,6 +21,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
 #include <linux/delay.h>
@@ -30,6 +31,7 @@
 #include <linux/phy.h>
 #include <linux/cache.h>
 #include <linux/io.h>
+#include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/ethtool.h>
index a03336e086d5ec11423a62b492304c2f00769fc3..f06fb78383a1b865b22c989661a1795f4d7f0368 100644 (file)
@@ -228,23 +228,40 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
        if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) {
 
                if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) {
-                       struct usb_cdc_ncm_ndp_input_size ndp_in_sz;
+                       struct usb_cdc_ncm_ndp_input_size *ndp_in_sz;
+
+                       ndp_in_sz = kzalloc(sizeof(*ndp_in_sz), GFP_KERNEL);
+                       if (!ndp_in_sz) {
+                               err = -ENOMEM;
+                               goto size_err;
+                       }
+
                        err = usb_control_msg(ctx->udev,
                                        usb_sndctrlpipe(ctx->udev, 0),
                                        USB_CDC_SET_NTB_INPUT_SIZE,
                                        USB_TYPE_CLASS | USB_DIR_OUT
                                         | USB_RECIP_INTERFACE,
-                                       0, iface_no, &ndp_in_sz, 8, 1000);
+                                       0, iface_no, ndp_in_sz, 8, 1000);
+                       kfree(ndp_in_sz);
                } else {
-                       __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+                       __le32 *dwNtbInMaxSize;
+                       dwNtbInMaxSize = kzalloc(sizeof(*dwNtbInMaxSize),
+                                       GFP_KERNEL);
+                       if (!dwNtbInMaxSize) {
+                               err = -ENOMEM;
+                               goto size_err;
+                       }
+                       *dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+
                        err = usb_control_msg(ctx->udev,
                                        usb_sndctrlpipe(ctx->udev, 0),
                                        USB_CDC_SET_NTB_INPUT_SIZE,
                                        USB_TYPE_CLASS | USB_DIR_OUT
                                         | USB_RECIP_INTERFACE,
-                                       0, iface_no, &dwNtbInMaxSize, 4, 1000);
+                                       0, iface_no, dwNtbInMaxSize, 4, 1000);
+                       kfree(dwNtbInMaxSize);
                }
-
+size_err:
                if (err < 0)
                        pr_debug("Setting NTB Input Size failed\n");
        }
@@ -325,19 +342,29 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 
        /* set Max Datagram Size (MTU) */
        if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) {
-               __le16 max_datagram_size;
+               __le16 *max_datagram_size;
                u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize);
+
+               max_datagram_size = kzalloc(sizeof(*max_datagram_size),
+                               GFP_KERNEL);
+               if (!max_datagram_size) {
+                       err = -ENOMEM;
+                       goto max_dgram_err;
+               }
+
                err = usb_control_msg(ctx->udev, usb_rcvctrlpipe(ctx->udev, 0),
                                USB_CDC_GET_MAX_DATAGRAM_SIZE,
                                USB_TYPE_CLASS | USB_DIR_IN
                                 | USB_RECIP_INTERFACE,
-                               0, iface_no, &max_datagram_size,
+                               0, iface_no, max_datagram_size,
                                2, 1000);
                if (err < 0) {
                        pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n",
                                                CDC_NCM_MIN_DATAGRAM_SIZE);
+                       kfree(max_datagram_size);
                } else {
-                       ctx->max_datagram_size = le16_to_cpu(max_datagram_size);
+                       ctx->max_datagram_size =
+                               le16_to_cpu(*max_datagram_size);
                        /* Check Eth descriptor value */
                        if (eth_max_sz < CDC_NCM_MAX_DATAGRAM_SIZE) {
                                if (ctx->max_datagram_size > eth_max_sz)
@@ -360,8 +387,10 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
                                                USB_TYPE_CLASS | USB_DIR_OUT
                                                 | USB_RECIP_INTERFACE,
                                                0,
-                                               iface_no, &max_datagram_size,
+                                               iface_no, max_datagram_size,
                                                2, 1000);
+                       kfree(max_datagram_size);
+max_dgram_err:
                        if (err < 0)
                                pr_debug("SET_MAX_DATAGRAM_SIZE failed\n");
                }
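
All three hunks in this file replace stack variables passed to usb_control_msg() with kzalloc'd buffers. USB transfer buffers are handed to the host controller for DMA, and on-stack memory is not reliably DMA-able, so control-message payloads must live on the heap. A minimal sketch of the safe pattern (get_one_le16() is a hypothetical helper):

    #include <linux/slab.h>
    #include <linux/usb.h>

    /* Sketch: heap-allocate even a 2-byte control-transfer payload. */
    static int get_one_le16(struct usb_device *udev, u8 request,
                            u16 index, u16 *out)
    {
            __le16 *buf;
            int err;

            buf = kmalloc(sizeof(*buf), GFP_KERNEL);
            if (!buf)
                    return -ENOMEM;

            err = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), request,
                                  USB_TYPE_CLASS | USB_DIR_IN |
                                  USB_RECIP_INTERFACE,
                                  0, index, buf, sizeof(*buf), 1000);
            if (err >= 0)
                    *out = le16_to_cpu(*buf);

            kfree(buf);
            return err < 0 ? err : 0;
    }
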
index deb1eca13c9f4f8321facaced82e6740da5f5cb5..7c5336c5c37f09c91d2bacfdb0fbe92fdfbddd31 100644 (file)
@@ -515,10 +515,6 @@ static void velocity_init_cam_filter(struct velocity_info *vptr)
        mac_set_cam_mask(regs, vptr->mCAMmask);
 
        /* Enable VCAMs */
-
-       if (test_bit(0, vptr->active_vlans))
-               WORD_REG_BITS_ON(MCFG_RTGOPT, &regs->MCFG);
-
        for_each_set_bit(vid, vptr->active_vlans, VLAN_N_VID) {
                mac_set_vlan_cam(regs, i, (u8 *) &vid);
                vptr->vCAMmask[i / 8] |= 0x1 << (i % 8);
index 1cbacb3896528e0cc5b54e99abd34b5a809fb714..0959583feb27ed2c7962e6d6e7a931c53f7d664a 100644 (file)
@@ -1929,14 +1929,17 @@ static void
 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-       u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-       unsigned long flags;
 
-       VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
-       spin_lock_irqsave(&adapter->cmd_lock, flags);
-       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                              VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       if (!(netdev->flags & IFF_PROMISC)) {
+               u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+               unsigned long flags;
+
+               VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       }
 
        set_bit(vid, adapter->active_vlans);
 }
@@ -1946,14 +1949,17 @@ static void
 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-       u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-       unsigned long flags;
 
-       VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
-       spin_lock_irqsave(&adapter->cmd_lock, flags);
-       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                              VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       if (!(netdev->flags & IFF_PROMISC)) {
+               u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+               unsigned long flags;
+
+               VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       }
 
        clear_bit(vid, adapter->active_vlans);
 }
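
While the interface is promiscuous the on-device VLAN filter table is deliberately kept clear so every tag passes, so add/kill only touch the hardware when not in promiscuous mode; the active_vlans bitmap is always updated, leaving a software shadow from which the table can be rebuilt when promiscuous mode is switched off. The shape, with a hypothetical hardware accessor:

    #include <linux/bitops.h>

    static void hw_set_vlan_filter(u16 vid)
    {
            /* stand-in for programming the device's filter table */
    }

    /* Sketch: always update the shadow bitmap; program hardware
     * only when the filter is actually in use (not promiscuous). */
    static void vlan_add(unsigned long *active, bool promisc, u16 vid)
    {
            if (!promisc)
                    hw_set_vlan_filter(vid);
            set_bit(vid, active);
    }
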
index c34bef1bf2b08e29219fa03d692deeb88da45ea5..1b9400371eaf0b1b8f746a62952f3c7f18291226 100644 (file)
@@ -69,7 +69,7 @@ static int ar9003_hw_power_interpolate(int32_t x,
 static const struct ar9300_eeprom ar9300_default = {
        .eepromVersion = 2,
        .templateVersion = 2,
-       .macAddr = {1, 2, 3, 4, 5, 6},
+       .macAddr = {0, 2, 3, 4, 5, 6},
        .custData = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        .baseEepHeader = {
index 9098aaad97a92707bdf62d2dcbdcc4dcb38a7a15..6530694a59aed691759f3d509284b29a34974292 100644 (file)
@@ -2283,7 +2283,11 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class)
 
        mutex_lock(&sc->mutex);
        ah->coverage_class = coverage_class;
+
+       ath9k_ps_wakeup(sc);
        ath9k_hw_init_global_settings(ah);
+       ath9k_ps_restore(sc);
+
        mutex_unlock(&sc->mutex);
 }
 
index 0122930b14c71f85ab0f9d779a87030373011ebb..0474e6638d218e60ca3cd2533818d6027e954433 100644 (file)
@@ -1066,8 +1066,10 @@ static int carl9170_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
         * the high throughput speed in 802.11n networks.
         */
 
-       if (!is_main_vif(ar, vif))
+       if (!is_main_vif(ar, vif)) {
+               mutex_lock(&ar->mutex);
                goto err_softw;
+       }
 
        /*
         * While the hardware supports *catch-all* key, for offloading
index 69d4ec467dca70d8da0dbe0129677f42da2f2d30..2fdbffa079c1fdc4aa0529d5c2b0a885c8a18870 100644 (file)
@@ -478,27 +478,22 @@ out_no_pci:
        return err;
 }
 
-static void iwl_pci_down(struct iwl_bus *bus)
-{
-       struct iwl_pci_bus *pci_bus = (struct iwl_pci_bus *) bus->bus_specific;
-
-       pci_disable_msi(pci_bus->pci_dev);
-       pci_iounmap(pci_bus->pci_dev, pci_bus->hw_base);
-       pci_release_regions(pci_bus->pci_dev);
-       pci_disable_device(pci_bus->pci_dev);
-       pci_set_drvdata(pci_bus->pci_dev, NULL);
-
-       kfree(bus);
-}
-
 static void __devexit iwl_pci_remove(struct pci_dev *pdev)
 {
        struct iwl_priv *priv = pci_get_drvdata(pdev);
-       void *bus_specific = priv->bus->bus_specific;
+       struct iwl_bus *bus = priv->bus;
+       struct iwl_pci_bus *pci_bus = IWL_BUS_GET_PCI_BUS(bus);
+       struct pci_dev *pci_dev = IWL_BUS_GET_PCI_DEV(bus);
 
        iwl_remove(priv);
 
-       iwl_pci_down(bus_specific);
+       pci_disable_msi(pci_dev);
+       pci_iounmap(pci_dev, pci_bus->hw_base);
+       pci_release_regions(pci_dev);
+       pci_disable_device(pci_dev);
+       pci_set_drvdata(pci_dev, NULL);
+
+       kfree(bus);
 }
 
 #ifdef CONFIG_PM
index 939563162fb3dce4a2ad731b3c4a48296a4774e3..dbf501ca317f51cac690b87a0edb7f8a84145353 100644 (file)
@@ -464,6 +464,15 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
        int wcid, ack, pid;
        int tx_wcid, tx_ack, tx_pid;
 
+       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
+               WARNING(entry->queue->rt2x00dev,
+                       "Data pending for entry %u in queue %u\n",
+                       entry->entry_idx, entry->queue->qid);
+               cond_resched();
+               return false;
+       }
+
        wcid    = rt2x00_get_field32(reg, TX_STA_FIFO_WCID);
        ack     = rt2x00_get_field32(reg, TX_STA_FIFO_TX_ACK_REQUIRED);
        pid     = rt2x00_get_field32(reg, TX_STA_FIFO_PID_TYPE);
@@ -529,12 +538,11 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
                        entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
                        if (rt2800usb_txdone_entry_check(entry, reg))
                                break;
+                       entry = NULL;
                }
 
-               if (!entry || rt2x00queue_empty(queue))
-                       break;
-
-               rt2800_txdone_entry(entry, reg);
+               if (entry)
+                       rt2800_txdone_entry(entry, reg);
        }
 }
 
@@ -558,8 +566,10 @@ static void rt2800usb_work_txdone(struct work_struct *work)
                while (!rt2x00queue_empty(queue)) {
                        entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
 
-                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+                           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags))
                                break;
+
                        if (test_bit(ENTRY_DATA_IO_FAILED, &entry->flags))
                                rt2x00lib_txdone_noinfo(entry, TXDONE_FAILURE);
                        else if (rt2x00queue_status_timeout(entry))
index b6b4542c2460e6539de067048270385fba345d85..1e31050dafc9e9b2ea2a59073137de053e48074b 100644 (file)
@@ -262,23 +262,20 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
        struct queue_entry *entry = (struct queue_entry *)urb->context;
        struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
 
-       if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+       if (!test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
                return;
-
-       if (rt2x00dev->ops->lib->tx_dma_done)
-               rt2x00dev->ops->lib->tx_dma_done(entry);
-
-       /*
-        * Report the frame as DMA done
-        */
-       rt2x00lib_dmadone(entry);
-
        /*
         * Check if the frame was correctly uploaded
         */
        if (urb->status)
                set_bit(ENTRY_DATA_IO_FAILED, &entry->flags);
+       /*
+        * Report the frame as DMA done
+        */
+       rt2x00lib_dmadone(entry);
 
+       if (rt2x00dev->ops->lib->tx_dma_done)
+               rt2x00dev->ops->lib->tx_dma_done(entry);
        /*
         * Schedule the delayed work for reading the TX status
         * from the device.
@@ -874,18 +871,8 @@ int rt2x00usb_suspend(struct usb_interface *usb_intf, pm_message_t state)
 {
        struct ieee80211_hw *hw = usb_get_intfdata(usb_intf);
        struct rt2x00_dev *rt2x00dev = hw->priv;
-       int retval;
-
-       retval = rt2x00lib_suspend(rt2x00dev, state);
-       if (retval)
-               return retval;
 
-       /*
-        * Decrease usbdev refcount.
-        */
-       usb_put_dev(interface_to_usbdev(usb_intf));
-
-       return 0;
+       return rt2x00lib_suspend(rt2x00dev, state);
 }
 EXPORT_SYMBOL_GPL(rt2x00usb_suspend);
 
@@ -894,8 +881,6 @@ int rt2x00usb_resume(struct usb_interface *usb_intf)
        struct ieee80211_hw *hw = usb_get_intfdata(usb_intf);
        struct rt2x00_dev *rt2x00dev = hw->priv;
 
-       usb_get_dev(interface_to_usbdev(usb_intf));
-
        return rt2x00lib_resume(rt2x00dev);
 }
 EXPORT_SYMBOL_GPL(rt2x00usb_resume);
index 7e33f1f4f3d47c7866baac5af76b656111139d2b..34f6ab53e51960b097f709b9546c0273aa9f47a5 100644 (file)
@@ -77,8 +77,6 @@ int wl1271_acx_sleep_auth(struct wl1271 *wl, u8 sleep_auth)
        auth->sleep_auth = sleep_auth;
 
        ret = wl1271_cmd_configure(wl, ACX_SLEEP_AUTH, auth, sizeof(*auth));
-       if (ret < 0)
-               return ret;
 
 out:
        kfree(auth);
@@ -624,10 +622,8 @@ int wl1271_acx_cca_threshold(struct wl1271 *wl)
 
        ret = wl1271_cmd_configure(wl, ACX_CCA_THRESHOLD,
                                   detection, sizeof(*detection));
-       if (ret < 0) {
+       if (ret < 0)
                wl1271_warning("failed to set cca threshold: %d", ret);
-               return ret;
-       }
 
 out:
        kfree(detection);
index e58c22d21e39af9d8683ff94e1ccee929258902c..b70ae40ad660148418d9914aa0c88bcc71554bae 100644 (file)
@@ -4283,6 +4283,7 @@ int wl1271_init_ieee80211(struct wl1271 *wl)
        wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) |
                BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP);
        wl->hw->wiphy->max_scan_ssids = 1;
+       wl->hw->wiphy->max_sched_scan_ssids = 1;
        /*
         * Maximum length of elements in scanning probe request templates
         * should be the maximum length possible for a template, without
index 5cf18c2c23f09ee3fdd35a3aceafc322f3770580..fb1fd5af75ea562c47120067d88b8f9540e7b805 100644 (file)
@@ -164,7 +164,7 @@ static int wl1271_sdio_power_on(struct wl1271 *wl)
        /* If enabled, tell runtime PM not to power off the card */
        if (pm_runtime_enabled(&func->dev)) {
                ret = pm_runtime_get_sync(&func->dev);
-               if (ret)
+               if (ret < 0)
                        goto out;
        } else {
                /* Runtime PM is disabled: power up the card manually */
index 5d5e1ef87206a4e1722510084ba4cab2783cf0bd..4ae8effaee22828acf9565d47d9ad599f4ac9c00 100644 (file)
@@ -36,7 +36,6 @@ enum wl1271_tm_commands {
        WL1271_TM_CMD_TEST,
        WL1271_TM_CMD_INTERROGATE,
        WL1271_TM_CMD_CONFIGURE,
-       WL1271_TM_CMD_NVS_PUSH,
        WL1271_TM_CMD_SET_PLT_MODE,
        WL1271_TM_CMD_RECOVER,
 
@@ -139,12 +138,15 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[])
 
        if (ret < 0) {
                wl1271_warning("testmode cmd interrogate failed: %d", ret);
+               kfree(cmd);
                return ret;
        }
 
        skb = cfg80211_testmode_alloc_reply_skb(wl->hw->wiphy, sizeof(*cmd));
-       if (!skb)
+       if (!skb) {
+               kfree(cmd);
                return -ENOMEM;
+       }
 
        NLA_PUT(skb, WL1271_TM_ATTR_DATA, sizeof(*cmd), cmd);
 
@@ -187,48 +189,6 @@ static int wl1271_tm_cmd_configure(struct wl1271 *wl, struct nlattr *tb[])
        return 0;
 }
 
-static int wl1271_tm_cmd_nvs_push(struct wl1271 *wl, struct nlattr *tb[])
-{
-       int ret = 0;
-       size_t len;
-       void *buf;
-
-       wl1271_debug(DEBUG_TESTMODE, "testmode cmd nvs push");
-
-       if (!tb[WL1271_TM_ATTR_DATA])
-               return -EINVAL;
-
-       buf = nla_data(tb[WL1271_TM_ATTR_DATA]);
-       len = nla_len(tb[WL1271_TM_ATTR_DATA]);
-
-       mutex_lock(&wl->mutex);
-
-       kfree(wl->nvs);
-
-       if ((wl->chip.id == CHIP_ID_1283_PG20) &&
-           (len != sizeof(struct wl128x_nvs_file)))
-               return -EINVAL;
-       else if (len != sizeof(struct wl1271_nvs_file))
-               return -EINVAL;
-
-       wl->nvs = kzalloc(len, GFP_KERNEL);
-       if (!wl->nvs) {
-               wl1271_error("could not allocate memory for the nvs file");
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       memcpy(wl->nvs, buf, len);
-       wl->nvs_len = len;
-
-       wl1271_debug(DEBUG_TESTMODE, "testmode pushed nvs");
-
-out:
-       mutex_unlock(&wl->mutex);
-
-       return ret;
-}
-
 static int wl1271_tm_cmd_set_plt_mode(struct wl1271 *wl, struct nlattr *tb[])
 {
        u32 val;
@@ -285,8 +245,6 @@ int wl1271_tm_cmd(struct ieee80211_hw *hw, void *data, int len)
                return wl1271_tm_cmd_interrogate(wl, tb);
        case WL1271_TM_CMD_CONFIGURE:
                return wl1271_tm_cmd_configure(wl, tb);
-       case WL1271_TM_CMD_NVS_PUSH:
-               return wl1271_tm_cmd_nvs_push(wl, tb);
        case WL1271_TM_CMD_SET_PLT_MODE:
                return wl1271_tm_cmd_set_plt_mode(wl, tb);
        case WL1271_TM_CMD_RECOVER:
index 7106b49b26e492dbea1bbdd888e46c95844ad0ed..ffc5033ea9c92fcbad8957cad81674a376cb1909 100644 (file)
@@ -20,6 +20,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
index cc21fa2120be241e18f49de15805280fde97d077..ef8efadb58cb1778306e3b977dffc2b585b5c42e 100644 (file)
@@ -20,6 +20,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
index a675e31b4f132d507cd2c8c72ac09a5ada5cac0b..d32d0d70f9ba951c21204a6fccaf9a2d61951f67 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/s3c_adc_battery.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/module.h>
 
 #include <plat/adc.h>
 
index ee893581d4b7ea8df5e386216de9610fcdc0c3e2..ebe77dd87dafb8c2fe9253c7ca2b38304838173f 100644 (file)
@@ -505,8 +505,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
        rdev->dev.dma_mask = &rdev->dma_mask;
        rdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 
-       if ((rdev->pef & RIO_PEF_INB_DOORBELL) &&
-           (rdev->dst_ops & RIO_DST_OPS_DOORBELL))
+       if (rdev->dst_ops & RIO_DST_OPS_DOORBELL)
                rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE],
                                   0, 0xffff);
 
index 9329dbb9ebabeafc5bbfe5f0b1123a5ed5bac33d..4e7c04e773e0167bcb35ea1653ea86b8939900b4 100644 (file)
@@ -152,10 +152,6 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
                goto retry_get_time;
        }
 
-       pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n",
-                1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
-                rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
-
        rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
        rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
        rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
@@ -164,6 +160,11 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
        rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
        rtc_tm->tm_year += 100;
+
+       pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n",
+                1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
+                rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
+
        rtc_tm->tm_mon -= 1;
 
        clk_disable(rtc_clk);
@@ -269,10 +270,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
        clk_enable(rtc_clk);
        pr_debug("s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
                 alrm->enabled,
-                1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
+                1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
                 tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-
        alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
        writeb(0x00, base + S3C2410_RTCALM);
 
@@ -319,49 +319,7 @@ static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
        return 0;
 }
 
-static int s3c_rtc_open(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_device *rtc_dev = platform_get_drvdata(pdev);
-       int ret;
-
-       ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
-                         IRQF_DISABLED,  "s3c2410-rtc alarm", rtc_dev);
-
-       if (ret) {
-               dev_err(dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
-               return ret;
-       }
-
-       ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
-                         IRQF_DISABLED,  "s3c2410-rtc tick", rtc_dev);
-
-       if (ret) {
-               dev_err(dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
-               goto tick_err;
-       }
-
-       return ret;
-
- tick_err:
-       free_irq(s3c_rtc_alarmno, rtc_dev);
-       return ret;
-}
-
-static void s3c_rtc_release(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_device *rtc_dev = platform_get_drvdata(pdev);
-
-       /* do not clear AIE here, it may be needed for wake */
-
-       free_irq(s3c_rtc_alarmno, rtc_dev);
-       free_irq(s3c_rtc_tickno, rtc_dev);
-}
-
 static const struct rtc_class_ops s3c_rtcops = {
-       .open           = s3c_rtc_open,
-       .release        = s3c_rtc_release,
        .read_time      = s3c_rtc_gettime,
        .set_time       = s3c_rtc_settime,
        .read_alarm     = s3c_rtc_getalarm,
@@ -425,6 +383,9 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
 {
        struct rtc_device *rtc = platform_get_drvdata(dev);
 
+       free_irq(s3c_rtc_alarmno, rtc);
+       free_irq(s3c_rtc_tickno, rtc);
+
        platform_set_drvdata(dev, NULL);
        rtc_device_unregister(rtc);
 
@@ -548,10 +509,32 @@ static int __devinit s3c_rtc_probe(struct platform_device *pdev)
 
        s3c_rtc_setfreq(&pdev->dev, 1);
 
+       ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
+                         IRQF_DISABLED,  "s3c2410-rtc alarm", rtc);
+       if (ret) {
+               dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
+               goto err_alarm_irq;
+       }
+
+       ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
+                         IRQF_DISABLED,  "s3c2410-rtc tick", rtc);
+       if (ret) {
+               dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
+               goto err_tick_irq;
+       }
+
        clk_disable(rtc_clk);
 
        return 0;
 
+ err_tick_irq:
+       free_irq(s3c_rtc_alarmno, rtc);
+
+ err_alarm_irq:
+       platform_set_drvdata(pdev, NULL);
+       rtc_device_unregister(rtc);
+
  err_nortc:
        s3c_rtc_enable(pdev, 0);
        clk_disable(rtc_clk);
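
Moving the request_irq() calls from the rtc_class_ops open()/release() hooks into probe()/remove() ties the interrupts to the lifetime of the device rather than to an open file handle; the alarm IRQ in particular may be needed for wakeup while nothing holds the device open. Error unwinding then follows the usual reverse-acquisition order. A condensed sketch with hypothetical names:

    #include <linux/interrupt.h>
    #include <linux/platform_device.h>

    static int alarm_irq, tick_irq;   /* looked up earlier in probe */

    static irqreturn_t alarm_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static irqreturn_t tick_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int sketch_probe(struct platform_device *pdev)
    {
            int ret;

            ret = request_irq(alarm_irq, alarm_handler, 0, "rtc alarm", pdev);
            if (ret)
                    return ret;

            ret = request_irq(tick_irq, tick_handler, 0, "rtc tick", pdev);
            if (ret)
                    goto err_tick;

            return 0;

    err_tick:
            free_irq(alarm_irq, pdev);   /* reverse order of acquisition */
            return ret;
    }
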
index eb4e034378cd0d4daba20ee6988343b571ac000f..f1a2016829fc5654335abaea3d7682eae4d33370 100644 (file)
@@ -249,6 +249,7 @@ static int dasd_ioctl_reset_profile(struct dasd_block *block)
 static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
 {
        struct dasd_profile_info_t *data;
+       int rc = 0;
 
        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
@@ -279,11 +280,14 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
                spin_unlock_bh(&block->profile.lock);
        } else {
                spin_unlock_bh(&block->profile.lock);
-               return -EIO;
+               rc = -EIO;
+               goto out;
        }
        if (copy_to_user(argp, data, sizeof(*data)))
-               return -EFAULT;
-       return 0;
+               rc = -EFAULT;
+out:
+       kfree(data);
+       return rc;
 }
 #else
 static int dasd_ioctl_reset_profile(struct dasd_block *block)
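
The ioctl fix turns the early returns into jumps to a common exit so the kmalloc'd profile buffer is freed on the -EIO and -EFAULT paths as well as on success. The shape of the single-exit pattern (device_ready() is a hypothetical precondition):

    #include <linux/slab.h>
    #include <linux/uaccess.h>

    static bool device_ready(void)
    {
            return true;    /* stand-in for the profile-enabled check */
    }

    /* Sketch: every exit path passes through the kfree(). */
    static int read_profile(void __user *argp, size_t len)
    {
            void *data;
            int rc = 0;

            data = kmalloc(len, GFP_KERNEL);
            if (!data)
                    return -ENOMEM;

            if (!device_ready()) {
                    rc = -EIO;
                    goto out;
            }

            /* ... fill data from the device here ... */

            if (copy_to_user(argp, data, len))
                    rc = -EFAULT;
    out:
            kfree(data);
            return rc;
    }
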
index be55fb2b1b1c58f78a3df48ad8ac7be544f3ec51..837e010299a894f4f249fc6c0707a95634146598 100644 (file)
@@ -383,8 +383,10 @@ static int sclp_attach_storage(u8 id)
        switch (sccb->header.response_code) {
        case 0x0020:
                set_bit(id, sclp_storage_ids);
-               for (i = 0; i < sccb->assigned; i++)
-                       sclp_unassign_storage(sccb->entries[i] >> 16);
+               for (i = 0; i < sccb->assigned; i++) {
+                       if (sccb->entries[i])
+                               sclp_unassign_storage(sccb->entries[i] >> 16);
+               }
                break;
        default:
                rc = -EIO;
index f33e2dd979348f4b78560d15b2f8e9f9d33ef09e..33b2ed451e095dde15bedd14ce04ded170e17fd4 100644 (file)
@@ -186,6 +186,9 @@ static unsigned char intc_irq_sense_table[IRQ_TYPE_SENSE_MASK + 1] = {
     !defined(CONFIG_CPU_SUBTYPE_SH7709)
        [IRQ_TYPE_LEVEL_HIGH] = VALID(3),
 #endif
+#if defined(CONFIG_ARCH_SH7372)
+       [IRQ_TYPE_EDGE_BOTH] = VALID(4),
+#endif
 };
 
 static int intc_set_type(struct irq_data *data, unsigned int type)
index 34253cf37812a60c190dbc2f5143623f6ba9f354..4a70180eba5d71060358edb08c18bce98e9b9b18 100644 (file)
@@ -16,6 +16,7 @@
 
 #include <linux/io.h>
 #include <linux/errno.h>
+#include <linux/string.h>
 
 #include <brcm_hw_ids.h>
 #include <chipcommon.h>
index bbf21897ae0e32137c7f38bbabf5159a31b4cda7..823b5e4672e29fbae23e7bab1175e94154c1b9d6 100644 (file)
@@ -18,6 +18,7 @@
 #define _BRCM_TYPES_H_
 
 #include <linux/types.h>
+#include <linux/io.h>
 
 /* Bus types */
 #define        SI_BUS                  0       /* SOC Interconnect */
index 9c0d2936e4862e2d3c8888fa8618e12416966212..c3d73f8431ae88f3cc7604ac603aabd36ae26a29 100644 (file)
@@ -26,6 +26,7 @@
 **********************************************************************/
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/interrupt.h>
 #include <linux/phy.h>
 #include <linux/ratelimit.h>
 #include <net/dst.h>
index 970825421884d44526385d7782d4faa40207501c..d0e2d514968a6f864f5fbda312a854e2a1cc414c 100644 (file)
@@ -26,6 +26,7 @@
 **********************************************************************/
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/interrupt.h>
 #include <net/dst.h>
 
 #include <asm/octeon/octeon.h>
index 589a0554332ea74b150429fe1d3640da011152ee..3d1279c424a85155163c498ca463d0838fdfab65 100644 (file)
@@ -209,7 +209,6 @@ int dsp_clk_enable(enum dsp_clk_id clk_id)
                break;
 #ifdef CONFIG_OMAP_MCBSP
        case MCBSP_CLK:
-               omap_mcbsp_set_io_type(MCBSP_ID(clk_id), OMAP_MCBSP_POLL_IO);
                omap_mcbsp_request(MCBSP_ID(clk_id));
                omap2_mcbsp_set_clks_src(MCBSP_ID(clk_id), MCBSP_CLKS_PAD_SRC);
                break;
index 975e34bcd722aecf4ed753a9b86ad25e90c5d19c..1ca66ea9b28123825a4db9b08d8621da909c8c66 100644 (file)
@@ -604,7 +604,7 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
        struct tmem_obj *obj;
        void *pampd;
        bool ephemeral = is_ephemeral(pool);
-       uint32_t ret = -1;
+       int ret = -1;
        struct tmem_hashbucket *hb;
        bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
        bool lock_held = false;
index 855a5bb56a47d8e591873b05e6be15ec6f4f6ee0..a3f5162bfedcf5fb6f58f42405f3323e18880ed1 100644 (file)
@@ -1158,7 +1158,7 @@ static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
        size_t clen;
        int ret;
        unsigned long count;
-       struct page *page = virt_to_page(data);
+       struct page *page = (struct page *)(data);
        struct zcache_client *cli = pool->client;
        uint16_t client_id = get_client_id_from_client(cli);
        unsigned long zv_mean_zsize;
@@ -1227,7 +1227,7 @@ static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
        int ret = 0;
 
        BUG_ON(is_ephemeral(pool));
-       zv_decompress(virt_to_page(data), pampd);
+       zv_decompress((struct page *)(data), pampd);
        return ret;
 }
 
@@ -1539,7 +1539,7 @@ static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
                goto out;
        if (!zcache_freeze && zcache_do_preload(pool) == 0) {
                /* preload does preempt_disable on success */
-               ret = tmem_put(pool, oidp, index, page_address(page),
+               ret = tmem_put(pool, oidp, index, (char *)(page),
                                PAGE_SIZE, 0, is_ephemeral(pool));
                if (ret < 0) {
                        if (is_ephemeral(pool))
@@ -1572,7 +1572,7 @@ static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
        pool = zcache_get_pool_by_id(cli_id, pool_id);
        if (likely(pool != NULL)) {
                if (atomic_read(&pool->obj_count) > 0)
-                       ret = tmem_get(pool, oidp, index, page_address(page),
+                       ret = tmem_get(pool, oidp, index, (char *)(page),
                                        &size, 0, is_ephemeral(pool));
                zcache_put_pool(pool);
        }
index c24fb10de60be4912962f9649feb2e21a96d231b..6a4ea29c2f36733206bd49081152dbecf2fcc2df 100644 (file)
@@ -2243,7 +2243,6 @@ static int iscsit_handle_snack(
        case 0:
                return iscsit_handle_recovery_datain_or_r2t(conn, buf,
                        hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength);
-               return 0;
        case ISCSI_FLAG_SNACK_TYPE_STATUS:
                return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt,
                        hdr->begrun, hdr->runlength);
index f095e65b1ccf401a2a4b951d5e8c463827dcc57f..f1643dbf6a92923d9a55fcc9732ded891a7afaa6 100644 (file)
@@ -268,7 +268,7 @@ struct se_tpg_np *lio_target_call_addnptotpg(
                                ISCSI_TCP);
        if (IS_ERR(tpg_np)) {
                iscsit_put_tpg(tpg);
-               return ERR_PTR(PTR_ERR(tpg_np));
+               return ERR_CAST(tpg_np);
        }
        pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n");
 
@@ -1285,7 +1285,7 @@ struct se_wwn *lio_target_call_coreaddtiqn(
 
        tiqn = iscsit_add_tiqn((unsigned char *)name);
        if (IS_ERR(tiqn))
-               return ERR_PTR(PTR_ERR(tiqn));
+               return ERR_CAST(tiqn);
        /*
         * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group.
         */
index 980650792cf699de8de826722c0973752ed79cbc..c4c68da3e5004b3fa39eeb71829bbefab4e38632 100644 (file)
@@ -834,7 +834,7 @@ static int iscsit_attach_ooo_cmdsn(
                         */
                        list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list,
                                                ooo_list) {
-                               while (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
+                               if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
                                        continue;
 
                                list_add(&ooo_cmdsn->ooo_list,
index bcaf82f470375e59caa6875da5c92e9f912730c5..daad362a93cecebeca5c9bcce26233da3b202df1 100644 (file)
@@ -1013,19 +1013,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
                                        ISCSI_LOGIN_STATUS_TARGET_ERROR);
                        goto new_sess_out;
                }
-#if 0
-               if (!iscsi_ntop6((const unsigned char *)
-                               &sock_in6.sin6_addr.in6_u,
-                               (char *)&conn->ipv6_login_ip[0],
-                               IPV6_ADDRESS_SPACE)) {
-                       pr_err("iscsi_ntop6() failed\n");
-                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
-                                       ISCSI_LOGIN_STATUS_TARGET_ERROR);
-                       goto new_sess_out;
-               }
-#else
-               pr_debug("Skipping iscsi_ntop6()\n");
-#endif
+               snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c",
+                               &sock_in6.sin6_addr.in6_u);
+               conn->login_port = ntohs(sock_in6.sin6_port);
        } else {
                memset(&sock_in, 0, sizeof(struct sockaddr_in));
 
index 252e246cf51e54b8ce5babe19507d32d1f3284b7..497b2e718a76ed8d0d33d84982b16b4ceb3d674a 100644 (file)
@@ -545,13 +545,13 @@ int iscsi_copy_param_list(
        struct iscsi_param_list *src_param_list,
        int leading)
 {
-       struct iscsi_param *new_param = NULL, *param = NULL;
+       struct iscsi_param *param = NULL;
+       struct iscsi_param *new_param = NULL;
        struct iscsi_param_list *param_list = NULL;
 
        param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
        if (!param_list) {
-               pr_err("Unable to allocate memory for"
-                               " struct iscsi_param_list.\n");
+               pr_err("Unable to allocate memory for struct iscsi_param_list.\n");
                goto err_out;
        }
        INIT_LIST_HEAD(&param_list->param_list);
@@ -567,8 +567,17 @@ int iscsi_copy_param_list(
 
                new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
                if (!new_param) {
-                       pr_err("Unable to allocate memory for"
-                               " struct iscsi_param.\n");
+                       pr_err("Unable to allocate memory for struct iscsi_param.\n");
+                       goto err_out;
+               }
+
+               new_param->name = kstrdup(param->name, GFP_KERNEL);
+               new_param->value = kstrdup(param->value, GFP_KERNEL);
+               if (!new_param->value || !new_param->name) {
+                       kfree(new_param->value);
+                       kfree(new_param->name);
+                       kfree(new_param);
+                       pr_err("Unable to allocate memory for parameter name/value.\n");
                        goto err_out;
                }
 
@@ -580,32 +589,12 @@ int iscsi_copy_param_list(
                new_param->use = param->use;
                new_param->type_range = param->type_range;
 
-               new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL);
-               if (!new_param->name) {
-                       pr_err("Unable to allocate memory for"
-                               " parameter name.\n");
-                       goto err_out;
-               }
-
-               new_param->value = kzalloc(strlen(param->value) + 1,
-                               GFP_KERNEL);
-               if (!new_param->value) {
-                       pr_err("Unable to allocate memory for"
-                               " parameter value.\n");
-                       goto err_out;
-               }
-
-               memcpy(new_param->name, param->name, strlen(param->name));
-               new_param->name[strlen(param->name)] = '\0';
-               memcpy(new_param->value, param->value, strlen(param->value));
-               new_param->value[strlen(param->value)] = '\0';
-
                list_add_tail(&new_param->p_list, &param_list->param_list);
        }
 
-       if (!list_empty(&param_list->param_list))
+       if (!list_empty(&param_list->param_list)) {
                *dst_param_list = param_list;
-       else {
+       } else {
                pr_err("No parameters allocated.\n");
                goto err_out;
        }
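
Besides fixing the leak of new_param on the allocation-failure paths, the patch replaces the kzalloc(strlen()+1) + memcpy + manual NUL-termination sequence with kstrdup(), which performs the same copy with a single call and a single error check. A sketch of the duplicate-or-clean-up-both idiom:

    #include <linux/slab.h>
    #include <linux/string.h>

    struct pair {
            char *name;
            char *value;
    };

    /* Sketch: kstrdup both strings; on partial failure free both
     * (kfree(NULL) is a no-op, so no need to track which failed). */
    static int copy_pair(struct pair *dst, const struct pair *src)
    {
            dst->name  = kstrdup(src->name, GFP_KERNEL);
            dst->value = kstrdup(src->value, GFP_KERNEL);
            if (!dst->name || !dst->value) {
                    kfree(dst->name);
                    kfree(dst->value);
                    return -ENOMEM;
            }
            return 0;
    }
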
index a1acb0167902cd20d1a29de688c17d0b0e202139..a0d23bc0fc98b101261bda0e4c5b984856b9c2f3 100644 (file)
@@ -243,7 +243,7 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
        if (!cmd->tmr_req) {
                pr_err("Unable to allocate memory for"
                        " Task Management command!\n");
-               return NULL;
+               goto out;
        }
        /*
         * TASK_REASSIGN for ERL=2 / connection stays inside of
@@ -298,8 +298,6 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
        return cmd;
 out:
        iscsit_release_cmd(cmd);
-       if (se_cmd)
-               transport_free_se_cmd(se_cmd);
        return NULL;
 }
 
index 8ae09a1bdf74a8744b4884df7cddb792af6f8f0f..89ae923c5da6e28362039fe2fd6a611459747752 100644 (file)
@@ -67,6 +67,7 @@ target_emulate_inquiry_std(struct se_cmd *cmd)
 {
        struct se_lun *lun = cmd->se_lun;
        struct se_device *dev = cmd->se_dev;
+       struct se_portal_group *tpg = lun->lun_sep->sep_tpg;
        unsigned char *buf;
 
        /*
@@ -81,9 +82,13 @@ target_emulate_inquiry_std(struct se_cmd *cmd)
 
        buf = transport_kmap_first_data_page(cmd);
 
-       buf[0] = dev->transport->get_device_type(dev);
-       if (buf[0] == TYPE_TAPE)
-               buf[1] = 0x80;
+       if (dev == tpg->tpg_virt_lun0.lun_se_dev) {
+               buf[0] = 0x3f; /* Not connected */
+       } else {
+               buf[0] = dev->transport->get_device_type(dev);
+               if (buf[0] == TYPE_TAPE)
+                       buf[1] = 0x80;
+       }
        buf[2] = dev->transport->get_device_rev(dev);
 
        /*
@@ -915,8 +920,8 @@ target_emulate_modesense(struct se_cmd *cmd, int ten)
                length += target_modesense_control(dev, &buf[offset+length]);
                break;
        default:
-               pr_err("Got Unknown Mode Page: 0x%02x\n",
-                               cdb[2] & 0x3f);
+               pr_err("MODE SENSE: unimplemented page/subpage: 0x%02x/0x%02x\n",
+                      cdb[2] & 0x3f, cdb[3]);
                return PYX_TRANSPORT_UNKNOWN_MODE_PAGE;
        }
        offset += length;
@@ -1072,8 +1077,6 @@ target_emulate_unmap(struct se_task *task)
                size -= 16;
        }
 
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
 err:
        transport_kunmap_first_data_page(cmd);
 
@@ -1085,24 +1088,17 @@ err:
  * Note this is not used for TCM/pSCSI passthrough
  */
 static int
-target_emulate_write_same(struct se_task *task, int write_same32)
+target_emulate_write_same(struct se_task *task, u32 num_blocks)
 {
        struct se_cmd *cmd = task->task_se_cmd;
        struct se_device *dev = cmd->se_dev;
        sector_t range;
        sector_t lba = cmd->t_task_lba;
-       unsigned int num_blocks;
        int ret;
        /*
-        * Extract num_blocks from the WRITE_SAME_* CDB.  Then use the explict
-        * range when non zero is supplied, otherwise calculate the remaining
-        * range based on ->get_blocks() - starting LBA.
+        * Use the explicit range when non zero is supplied, otherwise calculate
+        * the remaining range based on ->get_blocks() - starting LBA.
         */
-       if (write_same32)
-               num_blocks = get_unaligned_be32(&cmd->t_task_cdb[28]);
-       else
-               num_blocks = get_unaligned_be32(&cmd->t_task_cdb[10]);
-
        if (num_blocks != 0)
                range = num_blocks;
        else
@@ -1117,8 +1113,6 @@ target_emulate_write_same(struct se_task *task, int write_same32)
                return ret;
        }
 
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
        return 0;
 }
 
@@ -1165,13 +1159,23 @@ transport_emulate_control_cdb(struct se_task *task)
                }
                ret = target_emulate_unmap(task);
                break;
+       case WRITE_SAME:
+               if (!dev->transport->do_discard) {
+                       pr_err("WRITE_SAME emulation not supported"
+                                       " for: %s\n", dev->transport->name);
+                       return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
+               }
+               ret = target_emulate_write_same(task,
+                               get_unaligned_be16(&cmd->t_task_cdb[7]));
+               break;
        case WRITE_SAME_16:
                if (!dev->transport->do_discard) {
                        pr_err("WRITE_SAME_16 emulation not supported"
                                        " for: %s\n", dev->transport->name);
                        return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
                }
-               ret = target_emulate_write_same(task, 0);
+               ret = target_emulate_write_same(task,
+                               get_unaligned_be32(&cmd->t_task_cdb[10]));
                break;
        case VARIABLE_LENGTH_CMD:
                service_action =
@@ -1184,7 +1188,8 @@ transport_emulate_control_cdb(struct se_task *task)
                                        dev->transport->name);
                                return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
                        }
-                       ret = target_emulate_write_same(task, 1);
+                       ret = target_emulate_write_same(task,
+                               get_unaligned_be32(&cmd->t_task_cdb[28]));
                        break;
                default:
                        pr_err("Unsupported VARIABLE_LENGTH_CMD SA:"
@@ -1219,8 +1224,14 @@ transport_emulate_control_cdb(struct se_task *task)
 
        if (ret < 0)
                return ret;
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
+       /*
+        * Handle the successful completion here unless a caller
+        * has explicitly requested an asynchronous completion.
+        */
+       if (!(cmd->se_cmd_flags & SCF_EMULATE_CDB_ASYNC)) {
+               task->task_scsi_status = GOOD;
+               transport_complete_task(task, 1);
+       }
 
        return PYX_TRANSPORT_SENT_TO_TRANSPORT;
 }
index b38b6c993e6555855be8cd830451d41a0ab6e1d5..ca6e4a4df134e3b8b64ed093338ef7f11749e499 100644 (file)
@@ -472,9 +472,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
        struct se_dev_entry *deve;
        u32 i;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) {
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
 
                spin_lock_irq(&nacl->device_list_lock);
                for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
@@ -491,9 +491,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
                }
                spin_unlock_irq(&nacl->device_list_lock);
 
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
        }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 }
 
 static struct se_port *core_alloc_port(struct se_device *dev)
@@ -839,6 +839,24 @@ int se_dev_check_shutdown(struct se_device *dev)
        return ret;
 }
 
+u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size)
+{
+       u32 tmp, aligned_max_sectors;
+       /*
+        * Limit max_sectors to a PAGE_SIZE aligned value for modern
+        * transport_allocate_data_tasks() operation.
+        */
+       tmp = rounddown((max_sectors * block_size), PAGE_SIZE);
+       aligned_max_sectors = (tmp / block_size);
+       if (max_sectors != aligned_max_sectors) {
+               printk(KERN_INFO "Rounding down aligned max_sectors from %u"
+                               " to %u\n", max_sectors, aligned_max_sectors);
+               return aligned_max_sectors;
+       }
+
+       return max_sectors;
+}
+
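
Worked example of the alignment above: with 512-byte blocks and 4 KiB pages, max_sectors = 33 is 16896 bytes, which rounds down to 16384 bytes and back to 32 sectors, while an already aligned value such as 1024 passes through unchanged. A userspace sketch of the same arithmetic (the kernel uses rounddown()):

#include <stdio.h>

static unsigned int align_max_sectors(unsigned int max_sectors,
				      unsigned int block_size,
				      unsigned int page_size)
{
	unsigned int bytes = max_sectors * block_size;
	unsigned int tmp = bytes - (bytes % page_size); /* rounddown() */

	return tmp / block_size;
}

int main(void)
{
	printf("%u\n", align_max_sectors(33, 512, 4096));   /* 32 */
	printf("%u\n", align_max_sectors(1024, 512, 4096)); /* 1024 */
	return 0;
}
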
 void se_dev_set_default_attribs(
        struct se_device *dev,
        struct se_dev_limits *dev_limits)
@@ -878,6 +896,11 @@ void se_dev_set_default_attribs(
         * max_sectors is based on subsystem plugin dependent requirements.
         */
        dev->se_sub_dev->se_dev_attrib.hw_max_sectors = limits->max_hw_sectors;
+       /*
+        * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
+        */
+       limits->max_sectors = se_dev_align_max_sectors(limits->max_sectors,
+                                               limits->logical_block_size);
        dev->se_sub_dev->se_dev_attrib.max_sectors = limits->max_sectors;
        /*
         * Set optimal_sectors from max_sectors, which can be lowered via
@@ -1242,6 +1265,11 @@ int se_dev_set_max_sectors(struct se_device *dev, u32 max_sectors)
                        return -EINVAL;
                }
        }
+       /*
+        * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
+        */
+       max_sectors = se_dev_align_max_sectors(max_sectors,
+                               dev->se_sub_dev->se_dev_attrib.block_size);
 
        dev->se_sub_dev->se_dev_attrib.max_sectors = max_sectors;
        pr_debug("dev[%p]: SE Device max_sectors changed to %u\n",
@@ -1344,15 +1372,17 @@ struct se_lun *core_dev_add_lun(
         */
        if (tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) {
                struct se_node_acl *acl;
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
                list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
-                       if (acl->dynamic_node_acl) {
-                               spin_unlock_bh(&tpg->acl_node_lock);
+                       if (acl->dynamic_node_acl &&
+                           (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only ||
+                            !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) {
+                               spin_unlock_irq(&tpg->acl_node_lock);
                                core_tpg_add_node_to_devs(acl, tpg);
-                               spin_lock_bh(&tpg->acl_node_lock);
+                               spin_lock_irq(&tpg->acl_node_lock);
                        }
                }
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
        }
 
        return lun_p;
index f1654694f4ea4e3b33c1ef3e5793b6e372b70156..55bbe0847a6d351cbb2d7ee940fef3f1d194fd10 100644 (file)
@@ -481,7 +481,7 @@ static struct config_group *target_fabric_make_nodeacl(
 
        se_nacl = tf->tf_ops.fabric_make_nodeacl(se_tpg, group, name);
        if (IS_ERR(se_nacl))
-               return ERR_PTR(PTR_ERR(se_nacl));
+               return ERR_CAST(se_nacl);
 
        nacl_cg = &se_nacl->acl_group;
        nacl_cg->default_groups = se_nacl->acl_default_groups;
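
ERR_CAST() is the idiomatic way to forward an error pointer across pointer types; the replaced ERR_PTR(PTR_ERR(se_nacl)) round-trips through a long for the same value. A minimal userspace model of the three helpers (the real ones live in include/linux/err.h):

#include <stdio.h>

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline void *ERR_CAST(const void *ptr) { return (void *)ptr; }

int main(void)
{
	void *e = ERR_PTR(-17); /* -EEXIST */

	/* Both forms carry the same errno; ERR_CAST() just re-types it. */
	printf("%ld %ld\n", PTR_ERR(ERR_CAST(e)), PTR_ERR(ERR_PTR(PTR_ERR(e))));
	return 0;
}
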
index 1c1b849cd4fb9c24799ce95d399b07012c328a53..7fd3a161f7cc61cb8b95b6aca9f6af76972ba44c 100644 (file)
@@ -1598,14 +1598,14 @@ static int core_scsi3_decode_spec_i_port(
                         * from the decoded fabric module specific TransportID
                         * at *i_str.
                         */
-                       spin_lock_bh(&tmp_tpg->acl_node_lock);
+                       spin_lock_irq(&tmp_tpg->acl_node_lock);
                        dest_node_acl = __core_tpg_get_initiator_node_acl(
                                                tmp_tpg, i_str);
                        if (dest_node_acl) {
                                atomic_inc(&dest_node_acl->acl_pr_ref_count);
                                smp_mb__after_atomic_inc();
                        }
-                       spin_unlock_bh(&tmp_tpg->acl_node_lock);
+                       spin_unlock_irq(&tmp_tpg->acl_node_lock);
 
                        if (!dest_node_acl) {
                                core_scsi3_tpg_undepend_item(tmp_tpg);
@@ -3496,14 +3496,14 @@ after_iport_check:
        /*
         * Locate the destination struct se_node_acl from the received Transport ID
         */
-       spin_lock_bh(&dest_se_tpg->acl_node_lock);
+       spin_lock_irq(&dest_se_tpg->acl_node_lock);
        dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg,
                                initiator_str);
        if (dest_node_acl) {
                atomic_inc(&dest_node_acl->acl_pr_ref_count);
                smp_mb__after_atomic_inc();
        }
-       spin_unlock_bh(&dest_se_tpg->acl_node_lock);
+       spin_unlock_irq(&dest_se_tpg->acl_node_lock);
 
        if (!dest_node_acl) {
                pr_err("Unable to locate %s dest_node_acl for"
index 3dd81d24d9a914169f80372550fc6bf4f77edbff..e567e129c69746b197ab0c2c4e6addfc008a3630 100644 (file)
@@ -390,12 +390,10 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                length = req->rd_size;
 
                        dst = sg_virt(&sg_d[i++]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
 
                        src = sg_virt(&sg_s[j]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
 
                        dst_offset = 0;
                        src_offset = length;
@@ -415,8 +413,7 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                length = req->rd_size;
 
                        dst = sg_virt(&sg_d[i]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
 
                        if (sg_d[i].length == length) {
                                i++;
@@ -425,8 +422,7 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                dst_offset = length;
 
                        src = sg_virt(&sg_s[j++]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
 
                        src_offset = 0;
                        page_end = 1;
@@ -510,12 +506,10 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                length = req->rd_size;
 
                        src = sg_virt(&sg_s[i++]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
 
                        dst = sg_virt(&sg_d[j]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
 
                        src_offset = 0;
                        dst_offset = length;
@@ -535,8 +529,7 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                length = req->rd_size;
 
                        src = sg_virt(&sg_s[i]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
 
                        if (sg_s[i].length == length) {
                                i++;
@@ -545,8 +538,7 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                src_offset = length;
 
                        dst = sg_virt(&sg_d[j++]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
 
                        dst_offset = 0;
                        page_end = 1;
index 4f1ba4c5ef1196e43e0d5ff4b52836c02e0dc9fe..162b736c73427c44d45f0c56fc59cb5bbf5c35d8 100644 (file)
@@ -137,15 +137,15 @@ struct se_node_acl *core_tpg_get_initiator_node_acl(
 {
        struct se_node_acl *acl;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
                if (!strcmp(acl->initiatorname, initiatorname) &&
                    !acl->dynamic_node_acl) {
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                        return acl;
                }
        }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        return NULL;
 }
@@ -298,13 +298,21 @@ struct se_node_acl *core_tpg_check_initiator_node_acl(
                tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl);
                return NULL;
        }
+       /*
+        * Here we only create demo-mode MappedLUNs from the active
+        * TPG LUNs if the fabric is not explicitly asking for
+        * tpg_check_demo_mode_login_only() == 1.
+        */
+       if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only != NULL) &&
+           (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) == 1))
+               do { ; } while (0);
+       else
+               core_tpg_add_node_to_devs(acl, tpg);
 
-       core_tpg_add_node_to_devs(acl, tpg);
-
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        list_add_tail(&acl->acl_list, &tpg->acl_node_list);
        tpg->num_node_acls++;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        pr_debug("%s_TPG[%u] - Added DYNAMIC ACL with TCQ Depth: %d for %s"
                " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
@@ -354,7 +362,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
 {
        struct se_node_acl *acl = NULL;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
        if (acl) {
                if (acl->dynamic_node_acl) {
@@ -362,7 +370,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                        pr_debug("%s_TPG[%u] - Replacing dynamic ACL"
                                " for %s\n", tpg->se_tpg_tfo->get_fabric_name(),
                                tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname);
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                        /*
                         * Release the locally allocated struct se_node_acl
                         * because * core_tpg_add_initiator_node_acl() returned
@@ -378,10 +386,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                        " Node %s already exists for TPG %u, ignoring"
                        " request.\n",  tpg->se_tpg_tfo->get_fabric_name(),
                        initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                return ERR_PTR(-EEXIST);
        }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        if (!se_nacl) {
                pr_err("struct se_node_acl pointer is NULL\n");
@@ -418,10 +426,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                return ERR_PTR(-EINVAL);
        }
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        list_add_tail(&acl->acl_list, &tpg->acl_node_list);
        tpg->num_node_acls++;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
 done:
        pr_debug("%s_TPG[%hu] - Added ACL with TCQ Depth: %d for %s"
@@ -445,14 +453,14 @@ int core_tpg_del_initiator_node_acl(
        struct se_session *sess, *sess_tmp;
        int dynamic_acl = 0;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        if (acl->dynamic_node_acl) {
                acl->dynamic_node_acl = 0;
                dynamic_acl = 1;
        }
        list_del(&acl->acl_list);
        tpg->num_node_acls--;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        spin_lock_bh(&tpg->session_lock);
        list_for_each_entry_safe(sess, sess_tmp,
@@ -503,21 +511,21 @@ int core_tpg_set_initiator_node_queue_depth(
        struct se_node_acl *acl;
        int dynamic_acl = 0;
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
        if (!acl) {
                pr_err("Access Control List entry for %s Initiator"
                        " Node %s does not exists for TPG %hu, ignoring"
                        " request.\n", tpg->se_tpg_tfo->get_fabric_name(),
                        initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                return -ENODEV;
        }
        if (acl->dynamic_node_acl) {
                acl->dynamic_node_acl = 0;
                dynamic_acl = 1;
        }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        spin_lock_bh(&tpg->session_lock);
        list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) {
@@ -533,10 +541,10 @@ int core_tpg_set_initiator_node_queue_depth(
                                tpg->se_tpg_tfo->get_fabric_name(), initiatorname);
                        spin_unlock_bh(&tpg->session_lock);
 
-                       spin_lock_bh(&tpg->acl_node_lock);
+                       spin_lock_irq(&tpg->acl_node_lock);
                        if (dynamic_acl)
                                acl->dynamic_node_acl = 1;
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                        return -EEXIST;
                }
                /*
@@ -571,10 +579,10 @@ int core_tpg_set_initiator_node_queue_depth(
                if (init_sess)
                        tpg->se_tpg_tfo->close_session(init_sess);
 
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
                if (dynamic_acl)
                        acl->dynamic_node_acl = 1;
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                return -EINVAL;
        }
        spin_unlock_bh(&tpg->session_lock);
@@ -590,10 +598,10 @@ int core_tpg_set_initiator_node_queue_depth(
                initiatorname, tpg->se_tpg_tfo->get_fabric_name(),
                tpg->se_tpg_tfo->tpg_get_tag(tpg));
 
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
        if (dynamic_acl)
                acl->dynamic_node_acl = 1;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
 
        return 0;
 }
@@ -717,20 +725,20 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
         * not been released because of TFO->tpg_check_demo_mode_cache() == 1
         * in transport_deregister_session().
         */
-       spin_lock_bh(&se_tpg->acl_node_lock);
+       spin_lock_irq(&se_tpg->acl_node_lock);
        list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list,
                        acl_list) {
                list_del(&nacl->acl_list);
                se_tpg->num_node_acls--;
-               spin_unlock_bh(&se_tpg->acl_node_lock);
+               spin_unlock_irq(&se_tpg->acl_node_lock);
 
                core_tpg_wait_for_nacl_pr_ref(nacl);
                core_free_device_list_for_node(nacl, se_tpg);
                se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, nacl);
 
-               spin_lock_bh(&se_tpg->acl_node_lock);
+               spin_lock_irq(&se_tpg->acl_node_lock);
        }
-       spin_unlock_bh(&se_tpg->acl_node_lock);
+       spin_unlock_irq(&se_tpg->acl_node_lock);
 
        if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL)
                core_tpg_release_virtual_lun0(se_tpg);
index 89760329d5d0d292c0c8648c8aff16495a1822b4..8d0c58ea6316e5ad079b75c856744362e6aafb33 100644 (file)
@@ -389,17 +389,18 @@ void transport_deregister_session(struct se_session *se_sess)
 {
        struct se_portal_group *se_tpg = se_sess->se_tpg;
        struct se_node_acl *se_nacl;
+       unsigned long flags;
 
        if (!se_tpg) {
                transport_free_session(se_sess);
                return;
        }
 
-       spin_lock_bh(&se_tpg->session_lock);
+       spin_lock_irqsave(&se_tpg->session_lock, flags);
        list_del(&se_sess->sess_list);
        se_sess->se_tpg = NULL;
        se_sess->fabric_sess_ptr = NULL;
-       spin_unlock_bh(&se_tpg->session_lock);
+       spin_unlock_irqrestore(&se_tpg->session_lock, flags);
 
        /*
         * Determine if we need to do extra work for this initiator node's
@@ -407,22 +408,22 @@ void transport_deregister_session(struct se_session *se_sess)
         */
        se_nacl = se_sess->se_node_acl;
        if (se_nacl) {
-               spin_lock_bh(&se_tpg->acl_node_lock);
+               spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
                if (se_nacl->dynamic_node_acl) {
                        if (!se_tpg->se_tpg_tfo->tpg_check_demo_mode_cache(
                                        se_tpg)) {
                                list_del(&se_nacl->acl_list);
                                se_tpg->num_node_acls--;
-                               spin_unlock_bh(&se_tpg->acl_node_lock);
+                               spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
 
                                core_tpg_wait_for_nacl_pr_ref(se_nacl);
                                core_free_device_list_for_node(se_nacl, se_tpg);
                                se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg,
                                                se_nacl);
-                               spin_lock_bh(&se_tpg->acl_node_lock);
+                               spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
                        }
                }
-               spin_unlock_bh(&se_tpg->acl_node_lock);
+               spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
        }
 
        transport_free_session(se_sess);
@@ -2053,8 +2054,14 @@ static void transport_generic_request_failure(
                cmd->scsi_sense_reason = TCM_UNSUPPORTED_SCSI_OPCODE;
                break;
        }
-
-       if (!sc)
+       /*
+        * If a fabric does not define a cmd->se_tfo->new_cmd_map callback,
+        * make the call to transport_send_check_condition_and_sense()
+        * directly.  Otherwise expect the fabric to make the call to
+        * transport_send_check_condition_and_sense() after handling
+        * possible unsolicited write data payloads.
+        */
+       if (!sc && !cmd->se_tfo->new_cmd_map)
                transport_new_cmd_failure(cmd);
        else {
                ret = transport_send_check_condition_and_sense(cmd,
@@ -2847,12 +2854,42 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd)
                        " transport_dev_end_lba(): %llu\n",
                        cmd->t_task_lba, sectors,
                        transport_dev_end_lba(dev));
-               pr_err("  We should return CHECK_CONDITION"
-                      " but we don't yet\n");
-               return 0;
+               return -EINVAL;
        }
 
-       return sectors;
+       return 0;
+}
+
+static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev)
+{
+       /*
+        * Determine if the received WRITE_SAME is used for direct
+        * passthrough into Linux/SCSI with struct request via TCM/pSCSI,
+        * or we are signaling the use of internal WRITE_SAME + UNMAP=1
+        * emulation for Linux/BLOCK discard with TCM/IBLOCK code.
+        */
+       int passthrough = (dev->transport->transport_type ==
+                               TRANSPORT_PLUGIN_PHBA_PDEV);
+
+       if (!passthrough) {
+               if ((flags[0] & 0x04) || (flags[0] & 0x02)) {
+                       pr_err("WRITE_SAME PBDATA and LBDATA"
+                               " bits not supported for Block Discard"
+                               " Emulation\n");
+                       return -ENOSYS;
+               }
+               /*
+                * Currently for the emulated case we only accept
+                * tpws with the UNMAP=1 bit set.
+                */
+               if (!(flags[0] & 0x08)) {
+                       pr_err("WRITE_SAME w/o UNMAP bit not"
+                               " supported for Block Discard Emulation\n");
+                       return -ENOSYS;
+               }
+       }
+
+       return 0;
 }
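
For reference, the emulated (non-passthrough) path above rejects PBDATA (bit 2) and LBDATA (bit 1) and requires UNMAP (bit 3) in the WRITE_SAME flags byte; the byte is cdb[1] for WRITE SAME (10)/(16) and cdb[10] for the 32-byte variant, matching the call sites below. A standalone sketch of the bit logic:

#include <stdio.h>
#include <stdint.h>

static int check_write_same_discard(uint8_t flags)
{
	if (flags & (0x04 | 0x02))	/* PBDATA or LBDATA set */
		return -1;
	if (!(flags & 0x08))		/* UNMAP not set */
		return -1;
	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       check_write_same_discard(0x08),  /* ok: UNMAP only */
	       check_write_same_discard(0x0c),  /* rejected: PBDATA */
	       check_write_same_discard(0x00)); /* rejected: no UNMAP */
	return 0;
}
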
 
 /*     transport_generic_cmd_sequencer():
@@ -3065,7 +3102,7 @@ static int transport_generic_cmd_sequencer(
                                goto out_unsupported_cdb;
 
                        if (sectors)
-                               size = transport_get_size(sectors, cdb, cmd);
+                               size = transport_get_size(1, cdb, cmd);
                        else {
                                pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not"
                                       " supported\n");
@@ -3075,27 +3112,9 @@ static int transport_generic_cmd_sequencer(
                        cmd->t_task_lba = get_unaligned_be64(&cdb[12]);
                        cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
 
-                       /*
-                        * Skip the remaining assignments for TCM/PSCSI passthrough
-                        */
-                       if (passthrough)
-                               break;
-
-                       if ((cdb[10] & 0x04) || (cdb[10] & 0x02)) {
-                               pr_err("WRITE_SAME PBDATA and LBDATA"
-                                       " bits not supported for Block Discard"
-                                       " Emulation\n");
+                       if (target_check_write_same_discard(&cdb[10], dev) < 0)
                                goto out_invalid_cdb_field;
-                       }
-                       /*
-                        * Currently for the emulated case we only accept
-                        * tpws with the UNMAP=1 bit set.
-                        */
-                       if (!(cdb[10] & 0x08)) {
-                               pr_err("WRITE_SAME w/o UNMAP bit not"
-                                       " supported for Block Discard Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
+
                        break;
                default:
                        pr_err("VARIABLE_LENGTH_CMD service action"
@@ -3330,10 +3349,12 @@ static int transport_generic_cmd_sequencer(
                cmd->se_cmd_flags |= SCF_EMULATE_CDB_ASYNC;
                /*
                 * Check to ensure that LBA + Range does not exceed past end of
-                * device.
+                * device for IBLOCK and FILEIO ->do_sync_cache() backend calls
                 */
-               if (!transport_cmd_get_valid_sectors(cmd))
-                       goto out_invalid_cdb_field;
+               if ((cmd->t_task_lba != 0) || (sectors != 0)) {
+                       if (transport_cmd_get_valid_sectors(cmd) < 0)
+                               goto out_invalid_cdb_field;
+               }
                break;
        case UNMAP:
                size = get_unaligned_be16(&cdb[7]);
@@ -3345,40 +3366,38 @@ static int transport_generic_cmd_sequencer(
                        goto out_unsupported_cdb;
 
                if (sectors)
-                       size = transport_get_size(sectors, cdb, cmd);
+                       size = transport_get_size(1, cdb, cmd);
                else {
                        pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
                        goto out_invalid_cdb_field;
                }
 
                cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
-               passthrough = (dev->transport->transport_type ==
-                               TRANSPORT_PLUGIN_PHBA_PDEV);
-               /*
-                * Determine if the received WRITE_SAME_16 is used to for direct
-                * passthrough into Linux/SCSI with struct request via TCM/pSCSI
-                * or we are signaling the use of internal WRITE_SAME + UNMAP=1
-                * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK and
-                * TCM/FILEIO subsystem plugin backstores.
-                */
-               if (!passthrough) {
-                       if ((cdb[1] & 0x04) || (cdb[1] & 0x02)) {
-                               pr_err("WRITE_SAME PBDATA and LBDATA"
-                                       " bits not supported for Block Discard"
-                                       " Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
-                       /*
-                        * Currently for the emulated case we only accept
-                        * tpws with the UNMAP=1 bit set.
-                        */
-                       if (!(cdb[1] & 0x08)) {
-                               pr_err("WRITE_SAME w/o UNMAP bit not "
-                                       " supported for Block Discard Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
+               cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
+
+               if (target_check_write_same_discard(&cdb[1], dev) < 0)
+                       goto out_invalid_cdb_field;
+               break;
+       case WRITE_SAME:
+               sectors = transport_get_sectors_10(cdb, cmd, &sector_ret);
+               if (sector_ret)
+                       goto out_unsupported_cdb;
+
+               if (sectors)
+                       size = transport_get_size(1, cdb, cmd);
+               else {
+                       pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
+                       goto out_invalid_cdb_field;
                }
+
+               cmd->t_task_lba = get_unaligned_be32(&cdb[2]);
                cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
+               /*
+                * Follow sbc3r26 with WRITE_SAME (10) and check for the existence
+                * of the UNMAP bit (byte 1, bit 3) instead of the original reserved field.
+                */
+               if (target_check_write_same_discard(&cdb[1], dev) < 0)
+                       goto out_invalid_cdb_field;
                break;
        case ALLOW_MEDIUM_REMOVAL:
        case GPCMD_CLOSE_TRACK:
@@ -3873,9 +3892,7 @@ EXPORT_SYMBOL(transport_generic_map_mem_to_cmd);
 static int transport_new_cmd_obj(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
-       u32 task_cdbs;
-       u32 rc;
-       int set_counts = 1;
+       int set_counts = 1, rc, task_cdbs;
 
        /*
         * Setup any BIDI READ tasks and memory from
@@ -3893,7 +3910,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd)
                        cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
                        cmd->scsi_sense_reason =
                                TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-                       return PYX_TRANSPORT_LU_COMM_FAILURE;
+                       return -EINVAL;
                }
                atomic_inc(&cmd->t_fe_count);
                atomic_inc(&cmd->t_se_count);
@@ -3912,7 +3929,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd)
                cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
                cmd->scsi_sense_reason =
                        TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-               return PYX_TRANSPORT_LU_COMM_FAILURE;
+               return -EINVAL;
        }
 
        if (set_counts) {
@@ -4028,8 +4045,6 @@ void transport_do_task_sg_chain(struct se_cmd *cmd)
                if (!task->task_sg)
                        continue;
 
-               BUG_ON(!task->task_padded_sg);
-
                if (!sg_first) {
                        sg_first = task->task_sg;
                        chained_nents = task->task_sg_nents;
@@ -4037,9 +4052,19 @@ void transport_do_task_sg_chain(struct se_cmd *cmd)
                        sg_chain(sg_prev, sg_prev_nents, task->task_sg);
                        chained_nents += task->task_sg_nents;
                }
+               /*
+                * For the padded tasks, use the extra SGL vector allocated
+                * in transport_allocate_data_tasks() for the sg_prev_nents
+                * offset into sg_chain() above.  The last task of a
+                * multi-task list, or a single task, will not have
+                * task->task_padded_sg set.
+                */
+               if (task->task_padded_sg)
+                       sg_prev_nents = (task->task_sg_nents + 1);
+               else
+                       sg_prev_nents = task->task_sg_nents;
 
                sg_prev = task->task_sg;
-               sg_prev_nents = task->task_sg_nents;
        }
        /*
         * Setup the starting pointer and total t_tasks_sg_linked_no including
@@ -4091,7 +4116,7 @@ static int transport_allocate_data_tasks(
        
        cmd_sg = sgl;
        for (i = 0; i < task_count; i++) {
-               unsigned int task_size;
+               unsigned int task_size, task_sg_nents_padded;
                int count;
 
                task = transport_generic_get_task(cmd, data_direction);
@@ -4110,30 +4135,33 @@ static int transport_allocate_data_tasks(
 
                /* Update new cdb with updated lba/sectors */
                cmd->transport_split_cdb(task->task_lba, task->task_sectors, cdb);
-
+               /*
+                * This now assumes that the passed sg_ents are in PAGE_SIZE chunks
+                * in order to calculate the number of per-task SGL entries.
+                */
+               task->task_sg_nents = DIV_ROUND_UP(task->task_size, PAGE_SIZE);
                /*
                 * Check if the fabric module driver is requesting that all
                 * struct se_task->task_sg[] be chained together..  If so,
                 * then allocate an extra padding SG entry for linking and
-                * marking the end of the chained SGL.
-                * Possibly over-allocate task sgl size by using cmd sgl size.
-                * It's so much easier and only a waste when task_count > 1.
-                * That is extremely rare.
+                * marking the end of the chained SGL for every task except
+                * the last one for (task_count > 1) operation, or skipping
+                * the extra padding for the (task_count == 1) case.
                 */
-               task->task_sg_nents = sgl_nents;
-               if (cmd->se_tfo->task_sg_chaining) {
-                       task->task_sg_nents++;
+               if (cmd->se_tfo->task_sg_chaining && (i < (task_count - 1))) {
+                       task_sg_nents_padded = (task->task_sg_nents + 1);
                        task->task_padded_sg = 1;
-               }
+               } else
+                       task_sg_nents_padded = task->task_sg_nents;
 
                task->task_sg = kmalloc(sizeof(struct scatterlist) *
-                                       task->task_sg_nents, GFP_KERNEL);
+                                       task_sg_nents_padded, GFP_KERNEL);
                if (!task->task_sg) {
                        cmd->se_dev->transport->free_task(task);
                        return -ENOMEM;
                }
 
-               sg_init_table(task->task_sg, task->task_sg_nents);
+               sg_init_table(task->task_sg, task_sg_nents_padded);
 
                task_size = task->task_size;
 
@@ -4230,10 +4258,13 @@ static u32 transport_allocate_tasks(
        struct scatterlist *sgl,
        unsigned int sgl_nents)
 {
-       if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB)
+       if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) {
+               if (transport_cmd_get_valid_sectors(cmd) < 0)
+                       return -EINVAL;
+
                return transport_allocate_data_tasks(cmd, lba, data_direction,
                                                     sgl, sgl_nents);
-       else
+       } else
                return transport_allocate_control_task(cmd);
 
 }
@@ -4726,6 +4757,13 @@ int transport_send_check_condition_and_sense(
         */
        switch (reason) {
        case TCM_NON_EXISTENT_LUN:
+               /* CURRENT ERROR */
+               buffer[offset] = 0x70;
+               /* ILLEGAL REQUEST */
+               buffer[offset+SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
+               /* LOGICAL UNIT NOT SUPPORTED */
+               buffer[offset+SPC_ASC_KEY_OFFSET] = 0x25;
+               break;
        case TCM_UNSUPPORTED_SCSI_OPCODE:
        case TCM_SECTOR_COUNT_TOO_MANY:
                /* CURRENT ERROR */
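
The three bytes set here follow fixed-format sense data: response code 0x70 at byte 0, the sense key at byte 2 and the additional sense code at byte 12, the positions the SPC_SENSE_KEY_OFFSET/SPC_ASC_KEY_OFFSET macros resolve to. A standalone sketch filling the same bytes:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t sense[18] = {0};

	sense[0]  = 0x70; /* current error, fixed format */
	sense[2]  = 0x05; /* sense key: ILLEGAL REQUEST */
	sense[12] = 0x25; /* ASC: LOGICAL UNIT NOT SUPPORTED */

	printf("%02x/%02x/%02x\n", sense[0], sense[2], sense[12]);
	return 0;
}
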
index 8781d1e423df31629d2d5eaa19e2db0e409059d8..b15879d43e227c734aadfe3ced7d367b86359812 100644 (file)
@@ -256,7 +256,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
        struct se_portal_group *se_tpg = &tpg->se_tpg;
        struct se_node_acl *se_acl;
 
-       spin_lock_bh(&se_tpg->acl_node_lock);
+       spin_lock_irq(&se_tpg->acl_node_lock);
        list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) {
                acl = container_of(se_acl, struct ft_node_acl, se_node_acl);
                pr_debug("acl %p port_name %llx\n",
@@ -270,7 +270,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
                        break;
                }
        }
-       spin_unlock_bh(&se_tpg->acl_node_lock);
+       spin_unlock_irq(&se_tpg->acl_node_lock);
        return found;
 }
 
@@ -655,9 +655,7 @@ static void __exit ft_exit(void)
        synchronize_rcu();
 }
 
-#ifdef MODULE
 MODULE_DESCRIPTION("FC TCM fabric driver " FT_VERSION);
 MODULE_LICENSE("GPL");
 module_init(ft_init);
 module_exit(ft_exit);
-#endif /* MODULE */
index 98b6e3bdb000bda730a69a05680d76b8584f1715..e809e9d4683c6d82197cbd569c12b1375df8be99 100644 (file)
@@ -446,8 +446,19 @@ static inline void legacy_pty_init(void) { }
 int pty_limit = NR_UNIX98_PTY_DEFAULT;
 static int pty_limit_min;
 static int pty_limit_max = NR_UNIX98_PTY_MAX;
+static int tty_count;
 static int pty_count;
 
+static inline void pty_inc_count(void)
+{
+       pty_count = (++tty_count) / 2;
+}
+
+static inline void pty_dec_count(void)
+{
+       pty_count = (--tty_count) / 2;
+}
+
 static struct cdev ptmx_cdev;
 
 static struct ctl_table pty_table[] = {
@@ -542,6 +553,7 @@ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver,
 
 static void pty_unix98_shutdown(struct tty_struct *tty)
 {
+       tty_driver_remove_tty(tty->driver, tty);
        /* We have our own method as we don't use the tty index */
        kfree(tty->termios);
 }
@@ -588,7 +600,8 @@ static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty)
         */
        tty_driver_kref_get(driver);
        tty->count++;
-       pty_count++;
+       pty_inc_count(); /* tty */
+       pty_inc_count(); /* tty->link */
        return 0;
 err_free_mem:
        deinitialize_tty_struct(o_tty);
@@ -602,7 +615,7 @@ err_free_tty:
 
 static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 {
-       pty_count--;
+       pty_dec_count();
 }
 
 static const struct tty_operations ptm_unix98_ops = {
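
Each pty open instantiates two tty_structs, the master and its tty->link slave, so install bumps the raw count twice and the sysctl-visible pty_count is kept at half the tty count. A userspace sketch of the counting:

#include <stdio.h>

static int tty_count, pty_count;

static void pty_inc_count(void) { pty_count = (++tty_count) / 2; }
static void pty_dec_count(void) { pty_count = (--tty_count) / 2; }

int main(void)
{
	pty_inc_count(); /* tty */
	pty_inc_count(); /* tty->link */
	printf("open: pty_count=%d\n", pty_count);        /* 1 */

	pty_dec_count();
	printf("half-closed: pty_count=%d\n", pty_count); /* 0, rounds down */
	return 0;
}
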
index f2dfec82faf85d19f78d3ea4d755ae730b336911..7f50999eebc22f00fb482d2fda62d62f11262be5 100644 (file)
@@ -1819,6 +1819,8 @@ static void serial8250_backup_timeout(unsigned long data)
        unsigned int iir, ier = 0, lsr;
        unsigned long flags;
 
+       spin_lock_irqsave(&up->port.lock, flags);
+
        /*
         * Must disable interrupts or else we risk racing with the interrupt
         * based handler.
@@ -1836,10 +1838,8 @@ static void serial8250_backup_timeout(unsigned long data)
         * the "Diva" UART used on the management processor on many HP
         * ia64 and parisc boxes.
         */
-       spin_lock_irqsave(&up->port.lock, flags);
        lsr = serial_in(up, UART_LSR);
        up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
-       spin_unlock_irqrestore(&up->port.lock, flags);
        if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) &&
            (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) &&
            (lsr & UART_LSR_THRE)) {
@@ -1848,11 +1848,13 @@ static void serial8250_backup_timeout(unsigned long data)
        }
 
        if (!(iir & UART_IIR_NO_INT))
-               serial8250_handle_port(up);
+               transmit_chars(up);
 
        if (is_real_interrupt(up->port.irq))
                serial_out(up, UART_IER, ier);
 
+       spin_unlock_irqrestore(&up->port.lock, flags);
+
        /* Standard timer interval plus 0.2s to keep the port running */
        mod_timer(&up->timer,
                jiffies + uart_poll_timeout(&up->port) + HZ / 5);
index 6b887d90a20554683975d921c7ca492bba3ccf68..3abeca2a2a1bb08a85c8e5d76fe37afc22da5f4e 100644 (file)
@@ -1599,11 +1599,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
                .device         = 0x800D,
                .init           = pci_eg20t_init,
        },
-       {
-               .vendor         = 0x10DB,
-               .device         = 0x800D,
-               .init           = pci_eg20t_init,
-       },
        /*
         * Cronyx Omega PCI (PLX-chip based)
         */
@@ -4021,13 +4016,17 @@ static struct pci_device_id serial_pci_tbl[] = {
                0, 0, pbn_NETMOS9900_2s_115200 },
 
        /*
-        * Best Connectivity PCI Multi I/O cards
+        * Best Connectivity and Rosewill PCI Multi I/O cards
         */
 
        {       PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
                0xA000, 0x1000,
                0, 0, pbn_b0_1_115200 },
 
+       {       PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
+               0xA000, 0x3002,
+               0, 0, pbn_b0_bt_2_115200 },
+
        {       PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
                0xA000, 0x3004,
                0, 0, pbn_b0_bt_4_115200 },
index fc301f6722e1bf4d2e9788f01e702cee6e711ddd..a2f236510ff1cd3f7fc92524f6096dc8678946f0 100644 (file)
@@ -109,6 +109,9 @@ static const struct pnp_device_id pnp_dev_table[] = {
        /* IBM */
        /* IBM Thinkpad 701 Internal Modem Voice */
        {       "IBM0033",              0       },
+       /* Intermec */
+       /* Intermec CV60 touchscreen port */
+       {       "PNP4972",              0       },
        /* Intertex */
        /* Intertex 28k8 33k6 Voice EXT PnP */
        {       "IXDC801",              0       },
index af9b7814965a461921d337047c068aed9bb5d051..b922f5d2e61e0cc52d4aea4ba9c81fca0b7fcc52 100644 (file)
@@ -1609,9 +1609,11 @@ static struct console atmel_console = {
 static int __init atmel_console_init(void)
 {
        if (atmel_default_console_device) {
-               add_preferred_console(ATMEL_DEVICENAME,
-                                     atmel_default_console_device->id, NULL);
-               atmel_init_port(&atmel_ports[atmel_default_console_device->id],
+               struct atmel_uart_data *pdata =
+                       atmel_default_console_device->dev.platform_data;
+
+               add_preferred_console(ATMEL_DEVICENAME, pdata->num, NULL);
+               atmel_init_port(&atmel_ports[pdata->num],
                                atmel_default_console_device);
                register_console(&atmel_console);
        }
index a1fe304f2f5204adf43c2a26171886198719ac91..d73aadd7a9ad6614385b62f92582c5c7b87c7b61 100644 (file)
@@ -340,5 +340,5 @@ module_exit(max3107_exit);
 
 MODULE_DESCRIPTION("MAX3107 driver");
 MODULE_AUTHOR("Aavamobile");
-MODULE_ALIAS("aava-max3107-spi");
+MODULE_ALIAS("spi:aava-max3107");
 MODULE_LICENSE("GPL v2");
index 750b4f627315e465e7c9eed0f7d9fd172f869c13..a8164601c0ead50c841f4a3a662f90df315e18ca 100644 (file)
@@ -1209,5 +1209,5 @@ module_exit(max3107_exit);
 
 MODULE_DESCRIPTION("MAX3107 driver");
 MODULE_AUTHOR("Aavamobile");
-MODULE_ALIAS("max3107-spi");
+MODULE_ALIAS("spi:max3107");
 MODULE_LICENSE("GPL v2");
index a764bf99743b0b5c6be4d46fed2952cbca278d94..23bc743f2a22f864674750fb2ea4d4f442ca174f 100644 (file)
@@ -917,4 +917,4 @@ module_init(serial_m3110_init);
 module_exit(serial_m3110_exit);
 
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("max3110-uart");
+MODULE_ALIAS("spi:max3110-uart");
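
These alias fixes matter because the SPI core reports devices to userspace with a MODALIAS uevent of the form "spi:<modalias>", so an alias without the "spi:" prefix can never trigger module autoload. A sketch of the string match involved:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *uevent = "spi:max3107"; /* what the SPI core emits */

	printf("old alias matches: %d\n", strcmp(uevent, "max3107-spi") == 0);
	printf("new alias matches: %d\n", strcmp(uevent, "spi:max3107") == 0);
	return 0;
}
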
index c37df8d0fa2819261dffccc5bc4d0180b9531f49..5e713d3ef1f47c49eaa93f8d4ced09ce87b89c1e 100644 (file)
@@ -806,8 +806,7 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios,
 
        serial_omap_set_mctrl(&up->port, up->port.mctrl);
        /* Software Flow Control Configuration */
-       if (termios->c_iflag & (IXON | IXOFF))
-               serial_omap_configure_xonxoff(up, termios);
+       serial_omap_configure_xonxoff(up, termios);
 
        spin_unlock_irqrestore(&up->port.lock, flags);
        dev_dbg(up->port.dev, "serial_omap_set_termios+%d\n", up->pdev->id);
index 846dfcd3ce0d28a6ee882278a90779c3c6427e21..b46218d679e21e4561657b8cf5508e7138d5e03e 100644 (file)
@@ -598,7 +598,8 @@ static void pch_request_dma(struct uart_port *port)
        dma_cap_zero(mask);
        dma_cap_set(DMA_SLAVE, mask);
 
-       dma_dev = pci_get_bus_and_slot(2, PCI_DEVFN(0xa, 0)); /* Get DMA's dev
+       dma_dev = pci_get_bus_and_slot(priv->pdev->bus->number,
+                                      PCI_DEVFN(0xa, 0)); /* Get DMA's dev
                                                                information */
        /* Set Tx DMA */
        param = &priv->param_tx;
index afc629423152bfa606b27bb11094713dd7936988..6edafb5ace183bfb0c30eff6f00f7f673ea022c9 100644 (file)
@@ -1225,15 +1225,19 @@ static const struct dev_pm_ops s3c24xx_serial_pm_ops = {
        .suspend = s3c24xx_serial_suspend,
        .resume = s3c24xx_serial_resume,
 };
+#define SERIAL_SAMSUNG_PM_OPS  (&s3c24xx_serial_pm_ops)
+
 #else /* !CONFIG_PM_SLEEP */
-#define s3c24xx_serial_pm_ops  NULL
+
+#define SERIAL_SAMSUNG_PM_OPS  NULL
 #endif /* CONFIG_PM_SLEEP */
 
 int s3c24xx_serial_init(struct platform_driver *drv,
                        struct s3c24xx_uart_info *info)
 {
        dbg("s3c24xx_serial_init(%p,%p)\n", drv, info);
-       drv->driver.pm = &s3c24xx_serial_pm_ops;
+
+       drv->driver.pm = SERIAL_SAMSUNG_PM_OPS;
 
        return platform_driver_register(drv);
 }
index db7912cb7ae041a71a901437d3c835ee242530a9..a3efbea5dbba6b88f0068470c4e0ad019fa3cd5d 100644 (file)
@@ -200,6 +200,11 @@ static int uart_startup(struct tty_struct *tty, struct uart_state *state, int in
                clear_bit(TTY_IO_ERROR, &tty->flags);
        }
 
+       /*
+        * This is to allow setserial on this port. People may want to set
+        * port/irq/type and then reconfigure the port properly if it failed
+        * now.
+        */
        if (retval && capable(CAP_SYS_ADMIN))
                retval = 0;
 
index 2ec57b2fb2783a232df5aad9df1a276a6f2edd1e..5ea6ec3442e64d2affe5f54d9871f1561624c2f9 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/ctype.h>
 #include <linux/err.h>
 #include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 
@@ -95,6 +96,12 @@ struct sci_port {
 #endif
 
        struct notifier_block           freq_transition;
+
+#ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
+       unsigned short saved_smr;
+       unsigned short saved_fcr;
+       unsigned char saved_brr;
+#endif
 };
 
 /* Function prototypes */
@@ -1076,7 +1083,7 @@ static unsigned int sci_get_mctrl(struct uart_port *port)
        /* This routine is used for getting signals of: DTR, DCD, DSR, RI,
           and CTS/RTS */
 
-       return TIOCM_DTR | TIOCM_RTS | TIOCM_DSR;
+       return TIOCM_DTR | TIOCM_RTS | TIOCM_CTS | TIOCM_DSR;
 }
 
 #ifdef CONFIG_SERIAL_SH_SCI_DMA
@@ -1633,11 +1640,25 @@ static unsigned int sci_scbrr_calc(unsigned int algo_id, unsigned int bps,
        return ((freq + 16 * bps) / (32 * bps) - 1);
 }
 
+static void sci_reset(struct uart_port *port)
+{
+       unsigned int status;
+
+       do {
+               status = sci_in(port, SCxSR);
+       } while (!(status & SCxSR_TEND(port)));
+
+       sci_out(port, SCSCR, 0x00);     /* TE=0, RE=0, CKE1=0 */
+
+       if (port->type != PORT_SCI)
+               sci_out(port, SCFCR, SCFCR_RFRST | SCFCR_TFRST);
+}
+
 static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
                            struct ktermios *old)
 {
        struct sci_port *s = to_sci_port(port);
-       unsigned int status, baud, smr_val, max_baud;
+       unsigned int baud, smr_val, max_baud;
        int t = -1;
        u16 scfcr = 0;
 
@@ -1657,14 +1678,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
 
        sci_port_enable(s);
 
-       do {
-               status = sci_in(port, SCxSR);
-       } while (!(status & SCxSR_TEND(port)));
-
-       sci_out(port, SCSCR, 0x00);     /* TE=0, RE=0, CKE1=0 */
-
-       if (port->type != PORT_SCI)
-               sci_out(port, SCFCR, scfcr | SCFCR_RFRST | SCFCR_TFRST);
+       sci_reset(port);
 
        smr_val = sci_in(port, SCSMR) & 3;
 
@@ -1913,6 +1927,7 @@ static int __devinit sci_init_single(struct platform_device *dev,
 
                port->dev = &dev->dev;
 
+               pm_runtime_irq_safe(&dev->dev);
                pm_runtime_enable(&dev->dev);
        }
 
@@ -2036,7 +2051,8 @@ static int __devinit serial_console_setup(struct console *co, char *options)
        if (options)
                uart_parse_options(options, &baud, &parity, &bits, &flow);
 
-       /* TODO: disable clock */
+       sci_port_disable(sci_port);
+
        return uart_set_options(port, co, baud, parity, bits, flow);
 }
 
@@ -2079,6 +2095,36 @@ static int __devinit sci_probe_earlyprintk(struct platform_device *pdev)
        return 0;
 }
 
+#define uart_console(port)     ((port)->cons->index == (port)->line)
+
+static int sci_runtime_suspend(struct device *dev)
+{
+       struct sci_port *sci_port = dev_get_drvdata(dev);
+       struct uart_port *port = &sci_port->port;
+
+       if (uart_console(port)) {
+               sci_port->saved_smr = sci_in(port, SCSMR);
+               sci_port->saved_brr = sci_in(port, SCBRR);
+               sci_port->saved_fcr = sci_in(port, SCFCR);
+       }
+       return 0;
+}
+
+static int sci_runtime_resume(struct device *dev)
+{
+       struct sci_port *sci_port = dev_get_drvdata(dev);
+       struct uart_port *port = &sci_port->port;
+
+       if (uart_console(port)) {
+               sci_reset(port);
+               sci_out(port, SCSMR, sci_port->saved_smr);
+               sci_out(port, SCBRR, sci_port->saved_brr);
+               sci_out(port, SCFCR, sci_port->saved_fcr);
+               sci_out(port, SCSCR, sci_port->cfg->scscr);
+       }
+       return 0;
+}
+
 #define SCI_CONSOLE    (&serial_console)
 
 #else
@@ -2088,6 +2134,8 @@ static inline int __devinit sci_probe_earlyprintk(struct platform_device *pdev)
 }
 
 #define SCI_CONSOLE    NULL
+#define sci_runtime_suspend    NULL
+#define sci_runtime_resume     NULL
 
 #endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */
 
@@ -2203,6 +2251,8 @@ static int sci_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops sci_dev_pm_ops = {
+       .runtime_suspend = sci_runtime_suspend,
+       .runtime_resume = sci_runtime_resume,
        .suspend        = sci_suspend,
        .resume         = sci_resume,
 };
index c327218cad44c91ab680cf0dc46fcd4933cc8347..9af9f0879a24542860ab8f934d025acf705174ca 100644 (file)
@@ -235,7 +235,7 @@ static inline void *qe2cpu_addr(dma_addr_t addr, struct uart_qe_port *qe_port)
                return qe_port->bd_virt + (addr - qe_port->bd_dma_addr);
 
        /* something nasty happened */
-       printk(KERN_ERR "%s: addr=%x\n", __func__, addr);
+       printk(KERN_ERR "%s: addr=%llx\n", __func__, (u64)addr);
        BUG();
        return NULL;
 }
index 150e4f747c7dc4bc822a706a76d06afefb80424d..4f1fc81112e6b5ae6521e39cf19295e1f3a0c6b5 100644 (file)
@@ -1295,8 +1295,7 @@ static int tty_driver_install_tty(struct tty_driver *driver,
  *
  *     Locking: tty_mutex for now
  */
-static void tty_driver_remove_tty(struct tty_driver *driver,
-                                               struct tty_struct *tty)
+void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty)
 {
        if (driver->ops->remove)
                driver->ops->remove(driver, tty);
index 8669ba3fe79486ffe487e6b6b266ff4767afe670..73cbbd85219fa5525b2968443d61a5c6f5086abd 100644 (file)
@@ -1775,6 +1775,8 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev,
                struct usb_interface *iface = usb_ifnum_to_if(udev,
                                cur_alt->desc.bInterfaceNumber);
 
+               if (!iface)
+                       return -EINVAL;
                if (iface->resetting_device) {
                        /*
                         * The USB core just reset the device, so the xHCI host
index 8f8d3f6cd89edf22fb9d96a67f89bc3fb56b0d5f..8f3eab1af885847fa69ec343b0039a0df4faae39 100644 (file)
@@ -434,6 +434,7 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
                            config_ep_by_speed(gadget, f, fp->out_ep)) {
                                fp->in_ep->desc = NULL;
                                fp->out_ep->desc = NULL;
+                               spin_unlock(&port->lock);
                                return -EINVAL;
                        }
                        usb_ep_enable(fp->out_ep);
index e051b30c1847804f6b94e188521a865e036108ea..4c32cb19b405f7f5c4fc589e37acc3455ab15448 100644 (file)
@@ -343,7 +343,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
        u32                     temp;
        u32                     power_okay;
        int                     i;
-       u8                      resume_needed = 0;
+       unsigned long           resume_needed = 0;
 
        if (time_before (jiffies, ehci->next_statechange))
                msleep(5);
@@ -416,7 +416,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
                if (test_bit(i, &ehci->bus_suspended) &&
                                (temp & PORT_SUSPEND)) {
                        temp |= PORT_RESUME;
-                       resume_needed = 1;
+                       set_bit(i, &resume_needed);
                }
                ehci_writel(ehci, temp, &ehci->regs->port_status [i]);
        }
@@ -431,8 +431,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
        i = HCS_N_PORTS (ehci->hcs_params);
        while (i--) {
                temp = ehci_readl(ehci, &ehci->regs->port_status [i]);
-               if (test_bit(i, &ehci->bus_suspended) &&
-                               (temp & PORT_SUSPEND)) {
+               if (test_bit(i, &resume_needed)) {
                        temp &= ~(PORT_RWC_BITS | PORT_RESUME);
                        ehci_writel(ehci, temp, &ehci->regs->port_status [i]);
                        ehci_vdbg (ehci, "resumed port %d\n", i + 1);
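
The fix replaces a single yes/no flag with a per-port bitmask, so the second loop only clears PORT_RESUME on ports that actually had resume signaled instead of re-testing PORT_SUSPEND after the register has changed. A sketch of the bitmask logic, using plain bit operations where the kernel uses set_bit()/test_bit():

#include <stdio.h>

int main(void)
{
	unsigned long bus_suspended = 0x05; /* ports 0 and 2 were suspended */
	unsigned long port_suspend  = 0x04; /* only port 2 reports SUSPEND */
	unsigned long resume_needed = 0;
	int i;

	for (i = 0; i < 3; i++)
		if ((bus_suspended & (1UL << i)) && (port_suspend & (1UL << i)))
			resume_needed |= 1UL << i; /* set_bit(i, ...) */

	for (i = 0; i < 3; i++)
		if (resume_needed & (1UL << i)) /* test_bit(i, ...) */
			printf("finish resume on port %d\n", i + 1);
	return 0;
}
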
index b3958b3d31634f59c9c067712c7f4cf7c2959c0a..9e77f1c8bdbdc1d62e31565f9c8303e292f8026f 100644 (file)
@@ -86,6 +86,7 @@ static int __devinit s5p_ehci_probe(struct platform_device *pdev)
                goto fail_hcd;
        }
 
+       s5p_ehci->hcd = hcd;
        s5p_ehci->clk = clk_get(&pdev->dev, "usbhost");
 
        if (IS_ERR(s5p_ehci->clk)) {
index 0be788cc2fdbe6e40920e5f77efa7590475bffa8..1e96d1f1fe6befacfc2a37bb84e1f48b734b2724 100644 (file)
@@ -463,11 +463,12 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                        && (temp & PORT_POWER))
                                status |= USB_PORT_STAT_SUSPEND;
                }
-               if ((temp & PORT_PLS_MASK) == XDEV_RESUME) {
+               if ((temp & PORT_PLS_MASK) == XDEV_RESUME &&
+                               !DEV_SUPERSPEED(temp)) {
                        if ((temp & PORT_RESET) || !(temp & PORT_PE))
                                goto error;
-                       if (!DEV_SUPERSPEED(temp) && time_after_eq(jiffies,
-                                               bus_state->resume_done[wIndex])) {
+                       if (time_after_eq(jiffies,
+                                       bus_state->resume_done[wIndex])) {
                                xhci_dbg(xhci, "Resume USB2 port %d\n",
                                        wIndex + 1);
                                bus_state->resume_done[wIndex] = 0;
@@ -487,6 +488,14 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                xhci_ring_device(xhci, slot_id);
                                bus_state->port_c_suspend |= 1 << wIndex;
                                bus_state->suspended_ports &= ~(1 << wIndex);
+                       } else {
+                               /*
+                                * The resume has been signaling for less than
+                                * Resume signaling has been active for less than
+                                * let the usbcore check port status again
+                                * and clear resume signaling later.
+                                */
+                               status |= USB_PORT_STAT_SUSPEND;
                        }
                }
                if ((temp & PORT_PLS_MASK) == XDEV_U0
@@ -664,7 +673,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                        xhci_dbg(xhci, "PORTSC %04x\n", temp);
                        if (temp & PORT_RESET)
                                goto error;
-                       if (temp & XDEV_U3) {
+                       if ((temp & PORT_PLS_MASK) == XDEV_U3) {
                                if ((temp & PORT_PE) == 0)
                                        goto error;
 
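The XDEV_U3 fix above matters because the port link state (PLS) is a multi-bit register field, not a flag: a plain bit test can match other states whose encoding shares bits with U3. Illustrative sketch using the xHCI-style field layout (constants as assumed here; xhci.h is authoritative):

#include <stdio.h>

#define PORT_PLS_MASK  (0xf << 5)
#define XDEV_U3        (0x3 << 5)
#define XDEV_RESUME    (0xf << 5)

int main(void)
{
        unsigned int temp = XDEV_RESUME;        /* PLS actually reads Resume */

        printf("bit test:   %d\n", !!(temp & XDEV_U3));                /* 1: false positive */
        printf("field test: %d\n", (temp & PORT_PLS_MASK) == XDEV_U3); /* 0: correct */
        return 0;
}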
index 7113d16e2d3a40f1febae01cedd8bf2dd2308980..54139a2f06ce9700933f96255fc0a680c07b9a80 100644 (file)
@@ -514,8 +514,12 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
                        (unsigned long long) addr);
 }
 
+/* flip_cycle means flip the cycle bit of all but the first and last TRB.
+ * (The last TRB actually points to the ring enqueue pointer, which is not part
+ * of this TD.)  This is used to remove partially enqueued isoc TDs from a ring.
+ */
 static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
-               struct xhci_td *cur_td)
+               struct xhci_td *cur_td, bool flip_cycle)
 {
        struct xhci_segment *cur_seg;
        union xhci_trb *cur_trb;
@@ -528,6 +532,12 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
                         * leave the pointers intact.
                         */
                        cur_trb->generic.field[3] &= cpu_to_le32(~TRB_CHAIN);
+                       /* Flip the cycle bit (link TRBs can't be the first
+                        * or last TRB).
+                        */
+                       if (flip_cycle)
+                               cur_trb->generic.field[3] ^=
+                                       cpu_to_le32(TRB_CYCLE);
                        xhci_dbg(xhci, "Cancel (unchain) link TRB\n");
                        xhci_dbg(xhci, "Address = %p (0x%llx dma); "
                                        "in seg %p (0x%llx dma)\n",
@@ -541,6 +551,11 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
                        cur_trb->generic.field[2] = 0;
                        /* Preserve only the cycle bit of this TRB */
                        cur_trb->generic.field[3] &= cpu_to_le32(TRB_CYCLE);
+                       /* Flip the cycle bit except on the first or last TRB */
+                       if (flip_cycle && cur_trb != cur_td->first_trb &&
+                                       cur_trb != cur_td->last_trb)
+                               cur_trb->generic.field[3] ^=
+                                       cpu_to_le32(TRB_CYCLE);
                        cur_trb->generic.field[3] |= cpu_to_le32(
                                TRB_TYPE(TRB_TR_NOOP));
                        xhci_dbg(xhci, "Cancel TRB %p (0x%llx dma) "
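
For the td_to_noop() change: flipping TRB_CYCLE hands ownership of a queued TRB back to software, so the controller skips the stale entry instead of executing it. Toy sketch of the toggle (the real driver does this through cpu_to_le32() on the le32 field, omitted here):

#include <stdio.h>
#include <stdint.h>

#define TRB_CYCLE  (1u << 0)    /* xHCI cycle bit lives in bit 0 of control */

int main(void)
{
        uint32_t field3 = 0x00000401;   /* some TRB control word, cycle bit set */

        field3 ^= TRB_CYCLE;            /* hand ownership back to software */
        printf("cycle now %u\n", field3 & TRB_CYCLE);   /* prints 0 */
        return 0;
}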
@@ -719,14 +734,14 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
                                        cur_td->urb->stream_id,
                                        cur_td, &deq_state);
                else
-                       td_to_noop(xhci, ep_ring, cur_td);
+                       td_to_noop(xhci, ep_ring, cur_td, false);
 remove_finished_td:
                /*
                 * The event handler won't see a completion for this TD anymore,
                 * so remove it from the endpoint ring's TD list.  Keep it in
                 * the cancelled TD list for URB completion later.
                 */
-               list_del(&cur_td->td_list);
+               list_del_init(&cur_td->td_list);
        }
        last_unlinked_td = cur_td;
        xhci_stop_watchdog_timer_in_irq(xhci, ep);
@@ -754,7 +769,7 @@ remove_finished_td:
        do {
                cur_td = list_entry(ep->cancelled_td_list.next,
                                struct xhci_td, cancelled_td_list);
-               list_del(&cur_td->cancelled_td_list);
+               list_del_init(&cur_td->cancelled_td_list);
 
                /* Clean up the cancelled URB */
                /* Doesn't matter what we pass for status, since the core will
@@ -862,9 +877,9 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
                                cur_td = list_first_entry(&ring->td_list,
                                                struct xhci_td,
                                                td_list);
-                               list_del(&cur_td->td_list);
+                               list_del_init(&cur_td->td_list);
                                if (!list_empty(&cur_td->cancelled_td_list))
-                                       list_del(&cur_td->cancelled_td_list);
+                                       list_del_init(&cur_td->cancelled_td_list);
                                xhci_giveback_urb_in_irq(xhci, cur_td,
                                                -ESHUTDOWN, "killed");
                        }
@@ -873,7 +888,7 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
                                                &temp_ep->cancelled_td_list,
                                                struct xhci_td,
                                                cancelled_td_list);
-                               list_del(&cur_td->cancelled_td_list);
+                               list_del_init(&cur_td->cancelled_td_list);
                                xhci_giveback_urb_in_irq(xhci, cur_td,
                                                -ESHUTDOWN, "killed");
                        }
@@ -1565,10 +1580,10 @@ td_cleanup:
                        else
                                *status = 0;
                }
-               list_del(&td->td_list);
+               list_del_init(&td->td_list);
                /* Was this TD slated to be cancelled but completed anyway? */
                if (!list_empty(&td->cancelled_td_list))
-                       list_del(&td->cancelled_td_list);
+                       list_del_init(&td->cancelled_td_list);
 
                urb_priv->td_cnt++;
                /* Giveback the urb when all the tds are completed */
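
The repeated list_del() to list_del_init() conversions in this file share one motivation: only a self-linked node gives a meaningful answer to a later list_empty() check, while a plainly deleted node keeps dangling pointers. Standalone sketch with a minimal doubly linked list (not <linux/list.h>):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *n, struct list_head *h)
{
        n->next = h->next; n->prev = h;
        h->next->prev = n; h->next = n;
}

static void list_del_init(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        INIT_LIST_HEAD(n);              /* node stays safe to re-test */
}

static int list_empty(const struct list_head *h) { return h->next == h; }

int main(void)
{
        struct list_head q, td;

        INIT_LIST_HEAD(&q);
        INIT_LIST_HEAD(&td);
        list_add(&td, &q);
        list_del_init(&td);
        printf("still queued? %d\n", !list_empty(&td));   /* prints 0 */
        return 0;
}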
@@ -2500,11 +2515,8 @@ static int prepare_transfer(struct xhci_hcd *xhci,
 
        if (td_index == 0) {
                ret = usb_hcd_link_urb_to_ep(bus_to_hcd(urb->dev->bus), urb);
-               if (unlikely(ret)) {
-                       xhci_urb_free_priv(xhci, urb_priv);
-                       urb->hcpriv = NULL;
+               if (unlikely(ret))
                        return ret;
-               }
        }
 
        td->urb = urb;
@@ -2672,6 +2684,10 @@ static u32 xhci_v1_0_td_remainder(int running_total, int trb_buff_len,
 {
        int packets_transferred;
 
+       /* One TRB with a zero-length data packet. */
+       if (running_total == 0 && trb_buff_len == 0)
+               return 0;
+
        /* All the TRB queueing functions don't count the current TRB in
         * running_total.
         */
@@ -3113,20 +3129,15 @@ static int count_isoc_trbs_needed(struct xhci_hcd *xhci,
                struct urb *urb, int i)
 {
        int num_trbs = 0;
-       u64 addr, td_len, running_total;
+       u64 addr, td_len;
 
        addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset);
        td_len = urb->iso_frame_desc[i].length;
 
-       running_total = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
-       running_total &= TRB_MAX_BUFF_SIZE - 1;
-       if (running_total != 0)
-               num_trbs++;
-
-       while (running_total < td_len) {
+       num_trbs = DIV_ROUND_UP(td_len + (addr & (TRB_MAX_BUFF_SIZE - 1)),
+                       TRB_MAX_BUFF_SIZE);
+       if (num_trbs == 0)
                num_trbs++;
-               running_total += TRB_MAX_BUFF_SIZE;
-       }
 
        return num_trbs;
 }
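
The count_isoc_trbs_needed() rewrite above replaces the counting loop with a single rounded-up division over 64 KiB TRB segments. Sketch of the arithmetic, including the zero-length guard:

#include <stdio.h>
#include <stdint.h>

#define TRB_MAX_BUFF_SIZE  (1u << 16)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned count_trbs(uint64_t addr, uint64_t td_len)
{
        unsigned n = DIV_ROUND_UP(td_len + (addr & (TRB_MAX_BUFF_SIZE - 1)),
                                  TRB_MAX_BUFF_SIZE);
        return n ? n : 1;       /* a zero-length TD still occupies one TRB */
}

int main(void)
{
        printf("%u\n", count_trbs(0xff00, 0x200));   /* crosses a 64K boundary: 2 */
        printf("%u\n", count_trbs(0x10000, 0));      /* zero-length, aligned: guard gives 1 */
        return 0;
}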
@@ -3226,6 +3237,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
        start_trb = &ep_ring->enqueue->generic;
        start_cycle = ep_ring->cycle_state;
 
+       urb_priv = urb->hcpriv;
        /* Queue the first TRB, even if it's zero-length */
        for (i = 0; i < num_tds; i++) {
                unsigned int total_packet_count;
@@ -3237,9 +3249,11 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                addr = start_addr + urb->iso_frame_desc[i].offset;
                td_len = urb->iso_frame_desc[i].length;
                td_remain_len = td_len;
-               /* FIXME: Ignoring zero-length packets, can those happen? */
                total_packet_count = roundup(td_len,
                                le16_to_cpu(urb->ep->desc.wMaxPacketSize));
+               /* A zero-length transfer still involves at least one packet. */
+               if (total_packet_count == 0)
+                       total_packet_count++;
                burst_count = xhci_get_burst_count(xhci, urb->dev, urb,
                                total_packet_count);
                residue = xhci_get_last_burst_packet_count(xhci,
@@ -3249,12 +3263,13 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
                ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
                                urb->stream_id, trbs_per_td, urb, i, mem_flags);
-               if (ret < 0)
-                       return ret;
+               if (ret < 0) {
+                       if (i == 0)
+                               return ret;
+                       goto cleanup;
+               }
 
-               urb_priv = urb->hcpriv;
                td = urb_priv->td[i];
-
                for (j = 0; j < trbs_per_td; j++) {
                        u32 remainder = 0;
                        field = TRB_TBC(burst_count) | TRB_TLBPC(residue);
@@ -3344,6 +3359,27 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
        giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id,
                        start_cycle, start_trb);
        return 0;
+cleanup:
+       /* Clean up a partially enqueued isoc transfer. */
+
+       for (i--; i >= 0; i--)
+               list_del_init(&urb_priv->td[i]->td_list);
+
+       /* Use the first TD as a temporary variable to turn the TDs we've queued
+        * into No-ops with a software-owned cycle bit. That way the hardware
+        * won't accidentally start executing bogus TDs when we partially
+        * overwrite them.  td->first_trb and td->start_seg are already set.
+        */
+       urb_priv->td[0]->last_trb = ep_ring->enqueue;
+       /* Every TRB except the first & last will have its cycle bit flipped. */
+       td_to_noop(xhci, ep_ring, urb_priv->td[0], true);
+
+       /* Reset the ring enqueue back to the first TRB and its cycle bit. */
+       ep_ring->enqueue = urb_priv->td[0]->first_trb;
+       ep_ring->enq_seg = urb_priv->td[0]->start_seg;
+       ep_ring->cycle_state = start_cycle;
+       usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb);
+       return ret;
 }
 
 /*
index 1c4432d8fc1048f2e3bb838b63a0be84201f2a82..3a0f695138f4195d4caf988c2fb0266ca7678e5f 100644 (file)
@@ -1085,8 +1085,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                if (urb->dev->speed == USB_SPEED_FULL) {
                        ret = xhci_check_maxpacket(xhci, slot_id,
                                        ep_index, urb);
-                       if (ret < 0)
+                       if (ret < 0) {
+                               xhci_urb_free_priv(xhci, urb_priv);
+                               urb->hcpriv = NULL;
                                return ret;
+                       }
                }
 
                /* We have a spinlock and interrupts disabled, so we must pass
@@ -1097,6 +1100,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                        goto dying;
                ret = xhci_queue_ctrl_tx(xhci, GFP_ATOMIC, urb,
                                slot_id, ep_index);
+               if (ret)
+                       goto free_priv;
                spin_unlock_irqrestore(&xhci->lock, flags);
        } else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) {
                spin_lock_irqsave(&xhci->lock, flags);
@@ -1117,6 +1122,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                        ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb,
                                        slot_id, ep_index);
                }
+               if (ret)
+                       goto free_priv;
                spin_unlock_irqrestore(&xhci->lock, flags);
        } else if (usb_endpoint_xfer_int(&urb->ep->desc)) {
                spin_lock_irqsave(&xhci->lock, flags);
@@ -1124,6 +1131,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                        goto dying;
                ret = xhci_queue_intr_tx(xhci, GFP_ATOMIC, urb,
                                slot_id, ep_index);
+               if (ret)
+                       goto free_priv;
                spin_unlock_irqrestore(&xhci->lock, flags);
        } else {
                spin_lock_irqsave(&xhci->lock, flags);
@@ -1131,18 +1140,22 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                        goto dying;
                ret = xhci_queue_isoc_tx_prepare(xhci, GFP_ATOMIC, urb,
                                slot_id, ep_index);
+               if (ret)
+                       goto free_priv;
                spin_unlock_irqrestore(&xhci->lock, flags);
        }
 exit:
        return ret;
 dying:
-       xhci_urb_free_priv(xhci, urb_priv);
-       urb->hcpriv = NULL;
        xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for "
                        "non-responsive xHCI host.\n",
                        urb->ep->desc.bEndpointAddress, urb);
+       ret = -ESHUTDOWN;
+free_priv:
+       xhci_urb_free_priv(xhci, urb_priv);
+       urb->hcpriv = NULL;
        spin_unlock_irqrestore(&xhci->lock, flags);
-       return -ESHUTDOWN;
+       return ret;
 }
 
 /* Get the right ring for the given URB.
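
The xhci_urb_enqueue() rework above funnels every failed queueing step through one free_priv label instead of duplicating the cleanup (previously urb_priv leaked when a queue_*_tx() call failed). Skeletal sketch of the single-exit pattern, with hypothetical names:

#include <stdio.h>
#include <stdlib.h>

static int enqueue(int should_fail)
{
        int ret = 0;
        void *priv = malloc(64);        /* per-URB private data */

        if (!priv)
                return -1;

        if (should_fail) {              /* any of the submission steps failing */
                ret = -2;
                goto free_priv;
        }
        return 0;                       /* queued: priv now owned by the transfer */

free_priv:
        free(priv);                     /* one cleanup site for every error path */
        return ret;
}

int main(void)
{
        printf("%d\n", enqueue(1));     /* prints -2, with priv freed */
        return 0;
}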
@@ -1239,6 +1252,13 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
        if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) {
                xhci_dbg(xhci, "HW died, freeing TD.\n");
                urb_priv = urb->hcpriv;
+               for (i = urb_priv->td_cnt; i < urb_priv->length; i++) {
+                       td = urb_priv->td[i];
+                       if (!list_empty(&td->td_list))
+                               list_del_init(&td->td_list);
+                       if (!list_empty(&td->cancelled_td_list))
+                               list_del_init(&td->cancelled_td_list);
+               }
 
                usb_hcd_unlink_urb_from_ep(hcd, urb);
                spin_unlock_irqrestore(&xhci->lock, flags);
index ae8c396177434a7cb6ea10e67893cee00a8ff5a3..5e7cfba5b079b02d781710c106cfbbe38f37a358 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
 
 #include <asm/cacheflush.h>
 
index 149f3f310a0a1f9903793ca5f56daaf1038a4024..318fb4e8a8850cb21789a4d9f9d89d23bd2fbd12 100644 (file)
@@ -226,8 +226,10 @@ static int cppi_controller_stop(struct dma_controller *c)
        struct cppi             *controller;
        void __iomem            *tibase;
        int                     i;
+       struct musb             *musb;
 
        controller = container_of(c, struct cppi, controller);
+       musb = controller->musb;
 
        tibase = controller->tibase;
        /* DISABLE INDIVIDUAL CHANNEL Interrupts */
@@ -289,9 +291,11 @@ cppi_channel_allocate(struct dma_controller *c,
        u8                      index;
        struct cppi_channel     *cppi_ch;
        void __iomem            *tibase;
+       struct musb             *musb;
 
        controller = container_of(c, struct cppi, controller);
        tibase = controller->tibase;
+       musb = controller->musb;
 
        /* ep0 doesn't use DMA; remember cppi indices are 0..N-1 */
        index = ep->epnum - 1;
@@ -339,7 +343,8 @@ static void cppi_channel_release(struct dma_channel *channel)
        c = container_of(channel, struct cppi_channel, channel);
        tibase = c->controller->tibase;
        if (!c->hw_ep)
-               dev_dbg(musb->controller, "releasing idle DMA channel %p\n", c);
+               dev_dbg(c->controller->musb->controller,
+                       "releasing idle DMA channel %p\n", c);
        else if (!c->transmit)
                core_rxirq_enable(tibase, c->index + 1);
 
@@ -357,10 +362,11 @@ cppi_dump_rx(int level, struct cppi_channel *c, const char *tag)
 
        musb_ep_select(base, c->index + 1);
 
-       DBG(level, "RX DMA%d%s: %d left, csr %04x, "
-                       "%08x H%08x S%08x C%08x, "
-                       "B%08x L%08x %08x .. %08x"
-                       "\n",
+       dev_dbg(c->controller->musb->controller,
+               "RX DMA%d%s: %d left, csr %04x, "
+               "%08x H%08x S%08x C%08x, "
+               "B%08x L%08x %08x .. %08x"
+               "\n",
                c->index, tag,
                musb_readl(c->controller->tibase,
                        DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->index),
@@ -387,10 +393,11 @@ cppi_dump_tx(int level, struct cppi_channel *c, const char *tag)
 
        musb_ep_select(base, c->index + 1);
 
-       DBG(level, "TX DMA%d%s: csr %04x, "
-                       "H%08x S%08x C%08x %08x, "
-                       "F%08x L%08x .. %08x"
-                       "\n",
+       dev_dbg(c->controller->musb->controller,
+               "TX DMA%d%s: csr %04x, "
+               "H%08x S%08x C%08x %08x, "
+               "F%08x L%08x .. %08x"
+               "\n",
                c->index, tag,
                musb_readw(c->hw_ep->regs, MUSB_TXCSR),
 
@@ -1022,6 +1029,7 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch)
        int                             i;
        dma_addr_t                      safe2ack;
        void __iomem                    *regs = rx->hw_ep->regs;
+       struct musb                     *musb = cppi->musb;
 
        cppi_dump_rx(6, rx, "/K");
 
index 668eeef601ae94f093fa925706afcbcc2b1f5e37..b3c065ab9dbc2f9aace5b36939390761fa0c5218 100644 (file)
@@ -172,7 +172,8 @@ enum musb_g_ep0_state {
 #endif
 
 /* TUSB mapping: "flat" plus ep0 special cases */
-#if    defined(CONFIG_USB_MUSB_TUSB6010)
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 #define musb_ep_select(_mbase, _epnum) \
        musb_writeb((_mbase), MUSB_INDEX, (_epnum))
 #define        MUSB_EP_OFFSET                  MUSB_TUSB_OFFSET
@@ -241,7 +242,8 @@ struct musb_hw_ep {
        void __iomem            *fifo;
        void __iomem            *regs;
 
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
        void __iomem            *conf;
 #endif
 
@@ -258,7 +260,8 @@ struct musb_hw_ep {
        struct dma_channel      *tx_channel;
        struct dma_channel      *rx_channel;
 
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
        /* TUSB has "asynchronous" and "synchronous" dma modes */
        dma_addr_t              fifo_async;
        dma_addr_t              fifo_sync;
@@ -356,7 +359,8 @@ struct musb {
        void __iomem            *ctrl_base;
        void __iomem            *mregs;
 
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
        dma_addr_t              async;
        dma_addr_t              sync;
        void __iomem            *sync_va;
index 8c41a2e6ea7702e8906771d3bf5e7810ad412937..e81820370d6f9cab155d1fac9ef83d4c8653c723 100644 (file)
@@ -1856,6 +1856,7 @@ int __init musb_gadget_setup(struct musb *musb)
 
        return 0;
 err:
+       musb->g.dev.parent = NULL;
        device_unregister(&musb->g.dev);
        return status;
 }
@@ -1863,7 +1864,8 @@ err:
 void musb_gadget_cleanup(struct musb *musb)
 {
        usb_del_gadget_udc(&musb->g);
-       device_unregister(&musb->g.dev);
+       if (musb->g.dev.parent)
+               device_unregister(&musb->g.dev);
 }
 
 /*
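
The musb_gadget hunk records a failed registration by clearing g.dev.parent, so musb_gadget_cleanup() can skip device_unregister() for a device that never fully registered. Sketch of that guard (simplified stand-ins, not the musb structures):

#include <stdio.h>

struct device { struct device *parent; };

static void device_unregister(struct device *d) { printf("unregister %p\n", (void *)d); }

static void gadget_cleanup(struct device *dev)
{
        if (dev->parent)        /* registration succeeded earlier */
                device_unregister(dev);
}

int main(void)
{
        struct device failed = { .parent = NULL };  /* the error path ran */

        gadget_cleanup(&failed);        /* prints nothing: no double teardown */
        return 0;
}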
index 82410703dcd3c1d87e688007c52cd737675d03d6..03f2655af290758cb708a67f832abd6257df2894 100644 (file)
 #define MUSB_TESTMODE          0x0F    /* 8 bit */
 
 /* Get offset for a given FIFO from musb->mregs */
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) ||       \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 #define MUSB_FIFO_OFFSET(epnum)        (0x200 + ((epnum) * 0x20))
 #else
 #define MUSB_FIFO_OFFSET(epnum)        (0x20 + ((epnum) * 4))
 #define MUSB_FLAT_OFFSET(_epnum, _offset)      \
        (0x100 + (0x10*(_epnum)) + (_offset))
 
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) ||       \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 /* TUSB6010 EP0 configuration register is special */
 #define MUSB_TUSB_OFFSET(_epnum, _offset)      \
        (0x10 + _offset)
index 9eec41fbf3a433e0ac250035a997df4440bef1ef..ec1480191f78752a1ff16b09566f3f4d2d6512b1 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/prefetch.h>
 #include <linux/usb.h>
 #include <linux/irq.h>
 #include <linux/platform_device.h>
index 07c8a73dfe41c4d565f8f36b2134bdc373e7b5bb..b67b4bc596c18953b5c9abf997fb3f25605be1fe 100644 (file)
@@ -20,6 +20,7 @@
 #include <plat/mux.h>
 
 #include "musb_core.h"
+#include "tusb6010.h"
 
 #define to_chdat(c)            ((struct tusb_omap_dma_ch *)(c)->private_data)
 
index cecace4118327c8a148ab976d1c8dfb234e7bf57..ef4333f4bbe021178501d14ae4edbcd5562c9dbe 100644 (file)
@@ -65,7 +65,8 @@ static void ux500_tx_work(struct work_struct *data)
        struct musb *musb = hw_ep->musb;
        unsigned long flags;
 
-       DBG(4, "DMA tx transfer done on hw_ep=%d\n", hw_ep->epnum);
+       dev_dbg(musb->controller, "DMA tx transfer done on hw_ep=%d\n",
+               hw_ep->epnum);
 
        spin_lock_irqsave(&musb->lock, flags);
        ux500_channel->channel.actual_len = ux500_channel->cur_len;
@@ -84,7 +85,8 @@ static void ux500_rx_work(struct work_struct *data)
        struct musb *musb = hw_ep->musb;
        unsigned long flags;
 
-       DBG(4, "DMA rx transfer done on hw_ep=%d\n", hw_ep->epnum);
+       dev_dbg(musb->controller, "DMA rx transfer done on hw_ep=%d\n",
+               hw_ep->epnum);
 
        spin_lock_irqsave(&musb->lock, flags);
        ux500_channel->channel.actual_len = ux500_channel->cur_len;
@@ -116,9 +118,11 @@ static bool ux500_configure_channel(struct dma_channel *channel,
        enum dma_slave_buswidth addr_width;
        dma_addr_t usb_fifo_addr = (MUSB_FIFO_OFFSET(hw_ep->epnum) +
                                        ux500_channel->controller->phy_base);
+       struct musb *musb = ux500_channel->controller->private_data;
 
-       DBG(4, "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n",
-                       packet_sz, mode, dma_addr, len, ux500_channel->is_tx);
+       dev_dbg(musb->controller,
+               "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n",
+               packet_sz, mode, dma_addr, len, ux500_channel->is_tx);
 
        ux500_channel->cur_len = len;
 
@@ -133,15 +137,13 @@ static bool ux500_configure_channel(struct dma_channel *channel,
                                        DMA_SLAVE_BUSWIDTH_4_BYTES;
 
        slave_conf.direction = direction;
-       if (direction == DMA_FROM_DEVICE) {
-               slave_conf.src_addr = usb_fifo_addr;
-               slave_conf.src_addr_width = addr_width;
-               slave_conf.src_maxburst = 16;
-       } else {
-               slave_conf.dst_addr = usb_fifo_addr;
-               slave_conf.dst_addr_width = addr_width;
-               slave_conf.dst_maxburst = 16;
-       }
+       slave_conf.src_addr = usb_fifo_addr;
+       slave_conf.src_addr_width = addr_width;
+       slave_conf.src_maxburst = 16;
+       slave_conf.dst_addr = usb_fifo_addr;
+       slave_conf.dst_addr_width = addr_width;
+       slave_conf.dst_maxburst = 16;
+
        dma_chan->device->device_control(dma_chan, DMA_SLAVE_CONFIG,
                                             (unsigned long) &slave_conf);
 
@@ -166,6 +168,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
        struct ux500_dma_controller *controller = container_of(c,
                        struct ux500_dma_controller, controller);
        struct ux500_dma_channel *ux500_channel = NULL;
+       struct musb *musb = controller->private_data;
        u8 ch_num = hw_ep->epnum - 1;
        u32 max_ch;
 
@@ -192,7 +195,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
        ux500_channel->hw_ep = hw_ep;
        ux500_channel->is_allocated = 1;
 
-       DBG(7, "hw_ep=%d, is_tx=0x%x, channel=%d\n",
+       dev_dbg(musb->controller, "hw_ep=%d, is_tx=0x%x, channel=%d\n",
                hw_ep->epnum, is_tx, ch_num);
 
        return &(ux500_channel->channel);
@@ -201,8 +204,9 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
 static void ux500_dma_channel_release(struct dma_channel *channel)
 {
        struct ux500_dma_channel *ux500_channel = channel->private_data;
+       struct musb *musb = ux500_channel->controller->private_data;
 
-       DBG(7, "channel=%d\n", ux500_channel->ch_num);
+       dev_dbg(musb->controller, "channel=%d\n", ux500_channel->ch_num);
 
        if (ux500_channel->is_allocated) {
                ux500_channel->is_allocated = 0;
@@ -252,8 +256,8 @@ static int ux500_dma_channel_abort(struct dma_channel *channel)
        void __iomem *epio = musb->endpoints[ux500_channel->hw_ep->epnum].regs;
        u16 csr;
 
-       DBG(4, "channel=%d, is_tx=%d\n", ux500_channel->ch_num,
-                                               ux500_channel->is_tx);
+       dev_dbg(musb->controller, "channel=%d, is_tx=%d\n",
+               ux500_channel->ch_num, ux500_channel->is_tx);
 
        if (channel->status == MUSB_DMA_STATUS_BUSY) {
                if (ux500_channel->is_tx) {
index 78a2cf9551cc725d220c76b13420dcd21fc78716..5fc13e717911708fa3f8b1d443b190dad2ecadaf 100644 (file)
@@ -101,6 +101,7 @@ static int   ftdi_jtag_probe(struct usb_serial *serial);
 static int   ftdi_mtxorb_hack_setup(struct usb_serial *serial);
 static int   ftdi_NDI_device_setup(struct usb_serial *serial);
 static int   ftdi_stmclite_probe(struct usb_serial *serial);
+static int   ftdi_8u2232c_probe(struct usb_serial *serial);
 static void  ftdi_USB_UIRT_setup(struct ftdi_private *priv);
 static void  ftdi_HE_TIRA1_setup(struct ftdi_private *priv);
 
@@ -128,6 +129,10 @@ static struct ftdi_sio_quirk ftdi_stmclite_quirk = {
        .probe  = ftdi_stmclite_probe,
 };
 
+static struct ftdi_sio_quirk ftdi_8u2232c_quirk = {
+       .probe  = ftdi_8u2232c_probe,
+};
+
 /*
  * The 8U232AM has the same API as the sio except for:
  * - it can support MUCH higher baudrates; up to:
@@ -178,7 +183,8 @@ static struct usb_device_id id_table_combined [] = {
        { USB_DEVICE(FTDI_VID, FTDI_8U232AM_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_8U232AM_ALT_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_232RL_PID) },
-       { USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) ,
+               .driver_info = (kernel_ulong_t)&ftdi_8u2232c_quirk },
        { USB_DEVICE(FTDI_VID, FTDI_4232H_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_232H_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_MICRO_CHAMELEON_PID) },
@@ -1737,6 +1743,18 @@ static int ftdi_jtag_probe(struct usb_serial *serial)
        return 0;
 }
 
+static int ftdi_8u2232c_probe(struct usb_serial *serial)
+{
+       struct usb_device *udev = serial->dev;
+
+       dbg("%s", __func__);
+
+       if (strcmp(udev->manufacturer, "CALAO Systems") == 0)
+               return ftdi_jtag_probe(serial);
+
+       return 0;
+}
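
One hedged caveat on ftdi_8u2232c_probe() as shown: udev->manufacturer is populated from an optional string descriptor and may be NULL on devices that lack one, so a defensive variant would test it before strcmp(). Hypothetical sketch, not part of this patch:

#include <stdio.h>
#include <string.h>

static int is_calao(const char *manufacturer)
{
        return manufacturer && strcmp(manufacturer, "CALAO Systems") == 0;
}

int main(void)
{
        printf("%d %d\n", is_calao(NULL), is_calao("CALAO Systems"));   /* 0 1 */
        return 0;
}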
+
 /*
  * First and second ports on STMCLite adaptors are reserved for the JTAG
  * interface and the fourth port for PIO
index 815656198914ddee0a4e5ca51fc5b2786ba5a94a..fe22e90bc879551a912ebcbcbf974a83c4ae0cb9 100644 (file)
@@ -148,6 +148,8 @@ static void option_instat_callback(struct urb *urb);
 #define HUAWEI_PRODUCT_K4505                   0x1464
 #define HUAWEI_PRODUCT_K3765                   0x1465
 #define HUAWEI_PRODUCT_E14AC                   0x14AC
+#define HUAWEI_PRODUCT_K3806                   0x14AE
+#define HUAWEI_PRODUCT_K4605                   0x14C6
 #define HUAWEI_PRODUCT_K3770                   0x14C9
 #define HUAWEI_PRODUCT_K3771                   0x14CA
 #define HUAWEI_PRODUCT_K4510                   0x14CB
@@ -416,6 +418,56 @@ static void option_instat_callback(struct urb *urb);
 #define SAMSUNG_VENDOR_ID                       0x04e8
 #define SAMSUNG_PRODUCT_GT_B3730                0x6889
 
+/* YUGA products www.yuga-info.com */
+#define YUGA_VENDOR_ID                         0x257A
+#define YUGA_PRODUCT_CEM600                    0x1601
+#define YUGA_PRODUCT_CEM610                    0x1602
+#define YUGA_PRODUCT_CEM500                    0x1603
+#define YUGA_PRODUCT_CEM510                    0x1604
+#define YUGA_PRODUCT_CEM800                    0x1605
+#define YUGA_PRODUCT_CEM900                    0x1606
+
+#define YUGA_PRODUCT_CEU818                    0x1607
+#define YUGA_PRODUCT_CEU816                    0x1608
+#define YUGA_PRODUCT_CEU828                    0x1609
+#define YUGA_PRODUCT_CEU826                    0x160A
+#define YUGA_PRODUCT_CEU518                    0x160B
+#define YUGA_PRODUCT_CEU516                    0x160C
+#define YUGA_PRODUCT_CEU528                    0x160D
+#define YUGA_PRODUCT_CEU526                    0x160F
+
+#define YUGA_PRODUCT_CWM600                    0x2601
+#define YUGA_PRODUCT_CWM610                    0x2602
+#define YUGA_PRODUCT_CWM500                    0x2603
+#define YUGA_PRODUCT_CWM510                    0x2604
+#define YUGA_PRODUCT_CWM800                    0x2605
+#define YUGA_PRODUCT_CWM900                    0x2606
+
+#define YUGA_PRODUCT_CWU718                    0x2607
+#define YUGA_PRODUCT_CWU716                    0x2608
+#define YUGA_PRODUCT_CWU728                    0x2609
+#define YUGA_PRODUCT_CWU726                    0x260A
+#define YUGA_PRODUCT_CWU518                    0x260B
+#define YUGA_PRODUCT_CWU516                    0x260C
+#define YUGA_PRODUCT_CWU528                    0x260D
+#define YUGA_PRODUCT_CWU526                    0x260F
+
+#define YUGA_PRODUCT_CLM600                    0x2601
+#define YUGA_PRODUCT_CLM610                    0x2602
+#define YUGA_PRODUCT_CLM500                    0x2603
+#define YUGA_PRODUCT_CLM510                    0x2604
+#define YUGA_PRODUCT_CLM800                    0x2605
+#define YUGA_PRODUCT_CLM900                    0x2606
+
+#define YUGA_PRODUCT_CLU718                    0x2607
+#define YUGA_PRODUCT_CLU716                    0x2608
+#define YUGA_PRODUCT_CLU728                    0x2609
+#define YUGA_PRODUCT_CLU726                    0x260A
+#define YUGA_PRODUCT_CLU518                    0x260B
+#define YUGA_PRODUCT_CLU516                    0x260C
+#define YUGA_PRODUCT_CLU528                    0x260D
+#define YUGA_PRODUCT_CLU526                    0x260F
+
 /* some device interfaces need special handling for a number of reasons */
 enum option_blacklist_reason {
                OPTION_BLACKLIST_NONE = 0,
@@ -551,6 +603,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) },
@@ -1005,6 +1059,48 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */
        { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */
        { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem. */
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM500) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM510) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM800) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM900) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU818) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU816) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU828) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU826) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU518) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU516) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU528) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU526) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM600) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM610) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM500) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM510) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM800) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM900) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU718) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU716) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU728) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU726) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU518) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU516) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU528) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU526) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM600) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM610) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM500) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM510) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM800) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM900) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU718) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU716) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU728) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU726) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU518) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU516) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU528) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU526) },
        { } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
@@ -1134,11 +1230,13 @@ static int option_probe(struct usb_serial *serial,
                serial->interface->cur_altsetting->desc.bInterfaceClass != 0xff)
                return -ENODEV;
 
-       /* Don't bind network interfaces on Huawei K3765 & K4505 */
+       /* Don't bind network interfaces on Huawei K3765, K4505 & K4605 */
        if (serial->dev->descriptor.idVendor == HUAWEI_VENDOR_ID &&
                (serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 ||
-                       serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505) &&
-               serial->interface->cur_altsetting->desc.bInterfaceNumber == 1)
+                       serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505 ||
+                       serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4605) &&
+               (serial->interface->cur_altsetting->desc.bInterfaceNumber == 1 ||
+                       serial->interface->cur_altsetting->desc.bInterfaceNumber == 2))
                return -ENODEV;
 
        /* Don't bind network interface on Samsung GT-B3730, it is handled by a separate module */
index 05a8832bb3eb360d2d835e5281323d978d078500..d06886a2bfb564a4e1651083835037d79dbca8f1 100644 (file)
@@ -1009,4 +1009,4 @@ module_exit(adp8870_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("ADP8870 Backlight driver");
-MODULE_ALIAS("platform:adp8870-backlight");
+MODULE_ALIAS("i2c:adp8870-backlight");
index 9f1e389d51d2f8fac9337bf6be305705b2fd6b07..b0582917f0c8f71e879ed1be66e84897fb00ca38 100644 (file)
@@ -11,7 +11,7 @@
  * BRIGHT, on the Cirrus EP9307, EP9312, and EP9315 processors.
  */
 
-
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/fb.h>
index b8f38ec6eb1898ab0b60d606ec1f3998e11fe339..8b5b2a4124c7980129be8146f881772f5c33fa97 100644 (file)
@@ -28,6 +28,8 @@ struct pwm_bl_data {
        unsigned int            lth_brightness;
        int                     (*notify)(struct device *,
                                          int brightness);
+       void                    (*notify_after)(struct device *,
+                                       int brightness);
        int                     (*check_fb)(struct device *, struct fb_info *);
 };
 
@@ -55,6 +57,10 @@ static int pwm_backlight_update_status(struct backlight_device *bl)
                pwm_config(pb->pwm, brightness, pb->period);
                pwm_enable(pb->pwm);
        }
+
+       if (pb->notify_after)
+               pb->notify_after(pb->dev, brightness);
+
        return 0;
 }
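
The pwm_bl change adds an optional notify_after() hook that fires once the new level has been programmed, mirroring the existing pre-update notify(). Sketch of the NULL-checked hook pair (structures simplified):

#include <stdio.h>

struct bl {
        int  (*notify)(int brightness);         /* optional: may adjust the level */
        void (*notify_after)(int brightness);   /* optional: runs after the update */
};

static void applied(int b) { printf("applied %d\n", b); }

static void update_status(struct bl *bl, int brightness)
{
        if (bl->notify)
                brightness = bl->notify(brightness);
        /* ... reprogram the PWM with the (possibly adjusted) level ... */
        if (bl->notify_after)
                bl->notify_after(brightness);
}

int main(void)
{
        struct bl bl = { .notify = NULL, .notify_after = applied };

        update_status(&bl, 42);         /* prints "applied 42" */
        return 0;
}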
 
@@ -105,6 +111,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
        pb->period = data->pwm_period_ns;
        pb->notify = data->notify;
+       pb->notify_after = data->notify_after;
        pb->check_fb = data->check_fb;
        pb->lth_brightness = data->lth_brightness *
                (data->pwm_period_ns / data->max_brightness);
@@ -172,6 +179,8 @@ static int pwm_backlight_suspend(struct platform_device *pdev,
                pb->notify(pb->dev, 0);
        pwm_config(pb->pwm, 0, pb->period);
        pwm_disable(pb->pwm);
+       if (pb->notify_after)
+               pb->notify_after(pb->dev, 0);
        return 0;
 }
 
index 02bf7bf7160bcbb79e2d94b36e4fa32022ff3be0..b5abaae38e97702c6a54c3c6a95b4d2149f7fbeb 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     dscore.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -1024,5 +1024,5 @@ module_init(ds_init);
 module_exit(ds_fini);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("DS2490 USB <-> W1 bus master driver (DS9490*)");
index 334d1ccf9c922618da2b5addab4bd46015263037..f667c26b219571e4409b167985605d79c56d9e8b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     matrox_w1.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -39,7 +39,7 @@
 #include "../w1_log.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for transport (Dallas 1-wire protocol) over VGA DDC (matrox gpio).");
 
 static struct pci_device_id matrox_w1_tbl[] = {
index c37781899d90369b9bae86e58af2910ab4243666..7c8cdb8aed26048895465b72c9df1bdc8fccd887 100644 (file)
@@ -373,7 +373,7 @@ static int w1_f29_add_slave(struct w1_slave *sl)
 static void w1_f29_remove_slave(struct w1_slave *sl)
 {
        int i;
-       for (i = NB_SYSFS_BIN_FILES; i <= 0; --i)
+       for (i = NB_SYSFS_BIN_FILES - 1; i >= 0; --i)
                sysfs_remove_bin_file(&sl->dev.kobj,
                        &(w1_f29_sysfs_bin_files[i]));
 }
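
The w1 fix above is a classic inverted loop: with "i = N; i <= 0; --i" the body never runs for N > 0, so the sysfs files were never removed. Correct reverse walk over an N-element array, for comparison:

#include <stdio.h>

#define N 6

int main(void)
{
        for (int i = N - 1; i >= 0; --i)
                printf("remove file %d\n", i);  /* visits 5..0 exactly once */
        return 0;
}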
index cc8c02e92593f813c8c94961f213ebb288427c9b..84655625c8705108df61d703ff5356e764c1d536 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_smem.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -32,7 +32,7 @@
 #include "../w1_family.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, 64bit memory family.");
 
 static struct w1_family w1_smem_family_01 = {
index 402928b135d19c062a38a9008b0587a144b091d2..a1ef9b5b38cfdd2cd0f2e5322a78fb445d5f1df4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_therm.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -34,7 +34,7 @@
 #include "../w1_family.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, temperature family.");
 
 /* Allow the strong pullup to be disabled, but default to enabled.
index 6c136c19e982ad98246511cb019d5680b8889dfb..c374978238515cd6a0e23b464076be91a9fcb744 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@
 #include "w1_netlink.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol.");
 
 static int w1_timeout = 10;
index 1ce23fc6186c36def2f0ed2608fb0b010ab32501..4d012ca3f32c42a27c2ccd54b9b506907abd597e 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 4a099041f28a8a80a73b0d4d63da6be496efbdc8..63359797c8b199abcf62d09122f37981bca7cc41 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_family.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 98a1ac0f4693a634898c22d00a9cf3eb89219b57..490cda2281bc924d8f7de8c105bd4cad1abdef10 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_family.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index b50be3f1073d0fc92af534cc61dc757408e57a5b..d220bce2cee4d242532cb4d6b84114b0e45ea662 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_int.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 4274082d22629192e6379059496d631ecf4458bb..2ad7d4414bed8b9052eb44aac75d43b04a9a9c81 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_int.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 8e8b64cfafb69a417bded8e6f3f745ca3a7e44a1..765b37b62a4f608ceb062630ebae1afc6c5448fa 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_io.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index e6ab7cf08f8885d07cbfa222420d5c621c6039c3..9c7bd62e6bdc05fa5fffcc135b65d6e3c9dc3187 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     w1_log.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 55aabd927c60557e82c01a3eb11d92cd9bcd18c1..40788c925d1c9b4d847e6572f5a8510ac402ac71 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * w1_netlink.c
  *
- * Copyright (c) 2003 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2003 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 27e950f935b18040437ae3e64019dde69fc4e273..b0922dc29658a3fe1d19e8b17942e2521009beb0 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * w1_netlink.h
  *
- * Copyright (c) 2003 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2003 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
index 0b48d018e38adbc1a72f78a4ab851626f2fffca8..58b1da4598933ca9990494121234be6dcdc35bc0 100644 (file)
@@ -1675,11 +1675,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 }
 #endif /* HAVE_SET_RESTORE_SIGMASK */
 
-long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
-{
-       return sys_ni_syscall();
-}
-
 #ifdef CONFIG_EPOLL
 
 #ifdef HAVE_SET_RESTORE_SIGMASK
index 640fc229df10323b6c9fc94b0bda452157bf1ef0..168a80f7f12b856e795eb916bf79f5e2348e06f6 100644 (file)
@@ -1358,6 +1358,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
        if (outarg.namelen > FUSE_NAME_MAX)
                goto err;
 
+       err = -EINVAL;
+       if (size != sizeof(outarg) + outarg.namelen + 1)
+               goto err;
+
        name.name = buf;
        name.len = outarg.namelen;
        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
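
fuse_notify_inval_entry() now insists the request length be exactly the fixed header plus the name and its terminating NUL, rejecting both truncated and padded messages. Sketch of the check (struct layout as assumed here from the FUSE ABI):

#include <stdio.h>
#include <stdint.h>

struct inval_entry_out { uint64_t parent; uint32_t namelen; uint32_t padding; };

static int valid_size(size_t size, const struct inval_entry_out *arg)
{
        return size == sizeof(*arg) + arg->namelen + 1;
}

int main(void)
{
        struct inval_entry_out out = { .parent = 1, .namelen = 3 };

        printf("%d\n", valid_size(sizeof(out) + 4, &out));   /* 1: "abc" + NUL */
        printf("%d\n", valid_size(sizeof(out) + 9, &out));   /* 0: trailing junk */
        return 0;
}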
index d480d9af46c964f8b86ced68feb7e787959a4fdb..594f07a81c2899ba33a173be33cfc818afa0d39b 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/compat.h>
+#include <linux/swap.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -245,6 +246,12 @@ void fuse_release_common(struct file *file, int opcode)
        req = ff->reserved_req;
        fuse_prepare_release(ff, file->f_flags, opcode);
 
+       if (ff->flock) {
+               struct fuse_release_in *inarg = &req->misc.release.in;
+               inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
+               inarg->lock_owner = fuse_lock_owner_id(ff->fc,
+                                                      (fl_owner_t) file);
+       }
        /* Hold vfsmount and dentry until release is finished */
        path_get(&file->f_path);
        req->misc.release.path = file->f_path;
@@ -755,18 +762,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
        return req->misc.write.out.size;
 }
 
-static int fuse_write_begin(struct file *file, struct address_space *mapping,
-                       loff_t pos, unsigned len, unsigned flags,
-                       struct page **pagep, void **fsdata)
-{
-       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-
-       *pagep = grab_cache_page_write_begin(mapping, index, flags);
-       if (!*pagep)
-               return -ENOMEM;
-       return 0;
-}
-
 void fuse_write_update_size(struct inode *inode, loff_t pos)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -779,62 +774,6 @@ void fuse_write_update_size(struct inode *inode, loff_t pos)
        spin_unlock(&fc->lock);
 }
 
-static int fuse_buffered_write(struct file *file, struct inode *inode,
-                              loff_t pos, unsigned count, struct page *page)
-{
-       int err;
-       size_t nres;
-       struct fuse_conn *fc = get_fuse_conn(inode);
-       unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-       struct fuse_req *req;
-
-       if (is_bad_inode(inode))
-               return -EIO;
-
-       /*
-        * Make sure writepages on the same page are not mixed up with
-        * plain writes.
-        */
-       fuse_wait_on_page_writeback(inode, page->index);
-
-       req = fuse_get_req(fc);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       req->in.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_offset = offset;
-       nres = fuse_send_write(req, file, pos, count, NULL);
-       err = req->out.h.error;
-       fuse_put_request(fc, req);
-       if (!err && !nres)
-               err = -EIO;
-       if (!err) {
-               pos += nres;
-               fuse_write_update_size(inode, pos);
-               if (count == PAGE_CACHE_SIZE)
-                       SetPageUptodate(page);
-       }
-       fuse_invalidate_attr(inode);
-       return err ? err : nres;
-}
-
-static int fuse_write_end(struct file *file, struct address_space *mapping,
-                       loff_t pos, unsigned len, unsigned copied,
-                       struct page *page, void *fsdata)
-{
-       struct inode *inode = mapping->host;
-       int res = 0;
-
-       if (copied)
-               res = fuse_buffered_write(file, inode, pos, copied, page);
-
-       unlock_page(page);
-       page_cache_release(page);
-       return res;
-}
-
 static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
                                    struct inode *inode, loff_t pos,
                                    size_t count)
@@ -908,6 +847,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                pagefault_enable();
                flush_dcache_page(page);
 
+               mark_page_accessed(page);
+
                if (!tmp) {
                        unlock_page(page);
                        page_cache_release(page);
@@ -1559,11 +1500,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
        struct fuse_conn *fc = get_fuse_conn(inode);
        int err;
 
-       if (fc->no_lock) {
+       if (fc->no_flock) {
                err = flock_lock_file_wait(file, fl);
        } else {
+               struct fuse_file *ff = file->private_data;
+
                /* emulate flock with POSIX locks */
                fl->fl_owner = (fl_owner_t) file;
+               ff->flock = true;
                err = fuse_setlk(file, fl, 1);
        }
 
@@ -2201,8 +2145,6 @@ static const struct address_space_operations fuse_file_aops  = {
        .readpage       = fuse_readpage,
        .writepage      = fuse_writepage,
        .launder_page   = fuse_launder_page,
-       .write_begin    = fuse_write_begin,
-       .write_end      = fuse_write_end,
        .readpages      = fuse_readpages,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .bmap           = fuse_bmap,
index c6aa2d4b851733a250205734215c785be6944507..cf6db0a932192c30d6dc65a327cca84e7258a5dc 100644 (file)
@@ -135,6 +135,9 @@ struct fuse_file {
 
        /** Wait queue head for poll */
        wait_queue_head_t poll_wait;
+
+       /** Has flock been performed on this file? */
+       bool flock:1;
 };
 
 /** One input argument of a request */
@@ -448,7 +451,7 @@ struct fuse_conn {
        /** Is removexattr not implemented by fs? */
        unsigned no_removexattr:1;
 
-       /** Are file locking primitives not implemented by fs? */
+       /** Are posix file locking primitives not implemented by fs? */
        unsigned no_lock:1;
 
        /** Is access not implemented by fs? */
@@ -472,6 +475,9 @@ struct fuse_conn {
        /** Don't apply umask to creation modes */
        unsigned dont_mask:1;
 
+       /** Are BSD file locking primitives not implemented by fs? */
+       unsigned no_flock:1;
+
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
 
index 38f84cd48b67d057798f8f75fe5c8f22f12b10dc..12b502929da9c51ef78f5ee6d53948e1f5b991a6 100644 (file)
@@ -71,7 +71,7 @@ struct fuse_mount_data {
        unsigned blksize;
 };
 
-struct fuse_forget_link *fuse_alloc_forget()
+struct fuse_forget_link *fuse_alloc_forget(void)
 {
        return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
 }
@@ -809,6 +809,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                fc->async_read = 1;
                        if (!(arg->flags & FUSE_POSIX_LOCKS))
                                fc->no_lock = 1;
+                       if (arg->minor >= 17) {
+                               if (!(arg->flags & FUSE_FLOCK_LOCKS))
+                                       fc->no_flock = 1;
+                       }
                        if (arg->flags & FUSE_ATOMIC_O_TRUNC)
                                fc->atomic_o_trunc = 1;
                        if (arg->minor >= 9) {
@@ -823,6 +827,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                } else {
                        ra_pages = fc->max_read / PAGE_CACHE_SIZE;
                        fc->no_lock = 1;
+                       fc->no_flock = 1;
                }
 
                fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
@@ -843,7 +848,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
        arg->minor = FUSE_KERNEL_MINOR_VERSION;
        arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
        arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-               FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
+               FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+               FUSE_FLOCK_LOCKS;
        req->in.h.opcode = FUSE_INIT;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(*arg);
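
The FUSE_FLOCK_LOCKS plumbing is a feature negotiation: the client advertises the flag in fuse_send_init(), and only a server speaking minor >= 17 that echoes it gets real flock() support; older servers keep the existing POSIX-lock emulation. Sketch of the decision the hunks above implement (flag value as assumed from fuse.h):

#include <stdio.h>

#define FUSE_FLOCK_LOCKS  (1 << 10)

/* mirrors process_init_reply(): only a minor >= 17 server that omits the
 * flag is marked as lacking flock; older servers leave no_flock at 0 so
 * the POSIX-lock emulation path is preserved. */
static int no_flock(unsigned minor, unsigned flags)
{
        return minor >= 17 && !(flags & FUSE_FLOCK_LOCKS);
}

int main(void)
{
        printf("%d\n", no_flock(17, 0));    /* 1: new server without flock */
        printf("%d\n", no_flock(16, 0));    /* 0: old server, keep emulation */
        return 0;
}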
index 87b6e0421c12b8a44b75bf0cf79c198893303552..ec889538e5a6afe4014922e9468845195ea56193 100644 (file)
@@ -491,6 +491,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
                        inode->i_op = &page_symlink_inode_operations;
                        break;
                }
+               lockdep_annotate_inode_mutex_key(inode);
        }
        return inode;
 }
index 73920d555c8890b2bd0fd208ef2b469af142e358..ec7924696a139870c70a0971c8d2d1483a747ade 100644 (file)
@@ -848,16 +848,9 @@ struct inode *new_inode(struct super_block *sb)
 }
 EXPORT_SYMBOL(new_inode);
 
-/**
- * unlock_new_inode - clear the I_NEW state and wake up any waiters
- * @inode:     new inode to unlock
- *
- * Called when the inode is fully initialised to clear the new state of the
- * inode and wake up anyone waiting for the inode to finish initialisation.
- */
-void unlock_new_inode(struct inode *inode)
-{
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
+void lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
        if (S_ISDIR(inode->i_mode)) {
                struct file_system_type *type = inode->i_sb->s_type;
 
@@ -873,7 +866,20 @@ void unlock_new_inode(struct inode *inode)
                                          &type->i_mutex_dir_key);
                }
        }
+}
+EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
 #endif
+
+/**
+ * unlock_new_inode - clear the I_NEW state and wake up any waiters
+ * @inode:     new inode to unlock
+ *
+ * Called when the inode is fully initialised to clear the new state of the
+ * inode and wake up anyone waiting for the inode to finish initialisation.
+ */
+void unlock_new_inode(struct inode *inode)
+{
+       lockdep_annotate_inode_mutex_key(inode);
        spin_lock(&inode->i_lock);
        WARN_ON(!(inode->i_state & I_NEW));
        inode->i_state &= ~I_NEW;
index 75bb316529ddabb27855025fd59832bc4a52a4c8..427a4e82a588759dbfb49394f73eca9400d455e7 100644 (file)
 # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
-ccflags-y := -I$(src) -I$(src)/linux-2.6
-ccflags-$(CONFIG_XFS_DEBUG) += -g
+ccflags-y += -I$(src)                  # needed for trace events
 
-XFS_LINUX := linux-2.6
+ccflags-$(CONFIG_XFS_DEBUG) += -g
 
 obj-$(CONFIG_XFS_FS)           += xfs.o
 
-xfs-y                          += linux-2.6/xfs_trace.o
-
-xfs-$(CONFIG_XFS_QUOTA)                += $(addprefix quota/, \
-                                  xfs_dquot.o \
-                                  xfs_dquot_item.o \
-                                  xfs_trans_dquot.o \
-                                  xfs_qm_syscalls.o \
-                                  xfs_qm_bhv.o \
-                                  xfs_qm.o)
-xfs-$(CONFIG_XFS_QUOTA)                += linux-2.6/xfs_quotaops.o
-
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS)          += quota/xfs_qm_stats.o
-endif
-
-xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL)    += $(XFS_LINUX)/xfs_acl.o
-xfs-$(CONFIG_PROC_FS)          += $(XFS_LINUX)/xfs_stats.o
-xfs-$(CONFIG_SYSCTL)           += $(XFS_LINUX)/xfs_sysctl.o
-xfs-$(CONFIG_COMPAT)           += $(XFS_LINUX)/xfs_ioctl32.o
+# this one should be compiled first, as the tracing macros can easily blow up
+xfs-y                          += xfs_trace.o
 
+# highlevel code
+xfs-y                          += xfs_aops.o \
+                                  xfs_bit.o \
+                                  xfs_buf.o \
+                                  xfs_dfrag.o \
+                                  xfs_discard.o \
+                                  xfs_error.o \
+                                  xfs_export.o \
+                                  xfs_file.o \
+                                  xfs_filestream.o \
+                                  xfs_fsops.o \
+                                  xfs_fs_subr.o \
+                                  xfs_globals.o \
+                                  xfs_iget.o \
+                                  xfs_ioctl.o \
+                                  xfs_iomap.o \
+                                  xfs_iops.o \
+                                  xfs_itable.o \
+                                  xfs_message.o \
+                                  xfs_mru_cache.o \
+                                  xfs_super.o \
+                                  xfs_sync.o \
+                                  xfs_xattr.o \
+                                  xfs_rename.o \
+                                  xfs_rw.o \
+                                  xfs_utils.o \
+                                  xfs_vnodeops.o \
+                                  kmem.o \
+                                  uuid.o
 
+# code shared with libxfs
 xfs-y                          += xfs_alloc.o \
                                   xfs_alloc_btree.o \
                                   xfs_attr.o \
                                   xfs_attr_leaf.o \
-                                  xfs_bit.o \
                                   xfs_bmap.o \
                                   xfs_bmap_btree.o \
                                   xfs_btree.o \
-                                  xfs_buf_item.o \
                                   xfs_da_btree.o \
                                   xfs_dir2.o \
                                   xfs_dir2_block.o \
@@ -61,49 +70,37 @@ xfs-y                               += xfs_alloc.o \
                                   xfs_dir2_leaf.o \
                                   xfs_dir2_node.o \
                                   xfs_dir2_sf.o \
-                                  xfs_error.o \
-                                  xfs_extfree_item.o \
-                                  xfs_filestream.o \
-                                  xfs_fsops.o \
                                   xfs_ialloc.o \
                                   xfs_ialloc_btree.o \
-                                  xfs_iget.o \
                                   xfs_inode.o \
-                                  xfs_inode_item.o \
-                                  xfs_iomap.o \
-                                  xfs_itable.o \
-                                  xfs_dfrag.o \
-                                  xfs_log.o \
-                                  xfs_log_cil.o \
                                   xfs_log_recover.o \
                                   xfs_mount.o \
-                                  xfs_mru_cache.o \
-                                  xfs_rename.o \
-                                  xfs_trans.o \
+                                  xfs_trans.o
+
+# low-level transaction/log code
+xfs-y                          += xfs_log.o \
+                                  xfs_log_cil.o \
+                                  xfs_buf_item.o \
+                                  xfs_extfree_item.o \
+                                  xfs_inode_item.o \
                                   xfs_trans_ail.o \
                                   xfs_trans_buf.o \
                                   xfs_trans_extfree.o \
                                   xfs_trans_inode.o \
-                                  xfs_utils.o \
-                                  xfs_vnodeops.o \
-                                  xfs_rw.o
-
-# Objects in linux/
-xfs-y                          += $(addprefix $(XFS_LINUX)/, \
-                                  kmem.o \
-                                  xfs_aops.o \
-                                  xfs_buf.o \
-                                  xfs_discard.o \
-                                  xfs_export.o \
-                                  xfs_file.o \
-                                  xfs_fs_subr.o \
-                                  xfs_globals.o \
-                                  xfs_ioctl.o \
-                                  xfs_iops.o \
-                                  xfs_message.o \
-                                  xfs_super.o \
-                                  xfs_sync.o \
-                                  xfs_xattr.o)
 
-# Objects in support/
-xfs-y                          += support/uuid.o
+# optional features
+xfs-$(CONFIG_XFS_QUOTA)                += xfs_dquot.o \
+                                  xfs_dquot_item.o \
+                                  xfs_trans_dquot.o \
+                                  xfs_qm_syscalls.o \
+                                  xfs_qm_bhv.o \
+                                  xfs_qm.o \
+                                  xfs_quotaops.o
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS)          += xfs_qm_stats.o
+endif
+xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
+xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
+xfs-$(CONFIG_PROC_FS)          += xfs_stats.o
+xfs-$(CONFIG_SYSCTL)           += xfs_sysctl.o
+xfs-$(CONFIG_COMPAT)           += xfs_ioctl32.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
new file mode 100644 (file)
index 0000000..a907de5
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include "time.h"
+#include "kmem.h"
+#include "xfs_message.h"
+
+/*
+ * Greedy allocation.  May fail and may return vmalloced memory.
+ *
+ * Must be freed using kmem_free_large.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+       void            *ptr;
+       size_t          kmsize = maxsize;
+
+       while (!(ptr = kmem_zalloc_large(kmsize))) {
+               if ((kmsize >>= 1) <= minsize)
+                       kmsize = minsize;
+       }
+       if (ptr)
+               *size = kmsize;
+       return ptr;
+}
+
+void *
+kmem_alloc(size_t size, unsigned int __nocast flags)
+{
+       int     retries = 0;
+       gfp_t   lflags = kmem_flags_convert(flags);
+       void    *ptr;
+
+       do {
+               ptr = kmalloc(size, lflags);
+               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+                       return ptr;
+               if (!(++retries % 100))
+                       xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, lflags);
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+       } while (1);
+}
+
+void *
+kmem_zalloc(size_t size, unsigned int __nocast flags)
+{
+       void    *ptr;
+
+       ptr = kmem_alloc(size, flags);
+       if (ptr)
+               memset((char *)ptr, 0, (int)size);
+       return ptr;
+}
+
+void
+kmem_free(const void *ptr)
+{
+       if (!is_vmalloc_addr(ptr)) {
+               kfree(ptr);
+       } else {
+               vfree(ptr);
+       }
+}
+
+void *
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
+            unsigned int __nocast flags)
+{
+       void    *new;
+
+       new = kmem_alloc(newsize, flags);
+       if (ptr) {
+               if (new)
+                       memcpy(new, ptr,
+                               ((oldsize < newsize) ? oldsize : newsize));
+               kmem_free(ptr);
+       }
+       return new;
+}
+
+void *
+kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
+{
+       int     retries = 0;
+       gfp_t   lflags = kmem_flags_convert(flags);
+       void    *ptr;
+
+       do {
+               ptr = kmem_cache_alloc(zone, lflags);
+               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+                       return ptr;
+               if (!(++retries % 100))
+                       xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, lflags);
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+       } while (1);
+}
+
+void *
+kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
+{
+       void    *ptr;
+
+       ptr = kmem_zone_alloc(zone, flags);
+       if (ptr)
+               memset((char *)ptr, 0, kmem_cache_size(zone));
+       return ptr;
+}
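For orientation, a hedged usage sketch of the interfaces above; the sizes are illustrative, and greedy allocations must be released with kmem_free_large() since they may come from vmalloc():

/* Hedged sketch: an illustrative caller of the kmem interfaces. */
static int demo_alloc(void)
{
        size_t  size;
        void    *buf;

        /* Ask for up to 16 pages, backing off toward one page. */
        buf = kmem_zalloc_greedy(&size, PAGE_SIZE, 16 * PAGE_SIZE);
        if (!buf)
                return -ENOMEM;
        /* ... use buf[0..size) ... */
        kmem_free_large(buf);

        /* May sleep, but is allowed to fail rather than retry forever. */
        buf = kmem_zalloc(512, KM_SLEEP | KM_MAYFAIL);
        if (!buf)
                return -ENOMEM;
        kmem_free(buf);
        return 0;
}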
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
new file mode 100644 (file)
index 0000000..f7c8f7a
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_KMEM_H__
+#define __XFS_SUPPORT_KMEM_H__
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+/*
+ * General memory allocation interfaces
+ */
+
+#define KM_SLEEP       0x0001u
+#define KM_NOSLEEP     0x0002u
+#define KM_NOFS                0x0004u
+#define KM_MAYFAIL     0x0008u
+
+/*
+ * We use a special process flag to avoid recursive callbacks into
+ * the filesystem during transactions.  We will also issue our own
+ * warnings, so we explicitly skip any generic ones (silly of us).
+ */
+static inline gfp_t
+kmem_flags_convert(unsigned int __nocast flags)
+{
+       gfp_t   lflags;
+
+       BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
+
+       if (flags & KM_NOSLEEP) {
+               lflags = GFP_ATOMIC | __GFP_NOWARN;
+       } else {
+               lflags = GFP_KERNEL | __GFP_NOWARN;
+               if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+                       lflags &= ~__GFP_FS;
+       }
+       return lflags;
+}
+
+extern void *kmem_alloc(size_t, unsigned int __nocast);
+extern void *kmem_zalloc(size_t, unsigned int __nocast);
+extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
+extern void  kmem_free(const void *);
+
+static inline void *kmem_zalloc_large(size_t size)
+{
+       void *ptr;
+
+       ptr = vmalloc(size);
+       if (ptr)
+               memset(ptr, 0, size);
+       return ptr;
+}
+static inline void kmem_free_large(void *ptr)
+{
+       vfree(ptr);
+}
+
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
+
+/*
+ * Zone interfaces
+ */
+
+#define KM_ZONE_HWALIGN        SLAB_HWCACHE_ALIGN
+#define KM_ZONE_RECLAIM        SLAB_RECLAIM_ACCOUNT
+#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
+
+#define kmem_zone      kmem_cache
+#define kmem_zone_t    struct kmem_cache
+
+static inline kmem_zone_t *
+kmem_zone_init(int size, char *zone_name)
+{
+       return kmem_cache_create(zone_name, size, 0, 0, NULL);
+}
+
+static inline kmem_zone_t *
+kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
+                    void (*construct)(void *))
+{
+       return kmem_cache_create(zone_name, size, 0, flags, construct);
+}
+
+static inline void
+kmem_zone_free(kmem_zone_t *zone, void *ptr)
+{
+       kmem_cache_free(zone, ptr);
+}
+
+static inline void
+kmem_zone_destroy(kmem_zone_t *zone)
+{
+       if (zone)
+               kmem_cache_destroy(zone);
+}
+
+extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
+extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
+
+static inline int
+kmem_shake_allow(gfp_t gfp_mask)
+{
+       return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
+}
+
+#endif /* __XFS_SUPPORT_KMEM_H__ */
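A hedged sketch of the zone (kmem_cache) lifecycle declared above; the zone name and item type are illustrative:

struct demo_item {
        int     id;
};

static kmem_zone_t *demo_zone;

static int demo_init(void)
{
        demo_zone = kmem_zone_init(sizeof(struct demo_item), "demo_item");
        return demo_zone ? 0 : -ENOMEM;
}

static void demo_use(void)
{
        /* KM_SLEEP allocations retry internally until they succeed. */
        struct demo_item *item = kmem_zone_zalloc(demo_zone, KM_SLEEP);

        item->id = 1;
        kmem_zone_free(demo_zone, item);
}

static void demo_exit(void)
{
        kmem_zone_destroy(demo_zone);
}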
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
deleted file mode 100644 (file)
index a907de5..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
-#include <linux/backing-dev.h>
-#include "time.h"
-#include "kmem.h"
-#include "xfs_message.h"
-
-/*
- * Greedy allocation.  May fail and may return vmalloced memory.
- *
- * Must be freed using kmem_free_large.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
-       void            *ptr;
-       size_t          kmsize = maxsize;
-
-       while (!(ptr = kmem_zalloc_large(kmsize))) {
-               if ((kmsize >>= 1) <= minsize)
-                       kmsize = minsize;
-       }
-       if (ptr)
-               *size = kmsize;
-       return ptr;
-}
-
-void *
-kmem_alloc(size_t size, unsigned int __nocast flags)
-{
-       int     retries = 0;
-       gfp_t   lflags = kmem_flags_convert(flags);
-       void    *ptr;
-
-       do {
-               ptr = kmalloc(size, lflags);
-               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
-                       return ptr;
-               if (!(++retries % 100))
-                       xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, lflags);
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
-       } while (1);
-}
-
-void *
-kmem_zalloc(size_t size, unsigned int __nocast flags)
-{
-       void    *ptr;
-
-       ptr = kmem_alloc(size, flags);
-       if (ptr)
-               memset((char *)ptr, 0, (int)size);
-       return ptr;
-}
-
-void
-kmem_free(const void *ptr)
-{
-       if (!is_vmalloc_addr(ptr)) {
-               kfree(ptr);
-       } else {
-               vfree(ptr);
-       }
-}
-
-void *
-kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
-            unsigned int __nocast flags)
-{
-       void    *new;
-
-       new = kmem_alloc(newsize, flags);
-       if (ptr) {
-               if (new)
-                       memcpy(new, ptr,
-                               ((oldsize < newsize) ? oldsize : newsize));
-               kmem_free(ptr);
-       }
-       return new;
-}
-
-void *
-kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
-       int     retries = 0;
-       gfp_t   lflags = kmem_flags_convert(flags);
-       void    *ptr;
-
-       do {
-               ptr = kmem_cache_alloc(zone, lflags);
-               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
-                       return ptr;
-               if (!(++retries % 100))
-                       xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, lflags);
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
-       } while (1);
-}
-
-void *
-kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
-       void    *ptr;
-
-       ptr = kmem_zone_alloc(zone, flags);
-       if (ptr)
-               memset((char *)ptr, 0, kmem_cache_size(zone));
-       return ptr;
-}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
deleted file mode 100644 (file)
index f7c8f7a..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_KMEM_H__
-#define __XFS_SUPPORT_KMEM_H__
-
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-
-/*
- * General memory allocation interfaces
- */
-
-#define KM_SLEEP       0x0001u
-#define KM_NOSLEEP     0x0002u
-#define KM_NOFS                0x0004u
-#define KM_MAYFAIL     0x0008u
-
-/*
- * We use a special process flag to avoid recursive callbacks into
- * the filesystem during transactions.  We will also issue our own
- * warnings, so we explicitly skip any generic ones (silly of us).
- */
-static inline gfp_t
-kmem_flags_convert(unsigned int __nocast flags)
-{
-       gfp_t   lflags;
-
-       BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
-
-       if (flags & KM_NOSLEEP) {
-               lflags = GFP_ATOMIC | __GFP_NOWARN;
-       } else {
-               lflags = GFP_KERNEL | __GFP_NOWARN;
-               if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
-                       lflags &= ~__GFP_FS;
-       }
-       return lflags;
-}
-
-extern void *kmem_alloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
-extern void  kmem_free(const void *);
-
-static inline void *kmem_zalloc_large(size_t size)
-{
-       void *ptr;
-
-       ptr = vmalloc(size);
-       if (ptr)
-               memset(ptr, 0, size);
-       return ptr;
-}
-static inline void kmem_free_large(void *ptr)
-{
-       vfree(ptr);
-}
-
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
-/*
- * Zone interfaces
- */
-
-#define KM_ZONE_HWALIGN        SLAB_HWCACHE_ALIGN
-#define KM_ZONE_RECLAIM        SLAB_RECLAIM_ACCOUNT
-#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
-
-#define kmem_zone      kmem_cache
-#define kmem_zone_t    struct kmem_cache
-
-static inline kmem_zone_t *
-kmem_zone_init(int size, char *zone_name)
-{
-       return kmem_cache_create(zone_name, size, 0, 0, NULL);
-}
-
-static inline kmem_zone_t *
-kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
-                    void (*construct)(void *))
-{
-       return kmem_cache_create(zone_name, size, 0, flags, construct);
-}
-
-static inline void
-kmem_zone_free(kmem_zone_t *zone, void *ptr)
-{
-       kmem_cache_free(zone, ptr);
-}
-
-static inline void
-kmem_zone_destroy(kmem_zone_t *zone)
-{
-       if (zone)
-               kmem_cache_destroy(zone);
-}
-
-extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
-extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-
-static inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
-       return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
-}
-
-#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
deleted file mode 100644 (file)
index ff6a198..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_MRLOCK_H__
-#define __XFS_SUPPORT_MRLOCK_H__
-
-#include <linux/rwsem.h>
-
-typedef struct {
-       struct rw_semaphore     mr_lock;
-#ifdef DEBUG
-       int                     mr_writer;
-#endif
-} mrlock_t;
-
-#ifdef DEBUG
-#define mrinit(mrp, name)      \
-       do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
-#else
-#define mrinit(mrp, name)      \
-       do { init_rwsem(&(mrp)->mr_lock); } while (0)
-#endif
-
-#define mrlock_init(mrp, t,n,s)        mrinit(mrp, n)
-#define mrfree(mrp)            do { } while (0)
-
-static inline void mraccess_nested(mrlock_t *mrp, int subclass)
-{
-       down_read_nested(&mrp->mr_lock, subclass);
-}
-
-static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
-{
-       down_write_nested(&mrp->mr_lock, subclass);
-#ifdef DEBUG
-       mrp->mr_writer = 1;
-#endif
-}
-
-static inline int mrtryaccess(mrlock_t *mrp)
-{
-       return down_read_trylock(&mrp->mr_lock);
-}
-
-static inline int mrtryupdate(mrlock_t *mrp)
-{
-       if (!down_write_trylock(&mrp->mr_lock))
-               return 0;
-#ifdef DEBUG
-       mrp->mr_writer = 1;
-#endif
-       return 1;
-}
-
-static inline void mrunlock_excl(mrlock_t *mrp)
-{
-#ifdef DEBUG
-       mrp->mr_writer = 0;
-#endif
-       up_write(&mrp->mr_lock);
-}
-
-static inline void mrunlock_shared(mrlock_t *mrp)
-{
-       up_read(&mrp->mr_lock);
-}
-
-static inline void mrdemote(mrlock_t *mrp)
-{
-#ifdef DEBUG
-       mrp->mr_writer = 0;
-#endif
-       downgrade_write(&mrp->mr_lock);
-}
-
-#endif /* __XFS_SUPPORT_MRLOCK_H__ */
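The mrlock is a thin veneer over rw_semaphore; the file is removed here as part of the directory flattening rather than retired. A hedged usage sketch with an illustrative lock:

static mrlock_t demo_lock;      /* illustrative */

static void demo_mrlock(void)
{
        mrinit(&demo_lock, "demo");

        if (!mrtryupdate(&demo_lock))           /* try exclusive... */
                mrupdate_nested(&demo_lock, 0); /* ...or block for it */
        /* ... modify state protected by the lock ... */
        mrunlock_excl(&demo_lock);

        mraccess_nested(&demo_lock, 0);         /* shared (reader) side */
        mrunlock_shared(&demo_lock);
}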
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
deleted file mode 100644 (file)
index 387e695..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_TIME_H__
-#define __XFS_SUPPORT_TIME_H__
-
-#include <linux/sched.h>
-#include <linux/time.h>
-
-typedef struct timespec timespec_t;
-
-static inline void delay(long ticks)
-{
-       schedule_timeout_uninterruptible(ticks);
-}
-
-static inline void nanotime(struct timespec *tvp)
-{
-       *tvp = CURRENT_TIME;
-}
-
-#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
deleted file mode 100644 (file)
index b6c4b37..0000000
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-
-
-/*
- * Locking scheme:
- *  - all ACL updates are protected by inode->i_mutex, which is taken before
- *    calling into this file.
- */
-
-STATIC struct posix_acl *
-xfs_acl_from_disk(struct xfs_acl *aclp)
-{
-       struct posix_acl_entry *acl_e;
-       struct posix_acl *acl;
-       struct xfs_acl_entry *ace;
-       int count, i;
-
-       count = be32_to_cpu(aclp->acl_cnt);
-
-       acl = posix_acl_alloc(count, GFP_KERNEL);
-       if (!acl)
-               return ERR_PTR(-ENOMEM);
-
-       for (i = 0; i < count; i++) {
-               acl_e = &acl->a_entries[i];
-               ace = &aclp->acl_entry[i];
-
-               /*
-                * The tag is 32 bits on disk and 16 bits in core.
-                *
-                * Because every access to it goes through the core
-                * format first this is not a problem.
-                */
-               acl_e->e_tag = be32_to_cpu(ace->ae_tag);
-               acl_e->e_perm = be16_to_cpu(ace->ae_perm);
-
-               switch (acl_e->e_tag) {
-               case ACL_USER:
-               case ACL_GROUP:
-                       acl_e->e_id = be32_to_cpu(ace->ae_id);
-                       break;
-               case ACL_USER_OBJ:
-               case ACL_GROUP_OBJ:
-               case ACL_MASK:
-               case ACL_OTHER:
-                       acl_e->e_id = ACL_UNDEFINED_ID;
-                       break;
-               default:
-                       goto fail;
-               }
-       }
-       return acl;
-
-fail:
-       posix_acl_release(acl);
-       return ERR_PTR(-EINVAL);
-}
-
-STATIC void
-xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
-{
-       const struct posix_acl_entry *acl_e;
-       struct xfs_acl_entry *ace;
-       int i;
-
-       aclp->acl_cnt = cpu_to_be32(acl->a_count);
-       for (i = 0; i < acl->a_count; i++) {
-               ace = &aclp->acl_entry[i];
-               acl_e = &acl->a_entries[i];
-
-               ace->ae_tag = cpu_to_be32(acl_e->e_tag);
-               ace->ae_id = cpu_to_be32(acl_e->e_id);
-               ace->ae_perm = cpu_to_be16(acl_e->e_perm);
-       }
-}
-
-struct posix_acl *
-xfs_get_acl(struct inode *inode, int type)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       struct posix_acl *acl;
-       struct xfs_acl *xfs_acl;
-       int len = sizeof(struct xfs_acl);
-       unsigned char *ea_name;
-       int error;
-
-       acl = get_cached_acl(inode, type);
-       if (acl != ACL_NOT_CACHED)
-               return acl;
-
-       trace_xfs_get_acl(ip);
-
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               ea_name = SGI_ACL_FILE;
-               break;
-       case ACL_TYPE_DEFAULT:
-               ea_name = SGI_ACL_DEFAULT;
-               break;
-       default:
-               BUG();
-       }
-
-       /*
-        * If we have a cached ACL value just return it, no need to
-        * go out to the disk.
-        */
-
-       xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
-       if (!xfs_acl)
-               return ERR_PTR(-ENOMEM);
-
-       error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
-                                                       &len, ATTR_ROOT);
-       if (error) {
-               /*
-                * If the attribute doesn't exist make sure we have a negative
-                * cache entry, for any other error assume it is transient and
-                * leave the cache entry as ACL_NOT_CACHED.
-                */
-               if (error == -ENOATTR) {
-                       acl = NULL;
-                       goto out_update_cache;
-               }
-               goto out;
-       }
-
-       acl = xfs_acl_from_disk(xfs_acl);
-       if (IS_ERR(acl))
-               goto out;
-
- out_update_cache:
-       set_cached_acl(inode, type, acl);
- out:
-       kfree(xfs_acl);
-       return acl;
-}
-
-STATIC int
-xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       unsigned char *ea_name;
-       int error;
-
-       if (S_ISLNK(inode->i_mode))
-               return -EOPNOTSUPP;
-
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               ea_name = SGI_ACL_FILE;
-               break;
-       case ACL_TYPE_DEFAULT:
-               if (!S_ISDIR(inode->i_mode))
-                       return acl ? -EACCES : 0;
-               ea_name = SGI_ACL_DEFAULT;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (acl) {
-               struct xfs_acl *xfs_acl;
-               int len;
-
-               xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
-               if (!xfs_acl)
-                       return -ENOMEM;
-
-               xfs_acl_to_disk(xfs_acl, acl);
-               len = sizeof(struct xfs_acl) -
-                       (sizeof(struct xfs_acl_entry) *
-                        (XFS_ACL_MAX_ENTRIES - acl->a_count));
-
-               error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
-                               len, ATTR_ROOT);
-
-               kfree(xfs_acl);
-       } else {
-               /*
-                * A NULL ACL argument means we want to remove the ACL.
-                */
-               error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
-
-               /*
-                * If the attribute didn't exist to start with that's fine.
-                */
-               if (error == -ENOATTR)
-                       error = 0;
-       }
-
-       if (!error)
-               set_cached_acl(inode, type, acl);
-       return error;
-}
-
-static int
-xfs_set_mode(struct inode *inode, umode_t mode)
-{
-       int error = 0;
-
-       if (mode != inode->i_mode) {
-               struct iattr iattr;
-
-               iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
-               iattr.ia_mode = mode;
-               iattr.ia_ctime = current_fs_time(inode->i_sb);
-
-               error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
-       }
-
-       return error;
-}
-
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
-       int len = sizeof(struct xfs_acl);
-
-       return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
-                           ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
-       return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
-       if (!S_ISDIR(inode->i_mode))
-               return 0;
-       return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
-/*
- * No need for i_mutex because the inode is not yet exposed to the VFS.
- */
-int
-xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
-{
-       umode_t mode = inode->i_mode;
-       int error = 0, inherit = 0;
-
-       if (S_ISDIR(inode->i_mode)) {
-               error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
-               if (error)
-                       goto out;
-       }
-
-       error = posix_acl_create(&acl, GFP_KERNEL, &mode);
-       if (error < 0)
-               return error;
-
-       /*
-        * If posix_acl_create returns a positive value, we need to
-        * inherit a permission that can't be represented using the Unix
-        * mode bits, so we actually need to set an ACL.
-        */
-       if (error > 0)
-               inherit = 1;
-
-       error = xfs_set_mode(inode, mode);
-       if (error)
-               goto out;
-
-       if (inherit)
-               error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-
-out:
-       posix_acl_release(acl);
-       return error;
-}
-
-int
-xfs_acl_chmod(struct inode *inode)
-{
-       struct posix_acl *acl;
-       int error;
-
-       if (S_ISLNK(inode->i_mode))
-               return -EOPNOTSUPP;
-
-       acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
-       if (IS_ERR(acl) || !acl)
-               return PTR_ERR(acl);
-
-       error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
-       if (error)
-               return error;
-
-       error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-       posix_acl_release(acl);
-       return error;
-}
-
-static int
-xfs_xattr_acl_get(struct dentry *dentry, const char *name,
-               void *value, size_t size, int type)
-{
-       struct posix_acl *acl;
-       int error;
-
-       acl = xfs_get_acl(dentry->d_inode, type);
-       if (IS_ERR(acl))
-               return PTR_ERR(acl);
-       if (acl == NULL)
-               return -ENODATA;
-
-       error = posix_acl_to_xattr(acl, value, size);
-       posix_acl_release(acl);
-
-       return error;
-}
-
-static int
-xfs_xattr_acl_set(struct dentry *dentry, const char *name,
-               const void *value, size_t size, int flags, int type)
-{
-       struct inode *inode = dentry->d_inode;
-       struct posix_acl *acl = NULL;
-       int error = 0;
-
-       if (flags & XATTR_CREATE)
-               return -EINVAL;
-       if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
-               return value ? -EACCES : 0;
-       if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
-               return -EPERM;
-
-       if (!value)
-               goto set_acl;
-
-       acl = posix_acl_from_xattr(value, size);
-       if (!acl) {
-               /*
-                * acl_set_file(3) may request that we set default ACLs with
-                * zero length -- defend (gracefully) against that here.
-                */
-               goto out;
-       }
-       if (IS_ERR(acl)) {
-               error = PTR_ERR(acl);
-               goto out;
-       }
-
-       error = posix_acl_valid(acl);
-       if (error)
-               goto out_release;
-
-       error = -EINVAL;
-       if (acl->a_count > XFS_ACL_MAX_ENTRIES)
-               goto out_release;
-
-       if (type == ACL_TYPE_ACCESS) {
-               umode_t mode = inode->i_mode;
-               error = posix_acl_equiv_mode(acl, &mode);
-
-               if (error <= 0) {
-                       posix_acl_release(acl);
-                       acl = NULL;
-
-                       if (error < 0)
-                               return error;
-               }
-
-               error = xfs_set_mode(inode, mode);
-               if (error)
-                       goto out_release;
-       }
-
- set_acl:
-       error = xfs_set_acl(inode, type, acl);
- out_release:
-       posix_acl_release(acl);
- out:
-       return error;
-}
-
-const struct xattr_handler xfs_xattr_acl_access_handler = {
-       .prefix = POSIX_ACL_XATTR_ACCESS,
-       .flags  = ACL_TYPE_ACCESS,
-       .get    = xfs_xattr_acl_get,
-       .set    = xfs_xattr_acl_set,
-};
-
-const struct xattr_handler xfs_xattr_acl_default_handler = {
-       .prefix = POSIX_ACL_XATTR_DEFAULT,
-       .flags  = ACL_TYPE_DEFAULT,
-       .get    = xfs_xattr_acl_get,
-       .set    = xfs_xattr_acl_set,
-};
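The two handler tables above are how the ACL code plugs into the generic xattr layer: the VFS dispatches on .prefix and calls the .get/.set methods. A hedged sketch of the wiring; the array name is illustrative, and XFS's real table lives elsewhere in its xattr code:

/* Hedged sketch: sb->s_xattr takes a NULL-terminated handler array. */
static const struct xattr_handler *demo_xattr_handlers[] = {
        &xfs_xattr_acl_access_handler,
        &xfs_xattr_acl_default_handler,
        NULL
};

/* at fill_super time:  sb->s_xattr = demo_xattr_handlers; */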
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
deleted file mode 100644 (file)
index 63e971e..0000000
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_iomap.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include "xfs_bmap.h"
-#include <linux/gfp.h>
-#include <linux/mpage.h>
-#include <linux/pagevec.h>
-#include <linux/writeback.h>
-
-
-/*
- * Prime number of hash buckets since address is used as the key.
- */
-#define NVSYNC         37
-#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
-static wait_queue_head_t xfs_ioend_wq[NVSYNC];
-
-void __init
-xfs_ioend_init(void)
-{
-       int i;
-
-       for (i = 0; i < NVSYNC; i++)
-               init_waitqueue_head(&xfs_ioend_wq[i]);
-}
-
-void
-xfs_ioend_wait(
-       xfs_inode_t     *ip)
-{
-       wait_queue_head_t *wq = to_ioend_wq(ip);
-
-       wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
-}
-
-STATIC void
-xfs_ioend_wake(
-       xfs_inode_t     *ip)
-{
-       if (atomic_dec_and_test(&ip->i_iocount))
-               wake_up(to_ioend_wq(ip));
-}
-
-void
-xfs_count_page_state(
-       struct page             *page,
-       int                     *delalloc,
-       int                     *unwritten)
-{
-       struct buffer_head      *bh, *head;
-
-       *delalloc = *unwritten = 0;
-
-       bh = head = page_buffers(page);
-       do {
-               if (buffer_unwritten(bh))
-                       (*unwritten) = 1;
-               else if (buffer_delay(bh))
-                       (*delalloc) = 1;
-       } while ((bh = bh->b_this_page) != head);
-}
-
-STATIC struct block_device *
-xfs_find_bdev_for_inode(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-
-       if (XFS_IS_REALTIME_INODE(ip))
-               return mp->m_rtdev_targp->bt_bdev;
-       else
-               return mp->m_ddev_targp->bt_bdev;
-}
-
-/*
- * We're now finished for good with this ioend structure.
- * Update the page state via the associated buffer_heads,
- * release holds on the inode and bio, and finally free
- * up memory.  Do not use the ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
-       xfs_ioend_t             *ioend)
-{
-       struct buffer_head      *bh, *next;
-       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
-
-       for (bh = ioend->io_buffer_head; bh; bh = next) {
-               next = bh->b_private;
-               bh->b_end_io(bh, !ioend->io_error);
-       }
-
-       /*
-        * Volume managers supporting multiple paths can send back ENODEV
-        * when the final path disappears.  In this case continuing to fill
-        * the page cache with dirty data which cannot be written out is
-        * evil, so prevent that.
-        */
-       if (unlikely(ioend->io_error == -ENODEV)) {
-               xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
-                                     __FILE__, __LINE__);
-       }
-
-       xfs_ioend_wake(ip);
-       mempool_free(ioend, xfs_ioend_pool);
-}
-
-/*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
-       xfs_ioend_t             *ioend)
-{
-       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
-       xfs_fsize_t             isize;
-       xfs_fsize_t             bsize;
-
-       bsize = ioend->io_offset + ioend->io_size;
-       isize = MAX(ip->i_size, ip->i_new_size);
-       isize = MIN(isize, bsize);
-       return isize > ip->i_d.di_size ? isize : 0;
-}
-
-/*
- * Update on-disk file size now that data has been written to disk.  The
- * current in-memory file size is i_size.  If a write is beyond eof i_new_size
- * will be the intended file size until i_size is updated.  If this write does
- * not extend all the way to the valid file size then restrict this update to
- * the end of the write.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
- */
-STATIC int
-xfs_setfilesize(
-       xfs_ioend_t             *ioend)
-{
-       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
-       xfs_fsize_t             isize;
-
-       if (unlikely(ioend->io_error))
-               return 0;
-
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
-               return EAGAIN;
-
-       isize = xfs_ioend_new_eof(ioend);
-       if (isize) {
-               trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
-               ip->i_d.di_size = isize;
-               xfs_mark_inode_dirty(ip);
-       }
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return 0;
-}
-
-/*
- * Schedule IO completion handling on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend(
-       struct xfs_ioend        *ioend)
-{
-       if (atomic_dec_and_test(&ioend->io_remaining)) {
-               if (ioend->io_type == IO_UNWRITTEN)
-                       queue_work(xfsconvertd_workqueue, &ioend->io_work);
-               else
-                       queue_work(xfsdatad_workqueue, &ioend->io_work);
-       }
-}
-
-/*
- * IO write completion.
- */
-STATIC void
-xfs_end_io(
-       struct work_struct *work)
-{
-       xfs_ioend_t     *ioend = container_of(work, xfs_ioend_t, io_work);
-       struct xfs_inode *ip = XFS_I(ioend->io_inode);
-       int             error = 0;
-
-       /*
-        * For unwritten extents we need to issue transactions to convert a
- * range to normal written extents after the data I/O has finished.
-        */
-       if (ioend->io_type == IO_UNWRITTEN &&
-           likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
-
-               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
-                                                ioend->io_size);
-               if (error)
-                       ioend->io_error = error;
-       }
-
-       /*
-        * We might have to update the on-disk file size after extending
-        * writes.
-        */
-       error = xfs_setfilesize(ioend);
-       ASSERT(!error || error == EAGAIN);
-
-       /*
-        * If we didn't complete processing of the ioend, requeue it to the
-        * tail of the workqueue for another attempt later. Otherwise destroy
-        * it.
-        */
-       if (error == EAGAIN) {
-               atomic_inc(&ioend->io_remaining);
-               xfs_finish_ioend(ioend);
-               /* ensure we don't spin on blocked ioends */
-               delay(1);
-       } else {
-               if (ioend->io_iocb)
-                       aio_complete(ioend->io_iocb, ioend->io_result, 0);
-               xfs_destroy_ioend(ioend);
-       }
-}
-
-/*
- * Call IO completion handling in caller context on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend_sync(
-       struct xfs_ioend        *ioend)
-{
-       if (atomic_dec_and_test(&ioend->io_remaining))
-               xfs_end_io(&ioend->io_work);
-}
-
-/*
- * Allocate and initialise an IO completion structure.
- * We need to track unwritten extent write completion here initially.
- * We'll need to extend this for updating the ondisk inode size later
- * (vs. incore size).
- */
-STATIC xfs_ioend_t *
-xfs_alloc_ioend(
-       struct inode            *inode,
-       unsigned int            type)
-{
-       xfs_ioend_t             *ioend;
-
-       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
-
-       /*
-        * Set the count to 1 initially, which will prevent an I/O
-        * completion callback that happens before we have started
-        * all the I/O from calling the completion routine too early.
-        */
-       atomic_set(&ioend->io_remaining, 1);
-       ioend->io_error = 0;
-       ioend->io_list = NULL;
-       ioend->io_type = type;
-       ioend->io_inode = inode;
-       ioend->io_buffer_head = NULL;
-       ioend->io_buffer_tail = NULL;
-       atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
-       ioend->io_offset = 0;
-       ioend->io_size = 0;
-       ioend->io_iocb = NULL;
-       ioend->io_result = 0;
-
-       INIT_WORK(&ioend->io_work, xfs_end_io);
-       return ioend;
-}
-
-STATIC int
-xfs_map_blocks(
-       struct inode            *inode,
-       loff_t                  offset,
-       struct xfs_bmbt_irec    *imap,
-       int                     type,
-       int                     nonblocking)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       ssize_t                 count = 1 << inode->i_blkbits;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       int                     error = 0;
-       int                     bmapi_flags = XFS_BMAPI_ENTIRE;
-       int                     nimaps = 1;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (type == IO_UNWRITTEN)
-               bmapi_flags |= XFS_BMAPI_IGSTATE;
-
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-               if (nonblocking)
-                       return -XFS_ERROR(EAGAIN);
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-       }
-
-       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
-              (ip->i_df.if_flags & XFS_IFEXTENTS));
-       ASSERT(offset <= mp->m_maxioffset);
-
-       if (offset + count > mp->m_maxioffset)
-               count = mp->m_maxioffset - offset;
-       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
-                         bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       if (error)
-               return -XFS_ERROR(error);
-
-       if (type == IO_DELALLOC &&
-           (!nimaps || isnullstartblock(imap->br_startblock))) {
-               error = xfs_iomap_write_allocate(ip, offset, count, imap);
-               if (!error)
-                       trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
-               return -XFS_ERROR(error);
-       }
-
-#ifdef DEBUG
-       if (type == IO_UNWRITTEN) {
-               ASSERT(nimaps);
-               ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-               ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-       }
-#endif
-       if (nimaps)
-               trace_xfs_map_blocks_found(ip, offset, count, type, imap);
-       return 0;
-}
-
-STATIC int
-xfs_imap_valid(
-       struct inode            *inode,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       offset >>= inode->i_blkbits;
-
-       return offset >= imap->br_startoff &&
-               offset < imap->br_startoff + imap->br_blockcount;
-}
-
-/*
- * BIO completion handler for buffered IO.
- */
-STATIC void
-xfs_end_bio(
-       struct bio              *bio,
-       int                     error)
-{
-       xfs_ioend_t             *ioend = bio->bi_private;
-
-       ASSERT(atomic_read(&bio->bi_cnt) >= 1);
-       ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
-
-       /* Toss bio and pass work off to an xfsdatad thread */
-       bio->bi_private = NULL;
-       bio->bi_end_io = NULL;
-       bio_put(bio);
-
-       xfs_finish_ioend(ioend);
-}
-
-STATIC void
-xfs_submit_ioend_bio(
-       struct writeback_control *wbc,
-       xfs_ioend_t             *ioend,
-       struct bio              *bio)
-{
-       atomic_inc(&ioend->io_remaining);
-       bio->bi_private = ioend;
-       bio->bi_end_io = xfs_end_bio;
-
-       /*
-        * If the I/O is beyond EOF we mark the inode dirty immediately
-        * but don't update the inode size until I/O completion.
-        */
-       if (xfs_ioend_new_eof(ioend))
-               xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
-
-       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
-}
-
-STATIC struct bio *
-xfs_alloc_ioend_bio(
-       struct buffer_head      *bh)
-{
-       int                     nvecs = bio_get_nr_vecs(bh->b_bdev);
-       struct bio              *bio = bio_alloc(GFP_NOIO, nvecs);
-
-       ASSERT(bio->bi_private == NULL);
-       bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-       bio->bi_bdev = bh->b_bdev;
-       return bio;
-}
-
-STATIC void
-xfs_start_buffer_writeback(
-       struct buffer_head      *bh)
-{
-       ASSERT(buffer_mapped(bh));
-       ASSERT(buffer_locked(bh));
-       ASSERT(!buffer_delay(bh));
-       ASSERT(!buffer_unwritten(bh));
-
-       mark_buffer_async_write(bh);
-       set_buffer_uptodate(bh);
-       clear_buffer_dirty(bh);
-}
-
-STATIC void
-xfs_start_page_writeback(
-       struct page             *page,
-       int                     clear_dirty,
-       int                     buffers)
-{
-       ASSERT(PageLocked(page));
-       ASSERT(!PageWriteback(page));
-       if (clear_dirty)
-               clear_page_dirty_for_io(page);
-       set_page_writeback(page);
-       unlock_page(page);
-       /* If no buffers on the page are to be written, finish it here */
-       if (!buffers)
-               end_page_writeback(page);
-}
-
-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
-{
-       return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-}
-
-/*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we go, then we can end up with a page that only has some buffers
- * marked async write, and I/O completion can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
- */
-STATIC void
-xfs_submit_ioend(
-       struct writeback_control *wbc,
-       xfs_ioend_t             *ioend)
-{
-       xfs_ioend_t             *head = ioend;
-       xfs_ioend_t             *next;
-       struct buffer_head      *bh;
-       struct bio              *bio;
-       sector_t                lastblock = 0;
-
-       /* Pass 1 - start writeback */
-       do {
-               next = ioend->io_list;
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
-                       xfs_start_buffer_writeback(bh);
-       } while ((ioend = next) != NULL);
-
-       /* Pass 2 - submit I/O */
-       ioend = head;
-       do {
-               next = ioend->io_list;
-               bio = NULL;
-
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
-                       if (!bio) {
- retry:
-                               bio = xfs_alloc_ioend_bio(bh);
-                       } else if (bh->b_blocknr != lastblock + 1) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       if (bio_add_buffer(bio, bh) != bh->b_size) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       lastblock = bh->b_blocknr;
-               }
-               if (bio)
-                       xfs_submit_ioend_bio(wbc, ioend, bio);
-               xfs_finish_ioend(ioend);
-       } while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too.  Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
-       xfs_ioend_t             *ioend)
-{
-       xfs_ioend_t             *next;
-       struct buffer_head      *bh, *next_bh;
-
-       do {
-               next = ioend->io_list;
-               bh = ioend->io_buffer_head;
-               do {
-                       next_bh = bh->b_private;
-                       clear_buffer_async_write(bh);
-                       unlock_buffer(bh);
-               } while ((bh = next_bh) != NULL);
-
-               xfs_ioend_wake(XFS_I(ioend->io_inode));
-               mempool_free(ioend, xfs_ioend_pool);
-       } while ((ioend = next) != NULL);
-}
-
-/*
- * Test to see if we've been building up a completion structure for
- * earlier buffers -- if so, we try to append to this ioend if we
- * can, otherwise we finish off any current ioend and start another.
- * The current ioend is passed back to the caller via *result.
- */
-STATIC void
-xfs_add_to_ioend(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       xfs_off_t               offset,
-       unsigned int            type,
-       xfs_ioend_t             **result,
-       int                     need_ioend)
-{
-       xfs_ioend_t             *ioend = *result;
-
-       if (!ioend || need_ioend || type != ioend->io_type) {
-               xfs_ioend_t     *previous = *result;
-
-               ioend = xfs_alloc_ioend(inode, type);
-               ioend->io_offset = offset;
-               ioend->io_buffer_head = bh;
-               ioend->io_buffer_tail = bh;
-               if (previous)
-                       previous->io_list = ioend;
-               *result = ioend;
-       } else {
-               ioend->io_buffer_tail->b_private = bh;
-               ioend->io_buffer_tail = bh;
-       }
-
-       bh->b_private = NULL;
-       ioend->io_size += bh->b_size;
-}
-
-STATIC void
-xfs_map_buffer(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       sector_t                bn;
-       struct xfs_mount        *m = XFS_I(inode)->i_mount;
-       xfs_off_t               iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
-       xfs_daddr_t             iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
-
-       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-       bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
-             ((offset - iomap_offset) >> inode->i_blkbits);
-
-       ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
-
-       bh->b_blocknr = bn;
-       set_buffer_mapped(bh);
-}
-
-STATIC void
-xfs_map_at_offset(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-       xfs_map_buffer(inode, bh, imap, offset);
-       set_buffer_mapped(bh);
-       clear_buffer_delay(bh);
-       clear_buffer_unwritten(bh);
-}
-
-/*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
- */
-STATIC int
-xfs_is_delayed_page(
-       struct page             *page,
-       unsigned int            type)
-{
-       if (PageWriteback(page))
-               return 0;
-
-       if (page->mapping && page_has_buffers(page)) {
-               struct buffer_head      *bh, *head;
-               int                     acceptable = 0;
-
-               bh = head = page_buffers(page);
-               do {
-                       if (buffer_unwritten(bh))
-                               acceptable = (type == IO_UNWRITTEN);
-                       else if (buffer_delay(bh))
-                               acceptable = (type == IO_DELALLOC);
-                       else if (buffer_dirty(bh) && buffer_mapped(bh))
-                               acceptable = (type == IO_OVERWRITE);
-                       else
-                               break;
-               } while ((bh = bh->b_this_page) != head);
-
-               if (acceptable)
-                       return 1;
-       }
-
-       return 0;
-}
-
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * Except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only; for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
-       struct inode            *inode,
-       struct page             *page,
-       loff_t                  tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc)
-{
-       struct buffer_head      *bh, *head;
-       xfs_off_t               end_offset;
-       unsigned long           p_offset;
-       unsigned int            type;
-       int                     len, page_dirty;
-       int                     count = 0, done = 0, uptodate = 1;
-       xfs_off_t               offset = page_offset(page);
-
-       if (page->index != tindex)
-               goto fail;
-       if (!trylock_page(page))
-               goto fail;
-       if (PageWriteback(page))
-               goto fail_unlock_page;
-       if (page->mapping != inode->i_mapping)
-               goto fail_unlock_page;
-       if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
-               goto fail_unlock_page;
-
-       /*
-        * page_dirty is initially a count of buffers on the page before
-        * EOF and is decremented as we move each into a cleanable state.
-        *
-        * Derivation:
-        *
-        * End offset is the highest offset that this page should represent.
-        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
-        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
-        * hence give us the correct page_dirty count. On any other page,
-        * it will be zero and in that case we need page_dirty to be the
-        * count of buffers on the page.
-        */
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       i_size_read(inode));
-
-       len = 1 << inode->i_blkbits;
-       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-                                       PAGE_CACHE_SIZE);
-       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
-       page_dirty = p_offset / len;
-
-       bh = head = page_buffers(page);
-       do {
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-               if (!(PageUptodate(page) || buffer_uptodate(bh))) {
-                       done = 1;
-                       continue;
-               }
-
-               if (buffer_unwritten(bh) || buffer_delay(bh) ||
-                   buffer_mapped(bh)) {
-                       if (buffer_unwritten(bh))
-                               type = IO_UNWRITTEN;
-                       else if (buffer_delay(bh))
-                               type = IO_DELALLOC;
-                       else
-                               type = IO_OVERWRITE;
-
-                       if (!xfs_imap_valid(inode, imap, offset)) {
-                               done = 1;
-                               continue;
-                       }
-
-                       lock_buffer(bh);
-                       if (type != IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type,
-                                        ioendp, done);
-
-                       page_dirty--;
-                       count++;
-               } else {
-                       done = 1;
-               }
-       } while (offset += len, (bh = bh->b_this_page) != head);
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       if (count) {
-               if (--wbc->nr_to_write <= 0 &&
-                   wbc->sync_mode == WB_SYNC_NONE)
-                       done = 1;
-       }
-       xfs_start_page_writeback(page, !page_dirty, count);
-
-       return done;
- fail_unlock_page:
-       unlock_page(page);
- fail:
-       return 1;
-}
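
The page_dirty derivation in the comment above is easiest to check with numbers. The same arithmetic in userspace, assuming 4 KiB pages, 1 KiB blocks, and an EOF 1500 bytes into the fourth page:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define BLKSIZE   1024UL
    #define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))

    int main(void)
    {
        unsigned long i_size = 3 * PAGE_SIZE + 1500;   /* EOF mid-page */
        unsigned long index;

        for (index = 0; index <= 3; index++) {
            unsigned long end = (index + 1) * PAGE_SIZE;
            unsigned long p_offset, page_dirty;

            if (end > i_size)
                end = i_size;                          /* the min_t() above */
            p_offset = end & (PAGE_SIZE - 1);          /* non-zero only on EOF page */
            p_offset = p_offset ? ROUNDUP(p_offset, BLKSIZE) : PAGE_SIZE;
            page_dirty = p_offset / BLKSIZE;
            printf("page %lu: %lu buffers before EOF\n", index, page_dirty);
        }
        return 0;   /* pages 0-2: 4 buffers each; page 3: 1500 -> 2048 -> 2 */
    }
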
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
-       struct inode            *inode,
-       pgoff_t                 tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc,
-       pgoff_t                 tlast)
-{
-       struct pagevec          pvec;
-       int                     done = 0, i;
-
-       pagevec_init(&pvec, 0);
-       while (!done && tindex <= tlast) {
-               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-                       break;
-
-               for (i = 0; i < pagevec_count(&pvec); i++) {
-                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-                                       imap, ioendp, wbc);
-                       if (done)
-                               break;
-               }
-
-               pagevec_release(&pvec);
-               cond_resched();
-       }
-}
-
-STATIC void
-xfs_vm_invalidatepage(
-       struct page             *page,
-       unsigned long           offset)
-{
-       trace_xfs_invalidatepage(page->mapping->host, page, offset);
-       block_invalidatepage(page, offset);
-}
-
-/*
- * If the page has delalloc buffers on it, we need to punch them out before we
- * invalidate the page. If we don't, we leave a stale delalloc mapping on the
- * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
- * is done on that same region - the delalloc extent is returned when none is
- * supposed to be there.
- *
- * We prevent this by truncating away the delalloc regions on the page before
- * invalidating it. Because they are delalloc, we can do this without needing a
- * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
- * truncation without a transaction as there is no space left for block
- * reservation (typically why we see a ENOSPC in writeback).
- *
- * This is not a performance critical path, so for now just do the punching a
- * buffer head at a time.
- */
-STATIC void
-xfs_aops_discard_page(
-       struct page             *page)
-{
-       struct inode            *inode = page->mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct buffer_head      *bh, *head;
-       loff_t                  offset = page_offset(page);
-
-       if (!xfs_is_delayed_page(page, IO_DELALLOC))
-               goto out_invalidate;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               goto out_invalidate;
-
-       xfs_alert(ip->i_mount,
-               "page discard on page %p, inode 0x%llx, offset %llu.",
-                       page, ip->i_ino, offset);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       bh = head = page_buffers(page);
-       do {
-               int             error;
-               xfs_fileoff_t   start_fsb;
-
-               if (!buffer_delay(bh))
-                       goto next_buffer;
-
-               start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-               error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
-               if (error) {
-                       /* something screwed, just bail */
-                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                               xfs_alert(ip->i_mount,
-                       "page discard unable to remove delalloc mapping.");
-                       }
-                       break;
-               }
-next_buffer:
-               offset += 1 << inode->i_blkbits;
-
-       } while ((bh = bh->b_this_page) != head);
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_invalidate:
-       xfs_vm_invalidatepage(page, 0);
-       return;
-}
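
A small point about the loop above: XFS_B_TO_FSBT is the truncating byte-to-filesystem-block conversion, so each pass punches exactly the single block underneath the current buffer. A sketch of that conversion, assuming 4 KiB blocks (blocklog = 12):

    #include <stdio.h>

    /* Truncating byte-to-FSB conversion, as XFS_B_TO_FSBT does. */
    static unsigned long long b_to_fsbt(unsigned long long bytes, unsigned blocklog)
    {
        return bytes >> blocklog;
    }

    int main(void)
    {
        printf("%llu\n", b_to_fsbt(2 * 4096 + 100, 12));   /* byte 8292 -> block 2 */
        return 0;
    }
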
-
-/*
- * Write out a dirty page.
- *
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
- * regular allocated space.
- * For any other dirty buffer heads on the page we should flush them.
- */
-STATIC int
-xfs_vm_writepage(
-       struct page             *page,
-       struct writeback_control *wbc)
-{
-       struct inode            *inode = page->mapping->host;
-       struct buffer_head      *bh, *head;
-       struct xfs_bmbt_irec    imap;
-       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
-       loff_t                  offset;
-       unsigned int            type;
-       __uint64_t              end_offset;
-       pgoff_t                 end_index, last_index;
-       ssize_t                 len;
-       int                     err, imap_valid = 0, uptodate = 1;
-       int                     count = 0;
-       int                     nonblocking = 0;
-
-       trace_xfs_writepage(inode, page, 0);
-
-       ASSERT(page_has_buffers(page));
-
-       /*
-        * Refuse to write the page out if we are called from reclaim context.
-        *
-        * This avoids stack overflows when called from deeply used stacks in
-        * random callers for direct reclaim or memcg reclaim.  We explicitly
-        * allow reclaim from kswapd as the stack usage there is relatively low.
-        *
-        * This should really be done by the core VM, but until that happens
-        * filesystems like XFS, btrfs and ext4 have to take care of this
-        * by themselves.
-        */
-       if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
-               goto redirty;
-
-       /*
-        * Given that we do not allow direct reclaim to call us, we should
-        * never be called while in a filesystem transaction.
-        */
-       if (WARN_ON(current->flags & PF_FSTRANS))
-               goto redirty;
-
-       /* Is this page beyond the end of the file? */
-       offset = i_size_read(inode);
-       end_index = offset >> PAGE_CACHE_SHIFT;
-       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-       if (page->index >= end_index) {
-               if ((page->index >= end_index + 1) ||
-                   !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-                       unlock_page(page);
-                       return 0;
-               }
-       }
-
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       offset);
-       len = 1 << inode->i_blkbits;
-
-       bh = head = page_buffers(page);
-       offset = page_offset(page);
-       type = IO_OVERWRITE;
-
-       if (wbc->sync_mode == WB_SYNC_NONE)
-               nonblocking = 1;
-
-       do {
-               int new_ioend = 0;
-
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-
-               /*
-                * set_page_dirty dirties all buffers in a page, independent
-                * of their state.  The dirty state however is entirely
-                * meaningless for holes (!mapped && uptodate), so skip
-                * buffers covering holes here.
-                */
-               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-                       imap_valid = 0;
-                       continue;
-               }
-
-               if (buffer_unwritten(bh)) {
-                       if (type != IO_UNWRITTEN) {
-                               type = IO_UNWRITTEN;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_delay(bh)) {
-                       if (type != IO_DELALLOC) {
-                               type = IO_DELALLOC;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_uptodate(bh)) {
-                       if (type != IO_OVERWRITE) {
-                               type = IO_OVERWRITE;
-                               imap_valid = 0;
-                       }
-               } else {
-                       if (PageUptodate(page)) {
-                               ASSERT(buffer_mapped(bh));
-                               imap_valid = 0;
-                       }
-                       continue;
-               }
-
-               if (imap_valid)
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               if (!imap_valid) {
-                       /*
-                        * If we didn't have a valid mapping then we need to
-                        * put the new mapping into a separate ioend structure.
-                        * This ensures non-contiguous extents always have
-                        * separate ioends, which is particularly important
-                        * for unwritten extent conversion at I/O completion
-                        * time.
-                        */
-                       new_ioend = 1;
-                       err = xfs_map_blocks(inode, offset, &imap, type,
-                                            nonblocking);
-                       if (err)
-                               goto error;
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               }
-               if (imap_valid) {
-                       lock_buffer(bh);
-                       if (type != IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, &imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type, &ioend,
-                                        new_ioend);
-                       count++;
-               }
-
-               if (!iohead)
-                       iohead = ioend;
-
-       } while (offset += len, ((bh = bh->b_this_page) != head));
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       xfs_start_page_writeback(page, 1, count);
-
-       if (ioend && imap_valid) {
-               xfs_off_t               end_index;
-
-               end_index = imap.br_startoff + imap.br_blockcount;
-
-               /* to bytes */
-               end_index <<= inode->i_blkbits;
-
-               /* to pages */
-               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
-               /* check against file size */
-               if (end_index > last_index)
-                       end_index = last_index;
-
-               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-                                 wbc, end_index);
-       }
-
-       if (iohead)
-               xfs_submit_ioend(wbc, iohead);
-
-       return 0;
-
-error:
-       if (iohead)
-               xfs_cancel_ioend(iohead);
-
-       if (err == -EAGAIN)
-               goto redirty;
-
-       xfs_aops_discard_page(page);
-       ClearPageUptodate(page);
-       unlock_page(page);
-       return err;
-
-redirty:
-       redirty_page_for_writepage(wbc, page);
-       unlock_page(page);
-       return 0;
-}
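
The reclaim check near the top of this function uses a mask-and-compare idiom: masking with both flags but comparing against PF_MEMALLOC alone is true only when PF_MEMALLOC is set and PF_KSWAPD is clear, i.e. direct reclaim. A standalone illustration with made-up flag values (the real PF_* constants live in the kernel headers):

    #include <stdio.h>

    #define PF_MEMALLOC 0x1   /* illustrative values, not the kernel's */
    #define PF_KSWAPD   0x2

    static int refuse_writepage(unsigned flags)
    {
        return (flags & (PF_MEMALLOC | PF_KSWAPD)) == PF_MEMALLOC;
    }

    int main(void)
    {
        printf("%d\n", refuse_writepage(PF_MEMALLOC));              /* 1: direct reclaim */
        printf("%d\n", refuse_writepage(PF_MEMALLOC | PF_KSWAPD));  /* 0: kswapd */
        printf("%d\n", refuse_writepage(0));                        /* 0: normal writeback */
        return 0;
    }
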
-
-STATIC int
-xfs_vm_writepages(
-       struct address_space    *mapping,
-       struct writeback_control *wbc)
-{
-       xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-       return generic_writepages(mapping, wbc);
-}
-
-/*
- * Called to move a page into cleanable state - and from there
- * to be released. The page should already be clean. We always
- * have buffer heads in this call.
- *
- * Returns 1 if the page is ok to release, 0 otherwise.
- */
-STATIC int
-xfs_vm_releasepage(
-       struct page             *page,
-       gfp_t                   gfp_mask)
-{
-       int                     delalloc, unwritten;
-
-       trace_xfs_releasepage(page->mapping->host, page, 0);
-
-       xfs_count_page_state(page, &delalloc, &unwritten);
-
-       if (WARN_ON(delalloc))
-               return 0;
-       if (WARN_ON(unwritten))
-               return 0;
-
-       return try_to_free_buffers(page);
-}
-
-STATIC int
-__xfs_get_blocks(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create,
-       int                     direct)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       int                     error = 0;
-       int                     lockmode = 0;
-       struct xfs_bmbt_irec    imap;
-       int                     nimaps = 1;
-       xfs_off_t               offset;
-       ssize_t                 size;
-       int                     new = 0;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       offset = (xfs_off_t)iblock << inode->i_blkbits;
-       ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
-       size = bh_result->b_size;
-
-       if (!create && direct && offset >= i_size_read(inode))
-               return 0;
-
-       if (create) {
-               lockmode = XFS_ILOCK_EXCL;
-               xfs_ilock(ip, lockmode);
-       } else {
-               lockmode = xfs_ilock_map_shared(ip);
-       }
-
-       ASSERT(offset <= mp->m_maxioffset);
-       if (offset + size > mp->m_maxioffset)
-               size = mp->m_maxioffset - offset;
-       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
-       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
-                         XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
-       if (error)
-               goto out_unlock;
-
-       if (create &&
-           (!nimaps ||
-            (imap.br_startblock == HOLESTARTBLOCK ||
-             imap.br_startblock == DELAYSTARTBLOCK))) {
-               if (direct) {
-                       error = xfs_iomap_write_direct(ip, offset, size,
-                                                      &imap, nimaps);
-               } else {
-                       error = xfs_iomap_write_delay(ip, offset, size, &imap);
-               }
-               if (error)
-                       goto out_unlock;
-
-               trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
-       } else if (nimaps) {
-               trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
-       } else {
-               trace_xfs_get_blocks_notfound(ip, offset, size);
-               goto out_unlock;
-       }
-       xfs_iunlock(ip, lockmode);
-
-       if (imap.br_startblock != HOLESTARTBLOCK &&
-           imap.br_startblock != DELAYSTARTBLOCK) {
-               /*
-                * For unwritten extents do not report a disk address in
-                * the read case (treat as if we're reading into a hole).
-                */
-               if (create || !ISUNWRITTEN(&imap))
-                       xfs_map_buffer(inode, bh_result, &imap, offset);
-               if (create && ISUNWRITTEN(&imap)) {
-                       if (direct)
-                               bh_result->b_private = inode;
-                       set_buffer_unwritten(bh_result);
-               }
-       }
-
-       /*
-        * If this is a realtime file, data may be on a different device
-        * to that pointed to from the buffer_head b_bdev currently.
-        */
-       bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
-       /*
-        * If we previously allocated a block out beyond eof and we are now
-        * coming back to use it then we will need to flag it as new even if it
-        * has a disk address.
-        *
-        * With sub-block writes into unwritten extents we also need to mark
-        * the buffer as new so that the unwritten parts of the buffer get
-        * correctly zeroed.
-        */
-       if (create &&
-           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-            (offset >= i_size_read(inode)) ||
-            (new || ISUNWRITTEN(&imap))))
-               set_buffer_new(bh_result);
-
-       if (imap.br_startblock == DELAYSTARTBLOCK) {
-               BUG_ON(direct);
-               if (create) {
-                       set_buffer_uptodate(bh_result);
-                       set_buffer_mapped(bh_result);
-                       set_buffer_delay(bh_result);
-               }
-       }
-
-       /*
-        * If this is O_DIRECT or the mpage code calling, tell them how large
-        * the mapping is, so that we can avoid repeated get_blocks calls.
-        */
-       if (direct || size > (1 << inode->i_blkbits)) {
-               xfs_off_t               mapping_size;
-
-               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
-               mapping_size <<= inode->i_blkbits;
-
-               ASSERT(mapping_size > 0);
-               if (mapping_size > size)
-                       mapping_size = size;
-               if (mapping_size > LONG_MAX)
-                       mapping_size = LONG_MAX;
-
-               bh_result->b_size = mapping_size;
-       }
-
-       return 0;
-
-out_unlock:
-       xfs_iunlock(ip, lockmode);
-       return -error;
-}
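
The mapping-size clamp at the end of __xfs_get_blocks deserves a worked example: the extent may cover more than the caller asked for, so the reported size is capped at the request (and at LONG_MAX). Assuming 4 KiB blocks and made-up extent numbers:

    #include <stdio.h>
    #include <limits.h>

    int main(void)
    {
        unsigned blkbits = 12;                                /* 4 KiB blocks */
        long long br_startoff = 100, br_blockcount = 50;      /* extent: blocks 100-149 */
        long long iblock = 110, size = 16384;                 /* request at block 110 */
        long long mapping_size;

        mapping_size = (br_startoff + br_blockcount - iblock) << blkbits;
        if (mapping_size > size)
            mapping_size = size;                              /* don't overreport */
        if (mapping_size > LONG_MAX)
            mapping_size = LONG_MAX;
        printf("%lld\n", mapping_size);                       /* 40 blocks, capped: 16384 */
        return 0;
    }
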
-
-int
-xfs_get_blocks(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
-{
-       return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
-}
-
-STATIC int
-xfs_get_blocks_direct(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
-{
-       return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents.  In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done.  But in case this was a successful AIO
- * request this handler is called from interrupt context, from which we
- * can't start transactions.  In that case offload the I/O completion to
- * the workqueues we also use for buffered I/O completion.
- */
-STATIC void
-xfs_end_io_direct_write(
-       struct kiocb            *iocb,
-       loff_t                  offset,
-       ssize_t                 size,
-       void                    *private,
-       int                     ret,
-       bool                    is_async)
-{
-       struct xfs_ioend        *ioend = iocb->private;
-
-       /*
-        * blockdev_direct_IO can return an error even after the I/O
-        * completion handler was called.  Thus we need to protect
-        * against double-freeing.
-        */
-       iocb->private = NULL;
-
-       ioend->io_offset = offset;
-       ioend->io_size = size;
-       if (private && size > 0)
-               ioend->io_type = IO_UNWRITTEN;
-
-       if (is_async) {
-               /*
-                * If we are converting an unwritten extent we need to delay
-                * the AIO completion until after the unwritten extent
-                * conversion has completed, otherwise do it ASAP.
-                */
-               if (ioend->io_type == IO_UNWRITTEN) {
-                       ioend->io_iocb = iocb;
-                       ioend->io_result = ret;
-               } else {
-                       aio_complete(iocb, ret, 0);
-               }
-               xfs_finish_ioend(ioend);
-       } else {
-               xfs_finish_ioend_sync(ioend);
-       }
-
-       /* XXX: probably should move into the real I/O completion handler */
-       inode_dio_done(ioend->io_inode);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-       int                     rw,
-       struct kiocb            *iocb,
-       const struct iovec      *iov,
-       loff_t                  offset,
-       unsigned long           nr_segs)
-{
-       struct inode            *inode = iocb->ki_filp->f_mapping->host;
-       struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
-       ssize_t                 ret;
-
-       if (rw & WRITE) {
-               iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
-
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
-                                           xfs_end_io_direct_write, NULL, 0);
-               if (ret != -EIOCBQUEUED && iocb->private)
-                       xfs_destroy_ioend(iocb->private);
-       } else {
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
-                                           NULL, NULL, 0);
-       }
-
-       return ret;
-}
-
-STATIC void
-xfs_vm_write_failed(
-       struct address_space    *mapping,
-       loff_t                  to)
-{
-       struct inode            *inode = mapping->host;
-
-       if (to > inode->i_size) {
-               /*
-                * punch out the delalloc blocks we have already allocated. We
-                * don't call xfs_setattr() to do this as we may be in the
-                * middle of a multi-iovec write and so the vfs inode->i_size
-                * will not match the xfs ip->i_size and so it will zero too
-                * much. Hence we just truncate the page cache to zero what is
-                * necessary and punch the delalloc blocks directly.
-                */
-               struct xfs_inode        *ip = XFS_I(inode);
-               xfs_fileoff_t           start_fsb;
-               xfs_fileoff_t           end_fsb;
-               int                     error;
-
-               truncate_pagecache(inode, to, inode->i_size);
-
-               /*
-                * Check if there are any blocks that are outside of i_size
-                * that need to be trimmed back.
-                */
-               start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
-               end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
-               if (end_fsb <= start_fsb)
-                       return;
-
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-               error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-                                                       end_fsb - start_fsb);
-               if (error) {
-                       /* something screwed, just bail */
-                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                               xfs_alert(ip->i_mount,
-                       "xfs_vm_write_failed: unable to clean up ino %lld",
-                                               ip->i_ino);
-                       }
-               }
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-STATIC int
-xfs_vm_write_begin(
-       struct file             *file,
-       struct address_space    *mapping,
-       loff_t                  pos,
-       unsigned                len,
-       unsigned                flags,
-       struct page             **pagep,
-       void                    **fsdata)
-{
-       int                     ret;
-
-       ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
-                               pagep, xfs_get_blocks);
-       if (unlikely(ret))
-               xfs_vm_write_failed(mapping, pos + len);
-       return ret;
-}
-
-STATIC int
-xfs_vm_write_end(
-       struct file             *file,
-       struct address_space    *mapping,
-       loff_t                  pos,
-       unsigned                len,
-       unsigned                copied,
-       struct page             *page,
-       void                    *fsdata)
-{
-       int                     ret;
-
-       ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-       if (unlikely(ret < len))
-               xfs_vm_write_failed(mapping, pos + len);
-       return ret;
-}
-
-STATIC sector_t
-xfs_vm_bmap(
-       struct address_space    *mapping,
-       sector_t                block)
-{
-       struct inode            *inode = (struct inode *)mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-
-       trace_xfs_vm_bmap(XFS_I(inode));
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-       return generic_block_bmap(mapping, block, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpage(
-       struct file             *unused,
-       struct page             *page)
-{
-       return mpage_readpage(page, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpages(
-       struct file             *unused,
-       struct address_space    *mapping,
-       struct list_head        *pages,
-       unsigned                nr_pages)
-{
-       return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
-}
-
-const struct address_space_operations xfs_address_space_operations = {
-       .readpage               = xfs_vm_readpage,
-       .readpages              = xfs_vm_readpages,
-       .writepage              = xfs_vm_writepage,
-       .writepages             = xfs_vm_writepages,
-       .releasepage            = xfs_vm_releasepage,
-       .invalidatepage         = xfs_vm_invalidatepage,
-       .write_begin            = xfs_vm_write_begin,
-       .write_end              = xfs_vm_write_end,
-       .bmap                   = xfs_vm_bmap,
-       .direct_IO              = xfs_vm_direct_IO,
-       .migratepage            = buffer_migrate_page,
-       .is_partially_uptodate  = block_is_partially_uptodate,
-       .error_remove_page      = generic_error_remove_page,
-};
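
For orientation, the VFS reaches every method in this table through the inode's mapping rather than calling the filesystem directly. A simplified userspace model of that dispatch (not kernel code; types are stand-ins):

    #include <stdio.h>

    struct file;   /* opaque stand-ins for the kernel types */
    struct page;

    struct a_ops {
        int (*readpage)(struct file *, struct page *);
    };

    static int demo_readpage(struct file *f, struct page *p)
    {
        (void)f; (void)p;
        puts("readpage reached through the ops table");
        return 0;
    }

    static const struct a_ops demo_aops = { .readpage = demo_readpage };

    int main(void)
    {
        return demo_aops.readpage(NULL, NULL);   /* as mapping->a_ops->readpage() */
    }
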
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
deleted file mode 100644 (file)
index 71f721e..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_AOPS_H__
-#define __XFS_AOPS_H__
-
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
-extern mempool_t *xfs_ioend_pool;
-
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
-       IO_DIRECT = 0,  /* special case for direct I/O ioends */
-       IO_DELALLOC,    /* mapping covers delalloc region */
-       IO_UNWRITTEN,   /* mapping covers allocated but uninitialized data */
-       IO_OVERWRITE,   /* mapping covers already allocated extent */
-};
-
-#define XFS_IO_TYPES \
-       { 0,                    "" }, \
-       { IO_DELALLOC,          "delalloc" }, \
-       { IO_UNWRITTEN,         "unwritten" }, \
-       { IO_OVERWRITE,         "overwrite" }
-
-/*
- * xfs_ioend struct manages large extent writes for XFS.
- * It can manage several multi-page bios at once.
- */
-typedef struct xfs_ioend {
-       struct xfs_ioend        *io_list;       /* next ioend in chain */
-       unsigned int            io_type;        /* one of the IO_* types */
-       int                     io_error;       /* I/O error code */
-       atomic_t                io_remaining;   /* hold count */
-       struct inode            *io_inode;      /* file being written to */
-       struct buffer_head      *io_buffer_head;/* buffer linked list head */
-       struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
-       size_t                  io_size;        /* size of the extent */
-       xfs_off_t               io_offset;      /* offset in the file */
-       struct work_struct      io_work;        /* xfsdatad work queue */
-       struct kiocb            *io_iocb;
-       int                     io_result;
-} xfs_ioend_t;
-
-extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
-
-extern void xfs_ioend_init(void);
-extern void xfs_ioend_wait(struct xfs_inode *);
-
-extern void xfs_count_page_state(struct page *, int *, int *);
-
-#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
deleted file mode 100644 (file)
index c57836d..0000000
+++ /dev/null
@@ -1,1876 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
-#include <linux/backing-dev.h>
-#include <linux/freezer.h>
-
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_trace.h"
-
-static kmem_zone_t *xfs_buf_zone;
-STATIC int xfsbufd(void *);
-STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-
-static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
-
-#ifdef XFS_BUF_LOCK_TRACKING
-# define XB_SET_OWNER(bp)      ((bp)->b_last_holder = current->pid)
-# define XB_CLEAR_OWNER(bp)    ((bp)->b_last_holder = -1)
-# define XB_GET_OWNER(bp)      ((bp)->b_last_holder)
-#else
-# define XB_SET_OWNER(bp)      do { } while (0)
-# define XB_CLEAR_OWNER(bp)    do { } while (0)
-# define XB_GET_OWNER(bp)      do { } while (0)
-#endif
-
-#define xb_to_gfp(flags) \
-       ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
-         ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-
-#define xb_to_km(flags) \
-        (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-
-#define xfs_buf_allocate(flags) \
-       kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
-#define xfs_buf_deallocate(bp) \
-       kmem_zone_free(xfs_buf_zone, (bp))
-
-static inline int
-xfs_buf_is_vmapped(
-       struct xfs_buf  *bp)
-{
-       /*
-        * Return true if the buffer is vmapped.
-        *
-        * The XBF_MAPPED flag is set if the buffer should be mapped, but the
-        * code is clever enough to know it doesn't have to map a single page,
-        * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
-        */
-       return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
-}
-
-static inline int
-xfs_buf_vmap_len(
-       struct xfs_buf  *bp)
-{
-       return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
-}
-
-/*
- * xfs_buf_lru_add - add a buffer to the LRU.
- *
- * The LRU takes a new reference to the buffer so that it will only be freed
- * once the shrinker takes the buffer off the LRU.
- */
-STATIC void
-xfs_buf_lru_add(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-
-       spin_lock(&btp->bt_lru_lock);
-       if (list_empty(&bp->b_lru)) {
-               atomic_inc(&bp->b_hold);
-               list_add_tail(&bp->b_lru, &btp->bt_lru);
-               btp->bt_lru_nr++;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * xfs_buf_lru_del - remove a buffer from the LRU
- *
- * The unlocked check is safe here because it only occurs when there are no
- * b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It is
- * there to optimise the shrinker removing the buffer from the LRU and
- * calling xfs_buf_free(), i.e. it removes an unnecessary round trip on the
- * bt_lru_lock.
- */
-STATIC void
-xfs_buf_lru_del(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-
-       if (list_empty(&bp->b_lru))
-               return;
-
-       spin_lock(&btp->bt_lru_lock);
-       if (!list_empty(&bp->b_lru)) {
-               list_del_init(&bp->b_lru);
-               btp->bt_lru_nr--;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * When we mark a buffer stale, we remove the buffer from the LRU and clear the
- * b_lru_ref count so that the buffer is freed immediately when the buffer
- * reference count falls to zero. If the buffer is already on the LRU, we need
- * to remove the reference that LRU holds on the buffer.
- *
- * This prevents build-up of stale buffers on the LRU.
- */
-void
-xfs_buf_stale(
-       struct xfs_buf  *bp)
-{
-       bp->b_flags |= XBF_STALE;
-       atomic_set(&(bp)->b_lru_ref, 0);
-       if (!list_empty(&bp->b_lru)) {
-               struct xfs_buftarg *btp = bp->b_target;
-
-               spin_lock(&btp->bt_lru_lock);
-               if (!list_empty(&bp->b_lru)) {
-                       list_del_init(&bp->b_lru);
-                       btp->bt_lru_nr--;
-                       atomic_dec(&bp->b_hold);
-               }
-               spin_unlock(&btp->bt_lru_lock);
-       }
-       ASSERT(atomic_read(&bp->b_hold) >= 1);
-}
-
-STATIC void
-_xfs_buf_initialize(
-       xfs_buf_t               *bp,
-       xfs_buftarg_t           *target,
-       xfs_off_t               range_base,
-       size_t                  range_length,
-       xfs_buf_flags_t         flags)
-{
-       /*
-        * We don't want certain flags to appear in b_flags.
-        */
-       flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
-
-       memset(bp, 0, sizeof(xfs_buf_t));
-       atomic_set(&bp->b_hold, 1);
-       atomic_set(&bp->b_lru_ref, 1);
-       init_completion(&bp->b_iowait);
-       INIT_LIST_HEAD(&bp->b_lru);
-       INIT_LIST_HEAD(&bp->b_list);
-       RB_CLEAR_NODE(&bp->b_rbnode);
-       sema_init(&bp->b_sema, 0); /* held, no waiters */
-       XB_SET_OWNER(bp);
-       bp->b_target = target;
-       bp->b_file_offset = range_base;
-       /*
-        * Set buffer_length and count_desired to the same value initially.
-        * I/O routines should use count_desired, which will be the same in
-        * most cases but may be reset (e.g. XFS recovery).
-        */
-       bp->b_buffer_length = bp->b_count_desired = range_length;
-       bp->b_flags = flags;
-       bp->b_bn = XFS_BUF_DADDR_NULL;
-       atomic_set(&bp->b_pin_count, 0);
-       init_waitqueue_head(&bp->b_waiters);
-
-       XFS_STATS_INC(xb_create);
-
-       trace_xfs_buf_init(bp, _RET_IP_);
-}
-
-/*
- *     Allocate a page array capable of holding a specified number
- *     of pages, and point the page buf at it.
- */
-STATIC int
-_xfs_buf_get_pages(
-       xfs_buf_t               *bp,
-       int                     page_count,
-       xfs_buf_flags_t         flags)
-{
-       /* Make sure that we have a page list */
-       if (bp->b_pages == NULL) {
-               bp->b_offset = xfs_buf_poff(bp->b_file_offset);
-               bp->b_page_count = page_count;
-               if (page_count <= XB_PAGES) {
-                       bp->b_pages = bp->b_page_array;
-               } else {
-                       bp->b_pages = kmem_alloc(sizeof(struct page *) *
-                                       page_count, xb_to_km(flags));
-                       if (bp->b_pages == NULL)
-                               return -ENOMEM;
-               }
-               memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
-       }
-       return 0;
-}
-
-/*
- *     Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
-       xfs_buf_t       *bp)
-{
-       if (bp->b_pages != bp->b_page_array) {
-               kmem_free(bp->b_pages);
-               bp->b_pages = NULL;
-       }
-}
-
-/*
- *     Releases the specified buffer.
- *
- *     The modification state of any associated pages is left unchanged.
- *     The buffer must not be on any hash - use xfs_buf_rele instead for
- *     hashed and refcounted buffers.
- */
-void
-xfs_buf_free(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_free(bp, _RET_IP_);
-
-       ASSERT(list_empty(&bp->b_lru));
-
-       if (bp->b_flags & _XBF_PAGES) {
-               uint            i;
-
-               if (xfs_buf_is_vmapped(bp))
-                       vm_unmap_ram(bp->b_addr - bp->b_offset,
-                                       bp->b_page_count);
-
-               for (i = 0; i < bp->b_page_count; i++) {
-                       struct page     *page = bp->b_pages[i];
-
-                       __free_page(page);
-               }
-       } else if (bp->b_flags & _XBF_KMEM)
-               kmem_free(bp->b_addr);
-       _xfs_buf_free_pages(bp);
-       xfs_buf_deallocate(bp);
-}
-
-/*
- * Allocates all the pages for the buffer in question and builds its page list.
- */
-STATIC int
-xfs_buf_allocate_memory(
-       xfs_buf_t               *bp,
-       uint                    flags)
-{
-       size_t                  size = bp->b_count_desired;
-       size_t                  nbytes, offset;
-       gfp_t                   gfp_mask = xb_to_gfp(flags);
-       unsigned short          page_count, i;
-       xfs_off_t               end;
-       int                     error;
-
-       /*
-        * for buffers that are contained within a single page, just allocate
-        * the memory from the heap - there's no need for the complexity of
-        * page arrays to keep allocation down to order 0.
-        */
-       if (bp->b_buffer_length < PAGE_SIZE) {
-               bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
-               if (!bp->b_addr) {
-                       /* low memory - use alloc_page loop instead */
-                       goto use_alloc_page;
-               }
-
-               if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
-                                                               PAGE_MASK) !=
-                   ((unsigned long)bp->b_addr & PAGE_MASK)) {
-                       /* b_addr spans two pages - use alloc_page instead */
-                       kmem_free(bp->b_addr);
-                       bp->b_addr = NULL;
-                       goto use_alloc_page;
-               }
-               bp->b_offset = offset_in_page(bp->b_addr);
-               bp->b_pages = bp->b_page_array;
-               bp->b_pages[0] = virt_to_page(bp->b_addr);
-               bp->b_page_count = 1;
-               bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
-               return 0;
-       }
-
-use_alloc_page:
-       end = bp->b_file_offset + bp->b_buffer_length;
-       page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-       error = _xfs_buf_get_pages(bp, page_count, flags);
-       if (unlikely(error))
-               return error;
-
-       offset = bp->b_offset;
-       bp->b_flags |= _XBF_PAGES;
-
-       for (i = 0; i < bp->b_page_count; i++) {
-               struct page     *page;
-               uint            retries = 0;
-retry:
-               page = alloc_page(gfp_mask);
-               if (unlikely(page == NULL)) {
-                       if (flags & XBF_READ_AHEAD) {
-                               bp->b_page_count = i;
-                               error = ENOMEM;
-                               goto out_free_pages;
-                       }
-
-                       /*
-                        * This could deadlock.
-                        *
-                        * But until all the XFS lowlevel code is revamped to
-                        * handle buffer allocation failures we can't do much.
-                        */
-                       if (!(++retries % 100))
-                               xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, gfp_mask);
-
-                       XFS_STATS_INC(xb_page_retries);
-                       congestion_wait(BLK_RW_ASYNC, HZ/50);
-                       goto retry;
-               }
-
-               XFS_STATS_INC(xb_page_found);
-
-               nbytes = min_t(size_t, size, PAGE_SIZE - offset);
-               size -= nbytes;
-               bp->b_pages[i] = page;
-               offset = 0;
-       }
-       return 0;
-
-out_free_pages:
-       for (i = 0; i < bp->b_page_count; i++)
-               __free_page(bp->b_pages[i]);
-       return error;
-}
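
The heap path above depends on a page-crossing test: a sub-page kmem allocation is only usable if its first and last bytes fall within one page, since the buffer code maps it through a single page pointer. The same check in isolation, assuming 4 KiB pages:

    #include <stdio.h>

    #define PAGE_MASK (~4095UL)   /* 4 KiB pages */

    static int spans_two_pages(unsigned long addr, unsigned long length)
    {
        return ((addr + length - 1) & PAGE_MASK) != (addr & PAGE_MASK);
    }

    int main(void)
    {
        printf("%d\n", spans_two_pages(0x10F00UL, 512));   /* 1: crosses a boundary */
        printf("%d\n", spans_two_pages(0x10100UL, 512));   /* 0: fits in one page */
        return 0;
    }
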
-
-/*
- *     Map buffer into kernel address-space if necessary.
- */
-STATIC int
-_xfs_buf_map_pages(
-       xfs_buf_t               *bp,
-       uint                    flags)
-{
-       ASSERT(bp->b_flags & _XBF_PAGES);
-       if (bp->b_page_count == 1) {
-               /* A single page buffer is always mappable */
-               bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
-               bp->b_flags |= XBF_MAPPED;
-       } else if (flags & XBF_MAPPED) {
-               int retried = 0;
-
-               do {
-                       bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-                                               -1, PAGE_KERNEL);
-                       if (bp->b_addr)
-                               break;
-                       vm_unmap_aliases();
-               } while (retried++ <= 1);
-
-               if (!bp->b_addr)
-                       return -ENOMEM;
-               bp->b_addr += bp->b_offset;
-               bp->b_flags |= XBF_MAPPED;
-       }
-
-       return 0;
-}
-
-/*
- *     Finding and Reading Buffers
- */
-
-/*
- *     Looks up, and creates if absent, a lockable buffer for
- *     a given range of an inode.  The buffer is returned
- *     locked.  If other overlapping buffers exist, they are
- *     released before the new buffer is created and locked,
- *     which may imply that this call will block until those buffers
- *     are unlocked.  No I/O is implied by this call.
- */
-xfs_buf_t *
-_xfs_buf_find(
-       xfs_buftarg_t           *btp,   /* block device target          */
-       xfs_off_t               ioff,   /* starting offset of range     */
-       size_t                  isize,  /* length of range              */
-       xfs_buf_flags_t         flags,
-       xfs_buf_t               *new_bp)
-{
-       xfs_off_t               range_base;
-       size_t                  range_length;
-       struct xfs_perag        *pag;
-       struct rb_node          **rbp;
-       struct rb_node          *parent;
-       xfs_buf_t               *bp;
-
-       range_base = (ioff << BBSHIFT);
-       range_length = (isize << BBSHIFT);
-
-       /* Check for IOs smaller than the sector size / not sector aligned */
-       ASSERT(!(range_length < (1 << btp->bt_sshift)));
-       ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
-
-       /* get tree root */
-       pag = xfs_perag_get(btp->bt_mount,
-                               xfs_daddr_to_agno(btp->bt_mount, ioff));
-
-       /* walk tree */
-       spin_lock(&pag->pag_buf_lock);
-       rbp = &pag->pag_buf_tree.rb_node;
-       parent = NULL;
-       bp = NULL;
-       while (*rbp) {
-               parent = *rbp;
-               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
-
-               if (range_base < bp->b_file_offset)
-                       rbp = &(*rbp)->rb_left;
-               else if (range_base > bp->b_file_offset)
-                       rbp = &(*rbp)->rb_right;
-               else {
-                       /*
-                        * found a block offset match. If the range doesn't
-                        * match, the only way this is allowed is if the buffer
-                        * in the cache is stale and the transaction that made
-                        * it stale has not yet committed. i.e. we are
-                        * reallocating a busy extent. Skip this buffer and
-                        * continue searching to the right for an exact match.
-                        */
-                       if (bp->b_buffer_length != range_length) {
-                               ASSERT(bp->b_flags & XBF_STALE);
-                               rbp = &(*rbp)->rb_right;
-                               continue;
-                       }
-                       atomic_inc(&bp->b_hold);
-                       goto found;
-               }
-       }
-
-       /* No match found */
-       if (new_bp) {
-               _xfs_buf_initialize(new_bp, btp, range_base,
-                               range_length, flags);
-               rb_link_node(&new_bp->b_rbnode, parent, rbp);
-               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
-               /* the buffer keeps the perag reference until it is freed */
-               new_bp->b_pag = pag;
-               spin_unlock(&pag->pag_buf_lock);
-       } else {
-               XFS_STATS_INC(xb_miss_locked);
-               spin_unlock(&pag->pag_buf_lock);
-               xfs_perag_put(pag);
-       }
-       return new_bp;
-
-found:
-       spin_unlock(&pag->pag_buf_lock);
-       xfs_perag_put(pag);
-
-       if (!xfs_buf_trylock(bp)) {
-               if (flags & XBF_TRYLOCK) {
-                       xfs_buf_rele(bp);
-                       XFS_STATS_INC(xb_busy_locked);
-                       return NULL;
-               }
-               xfs_buf_lock(bp);
-               XFS_STATS_INC(xb_get_locked_waited);
-       }
-
-       /*
-        * if the buffer is stale, clear all the external state associated with
-        * it. We need to keep flags such as how we allocated the buffer memory
-        * intact here.
-        */
-       if (bp->b_flags & XBF_STALE) {
-               ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-               bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
-       }
-
-       trace_xfs_buf_find(bp, flags, _RET_IP_);
-       XFS_STATS_INC(xb_get_locked);
-       return bp;
-}
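
One subtlety in the walk above: an offset match with the wrong length can only be a stale buffer from a freed-and-reallocated extent, so the search continues to the right, where a live exact match may still sit. A plain binary-search-tree model of that walk (the kernel uses an rbtree; names here are illustrative):

    #include <stddef.h>

    struct cbuf {
        struct cbuf *left, *right;
        unsigned long long offset, length;
    };

    static struct cbuf *find_buf(struct cbuf *root,
                                 unsigned long long offset,
                                 unsigned long long length)
    {
        while (root) {
            if (offset < root->offset)
                root = root->left;
            else if (offset > root->offset)
                root = root->right;
            else if (root->length != length)
                root = root->right;   /* stale buffer: keep searching right */
            else
                return root;          /* exact match */
        }
        return NULL;                  /* caller inserts a new buffer here */
    }

    int main(void)
    {
        return find_buf(NULL, 0, 0) == NULL ? 0 : 1;
    }
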
-
-/*
- *     Assembles a buffer covering the specified range.
- *     Storage in memory for all portions of the buffer will be allocated,
- *     although backing storage may not be.
- */
-xfs_buf_t *
-xfs_buf_get(
-       xfs_buftarg_t           *target,/* target for buffer            */
-       xfs_off_t               ioff,   /* starting offset of range     */
-       size_t                  isize,  /* length of range              */
-       xfs_buf_flags_t         flags)
-{
-       xfs_buf_t               *bp, *new_bp;
-       int                     error = 0;
-
-       new_bp = xfs_buf_allocate(flags);
-       if (unlikely(!new_bp))
-               return NULL;
-
-       bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
-       if (bp == new_bp) {
-               error = xfs_buf_allocate_memory(bp, flags);
-               if (error)
-                       goto no_buffer;
-       } else {
-               xfs_buf_deallocate(new_bp);
-               if (unlikely(bp == NULL))
-                       return NULL;
-       }
-
-       if (!(bp->b_flags & XBF_MAPPED)) {
-               error = _xfs_buf_map_pages(bp, flags);
-               if (unlikely(error)) {
-                       xfs_warn(target->bt_mount,
-                               "%s: failed to map pages\n", __func__);
-                       goto no_buffer;
-               }
-       }
-
-       XFS_STATS_INC(xb_get);
-
-       /*
-        * Always fill in the block number now; the mapped cases can do
-        * their own overlay of this later.
-        */
-       bp->b_bn = ioff;
-       bp->b_count_desired = bp->b_buffer_length;
-
-       trace_xfs_buf_get(bp, flags, _RET_IP_);
-       return bp;
-
- no_buffer:
-       if (flags & (XBF_LOCK | XBF_TRYLOCK))
-               xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-       return NULL;
-}
-
-STATIC int
-_xfs_buf_read(
-       xfs_buf_t               *bp,
-       xfs_buf_flags_t         flags)
-{
-       int                     status;
-
-       ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
-       ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
-
-       bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
-       bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
-
-       status = xfs_buf_iorequest(bp);
-       if (status || bp->b_error || (flags & XBF_ASYNC))
-               return status;
-       return xfs_buf_iowait(bp);
-}
-
-xfs_buf_t *
-xfs_buf_read(
-       xfs_buftarg_t           *target,
-       xfs_off_t               ioff,
-       size_t                  isize,
-       xfs_buf_flags_t         flags)
-{
-       xfs_buf_t               *bp;
-
-       flags |= XBF_READ;
-
-       bp = xfs_buf_get(target, ioff, isize, flags);
-       if (bp) {
-               trace_xfs_buf_read(bp, flags, _RET_IP_);
-
-               if (!XFS_BUF_ISDONE(bp)) {
-                       XFS_STATS_INC(xb_get_read);
-                       _xfs_buf_read(bp, flags);
-               } else if (flags & XBF_ASYNC) {
-                       /*
-                        * Read ahead call which is already satisfied,
-                        * drop the buffer
-                        */
-                       goto no_buffer;
-               } else {
-                       /* We do not want read in the flags */
-                       bp->b_flags &= ~XBF_READ;
-               }
-       }
-
-       return bp;
-
- no_buffer:
-       if (flags & (XBF_LOCK | XBF_TRYLOCK))
-               xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-       return NULL;
-}
-
-/*
- *     If we are not low on memory then do the readahead in a
- *     deadlock-safe manner.
- */
-void
-xfs_buf_readahead(
-       xfs_buftarg_t           *target,
-       xfs_off_t               ioff,
-       size_t                  isize)
-{
-       if (bdi_read_congested(target->bt_bdi))
-               return;
-
-       xfs_buf_read(target, ioff, isize,
-                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
-}
-
-/*
- * Read an uncached buffer from disk. Allocates and returns a locked
- * buffer containing the disk contents or nothing.
- */
-struct xfs_buf *
-xfs_buf_read_uncached(
-       struct xfs_mount        *mp,
-       struct xfs_buftarg      *target,
-       xfs_daddr_t             daddr,
-       size_t                  length,
-       int                     flags)
-{
-       xfs_buf_t               *bp;
-       int                     error;
-
-       bp = xfs_buf_get_uncached(target, length, flags);
-       if (!bp)
-               return NULL;
-
-       /* set up the buffer for a read IO */
-       XFS_BUF_SET_ADDR(bp, daddr);
-       XFS_BUF_READ(bp);
-
-       xfsbdstrat(mp, bp);
-       error = xfs_buf_iowait(bp);
-       if (error || bp->b_error) {
-               xfs_buf_relse(bp);
-               return NULL;
-       }
-       return bp;
-}
-
-xfs_buf_t *
-xfs_buf_get_empty(
-       size_t                  len,
-       xfs_buftarg_t           *target)
-{
-       xfs_buf_t               *bp;
-
-       bp = xfs_buf_allocate(0);
-       if (bp)
-               _xfs_buf_initialize(bp, target, 0, len, 0);
-       return bp;
-}
-
-/*
- * Return a buffer allocated as an empty buffer and associated to external
- * memory via xfs_buf_associate_memory() back to its empty state.
- */
-void
-xfs_buf_set_empty(
-       struct xfs_buf          *bp,
-       size_t                  len)
-{
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_page_count = 0;
-       bp->b_addr = NULL;
-       bp->b_file_offset = 0;
-       bp->b_buffer_length = bp->b_count_desired = len;
-       bp->b_bn = XFS_BUF_DADDR_NULL;
-       bp->b_flags &= ~XBF_MAPPED;
-}
-
-static inline struct page *
-mem_to_page(
-       void                    *addr)
-{
-       if ((!is_vmalloc_addr(addr))) {
-               return virt_to_page(addr);
-       } else {
-               return vmalloc_to_page(addr);
-       }
-}
-
-int
-xfs_buf_associate_memory(
-       xfs_buf_t               *bp,
-       void                    *mem,
-       size_t                  len)
-{
-       int                     rval;
-       int                     i = 0;
-       unsigned long           pageaddr;
-       unsigned long           offset;
-       size_t                  buflen;
-       int                     page_count;
-
-       pageaddr = (unsigned long)mem & PAGE_MASK;
-       offset = (unsigned long)mem - pageaddr;
-       buflen = PAGE_ALIGN(len + offset);
-       page_count = buflen >> PAGE_SHIFT;
-
-       /* Free any previous set of page pointers */
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_addr = mem;
-
-       rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
-       if (rval)
-               return rval;
-
-       bp->b_offset = offset;
-
-       for (i = 0; i < bp->b_page_count; i++) {
-               bp->b_pages[i] = mem_to_page((void *)pageaddr);
-               pageaddr += PAGE_SIZE;
-       }
-
-       bp->b_count_desired = len;
-       bp->b_buffer_length = buflen;
-       bp->b_flags |= XBF_MAPPED;
-
-       return 0;
-}
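
To make the offset and page-count arithmetic above concrete, here is a small
user-space model (illustration only; it assumes PAGE_SIZE == 4096, i.e.
PAGE_SHIFT == 12, and elides all of the xfs_buf machinery):

    #include <stdio.h>

    #define PAGE_SIZE       4096UL
    #define PAGE_MASK       (~(PAGE_SIZE - 1))
    #define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & PAGE_MASK)

    int main(void)
    {
            unsigned long mem = 0x12345123UL;   /* hypothetical address */
            unsigned long len = 5000;

            unsigned long pageaddr = mem & PAGE_MASK;          /* 0x12345000 */
            unsigned long offset = mem - pageaddr;             /* 0x123 == 291 */
            unsigned long buflen = PAGE_ALIGN(len + offset);   /* 8192 */
            unsigned long pages = buflen >> 12;                /* 2 pages */

            printf("offset=%lu buflen=%lu pages=%lu\n", offset, buflen, pages);
            return 0;
    }
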
-
-xfs_buf_t *
-xfs_buf_get_uncached(
-       struct xfs_buftarg      *target,
-       size_t                  len,
-       int                     flags)
-{
-       unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
-       int                     error, i;
-       xfs_buf_t               *bp;
-
-       bp = xfs_buf_allocate(0);
-       if (unlikely(bp == NULL))
-               goto fail;
-       _xfs_buf_initialize(bp, target, 0, len, 0);
-
-       error = _xfs_buf_get_pages(bp, page_count, 0);
-       if (error)
-               goto fail_free_buf;
-
-       for (i = 0; i < page_count; i++) {
-               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-               if (!bp->b_pages[i])
-                       goto fail_free_mem;
-       }
-       bp->b_flags |= _XBF_PAGES;
-
-       error = _xfs_buf_map_pages(bp, XBF_MAPPED);
-       if (unlikely(error)) {
-               xfs_warn(target->bt_mount,
-                       "%s: failed to map pages\n", __func__);
-               goto fail_free_mem;
-       }
-
-       trace_xfs_buf_get_uncached(bp, _RET_IP_);
-       return bp;
-
- fail_free_mem:
-       while (--i >= 0)
-               __free_page(bp->b_pages[i]);
-       _xfs_buf_free_pages(bp);
- fail_free_buf:
-       xfs_buf_deallocate(bp);
- fail:
-       return NULL;
-}
-
-/*
- *     Increment the reference count on a buffer, to hold it concurrently
- *     with another thread which may release (free) the buffer asynchronously.
- *     The caller must already hold the buffer in order to call this function.
- */
-void
-xfs_buf_hold(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_hold(bp, _RET_IP_);
-       atomic_inc(&bp->b_hold);
-}
-
-/*
- *     Releases a hold on the specified buffer.  If the
- *     hold count is 1, calls xfs_buf_free.
- */
-void
-xfs_buf_rele(
-       xfs_buf_t               *bp)
-{
-       struct xfs_perag        *pag = bp->b_pag;
-
-       trace_xfs_buf_rele(bp, _RET_IP_);
-
-       if (!pag) {
-               ASSERT(list_empty(&bp->b_lru));
-               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
-               if (atomic_dec_and_test(&bp->b_hold))
-                       xfs_buf_free(bp);
-               return;
-       }
-
-       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
-
-       ASSERT(atomic_read(&bp->b_hold) > 0);
-       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
-               if (!(bp->b_flags & XBF_STALE) &&
-                          atomic_read(&bp->b_lru_ref)) {
-                       xfs_buf_lru_add(bp);
-                       spin_unlock(&pag->pag_buf_lock);
-               } else {
-                       xfs_buf_lru_del(bp);
-                       ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
-                       spin_unlock(&pag->pag_buf_lock);
-                       xfs_perag_put(pag);
-                       xfs_buf_free(bp);
-               }
-       }
-}
-
-
-/*
- *     Lock a buffer object, if it is not already locked.
- *
- *     If we come across a stale, pinned, locked buffer, we know that we are
- *     being asked to lock a buffer that has been reallocated. Because it is
- *     pinned, we know that the log has not been pushed to disk and hence it
- *     will still be locked.  Rather than continuing to have trylock attempts
- *     fail until someone else pushes the log, push it ourselves before
- *     returning.  This means that the xfsaild will not get stuck trying
- *     to push on stale inode buffers.
- */
-int
-xfs_buf_trylock(
-       struct xfs_buf          *bp)
-{
-       int                     locked;
-
-       locked = down_trylock(&bp->b_sema) == 0;
-       if (locked)
-               XB_SET_OWNER(bp);
-       else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_target->bt_mount, 0);
-
-       trace_xfs_buf_trylock(bp, _RET_IP_);
-       return locked;
-}
-
-/*
- *     Lock a buffer object.
- *
- *     If we come across a stale, pinned, locked buffer, we know that we
- *     are being asked to lock a buffer that has been reallocated. Because
- *     it is pinned, we know that the log has not been pushed to disk and
- *     hence it will still be locked. Rather than sleeping until someone
- *     else pushes the log, push it ourselves before trying to get the lock.
- */
-void
-xfs_buf_lock(
-       struct xfs_buf          *bp)
-{
-       trace_xfs_buf_lock(bp, _RET_IP_);
-
-       if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_target->bt_mount, 0);
-       down(&bp->b_sema);
-       XB_SET_OWNER(bp);
-
-       trace_xfs_buf_lock_done(bp, _RET_IP_);
-}
-
-/*
- *     Releases the lock on the buffer object.
- *     If the buffer is marked delwri but is not queued, queue it before we
- *     unlock the buffer as we need to set flags correctly.  We also need to
- *     take a reference for the delwri queue because the unlocker is going to
- *     drop theirs, and they don't know we just queued it.
- */
-void
-xfs_buf_unlock(
-       struct xfs_buf          *bp)
-{
-       if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
-               atomic_inc(&bp->b_hold);
-               bp->b_flags |= XBF_ASYNC;
-               xfs_buf_delwri_queue(bp, 0);
-       }
-
-       XB_CLEAR_OWNER(bp);
-       up(&bp->b_sema);
-
-       trace_xfs_buf_unlock(bp, _RET_IP_);
-}
-
-STATIC void
-xfs_buf_wait_unpin(
-       xfs_buf_t               *bp)
-{
-       DECLARE_WAITQUEUE       (wait, current);
-
-       if (atomic_read(&bp->b_pin_count) == 0)
-               return;
-
-       add_wait_queue(&bp->b_waiters, &wait);
-       for (;;) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               if (atomic_read(&bp->b_pin_count) == 0)
-                       break;
-               io_schedule();
-       }
-       remove_wait_queue(&bp->b_waiters, &wait);
-       set_current_state(TASK_RUNNING);
-}
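
The loop above is the classic open-coded wait-queue pattern. A rough
equivalent using the generic helper would be the one-liner below (a sketch
only; the open-coded form is kept so the sleep goes through io_schedule()
and is accounted as I/O wait):

    wait_event(bp->b_waiters, atomic_read(&bp->b_pin_count) == 0);
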
-
-/*
- *     Buffer Utility Routines
- */
-
-STATIC void
-xfs_buf_iodone_work(
-       struct work_struct      *work)
-{
-       xfs_buf_t               *bp =
-               container_of(work, xfs_buf_t, b_iodone_work);
-
-       if (bp->b_iodone)
-               (*(bp->b_iodone))(bp);
-       else if (bp->b_flags & XBF_ASYNC)
-               xfs_buf_relse(bp);
-}
-
-void
-xfs_buf_ioend(
-       xfs_buf_t               *bp,
-       int                     schedule)
-{
-       trace_xfs_buf_iodone(bp, _RET_IP_);
-
-       bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-       if (bp->b_error == 0)
-               bp->b_flags |= XBF_DONE;
-
-       if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
-               if (schedule) {
-                       INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
-                       queue_work(xfslogd_workqueue, &bp->b_iodone_work);
-               } else {
-                       xfs_buf_iodone_work(&bp->b_iodone_work);
-               }
-       } else {
-               complete(&bp->b_iowait);
-       }
-}
-
-void
-xfs_buf_ioerror(
-       xfs_buf_t               *bp,
-       int                     error)
-{
-       ASSERT(error >= 0 && error <= 0xffff);
-       bp->b_error = (unsigned short)error;
-       trace_xfs_buf_ioerror(bp, error, _RET_IP_);
-}
-
-int
-xfs_bwrite(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
-{
-       int                     error;
-
-       bp->b_flags |= XBF_WRITE;
-       bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
-
-       xfs_buf_delwri_dequeue(bp);
-       xfs_bdstrat_cb(bp);
-
-       error = xfs_buf_iowait(bp);
-       if (error)
-               xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-       xfs_buf_relse(bp);
-       return error;
-}
-
-void
-xfs_bdwrite(
-       void                    *mp,
-       struct xfs_buf          *bp)
-{
-       trace_xfs_buf_bdwrite(bp, _RET_IP_);
-
-       bp->b_flags &= ~XBF_READ;
-       bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
-
-       xfs_buf_delwri_queue(bp, 1);
-}
-
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
-       xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
-       ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-       /*
-        * No need to wait until the buffer is unpinned; we aren't flushing it.
-        */
-       xfs_buf_ioerror(bp, EIO);
-
-       /*
-        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
-        */
-       XFS_BUF_UNREAD(bp);
-       XFS_BUF_UNDELAYWRITE(bp);
-       XFS_BUF_UNDONE(bp);
-       XFS_BUF_STALE(bp);
-
-       xfs_buf_ioend(bp, 0);
-
-       return EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-STATIC int
-xfs_bioerror_relse(
-       struct xfs_buf  *bp)
-{
-       int64_t         fl = bp->b_flags;
-       /*
-        * No need to wait until the buffer is unpinned.
-        * We aren't flushing it.
-        *
-        * chunkhold expects B_DONE to be set, whether
-        * we actually finish the I/O or not. We don't want to
-        * change that interface.
-        */
-       XFS_BUF_UNREAD(bp);
-       XFS_BUF_UNDELAYWRITE(bp);
-       XFS_BUF_DONE(bp);
-       XFS_BUF_STALE(bp);
-       bp->b_iodone = NULL;
-       if (!(fl & XBF_ASYNC)) {
-               /*
-                * Mark b_error and B_ERROR _both_.
-                * Lots of chunkcache code assumes that.
-                * There's no reason to mark error for
-                * ASYNC buffers.
-                */
-               xfs_buf_ioerror(bp, EIO);
-               XFS_BUF_FINISH_IOWAIT(bp);
-       } else {
-               xfs_buf_relse(bp);
-       }
-
-       return EIO;
-}
-
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer after prematurely unpinning it
- * to forcibly shut down the filesystem.
- */
-int
-xfs_bdstrat_cb(
-       struct xfs_buf  *bp)
-{
-       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-               trace_xfs_bdstrat_shut(bp, _RET_IP_);
-               /*
-                * Metadata write that didn't get logged but
-                * written delayed anyway. These aren't associated
-                * with a transaction, and can be ignored.
-                */
-               if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
-                       return xfs_bioerror_relse(bp);
-               else
-                       return xfs_bioerror(bp);
-       }
-
-       xfs_buf_iorequest(bp);
-       return 0;
-}
-
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem.  Typically user data goes through this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
-{
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               trace_xfs_bdstrat_shut(bp, _RET_IP_);
-               xfs_bioerror_relse(bp);
-               return;
-       }
-
-       xfs_buf_iorequest(bp);
-}
-
-STATIC void
-_xfs_buf_ioend(
-       xfs_buf_t               *bp,
-       int                     schedule)
-{
-       if (atomic_dec_and_test(&bp->b_io_remaining))
-               xfs_buf_ioend(bp, schedule);
-}
-
-STATIC void
-xfs_buf_bio_end_io(
-       struct bio              *bio,
-       int                     error)
-{
-       xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
-
-       xfs_buf_ioerror(bp, -error);
-
-       if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
-               invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
-
-       _xfs_buf_ioend(bp, 1);
-       bio_put(bio);
-}
-
-STATIC void
-_xfs_buf_ioapply(
-       xfs_buf_t               *bp)
-{
-       int                     rw, map_i, total_nr_pages, nr_pages;
-       struct bio              *bio;
-       int                     offset = bp->b_offset;
-       int                     size = bp->b_count_desired;
-       sector_t                sector = bp->b_bn;
-
-       total_nr_pages = bp->b_page_count;
-       map_i = 0;
-
-       if (bp->b_flags & XBF_WRITE) {
-               if (bp->b_flags & XBF_SYNCIO)
-                       rw = WRITE_SYNC;
-               else
-                       rw = WRITE;
-               if (bp->b_flags & XBF_FUA)
-                       rw |= REQ_FUA;
-               if (bp->b_flags & XBF_FLUSH)
-                       rw |= REQ_FLUSH;
-       } else if (bp->b_flags & XBF_READ_AHEAD) {
-               rw = READA;
-       } else {
-               rw = READ;
-       }
-
-       /* we only use the buffer cache for meta-data */
-       rw |= REQ_META;
-
-next_chunk:
-       atomic_inc(&bp->b_io_remaining);
-       nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
-       if (nr_pages > total_nr_pages)
-               nr_pages = total_nr_pages;
-
-       bio = bio_alloc(GFP_NOIO, nr_pages);
-       bio->bi_bdev = bp->b_target->bt_bdev;
-       bio->bi_sector = sector;
-       bio->bi_end_io = xfs_buf_bio_end_io;
-       bio->bi_private = bp;
-
-
-       for (; size && nr_pages; nr_pages--, map_i++) {
-               int     rbytes, nbytes = PAGE_SIZE - offset;
-
-               if (nbytes > size)
-                       nbytes = size;
-
-               rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
-               if (rbytes < nbytes)
-                       break;
-
-               offset = 0;
-               sector += nbytes >> BBSHIFT;
-               size -= nbytes;
-               total_nr_pages--;
-       }
-
-       if (likely(bio->bi_size)) {
-               if (xfs_buf_is_vmapped(bp)) {
-                       flush_kernel_vmap_range(bp->b_addr,
-                                               xfs_buf_vmap_len(bp));
-               }
-               submit_bio(rw, bio);
-               if (size)
-                       goto next_chunk;
-       } else {
-               xfs_buf_ioerror(bp, EIO);
-               bio_put(bio);
-       }
-}
-
-int
-xfs_buf_iorequest(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_iorequest(bp, _RET_IP_);
-
-       if (bp->b_flags & XBF_DELWRI) {
-               xfs_buf_delwri_queue(bp, 1);
-               return 0;
-       }
-
-       if (bp->b_flags & XBF_WRITE) {
-               xfs_buf_wait_unpin(bp);
-       }
-
-       xfs_buf_hold(bp);
-
-       /* Set the count to 1 initially; this will stop an I/O
-        * completion callout which happens before we have started
-        * all the I/O from calling xfs_buf_ioend too early.
-        */
-       atomic_set(&bp->b_io_remaining, 1);
-       _xfs_buf_ioapply(bp);
-       _xfs_buf_ioend(bp, 0);
-
-       xfs_buf_rele(bp);
-       return 0;
-}
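
The b_io_remaining handling above is the usual bias-by-one completion
pattern: the initiator holds an extra count so that per-bio completions
cannot fire the final ioend while submission is still in progress. In
outline (illustration only; the loop comment stands in for the chunking
logic in _xfs_buf_ioapply()):

    atomic_set(&bp->b_io_remaining, 1);   /* submitter's own reference */

    /* for each bio built by _xfs_buf_ioapply(): */
    atomic_inc(&bp->b_io_remaining);      /* one count per in-flight bio */
    submit_bio(rw, bio);                  /* may complete immediately */

    _xfs_buf_ioend(bp, 0);  /* drop the bias; last decrement runs ioend */
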
-
-/*
- *     Waits for I/O to complete on the buffer supplied.
- *     It returns immediately if no I/O is pending.
- *     It returns the I/O error code, if any, or 0 if there was no error.
- */
-int
-xfs_buf_iowait(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_iowait(bp, _RET_IP_);
-
-       wait_for_completion(&bp->b_iowait);
-
-       trace_xfs_buf_iowait_done(bp, _RET_IP_);
-       return bp->b_error;
-}
-
-xfs_caddr_t
-xfs_buf_offset(
-       xfs_buf_t               *bp,
-       size_t                  offset)
-{
-       struct page             *page;
-
-       if (bp->b_flags & XBF_MAPPED)
-               return bp->b_addr + offset;
-
-       offset += bp->b_offset;
-       page = bp->b_pages[offset >> PAGE_SHIFT];
-       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
-}
-
-/*
- *     Move data into or out of a buffer.
- */
-void
-xfs_buf_iomove(
-       xfs_buf_t               *bp,    /* buffer to process            */
-       size_t                  boff,   /* starting buffer offset       */
-       size_t                  bsize,  /* length to copy               */
-       void                    *data,  /* data address                 */
-       xfs_buf_rw_t            mode)   /* read/write/zero flag         */
-{
-       size_t                  bend, cpoff, csize;
-       struct page             *page;
-
-       bend = boff + bsize;
-       while (boff < bend) {
-               page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
-               cpoff = xfs_buf_poff(boff + bp->b_offset);
-               csize = min_t(size_t,
-                             PAGE_SIZE-cpoff, bp->b_count_desired-boff);
-
-               ASSERT(((csize + cpoff) <= PAGE_SIZE));
-
-               switch (mode) {
-               case XBRW_ZERO:
-                       memset(page_address(page) + cpoff, 0, csize);
-                       break;
-               case XBRW_READ:
-                       memcpy(data, page_address(page) + cpoff, csize);
-                       break;
-               case XBRW_WRITE:
-                       memcpy(page_address(page) + cpoff, data, csize);
-               }
-
-               boff += csize;
-               data += csize;
-       }
-}
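
A typical call, which the companion header wraps as xfs_buf_zero() (sketch;
the offset and length are arbitrary):

    /* Zero 512 bytes starting at offset 1024 within the buffer: */
    xfs_buf_iomove(bp, 1024, 512, NULL, XBRW_ZERO);
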
-
-/*
- *     Handling of buffer targets (buftargs).
- */
-
-/*
- * Wait for any bufs with callbacks that have been submitted but have not yet
- * returned. These buffers will have an elevated hold count, so wait on those
- * while freeing all the buffers only held by the LRU.
- */
-void
-xfs_wait_buftarg(
-       struct xfs_buftarg      *btp)
-{
-       struct xfs_buf          *bp;
-
-restart:
-       spin_lock(&btp->bt_lru_lock);
-       while (!list_empty(&btp->bt_lru)) {
-               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-               if (atomic_read(&bp->b_hold) > 1) {
-                       spin_unlock(&btp->bt_lru_lock);
-                       delay(100);
-                       goto restart;
-               }
-               /*
-                * clear the LRU reference count so the buffer doesn't get
-                * ignored in xfs_buf_rele().
-                */
-               atomic_set(&bp->b_lru_ref, 0);
-               spin_unlock(&btp->bt_lru_lock);
-               xfs_buf_rele(bp);
-               spin_lock(&btp->bt_lru_lock);
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-int
-xfs_buftarg_shrink(
-       struct shrinker         *shrink,
-       struct shrink_control   *sc)
-{
-       struct xfs_buftarg      *btp = container_of(shrink,
-                                       struct xfs_buftarg, bt_shrinker);
-       struct xfs_buf          *bp;
-       int nr_to_scan = sc->nr_to_scan;
-       LIST_HEAD(dispose);
-
-       if (!nr_to_scan)
-               return btp->bt_lru_nr;
-
-       spin_lock(&btp->bt_lru_lock);
-       while (!list_empty(&btp->bt_lru)) {
-               if (nr_to_scan-- <= 0)
-                       break;
-
-               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-
-               /*
-                * Decrement the b_lru_ref count unless the value is already
-                * zero. If the value is already zero, we need to reclaim the
-                * buffer, otherwise it gets another trip through the LRU.
-                */
-               if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
-                       list_move_tail(&bp->b_lru, &btp->bt_lru);
-                       continue;
-               }
-
-               /*
-                * remove the buffer from the LRU now to avoid needing another
-                * lock round trip inside xfs_buf_rele().
-                */
-               list_move(&bp->b_lru, &dispose);
-               btp->bt_lru_nr--;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-
-       while (!list_empty(&dispose)) {
-               bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
-               list_del_init(&bp->b_lru);
-               xfs_buf_rele(bp);
-       }
-
-       return btp->bt_lru_nr;
-}
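
The aging step hinges on atomic_add_unless(&bp->b_lru_ref, -1, 0), which
decrements only while the counter is non-zero. A user-space model of that
primitive (illustration only, using C11 atomics):

    #include <stdatomic.h>

    /* Returns 1 and decrements if *ref was non-zero; returns 0 if it
     * was already zero, i.e. the buffer is due for reclaim. */
    static int dec_unless_zero(atomic_int *ref)
    {
            int old = atomic_load(ref);

            while (old != 0) {
                    if (atomic_compare_exchange_weak(ref, &old, old - 1))
                            return 1;
            }
            return 0;
    }
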
-
-void
-xfs_free_buftarg(
-       struct xfs_mount        *mp,
-       struct xfs_buftarg      *btp)
-{
-       unregister_shrinker(&btp->bt_shrinker);
-
-       xfs_flush_buftarg(btp, 1);
-       if (mp->m_flags & XFS_MOUNT_BARRIER)
-               xfs_blkdev_issue_flush(btp);
-
-       kthread_stop(btp->bt_task);
-       kmem_free(btp);
-}
-
-STATIC int
-xfs_setsize_buftarg_flags(
-       xfs_buftarg_t           *btp,
-       unsigned int            blocksize,
-       unsigned int            sectorsize,
-       int                     verbose)
-{
-       btp->bt_bsize = blocksize;
-       btp->bt_sshift = ffs(sectorsize) - 1;
-       btp->bt_smask = sectorsize - 1;
-
-       if (set_blocksize(btp->bt_bdev, sectorsize)) {
-               xfs_warn(btp->bt_mount,
-                       "Cannot set_blocksize to %u on device %s\n",
-                       sectorsize, xfs_buf_target_name(btp));
-               return EINVAL;
-       }
-
-       return 0;
-}
-
-/*
- *     When allocating the initial buffer target we have not yet
- *     read in the superblock, so we don't know what size sectors
- *     are being used at this early stage.  Play it safe.
- */
-STATIC int
-xfs_setsize_buftarg_early(
-       xfs_buftarg_t           *btp,
-       struct block_device     *bdev)
-{
-       return xfs_setsize_buftarg_flags(btp,
-                       PAGE_SIZE, bdev_logical_block_size(bdev), 0);
-}
-
-int
-xfs_setsize_buftarg(
-       xfs_buftarg_t           *btp,
-       unsigned int            blocksize,
-       unsigned int            sectorsize)
-{
-       return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
-}
-
-STATIC int
-xfs_alloc_delwrite_queue(
-       xfs_buftarg_t           *btp,
-       const char              *fsname)
-{
-       INIT_LIST_HEAD(&btp->bt_delwrite_queue);
-       spin_lock_init(&btp->bt_delwrite_lock);
-       btp->bt_flags = 0;
-       btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-       if (IS_ERR(btp->bt_task))
-               return PTR_ERR(btp->bt_task);
-       return 0;
-}
-
-xfs_buftarg_t *
-xfs_alloc_buftarg(
-       struct xfs_mount        *mp,
-       struct block_device     *bdev,
-       int                     external,
-       const char              *fsname)
-{
-       xfs_buftarg_t           *btp;
-
-       btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
-
-       btp->bt_mount = mp;
-       btp->bt_dev =  bdev->bd_dev;
-       btp->bt_bdev = bdev;
-       btp->bt_bdi = blk_get_backing_dev_info(bdev);
-       if (!btp->bt_bdi)
-               goto error;
-
-       INIT_LIST_HEAD(&btp->bt_lru);
-       spin_lock_init(&btp->bt_lru_lock);
-       if (xfs_setsize_buftarg_early(btp, bdev))
-               goto error;
-       if (xfs_alloc_delwrite_queue(btp, fsname))
-               goto error;
-       btp->bt_shrinker.shrink = xfs_buftarg_shrink;
-       btp->bt_shrinker.seeks = DEFAULT_SEEKS;
-       register_shrinker(&btp->bt_shrinker);
-       return btp;
-
-error:
-       kmem_free(btp);
-       return NULL;
-}
-
-
-/*
- *     Delayed write buffer handling
- */
-STATIC void
-xfs_buf_delwri_queue(
-       xfs_buf_t               *bp,
-       int                     unlock)
-{
-       struct list_head        *dwq = &bp->b_target->bt_delwrite_queue;
-       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
-
-       trace_xfs_buf_delwri_queue(bp, _RET_IP_);
-
-       ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
-
-       spin_lock(dwlk);
-       /* If already in the queue, dequeue and place at tail */
-       if (!list_empty(&bp->b_list)) {
-               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-               if (unlock)
-                       atomic_dec(&bp->b_hold);
-               list_del(&bp->b_list);
-       }
-
-       if (list_empty(dwq)) {
-               /* start xfsbufd as it is about to have something to do */
-               wake_up_process(bp->b_target->bt_task);
-       }
-
-       bp->b_flags |= _XBF_DELWRI_Q;
-       list_add_tail(&bp->b_list, dwq);
-       bp->b_queuetime = jiffies;
-       spin_unlock(dwlk);
-
-       if (unlock)
-               xfs_buf_unlock(bp);
-}
-
-void
-xfs_buf_delwri_dequeue(
-       xfs_buf_t               *bp)
-{
-       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
-       int                     dequeued = 0;
-
-       spin_lock(dwlk);
-       if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
-               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-               list_del_init(&bp->b_list);
-               dequeued = 1;
-       }
-       bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-       spin_unlock(dwlk);
-
-       if (dequeued)
-               xfs_buf_rele(bp);
-
-       trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
-}
-
-/*
- * If a delwri buffer needs to be pushed before it has aged out, then promote
- * it to the head of the delwri queue so that it will be flushed on the next
- * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
- * than the age currently needed to flush the buffer. Hence the next time
- * the xfsbufd sees it, it is guaranteed to be considered old enough to flush.
- */
-void
-xfs_buf_delwri_promote(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-       long            age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
-
-       ASSERT(bp->b_flags & XBF_DELWRI);
-       ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
-       /*
-        * Check the buffer age before locking the delayed write queue as we
-        * don't need to promote buffers that are already past the flush age.
-        */
-       if (bp->b_queuetime < jiffies - age)
-               return;
-       bp->b_queuetime = jiffies - age;
-       spin_lock(&btp->bt_delwrite_lock);
-       list_move(&bp->b_list, &btp->bt_delwrite_queue);
-       spin_unlock(&btp->bt_delwrite_lock);
-}
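
Why the backdating works (a sketch reusing the names above): after the
assignment, the age test in xfs_buf_delwri_split() compares jiffies with
itself and therefore fails:

    bp->b_queuetime = jiffies - age;
    /* xfs_buf_delwri_split() then evaluates
     *   time_before(jiffies, bp->b_queuetime + age)
     * == time_before(jiffies, jiffies) == false,
     * so the buffer is written out on the next xfsbufd pass. */
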
-
-STATIC void
-xfs_buf_runall_queues(
-       struct workqueue_struct *queue)
-{
-       flush_workqueue(queue);
-}
-
-/*
- * Move as many buffers as specified to the supplied list,
- * indicating whether we skipped any buffers to prevent deadlocks.
- */
-STATIC int
-xfs_buf_delwri_split(
-       xfs_buftarg_t   *target,
-       struct list_head *list,
-       unsigned long   age)
-{
-       xfs_buf_t       *bp, *n;
-       struct list_head *dwq = &target->bt_delwrite_queue;
-       spinlock_t      *dwlk = &target->bt_delwrite_lock;
-       int             skipped = 0;
-       int             force;
-
-       force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-       INIT_LIST_HEAD(list);
-       spin_lock(dwlk);
-       list_for_each_entry_safe(bp, n, dwq, b_list) {
-               ASSERT(bp->b_flags & XBF_DELWRI);
-
-               if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
-                       if (!force &&
-                           time_before(jiffies, bp->b_queuetime + age)) {
-                               xfs_buf_unlock(bp);
-                               break;
-                       }
-
-                       bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
-                       bp->b_flags |= XBF_WRITE;
-                       list_move_tail(&bp->b_list, list);
-                       trace_xfs_buf_delwri_split(bp, _RET_IP_);
-               } else
-                       skipped++;
-       }
-       spin_unlock(dwlk);
-
-       return skipped;
-
-}
-
-/*
- * Compare function is more complex than it needs to be because
- * the return value is only 32 bits and we are doing comparisons
- * on 64-bit values.
- */
-static int
-xfs_buf_cmp(
-       void            *priv,
-       struct list_head *a,
-       struct list_head *b)
-{
-       struct xfs_buf  *ap = container_of(a, struct xfs_buf, b_list);
-       struct xfs_buf  *bp = container_of(b, struct xfs_buf, b_list);
-       xfs_daddr_t             diff;
-
-       diff = ap->b_bn - bp->b_bn;
-       if (diff < 0)
-               return -1;
-       if (diff > 0)
-               return 1;
-       return 0;
-}
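
To see why a plain "return ap->b_bn - bp->b_bn;" would be wrong, consider a
difference that does not fit in the 32-bit return value (illustration; the
truncation shown is what common two's-complement ABIs produce):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int64_t a = 0x100000000LL;  /* hypothetical block numbers */
            int64_t b = 0;

            int truncated = (int)(a - b);       /* 0: falsely "equal" */
            int correct = (a - b < 0) ? -1 :
                          (a - b > 0) ?  1 : 0; /* 1: a sorts after b */

            printf("truncated=%d correct=%d\n", truncated, correct);
            return 0;
    }
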
-
-STATIC int
-xfsbufd(
-       void            *data)
-{
-       xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
-
-       current->flags |= PF_MEMALLOC;
-
-       set_freezable();
-
-       do {
-               long    age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-               long    tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-               struct list_head tmp;
-               struct blk_plug plug;
-
-               if (unlikely(freezing(current))) {
-                       set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-                       refrigerator();
-               } else {
-                       clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-               }
-
-               /* sleep for a long time if there is nothing to do. */
-               if (list_empty(&target->bt_delwrite_queue))
-                       tout = MAX_SCHEDULE_TIMEOUT;
-               schedule_timeout_interruptible(tout);
-
-               xfs_buf_delwri_split(target, &tmp, age);
-               list_sort(NULL, &tmp, xfs_buf_cmp);
-
-               blk_start_plug(&plug);
-               while (!list_empty(&tmp)) {
-                       struct xfs_buf *bp;
-                       bp = list_first_entry(&tmp, struct xfs_buf, b_list);
-                       list_del_init(&bp->b_list);
-                       xfs_bdstrat_cb(bp);
-               }
-               blk_finish_plug(&plug);
-       } while (!kthread_should_stop());
-
-       return 0;
-}
-
-/*
- *     Go through all incore buffers, and release buffers if they belong to
- *     the given device. This is used in filesystem error handling to
- *     preserve the consistency of its metadata.
- */
-int
-xfs_flush_buftarg(
-       xfs_buftarg_t   *target,
-       int             wait)
-{
-       xfs_buf_t       *bp;
-       int             pincount = 0;
-       LIST_HEAD(tmp_list);
-       LIST_HEAD(wait_list);
-       struct blk_plug plug;
-
-       xfs_buf_runall_queues(xfsconvertd_workqueue);
-       xfs_buf_runall_queues(xfsdatad_workqueue);
-       xfs_buf_runall_queues(xfslogd_workqueue);
-
-       set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-       pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
-
-       /*
-        * We have dropped the delayed write list lock, so now walk the
-        * temporary list.  All I/O is issued asynchronously; if we need to
-        * wait for completion, we do that after issuing all of the I/O.
-        */
-       list_sort(NULL, &tmp_list, xfs_buf_cmp);
-
-       blk_start_plug(&plug);
-       while (!list_empty(&tmp_list)) {
-               bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
-               ASSERT(target == bp->b_target);
-               list_del_init(&bp->b_list);
-               if (wait) {
-                       bp->b_flags &= ~XBF_ASYNC;
-                       list_add(&bp->b_list, &wait_list);
-               }
-               xfs_bdstrat_cb(bp);
-       }
-       blk_finish_plug(&plug);
-
-       if (wait) {
-               /* Wait for IO to complete. */
-               while (!list_empty(&wait_list)) {
-                       bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
-
-                       list_del_init(&bp->b_list);
-                       xfs_buf_iowait(bp);
-                       xfs_buf_relse(bp);
-               }
-       }
-
-       return pincount;
-}
-
-int __init
-xfs_buf_init(void)
-{
-       xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
-                                               KM_ZONE_HWALIGN, NULL);
-       if (!xfs_buf_zone)
-               goto out;
-
-       xfslogd_workqueue = alloc_workqueue("xfslogd",
-                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
-       if (!xfslogd_workqueue)
-               goto out_free_buf_zone;
-
-       xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
-       if (!xfsdatad_workqueue)
-               goto out_destroy_xfslogd_workqueue;
-
-       xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
-                                               WQ_MEM_RECLAIM, 1);
-       if (!xfsconvertd_workqueue)
-               goto out_destroy_xfsdatad_workqueue;
-
-       return 0;
-
- out_destroy_xfsdatad_workqueue:
-       destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
-       destroy_workqueue(xfslogd_workqueue);
- out_free_buf_zone:
-       kmem_zone_destroy(xfs_buf_zone);
- out:
-       return -ENOMEM;
-}
-
-void
-xfs_buf_terminate(void)
-{
-       destroy_workqueue(xfsconvertd_workqueue);
-       destroy_workqueue(xfsdatad_workqueue);
-       destroy_workqueue(xfslogd_workqueue);
-       kmem_zone_destroy(xfs_buf_zone);
-}
-
-#ifdef CONFIG_KDB_MODULES
-struct list_head *
-xfs_get_buftarg_list(void)
-{
-       return &xfs_buftarg_list;
-}
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
deleted file mode 100644 (file)
index 620972b..0000000
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_BUF_H__
-#define __XFS_BUF_H__
-
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <asm/system.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/uio.h>
-
-/*
- *     Base types
- */
-
-#define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
-
-#define xfs_buf_ctob(pp)       ((pp) * PAGE_CACHE_SIZE)
-#define xfs_buf_btoc(dd)       (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_btoct(dd)      ((dd) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_poff(aa)       ((aa) & ~PAGE_CACHE_MASK)
-
-typedef enum {
-       XBRW_READ = 1,                  /* transfer into target memory */
-       XBRW_WRITE = 2,                 /* transfer from target memory */
-       XBRW_ZERO = 3,                  /* Zero target memory */
-} xfs_buf_rw_t;
-
-#define XBF_READ       (1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE      (1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
-#define XBF_MAPPED     (1 << 3) /* buffer mapped (b_addr valid) */
-#define XBF_ASYNC      (1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE       (1 << 5) /* all pages in the buffer uptodate */
-#define XBF_DELWRI     (1 << 6) /* buffer has dirty pages */
-#define XBF_STALE      (1 << 7) /* buffer has been staled, do not find it */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO     (1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA                (1 << 11)/* force cache write through mode */
-#define XBF_FLUSH      (1 << 12)/* flush the disk cache before a write */
-
-/* flags used only as arguments to access routines */
-#define XBF_LOCK       (1 << 15)/* lock requested */
-#define XBF_TRYLOCK    (1 << 16)/* lock requested, but do not wait */
-#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
-
-/* flags used only internally */
-#define _XBF_PAGES     (1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM      (1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q  (1 << 22)/* buffer on delwri queue */
-
-typedef unsigned int xfs_buf_flags_t;
-
-#define XFS_BUF_FLAGS \
-       { XBF_READ,             "READ" }, \
-       { XBF_WRITE,            "WRITE" }, \
-       { XBF_READ_AHEAD,       "READ_AHEAD" }, \
-       { XBF_MAPPED,           "MAPPED" }, \
-       { XBF_ASYNC,            "ASYNC" }, \
-       { XBF_DONE,             "DONE" }, \
-       { XBF_DELWRI,           "DELWRI" }, \
-       { XBF_STALE,            "STALE" }, \
-       { XBF_SYNCIO,           "SYNCIO" }, \
-       { XBF_FUA,              "FUA" }, \
-       { XBF_FLUSH,            "FLUSH" }, \
-       { XBF_LOCK,             "LOCK" },       /* should never be set */\
-       { XBF_TRYLOCK,          "TRYLOCK" },    /* ditto */\
-       { XBF_DONT_BLOCK,       "DONT_BLOCK" }, /* ditto */\
-       { _XBF_PAGES,           "PAGES" }, \
-       { _XBF_KMEM,            "KMEM" }, \
-       { _XBF_DELWRI_Q,        "DELWRI_Q" }
-
-typedef enum {
-       XBT_FORCE_SLEEP = 0,
-       XBT_FORCE_FLUSH = 1,
-} xfs_buftarg_flags_t;
-
-typedef struct xfs_buftarg {
-       dev_t                   bt_dev;
-       struct block_device     *bt_bdev;
-       struct backing_dev_info *bt_bdi;
-       struct xfs_mount        *bt_mount;
-       unsigned int            bt_bsize;
-       unsigned int            bt_sshift;
-       size_t                  bt_smask;
-
-       /* per device delwri queue */
-       struct task_struct      *bt_task;
-       struct list_head        bt_delwrite_queue;
-       spinlock_t              bt_delwrite_lock;
-       unsigned long           bt_flags;
-
-       /* LRU control structures */
-       struct shrinker         bt_shrinker;
-       struct list_head        bt_lru;
-       spinlock_t              bt_lru_lock;
-       unsigned int            bt_lru_nr;
-} xfs_buftarg_t;
-
-struct xfs_buf;
-typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
-
-#define XB_PAGES       2
-
-typedef struct xfs_buf {
-       /*
-        * first cacheline holds all the fields needed for an uncontended cache
-        * hit to be fully processed. The semaphore straddles the cacheline
-        * boundary, but the counter and lock sit on the first cacheline,
-        * which is the only bit that is touched if we hit the semaphore
-        * fast-path on locking.
-        */
-       struct rb_node          b_rbnode;       /* rbtree node */
-       xfs_off_t               b_file_offset;  /* offset in file */
-       size_t                  b_buffer_length;/* size of buffer in bytes */
-       atomic_t                b_hold;         /* reference count */
-       atomic_t                b_lru_ref;      /* lru reclaim ref count */
-       xfs_buf_flags_t         b_flags;        /* status flags */
-       struct semaphore        b_sema;         /* semaphore for lockables */
-
-       struct list_head        b_lru;          /* lru list */
-       wait_queue_head_t       b_waiters;      /* unpin waiters */
-       struct list_head        b_list;
-       struct xfs_perag        *b_pag;         /* contains rbtree root */
-       xfs_buftarg_t           *b_target;      /* buffer target (device) */
-       xfs_daddr_t             b_bn;           /* block number for I/O */
-       size_t                  b_count_desired;/* desired transfer size */
-       void                    *b_addr;        /* virtual address of buffer */
-       struct work_struct      b_iodone_work;
-       xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
-       struct completion       b_iowait;       /* queue for I/O waiters */
-       void                    *b_fspriv;
-       struct xfs_trans        *b_transp;
-       struct page             **b_pages;      /* array of page pointers */
-       struct page             *b_page_array[XB_PAGES]; /* inline pages */
-       unsigned long           b_queuetime;    /* time buffer was queued */
-       atomic_t                b_pin_count;    /* pin count */
-       atomic_t                b_io_remaining; /* #outstanding I/O requests */
-       unsigned int            b_page_count;   /* size of page array */
-       unsigned int            b_offset;       /* page offset in first page */
-       unsigned short          b_error;        /* error code on I/O */
-#ifdef XFS_BUF_LOCK_TRACKING
-       int                     b_last_holder;
-#endif
-} xfs_buf_t;
-
-
-/* Finding and Reading Buffers */
-extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t, xfs_buf_t *);
-#define xfs_incore(buftarg,blkno,len,lockit) \
-       _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-
-extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
-extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
-
-extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
-extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
-extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
-extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
-struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
-                               struct xfs_buftarg *target,
-                               xfs_daddr_t daddr, size_t length, int flags);
-
-/* Releasing Buffers */
-extern void xfs_buf_free(xfs_buf_t *);
-extern void xfs_buf_rele(xfs_buf_t *);
-
-/* Locking and Unlocking Buffers */
-extern int xfs_buf_trylock(xfs_buf_t *);
-extern void xfs_buf_lock(xfs_buf_t *);
-extern void xfs_buf_unlock(xfs_buf_t *);
-#define xfs_buf_islocked(bp) \
-       ((bp)->b_sema.count <= 0)
-
-/* Buffer Read and Write Routines */
-extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
-extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
-
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
-
-extern void xfs_buf_ioend(xfs_buf_t *, int);
-extern void xfs_buf_ioerror(xfs_buf_t *, int);
-extern int xfs_buf_iorequest(xfs_buf_t *);
-extern int xfs_buf_iowait(xfs_buf_t *);
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
-                               xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
-           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-static inline int xfs_buf_geterror(xfs_buf_t *bp)
-{
-       return bp ? bp->b_error : ENOMEM;
-}
-
-/* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-
-/* Delayed Write Buffer Routines */
-extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
-extern void xfs_buf_delwri_promote(xfs_buf_t *);
-
-/* Buffer Daemon Setup Routines */
-extern int xfs_buf_init(void);
-extern void xfs_buf_terminate(void);
-
-static inline const char *
-xfs_buf_target_name(struct xfs_buftarg *target)
-{
-       static char __b[BDEVNAME_SIZE];
-
-       return bdevname(target->bt_bdev, __b);
-}
-
-
-#define XFS_BUF_ZEROFLAGS(bp) \
-       ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
-                           XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
-
-void xfs_buf_stale(struct xfs_buf *bp);
-#define XFS_BUF_STALE(bp)      xfs_buf_stale(bp);
-#define XFS_BUF_UNSTALE(bp)    ((bp)->b_flags &= ~XBF_STALE)
-#define XFS_BUF_ISSTALE(bp)    ((bp)->b_flags & XBF_STALE)
-#define XFS_BUF_SUPER_STALE(bp)        do {                            \
-                                       XFS_BUF_STALE(bp);      \
-                                       xfs_buf_delwri_dequeue(bp);     \
-                                       XFS_BUF_DONE(bp);       \
-                               } while (0)
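
The do { } while (0) wrapper makes the multi-statement macro expand as a
single statement, so it stays safe in an unbraced if/else (illustration):

    if (error)
            XFS_BUF_SUPER_STALE(bp);  /* expands safely as one statement */
    else
            xfs_buf_relse(bp);
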
-
-#define XFS_BUF_DELAYWRITE(bp)         ((bp)->b_flags |= XBF_DELWRI)
-#define XFS_BUF_UNDELAYWRITE(bp)       xfs_buf_delwri_dequeue(bp)
-#define XFS_BUF_ISDELAYWRITE(bp)       ((bp)->b_flags & XBF_DELWRI)
-
-#define XFS_BUF_DONE(bp)       ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp)     ((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp)     ((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_ASYNC(bp)      ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp)    ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp)    ((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_READ(bp)       ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp)     ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp)     ((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp)      ((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp)    ((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp)    ((bp)->b_flags & XBF_WRITE)
-
-#define XFS_BUF_ADDR(bp)               ((bp)->b_bn)
-#define XFS_BUF_SET_ADDR(bp, bno)      ((bp)->b_bn = (xfs_daddr_t)(bno))
-#define XFS_BUF_OFFSET(bp)             ((bp)->b_file_offset)
-#define XFS_BUF_SET_OFFSET(bp, off)    ((bp)->b_file_offset = (off))
-#define XFS_BUF_COUNT(bp)              ((bp)->b_count_desired)
-#define XFS_BUF_SET_COUNT(bp, cnt)     ((bp)->b_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp)               ((bp)->b_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt)      ((bp)->b_buffer_length = (cnt))
-
-static inline void
-xfs_buf_set_ref(
-       struct xfs_buf  *bp,
-       int             lru_ref)
-{
-       atomic_set(&bp->b_lru_ref, lru_ref);
-}
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)   xfs_buf_set_ref(bp, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)            do { } while (0)
-
-static inline int xfs_buf_ispinned(struct xfs_buf *bp)
-{
-       return atomic_read(&bp->b_pin_count);
-}
-
-#define XFS_BUF_FINISH_IOWAIT(bp)      complete(&bp->b_iowait);
-
-static inline void xfs_buf_relse(xfs_buf_t *bp)
-{
-       xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-}
-
-/*
- *     Handling of buftargs.
- */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
-                       struct block_device *, int, const char *);
-extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
-extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-
-#ifdef CONFIG_KDB_MODULES
-extern struct list_head *xfs_get_buftarg_list(void);
-#endif
-
-#define xfs_getsize_buftarg(buftarg)   block_size((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg)  bdev_read_only((buftarg)->bt_bdev)
-
-#define xfs_binval(buftarg)            xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg)            xfs_flush_buftarg(buftarg, 1)
-
-#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
deleted file mode 100644 (file)
index 244e797..0000000
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (C) 2010 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_discard.h"
-#include "xfs_trace.h"
-
-STATIC int
-xfs_trim_extents(
-       struct xfs_mount        *mp,
-       xfs_agnumber_t          agno,
-       xfs_fsblock_t           start,
-       xfs_fsblock_t           len,
-       xfs_fsblock_t           minlen,
-       __uint64_t              *blocks_trimmed)
-{
-       struct block_device     *bdev = mp->m_ddev_targp->bt_bdev;
-       struct xfs_btree_cur    *cur;
-       struct xfs_buf          *agbp;
-       struct xfs_perag        *pag;
-       int                     error;
-       int                     i;
-
-       pag = xfs_perag_get(mp, agno);
-
-       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
-       if (error || !agbp)
-               goto out_put_perag;
-
-       cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
-
-       /*
-        * Force out the log.  This means any transactions that might have freed
-        * space before we took the AGF buffer lock are now on disk, and the
-        * volatile disk cache is flushed.
-        */
-       xfs_log_force(mp, XFS_LOG_SYNC);
-
-       /*
-        * Look up the longest btree in the AGF and start with it.
-        */
-       error = xfs_alloc_lookup_le(cur, 0,
-                                   XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
-       if (error)
-               goto out_del_cursor;
-
-       /*
-        * Loop until we are done with all extents that are large
-        * enough to be worth discarding.
-        */
-       while (i) {
-               xfs_agblock_t fbno;
-               xfs_extlen_t flen;
-
-               error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
-               if (error)
-                       goto out_del_cursor;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
-               ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
-
-               /*
-                * Too small?  Give up.
-                */
-               if (flen < minlen) {
-                       trace_xfs_discard_toosmall(mp, agno, fbno, flen);
-                       goto out_del_cursor;
-               }
-
-               /*
-                * If the extent is entirely outside of the range we are
-                * supposed to discard, skip it.  Do not bother to trim
-                * down partially overlapping ranges for now.
-                */
-               if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
-                   XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
-                       trace_xfs_discard_exclude(mp, agno, fbno, flen);
-                       goto next_extent;
-               }
-
-               /*
-                * If any blocks in the range are still busy, skip the
-                * discard and try again the next time.
-                */
-               if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
-                       trace_xfs_discard_busy(mp, agno, fbno, flen);
-                       goto next_extent;
-               }
-
-               trace_xfs_discard_extent(mp, agno, fbno, flen);
-               error = -blkdev_issue_discard(bdev,
-                               XFS_AGB_TO_DADDR(mp, agno, fbno),
-                               XFS_FSB_TO_BB(mp, flen),
-                               GFP_NOFS, 0);
-               if (error)
-                       goto out_del_cursor;
-               *blocks_trimmed += flen;
-
-next_extent:
-               error = xfs_btree_decrement(cur, 0, &i);
-               if (error)
-                       goto out_del_cursor;
-       }
-
-out_del_cursor:
-       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-       xfs_buf_relse(agbp);
-out_put_perag:
-       xfs_perag_put(pag);
-       return error;
-}
-
-int
-xfs_ioc_trim(
-       struct xfs_mount                *mp,
-       struct fstrim_range __user      *urange)
-{
-       struct request_queue    *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
-       unsigned int            granularity = q->limits.discard_granularity;
-       struct fstrim_range     range;
-       xfs_fsblock_t           start, len, minlen;
-       xfs_agnumber_t          start_agno, end_agno, agno;
-       __uint64_t              blocks_trimmed = 0;
-       int                     error, last_error = 0;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (!blk_queue_discard(q))
-               return -XFS_ERROR(EOPNOTSUPP);
-       if (copy_from_user(&range, urange, sizeof(range)))
-               return -XFS_ERROR(EFAULT);
-
-       /*
-        * Truncating down the len isn't actually quite correct, but using
-        * XFS_B_TO_FSB would mean we trivially get overflows for values
-        * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
-        * used by the fstrim application.  In the end it really doesn't
-        * matter as trimming blocks is an advisory interface.
-        */
-       start = XFS_B_TO_FSBT(mp, range.start);
-       len = XFS_B_TO_FSBT(mp, range.len);
-       minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
-
-       start_agno = XFS_FSB_TO_AGNO(mp, start);
-       if (start_agno >= mp->m_sb.sb_agcount)
-               return -XFS_ERROR(EINVAL);
-
-       end_agno = XFS_FSB_TO_AGNO(mp, start + len);
-       if (end_agno >= mp->m_sb.sb_agcount)
-               end_agno = mp->m_sb.sb_agcount - 1;
-
-       for (agno = start_agno; agno <= end_agno; agno++) {
-               error = -xfs_trim_extents(mp, agno, start, len, minlen,
-                                         &blocks_trimmed);
-               if (error)
-                       last_error = error;
-       }
-
-       if (last_error)
-               return last_error;
-
-       range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
-       if (copy_to_user(urange, &range, sizeof(range)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
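
From user space this path is reached through the FITRIM ioctl; fstrim(8)
drives it roughly as follows (a sketch with error handling omitted):

    #include <fcntl.h>
    #include <limits.h>
    #include <linux/fs.h>     /* FITRIM, struct fstrim_range */
    #include <sys/ioctl.h>

    int trim_whole_fs(int fd)
    {
            struct fstrim_range range = {
                    .start  = 0,
                    .len    = ULLONG_MAX,  /* the fstrim default noted above */
                    .minlen = 0,
            };

            return ioctl(fd, FITRIM, &range);
    }
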
-
-int
-xfs_discard_extents(
-       struct xfs_mount        *mp,
-       struct list_head        *list)
-{
-       struct xfs_busy_extent  *busyp;
-       int                     error = 0;
-
-       list_for_each_entry(busyp, list, list) {
-               trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
-                                        busyp->length);
-
-               error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
-                               XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
-                               XFS_FSB_TO_BB(mp, busyp->length),
-                               GFP_NOFS, 0);
-               if (error && error != EOPNOTSUPP) {
-                       xfs_info(mp,
-        "discard failed for extent [0x%llu,%u], error %d",
-                                (unsigned long long)busyp->bno,
-                                busyp->length,
-                                error);
-                       return error;
-               }
-       }
-
-       return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h
deleted file mode 100644 (file)
index 344879a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef XFS_DISCARD_H
-#define XFS_DISCARD_H 1
-
-struct fstrim_range;
-struct list_head;
-
-extern int     xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
-extern int     xfs_discard_extents(struct xfs_mount *, struct list_head *);
-
-#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
deleted file mode 100644 (file)
index 75e5d32..0000000
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_mount.h"
-#include "xfs_export.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-/*
- * Note that we only accept fileids which are long enough rather than
- * allowing the parent generation number to default to zero.  XFS considers
- * zero a valid generation number, not an invalid/wildcard value.
- */
-static int xfs_fileid_length(int fileid_type)
-{
-       switch (fileid_type) {
-       case FILEID_INO32_GEN:
-               return 2;
-       case FILEID_INO32_GEN_PARENT:
-               return 4;
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               return 3;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               return 6;
-       }
-       return 255; /* invalid */
-}
-
-STATIC int
-xfs_fs_encode_fh(
-       struct dentry           *dentry,
-       __u32                   *fh,
-       int                     *max_len,
-       int                     connectable)
-{
-       struct fid              *fid = (struct fid *)fh;
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fh;
-       struct inode            *inode = dentry->d_inode;
-       int                     fileid_type;
-       int                     len;
-
-       /* Directories don't need their parent encoded, they have ".." */
-       if (S_ISDIR(inode->i_mode) || !connectable)
-               fileid_type = FILEID_INO32_GEN;
-       else
-               fileid_type = FILEID_INO32_GEN_PARENT;
-
-       /*
-        * If the filesystem may contain 64bit inode numbers, we need
-        * to use larger file handles that can represent them.
-        *
-        * While inodes that do not fit into 32 bits are only ever allocated
-        * on large enough filesystems, any such filesystem may contain them,
-        * thus the slightly confusing-looking conditional below.
-        */
-       if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
-           (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
-               fileid_type |= XFS_FILEID_TYPE_64FLAG;
-
-       /*
-        * Only encode if there is enough space given.  In practice
-        * this means we can't export a filesystem with 64bit inodes
-        * over NFSv2 with the subtree_check export option; the other
-        * seven combinations work.  The real answer is "don't use v2".
-        */
-       len = xfs_fileid_length(fileid_type);
-       if (*max_len < len) {
-               *max_len = len;
-               return 255;
-       }
-       *max_len = len;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-               spin_lock(&dentry->d_lock);
-               fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
-               fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
-               spin_unlock(&dentry->d_lock);
-               /*FALLTHRU*/
-       case FILEID_INO32_GEN:
-               fid->i32.ino = inode->i_ino;
-               fid->i32.gen = inode->i_generation;
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               spin_lock(&dentry->d_lock);
-               fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
-               fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
-               spin_unlock(&dentry->d_lock);
-               /*FALLTHRU*/
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               fid64->ino = inode->i_ino;
-               fid64->gen = inode->i_generation;
-               break;
-       }
-
-       return fileid_type;
-}
-
-STATIC struct inode *
-xfs_nfs_get_inode(
-       struct super_block      *sb,
-       u64                     ino,
-       u32                     generation)
-{
-       xfs_mount_t             *mp = XFS_M(sb);
-       xfs_inode_t             *ip;
-       int                     error;
-
-       /*
-        * NFS can sometimes send requests for ino 0.  Fail them gracefully.
-        */
-       if (ino == 0)
-               return ERR_PTR(-ESTALE);
-
-       /*
-        * XFS_IGET_UNTRUSTED means that an invalid inode number is just
-        * fine and not an indication of a corrupted filesystem, as clients
-        * can send invalid file handles and we have to handle them gracefully.
-        */
-       error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
-       if (error) {
-               /*
-                * EINVAL means the inode cluster doesn't exist anymore.
-                * This implies the filehandle is stale, so we should
-                * translate it here.
-                * We don't use ESTALE directly down the chain to not
-                * confuse applications using bulkstat that expect EINVAL.
-                */
-               if (error == EINVAL || error == ENOENT)
-                       error = ESTALE;
-               return ERR_PTR(-error);
-       }
-
-       if (ip->i_d.di_gen != generation) {
-               IRELE(ip);
-               return ERR_PTR(-ESTALE);
-       }
-
-       return VFS_I(ip);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-                int fh_len, int fileid_type)
-{
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
-       struct inode            *inode = NULL;
-
-       if (fh_len < xfs_fileid_length(fileid_type))
-               return NULL;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-       case FILEID_INO32_GEN:
-               inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
-               break;
-       }
-
-       return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
-                int fh_len, int fileid_type)
-{
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
-       struct inode            *inode = NULL;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-               inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
-                                             fid->i32.parent_gen);
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
-                                             fid64->parent_gen);
-               break;
-       }
-
-       return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_get_parent(
-       struct dentry           *child)
-{
-       int                     error;
-       struct xfs_inode        *cip;
-
-       error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
-       if (unlikely(error))
-               return ERR_PTR(-error);
-
-       return d_obtain_alias(VFS_I(cip));
-}
-
-STATIC int
-xfs_fs_nfs_commit_metadata(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       int                     error = 0;
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       if (xfs_ipincount(ip)) {
-               error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
-                               XFS_LOG_SYNC, NULL);
-       }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       return error;
-}
-
-const struct export_operations xfs_export_operations = {
-       .encode_fh              = xfs_fs_encode_fh,
-       .fh_to_dentry           = xfs_fs_fh_to_dentry,
-       .fh_to_parent           = xfs_fs_fh_to_parent,
-       .get_parent             = xfs_fs_get_parent,
-       .commit_metadata        = xfs_fs_nfs_commit_metadata,
-};
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
deleted file mode 100644 (file)
index 3272b6a..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_EXPORT_H__
-#define __XFS_EXPORT_H__
-
-/*
- * Common defines for code related to exporting XFS filesystems over NFS.
- *
- * The NFS fileid goes out on the wire as an array of
- * 32bit unsigned ints in host order.  There are 5 possible
- * formats.
- *
- * (1) fileid_type=0x00
- *     (no fileid data; handled by the generic code)
- *
- * (2) fileid_type=0x01
- *     inode-num
- *     generation
- *
- * (3) fileid_type=0x02
- *     inode-num
- *     generation
- *     parent-inode-num
- *     parent-generation
- *
- * (4) fileid_type=0x81
- *     inode-num-lo32
- *     inode-num-hi32
- *     generation
- *
- * (5) fileid_type=0x82
- *     inode-num-lo32
- *     inode-num-hi32
- *     generation
- *     parent-inode-num-lo32
- *     parent-inode-num-hi32
- *     parent-generation
- *
- * Note, the NFS filehandle also includes an fsid portion which
- * may have an inode number in it.  That number is hardcoded to
- * 32bits and there is no way for XFS to intercept it.  In
- * practice this means when exporting an XFS filesystem with 64bit
- * inodes you should either export the mountpoint (rather than
- * a subdirectory) or use the "fsid" export option.
- */
-
-struct xfs_fid64 {
-       u64 ino;
-       u32 gen;
-       u64 parent_ino;
-       u32 parent_gen;
-} __attribute__((packed));
-
-/* This flag goes on the wire.  Don't play with it. */
-#define XFS_FILEID_TYPE_64FLAG 0x80    /* NFS fileid has 64bit inodes */
-
-#endif /* __XFS_EXPORT_H__ */
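
To make formats (4) and (5) concrete: the packed struct xfs_fid64 above is
simply overlaid on the __u32 handle words, so on a little-endian host the
64bit inode number lands exactly in the lo32/hi32 slots shown.  A hedged
sketch of packing a type 0x81 handle by hand (pack_fid64 is a hypothetical
illustration, not a kernel function -- the kernel just casts the array):

#include <stdint.h>

#define XFS_FILEID_TYPE_64FLAG	0x80	/* as defined in the header above */

/* Illustrative only: pack format (4), fileid_type 0x81, word by word. */
static int pack_fid64(uint32_t *fh, uint64_t ino, uint32_t gen)
{
	fh[0] = (uint32_t)(ino & 0xffffffff);	/* inode-num-lo32 */
	fh[1] = (uint32_t)(ino >> 32);		/* inode-num-hi32 */
	fh[2] = gen;				/* generation */
	return 0x01 | XFS_FILEID_TYPE_64FLAG;	/* FILEID_INO32_GEN | 64FLAG */
}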
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
deleted file mode 100644 (file)
index 7f7b424..0000000
+++ /dev/null
@@ -1,1096 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_vnodeops.h"
-#include "xfs_da_btree.h"
-#include "xfs_ioctl.h"
-#include "xfs_trace.h"
-
-#include <linux/dcache.h>
-#include <linux/falloc.h>
-
-static const struct vm_operations_struct xfs_file_vm_ops;
-
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_lock(&VFS_I(ip)->i_mutex);
-       xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_iunlock(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-static inline void
-xfs_rw_ilock_demote(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_ilock_demote(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-/*
- *     xfs_iozero
- *
- *     xfs_iozero clears the specified byte range of the file supplied,
- *     and marks all the affected blocks as valid and modified.  If
- *     an affected block is not allocated, it will be allocated.  If
- *     an affected block is not completely overwritten, and is not
- *     valid before the operation, it will be read from disk before
- *     being partially zeroed.
- */
-STATIC int
-xfs_iozero(
-       struct xfs_inode        *ip,    /* inode                        */
-       loff_t                  pos,    /* offset in file               */
-       size_t                  count)  /* size of data to zero         */
-{
-       struct page             *page;
-       struct address_space    *mapping;
-       int                     status;
-
-       mapping = VFS_I(ip)->i_mapping;
-       do {
-               unsigned offset, bytes;
-               void *fsdata;
-
-               offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-               bytes = PAGE_CACHE_SIZE - offset;
-               if (bytes > count)
-                       bytes = count;
-
-               status = pagecache_write_begin(NULL, mapping, pos, bytes,
-                                       AOP_FLAG_UNINTERRUPTIBLE,
-                                       &page, &fsdata);
-               if (status)
-                       break;
-
-               zero_user(page, offset, bytes);
-
-               status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
-                                       page, fsdata);
-               WARN_ON(status <= 0); /* can't return less than zero! */
-               pos += bytes;
-               count -= bytes;
-               status = 0;
-       } while (count);
-
-       return (-status);
-}
-
-STATIC int
-xfs_file_fsync(
-       struct file             *file,
-       loff_t                  start,
-       loff_t                  end,
-       int                     datasync)
-{
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       int                     error = 0;
-       int                     log_flushed = 0;
-
-       trace_xfs_file_fsync(ip);
-
-       error = filemap_write_and_wait_range(inode->i_mapping, start, end);
-       if (error)
-               return error;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       xfs_ioend_wait(ip);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-       if (mp->m_flags & XFS_MOUNT_BARRIER) {
-               /*
-                * If we have an RT and/or log subvolume we need to make sure
-                * to flush the write cache of the device used for file data
-                * first.  This is to ensure newly written file data makes
-                * it to disk before logging the new inode size in case of
-                * an extending write.
-                */
-               if (XFS_IS_REALTIME_INODE(ip))
-                       xfs_blkdev_issue_flush(mp->m_rtdev_targp);
-               else if (mp->m_logdev_targp != mp->m_ddev_targp)
-                       xfs_blkdev_issue_flush(mp->m_ddev_targp);
-       }
-
-       /*
-        * We always need to make sure that the required inode state is safe on
-        * disk.  The inode might be clean but we still might need to force the
-        * log because of committed transactions that haven't hit the disk yet.
-        * Likewise, there could be unflushed non-transactional changes to the
-        * inode core that have to go to disk and this requires us to issue
-        * a synchronous transaction to capture these changes correctly.
-        *
-        * This code relies on the assumption that if the i_update_core field
-        * of the inode is clear and the inode is unpinned then it is clean
-        * and no action is required.
-        */
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-       /*
-        * First check if the VFS inode is marked dirty.  All the dirtying
-        * of non-transactional updates now goes through mark_inode_dirty*,
-        * which allows us to distinguish between pure timestamp updates
-        * and i_size updates which need to be caught for fdatasync.
-        * After that also check for the dirty state in the XFS inode, which
-        * might get cleared when the inode gets written out via the AIL
-        * or xfs_iflush_cluster.
-        */
-       if (((inode->i_state & I_DIRTY_DATASYNC) ||
-           ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
-           ip->i_update_core) {
-               /*
-                * Kick off a transaction to log the inode core to get the
-                * updates.  The sync transaction will also force the log.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, 0,
-                               XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp, 0);
-                       return -error;
-               }
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-               /*
-                * Note - it's possible that we might have pushed ourselves out
-                * of the way during trans_reserve which would flush the inode.
-                * But there's no guarantee that the inode buffer has actually
-                * gone out yet (it's delwri).  Plus the buffer could be pinned
-                * anyway if it's part of an inode in another recent
-                * transaction.  So we play it safe and fire off the
-                * transaction anyway.
-                */
-               xfs_trans_ijoin(tp, ip);
-               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-               xfs_trans_set_sync(tp);
-               error = _xfs_trans_commit(tp, 0, &log_flushed);
-
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       } else {
-               /*
-                * Timestamps/size haven't changed since last inode flush or
-                * inode transaction commit.  That means either nothing got
-                * written or a transaction committed which caught the updates.
-                * If the latter happened and the transaction hasn't hit the
-                * disk yet, the inode will still be pinned.  If it is,
-                * force the log.
-                */
-               if (xfs_ipincount(ip)) {
-                       error = _xfs_log_force_lsn(mp,
-                                       ip->i_itemp->ili_last_lsn,
-                                       XFS_LOG_SYNC, &log_flushed);
-               }
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       }
-
-       /*
-        * If we only have a single device, and the log force above was
-        * a no-op, we might have to flush the data device cache here.
-        * This can only happen for fdatasync/O_DSYNC if we were overwriting
-        * an already allocated file and thus do not have any metadata to
-        * commit.
-        */
-       if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
-           mp->m_logdev_targp == mp->m_ddev_targp &&
-           !XFS_IS_REALTIME_INODE(ip) &&
-           !log_flushed)
-               xfs_blkdev_issue_flush(mp->m_ddev_targp);
-
-       return -error;
-}
-
-STATIC ssize_t
-xfs_file_aio_read(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos)
-{
-       struct file             *file = iocb->ki_filp;
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       size_t                  size = 0;
-       ssize_t                 ret = 0;
-       int                     ioflags = 0;
-       xfs_fsize_t             n;
-       unsigned long           seg;
-
-       XFS_STATS_INC(xs_read_calls);
-
-       BUG_ON(iocb->ki_pos != pos);
-
-       if (unlikely(file->f_flags & O_DIRECT))
-               ioflags |= IO_ISDIRECT;
-       if (file->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       /* START copy & waste from filemap.c */
-       for (seg = 0; seg < nr_segs; seg++) {
-               const struct iovec *iv = &iovp[seg];
-
-               /*
-                * If any segment has a negative length, or the cumulative
-                * length ever wraps negative then return -EINVAL.
-                */
-               size += iv->iov_len;
-               if (unlikely((ssize_t)(size|iv->iov_len) < 0))
-                       return -XFS_ERROR(EINVAL);
-       }
-       /* END copy & waste from filemap.c */
-
-       if (unlikely(ioflags & IO_ISDIRECT)) {
-               xfs_buftarg_t   *target =
-                       XFS_IS_REALTIME_INODE(ip) ?
-                               mp->m_rtdev_targp : mp->m_ddev_targp;
-               if ((iocb->ki_pos & target->bt_smask) ||
-                   (size & target->bt_smask)) {
-                       if (iocb->ki_pos == ip->i_size)
-                               return 0;
-                       return -XFS_ERROR(EINVAL);
-               }
-       }
-
-       n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
-       if (n <= 0 || size == 0)
-               return 0;
-
-       if (n < size)
-               size = n;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       if (unlikely(ioflags & IO_ISDIRECT)) {
-               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
-
-               if (inode->i_mapping->nrpages) {
-                       ret = -xfs_flushinval_pages(ip,
-                                       (iocb->ki_pos & PAGE_CACHE_MASK),
-                                       -1, FI_REMAPF_LOCKED);
-                       if (ret) {
-                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-                               return ret;
-                       }
-               }
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-       } else
-               xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
-       trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
-
-       ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_splice_read(
-       struct file             *infilp,
-       loff_t                  *ppos,
-       struct pipe_inode_info  *pipe,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
-       int                     ioflags = 0;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_read_calls);
-
-       if (infilp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
-       trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
-
-       ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
-STATIC void
-xfs_aio_write_isize_update(
-       struct inode    *inode,
-       loff_t          *ppos,
-       ssize_t         bytes_written)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             isize = i_size_read(inode);
-
-       if (bytes_written > 0)
-               XFS_STATS_ADD(xs_write_bytes, bytes_written);
-
-       if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
-                                       *ppos > isize))
-               *ppos = isize;
-
-       if (*ppos > ip->i_size) {
-               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
-               if (*ppos > ip->i_size)
-                       ip->i_size = *ppos;
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-/*
- * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
- * part of the I/O may have been written to disk before the error occurred.  In
- * this case the on-disk file size may have been adjusted beyond the in-memory
- * file size and now needs to be truncated back.
- */
-STATIC void
-xfs_aio_write_newsize_update(
-       struct xfs_inode        *ip)
-{
-       if (ip->i_new_size) {
-               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
-               ip->i_new_size = 0;
-               if (ip->i_d.di_size > ip->i_size)
-                       ip->i_d.di_size = ip->i_size;
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * could cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-       struct pipe_inode_info  *pipe,
-       struct file             *outfilp,
-       loff_t                  *ppos,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct inode            *inode = outfilp->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             new_size;
-       int                     ioflags = 0;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_write_calls);
-
-       if (outfilp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       new_size = *ppos + count;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (new_size > ip->i_size)
-               ip->i_new_size = new_size;
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-       ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-
-       xfs_aio_write_isize_update(inode, ppos, ret);
-       xfs_aio_write_newsize_update(ip);
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       return ret;
-}
-
-/*
- * This routine is called to handle zeroing any space in the last
- * block of the file that is beyond the EOF.  We do this since the
- * size is being increased without writing anything to that block
- * and we don't want anyone to read the garbage on the disk.
- */
-STATIC int                             /* error (positive) */
-xfs_zero_last_block(
-       xfs_inode_t     *ip,
-       xfs_fsize_t     offset,
-       xfs_fsize_t     isize)
-{
-       xfs_fileoff_t   last_fsb;
-       xfs_mount_t     *mp = ip->i_mount;
-       int             nimaps;
-       int             zero_offset;
-       int             zero_len;
-       int             error = 0;
-       xfs_bmbt_irec_t imap;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-       if (zero_offset == 0) {
-               /*
-                * There are no extra bytes in the last block on disk to
-                * zero, so return.
-                */
-               return 0;
-       }
-
-       last_fsb = XFS_B_TO_FSBT(mp, isize);
-       nimaps = 1;
-       error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
-                         &nimaps, NULL);
-       if (error) {
-               return error;
-       }
-       ASSERT(nimaps > 0);
-       /*
-        * If the block underlying isize is just a hole, then there
-        * is nothing to zero.
-        */
-       if (imap.br_startblock == HOLESTARTBLOCK) {
-               return 0;
-       }
-       /*
-        * Zero the part of the last block beyond the EOF, and write it
-        * out sync.  We need to drop the ilock while we do this so we
-        * don't deadlock when the buffer cache calls back to us.
-        */
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       zero_len = mp->m_sb.sb_blocksize - zero_offset;
-       if (isize + zero_len > offset)
-               zero_len = offset - isize;
-       error = xfs_iozero(ip, isize, zero_len);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       ASSERT(error >= 0);
-       return error;
-}
-
-/*
- * Zero any on disk space between the current EOF and the new,
- * larger EOF.  This handles the normal case of zeroing the remainder
- * of the last block in the file and the unusual case of zeroing blocks
- * out beyond the size of the file.  This second case only happens
- * with fixed size extents and when the system crashes before the inode
- * size was updated but after blocks were allocated.  If fill is set,
- * then any holes in the range are filled and zeroed.  If not, the holes
- * are left alone as holes.
- */
-
-int                                    /* error (positive) */
-xfs_zero_eof(
-       xfs_inode_t     *ip,
-       xfs_off_t       offset,         /* starting I/O offset */
-       xfs_fsize_t     isize)          /* current inode size */
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       xfs_fileoff_t   start_zero_fsb;
-       xfs_fileoff_t   end_zero_fsb;
-       xfs_fileoff_t   zero_count_fsb;
-       xfs_fileoff_t   last_fsb;
-       xfs_fileoff_t   zero_off;
-       xfs_fsize_t     zero_len;
-       int             nimaps;
-       int             error = 0;
-       xfs_bmbt_irec_t imap;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-       ASSERT(offset > isize);
-
-       /*
-        * First handle zeroing the block on which isize resides.
-        * We only zero a part of that block so it is handled specially.
-        */
-       error = xfs_zero_last_block(ip, offset, isize);
-       if (error) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-               return error;
-       }
-
-       /*
-        * Calculate the range between the new size and the old
-        * where blocks needing to be zeroed may exist.  To get the
-        * block where the last byte in the file currently resides,
-        * we need to subtract one from the size and truncate back
-        * to a block boundary.  We subtract 1 in case the size is
-        * exactly on a block boundary.
-        */
-       last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-       start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-       end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-       ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-       if (last_fsb == end_zero_fsb) {
-               /*
-                * The size was only incremented on its last block.
-                * We took care of that above, so just return.
-                */
-               return 0;
-       }
-
-       ASSERT(start_zero_fsb <= end_zero_fsb);
-       while (start_zero_fsb <= end_zero_fsb) {
-               nimaps = 1;
-               zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-               error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
-                                 0, NULL, 0, &imap, &nimaps, NULL);
-               if (error) {
-                       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-                       return error;
-               }
-               ASSERT(nimaps > 0);
-
-               if (imap.br_state == XFS_EXT_UNWRITTEN ||
-                   imap.br_startblock == HOLESTARTBLOCK) {
-                       /*
-                        * The extent is a hole or unwritten, so there is
-                        * nothing on disk that needs zeroing; skip ahead
-                        * to the next extent.
-                        */
-                       start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-                       ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-                       continue;
-               }
-
-               /*
-                * There are blocks we need to zero.
-                * Drop the inode lock while we're doing the I/O.
-                * We'll still have the iolock to protect us.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-               zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-               zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-               if ((zero_off + zero_len) > offset)
-                       zero_len = offset - zero_off;
-
-               error = xfs_iozero(ip, zero_off, zero_len);
-               if (error) {
-                       goto out_lock;
-               }
-
-               start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-               ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-       }
-
-       return 0;
-
-out_lock:
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       ASSERT(error >= 0);
-       return error;
-}
-
-/*
- * Common pre-write limit and setup checks.
- *
- * Returns with iolock held according to @iolock.
- */
-STATIC ssize_t
-xfs_file_aio_write_checks(
-       struct file             *file,
-       loff_t                  *pos,
-       size_t                  *count,
-       int                     *iolock)
-{
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             new_size;
-       int                     error = 0;
-
-       error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
-       if (error) {
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
-               *iolock = 0;
-               return error;
-       }
-
-       new_size = *pos + *count;
-       if (new_size > ip->i_size)
-               ip->i_new_size = new_size;
-
-       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
-               file_update_time(file);
-
-       /*
-        * If the offset is beyond the size of the file, we need to zero any
-        * blocks that fall between the existing EOF and the start of this
-        * write.
-        */
-       if (*pos > ip->i_size)
-               error = -xfs_zero_eof(ip, *pos, ip->i_size);
-
-       xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       if (error)
-               return error;
-
-       /*
-        * If we're writing the file then make sure to clear the setuid and
-        * setgid bits if the process is not being run by root.  This keeps
-        * people from modifying setuid and setgid binaries.
-        */
-       return file_remove_suid(file);
-
-}
-
-/*
- * xfs_file_dio_aio_write - handle direct IO writes
- *
- * Lock the inode appropriately to prepare for and issue a direct IO write.
- * By separating it from the buffered write path we remove all the
- * tricky-to-follow locking changes and looping.
- *
- * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
- * until we're sure the bytes at the new EOF have been zeroed and/or the cached
- * pages are flushed out.
- *
- * In most cases the direct IO writes will be done holding IOLOCK_SHARED
- * allowing them to be done in parallel with reads and other direct IO writes.
- * However, if the IO is not aligned to filesystem blocks, the direct IO layer
- * needs to do sub-block zeroing and that requires serialisation against other
- * direct IOs to the same block. In this case we need to serialise the
- * submission of the unaligned IOs so that we don't get racing block zeroing in
- * the dio layer.  To avoid the problem with aio, we also need to wait for
- * outstanding IOs to complete so that unwritten extent conversion is completed
- * before we try to map the overlapping block. This is currently implemented by
- * hitting it with a big hammer (i.e. xfs_ioend_wait()).
- *
- * Returns with locks held indicated by @iolock and errors indicated by
- * negative return values.
- */
-STATIC ssize_t
-xfs_file_dio_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos,
-       size_t                  ocount,
-       int                     *iolock)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       ssize_t                 ret = 0;
-       size_t                  count = ocount;
-       int                     unaligned_io = 0;
-       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
-                                       mp->m_rtdev_targp : mp->m_ddev_targp;
-
-       *iolock = 0;
-       if ((pos & target->bt_smask) || (count & target->bt_smask))
-               return -XFS_ERROR(EINVAL);
-
-       if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
-               unaligned_io = 1;
-
-       if (unaligned_io || mapping->nrpages || pos > ip->i_size)
-               *iolock = XFS_IOLOCK_EXCL;
-       else
-               *iolock = XFS_IOLOCK_SHARED;
-       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
-
-       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
-       if (ret)
-               return ret;
-
-       if (mapping->nrpages) {
-               WARN_ON(*iolock != XFS_IOLOCK_EXCL);
-               ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
-                                                       FI_REMAPF_LOCKED);
-               if (ret)
-                       return ret;
-       }
-
-       /*
-        * If we are doing unaligned IO, wait for all other IO to drain;
-        * otherwise demote the lock if we had to flush cached pages.
-        */
-       if (unaligned_io)
-               xfs_ioend_wait(ip);
-       else if (*iolock == XFS_IOLOCK_EXCL) {
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-               *iolock = XFS_IOLOCK_SHARED;
-       }
-
-       trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_direct_write(iocb, iovp,
-                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
-
-       /* No fallback to buffered IO on errors for XFS. */
-       ASSERT(ret < 0 || ret == count);
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_buffered_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos,
-       size_t                  ocount,
-       int                     *iolock)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       ssize_t                 ret;
-       int                     enospc = 0;
-       size_t                  count = ocount;
-
-       *iolock = XFS_IOLOCK_EXCL;
-       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
-
-       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
-       if (ret)
-               return ret;
-
-       /* We can write back this queue in page reclaim */
-       current->backing_dev_info = mapping->backing_dev_info;
-
-write_retry:
-       trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_buffered_write(iocb, iovp, nr_segs,
-                       pos, &iocb->ki_pos, count, ret);
-       /*
-        * If we just got an ENOSPC, flush the inode now that we aren't
-        * holding any page locks, and retry *once*.
-        */
-       if (ret == -ENOSPC && !enospc) {
-               ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
-               if (ret)
-                       return ret;
-               enospc = 1;
-               goto write_retry;
-       }
-       current->backing_dev_info = NULL;
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       ssize_t                 ret;
-       int                     iolock;
-       size_t                  ocount = 0;
-
-       XFS_STATS_INC(xs_write_calls);
-
-       BUG_ON(iocb->ki_pos != pos);
-
-       ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
-       if (ret)
-               return ret;
-
-       if (ocount == 0)
-               return 0;
-
-       xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       if (unlikely(file->f_flags & O_DIRECT))
-               ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
-                                               ocount, &iolock);
-       else
-               ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
-                                               ocount, &iolock);
-
-       xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
-
-       if (ret <= 0)
-               goto out_unlock;
-
-       /* Handle various SYNC-type writes */
-       if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
-               loff_t end = pos + ret - 1;
-               int error;
-
-               xfs_rw_iunlock(ip, iolock);
-               error = xfs_file_fsync(file, pos, end,
-                                     (file->f_flags & __O_SYNC) ? 0 : 1);
-               xfs_rw_ilock(ip, iolock);
-               if (error)
-                       ret = error;
-       }
-
-out_unlock:
-       xfs_aio_write_newsize_update(ip);
-       xfs_rw_iunlock(ip, iolock);
-       return ret;
-}
-
-STATIC long
-xfs_file_fallocate(
-       struct file     *file,
-       int             mode,
-       loff_t          offset,
-       loff_t          len)
-{
-       struct inode    *inode = file->f_path.dentry->d_inode;
-       long            error;
-       loff_t          new_size = 0;
-       xfs_flock64_t   bf;
-       xfs_inode_t     *ip = XFS_I(inode);
-       int             cmd = XFS_IOC_RESVSP;
-       int             attr_flags = XFS_ATTR_NOLOCK;
-
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
-               return -EOPNOTSUPP;
-
-       bf.l_whence = 0;
-       bf.l_start = offset;
-       bf.l_len = len;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       if (mode & FALLOC_FL_PUNCH_HOLE)
-               cmd = XFS_IOC_UNRESVSP;
-
-       /* check the new inode size is valid before allocating */
-       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-           offset + len > i_size_read(inode)) {
-               new_size = offset + len;
-               error = inode_newsize_ok(inode, new_size);
-               if (error)
-                       goto out_unlock;
-       }
-
-       if (file->f_flags & O_DSYNC)
-               attr_flags |= XFS_ATTR_SYNC;
-
-       error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
-       if (error)
-               goto out_unlock;
-
-       /* Change file size if needed */
-       if (new_size) {
-               struct iattr iattr;
-
-               iattr.ia_valid = ATTR_SIZE;
-               iattr.ia_size = new_size;
-               error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
-       }
-
-out_unlock:
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       return error;
-}
-
-
-STATIC int
-xfs_file_open(
-       struct inode    *inode,
-       struct file     *file)
-{
-       if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
-               return -EFBIG;
-       if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
-               return -EIO;
-       return 0;
-}
-
-STATIC int
-xfs_dir_open(
-       struct inode    *inode,
-       struct file     *file)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       int             mode;
-       int             error;
-
-       error = xfs_file_open(inode, file);
-       if (error)
-               return error;
-
-       /*
-        * If there are any blocks, read-ahead block 0 as we're almost
-        * certain to have the next operation be a read there.
-        */
-       mode = xfs_ilock_map_shared(ip);
-       if (ip->i_d.di_nextents > 0)
-               xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
-       xfs_iunlock(ip, mode);
-       return 0;
-}
-
-STATIC int
-xfs_file_release(
-       struct inode    *inode,
-       struct file     *filp)
-{
-       return -xfs_release(XFS_I(inode));
-}
-
-STATIC int
-xfs_file_readdir(
-       struct file     *filp,
-       void            *dirent,
-       filldir_t       filldir)
-{
-       struct inode    *inode = filp->f_path.dentry->d_inode;
-       xfs_inode_t     *ip = XFS_I(inode);
-       int             error;
-       size_t          bufsize;
-
-       /*
-        * The Linux API doesn't pass the total size of the buffer we
-        * read into down to the filesystem.  With the filldir concept
-        * it's not needed for correct information, but the XFS dir2 leaf
-        * code wants an estimate of the buffer size to calculate its
-        * readahead window and size the buffers used for mapping to
-        * physical blocks.
-        *
-        * Try to give it an estimate that's good enough, maybe at some
-        * point we can change the ->readdir prototype to include the
-        * buffer size.  For now we use the current glibc buffer size.
-        */
-       bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
-
-       error = xfs_readdir(ip, dirent, bufsize,
-                               (xfs_off_t *)&filp->f_pos, filldir);
-       if (error)
-               return -error;
-       return 0;
-}
-
-STATIC int
-xfs_file_mmap(
-       struct file     *filp,
-       struct vm_area_struct *vma)
-{
-       vma->vm_ops = &xfs_file_vm_ops;
-       vma->vm_flags |= VM_CAN_NONLINEAR;
-
-       file_accessed(filp);
-       return 0;
-}
-
-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-       struct vm_area_struct   *vma,
-       struct vm_fault         *vmf)
-{
-       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
-const struct file_operations xfs_file_operations = {
-       .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = xfs_file_aio_read,
-       .aio_write      = xfs_file_aio_write,
-       .splice_read    = xfs_file_splice_read,
-       .splice_write   = xfs_file_splice_write,
-       .unlocked_ioctl = xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
-       .compat_ioctl   = xfs_file_compat_ioctl,
-#endif
-       .mmap           = xfs_file_mmap,
-       .open           = xfs_file_open,
-       .release        = xfs_file_release,
-       .fsync          = xfs_file_fsync,
-       .fallocate      = xfs_file_fallocate,
-};
-
-const struct file_operations xfs_dir_file_operations = {
-       .open           = xfs_dir_open,
-       .read           = generic_read_dir,
-       .readdir        = xfs_file_readdir,
-       .llseek         = generic_file_llseek,
-       .unlocked_ioctl = xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
-       .compat_ioctl   = xfs_file_compat_ioctl,
-#endif
-       .fsync          = xfs_file_fsync,
-};
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
-       .fault          = filemap_fault,
-       .page_mkwrite   = xfs_vm_page_mkwrite,
-};
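
The deleted xfs_file_fallocate() above is what fallocate(2) lands on for
XFS, translating FALLOC_FL_PUNCH_HOLE into XFS_IOC_UNRESVSP and plain
preallocation into XFS_IOC_RESVSP.  A small userspace sketch of both modes
(the file path is a placeholder; the VFS requires KEEP_SIZE alongside
PUNCH_HOLE):

/* Sketch: exercise the two fallocate modes handled above. */
#define _GNU_SOURCE
#include <fcntl.h>		/* open, fallocate */
#include <linux/falloc.h>	/* FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/testfile", O_RDWR | O_CREAT, 0644);

	if (fd < 0)
		return 1;
	/* Preallocate 1 MiB without changing i_size (XFS_IOC_RESVSP path). */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		perror("preallocate");
	/* Punch a 64 KiB hole (XFS_IOC_UNRESVSP path). */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      0, 64 << 10) < 0)
		perror("punch hole");
	close(fd);
	return 0;
}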
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
deleted file mode 100644 (file)
index ed88ed1..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trace.h"
-
-/*
- * Note: all filemap functions return negative error codes.  These
- * need to be inverted before returning to the XFS core functions.
- */
-void
-xfs_tosspages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       int             fiopt)
-{
-       /* can't toss partial tail pages, so mask them out */
-       last &= ~(PAGE_SIZE - 1);
-       truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
-}
-
-int
-xfs_flushinval_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       int             fiopt)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-       int             ret = 0;
-
-       trace_xfs_pagecache_inval(ip, first, last);
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-       ret = filemap_write_and_wait_range(mapping, first,
-                               last == -1 ? LLONG_MAX : last);
-       if (!ret)
-               truncate_inode_pages_range(mapping, first, last);
-       return -ret;
-}
-
-int
-xfs_flush_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       uint64_t        flags,
-       int             fiopt)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-       int             ret = 0;
-       int             ret2;
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-       ret = -filemap_fdatawrite_range(mapping, first,
-                               last == -1 ? LLONG_MAX : last);
-       if (flags & XBF_ASYNC)
-               return ret;
-       ret2 = xfs_wait_on_pages(ip, first, last);
-       if (!ret)
-               ret = ret2;
-       return ret;
-}
-
-int
-xfs_wait_on_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-
-       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
-               return -filemap_fdatawait_range(mapping, first,
-                                       last == -1 ? ip->i_size - 1 : last);
-       }
-       return 0;
-}
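
The inversions in the functions above (-ret, -filemap_fdatawrite_range())
implement the sign convention stated in the header comment: Linux filemap
helpers return 0 or a negative errno, while the XFS core of this era
expects 0 or a positive error.  A toy sketch of the flip (xfs_style_error
is a hypothetical name used only for illustration):

/* Hypothetical helper illustrating the sign convention, not kernel code. */
static int xfs_style_error(int linux_ret)
{
	/* Linux: 0 or -errno.  XFS core (historically): 0 or +errno. */
	return linux_ret < 0 ? -linux_ret : 0;
}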
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
deleted file mode 100644 (file)
index 76e81cf..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sysctl.h"
-
-/*
- * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
- * other XFS code uses these values.  Times are measured in centisecs (i.e.
- * 100ths of a second).
- */
-xfs_param_t xfs_params = {
-                         /*    MIN             DFLT            MAX     */
-       .sgid_inherit   = {     0,              0,              1       },
-       .symlink_mode   = {     0,              0,              1       },
-       .panic_mask     = {     0,              0,              255     },
-       .error_level    = {     0,              3,              11      },
-       .syncd_timer    = {     1*100,          30*100,         7200*100},
-       .stats_clear    = {     0,              0,              1       },
-       .inherit_sync   = {     0,              1,              1       },
-       .inherit_nodump = {     0,              1,              1       },
-       .inherit_noatim = {     0,              1,              1       },
-       .xfs_buf_timer  = {     100/2,          1*100,          30*100  },
-       .xfs_buf_age    = {     1*100,          15*100,         7200*100},
-       .inherit_nosym  = {     0,              0,              1       },
-       .rotorstep      = {     1,              1,              255     },
-       .inherit_nodfrg = {     0,              1,              1       },
-       .fstrm_timer    = {     1,              30*100,         3600*100},
-};
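
Each row above gives the MIN/DFLT/MAX clamp for one tunable, with times in
centisecs.  With CONFIG_SYSCTL=y these surface under /proc/sys/fs/xfs/
(error_level is one such knob).  A quick sketch reading one back, assuming
that sysctl location:

/* Sketch: read a tunable backed by xfs_params via procfs. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/fs/xfs/error_level", "r");
	int level;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &level) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	/* Clamped to the MIN..MAX column above: 0..11, default 3. */
	printf("error_level = %d\n", level);
	return 0;
}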
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
deleted file mode 100644 (file)
index f7ce7de..0000000
+++ /dev/null
@@ -1,1556 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_rtalloc.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_bmap.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_dfrag.h"
-#include "xfs_fsops.h"
-#include "xfs_vnodeops.h"
-#include "xfs_discard.h"
-#include "xfs_quota.h"
-#include "xfs_inode_item.h"
-#include "xfs_export.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/dcache.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
-
-/*
- * xfs_find_handle maps a userspace xfs_fsop_handlereq structure to a file
- * or fs handle.  (A userspace usage sketch follows this function.)
- *
- * XFS_IOC_PATH_TO_FSHANDLE
- *    returns fs handle for a mount point or path within that mount point
- * XFS_IOC_FD_TO_HANDLE
- *    returns full handle for an fd opened in user space
- * XFS_IOC_PATH_TO_HANDLE
- *    returns full handle for a path
- */
-int
-xfs_find_handle(
-       unsigned int            cmd,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       int                     hsize;
-       xfs_handle_t            handle;
-       struct inode            *inode;
-       struct file             *file = NULL;
-       struct path             path;
-       int                     error;
-       struct xfs_inode        *ip;
-
-       if (cmd == XFS_IOC_FD_TO_HANDLE) {
-               file = fget(hreq->fd);
-               if (!file)
-                       return -EBADF;
-               inode = file->f_path.dentry->d_inode;
-       } else {
-               error = user_lpath((const char __user *)hreq->path, &path);
-               if (error)
-                       return error;
-               inode = path.dentry->d_inode;
-       }
-       ip = XFS_I(inode);
-
-       /*
-        * We can only generate handles for inodes residing on an XFS filesystem,
-        * and only for regular files, directories or symbolic links.
-        */
-       error = -EINVAL;
-       if (inode->i_sb->s_magic != XFS_SB_MAGIC)
-               goto out_put;
-
-       error = -EBADF;
-       if (!S_ISREG(inode->i_mode) &&
-           !S_ISDIR(inode->i_mode) &&
-           !S_ISLNK(inode->i_mode))
-               goto out_put;
-
-       memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
-
-       if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
-               /*
-                * This handle only contains an fsid, zero the rest.
-                */
-               memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
-               hsize = sizeof(xfs_fsid_t);
-       } else {
-               int             lock_mode;
-
-               lock_mode = xfs_ilock_map_shared(ip);
-               handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
-                                       sizeof(handle.ha_fid.fid_len);
-               handle.ha_fid.fid_pad = 0;
-               handle.ha_fid.fid_gen = ip->i_d.di_gen;
-               handle.ha_fid.fid_ino = ip->i_ino;
-               xfs_iunlock_map_shared(ip, lock_mode);
-
-               hsize = XFS_HSIZE(handle);
-       }
-
-       error = -EFAULT;
-       if (copy_to_user(hreq->ohandle, &handle, hsize) ||
-           copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
-               goto out_put;
-
-       error = 0;
-
- out_put:
-       if (cmd == XFS_IOC_FD_TO_HANDLE)
-               fput(file);
-       else
-               path_put(&path);
-       return error;
-}
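/*
 * Editor's sketch (not part of the original file): a userspace caller of
 * XFS_IOC_PATH_TO_FSHANDLE.  The header <xfs/xfs.h> and the exact
 * xfs_fsop_handlereq_t field types are assumptions taken from xfsprogs;
 * libhandle's path_to_fshandle() wraps this same request.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int get_fshandle(const char *path)
{
	char hbuf[64];
	__u32 hlen = 0;
	xfs_fsop_handlereq_t hreq = {
		.path     = (void *)path,	/* pathname to resolve */
		.ohandle  = hbuf,		/* handle written here */
		.ohandlen = &hlen,		/* out: handle size in bytes */
	};
	int fd = open(path, O_RDONLY);

	if (fd < 0 || ioctl(fd, XFS_IOC_PATH_TO_FSHANDLE, &hreq) < 0) {
		perror("XFS_IOC_PATH_TO_FSHANDLE");
		if (fd >= 0)
			close(fd);
		return -1;
	}
	printf("fs handle is %u bytes\n", (unsigned)hlen);
	close(fd);
	return 0;
}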
-
-/*
- * No need to do permission checks on the various pathname components
- * as the handle operations are privileged.
- */
-STATIC int
-xfs_handle_acceptable(
-       void                    *context,
-       struct dentry           *dentry)
-{
-       return 1;
-}
-
-/*
- * Convert userspace handle data into a dentry.
- */
-struct dentry *
-xfs_handle_to_dentry(
-       struct file             *parfilp,
-       void __user             *uhandle,
-       u32                     hlen)
-{
-       xfs_handle_t            handle;
-       struct xfs_fid64        fid;
-
-       /*
-        * Only allow handle opens under a directory.
-        */
-       if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
-               return ERR_PTR(-ENOTDIR);
-
-       if (hlen != sizeof(xfs_handle_t))
-               return ERR_PTR(-EINVAL);
-       if (copy_from_user(&handle, uhandle, hlen))
-               return ERR_PTR(-EFAULT);
-       if (handle.ha_fid.fid_len !=
-           sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
-               return ERR_PTR(-EINVAL);
-
-       memset(&fid, 0, sizeof(struct fid));
-       fid.ino = handle.ha_fid.fid_ino;
-       fid.gen = handle.ha_fid.fid_gen;
-
-       return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
-                       FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
-                       xfs_handle_acceptable, NULL);
-}
-
-STATIC struct dentry *
-xfs_handlereq_to_dentry(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
-}
-
-int
-xfs_open_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       const struct cred       *cred = current_cred();
-       int                     error;
-       int                     fd;
-       int                     permflag;
-       struct file             *filp;
-       struct inode            *inode;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-       inode = dentry->d_inode;
-
-       /* Restrict xfs_open_by_handle to directories & regular files. */
-       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
-               error = -XFS_ERROR(EPERM);
-               goto out_dput;
-       }
-
-#if BITS_PER_LONG != 32
-       hreq->oflags |= O_LARGEFILE;
-#endif
-
-       /*
-        * Put open permission in namei (FMODE_*) format; a standalone
-        * sketch of this mapping follows the function.
-        */
-       permflag = hreq->oflags;
-       if ((permflag+1) & O_ACCMODE)
-               permflag++;
-       if (permflag & O_TRUNC)
-               permflag |= 2;
-
-       if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
-           (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out_dput;
-       }
-
-       if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
-               error = -XFS_ERROR(EACCES);
-               goto out_dput;
-       }
-
-       /* Can't write directories. */
-       if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
-               error = -XFS_ERROR(EISDIR);
-               goto out_dput;
-       }
-
-       fd = get_unused_fd();
-       if (fd < 0) {
-               error = fd;
-               goto out_dput;
-       }
-
-       filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
-                          hreq->oflags, cred);
-       if (IS_ERR(filp)) {
-               put_unused_fd(fd);
-               return PTR_ERR(filp);
-       }
-
-       if (S_ISREG(inode->i_mode)) {
-               filp->f_flags |= O_NOATIME;
-               filp->f_mode |= FMODE_NOCMTIME;
-       }
-
-       fd_install(fd, filp);
-       return fd;
-
- out_dput:
-       dput(dentry);
-       return error;
-}
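/*
 * Editor's sketch (not part of the original file): the "(permflag+1) &
 * O_ACCMODE" step above turns the open(2) access mode (O_RDONLY=0,
 * O_WRONLY=1, O_RDWR=2) into the FMODE_READ/FMODE_WRITE bit pair (1, 2, 3)
 * that the IS_APPEND/IS_IMMUTABLE checks then test against:
 */
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	int oflags[] = { O_RDONLY, O_WRONLY, O_RDWR };

	for (int i = 0; i < 3; i++) {
		int permflag = oflags[i];

		if ((permflag + 1) & O_ACCMODE)
			permflag++;	/* 0 -> 1 (read), 1 -> 2 (write), 2 -> 3 (rw) */
		printf("oflags %d -> fmode bits %#x\n", oflags[i], permflag & 3);
	}
	return 0;
}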
-
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
- * unused first argument.
- */
-STATIC int
-do_readlink(
-       char __user             *buffer,
-       int                     buflen,
-       const char              *link)
-{
-	int len;
-
-       len = PTR_ERR(link);
-       if (IS_ERR(link))
-               goto out;
-
-       len = strlen(link);
-       if (len > (unsigned) buflen)
-               len = buflen;
-       if (copy_to_user(buffer, link, len))
-               len = -EFAULT;
- out:
-       return len;
-}
-
-
-int
-xfs_readlink_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       struct dentry           *dentry;
-       __u32                   olen;
-       void                    *link;
-       int                     error;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       /* Restrict this handle operation to symlinks only. */
-       if (!S_ISLNK(dentry->d_inode->i_mode)) {
-               error = -XFS_ERROR(EINVAL);
-               goto out_dput;
-       }
-
-       if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out_dput;
-       }
-
-       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-       if (!link) {
-               error = -XFS_ERROR(ENOMEM);
-               goto out_dput;
-       }
-
-       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
-       if (error)
-               goto out_kfree;
-       error = do_readlink(hreq->ohandle, olen, link);
-       if (error)
-               goto out_kfree;
-
- out_kfree:
-       kfree(link);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_fssetdm_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       struct fsdmidata        fsd;
-       xfs_fsop_setdm_handlereq_t dmhreq;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_MKNOD))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out;
-       }
-
-       if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out;
-       }
-
-       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
-                                fsd.fsd_dmstate);
-
- out:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_attrlist_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error = -ENOMEM;
-       attrlist_cursor_kern_t  *cursor;
-       xfs_fsop_attrlist_handlereq_t al_hreq;
-       struct dentry           *dentry;
-       char                    *kbuf;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-       if (al_hreq.buflen > XATTR_LIST_MAX)
-               return -XFS_ERROR(EINVAL);
-
-       /*
-        * Reject flags; only allow namespaces.
-        */
-       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
-               return -XFS_ERROR(EINVAL);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
-       if (!kbuf)
-               goto out_dput;
-
-       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
-                                       al_hreq.flags, cursor);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
-               error = -EFAULT;
-
- out_kfree:
-       kfree(kbuf);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-int
-xfs_attrmulti_attr_get(
-       struct inode            *inode,
-       unsigned char           *name,
-       unsigned char           __user *ubuf,
-       __uint32_t              *len,
-       __uint32_t              flags)
-{
-       unsigned char           *kbuf;
-       int                     error = EFAULT;
-
-       if (*len > XATTR_SIZE_MAX)
-               return EINVAL;
-       kbuf = kmalloc(*len, GFP_KERNEL);
-       if (!kbuf)
-               return ENOMEM;
-
-       error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(ubuf, kbuf, *len))
-               error = EFAULT;
-
- out_kfree:
-       kfree(kbuf);
-       return error;
-}
-
-int
-xfs_attrmulti_attr_set(
-       struct inode            *inode,
-       unsigned char           *name,
-       const unsigned char     __user *ubuf,
-       __uint32_t              len,
-       __uint32_t              flags)
-{
-       unsigned char           *kbuf;
-       int                     error = EFAULT;
-
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               return EPERM;
-       if (len > XATTR_SIZE_MAX)
-               return EINVAL;
-
-       kbuf = memdup_user(ubuf, len);
-       if (IS_ERR(kbuf))
-               return PTR_ERR(kbuf);
-
-       error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
-
-       kfree(kbuf);    /* free the memdup_user() copy; it used to be leaked */
-       return error;
-}
-
-int
-xfs_attrmulti_attr_remove(
-       struct inode            *inode,
-       unsigned char           *name,
-       __uint32_t              flags)
-{
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               return EPERM;
-       return xfs_attr_remove(XFS_I(inode), name, flags);
-}
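/*
 * Editor's note: the three xfs_attrmulti_attr_* helpers above return
 * errnos as positive values in the common paths (a memdup_user() failure
 * passes through a negative PTR_ERR).  Their caller,
 * xfs_attrmulti_by_handle() below, stashes them per-op in am_error and
 * only negates its own aggregate error on return.
 */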
-
-STATIC int
-xfs_attrmulti_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       xfs_attr_multiop_t      *ops;
-       xfs_fsop_attrmulti_handlereq_t am_hreq;
-       struct dentry           *dentry;
-       unsigned int            i, size;
-       unsigned char           *attr_name;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       /* overflow check */
-       if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
-               return -E2BIG;
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = E2BIG;
-       size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
-       if (!size || size > 16 * PAGE_SIZE)
-               goto out_dput;
-
-       ops = memdup_user(am_hreq.ops, size);
-       if (IS_ERR(ops)) {
-               error = PTR_ERR(ops);
-               goto out_dput;
-       }
-
-       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-       if (!attr_name)
-               goto out_kfree_ops;
-
-       error = 0;
-       for (i = 0; i < am_hreq.opcount; i++) {
-               ops[i].am_error = strncpy_from_user((char *)attr_name,
-                               ops[i].am_attrname, MAXNAMELEN);
-               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-                       error = -ERANGE;
-               if (ops[i].am_error < 0)
-                       break;
-
-               switch (ops[i].am_opcode) {
-               case ATTR_OP_GET:
-                       ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_attrvalue, &ops[i].am_length,
-                                       ops[i].am_flags);
-                       break;
-               case ATTR_OP_SET:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_attrvalue, ops[i].am_length,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               case ATTR_OP_REMOVE:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               default:
-                       ops[i].am_error = EINVAL;
-               }
-       }
-
-       if (copy_to_user(am_hreq.ops, ops, size))
-               error = XFS_ERROR(EFAULT);
-
-       kfree(attr_name);
- out_kfree_ops:
-       kfree(ops);
- out_dput:
-       dput(dentry);
-       return -error;
-}
-
-int
-xfs_ioc_space(
-       struct xfs_inode        *ip,
-       struct inode            *inode,
-       struct file             *filp,
-       int                     ioflags,
-       unsigned int            cmd,
-       xfs_flock64_t           *bf)
-{
-       int                     attr_flags = 0;
-       int                     error;
-
-       /*
-        * Only allow the sys admin to reserve space unless
-        * unwritten extents are enabled.
-        */
-       if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
-           !capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
-               return -XFS_ERROR(EPERM);
-
-       if (!(filp->f_mode & FMODE_WRITE))
-               return -XFS_ERROR(EBADF);
-
-       if (!S_ISREG(inode->i_mode))
-               return -XFS_ERROR(EINVAL);
-
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               attr_flags |= XFS_ATTR_NONBLOCK;
-
-       if (filp->f_flags & O_DSYNC)
-               attr_flags |= XFS_ATTR_SYNC;
-
-       if (ioflags & IO_INVIS)
-               attr_flags |= XFS_ATTR_DMI;
-
-       error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
-       return -error;
-}
-
-STATIC int
-xfs_ioc_bulkstat(
-       xfs_mount_t             *mp,
-       unsigned int            cmd,
-       void                    __user *arg)
-{
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
-       int                     error;
-
-       /*
-        * done is set once there are no more stats to get and bulkstat
-        * need not be called again (unused here, but used in dmapi).
-        * A userspace usage sketch follows this function.
-        */
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
-               return -XFS_ERROR(EFAULT);
-
-       if ((count = bulkreq.icount) <= 0)
-               return -XFS_ERROR(EINVAL);
-
-       if (bulkreq.ubuffer == NULL)
-               return -XFS_ERROR(EINVAL);
-
-       if (cmd == XFS_IOC_FSINUMBERS)
-               error = xfs_inumbers(mp, &inlast, &count,
-                                       bulkreq.ubuffer, xfs_inumbers_fmt);
-       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
-               error = xfs_bulkstat_single(mp, &inlast,
-                                               bulkreq.ubuffer, &done);
-       else    /* XFS_IOC_FSBULKSTAT */
-               error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
-                                    sizeof(xfs_bstat_t), bulkreq.ubuffer,
-                                    &done);
-
-       if (error)
-               return -error;
-
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -XFS_ERROR(EFAULT);
-
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -XFS_ERROR(EFAULT);
-       }
-
-       return 0;
-}
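/*
 * Editor's sketch (not part of the original file): driving
 * XFS_IOC_FSBULKSTAT from userspace (needs CAP_SYS_ADMIN).  Struct and
 * header names are assumptions taken from xfsprogs' <xfs/xfs.h>.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static void walk_inodes(int fd)		/* fd: any open file on the XFS mount */
{
	xfs_bstat_t buf[64];
	__u64 lastino = 0;		/* advanced by the kernel as we go */
	__s32 ocount = 0;
	xfs_fsop_bulkreq_t req = {
		.lastip  = &lastino,
		.icount  = 64,		/* records requested per call */
		.ubuffer = buf,
		.ocount  = &ocount,	/* records actually returned */
	};

	while (ioctl(fd, XFS_IOC_FSBULKSTAT, &req) == 0 && ocount > 0)
		for (int i = 0; i < ocount; i++)
			printf("ino %llu size %lld\n",
			       (unsigned long long)buf[i].bs_ino,
			       (long long)buf[i].bs_size);
}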
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
-       xfs_mount_t             *mp,
-       void                    __user *arg)
-{
-       xfs_fsop_geom_t         fsgeo;
-       int                     error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 3);
-       if (error)
-               return -error;
-
-       /*
-        * Caller should have passed an argument of type
-        * xfs_fsop_geom_v1_t.  This is a proper subset of the
-        * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
-        */
-       if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry(
-       xfs_mount_t             *mp,
-       void                    __user *arg)
-{
-       xfs_fsop_geom_t         fsgeo;
-       int                     error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 4);
-       if (error)
-               return -error;
-
-       if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/*
- * Linux extended inode flags interface.
- */
-
-STATIC unsigned int
-xfs_merge_ioc_xflags(
-       unsigned int    flags,
-       unsigned int    start)
-{
-       unsigned int    xflags = start;
-
-       if (flags & FS_IMMUTABLE_FL)
-               xflags |= XFS_XFLAG_IMMUTABLE;
-       else
-               xflags &= ~XFS_XFLAG_IMMUTABLE;
-       if (flags & FS_APPEND_FL)
-               xflags |= XFS_XFLAG_APPEND;
-       else
-               xflags &= ~XFS_XFLAG_APPEND;
-       if (flags & FS_SYNC_FL)
-               xflags |= XFS_XFLAG_SYNC;
-       else
-               xflags &= ~XFS_XFLAG_SYNC;
-       if (flags & FS_NOATIME_FL)
-               xflags |= XFS_XFLAG_NOATIME;
-       else
-               xflags &= ~XFS_XFLAG_NOATIME;
-       if (flags & FS_NODUMP_FL)
-               xflags |= XFS_XFLAG_NODUMP;
-       else
-               xflags &= ~XFS_XFLAG_NODUMP;
-
-       return xflags;
-}
-
-STATIC unsigned int
-xfs_di2lxflags(
-       __uint16_t      di_flags)
-{
-       unsigned int    flags = 0;
-
-       if (di_flags & XFS_DIFLAG_IMMUTABLE)
-               flags |= FS_IMMUTABLE_FL;
-       if (di_flags & XFS_DIFLAG_APPEND)
-               flags |= FS_APPEND_FL;
-       if (di_flags & XFS_DIFLAG_SYNC)
-               flags |= FS_SYNC_FL;
-       if (di_flags & XFS_DIFLAG_NOATIME)
-               flags |= FS_NOATIME_FL;
-       if (di_flags & XFS_DIFLAG_NODUMP)
-               flags |= FS_NODUMP_FL;
-       return flags;
-}
-
-STATIC int
-xfs_ioc_fsgetxattr(
-       xfs_inode_t             *ip,
-       int                     attr,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-
-       memset(&fa, 0, sizeof(struct fsxattr));
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       fa.fsx_xflags = xfs_ip2xflags(ip);
-       fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
-       fa.fsx_projid = xfs_get_projid(ip);
-
-       if (attr) {
-               if (ip->i_afp) {
-                       if (ip->i_afp->if_flags & XFS_IFEXTENTS)
-                               fa.fsx_nextents = ip->i_afp->if_bytes /
-                                                       sizeof(xfs_bmbt_rec_t);
-                       else
-                               fa.fsx_nextents = ip->i_d.di_anextents;
-               } else
-                       fa.fsx_nextents = 0;
-       } else {
-               if (ip->i_df.if_flags & XFS_IFEXTENTS)
-                       fa.fsx_nextents = ip->i_df.if_bytes /
-                                               sizeof(xfs_bmbt_rec_t);
-               else
-                       fa.fsx_nextents = ip->i_d.di_nextents;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       if (copy_to_user(arg, &fa, sizeof(fa)))
-               return -EFAULT;
-       return 0;
-}
-
-STATIC void
-xfs_set_diflags(
-       struct xfs_inode        *ip,
-       unsigned int            xflags)
-{
-       unsigned int            di_flags;
-
-       /* can't set PREALLOC this way, just preserve it */
-       di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-       if (xflags & XFS_XFLAG_IMMUTABLE)
-               di_flags |= XFS_DIFLAG_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
-               di_flags |= XFS_DIFLAG_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
-               di_flags |= XFS_DIFLAG_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
-               di_flags |= XFS_DIFLAG_NOATIME;
-       if (xflags & XFS_XFLAG_NODUMP)
-               di_flags |= XFS_DIFLAG_NODUMP;
-       if (xflags & XFS_XFLAG_PROJINHERIT)
-               di_flags |= XFS_DIFLAG_PROJINHERIT;
-       if (xflags & XFS_XFLAG_NODEFRAG)
-               di_flags |= XFS_DIFLAG_NODEFRAG;
-       if (xflags & XFS_XFLAG_FILESTREAM)
-               di_flags |= XFS_DIFLAG_FILESTREAM;
-       if (S_ISDIR(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_RTINHERIT)
-                       di_flags |= XFS_DIFLAG_RTINHERIT;
-               if (xflags & XFS_XFLAG_NOSYMLINKS)
-                       di_flags |= XFS_DIFLAG_NOSYMLINKS;
-               if (xflags & XFS_XFLAG_EXTSZINHERIT)
-                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-       } else if (S_ISREG(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_REALTIME)
-                       di_flags |= XFS_DIFLAG_REALTIME;
-               if (xflags & XFS_XFLAG_EXTSIZE)
-                       di_flags |= XFS_DIFLAG_EXTSIZE;
-       }
-
-       ip->i_d.di_flags = di_flags;
-}
-
-STATIC void
-xfs_diflags_to_linux(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = VFS_I(ip);
-       unsigned int            xflags = xfs_ip2xflags(ip);
-
-       if (xflags & XFS_XFLAG_IMMUTABLE)
-               inode->i_flags |= S_IMMUTABLE;
-       else
-               inode->i_flags &= ~S_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
-               inode->i_flags |= S_APPEND;
-       else
-               inode->i_flags &= ~S_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
-               inode->i_flags |= S_SYNC;
-       else
-               inode->i_flags &= ~S_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
-               inode->i_flags |= S_NOATIME;
-       else
-               inode->i_flags &= ~S_NOATIME;
-}
-
-#define FSX_PROJID     1
-#define FSX_EXTSIZE    2
-#define FSX_XFLAGS     4
-#define FSX_NONBLOCK   8
-
-STATIC int
-xfs_ioctl_setattr(
-       xfs_inode_t             *ip,
-       struct fsxattr          *fa,
-       int                     mask)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       unsigned int            lock_flags = 0;
-       struct xfs_dquot        *udqp = NULL;
-       struct xfs_dquot        *gdqp = NULL;
-       struct xfs_dquot        *olddquot = NULL;
-       int                     code;
-
-       trace_xfs_ioctl_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       /*
-        * Disallow 32bit project ids when projid32bit feature is not enabled.
-        */
-       if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
-                       !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
-               return XFS_ERROR(EINVAL);
-
-       /*
-        * If disk quotas are on, we make sure that the dquots do exist on disk
-        * before we start any other transactions.  Trying to do this later
-        * is messy.  We don't care to take a readlock to look at the ids in
-        * the inode here, because we can't hold it across the trans_reserve.
-        * If the IDs do change before we take the ilock, we're covered
-        * because the i_*dquot fields will get updated anyway.
-        */
-       if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
-               code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
-                                        ip->i_d.di_gid, fa->fsx_projid,
-                                        XFS_QMOPT_PQUOTA, &udqp, &gdqp);
-               if (code)
-                       return code;
-       }
-
-       /*
-        * For the other attributes, we acquire the inode lock and
-        * first do an error checking pass.
-        */
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-       if (code)
-               goto error_return;
-
-       lock_flags = XFS_ILOCK_EXCL;
-       xfs_ilock(ip, lock_flags);
-
-       /*
-        * CAP_FOWNER overrides the following restrictions:
-        *
-        * The user ID of the calling process must be equal
-        * to the file owner ID, except in cases where the
-        * CAP_FSETID capability is applicable.
-        */
-       if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
-               code = XFS_ERROR(EPERM);
-               goto error_return;
-       }
-
-       /*
-        * Do a quota reservation only if projid is actually going to change.
-        */
-       if (mask & FSX_PROJID) {
-               if (XFS_IS_QUOTA_RUNNING(mp) &&
-                   XFS_IS_PQUOTA_ON(mp) &&
-                   xfs_get_projid(ip) != fa->fsx_projid) {
-                       ASSERT(tp);
-                       code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-                                               capable(CAP_FOWNER) ?
-                                               XFS_QMOPT_FORCE_RES : 0);
-                       if (code)       /* out of quota */
-                               goto error_return;
-               }
-       }
-
-       if (mask & FSX_EXTSIZE) {
-               /*
-                * Can't change extent size if any extents are allocated.
-                */
-               if (ip->i_d.di_nextents &&
-                   ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-                    fa->fsx_extsize)) {
-                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
-                       goto error_return;
-               }
-
-               /*
-                * Extent size must be a multiple of the appropriate block
-                * size, if set at all. It must also be smaller than the
-                * maximum extent size supported by the filesystem.
-                *
-                * Also, for non-realtime files, limit the extent size hint to
-                * half the size of the AGs in the filesystem so alignment
-                * doesn't result in extents larger than an AG.
-                */
-               if (fa->fsx_extsize != 0) {
-                       xfs_extlen_t    size;
-                       xfs_fsblock_t   extsize_fsb;
-
-                       extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
-                       if (extsize_fsb > MAXEXTLEN) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-
-                       if (XFS_IS_REALTIME_INODE(ip) ||
-                           ((mask & FSX_XFLAGS) &&
-                           (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
-                               size = mp->m_sb.sb_rextsize <<
-                                      mp->m_sb.sb_blocklog;
-                       } else {
-                               size = mp->m_sb.sb_blocksize;
-                               if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
-                                       code = XFS_ERROR(EINVAL);
-                                       goto error_return;
-                               }
-                       }
-
-                       if (fa->fsx_extsize % size) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-               }
-       }
-
-
-       if (mask & FSX_XFLAGS) {
-               /*
-                * Can't change realtime flag if any extents are allocated.
-                */
-               if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-                   (XFS_IS_REALTIME_INODE(ip)) !=
-                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
-                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
-                       goto error_return;
-               }
-
-               /*
-                * If realtime flag is set then must have realtime data.
-                */
-               if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
-                       if ((mp->m_sb.sb_rblocks == 0) ||
-                           (mp->m_sb.sb_rextsize == 0) ||
-                           (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-               }
-
-               /*
-                * Can't modify an immutable/append-only file unless
-                * we have appropriate permission.
-                */
-               if ((ip->i_d.di_flags &
-                               (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
-                    (fa->fsx_xflags &
-                               (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
-                   !capable(CAP_LINUX_IMMUTABLE)) {
-                       code = XFS_ERROR(EPERM);
-                       goto error_return;
-               }
-       }
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & FSX_PROJID) {
-               /*
-                * CAP_FSETID overrides the following restrictions:
-                *
-                * The set-user-ID and set-group-ID bits of a file will be
-                * cleared upon successful return from chown()
-                */
-               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-                   !capable(CAP_FSETID))
-                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-               /*
-                * Change the ownerships and register quota modifications
-                * in the transaction.
-                */
-               if (xfs_get_projid(ip) != fa->fsx_projid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
-                               olddquot = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_gdquot, gdqp);
-                       }
-                       xfs_set_projid(ip, fa->fsx_projid);
-
-                       /*
-                        * We may have to rev the inode as well as
-                        * the superblock version number since projids didn't
-                        * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
-                        */
-                       if (ip->i_d.di_version == 1)
-                               xfs_bump_ino_vers2(tp, ip);
-               }
-
-       }
-
-       if (mask & FSX_EXTSIZE)
-               ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
-       if (mask & FSX_XFLAGS) {
-               xfs_set_diflags(ip, fa->fsx_xflags);
-               xfs_diflags_to_linux(ip);
-       }
-
-       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       /*
-        * If this is a synchronous mount, make sure that the
-        * transaction goes to disk before returning to the user.
-        * This is slightly sub-optimal in that truncates require
-        * two sync transactions instead of one for wsync filesystems.
-        * One for the truncate and one for the timestamps since we
-        * don't want to change the timestamps unless we're sure the
-        * truncate worked.  Truncates are less than 1% of the laddis
-        * mix so this probably isn't worth the trouble to optimize.
-        */
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-       code = xfs_trans_commit(tp, 0);
-       xfs_iunlock(ip, lock_flags);
-
-       /*
-        * Release any dquot(s) the inode had kept before chown.
-        */
-       xfs_qm_dqrele(olddquot);
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-
-       return code;
-
- error_return:
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-       xfs_trans_cancel(tp, 0);
-       if (lock_flags)
-               xfs_iunlock(ip, lock_flags);
-       return code;
-}
-
-STATIC int
-xfs_ioc_fssetxattr(
-       xfs_inode_t             *ip,
-       struct file             *filp,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-       unsigned int            mask;
-
-       if (copy_from_user(&fa, arg, sizeof(fa)))
-               return -EFAULT;
-
-       mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               mask |= FSX_NONBLOCK;
-
-       return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_ioc_getxflags(
-       xfs_inode_t             *ip,
-       void                    __user *arg)
-{
-       unsigned int            flags;
-
-       flags = xfs_di2lxflags(ip->i_d.di_flags);
-       if (copy_to_user(arg, &flags, sizeof(flags)))
-               return -EFAULT;
-       return 0;
-}
-
-STATIC int
-xfs_ioc_setxflags(
-       xfs_inode_t             *ip,
-       struct file             *filp,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-       unsigned int            flags;
-       unsigned int            mask;
-
-       if (copy_from_user(&flags, arg, sizeof(flags)))
-               return -EFAULT;
-
-       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
-                     FS_NOATIME_FL | FS_NODUMP_FL |
-                     FS_SYNC_FL))
-               return -EOPNOTSUPP;
-
-       mask = FSX_XFLAGS;
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               mask |= FSX_NONBLOCK;
-       fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-
-       return -xfs_ioctl_setattr(ip, &fa, mask);
-}
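/*
 * Editor's sketch (not part of the original file): in this tree
 * XFS_IOC_GETXFLAGS/XFS_IOC_SETXFLAGS are defined as the generic
 * FS_IOC_GETFLAGS/FS_IOC_SETFLAGS, so a chattr-style round trip exercises
 * the handlers above (setting FS_APPEND_FL also needs CAP_LINUX_IMMUTABLE):
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

static int set_append_only(const char *path)
{
	unsigned int flags;
	int ret = -1;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0) {
		flags |= FS_APPEND_FL;	/* merged in via xfs_merge_ioc_xflags() */
		ret = ioctl(fd, FS_IOC_SETFLAGS, &flags);
	}
	close(fd);
	return ret;
}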
-
-STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
-{
-       struct getbmap __user   *base = *ap;
-
-       /* copy only getbmap portion (not getbmapx) */
-       if (copy_to_user(base, bmv, sizeof(struct getbmap)))
-               return XFS_ERROR(EFAULT);
-
-       *ap += sizeof(struct getbmap);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_getbmap(
-       struct xfs_inode        *ip,
-       int                     ioflags,
-       unsigned int            cmd,
-       void                    __user *arg)
-{
-       struct getbmapx         bmx;
-       int                     error;
-
-       if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
-               return -XFS_ERROR(EFAULT);
-
-       if (bmx.bmv_count < 2)
-               return -XFS_ERROR(EINVAL);
-
-       bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
-       if (ioflags & IO_INVIS)
-               bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
-
-       error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
-                           (struct getbmap *)arg+1);
-       if (error)
-               return -error;
-
-       /* copy back header - only size of getbmap */
-       if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
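/*
 * Editor's sketch (not part of the original file): calling XFS_IOC_GETBMAP.
 * As the handler above expects, the getbmap header and the output records
 * form one contiguous array, and bmv_count includes the header slot (hence
 * the bmv_count >= 2 check).  Offsets and lengths are in 512-byte units.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static void print_extents(int fd)
{
	struct getbmap map[17];		/* map[0] is the header */

	memset(map, 0, sizeof(map));
	map[0].bmv_length = -1;		/* map the whole file */
	map[0].bmv_count  = 17;		/* header + up to 16 records */

	if (ioctl(fd, XFS_IOC_GETBMAP, map) == 0)
		for (int i = 1; i <= map[0].bmv_entries; i++)
			printf("off %lld blk %lld len %lld\n",
			       (long long)map[i].bmv_offset,
			       (long long)map[i].bmv_block,
			       (long long)map[i].bmv_length);
}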
-
-STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
-{
-       struct getbmapx __user  *base = *ap;
-
-       if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
-               return XFS_ERROR(EFAULT);
-
-       *ap += sizeof(struct getbmapx);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_getbmapx(
-       struct xfs_inode        *ip,
-       void                    __user *arg)
-{
-       struct getbmapx         bmx;
-       int                     error;
-
-       if (copy_from_user(&bmx, arg, sizeof(bmx)))
-               return -XFS_ERROR(EFAULT);
-
-       if (bmx.bmv_count < 2)
-               return -XFS_ERROR(EINVAL);
-
-       if (bmx.bmv_iflags & (~BMV_IF_VALID))
-               return -XFS_ERROR(EINVAL);
-
-       error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
-                           (struct getbmapx *)arg+1);
-       if (error)
-               return -error;
-
-       /* copy back header */
-       if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
-               return -XFS_ERROR(EFAULT);
-
-       return 0;
-}
-
-/*
- * Note: some of the ioctls return positive numbers as a
- * byte count indicating success, such as readlink_by_handle.
- * So we don't "sign flip" like most other routines.  This means
- * true errors need to be returned as a negative value.
- */
-long
-xfs_file_ioctl(
-       struct file             *filp,
-       unsigned int            cmd,
-       unsigned long           p)
-{
-       struct inode            *inode = filp->f_path.dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       void                    __user *arg = (void __user *)p;
-       int                     ioflags = 0;
-       int                     error;
-
-       if (filp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       trace_xfs_file_ioctl(ip);
-
-       switch (cmd) {
-       case FITRIM:
-               return xfs_ioc_trim(mp, arg);
-       case XFS_IOC_ALLOCSP:
-       case XFS_IOC_FREESP:
-       case XFS_IOC_RESVSP:
-       case XFS_IOC_UNRESVSP:
-       case XFS_IOC_ALLOCSP64:
-       case XFS_IOC_FREESP64:
-       case XFS_IOC_RESVSP64:
-       case XFS_IOC_UNRESVSP64:
-       case XFS_IOC_ZERO_RANGE: {
-               xfs_flock64_t           bf;
-
-               if (copy_from_user(&bf, arg, sizeof(bf)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
-       }
-       case XFS_IOC_DIOINFO: {
-               struct dioattr  da;
-               xfs_buftarg_t   *target =
-                       XFS_IS_REALTIME_INODE(ip) ?
-                       mp->m_rtdev_targp : mp->m_ddev_targp;
-
-               da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-               da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
-               if (copy_to_user(arg, &da, sizeof(da)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_FSBULKSTAT_SINGLE:
-       case XFS_IOC_FSBULKSTAT:
-       case XFS_IOC_FSINUMBERS:
-               return xfs_ioc_bulkstat(mp, cmd, arg);
-
-       case XFS_IOC_FSGEOMETRY_V1:
-               return xfs_ioc_fsgeometry_v1(mp, arg);
-
-       case XFS_IOC_FSGEOMETRY:
-               return xfs_ioc_fsgeometry(mp, arg);
-
-       case XFS_IOC_GETVERSION:
-               return put_user(inode->i_generation, (int __user *)arg);
-
-       case XFS_IOC_FSGETXATTR:
-               return xfs_ioc_fsgetxattr(ip, 0, arg);
-       case XFS_IOC_FSGETXATTRA:
-               return xfs_ioc_fsgetxattr(ip, 1, arg);
-       case XFS_IOC_FSSETXATTR:
-               return xfs_ioc_fssetxattr(ip, filp, arg);
-       case XFS_IOC_GETXFLAGS:
-               return xfs_ioc_getxflags(ip, arg);
-       case XFS_IOC_SETXFLAGS:
-               return xfs_ioc_setxflags(ip, filp, arg);
-
-       case XFS_IOC_FSSETDM: {
-               struct fsdmidata        dmi;
-
-               if (copy_from_user(&dmi, arg, sizeof(dmi)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
-                               dmi.fsd_dmstate);
-               return -error;
-       }
-
-       case XFS_IOC_GETBMAP:
-       case XFS_IOC_GETBMAPA:
-               return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
-
-       case XFS_IOC_GETBMAPX:
-               return xfs_ioc_getbmapx(ip, arg);
-
-       case XFS_IOC_FD_TO_HANDLE:
-       case XFS_IOC_PATH_TO_HANDLE:
-       case XFS_IOC_PATH_TO_FSHANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(hreq)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_find_handle(cmd, &hreq);
-       }
-       case XFS_IOC_OPEN_BY_HANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_open_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_FSSETDM_BY_HANDLE:
-               return xfs_fssetdm_by_handle(filp, arg);
-
-       case XFS_IOC_READLINK_BY_HANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_readlink_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_ATTRLIST_BY_HANDLE:
-               return xfs_attrlist_by_handle(filp, arg);
-
-       case XFS_IOC_ATTRMULTI_BY_HANDLE:
-               return xfs_attrmulti_by_handle(filp, arg);
-
-       case XFS_IOC_SWAPEXT: {
-               struct xfs_swapext      sxp;
-
-               if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_swapext(&sxp);
-               return -error;
-       }
-
-       case XFS_IOC_FSCOUNTS: {
-               xfs_fsop_counts_t out;
-
-               error = xfs_fs_counts(mp, &out);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &out, sizeof(out)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_SET_RESBLKS: {
-               xfs_fsop_resblks_t inout;
-               __uint64_t         in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (mp->m_flags & XFS_MOUNT_RDONLY)
-                       return -XFS_ERROR(EROFS);
-
-               if (copy_from_user(&inout, arg, sizeof(inout)))
-                       return -XFS_ERROR(EFAULT);
-
-               /* input parameter is passed in resblks field of structure */
-               in = inout.resblks;
-               error = xfs_reserve_blocks(mp, &in, &inout);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &inout, sizeof(inout)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_GET_RESBLKS: {
-               xfs_fsop_resblks_t out;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               error = xfs_reserve_blocks(mp, NULL, &out);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &out, sizeof(out)))
-                       return -XFS_ERROR(EFAULT);
-
-               return 0;
-       }
-
-       case XFS_IOC_FSGROWFSDATA: {
-               xfs_growfs_data_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_data(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_FSGROWFSLOG: {
-               xfs_growfs_log_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_log(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_FSGROWFSRT: {
-               xfs_growfs_rt_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_rt(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_GOINGDOWN: {
-               __uint32_t in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (get_user(in, (__uint32_t __user *)arg))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_fs_goingdown(mp, in);
-               return -error;
-       }
-
-       case XFS_IOC_ERROR_INJECTION: {
-               xfs_error_injection_t in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_errortag_add(in.errtag, mp);
-               return -error;
-       }
-
-       case XFS_IOC_ERROR_CLEARALL:
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               error = xfs_errortag_clearall(mp, 1);
-               return -error;
-
-       default:
-               return -ENOTTY;
-       }
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
deleted file mode 100644 (file)
index d56173b..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOCTL_H__
-#define __XFS_IOCTL_H__
-
-extern int
-xfs_ioc_space(
-       struct xfs_inode        *ip,
-       struct inode            *inode,
-       struct file             *filp,
-       int                     ioflags,
-       unsigned int            cmd,
-       xfs_flock64_t           *bf);
-
-extern int
-xfs_find_handle(
-       unsigned int            cmd,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_open_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_readlink_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_attrmulti_attr_get(
-       struct inode            *inode,
-       unsigned char           *name,
-       unsigned char           __user *ubuf,
-       __uint32_t              *len,
-       __uint32_t              flags);
-
-extern int
-xfs_attrmulti_attr_set(
-       struct inode            *inode,
-       unsigned char           *name,
-       const unsigned char     __user *ubuf,
-       __uint32_t              len,
-       __uint32_t              flags);
-
-extern int
-xfs_attrmulti_attr_remove(
-       struct inode            *inode,
-       unsigned char           *name,
-       __uint32_t              flags);
-
-extern struct dentry *
-xfs_handle_to_dentry(
-       struct file             *parfilp,
-       void __user             *uhandle,
-       u32                     hlen);
-
-extern long
-xfs_file_ioctl(
-       struct file             *filp,
-       unsigned int            cmd,
-       unsigned long           p);
-
-extern long
-xfs_file_compat_ioctl(
-       struct file             *file,
-       unsigned int            cmd,
-       unsigned long           arg);
-
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
deleted file mode 100644 (file)
index 54e623b..0000000
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/compat.h>
-#include <linux/ioctl.h>
-#include <linux/mount.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_vnode.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_dfrag.h"
-#include "xfs_vnodeops.h"
-#include "xfs_fsops.h"
-#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_attr.h"
-#include "xfs_ioctl.h"
-#include "xfs_ioctl32.h"
-#include "xfs_trace.h"
-
-#define  _NATIVE_IOC(cmd, type) \
-         _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
-
-#ifdef BROKEN_X86_ALIGNMENT
-STATIC int
-xfs_compat_flock64_copyin(
-       xfs_flock64_t           *bf,
-       compat_xfs_flock64_t    __user *arg32)
-{
-       if (get_user(bf->l_type,        &arg32->l_type) ||
-           get_user(bf->l_whence,      &arg32->l_whence) ||
-           get_user(bf->l_start,       &arg32->l_start) ||
-           get_user(bf->l_len,         &arg32->l_len) ||
-           get_user(bf->l_sysid,       &arg32->l_sysid) ||
-           get_user(bf->l_pid,         &arg32->l_pid) ||
-           copy_from_user(bf->l_pad,   &arg32->l_pad,  4*sizeof(u32)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_ioc_fsgeometry_v1(
-       struct xfs_mount          *mp,
-       compat_xfs_fsop_geom_v1_t __user *arg32)
-{
-       xfs_fsop_geom_t           fsgeo;
-       int                       error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 3);
-       if (error)
-               return -error;
-       /* The 32-bit variant simply has some padding at the end */
-       if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_growfs_data_copyin(
-       struct xfs_growfs_data   *in,
-       compat_xfs_growfs_data_t __user *arg32)
-{
-       if (get_user(in->newblocks, &arg32->newblocks) ||
-           get_user(in->imaxpct,   &arg32->imaxpct))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_growfs_rt_copyin(
-       struct xfs_growfs_rt     *in,
-       compat_xfs_growfs_rt_t  __user *arg32)
-{
-       if (get_user(in->newblocks, &arg32->newblocks) ||
-           get_user(in->extsize,   &arg32->extsize))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_inumbers_fmt_compat(
-       void                    __user *ubuffer,
-       const xfs_inogrp_t      *buffer,
-       long                    count,
-       long                    *written)
-{
-       compat_xfs_inogrp_t     __user *p32 = ubuffer;
-       long                    i;
-
-       for (i = 0; i < count; i++) {
-               if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
-                   put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
-                   put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
-                       return -XFS_ERROR(EFAULT);
-       }
-       *written = count * sizeof(*p32);
-       return 0;
-}
-
-#else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
-#endif /* BROKEN_X86_ALIGNMENT */
-
-STATIC int
-xfs_ioctl32_bstime_copyin(
-       xfs_bstime_t            *bstime,
-       compat_xfs_bstime_t     __user *bstime32)
-{
-       compat_time_t           sec32;  /* tv_sec differs on 64 vs. 32 */
-
-       if (get_user(sec32,             &bstime32->tv_sec)      ||
-           get_user(bstime->tv_nsec,   &bstime32->tv_nsec))
-               return -XFS_ERROR(EFAULT);
-       bstime->tv_sec = sec32;
-       return 0;
-}
-
-/* xfs_bstat_t has differing alignment on intel; bstime_t sizes differ everywhere */
-STATIC int
-xfs_ioctl32_bstat_copyin(
-       xfs_bstat_t             *bstat,
-       compat_xfs_bstat_t      __user *bstat32)
-{
-       if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
-           get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
-           get_user(bstat->bs_nlink,   &bstat32->bs_nlink)     ||
-           get_user(bstat->bs_uid,     &bstat32->bs_uid)       ||
-           get_user(bstat->bs_gid,     &bstat32->bs_gid)       ||
-           get_user(bstat->bs_rdev,    &bstat32->bs_rdev)      ||
-           get_user(bstat->bs_blksize, &bstat32->bs_blksize)   ||
-           get_user(bstat->bs_size,    &bstat32->bs_size)      ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
-           get_user(bstat->bs_blocks,  &bstat32->bs_blocks)    ||
-           get_user(bstat->bs_xflags,  &bstat32->bs_xflags)    ||
-           get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
-           get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
-           get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
-           get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
-           get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
-           get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
-           get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
-           get_user(bstat->bs_aextents, &bstat32->bs_aextents))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/* XFS_IOC_FSBULKSTAT and friends */
-
-STATIC int
-xfs_bstime_store_compat(
-       compat_xfs_bstime_t     __user *p32,
-       const xfs_bstime_t      *p)
-{
-       __s32                   sec32;
-
-       sec32 = p->tv_sec;
-       if (put_user(sec32, &p32->tv_sec) ||
-           put_user(p->tv_nsec, &p32->tv_nsec))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/* Return 0 on success or positive error (to xfs_bulkstat()) */
-STATIC int
-xfs_bulkstat_one_fmt_compat(
-       void                    __user *ubuffer,
-       int                     ubsize,
-       int                     *ubused,
-       const xfs_bstat_t       *buffer)
-{
-       compat_xfs_bstat_t      __user *p32 = ubuffer;
-
-       if (ubsize < sizeof(*p32))
-               return XFS_ERROR(ENOMEM);
-
-       if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
-           put_user(buffer->bs_mode,     &p32->bs_mode)        ||
-           put_user(buffer->bs_nlink,    &p32->bs_nlink)       ||
-           put_user(buffer->bs_uid,      &p32->bs_uid)         ||
-           put_user(buffer->bs_gid,      &p32->bs_gid)         ||
-           put_user(buffer->bs_rdev,     &p32->bs_rdev)        ||
-           put_user(buffer->bs_blksize,  &p32->bs_blksize)     ||
-           put_user(buffer->bs_size,     &p32->bs_size)        ||
-           xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
-           xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
-           xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
-           put_user(buffer->bs_blocks,   &p32->bs_blocks)      ||
-           put_user(buffer->bs_xflags,   &p32->bs_xflags)      ||
-           put_user(buffer->bs_extsize,  &p32->bs_extsize)     ||
-           put_user(buffer->bs_extents,  &p32->bs_extents)     ||
-           put_user(buffer->bs_gen,      &p32->bs_gen)         ||
-           put_user(buffer->bs_projid,   &p32->bs_projid)      ||
-           put_user(buffer->bs_projid_hi,      &p32->bs_projid_hi)     ||
-           put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
-           put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
-           put_user(buffer->bs_aextents, &p32->bs_aextents))
-               return XFS_ERROR(EFAULT);
-       if (ubused)
-               *ubused = sizeof(*p32);
-       return 0;
-}
-
-STATIC int
-xfs_bulkstat_one_compat(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* buffer to place output in */
-       int             ubsize,         /* size of buffer */
-       int             *ubused,        /* bytes used by me */
-       int             *stat)          /* BULKSTAT_RV_... */
-{
-       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-                                   xfs_bulkstat_one_fmt_compat,
-                                   ubused, stat);
-}
-
-/* copied from xfs_ioctl.c */
-STATIC int
-xfs_compat_ioc_bulkstat(
-       xfs_mount_t               *mp,
-       unsigned int              cmd,
-       compat_xfs_fsop_bulkreq_t __user *p32)
-{
-       u32                     addr;
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
-       int                     error;
-
-       /*
-        * done = 1 if there are more stats to get and if bulkstat
-        * should be called again (unused here, but used in dmapi).
-        */
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (get_user(addr, &p32->lastip))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.lastip = compat_ptr(addr);
-       if (get_user(bulkreq.icount, &p32->icount) ||
-           get_user(addr, &p32->ubuffer))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.ubuffer = compat_ptr(addr);
-       if (get_user(addr, &p32->ocount))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.ocount = compat_ptr(addr);
-
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
-               return -XFS_ERROR(EFAULT);
-
-       if ((count = bulkreq.icount) <= 0)
-               return -XFS_ERROR(EINVAL);
-
-       if (bulkreq.ubuffer == NULL)
-               return -XFS_ERROR(EINVAL);
-
-       if (cmd == XFS_IOC_FSINUMBERS_32) {
-               error = xfs_inumbers(mp, &inlast, &count,
-                               bulkreq.ubuffer, xfs_inumbers_fmt_compat);
-       } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
-               int res;
-
-               error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
-                               sizeof(compat_xfs_bstat_t), 0, &res);
-       } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
-               error = xfs_bulkstat(mp, &inlast, &count,
-                       xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
-                       bulkreq.ubuffer, &done);
-       } else
-               error = XFS_ERROR(EINVAL);
-       if (error)
-               return -error;
-
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -XFS_ERROR(EFAULT);
-
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -XFS_ERROR(EFAULT);
-       }
-
-       return 0;
-}
-
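
For context, the native ioctl this compat path converges on is driven from userspace roughly as follows. A hedged sketch only: XFS_IOC_FSBULKSTAT, xfs_fsop_bulkreq_t and xfs_bstat_t are the real names from the xfs userspace headers, while the helper, the buffer size and the output format are invented for illustration, and error handling is elided; as the handler above enforces, the caller needs CAP_SYS_ADMIN.

#include <stdio.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>		/* xfs_fsop_bulkreq_t, xfs_bstat_t */

static void bulkstat_all(int fd)	/* fd: any file on the XFS mount */
{
	__u64			lastino = 0;	/* resume cursor, kernel-updated */
	__s32			ocount = 0;	/* entries filled per call */
	xfs_bstat_t		buf[64];
	xfs_fsop_bulkreq_t	req = {
		.lastip  = &lastino,
		.icount  = 64,			/* capacity of buf */
		.ubuffer = buf,
		.ocount  = &ocount,
	};

	while (ioctl(fd, XFS_IOC_FSBULKSTAT, &req) == 0 && ocount > 0)
		printf("%d inodes, cursor now %llu\n",
		       ocount, (unsigned long long)lastino);
}

The compat handler above exists so that the same loop, compiled 32-bit, keeps working: each pointer in the request is a compat_uptr_t that must be widened with compat_ptr(), and each returned record is written back in the packed compat_xfs_bstat_t layout.
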
-STATIC int
-xfs_compat_handlereq_copyin(
-       xfs_fsop_handlereq_t            *hreq,
-       compat_xfs_fsop_handlereq_t     __user *arg32)
-{
-       compat_xfs_fsop_handlereq_t     hreq32;
-
-       if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       hreq->fd = hreq32.fd;
-       hreq->path = compat_ptr(hreq32.path);
-       hreq->oflags = hreq32.oflags;
-       hreq->ihandle = compat_ptr(hreq32.ihandle);
-       hreq->ihandlen = hreq32.ihandlen;
-       hreq->ohandle = compat_ptr(hreq32.ohandle);
-       hreq->ohandlen = compat_ptr(hreq32.ohandlen);
-
-       return 0;
-}
-
-STATIC struct dentry *
-xfs_compat_handlereq_to_dentry(
-       struct file             *parfilp,
-       compat_xfs_fsop_handlereq_t *hreq)
-{
-       return xfs_handle_to_dentry(parfilp,
-                       compat_ptr(hreq->ihandle), hreq->ihandlen);
-}
-
-STATIC int
-xfs_compat_attrlist_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       attrlist_cursor_kern_t  *cursor;
-       compat_xfs_fsop_attrlist_handlereq_t al_hreq;
-       struct dentry           *dentry;
-       char                    *kbuf;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&al_hreq, arg,
-                          sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-       if (al_hreq.buflen > XATTR_LIST_MAX)
-               return -XFS_ERROR(EINVAL);
-
-       /*
-        * Reject flags, only allow namespaces.
-        */
-       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
-               return -XFS_ERROR(EINVAL);
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = -ENOMEM;
-       kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
-       if (!kbuf)
-               goto out_dput;
-
-       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
-                                       al_hreq.flags, cursor);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
-               error = -EFAULT;
-
- out_kfree:
-       kfree(kbuf);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_compat_attrmulti_by_handle(
-       struct file                             *parfilp,
-       void                                    __user *arg)
-{
-       int                                     error;
-       compat_xfs_attr_multiop_t               *ops;
-       compat_xfs_fsop_attrmulti_handlereq_t   am_hreq;
-       struct dentry                           *dentry;
-       unsigned int                            i, size;
-       unsigned char                           *attr_name;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&am_hreq, arg,
-                          sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       /* overflow check */
-       if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
-               return -E2BIG;
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = E2BIG;
-       size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
-       if (!size || size > 16 * PAGE_SIZE)
-               goto out_dput;
-
-       ops = memdup_user(compat_ptr(am_hreq.ops), size);
-       if (IS_ERR(ops)) {
-               error = -PTR_ERR(ops);
-               goto out_dput;
-       }
-
-       error = ENOMEM;
-       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-       if (!attr_name)
-               goto out_kfree_ops;
-
-       error = 0;
-       for (i = 0; i < am_hreq.opcount; i++) {
-               ops[i].am_error = strncpy_from_user((char *)attr_name,
-                               compat_ptr(ops[i].am_attrname),
-                               MAXNAMELEN);
-               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-                       error = ERANGE;
-               if (ops[i].am_error < 0)
-                       break;
-
-               switch (ops[i].am_opcode) {
-               case ATTR_OP_GET:
-                       ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
-                                       compat_ptr(ops[i].am_attrvalue),
-                                       &ops[i].am_length, ops[i].am_flags);
-                       break;
-               case ATTR_OP_SET:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
-                                       compat_ptr(ops[i].am_attrvalue),
-                                       ops[i].am_length, ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               case ATTR_OP_REMOVE:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               default:
-                       ops[i].am_error = EINVAL;
-               }
-       }
-
-       if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
-               error = XFS_ERROR(EFAULT);
-
-       kfree(attr_name);
- out_kfree_ops:
-       kfree(ops);
- out_dput:
-       dput(dentry);
-       return -error;
-}
-
-STATIC int
-xfs_compat_fssetdm_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       struct fsdmidata        fsd;
-       compat_xfs_fsop_setdm_handlereq_t dmhreq;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_MKNOD))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&dmhreq, arg,
-                          sizeof(compat_xfs_fsop_setdm_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out;
-       }
-
-       if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out;
-       }
-
-       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
-                                fsd.fsd_dmstate);
-
-out:
-       dput(dentry);
-       return error;
-}
-
-long
-xfs_file_compat_ioctl(
-       struct file             *filp,
-       unsigned                cmd,
-       unsigned long           p)
-{
-       struct inode            *inode = filp->f_path.dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       void                    __user *arg = (void __user *)p;
-       int                     ioflags = 0;
-       int                     error;
-
-       if (filp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       trace_xfs_file_compat_ioctl(ip);
-
-       switch (cmd) {
-       /* No size or alignment issues on any arch */
-       case XFS_IOC_DIOINFO:
-       case XFS_IOC_FSGEOMETRY:
-       case XFS_IOC_FSGETXATTR:
-       case XFS_IOC_FSSETXATTR:
-       case XFS_IOC_FSGETXATTRA:
-       case XFS_IOC_FSSETDM:
-       case XFS_IOC_GETBMAP:
-       case XFS_IOC_GETBMAPA:
-       case XFS_IOC_GETBMAPX:
-       case XFS_IOC_FSCOUNTS:
-       case XFS_IOC_SET_RESBLKS:
-       case XFS_IOC_GET_RESBLKS:
-       case XFS_IOC_FSGROWFSLOG:
-       case XFS_IOC_GOINGDOWN:
-       case XFS_IOC_ERROR_INJECTION:
-       case XFS_IOC_ERROR_CLEARALL:
-               return xfs_file_ioctl(filp, cmd, p);
-#ifndef BROKEN_X86_ALIGNMENT
-       /* These are handled fine if no alignment issues */
-       case XFS_IOC_ALLOCSP:
-       case XFS_IOC_FREESP:
-       case XFS_IOC_RESVSP:
-       case XFS_IOC_UNRESVSP:
-       case XFS_IOC_ALLOCSP64:
-       case XFS_IOC_FREESP64:
-       case XFS_IOC_RESVSP64:
-       case XFS_IOC_UNRESVSP64:
-       case XFS_IOC_FSGEOMETRY_V1:
-       case XFS_IOC_FSGROWFSDATA:
-       case XFS_IOC_FSGROWFSRT:
-       case XFS_IOC_ZERO_RANGE:
-               return xfs_file_ioctl(filp, cmd, p);
-#else
-       case XFS_IOC_ALLOCSP_32:
-       case XFS_IOC_FREESP_32:
-       case XFS_IOC_ALLOCSP64_32:
-       case XFS_IOC_FREESP64_32:
-       case XFS_IOC_RESVSP_32:
-       case XFS_IOC_UNRESVSP_32:
-       case XFS_IOC_RESVSP64_32:
-       case XFS_IOC_UNRESVSP64_32:
-       case XFS_IOC_ZERO_RANGE_32: {
-               struct xfs_flock64      bf;
-
-               if (xfs_compat_flock64_copyin(&bf, arg))
-                       return -XFS_ERROR(EFAULT);
-               cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
-               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
-       }
-       case XFS_IOC_FSGEOMETRY_V1_32:
-               return xfs_compat_ioc_fsgeometry_v1(mp, arg);
-       case XFS_IOC_FSGROWFSDATA_32: {
-               struct xfs_growfs_data  in;
-
-               if (xfs_compat_growfs_data_copyin(&in, arg))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_growfs_data(mp, &in);
-               return -error;
-       }
-       case XFS_IOC_FSGROWFSRT_32: {
-               struct xfs_growfs_rt    in;
-
-               if (xfs_compat_growfs_rt_copyin(&in, arg))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_growfs_rt(mp, &in);
-               return -error;
-       }
-#endif
-       /* long changes size, but xfs only copies out 32 bits */
-       case XFS_IOC_GETXFLAGS_32:
-       case XFS_IOC_SETXFLAGS_32:
-       case XFS_IOC_GETVERSION_32:
-               cmd = _NATIVE_IOC(cmd, long);
-               return xfs_file_ioctl(filp, cmd, p);
-       case XFS_IOC_SWAPEXT_32: {
-               struct xfs_swapext        sxp;
-               struct compat_xfs_swapext __user *sxu = arg;
-
-               /* Bulk copy in up to the sx_stat field, then copy bstat */
-               if (copy_from_user(&sxp, sxu,
-                                  offsetof(struct xfs_swapext, sx_stat)) ||
-                   xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_swapext(&sxp);
-               return -error;
-       }
-       case XFS_IOC_FSBULKSTAT_32:
-       case XFS_IOC_FSBULKSTAT_SINGLE_32:
-       case XFS_IOC_FSINUMBERS_32:
-               return xfs_compat_ioc_bulkstat(mp, cmd, arg);
-       case XFS_IOC_FD_TO_HANDLE_32:
-       case XFS_IOC_PATH_TO_HANDLE_32:
-       case XFS_IOC_PATH_TO_FSHANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
-               return xfs_find_handle(cmd, &hreq);
-       }
-       case XFS_IOC_OPEN_BY_HANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_open_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_READLINK_BY_HANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_readlink_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_ATTRLIST_BY_HANDLE_32:
-               return xfs_compat_attrlist_by_handle(filp, arg);
-       case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
-               return xfs_compat_attrmulti_by_handle(filp, arg);
-       case XFS_IOC_FSSETDM_BY_HANDLE_32:
-               return xfs_compat_fssetdm_by_handle(filp, arg);
-       default:
-               return -XFS_ERROR(ENOIOCTLCMD);
-       }
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
deleted file mode 100644 (file)
index 80f4060..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOCTL32_H__
-#define __XFS_IOCTL32_H__
-
-#include <linux/compat.h>
-
-/*
- * on 32-bit arches, ioctl argument structures may have different sizes
- * and/or alignment.  We define compat structures which match the
- * 32-bit sizes/alignments here, and their associated ioctl numbers.
- *
- * xfs_ioctl32.c contains routines to copy these structures in and out.
- */
-
-/* stock kernel-level ioctls we support */
-#define XFS_IOC_GETXFLAGS_32   FS_IOC32_GETFLAGS
-#define XFS_IOC_SETXFLAGS_32   FS_IOC32_SETFLAGS
-#define XFS_IOC_GETVERSION_32  FS_IOC32_GETVERSION
-
-/*
- * On intel, even if sizes match, alignment and/or padding may differ.
- */
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
-#define BROKEN_X86_ALIGNMENT
-#define __compat_packed __attribute__((packed))
-#else
-#define __compat_packed
-#endif
-
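
What BROKEN_X86_ALIGNMENT is really about, in miniature: x86_64 and ia64 align 64-bit integers to 8 bytes, while the i386 ABI aligns them to 4, so without __compat_packed the structures below would grow padding holes that a 32-bit caller's layout does not have. A free-standing illustration with hypothetical demo structs (not from this file):

struct demo_native {		/* 16 bytes on x86_64: 4-byte hole after a */
	__u32	a;
	__u64	b;
};

struct demo_compat {		/* 12 bytes, matching what i386 lays out */
	__u32	a;
	__u64	b;
} __attribute__((packed));

Copying a demo_native image out to a 32-bit process would shift b, and every field after it, by four bytes; avoiding exactly that shift is why the copyin/copyout helpers in xfs_ioctl32.c translate field by field.
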
-typedef struct compat_xfs_bstime {
-       compat_time_t   tv_sec;         /* seconds              */
-       __s32           tv_nsec;        /* and nanoseconds      */
-} compat_xfs_bstime_t;
-
-typedef struct compat_xfs_bstat {
-       __u64           bs_ino;         /* inode number                 */
-       __u16           bs_mode;        /* type and mode                */
-       __u16           bs_nlink;       /* number of links              */
-       __u32           bs_uid;         /* user id                      */
-       __u32           bs_gid;         /* group id                     */
-       __u32           bs_rdev;        /* device value                 */
-       __s32           bs_blksize;     /* block size                   */
-       __s64           bs_size;        /* file size                    */
-       compat_xfs_bstime_t bs_atime;   /* access time                  */
-       compat_xfs_bstime_t bs_mtime;   /* modify time                  */
-       compat_xfs_bstime_t bs_ctime;   /* inode change time            */
-       int64_t         bs_blocks;      /* number of blocks             */
-       __u32           bs_xflags;      /* extended flags               */
-       __s32           bs_extsize;     /* extent size                  */
-       __s32           bs_extents;     /* number of extents            */
-       __u32           bs_gen;         /* generation count             */
-       __u16           bs_projid_lo;   /* lower part of project id     */
-#define        bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
-       __u16           bs_projid_hi;   /* high part of project id      */
-       unsigned char   bs_pad[12];     /* pad space, unused            */
-       __u32           bs_dmevmask;    /* DMIG event mask              */
-       __u16           bs_dmstate;     /* DMIG state info              */
-       __u16           bs_aextents;    /* attribute number of extents  */
-} __compat_packed compat_xfs_bstat_t;
-
-typedef struct compat_xfs_fsop_bulkreq {
-       compat_uptr_t   lastip;         /* last inode # pointer         */
-       __s32           icount;         /* count of entries in buffer   */
-       compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
-       compat_uptr_t   ocount;         /* output count pointer         */
-} compat_xfs_fsop_bulkreq_t;
-
-#define XFS_IOC_FSBULKSTAT_32 \
-       _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
-       _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSINUMBERS_32 \
-       _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
-
-typedef struct compat_xfs_fsop_handlereq {
-       __u32           fd;             /* fd for FD_TO_HANDLE          */
-       compat_uptr_t   path;           /* user pathname                */
-       __u32           oflags;         /* open flags                   */
-       compat_uptr_t   ihandle;        /* user supplied handle         */
-       __u32           ihandlen;       /* user supplied length         */
-       compat_uptr_t   ohandle;        /* user buffer for handle       */
-       compat_uptr_t   ohandlen;       /* user buffer length           */
-} compat_xfs_fsop_handlereq_t;
-
-#define XFS_IOC_PATH_TO_FSHANDLE_32 \
-       _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_PATH_TO_HANDLE_32 \
-       _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_FD_TO_HANDLE_32 \
-       _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_OPEN_BY_HANDLE_32 \
-       _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_READLINK_BY_HANDLE_32 \
-       _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
-
-/* The bstat field in the swapext struct needs translation */
-typedef struct compat_xfs_swapext {
-       __int64_t               sx_version;     /* version */
-       __int64_t               sx_fdtarget;    /* fd of target file */
-       __int64_t               sx_fdtmp;       /* fd of tmp file */
-       xfs_off_t               sx_offset;      /* offset into file */
-       xfs_off_t               sx_length;      /* length from offset */
-       char                    sx_pad[16];     /* pad space, unused */
-       compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
-} __compat_packed compat_xfs_swapext_t;
-
-#define XFS_IOC_SWAPEXT_32     _IOWR('X', 109, struct compat_xfs_swapext)
-
-typedef struct compat_xfs_fsop_attrlist_handlereq {
-       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
-       struct xfs_attrlist_cursor      pos; /* opaque cookie, list offset */
-       __u32                           flags;  /* which namespace to use */
-       __u32                           buflen; /* length of buffer supplied */
-       compat_uptr_t                   buffer; /* returned names */
-} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
-
-/* Note: actually this is read/write */
-#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
-       _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
-
-/* am_opcodes defined in xfs_fs.h */
-typedef struct compat_xfs_attr_multiop {
-       __u32           am_opcode;
-       __s32           am_error;
-       compat_uptr_t   am_attrname;
-       compat_uptr_t   am_attrvalue;
-       __u32           am_length;
-       __u32           am_flags;
-} compat_xfs_attr_multiop_t;
-
-typedef struct compat_xfs_fsop_attrmulti_handlereq {
-       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
-       __u32                           opcount;/* count of following multiop */
-       /* ptr to compat_xfs_attr_multiop */
-       compat_uptr_t                   ops; /* attr_multi data */
-} compat_xfs_fsop_attrmulti_handlereq_t;
-
-#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
-       _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
-
-typedef struct compat_xfs_fsop_setdm_handlereq {
-       struct compat_xfs_fsop_handlereq hreq;  /* handle information   */
-       /* ptr to struct fsdmidata */
-       compat_uptr_t                   data;   /* DMAPI data   */
-} compat_xfs_fsop_setdm_handlereq_t;
-
-#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
-       _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
-
-#ifdef BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct compat_xfs_flock64 {
-       __s16           l_type;
-       __s16           l_whence;
-       __s64           l_start __attribute__((packed));
-                       /* len == 0 means until end of file */
-       __s64           l_len __attribute__((packed));
-       __s32           l_sysid;
-       __u32           l_pid;
-       __s32           l_pad[4];       /* reserve area */
-} compat_xfs_flock64_t;
-
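
Per the field list above, packing l_start and l_len pins l_start to offset 4, immediately after the two __s16 fields, which is where i386 puts it; a natural x86_64 layout would pad it out to offset 8. If one wanted to assert the layout at build time, a sketch could look like this (BUILD_BUG_ON is the kernel's compile-time check from <linux/bug.h> and must sit inside a compiled function; the 44-byte total is just 2+2+8+8+4+4+16):

	BUILD_BUG_ON(offsetof(compat_xfs_flock64_t, l_start) != 4);
	BUILD_BUG_ON(offsetof(compat_xfs_flock64_t, l_len) != 12);
	BUILD_BUG_ON(sizeof(compat_xfs_flock64_t) != 44);
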
-#define XFS_IOC_ALLOCSP_32     _IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32      _IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32   _IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32    _IOW('X', 37, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP_32      _IOW('X', 40, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP_32    _IOW('X', 41, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP64_32    _IOW('X', 42, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP64_32  _IOW('X', 43, struct compat_xfs_flock64)
-#define XFS_IOC_ZERO_RANGE_32  _IOW('X', 57, struct compat_xfs_flock64)
-
-typedef struct compat_xfs_fsop_geom_v1 {
-       __u32           blocksize;      /* filesystem (data) block size */
-       __u32           rtextsize;      /* realtime extent size         */
-       __u32           agblocks;       /* fsblocks in an AG            */
-       __u32           agcount;        /* number of allocation groups  */
-       __u32           logblocks;      /* fsblocks in the log          */
-       __u32           sectsize;       /* (data) sector size, bytes    */
-       __u32           inodesize;      /* inode size in bytes          */
-       __u32           imaxpct;        /* max allowed inode space(%)   */
-       __u64           datablocks;     /* fsblocks in data subvolume   */
-       __u64           rtblocks;       /* fsblocks in realtime subvol  */
-       __u64           rtextents;      /* rt extents in realtime subvol*/
-       __u64           logstart;       /* starting fsblock of the log  */
-       unsigned char   uuid[16];       /* unique id of the filesystem  */
-       __u32           sunit;          /* stripe unit, fsblocks        */
-       __u32           swidth;         /* stripe width, fsblocks       */
-       __s32           version;        /* structure version            */
-       __u32           flags;          /* superblock version flags     */
-       __u32           logsectsize;    /* log sector size, bytes       */
-       __u32           rtsectsize;     /* realtime sector size, bytes  */
-       __u32           dirblocksize;   /* directory block size, bytes  */
-} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
-
-#define XFS_IOC_FSGEOMETRY_V1_32  \
-       _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
-
-typedef struct compat_xfs_inogrp {
-       __u64           xi_startino;    /* starting inode number        */
-       __s32           xi_alloccount;  /* # bits set in allocmask      */
-       __u64           xi_allocmask;   /* mask of allocated inodes     */
-} __attribute__((packed)) compat_xfs_inogrp_t;
-
-/* These growfs input structures have padding on the end, so they must be translated */
-typedef struct compat_xfs_growfs_data {
-       __u64           newblocks;      /* new data subvol size, fsblocks */
-       __u32           imaxpct;        /* new inode space percentage limit */
-} __attribute__((packed)) compat_xfs_growfs_data_t;
-
-typedef struct compat_xfs_growfs_rt {
-       __u64           newblocks;      /* new realtime size, fsblocks */
-       __u32           extsize;        /* new realtime extent size, fsblocks */
-} __attribute__((packed)) compat_xfs_growfs_rt_t;
-
-#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
-#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
-
-#endif /* BROKEN_X86_ALIGNMENT */
-
-#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
deleted file mode 100644 (file)
index b9c172b..0000000
+++ /dev/null
@@ -1,1210 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_acl.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/xattr.h>
-#include <linux/namei.h>
-#include <linux/posix_acl.h>
-#include <linux/security.h>
-#include <linux/fiemap.h>
-#include <linux/slab.h>
-
-/*
- * Bring the timestamps in the XFS inode uptodate.
- *
- * Used before writing the inode to disk.
- */
-void
-xfs_synchronize_times(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
-       ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
-       ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
-       ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
-       ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
-       ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-}
-
-/*
- * If the linux inode is valid, mark it dirty.
- * Used when committing a dirty inode into a transaction so that
- * the inode will get written back by the linux code
- */
-void
-xfs_mark_inode_dirty_sync(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-               mark_inode_dirty_sync(inode);
-}
-
-void
-xfs_mark_inode_dirty(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-               mark_inode_dirty(inode);
-}
-
-/*
- * Hook in SELinux.  This is not quite correct yet, what we really need
- * here (as we do for default ACLs) is a mechanism by which creation of
- * these attrs can be journalled at inode creation time (along with the
- * inode, of course, such that log replay can't cause these to be lost).
- */
-STATIC int
-xfs_init_security(
-       struct inode    *inode,
-       struct inode    *dir,
-       const struct qstr *qstr)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       size_t          length;
-       void            *value;
-       unsigned char   *name;
-       int             error;
-
-       error = security_inode_init_security(inode, dir, qstr, (char **)&name,
-                                            &value, &length);
-       if (error) {
-               if (error == -EOPNOTSUPP)
-                       return 0;
-               return -error;
-       }
-
-       error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
-
-       kfree(name);
-       kfree(value);
-       return error;
-}
-
-static void
-xfs_dentry_to_name(
-       struct xfs_name *namep,
-       struct dentry   *dentry)
-{
-       namep->name = dentry->d_name.name;
-       namep->len = dentry->d_name.len;
-}
-
-STATIC void
-xfs_cleanup_inode(
-       struct inode    *dir,
-       struct inode    *inode,
-       struct dentry   *dentry)
-{
-       struct xfs_name teardown;
-
-       /* Oh, the horror.
-        * If we can't add the ACL or we fail in
-        * xfs_init_security we must back out.
-        * ENOSPC can hit here, among other things.
-        */
-       xfs_dentry_to_name(&teardown, dentry);
-
-       xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
-       iput(inode);
-}
-
-STATIC int
-xfs_vn_mknod(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode,
-       dev_t           rdev)
-{
-       struct inode    *inode;
-       struct xfs_inode *ip = NULL;
-       struct posix_acl *default_acl = NULL;
-       struct xfs_name name;
-       int             error;
-
-       /*
-        * Irix uses Missed'em'V split, but doesn't want to see
-        * the upper 5 bits of (14bit) major.
-        */
-       if (S_ISCHR(mode) || S_ISBLK(mode)) {
-               if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
-                       return -EINVAL;
-               rdev = sysv_encode_dev(rdev);
-       } else {
-               rdev = 0;
-       }
-
-       if (IS_POSIXACL(dir)) {
-               default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
-               if (IS_ERR(default_acl))
-                       return PTR_ERR(default_acl);
-
-               if (!default_acl)
-                       mode &= ~current_umask();
-       }
-
-       xfs_dentry_to_name(&name, dentry);
-       error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
-       if (unlikely(error))
-               goto out_free_acl;
-
-       inode = VFS_I(ip);
-
-       error = xfs_init_security(inode, dir, &dentry->d_name);
-       if (unlikely(error))
-               goto out_cleanup_inode;
-
-       if (default_acl) {
-               error = -xfs_inherit_acl(inode, default_acl);
-               default_acl = NULL;
-               if (unlikely(error))
-                       goto out_cleanup_inode;
-       }
-
-       d_instantiate(dentry, inode);
-       return -error;
-
- out_cleanup_inode:
-       xfs_cleanup_inode(dir, inode, dentry);
- out_free_acl:
-       posix_acl_release(default_acl);
-       return -error;
-}
-
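
On the device-number handling in xfs_vn_mknod() above: Linux packs a dev_t as a 12-bit major above a 20-bit minor, while the SysV on-disk encoding XFS inherits from Irix stores the minor in the low 18 bits with the major above it, and this path additionally rejects any major that does not fit in 9 bits (the & ~0x1ff test). A hedged worked example; MKDEV, MAJOR, sysv_valid_dev() and sysv_encode_dev() are real helpers from <linux/kdev_t.h>, and the device value is arbitrary:

	dev_t	rdev = MKDEV(8, 1);	/* major 8, minor 1: sda1 */
	u32	ondisk;

	if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)
		return -EINVAL;		/* major would not survive the encoding */

	/* minor | (major << 18)  ==  1 | (8 << 18)  ==  0x200001 */
	ondisk = sysv_encode_dev(rdev);
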
-STATIC int
-xfs_vn_create(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode,
-       struct nameidata *nd)
-{
-       return xfs_vn_mknod(dir, dentry, mode, 0);
-}
-
-STATIC int
-xfs_vn_mkdir(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode)
-{
-       return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
-}
-
-STATIC struct dentry *
-xfs_vn_lookup(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       struct nameidata *nd)
-{
-       struct xfs_inode *cip;
-       struct xfs_name name;
-       int             error;
-
-       if (dentry->d_name.len >= MAXNAMELEN)
-               return ERR_PTR(-ENAMETOOLONG);
-
-       xfs_dentry_to_name(&name, dentry);
-       error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
-       if (unlikely(error)) {
-               if (unlikely(error != ENOENT))
-                       return ERR_PTR(-error);
-               d_add(dentry, NULL);
-               return NULL;
-       }
-
-       return d_splice_alias(VFS_I(cip), dentry);
-}
-
-STATIC struct dentry *
-xfs_vn_ci_lookup(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       struct nameidata *nd)
-{
-       struct xfs_inode *ip;
-       struct xfs_name xname;
-       struct xfs_name ci_name;
-       struct qstr     dname;
-       int             error;
-
-       if (dentry->d_name.len >= MAXNAMELEN)
-               return ERR_PTR(-ENAMETOOLONG);
-
-       xfs_dentry_to_name(&xname, dentry);
-       error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
-       if (unlikely(error)) {
-               if (unlikely(error != ENOENT))
-                       return ERR_PTR(-error);
-               /*
-                * call d_add(dentry, NULL) here when d_drop_negative_children
-                * is called in xfs_vn_mknod (i.e. allow negative dentries
-                * with CI filesystems).
-                */
-               return NULL;
-       }
-
-       /* if exact match, just splice and exit */
-       if (!ci_name.name)
-               return d_splice_alias(VFS_I(ip), dentry);
-
-       /* else case-insensitive match... */
-       dname.name = ci_name.name;
-       dname.len = ci_name.len;
-       dentry = d_add_ci(dentry, VFS_I(ip), &dname);
-       kmem_free(ci_name.name);
-       return dentry;
-}
-
-STATIC int
-xfs_vn_link(
-       struct dentry   *old_dentry,
-       struct inode    *dir,
-       struct dentry   *dentry)
-{
-       struct inode    *inode = old_dentry->d_inode;
-       struct xfs_name name;
-       int             error;
-
-       xfs_dentry_to_name(&name, dentry);
-
-       error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
-       if (unlikely(error))
-               return -error;
-
-       ihold(inode);
-       d_instantiate(dentry, inode);
-       return 0;
-}
-
-STATIC int
-xfs_vn_unlink(
-       struct inode    *dir,
-       struct dentry   *dentry)
-{
-       struct xfs_name name;
-       int             error;
-
-       xfs_dentry_to_name(&name, dentry);
-
-       error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
-       if (error)
-               return error;
-
-       /*
-        * With unlink, the VFS makes the dentry "negative": no inode,
-        * but still hashed. This is incompatible with case-insensitive
-        * mode, so invalidate (unhash) the dentry in CI-mode.
-        */
-       if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
-               d_invalidate(dentry);
-       return 0;
-}
-
-STATIC int
-xfs_vn_symlink(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       const char      *symname)
-{
-       struct inode    *inode;
-       struct xfs_inode *cip = NULL;
-       struct xfs_name name;
-       int             error;
-       mode_t          mode;
-
-       mode = S_IFLNK |
-               (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
-       xfs_dentry_to_name(&name, dentry);
-
-       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
-       if (unlikely(error))
-               goto out;
-
-       inode = VFS_I(cip);
-
-       error = xfs_init_security(inode, dir, &dentry->d_name);
-       if (unlikely(error))
-               goto out_cleanup_inode;
-
-       d_instantiate(dentry, inode);
-       return 0;
-
- out_cleanup_inode:
-       xfs_cleanup_inode(dir, inode, dentry);
- out:
-       return -error;
-}
-
-STATIC int
-xfs_vn_rename(
-       struct inode    *odir,
-       struct dentry   *odentry,
-       struct inode    *ndir,
-       struct dentry   *ndentry)
-{
-       struct inode    *new_inode = ndentry->d_inode;
-       struct xfs_name oname;
-       struct xfs_name nname;
-
-       xfs_dentry_to_name(&oname, odentry);
-       xfs_dentry_to_name(&nname, ndentry);
-
-       return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
-                          XFS_I(ndir), &nname, new_inode ?
-                                               XFS_I(new_inode) : NULL);
-}
-
-/*
- * Careful here - this function can get called recursively, so
- * we need to be very careful about how much stack we use.
- * The link buffer is kmalloc()ed for this reason...
- */
-STATIC void *
-xfs_vn_follow_link(
-       struct dentry           *dentry,
-       struct nameidata        *nd)
-{
-       char                    *link;
-       int                     error = -ENOMEM;
-
-       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-       if (!link)
-               goto out_err;
-
-       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
-       if (unlikely(error))
-               goto out_kfree;
-
-       nd_set_link(nd, link);
-       return NULL;
-
- out_kfree:
-       kfree(link);
- out_err:
-       nd_set_link(nd, ERR_PTR(error));
-       return NULL;
-}
-
-STATIC void
-xfs_vn_put_link(
-       struct dentry   *dentry,
-       struct nameidata *nd,
-       void            *p)
-{
-       char            *s = nd_get_link(nd);
-
-       if (!IS_ERR(s))
-               kfree(s);
-}
-
-STATIC int
-xfs_vn_getattr(
-       struct vfsmount         *mnt,
-       struct dentry           *dentry,
-       struct kstat            *stat)
-{
-       struct inode            *inode = dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-
-       trace_xfs_getattr(ip);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       stat->size = XFS_ISIZE(ip);
-       stat->dev = inode->i_sb->s_dev;
-       stat->mode = ip->i_d.di_mode;
-       stat->nlink = ip->i_d.di_nlink;
-       stat->uid = ip->i_d.di_uid;
-       stat->gid = ip->i_d.di_gid;
-       stat->ino = ip->i_ino;
-       stat->atime = inode->i_atime;
-       stat->mtime = inode->i_mtime;
-       stat->ctime = inode->i_ctime;
-       stat->blocks =
-               XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFBLK:
-       case S_IFCHR:
-               stat->blksize = BLKDEV_IOSIZE;
-               stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-                                  sysv_minor(ip->i_df.if_u2.if_rdev));
-               break;
-       default:
-               if (XFS_IS_REALTIME_INODE(ip)) {
-                       /*
-                        * If the file blocks are being allocated from a
-                        * realtime volume, then return the inode's realtime
-                        * extent size or the realtime volume's extent size.
-                        */
-                       stat->blksize =
-                               xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
-               } else
-                       stat->blksize = xfs_preferred_iosize(mp);
-               stat->rdev = 0;
-               break;
-       }
-
-       return 0;
-}
-
-int
-xfs_setattr_nonsize(
-       struct xfs_inode        *ip,
-       struct iattr            *iattr,
-       int                     flags)
-{
-       xfs_mount_t             *mp = ip->i_mount;
-       struct inode            *inode = VFS_I(ip);
-       int                     mask = iattr->ia_valid;
-       xfs_trans_t             *tp;
-       int                     error;
-       uid_t                   uid = 0, iuid = 0;
-       gid_t                   gid = 0, igid = 0;
-       struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
-       struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;
-
-       trace_xfs_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       error = -inode_change_ok(inode, iattr);
-       if (error)
-               return XFS_ERROR(error);
-
-       ASSERT((mask & ATTR_SIZE) == 0);
-
-       /*
-        * If disk quotas are on, we make sure that the dquots do exist on disk
-        * before we start any other transactions. Trying to do this later
-        * is messy. We don't care to take a readlock to look at the ids
-        * in the inode here, because we can't hold it across the trans_reserve.
-        * If the IDs do change before we take the ilock, we're covered
-        * because the i_*dquot fields will get updated anyway.
-        */
-       if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
-               uint    qflags = 0;
-
-               if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
-                       uid = iattr->ia_uid;
-                       qflags |= XFS_QMOPT_UQUOTA;
-               } else {
-                       uid = ip->i_d.di_uid;
-               }
-               if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
-                       gid = iattr->ia_gid;
-                       qflags |= XFS_QMOPT_GQUOTA;
-               }  else {
-                       gid = ip->i_d.di_gid;
-               }
-
-               /*
-                * We take a reference when we initialize udqp and gdqp,
-                * so it is important that we never blindly double trip on
-                * the same variable. See xfs_create() for an example.
-                */
-               ASSERT(udqp == NULL);
-               ASSERT(gdqp == NULL);
-               error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
-                                        qflags, &udqp, &gdqp);
-               if (error)
-                       return error;
-       }
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-       if (error)
-               goto out_dqrele;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & (ATTR_UID|ATTR_GID)) {
-               /*
-                * These IDs could have changed since we last looked at them.
-                * But, we're assured that if the ownership did change
-                * while we didn't have the inode locked, inode's dquot(s)
-                * would have changed also.
-                */
-               iuid = ip->i_d.di_uid;
-               igid = ip->i_d.di_gid;
-               gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
-               uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
-
-               /*
-                * Do a quota reservation only if uid/gid is actually
-                * going to change.
-                */
-               if (XFS_IS_QUOTA_RUNNING(mp) &&
-                   ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-                    (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
-                       ASSERT(tp);
-                       error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-                                               capable(CAP_FOWNER) ?
-                                               XFS_QMOPT_FORCE_RES : 0);
-                       if (error)      /* out of quota */
-                               goto out_trans_cancel;
-               }
-       }
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & (ATTR_UID|ATTR_GID)) {
-               /*
-                * CAP_FSETID overrides the following restrictions:
-                *
-                * The set-user-ID and set-group-ID bits of a file will be
-                * cleared upon successful return from chown()
-                */
-               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-                   !capable(CAP_FSETID))
-                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-               /*
-                * Change the ownerships and register quota modifications
-                * in the transaction.
-                */
-               if (iuid != uid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
-                               ASSERT(mask & ATTR_UID);
-                               ASSERT(udqp);
-                               olddquot1 = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_udquot, udqp);
-                       }
-                       ip->i_d.di_uid = uid;
-                       inode->i_uid = uid;
-               }
-               if (igid != gid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
-                               ASSERT(!XFS_IS_PQUOTA_ON(mp));
-                               ASSERT(mask & ATTR_GID);
-                               ASSERT(gdqp);
-                               olddquot2 = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_gdquot, gdqp);
-                       }
-                       ip->i_d.di_gid = gid;
-                       inode->i_gid = gid;
-               }
-       }
-
-       /*
-        * Change file access modes.
-        */
-       if (mask & ATTR_MODE) {
-               umode_t mode = iattr->ia_mode;
-
-               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                       mode &= ~S_ISGID;
-
-               ip->i_d.di_mode &= S_IFMT;
-               ip->i_d.di_mode |= mode & ~S_IFMT;
-
-               inode->i_mode &= S_IFMT;
-               inode->i_mode |= mode & ~S_IFMT;
-       }
-
-       /*
-        * Change file access or modified times.
-        */
-       if (mask & ATTR_ATIME) {
-               inode->i_atime = iattr->ia_atime;
-               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_CTIME) {
-               inode->i_ctime = iattr->ia_ctime;
-               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_MTIME) {
-               inode->i_mtime = iattr->ia_mtime;
-               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Release any dquot(s) the inode had kept before chown.
-        */
-       xfs_qm_dqrele(olddquot1);
-       xfs_qm_dqrele(olddquot2);
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-
-       if (error)
-               return XFS_ERROR(error);
-
-       /*
-        * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
-        *           update.  We could avoid this with linked transactions
-        *           and passing down the transaction pointer all the way
-        *           to attr_set.  No previous user of the generic
-        *           Posix ACL code seems to care about this issue either.
-        */
-       if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
-               error = -xfs_acl_chmod(inode);
-               if (error)
-                       return XFS_ERROR(error);
-       }
-
-       return 0;
-
-out_trans_cancel:
-       xfs_trans_cancel(tp, 0);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_dqrele:
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-       return error;
-}
-
-/*
- * Truncate file.  Must have write permission and not be a directory.
- */
-int
-xfs_setattr_size(
-       struct xfs_inode        *ip,
-       struct iattr            *iattr,
-       int                     flags)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct inode            *inode = VFS_I(ip);
-       int                     mask = iattr->ia_valid;
-       struct xfs_trans        *tp;
-       int                     error;
-       uint                    lock_flags;
-       uint                    commit_flags = 0;
-
-       trace_xfs_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       error = -inode_change_ok(inode, iattr);
-       if (error)
-               return XFS_ERROR(error);
-
-       ASSERT(S_ISREG(ip->i_d.di_mode));
-       ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
-                       ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
-                       ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
-
-       lock_flags = XFS_ILOCK_EXCL;
-       if (!(flags & XFS_ATTR_NOLOCK))
-               lock_flags |= XFS_IOLOCK_EXCL;
-       xfs_ilock(ip, lock_flags);
-
-       /*
-        * Short circuit the truncate case for zero length files.
-        */
-       if (iattr->ia_size == 0 &&
-           ip->i_size == 0 && ip->i_d.di_nextents == 0) {
-               if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
-                       goto out_unlock;
-
-               /*
-                * Use the regular setattr path to update the timestamps.
-                */
-               xfs_iunlock(ip, lock_flags);
-               iattr->ia_valid &= ~ATTR_SIZE;
-               return xfs_setattr_nonsize(ip, iattr, 0);
-       }
-
-       /*
-        * Make sure that the dquots are attached to the inode.
-        */
-       error = xfs_qm_dqattach_locked(ip, 0);
-       if (error)
-               goto out_unlock;
-
-       /*
-        * Now we can make the changes.  Before we join the inode to the
-        * transaction, take care of the part of the truncation that must be
-        * done without the inode lock.  This needs to be done before joining
-        * the inode to the transaction, because the inode cannot be unlocked
-        * once it is a part of the transaction.
-        */
-       if (iattr->ia_size > ip->i_size) {
-               /*
-                * Do the first part of growing a file: zero any data in the
-                * last block that is beyond the old EOF.  We need to do this
-                * before the inode is joined to the transaction to modify
-                * i_size.
-                */
-               error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
-               if (error)
-                       goto out_unlock;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       lock_flags &= ~XFS_ILOCK_EXCL;
-
-       /*
-        * We are going to log the inode size change in this transaction so
-        * any previous writes that are beyond the on disk EOF and the new
-        * EOF that have not been written out need to be written here.  If we
-        * do not write the data out, we expose ourselves to the null files
-        * problem.
-        *
-        * Only flush from the on disk size to the smaller of the in memory
-        * file size or the new size as that's the range we really care about
-        * here and prevents waiting for other data not within the range we
-        * care about here.
-        */
-       if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
-               error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
-                                       XBF_ASYNC, FI_NONE);
-               if (error)
-                       goto out_unlock;
-       }
-
-       /*
-        * Wait for all I/O to complete.
-        */
-       xfs_ioend_wait(ip);
-
-       error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
-                                    xfs_get_blocks);
-       if (error)
-               goto out_unlock;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-                                XFS_TRANS_PERM_LOG_RES,
-                                XFS_ITRUNCATE_LOG_COUNT);
-       if (error)
-               goto out_trans_cancel;
-
-       truncate_setsize(inode, iattr->ia_size);
-
-       commit_flags = XFS_TRANS_RELEASE_LOG_RES;
-       lock_flags |= XFS_ILOCK_EXCL;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Only change the c/mtime if we are changing the size or we are
-        * explicitly asked to change it.  This handles the semantic difference
-        * between truncate() and ftruncate() as implemented in the VFS.
-        *
-        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
-        * special case where we need to update the times despite not having
-        * these flags set.  For all other operations the VFS set these flags
-        * explicitly if it wants a timestamp update.
-        */
-       if (iattr->ia_size != ip->i_size &&
-           (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
-               iattr->ia_ctime = iattr->ia_mtime =
-                       current_fs_time(inode->i_sb);
-               mask |= ATTR_CTIME | ATTR_MTIME;
-       }
-
-       if (iattr->ia_size > ip->i_size) {
-               ip->i_d.di_size = iattr->ia_size;
-               ip->i_size = iattr->ia_size;
-       } else if (iattr->ia_size <= ip->i_size ||
-                  (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
-               error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
-               if (error)
-                       goto out_trans_abort;
-
-               /*
-                * Truncated "down", so we're removing references to old data
-                * here - if we delay flushing for a long time, we expose
-                * ourselves unduly to the notorious NULL files problem.  So,
-                * we mark this inode and flush it when the file is closed,
-                * and do not wait the usual (long) time for writeout.
-                */
-               xfs_iflags_set(ip, XFS_ITRUNCATED);
-       }
-
-       if (mask & ATTR_CTIME) {
-               inode->i_ctime = iattr->ia_ctime;
-               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_MTIME) {
-               inode->i_mtime = iattr->ia_mtime;
-               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-out_unlock:
-       if (lock_flags)
-               xfs_iunlock(ip, lock_flags);
-       return error;
-
-out_trans_abort:
-       commit_flags |= XFS_TRANS_ABORT;
-out_trans_cancel:
-       xfs_trans_cancel(tp, commit_flags);
-       goto out_unlock;
-}
-
-STATIC int
-xfs_vn_setattr(
-       struct dentry   *dentry,
-       struct iattr    *iattr)
-{
-       if (iattr->ia_valid & ATTR_SIZE)
-               return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
-       return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
-}
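
Note the leading minus on each call above: the XFS core routines return
positive errno values, while the VFS expects negative ones, so every
boundary crossing in this file negates the result. A minimal sketch of
the convention (the error value is illustrative):

        error = xfs_setattr_size(ip, iattr, 0);  /* core returns e.g. EROFS */
        return -error;                            /* VFS sees -EROFS */
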
-
-#define XFS_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
-       void                    **arg,
-       struct getbmapx         *bmv,
-       int                     *full)
-{
-       int                     error;
-       struct fiemap_extent_info *fieinfo = *arg;
-       u32                     fiemap_flags = 0;
-       u64                     logical, physical, length;
-
-       /* Do nothing for a hole */
-       if (bmv->bmv_block == -1LL)
-               return 0;
-
-       logical = BBTOB(bmv->bmv_offset);
-       physical = BBTOB(bmv->bmv_block);
-       length = BBTOB(bmv->bmv_length);
-
-       if (bmv->bmv_oflags & BMV_OF_PREALLOC)
-               fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
-       else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
-               fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
-               physical = 0;   /* no block yet */
-       }
-       if (bmv->bmv_oflags & BMV_OF_LAST)
-               fiemap_flags |= FIEMAP_EXTENT_LAST;
-
-       error = fiemap_fill_next_extent(fieinfo, logical, physical,
-                                       length, fiemap_flags);
-       if (error > 0) {
-               error = 0;
-               *full = 1;      /* user array now full */
-       }
-
-       return -error;
-}
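
xfs_fiemap_format() translates between the 512-byte "basic block" units of
the getbmap interface and the byte units fiemap expects. A hedged sketch of
the conversion macros used above (the real definitions live in the XFS
headers; shown here only to make the shift explicit):

#define BBSHIFT         9                                    /* basic block = 512 bytes */
#define BBTOB(bbs)      ((__u64)(bbs) << BBSHIFT)            /* basic blocks to bytes */
#define BTOBB(bytes)    (((__u64)(bytes) + 511) >> BBSHIFT)  /* bytes to basic blocks, rounded up */

For example, an extent starting at basic block 8 is reported to fiemap at
byte offset 4096.
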
-
-STATIC int
-xfs_vn_fiemap(
-       struct inode            *inode,
-       struct fiemap_extent_info *fieinfo,
-       u64                     start,
-       u64                     length)
-{
-       xfs_inode_t             *ip = XFS_I(inode);
-       struct getbmapx         bm;
-       int                     error;
-
-       error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-       if (error)
-               return error;
-
-       /* Set up bmap header for xfs internal routine */
-       bm.bmv_offset = BTOBB(start);
-       /* Special case for whole file */
-       if (length == FIEMAP_MAX_OFFSET)
-               bm.bmv_length = -1LL;
-       else
-               bm.bmv_length = BTOBB(length);
-
-       /* We add one because in the getbmap world the count includes the header */
-       bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-                                       fieinfo->fi_extents_max + 1;
-       bm.bmv_count = min_t(__s32, bm.bmv_count,
-                            (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-       bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-               bm.bmv_iflags |= BMV_IF_ATTRFORK;
-       if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-               bm.bmv_iflags |= BMV_IF_DELALLOC;
-
-       error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-       if (error)
-               return -error;
-
-       return 0;
-}
-
-static const struct inode_operations xfs_inode_operations = {
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-       .fiemap                 = xfs_vn_fiemap,
-};
-
-static const struct inode_operations xfs_dir_inode_operations = {
-       .create                 = xfs_vn_create,
-       .lookup                 = xfs_vn_lookup,
-       .link                   = xfs_vn_link,
-       .unlink                 = xfs_vn_unlink,
-       .symlink                = xfs_vn_symlink,
-       .mkdir                  = xfs_vn_mkdir,
-       /*
-        * Yes, XFS uses the same method for rmdir and unlink.
-        *
-        * There are some subtle differences deeper in the code,
-        * but we use S_ISDIR to check for those.
-        */
-       .rmdir                  = xfs_vn_unlink,
-       .mknod                  = xfs_vn_mknod,
-       .rename                 = xfs_vn_rename,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_dir_ci_inode_operations = {
-       .create                 = xfs_vn_create,
-       .lookup                 = xfs_vn_ci_lookup,
-       .link                   = xfs_vn_link,
-       .unlink                 = xfs_vn_unlink,
-       .symlink                = xfs_vn_symlink,
-       .mkdir                  = xfs_vn_mkdir,
-       /*
-        * Yes, XFS uses the same method for rmdir and unlink.
-        *
-        * There are some subtle differences deeper in the code,
-        * but we use S_ISDIR to check for those.
-        */
-       .rmdir                  = xfs_vn_unlink,
-       .mknod                  = xfs_vn_mknod,
-       .rename                 = xfs_vn_rename,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_symlink_inode_operations = {
-       .readlink               = generic_readlink,
-       .follow_link            = xfs_vn_follow_link,
-       .put_link               = xfs_vn_put_link,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-STATIC void
-xfs_diflags_to_iflags(
-       struct inode            *inode,
-       struct xfs_inode        *ip)
-{
-       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
-               inode->i_flags |= S_IMMUTABLE;
-       else
-               inode->i_flags &= ~S_IMMUTABLE;
-       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
-               inode->i_flags |= S_APPEND;
-       else
-               inode->i_flags &= ~S_APPEND;
-       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
-               inode->i_flags |= S_SYNC;
-       else
-               inode->i_flags &= ~S_SYNC;
-       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
-               inode->i_flags |= S_NOATIME;
-       else
-               inode->i_flags &= ~S_NOATIME;
-}
-
-/*
- * Initialize the Linux inode, set up the operation vectors and
- * unlock the inode.
- *
- * When reading existing inodes from disk this is called directly
- * from xfs_iget, when creating a new inode it is called from
- * xfs_ialloc after setting up the inode.
- *
- * We are always called with an uninitialised linux inode here.
- * We need to initialise the necessary fields and take a reference
- * on it.
- */
-void
-xfs_setup_inode(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = &ip->i_vnode;
-
-       inode->i_ino = ip->i_ino;
-       inode->i_state = I_NEW;
-
-       inode_sb_list_add(inode);
-       /* make the inode look hashed for the writeback code */
-       hlist_add_fake(&inode->i_hash);
-
-       inode->i_mode   = ip->i_d.di_mode;
-       inode->i_nlink  = ip->i_d.di_nlink;
-       inode->i_uid    = ip->i_d.di_uid;
-       inode->i_gid    = ip->i_d.di_gid;
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFBLK:
-       case S_IFCHR:
-               inode->i_rdev =
-                       MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-                             sysv_minor(ip->i_df.if_u2.if_rdev));
-               break;
-       default:
-               inode->i_rdev = 0;
-               break;
-       }
-
-       inode->i_generation = ip->i_d.di_gen;
-       i_size_write(inode, ip->i_d.di_size);
-       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
-       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
-       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
-       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
-       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
-       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
-       xfs_diflags_to_iflags(inode, ip);
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFREG:
-               inode->i_op = &xfs_inode_operations;
-               inode->i_fop = &xfs_file_operations;
-               inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       case S_IFDIR:
-               if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
-                       inode->i_op = &xfs_dir_ci_inode_operations;
-               else
-                       inode->i_op = &xfs_dir_inode_operations;
-               inode->i_fop = &xfs_dir_file_operations;
-               break;
-       case S_IFLNK:
-               inode->i_op = &xfs_symlink_inode_operations;
-               if (!(ip->i_df.if_flags & XFS_IFINLINE))
-                       inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       default:
-               inode->i_op = &xfs_inode_operations;
-               init_special_inode(inode, inode->i_mode, inode->i_rdev);
-               break;
-       }
-
-       /*
-        * If there is no attribute fork no ACL can exist on this inode,
-        * and it can't have any file capabilities attached to it either.
-        */
-       if (!XFS_IFORK_Q(ip)) {
-               inode_has_no_xattr(inode);
-               cache_no_acl(inode);
-       }
-
-       xfs_iflags_clear(ip, XFS_INEW);
-       barrier();
-
-       unlock_new_inode(inode);
-}
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
deleted file mode 100644 (file)
index ef41c92..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOPS_H__
-#define __XFS_IOPS_H__
-
-struct xfs_inode;
-
-extern const struct file_operations xfs_file_operations;
-extern const struct file_operations xfs_dir_file_operations;
-
-extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
-
-extern void xfs_setup_inode(struct xfs_inode *);
-
-#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
deleted file mode 100644 (file)
index d42f814..0000000
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_LINUX__
-#define __XFS_LINUX__
-
-#include <linux/types.h>
-
-/*
- * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
- */
-#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
-# define XFS_BIG_BLKNOS        1
-# define XFS_BIG_INUMS 1
-#else
-# define XFS_BIG_BLKNOS        0
-# define XFS_BIG_INUMS 0
-#endif
-
-#include <xfs_types.h>
-
-#include <kmem.h>
-#include <mrlock.h>
-#include <time.h>
-
-#include <support/uuid.h>
-
-#include <linux/semaphore.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/file.h>
-#include <linux/swap.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/bitops.h>
-#include <linux/major.h>
-#include <linux/pagemap.h>
-#include <linux/vfs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/proc_fs.h>
-#include <linux/sort.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-#include <linux/delay.h>
-#include <linux/log2.h>
-#include <linux/spinlock.h>
-#include <linux/random.h>
-#include <linux/ctype.h>
-#include <linux/writeback.h>
-#include <linux/capability.h>
-#include <linux/list_sort.h>
-
-#include <asm/page.h>
-#include <asm/div64.h>
-#include <asm/param.h>
-#include <asm/uaccess.h>
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include <xfs_vnode.h>
-#include <xfs_stats.h>
-#include <xfs_sysctl.h>
-#include <xfs_iops.h>
-#include <xfs_aops.h>
-#include <xfs_super.h>
-#include <xfs_buf.h>
-#include <xfs_message.h>
-
-#ifdef __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#else
-#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#endif
-
-#define irix_sgid_inherit      xfs_params.sgid_inherit.val
-#define irix_symlink_mode      xfs_params.symlink_mode.val
-#define xfs_panic_mask         xfs_params.panic_mask.val
-#define xfs_error_level                xfs_params.error_level.val
-#define xfs_syncd_centisecs    xfs_params.syncd_timer.val
-#define xfs_stats_clear                xfs_params.stats_clear.val
-#define xfs_inherit_sync       xfs_params.inherit_sync.val
-#define xfs_inherit_nodump     xfs_params.inherit_nodump.val
-#define xfs_inherit_noatime    xfs_params.inherit_noatim.val
-#define xfs_buf_timer_centisecs        xfs_params.xfs_buf_timer.val
-#define xfs_buf_age_centisecs  xfs_params.xfs_buf_age.val
-#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
-#define xfs_rotorstep          xfs_params.rotorstep.val
-#define xfs_inherit_nodefrag   xfs_params.inherit_nodfrg.val
-#define xfs_fstrm_centisecs    xfs_params.fstrm_timer.val
-
-#define current_cpu()          (raw_smp_processor_id())
-#define current_pid()          (current->pid)
-#define current_test_flags(f)  (current->flags & (f))
-#define current_set_flags_nested(sp, f)                \
-               (*(sp) = current->flags, current->flags |= (f))
-#define current_clear_flags_nested(sp, f)      \
-               (*(sp) = current->flags, current->flags &= ~(f))
-#define current_restore_flags_nested(sp, f)    \
-               (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
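
These nested helpers implement a save/modify/restore pattern on
current->flags. XFS uses them in its transaction code (not part of this
hunk) to mark a task as running inside a transaction; a hedged sketch of
the idiom, assuming the PF_FSTRANS flag of this kernel generation:

unsigned long pflags;

current_set_flags_nested(&pflags, PF_FSTRANS);     /* save old flags, set PF_FSTRANS */
/* ... work done here is recognizable as in-transaction context ... */
current_restore_flags_nested(&pflags, PF_FSTRANS); /* restore PF_FSTRANS to saved state */
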
-
-#define spinlock_destroy(lock)
-
-#define NBBY           8               /* number of bits per byte */
-
-/*
- * Size of block device i/o is parameterized here.
- * Currently the system supports page-sized i/o.
- */
-#define        BLKDEV_IOSHIFT          PAGE_CACHE_SHIFT
-#define        BLKDEV_IOSIZE           (1<<BLKDEV_IOSHIFT)
-/* number of BB's per block device block */
-#define        BLKDEV_BB               BTOBB(BLKDEV_IOSIZE)
-
-#define ENOATTR                ENODATA         /* Attribute not found */
-#define EWRONGFS       EINVAL          /* Mount with wrong filesystem type */
-#define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
-
-#define SYNCHRONIZE()  barrier()
-#define __return_address __builtin_return_address(0)
-
-#define XFS_PROJID_DEFAULT     0
-#define MAXPATHLEN     1024
-
-#define MIN(a,b)       (min(a,b))
-#define MAX(a,b)       (max(a,b))
-#define howmany(x, y)  (((x)+((y)-1))/(y))
-
-/*
- * Various platform dependent calls that don't fit anywhere else
- */
-#define xfs_sort(a,n,s,fn)     sort(a,n,s,fn,NULL)
-#define xfs_stack_trace()      dump_stack()
-
-
-/* Move the kernel do_div definition off to one side */
-
-#if defined __i386__
-/* For ia32 we need to pull some tricks to get past various versions
- * of the compiler which do not like us using do_div in the middle
- * of large functions.
- */
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
-       __u32   mod;
-
-       switch (n) {
-               case 4:
-                       mod = *(__u32 *)a % b;
-                       *(__u32 *)a = *(__u32 *)a / b;
-                       return mod;
-               case 8:
-                       {
-                       unsigned long __upper, __low, __high, __mod;
-                       __u64   c = *(__u64 *)a;
-                       __upper = __high = c >> 32;
-                       __low = c;
-                       if (__high) {
-                               __upper = __high % (b);
-                               __high = __high / (b);
-                       }
-                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
-                       asm("":"=A" (c):"a" (__low),"d" (__high));
-                       *(__u64 *)a = c;
-                       return __mod;
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
-       switch (n) {
-               case 4:
-                       return *(__u32 *)a % b;
-               case 8:
-                       {
-                       unsigned long __upper, __low, __high, __mod;
-                       __u64   c = *(__u64 *)a;
-                       __upper = __high = c >> 32;
-                       __low = c;
-                       if (__high) {
-                               __upper = __high % (b);
-                               __high = __high / (b);
-                       }
-                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
-                       asm("":"=A" (c):"a" (__low),"d" (__high));
-                       return __mod;
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-#else
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
-       __u32   mod;
-
-       switch (n) {
-               case 4:
-                       mod = *(__u32 *)a % b;
-                       *(__u32 *)a = *(__u32 *)a / b;
-                       return mod;
-               case 8:
-                       mod = do_div(*(__u64 *)a, b);
-                       return mod;
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
-       switch (n) {
-               case 4:
-                       return *(__u32 *)a % b;
-               case 8:
-                       {
-                       __u64   c = *(__u64 *)a;
-                       return do_div(c, b);
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-#endif
-
-#undef do_div
-#define do_div(a, b)   xfs_do_div(&(a), (b), sizeof(a))
-#define do_mod(a, b)   xfs_do_mod(&(a), (b), sizeof(a))
-
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
-{
-       x += y - 1;
-       do_div(x, y);
-       return(x * y);
-}
-
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
-{
-       x += y - 1;
-       do_div(x, y);
-       return x;
-}
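
Both helpers lean on the do_div() wrapper above so that the 64-by-32
division also works on 32-bit hosts. A quick worked example with
illustrative numbers and hypothetical variable names:

/* With y = 512:
 *   roundup_64(1000, 512) == 1024   (next multiple of 512)
 *   howmany_64(1000, 512) == 2      (units needed to cover 1000 bytes)
 */
__uint64_t nblocks = howmany_64(nbytes, blocksize);
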
-
-/* ARM old ABI has some weird alignment/padding */
-#if defined(__arm__) && !defined(__ARM_EABI__)
-#define __arch_pack __attribute__((packed))
-#else
-#define __arch_pack
-#endif
-
-#define ASSERT_ALWAYS(expr)    \
-       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef DEBUG
-#define ASSERT(expr)   ((void)0)
-
-#ifndef STATIC
-# define STATIC static noinline
-#endif
-
-#else /* DEBUG */
-
-#define ASSERT(expr)   \
-       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef STATIC
-# define STATIC noinline
-#endif
-
-#endif /* DEBUG */
-
-#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
deleted file mode 100644 (file)
index bd672de..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-
-/*
- * XFS logging functions
- */
-static void
-__xfs_printk(
-       const char              *level,
-       const struct xfs_mount  *mp,
-       struct va_format        *vaf)
-{
-       if (mp && mp->m_fsname) {
-               printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
-               return;
-       }
-       printk("%sXFS: %pV\n", level, vaf);
-}
-
-#define define_xfs_printk_level(func, kern_level)              \
-void func(const struct xfs_mount *mp, const char *fmt, ...)    \
-{                                                              \
-       struct va_format        vaf;                            \
-       va_list                 args;                           \
-                                                               \
-       va_start(args, fmt);                                    \
-                                                               \
-       vaf.fmt = fmt;                                          \
-       vaf.va = &args;                                         \
-                                                               \
-       __xfs_printk(kern_level, mp, &vaf);                     \
-       va_end(args);                                           \
-}                                                              \
-
-define_xfs_printk_level(xfs_emerg, KERN_EMERG);
-define_xfs_printk_level(xfs_alert, KERN_ALERT);
-define_xfs_printk_level(xfs_crit, KERN_CRIT);
-define_xfs_printk_level(xfs_err, KERN_ERR);
-define_xfs_printk_level(xfs_warn, KERN_WARNING);
-define_xfs_printk_level(xfs_notice, KERN_NOTICE);
-define_xfs_printk_level(xfs_info, KERN_INFO);
-#ifdef DEBUG
-define_xfs_printk_level(xfs_debug, KERN_DEBUG);
-#endif
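
Each define_xfs_printk_level() expansion generates a printf-style helper
taking the mount as its first argument, so output is automatically prefixed
with the filesystem's device name by __xfs_printk() above. A usage sketch
with made-up message text and device name:

xfs_warn(mp, "metadata I/O error %d", error);   /* "XFS (sda1): metadata I/O error ..." */
xfs_info(NULL, "module loaded");                /* no mount yet: "XFS: module loaded" */
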
-
-void
-xfs_alert_tag(
-       const struct xfs_mount  *mp,
-       int                     panic_tag,
-       const char              *fmt, ...)
-{
-       struct va_format        vaf;
-       va_list                 args;
-       int                     do_panic = 0;
-
-       if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
-               xfs_alert(mp, "Transforming an alert into a BUG.");
-               do_panic = 1;
-       }
-
-       va_start(args, fmt);
-
-       vaf.fmt = fmt;
-       vaf.va = &args;
-
-       __xfs_printk(KERN_ALERT, mp, &vaf);
-       va_end(args);
-
-       BUG_ON(do_panic);
-}
-
-void
-assfail(char *expr, char *file, int line)
-{
-       xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
-               expr, file, line);
-       BUG();
-}
-
-void
-xfs_hex_dump(void *p, int length)
-{
-       print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
-}
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
deleted file mode 100644 (file)
index 7fb7ea0..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __XFS_MESSAGE_H
-#define __XFS_MESSAGE_H 1
-
-struct xfs_mount;
-
-extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
-                        const char *fmt, ...)
-        __attribute__ ((format (printf, 3, 4)));
-extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-
-#ifdef DEBUG
-extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-#else
-static inline void
-__attribute__ ((format (printf, 2, 3)))
-xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-{
-}
-#endif
-
-extern void assfail(char *expr, char *f, int l);
-
-extern void xfs_hex_dump(void *p, int length);
-
-#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
deleted file mode 100644 (file)
index 29b9d64..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "quota/xfs_qm.h"
-#include <linux/quota.h>
-
-
-STATIC int
-xfs_quota_type(int type)
-{
-       switch (type) {
-       case USRQUOTA:
-               return XFS_DQ_USER;
-       case GRPQUOTA:
-               return XFS_DQ_GROUP;
-       default:
-               return XFS_DQ_PROJ;
-       }
-}
-
-STATIC int
-xfs_fs_get_xstate(
-       struct super_block      *sb,
-       struct fs_quota_stat    *fqs)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       return -xfs_qm_scall_getqstat(mp, fqs);
-}
-
-STATIC int
-xfs_fs_set_xstate(
-       struct super_block      *sb,
-       unsigned int            uflags,
-       int                     op)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       unsigned int            flags = 0;
-
-       if (sb->s_flags & MS_RDONLY)
-               return -EROFS;
-       if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-
-       if (uflags & FS_QUOTA_UDQ_ACCT)
-               flags |= XFS_UQUOTA_ACCT;
-       if (uflags & FS_QUOTA_PDQ_ACCT)
-               flags |= XFS_PQUOTA_ACCT;
-       if (uflags & FS_QUOTA_GDQ_ACCT)
-               flags |= XFS_GQUOTA_ACCT;
-       if (uflags & FS_QUOTA_UDQ_ENFD)
-               flags |= XFS_UQUOTA_ENFD;
-       if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
-               flags |= XFS_OQUOTA_ENFD;
-
-       switch (op) {
-       case Q_XQUOTAON:
-               return -xfs_qm_scall_quotaon(mp, flags);
-       case Q_XQUOTAOFF:
-               if (!XFS_IS_QUOTA_ON(mp))
-                       return -EINVAL;
-               return -xfs_qm_scall_quotaoff(mp, flags);
-       case Q_XQUOTARM:
-               if (XFS_IS_QUOTA_ON(mp))
-                       return -EINVAL;
-               return -xfs_qm_scall_trunc_qfiles(mp, flags);
-       }
-
-       return -EINVAL;
-}
-
-STATIC int
-xfs_fs_get_dqblk(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       if (!XFS_IS_QUOTA_ON(mp))
-               return -ESRCH;
-
-       return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
-}
-
-STATIC int
-xfs_fs_set_dqblk(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (sb->s_flags & MS_RDONLY)
-               return -EROFS;
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       if (!XFS_IS_QUOTA_ON(mp))
-               return -ESRCH;
-
-       return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
-}
-
-const struct quotactl_ops xfs_quotactl_operations = {
-       .get_xstate             = xfs_fs_get_xstate,
-       .set_xstate             = xfs_fs_set_xstate,
-       .get_dqblk              = xfs_fs_get_dqblk,
-       .set_dqblk              = xfs_fs_set_dqblk,
-};
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
deleted file mode 100644 (file)
index 76fdc58..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/proc_fs.h>
-
-DEFINE_PER_CPU(struct xfsstats, xfsstats);
-
-static int xfs_stat_proc_show(struct seq_file *m, void *v)
-{
-       int             c, i, j, val;
-       __uint64_t      xs_xstrat_bytes = 0;
-       __uint64_t      xs_write_bytes = 0;
-       __uint64_t      xs_read_bytes = 0;
-
-       static const struct xstats_entry {
-               char    *desc;
-               int     endpoint;
-       } xstats[] = {
-               { "extent_alloc",       XFSSTAT_END_EXTENT_ALLOC        },
-               { "abt",                XFSSTAT_END_ALLOC_BTREE         },
-               { "blk_map",            XFSSTAT_END_BLOCK_MAPPING       },
-               { "bmbt",               XFSSTAT_END_BLOCK_MAP_BTREE     },
-               { "dir",                XFSSTAT_END_DIRECTORY_OPS       },
-               { "trans",              XFSSTAT_END_TRANSACTIONS        },
-               { "ig",                 XFSSTAT_END_INODE_OPS           },
-               { "log",                XFSSTAT_END_LOG_OPS             },
-               { "push_ail",           XFSSTAT_END_TAIL_PUSHING        },
-               { "xstrat",             XFSSTAT_END_WRITE_CONVERT       },
-               { "rw",                 XFSSTAT_END_READ_WRITE_OPS      },
-               { "attr",               XFSSTAT_END_ATTRIBUTE_OPS       },
-               { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
-               { "vnodes",             XFSSTAT_END_VNODE_OPS           },
-               { "buf",                XFSSTAT_END_BUF                 },
-               { "abtb2",              XFSSTAT_END_ABTB_V2             },
-               { "abtc2",              XFSSTAT_END_ABTC_V2             },
-               { "bmbt2",              XFSSTAT_END_BMBT_V2             },
-               { "ibt2",               XFSSTAT_END_IBT_V2              },
-       };
-
-       /* Loop over all stats groups */
-       for (i = 0, j = 0; i < ARRAY_SIZE(xstats); i++) {
-               seq_printf(m, "%s", xstats[i].desc);
-               /* inner loop does each group */
-               while (j < xstats[i].endpoint) {
-                       val = 0;
-                       /* sum over all cpus */
-                       for_each_possible_cpu(c)
-                               val += *(((__u32*)&per_cpu(xfsstats, c) + j));
-                       seq_printf(m, " %u", val);
-                       j++;
-               }
-               seq_putc(m, '\n');
-       }
-       /* extra precision counters */
-       for_each_possible_cpu(i) {
-               xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
-               xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
-               xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
-       }
-
-       seq_printf(m, "xpc %Lu %Lu %Lu\n",
-                       xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
-       seq_printf(m, "debug %u\n",
-#if defined(DEBUG)
-               1);
-#else
-               0);
-#endif
-       return 0;
-}
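
The group summation above relies on struct xfsstats (declared in
xfs_stats.h, deleted below) being nothing but consecutive __u32 counters,
so each group is simply the index range ending at its XFSSTAT_END_*
constant. A worked example using the constants from that header:

/* XFSSTAT_END_EXTENT_ALLOC == 4 and XFSSTAT_END_ALLOC_BTREE == 8, so the
 * "extent_alloc" line sums indices 0..3 (xs_allocx through xs_freeb) over
 * all possible CPUs, and the "abt" line continues with indices 4..7. */
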
-
-static int xfs_stat_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xfs_stat_proc_show, NULL);
-}
-
-static const struct file_operations xfs_stat_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xfs_stat_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-int
-xfs_init_procfs(void)
-{
-       if (!proc_mkdir("fs/xfs", NULL))
-               goto out;
-
-       if (!proc_create("fs/xfs/stat", 0, NULL,
-                        &xfs_stat_proc_fops))
-               goto out_remove_entry;
-       return 0;
-
- out_remove_entry:
-       remove_proc_entry("fs/xfs", NULL);
- out:
-       return -ENOMEM;
-}
-
-void
-xfs_cleanup_procfs(void)
-{
-       remove_proc_entry("fs/xfs/stat", NULL);
-       remove_proc_entry("fs/xfs", NULL);
-}
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
deleted file mode 100644 (file)
index 736854b..0000000
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_STATS_H__
-#define __XFS_STATS_H__
-
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-#include <linux/percpu.h>
-
-/*
- * XFS global statistics
- */
-struct xfsstats {
-# define XFSSTAT_END_EXTENT_ALLOC      4
-       __uint32_t              xs_allocx;
-       __uint32_t              xs_allocb;
-       __uint32_t              xs_freex;
-       __uint32_t              xs_freeb;
-# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
-       __uint32_t              xs_abt_lookup;
-       __uint32_t              xs_abt_compare;
-       __uint32_t              xs_abt_insrec;
-       __uint32_t              xs_abt_delrec;
-# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
-       __uint32_t              xs_blk_mapr;
-       __uint32_t              xs_blk_mapw;
-       __uint32_t              xs_blk_unmap;
-       __uint32_t              xs_add_exlist;
-       __uint32_t              xs_del_exlist;
-       __uint32_t              xs_look_exlist;
-       __uint32_t              xs_cmp_exlist;
-# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
-       __uint32_t              xs_bmbt_lookup;
-       __uint32_t              xs_bmbt_compare;
-       __uint32_t              xs_bmbt_insrec;
-       __uint32_t              xs_bmbt_delrec;
-# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
-       __uint32_t              xs_dir_lookup;
-       __uint32_t              xs_dir_create;
-       __uint32_t              xs_dir_remove;
-       __uint32_t              xs_dir_getdents;
-# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
-       __uint32_t              xs_trans_sync;
-       __uint32_t              xs_trans_async;
-       __uint32_t              xs_trans_empty;
-# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
-       __uint32_t              xs_ig_attempts;
-       __uint32_t              xs_ig_found;
-       __uint32_t              xs_ig_frecycle;
-       __uint32_t              xs_ig_missed;
-       __uint32_t              xs_ig_dup;
-       __uint32_t              xs_ig_reclaims;
-       __uint32_t              xs_ig_attrchg;
-# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
-       __uint32_t              xs_log_writes;
-       __uint32_t              xs_log_blocks;
-       __uint32_t              xs_log_noiclogs;
-       __uint32_t              xs_log_force;
-       __uint32_t              xs_log_force_sleep;
-# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
-       __uint32_t              xs_try_logspace;
-       __uint32_t              xs_sleep_logspace;
-       __uint32_t              xs_push_ail;
-       __uint32_t              xs_push_ail_success;
-       __uint32_t              xs_push_ail_pushbuf;
-       __uint32_t              xs_push_ail_pinned;
-       __uint32_t              xs_push_ail_locked;
-       __uint32_t              xs_push_ail_flushing;
-       __uint32_t              xs_push_ail_restarts;
-       __uint32_t              xs_push_ail_flush;
-# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
-       __uint32_t              xs_xstrat_quick;
-       __uint32_t              xs_xstrat_split;
-# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
-       __uint32_t              xs_write_calls;
-       __uint32_t              xs_read_calls;
-# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
-       __uint32_t              xs_attr_get;
-       __uint32_t              xs_attr_set;
-       __uint32_t              xs_attr_remove;
-       __uint32_t              xs_attr_list;
-# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_ATTRIBUTE_OPS+3)
-       __uint32_t              xs_iflush_count;
-       __uint32_t              xs_icluster_flushcnt;
-       __uint32_t              xs_icluster_flushinode;
-# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
-       __uint32_t              vn_active;      /* # vnodes not on free lists */
-       __uint32_t              vn_alloc;       /* # times vn_alloc called */
-       __uint32_t              vn_get;         /* # times vn_get called */
-       __uint32_t              vn_hold;        /* # times vn_hold called */
-       __uint32_t              vn_rele;        /* # times vn_rele called */
-       __uint32_t              vn_reclaim;     /* # times vn_reclaim called */
-       __uint32_t              vn_remove;      /* # times vn_remove called */
-       __uint32_t              vn_free;        /* # times vn_free called */
-#define XFSSTAT_END_BUF                        (XFSSTAT_END_VNODE_OPS+9)
-       __uint32_t              xb_get;
-       __uint32_t              xb_create;
-       __uint32_t              xb_get_locked;
-       __uint32_t              xb_get_locked_waited;
-       __uint32_t              xb_busy_locked;
-       __uint32_t              xb_miss_locked;
-       __uint32_t              xb_page_retries;
-       __uint32_t              xb_page_found;
-       __uint32_t              xb_get_read;
-/* Version 2 btree counters */
-#define XFSSTAT_END_ABTB_V2            (XFSSTAT_END_BUF+15)
-       __uint32_t              xs_abtb_2_lookup;
-       __uint32_t              xs_abtb_2_compare;
-       __uint32_t              xs_abtb_2_insrec;
-       __uint32_t              xs_abtb_2_delrec;
-       __uint32_t              xs_abtb_2_newroot;
-       __uint32_t              xs_abtb_2_killroot;
-       __uint32_t              xs_abtb_2_increment;
-       __uint32_t              xs_abtb_2_decrement;
-       __uint32_t              xs_abtb_2_lshift;
-       __uint32_t              xs_abtb_2_rshift;
-       __uint32_t              xs_abtb_2_split;
-       __uint32_t              xs_abtb_2_join;
-       __uint32_t              xs_abtb_2_alloc;
-       __uint32_t              xs_abtb_2_free;
-       __uint32_t              xs_abtb_2_moves;
-#define XFSSTAT_END_ABTC_V2            (XFSSTAT_END_ABTB_V2+15)
-       __uint32_t              xs_abtc_2_lookup;
-       __uint32_t              xs_abtc_2_compare;
-       __uint32_t              xs_abtc_2_insrec;
-       __uint32_t              xs_abtc_2_delrec;
-       __uint32_t              xs_abtc_2_newroot;
-       __uint32_t              xs_abtc_2_killroot;
-       __uint32_t              xs_abtc_2_increment;
-       __uint32_t              xs_abtc_2_decrement;
-       __uint32_t              xs_abtc_2_lshift;
-       __uint32_t              xs_abtc_2_rshift;
-       __uint32_t              xs_abtc_2_split;
-       __uint32_t              xs_abtc_2_join;
-       __uint32_t              xs_abtc_2_alloc;
-       __uint32_t              xs_abtc_2_free;
-       __uint32_t              xs_abtc_2_moves;
-#define XFSSTAT_END_BMBT_V2            (XFSSTAT_END_ABTC_V2+15)
-       __uint32_t              xs_bmbt_2_lookup;
-       __uint32_t              xs_bmbt_2_compare;
-       __uint32_t              xs_bmbt_2_insrec;
-       __uint32_t              xs_bmbt_2_delrec;
-       __uint32_t              xs_bmbt_2_newroot;
-       __uint32_t              xs_bmbt_2_killroot;
-       __uint32_t              xs_bmbt_2_increment;
-       __uint32_t              xs_bmbt_2_decrement;
-       __uint32_t              xs_bmbt_2_lshift;
-       __uint32_t              xs_bmbt_2_rshift;
-       __uint32_t              xs_bmbt_2_split;
-       __uint32_t              xs_bmbt_2_join;
-       __uint32_t              xs_bmbt_2_alloc;
-       __uint32_t              xs_bmbt_2_free;
-       __uint32_t              xs_bmbt_2_moves;
-#define XFSSTAT_END_IBT_V2             (XFSSTAT_END_BMBT_V2+15)
-       __uint32_t              xs_ibt_2_lookup;
-       __uint32_t              xs_ibt_2_compare;
-       __uint32_t              xs_ibt_2_insrec;
-       __uint32_t              xs_ibt_2_delrec;
-       __uint32_t              xs_ibt_2_newroot;
-       __uint32_t              xs_ibt_2_killroot;
-       __uint32_t              xs_ibt_2_increment;
-       __uint32_t              xs_ibt_2_decrement;
-       __uint32_t              xs_ibt_2_lshift;
-       __uint32_t              xs_ibt_2_rshift;
-       __uint32_t              xs_ibt_2_split;
-       __uint32_t              xs_ibt_2_join;
-       __uint32_t              xs_ibt_2_alloc;
-       __uint32_t              xs_ibt_2_free;
-       __uint32_t              xs_ibt_2_moves;
-/* Extra precision counters */
-       __uint64_t              xs_xstrat_bytes;
-       __uint64_t              xs_write_bytes;
-       __uint64_t              xs_read_bytes;
-};
-
-DECLARE_PER_CPU(struct xfsstats, xfsstats);
-
-/*
- * We don't disable preemption; we aren't too worried about poking the
- * wrong CPU's stat for now, since the values are aggregated before reporting.
- */
-#define XFS_STATS_INC(v)       (per_cpu(xfsstats, current_cpu()).v++)
-#define XFS_STATS_DEC(v)       (per_cpu(xfsstats, current_cpu()).v--)
-#define XFS_STATS_ADD(v, inc)  (per_cpu(xfsstats, current_cpu()).v += (inc))
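
These are the macros the hunks above use; for example, both setattr paths
in xfs_iops.c bump the attribute-change counter once the transaction
commits:

XFS_STATS_INC(xs_ig_attrchg);   /* per-cpu increment, no preemption protection */

As the comment notes, an occasional increment landing on the wrong CPU's
copy is tolerated because the counters are only ever read back as a sum
over all CPUs.
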
-
-extern int xfs_init_procfs(void);
-extern void xfs_cleanup_procfs(void);
-
-
-#else  /* !CONFIG_PROC_FS */
-
-# define XFS_STATS_INC(count)
-# define XFS_STATS_DEC(count)
-# define XFS_STATS_ADD(count, inc)
-
-static inline int xfs_init_procfs(void)
-{
-       return 0;
-}
-
-static inline void xfs_cleanup_procfs(void)
-{
-}
-
-#endif /* !CONFIG_PROC_FS */
-
-#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
deleted file mode 100644 (file)
index 9a72dda..0000000
+++ /dev/null
@@ -1,1773 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_fsops.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_log_priv.h"
-#include "xfs_trans_priv.h"
-#include "xfs_filestream.h"
-#include "xfs_da_btree.h"
-#include "xfs_extfree_item.h"
-#include "xfs_mru_cache.h"
-#include "xfs_inode_item.h"
-#include "xfs_sync.h"
-#include "xfs_trace.h"
-
-#include <linux/namei.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/parser.h>
-
-static const struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_ioend_zone;
-mempool_t *xfs_ioend_pool;
-
-#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
-#define MNTOPT_LOGDEV  "logdev"        /* log device */
-#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
-#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
-#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
-#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
-#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
-#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
-#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
-#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
-#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
-#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
-#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
-#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
-#define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
-                                        * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
-#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
-#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
-#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
-                                        * in stat(). */
-#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
-#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
-#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
-#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
-#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
-#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
-#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota accounting, limits not enforced */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota accounting, limits not enforced */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota accounting, limits not enforced */
-#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
-#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
-#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
-#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
-#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
-
-/*
- * Table driven mount option parser.
- *
- * Currently only used for remount, but it will be used for mount
- * in the future, too.
- */
-enum {
-       Opt_barrier, Opt_nobarrier, Opt_err
-};
-
-static const match_table_t tokens = {
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_err, NULL}
-};
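
The tokens table above feeds the kernel's match_token(), which walks a match_table_t and returns the token whose pattern matches the input. Below is a minimal userspace sketch of the table-driven lookup idea only; tokens_demo and parse_token are illustrative names, not kernel API, and the real helper can additionally capture %s/%d arguments into substring_t slots.

#include <stdio.h>
#include <string.h>

/* Same shape as the kernel's match_table_t: pattern -> token. */
enum { Opt_barrier, Opt_nobarrier, Opt_err };

struct token_entry {
        int             token;
        const char      *pattern;       /* NULL pattern terminates the table */
};

static const struct token_entry tokens_demo[] = {
        { Opt_barrier,   "barrier" },
        { Opt_nobarrier, "nobarrier" },
        { Opt_err,       NULL }
};

/* Walk the table; the NULL entry acts as the Opt_err catch-all. */
static int parse_token(const char *s)
{
        const struct token_entry *t;

        for (t = tokens_demo; t->pattern; t++)
                if (!strcmp(s, t->pattern))
                        return t->token;
        return Opt_err;
}

int main(void)
{
        printf("%d %d %d\n", parse_token("barrier"),
               parse_token("nobarrier"), parse_token("bogus"));
        return 0;       /* prints: 0 1 2 */
}
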
-
-
-STATIC unsigned long
-suffix_strtoul(char *s, char **endp, unsigned int base)
-{
-       int     last, shift_left_factor = 0;
-       char    *value = s;
-
-       last = strlen(value) - 1;
-       if (value[last] == 'K' || value[last] == 'k') {
-               shift_left_factor = 10;
-               value[last] = '\0';
-       }
-       if (value[last] == 'M' || value[last] == 'm') {
-               shift_left_factor = 20;
-               value[last] = '\0';
-       }
-       if (value[last] == 'G' || value[last] == 'g') {
-               shift_left_factor = 30;
-               value[last] = '\0';
-       }
-
-       return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
-}
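
suffix_strtoul() strips one trailing K/M/G suffix and left-shifts the parsed number by 10, 20 or 30 bits, so a logbsize=32k option yields 32768. A standalone sketch of the same logic built on the standard strtoul(); suffix_to_bytes is a hypothetical name used here for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "32", "32k", "8m" or "1g" into a byte count, mirroring the
 * shift-left-by-suffix behaviour of suffix_strtoul() above. */
static unsigned long suffix_to_bytes(char *s)
{
        int shift = 0;
        size_t last = strlen(s) - 1;

        switch (s[last]) {
        case 'K': case 'k': shift = 10; s[last] = '\0'; break;
        case 'M': case 'm': shift = 20; s[last] = '\0'; break;
        case 'G': case 'g': shift = 30; s[last] = '\0'; break;
        }
        return strtoul(s, NULL, 10) << shift;
}

int main(void)
{
        char a[] = "32k", b[] = "8m", c[] = "128";

        printf("%lu %lu %lu\n", suffix_to_bytes(a),
               suffix_to_bytes(b), suffix_to_bytes(c));
        return 0;       /* prints: 32768 8388608 128 */
}
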
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- *
- * Note that this function leaks the various device name allocations on
- * failure.  The caller takes care of them.
- */
-STATIC int
-xfs_parseargs(
-       struct xfs_mount        *mp,
-       char                    *options)
-{
-       struct super_block      *sb = mp->m_super;
-       char                    *this_char, *value, *eov;
-       int                     dsunit = 0;
-       int                     dswidth = 0;
-       int                     iosize = 0;
-       __uint8_t               iosizelog = 0;
-
-       /*
-        * set up the mount name first so all the errors will refer to the
-        * correct device.
-        */
-       mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
-       if (!mp->m_fsname)
-               return ENOMEM;
-       mp->m_fsname_len = strlen(mp->m_fsname) + 1;
-
-       /*
-        * Copy binary VFS mount flags we are interested in.
-        */
-       if (sb->s_flags & MS_RDONLY)
-               mp->m_flags |= XFS_MOUNT_RDONLY;
-       if (sb->s_flags & MS_DIRSYNC)
-               mp->m_flags |= XFS_MOUNT_DIRSYNC;
-       if (sb->s_flags & MS_SYNCHRONOUS)
-               mp->m_flags |= XFS_MOUNT_WSYNC;
-
-       /*
-        * Set some default flags that could be cleared by the mount option
-        * parsing.
-        */
-       mp->m_flags |= XFS_MOUNT_BARRIER;
-       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-       mp->m_flags |= XFS_MOUNT_DELAYLOG;
-
-       /*
-        * These can be overridden by the mount option parsing.
-        */
-       mp->m_logbufs = -1;
-       mp->m_logbsize = -1;
-
-       if (!options)
-               goto done;
-
-       while ((this_char = strsep(&options, ",")) != NULL) {
-               if (!*this_char)
-                       continue;
-               if ((value = strchr(this_char, '=')) != NULL)
-                       *value++ = 0;
-
-               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logbufs = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logbsize = suffix_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
-                       if (!mp->m_logname)
-                               return ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
-                       xfs_warn(mp, "%s option not allowed on this system",
-                               this_char);
-                       return EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
-                       if (!mp->m_rtname)
-                               return ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       iosize = simple_strtoul(value, &eov, 10);
-                       iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       iosize = suffix_strtoul(value, &eov, 10);
-                       iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
-                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
-                       mp->m_flags |= XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
-                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
-                       mp->m_flags &= ~XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
-                       mp->m_flags |= XFS_MOUNT_WSYNC;
-               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
-                       mp->m_flags |= XFS_MOUNT_NORECOVERY;
-               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
-                       mp->m_flags |= XFS_MOUNT_NOALIGN;
-               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
-                       mp->m_flags |= XFS_MOUNT_SWALLOC;
-               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       dsunit = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       dswidth = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
-                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-#if !XFS_BIG_INUMS
-                       xfs_warn(mp, "%s option not allowed on this system",
-                               this_char);
-                       return EINVAL;
-#endif
-               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
-                       mp->m_flags |= XFS_MOUNT_NOUUID;
-               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
-                       mp->m_flags |= XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
-                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-                       mp->m_flags |= XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
-                       mp->m_flags &= ~XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
-                       mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
-                       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-                       mp->m_flags |= XFS_MOUNT_ATTR2;
-               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-                       mp->m_flags &= ~XFS_MOUNT_ATTR2;
-                       mp->m_flags |= XFS_MOUNT_NOATTR2;
-               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
-                       mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-                       mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                                         XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-                                         XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-                                         XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
-                          !strcmp(this_char, MNTOPT_UQUOTA) ||
-                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
-                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                                        XFS_UQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
-                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_UQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
-                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
-                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-                                        XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
-                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
-                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-                                        XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-                       mp->m_flags |= XFS_MOUNT_DELAYLOG;
-               } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-                       mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
-               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
-                       mp->m_flags |= XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
-                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, "ihashsize")) {
-                       xfs_warn(mp,
-       "ihashsize no longer used, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisdsync")) {
-                       xfs_warn(mp,
-       "osyncisdsync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisosync")) {
-                       xfs_warn(mp,
-       "osyncisosync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "irixsgid")) {
-                       xfs_warn(mp,
-       "irixsgid is now a sysctl(2) variable, option is deprecated.");
-               } else {
-                       xfs_warn(mp, "unknown mount option [%s].", this_char);
-                       return EINVAL;
-               }
-       }
-
-       /*
-        * no recovery flag requires a read-only mount
-        */
-       if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
-           !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-               xfs_warn(mp, "no-recovery mounts must be read-only.");
-               return EINVAL;
-       }
-
-       if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
-               xfs_warn(mp,
-       "sunit and swidth options incompatible with the noalign option");
-               return EINVAL;
-       }
-
-       if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
-           !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
-               xfs_warn(mp,
-       "the discard option is incompatible with the nodelaylog option");
-               return EINVAL;
-       }
-
-#ifndef CONFIG_XFS_QUOTA
-       if (XFS_IS_QUOTA_RUNNING(mp)) {
-               xfs_warn(mp, "quota support not available in this kernel.");
-               return EINVAL;
-       }
-#endif
-
-       if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
-           (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
-               xfs_warn(mp, "cannot mount with both project and group quota");
-               return EINVAL;
-       }
-
-       if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
-               xfs_warn(mp, "sunit and swidth must be specified together");
-               return EINVAL;
-       }
-
-       if (dsunit && (dswidth % dsunit != 0)) {
-               xfs_warn(mp,
-       "stripe width (%d) must be a multiple of the stripe unit (%d)",
-                       dswidth, dsunit);
-               return EINVAL;
-       }
-
-done:
-       if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
-               /*
-                * At this point the superblock has not been read
-                * in, therefore we do not know the block size.
-                * Before the mount call ends we will convert
-                * these to FSBs.
-                */
-               if (dsunit) {
-                       mp->m_dalign = dsunit;
-                       mp->m_flags |= XFS_MOUNT_RETERR;
-               }
-
-               if (dswidth)
-                       mp->m_swidth = dswidth;
-       }
-
-       if (mp->m_logbufs != -1 &&
-           mp->m_logbufs != 0 &&
-           (mp->m_logbufs < XLOG_MIN_ICLOGS ||
-            mp->m_logbufs > XLOG_MAX_ICLOGS)) {
-               xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
-                       mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-               return XFS_ERROR(EINVAL);
-       }
-       if (mp->m_logbsize != -1 &&
-           mp->m_logbsize !=  0 &&
-           (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
-            mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
-            !is_power_of_2(mp->m_logbsize))) {
-               xfs_warn(mp,
-                       "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-                       mp->m_logbsize);
-               return XFS_ERROR(EINVAL);
-       }
-
-       if (iosizelog) {
-               if (iosizelog > XFS_MAX_IO_LOG ||
-                   iosizelog < XFS_MIN_IO_LOG) {
-                       xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
-                               iosizelog, XFS_MIN_IO_LOG,
-                               XFS_MAX_IO_LOG);
-                       return XFS_ERROR(EINVAL);
-               }
-
-               mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-               mp->m_readio_log = iosizelog;
-               mp->m_writeio_log = iosizelog;
-       }
-
-       return 0;
-}
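
The loop at the top of xfs_parseargs() is a classic strsep() splitter: the option string is cut at commas, and each piece is optionally split again at '=' into a name and a value. A small userspace sketch of just that splitting step, assuming glibc's strsep():

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
        char opts[] = "logbufs=8,wsync,logbsize=32k";
        char *p, *rest = opts;

        /* Comma-separated options, each optionally name=value, exactly
         * as the while (strsep(...)) loop above consumes them. */
        while ((p = strsep(&rest, ",")) != NULL) {
                char *value;

                if (!*p)
                        continue;
                value = strchr(p, '=');
                if (value)
                        *value++ = '\0';
                printf("option=%s value=%s\n", p, value ? value : "(none)");
        }
        return 0;
}
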
-
-struct proc_xfs_info {
-       int     flag;
-       char    *str;
-};
-
-STATIC int
-xfs_showargs(
-       struct xfs_mount        *mp,
-       struct seq_file         *m)
-{
-       static struct proc_xfs_info xfs_info_set[] = {
-               /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
-               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
-               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
-               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
-               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
-               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
-               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
-               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
-               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
-               { XFS_MOUNT_DELAYLOG,           "," MNTOPT_DELAYLOG },
-               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
-               { 0, NULL }
-       };
-       static struct proc_xfs_info xfs_info_unset[] = {
-               /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
-               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
-               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
-               { 0, NULL }
-       };
-       struct proc_xfs_info    *xfs_infop;
-
-       for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
-               if (mp->m_flags & xfs_infop->flag)
-                       seq_puts(m, xfs_infop->str);
-       }
-       for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
-               if (!(mp->m_flags & xfs_infop->flag))
-                       seq_puts(m, xfs_infop->str);
-       }
-
-       if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
-                               (int)(1 << mp->m_writeio_log) >> 10);
-
-       if (mp->m_logbufs > 0)
-               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
-       if (mp->m_logbsize > 0)
-               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
-
-       if (mp->m_logname)
-               seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
-       if (mp->m_rtname)
-               seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
-
-       if (mp->m_dalign > 0)
-               seq_printf(m, "," MNTOPT_SUNIT "=%d",
-                               (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-       if (mp->m_swidth > 0)
-               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
-                               (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-
-       if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_USRQUOTA);
-       else if (mp->m_qflags & XFS_UQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_UQUOTANOENF);
-
-       /* Either project or group quotas can be active, not both */
-
-       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_OQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_PRJQUOTA);
-               else
-                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
-       } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_OQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_GRPQUOTA);
-               else
-                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
-       }
-
-       if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
-               seq_puts(m, "," MNTOPT_NOQUOTA);
-
-       return 0;
-}
-__uint64_t
-xfs_max_file_offset(
-       unsigned int            blockshift)
-{
-       unsigned int            pagefactor = 1;
-       unsigned int            bitshift = BITS_PER_LONG - 1;
-
-       /* Figure out maximum filesize, on Linux this can depend on
-        * the filesystem blocksize (on 32 bit platforms).
-        * __block_write_begin does this in an [unsigned] long...
-        *      page->index << (PAGE_CACHE_SHIFT - bbits)
-        * So, for page sized blocks (4K on 32 bit platforms),
-        * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
-        *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
-        * but for smaller blocksizes it is less (bbits = log2 bsize).
-        * Note1: get_block_t takes a long (implicit cast from above)
-        * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
-        * can optionally convert the [unsigned] long from above into
-        * an [unsigned] long long.
-        */
-
-#if BITS_PER_LONG == 32
-# if defined(CONFIG_LBDAF)
-       ASSERT(sizeof(sector_t) == 8);
-       pagefactor = PAGE_CACHE_SIZE;
-       bitshift = BITS_PER_LONG;
-# else
-       pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
-# endif
-#endif
-
-       return (((__uint64_t)pagefactor) << bitshift) - 1;
-}
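
The numbers behind the comment in xfs_max_file_offset(): on a 32-bit kernel with 4K pages, CONFIG_LBDAF gives pagefactor = 4096 and bitshift = 32, a limit of 2^44 - 1 bytes (16 TiB - 1); without it, page-sized blocks wrap at 2^43 - 1, the ~8 Tb MAX_LFS_FILESIZE figure the comment mentions. A sketch that simply evaluates both expressions:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* 32-bit kernel, 4K pages, CONFIG_LBDAF: (4096 << 32) - 1 */
        uint64_t with_lbdaf = ((uint64_t)4096 << 32) - 1;
        /* 32-bit kernel, 4K blocks, no LBDAF:    (4096 << 31) - 1 */
        uint64_t without    = ((uint64_t)4096 << 31) - 1;

        printf("with LBDAF:    %llu bytes (16 TiB - 1)\n",
               (unsigned long long)with_lbdaf);
        printf("without LBDAF: %llu bytes (~8 TiB)\n",
               (unsigned long long)without);
        return 0;
}
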
-
-STATIC int
-xfs_blkdev_get(
-       xfs_mount_t             *mp,
-       const char              *name,
-       struct block_device     **bdevp)
-{
-       int                     error = 0;
-
-       *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
-                                   mp);
-       if (IS_ERR(*bdevp)) {
-               error = PTR_ERR(*bdevp);
-               xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
-       }
-
-       return -error;
-}
-
-STATIC void
-xfs_blkdev_put(
-       struct block_device     *bdev)
-{
-       if (bdev)
-               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void
-xfs_blkdev_issue_flush(
-       xfs_buftarg_t           *buftarg)
-{
-       blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
-}
-
-STATIC void
-xfs_close_devices(
-       struct xfs_mount        *mp)
-{
-       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-               struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-               xfs_free_buftarg(mp, mp->m_logdev_targp);
-               xfs_blkdev_put(logdev);
-       }
-       if (mp->m_rtdev_targp) {
-               struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-               xfs_free_buftarg(mp, mp->m_rtdev_targp);
-               xfs_blkdev_put(rtdev);
-       }
-       xfs_free_buftarg(mp, mp->m_ddev_targp);
-}
-
-/*
- * The file system configurations are:
- *     (1) device (partition) with data and internal log
- *     (2) logical volume with data and log subvolumes.
- *     (3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present.  The data subvolume has already been opened by
- * get_sb_bdev() and is stored in sb->s_bdev.
- */
-STATIC int
-xfs_open_devices(
-       struct xfs_mount        *mp)
-{
-       struct block_device     *ddev = mp->m_super->s_bdev;
-       struct block_device     *logdev = NULL, *rtdev = NULL;
-       int                     error;
-
-       /*
-        * Open real time and log devices - order is important.
-        */
-       if (mp->m_logname) {
-               error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
-               if (error)
-                       goto out;
-       }
-
-       if (mp->m_rtname) {
-               error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
-               if (error)
-                       goto out_close_logdev;
-
-               if (rtdev == ddev || rtdev == logdev) {
-                       xfs_warn(mp,
-       "Cannot mount filesystem with identical rtdev and ddev/logdev.");
-                       error = EINVAL;
-                       goto out_close_rtdev;
-               }
-       }
-
-       /*
-        * Setup xfs_mount buffer target pointers
-        */
-       error = ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
-       if (!mp->m_ddev_targp)
-               goto out_close_rtdev;
-
-       if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
-                                                       mp->m_fsname);
-               if (!mp->m_rtdev_targp)
-                       goto out_free_ddev_targ;
-       }
-
-       if (logdev && logdev != ddev) {
-               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
-                                                       mp->m_fsname);
-               if (!mp->m_logdev_targp)
-                       goto out_free_rtdev_targ;
-       } else {
-               mp->m_logdev_targp = mp->m_ddev_targp;
-       }
-
-       return 0;
-
- out_free_rtdev_targ:
-       if (mp->m_rtdev_targp)
-               xfs_free_buftarg(mp, mp->m_rtdev_targp);
- out_free_ddev_targ:
-       xfs_free_buftarg(mp, mp->m_ddev_targp);
- out_close_rtdev:
-       if (rtdev)
-               xfs_blkdev_put(rtdev);
- out_close_logdev:
-       if (logdev && logdev != ddev)
-               xfs_blkdev_put(logdev);
- out:
-       return error;
-}
-
-/*
- * Setup xfs_mount buffer target pointers based on superblock
- */
-STATIC int
-xfs_setup_devices(
-       struct xfs_mount        *mp)
-{
-       int                     error;
-
-       error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-                                   mp->m_sb.sb_sectsize);
-       if (error)
-               return error;
-
-       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-               unsigned int    log_sector_size = BBSIZE;
-
-               if (xfs_sb_version_hassector(&mp->m_sb))
-                       log_sector_size = mp->m_sb.sb_logsectsize;
-               error = xfs_setsize_buftarg(mp->m_logdev_targp,
-                                           mp->m_sb.sb_blocksize,
-                                           log_sector_size);
-               if (error)
-                       return error;
-       }
-       if (mp->m_rtdev_targp) {
-               error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-                                           mp->m_sb.sb_blocksize,
-                                           mp->m_sb.sb_sectsize);
-               if (error)
-                       return error;
-       }
-
-       return 0;
-}
-
-/* Catch misguided souls that try to use this interface on XFS */
-STATIC struct inode *
-xfs_fs_alloc_inode(
-       struct super_block      *sb)
-{
-       BUG();
-       return NULL;
-}
-
-/*
- * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
- */
-STATIC void
-xfs_fs_destroy_inode(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-
-       trace_xfs_destroy_inode(ip);
-
-       XFS_STATS_INC(vn_reclaim);
-
-       /* bad inode, get out here ASAP */
-       if (is_bad_inode(inode))
-               goto out_reclaim;
-
-       xfs_ioend_wait(ip);
-
-       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-
-       /*
-        * We should never get here with one of the reclaim flags already set.
-        */
-       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
-
-       /*
-        * We always use background reclaim here because even if the
-        * inode is clean, it still may be under IO and hence we have
-        * to take the flush lock. The background reclaim path handles
-        * this more efficiently than we can here, so simply let background
-        * reclaim tear down all inodes.
-        */
-out_reclaim:
-       xfs_inode_set_reclaim_tag(ip);
-}
-
-/*
- * Slab object creation initialisation for the XFS inode.
- * This covers only the idempotent fields in the XFS inode;
- * all other fields need to be initialised on allocation
- * from the slab. This avoids the need to repeatedly initialise
- * fields in the xfs inode that are left in the initialised
- * state when the inode is freed.
- */
-STATIC void
-xfs_fs_inode_init_once(
-       void                    *inode)
-{
-       struct xfs_inode        *ip = inode;
-
-       memset(ip, 0, sizeof(struct xfs_inode));
-
-       /* vfs inode */
-       inode_init_once(VFS_I(ip));
-
-       /* xfs inode */
-       atomic_set(&ip->i_iocount, 0);
-       atomic_set(&ip->i_pincount, 0);
-       spin_lock_init(&ip->i_flags_lock);
-       init_waitqueue_head(&ip->i_ipin_wait);
-       /*
-        * Because we want to use a counting completion, complete
-        * the flush completion once to allow a single access to
-        * the flush completion without blocking.
-        */
-       init_completion(&ip->i_flush);
-       complete(&ip->i_flush);
-
-       mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
-                    "xfsino", ip->i_ino);
-}
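
The counting-completion trick above arms ip->i_flush with one completed count, so the first acquirer of the flush "lock" passes straight through while later ones would block. Completions have no exact userspace twin; a loose analogue, assuming POSIX semaphores, is a semaphore initialised to 1:

#include <stdio.h>
#include <semaphore.h>

int main(void)
{
        sem_t flush;

        /* Analogue of init_completion() + complete(): start with one
         * available count so a single waiter gets through at once. */
        sem_init(&flush, 0, 1);

        sem_wait(&flush);       /* take the flush lock: no blocking */
        /* ... flush work would happen here ... */
        sem_post(&flush);       /* re-complete: the next waiter may proceed */

        puts("flush lock taken and released without blocking");
        sem_destroy(&flush);
        return 0;
}
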
-
-/*
- * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode.
- *
- * We need the barrier() to maintain correct ordering between unlogged
- * updates and the transaction commit code that clears the i_update_core
- * field. This requires all updates to be completed before marking the
- * inode dirty.
- */
-STATIC void
-xfs_fs_dirty_inode(
-       struct inode    *inode,
-       int             flags)
-{
-       barrier();
-       XFS_I(inode)->i_update_core = 1;
-}
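
barrier() here is the kernel's compiler barrier: an empty asm with a memory clobber that emits no instruction but stops the compiler from reordering memory accesses across it, so the unlogged updates are complete before i_update_core is set. The same construct in userspace C, assuming GCC or Clang:

#include <stdio.h>

/* The kernel's compiler barrier: no code is generated, but the
 * "memory" clobber forbids moving loads/stores across this point. */
#define barrier() __asm__ __volatile__("" ::: "memory")

static int update_core;

int main(void)
{
        int unlogged = 42;      /* stands in for earlier in-core updates */

        barrier();              /* updates above are ordered ...         */
        update_core = 1;        /* ... before the dirty flag is set      */

        printf("%d %d\n", unlogged, update_core);
        return 0;
}
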
-
-STATIC int
-xfs_log_inode(
-       struct xfs_inode        *ip)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       int                     error;
-
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               /* we need to return with the lock held shared */
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-               return error;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Note - it's possible that we might have pushed ourselves out of the
-        * way during trans_reserve which would flush the inode.  But there's
-        * no guarantee that the inode buffer has actually gone out yet (it's
-        * delwri).  Plus the buffer could be pinned anyway if it's part of
-        * an inode in another recent transaction.  So we play it safe and
-        * fire off the transaction anyway.
-        */
-       xfs_trans_ijoin(tp, ip);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       error = xfs_trans_commit(tp, 0);
-       xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
-
-       return error;
-}
-
-STATIC int
-xfs_fs_write_inode(
-       struct inode            *inode,
-       struct writeback_control *wbc)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       int                     error = EAGAIN;
-
-       trace_xfs_write_inode(ip);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       if (wbc->sync_mode == WB_SYNC_ALL) {
-               /*
-                * Make sure the inode has made it into the log.  Instead
-                * of forcing it all the way to stable storage using a
-                * synchronous transaction we let the log force inside the
-                * ->sync_fs call do that for us, which reduces the number
-                * of synchronous log forces dramatically.
-                */
-               xfs_ioend_wait(ip);
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-               if (ip->i_update_core) {
-                       error = xfs_log_inode(ip);
-                       if (error)
-                               goto out_unlock;
-               }
-       } else {
-               /*
-                * If the inode is contended we make this non-blocking and
-                * return EAGAIN to indicate to the caller that the flush did
-                * not succeed.  This prevents the flush path from blocking on
-                * inodes that are inside another operation right now; they
-                * get caught later by xfs_sync.
-                */
-               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
-                       goto out;
-
-               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
-                       goto out_unlock;
-
-               /*
-                * Now that we have the flush lock and the inode is not
-                * pinned, we can check whether the inode is really clean:
-                * we know there are no pending transaction completions, it
-                * is not waiting on the delayed write queue, and there is
-                * no IO in progress.
-                */
-               if (xfs_inode_clean(ip)) {
-                       xfs_ifunlock(ip);
-                       error = 0;
-                       goto out_unlock;
-               }
-               error = xfs_iflush(ip, SYNC_TRYLOCK);
-       }
-
- out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
- out:
-       /*
-        * if we failed to write out the inode then mark
-        * it dirty again so we'll try again later.
-        */
-       if (error)
-               xfs_mark_inode_dirty_sync(ip);
-       return -error;
-}
-
-STATIC void
-xfs_fs_evict_inode(
-       struct inode            *inode)
-{
-       xfs_inode_t             *ip = XFS_I(inode);
-
-       trace_xfs_evict_inode(ip);
-
-       truncate_inode_pages(&inode->i_data, 0);
-       end_writeback(inode);
-       XFS_STATS_INC(vn_rele);
-       XFS_STATS_INC(vn_remove);
-       XFS_STATS_DEC(vn_active);
-
-       /*
-        * The iolock is used by the file system to coordinate reads,
-        * writes, and block truncates.  Up to this point the lock
-        * protected concurrent accesses by users of the inode.  But
-        * from here forward we're doing some final processing of the
-        * inode because we're done with it, and although we reuse the
-        * iolock for protection it is really a distinct lock class
-        * (in the lockdep sense) from before.  To keep lockdep happy
-        * (and basically indicate what we are doing), we explicitly
-        * re-init the iolock here.
-        */
-       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-       lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
-                       &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
-
-       xfs_inactive(ip);
-}
-
-STATIC void
-xfs_free_fsname(
-       struct xfs_mount        *mp)
-{
-       kfree(mp->m_fsname);
-       kfree(mp->m_rtname);
-       kfree(mp->m_logname);
-}
-
-STATIC void
-xfs_fs_put_super(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_syncd_stop(mp);
-
-       /*
-        * Blow away any referenced inode in the filestreams cache.
-        * This can and will cause log traffic as inodes go inactive
-        * here.
-        */
-       xfs_filestream_unmount(mp);
-
-       XFS_bflush(mp->m_ddev_targp);
-
-       xfs_unmountfs(mp);
-       xfs_freesb(mp);
-       xfs_icsb_destroy_counters(mp);
-       xfs_close_devices(mp);
-       xfs_free_fsname(mp);
-       kfree(mp);
-}
-
-STATIC int
-xfs_fs_sync_fs(
-       struct super_block      *sb,
-       int                     wait)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       int                     error;
-
-       /*
-        * Not much we can do for the first async pass.  Writing out the
-        * superblock would be counter-productive as we are going to redirty
-        * it when writing out other data and metadata (and writing out a single
-        * block is quite fast anyway).
-        *
-        * Try to asynchronously kick off quota syncing at least.
-        */
-       if (!wait) {
-               xfs_qm_sync(mp, SYNC_TRYLOCK);
-               return 0;
-       }
-
-       error = xfs_quiesce_data(mp);
-       if (error)
-               return -error;
-
-       if (laptop_mode) {
-               /*
-                * The disk must be active because we're syncing.
-                * We schedule xfssyncd now (now that the disk is
-                * active) instead of later (when it might not be).
-                */
-               flush_delayed_work_sync(&mp->m_sync_work);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_fs_statfs(
-       struct dentry           *dentry,
-       struct kstatfs          *statp)
-{
-       struct xfs_mount        *mp = XFS_M(dentry->d_sb);
-       xfs_sb_t                *sbp = &mp->m_sb;
-       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
-       __uint64_t              fakeinos, id;
-       xfs_extlen_t            lsize;
-       __int64_t               ffree;
-
-       statp->f_type = XFS_SB_MAGIC;
-       statp->f_namelen = MAXNAMELEN - 1;
-
-       id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
-       statp->f_fsid.val[0] = (u32)id;
-       statp->f_fsid.val[1] = (u32)(id >> 32);
-
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
-
-       spin_lock(&mp->m_sb_lock);
-       statp->f_bsize = sbp->sb_blocksize;
-       lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
-       statp->f_blocks = sbp->sb_dblocks - lsize;
-       statp->f_bfree = statp->f_bavail =
-                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-       fakeinos = statp->f_bfree << sbp->sb_inopblog;
-       statp->f_files =
-           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
-       if (mp->m_maxicount)
-               statp->f_files = min_t(typeof(statp->f_files),
-                                       statp->f_files,
-                                       mp->m_maxicount);
-
-       /* make sure statp->f_ffree does not underflow */
-       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
-       statp->f_ffree = max_t(__int64_t, ffree, 0);
-
-       spin_unlock(&mp->m_sb_lock);
-
-       if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
-           ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
-                             (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
-               xfs_qm_statvfs(ip, statp);
-       return 0;
-}
-
-STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
-{
-       __uint64_t resblks = 0;
-
-       mp->m_resblks_save = mp->m_resblks;
-       xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
-{
-       __uint64_t resblks;
-
-       if (mp->m_resblks_save) {
-               resblks = mp->m_resblks_save;
-               mp->m_resblks_save = 0;
-       } else
-               resblks = xfs_default_resblks(mp);
-
-       xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC int
-xfs_fs_remount(
-       struct super_block      *sb,
-       int                     *flags,
-       char                    *options)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       substring_t             args[MAX_OPT_ARGS];
-       char                    *p;
-       int                     error;
-
-       while ((p = strsep(&options, ",")) != NULL) {
-               int token;
-
-               if (!*p)
-                       continue;
-
-               token = match_token(p, tokens, args);
-               switch (token) {
-               case Opt_barrier:
-                       mp->m_flags |= XFS_MOUNT_BARRIER;
-                       break;
-               case Opt_nobarrier:
-                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                       break;
-               default:
-                       /*
-                        * Logically we would return an error here to prevent
-                        * users from believing they might have changed
-                        * mount options using remount which can't be changed.
-                        *
-                        * But unfortunately mount(8) adds all options from
-                        * mtab and fstab to the mount arguments in some cases
-                        * so we can't blindly reject options, but have to
-                        * check for each specified option if it actually
-                        * differs from the currently set option and only
-                        * reject it if that's the case.
-                        *
-                        * Until that is implemented we return success for
-                        * every remount request, and silently ignore all
-                        * options that we can't actually change.
-                        */
-#if 0
-                       xfs_info(mp,
-               "mount option \"%s\" not supported for remount\n", p);
-                       return -EINVAL;
-#else
-                       break;
-#endif
-               }
-       }
-
-       /* ro -> rw */
-       if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
-               mp->m_flags &= ~XFS_MOUNT_RDONLY;
-
-               /*
-                * If this is the first remount to writeable state we
-                * might have some superblock changes to update.
-                */
-               if (mp->m_update_flags) {
-                       error = xfs_mount_log_sb(mp, mp->m_update_flags);
-                       if (error) {
-                               xfs_warn(mp, "failed to write sb changes");
-                               return error;
-                       }
-                       mp->m_update_flags = 0;
-               }
-
-               /*
-                * Fill out the reserve pool if it is empty. Use the stashed
-                * value if it is non-zero, otherwise go with the default.
-                */
-               xfs_restore_resvblks(mp);
-       }
-
-       /* rw -> ro */
-       if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
-               /*
-                * After we have synced the data but before we sync the
-                * metadata, we need to free up the reserve block pool so that
-                * the used block count in the superblock on disk is correct at
-                * the end of the remount. Stash the current reserve pool size
-                * so that if we get remounted rw, we can return it to the same
-                * size.
-                */
-
-               xfs_quiesce_data(mp);
-               xfs_save_resvblks(mp);
-               xfs_quiesce_attr(mp);
-               mp->m_flags |= XFS_MOUNT_RDONLY;
-       }
-
-       return 0;
-}
-
-/*
- * Second stage of a freeze. The data is already frozen so we only
- * need to take care of the metadata. Once that's done write a dummy
- * record to dirty the log in case of a crash while frozen.
- */
-STATIC int
-xfs_fs_freeze(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_save_resvblks(mp);
-       xfs_quiesce_attr(mp);
-       return -xfs_fs_log_dummy(mp);
-}
-
-STATIC int
-xfs_fs_unfreeze(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_restore_resvblks(mp);
-       return 0;
-}
-
-STATIC int
-xfs_fs_show_options(
-       struct seq_file         *m,
-       struct vfsmount         *mnt)
-{
-       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
-       struct xfs_mount        *mp)
-{
-       int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
-       /* Fail a mount where the logbuf is smaller than the log stripe */
-       if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-               if (mp->m_logbsize <= 0 &&
-                   mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
-                       mp->m_logbsize = mp->m_sb.sb_logsunit;
-               } else if (mp->m_logbsize > 0 &&
-                          mp->m_logbsize < mp->m_sb.sb_logsunit) {
-                       xfs_warn(mp,
-               "logbuf size must be greater than or equal to log stripe size");
-                       return XFS_ERROR(EINVAL);
-               }
-       } else {
-               /* Fail a mount if the logbuf is larger than 32K */
-               if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
-                       xfs_warn(mp,
-               "logbuf size for version 1 logs must be 16K or 32K");
-                       return XFS_ERROR(EINVAL);
-               }
-       }
-
-       /*
-        * mkfs'ed attr2 will turn on attr2 mount unless explicitly
-        * told by noattr2 to turn it off
-        */
-       if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-           !(mp->m_flags & XFS_MOUNT_NOATTR2))
-               mp->m_flags |= XFS_MOUNT_ATTR2;
-
-       /*
-        * prohibit r/w mounts of read-only filesystems
-        */
-       if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
-               xfs_warn(mp,
-                       "cannot mount a read-only filesystem as read-write");
-               return XFS_ERROR(EROFS);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_fs_fill_super(
-       struct super_block      *sb,
-       void                    *data,
-       int                     silent)
-{
-       struct inode            *root;
-       struct xfs_mount        *mp = NULL;
-       int                     flags = 0, error = ENOMEM;
-
-       mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
-       if (!mp)
-               goto out;
-
-       spin_lock_init(&mp->m_sb_lock);
-       mutex_init(&mp->m_growlock);
-       atomic_set(&mp->m_active_trans, 0);
-
-       mp->m_super = sb;
-       sb->s_fs_info = mp;
-
-       error = xfs_parseargs(mp, (char *)data);
-       if (error)
-               goto out_free_fsname;
-
-       sb_min_blocksize(sb, BBSIZE);
-       sb->s_xattr = xfs_xattr_handlers;
-       sb->s_export_op = &xfs_export_operations;
-#ifdef CONFIG_XFS_QUOTA
-       sb->s_qcop = &xfs_quotactl_operations;
-#endif
-       sb->s_op = &xfs_super_operations;
-
-       if (silent)
-               flags |= XFS_MFSI_QUIET;
-
-       error = xfs_open_devices(mp);
-       if (error)
-               goto out_free_fsname;
-
-       error = xfs_icsb_init_counters(mp);
-       if (error)
-               goto out_close_devices;
-
-       error = xfs_readsb(mp, flags);
-       if (error)
-               goto out_destroy_counters;
-
-       error = xfs_finish_flags(mp);
-       if (error)
-               goto out_free_sb;
-
-       error = xfs_setup_devices(mp);
-       if (error)
-               goto out_free_sb;
-
-       error = xfs_filestream_mount(mp);
-       if (error)
-               goto out_free_sb;
-
-       /*
-        * we must configure the block size in the superblock before we run the
-        * full mount process as the mount process can lookup and cache inodes.
-        * For the same reason we must also initialise the syncd and register
-        * the inode cache shrinker so that inodes can be reclaimed during
-        * operations like a quotacheck that iterate all inodes in the
-        * filesystem.
-        */
-       sb->s_magic = XFS_SB_MAGIC;
-       sb->s_blocksize = mp->m_sb.sb_blocksize;
-       sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
-       sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
-       sb->s_time_gran = 1;
-       set_posix_acl_flag(sb);
-
-       error = xfs_mountfs(mp);
-       if (error)
-               goto out_filestream_unmount;
-
-       error = xfs_syncd_init(mp);
-       if (error)
-               goto out_unmount;
-
-       root = igrab(VFS_I(mp->m_rootip));
-       if (!root) {
-               error = ENOENT;
-               goto out_syncd_stop;
-       }
-       if (is_bad_inode(root)) {
-               error = EINVAL;
-               goto out_syncd_stop;
-       }
-       sb->s_root = d_alloc_root(root);
-       if (!sb->s_root) {
-               error = ENOMEM;
-               goto out_iput;
-       }
-
-       return 0;
-
- out_filestream_unmount:
-       xfs_filestream_unmount(mp);
- out_free_sb:
-       xfs_freesb(mp);
- out_destroy_counters:
-       xfs_icsb_destroy_counters(mp);
- out_close_devices:
-       xfs_close_devices(mp);
- out_free_fsname:
-       xfs_free_fsname(mp);
-       kfree(mp);
- out:
-       return -error;
-
- out_iput:
-       iput(root);
- out_syncd_stop:
-       xfs_syncd_stop(mp);
- out_unmount:
-       /*
-        * Blow away any referenced inode in the filestreams cache.
-        * This can and will cause log traffic as inodes go inactive
-        * here.
-        */
-       xfs_filestream_unmount(mp);
-
-       XFS_bflush(mp->m_ddev_targp);
-
-       xfs_unmountfs(mp);
-       goto out_free_sb;
-}
-
-STATIC struct dentry *
-xfs_fs_mount(
-       struct file_system_type *fs_type,
-       int                     flags,
-       const char              *dev_name,
-       void                    *data)
-{
-       return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
-}
-
-static int
-xfs_fs_nr_cached_objects(
-       struct super_block      *sb)
-{
-       return xfs_reclaim_inodes_count(XFS_M(sb));
-}
-
-static void
-xfs_fs_free_cached_objects(
-       struct super_block      *sb,
-       int                     nr_to_scan)
-{
-       xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
-}
-
-static const struct super_operations xfs_super_operations = {
-       .alloc_inode            = xfs_fs_alloc_inode,
-       .destroy_inode          = xfs_fs_destroy_inode,
-       .dirty_inode            = xfs_fs_dirty_inode,
-       .write_inode            = xfs_fs_write_inode,
-       .evict_inode            = xfs_fs_evict_inode,
-       .put_super              = xfs_fs_put_super,
-       .sync_fs                = xfs_fs_sync_fs,
-       .freeze_fs              = xfs_fs_freeze,
-       .unfreeze_fs            = xfs_fs_unfreeze,
-       .statfs                 = xfs_fs_statfs,
-       .remount_fs             = xfs_fs_remount,
-       .show_options           = xfs_fs_show_options,
-       .nr_cached_objects      = xfs_fs_nr_cached_objects,
-       .free_cached_objects    = xfs_fs_free_cached_objects,
-};
-
-static struct file_system_type xfs_fs_type = {
-       .owner                  = THIS_MODULE,
-       .name                   = "xfs",
-       .mount                  = xfs_fs_mount,
-       .kill_sb                = kill_block_super,
-       .fs_flags               = FS_REQUIRES_DEV,
-};
-
-STATIC int __init
-xfs_init_zones(void)
-{
-       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-       if (!xfs_ioend_zone)
-               goto out;
-
-       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-                                                 xfs_ioend_zone);
-       if (!xfs_ioend_pool)
-               goto out_destroy_ioend_zone;
-
-       xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
-                                               "xfs_log_ticket");
-       if (!xfs_log_ticket_zone)
-               goto out_destroy_ioend_pool;
-
-       xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
-                                               "xfs_bmap_free_item");
-       if (!xfs_bmap_free_item_zone)
-               goto out_destroy_log_ticket_zone;
-
-       xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
-                                               "xfs_btree_cur");
-       if (!xfs_btree_cur_zone)
-               goto out_destroy_bmap_free_item_zone;
-
-       xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
-                                               "xfs_da_state");
-       if (!xfs_da_state_zone)
-               goto out_destroy_btree_cur_zone;
-
-       xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
-       if (!xfs_dabuf_zone)
-               goto out_destroy_da_state_zone;
-
-       xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-       if (!xfs_ifork_zone)
-               goto out_destroy_dabuf_zone;
-
-       xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
-       if (!xfs_trans_zone)
-               goto out_destroy_ifork_zone;
-
-       xfs_log_item_desc_zone =
-               kmem_zone_init(sizeof(struct xfs_log_item_desc),
-                              "xfs_log_item_desc");
-       if (!xfs_log_item_desc_zone)
-               goto out_destroy_trans_zone;
-
-       /*
-        * The size of the zone-allocated buf log item is the maximum
-        * size possible under XFS.  This wastes a little bit of memory,
-        * but it is much faster.
-        */
-       xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
-                               (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
-                                 NBWORD) * sizeof(int))), "xfs_buf_item");
-       if (!xfs_buf_item_zone)
-               goto out_destroy_log_item_desc_zone;
-
-       xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
-                       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
-                                sizeof(xfs_extent_t))), "xfs_efd_item");
-       if (!xfs_efd_zone)
-               goto out_destroy_buf_item_zone;
-
-       xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
-                       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
-                               sizeof(xfs_extent_t))), "xfs_efi_item");
-       if (!xfs_efi_zone)
-               goto out_destroy_efd_zone;
-
-       xfs_inode_zone =
-               kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
-                       KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
-                       xfs_fs_inode_init_once);
-       if (!xfs_inode_zone)
-               goto out_destroy_efi_zone;
-
-       xfs_ili_zone =
-               kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
-                                       KM_ZONE_SPREAD, NULL);
-       if (!xfs_ili_zone)
-               goto out_destroy_inode_zone;
-
-       return 0;
-
- out_destroy_inode_zone:
-       kmem_zone_destroy(xfs_inode_zone);
- out_destroy_efi_zone:
-       kmem_zone_destroy(xfs_efi_zone);
- out_destroy_efd_zone:
-       kmem_zone_destroy(xfs_efd_zone);
- out_destroy_buf_item_zone:
-       kmem_zone_destroy(xfs_buf_item_zone);
- out_destroy_log_item_desc_zone:
-       kmem_zone_destroy(xfs_log_item_desc_zone);
- out_destroy_trans_zone:
-       kmem_zone_destroy(xfs_trans_zone);
- out_destroy_ifork_zone:
-       kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_dabuf_zone:
-       kmem_zone_destroy(xfs_dabuf_zone);
- out_destroy_da_state_zone:
-       kmem_zone_destroy(xfs_da_state_zone);
- out_destroy_btree_cur_zone:
-       kmem_zone_destroy(xfs_btree_cur_zone);
- out_destroy_bmap_free_item_zone:
-       kmem_zone_destroy(xfs_bmap_free_item_zone);
- out_destroy_log_ticket_zone:
-       kmem_zone_destroy(xfs_log_ticket_zone);
- out_destroy_ioend_pool:
-       mempool_destroy(xfs_ioend_pool);
- out_destroy_ioend_zone:
-       kmem_zone_destroy(xfs_ioend_zone);
- out:
-       return -ENOMEM;
-}
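
xfs_init_zones() is a textbook instance of the kernel's goto-based unwind ladder: each failed allocation jumps to a label that frees everything set up before it, in reverse order, so there is exactly one teardown path. A minimal sketch of the idiom with made-up resources:

#include <stdlib.h>

/* Acquire three resources; on any failure, unwind in reverse order. */
static int init_three(void)
{
        void *a, *b, *c;

        a = malloc(16);
        if (!a)
                goto out;
        b = malloc(16);
        if (!b)
                goto out_free_a;
        c = malloc(16);
        if (!c)
                goto out_free_b;

        /* success: in real code ownership would pass to the caller */
        free(c);
        free(b);
        free(a);
        return 0;

 out_free_b:
        free(b);
 out_free_a:
        free(a);
 out:
        return -1;      /* kernel code would return -ENOMEM here */
}

int main(void)
{
        return init_three() ? 1 : 0;
}
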
-
-STATIC void
-xfs_destroy_zones(void)
-{
-       kmem_zone_destroy(xfs_ili_zone);
-       kmem_zone_destroy(xfs_inode_zone);
-       kmem_zone_destroy(xfs_efi_zone);
-       kmem_zone_destroy(xfs_efd_zone);
-       kmem_zone_destroy(xfs_buf_item_zone);
-       kmem_zone_destroy(xfs_log_item_desc_zone);
-       kmem_zone_destroy(xfs_trans_zone);
-       kmem_zone_destroy(xfs_ifork_zone);
-       kmem_zone_destroy(xfs_dabuf_zone);
-       kmem_zone_destroy(xfs_da_state_zone);
-       kmem_zone_destroy(xfs_btree_cur_zone);
-       kmem_zone_destroy(xfs_bmap_free_item_zone);
-       kmem_zone_destroy(xfs_log_ticket_zone);
-       mempool_destroy(xfs_ioend_pool);
-       kmem_zone_destroy(xfs_ioend_zone);
-}
-
-STATIC int __init
-xfs_init_workqueues(void)
-{
-       /*
-        * max_active is set to 8 to give enough concurrency to allow
-        * multiple work operations on each CPU to run. This allows multiple
-        * filesystems to be running sync work concurrently, and scales with
-        * the number of CPUs in the system.
-        */
-       xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
-       if (!xfs_syncd_wq)
-               goto out;
-
-       xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
-       if (!xfs_ail_wq)
-               goto out_destroy_syncd;
-
-       return 0;
-
-out_destroy_syncd:
-       destroy_workqueue(xfs_syncd_wq);
-out:
-       return -ENOMEM;
-}
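-
-/*
- * For reference, the third argument to alloc_workqueue() above is
- * max_active: up to 8 work items from each of these workqueues may run
- * concurrently per CPU; passing 0 would request the kernel's default
- * limit instead.
- */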
-
-STATIC void
-xfs_destroy_workqueues(void)
-{
-       destroy_workqueue(xfs_ail_wq);
-       destroy_workqueue(xfs_syncd_wq);
-}
-
-STATIC int __init
-init_xfs_fs(void)
-{
-       int                     error;
-
-       printk(KERN_INFO XFS_VERSION_STRING " with "
-                        XFS_BUILD_OPTIONS " enabled\n");
-
-       xfs_ioend_init();
-       xfs_dir_startup();
-
-       error = xfs_init_zones();
-       if (error)
-               goto out;
-
-       error = xfs_init_workqueues();
-       if (error)
-               goto out_destroy_zones;
-
-       error = xfs_mru_cache_init();
-       if (error)
-               goto out_destroy_wq;
-
-       error = xfs_filestream_init();
-       if (error)
-               goto out_mru_cache_uninit;
-
-       error = xfs_buf_init();
-       if (error)
-               goto out_filestream_uninit;
-
-       error = xfs_init_procfs();
-       if (error)
-               goto out_buf_terminate;
-
-       error = xfs_sysctl_register();
-       if (error)
-               goto out_cleanup_procfs;
-
-       vfs_initquota();
-
-       error = register_filesystem(&xfs_fs_type);
-       if (error)
-               goto out_sysctl_unregister;
-       return 0;
-
- out_sysctl_unregister:
-       xfs_sysctl_unregister();
- out_cleanup_procfs:
-       xfs_cleanup_procfs();
- out_buf_terminate:
-       xfs_buf_terminate();
- out_filestream_uninit:
-       xfs_filestream_uninit();
- out_mru_cache_uninit:
-       xfs_mru_cache_uninit();
- out_destroy_wq:
-       xfs_destroy_workqueues();
- out_destroy_zones:
-       xfs_destroy_zones();
- out:
-       return error;
-}
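-
-/*
- * The unwind labels above tear everything down in strict reverse order of
- * setup, so each failure point needs only a single goto; exit_xfs_fs()
- * below runs the same teardown sequence for a normal module unload.
- */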
-
-STATIC void __exit
-exit_xfs_fs(void)
-{
-       vfs_exitquota();
-       unregister_filesystem(&xfs_fs_type);
-       xfs_sysctl_unregister();
-       xfs_cleanup_procfs();
-       xfs_buf_terminate();
-       xfs_filestream_uninit();
-       xfs_mru_cache_uninit();
-       xfs_destroy_workqueues();
-       xfs_destroy_zones();
-}
-
-module_init(init_xfs_fs);
-module_exit(exit_xfs_fs);
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
-MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
deleted file mode 100644 (file)
index 50a3266..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPER_H__
-#define __XFS_SUPER_H__
-
-#include <linux/exportfs.h>
-
-#ifdef CONFIG_XFS_QUOTA
-extern void xfs_qm_init(void);
-extern void xfs_qm_exit(void);
-# define vfs_initquota()       xfs_qm_init()
-# define vfs_exitquota()       xfs_qm_exit()
-#else
-# define vfs_initquota()       do { } while (0)
-# define vfs_exitquota()       do { } while (0)
-#endif
-
-#ifdef CONFIG_XFS_POSIX_ACL
-# define XFS_ACL_STRING                "ACLs, "
-# define set_posix_acl_flag(sb)        ((sb)->s_flags |= MS_POSIXACL)
-#else
-# define XFS_ACL_STRING
-# define set_posix_acl_flag(sb)        do { } while (0)
-#endif
-
-#define XFS_SECURITY_STRING    "security attributes, "
-
-#ifdef CONFIG_XFS_RT
-# define XFS_REALTIME_STRING   "realtime, "
-#else
-# define XFS_REALTIME_STRING
-#endif
-
-#if XFS_BIG_BLKNOS
-# if XFS_BIG_INUMS
-#  define XFS_BIGFS_STRING     "large block/inode numbers, "
-# else
-#  define XFS_BIGFS_STRING     "large block numbers, "
-# endif
-#else
-# define XFS_BIGFS_STRING
-#endif
-
-#ifdef DEBUG
-# define XFS_DBG_STRING                "debug"
-#else
-# define XFS_DBG_STRING                "no debug"
-#endif
-
-#define XFS_VERSION_STRING     "SGI XFS"
-#define XFS_BUILD_OPTIONS      XFS_ACL_STRING \
-                               XFS_SECURITY_STRING \
-                               XFS_REALTIME_STRING \
-                               XFS_BIGFS_STRING \
-                               XFS_DBG_STRING /* DBG must be last */
-
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_buftarg;
-struct block_device;
-
-extern __uint64_t xfs_max_file_offset(unsigned int);
-
-extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-
-extern const struct export_operations xfs_export_operations;
-extern const struct xattr_handler *xfs_xattr_handlers[];
-extern const struct quotactl_ops xfs_quotactl_operations;
-
-#define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
-
-#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
deleted file mode 100644 (file)
index 4604f90..0000000
+++ /dev/null
@@ -1,1065 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_dinode.h"
-#include "xfs_error.h"
-#include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_quota.h"
-#include "xfs_trace.h"
-#include "xfs_fsops.h"
-
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-
-struct workqueue_struct        *xfs_syncd_wq;  /* sync workqueue */
-
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- */
-#define XFS_LOOKUP_BATCH       32
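-
-/*
- * A rough sizing sketch: on a 64-bit kernel the on-stack batch array used
- * by the walkers below costs XFS_LOOKUP_BATCH * sizeof(struct xfs_inode *),
- * i.e. 32 * 8 = 256 bytes per walker, which is why the batch size is kept
- * modest in the reclaim path.
- */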
-
-STATIC int
-xfs_inode_ag_walk_grab(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = VFS_I(ip);
-
-       ASSERT(rcu_read_lock_held());
-
-       /*
-        * check for stale RCU freed inode
-        *
-        * If the inode has been reallocated, it doesn't matter if it's not in
-        * the AG we are walking - we are walking for writeback, so if it
-        * passes all the "valid inode" checks and is dirty, then we'll write
-        * it back anyway.  If it has been reallocated and is still being
-        * initialised, the XFS_INEW check below will catch it.
-        */
-       spin_lock(&ip->i_flags_lock);
-       if (!ip->i_ino)
-               goto out_unlock_noent;
-
-       /* avoid new or reclaimable inodes; leave them for the reclaim code to flush */
-       if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
-               goto out_unlock_noent;
-       spin_unlock(&ip->i_flags_lock);
-
-       /* nothing to sync during shutdown */
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return EFSCORRUPTED;
-
-       /* If we can't grab the inode, it must be on its way to reclaim. */
-       if (!igrab(inode))
-               return ENOENT;
-
-       if (is_bad_inode(inode)) {
-               IRELE(ip);
-               return ENOENT;
-       }
-
-       /* inode is valid */
-       return 0;
-
-out_unlock_noent:
-       spin_unlock(&ip->i_flags_lock);
-       return ENOENT;
-}
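-
-/*
- * A usage sketch for the grab helper above: a successful grab takes an
- * igrab() reference, so the caller must balance it with IRELE() once it is
- * done with the inode, exactly as the AG walker below does:
- *
- *     if (!xfs_inode_ag_walk_grab(ip)) {
- *             error = execute(ip, pag, flags);
- *             IRELE(ip);
- *     }
- */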
-
-STATIC int
-xfs_inode_ag_walk(
-       struct xfs_mount        *mp,
-       struct xfs_perag        *pag,
-       int                     (*execute)(struct xfs_inode *ip,
-                                          struct xfs_perag *pag, int flags),
-       int                     flags)
-{
-       uint32_t                first_index;
-       int                     last_error = 0;
-       int                     skipped;
-       int                     done;
-       int                     nr_found;
-
-restart:
-       done = 0;
-       skipped = 0;
-       first_index = 0;
-       nr_found = 0;
-       do {
-               struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-               int             error = 0;
-               int             i;
-
-               rcu_read_lock();
-               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-                                       (void **)batch, first_index,
-                                       XFS_LOOKUP_BATCH);
-               if (!nr_found) {
-                       rcu_read_unlock();
-                       break;
-               }
-
-               /*
-                * Grab the inodes before we drop the lock. If we found
-                * nothing, nr_found == 0 and the loop will be skipped.
-                */
-               for (i = 0; i < nr_found; i++) {
-                       struct xfs_inode *ip = batch[i];
-
-                       if (done || xfs_inode_ag_walk_grab(ip))
-                               batch[i] = NULL;
-
-                       /*
-                        * Update the index for the next lookup. Catch
-                        * overflows into the next AG range which can occur if
-                        * we have inodes in the last block of the AG and we
-                        * are currently pointing to the last inode.
-                        *
-                        * Because we may see inodes that are from the wrong AG
-                        * due to RCU freeing and reallocation, only update the
-                        * index if it lies in this AG. It was a race that led
-                        * us to see this inode, so another lookup from the
-                        * same index will not find it again.
-                        */
-                       if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
-                               continue;
-                       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-                       if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-                               done = 1;
-               }
-
-               /* unlock now we've grabbed the inodes. */
-               rcu_read_unlock();
-
-               for (i = 0; i < nr_found; i++) {
-                       if (!batch[i])
-                               continue;
-                       error = execute(batch[i], pag, flags);
-                       IRELE(batch[i]);
-                       if (error == EAGAIN) {
-                               skipped++;
-                               continue;
-                       }
-                       if (error && last_error != EFSCORRUPTED)
-                               last_error = error;
-               }
-
-               /* bail out if the filesystem is corrupted.  */
-               if (error == EFSCORRUPTED)
-                       break;
-
-               cond_resched();
-
-       } while (nr_found && !done);
-
-       if (skipped) {
-               delay(1);
-               goto restart;
-       }
-       return last_error;
-}
-
-int
-xfs_inode_ag_iterator(
-       struct xfs_mount        *mp,
-       int                     (*execute)(struct xfs_inode *ip,
-                                          struct xfs_perag *pag, int flags),
-       int                     flags)
-{
-       struct xfs_perag        *pag;
-       int                     error = 0;
-       int                     last_error = 0;
-       xfs_agnumber_t          ag;
-
-       ag = 0;
-       while ((pag = xfs_perag_get(mp, ag))) {
-               ag = pag->pag_agno + 1;
-               error = xfs_inode_ag_walk(mp, pag, execute, flags);
-               xfs_perag_put(pag);
-               if (error) {
-                       last_error = error;
-                       if (error == EFSCORRUPTED)
-                               break;
-               }
-       }
-       return XFS_ERROR(last_error);
-}
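-
-/*
- * A minimal sketch of driving the iterator above: hand it a per-inode
- * callback plus the sync flags, as xfs_sync_data() below does:
- *
- *     error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, SYNC_TRYLOCK);
- */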
-
-STATIC int
-xfs_sync_inode_data(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       struct inode            *inode = VFS_I(ip);
-       struct address_space    *mapping = inode->i_mapping;
-       int                     error = 0;
-
-       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-               goto out_wait;
-
-       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
-               if (flags & SYNC_TRYLOCK)
-                       goto out_wait;
-               xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       }
-
-       error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
-                               0 : XBF_ASYNC, FI_NONE);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
- out_wait:
-       if (flags & SYNC_WAIT)
-               xfs_ioend_wait(ip);
-       return error;
-}
-
-STATIC int
-xfs_sync_inode_attr(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       int                     error = 0;
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       if (xfs_inode_clean(ip))
-               goto out_unlock;
-       if (!xfs_iflock_nowait(ip)) {
-               if (!(flags & SYNC_WAIT))
-                       goto out_unlock;
-               xfs_iflock(ip);
-       }
-
-       if (xfs_inode_clean(ip)) {
-               xfs_ifunlock(ip);
-               goto out_unlock;
-       }
-
-       error = xfs_iflush(ip, flags);
-
-       /*
-        * We don't want to try again on non-blocking flushes that can't run
-        * again immediately. If an inode really must be written, then that's
-        * what the SYNC_WAIT flag is for.
-        */
-       if (error == EAGAIN) {
-               ASSERT(!(flags & SYNC_WAIT));
-               error = 0;
-       }
-
- out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       return error;
-}
-
-/*
- * Write out pagecache data for the whole filesystem.
- */
-STATIC int
-xfs_sync_data(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       int                     error;
-
-       ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
-
-       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
-       if (error)
-               return XFS_ERROR(error);
-
-       xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
-       return 0;
-}
-
-/*
- * Write out inode metadata (attributes) for the whole filesystem.
- */
-STATIC int
-xfs_sync_attr(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       ASSERT((flags & ~SYNC_WAIT) == 0);
-
-       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
-}
-
-STATIC int
-xfs_sync_fsdata(
-       struct xfs_mount        *mp)
-{
-       struct xfs_buf          *bp;
-
-       /*
-        * If the buffer is pinned then push on the log so we won't get stuck
-        * waiting in the write for someone, maybe ourselves, to flush the log.
-        *
-        * Even though we just pushed the log above, we did not have the
-        * superblock buffer locked at that point so it can become pinned in
-        * between there and here.
-        */
-       bp = xfs_getsb(mp, 0);
-       if (xfs_buf_ispinned(bp))
-               xfs_log_force(mp, 0);
-
-       return xfs_bwrite(mp, bp);
-}
-
-/*
- * When remounting a filesystem read-only or freezing the filesystem, we have
- * two phases to execute. This first phase is syncing the data before we
- * quiesce the filesystem, and the second is flushing all the inodes out after
- * we've waited for all the transactions created by the first phase to
- * complete. The second phase ensures that the inodes are written to their
- * location on disk rather than just existing in transactions in the log. This
- * means after a quiesce there is no log replay required to write the inodes to
- * disk (this is the main difference between a sync and a quiesce).
- */
-/*
- * First stage of freeze - no writers will make progress now we are here,
- * so we flush delwri and delalloc buffers here, then wait for all I/O to
- * complete.  Data is frozen at that point. Metadata is not frozen,
- * transactions can still occur here so don't bother flushing the buftarg
- * because it'll just get dirty again.
- */
-int
-xfs_quiesce_data(
-       struct xfs_mount        *mp)
-{
-       int                     error, error2 = 0;
-
-       xfs_qm_sync(mp, SYNC_TRYLOCK);
-       xfs_qm_sync(mp, SYNC_WAIT);
-
-       /* force out the newly dirtied log buffers */
-       xfs_log_force(mp, XFS_LOG_SYNC);
-
-       /* write superblock and hoover up shutdown errors */
-       error = xfs_sync_fsdata(mp);
-
-       /* make sure all delwri buffers are written out */
-       xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
-       /* mark the log as covered if needed */
-       if (xfs_log_need_covered(mp))
-               error2 = xfs_fs_log_dummy(mp);
-
-       /* flush data-only devices */
-       if (mp->m_rtdev_targp)
-               XFS_bflush(mp->m_rtdev_targp);
-
-       return error ? error : error2;
-}
-
-STATIC void
-xfs_quiesce_fs(
-       struct xfs_mount        *mp)
-{
-       int     count = 0, pincount;
-
-       xfs_reclaim_inodes(mp, 0);
-       xfs_flush_buftarg(mp->m_ddev_targp, 0);
-
-       /*
-        * This loop must run at least twice.  The first instance of the loop
-        * will flush most metadata but that will generate more metadata
-        * (typically directory updates), which then must be flushed and
-        * logged before we can write the unmount record. We also do sync
-        * reclaim of inodes to catch any that the above delwri flush skipped.
-        */
-       do {
-               xfs_reclaim_inodes(mp, SYNC_WAIT);
-               xfs_sync_attr(mp, SYNC_WAIT);
-               pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-               if (!pincount) {
-                       delay(50);
-                       count++;
-               }
-       } while (count < 2);
-}
-
-/*
- * Second stage of a quiesce. The data is already synced, now we have to take
- * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceeding.
- */
-void
-xfs_quiesce_attr(
-       struct xfs_mount        *mp)
-{
-       int     error = 0;
-
-       /* wait for all modifications to complete */
-       while (atomic_read(&mp->m_active_trans) > 0)
-               delay(100);
-
-       /* flush inodes and push all remaining buffers out to disk */
-       xfs_quiesce_fs(mp);
-
-       /*
-        * Just warn here till VFS can correctly support
-        * read-only remount without racing.
-        */
-       WARN_ON(atomic_read(&mp->m_active_trans) != 0);
-
-       /* Push the superblock and write an unmount record */
-       error = xfs_log_sbcount(mp);
-       if (error)
-               xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
-                               "Frozen image may not be consistent.");
-       xfs_log_unmount_write(mp);
-       xfs_unmountfs_writesb(mp);
-}
-
-static void
-xfs_syncd_queue_sync(
-       struct xfs_mount        *mp)
-{
-       queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
-                               msecs_to_jiffies(xfs_syncd_centisecs * 10));
-}
-
-/*
- * Every sync period we need to unpin all items, reclaim inodes and sync
- * disk quotas.  We might need to cover the log to indicate that the
- * filesystem is idle and not frozen.
- */
-STATIC void
-xfs_sync_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(to_delayed_work(work),
-                                       struct xfs_mount, m_sync_work);
-       int             error;
-
-       if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-               /* dgc: errors ignored here */
-               if (mp->m_super->s_frozen == SB_UNFROZEN &&
-                   xfs_log_need_covered(mp))
-                       error = xfs_fs_log_dummy(mp);
-               else
-                       xfs_log_force(mp, 0);
-               error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-
-               /* start pushing all the metadata that is currently dirty */
-               xfs_ail_push_all(mp->m_ail);
-       }
-
-       /* queue us up again */
-       xfs_syncd_queue_sync(mp);
-}
-
-/*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs syncd work default of 30s. Perhaps this should have its own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_syncd_queue_reclaim(
-       struct xfs_mount        *mp)
-{
-
-       /*
-        * We can have inodes enter reclaim after we've shut down the syncd
-        * workqueue during unmount, so don't allow reclaim work to be queued
-        * during unmount.
-        */
-       if (!(mp->m_super->s_flags & MS_ACTIVE))
-               return;
-
-       rcu_read_lock();
-       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-               queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
-                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-       }
-       rcu_read_unlock();
-}
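-
-/*
- * A worked example of the delay above, assuming the default
- * xfs_syncd_centisecs of 3000 (the 30s default mentioned above):
- *
- *     3000 / 6 * 10 = 5000ms
- *
- * which gives the "every 5s" reclaim cadence described in the comment.
- */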
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-STATIC void
-xfs_reclaim_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(to_delayed_work(work),
-                                       struct xfs_mount, m_reclaim_work);
-
-       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-       xfs_syncd_queue_reclaim(mp);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room.
- *
- * Queue a new data flush if there isn't one already in progress and
- * wait for completion of the flush. This means that we only ever have one
- * inode flush in progress no matter how many ENOSPC events are occurring and
- * so will prevent the system from bogging down due to every concurrent
- * ENOSPC event scanning all the active inodes in the system for writeback.
- */
-void
-xfs_flush_inodes(
-       struct xfs_inode        *ip)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-
-       queue_work(xfs_syncd_wq, &mp->m_flush_work);
-       flush_work_sync(&mp->m_flush_work);
-}
-
-STATIC void
-xfs_flush_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(work,
-                                       struct xfs_mount, m_flush_work);
-
-       xfs_sync_data(mp, SYNC_TRYLOCK);
-       xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-}
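-
-/*
- * Note the two passes above: the first SYNC_TRYLOCK pass starts
- * asynchronous writeback on whatever can be locked cheaply, and the second
- * adds SYNC_WAIT so the ENOSPC flush in xfs_flush_inodes() does not return
- * until that I/O has completed.
- */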
-
-int
-xfs_syncd_init(
-       struct xfs_mount        *mp)
-{
-       INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
-       INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
-       INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-
-       xfs_syncd_queue_sync(mp);
-       xfs_syncd_queue_reclaim(mp);
-
-       return 0;
-}
-
-void
-xfs_syncd_stop(
-       struct xfs_mount        *mp)
-{
-       cancel_delayed_work_sync(&mp->m_sync_work);
-       cancel_delayed_work_sync(&mp->m_reclaim_work);
-       cancel_work_sync(&mp->m_flush_work);
-}
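-
-/*
- * Lifecycle sketch: xfs_syncd_init() is expected to run at mount time to
- * arm the periodic sync and reclaim work, and xfs_syncd_stop() at unmount
- * to cancel all three work items before the xfs_mount goes away.
- */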
-
-void
-__xfs_inode_set_reclaim_tag(
-       struct xfs_perag        *pag,
-       struct xfs_inode        *ip)
-{
-       radix_tree_tag_set(&pag->pag_ici_root,
-                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-                          XFS_ICI_RECLAIM_TAG);
-
-       if (!pag->pag_ici_reclaimable) {
-               /* propagate the reclaim tag up into the perag radix tree */
-               spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                               XFS_ICI_RECLAIM_TAG);
-               spin_unlock(&ip->i_mount->m_perag_lock);
-
-               /* schedule periodic background inode reclaim */
-               xfs_syncd_queue_reclaim(ip->i_mount);
-
-               trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-                                                       -1, _RET_IP_);
-       }
-       pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-       xfs_inode_t     *ip)
-{
-       struct xfs_mount *mp = ip->i_mount;
-       struct xfs_perag *pag;
-
-       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-       spin_lock(&pag->pag_ici_lock);
-       spin_lock(&ip->i_flags_lock);
-       __xfs_inode_set_reclaim_tag(pag, ip);
-       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-       spin_unlock(&ip->i_flags_lock);
-       spin_unlock(&pag->pag_ici_lock);
-       xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-       xfs_perag_t     *pag,
-       xfs_inode_t     *ip)
-{
-       pag->pag_ici_reclaimable--;
-       if (!pag->pag_ici_reclaimable) {
-               /* clear the reclaim tag from the perag radix tree */
-               spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                               XFS_ICI_RECLAIM_TAG);
-               spin_unlock(&ip->i_mount->m_perag_lock);
-               trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-                                                       -1, _RET_IP_);
-       }
-}
-
-void
-__xfs_inode_clear_reclaim_tag(
-       xfs_mount_t     *mp,
-       xfs_perag_t     *pag,
-       xfs_inode_t     *ip)
-{
-       radix_tree_tag_clear(&pag->pag_ici_root,
-                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-       __xfs_inode_clear_reclaim(pag, ip);
-}
-
-/*
- * Grab the inode for reclaim exclusively.
- * Return 0 if we grabbed it, non-zero otherwise.
- */
-STATIC int
-xfs_reclaim_inode_grab(
-       struct xfs_inode        *ip,
-       int                     flags)
-{
-       ASSERT(rcu_read_lock_held());
-
-       /* quick check for stale RCU freed inode */
-       if (!ip->i_ino)
-               return 1;
-
-       /*
-        * Do some unlocked checks first to avoid unnecessary lock traffic.
-        * The first is a flush lock check, the second is an already-in-reclaim
-        * check. Only do these checks if we are not going to block on locks.
-        */
-       if ((flags & SYNC_TRYLOCK) &&
-           (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
-               return 1;
-       }
-
-       /*
-        * The radix tree lock here protects a thread in xfs_iget from racing
-        * with us starting reclaim on the inode.  Once we have the
-        * XFS_IRECLAIM flag set it will not touch us.
-        *
-        * Due to RCU lookup, we may find inodes that have been freed and only
-        * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
-        * aren't candidates for reclaim at all, so we must check that
-        * XFS_IRECLAIMABLE is set before proceeding to reclaim.
-        */
-       spin_lock(&ip->i_flags_lock);
-       if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
-           __xfs_iflags_test(ip, XFS_IRECLAIM)) {
-               /* not a reclaim candidate. */
-               spin_unlock(&ip->i_flags_lock);
-               return 1;
-       }
-       __xfs_iflags_set(ip, XFS_IRECLAIM);
-       spin_unlock(&ip->i_flags_lock);
-       return 0;
-}
-
-/*
- * Inodes in different states need to be treated differently, and the return
- * value of xfs_iflush is not sufficient to get this right. The following table
- * lists the inode states and the reclaim actions necessary for non-blocking
- * reclaim:
- *
- *
- *     inode state          iflush ret         required action
- *      ---------------      ----------         ---------------
- *     bad                     -               reclaim
- *     shutdown                EIO             unpin and reclaim
- *     clean, unpinned         0               reclaim
- *     stale, unpinned         0               reclaim
- *     clean, pinned(*)        0               requeue
- *     stale, pinned           EAGAIN          requeue
- *     dirty, delwri ok        0               requeue
- *     dirty, delwri blocked   EAGAIN          requeue
- *     dirty, sync flush       0               reclaim
- *
- * (*) dgc: I don't think the clean, pinned state is possible but it gets
- * handled anyway given the order of checks implemented.
- *
- * As can be seen from the table, the return value of xfs_iflush() is not
- * sufficient to correctly decide the reclaim action here. The checks in
- * xfs_iflush() might look like duplicates, but they are not.
- *
- * Also, because we get the flush lock first, we know that any inode that has
- * been flushed delwri has had the flush completed by the time we check that
- * the inode is clean. The clean inode check needs to be done before flushing
- * the inode delwri, otherwise we would loop forever requeuing clean
- * inodes, as we cannot tell a successful delwri flush apart from a clean
- * inode by the return value of xfs_iflush().
- *
- * Note that because the inode is flushed delayed write by background
- * writeback, the flush lock may already be held here and waiting on it can
- * result in very long latencies. Hence for sync reclaims, where we wait on the
- * flush lock, the caller should push out delayed write inodes first before
- * trying to reclaim them to minimise the amount of time spent waiting. For
- * background reclaim, we just requeue the inode for the next pass.
- *
- * Hence the order of actions after gaining the locks should be:
- *     bad             => reclaim
- *     shutdown        => unpin and reclaim
- *     pinned, delwri  => requeue
- *     pinned, sync    => unpin
- *     stale           => reclaim
- *     clean           => reclaim
- *     dirty, delwri   => flush and requeue
- *     dirty, sync     => flush, wait and reclaim
- */
-STATIC int
-xfs_reclaim_inode(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     sync_mode)
-{
-       int     error;
-
-restart:
-       error = 0;
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (!xfs_iflock_nowait(ip)) {
-               if (!(sync_mode & SYNC_WAIT))
-                       goto out;
-               xfs_iflock(ip);
-       }
-
-       if (is_bad_inode(VFS_I(ip)))
-               goto reclaim;
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-               xfs_iunpin_wait(ip);
-               goto reclaim;
-       }
-       if (xfs_ipincount(ip)) {
-               if (!(sync_mode & SYNC_WAIT)) {
-                       xfs_ifunlock(ip);
-                       goto out;
-               }
-               xfs_iunpin_wait(ip);
-       }
-       if (xfs_iflags_test(ip, XFS_ISTALE))
-               goto reclaim;
-       if (xfs_inode_clean(ip))
-               goto reclaim;
-
-       /*
-        * Now we have an inode that needs flushing.
-        *
-        * We do a nonblocking flush here even if we are doing a SYNC_WAIT
-        * reclaim as we can deadlock with inode cluster removal.
-        * xfs_ifree_cluster() can lock the inode buffer before it locks the
-        * ip->i_lock, and we are doing the exact opposite here. As a result,
-        * doing a blocking xfs_itobp() to get the cluster buffer will result
-        * in an ABBA deadlock with xfs_ifree_cluster().
-        *
-        * As xfs_ifree_cluster() must gather all inodes that are active in the
-        * cache to mark them stale, if we hit this case we don't actually want
-        * to do IO here - we want the inode marked stale so we can simply
-        * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
-        * just unlock the inode, back off and try again. Hopefully the next
-        * pass through will see the stale flag set on the inode.
-        */
-       error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
-       if (sync_mode & SYNC_WAIT) {
-               if (error == EAGAIN) {
-                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                       /* backoff longer than in xfs_ifree_cluster */
-                       delay(2);
-                       goto restart;
-               }
-               xfs_iflock(ip);
-               goto reclaim;
-       }
-
-       /*
-        * When we have to flush an inode but don't have SYNC_WAIT set, we
-        * flush the inode out using a delwri buffer and wait for the next
-        * call into reclaim to find it in a clean state instead of waiting for
-        * it now. We also don't return errors here - if the error is transient
-        * then the next reclaim pass will flush the inode, and if the error
-        * is permanent then the next sync reclaim will reclaim the inode and
-        * pass on the error.
-        */
-       if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-               xfs_warn(ip->i_mount,
-                       "inode 0x%llx background reclaim flush failed with %d",
-                       (long long)ip->i_ino, error);
-       }
-out:
-       xfs_iflags_clear(ip, XFS_IRECLAIM);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       /*
-        * We could return EAGAIN here to make reclaim rescan the inode tree in
-        * a short while. However, this just burns CPU time scanning the tree
-        * waiting for IO to complete and xfssyncd never goes back to the idle
-        * state. Instead, return 0 to let the next scheduled background reclaim
-        * attempt to reclaim the inode again.
-        */
-       return 0;
-
-reclaim:
-       xfs_ifunlock(ip);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       XFS_STATS_INC(xs_ig_reclaims);
-       /*
-        * Remove the inode from the per-AG radix tree.
-        *
-        * Because radix_tree_delete won't complain even if the item was never
-        * added to the tree, assert that it was there before, to catch
-        * problems with the inode lifetime early on.
-        */
-       spin_lock(&pag->pag_ici_lock);
-       if (!radix_tree_delete(&pag->pag_ici_root,
-                               XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
-               ASSERT(0);
-       __xfs_inode_clear_reclaim(pag, ip);
-       spin_unlock(&pag->pag_ici_lock);
-
-       /*
-        * Here we do an (almost) spurious inode lock in order to coordinate
-        * with inode cache radix tree lookups.  This is because the lookup
-        * can reference the inodes in the cache without taking references.
-        *
-        * We make that OK here by ensuring that we wait until the inode is
-        * unlocked after the lookup before we go ahead and free it.  We get
-        * both the ilock and the iolock because the code may need to drop the
-        * ilock one but will still hold the iolock.
-        */
-       xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-       xfs_qm_dqdetach(ip);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
-       xfs_inode_free(ip);
-       return error;
-}
-
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * a shutdown during the filesystem unmount reclaim walk will leak all the
- * unreclaimed inodes.
- */
-int
-xfs_reclaim_inodes_ag(
-       struct xfs_mount        *mp,
-       int                     flags,
-       int                     *nr_to_scan)
-{
-       struct xfs_perag        *pag;
-       int                     error = 0;
-       int                     last_error = 0;
-       xfs_agnumber_t          ag;
-       int                     trylock = flags & SYNC_TRYLOCK;
-       int                     skipped;
-
-restart:
-       ag = 0;
-       skipped = 0;
-       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-               unsigned long   first_index = 0;
-               int             done = 0;
-               int             nr_found = 0;
-
-               ag = pag->pag_agno + 1;
-
-               if (trylock) {
-                       if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
-                               skipped++;
-                               xfs_perag_put(pag);
-                               continue;
-                       }
-                       first_index = pag->pag_ici_reclaim_cursor;
-               } else {
-                       mutex_lock(&pag->pag_ici_reclaim_lock);
-               }
-
-               do {
-                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-                       int     i;
-
-                       rcu_read_lock();
-                       nr_found = radix_tree_gang_lookup_tag(
-                                       &pag->pag_ici_root,
-                                       (void **)batch, first_index,
-                                       XFS_LOOKUP_BATCH,
-                                       XFS_ICI_RECLAIM_TAG);
-                       if (!nr_found) {
-                               done = 1;
-                               rcu_read_unlock();
-                               break;
-                       }
-
-                       /*
-                        * Grab the inodes before we drop the lock. If we found
-                        * nothing, nr_found == 0 and the loop will be skipped.
-                        */
-                       for (i = 0; i < nr_found; i++) {
-                               struct xfs_inode *ip = batch[i];
-
-                               if (done || xfs_reclaim_inode_grab(ip, flags))
-                                       batch[i] = NULL;
-
-                               /*
-                                * Update the index for the next lookup. Catch
-                                * overflows into the next AG range which can
-                                * occur if we have inodes in the last block of
-                                * the AG and we are currently pointing to the
-                                * last inode.
-                                *
-                                * Because we may see inodes that are from the
-                                * wrong AG due to RCU freeing and
-                                * reallocation, only update the index if it
-                                * lies in this AG. It was a race that led us
-                                * to see this inode, so another lookup from
-                                * the same index will not find it again.
-                                */
-                               if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-                                                               pag->pag_agno)
-                                       continue;
-                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-                               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-                                       done = 1;
-                       }
-
-                       /* unlock now we've grabbed the inodes. */
-                       rcu_read_unlock();
-
-                       for (i = 0; i < nr_found; i++) {
-                               if (!batch[i])
-                                       continue;
-                               error = xfs_reclaim_inode(batch[i], pag, flags);
-                               if (error && last_error != EFSCORRUPTED)
-                                       last_error = error;
-                       }
-
-                       *nr_to_scan -= XFS_LOOKUP_BATCH;
-
-                       cond_resched();
-
-               } while (nr_found && !done && *nr_to_scan > 0);
-
-               if (trylock && !done)
-                       pag->pag_ici_reclaim_cursor = first_index;
-               else
-                       pag->pag_ici_reclaim_cursor = 0;
-               mutex_unlock(&pag->pag_ici_reclaim_lock);
-               xfs_perag_put(pag);
-       }
-
-       /*
-        * If we skipped any AG, and we still have scan count remaining, do
-        * another pass, this time using blocking reclaim semantics (i.e.
-        * waiting on the reclaim locks and ignoring the reclaim cursors). This
-        * ensures that when we get more reclaimers than AGs we block rather
-        * than spin trying to execute reclaim.
-        */
-       if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
-               trylock = 0;
-               goto restart;
-       }
-       return XFS_ERROR(last_error);
-}
-
-int
-xfs_reclaim_inodes(
-       xfs_mount_t     *mp,
-       int             mode)
-{
-       int             nr_to_scan = INT_MAX;
-
-       return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
-}
-
-/*
- * Scan a certain number of inodes for reclaim.
- *
- * When called we make sure that there is a background (fast) inode reclaim in
- * progress, while we throttle the speed of reclaim by doing synchronous
- * reclaim of inodes. That means if we come across dirty inodes, we wait for
- * them to be cleaned, which we hope will not be very long due to the
- * background walker having already kicked the IO off on those dirty inodes.
- */
-void
-xfs_reclaim_inodes_nr(
-       struct xfs_mount        *mp,
-       int                     nr_to_scan)
-{
-       /* kick background reclaimer and push the AIL */
-       xfs_syncd_queue_reclaim(mp);
-       xfs_ail_push_all(mp->m_ail);
-
-       xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
-}
-
-/*
- * Return the number of reclaimable inodes in the filesystem for
- * the shrinker to determine how much to reclaim.
- */
-int
-xfs_reclaim_inodes_count(
-       struct xfs_mount        *mp)
-{
-       struct xfs_perag        *pag;
-       xfs_agnumber_t          ag = 0;
-       int                     reclaimable = 0;
-
-       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-               ag = pag->pag_agno + 1;
-               reclaimable += pag->pag_ici_reclaimable;
-               xfs_perag_put(pag);
-       }
-       return reclaimable;
-}
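-
-/*
- * Sketch of the intended pairing with the inode cache shrinker (which
- * lives outside this file): a count callback reports
- * xfs_reclaim_inodes_count(mp), and a scan callback invokes
- * xfs_reclaim_inodes_nr(mp, nr_to_scan).
- */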
-
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
deleted file mode 100644 (file)
index 941202e..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef XFS_SYNC_H
-#define XFS_SYNC_H 1
-
-struct xfs_mount;
-struct xfs_perag;
-
-#define SYNC_WAIT              0x0001  /* wait for i/o to complete */
-#define SYNC_TRYLOCK           0x0002  /* only try to lock inodes */
-
-extern struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
-
-int xfs_syncd_init(struct xfs_mount *mp);
-void xfs_syncd_stop(struct xfs_mount *mp);
-
-int xfs_quiesce_data(struct xfs_mount *mp);
-void xfs_quiesce_attr(struct xfs_mount *mp);
-
-void xfs_flush_inodes(struct xfs_inode *ip);
-
-int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
-int xfs_reclaim_inodes_count(struct xfs_mount *mp);
-void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
-
-void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
-void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
-void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
-                               struct xfs_inode *ip);
-
-int xfs_sync_inode_grab(struct xfs_inode *ip);
-int xfs_inode_ag_iterator(struct xfs_mount *mp,
-       int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-       int flags);
-
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
deleted file mode 100644 (file)
index ee2d2ad..0000000
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include "xfs_error.h"
-
-static struct ctl_table_header *xfs_table_header;
-
-#ifdef CONFIG_PROC_FS
-STATIC int
-xfs_stats_clear_proc_handler(
-       ctl_table       *ctl,
-       int             write,
-       void            __user *buffer,
-       size_t          *lenp,
-       loff_t          *ppos)
-{
-       int             c, ret, *valp = ctl->data;
-       __uint32_t      vn_active;
-
-       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-
-       if (!ret && write && *valp) {
-               xfs_notice(NULL, "Clearing xfsstats");
-               for_each_possible_cpu(c) {
-                       preempt_disable();
-                       /* save vn_active, it's a universal truth! */
-                       vn_active = per_cpu(xfsstats, c).vn_active;
-                       memset(&per_cpu(xfsstats, c), 0,
-                              sizeof(struct xfsstats));
-                       per_cpu(xfsstats, c).vn_active = vn_active;
-                       preempt_enable();
-               }
-               xfs_stats_clear = 0;
-       }
-
-       return ret;
-}
-
-STATIC int
-xfs_panic_mask_proc_handler(
-       ctl_table       *ctl,
-       int             write,
-       void            __user *buffer,
-       size_t          *lenp,
-       loff_t          *ppos)
-{
-       int             ret, *valp = ctl->data;
-
-       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-       if (!ret && write) {
-               xfs_panic_mask = *valp;
-#ifdef DEBUG
-               xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
-#endif
-       }
-       return ret;
-}
-#endif /* CONFIG_PROC_FS */
-
-static ctl_table xfs_table[] = {
-       {
-               .procname       = "irix_sgid_inherit",
-               .data           = &xfs_params.sgid_inherit.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.sgid_inherit.min,
-               .extra2         = &xfs_params.sgid_inherit.max
-       },
-       {
-               .procname       = "irix_symlink_mode",
-               .data           = &xfs_params.symlink_mode.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.symlink_mode.min,
-               .extra2         = &xfs_params.symlink_mode.max
-       },
-       {
-               .procname       = "panic_mask",
-               .data           = &xfs_params.panic_mask.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = xfs_panic_mask_proc_handler,
-               .extra1         = &xfs_params.panic_mask.min,
-               .extra2         = &xfs_params.panic_mask.max
-       },
-       {
-               .procname       = "error_level",
-               .data           = &xfs_params.error_level.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.error_level.min,
-               .extra2         = &xfs_params.error_level.max
-       },
-       {
-               .procname       = "xfssyncd_centisecs",
-               .data           = &xfs_params.syncd_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.syncd_timer.min,
-               .extra2         = &xfs_params.syncd_timer.max
-       },
-       {
-               .procname       = "inherit_sync",
-               .data           = &xfs_params.inherit_sync.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_sync.min,
-               .extra2         = &xfs_params.inherit_sync.max
-       },
-       {
-               .procname       = "inherit_nodump",
-               .data           = &xfs_params.inherit_nodump.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nodump.min,
-               .extra2         = &xfs_params.inherit_nodump.max
-       },
-       {
-               .procname       = "inherit_noatime",
-               .data           = &xfs_params.inherit_noatim.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_noatim.min,
-               .extra2         = &xfs_params.inherit_noatim.max
-       },
-       {
-               .procname       = "xfsbufd_centisecs",
-               .data           = &xfs_params.xfs_buf_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.xfs_buf_timer.min,
-               .extra2         = &xfs_params.xfs_buf_timer.max
-       },
-       {
-               .procname       = "age_buffer_centisecs",
-               .data           = &xfs_params.xfs_buf_age.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.xfs_buf_age.min,
-               .extra2         = &xfs_params.xfs_buf_age.max
-       },
-       {
-               .procname       = "inherit_nosymlinks",
-               .data           = &xfs_params.inherit_nosym.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nosym.min,
-               .extra2         = &xfs_params.inherit_nosym.max
-       },
-       {
-               .procname       = "rotorstep",
-               .data           = &xfs_params.rotorstep.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.rotorstep.min,
-               .extra2         = &xfs_params.rotorstep.max
-       },
-       {
-               .procname       = "inherit_nodefrag",
-               .data           = &xfs_params.inherit_nodfrg.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nodfrg.min,
-               .extra2         = &xfs_params.inherit_nodfrg.max
-       },
-       {
-               .procname       = "filestream_centisecs",
-               .data           = &xfs_params.fstrm_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.fstrm_timer.min,
-               .extra2         = &xfs_params.fstrm_timer.max,
-       },
-       /* please keep this the last entry */
-#ifdef CONFIG_PROC_FS
-       {
-               .procname       = "stats_clear",
-               .data           = &xfs_params.stats_clear.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = xfs_stats_clear_proc_handler,
-               .extra1         = &xfs_params.stats_clear.min,
-               .extra2         = &xfs_params.stats_clear.max
-       },
-#endif /* CONFIG_PROC_FS */
-
-       {}
-};
-
-static ctl_table xfs_dir_table[] = {
-       {
-               .procname       = "xfs",
-               .mode           = 0555,
-               .child          = xfs_table
-       },
-       {}
-};
-
-static ctl_table xfs_root_table[] = {
-       {
-               .procname       = "fs",
-               .mode           = 0555,
-               .child          = xfs_dir_table
-       },
-       {}
-};
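-
-/*
- * The nesting above ("fs" -> "xfs" -> entries) is what surfaces these
- * tunables under /proc/sys/fs/xfs/; the syncd_timer entry, for example,
- * appears as /proc/sys/fs/xfs/xfssyncd_centisecs.
- */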
-
-int
-xfs_sysctl_register(void)
-{
-       xfs_table_header = register_sysctl_table(xfs_root_table);
-       if (!xfs_table_header)
-               return -ENOMEM;
-       return 0;
-}
-
-void
-xfs_sysctl_unregister(void)
-{
-       unregister_sysctl_table(xfs_table_header);
-}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
deleted file mode 100644 (file)
index b9937d4..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SYSCTL_H__
-#define __XFS_SYSCTL_H__
-
-#include <linux/sysctl.h>
-
-/*
- * Tunable xfs parameters
- */
-
-typedef struct xfs_sysctl_val {
-       int min;
-       int val;
-       int max;
-} xfs_sysctl_val_t;
-
-typedef struct xfs_param {
-       xfs_sysctl_val_t sgid_inherit;  /* Inherit S_ISGID if process' GID is
-                                        * not a member of parent dir GID. */
-       xfs_sysctl_val_t symlink_mode;  /* Link creat mode affected by umask */
-       xfs_sysctl_val_t panic_mask;    /* bitmask to cause panic on errors. */
-       xfs_sysctl_val_t error_level;   /* Degree of reporting for problems  */
-       xfs_sysctl_val_t syncd_timer;   /* Interval between xfssyncd wakeups */
-       xfs_sysctl_val_t stats_clear;   /* Reset all XFS statistics to zero. */
-       xfs_sysctl_val_t inherit_sync;  /* Inherit the "sync" inode flag. */
-       xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
-       xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
-       xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
-       xfs_sysctl_val_t xfs_buf_age;   /* Metadata buffer age before flush. */
-       xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
-       xfs_sysctl_val_t rotorstep;     /* inode32 AG rotoring control knob */
-       xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
-       xfs_sysctl_val_t fstrm_timer;   /* Filestream dir-AG assoc'n timeout. */
-} xfs_param_t;
-
-/*
- * xfs_error_level:
- *
- * How much error reporting will be done when internal problems are
- * encountered.  These problems normally return an EFSCORRUPTED to their
- * caller, with no other information reported.
- *
- * 0   No error reports
- * 1   Report EFSCORRUPTED errors that will cause a filesystem shutdown
- * 5   Report all EFSCORRUPTED errors (all of the above errors, plus any
- *     additional errors that are known to not cause shutdowns)
- *
- * xfs_panic_mask bit 0x8 turns the error reports into panics
- */
-
-enum {
-       /* XFS_REFCACHE_SIZE = 1 */
-       /* XFS_REFCACHE_PURGE = 2 */
-       /* XFS_RESTRICT_CHOWN = 3 */
-       XFS_SGID_INHERIT = 4,
-       XFS_SYMLINK_MODE = 5,
-       XFS_PANIC_MASK = 6,
-       XFS_ERRLEVEL = 7,
-       XFS_SYNCD_TIMER = 8,
-       /* XFS_PROBE_DMAPI = 9 */
-       /* XFS_PROBE_IOOPS = 10 */
-       /* XFS_PROBE_QUOTA = 11 */
-       XFS_STATS_CLEAR = 12,
-       XFS_INHERIT_SYNC = 13,
-       XFS_INHERIT_NODUMP = 14,
-       XFS_INHERIT_NOATIME = 15,
-       XFS_BUF_TIMER = 16,
-       XFS_BUF_AGE = 17,
-       /* XFS_IO_BYPASS = 18 */
-       XFS_INHERIT_NOSYM = 19,
-       XFS_ROTORSTEP = 20,
-       XFS_INHERIT_NODFRG = 21,
-       XFS_FILESTREAM_TIMER = 22,
-};
-
-extern xfs_param_t     xfs_params;
-
-#ifdef CONFIG_SYSCTL
-extern int xfs_sysctl_register(void);
-extern void xfs_sysctl_unregister(void);
-#else
-# define xfs_sysctl_register()         (0)
-# define xfs_sysctl_unregister()       do { } while (0)
-#endif /* CONFIG_SYSCTL */
-
-#endif /* __XFS_SYSCTL_H__ */
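
The #ifdef CONFIG_SYSCTL block that closes the header above is the usual compile-out idiom: when sysctl support is configured out, registration collapses to a constant 0 and unregistration to an empty statement, so callers need no conditionals of their own. The same idiom in miniature (illustrative names):

    #ifdef CONFIG_SYSCTL
    extern int demo_sysctl_register(void);
    extern void demo_sysctl_unregister(void);
    #else
    /* stubs: callers compile unchanged when CONFIG_SYSCTL is off */
    # define demo_sysctl_register()         (0)
    # define demo_sysctl_unregister()       do { } while (0)
    #endif

The do { } while (0) form keeps the stub safe inside if/else bodies, just like the real xfs_sysctl_unregister() stub above.
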
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
deleted file mode 100644 (file)
index 88d25d4..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_mount.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_log_priv.h"
-#include "xfs_buf_item.h"
-#include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
-#include "quota/xfs_dquot_item.h"
-#include "quota/xfs_dquot.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-
-/*
- * We include this last to have the helpers above available for the trace
- * event implementations.
- */
-#define CREATE_TRACE_POINTS
-#include "xfs_trace.h"
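
The whole of xfs_trace.c above exists to satisfy the one-translation-unit rule of the tracepoint infrastructure: exactly one .c file defines CREATE_TRACE_POINTS before including the trace header, which switches the TRACE_EVENT/DEFINE_EVENT macros from emitting declarations to emitting the tracepoint definitions themselves; the long run of includes before it simply makes the types used by the events visible. Schematically, with an illustrative header name:

    /* demo.c -- an ordinary user of the events: declarations only */
    #include "demo_trace.h"

    /* demo_trace.c -- exactly one file expands the definitions */
    #define CREATE_TRACE_POINTS
    #include "demo_trace.h"
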
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
deleted file mode 100644 (file)
index 690fc7a..0000000
+++ /dev/null
@@ -1,1746 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM xfs
-
-#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_XFS_H
-
-#include <linux/tracepoint.h>
-
-struct xfs_agf;
-struct xfs_alloc_arg;
-struct xfs_attr_list_context;
-struct xfs_buf_log_item;
-struct xfs_da_args;
-struct xfs_da_node_entry;
-struct xfs_dquot;
-struct xlog_ticket;
-struct log;
-struct xlog_recover;
-struct xlog_recover_item;
-struct xfs_buf_log_format;
-struct xfs_inode_log_format;
-
-DECLARE_EVENT_CLASS(xfs_attr_list_class,
-       TP_PROTO(struct xfs_attr_list_context *ctx),
-       TP_ARGS(ctx),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(u32, hashval)
-               __field(u32, blkno)
-               __field(u32, offset)
-               __field(void *, alist)
-               __field(int, bufsize)
-               __field(int, count)
-               __field(int, firstu)
-               __field(int, dupcnt)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
-               __entry->ino = ctx->dp->i_ino;
-               __entry->hashval = ctx->cursor->hashval;
-               __entry->blkno = ctx->cursor->blkno;
-               __entry->offset = ctx->cursor->offset;
-               __entry->alist = ctx->alist;
-               __entry->bufsize = ctx->bufsize;
-               __entry->count = ctx->count;
-               __entry->firstu = ctx->firstu;
-               __entry->flags = ctx->flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-                 "alist 0x%p size %u count %u firstu %u flags %d %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                  __entry->ino,
-                  __entry->hashval,
-                  __entry->blkno,
-                  __entry->offset,
-                  __entry->dupcnt,
-                  __entry->alist,
-                  __entry->bufsize,
-                  __entry->count,
-                  __entry->firstu,
-                  __entry->flags,
-                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
-       )
-)
-
-#define DEFINE_ATTR_LIST_EVENT(name) \
-DEFINE_EVENT(xfs_attr_list_class, name, \
-       TP_PROTO(struct xfs_attr_list_context *ctx), \
-       TP_ARGS(ctx))
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
-
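
Nearly everything in this header is built from one idiom: DECLARE_EVENT_CLASS captures the record layout (TP_STRUCT__entry), the fast-path capture (TP_fast_assign) and the format string (TP_printk) once, and each DEFINE_EVENT stamps out a named tracepoint that reuses them, which is why the eight xfs_attr_list_* events above cost one line apiece. A self-contained sketch of the idiom, omitting the TRACE_SYSTEM/TRACE_HEADER_MULTI_READ boilerplate a real trace header needs (names illustrative):

    #include <linux/fs.h>
    #include <linux/tracepoint.h>

    DECLARE_EVENT_CLASS(demo_inode_class,
            TP_PROTO(struct inode *inode),
            TP_ARGS(inode),
            TP_STRUCT__entry(
                    __field(dev_t, dev)
                    __field(unsigned long, ino)
            ),
            TP_fast_assign(
                    /* copy what we need now; the record outlives the call */
                    __entry->dev = inode->i_sb->s_dev;
                    __entry->ino = inode->i_ino;
            ),
            TP_printk("dev %d:%d ino %lu",
                      MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino)
    );

    /* one line of real content per event; layout and format come from the class */
    DEFINE_EVENT(demo_inode_class, demo_open,
            TP_PROTO(struct inode *inode),
            TP_ARGS(inode));
    DEFINE_EVENT(demo_inode_class, demo_release,
            TP_PROTO(struct inode *inode),
            TP_ARGS(inode));
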
-DECLARE_EVENT_CLASS(xfs_perag_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
-                unsigned long caller_ip),
-       TP_ARGS(mp, agno, refcount, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(int, refcount)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->refcount = refcount;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->refcount,
-                 (void *)__entry->caller_ip)
-);
-
-#define DEFINE_PERAG_REF_EVENT(name)   \
-DEFINE_EVENT(xfs_perag_class, name,    \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,       \
-                unsigned long caller_ip),                                      \
-       TP_ARGS(mp, agno, refcount, caller_ip))
-DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
-DEFINE_PERAG_REF_EVENT(xfs_perag_put);
-DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
-DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
-
-TRACE_EVENT(xfs_attr_list_node_descend,
-       TP_PROTO(struct xfs_attr_list_context *ctx,
-                struct xfs_da_node_entry *btree),
-       TP_ARGS(ctx, btree),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(u32, hashval)
-               __field(u32, blkno)
-               __field(u32, offset)
-               __field(void *, alist)
-               __field(int, bufsize)
-               __field(int, count)
-               __field(int, firstu)
-               __field(int, dupcnt)
-               __field(int, flags)
-               __field(u32, bt_hashval)
-               __field(u32, bt_before)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
-               __entry->ino = ctx->dp->i_ino;
-               __entry->hashval = ctx->cursor->hashval;
-               __entry->blkno = ctx->cursor->blkno;
-               __entry->offset = ctx->cursor->offset;
-               __entry->alist = ctx->alist;
-               __entry->bufsize = ctx->bufsize;
-               __entry->count = ctx->count;
-               __entry->firstu = ctx->firstu;
-               __entry->flags = ctx->flags;
-               __entry->bt_hashval = be32_to_cpu(btree->hashval);
-               __entry->bt_before = be32_to_cpu(btree->before);
-       ),
-       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-                 "alist 0x%p size %u count %u firstu %u flags %d %s "
-                 "node hashval %u, node before %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                  __entry->ino,
-                  __entry->hashval,
-                  __entry->blkno,
-                  __entry->offset,
-                  __entry->dupcnt,
-                  __entry->alist,
-                  __entry->bufsize,
-                  __entry->count,
-                  __entry->firstu,
-                  __entry->flags,
-                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
-                  __entry->bt_hashval,
-                  __entry->bt_before)
-);
-
-TRACE_EVENT(xfs_iext_insert,
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
-                struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
-       TP_ARGS(ip, idx, r, state, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_extnum_t, idx)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-               __field(xfs_exntst_t, state)
-               __field(int, bmap_state)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->idx = idx;
-               __entry->startoff = r->br_startoff;
-               __entry->startblock = r->br_startblock;
-               __entry->blockcount = r->br_blockcount;
-               __entry->state = r->br_state;
-               __entry->bmap_state = state;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-                 (long)__entry->idx,
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount,
-                 __entry->state,
-                 (void *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_bmap_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
-                unsigned long caller_ip),
-       TP_ARGS(ip, idx, state, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_extnum_t, idx)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-               __field(xfs_exntst_t, state)
-               __field(int, bmap_state)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               struct xfs_ifork        *ifp = (state & BMAP_ATTRFORK) ?
-                                               ip->i_afp : &ip->i_df;
-               struct xfs_bmbt_irec    r;
-
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->idx = idx;
-               __entry->startoff = r.br_startoff;
-               __entry->startblock = r.br_startblock;
-               __entry->blockcount = r.br_blockcount;
-               __entry->state = r.br_state;
-               __entry->bmap_state = state;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-                 (long)__entry->idx,
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount,
-                 __entry->state,
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BMAP_EVENT(name) \
-DEFINE_EVENT(xfs_bmap_class, name, \
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
-                unsigned long caller_ip), \
-       TP_ARGS(ip, idx, state, caller_ip))
-DEFINE_BMAP_EVENT(xfs_iext_remove);
-DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
-DEFINE_BMAP_EVENT(xfs_bmap_post_update);
-DEFINE_BMAP_EVENT(xfs_extlist);
-
-DECLARE_EVENT_CLASS(xfs_buf_class,
-       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
-       TP_ARGS(bp, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(unsigned, flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->flags = bp->b_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_EVENT(name) \
-DEFINE_EVENT(xfs_buf_class, name, \
-       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
-       TP_ARGS(bp, caller_ip))
-DEFINE_BUF_EVENT(xfs_buf_init);
-DEFINE_BUF_EVENT(xfs_buf_free);
-DEFINE_BUF_EVENT(xfs_buf_hold);
-DEFINE_BUF_EVENT(xfs_buf_rele);
-DEFINE_BUF_EVENT(xfs_buf_iodone);
-DEFINE_BUF_EVENT(xfs_buf_iorequest);
-DEFINE_BUF_EVENT(xfs_buf_bawrite);
-DEFINE_BUF_EVENT(xfs_buf_bdwrite);
-DEFINE_BUF_EVENT(xfs_buf_lock);
-DEFINE_BUF_EVENT(xfs_buf_lock_done);
-DEFINE_BUF_EVENT(xfs_buf_trylock);
-DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_iowait);
-DEFINE_BUF_EVENT(xfs_buf_iowait_done);
-DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_uncached);
-DEFINE_BUF_EVENT(xfs_bdstrat_shut);
-DEFINE_BUF_EVENT(xfs_buf_item_relse);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
-DEFINE_BUF_EVENT(xfs_buf_error_relse);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
-
-/* not really buffer traces, but the buf provides useful information */
-DEFINE_BUF_EVENT(xfs_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_reset_dqcounts);
-DEFINE_BUF_EVENT(xfs_inode_item_push);
-
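
Each DEFINE_BUF_EVENT above also generates the callable side: a trace_<name>() hook that compiles to a near-no-op until the event is enabled. Because the class takes an explicit caller_ip, call sites pass their own return address so the "%pf" in the format can name the caller; a hypothetical call site, assuming the usual _RET_IP_ helper from <linux/kernel.h>:

    trace_xfs_buf_hold(bp, _RET_IP_);   /* caller_ip feeds the "caller %pf" field */
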
-/* pass flags explicitly */
-DECLARE_EVENT_CLASS(xfs_buf_flags_class,
-       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
-       TP_ARGS(bp, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(unsigned, flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->flags = flags;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_FLAGS_EVENT(name) \
-DEFINE_EVENT(xfs_buf_flags_class, name, \
-       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
-       TP_ARGS(bp, flags, caller_ip))
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
-
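
__print_flags(), used throughout these events, decodes a bitmask into names at trace-output time from a table of { mask, name } pairs; XFS_BUF_FLAGS and the other *_FLAGS macros are such tables, defined earlier in this header outside the hunk shown. The shape of such a table, with illustrative values:

    #define DEMO_BUF_FLAGS \
            { 0x0001, "READ" }, \
            { 0x0002, "WRITE" }, \
            { 0x0004, "ASYNC" }

    /* in a TP_printk(): 0x3 renders as "READ|WRITE" */
    __print_flags(__entry->flags, "|", DEMO_BUF_FLAGS)
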
-TRACE_EVENT(xfs_buf_ioerror,
-       TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
-       TP_ARGS(bp, error, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(unsigned, flags)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(int, error)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->error = error;
-               __entry->flags = bp->b_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d error %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __entry->error,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_buf_item_class,
-       TP_PROTO(struct xfs_buf_log_item *bip),
-       TP_ARGS(bip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, buf_bno)
-               __field(size_t, buf_len)
-               __field(int, buf_hold)
-               __field(int, buf_pincount)
-               __field(int, buf_lockval)
-               __field(unsigned, buf_flags)
-               __field(unsigned, bli_recur)
-               __field(int, bli_refcount)
-               __field(unsigned, bli_flags)
-               __field(void *, li_desc)
-               __field(unsigned, li_flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = bip->bli_buf->b_target->bt_dev;
-               __entry->bli_flags = bip->bli_flags;
-               __entry->bli_recur = bip->bli_recur;
-               __entry->bli_refcount = atomic_read(&bip->bli_refcount);
-               __entry->buf_bno = bip->bli_buf->b_bn;
-               __entry->buf_len = bip->bli_buf->b_buffer_length;
-               __entry->buf_flags = bip->bli_buf->b_flags;
-               __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
-               __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
-               __entry->buf_lockval = bip->bli_buf->b_sema.count;
-               __entry->li_desc = bip->bli_item.li_desc;
-               __entry->li_flags = bip->bli_item.li_flags;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s recur %d refcount %d bliflags %s "
-                 "lidesc 0x%p liflags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->buf_bno,
-                 __entry->buf_len,
-                 __entry->buf_hold,
-                 __entry->buf_pincount,
-                 __entry->buf_lockval,
-                 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
-                 __entry->bli_recur,
-                 __entry->bli_refcount,
-                 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
-                 __entry->li_desc,
-                 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
-)
-
-#define DEFINE_BUF_ITEM_EVENT(name) \
-DEFINE_EVENT(xfs_buf_item_class, name, \
-       TP_PROTO(struct xfs_buf_log_item *bip), \
-       TP_ARGS(bip))
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
-
-DECLARE_EVENT_CLASS(xfs_lock_class,
-       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
-                unsigned long caller_ip),
-       TP_ARGS(ip, lock_flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, lock_flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->lock_flags = lock_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_LOCK_EVENT(name) \
-DEFINE_EVENT(xfs_lock_class, name, \
-       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
-                unsigned long caller_ip), \
-       TP_ARGS(ip, lock_flags, caller_ip))
-DEFINE_LOCK_EVENT(xfs_ilock);
-DEFINE_LOCK_EVENT(xfs_ilock_nowait);
-DEFINE_LOCK_EVENT(xfs_ilock_demote);
-DEFINE_LOCK_EVENT(xfs_iunlock);
-
-DECLARE_EVENT_CLASS(xfs_inode_class,
-       TP_PROTO(struct xfs_inode *ip),
-       TP_ARGS(ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino)
-)
-
-#define DEFINE_INODE_EVENT(name) \
-DEFINE_EVENT(xfs_inode_class, name, \
-       TP_PROTO(struct xfs_inode *ip), \
-       TP_ARGS(ip))
-DEFINE_INODE_EVENT(xfs_iget_skip);
-DEFINE_INODE_EVENT(xfs_iget_reclaim);
-DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
-DEFINE_INODE_EVENT(xfs_iget_hit);
-DEFINE_INODE_EVENT(xfs_iget_miss);
-
-DEFINE_INODE_EVENT(xfs_getattr);
-DEFINE_INODE_EVENT(xfs_setattr);
-DEFINE_INODE_EVENT(xfs_readlink);
-DEFINE_INODE_EVENT(xfs_alloc_file_space);
-DEFINE_INODE_EVENT(xfs_free_file_space);
-DEFINE_INODE_EVENT(xfs_readdir);
-#ifdef CONFIG_XFS_POSIX_ACL
-DEFINE_INODE_EVENT(xfs_get_acl);
-#endif
-DEFINE_INODE_EVENT(xfs_vm_bmap);
-DEFINE_INODE_EVENT(xfs_file_ioctl);
-DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
-DEFINE_INODE_EVENT(xfs_ioctl_setattr);
-DEFINE_INODE_EVENT(xfs_file_fsync);
-DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_write_inode);
-DEFINE_INODE_EVENT(xfs_evict_inode);
-
-DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
-DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
-
-DECLARE_EVENT_CLASS(xfs_iref_class,
-       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
-       TP_ARGS(ip, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, count)
-               __field(int, pincount)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->count = atomic_read(&VFS_I(ip)->i_count);
-               __entry->pincount = atomic_read(&ip->i_pincount);
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->count,
-                 __entry->pincount,
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_IREF_EVENT(name) \
-DEFINE_EVENT(xfs_iref_class, name, \
-       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
-       TP_ARGS(ip, caller_ip))
-DEFINE_IREF_EVENT(xfs_ihold);
-DEFINE_IREF_EVENT(xfs_irele);
-DEFINE_IREF_EVENT(xfs_inode_pin);
-DEFINE_IREF_EVENT(xfs_inode_unpin);
-DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
-
-DECLARE_EVENT_CLASS(xfs_namespace_class,
-       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
-       TP_ARGS(dp, name),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, dp_ino)
-               __dynamic_array(char, name, name->len)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(dp)->i_sb->s_dev;
-               __entry->dp_ino = dp->i_ino;
-               memcpy(__get_str(name), name->name, name->len);
-       ),
-       TP_printk("dev %d:%d dp ino 0x%llx name %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->dp_ino,
-                 __get_str(name))
-)
-
-#define DEFINE_NAMESPACE_EVENT(name) \
-DEFINE_EVENT(xfs_namespace_class, name, \
-       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
-       TP_ARGS(dp, name))
-DEFINE_NAMESPACE_EVENT(xfs_remove);
-DEFINE_NAMESPACE_EVENT(xfs_link);
-DEFINE_NAMESPACE_EVENT(xfs_lookup);
-DEFINE_NAMESPACE_EVENT(xfs_create);
-DEFINE_NAMESPACE_EVENT(xfs_symlink);
-
-TRACE_EVENT(xfs_rename,
-       TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
-                struct xfs_name *src_name, struct xfs_name *target_name),
-       TP_ARGS(src_dp, target_dp, src_name, target_name),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, src_dp_ino)
-               __field(xfs_ino_t, target_dp_ino)
-               __dynamic_array(char, src_name, src_name->len)
-               __dynamic_array(char, target_name, target_name->len)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
-               __entry->src_dp_ino = src_dp->i_ino;
-               __entry->target_dp_ino = target_dp->i_ino;
-               memcpy(__get_str(src_name), src_name->name, src_name->len);
-               memcpy(__get_str(target_name), target_name->name, target_name->len);
-       ),
-       TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
-                 " src name %s target name %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->src_dp_ino,
-                 __entry->target_dp_ino,
-                 __get_str(src_name),
-                 __get_str(target_name))
-)
-
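
xfs_rename above shows the other common storage helper: __dynamic_array reserves a variable-length region in the trace record, sized per event at fire time, and __get_str() addresses it from both TP_fast_assign and TP_printk. A sketch of the same pattern (illustrative names; note the explicit NUL, which the %s consumer relies on):

    TRACE_EVENT(demo_name,
            TP_PROTO(const unsigned char *name, int len),
            TP_ARGS(name, len),
            TP_STRUCT__entry(
                    __dynamic_array(char, name, len + 1)    /* +1 for the NUL */
            ),
            TP_fast_assign(
                    memcpy(__get_str(name), name, len);
                    __get_str(name)[len] = '\0';
            ),
            TP_printk("name %s", __get_str(name))
    );
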
-DECLARE_EVENT_CLASS(xfs_dquot_class,
-       TP_PROTO(struct xfs_dquot *dqp),
-       TP_ARGS(dqp),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(u32, id)
-               __field(unsigned, flags)
-               __field(unsigned, nrefs)
-               __field(unsigned long long, res_bcount)
-               __field(unsigned long long, bcount)
-               __field(unsigned long long, icount)
-               __field(unsigned long long, blk_hardlimit)
-               __field(unsigned long long, blk_softlimit)
-               __field(unsigned long long, ino_hardlimit)
-               __field(unsigned long long, ino_softlimit)
-       ),
-       TP_fast_assign(
-               __entry->dev = dqp->q_mount->m_super->s_dev;
-               __entry->id = be32_to_cpu(dqp->q_core.d_id);
-               __entry->flags = dqp->dq_flags;
-               __entry->nrefs = dqp->q_nrefs;
-               __entry->res_bcount = dqp->q_res_bcount;
-               __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
-               __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
-               __entry->blk_hardlimit =
-                       be64_to_cpu(dqp->q_core.d_blk_hardlimit);
-               __entry->blk_softlimit =
-                       be64_to_cpu(dqp->q_core.d_blk_softlimit);
-               __entry->ino_hardlimit =
-                       be64_to_cpu(dqp->q_core.d_ino_hardlimit);
-               __entry->ino_softlimit =
-                       be64_to_cpu(dqp->q_core.d_ino_softlimit);
-       ),
-       TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
-                 "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
-                 "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->id,
-                 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
-                 __entry->nrefs,
-                 __entry->res_bcount,
-                 __entry->bcount,
-                 __entry->blk_hardlimit,
-                 __entry->blk_softlimit,
-                 __entry->icount,
-                 __entry->ino_hardlimit,
-                 __entry->ino_softlimit)
-)
-
-#define DEFINE_DQUOT_EVENT(name) \
-DEFINE_EVENT(xfs_dquot_class, name, \
-       TP_PROTO(struct xfs_dquot *dqp), \
-       TP_ARGS(dqp))
-DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
-DEFINE_DQUOT_EVENT(xfs_dqattach_found);
-DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
-DEFINE_DQUOT_EVENT(xfs_dqalloc);
-DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
-DEFINE_DQUOT_EVENT(xfs_dqread);
-DEFINE_DQUOT_EVENT(xfs_dqread_fail);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
-DEFINE_DQUOT_EVENT(xfs_dqget_hit);
-DEFINE_DQUOT_EVENT(xfs_dqget_miss);
-DEFINE_DQUOT_EVENT(xfs_dqput);
-DEFINE_DQUOT_EVENT(xfs_dqput_wait);
-DEFINE_DQUOT_EVENT(xfs_dqput_free);
-DEFINE_DQUOT_EVENT(xfs_dqrele);
-DEFINE_DQUOT_EVENT(xfs_dqflush);
-DEFINE_DQUOT_EVENT(xfs_dqflush_force);
-DEFINE_DQUOT_EVENT(xfs_dqflush_done);
-
-DECLARE_EVENT_CLASS(xfs_loggrant_class,
-       TP_PROTO(struct log *log, struct xlog_ticket *tic),
-       TP_ARGS(log, tic),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(unsigned, trans_type)
-               __field(char, ocnt)
-               __field(char, cnt)
-               __field(int, curr_res)
-               __field(int, unit_res)
-               __field(unsigned int, flags)
-               __field(int, reserveq)
-               __field(int, writeq)
-               __field(int, grant_reserve_cycle)
-               __field(int, grant_reserve_bytes)
-               __field(int, grant_write_cycle)
-               __field(int, grant_write_bytes)
-               __field(int, curr_cycle)
-               __field(int, curr_block)
-               __field(xfs_lsn_t, tail_lsn)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->trans_type = tic->t_trans_type;
-               __entry->ocnt = tic->t_ocnt;
-               __entry->cnt = tic->t_cnt;
-               __entry->curr_res = tic->t_curr_res;
-               __entry->unit_res = tic->t_unit_res;
-               __entry->flags = tic->t_flags;
-               __entry->reserveq = list_empty(&log->l_reserveq);
-               __entry->writeq = list_empty(&log->l_writeq);
-               xlog_crack_grant_head(&log->l_grant_reserve_head,
-                               &__entry->grant_reserve_cycle,
-                               &__entry->grant_reserve_bytes);
-               xlog_crack_grant_head(&log->l_grant_write_head,
-                               &__entry->grant_write_cycle,
-                               &__entry->grant_write_bytes);
-               __entry->curr_cycle = log->l_curr_cycle;
-               __entry->curr_block = log->l_curr_block;
-               __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
-       ),
-       TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
-                 "t_unit_res %u t_flags %s reserveq %s "
-                 "writeq %s grant_reserve_cycle %d "
-                 "grant_reserve_bytes %d grant_write_cycle %d "
-                 "grant_write_bytes %d curr_cycle %d curr_block %d "
-                 "tail_cycle %d tail_block %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
-                 __entry->ocnt,
-                 __entry->cnt,
-                 __entry->curr_res,
-                 __entry->unit_res,
-                 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
-                 __entry->reserveq ? "empty" : "active",
-                 __entry->writeq ? "empty" : "active",
-                 __entry->grant_reserve_cycle,
-                 __entry->grant_reserve_bytes,
-                 __entry->grant_write_cycle,
-                 __entry->grant_write_bytes,
-                 __entry->curr_cycle,
-                 __entry->curr_block,
-                 CYCLE_LSN(__entry->tail_lsn),
-                 BLOCK_LSN(__entry->tail_lsn)
-       )
-)
-
-#define DEFINE_LOGGRANT_EVENT(name) \
-DEFINE_EVENT(xfs_loggrant_class, name, \
-       TP_PROTO(struct log *log, struct xlog_ticket *tic), \
-       TP_ARGS(log, tic))
-DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
-DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
-DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
-DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
-
-DECLARE_EVENT_CLASS(xfs_file_class,
-       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
-       TP_ARGS(ip, count, offset, flags),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fsize_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-               __entry->flags = flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count 0x%zx ioflags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count,
-                 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
-)
-
-#define DEFINE_RW_EVENT(name)          \
-DEFINE_EVENT(xfs_file_class, name,     \
-       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
-       TP_ARGS(ip, count, offset, flags))
-DEFINE_RW_EVENT(xfs_file_read);
-DEFINE_RW_EVENT(xfs_file_buffered_write);
-DEFINE_RW_EVENT(xfs_file_direct_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
-
-DECLARE_EVENT_CLASS(xfs_page_class,
-       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
-       TP_ARGS(inode, page, off),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(pgoff_t, pgoff)
-               __field(loff_t, size)
-               __field(unsigned long, offset)
-               __field(int, delalloc)
-               __field(int, unwritten)
-       ),
-       TP_fast_assign(
-               int delalloc = -1, unwritten = -1;
-
-               if (page_has_buffers(page))
-                       xfs_count_page_state(page, &delalloc, &unwritten);
-               __entry->dev = inode->i_sb->s_dev;
-               __entry->ino = XFS_I(inode)->i_ino;
-               __entry->pgoff = page_offset(page);
-               __entry->size = i_size_read(inode);
-               __entry->offset = off;
-               __entry->delalloc = delalloc;
-               __entry->unwritten = unwritten;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
-                 "delalloc %d unwritten %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->pgoff,
-                 __entry->size,
-                 __entry->offset,
-                 __entry->delalloc,
-                 __entry->unwritten)
-)
-
-#define DEFINE_PAGE_EVENT(name)                \
-DEFINE_EVENT(xfs_page_class, name,     \
-       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),    \
-       TP_ARGS(inode, page, off))
-DEFINE_PAGE_EVENT(xfs_writepage);
-DEFINE_PAGE_EVENT(xfs_releasepage);
-DEFINE_PAGE_EVENT(xfs_invalidatepage);
-
-DECLARE_EVENT_CLASS(xfs_imap_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-                int type, struct xfs_bmbt_irec *irec),
-       TP_ARGS(ip, offset, count, type, irec),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(loff_t, size)
-               __field(loff_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-               __field(int, type)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-               __entry->type = type;
-               __entry->startoff = irec ? irec->br_startoff : 0;
-               __entry->startblock = irec ? irec->br_startblock : 0;
-               __entry->blockcount = irec ? irec->br_blockcount : 0;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count %zd type %s "
-                 "startoff 0x%llx startblock %lld blockcount 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count,
-                 __print_symbolic(__entry->type, XFS_IO_TYPES),
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount)
-)
-
-#define DEFINE_IOMAP_EVENT(name)       \
-DEFINE_EVENT(xfs_imap_class, name,     \
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
-                int type, struct xfs_bmbt_irec *irec),         \
-       TP_ARGS(ip, offset, count, type, irec))
-DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-
-DECLARE_EVENT_CLASS(xfs_simple_io_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
-       TP_ARGS(ip, offset, count),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(loff_t, isize)
-               __field(loff_t, disize)
-               __field(loff_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->isize = ip->i_size;
-               __entry->disize = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count %zd",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->isize,
-                 __entry->disize,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count)
-);
-
-#define DEFINE_SIMPLE_IO_EVENT(name)   \
-DEFINE_EVENT(xfs_simple_io_class, name,        \
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),        \
-       TP_ARGS(ip, offset, count))
-DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
-DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
-DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
-DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
-
-DECLARE_EVENT_CLASS(xfs_itrunc_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
-       TP_ARGS(ip, new_size),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fsize_t, new_size)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = new_size;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size)
-)
-
-#define DEFINE_ITRUNC_EVENT(name) \
-DEFINE_EVENT(xfs_itrunc_class, name, \
-       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
-       TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
-
-TRACE_EVENT(xfs_pagecache_inval,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
-       TP_ARGS(ip, start, finish),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_off_t, start)
-               __field(xfs_off_t, finish)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->start = start;
-               __entry->finish = finish;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->start,
-                 __entry->finish)
-);
-
-TRACE_EVENT(xfs_bunmap,
-       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
-                int flags, unsigned long caller_ip),
-       TP_ARGS(ip, bno, len, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fileoff_t, bno)
-               __field(xfs_filblks_t, len)
-               __field(unsigned long, caller_ip)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->bno = bno;
-               __entry->len = len;
-               __entry->caller_ip = caller_ip;
-               __entry->flags = flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
-                 "flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->bno,
-                 __entry->len,
-                 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
-                 (void *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_busy_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len),
-       TP_ARGS(mp, agno, agbno, len),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len)
-);
-#define DEFINE_BUSY_EVENT(name) \
-DEFINE_EVENT(xfs_busy_class, name, \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
-                xfs_agblock_t agbno, xfs_extlen_t len), \
-       TP_ARGS(mp, agno, agbno, len))
-DEFINE_BUSY_EVENT(xfs_alloc_busy);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
-
-TRACE_EVENT(xfs_alloc_busy_trim,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len,
-                xfs_agblock_t tbno, xfs_extlen_t tlen),
-       TP_ARGS(mp, agno, agbno, len, tbno, tlen),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-               __field(xfs_agblock_t, tbno)
-               __field(xfs_extlen_t, tlen)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-               __entry->tbno = tbno;
-               __entry->tlen = tlen;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len,
-                 __entry->tbno,
-                 __entry->tlen)
-);
-
-TRACE_EVENT(xfs_trans_commit_lsn,
-       TP_PROTO(struct xfs_trans *trans),
-       TP_ARGS(trans),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(struct xfs_trans *, tp)
-               __field(xfs_lsn_t, lsn)
-       ),
-       TP_fast_assign(
-               __entry->dev = trans->t_mountp->m_super->s_dev;
-               __entry->tp = trans;
-               __entry->lsn = trans->t_commit_lsn;
-       ),
-       TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->tp,
-                 __entry->lsn)
-);
-
-TRACE_EVENT(xfs_agf,
-       TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
-                unsigned long caller_ip),
-       TP_ARGS(mp, agf, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(int, flags)
-               __field(__u32, length)
-               __field(__u32, bno_root)
-               __field(__u32, cnt_root)
-               __field(__u32, bno_level)
-               __field(__u32, cnt_level)
-               __field(__u32, flfirst)
-               __field(__u32, fllast)
-               __field(__u32, flcount)
-               __field(__u32, freeblks)
-               __field(__u32, longest)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = be32_to_cpu(agf->agf_seqno);
-               __entry->flags = flags;
-               __entry->length = be32_to_cpu(agf->agf_length);
-               __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
-               __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
-               __entry->bno_level =
-                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
-               __entry->cnt_level =
-                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
-               __entry->flfirst = be32_to_cpu(agf->agf_flfirst);
-               __entry->fllast = be32_to_cpu(agf->agf_fllast);
-               __entry->flcount = be32_to_cpu(agf->agf_flcount);
-               __entry->freeblks = be32_to_cpu(agf->agf_freeblks);
-               __entry->longest = be32_to_cpu(agf->agf_longest);
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
-                 "levels b %u c %u flfirst %u fllast %u flcount %u "
-                 "freeblks %u longest %u caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
-                 __entry->length,
-                 __entry->bno_root,
-                 __entry->cnt_root,
-                 __entry->bno_level,
-                 __entry->cnt_level,
-                 __entry->flfirst,
-                 __entry->fllast,
-                 __entry->flcount,
-                 __entry->freeblks,
-                 __entry->longest,
-                 (void *)__entry->caller_ip)
-);
-
-TRACE_EVENT(xfs_free_extent,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
-                xfs_extlen_t len, bool isfl, int haveleft, int haveright),
-       TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-               __field(int, isfl)
-               __field(int, haveleft)
-               __field(int, haveright)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-               __entry->isfl = isfl;
-               __entry->haveleft = haveleft;
-               __entry->haveright = haveright;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len,
-                 __entry->isfl,
-                 __entry->haveleft ?
-                       (__entry->haveright ? "both" : "left") :
-                       (__entry->haveright ? "right" : "none"))
-);
-
-DECLARE_EVENT_CLASS(xfs_alloc_class,
-       TP_PROTO(struct xfs_alloc_arg *args),
-       TP_ARGS(args),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, minlen)
-               __field(xfs_extlen_t, maxlen)
-               __field(xfs_extlen_t, mod)
-               __field(xfs_extlen_t, prod)
-               __field(xfs_extlen_t, minleft)
-               __field(xfs_extlen_t, total)
-               __field(xfs_extlen_t, alignment)
-               __field(xfs_extlen_t, minalignslop)
-               __field(xfs_extlen_t, len)
-               __field(short, type)
-               __field(short, otype)
-               __field(char, wasdel)
-               __field(char, wasfromfl)
-               __field(char, isfl)
-               __field(char, userdata)
-               __field(xfs_fsblock_t, firstblock)
-       ),
-       TP_fast_assign(
-               __entry->dev = args->mp->m_super->s_dev;
-               __entry->agno = args->agno;
-               __entry->agbno = args->agbno;
-               __entry->minlen = args->minlen;
-               __entry->maxlen = args->maxlen;
-               __entry->mod = args->mod;
-               __entry->prod = args->prod;
-               __entry->minleft = args->minleft;
-               __entry->total = args->total;
-               __entry->alignment = args->alignment;
-               __entry->minalignslop = args->minalignslop;
-               __entry->len = args->len;
-               __entry->type = args->type;
-               __entry->otype = args->otype;
-               __entry->wasdel = args->wasdel;
-               __entry->wasfromfl = args->wasfromfl;
-               __entry->isfl = args->isfl;
-               __entry->userdata = args->userdata;
-               __entry->firstblock = args->firstblock;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
-                 "prod %u minleft %u total %u alignment %u minalignslop %u "
-                 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
-                 "userdata %d firstblock 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->minlen,
-                 __entry->maxlen,
-                 __entry->mod,
-                 __entry->prod,
-                 __entry->minleft,
-                 __entry->total,
-                 __entry->alignment,
-                 __entry->minalignslop,
-                 __entry->len,
-                 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
-                 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
-                 __entry->wasdel,
-                 __entry->wasfromfl,
-                 __entry->isfl,
-                 __entry->userdata,
-                 (unsigned long long)__entry->firstblock)
-)
-
-#define DEFINE_ALLOC_EVENT(name) \
-DEFINE_EVENT(xfs_alloc_class, name, \
-       TP_PROTO(struct xfs_alloc_arg *args), \
-       TP_ARGS(args))
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
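
Each DEFINE_ALLOC_EVENT(name) above expands, through DEFINE_EVENT(), into a
trace_<name>() entry point taking the class's TP_PROTO arguments. A minimal
call-site sketch (the wrapper function is hypothetical and assumes this trace
header is included):

    static void note_exact_alloc(struct xfs_alloc_arg *args)
    {
            /* emits one xfs_alloc_class event, recording every field
             * that TP_fast_assign() copies out of *args */
            trace_xfs_alloc_exact_done(args);
    }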
-
-DECLARE_EVENT_CLASS(xfs_dir2_class,
-       TP_PROTO(struct xfs_da_args *args),
-       TP_ARGS(args),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __dynamic_array(char, name, args->namelen)
-               __field(int, namelen)
-               __field(xfs_dahash_t, hashval)
-               __field(xfs_ino_t, inumber)
-               __field(int, op_flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               if (args->namelen)
-                       memcpy(__get_str(name), args->name, args->namelen);
-               __entry->namelen = args->namelen;
-               __entry->hashval = args->hashval;
-               __entry->inumber = args->inumber;
-               __entry->op_flags = args->op_flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
-                 "inumber 0x%llx op_flags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->namelen,
-                 __entry->namelen ? __get_str(name) : NULL,
-                 __entry->namelen,
-                 __entry->hashval,
-                 __entry->inumber,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
-)
-
-#define DEFINE_DIR2_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_class, name, \
-       TP_PROTO(struct xfs_da_args *args), \
-       TP_ARGS(args))
-DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
-DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
-
-DECLARE_EVENT_CLASS(xfs_dir2_space_class,
-       TP_PROTO(struct xfs_da_args *args, int idx),
-       TP_ARGS(args, idx),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, op_flags)
-               __field(int, idx)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               __entry->op_flags = args->op_flags;
-               __entry->idx = idx;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
-                 __entry->idx)
-)
-
-#define DEFINE_DIR2_SPACE_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_space_class, name, \
-       TP_PROTO(struct xfs_da_args *args, int idx), \
-       TP_ARGS(args, idx))
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
-
-TRACE_EVENT(xfs_dir2_leafn_moveents,
-       TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
-       TP_ARGS(args, src_idx, dst_idx, count),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, op_flags)
-               __field(int, src_idx)
-               __field(int, dst_idx)
-               __field(int, count)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               __entry->op_flags = args->op_flags;
-               __entry->src_idx = src_idx;
-               __entry->dst_idx = dst_idx;
-               __entry->count = count;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx op_flags %s "
-                 "src_idx %d dst_idx %d count %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
-                 __entry->src_idx,
-                 __entry->dst_idx,
-                 __entry->count)
-);
-
-#define XFS_SWAPEXT_INODES \
-       { 0,    "target" }, \
-       { 1,    "temp" }
-
-#define XFS_INODE_FORMAT_STR \
-       { 0,    "invalid" }, \
-       { 1,    "local" }, \
-       { 2,    "extent" }, \
-       { 3,    "btree" }
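
At print time, __print_symbolic() resolves the stored integer against tables
like these. A standalone C equivalent of the XFS_INODE_FORMAT_STR lookup, for
illustration only:

    static const char *inode_format_str(int format)
    {
            switch (format) {
            case 0: return "invalid";
            case 1: return "local";
            case 2: return "extent";
            case 3: return "btree";
            default: return "?";
            }
    }

So a swap-extent event for a btree-format inode renders "btree format" in the
TP_printk message of the class below.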
-
-DECLARE_EVENT_CLASS(xfs_swap_extent_class,
-       TP_PROTO(struct xfs_inode *ip, int which),
-       TP_ARGS(ip, which),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(int, which)
-               __field(xfs_ino_t, ino)
-               __field(int, format)
-               __field(int, nex)
-               __field(int, max_nex)
-               __field(int, broot_size)
-               __field(int, fork_off)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->which = which;
-               __entry->ino = ip->i_ino;
-               __entry->format = ip->i_d.di_format;
-               __entry->nex = ip->i_d.di_nextents;
-               __entry->max_nex = ip->i_df.if_ext_max;
-               __entry->broot_size = ip->i_df.if_broot_bytes;
-               __entry->fork_off = XFS_IFORK_BOFF(ip);
-       ),
-       TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
-                 "Max in-fork extents %d, broot size %d, fork offset %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
-                 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
-                 __entry->nex,
-                 __entry->max_nex,
-                 __entry->broot_size,
-                 __entry->fork_off)
-)
-
-#define DEFINE_SWAPEXT_EVENT(name) \
-DEFINE_EVENT(xfs_swap_extent_class, name, \
-       TP_PROTO(struct xfs_inode *ip, int which), \
-       TP_ARGS(ip, which))
-
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
-       TP_PROTO(struct log *log, struct xlog_recover *trans,
-               struct xlog_recover_item *item, int pass),
-       TP_ARGS(log, trans, item, pass),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(unsigned long, item)
-               __field(xlog_tid_t, tid)
-               __field(int, type)
-               __field(int, pass)
-               __field(int, count)
-               __field(int, total)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->item = (unsigned long)item;
-               __entry->tid = trans->r_log_tid;
-               __entry->type = ITEM_TYPE(item);
-               __entry->pass = pass;
-               __entry->count = item->ri_cnt;
-               __entry->total = item->ri_total;
-       ),
-       TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
-                 "item region count/total %d/%d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->tid,
-                 __entry->pass,
-                 (void *)__entry->item,
-                 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
-                 __entry->count,
-                 __entry->total)
-)
-
-#define DEFINE_LOG_RECOVER_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_item_class, name, \
-       TP_PROTO(struct log *log, struct xlog_recover *trans, \
-               struct xlog_recover_item *item, int pass), \
-       TP_ARGS(log, trans, item, pass))
-
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
-       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
-       TP_ARGS(log, buf_f),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(__int64_t, blkno)
-               __field(unsigned short, len)
-               __field(unsigned short, flags)
-               __field(unsigned short, size)
-               __field(unsigned int, map_size)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->blkno = buf_f->blf_blkno;
-               __entry->len = buf_f->blf_len;
-               __entry->flags = buf_f->blf_flags;
-               __entry->size = buf_f->blf_size;
-               __entry->map_size = buf_f->blf_map_size;
-       ),
-       TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
-                       "map_size %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->blkno,
-                 __entry->len,
-                 __entry->flags,
-                 __entry->size,
-                 __entry->map_size)
-)
-
-#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
-       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
-       TP_ARGS(log, buf_f))
-
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
-       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
-       TP_ARGS(log, in_f),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(unsigned short, size)
-               __field(int, fields)
-               __field(unsigned short, asize)
-               __field(unsigned short, dsize)
-               __field(__int64_t, blkno)
-               __field(int, len)
-               __field(int, boffset)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->ino = in_f->ilf_ino;
-               __entry->size = in_f->ilf_size;
-               __entry->fields = in_f->ilf_fields;
-               __entry->asize = in_f->ilf_asize;
-               __entry->dsize = in_f->ilf_dsize;
-               __entry->blkno = in_f->ilf_blkno;
-               __entry->len = in_f->ilf_len;
-               __entry->boffset = in_f->ilf_boffset;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
-                       "dsize %d, blkno 0x%llx, len %d, boffset %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->fields,
-                 __entry->asize,
-                 __entry->dsize,
-                 __entry->blkno,
-                 __entry->len,
-                 __entry->boffset)
-)
-#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
-       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
-       TP_ARGS(log, in_f))
-
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
-
-DECLARE_EVENT_CLASS(xfs_discard_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len),
-       TP_ARGS(mp, agno, agbno, len),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u\n",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len)
-)
-
-#define DEFINE_DISCARD_EVENT(name) \
-DEFINE_EVENT(xfs_discard_class, name, \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
-                xfs_agblock_t agbno, xfs_extlen_t len), \
-       TP_ARGS(mp, agno, agbno, len))
-DEFINE_DISCARD_EVENT(xfs_discard_extent);
-DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
-DEFINE_DISCARD_EVENT(xfs_discard_exclude);
-DEFINE_DISCARD_EVENT(xfs_discard_busy);
-
-#endif /* _TRACE_XFS_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE xfs_trace
-#include <trace/define_trace.h>
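
Per the usual trace-header convention, the #undef/#define pair above lets
<trace/define_trace.h> re-include this header, and exactly one translation
unit defines CREATE_TRACE_POINTS before including it so the event bodies are
emitted once. A sketch of that consumer (conventionally a single xfs_trace.c
beside this header):

    #define CREATE_TRACE_POINTS
    #include "xfs_trace.h"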
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
deleted file mode 100644 (file)
index 7c220b4..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-#include "xfs_fs.h"
-
-struct file;
-struct xfs_inode;
-struct xfs_iomap;
-struct attrlist_cursor_kern;
-
-/*
- * Return values for xfs_inactive.  A return value of
- * VN_INACTIVE_NOCACHE implies that the file system behavior
- * has disassociated its state and bhv_desc_t from the vnode.
- */
-#define        VN_INACTIVE_CACHE       0
-#define        VN_INACTIVE_NOCACHE     1
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISDIRECT    0x00004         /* bypass page cache */
-#define IO_INVIS       0x00020         /* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
-       { IO_ISDIRECT,  "DIRECT" }, \
-       { IO_INVIS,     "INVIS"}
-
-/*
- * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
- */
-#define FI_NONE                        0       /* none */
-#define FI_REMAPF              1       /* Do a remapf prior to the operation */
-#define FI_REMAPF_LOCKED       2       /* Do a remapf prior to the operation.
-                                          Prevent VM access to the pages until
-                                          the operation completes. */
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp)  mapping_mapped(vp->i_mapping)
-#define VN_CACHED(vp)  (vp->i_mapping->nrpages)
-#define VN_DIRTY(vp)   mapping_tagged(vp->i_mapping, \
-                                       PAGECACHE_TAG_DIRTY)
-
-
-#endif /* __XFS_VNODE_H__ */
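
The three predicates above are thin wrappers over address_space state;
equivalent inline forms, for illustration (vp is a struct inode *, as in the
macros):

    #include <linux/fs.h>           /* mapping_mapped(), mapping_tagged() */
    #include <linux/pagemap.h>      /* PAGECACHE_TAG_DIRTY */

    static inline bool vn_mapped(struct inode *vp)
    {
            return mapping_mapped(vp->i_mapping);   /* any live mmap()? */
    }

    static inline bool vn_dirty(struct inode *vp)
    {
            return mapping_tagged(vp->i_mapping, PAGECACHE_TAG_DIRTY);
    }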
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
deleted file mode 100644 (file)
index 87d3e03..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (C) 2008 Christoph Hellwig.
- * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
-
-
-static int
-xfs_xattr_get(struct dentry *dentry, const char *name,
-               void *value, size_t size, int xflags)
-{
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
-       int error, asize = size;
-
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-
-       /* Convert Linux syscall to XFS internal ATTR flags */
-       if (!size) {
-               xflags |= ATTR_KERNOVAL;
-               value = NULL;
-       }
-
-       error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
-       if (error)
-               return error;
-       return asize;
-}
-
-static int
-xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
-               size_t size, int flags, int xflags)
-{
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
-
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-
-       /* Convert Linux syscall to XFS internal ATTR flags */
-       if (flags & XATTR_CREATE)
-               xflags |= ATTR_CREATE;
-       if (flags & XATTR_REPLACE)
-               xflags |= ATTR_REPLACE;
-
-       if (!value)
-               return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
-       return -xfs_attr_set(ip, (unsigned char *)name,
-                               (void *)value, size, xflags);
-}
-
-static const struct xattr_handler xfs_xattr_user_handler = {
-       .prefix = XATTR_USER_PREFIX,
-       .flags  = 0, /* no flags implies user namespace */
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_trusted_handler = {
-       .prefix = XATTR_TRUSTED_PREFIX,
-       .flags  = ATTR_ROOT,
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_security_handler = {
-       .prefix = XATTR_SECURITY_PREFIX,
-       .flags  = ATTR_SECURE,
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-const struct xattr_handler *xfs_xattr_handlers[] = {
-       &xfs_xattr_user_handler,
-       &xfs_xattr_trusted_handler,
-       &xfs_xattr_security_handler,
-#ifdef CONFIG_XFS_POSIX_ACL
-       &xfs_xattr_acl_access_handler,
-       &xfs_xattr_acl_default_handler,
-#endif
-       NULL
-};
-
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
-       if (flags & XFS_ATTR_SECURE)
-               return sizeof("security");
-       else if (flags & XFS_ATTR_ROOT)
-               return sizeof("trusted");
-       else
-               return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
-       if (flags & XFS_ATTR_SECURE)
-               return xfs_xattr_security_handler.prefix;
-       else if (flags & XFS_ATTR_ROOT)
-               return xfs_xattr_trusted_handler.prefix;
-       else
-               return xfs_xattr_user_handler.prefix;
-}
-
-static int
-xfs_xattr_put_listent(
-       struct xfs_attr_list_context *context,
-       int             flags,
-       unsigned char   *name,
-       int             namelen,
-       int             valuelen,
-       unsigned char   *value)
-{
-       unsigned int prefix_len = xfs_xattr_prefix_len(flags);
-       char *offset;
-       int arraytop;
-
-       ASSERT(context->count >= 0);
-
-       /*
-        * Only show root namespace entries if we are actually allowed to
-        * see them.
-        */
-       if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
-               return 0;
-
-       arraytop = context->count + prefix_len + namelen + 1;
-       if (arraytop > context->firstu) {
-               context->count = -1;    /* insufficient space */
-               return 1;
-       }
-       offset = (char *)context->alist + context->count;
-       strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
-       offset += prefix_len;
-       strncpy(offset, (char *)name, namelen);                 /* real name */
-       offset += namelen;
-       *offset = '\0';
-       context->count += prefix_len + namelen + 1;
-       return 0;
-}
-
-static int
-xfs_xattr_put_listent_sizes(
-       struct xfs_attr_list_context *context,
-       int             flags,
-       unsigned char   *name,
-       int             namelen,
-       int             valuelen,
-       unsigned char   *value)
-{
-       context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
-       return 0;
-}
-
-static int
-list_one_attr(const char *name, const size_t len, void *data,
-               size_t size, ssize_t *result)
-{
-       char *p = data + *result;
-
-       *result += len;
-       if (!size)
-               return 0;
-       if (*result > size)
-               return -ERANGE;
-
-       strcpy(p, name);
-       return 0;
-}
-
-ssize_t
-xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
-{
-       struct xfs_attr_list_context context;
-       struct attrlist_cursor_kern cursor = { 0 };
-       struct inode            *inode = dentry->d_inode;
-       int                     error;
-
-       /*
-        * First read the regular on-disk attributes.
-        */
-       memset(&context, 0, sizeof(context));
-       context.dp = XFS_I(inode);
-       context.cursor = &cursor;
-       context.resynch = 1;
-       context.alist = data;
-       context.bufsize = size;
-       context.firstu = context.bufsize;
-
-       if (size)
-               context.put_listent = xfs_xattr_put_listent;
-       else
-               context.put_listent = xfs_xattr_put_listent_sizes;
-
-       xfs_attr_list_int(&context);
-       if (context.count < 0)
-               return -ERANGE;
-
-       /*
-        * Then add the two synthetic ACL attributes.
-        */
-       if (posix_acl_access_exists(inode)) {
-               error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
-                               strlen(POSIX_ACL_XATTR_ACCESS) + 1,
-                               data, size, &context.count);
-               if (error)
-                       return error;
-       }
-
-       if (posix_acl_default_exists(inode)) {
-               error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
-                               strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
-                               data, size, &context.count);
-               if (error)
-                       return error;
-       }
-
-       return context.count;
-}
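
xfs_vn_listxattr() implements the kernel half of the standard two-pass
listxattr(2) protocol: a zero-size call only sums name lengths (the *_sizes
callback above), and a sized call copies the NUL-separated names. The
matching userspace pattern, for context (the path is hypothetical, error
handling minimal):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/xattr.h>

    int main(void)
    {
            ssize_t need = listxattr("/mnt/xfs/file", NULL, 0); /* size probe */
            if (need <= 0)
                    return 1;
            char *buf = malloc(need);
            if (!buf)
                    return 1;
            ssize_t got = listxattr("/mnt/xfs/file", buf, need); /* fill pass */
            for (ssize_t off = 0; off < got; off += strlen(buf + off) + 1)
                    puts(buf + off);    /* one NUL-terminated name each */
            free(buf);
            return 0;
    }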
diff --git a/fs/xfs/mrlock.h b/fs/xfs/mrlock.h
new file mode 100644 (file)
index 0000000..ff6a198
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_MRLOCK_H__
+#define __XFS_SUPPORT_MRLOCK_H__
+
+#include <linux/rwsem.h>
+
+typedef struct {
+       struct rw_semaphore     mr_lock;
+#ifdef DEBUG
+       int                     mr_writer;
+#endif
+} mrlock_t;
+
+#ifdef DEBUG
+#define mrinit(mrp, name)      \
+       do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
+#else
+#define mrinit(mrp, name)      \
+       do { init_rwsem(&(mrp)->mr_lock); } while (0)
+#endif
+
+#define mrlock_init(mrp, t,n,s)        mrinit(mrp, n)
+#define mrfree(mrp)            do { } while (0)
+
+static inline void mraccess_nested(mrlock_t *mrp, int subclass)
+{
+       down_read_nested(&mrp->mr_lock, subclass);
+}
+
+static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
+{
+       down_write_nested(&mrp->mr_lock, subclass);
+#ifdef DEBUG
+       mrp->mr_writer = 1;
+#endif
+}
+
+static inline int mrtryaccess(mrlock_t *mrp)
+{
+       return down_read_trylock(&mrp->mr_lock);
+}
+
+static inline int mrtryupdate(mrlock_t *mrp)
+{
+       if (!down_write_trylock(&mrp->mr_lock))
+               return 0;
+#ifdef DEBUG
+       mrp->mr_writer = 1;
+#endif
+       return 1;
+}
+
+static inline void mrunlock_excl(mrlock_t *mrp)
+{
+#ifdef DEBUG
+       mrp->mr_writer = 0;
+#endif
+       up_write(&mrp->mr_lock);
+}
+
+static inline void mrunlock_shared(mrlock_t *mrp)
+{
+       up_read(&mrp->mr_lock);
+}
+
+static inline void mrdemote(mrlock_t *mrp)
+{
+#ifdef DEBUG
+       mrp->mr_writer = 0;
+#endif
+       downgrade_write(&mrp->mr_lock);
+}
+
+#endif /* __XFS_SUPPORT_MRLOCK_H__ */
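
A usage sketch for the wrapper above (the caller is hypothetical); the
interesting operation is mrdemote(), which downgrades from exclusive to
shared access without an unlocked window:

    static mrlock_t demo_lock;

    static void demo_update_then_read(void)
    {
            mrinit(&demo_lock, "demo");
            mrupdate_nested(&demo_lock, 0); /* writer: down_write_nested() */
            /* ... modify the protected state ... */
            mrdemote(&demo_lock);           /* downgrade_write(): now shared */
            /* ... read the protected state ... */
            mrunlock_shared(&demo_lock);
            mrfree(&demo_lock);
    }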
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
deleted file mode 100644 (file)
index db62959..0000000
+++ /dev/null
@@ -1,1454 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-
-/*
-   LOCK ORDER
-
-   inode lock              (ilock)
-   dquot hash-chain lock    (hashlock)
-   xqm dquot freelist lock  (freelistlock)
-   mount's dquot list lock  (mplistlock)
-   user dquot lock - lock ordering among dquots is based on the uid or gid
-   group dquot lock - similar to udquots. Between the two dquots, the udquot
-                     has to be locked first.
-   pin lock - the dquot lock must be held to take this lock.
-   flush lock - ditto.
-*/
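
Per the ordering above, a thread that needs both quota types must lock the
user dquot before the group dquot. A hypothetical helper that respects the
rule (xfs_dqlock() takes a dquot's q_qlock):

    static void lock_dquot_pair(struct xfs_dquot *udqp, struct xfs_dquot *gdqp)
    {
            xfs_dqlock(udqp);       /* user dquot first, per the lock order */
            xfs_dqlock(gdqp);       /* then the group dquot */
    }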
-
-#ifdef DEBUG
-xfs_buftarg_t *xfs_dqerror_target;
-int xfs_do_dqerror;
-int xfs_dqreq_num;
-int xfs_dqerror_mod = 33;
-#endif
-
-static struct lock_class_key xfs_dquot_other_class;
-
-/*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots is above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
-       xfs_mount_t  *mp,
-       xfs_dqid_t   id,
-       uint         type)
-{
-       xfs_dquot_t     *dqp;
-       boolean_t       brandnewdquot;
-
-       brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
-       dqp->dq_flags = type;
-       dqp->q_core.d_id = cpu_to_be32(id);
-       dqp->q_mount = mp;
-
-       /*
-        * No need to re-initialize these if this is a reclaimed dquot.
-        */
-       if (brandnewdquot) {
-               INIT_LIST_HEAD(&dqp->q_freelist);
-               mutex_init(&dqp->q_qlock);
-               init_waitqueue_head(&dqp->q_pinwait);
-
-               /*
-                * Because we want to use a counting completion, complete
-                * the flush completion once to allow a single access to
-                * the flush completion without blocking.
-                */
-               init_completion(&dqp->q_flush);
-               complete(&dqp->q_flush);
-
-               trace_xfs_dqinit(dqp);
-       } else {
-               /*
-                * Only the q_core portion was zeroed in dqreclaim_one().
-                * So, we need to reset the others.
-                */
-               dqp->q_nrefs = 0;
-               dqp->q_blkno = 0;
-               INIT_LIST_HEAD(&dqp->q_mplist);
-               INIT_LIST_HEAD(&dqp->q_hashlist);
-               dqp->q_bufoffset = 0;
-               dqp->q_fileoffset = 0;
-               dqp->q_transp = NULL;
-               dqp->q_gdquot = NULL;
-               dqp->q_res_bcount = 0;
-               dqp->q_res_icount = 0;
-               dqp->q_res_rtbcount = 0;
-               atomic_set(&dqp->q_pincount, 0);
-               dqp->q_hash = NULL;
-               ASSERT(list_empty(&dqp->q_freelist));
-
-               trace_xfs_dqreuse(dqp);
-       }
-
-       /*
-        * In either case we need to make sure group quotas have a different
-        * lock class than user quotas, to make sure lockdep knows we can
-        * hold locks of one of each at the same time.
-        */
-       if (!(type & XFS_DQ_USER))
-               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-
-       /*
-        * log item gets initialized later
-        */
-       return (dqp);
-}
-
-/*
- * This is called to free all the memory associated with a dquot
- */
-void
-xfs_qm_dqdestroy(
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(list_empty(&dqp->q_freelist));
-
-       mutex_destroy(&dqp->q_qlock);
-       kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
-
-       atomic_dec(&xfs_Gqm->qm_totaldquots);
-}
-
-/*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
-       xfs_dqid_t      id,
-       uint            type,
-       xfs_dqblk_t     *d)
-{
-       /*
-        * Caller has zero'd the entire dquot 'chunk' already.
-        */
-       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-       d->dd_diskdq.d_id = cpu_to_be32(id);
-       d->dd_diskdq.d_flags = type;
-}
-
-/*
- * If default limits are in force, push them into the dquot now.
- * We overwrite the dquot limits only if they are zero and this
- * is not the root dquot.
- */
-void
-xfs_qm_adjust_dqlimits(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *d)
-{
-       xfs_quotainfo_t         *q = mp->m_quotainfo;
-
-       ASSERT(d->d_id);
-
-       if (q->qi_bsoftlimit && !d->d_blk_softlimit)
-               d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
-       if (q->qi_bhardlimit && !d->d_blk_hardlimit)
-               d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
-       if (q->qi_isoftlimit && !d->d_ino_softlimit)
-               d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
-       if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-               d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
-       if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-               d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
-       if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-               d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
-}
-
-/*
- * Check the limits and timers of a dquot and start or reset timers
- * if necessary.
- * This gets called even when quota enforcement is OFF, which makes our
- * life a little less complicated. (We just don't reject any quota
- * reservations in that case, when enforcement is off).
- * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
- * enforcement's off.
- * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded.  They do
- * get reset to zero, however, when we find the count to be under
- * the soft limit (they are only ever set non-zero via userspace).
- */
-void
-xfs_qm_adjust_dqtimers(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *d)
-{
-       ASSERT(d->d_id);
-
-#ifdef DEBUG
-       if (d->d_blk_hardlimit)
-               ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
-                      be64_to_cpu(d->d_blk_hardlimit));
-       if (d->d_ino_hardlimit)
-               ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
-                      be64_to_cpu(d->d_ino_hardlimit));
-       if (d->d_rtb_hardlimit)
-               ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
-                      be64_to_cpu(d->d_rtb_hardlimit));
-#endif
-
-       if (!d->d_btimer) {
-               if ((d->d_blk_softlimit &&
-                    (be64_to_cpu(d->d_bcount) >=
-                     be64_to_cpu(d->d_blk_softlimit))) ||
-                   (d->d_blk_hardlimit &&
-                    (be64_to_cpu(d->d_bcount) >=
-                     be64_to_cpu(d->d_blk_hardlimit)))) {
-                       d->d_btimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_btimelimit);
-               } else {
-                       d->d_bwarns = 0;
-               }
-       } else {
-               if ((!d->d_blk_softlimit ||
-                    (be64_to_cpu(d->d_bcount) <
-                     be64_to_cpu(d->d_blk_softlimit))) &&
-                   (!d->d_blk_hardlimit ||
-                   (be64_to_cpu(d->d_bcount) <
-                    be64_to_cpu(d->d_blk_hardlimit)))) {
-                       d->d_btimer = 0;
-               }
-       }
-
-       if (!d->d_itimer) {
-               if ((d->d_ino_softlimit &&
-                    (be64_to_cpu(d->d_icount) >=
-                     be64_to_cpu(d->d_ino_softlimit))) ||
-                   (d->d_ino_hardlimit &&
-                    (be64_to_cpu(d->d_icount) >=
-                     be64_to_cpu(d->d_ino_hardlimit)))) {
-                       d->d_itimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_itimelimit);
-               } else {
-                       d->d_iwarns = 0;
-               }
-       } else {
-               if ((!d->d_ino_softlimit ||
-                    (be64_to_cpu(d->d_icount) <
-                     be64_to_cpu(d->d_ino_softlimit)))  &&
-                   (!d->d_ino_hardlimit ||
-                    (be64_to_cpu(d->d_icount) <
-                     be64_to_cpu(d->d_ino_hardlimit)))) {
-                       d->d_itimer = 0;
-               }
-       }
-
-       if (!d->d_rtbtimer) {
-               if ((d->d_rtb_softlimit &&
-                    (be64_to_cpu(d->d_rtbcount) >=
-                     be64_to_cpu(d->d_rtb_softlimit))) ||
-                   (d->d_rtb_hardlimit &&
-                    (be64_to_cpu(d->d_rtbcount) >=
-                     be64_to_cpu(d->d_rtb_hardlimit)))) {
-                       d->d_rtbtimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_rtbtimelimit);
-               } else {
-                       d->d_rtbwarns = 0;
-               }
-       } else {
-               if ((!d->d_rtb_softlimit ||
-                    (be64_to_cpu(d->d_rtbcount) <
-                     be64_to_cpu(d->d_rtb_softlimit))) &&
-                   (!d->d_rtb_hardlimit ||
-                    (be64_to_cpu(d->d_rtbcount) <
-                     be64_to_cpu(d->d_rtb_hardlimit)))) {
-                       d->d_rtbtimer = 0;
-               }
-       }
-}
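
The same rule is applied three times above, once per resource: a timer starts
when the count reaches either limit and is cleared once the count is under
both. Distilled into one hypothetical predicate (a limit of zero means "no
limit", as in the code):

    static bool quota_over_limit(__be64 count, __be64 soft, __be64 hard)
    {
            return (soft && be64_to_cpu(count) >= be64_to_cpu(soft)) ||
                   (hard && be64_to_cpu(count) >= be64_to_cpu(hard));
    }

xfs_qm_adjust_dqtimers() starts the timer when this is true and zeroes it
when it is false.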
-
-/*
- * initialize a buffer full of dquots and log the whole thing
- */
-STATIC void
-xfs_qm_init_dquot_blk(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,
-       uint            type,
-       xfs_buf_t       *bp)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       xfs_dqblk_t     *d;
-       int             curid, i;
-
-       ASSERT(tp);
-       ASSERT(xfs_buf_islocked(bp));
-
-       d = bp->b_addr;
-
-       /*
-        * ID of the first dquot in the block - id's are zero based.
-        */
-       curid = id - (id % q->qi_dqperchunk);
-       ASSERT(curid >= 0);
-       memset(d, 0, BBTOB(q->qi_dqchunklen));
-       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
-               xfs_qm_dqinit_core(curid, type, d);
-       xfs_trans_dquot_buf(tp, bp,
-                           (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
-                           ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
-                            XFS_BLF_GDQUOT_BUF)));
-       xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
-}
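
The curid computation rounds the incoming id down to the first id of its
chunk, since ids are dense within a chunk. With a hypothetical qi_dqperchunk
of 30, id 73 yields 73 - (73 % 30) = 60, so ids 60..89 share one on-disk
chunk:

    static xfs_dqid_t first_id_in_chunk(xfs_dqid_t id, uint dqperchunk)
    {
            return id - (id % dqperchunk);  /* e.g. 73 -> 60 for 30/chunk */
    }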
-
-
-
-/*
- * Allocate a block and fill it with dquots.
- * This is called when the bmapi finds a hole.
- */
-STATIC int
-xfs_qm_dqalloc(
-       xfs_trans_t     **tpp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *dqp,
-       xfs_inode_t     *quotip,
-       xfs_fileoff_t   offset_fsb,
-       xfs_buf_t       **O_bpp)
-{
-       xfs_fsblock_t   firstblock;
-       xfs_bmap_free_t flist;
-       xfs_bmbt_irec_t map;
-       int             nmaps, error, committed;
-       xfs_buf_t       *bp;
-       xfs_trans_t     *tp = *tpp;
-
-       ASSERT(tp != NULL);
-
-       trace_xfs_dqalloc(dqp);
-
-       /*
-        * Initialize the bmap freelist prior to calling bmapi code.
-        */
-       xfs_bmap_init(&flist, &firstblock);
-       xfs_ilock(quotip, XFS_ILOCK_EXCL);
-       /*
-        * Return if this quota type was turned off while we didn't
-        * hold the inode lock
-        */
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-               xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-               return (ESRCH);
-       }
-
-       xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
-       nmaps = 1;
-       if ((error = xfs_bmapi(tp, quotip,
-                             offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
-                             XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
-                             &firstblock,
-                             XFS_QM_DQALLOC_SPACE_RES(mp),
-                             &map, &nmaps, &flist))) {
-               goto error0;
-       }
-       ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
-       ASSERT(nmaps == 1);
-       ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
-              (map.br_startblock != HOLESTARTBLOCK));
-
-       /*
-        * Keep track of the blkno to save a lookup later
-        */
-       dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-       /* now we can just get the buffer (there's nothing to read yet) */
-       bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
-                              dqp->q_blkno,
-                              mp->m_quotainfo->qi_dqchunklen,
-                              0);
-       if (!bp || (error = xfs_buf_geterror(bp)))
-               goto error1;
-       /*
-        * Make a chunk of dquots out of this buffer and log
-        * the entire thing.
-        */
-       xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
-                             dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
-
-       /*
-        * xfs_bmap_finish() may commit the current transaction and
-        * start a second transaction if the freelist is not empty.
-        *
-        * Since we still want to modify this buffer, we need to
-        * ensure that the buffer is not released on commit of
-        * the first transaction and ensure the buffer is added to the
-        * second transaction.
-        *
-        * If there is only one transaction then don't stop the buffer
-        * from being released when it commits later on.
-        */
-
-       xfs_trans_bhold(tp, bp);
-
-       if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
-               goto error1;
-       }
-
-       if (committed) {
-               tp = *tpp;
-               xfs_trans_bjoin(tp, bp);
-       } else {
-               xfs_trans_bhold_release(tp, bp);
-       }
-
-       *O_bpp = bp;
-       return 0;
-
-      error1:
-       xfs_bmap_cancel(&flist);
-      error0:
-       xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
-       return (error);
-}
-
-/*
- * Maps a dquot to the buffer containing its on-disk version.
- * This returns a ptr to the buffer containing the on-disk dquot
- * in the bpp param, and a ptr to the on-disk dquot within that buffer
- */
-STATIC int
-xfs_qm_dqtobp(
-       xfs_trans_t             **tpp,
-       xfs_dquot_t             *dqp,
-       xfs_disk_dquot_t        **O_ddpp,
-       xfs_buf_t               **O_bpp,
-       uint                    flags)
-{
-       xfs_bmbt_irec_t map;
-       int             nmaps = 1, error;
-       xfs_buf_t       *bp;
-       xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
-       xfs_mount_t     *mp = dqp->q_mount;
-       xfs_disk_dquot_t *ddq;
-       xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
-       xfs_trans_t     *tp = (tpp ? *tpp : NULL);
-
-       dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
-
-       xfs_ilock(quotip, XFS_ILOCK_SHARED);
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-               /*
-                * Return if this quota type was turned off while we
-                * didn't hold the quota inode lock.
-                */
-               xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-               return ESRCH;
-       }
-
-       /*
-        * Find the block map; no allocations yet
-        */
-       error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
-                         XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
-                         NULL, 0, &map, &nmaps, NULL);
-
-       xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-       if (error)
-               return error;
-
-       ASSERT(nmaps == 1);
-       ASSERT(map.br_blockcount == 1);
-
-       /*
-        * Offset of dquot in the (fixed sized) dquot chunk.
-        */
-       dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
-               sizeof(xfs_dqblk_t);
-
-       ASSERT(map.br_startblock != DELAYSTARTBLOCK);
-       if (map.br_startblock == HOLESTARTBLOCK) {
-               /*
-                * We don't allocate unless we're asked to
-                */
-               if (!(flags & XFS_QMOPT_DQALLOC))
-                       return ENOENT;
-
-               ASSERT(tp);
-               error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
-                                       dqp->q_fileoffset, &bp);
-               if (error)
-                       return error;
-               tp = *tpp;
-       } else {
-               trace_xfs_dqtobp_read(dqp);
-
-               /*
-                * store the blkno etc so that we don't have to do the
-                * mapping all the time
-                */
-               dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-               error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                                          dqp->q_blkno,
-                                          mp->m_quotainfo->qi_dqchunklen,
-                                          0, &bp);
-               if (error || !bp)
-                       return XFS_ERROR(error);
-       }
-
-       ASSERT(xfs_buf_islocked(bp));
-
-       /*
-        * calculate the location of the dquot inside the buffer.
-        */
-       ddq = bp->b_addr + dqp->q_bufoffset;
-
-       /*
-        * A simple sanity check in case we got a corrupted dquot...
-        */
-       error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
-                          flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
-                          "dqtobp");
-       if (error) {
-               if (!(flags & XFS_QMOPT_DQREPAIR)) {
-                       xfs_trans_brelse(tp, bp);
-                       return XFS_ERROR(EIO);
-               }
-       }
-
-       *O_bpp = bp;
-       *O_ddpp = ddq;
-
-       return (0);
-}
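
The id-to-location arithmetic above is the whole addressing scheme for the
quota file: an id picks the file offset of its fixed-size chunk and the byte
offset of the dquot inside that chunk's buffer. The same two formulas,
isolated in a hypothetical helper:

    static void dquot_disk_location(xfs_dqid_t id, uint dqperchunk,
                                    xfs_fileoff_t *fileoff, uint *bufoff)
    {
            *fileoff = (xfs_fileoff_t)id / dqperchunk;              /* chunk */
            *bufoff  = (id % dqperchunk) * sizeof(xfs_dqblk_t);     /* slot  */
    }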
-
-
-/*
- * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
- * and release the buffer immediately.
- *
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqread(
-       xfs_trans_t     **tpp,
-       xfs_dqid_t      id,
-       xfs_dquot_t     *dqp,   /* dquot to get filled in */
-       uint            flags)
-{
-       xfs_disk_dquot_t *ddqp;
-       xfs_buf_t        *bp;
-       int              error;
-       xfs_trans_t      *tp;
-
-       ASSERT(tpp);
-
-       trace_xfs_dqread(dqp);
-
-       /*
-        * get a pointer to the on-disk dquot and the buffer containing it
-        * dqp already knows its own type (GROUP/USER).
-        */
-       if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
-               return (error);
-       }
-       tp = *tpp;
-
-       /* copy everything from disk dquot to the incore dquot */
-       memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-       xfs_qm_dquot_logitem_init(dqp);
-
-       /*
-        * Reservation counters are defined as reservation plus current usage
-        * to avoid having to add every time.
-        */
-       dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
-       dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
-       dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
-
-       /* Mark the buf so that this will stay incore a little longer */
-       XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
-
-       /*
-        * We got the buffer with xfs_trans_read_buf() (in dqtobp())
-        * So we need to release with xfs_trans_brelse().
-        * The strategy here is identical to that of inodes; we lock
-        * the dquot in xfs_qm_dqget() before making it accessible to
-        * others. This is because dquots, like inodes, need a good level of
-        * concurrency, and we don't want to take locks on the entire buffers
-        * for dquot accesses.
-        * Note also that the dquot buffer may even be dirty at this point, if
-        * this particular dquot was repaired. We still aren't afraid to
-        * brelse it because we have the changes incore.
-        */
-       ASSERT(xfs_buf_islocked(bp));
-       xfs_trans_brelse(tp, bp);
-
-       return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,      /* gid or uid, depending on type */
-       uint            type,    /* UDQUOT or GDQUOT */
-       uint            flags,   /* DQALLOC, DQREPAIR */
-       xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-       xfs_trans_t     *tp;
-       int             cancelflags=0;
-
-       dqp = xfs_qm_dqinit(mp, id, type);
-       tp = NULL;
-       if (flags & XFS_QMOPT_DQALLOC) {
-               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
-                               XFS_WRITE_LOG_RES(mp) +
-                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
-                               128,
-                               0,
-                               XFS_TRANS_PERM_LOG_RES,
-                               XFS_WRITE_LOG_COUNT);
-               if (error) {
-                       cancelflags = 0;
-                       goto error0;
-               }
-               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
-       }
-
-       /*
-        * Read it from disk; xfs_qm_dqread() takes care of
-        * all the necessary initialization of dquot's fields (locks, etc)
-        */
-       if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
-               /*
-                * This can happen if quotas got turned off (ESRCH),
-                * or if the dquot didn't exist on disk and we ask to
-                * allocate (ENOENT).
-                */
-               trace_xfs_dqread_fail(dqp);
-               cancelflags |= XFS_TRANS_ABORT;
-               goto error0;
-       }
-       if (tp) {
-               if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
-                       goto error1;
-       }
-
-       *O_dqpp = dqp;
-       return (0);
-
- error0:
-       ASSERT(error);
-       if (tp)
-               xfs_trans_cancel(tp, cancelflags);
- error1:
-       xfs_qm_dqdestroy(dqp);
-       *O_dqpp = NULL;
-       return (error);
-}
-
-/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
-       xfs_mount_t             *mp,
-       xfs_dqid_t              id,
-       xfs_dqhash_t            *qh,
-       xfs_dquot_t             **O_dqpp)
-{
-       xfs_dquot_t             *dqp;
-       uint                    flist_locked;
-
-       ASSERT(mutex_is_locked(&qh->qh_lock));
-
-       flist_locked = B_FALSE;
-
-       /*
-        * Traverse the hashchain looking for a match
-        */
-       list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
-               /*
-                * We already have the hashlock. We don't need the
-                * dqlock to look at the id field of the dquot, since the
-                * id can't be modified without the hashlock anyway.
-                */
-               if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
-                       trace_xfs_dqlookup_found(dqp);
-
-                       /*
-                        * All in core dquots must be on the dqlist of mp
-                        */
-                       ASSERT(!list_empty(&dqp->q_mplist));
-
-                       xfs_dqlock(dqp);
-                       if (dqp->q_nrefs == 0) {
-                               ASSERT(!list_empty(&dqp->q_freelist));
-                               if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-                                       trace_xfs_dqlookup_want(dqp);
-
-                                       /*
-                                        * We may have raced with dqreclaim_one()
-                                        * (and lost). So, flag that we don't
-                                        * want the dquot to be reclaimed.
-                                        */
-                                       dqp->dq_flags |= XFS_DQ_WANT;
-                                       xfs_dqunlock(dqp);
-                                       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-                                       xfs_dqlock(dqp);
-                                       dqp->dq_flags &= ~(XFS_DQ_WANT);
-                               }
-                               flist_locked = B_TRUE;
-                       }
-
-                       /*
-                        * id couldn't have changed; we had the hashlock all
-                        * along
-                        */
-                       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
-                       if (flist_locked) {
-                               if (dqp->q_nrefs != 0) {
-                                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                                       flist_locked = B_FALSE;
-                               } else {
-                                       /* take it off the freelist */
-                                       trace_xfs_dqlookup_freelist(dqp);
-                                       list_del_init(&dqp->q_freelist);
-                                       xfs_Gqm->qm_dqfrlist_cnt--;
-                               }
-                       }
-
-                       XFS_DQHOLD(dqp);
-
-                       if (flist_locked)
-                               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                       /*
-                        * move the dquot to the front of the hashchain
-                        */
-                       ASSERT(mutex_is_locked(&qh->qh_lock));
-                       list_move(&dqp->q_hashlist, &qh->qh_list);
-                       trace_xfs_dqlookup_done(dqp);
-                       *O_dqpp = dqp;
-                       return 0;
-               }
-       }
-
-       *O_dqpp = NULL;
-       ASSERT(mutex_is_locked(&qh->qh_lock));
-       return (1);
-}
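
The XFS_DQ_WANT dance in the lookup above is a standard way to take a second lock that is ordered before one already held: try it out of order first, and on failure flag the intent, back off, and retake both in the documented order. A rough userspace analogue with invented names (pthreads, not XFS APIs):

#include <pthread.h>
#include <stdbool.h>

struct dq {
        pthread_mutex_t lock;   /* stands in for dqp->q_qlock */
        bool            want;   /* stands in for XFS_DQ_WANT */
};

static pthread_mutex_t freelist_lock = PTHREAD_MUTEX_INITIALIZER;

static void lock_dq_and_freelist(struct dq *dq)
{
        /* caller holds dq->lock; try the freelist lock out of order */
        if (pthread_mutex_trylock(&freelist_lock) != 0) {
                dq->want = true;        /* tell reclaim to leave us alone */
                pthread_mutex_unlock(&dq->lock);
                pthread_mutex_lock(&freelist_lock);
                pthread_mutex_lock(&dq->lock);
                dq->want = false;
        }
        /* both locks held, acquired without inverting the lock order */
}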
-
-/*
- * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return
- * a locked dquot, doing an allocation (if requested) as needed.
- * When both an inode and an id are given, the inode's id takes precedence.
- * That is, if the id changes while we don't hold the ilock inside this
- * function, the new dquot is returned, not necessarily the one requested
- * in the id argument.
- */
-int
-xfs_qm_dqget(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *ip,      /* locked inode (optional) */
-       xfs_dqid_t      id,       /* uid/projid/gid depending on type */
-       uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
-       uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
-       xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
-{
-       xfs_dquot_t     *dqp;
-       xfs_dqhash_t    *h;
-       uint            version;
-       int             error;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-       if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
-           (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
-           (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
-               return (ESRCH);
-       }
-       h = XFS_DQ_HASH(mp, id, type);
-
-#ifdef DEBUG
-       if (xfs_do_dqerror) {
-               if ((xfs_dqerror_target == mp->m_ddev_targp) &&
-                   (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
-                       xfs_debug(mp, "Returning error in dqget");
-                       return (EIO);
-               }
-       }
-#endif
-
- again:
-
-#ifdef DEBUG
-       ASSERT(type == XFS_DQ_USER ||
-              type == XFS_DQ_PROJ ||
-              type == XFS_DQ_GROUP);
-       if (ip) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-               if (type == XFS_DQ_USER)
-                       ASSERT(ip->i_udquot == NULL);
-               else
-                       ASSERT(ip->i_gdquot == NULL);
-       }
-#endif
-       mutex_lock(&h->qh_lock);
-
-       /*
-        * Look in the cache (hashtable).
-        * The chain is kept locked during lookup.
-        */
-       if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
-               XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
-               /*
-                * The dquot was found, moved to the front of the chain,
-                * taken off the freelist if it was on it, and locked
-                * at this point. Just unlock the hashchain and return.
-                */
-               ASSERT(*O_dqpp);
-               ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
-               mutex_unlock(&h->qh_lock);
-               trace_xfs_dqget_hit(*O_dqpp);
-               return (0);     /* success */
-       }
-       XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
-
-       /*
-        * Dquot cache miss. We don't want to keep the inode lock across
-        * a (potential) disk read. Also we don't want to deal with the lock
-        * ordering between quotainode and this inode. OTOH, dropping the inode
-        * lock here means dealing with a chown that can happen before
-        * we re-acquire the lock.
-        */
-       if (ip)
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       /*
-        * Save the hashchain version stamp, and unlock the chain, so that
-        * we don't keep the lock across a disk read
-        */
-       version = h->qh_version;
-       mutex_unlock(&h->qh_lock);
-
-       /*
-        * Allocate the dquot on the kernel heap, and read the ondisk
-        * portion off the disk; also do all the necessary initialization.
-        * This can return ENOENT if the dquot didn't exist on disk and we
-        * didn't ask it to allocate; ESRCH if quotas got turned off suddenly.
-        */
-       if ((error = xfs_qm_idtodq(mp, id, type,
-                                 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
-                                          XFS_QMOPT_DOWARN),
-                                 &dqp))) {
-               if (ip)
-                       xfs_ilock(ip, XFS_ILOCK_EXCL);
-               return (error);
-       }
-
-       /*
-        * See if this is mount code calling to look at the overall quota limits
-        * which are stored in the id == 0 user or group's dquot.
-        * Since we may not have done a quotacheck by this point, just return
-        * the dquot without attaching it to any hashtables, lists, etc, or even
-        * taking a reference.
-        * The caller must dqdestroy this once done.
-        */
-       if (flags & XFS_QMOPT_DQSUSER) {
-               ASSERT(id == 0);
-               ASSERT(! ip);
-               goto dqret;
-       }
-
-       /*
-        * Dquot lock comes after hashlock in the lock ordering
-        */
-       if (ip) {
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-               /*
-                * A dquot could be attached to this inode by now, since
-                * we had dropped the ilock.
-                */
-               if (type == XFS_DQ_USER) {
-                       if (!XFS_IS_UQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_udquot) {
-                               xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_udquot;
-                               xfs_dqlock(dqp);
-                               goto dqret;
-                       }
-               } else {
-                       if (!XFS_IS_OQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_gdquot) {
-                               xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_gdquot;
-                               xfs_dqlock(dqp);
-                               goto dqret;
-                       }
-               }
-       }
-
-       /*
-        * Hashlock comes after ilock in lock order
-        */
-       mutex_lock(&h->qh_lock);
-       if (version != h->qh_version) {
-               xfs_dquot_t *tmpdqp;
-               /*
-                * Now, see if somebody else put the dquot in the
-                * hashtable before us. This can happen because we didn't
-                * keep the hashchain lock. We don't have to worry about
-                * lock order between the two dquots here since dqp isn't
-                * on any findable lists yet.
-                */
-               if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
-                       /*
-                        * Duplicate found. Just throw away the new dquot
-                        * and start over.
-                        */
-                       xfs_qm_dqput(tmpdqp);
-                       mutex_unlock(&h->qh_lock);
-                       xfs_qm_dqdestroy(dqp);
-                       XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-                       goto again;
-               }
-       }
-
-       /*
-        * Put the dquot at the beginning of the hash-chain and mp's list
-        * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
-        */
-       ASSERT(mutex_is_locked(&h->qh_lock));
-       dqp->q_hash = h;
-       list_add(&dqp->q_hashlist, &h->qh_list);
-       h->qh_version++;
-
-       /*
-        * Attach this dquot to this filesystem's list of all dquots,
-        * kept inside the mount structure in m_quotainfo field
-        */
-       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-
-       /*
-        * We return a locked dquot to the caller, with a reference taken
-        */
-       xfs_dqlock(dqp);
-       dqp->q_nrefs = 1;
-
-       list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
-       mp->m_quotainfo->qi_dquots++;
-       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-       mutex_unlock(&h->qh_lock);
- dqret:
-       ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       trace_xfs_dqget_miss(dqp);
-       *O_dqpp = dqp;
-       return (0);
-}
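
For reference, a hypothetical caller of xfs_qm_dqget() built from the signature above (kernel-style, not compilable on its own; the di_uid access is assumed for illustration):

        struct xfs_dquot        *udqp;
        int                     error;

        /* ip is held ILOCK_EXCL; DQALLOC allocates the dquot if absent */
        error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
                             XFS_QMOPT_DQALLOC, &udqp);
        if (error)
                return error;

        /* udqp comes back locked with one reference held */
        xfs_dqunlock(udqp);
        /* ... use the dquot ... */
        xfs_qm_dqrele(udqp);    /* drop the reference when finished */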
-
-
-/*
- * Release a reference to the dquot (decrement ref-count)
- * and unlock it. If there is a group quota attached to this
- * dquot, carefully release that too without tripping over
- * deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
-       xfs_dquot_t     *dqp)
-{
-       xfs_dquot_t     *gdqp;
-
-       ASSERT(dqp->q_nrefs > 0);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       trace_xfs_dqput(dqp);
-
-       if (dqp->q_nrefs != 1) {
-               dqp->q_nrefs--;
-               xfs_dqunlock(dqp);
-               return;
-       }
-
-       /*
-        * drop the dqlock and acquire the freelist and dqlock
-        * in the right order; but try to get it out-of-order first
-        */
-       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-               trace_xfs_dqput_wait(dqp);
-               xfs_dqunlock(dqp);
-               mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-               xfs_dqlock(dqp);
-       }
-
-       while (1) {
-               gdqp = NULL;
-
-               /* We can't depend on nrefs being == 1 here */
-               if (--dqp->q_nrefs == 0) {
-                       trace_xfs_dqput_free(dqp);
-
-                       list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-                       xfs_Gqm->qm_dqfrlist_cnt++;
-
-                       /*
-                        * If we just added a udquot to the freelist, then
-                        * we want to release the gdquot reference that
-                        * it (probably) has. Otherwise it'll keep the
-                        * gdquot from getting reclaimed.
-                        */
-                       if ((gdqp = dqp->q_gdquot)) {
-                               /*
-                                * Avoid a recursive dqput call
-                                */
-                               xfs_dqlock(gdqp);
-                               dqp->q_gdquot = NULL;
-                       }
-               }
-               xfs_dqunlock(dqp);
-
-               /*
-                * If we had a group quota inside the user quota as a hint,
-                * release it now.
-                */
-               if (! gdqp)
-                       break;
-               dqp = gdqp;
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-}
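
The while loop above releases a chain of references iteratively: when the last reference to a user dquot goes away, the group-dquot hint it holds must be dropped too, and looping instead of calling xfs_qm_dqput() recursively keeps the stack flat. The same shape as a standalone sketch (types invented):

struct ref_obj {
        int              nrefs;
        struct ref_obj  *hint;  /* stands in for dqp->q_gdquot */
};

static void put_chain(struct ref_obj *obj)
{
        while (obj) {
                struct ref_obj *next = NULL;

                if (--obj->nrefs == 0) {
                        /* last ref gone: also drop the hint's reference */
                        next = obj->hint;
                        obj->hint = NULL;
                }
                obj = next;     /* iterate rather than recurse */
        }
}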
-
-/*
- * Release a dquot. Flush it if dirty, then dqput() it.
- * dquot must not be locked.
- */
-void
-xfs_qm_dqrele(
-       xfs_dquot_t     *dqp)
-{
-       if (!dqp)
-               return;
-
-       trace_xfs_dqrele(dqp);
-
-       xfs_dqlock(dqp);
-       /*
-        * We don't care to flush it if the dquot is dirty here.
-        * That will create stutters that we want to avoid.
-        * Instead we do a delayed write when we try to reclaim
-        * a dirty dquot. Also xfs_sync will take part of the burden...
-        */
-       xfs_qm_dqput(dqp);
-}
-
-/*
- * This is the dquot flushing I/O completion routine.  It is called
- * from interrupt level when the buffer containing the dquot is
- * flushed to disk.  It is responsible for removing the dquot logitem
- * from the AIL if it has not been re-logged, and unlocking the dquot's
- * flush lock. This behavior is very similar to that of inodes..
- */
-STATIC void
-xfs_qm_dqflush_done(
-       struct xfs_buf          *bp,
-       struct xfs_log_item     *lip)
-{
-       xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
-       xfs_dquot_t             *dqp = qip->qli_dquot;
-       struct xfs_ail          *ailp = lip->li_ailp;
-
-       /*
-        * We only want to pull the item from the AIL if its
-        * location in the log has not changed since we started the flush.
-        * Thus, we only bother if the dquot's lsn has
-        * not changed. First we check the lsn outside the lock
-        * since it's cheaper, and then we recheck while
-        * holding the lock before removing the dquot from the AIL.
-        */
-       if ((lip->li_flags & XFS_LI_IN_AIL) &&
-           lip->li_lsn == qip->qli_flush_lsn) {
-
-               /* xfs_trans_ail_delete() drops the AIL lock. */
-               spin_lock(&ailp->xa_lock);
-               if (lip->li_lsn == qip->qli_flush_lsn)
-                       xfs_trans_ail_delete(ailp, lip);
-               else
-                       spin_unlock(&ailp->xa_lock);
-       }
-
-       /*
-        * Release the dq's flush lock since we're done with it.
-        */
-       xfs_dqfunlock(dqp);
-}
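
The lsn test above is a double-checked pattern: the comparison is done once outside the AIL spinlock because that is cheap, then repeated under the lock before the item is actually removed. Its generic shape, with invented names:

        if (item->lsn == flush_lsn) {           /* cheap unlocked check */
                spin_lock(&ail->lock);
                if (item->lsn == flush_lsn)     /* recheck under the lock */
                        ail_delete(ail, item);  /* drops ail->lock */
                else
                        spin_unlock(&ail->lock);
        }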
-
-/*
- * Write a modified dquot to disk.
- * The dquot must be locked and the flush lock too taken by caller.
- * The flush lock will not be unlocked until the dquot reaches the disk,
- * but the dquot is free to be unlocked and modified by the caller
- * in the interim. Dquot is still locked on return. This behavior is
- * identical to that of inodes.
- */
-int
-xfs_qm_dqflush(
-       xfs_dquot_t             *dqp,
-       uint                    flags)
-{
-       struct xfs_mount        *mp = dqp->q_mount;
-       struct xfs_buf          *bp;
-       struct xfs_disk_dquot   *ddqp;
-       int                     error;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(!completion_done(&dqp->q_flush));
-
-       trace_xfs_dqflush(dqp);
-
-       /*
-        * If not dirty, or it's pinned and we are not supposed to block, nada.
-        */
-       if (!XFS_DQ_IS_DIRTY(dqp) ||
-           (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
-               xfs_dqfunlock(dqp);
-               return 0;
-       }
-       xfs_qm_dqunpin_wait(dqp);
-
-       /*
-        * This may have been unpinned because the filesystem is shutting
-        * down forcibly. If that's the case we must not write this dquot
-        * to disk, because the log record didn't make it to disk!
-        */
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               dqp->dq_flags &= ~XFS_DQ_DIRTY;
-               xfs_dqfunlock(dqp);
-               return XFS_ERROR(EIO);
-       }
-
-       /*
-        * Get the buffer containing the on-disk dquot
-        */
-       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
-                                  mp->m_quotainfo->qi_dqchunklen, 0, &bp);
-       if (error) {
-               ASSERT(error != ENOENT);
-               xfs_dqfunlock(dqp);
-               return error;
-       }
-
-       /*
-        * Calculate the location of the dquot inside the buffer.
-        */
-       ddqp = bp->b_addr + dqp->q_bufoffset;
-
-       /*
-        * A simple sanity check in case we got a corrupted dquot..
-        */
-       error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
-                          XFS_QMOPT_DOWARN, "dqflush (incore copy)");
-       if (error) {
-               xfs_buf_relse(bp);
-               xfs_dqfunlock(dqp);
-               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-               return XFS_ERROR(EIO);
-       }
-
-       /* This is the only portion of data that needs to persist */
-       memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
-
-       /*
-        * Clear the dirty field and remember the flush lsn for later use.
-        */
-       dqp->dq_flags &= ~XFS_DQ_DIRTY;
-
-       xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
-                                       &dqp->q_logitem.qli_item.li_lsn);
-
-       /*
-        * Attach an iodone routine so that we can remove this dquot from the
-        * AIL and release the flush lock once the dquot is synced to disk.
-        */
-       xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
-                                 &dqp->q_logitem.qli_item);
-
-       /*
-        * If the buffer is pinned then push on the log so we won't
-        * get stuck waiting in the write for too long.
-        */
-       if (xfs_buf_ispinned(bp)) {
-               trace_xfs_dqflush_force(dqp);
-               xfs_log_force(mp, 0);
-       }
-
-       if (flags & SYNC_WAIT)
-               error = xfs_bwrite(mp, bp);
-       else
-               xfs_bdwrite(mp, bp);
-
-       trace_xfs_dqflush_done(dqp);
-
-       /*
-        * dqp is still locked, but caller is free to unlock it now.
-        */
-       return error;
-
-}
-
-int
-xfs_qm_dqlock_nowait(
-       xfs_dquot_t *dqp)
-{
-       return mutex_trylock(&dqp->q_qlock);
-}
-
-void
-xfs_dqlock(
-       xfs_dquot_t *dqp)
-{
-       mutex_lock(&dqp->q_qlock);
-}
-
-void
-xfs_dqunlock(
-       xfs_dquot_t *dqp)
-{
-       mutex_unlock(&(dqp->q_qlock));
-       if (dqp->q_logitem.qli_dquot == dqp) {
-               /* Once was dqp->q_mount, but might just have been cleared */
-               xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
-                                       (xfs_log_item_t*)&(dqp->q_logitem));
-       }
-}
-
-
-void
-xfs_dqunlock_nonotify(
-       xfs_dquot_t *dqp)
-{
-       mutex_unlock(&(dqp->q_qlock));
-}
-
-/*
- * Lock two xfs_dquot structures.
- *
- * To avoid deadlocks we always lock the quota structure with
- * the lower id first.
- */
-void
-xfs_dqlock2(
-       xfs_dquot_t     *d1,
-       xfs_dquot_t     *d2)
-{
-       if (d1 && d2) {
-               ASSERT(d1 != d2);
-               if (be32_to_cpu(d1->q_core.d_id) >
-                   be32_to_cpu(d2->q_core.d_id)) {
-                       mutex_lock(&d2->q_qlock);
-                       mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
-               } else {
-                       mutex_lock(&d1->q_qlock);
-                       mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
-               }
-       } else if (d1) {
-               mutex_lock(&d1->q_qlock);
-       } else if (d2) {
-               mutex_lock(&d2->q_qlock);
-       }
-}
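
The id comparison above is the classic total-order trick for taking two locks of the same class: every caller agrees to lock the lower id first, so no lock-order cycle can form between concurrent callers. A runnable userspace analogue (types invented; callers guarantee a != b, as the ASSERT above does):

#include <pthread.h>

struct obj {
        unsigned int    id;
        pthread_mutex_t lock;
};

static void lock_two(struct obj *a, struct obj *b)
{
        if (a->id > b->id) {    /* always take the lower id first */
                struct obj *t = a;
                a = b;
                b = t;
        }
        pthread_mutex_lock(&a->lock);
        pthread_mutex_lock(&b->lock);
}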
-
-
-/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
- */
-/* ARGSUSED */
-int
-xfs_qm_dqpurge(
-       xfs_dquot_t     *dqp)
-{
-       xfs_dqhash_t    *qh = dqp->q_hash;
-       xfs_mount_t     *mp = dqp->q_mount;
-
-       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
-       ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
-
-       xfs_dqlock(dqp);
-       /*
-        * We really can't afford to purge a dquot that is
-        * referenced, because these are hard refs.
-        * It shouldn't happen in general because we went through _all_ inodes in
-        * dqrele_all_inodes before calling this and didn't let the mountlock go.
-        * However it is possible that we have dquots with temporary
-        * references that are not attached to an inode. e.g. see xfs_setattr().
-        */
-       if (dqp->q_nrefs != 0) {
-               xfs_dqunlock(dqp);
-               mutex_unlock(&dqp->q_hash->qh_lock);
-               return (1);
-       }
-
-       ASSERT(!list_empty(&dqp->q_freelist));
-
-       /*
-        * If we're turning off quotas, we have to make sure that, for
-        * example, we don't delete quota disk blocks while dquots are
-        * in the process of getting written to those disk blocks.
-        * This dquot might well be on AIL, and we can't leave it there
-        * if we're turning off quotas. Basically, we need this flush
-        * lock, and are willing to block on it.
-        */
-       if (!xfs_dqflock_nowait(dqp)) {
-               /*
-                * Block on the flush lock after nudging dquot buffer,
-                * if it is incore.
-                */
-               xfs_qm_dqflock_pushbuf_wait(dqp);
-       }
-
-       /*
-        * XXX If we're turning this type of quota off, we don't care
-        * about the dirty metadata sitting in this dquot. OTOH, if
-        * we're unmounting, we do care, so we flush it and wait.
-        */
-       if (XFS_DQ_IS_DIRTY(dqp)) {
-               int     error;
-
-               /* dqflush unlocks dqflock */
-               /*
-                * Given that dqpurge is a very rare occurrence, it is OK
-                * that we're holding the hashlist and mplist locks
-                * across the disk write. But, ... XXXsup
-                *
-                * We don't care about getting disk errors here. We need
-                * to purge this dquot anyway, so we go ahead regardless.
-                */
-               error = xfs_qm_dqflush(dqp, SYNC_WAIT);
-               if (error)
-                       xfs_warn(mp, "%s: dquot %p flush failed",
-                               __func__, dqp);
-               xfs_dqflock(dqp);
-       }
-       ASSERT(atomic_read(&dqp->q_pincount) == 0);
-       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
-              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
-
-       list_del_init(&dqp->q_hashlist);
-       qh->qh_version++;
-       list_del_init(&dqp->q_mplist);
-       mp->m_quotainfo->qi_dqreclaims++;
-       mp->m_quotainfo->qi_dquots--;
-       /*
-        * XXX Move this to the front of the freelist, if we can get the
-        * freelist lock.
-        */
-       ASSERT(!list_empty(&dqp->q_freelist));
-
-       dqp->q_mount = NULL;
-       dqp->q_hash = NULL;
-       dqp->dq_flags = XFS_DQ_INACTIVE;
-       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-       xfs_dqfunlock(dqp);
-       xfs_dqunlock(dqp);
-       mutex_unlock(&qh->qh_lock);
-       return (0);
-}
-
-
-/*
- * Give the buffer a little push if it is incore and
- * wait on the flush lock.
- */
-void
-xfs_qm_dqflock_pushbuf_wait(
-       xfs_dquot_t     *dqp)
-{
-       xfs_mount_t     *mp = dqp->q_mount;
-       xfs_buf_t       *bp;
-
-       /*
-        * Check to see if the dquot has been flushed delayed
-        * write.  If so, grab its buffer and send it
-        * out immediately.  We'll be able to acquire
-        * the flush lock when the I/O completes.
-        */
-       bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
-                       mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-       if (!bp)
-               goto out_lock;
-
-       if (XFS_BUF_ISDELAYWRITE(bp)) {
-               if (xfs_buf_ispinned(bp))
-                       xfs_log_force(mp, 0);
-               xfs_buf_delwri_promote(bp);
-               wake_up_process(bp->b_target->bt_task);
-       }
-       xfs_buf_relse(bp);
-out_lock:
-       xfs_dqflock(dqp);
-}
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
deleted file mode 100644 (file)
index 34b7e94..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DQUOT_H__
-#define __XFS_DQUOT_H__
-
-/*
- * Dquots are structures that hold quota information about a user or a group,
- * much like inodes are for files. In fact, dquots share many characteristics
- * with inodes. However, dquots can also be a centralized resource, relative
- * to a collection of inodes. In this respect, dquots share some characteristics
- * of the superblock.
- * XFS dquots exploit both of these traits in their algorithms. They make every attempt
- * to not be a bottleneck when quotas are on and have minimal impact, if any,
- * when quotas are off.
- */
-
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
-       struct list_head  qh_list;
-       struct mutex      qh_lock;
-       uint              qh_version;   /* ever increasing version */
-       uint              qh_nelems;    /* number of dquots on the list */
-} xfs_dqhash_t;
-
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * The incore dquot structure
- */
-typedef struct xfs_dquot {
-       uint             dq_flags;      /* various flags (XFS_DQ_*) */
-       struct list_head q_freelist;    /* global free list of dquots */
-       struct list_head q_mplist;      /* mount's list of dquots */
-       struct list_head q_hashlist;    /* global hash list of dquots */
-       xfs_dqhash_t    *q_hash;        /* the hashchain header */
-       struct xfs_mount*q_mount;       /* filesystem this relates to */
-       struct xfs_trans*q_transp;      /* trans this belongs to currently */
-       uint             q_nrefs;       /* # active refs from inodes */
-       xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
-       int              q_bufoffset;   /* off of dq in buffer (# dquots) */
-       xfs_fileoff_t    q_fileoffset;  /* offset in quotas file */
-
-       struct xfs_dquot*q_gdquot;      /* group dquot, hint only */
-       xfs_disk_dquot_t q_core;        /* actual usage & quotas */
-       xfs_dq_logitem_t q_logitem;     /* dquot log item */
-       xfs_qcnt_t       q_res_bcount;  /* total regular nblks used+reserved */
-       xfs_qcnt_t       q_res_icount;  /* total inos allocd+reserved */
-       xfs_qcnt_t       q_res_rtbcount;/* total realtime blks used+reserved */
-       struct mutex     q_qlock;       /* quota lock */
-       struct completion q_flush;      /* flush completion queue */
-       atomic_t          q_pincount;   /* dquot pin count */
-       wait_queue_head_t q_pinwait;    /* dquot pinning wait queue */
-} xfs_dquot_t;
-
-/*
- * Lock hierarchy for q_qlock:
- *     XFS_QLOCK_NORMAL is the implicit default,
- *     XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
- */
-enum {
-       XFS_QLOCK_NORMAL = 0,
-       XFS_QLOCK_NESTED,
-};
-
-#define XFS_DQHOLD(dqp)                ((dqp)->q_nrefs++)
-
-/*
- * Manage the q_flush completion queue embedded in the dquot.  This completion
- * queue synchronizes processes attempting to flush the in-core dquot back to
- * disk.
- */
-static inline void xfs_dqflock(xfs_dquot_t *dqp)
-{
-       wait_for_completion(&dqp->q_flush);
-}
-
-static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
-{
-       return try_wait_for_completion(&dqp->q_flush);
-}
-
-static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
-{
-       complete(&dqp->q_flush);
-}
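
Used this way, the completion behaves like a binary semaphore: it starts in the completed state, the first wait_for_completion() "takes" the flush lock, and complete() releases it for the next waiter. A userspace analogue with POSIX semaphores (names invented):

#include <semaphore.h>

static sem_t flush_lock;

static void flush_init(void)    { sem_init(&flush_lock, 0, 1); }          /* starts free */
static void flush_take(void)    { sem_wait(&flush_lock); }                /* like xfs_dqflock */
static int  flush_trytake(void) { return sem_trywait(&flush_lock) == 0; } /* like xfs_dqflock_nowait */
static void flush_drop(void)    { sem_post(&flush_lock); }                /* like xfs_dqfunlock */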
-
-#define XFS_DQ_IS_LOCKED(dqp)  (mutex_is_locked(&((dqp)->q_qlock)))
-#define XFS_DQ_IS_DIRTY(dqp)   ((dqp)->dq_flags & XFS_DQ_DIRTY)
-#define XFS_QM_ISUDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_USER)
-#define XFS_QM_ISPDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_PROJ)
-#define XFS_QM_ISGDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_GROUP)
-#define XFS_DQ_TO_QINF(dqp)    ((dqp)->q_mount->m_quotainfo)
-#define XFS_DQ_TO_QIP(dqp)     (XFS_QM_ISUDQ(dqp) ? \
-                                XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
-                                XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
-
-#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
-                                    (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
-                                    (XFS_IS_OQUOTA_ON((d)->q_mount))))
-
-extern void            xfs_qm_dqdestroy(xfs_dquot_t *);
-extern int             xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int             xfs_qm_dqpurge(xfs_dquot_t *);
-extern void            xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int             xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern void            xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
-extern void            xfs_qm_adjust_dqtimers(xfs_mount_t *,
-                                       xfs_disk_dquot_t *);
-extern void            xfs_qm_adjust_dqlimits(xfs_mount_t *,
-                                       xfs_disk_dquot_t *);
-extern int             xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
-                                       xfs_dqid_t, uint, uint, xfs_dquot_t **);
-extern void            xfs_qm_dqput(xfs_dquot_t *);
-extern void            xfs_dqlock(xfs_dquot_t *);
-extern void            xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
-extern void            xfs_dqunlock(xfs_dquot_t *);
-extern void            xfs_dqunlock_nonotify(xfs_dquot_t *);
-
-#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
deleted file mode 100644 (file)
index 9e0e2fa..0000000
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
-{
-       return container_of(lip, struct xfs_dq_logitem, qli_item);
-}
-
-/*
- * returns the number of iovecs needed to log the given dquot item.
- */
-STATIC uint
-xfs_qm_dquot_logitem_size(
-       struct xfs_log_item     *lip)
-{
-       /*
-        * we need only two iovecs, one for the format, one for the real thing
-        */
-       return 2;
-}
-
-/*
- * fills in the vector of log iovecs for the given dquot log item.
- */
-STATIC void
-xfs_qm_dquot_logitem_format(
-       struct xfs_log_item     *lip,
-       struct xfs_log_iovec    *logvec)
-{
-       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
-
-       logvec->i_addr = &qlip->qli_format;
-       logvec->i_len  = sizeof(xfs_dq_logformat_t);
-       logvec->i_type = XLOG_REG_TYPE_QFORMAT;
-       logvec++;
-       logvec->i_addr = &qlip->qli_dquot->q_core;
-       logvec->i_len  = sizeof(xfs_disk_dquot_t);
-       logvec->i_type = XLOG_REG_TYPE_DQUOT;
-
-       ASSERT(2 == lip->li_desc->lid_size);
-       qlip->qli_format.qlf_size = 2;
-
-}
-
-/*
- * Increment the pin count of the given dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_pin(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       atomic_inc(&dqp->q_pincount);
-}
-
-/*
- * Decrement the pin count of the given dquot, and wake up
- * anyone in xfs_qm_dqunpin_wait() if the count goes to 0.  The
- * dquot must have been previously pinned with a call to
- * xfs_qm_dquot_logitem_pin().
- */
-STATIC void
-xfs_qm_dquot_logitem_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(atomic_read(&dqp->q_pincount) > 0);
-       if (atomic_dec_and_test(&dqp->q_pincount))
-               wake_up(&dqp->q_pinwait);
-}
-
-/*
- * Given the logitem, this writes the corresponding dquot entry to disk
- * asynchronously. This is called with the dquot entry securely locked;
- * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
- * at the end.
- */
-STATIC void
-xfs_qm_dquot_logitem_push(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-       int                     error;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(!completion_done(&dqp->q_flush));
-
-       /*
-        * Since we were able to lock the dquot's flush lock and
-        * we found it on the AIL, the dquot must be dirty.  This
-        * is because the dquot is removed from the AIL while still
-        * holding the flush lock in xfs_dqflush_done().  Thus, if
-        * we found it in the AIL and were able to obtain the flush
-        * lock without sleeping, then there must not have been
-        * anyone in the process of flushing the dquot.
-        */
-       error = xfs_qm_dqflush(dqp, 0);
-       if (error)
-               xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
-                       __func__, error, dqp);
-       xfs_dqunlock(dqp);
-}
-
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       /*
-        * We always re-log the entire dquot when it becomes dirty,
-        * so, the latest copy _is_ the only one that matters.
-        */
-       return lsn;
-}
-
-/*
- * This is called to wait for the given dquot to be unpinned.
- * Most of these pin/unpin routines are plagiarized from inode code.
- */
-void
-xfs_qm_dqunpin_wait(
-       struct xfs_dquot        *dqp)
-{
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       if (atomic_read(&dqp->q_pincount) == 0)
-               return;
-
-       /*
-        * Give the log a push so we don't wait here too long.
-        */
-       xfs_log_force(dqp->q_mount, 0);
-       wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
-}
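
The pin count pairs an atomic counter with a wait queue: unpin decrements and wakes waiters when the count reaches zero, and the waiter sleeps until it drains. A userspace analogue with a mutex and condition variable (names invented):

#include <pthread.h>

static pthread_mutex_t  pin_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t   pin_cv  = PTHREAD_COND_INITIALIZER;
static int              pincount;

static void unpin(void)         /* like xfs_qm_dquot_logitem_unpin */
{
        pthread_mutex_lock(&pin_mtx);
        if (--pincount == 0)
                pthread_cond_broadcast(&pin_cv);        /* wake_up(&q_pinwait) */
        pthread_mutex_unlock(&pin_mtx);
}

static void unpin_wait(void)    /* like xfs_qm_dqunpin_wait */
{
        pthread_mutex_lock(&pin_mtx);
        while (pincount > 0)
                pthread_cond_wait(&pin_cv, &pin_mtx);
        pthread_mutex_unlock(&pin_mtx);
}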
-
-/*
- * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
- * the dquot is locked by us, but the flush lock isn't. So, here we are
- * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
- * If so, we want to push it out to help us take this item off the AIL as soon
- * as possible.
- *
- * We must not be holding the AIL lock at this point. Calling incore() to
- * search the buffer cache can be a time consuming thing, and AIL lock is a
- * spinlock.
- */
-STATIC void
-xfs_qm_dquot_logitem_pushbuf(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
-       struct xfs_dquot        *dqp = qlip->qli_dquot;
-       struct xfs_buf          *bp;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       /*
-        * If the flush lock isn't locked anymore, chances are that the
-        * dquot flush completed and the dquot was taken off the AIL.
-        * So, just get out.
-        */
-       if (completion_done(&dqp->q_flush) ||
-           !(lip->li_flags & XFS_LI_IN_AIL)) {
-               xfs_dqunlock(dqp);
-               return;
-       }
-
-       bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
-                       dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-       xfs_dqunlock(dqp);
-       if (!bp)
-               return;
-       if (XFS_BUF_ISDELAYWRITE(bp))
-               xfs_buf_delwri_promote(bp);
-       xfs_buf_relse(bp);
-}
-
-/*
- * This is called to attempt to lock the dquot associated with this
- * dquot log item.  Don't sleep on the dquot lock or the flush lock.
- * If the flush lock is already held, indicating that the dquot has
- * been or is in the process of being flushed, then see if we can
- * find the dquot's buffer in the buffer cache without sleeping.  If
- * we can and it is marked delayed write, then we want to send it out.
- * We delay doing so until the push routine, though, to avoid sleeping
- * in any device strategy routines.
- */
-STATIC uint
-xfs_qm_dquot_logitem_trylock(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       if (atomic_read(&dqp->q_pincount) > 0)
-               return XFS_ITEM_PINNED;
-
-       if (!xfs_qm_dqlock_nowait(dqp))
-               return XFS_ITEM_LOCKED;
-
-       if (!xfs_dqflock_nowait(dqp)) {
-               /*
-                * dquot has already been flushed to the backing buffer,
-                * leave it locked, pushbuf routine will unlock it.
-                */
-               return XFS_ITEM_PUSHBUF;
-       }
-
-       ASSERT(lip->li_flags & XFS_LI_IN_AIL);
-       return XFS_ITEM_SUCCESS;
-}
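
The four return values above drive the AIL pusher's decision for each item. A simplified sketch of how such a caller dispatches on them (a hypothetical shape; the real xfsaild loop carries more bookkeeping):

        switch (IOP_TRYLOCK(lip)) {
        case XFS_ITEM_SUCCESS:
                IOP_PUSH(lip);          /* flush the item, then unlock it */
                break;
        case XFS_ITEM_PUSHBUF:
                IOP_PUSHBUF(lip);       /* nudge the backing buffer out */
                break;
        case XFS_ITEM_PINNED:
                /* force the log so the pin count can drop */
                /* fallthrough */
        case XFS_ITEM_LOCKED:
                break;                  /* skip; retry on a later pass */
        }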
-
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction.  If the
- * hold flags is set, do not unlock the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_unlock(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       /*
-        * Clear the transaction pointer in the dquot
-        */
-       dqp->q_transp = NULL;
-
-       /*
-        * dquots are never 'held' from getting unlocked at the end of
-        * a transaction.  Their locking and unlocking is hidden inside the
-        * transaction layer, within trans_commit. Hence, no LI_HOLD flag
-        * for the logitem.
-        */
-       xfs_dqunlock(dqp);
-}
-
-/*
- * This needs to stamp an lsn into the dquot, I think.
- * RPCs that look at user dquots would then have to
- * push on the dependency recorded in the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector for dquots
- */
-static struct xfs_item_ops xfs_dquot_item_ops = {
-       .iop_size       = xfs_qm_dquot_logitem_size,
-       .iop_format     = xfs_qm_dquot_logitem_format,
-       .iop_pin        = xfs_qm_dquot_logitem_pin,
-       .iop_unpin      = xfs_qm_dquot_logitem_unpin,
-       .iop_trylock    = xfs_qm_dquot_logitem_trylock,
-       .iop_unlock     = xfs_qm_dquot_logitem_unlock,
-       .iop_committed  = xfs_qm_dquot_logitem_committed,
-       .iop_push       = xfs_qm_dquot_logitem_push,
-       .iop_pushbuf    = xfs_qm_dquot_logitem_pushbuf,
-       .iop_committing = xfs_qm_dquot_logitem_committing
-};
-
-/*
- * Initialize the dquot log item for a newly allocated dquot.
- * The dquot isn't locked at this point, but it isn't on any of the lists
- * either, so we don't care.
- */
-void
-xfs_qm_dquot_logitem_init(
-       struct xfs_dquot        *dqp)
-{
-       struct xfs_dq_logitem   *lp = &dqp->q_logitem;
-
-       xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
-                                       &xfs_dquot_item_ops);
-       lp->qli_dquot = dqp;
-       lp->qli_format.qlf_type = XFS_LI_DQUOT;
-       lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
-       lp->qli_format.qlf_blkno = dqp->q_blkno;
-       lp->qli_format.qlf_len = 1;
-       /*
-        * This is just the offset of this dquot within its buffer
-        * (which is currently 1 FSB and probably won't change).
-        * Hence 32 bits for this offset should be just fine.
-        * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
-        * here, and recompute it at recovery time.
-        */
-       lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
-}
-
-/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
-
-static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
-{
-       return container_of(lip, struct xfs_qoff_logitem, qql_item);
-}
-
-
-/*
- * This returns the number of iovecs needed to log the given quotaoff item.
- * We only need 1 iovec for a quotaoff item.  It just logs the
- * quotaoff_log_format structure.
- */
-STATIC uint
-xfs_qm_qoff_logitem_size(
-       struct xfs_log_item     *lip)
-{
-       return 1;
-}
-
-/*
- * This is called to fill in the vector of log iovecs for the
- * given quotaoff log item. We use only 1 iovec, and we point that
- * at the quotaoff_log_format structure embedded in the quotaoff item.
- * At this point we also assert that the item really carries the
- * XFS_LI_QUOTAOFF type.
- */
-STATIC void
-xfs_qm_qoff_logitem_format(
-       struct xfs_log_item     *lip,
-       struct xfs_log_iovec    *log_vector)
-{
-       struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
-
-       ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
-
-       log_vector->i_addr = &qflip->qql_format;
-       log_vector->i_len = sizeof(xfs_qoff_logitem_t);
-       log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
-       qflip->qql_format.qf_size = 1;
-}
-
-/*
- * Pinning has no meaning for a quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * Since pinning has no meaning for a quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-}
-
-/*
- * Quotaoff items have no locking; return XFS_ITEM_LOCKED so that
- * the AIL push code skips over them.
- */
-STATIC uint
-xfs_qm_qoff_logitem_trylock(
-       struct xfs_log_item     *lip)
-{
-       return XFS_ITEM_LOCKED;
-}
-
-/*
- * Quotaoff items have no locking or pushing, so unlocking
- * is a no-op.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * There isn't much you can do to push on a quotaoff item.  It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-STATIC void
-xfs_qm_qoff_logitem_push(
-       struct xfs_log_item     *lip)
-{
-}
-
-
-STATIC xfs_lsn_t
-xfs_qm_qoffend_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
-       struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
-       struct xfs_ail          *ailp = qfs->qql_item.li_ailp;
-
-       /*
-        * Delete the qoff-start logitem from the AIL.
-        * xfs_trans_ail_delete() drops the AIL lock.
-        */
-       spin_lock(&ailp->xa_lock);
-       xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
-
-       kmem_free(qfs);
-       kmem_free(qfe);
-       return (xfs_lsn_t)-1;
-}
-
-/*
- * XXX rcc - don't know quite what to do with this.  I think we can
- * just ignore it.  The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off, in which
- * case we can't let that state become visible until the quotaoff hits
- * the disk.
- * So how would that happen?  Also, do we need different routines for
- * quotaoff start and quotaoff end?  I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable).  If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               commit_lsn)
-{
-}
-
-static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
-       .iop_size       = xfs_qm_qoff_logitem_size,
-       .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
-       .iop_committed  = xfs_qm_qoffend_logitem_committed,
-       .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
-static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
-       .iop_size       = xfs_qm_qoff_logitem_size,
-       .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
-       .iop_committed  = xfs_qm_qoff_logitem_committed,
-       .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * Allocate and initialize a quotaoff item of the correct quota type(s).
- */
-struct xfs_qoff_logitem *
-xfs_qm_qoff_logitem_init(
-       struct xfs_mount        *mp,
-       struct xfs_qoff_logitem *start,
-       uint                    flags)
-{
-       struct xfs_qoff_logitem *qf;
-
-       qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
-
-       xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
-                       &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
-       qf->qql_item.li_mountp = mp;
-       qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
-       qf->qql_format.qf_flags = flags;
-       qf->qql_start_lip = start;
-       return qf;
-}
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
deleted file mode 100644 (file)
index 5acae2a..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DQUOT_ITEM_H__
-#define __XFS_DQUOT_ITEM_H__
-
-struct xfs_dquot;
-struct xfs_trans;
-struct xfs_mount;
-struct xfs_qoff_logitem;
-
-typedef struct xfs_dq_logitem {
-       xfs_log_item_t           qli_item;         /* common portion */
-       struct xfs_dquot        *qli_dquot;        /* dquot ptr */
-       xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
-       xfs_dq_logformat_t       qli_format;       /* logged structure */
-} xfs_dq_logitem_t;
-
-typedef struct xfs_qoff_logitem {
-       xfs_log_item_t           qql_item;      /* common portion */
-       struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
-       xfs_qoff_logformat_t     qql_format;    /* logged structure */
-} xfs_qoff_logitem_t;
-
-
-extern void               xfs_qm_dquot_logitem_init(struct xfs_dquot *);
-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
-                                       struct xfs_qoff_logitem *, uint);
-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
-                                       struct xfs_qoff_logitem *, uint);
-extern void               xfs_trans_log_quotaoff_item(struct xfs_trans *,
-                                       struct xfs_qoff_logitem *);
-
-#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
deleted file mode 100644 (file)
index 9a0aa76..0000000
+++ /dev/null
@@ -1,2416 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-/*
- * The global quota manager. There is only one of these for the entire
- * system, _not_ one per file system. XQM keeps track of the overall
- * quota functionality, including maintaining the freelist and hash
- * tables of dquots.
- */
-struct mutex   xfs_Gqm_lock;
-struct xfs_qm  *xfs_Gqm;
-uint           ndquot;
-
-kmem_zone_t    *qm_dqzone;
-kmem_zone_t    *qm_dqtrxzone;
-
-STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
-
-STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
-STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int     xfs_qm_shake(struct shrinker *, struct shrink_control *);
-
-static struct shrinker xfs_qm_shaker = {
-       .shrink = xfs_qm_shake,
-       .seeks = DEFAULT_SEEKS,
-};
-
-/*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
- */
-STATIC struct xfs_qm *
-xfs_Gqm_init(void)
-{
-       xfs_dqhash_t    *udqhash, *gdqhash;
-       xfs_qm_t        *xqm;
-       size_t          hsize;
-       uint            i;
-
-       /*
-        * Initialize the dquot hash tables.
-        */
-       udqhash = kmem_zalloc_greedy(&hsize,
-                                    XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
-                                    XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
-       if (!udqhash)
-               goto out;
-
-       gdqhash = kmem_zalloc_large(hsize);
-       if (!gdqhash)
-               goto out_free_udqhash;
-
-       hsize /= sizeof(xfs_dqhash_t);
-       ndquot = hsize << 8;
-
-       xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-       xqm->qm_dqhashmask = hsize - 1;
-       xqm->qm_usr_dqhtable = udqhash;
-       xqm->qm_grp_dqhtable = gdqhash;
-       ASSERT(xqm->qm_usr_dqhtable != NULL);
-       ASSERT(xqm->qm_grp_dqhtable != NULL);
-
-       for (i = 0; i < hsize; i++) {
-               xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
-               xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
-       }
-
-       /*
-        * Freelist of all dquots of all file systems
-        */
-       INIT_LIST_HEAD(&xqm->qm_dqfrlist);
-       xqm->qm_dqfrlist_cnt = 0;
-       mutex_init(&xqm->qm_dqfrlist_lock);
-
-       /*
-        * dquot zone. we register our own low-memory callback.
-        */
-       if (!qm_dqzone) {
-               xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
-                                               "xfs_dquots");
-               qm_dqzone = xqm->qm_dqzone;
-       } else
-               xqm->qm_dqzone = qm_dqzone;
-
-       register_shrinker(&xfs_qm_shaker);
-
-       /*
-        * The t_dqinfo portion of transactions.
-        */
-       if (!qm_dqtrxzone) {
-               xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
-                                                  "xfs_dqtrx");
-               qm_dqtrxzone = xqm->qm_dqtrxzone;
-       } else
-               xqm->qm_dqtrxzone = qm_dqtrxzone;
-
-       atomic_set(&xqm->qm_totaldquots, 0);
-       xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
-       xqm->qm_nrefs = 0;
-       return xqm;
-
- out_free_udqhash:
-       kmem_free_large(udqhash);
- out:
-       return NULL;
-}
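
The sizing math above turns the byte count handed back by the greedy allocator into a bucket count, then budgets 256 dquots per hash bucket. A worked example with illustrative numbers (both sizes are assumptions; the real values depend on the architecture):

        size_t bytes  = 65536;          /* assume the greedy alloc returned 64 KiB */
        size_t bucket = 64;             /* assume sizeof(xfs_dqhash_t) == 64 */
        size_t hsize  = bytes / bucket; /* 1024 hash buckets */
        unsigned int ndquot = hsize << 8;       /* 262144: 256 dquots per bucket */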
-
-/*
- * Destroy the global quota manager when its reference count goes to zero.
- */
-STATIC void
-xfs_qm_destroy(
-       struct xfs_qm   *xqm)
-{
-       struct xfs_dquot *dqp, *n;
-       int             hsize, i;
-
-       ASSERT(xqm != NULL);
-       ASSERT(xqm->qm_nrefs == 0);
-       unregister_shrinker(&xfs_qm_shaker);
-       hsize = xqm->qm_dqhashmask + 1;
-       for (i = 0; i < hsize; i++) {
-               xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
-               xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
-       }
-       kmem_free_large(xqm->qm_usr_dqhtable);
-       kmem_free_large(xqm->qm_grp_dqhtable);
-       xqm->qm_usr_dqhtable = NULL;
-       xqm->qm_grp_dqhtable = NULL;
-       xqm->qm_dqhashmask = 0;
-
-       /* frlist cleanup */
-       mutex_lock(&xqm->qm_dqfrlist_lock);
-       list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
-               xfs_dqlock(dqp);
-               list_del_init(&dqp->q_freelist);
-               xfs_Gqm->qm_dqfrlist_cnt--;
-               xfs_dqunlock(dqp);
-               xfs_qm_dqdestroy(dqp);
-       }
-       mutex_unlock(&xqm->qm_dqfrlist_lock);
-       mutex_destroy(&xqm->qm_dqfrlist_lock);
-       kmem_free(xqm);
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
-       struct xfs_mount *mp)
-{
-       /*
-        * Need to lock the xfs_Gqm structure for things like this. For example,
-        * the structure could disappear between the entry to this routine and
-        * a HOLD operation if not locked.
-        */
-       mutex_lock(&xfs_Gqm_lock);
-
-       if (!xfs_Gqm) {
-               xfs_Gqm = xfs_Gqm_init();
-               if (!xfs_Gqm) {
-                       mutex_unlock(&xfs_Gqm_lock);
-                       return ENOMEM;
-               }
-       }
-
-       /*
-        * We can keep a list of all filesystems with quotas mounted for
-        * debugging and statistical purposes, but ...
-        * Just take a reference and get out.
-        */
-       xfs_Gqm->qm_nrefs++;
-       mutex_unlock(&xfs_Gqm_lock);
-
-       return 0;
-}
-
-
-/*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
-       struct xfs_mount *mp)
-{
-       xfs_dquot_t     *dqp, *n;
-
-       ASSERT(xfs_Gqm);
-       ASSERT(xfs_Gqm->qm_nrefs > 0);
-
-       /*
-        * Go through the freelist and destroy all inactive dquots.
-        */
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-               xfs_dqlock(dqp);
-               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                       ASSERT(dqp->q_mount == NULL);
-                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(list_empty(&dqp->q_hashlist));
-                       ASSERT(list_empty(&dqp->q_mplist));
-                       list_del_init(&dqp->q_freelist);
-                       xfs_Gqm->qm_dqfrlist_cnt--;
-                       xfs_dqunlock(dqp);
-                       xfs_qm_dqdestroy(dqp);
-               } else {
-                       xfs_dqunlock(dqp);
-               }
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       /*
-        * Destroy the entire XQM. If somebody mounts with quotaon, this'll
-        * be restarted.
-        */
-       mutex_lock(&xfs_Gqm_lock);
-       if (--xfs_Gqm->qm_nrefs == 0) {
-               xfs_qm_destroy(xfs_Gqm);
-               xfs_Gqm = NULL;
-       }
-       mutex_unlock(&xfs_Gqm_lock);
-}
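-
-/*
- * A minimal usage sketch of the refcounting pair above, assuming the
- * mount/unmount callers in this file: one hold per mounted filesystem,
- * one release at teardown, with the last release destroying xfs_Gqm
- * under xfs_Gqm_lock.
- *
- *        error = xfs_qm_hold_quotafs_ref(mp);        at mount
- *        if (error)
- *                return error;
- *        ...
- *        xfs_qm_rele_quotafs_ref(mp);                at unmount
- */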
-
-/*
- * Just destroy the quotainfo structure.
- */
-void
-xfs_qm_unmount(
-       struct xfs_mount        *mp)
-{
-       if (mp->m_quotainfo) {
-               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
-               xfs_qm_destroy_quotainfo(mp);
-       }
-}
-
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo.  This is also responsible for
- * running a quotacheck as necessary.  We are guaranteed that the superblock
- * is consistently read in at this point.
- *
- * If we fail here, the mount will continue with quota turned off. We don't
- * need to indicate success or failure at all.
- */
-void
-xfs_qm_mount_quotas(
-       xfs_mount_t     *mp)
-{
-       int             error = 0;
-       uint            sbf;
-
-       /*
-        * If quotas on realtime volumes is not supported, we disable
-        * quotas immediately.
-        */
-       if (mp->m_sb.sb_rextents) {
-               xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
-               mp->m_qflags = 0;
-               goto write_changes;
-       }
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * Allocate the quotainfo structure inside the mount struct, and
-        * create quotainode(s), and change/rev superblock if necessary.
-        */
-       error = xfs_qm_init_quotainfo(mp);
-       if (error) {
-               /*
-                * We must turn off quotas.
-                */
-               ASSERT(mp->m_quotainfo == NULL);
-               mp->m_qflags = 0;
-               goto write_changes;
-       }
-       /*
-        * If any of the quotas are not consistent, do a quotacheck.
-        */
-       if (XFS_QM_NEED_QUOTACHECK(mp)) {
-               error = xfs_qm_quotacheck(mp);
-               if (error) {
-                       /* Quotacheck failed and disabled quotas. */
-                       return;
-               }
-       }
-       /* 
-        * If one type of quotas is off, then it will lose its
-        * quotachecked status, since we won't be doing accounting for
-        * that type anymore.
-        */
-       if (!XFS_IS_UQUOTA_ON(mp))
-               mp->m_qflags &= ~XFS_UQUOTA_CHKD;
-       if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
-               mp->m_qflags &= ~XFS_OQUOTA_CHKD;
-
- write_changes:
-       /*
-        * We actually don't have to acquire the m_sb_lock at all.
-        * This can only be called from mount, and that's single threaded. XXX
-        */
-       spin_lock(&mp->m_sb_lock);
-       sbf = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
-       spin_unlock(&mp->m_sb_lock);
-
-       if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
-               if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
-                       /*
-                        * We could only have been turning quotas off.
-                        * We aren't in very good shape, actually, because
-                        * the incore structures are convinced that quotas are
-                        * off, but the on-disk superblock doesn't know that!
-                        */
-                       ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
-                       xfs_alert(mp, "%s: Superblock update failed!",
-                               __func__);
-               }
-       }
-
-       if (error) {
-               xfs_warn(mp, "Failed to initialize disk quotas.");
-               return;
-       }
-}
-
-/*
- * Called from the vfsops layer.
- */
-void
-xfs_qm_unmount_quotas(
-       xfs_mount_t     *mp)
-{
-       /*
-        * Release the dquots that root inode, et al might be holding,
-        * before we flush quotas and blow away the quotainfo structure.
-        */
-       ASSERT(mp->m_rootip);
-       xfs_qm_dqdetach(mp->m_rootip);
-       if (mp->m_rbmip)
-               xfs_qm_dqdetach(mp->m_rbmip);
-       if (mp->m_rsumip)
-               xfs_qm_dqdetach(mp->m_rsumip);
-
-       /*
-        * Release the quota inodes.
-        */
-       if (mp->m_quotainfo) {
-               if (mp->m_quotainfo->qi_uquotaip) {
-                       IRELE(mp->m_quotainfo->qi_uquotaip);
-                       mp->m_quotainfo->qi_uquotaip = NULL;
-               }
-               if (mp->m_quotainfo->qi_gquotaip) {
-                       IRELE(mp->m_quotainfo->qi_gquotaip);
-                       mp->m_quotainfo->qi_gquotaip = NULL;
-               }
-       }
-}
-
-/*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
- */
-STATIC int
-xfs_qm_dqflush_all(
-       struct xfs_mount        *mp,
-       int                     sync_mode)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       int                     recl;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       if (!q)
-               return 0;
-again:
-       mutex_lock(&q->qi_dqlist_lock);
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if (! XFS_DQ_IS_DIRTY(dqp)) {
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               /* XXX a sentinel would be better */
-               recl = q->qi_dqreclaims;
-               if (!xfs_dqflock_nowait(dqp)) {
-                       /*
-                        * If we can't grab the flush lock then check
-                        * to see if the dquot has been flushed delayed
-                        * write.  If so, grab its buffer and send it
-                        * out immediately.  We'll be able to acquire
-                        * the flush lock when the I/O completes.
-                        */
-                       xfs_qm_dqflock_pushbuf_wait(dqp);
-               }
-               /*
-                * Let go of the mplist lock. We don't want to hold it
-                * across a disk write.
-                */
-               mutex_unlock(&q->qi_dqlist_lock);
-               error = xfs_qm_dqflush(dqp, sync_mode);
-               xfs_dqunlock(dqp);
-               if (error)
-                       return error;
-
-               mutex_lock(&q->qi_dqlist_lock);
-               if (recl != q->qi_dqreclaims) {
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       /* XXX restart limit */
-                       goto again;
-               }
-       }
-
-       mutex_unlock(&q->qi_dqlist_lock);
-       /* return ! busy */
-       return 0;
-}
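-
-/*
- * The qi_dqreclaims snapshot above is a restart heuristic; a hedged
- * sketch of the idiom, using the names from this file:
- *
- *        recl = q->qi_dqreclaims;        snapshot before dropping the lock
- *        mutex_unlock(&q->qi_dqlist_lock);
- *        ...                             blocking work, e.g. a dquot flush
- *        mutex_lock(&q->qi_dqlist_lock);
- *        if (recl != q->qi_dqreclaims)   the list may have changed under us
- *                goto again;             rescan from the start
- */
-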
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
-       struct xfs_mount        *mp)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_dquot        *dqp, *gdqp;
-       int                     nrecl;
-
- again:
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if ((gdqp = dqp->q_gdquot)) {
-                       xfs_dqlock(gdqp);
-                       dqp->q_gdquot = NULL;
-               }
-               xfs_dqunlock(dqp);
-
-               if (gdqp) {
-                       /*
-                        * Can't hold the mplist lock across a dqput.
-                        * XXX: must convert to marker-based iteration here.
-                        */
-                       nrecl = q->qi_dqreclaims;
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       xfs_qm_dqput(gdqp);
-
-                       mutex_lock(&q->qi_dqlist_lock);
-                       if (nrecl != q->qi_dqreclaims)
-                               goto again;
-               }
-       }
-}
-
-/*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
- */
-STATIC int
-xfs_qm_dqpurge_int(
-       struct xfs_mount        *mp,
-       uint                    flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_dquot        *dqp, *n;
-       uint                    dqtype;
-       int                     nrecl;
-       int                     nmisses;
-
-       if (!q)
-               return 0;
-
-       dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
-       dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
-       dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
-       mutex_lock(&q->qi_dqlist_lock);
-
-       /*
-        * In the first pass through all incore dquots of this filesystem,
-        * we release the group dquot pointers the user dquots may be
-        * carrying around as a hint. We need to do this irrespective of
-        * what's being turned off.
-        */
-       xfs_qm_detach_gdquots(mp);
-
-      again:
-       nmisses = 0;
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       /*
-        * Try to get rid of all of the unwanted dquots. The idea is to
-        * get them off mplist and hashlist, but leave them on freelist.
-        */
-       list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
-               /*
-                * It's OK to look at the type without taking dqlock here.
-                * We're holding the mplist lock here, and that's needed for
-                * a dqreclaim.
-                */
-               if ((dqp->dq_flags & dqtype) == 0)
-                       continue;
-
-               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       nrecl = q->qi_dqreclaims;
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       mutex_lock(&dqp->q_hash->qh_lock);
-                       mutex_lock(&q->qi_dqlist_lock);
-
-                       /*
-                        * XXX: Theoretically, we can get into a very long
-                        * ping-pong game here.
-                        * No one can be adding dquots to the mplist at
-                        * this point, but somebody might be taking things off.
-                        */
-                       if (nrecl != q->qi_dqreclaims) {
-                               mutex_unlock(&dqp->q_hash->qh_lock);
-                               goto again;
-                       }
-               }
-
-               /*
-                * Take the dquot off the mplist and hashlist. It may remain on
-                * freelist in INACTIVE state.
-                */
-               nmisses += xfs_qm_dqpurge(dqp);
-       }
-       mutex_unlock(&q->qi_dqlist_lock);
-       return nmisses;
-}
-
-int
-xfs_qm_dqpurge_all(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             ndquots;
-
-       /*
-        * Purge the dquot cache.
-        * None of the dquots should really be busy at this point.
-        */
-       if (mp->m_quotainfo) {
-               while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
-                       delay(ndquots * 10);
-               }
-       }
-       return 0;
-}
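-
-/*
- * A hedged reading of the loop above: xfs_qm_dqpurge_int() returns the
- * number of dquots it could not purge in this pass ("misses"), so the
- * caller backs off proportionally and rescans until a pass completes
- * with zero misses:
- *
- *        while ((ndquots = xfs_qm_dqpurge_int(mp, flags)))
- *                delay(ndquots * 10);
- */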
-
-STATIC int
-xfs_qm_dqattach_one(
-       xfs_inode_t     *ip,
-       xfs_dqid_t      id,
-       uint            type,
-       uint            doalloc,
-       xfs_dquot_t     *udqhint, /* hint */
-       xfs_dquot_t     **IO_idqpp)
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       error = 0;
-
-       /*
-        * See if we already have it in the inode itself. IO_idqpp is
-        * &i_udquot or &i_gdquot. This makes the code look weird, but it
-        * keeps the logic a lot simpler.
-        */
-       dqp = *IO_idqpp;
-       if (dqp) {
-               trace_xfs_dqattach_found(dqp);
-               return 0;
-       }
-
-       /*
-        * udqhint is the i_udquot field in inode, and is non-NULL only
-        * when the type arg is group/project. Its purpose is to save a
-        * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
-        * the user dquot.
-        */
-       if (udqhint) {
-               ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
-               xfs_dqlock(udqhint);
-
-               /*
-                * No need to take dqlock to look at the id.
-                *
-                * The ID can't change until it gets reclaimed, and it won't
-                * be reclaimed as long as we have a ref from inode and we
-                * hold the ilock.
-                */
-               dqp = udqhint->q_gdquot;
-               if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
-                       xfs_dqlock(dqp);
-                       XFS_DQHOLD(dqp);
-                       ASSERT(*IO_idqpp == NULL);
-                       *IO_idqpp = dqp;
-
-                       xfs_dqunlock(dqp);
-                       xfs_dqunlock(udqhint);
-                       return 0;
-               }
-
-               /*
-                * We can't hold a dquot lock when we call the dqget code.
-                * We'd deadlock in no time by violating the lock ordering:
-                * the inode lock comes before any dquot lock, and we may
-                * drop and reacquire the ilock in xfs_qm_dqget().
-                */
-               xfs_dqunlock(udqhint);
-       }
-
-       /*
-        * Find the dquot from somewhere. This bumps the
-        * reference count of dquot and returns it locked.
-        * This can return ENOENT if dquot didn't exist on
-        * disk and we didn't ask it to allocate;
-        * ESRCH if quotas got turned off suddenly.
-        */
-       error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
-       if (error)
-               return error;
-
-       trace_xfs_dqattach_get(dqp);
-
-       /*
-        * dqget may have dropped and re-acquired the ilock, but it guarantees
-        * that the dquot returned is the one that should go in the inode.
-        */
-       *IO_idqpp = dqp;
-       xfs_dqunlock(dqp);
-       return 0;
-}
-
-
-/*
- * Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering constraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
- */
-STATIC void
-xfs_qm_dqattach_grouphint(
-       xfs_dquot_t     *udq,
-       xfs_dquot_t     *gdq)
-{
-       xfs_dquot_t     *tmp;
-
-       xfs_dqlock(udq);
-
-       if ((tmp = udq->q_gdquot)) {
-               if (tmp == gdq) {
-                       xfs_dqunlock(udq);
-                       return;
-               }
-
-               udq->q_gdquot = NULL;
-               /*
-                * We can't keep any dqlocks when calling dqrele,
-                * because the freelist lock comes before dqlocks.
-                */
-               xfs_dqunlock(udq);
-               /*
-                * We took a hard reference once upon a time in dqget,
-                * so give it back when the udquot no longer points at it.
-                * dqput() does the unlocking of the dquot.
-                */
-               xfs_qm_dqrele(tmp);
-
-               xfs_dqlock(udq);
-               xfs_dqlock(gdq);
-
-       } else {
-               ASSERT(XFS_DQ_IS_LOCKED(udq));
-               xfs_dqlock(gdq);
-       }
-
-       ASSERT(XFS_DQ_IS_LOCKED(udq));
-       ASSERT(XFS_DQ_IS_LOCKED(gdq));
-       /*
-        * Somebody could have attached a gdquot here,
-        * when we dropped the uqlock. If so, just do nothing.
-        */
-       if (udq->q_gdquot == NULL) {
-               XFS_DQHOLD(gdq);
-               udq->q_gdquot = gdq;
-       }
-
-       xfs_dqunlock(gdq);
-       xfs_dqunlock(udq);
-}
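-
-/*
- * A hedged sketch of the drop/relock/recheck idiom used above; the
- * recheck is what makes dropping the dqlocks around dqrele safe:
- *
- *        xfs_dqunlock(udq);              can't call dqrele with a dqlock held
- *        xfs_qm_dqrele(tmp);
- *        xfs_dqlock(udq);
- *        xfs_dqlock(gdq);
- *        if (udq->q_gdquot == NULL)      recheck: it may have been refilled
- *                ...                     only then install the new hint
- */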
-
-
-/*
- * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
- * into account.
- * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * Inode may get unlocked and relocked in here, and the caller must deal with
- * the consequences.
- */
-int
-xfs_qm_dqattach_locked(
-       xfs_inode_t     *ip,
-       uint            flags)
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       uint            nquotas = 0;
-       int             error = 0;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) ||
-           !XFS_IS_QUOTA_ON(mp) ||
-           !XFS_NOT_DQATTACHED(mp, ip) ||
-           ip->i_ino == mp->m_sb.sb_uquotino ||
-           ip->i_ino == mp->m_sb.sb_gquotino)
-               return 0;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       if (XFS_IS_UQUOTA_ON(mp)) {
-               error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               NULL, &ip->i_udquot);
-               if (error)
-                       goto done;
-               nquotas++;
-       }
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       if (XFS_IS_OQUOTA_ON(mp)) {
-               error = XFS_IS_GQUOTA_ON(mp) ?
-                       xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               ip->i_udquot, &ip->i_gdquot) :
-                       xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               ip->i_udquot, &ip->i_gdquot);
-               /*
-                * Don't worry about the udquot that we may have
-                * attached above. It'll get detached, if not already.
-                */
-               if (error)
-                       goto done;
-               nquotas++;
-       }
-
-       /*
-        * Attach this group quota to the user quota as a hint.
-        * This WON'T, in general, result in thrashing.
-        */
-       if (nquotas == 2) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-               ASSERT(ip->i_udquot);
-               ASSERT(ip->i_gdquot);
-
-               /*
-                * We may or may not have the i_udquot locked at this point,
-                * but this check is OK since we don't depend on the i_gdquot to
-                * be accurate 100% all the time. It is just a hint, and this
-                * will succeed in general.
-                */
-               if (ip->i_udquot->q_gdquot == ip->i_gdquot)
-                       goto done;
-               /*
-                * Attach i_gdquot to the gdquot hint inside the i_udquot.
-                */
-               xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
-       }
-
- done:
-#ifdef DEBUG
-       if (!error) {
-               if (XFS_IS_UQUOTA_ON(mp))
-                       ASSERT(ip->i_udquot);
-               if (XFS_IS_OQUOTA_ON(mp))
-                       ASSERT(ip->i_gdquot);
-       }
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
-       return error;
-}
-
-int
-xfs_qm_dqattach(
-       struct xfs_inode        *ip,
-       uint                    flags)
-{
-       int                     error;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       error = xfs_qm_dqattach_locked(ip, flags);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       return error;
-}
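-
-/*
- * A minimal caller sketch, assuming an inode that is not yet ILOCKed:
- * xfs_qm_dqattach() takes and drops XFS_ILOCK_EXCL itself, and the
- * attached dquots are dropped later via xfs_qm_dqdetach().
- *
- *        error = xfs_qm_dqattach(ip, 0);
- *        if (error)
- *                return error;
- *        ...                             ip->i_udquot/i_gdquot usable
- *        xfs_qm_dqdetach(ip);
- */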
-
-/*
- * Release dquots (and their references) if any.
- * The inode should be locked EXCL except when this is called by
- * xfs_ireclaim.
- */
-void
-xfs_qm_dqdetach(
-       xfs_inode_t     *ip)
-{
-       if (!(ip->i_udquot || ip->i_gdquot))
-               return;
-
-       trace_xfs_dquot_dqdetach(ip);
-
-       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
-       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
-       if (ip->i_udquot) {
-               xfs_qm_dqrele(ip->i_udquot);
-               ip->i_udquot = NULL;
-       }
-       if (ip->i_gdquot) {
-               xfs_qm_dqrele(ip->i_gdquot);
-               ip->i_gdquot = NULL;
-       }
-}
-
-int
-xfs_qm_sync(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       int                     recl, restarts;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       restarts = 0;
-
-  again:
-       mutex_lock(&q->qi_dqlist_lock);
-       /*
-        * dqpurge_all() also takes the mplist lock and iterates thru all dquots
-        * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
-        * when we have the mplist lock, we know that dquots will be consistent
-        * as long as we have it locked.
-        */
-       if (!XFS_IS_QUOTA_ON(mp)) {
-               mutex_unlock(&q->qi_dqlist_lock);
-               return 0;
-       }
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               /*
-                * If this is vfs_sync calling, then skip the dquots that
-                * don't 'seem' to be dirty, i.e. don't acquire dqlock.
-                * This is very similar to what xfs_sync does with inodes.
-                */
-               if (flags & SYNC_TRYLOCK) {
-                       if (!XFS_DQ_IS_DIRTY(dqp))
-                               continue;
-                       if (!xfs_qm_dqlock_nowait(dqp))
-                               continue;
-               } else {
-                       xfs_dqlock(dqp);
-               }
-
-               /*
-                * Now, find out for sure if this dquot is dirty or not.
-                */
-               if (! XFS_DQ_IS_DIRTY(dqp)) {
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               /* XXX a sentinel would be better */
-               recl = q->qi_dqreclaims;
-               if (!xfs_dqflock_nowait(dqp)) {
-                       if (flags & SYNC_TRYLOCK) {
-                               xfs_dqunlock(dqp);
-                               continue;
-                       }
-                       /*
-                        * If we can't grab the flush lock, the caller really
-                        * wanted us to give this our best shot, so see if we
-                        * can give a push to the buffer before we wait
-                        * on the flush lock. At this point, we know that
-                        * even though the dquot is being flushed,
-                        * it has (new) dirty data.
-                        */
-                       xfs_qm_dqflock_pushbuf_wait(dqp);
-               }
-               /*
-                * Let go of the mplist lock. We don't want to hold it
-                * across a disk write
-                */
-               mutex_unlock(&q->qi_dqlist_lock);
-               error = xfs_qm_dqflush(dqp, flags);
-               xfs_dqunlock(dqp);
-               if (error && XFS_FORCED_SHUTDOWN(mp))
-                       return 0;       /* Need to prevent umount failure */
-               else if (error)
-                       return error;
-
-               mutex_lock(&q->qi_dqlist_lock);
-               if (recl != q->qi_dqreclaims) {
-                       if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
-                               break;
-
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       goto again;
-               }
-       }
-
-       mutex_unlock(&q->qi_dqlist_lock);
-       return 0;
-}
-
-/*
- * The hash chains and the mplist use the same xfs_dqhash structure as
- * their list head, but we can take the mplist qh_lock and one of the
- * hash qh_locks at the same time without any problem as they aren't
- * related.
- */
-static struct lock_class_key xfs_quota_mplist_class;
-
-/*
- * This initializes all the quota information that's kept in the
- * mount structure
- */
-STATIC int
-xfs_qm_init_quotainfo(
-       xfs_mount_t     *mp)
-{
-       xfs_quotainfo_t *qinf;
-       int             error;
-       xfs_dquot_t     *dqp;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * Tell XQM that we exist as soon as possible.
-        */
-       if ((error = xfs_qm_hold_quotafs_ref(mp))) {
-               return error;
-       }
-
-       qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
-
-       /*
-        * See if quotainodes are setup, and if not, allocate them,
-        * and change the superblock accordingly.
-        */
-       if ((error = xfs_qm_init_quotainos(mp))) {
-               kmem_free(qinf);
-               mp->m_quotainfo = NULL;
-               return error;
-       }
-
-       INIT_LIST_HEAD(&qinf->qi_dqlist);
-       mutex_init(&qinf->qi_dqlist_lock);
-       lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
-
-       qinf->qi_dqreclaims = 0;
-
-       /* mutex used to serialize quotaoffs */
-       mutex_init(&qinf->qi_quotaofflock);
-
-       /* Precalc some constants */
-       qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-       ASSERT(qinf->qi_dqchunklen);
-       qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
-       do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
-
-       mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
-
-       /*
-        * We try to get the limits from the superuser's limits fields.
-        * This is quite hacky, but it is standard quota practice.
-        * We look at the USR dquot with id == 0 first, but if user quotas
-        * are not enabled we go to the GRP dquot with id == 0.
-        * We don't really care to keep separate default limits for user
-        * and group quotas, at least not at this point.
-        */
-       error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
-                            XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
-                            (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
-                               XFS_DQ_PROJ),
-                            XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
-                            &dqp);
-       if (! error) {
-               xfs_disk_dquot_t        *ddqp = &dqp->q_core;
-
-               /*
-                * The warnings and timers set the grace period given to
-                * a user or group before further writes are refused.
-                * If a value is zero, a default is used.
-                */
-               qinf->qi_btimelimit = ddqp->d_btimer ?
-                       be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
-               qinf->qi_itimelimit = ddqp->d_itimer ?
-                       be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
-               qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
-                       be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
-               qinf->qi_bwarnlimit = ddqp->d_bwarns ?
-                       be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
-               qinf->qi_iwarnlimit = ddqp->d_iwarns ?
-                       be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
-               qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
-                       be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
-               qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
-               qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
-               qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
-               qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
-               qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
-               qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-               /*
-                * We sent the XFS_QMOPT_DQSUSER flag to dqget because
-                * we don't want this dquot cached. We haven't done a
-                * quotacheck yet, and quotacheck doesn't like incore dquots.
-                */
-               xfs_qm_dqdestroy(dqp);
-       } else {
-               qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
-               qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
-               qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
-               qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
-               qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
-               qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
-       }
-
-       return 0;
-}
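-
-/*
- * A worked example of the qi_dqperchunk precalculation above, assuming
- * 4k filesystem blocks and a one-FSB dquot cluster (both sizes are
- * mount-dependent; the numbers below are illustrative only):
- *
- *        qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB)
- *                      = 8 basic (512-byte) blocks
- *        qi_dqperchunk = BBTOB(8) / sizeof(xfs_dqblk_t)
- *                      = 4096 / sizeof(xfs_dqblk_t) dquots per chunk
- */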
-
-
-/*
- * Gets called when unmounting a filesystem or when all quotas get
- * turned off.
- * This purges the quota inodes, destroys locks and frees itself.
- */
-void
-xfs_qm_destroy_quotainfo(
-       xfs_mount_t     *mp)
-{
-       xfs_quotainfo_t *qi;
-
-       qi = mp->m_quotainfo;
-       ASSERT(qi != NULL);
-       ASSERT(xfs_Gqm != NULL);
-
-       /*
-        * Release the reference that XQM kept, so that we know
-        * when the XQM structure should be freed. We cannot assume
-        * that xfs_Gqm is non-null after this point.
-        */
-       xfs_qm_rele_quotafs_ref(mp);
-
-       ASSERT(list_empty(&qi->qi_dqlist));
-       mutex_destroy(&qi->qi_dqlist_lock);
-
-       if (qi->qi_uquotaip) {
-               IRELE(qi->qi_uquotaip);
-               qi->qi_uquotaip = NULL; /* paranoia */
-       }
-       if (qi->qi_gquotaip) {
-               IRELE(qi->qi_gquotaip);
-               qi->qi_gquotaip = NULL;
-       }
-       mutex_destroy(&qi->qi_quotaofflock);
-       kmem_free(qi);
-       mp->m_quotainfo = NULL;
-}
-
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
-       xfs_dqlist_t    *list,
-       char            *str,
-       int             n)
-{
-       mutex_init(&list->qh_lock);
-       INIT_LIST_HEAD(&list->qh_list);
-       list->qh_version = 0;
-       list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
-       xfs_dqlist_t    *list)
-{
-       mutex_destroy(&(list->qh_lock));
-}
-
-/*
- * Create an inode and return with a reference already taken, but unlocked
- * This is how we create quota inodes
- */
-STATIC int
-xfs_qm_qino_alloc(
-       xfs_mount_t     *mp,
-       xfs_inode_t     **ip,
-       __int64_t       sbfields,
-       uint            flags)
-{
-       xfs_trans_t     *tp;
-       int             error;
-       int             committed;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
-       if ((error = xfs_trans_reserve(tp,
-                                     XFS_QM_QINOCREATE_SPACE_RES(mp),
-                                     XFS_CREATE_LOG_RES(mp), 0,
-                                     XFS_TRANS_PERM_LOG_RES,
-                                     XFS_CREATE_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
-       if (error) {
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-                                XFS_TRANS_ABORT);
-               return error;
-       }
-
-       /*
-        * Make the changes in the superblock, and log those too.
-        * sbfields arg may contain fields other than *QUOTINO;
-        * VERSIONNUM for example.
-        */
-       spin_lock(&mp->m_sb_lock);
-       if (flags & XFS_QMOPT_SBVERSION) {
-               ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
-               ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                                  XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
-                      (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                       XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
-
-               xfs_sb_version_addquota(&mp->m_sb);
-               mp->m_sb.sb_uquotino = NULLFSINO;
-               mp->m_sb.sb_gquotino = NULLFSINO;
-
-               /* qflags will get updated _after_ quotacheck */
-               mp->m_sb.sb_qflags = 0;
-       }
-       if (flags & XFS_QMOPT_UQUOTA)
-               mp->m_sb.sb_uquotino = (*ip)->i_ino;
-       else
-               mp->m_sb.sb_gquotino = (*ip)->i_ino;
-       spin_unlock(&mp->m_sb_lock);
-       xfs_mod_sb(tp, sbfields);
-
-       if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
-               xfs_alert(mp, "%s failed (error %d)!", __func__, error);
-               return error;
-       }
-       return 0;
-}
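-
-/*
- * The transaction lifecycle above in outline (names as used in this
- * function; cancel on any failure before the transaction is dirtied,
- * abort once it is):
- *
- *        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
- *        error = xfs_trans_reserve(tp, ...);
- *        if (error) {
- *                xfs_trans_cancel(tp, 0);
- *                return error;
- *        }
- *        ...                             dirty the transaction
- *        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- */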
-
-
-STATIC void
-xfs_qm_reset_dqcounts(
-       xfs_mount_t     *mp,
-       xfs_buf_t       *bp,
-       xfs_dqid_t      id,
-       uint            type)
-{
-       xfs_disk_dquot_t        *ddq;
-       int                     j;
-
-       trace_xfs_reset_dqcounts(bp, _RET_IP_);
-
-       /*
-        * Reset all counters and timers. They'll be
-        * started afresh by xfs_qm_quotacheck.
-        */
-#ifdef DEBUG
-       j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-       do_div(j, sizeof(xfs_dqblk_t));
-       ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
-#endif
-       ddq = bp->b_addr;
-       for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
-               /*
-                * Do a sanity check, and if needed, repair the dqblk. Don't
-                * output any warnings because it's perfectly possible to
-                * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
-                */
-               (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
-                                     "xfs_quotacheck");
-               ddq->d_bcount = 0;
-               ddq->d_icount = 0;
-               ddq->d_rtbcount = 0;
-               ddq->d_btimer = 0;
-               ddq->d_itimer = 0;
-               ddq->d_rtbtimer = 0;
-               ddq->d_bwarns = 0;
-               ddq->d_iwarns = 0;
-               ddq->d_rtbwarns = 0;
-               ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
-       }
-}
-
-STATIC int
-xfs_qm_dqiter_bufs(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      firstid,
-       xfs_fsblock_t   bno,
-       xfs_filblks_t   blkcnt,
-       uint            flags)
-{
-       xfs_buf_t       *bp;
-       int             error;
-       int             type;
-
-       ASSERT(blkcnt > 0);
-       type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
-               (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
-       error = 0;
-
-       /*
-        * Blkcnt arg can be a very big number, and might even be
-        * larger than the log itself. So, we have to break it up into
-        * manageable-sized transactions.
-        * Note that we don't start a permanent transaction here; we might
-        * not be able to get a log reservation for the whole thing up front,
-        * and we don't really care to either, because we just discard
-        * everything if we were to crash in the middle of this loop.
-        */
-       while (blkcnt--) {
-               error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-                             XFS_FSB_TO_DADDR(mp, bno),
-                             mp->m_quotainfo->qi_dqchunklen, 0, &bp);
-               if (error)
-                       break;
-
-               xfs_qm_reset_dqcounts(mp, bp, firstid, type);
-               xfs_bdwrite(mp, bp);
-               /*
-                * Go to the next block.
-                */
-               bno++;
-               firstid += mp->m_quotainfo->qi_dqperchunk;
-       }
-       return error;
-}
-
-/*
- * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
- * caller supplied function for every chunk of dquots that we find.
- */
-STATIC int
-xfs_qm_dqiterate(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *qip,
-       uint            flags)
-{
-       xfs_bmbt_irec_t         *map;
-       int                     i, nmaps;       /* number of map entries */
-       int                     error;          /* return value */
-       xfs_fileoff_t           lblkno;
-       xfs_filblks_t           maxlblkcnt;
-       xfs_dqid_t              firstid;
-       xfs_fsblock_t           rablkno;
-       xfs_filblks_t           rablkcnt;
-
-       error = 0;
-       /*
-        * This looks racy, but we can't keep an inode lock across a
-        * trans_reserve. But, this gets called during quotacheck, and that
-        * happens only at mount time, which is single-threaded.
-        */
-       if (qip->i_d.di_nblocks == 0)
-               return 0;
-
-       map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
-
-       lblkno = 0;
-       maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-       do {
-               nmaps = XFS_DQITER_MAP_SIZE;
-               /*
-                * We aren't changing the inode itself. Just changing
-                * some of its data. No new blocks are added here, and
-                * the inode is never added to the transaction.
-                */
-               xfs_ilock(qip, XFS_ILOCK_SHARED);
-               error = xfs_bmapi(NULL, qip, lblkno,
-                                 maxlblkcnt - lblkno,
-                                 XFS_BMAPI_METADATA,
-                                 NULL,
-                                 0, map, &nmaps, NULL);
-               xfs_iunlock(qip, XFS_ILOCK_SHARED);
-               if (error)
-                       break;
-
-               ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
-               for (i = 0; i < nmaps; i++) {
-                       ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
-                       ASSERT(map[i].br_blockcount);
-
-
-                       lblkno += map[i].br_blockcount;
-
-                       if (map[i].br_startblock == HOLESTARTBLOCK)
-                               continue;
-
-                       firstid = (xfs_dqid_t) map[i].br_startoff *
-                               mp->m_quotainfo->qi_dqperchunk;
-                       /*
-                        * Do a read-ahead on the next extent.
-                        */
-                       if ((i+1 < nmaps) &&
-                           (map[i+1].br_startblock != HOLESTARTBLOCK)) {
-                               rablkcnt =  map[i+1].br_blockcount;
-                               rablkno = map[i+1].br_startblock;
-                               while (rablkcnt--) {
-                                       xfs_buf_readahead(mp->m_ddev_targp,
-                                              XFS_FSB_TO_DADDR(mp, rablkno),
-                                              mp->m_quotainfo->qi_dqchunklen);
-                                       rablkno++;
-                               }
-                       }
-                       /*
-                        * Iterate thru all the blks in the extent and
-                        * reset the counters of all the dquots inside them.
-                        */
-                       if ((error = xfs_qm_dqiter_bufs(mp,
-                                                      firstid,
-                                                      map[i].br_startblock,
-                                                      map[i].br_blockcount,
-                                                      flags))) {
-                               break;
-                       }
-               }
-
-               if (error)
-                       break;
-       } while (nmaps > 0);
-
-       kmem_free(map);
-
-       return error;
-}
-
-/*
- * Called by dqusage_adjust in doing a quotacheck.
- *
- * Given the inode and a dquot id, this updates both the incore dquot and
- * the buffer copy. This is so that once the quotacheck is done, we can
- * just log all the buffers, as opposed to logging numerous updates to
- * individual dquots.
- */
-STATIC int
-xfs_qm_quotacheck_dqadjust(
-       struct xfs_inode        *ip,
-       xfs_dqid_t              id,
-       uint                    type,
-       xfs_qcnt_t              nblks,
-       xfs_qcnt_t              rtblks)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       error = xfs_qm_dqget(mp, ip, id, type,
-                            XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
-       if (error) {
-               /*
-                * Shouldn't be able to turn off quotas here.
-                */
-               ASSERT(error != ESRCH);
-               ASSERT(error != ENOENT);
-               return error;
-       }
-
-       trace_xfs_dqadjust(dqp);
-
-       /*
-        * Adjust the inode count and the block count to reflect this inode's
-        * resource usage.
-        */
-       be64_add_cpu(&dqp->q_core.d_icount, 1);
-       dqp->q_res_icount++;
-       if (nblks) {
-               be64_add_cpu(&dqp->q_core.d_bcount, nblks);
-               dqp->q_res_bcount += nblks;
-       }
-       if (rtblks) {
-               be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
-               dqp->q_res_rtbcount += rtblks;
-       }
-
-       /*
-        * Set default limits, adjust timers (since we changed usages)
-        *
-        * There are no timers for the default values set in the root dquot.
-        */
-       if (dqp->q_core.d_id) {
-               xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
-               xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
-       }
-
-       dqp->dq_flags |= XFS_DQ_DIRTY;
-       xfs_qm_dqput(dqp);
-       return 0;
-}
-
-STATIC int
-xfs_qm_get_rtblks(
-       xfs_inode_t     *ip,
-       xfs_qcnt_t      *O_rtblks)
-{
-       xfs_filblks_t   rtblks;                 /* total rt blks */
-       xfs_extnum_t    idx;                    /* extent record index */
-       xfs_ifork_t     *ifp;                   /* inode fork pointer */
-       xfs_extnum_t    nextents;               /* number of extent entries */
-       int             error;
-
-       ASSERT(XFS_IS_REALTIME_INODE(ip));
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-               if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
-                       return error;
-       }
-       rtblks = 0;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       for (idx = 0; idx < nextents; idx++)
-               rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
-       *O_rtblks = (xfs_qcnt_t)rtblks;
-       return 0;
-}
-
-/*
- * Callback routine supplied to bulkstat(). Given an inumber, find its
- * dquots and update them to account for resources taken by that inode.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqusage_adjust(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* not used */
-       int             ubsize,         /* not used */
-       int             *ubused,        /* not used */
-       int             *res)           /* result code value */
-{
-       xfs_inode_t     *ip;
-       xfs_qcnt_t      nblks, rtblks = 0;
-       int             error;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * rootino must have its resources accounted for, not so with the quota
-        * inodes.
-        */
-       if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
-               *res = BULKSTAT_RV_NOTHING;
-               return XFS_ERROR(EINVAL);
-       }
-
-       /*
-        * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
-        * interface expects the inode to be exclusively locked because that's
-        * the case in all other instances. It's OK that we do this because
-        * quotacheck is done only at mount time.
-        */
-       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
-       if (error) {
-               *res = BULKSTAT_RV_NOTHING;
-               return error;
-       }
-
-       ASSERT(ip->i_delayed_blks == 0);
-
-       if (XFS_IS_REALTIME_INODE(ip)) {
-               /*
-                * Walk thru the extent list and count the realtime blocks.
-                */
-               error = xfs_qm_get_rtblks(ip, &rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
-
-       /*
-        * Add the (disk blocks and inode) resources occupied by this
-        * inode to its dquots. We do this adjustment in the incore dquot,
-        * and also copy the changes to its buffer.
-        * We don't care about putting these changes in a transaction
-        * envelope because if we crash in the middle of a 'quotacheck'
-        * we have to start from the beginning anyway.
-        * Once we're done, we'll log all the dquot bufs.
-        *
-        * The *QUOTA_ON checks below may look pretty racy, but quotachecks
-        * and quotaoffs don't race. (Quotachecks happen at mount time only).
-        */
-       if (XFS_IS_UQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
-                                                  XFS_DQ_USER, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       if (XFS_IS_GQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
-                                                  XFS_DQ_GROUP, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       if (XFS_IS_PQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
-                                                  XFS_DQ_PROJ, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       IRELE(ip);
-       *res = BULKSTAT_RV_DIDONE;
-       return 0;
-
-error0:
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       IRELE(ip);
-       *res = BULKSTAT_RV_GIVEUP;
-       return error;
-}
-
-/*
- * Walk thru all the filesystem inodes and construct a consistent view
- * of the disk quota world. If the quotacheck fails, disable quotas.
- */
-int
-xfs_qm_quotacheck(
-       xfs_mount_t     *mp)
-{
-       int             done, count, error;
-       xfs_ino_t       lastino;
-       size_t          structsz;
-       xfs_inode_t     *uip, *gip;
-       uint            flags;
-
-       count = INT_MAX;
-       structsz = 1;
-       lastino = 0;
-       flags = 0;
-
-       ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * There should be no cached dquots. The (simplistic) quotacheck
-        * algorithm doesn't like that.
-        */
-       ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
-
-       xfs_notice(mp, "Quotacheck needed: Please wait.");
-
-       /*
-        * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
-        * their counters to zero. We need a clean slate.
-        * We don't log our changes till later.
-        */
-       uip = mp->m_quotainfo->qi_uquotaip;
-       if (uip) {
-               error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
-               if (error)
-                       goto error_return;
-               flags |= XFS_UQUOTA_CHKD;
-       }
-
-       gip = mp->m_quotainfo->qi_gquotaip;
-       if (gip) {
-               error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
-                                       XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
-               if (error)
-                       goto error_return;
-               flags |= XFS_OQUOTA_CHKD;
-       }
-
-       do {
-               /*
-                * Iterate thru all the inodes in the file system,
-                * adjusting the corresponding dquot counters in core.
-                */
-               error = xfs_bulkstat(mp, &lastino, &count,
-                                    xfs_qm_dqusage_adjust,
-                                    structsz, NULL, &done);
-               if (error)
-                       break;
-
-       } while (!done);
-
-       /*
-        * We've made all the changes that we need to make incore.
-        * Flush them down to disk buffers if everything was updated
-        * successfully.
-        */
-       if (!error)
-               error = xfs_qm_dqflush_all(mp, 0);
-
-       /*
-        * We can get this error if we couldn't do a dquot allocation inside
-        * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
-        * dirty dquots that might be cached, we just want to get rid of them
-        * and turn quotaoff. The dquots won't be attached to any of the inodes
-        * at this point (because we intentionally didn't in dqget_noattach).
-        */
-       if (error) {
-               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
-               goto error_return;
-       }
-
-       /*
-        * We didn't log anything, because if we crashed, we'll have to
-        * start the quotacheck from scratch anyway. However, we must make
-        * sure that our dquot changes are secure before we put the
-        * quotacheck'd stamp on the superblock. So, here we do a synchronous
-        * flush.
-        */
-       XFS_bflush(mp->m_ddev_targp);
-
-       /*
-        * If one type of quotas is off, then it will lose its
-        * quotachecked status, since we won't be doing accounting for
-        * that type anymore.
-        */
-       mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
-       mp->m_qflags |= flags;
-
- error_return:
-       if (error) {
-               xfs_warn(mp,
-       "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
-                       error);
-               /*
-                * We must turn off quotas.
-                */
-               ASSERT(mp->m_quotainfo != NULL);
-               ASSERT(xfs_Gqm != NULL);
-               xfs_qm_destroy_quotainfo(mp);
-               if (xfs_mount_reset_sbqflags(mp)) {
-                       xfs_warn(mp,
-                               "Quotacheck: Failed to reset quota flags.");
-               }
-       } else
-               xfs_notice(mp, "Quotacheck: Done.");
-       return (error);
-}
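-
-/*
- * The quotacheck above in outline (a summary of this function, not new
- * behaviour):
- *
- *        1. zero all on-disk dquot counters (xfs_qm_dqiterate)
- *        2. bulkstat every inode, re-adding its usage to the incore dquots
- *        3. flush the dirty dquots to their buffers (xfs_qm_dqflush_all)
- *        4. push the buffers out, then set the *_CHKD flags in m_qflags
- *
- * On any failure: purge the dquots, tear down the quotainfo, and reset
- * the superblock quota flags via xfs_mount_reset_sbqflags().
- */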
-
-/*
- * This is called after the superblock has been read in and we're ready to
- * iget the quota inodes.
- */
-STATIC int
-xfs_qm_init_quotainos(
-       xfs_mount_t     *mp)
-{
-       xfs_inode_t     *uip, *gip;
-       int             error;
-       __int64_t       sbflags;
-       uint            flags;
-
-       ASSERT(mp->m_quotainfo);
-       uip = gip = NULL;
-       sbflags = 0;
-       flags = 0;
-
-       /*
-        * Get the uquota and gquota inodes
-        */
-       if (xfs_sb_version_hasquota(&mp->m_sb)) {
-               if (XFS_IS_UQUOTA_ON(mp) &&
-                   mp->m_sb.sb_uquotino != NULLFSINO) {
-                       ASSERT(mp->m_sb.sb_uquotino > 0);
-                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-                                            0, 0, &uip)))
-                               return XFS_ERROR(error);
-               }
-               if (XFS_IS_OQUOTA_ON(mp) &&
-                   mp->m_sb.sb_gquotino != NULLFSINO) {
-                       ASSERT(mp->m_sb.sb_gquotino > 0);
-                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-                                            0, 0, &gip))) {
-                               if (uip)
-                                       IRELE(uip);
-                               return XFS_ERROR(error);
-                       }
-               }
-       } else {
-               flags |= XFS_QMOPT_SBVERSION;
-               sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                           XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
-       }
-
-       /*
-        * Create the two inodes, if they don't exist already. The changes
-        * made above will get added to a transaction and logged in one of
-        * the qino_alloc calls below.  If the device is readonly,
-        * temporarily switch to read-write to do this.
-        */
-       if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
-               if ((error = xfs_qm_qino_alloc(mp, &uip,
-                                             sbflags | XFS_SB_UQUOTINO,
-                                             flags | XFS_QMOPT_UQUOTA)))
-                       return XFS_ERROR(error);
-
-               flags &= ~XFS_QMOPT_SBVERSION;
-       }
-       if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
-               flags |= (XFS_IS_GQUOTA_ON(mp) ?
-                               XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
-               error = xfs_qm_qino_alloc(mp, &gip,
-                                         sbflags | XFS_SB_GQUOTINO, flags);
-               if (error) {
-                       if (uip)
-                               IRELE(uip);
-
-                       return XFS_ERROR(error);
-               }
-       }
-
-       mp->m_quotainfo->qi_uquotaip = uip;
-       mp->m_quotainfo->qi_gquotaip = gip;
-
-       return 0;
-}
-
-
-
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
-       xfs_dquot_t     *dqpout;
-       xfs_dquot_t     *dqp;
-       int             restarts;
-       int             startagain;
-
-       restarts = 0;
-       dqpout = NULL;
-
-       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-again:
-       startagain = 0;
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-               struct xfs_mount *mp = dqp->q_mount;
-               xfs_dqlock(dqp);
-
-               /*
-                * We are racing with dqlookup here. Naturally we don't
-                * want to reclaim a dquot that lookup wants. We release the
-                * freelist lock and start over, so that lookup will grab
-                * both the dquot and the freelist lock.
-                */
-               if (dqp->dq_flags & XFS_DQ_WANT) {
-                       ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-
-                       trace_xfs_dqreclaim_want(dqp);
-                       XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                       restarts++;
-                       startagain = 1;
-                       goto dqunlock;
-               }
-
-               /*
-                * If the dquot is inactive, we are assured that it is
-                * not on the mplist or the hashlist, and that makes our
-                * life easier.
-                */
-               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                       ASSERT(mp == NULL);
-                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(list_empty(&dqp->q_hashlist));
-                       ASSERT(list_empty(&dqp->q_mplist));
-                       list_del_init(&dqp->q_freelist);
-                       xfs_Gqm->qm_dqfrlist_cnt--;
-                       dqpout = dqp;
-                       XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-                       goto dqunlock;
-               }
-
-               ASSERT(dqp->q_hash);
-               ASSERT(!list_empty(&dqp->q_mplist));
-
-               /*
-                * Try to grab the flush lock. If this dquot is in the process
-                * of getting flushed to disk, we don't want to reclaim it.
-                */
-               if (!xfs_dqflock_nowait(dqp))
-                       goto dqunlock;
-
-               /*
-                * We have the flush lock so we know that this is not in the
-                * process of being flushed. So, if this is dirty, flush it
-                * DELWRI so that we don't get a freelist infested with
-                * dirty dquots.
-                */
-               if (XFS_DQ_IS_DIRTY(dqp)) {
-                       int     error;
-
-                       trace_xfs_dqreclaim_dirty(dqp);
-
-                       /*
-                        * We flush it delayed write, so don't bother
-                        * releasing the freelist lock.
-                        */
-                       error = xfs_qm_dqflush(dqp, 0);
-                       if (error) {
-                               xfs_warn(mp, "%s: dquot %p flush failed",
-                                       __func__, dqp);
-                       }
-                       goto dqunlock;
-               }
-
-               /*
-                * We're trying to get the hashlock out of order. This races
-                * with dqlookup; so, we give up and go to the next dquot if
-                * we couldn't get the hashlock. This way, we won't starve
-                * a dqlookup process that holds the hashlock that is
-                * waiting for the freelist lock.
-                */
-               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       restarts++;
-                       goto dqfunlock;
-               }
-
-               /*
-                * This races with dquot allocation code as well as dqflush_all
-                * and reclaim code. So, if we failed to grab the mplist lock,
-                * give up everything and start over.
-                */
-               if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
-                       restarts++;
-                       startagain = 1;
-                       goto qhunlock;
-               }
-
-               ASSERT(dqp->q_nrefs == 0);
-               list_del_init(&dqp->q_mplist);
-               mp->m_quotainfo->qi_dquots--;
-               mp->m_quotainfo->qi_dqreclaims++;
-               list_del_init(&dqp->q_hashlist);
-               dqp->q_hash->qh_version++;
-               list_del_init(&dqp->q_freelist);
-               xfs_Gqm->qm_dqfrlist_cnt--;
-               dqpout = dqp;
-               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-qhunlock:
-               mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
-               xfs_dqfunlock(dqp);
-dqunlock:
-               xfs_dqunlock(dqp);
-               if (dqpout)
-                       break;
-               if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                       break;
-               if (startagain) {
-                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                       goto again;
-               }
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-       return dqpout;
-}
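
The reclaim loop above leans on a trylock-and-restart discipline: the canonical order is hash lock, then freelist lock, then mplist lock, then the dquot locks, so once the freelist lock is held the higher-ranked locks may only be try-locked, and any failure drops everything and restarts (bounded by XFS_QM_RECLAIM_MAX_RESTARTS). A minimal userspace sketch of the same shape, using pthreads rather than kernel mutexes; all names here are illustrative, not XFS APIs:

    #include <pthread.h>
    #include <stdio.h>

    #define MAX_RESTARTS 4                 /* like XFS_QM_RECLAIM_MAX_RESTARTS */

    static pthread_mutex_t hash_lock = PTHREAD_MUTEX_INITIALIZER; /* ranks first  */
    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; /* ranks second */

    static int reclaim_one(void)
    {
        int restarts = 0;

    again:
        pthread_mutex_lock(&list_lock);

        /*
         * hash_lock is normally taken before list_lock, so with list_lock
         * held we may only trylock it; on failure, drop everything and
         * restart rather than risk a deadlock.
         */
        if (pthread_mutex_trylock(&hash_lock) != 0) {
            pthread_mutex_unlock(&list_lock);
            if (++restarts >= MAX_RESTARTS)
                return -1;                 /* bounded: give up eventually */
            goto again;
        }

        /* ... the actual reclaim work would happen here ... */

        pthread_mutex_unlock(&hash_lock);
        pthread_mutex_unlock(&list_lock);
        return 0;
    }

    int main(void)
    {
        printf("reclaim_one() -> %d\n", reclaim_one());
        return 0;
    }
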
-
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- */
-STATIC int
-xfs_qm_shake_freelist(
-       int     howmany)
-{
-       int             nreclaimed = 0;
-       xfs_dquot_t     *dqp;
-
-       if (howmany <= 0)
-               return 0;
-
-       while (nreclaimed < howmany) {
-               dqp = xfs_qm_dqreclaim_one();
-               if (!dqp)
-                       return nreclaimed;
-               xfs_qm_dqdestroy(dqp);
-               nreclaimed++;
-       }
-       return nreclaimed;
-}
-
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_shake(
-       struct shrinker *shrink,
-       struct shrink_control *sc)
-{
-       int     ndqused, nfree, n;
-       gfp_t gfp_mask = sc->gfp_mask;
-
-       if (!kmem_shake_allow(gfp_mask))
-               return 0;
-       if (!xfs_Gqm)
-               return 0;
-
-       nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
-       /* incore dquots in all f/s's */
-       ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
-       ASSERT(ndqused >= 0);
-
-       if (nfree <= ndqused && nfree < ndquot)
-               return 0;
-
-       ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
-       n = nfree - ndqused - ndquot;           /* # over target */
-
-       return xfs_qm_shake_freelist(MAX(nfree, n));
-}
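
xfs_qm_shake() sizes its reclaim request from the free/in-use ratio: with ndqused dquots in use it wants roughly ndqused * qm_dqfree_ratio dquots free, and n is how far the freelist already exceeds that target plus ndquot. A worked example of the arithmetic above with made-up numbers (the early-return guard from the kernel code is omitted):

    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        /* Illustrative numbers only. */
        int total  = 1000;                      /* qm_totaldquots         */
        int nfree  = 700;                       /* qm_dqfrlist_cnt        */
        int ndquot = 300;                       /* system high-water mark */
        int ratio  = 2;                         /* qm_dqfree_ratio        */

        int ndqused = total - nfree;            /* 300 in use             */
        int target  = ndqused * ratio;          /* want 600 free          */
        int n       = nfree - target - ndquot;  /* 700 - 600 - 300 = -200 */

        /* The shaker then asks the freelist to shed MAX(nfree, n) dquots. */
        printf("shake request: %d\n", MAX(nfree, n));   /* prints 700 */
        return 0;
    }
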
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
-       xfs_dquot_t **O_dqpp)
-{
-       xfs_dquot_t     *dqp;
-
-       /*
-        * Check against high water mark to see if we want to pop
-        * a nincompoop dquot off the freelist.
-        */
-       if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
-               /*
-                * Try to recycle a dquot from the freelist.
-                */
-               if ((dqp = xfs_qm_dqreclaim_one())) {
-                       XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
-                       /*
-                        * Just zero the core here. The rest will get
-                        * reinitialized by caller. XXX we shouldn't even
-                        * do this zero ...
-                        */
-                       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-                       *O_dqpp = dqp;
-                       return B_FALSE;
-               }
-               XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
-       }
-
-       /*
-        * Allocate a brand new dquot on the kernel heap and return it
-        * to the caller to initialize.
-        */
-       ASSERT(xfs_Gqm->qm_dqzone != NULL);
-       *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
-       atomic_inc(&xfs_Gqm->qm_totaldquots);
-
-       return B_TRUE;
-}
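
xfs_qm_dqalloc_incore() implements a simple high-water-mark policy: below ndquot incore dquots it always allocates fresh from the zone; at or above the mark it first tries to recycle from the freelist and only falls back to the heap on a miss. A toy userspace model of that policy, with hypothetical names and calloc standing in for the kmem zone:

    #include <stdio.h>
    #include <stdlib.h>

    #define NDQUOT 4                /* tiny high-water mark, for the demo */

    static int   total;             /* like qm_totaldquots */
    static void *freelist[8];
    static int   nfree;

    static void *alloc_dquot(int *recycled)
    {
        if (total >= NDQUOT && nfree > 0) {
            *recycled = 1;          /* reclaim path: reuse a free dquot */
            return freelist[--nfree];
        }
        *recycled = 0;              /* heap path: brand-new allocation */
        total++;
        return calloc(1, 64);       /* stands in for kmem_zone_zalloc() */
    }

    int main(void)
    {
        int recycled;
        void *dq = alloc_dquot(&recycled);

        printf("recycled=%d total=%d\n", recycled, total);
        free(dq);
        return 0;
    }
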
-
-
-/*
- * Start a transaction and write the incore superblock changes to
- * disk. flags parameter indicates which fields have changed.
- */
-int
-xfs_qm_write_sb_changes(
-       xfs_mount_t     *mp,
-       __int64_t       flags)
-{
-       xfs_trans_t     *tp;
-       int             error;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-       if ((error = xfs_trans_reserve(tp, 0,
-                                     mp->m_sb.sb_sectsize + 128, 0,
-                                     0,
-                                     XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       xfs_mod_sb(tp, flags);
-       error = xfs_trans_commit(tp, 0);
-
-       return error;
-}
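
The shape of xfs_qm_write_sb_changes() (allocate a transaction, reserve log space, cancel on failure, otherwise modify and commit) recurs throughout this file. A compact sketch of that pattern using a hypothetical txn_* API, not the real xfs_trans_* interface:

    #include <stdio.h>

    struct txn { int reserved; };

    static int txn_reserve(struct txn *tp, int bytes)
    {
        if (bytes > 4096)
            return -1;              /* pretend the log is too small */
        tp->reserved = bytes;
        return 0;
    }

    static void txn_cancel(struct txn *tp) { tp->reserved = 0; }
    static int  txn_commit(struct txn *tp) { tp->reserved = 0; return 0; }

    int main(void)
    {
        struct txn t = { 0 };

        /* Reserve log space first; on failure the transaction must be
         * cancelled before returning, exactly as in the code above. */
        if (txn_reserve(&t, 512 + 128) != 0) {   /* like sectsize + 128 */
            txn_cancel(&t);
            return 1;
        }
        /* ... modify the superblock fields here ... */
        return txn_commit(&t);
    }
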
-
-
-/* --------------- utility functions for vnodeops ---------------- */
-
-
-/*
- * Given an inode, a uid, gid and prid, make sure that we have
- * allocated relevant dquot(s) on disk, and that we won't exceed inode
- * quotas by creating this file.
- * This also attaches dquot(s) to the given inode after locking it,
- * and returns the dquots corresponding to the uid and/or gid.
- *
- * in  : inode (unlocked)
- * out : udquot, gdquot with references taken and unlocked
- */
-int
-xfs_qm_vop_dqalloc(
-       struct xfs_inode        *ip,
-       uid_t                   uid,
-       gid_t                   gid,
-       prid_t                  prid,
-       uint                    flags,
-       struct xfs_dquot        **O_udqpp,
-       struct xfs_dquot        **O_gdqpp)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_dquot        *uq, *gq;
-       int                     error;
-       uint                    lockflags;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       lockflags = XFS_ILOCK_EXCL;
-       xfs_ilock(ip, lockflags);
-
-       if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
-               gid = ip->i_d.di_gid;
-
-       /*
-        * Attach the dquot(s) to this inode, doing a dquot allocation
-        * if necessary. The dquot(s) will not be locked.
-        */
-       if (XFS_NOT_DQATTACHED(mp, ip)) {
-               error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
-               if (error) {
-                       xfs_iunlock(ip, lockflags);
-                       return error;
-               }
-       }
-
-       uq = gq = NULL;
-       if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
-               if (ip->i_d.di_uid != uid) {
-                       /*
-                        * What we need is the dquot that has this uid, and
-                        * if we send the inode to dqget, the uid of the inode
-                        * takes priority over what's sent in the uid argument.
-                        * We must unlock inode here before calling dqget if
-                        * we're not sending the inode, because otherwise
-                        * we'll deadlock by doing trans_reserve while
-                        * holding ilock.
-                        */
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
-                                                XFS_DQ_USER,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &uq))) {
-                               ASSERT(error != ENOENT);
-                               return error;
-                       }
-                       /*
-                        * Get the ilock in the right order.
-                        */
-                       xfs_dqunlock(uq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       /*
-                        * Take an extra reference, because we'll return
-                        * this to caller
-                        */
-                       ASSERT(ip->i_udquot);
-                       uq = ip->i_udquot;
-                       xfs_dqlock(uq);
-                       XFS_DQHOLD(uq);
-                       xfs_dqunlock(uq);
-               }
-       }
-       if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
-               if (ip->i_d.di_gid != gid) {
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
-                                                XFS_DQ_GROUP,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &gq))) {
-                               if (uq)
-                                       xfs_qm_dqrele(uq);
-                               ASSERT(error != ENOENT);
-                               return error;
-                       }
-                       xfs_dqunlock(gq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       ASSERT(ip->i_gdquot);
-                       gq = ip->i_gdquot;
-                       xfs_dqlock(gq);
-                       XFS_DQHOLD(gq);
-                       xfs_dqunlock(gq);
-               }
-       } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
-               if (xfs_get_projid(ip) != prid) {
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
-                                                XFS_DQ_PROJ,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &gq))) {
-                               if (uq)
-                                       xfs_qm_dqrele(uq);
-                               ASSERT(error != ENOENT);
-                               return (error);
-                       }
-                       xfs_dqunlock(gq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       ASSERT(ip->i_gdquot);
-                       gq = ip->i_gdquot;
-                       xfs_dqlock(gq);
-                       XFS_DQHOLD(gq);
-                       xfs_dqunlock(gq);
-               }
-       }
-       if (uq)
-               trace_xfs_dquot_dqalloc(ip);
-
-       xfs_iunlock(ip, lockflags);
-       if (O_udqpp)
-               *O_udqpp = uq;
-       else if (uq)
-               xfs_qm_dqrele(uq);
-       if (O_gdqpp)
-               *O_gdqpp = gq;
-       else if (gq)
-               xfs_qm_dqrele(gq);
-       return 0;
-}
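
Note the lock-ordering dance above: xfs_qm_dqget() may reserve log space and block, so the inode lock is dropped before the call and re-taken afterwards, downgraded to shared mode. A userspace sketch of the same discipline, with a pthreads rwlock standing in for the XFS inode lock (names illustrative):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t ilock = PTHREAD_RWLOCK_INITIALIZER; /* like XFS_ILOCK */

    static void dqget_blocking(void)
    {
        /* stands in for xfs_qm_dqget(), which can block in trans_reserve */
    }

    int main(void)
    {
        pthread_rwlock_wrlock(&ilock);   /* XFS_ILOCK_EXCL */

        /* Blocking here while holding the lock could deadlock: drop it. */
        pthread_rwlock_unlock(&ilock);
        dqget_blocking();

        pthread_rwlock_rdlock(&ilock);   /* re-take as XFS_ILOCK_SHARED */
        puts("dquot acquired with the inode lock re-taken shared");
        pthread_rwlock_unlock(&ilock);
        return 0;
    }
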
-
-/*
- * Actually transfer ownership, and do dquot modifications.
- * These were already reserved.
- */
-xfs_dquot_t *
-xfs_qm_vop_chown(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       xfs_dquot_t     **IO_olddq,
-       xfs_dquot_t     *newdq)
-{
-       xfs_dquot_t     *prevdq;
-       uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
-                                XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
-
-       /* old dquot */
-       prevdq = *IO_olddq;
-       ASSERT(prevdq);
-       ASSERT(prevdq != newdq);
-
-       xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
-       xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
-
-       /* the sparkling new dquot */
-       xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
-       xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
-
-       /*
-        * Take an extra reference, because the inode
-        * is going to keep this dquot pointer even
-        * after the trans_commit.
-        */
-       xfs_dqlock(newdq);
-       XFS_DQHOLD(newdq);
-       xfs_dqunlock(newdq);
-       *IO_olddq = newdq;
-
-       return prevdq;
-}
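
The chown above is bookkeeping in mirror image: the old dquot loses the inode's block and inode counts, the new dquot gains them, and the new dquot picks up the extra reference the inode will keep past commit. A toy model in plain C, illustrative only:

    #include <stdio.h>

    struct dq { long bcount; long icount; int refs; };

    static void chown_counts(struct dq *olddq, struct dq *newdq, long nblocks)
    {
        olddq->bcount -= nblocks;   /* -(ip->i_d.di_nblocks) */
        olddq->icount -= 1;
        newdq->bcount += nblocks;
        newdq->icount += 1;
        newdq->refs++;              /* extra reference kept past commit */
    }

    int main(void)
    {
        struct dq olddq = { 100, 1, 1 }, newdq = { 0, 0, 1 };

        chown_counts(&olddq, &newdq, 100);
        printf("old: %ld blocks, new: %ld blocks, new refs: %d\n",
               olddq.bcount, newdq.bcount, newdq.refs);
        return 0;
    }
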
-
-/*
- * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
- */
-int
-xfs_qm_vop_chown_reserve(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       xfs_dquot_t     *udqp,
-       xfs_dquot_t     *gdqp,
-       uint            flags)
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       uint            delblks, blkflags, prjflags = 0;
-       xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
-       int             error;
-
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       delblks = ip->i_delayed_blks;
-       delblksudq = delblksgdq = unresudq = unresgdq = NULL;
-       blkflags = XFS_IS_REALTIME_INODE(ip) ?
-                       XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
-
-       if (XFS_IS_UQUOTA_ON(mp) && udqp &&
-           ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
-               delblksudq = udqp;
-               /*
-                * If there are delayed allocation blocks, then we have to
-                * unreserve those from the old dquot, and add them to the
-                * new dquot.
-                */
-               if (delblks) {
-                       ASSERT(ip->i_udquot);
-                       unresudq = ip->i_udquot;
-               }
-       }
-       if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
-               if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
-                    xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
-                       prjflags = XFS_QMOPT_ENOSPC;
-
-               if (prjflags ||
-                   (XFS_IS_GQUOTA_ON(ip->i_mount) &&
-                    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
-                       delblksgdq = gdqp;
-                       if (delblks) {
-                               ASSERT(ip->i_gdquot);
-                               unresgdq = ip->i_gdquot;
-                       }
-               }
-       }
-
-       if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
-                               delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
-                               flags | blkflags | prjflags)))
-               return (error);
-
-       /*
-        * Do the delayed blks reservations/unreservations now. Since these
-        * are done without the help of a transaction, if a reservation fails
-        * its previous reservations won't be automatically undone by trans
-        * code. So, we have to do it manually here.
-        */
-       if (delblks) {
-               /*
-                * Do the reservations first. Unreservation can't fail.
-                */
-               ASSERT(delblksudq || delblksgdq);
-               ASSERT(unresudq || unresgdq);
-               if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
-                               delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
-                               flags | blkflags | prjflags)))
-                       return (error);
-               xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
-                               unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
-                               blkflags);
-       }
-
-       return (0);
-}
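
The delayed-block step above is deliberately ordered: the fallible reservation against the new dquots comes first, and only then is the old reservation released, since the release cannot fail and nothing transactional will undo these on error. A small sketch of that ordering, with hypothetical reserve()/unreserve() helpers:

    #include <stdio.h>

    static long pool = 100;         /* pretend quota headroom */

    static int reserve(long n)      /* fallible, like the bydquots call */
    {
        if (pool < n)
            return -1;
        pool -= n;
        return 0;
    }

    static void unreserve(long n)   /* cannot fail, mirrors the comment above */
    {
        pool += n;
    }

    int main(void)
    {
        long delblks = 40;

        /* Reserve against the new owner first; on failure there is
         * nothing to undo by hand. */
        if (reserve(delblks) != 0)
            return 1;

        /* Only then release the old owner's reservation. */
        unreserve(delblks);
        printf("pool=%ld\n", pool);
        return 0;
    }
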
-
-int
-xfs_qm_vop_rename_dqattach(
-       struct xfs_inode        **i_tab)
-{
-       struct xfs_mount        *mp = i_tab[0]->i_mount;
-       int                     i;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       for (i = 0; (i < 4 && i_tab[i]); i++) {
-               struct xfs_inode        *ip = i_tab[i];
-               int                     error;
-
-               /*
-                * Watch out for duplicate entries in the table.
-                */
-               if (i == 0 || ip != i_tab[i-1]) {
-                       if (XFS_NOT_DQATTACHED(mp, ip)) {
-                               error = xfs_qm_dqattach(ip, 0);
-                               if (error)
-                                       return error;
-                       }
-               }
-       }
-       return 0;
-}
-
-void
-xfs_qm_vop_create_dqattach(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       struct xfs_dquot        *udqp,
-       struct xfs_dquot        *gdqp)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       if (udqp) {
-               xfs_dqlock(udqp);
-               XFS_DQHOLD(udqp);
-               xfs_dqunlock(udqp);
-               ASSERT(ip->i_udquot == NULL);
-               ip->i_udquot = udqp;
-               ASSERT(XFS_IS_UQUOTA_ON(mp));
-               ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
-               xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
-       }
-       if (gdqp) {
-               xfs_dqlock(gdqp);
-               XFS_DQHOLD(gdqp);
-               xfs_dqunlock(gdqp);
-               ASSERT(ip->i_gdquot == NULL);
-               ip->i_gdquot = gdqp;
-               ASSERT(XFS_IS_OQUOTA_ON(mp));
-               ASSERT((XFS_IS_GQUOTA_ON(mp) ?
-                       ip->i_d.di_gid : xfs_get_projid(ip)) ==
-                               be32_to_cpu(gdqp->q_core.d_id));
-               xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
-       }
-}
-
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
deleted file mode 100644 (file)
index 43b9abe..0000000
--- a/fs/xfs/quota/xfs_qm.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_H__
-#define __XFS_QM_H__
-
-#include "xfs_dquot_item.h"
-#include "xfs_dquot.h"
-#include "xfs_quota_priv.h"
-#include "xfs_qm_stats.h"
-
-struct xfs_qm;
-struct xfs_inode;
-
-extern uint            ndquot;
-extern struct mutex    xfs_Gqm_lock;
-extern struct xfs_qm   *xfs_Gqm;
-extern kmem_zone_t     *qm_dqzone;
-extern kmem_zone_t     *qm_dqtrxzone;
-
-/*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS       7
-
-/*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS    4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO            2
-
-/*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_HASHSIZE_LOW            (PAGE_SIZE / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH           ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
-
-/*
- * This defines the unit of allocation of dquots.
- * Currently, it is just one file system block, and a 4K blk contains 30
- * (136 * 30 = 4080) dquots. It's probably not worth trying to make
- * this more dynamic.
- * XXXsup However, if this number is changed, we have to make sure that we don't
- * implicitly assume that we do allocations in chunks of a single filesystem
- * block in the dquot/xqm code.
- */
-#define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
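
The comment's arithmetic checks out: an on-disk dquot is 136 bytes, so a 4K block holds floor(4096 / 136) = 30 of them, using 4080 bytes. A two-line verification in C:

    #include <stdio.h>

    int main(void)
    {
        int blk = 4096, dqsz = 136;                 /* bytes */
        printf("%d dquots/blk, %d bytes used\n",
               blk / dqsz, (blk / dqsz) * dqsz);    /* 30, 4080 */
        return 0;
    }
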
-
-typedef xfs_dqhash_t   xfs_dqlist_t;
-
-/*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
-       xfs_dqlist_t    *qm_usr_dqhtable;/* udquot hash table */
-       xfs_dqlist_t    *qm_grp_dqhtable;/* gdquot hash table */
-       uint             qm_dqhashmask;  /* # buckets in dq hashtab - 1 */
-       struct list_head qm_dqfrlist;    /* freelist of dquots */
-       struct mutex     qm_dqfrlist_lock;
-       int              qm_dqfrlist_cnt;
-       atomic_t         qm_totaldquots; /* total incore dquots */
-       uint             qm_nrefs;       /* file systems with quota on */
-       int              qm_dqfree_ratio;/* ratio of free to inuse dquots */
-       kmem_zone_t     *qm_dqzone;      /* dquot mem-alloc zone */
-       kmem_zone_t     *qm_dqtrxzone;   /* t_dqinfo of transactions */
-} xfs_qm_t;
-
-/*
- * Various quota information for individual filesystems.
- * The mount structure keeps a pointer to this.
- */
-typedef struct xfs_quotainfo {
-       xfs_inode_t     *qi_uquotaip;    /* user quota inode */
-       xfs_inode_t     *qi_gquotaip;    /* group quota inode */
-       struct list_head qi_dqlist;      /* all dquots in filesys */
-       struct mutex     qi_dqlist_lock;
-       int              qi_dquots;
-       int              qi_dqreclaims;  /* a change here indicates
-                                           a removal in the dqlist */
-       time_t           qi_btimelimit;  /* limit for blks timer */
-       time_t           qi_itimelimit;  /* limit for inodes timer */
-       time_t           qi_rtbtimelimit;/* limit for rt blks timer */
-       xfs_qwarncnt_t   qi_bwarnlimit;  /* limit for blks warnings */
-       xfs_qwarncnt_t   qi_iwarnlimit;  /* limit for inodes warnings */
-       xfs_qwarncnt_t   qi_rtbwarnlimit;/* limit for rt blks warnings */
-       struct mutex     qi_quotaofflock;/* to serialize quotaoff */
-       xfs_filblks_t    qi_dqchunklen;  /* # BBs in a chunk of dqs */
-       uint             qi_dqperchunk;  /* # ondisk dqs in above chunk */
-       xfs_qcnt_t       qi_bhardlimit;  /* default data blk hard limit */
-       xfs_qcnt_t       qi_bsoftlimit;  /* default data blk soft limit */
-       xfs_qcnt_t       qi_ihardlimit;  /* default inode count hard limit */
-       xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
-       xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
-       xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
-} xfs_quotainfo_t;
-
-
-extern void    xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
-extern int     xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
-                       xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
-extern void    xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
-extern void    xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
-
-/*
- * We keep the usr and grp dquots separately so that locking will be easier
- * to do at commit time. All transactions that we know of at this point
- * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
- */
-#define XFS_QM_TRANS_MAXDQS            2
-typedef struct xfs_dquot_acct {
-       xfs_dqtrx_t     dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
-       xfs_dqtrx_t     dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
-} xfs_dquot_acct_t;
-
-/*
- * Users are allowed to have a usage exceeding their softlimit for
- * a period this long.
- */
-#define XFS_QM_BTIMELIMIT      (7 * 24*60*60)          /* 1 week */
-#define XFS_QM_RTBTIMELIMIT    (7 * 24*60*60)          /* 1 week */
-#define XFS_QM_ITIMELIMIT      (7 * 24*60*60)          /* 1 week */
-
-#define XFS_QM_BWARNLIMIT      5
-#define XFS_QM_IWARNLIMIT      5
-#define XFS_QM_RTBWARNLIMIT    5
-
-extern void            xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern int             xfs_qm_quotacheck(xfs_mount_t *);
-extern int             xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-
-/* dquot stuff */
-extern boolean_t       xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int             xfs_qm_dqpurge_all(xfs_mount_t *, uint);
-extern void            xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
-
-/* quota ops */
-extern int             xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
-extern int             xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
-                                       fs_disk_quota_t *);
-extern int             xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
-                                       fs_disk_quota_t *);
-extern int             xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
-extern int             xfs_qm_scall_quotaon(xfs_mount_t *, uint);
-extern int             xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-
-#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
deleted file mode 100644 (file)
index a0a829a..0000000
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-
-STATIC void
-xfs_fill_statvfs_from_dquot(
-       struct kstatfs          *statp,
-       xfs_disk_dquot_t        *dp)
-{
-       __uint64_t              limit;
-
-       limit = dp->d_blk_softlimit ?
-               be64_to_cpu(dp->d_blk_softlimit) :
-               be64_to_cpu(dp->d_blk_hardlimit);
-       if (limit && statp->f_blocks > limit) {
-               statp->f_blocks = limit;
-               statp->f_bfree = statp->f_bavail =
-                       (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
-                        (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
-       }
-
-       limit = dp->d_ino_softlimit ?
-               be64_to_cpu(dp->d_ino_softlimit) :
-               be64_to_cpu(dp->d_ino_hardlimit);
-       if (limit && statp->f_files > limit) {
-               statp->f_files = limit;
-               statp->f_ffree =
-                       (statp->f_files > be64_to_cpu(dp->d_icount)) ?
-                        (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
-       }
-}
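
Concretely, the clamping above makes df inside a project-quota directory report the project's limits instead of the whole filesystem. A worked example with illustrative numbers: a 1000-block soft limit with 400 blocks already charged reports f_blocks=1000 and f_bfree=600, however large the filesystem really is:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long f_blocks = 1ULL << 30;       /* whole-fs block count */
        unsigned long long f_bfree;
        unsigned long long limit = 1000, bcount = 400;  /* from the dquot */

        if (limit && f_blocks > limit) {
            f_blocks = limit;
            f_bfree  = f_blocks > bcount ? f_blocks - bcount : 0;
            printf("f_blocks=%llu f_bfree=%llu\n", f_blocks, f_bfree);
        }
        return 0;
    }
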
-
-
-/*
- * Directory tree accounting is implemented using project quotas, where
- * the project identifier is inherited from parent directories.
- * A statvfs (df, etc.) of a directory that is using project quota should
- * return a statvfs of the project, not the entire filesystem.
- * This makes such trees appear as if they are filesystems in themselves.
- */
-void
-xfs_qm_statvfs(
-       xfs_inode_t             *ip,
-       struct kstatfs          *statp)
-{
-       xfs_mount_t             *mp = ip->i_mount;
-       xfs_dquot_t             *dqp;
-
-       if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
-               xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
-               xfs_qm_dqput(dqp);
-       }
-}
-
-int
-xfs_qm_newmount(
-       xfs_mount_t     *mp,
-       uint            *needquotamount,
-       uint            *quotaflags)
-{
-       uint            quotaondisk;
-       uint            uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
-
-       quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
-                               (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
-
-       if (quotaondisk) {
-               uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
-               pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
-               gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
-       }
-
-       /*
-        * If the device itself is read-only, we can't allow
-        * the user to change the state of quota on the mount -
-        * this would generate a transaction on the ro device,
-        * which would lead to an I/O error and shutdown
-        * which would lead to an I/O error and shutdown.
-
-       if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
-           (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
-            (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
-           (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
-            (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
-           (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
-           xfs_dev_is_read_only(mp, "changing quota state")) {
-               xfs_warn(mp, "please mount with%s%s%s%s.",
-                       (!quotaondisk ? "out quota" : ""),
-                       (uquotaondisk ? " usrquota" : ""),
-                       (pquotaondisk ? " prjquota" : ""),
-                       (gquotaondisk ? " grpquota" : ""));
-               return XFS_ERROR(EPERM);
-       }
-
-       if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
-               /*
-                * Call mount_quotas at this point only if we won't have to do
-                * a quotacheck.
-                */
-               if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
-                       /*
-                        * If an error occurred, qm_mount_quotas code
-                        * has already disabled quotas. So, just finish
-                        * mounting, and get on with the boring life
-                        * without disk quotas.
-                        */
-                       xfs_qm_mount_quotas(mp);
-               } else {
-                       /*
-                        * Clear the quota flags, but remember them. This
-                        * is so that the quota code doesn't get invoked
-                        * before we're ready. This can happen when an
-                        * inode goes inactive and wants to free blocks,
-                        * or via xfs_log_mount_finish.
-                        */
-                       *needquotamount = B_TRUE;
-                       *quotaflags = mp->m_qflags;
-                       mp->m_qflags = 0;
-               }
-       }
-
-       return 0;
-}
-
-void __init
-xfs_qm_init(void)
-{
-       printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
-       mutex_init(&xfs_Gqm_lock);
-       xfs_qm_init_procfs();
-}
-
-void __exit
-xfs_qm_exit(void)
-{
-       xfs_qm_cleanup_procfs();
-       if (qm_dqzone)
-               kmem_zone_destroy(qm_dqzone);
-       if (qm_dqtrxzone)
-               kmem_zone_destroy(qm_dqtrxzone);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
deleted file mode 100644 (file)
index 8671a0b..0000000
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-struct xqmstats xqmstats;
-
-static int xqm_proc_show(struct seq_file *m, void *v)
-{
-       /* maximum; incore; ratio free to inuse; freelist */
-       seq_printf(m, "%d\t%d\t%d\t%u\n",
-                       ndquot,
-                       xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
-                       xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
-                       xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
-       return 0;
-}
-
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xqm_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static int xqmstat_proc_show(struct seq_file *m, void *v)
-{
-       /* quota performance statistics */
-       seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
-                       xqmstats.xs_qm_dqreclaims,
-                       xqmstats.xs_qm_dqreclaim_misses,
-                       xqmstats.xs_qm_dquot_dups,
-                       xqmstats.xs_qm_dqcachemisses,
-                       xqmstats.xs_qm_dqcachehits,
-                       xqmstats.xs_qm_dqwants,
-                       xqmstats.xs_qm_dqshake_reclaims,
-                       xqmstats.xs_qm_dqinact_reclaims);
-       return 0;
-}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xqmstat_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-void
-xfs_qm_init_procfs(void)
-{
-       proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
-       proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
-}
-
-void
-xfs_qm_cleanup_procfs(void)
-{
-       remove_proc_entry("fs/xfs/xqm", NULL);
-       remove_proc_entry("fs/xfs/xqmstat", NULL);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
deleted file mode 100644 (file)
index 5b964fc..0000000
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_STATS_H__
-#define __XFS_QM_STATS_H__
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-/*
- * XQM global statistics
- */
-struct xqmstats {
-       __uint32_t              xs_qm_dqreclaims;
-       __uint32_t              xs_qm_dqreclaim_misses;
-       __uint32_t              xs_qm_dquot_dups;
-       __uint32_t              xs_qm_dqcachemisses;
-       __uint32_t              xs_qm_dqcachehits;
-       __uint32_t              xs_qm_dqwants;
-       __uint32_t              xs_qm_dqshake_reclaims;
-       __uint32_t              xs_qm_dqinact_reclaims;
-};
-
-extern struct xqmstats xqmstats;
-
-# define XQM_STATS_INC(count)  ( (count)++ )
-
-extern void xfs_qm_init_procfs(void);
-extern void xfs_qm_cleanup_procfs(void);
-
-#else
-
-# define XQM_STATS_INC(count)  do { } while (0)
-
-static inline void xfs_qm_init_procfs(void) { };
-static inline void xfs_qm_cleanup_procfs(void) { };
-
-#endif
-
-#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
deleted file mode 100644 (file)
index 609246f..0000000
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include <linux/capability.h>
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-STATIC int     xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
-STATIC int     xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
-                                       uint);
-STATIC uint    xfs_qm_export_flags(uint);
-STATIC uint    xfs_qm_export_qtype_flags(uint);
-STATIC void    xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
-                                       fs_disk_quota_t *);
-
-
-/*
- * Turn off quota accounting and/or enforcement for all udquots and/or
- * gdquots. Called only at unmount time.
- *
- * This assumes that there are no dquots of this file system cached
- * incore, and modifies the ondisk dquot directly. Therefore, for example,
- * it is an error to call this twice, without purging the cache.
- */
-int
-xfs_qm_scall_quotaoff(
-       xfs_mount_t             *mp,
-       uint                    flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       uint                    dqtype;
-       int                     error;
-       uint                    inactivate_flags;
-       xfs_qoff_logitem_t      *qoffstart;
-       int                     nculprits;
-
-       /*
-        * No file system can have quotas enabled on disk but not in core.
-        * Note that quota utilities (like quotaoff) _expect_
-        * errno == EEXIST here.
-        */
-       if ((mp->m_qflags & flags) == 0)
-               return XFS_ERROR(EEXIST);
-       error = 0;
-
-       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-
-       /*
-        * We don't want to deal with two quotaoffs messing up each other,
-        * so we're going to serialize it. quotaoff isn't exactly a
-        * performance-critical thing.
-        * If quotaoff, then we must be dealing with the root filesystem.
-        */
-       ASSERT(q);
-       mutex_lock(&q->qi_quotaofflock);
-
-       /*
-        * If we're just turning off quota enforcement, change mp and go.
-        */
-       if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
-               mp->m_qflags &= ~(flags);
-
-               spin_lock(&mp->m_sb_lock);
-               mp->m_sb.sb_qflags = mp->m_qflags;
-               spin_unlock(&mp->m_sb_lock);
-               mutex_unlock(&q->qi_quotaofflock);
-
-               /* XXX what to do if error ? Revert back to old vals incore ? */
-               error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
-               return (error);
-       }
-
-       dqtype = 0;
-       inactivate_flags = 0;
-       /*
-        * If accounting is off, we must turn enforcement off and clear the
-        * quota 'CHKD' certificate to make it known that we have to
-        * do a quotacheck the next time this quota is turned on.
-        */
-       if (flags & XFS_UQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_UQUOTA;
-               flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
-               inactivate_flags |= XFS_UQUOTA_ACTIVE;
-       }
-       if (flags & XFS_GQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_GQUOTA;
-               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
-               inactivate_flags |= XFS_GQUOTA_ACTIVE;
-       } else if (flags & XFS_PQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_PQUOTA;
-               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
-               inactivate_flags |= XFS_PQUOTA_ACTIVE;
-       }
-
-       /*
-        * Nothing to do?  Don't complain. This happens when we're just
-        * turning off quota enforcement.
-        */
-       if ((mp->m_qflags & flags) == 0)
-               goto out_unlock;
-
-       /*
-        * Write the LI_QUOTAOFF log record, and do SB changes atomically,
-        * and synchronously. If we fail to write, we should abort the
-        * operation as it cannot be recovered safely if we crash.
-        */
-       error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
-       if (error)
-               goto out_unlock;
-
-       /*
-        * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
-        * to take care of the race between dqget and quotaoff. We don't take
-        * any special locks to reset these bits. All processes need to check
-        * these bits *after* taking inode lock(s) to see if the particular
-        * quota type is in the process of being turned off. If *ACTIVE, it is
-        * guaranteed that all dquot structures and all quotainode ptrs will all
-        * stay valid as long as that inode is kept locked.
-        *
-        * There is no turning back after this.
-        */
-       mp->m_qflags &= ~inactivate_flags;
-
-       /*
-        * Give back all the dquot reference(s) held by inodes.
-        * Here we go through every single incore inode in this file system, and
-        * do a dqrele on the i_udquot/i_gdquot that it may have.
-        * Essentially, as long as somebody has an inode locked, this guarantees
-        * that quotas will not be turned off. This is handy because in a
-        * transaction once we lock the inode(s) and check for quotaon, we can
-        * depend on the quota inodes (and other things) being valid as long as
-        * we keep the lock(s).
-        */
-       xfs_qm_dqrele_all_inodes(mp, flags);
-
-       /*
-        * Next we make the changes in the quota flag in the mount struct.
-        * This isn't protected by a particular lock directly, because we
-        * don't want to take a mrlock every time we depend on quotas being on.
-        */
-       mp->m_qflags &= ~(flags);
-
-       /*
-        * Go through all the dquots of this file system and purge them,
-        * according to what was turned off. We may not be able to get rid
-        * of all dquots, because dquots can have temporary references that
-        * are not attached to inodes. eg. xfs_setattr, xfs_create.
-        * So, if we couldn't purge all the dquots from the filesystem,
-        * we can't get rid of the incore data structures.
-        */
-       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
-               delay(10 * nculprits);
-
-       /*
-        * Transactions that had started before ACTIVE state bit was cleared
-        * could have logged many dquots, so they'd have higher LSNs than
-        * the first QUOTAOFF log record does. If we happen to crash when
-        * the tail of the log has gone past the QUOTAOFF record, but
-        * before the last dquot modification, those dquots __will__
-        * recover, and that's not good.
-        *
-        * So, we have QUOTAOFF start and end logitems; the start
-        * logitem won't get overwritten until the end logitem appears...
-        */
-       error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
-       if (error) {
-               /* We're screwed now. Shutdown is the only option. */
-               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-               goto out_unlock;
-       }
-
-       /*
-        * If quota is completely disabled, close shop.
-        */
-       if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
-           ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
-               mutex_unlock(&q->qi_quotaofflock);
-               xfs_qm_destroy_quotainfo(mp);
-               return (0);
-       }
-
-       /*
-        * Release our quotainode references if we don't need them anymore.
-        */
-       if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
-               IRELE(q->qi_uquotaip);
-               q->qi_uquotaip = NULL;
-       }
-       if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
-               IRELE(q->qi_gquotaip);
-               q->qi_gquotaip = NULL;
-       }
-
-out_unlock:
-       mutex_unlock(&q->qi_quotaofflock);
-       return error;
-}
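
The QUOTAOFF start/end bracket above is a standard write-ahead-logging trick: the end record is logged only after every dquot has been purged, and the start record stays live until the end record exists, so replay can never run past a quotaoff and resurrect dquots. A minimal sketch of the bracket, with log_write() as a hypothetical stand-in for the XFS log:

    #include <stdio.h>

    static void log_write(const char *rec)
    {
        printf("log: %s\n", rec);       /* hypothetical log append */
    }

    int main(void)
    {
        log_write("QUOTAOFF start");    /* live until the end record */

        /* ... clear ACTIVE flags, dqrele all inodes, purge dquots ... */

        log_write("QUOTAOFF end");      /* start record may now be reused */
        return 0;
    }
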
-
-STATIC int
-xfs_qm_scall_trunc_qfile(
-       struct xfs_mount        *mp,
-       xfs_ino_t               ino)
-{
-       struct xfs_inode        *ip;
-       struct xfs_trans        *tp;
-       int                     error;
-
-       if (ino == NULLFSINO)
-               return 0;
-
-       error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
-       if (error)
-               return error;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
-       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-                                 XFS_TRANS_PERM_LOG_RES,
-                                 XFS_ITRUNCATE_LOG_COUNT);
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-               goto out_put;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip);
-
-       error = xfs_itruncate_data(&tp, ip, 0);
-       if (error) {
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-                                    XFS_TRANS_ABORT);
-               goto out_unlock;
-       }
-
-       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
-out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-out_put:
-       IRELE(ip);
-       return error;
-}
-
-int
-xfs_qm_scall_trunc_qfiles(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             error = 0, error2 = 0;
-
-       if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
-               xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
-                       __func__, flags, mp->m_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-
-       if (flags & XFS_DQ_USER)
-               error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
-       if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
-               error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
-
-       return error ? error : error2;
-}
-
-/*
- * Switch on (a given) quota enforcement for a filesystem.  This takes
- * effect immediately.
- * (Switching on quota accounting must be done at mount time.)
- */
-int
-xfs_qm_scall_quotaon(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             error;
-       uint            qf;
-       __int64_t       sbflags;
-
-       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-       /*
-        * Switching on quota accounting must be done at mount time.
-        */
-       flags &= ~(XFS_ALL_QUOTA_ACCT);
-
-       sbflags = 0;
-
-       if (flags == 0) {
-               xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
-                       __func__, mp->m_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-
-       /* No fs can turn on quotas with a delayed effect */
-       ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
-
-       /*
-        * Can't enforce without accounting. We check the superblock
-        * qflags here instead of m_qflags because rootfs can have
-        * quota acct on ondisk without m_qflags' knowing.
-        */
-       if (((flags & XFS_UQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
-           (flags & XFS_UQUOTA_ENFD))
-           ||
-           ((flags & XFS_PQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
-           (flags & XFS_GQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
-           (flags & XFS_OQUOTA_ENFD))) {
-               xfs_debug(mp,
-                       "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
-                       __func__, flags, mp->m_sb.sb_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-       /*
-        * If everything's up to date incore, then don't waste time.
-        */
-       if ((mp->m_qflags & flags) == flags)
-               return XFS_ERROR(EEXIST);
-
-       /*
-        * Change sb_qflags on disk but not the incore mp->m_qflags
-        * if this is the root filesystem.
-        */
-       spin_lock(&mp->m_sb_lock);
-       qf = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = qf | flags;
-       spin_unlock(&mp->m_sb_lock);
-
-       /*
-        * There's nothing to change if it's the same.
-        */
-       if ((qf & flags) == flags && sbflags == 0)
-               return XFS_ERROR(EEXIST);
-       sbflags |= XFS_SB_QFLAGS;
-
-       if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
-               return (error);
-       /*
-        * If we aren't trying to switch on quota enforcement, we are done.
-        */
-       if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
-            ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
-            ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
-           (flags & XFS_ALL_QUOTA_ENFD) == 0)
-               return (0);
-
-       if (! XFS_IS_QUOTA_RUNNING(mp))
-               return XFS_ERROR(ESRCH);
-
-       /*
-        * Switch on quota enforcement in core.
-        */
-       mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
-       mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
-       mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
-
-       return (0);
-}
-
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- */
-int
-xfs_qm_scall_getqstat(
-       struct xfs_mount        *mp,
-       struct fs_quota_stat    *out)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_inode        *uip, *gip;
-       boolean_t               tempuqip, tempgqip;
-
-       uip = gip = NULL;
-       tempuqip = tempgqip = B_FALSE;
-       memset(out, 0, sizeof(fs_quota_stat_t));
-
-       out->qs_version = FS_QSTAT_VERSION;
-       if (!xfs_sb_version_hasquota(&mp->m_sb)) {
-               out->qs_uquota.qfs_ino = NULLFSINO;
-               out->qs_gquota.qfs_ino = NULLFSINO;
-               return (0);
-       }
-       out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
-                                                       (XFS_ALL_QUOTA_ACCT|
-                                                        XFS_ALL_QUOTA_ENFD));
-       out->qs_pad = 0;
-       out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
-       out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
-
-       if (q) {
-               uip = q->qi_uquotaip;
-               gip = q->qi_gquotaip;
-       }
-       if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
-               if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-                                       0, 0, &uip) == 0)
-                       tempuqip = B_TRUE;
-       }
-       if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
-               if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-                                       0, 0, &gip) == 0)
-                       tempgqip = B_TRUE;
-       }
-       if (uip) {
-               out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
-               out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
-               if (tempuqip)
-                       IRELE(uip);
-       }
-       if (gip) {
-               out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
-               out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
-               if (tempgqip)
-                       IRELE(gip);
-       }
-       if (q) {
-               out->qs_incoredqs = q->qi_dquots;
-               out->qs_btimelimit = q->qi_btimelimit;
-               out->qs_itimelimit = q->qi_itimelimit;
-               out->qs_rtbtimelimit = q->qi_rtbtimelimit;
-               out->qs_bwarnlimit = q->qi_bwarnlimit;
-               out->qs_iwarnlimit = q->qi_iwarnlimit;
-       }
-       return 0;
-}
-
-#define XFS_DQ_MASK \
-       (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
-
-/*
- * Adjust quota limits, and start/stop timers accordingly.
- */
-int
-xfs_qm_scall_setqlim(
-       xfs_mount_t             *mp,
-       xfs_dqid_t              id,
-       uint                    type,
-       fs_disk_quota_t         *newlim)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       xfs_disk_dquot_t        *ddq;
-       xfs_dquot_t             *dqp;
-       xfs_trans_t             *tp;
-       int                     error;
-       xfs_qcnt_t              hard, soft;
-
-       if (newlim->d_fieldmask & ~XFS_DQ_MASK)
-               return EINVAL;
-       if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
-               return 0;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
-                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return (error);
-       }
-
-       /*
-        * We don't want to race with a quotaoff so take the quotaoff lock.
-        * (We don't hold an inode lock, so there's nothing else to stop
-        * a quotaoff from happening). (XXX This doesn't currently happen
-        * because we take the vfslock before calling xfs_qm_sysent).
-        */
-       mutex_lock(&q->qi_quotaofflock);
-
-       /*
-        * Get the dquot (locked), and join it to the transaction.
-        * Allocate the dquot if it doesn't exist.
-        */
-       if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
-               xfs_trans_cancel(tp, XFS_TRANS_ABORT);
-               ASSERT(error != ENOENT);
-               goto out_unlock;
-       }
-       xfs_trans_dqjoin(tp, dqp);
-       ddq = &dqp->q_core;
-
-       /*
-        * Make sure that hardlimits are >= soft limits before changing.
-        */
-       hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
-                       be64_to_cpu(ddq->d_blk_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
-                       be64_to_cpu(ddq->d_blk_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_blk_hardlimit = cpu_to_be64(hard);
-               ddq->d_blk_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_bhardlimit = hard;
-                       q->qi_bsoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
-       }
-       hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
-                       be64_to_cpu(ddq->d_rtb_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
-                       be64_to_cpu(ddq->d_rtb_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_rtb_hardlimit = cpu_to_be64(hard);
-               ddq->d_rtb_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_rtbhardlimit = hard;
-                       q->qi_rtbsoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
-       }
-
-       hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
-               (xfs_qcnt_t) newlim->d_ino_hardlimit :
-                       be64_to_cpu(ddq->d_ino_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
-               (xfs_qcnt_t) newlim->d_ino_softlimit :
-                       be64_to_cpu(ddq->d_ino_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_ino_hardlimit = cpu_to_be64(hard);
-               ddq->d_ino_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_ihardlimit = hard;
-                       q->qi_isoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
-       }
-
-       /*
-        * Update warnings counter(s) if requested
-        */
-       if (newlim->d_fieldmask & FS_DQ_BWARNS)
-               ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
-       if (newlim->d_fieldmask & FS_DQ_IWARNS)
-               ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
-       if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-               ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
-
-       if (id == 0) {
-               /*
-                * Timelimits for the super user set the relative time
-                * the other users can be over quota for this file system.
-        * If it is zero, a default is used.  Ditto for the default
-                * soft and hard limit values (already done, above), and
-                * for warnings.
-                */
-               if (newlim->d_fieldmask & FS_DQ_BTIMER) {
-                       q->qi_btimelimit = newlim->d_btimer;
-                       ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_ITIMER) {
-                       q->qi_itimelimit = newlim->d_itimer;
-                       ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
-                       q->qi_rtbtimelimit = newlim->d_rtbtimer;
-                       ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_BWARNS)
-                       q->qi_bwarnlimit = newlim->d_bwarns;
-               if (newlim->d_fieldmask & FS_DQ_IWARNS)
-                       q->qi_iwarnlimit = newlim->d_iwarns;
-               if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-                       q->qi_rtbwarnlimit = newlim->d_rtbwarns;
-       } else {
-               /*
-                * If the user is now over quota, start the timelimit.
-                * The user will not be 'warned'.
-                * Note that we keep the timers ticking, whether enforcement
-                * is on or off. We don't really want to bother with iterating
-                * over all ondisk dquots and turning the timers on/off.
-                */
-               xfs_qm_adjust_dqtimers(mp, ddq);
-       }
-       dqp->dq_flags |= XFS_DQ_DIRTY;
-       xfs_trans_log_dquot(tp, dqp);
-
-       error = xfs_trans_commit(tp, 0);
-       xfs_qm_dqrele(dqp);
-
- out_unlock:
-       mutex_unlock(&q->qi_quotaofflock);
-       return error;
-}
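This routine is reached from the Q_XSETQLIM quotactl: d_fieldmask selects which limits change, block limits arrive in 512-byte basic blocks (converted by XFS_BB_TO_FSB() above), and hard must stay >= soft. A hedged userspace sketch; set_blk_limits() and the megabyte unit are hypothetical:

#include <string.h>
#include <sys/types.h>
#include <sys/quota.h>
#include <xfs/xqm.h>    /* fs_disk_quota_t, FS_DQ_*, Q_XSETQLIM (xfsprogs) */

/* Hypothetical helper: cap user `uid` at soft_mb/hard_mb megabytes on `dev`.
 * The kernel rejects hard < soft, so pass hard_mb >= soft_mb. */
static int set_blk_limits(const char *dev, uid_t uid,
                          unsigned long long soft_mb,
                          unsigned long long hard_mb)
{
        fs_disk_quota_t d;

        memset(&d, 0, sizeof(d));
        d.d_version = FS_DQUOT_VERSION;
        d.d_flags = FS_USER_QUOTA;
        d.d_id = uid;
        /* limits are passed in 512-byte basic blocks (BBs); the kernel
         * converts them with XFS_BB_TO_FSB(): 1 MB = 2048 BBs */
        d.d_blk_softlimit = soft_mb * 2048;
        d.d_blk_hardlimit = hard_mb * 2048;
        d.d_fieldmask = FS_DQ_BSOFT | FS_DQ_BHARD;

        return quotactl(QCMD(Q_XSETQLIM, USRQUOTA), dev, uid, (caddr_t)&d);
}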
-
-int
-xfs_qm_scall_getquota(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,
-       uint            type,
-       fs_disk_quota_t *out)
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-
-       /*
-        * Try to get the dquot. We don't want it allocated on disk, so
-        * we aren't passing the XFS_QMOPT_DQALLOC flag. If it doesn't
-        * exist, we'll get ENOENT back.
-        */
-       if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
-               return (error);
-       }
-
-       /*
-        * If every limit and counter is zero, this dquot doesn't quite
-        * exist as far as our utility programs are concerned.
-        */
-       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-               xfs_qm_dqput(dqp);
-               return XFS_ERROR(ENOENT);
-       }
-       /*
-        * Convert the disk dquot to the exportable format
-        */
-       xfs_qm_export_dquot(mp, &dqp->q_core, out);
-       xfs_qm_dqput(dqp);
-       return (error ? XFS_ERROR(EFAULT) : 0);
-}
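The read-side counterpart is the Q_XGETQUOTA quotactl, which returns the exported fs_disk_quota_t; a never-touched dquot surfaces as ENOENT, matching the XFS_IS_DQUOT_UNINITIALIZED() check above. A minimal sketch, again assuming <xfs/xqm.h> and placeholder device/uid values:

#include <stdio.h>
#include <sys/types.h>
#include <sys/quota.h>
#include <xfs/xqm.h>    /* fs_disk_quota_t, Q_XGETQUOTA (xfsprogs header) */

int main(void)
{
        fs_disk_quota_t d;
        uid_t uid = 1000;               /* hypothetical user */

        if (quotactl(QCMD(Q_XGETQUOTA, USRQUOTA), "/dev/sda1", uid,
                     (caddr_t)&d) < 0) {
                perror("quotactl(Q_XGETQUOTA)");
                return 1;
        }
        printf("used %llu BBs, soft %llu, hard %llu\n",
               (unsigned long long)d.d_bcount,
               (unsigned long long)d.d_blk_softlimit,
               (unsigned long long)d.d_blk_hardlimit);
        return 0;
}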
-
-
-STATIC int
-xfs_qm_log_quotaoff_end(
-       xfs_mount_t             *mp,
-       xfs_qoff_logitem_t      *startqoff,
-       uint                    flags)
-{
-       xfs_trans_t             *tp;
-       int                     error;
-       xfs_qoff_logitem_t      *qoffi;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
-
-       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
-                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return (error);
-       }
-
-       qoffi = xfs_trans_get_qoff_item(tp, startqoff,
-                                       flags & XFS_ALL_QUOTA_ACCT);
-       xfs_trans_log_quotaoff_item(tp, qoffi);
-
-       /*
-        * We have to make sure that the transaction is secure on disk before we
-        * return and actually stop quota accounting. So, make it synchronous.
-        * We don't care about quotaoff's performance.
-        */
-       xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-       return (error);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff(
-       xfs_mount_t            *mp,
-       xfs_qoff_logitem_t     **qoffstartp,
-       uint                   flags)
-{
-       xfs_trans_t            *tp;
-       int                     error;
-       xfs_qoff_logitem_t     *qoffi=NULL;
-       uint                    oldsbqflag=0;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
-       if ((error = xfs_trans_reserve(tp, 0,
-                                     sizeof(xfs_qoff_logitem_t) * 2 +
-                                     mp->m_sb.sb_sectsize + 128,
-                                     0,
-                                     0,
-                                     XFS_DEFAULT_LOG_COUNT))) {
-               goto error0;
-       }
-
-       qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
-       xfs_trans_log_quotaoff_item(tp, qoffi);
-
-       spin_lock(&mp->m_sb_lock);
-       oldsbqflag = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
-       spin_unlock(&mp->m_sb_lock);
-
-       xfs_mod_sb(tp, XFS_SB_QFLAGS);
-
-       /*
-        * We have to make sure that the transaction is secure on disk before we
-        * return and actually stop quota accounting. So, make it synchronous.
-        * We don't care about quotaoff's performance.
-        */
-       xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-
-error0:
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               /*
-                * No one else is modifying sb_qflags, so this is OK.
-                * We still hold the quotaofflock.
-                */
-               spin_lock(&mp->m_sb_lock);
-               mp->m_sb.sb_qflags = oldsbqflag;
-               spin_unlock(&mp->m_sb_lock);
-       }
-       *qoffstartp = qoffi;
-       return (error);
-}
-
-
-/*
- * Translate an internal style on-disk-dquot to the exportable format.
- * The main differences are that the counters/limits are all in Basic
- * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
- * to be converted to the native endianness.
- */
-STATIC void
-xfs_qm_export_dquot(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *src,
-       struct fs_disk_quota    *dst)
-{
-       memset(dst, 0, sizeof(*dst));
-       dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
-       dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
-       dst->d_id = be32_to_cpu(src->d_id);
-       dst->d_blk_hardlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
-       dst->d_blk_softlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
-       dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
-       dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
-       dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
-       dst->d_icount = be64_to_cpu(src->d_icount);
-       dst->d_btimer = be32_to_cpu(src->d_btimer);
-       dst->d_itimer = be32_to_cpu(src->d_itimer);
-       dst->d_iwarns = be16_to_cpu(src->d_iwarns);
-       dst->d_bwarns = be16_to_cpu(src->d_bwarns);
-       dst->d_rtb_hardlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
-       dst->d_rtb_softlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
-       dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
-       dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
-       dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
-
-       /*
-        * Internally, we don't reset all the timers when quota enforcement
-        * gets turned off. No need to confuse the user level code,
-        * so return zeroes in that case.
-        */
-       if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
-           (!XFS_IS_OQUOTA_ENFORCED(mp) &&
-                       (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
-               dst->d_btimer = 0;
-               dst->d_itimer = 0;
-               dst->d_rtbtimer = 0;
-       }
-
-#ifdef DEBUG
-       if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
-            (XFS_IS_OQUOTA_ENFORCED(mp) &&
-                       (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
-           dst->d_id != 0) {
-               if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
-                   (dst->d_blk_softlimit > 0)) {
-                       ASSERT(dst->d_btimer != 0);
-               }
-               if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
-                   (dst->d_ino_softlimit > 0)) {
-                       ASSERT(dst->d_itimer != 0);
-               }
-       }
-#endif
-}
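The FSB-to-BB conversion used throughout the export above is a plain left shift. A standalone sketch of the arithmetic; BBSHIFT is 9 (512-byte basic blocks), and the 4 KiB block size in the example is illustrative:

#include <stdio.h>

#define BBSHIFT 9       /* a basic block is 512 bytes */

/* XFS_FSB_TO_BB() is effectively fsb << (sb_blocklog - BBSHIFT), where
 * sb_blocklog is log2 of the filesystem block size. */
static unsigned long long fsb_to_bb(unsigned long long fsb, int sb_blocklog)
{
        return fsb << (sb_blocklog - BBSHIFT);
}

int main(void)
{
        /* 100 blocks of a 4 KiB (sb_blocklog = 12) filesystem = 800 BBs */
        printf("%llu\n", fsb_to_bb(100, 12));
        return 0;
}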
-
-STATIC uint
-xfs_qm_export_qtype_flags(
-       uint flags)
-{
-       /*
-        * Exactly one quota type flag must be set: not more than one, and not none.
-        */
-       ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
-               (FS_PROJ_QUOTA | FS_USER_QUOTA));
-       ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
-               (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
-       ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
-               (FS_USER_QUOTA | FS_GROUP_QUOTA));
-       ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
-
-       return (flags & XFS_DQ_USER) ?
-               FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
-                       FS_PROJ_QUOTA : FS_GROUP_QUOTA;
-}
-
-STATIC uint
-xfs_qm_export_flags(
-       uint flags)
-{
-       uint uflags;
-
-       uflags = 0;
-       if (flags & XFS_UQUOTA_ACCT)
-               uflags |= FS_QUOTA_UDQ_ACCT;
-       if (flags & XFS_PQUOTA_ACCT)
-               uflags |= FS_QUOTA_PDQ_ACCT;
-       if (flags & XFS_GQUOTA_ACCT)
-               uflags |= FS_QUOTA_GDQ_ACCT;
-       if (flags & XFS_UQUOTA_ENFD)
-               uflags |= FS_QUOTA_UDQ_ENFD;
-       if (flags & (XFS_OQUOTA_ENFD)) {
-               uflags |= (flags & XFS_GQUOTA_ACCT) ?
-                       FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
-       }
-       return (uflags);
-}
-
-
-STATIC int
-xfs_dqrele_inode(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       /* skip quota inodes */
-       if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
-           ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
-               ASSERT(ip->i_udquot == NULL);
-               ASSERT(ip->i_gdquot == NULL);
-               return 0;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
-               xfs_qm_dqrele(ip->i_udquot);
-               ip->i_udquot = NULL;
-       }
-       if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
-               xfs_qm_dqrele(ip->i_gdquot);
-               ip->i_gdquot = NULL;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return 0;
-}
-
-
-/*
- * Go through all the inodes in the file system, releasing their dquots.
- *
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff.
- */
-void
-xfs_qm_dqrele_all_inodes(
-       struct xfs_mount *mp,
-       uint             flags)
-{
-       ASSERT(mp->m_quotainfo);
-       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
-}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
deleted file mode 100644
index 94a3d92..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QUOTA_PRIV_H__
-#define __XFS_QUOTA_PRIV_H__
-
-/*
- * Number of bmaps that we ask from bmapi when doing a quotacheck.
- * We make this restriction to keep the memory usage to a minimum.
- */
-#define XFS_DQITER_MAP_SIZE    10
-
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
-                                (__psunsigned_t)(id)) & \
-                               (xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
-                                    (xfs_Gqm->qm_usr_dqhtable + \
-                                     XFS_DQ_HASHVAL(mp, id)) : \
-                                    (xfs_Gqm->qm_grp_dqhtable + \
-                                     XFS_DQ_HASHVAL(mp, id)))
-#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
-       !dqp->q_core.d_blk_hardlimit && \
-       !dqp->q_core.d_blk_softlimit && \
-       !dqp->q_core.d_rtb_hardlimit && \
-       !dqp->q_core.d_rtb_softlimit && \
-       !dqp->q_core.d_ino_hardlimit && \
-       !dqp->q_core.d_ino_softlimit && \
-       !dqp->q_core.d_bcount && \
-       !dqp->q_core.d_rtbcount && \
-       !dqp->q_core.d_icount)
-
-#define DQFLAGTO_TYPESTR(d)    (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
-                                (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
-                                (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-
-#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
deleted file mode 100644
index 4d00ee6..0000000
+++ /dev/null
@@ -1,890 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-STATIC void    xfs_trans_alloc_dqinfo(xfs_trans_t *);
-
-/*
- * Add the locked dquot to the transaction.
- * The dquot must be locked, and it cannot be associated with any
- * transaction.
- */
-void
-xfs_trans_dqjoin(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(dqp->q_transp != tp);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(dqp->q_logitem.qli_dquot == dqp);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
-       /*
-        * Initialize d_transp so we can later determine if this dquot is
-        * associated with this transaction.
-        */
-       dqp->q_transp = tp;
-}
-
-
-/*
- * This is called to mark the dquot as needing
- * to be logged when the transaction is committed.  The dquot must
- * already be associated with the given transaction.
- * Note that it marks the entire transaction as dirty. In the ordinary
- * case, this gets called via xfs_trans_commit, after the transaction
- * is already dirty. However, there's nothing to stop this from getting
- * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
- * flag.
- */
-void
-xfs_trans_log_dquot(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(dqp->q_transp == tp);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-/*
- * Carry forward whatever is left of the quota blk reservation to
- * the spanking-new transaction.
- */
-void
-xfs_trans_dup_dqinfo(
-       xfs_trans_t     *otp,
-       xfs_trans_t     *ntp)
-{
-       xfs_dqtrx_t     *oq, *nq;
-       int             i, j;
-       xfs_dqtrx_t     *oqa, *nqa;
-
-       if (!otp->t_dqinfo)
-               return;
-
-       xfs_trans_alloc_dqinfo(ntp);
-       oqa = otp->t_dqinfo->dqa_usrdquots;
-       nqa = ntp->t_dqinfo->dqa_usrdquots;
-
-       /*
-        * Because the quota blk reservation is carried forward,
-        * it is also necessary to carry forward the DQ_DIRTY flag.
-        */
-       if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
-               ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
-
-       for (j = 0; j < 2; j++) {
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       if (oqa[i].qt_dquot == NULL)
-                               break;
-                       oq = &oqa[i];
-                       nq = &nqa[i];
-
-                       nq->qt_dquot = oq->qt_dquot;
-                       nq->qt_bcount_delta = nq->qt_icount_delta = 0;
-                       nq->qt_rtbcount_delta = 0;
-
-                       /*
-                        * Transfer whatever is left of the reservations.
-                        */
-                       nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
-                       oq->qt_blk_res = oq->qt_blk_res_used;
-
-                       nq->qt_rtblk_res = oq->qt_rtblk_res -
-                               oq->qt_rtblk_res_used;
-                       oq->qt_rtblk_res = oq->qt_rtblk_res_used;
-
-                       nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
-                       oq->qt_ino_res = oq->qt_ino_res_used;
-
-               }
-               oqa = otp->t_dqinfo->dqa_grpdquots;
-               nqa = ntp->t_dqinfo->dqa_grpdquots;
-       }
-}
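The carry-forward splits each reservation at its high-water mark: the old transaction keeps exactly what it used and the duplicate inherits the remainder. A worked example with illustrative numbers:

#include <stdio.h>

int main(void)
{
        /* old transaction reserved 50 blocks and used 30 of them */
        unsigned long oq_blk_res = 50, oq_blk_res_used = 30;

        /* duplicate inherits the unused 20; the old one keeps its 30 */
        unsigned long nq_blk_res = oq_blk_res - oq_blk_res_used;
        oq_blk_res = oq_blk_res_used;

        printf("old keeps %lu, duplicate inherits %lu\n",
               oq_blk_res, nq_blk_res);
        return 0;
}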
-
-/*
- * Wrap around mod_dquot to account for both user and group quotas.
- */
-void
-xfs_trans_mod_dquot_byino(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       uint            field,
-       long            delta)
-{
-       xfs_mount_t     *mp = tp->t_mountp;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) ||
-           !XFS_IS_QUOTA_ON(mp) ||
-           ip->i_ino == mp->m_sb.sb_uquotino ||
-           ip->i_ino == mp->m_sb.sb_gquotino)
-               return;
-
-       if (tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-
-       if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
-               (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
-       if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
-               (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
-}
-
-STATIC xfs_dqtrx_t *
-xfs_trans_get_dqtrx(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       int             i;
-       xfs_dqtrx_t     *qa;
-
-       qa = XFS_QM_ISUDQ(dqp) ?
-               tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
-
-       for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-               if (qa[i].qt_dquot == NULL ||
-                   qa[i].qt_dquot == dqp)
-                       return &qa[i];
-       }
-
-       return NULL;
-}
-
-/*
- * Make the changes in the transaction structure.
- * The moral equivalent to xfs_trans_mod_sb().
- * We don't touch any fields in the dquot, so we don't care
- * if it's locked or not (most of the time it won't be).
- */
-void
-xfs_trans_mod_dquot(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp,
-       uint            field,
-       long            delta)
-{
-       xfs_dqtrx_t     *qtrx;
-
-       ASSERT(tp);
-       ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
-       qtrx = NULL;
-
-       if (tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-       /*
-        * Find either the first free slot or the slot that belongs
-        * to this dquot.
-        */
-       qtrx = xfs_trans_get_dqtrx(tp, dqp);
-       ASSERT(qtrx);
-       if (qtrx->qt_dquot == NULL)
-               qtrx->qt_dquot = dqp;
-
-       switch (field) {
-
-               /*
-                * regular disk blk reservation
-                */
-             case XFS_TRANS_DQ_RES_BLKS:
-               qtrx->qt_blk_res += (ulong)delta;
-               break;
-
-               /*
-                * inode reservation
-                */
-             case XFS_TRANS_DQ_RES_INOS:
-               qtrx->qt_ino_res += (ulong)delta;
-               break;
-
-               /*
-                * disk blocks used.
-                */
-             case XFS_TRANS_DQ_BCOUNT:
-               if (qtrx->qt_blk_res && delta > 0) {
-                       qtrx->qt_blk_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
-               }
-               qtrx->qt_bcount_delta += delta;
-               break;
-
-             case XFS_TRANS_DQ_DELBCOUNT:
-               qtrx->qt_delbcnt_delta += delta;
-               break;
-
-               /*
-                * Inode Count
-                */
-             case XFS_TRANS_DQ_ICOUNT:
-               if (qtrx->qt_ino_res && delta > 0) {
-                       qtrx->qt_ino_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
-               }
-               qtrx->qt_icount_delta += delta;
-               break;
-
-               /*
-                * rtblk reservation
-                */
-             case XFS_TRANS_DQ_RES_RTBLKS:
-               qtrx->qt_rtblk_res += (ulong)delta;
-               break;
-
-               /*
-                * rtblk count
-                */
-             case XFS_TRANS_DQ_RTBCOUNT:
-               if (qtrx->qt_rtblk_res && delta > 0) {
-                       qtrx->qt_rtblk_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
-               }
-               qtrx->qt_rtbcount_delta += delta;
-               break;
-
-             case XFS_TRANS_DQ_DELRTBCOUNT:
-               qtrx->qt_delrtb_delta += delta;
-               break;
-
-             default:
-               ASSERT(0);
-       }
-       tp->t_flags |= XFS_TRANS_DQ_DIRTY;
-}
-
-
-/*
- * Given an array of dqtrx structures, lock all the dquots associated
- * and join them to the transaction, provided they have been modified.
- * We know that the highest number of dquots of one type (usr OR grp)
- * involved in a transaction is 2, and that both usr and grp combined
- * is 3. So we don't attempt to make this very generic.
- */
-STATIC void
-xfs_trans_dqlockedjoin(
-       xfs_trans_t     *tp,
-       xfs_dqtrx_t     *q)
-{
-       ASSERT(q[0].qt_dquot != NULL);
-       if (q[1].qt_dquot == NULL) {
-               xfs_dqlock(q[0].qt_dquot);
-               xfs_trans_dqjoin(tp, q[0].qt_dquot);
-       } else {
-               ASSERT(XFS_QM_TRANS_MAXDQS == 2);
-               xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
-               xfs_trans_dqjoin(tp, q[0].qt_dquot);
-               xfs_trans_dqjoin(tp, q[1].qt_dquot);
-       }
-}
-
-
-/*
- * Called by xfs_trans_commit() and similar in spirit to
- * xfs_trans_apply_sb_deltas().
- * Go through all the dquots belonging to this transaction and modify the
- * INCORE dquot to reflect the actual usages.
- * Unreserve just the reservations done by this transaction.
- * dquot is still left locked at exit.
- */
-void
-xfs_trans_apply_dquot_deltas(
-       xfs_trans_t             *tp)
-{
-       int                     i, j;
-       xfs_dquot_t             *dqp;
-       xfs_dqtrx_t             *qtrx, *qa;
-       xfs_disk_dquot_t        *d;
-       long                    totalbdelta;
-       long                    totalrtbdelta;
-
-       if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
-               return;
-
-       ASSERT(tp->t_dqinfo);
-       qa = tp->t_dqinfo->dqa_usrdquots;
-       for (j = 0; j < 2; j++) {
-               if (qa[0].qt_dquot == NULL) {
-                       qa = tp->t_dqinfo->dqa_grpdquots;
-                       continue;
-               }
-
-               /*
-                * Lock all of the dquots and join them to the transaction.
-                */
-               xfs_trans_dqlockedjoin(tp, qa);
-
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       qtrx = &qa[i];
-                       /*
-                        * The array of dquots is filled
-                        * sequentially, not sparsely.
-                        */
-                       if ((dqp = qtrx->qt_dquot) == NULL)
-                               break;
-
-                       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-                       ASSERT(dqp->q_transp == tp);
-
-                       /*
-                        * adjust the actual number of blocks used
-                        */
-                       d = &dqp->q_core;
-
-                       /*
-                        * The issue here is that sometimes we intentionally
-                        * skip the blkquota reservation, to be fair to users
-                        * (when the amount is small). On the other hand,
-                        * delayed allocs do make reservations, but those are
-                        * outside of a transaction, so we have no idea how
-                        * much was really reserved.
-                        * So here we've accumulated both delayed allocation
-                        * blks and non-delay blks. The assumption is that
-                        * the delayed ones are always reserved (outside of
-                        * a transaction), and the others may or may not
-                        * have quota reservations.
-                        */
-                       totalbdelta = qtrx->qt_bcount_delta +
-                               qtrx->qt_delbcnt_delta;
-                       totalrtbdelta = qtrx->qt_rtbcount_delta +
-                               qtrx->qt_delrtb_delta;
-#ifdef DEBUG
-                       if (totalbdelta < 0)
-                               ASSERT(be64_to_cpu(d->d_bcount) >=
-                                      -totalbdelta);
-
-                       if (totalrtbdelta < 0)
-                               ASSERT(be64_to_cpu(d->d_rtbcount) >=
-                                      -totalrtbdelta);
-
-                       if (qtrx->qt_icount_delta < 0)
-                               ASSERT(be64_to_cpu(d->d_icount) >=
-                                      -qtrx->qt_icount_delta);
-#endif
-                       if (totalbdelta)
-                               be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
-
-                       if (qtrx->qt_icount_delta)
-                               be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
-
-                       if (totalrtbdelta)
-                               be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
-
-                       /*
-                        * Get any default limits in use.
-                        * Start/reset the timer(s) if needed.
-                        */
-                       if (d->d_id) {
-                               xfs_qm_adjust_dqlimits(tp->t_mountp, d);
-                               xfs_qm_adjust_dqtimers(tp->t_mountp, d);
-                       }
-
-                       dqp->dq_flags |= XFS_DQ_DIRTY;
-                       /*
-                        * add this to the list of items to get logged
-                        */
-                       xfs_trans_log_dquot(tp, dqp);
-                       /*
-                        * Take off what's left of the original reservation.
-                        * In case of delayed allocations, there's no
-                        * reservation that a transaction structure knows of.
-                        */
-                       if (qtrx->qt_blk_res != 0) {
-                               if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
-                                       if (qtrx->qt_blk_res >
-                                           qtrx->qt_blk_res_used)
-                                               dqp->q_res_bcount -= (xfs_qcnt_t)
-                                                       (qtrx->qt_blk_res -
-                                                        qtrx->qt_blk_res_used);
-                                       else
-                                               dqp->q_res_bcount -= (xfs_qcnt_t)
-                                                       (qtrx->qt_blk_res_used -
-                                                        qtrx->qt_blk_res);
-                               }
-                       } else {
-                               /*
-                                * These blks were never reserved, either inside
-                                * a transaction or outside one (in a delayed
-                                * allocation). Also, this isn't always a
-                                * negative number since we sometimes
-                                * deliberately skip quota reservations.
-                                */
-                               if (qtrx->qt_bcount_delta) {
-                                       dqp->q_res_bcount +=
-                                             (xfs_qcnt_t)qtrx->qt_bcount_delta;
-                               }
-                       }
-                       /*
-                        * Adjust the RT reservation.
-                        */
-                       if (qtrx->qt_rtblk_res != 0) {
-                               if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
-                                       if (qtrx->qt_rtblk_res >
-                                           qtrx->qt_rtblk_res_used)
-                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
-                                                      (qtrx->qt_rtblk_res -
-                                                       qtrx->qt_rtblk_res_used);
-                                       else
-                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
-                                                      (qtrx->qt_rtblk_res_used -
-                                                       qtrx->qt_rtblk_res);
-                               }
-                       } else {
-                               if (qtrx->qt_rtbcount_delta)
-                                       dqp->q_res_rtbcount +=
-                                           (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
-                       }
-
-                       /*
-                        * Adjust the inode reservation.
-                        */
-                       if (qtrx->qt_ino_res != 0) {
-                               ASSERT(qtrx->qt_ino_res >=
-                                      qtrx->qt_ino_res_used);
-                               if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
-                                       dqp->q_res_icount -= (xfs_qcnt_t)
-                                               (qtrx->qt_ino_res -
-                                                qtrx->qt_ino_res_used);
-                       } else {
-                               if (qtrx->qt_icount_delta)
-                                       dqp->q_res_icount +=
-                                           (xfs_qcnt_t)qtrx->qt_icount_delta;
-                       }
-
-                       ASSERT(dqp->q_res_bcount >=
-                               be64_to_cpu(dqp->q_core.d_bcount));
-                       ASSERT(dqp->q_res_icount >=
-                               be64_to_cpu(dqp->q_core.d_icount));
-                       ASSERT(dqp->q_res_rtbcount >=
-                               be64_to_cpu(dqp->q_core.d_rtbcount));
-               }
-               /*
-                * Do the group quotas next
-                */
-               qa = tp->t_dqinfo->dqa_grpdquots;
-       }
-}
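The commit-time block accounting above can be replayed with plain integers: add the accumulated deltas to the in-core usage, then hand back the unused slice of the reservation so the ASSERTed invariant (reservation >= usage) still holds. An illustrative sketch, all values hypothetical:

#include <stdio.h>

int main(void)
{
        /* a transaction reserved 10 blocks, used 6, delta of +6 blocks */
        long qt_blk_res = 10, qt_blk_res_used = 6;
        long qt_bcount_delta = 6, qt_delbcnt_delta = 0;

        /* in-core dquot: 100 blocks in use, 110 reserved (usage + resv) */
        long d_bcount = 100, q_res_bcount = 110;

        long totalbdelta = qt_bcount_delta + qt_delbcnt_delta;
        d_bcount += totalbdelta;                        /* 106 */
        q_res_bcount -= qt_blk_res - qt_blk_res_used;   /* hand back 4: 106 */

        /* invariant from the ASSERTs above: reservation >= usage */
        printf("d_bcount=%ld q_res_bcount=%ld\n", d_bcount, q_res_bcount);
        return 0;
}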
-
-/*
- * Release the reservations, and adjust the dquots accordingly.
- * This is called only when the transaction is being aborted. If by
- * any chance we have done dquot modifications incore (i.e. deltas) already,
- * we simply throw those away, since that's the expected behavior
- * when a transaction is curtailed without a commit.
- */
-void
-xfs_trans_unreserve_and_mod_dquots(
-       xfs_trans_t             *tp)
-{
-       int                     i, j;
-       xfs_dquot_t             *dqp;
-       xfs_dqtrx_t             *qtrx, *qa;
-       boolean_t               locked;
-
-       if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
-               return;
-
-       qa = tp->t_dqinfo->dqa_usrdquots;
-
-       for (j = 0; j < 2; j++) {
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       qtrx = &qa[i];
-                       /*
-                        * We assume that the array of dquots is filled
-                        * sequentially, not sparsely.
-                        */
-                       if ((dqp = qtrx->qt_dquot) == NULL)
-                               break;
-                       /*
-                        * Unreserve the original reservation. We don't care
-                        * about the number of blocks used field, or deltas.
-                        * Also we don't bother to zero the fields.
-                        */
-                       locked = B_FALSE;
-                       if (qtrx->qt_blk_res) {
-                               xfs_dqlock(dqp);
-                               locked = B_TRUE;
-                               dqp->q_res_bcount -=
-                                       (xfs_qcnt_t)qtrx->qt_blk_res;
-                       }
-                       if (qtrx->qt_ino_res) {
-                               if (!locked) {
-                                       xfs_dqlock(dqp);
-                                       locked = B_TRUE;
-                               }
-                               dqp->q_res_icount -=
-                                       (xfs_qcnt_t)qtrx->qt_ino_res;
-                       }
-
-                       if (qtrx->qt_rtblk_res) {
-                               if (!locked) {
-                                       xfs_dqlock(dqp);
-                                       locked = B_TRUE;
-                               }
-                               dqp->q_res_rtbcount -=
-                                       (xfs_qcnt_t)qtrx->qt_rtblk_res;
-                       }
-                       if (locked)
-                               xfs_dqunlock(dqp);
-
-               }
-               qa = tp->t_dqinfo->dqa_grpdquots;
-       }
-}
-
-STATIC void
-xfs_quota_warn(
-       struct xfs_mount        *mp,
-       struct xfs_dquot        *dqp,
-       int                     type)
-{
-       /* no warnings for project quotas - we just return ENOSPC later */
-       if (dqp->dq_flags & XFS_DQ_PROJ)
-               return;
-       quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
-                          be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
-                          type);
-}
-
-/*
- * This reserves disk blocks and inodes against a dquot.
- * Flags indicate if the dquot is to be locked here and also
- * if the blk reservation is for RT or regular blocks.
- * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
- */
-STATIC int
-xfs_trans_dqresv(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *dqp,
-       long            nblks,
-       long            ninos,
-       uint            flags)
-{
-       xfs_qcnt_t      hardlimit;
-       xfs_qcnt_t      softlimit;
-       time_t          timer;
-       xfs_qwarncnt_t  warns;
-       xfs_qwarncnt_t  warnlimit;
-       xfs_qcnt_t      count;
-       xfs_qcnt_t      *resbcountp;
-       xfs_quotainfo_t *q = mp->m_quotainfo;
-
-
-       xfs_dqlock(dqp);
-
-       if (flags & XFS_TRANS_DQ_RES_BLKS) {
-               hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
-               if (!hardlimit)
-                       hardlimit = q->qi_bhardlimit;
-               softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
-               if (!softlimit)
-                       softlimit = q->qi_bsoftlimit;
-               timer = be32_to_cpu(dqp->q_core.d_btimer);
-               warns = be16_to_cpu(dqp->q_core.d_bwarns);
-               warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
-               resbcountp = &dqp->q_res_bcount;
-       } else {
-               ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
-               hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
-               if (!hardlimit)
-                       hardlimit = q->qi_rtbhardlimit;
-               softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
-               if (!softlimit)
-                       softlimit = q->qi_rtbsoftlimit;
-               timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
-               warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
-               warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
-               resbcountp = &dqp->q_res_rtbcount;
-       }
-
-       if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
-           dqp->q_core.d_id &&
-           ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
-            (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
-             (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
-               if (nblks > 0) {
-                       /*
-                        * dquot is locked already. See if we'd go over the
-                        * hardlimit or exceed the timelimit if we allocate
-                        * nblks.
-                        */
-                       if (hardlimit > 0ULL &&
-                           hardlimit <= nblks + *resbcountp) {
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
-                               goto error_return;
-                       }
-                       if (softlimit > 0ULL &&
-                           softlimit <= nblks + *resbcountp) {
-                               if ((timer != 0 && get_seconds() > timer) ||
-                                   (warns != 0 && warns >= warnlimit)) {
-                                       xfs_quota_warn(mp, dqp,
-                                                      QUOTA_NL_BSOFTLONGWARN);
-                                       goto error_return;
-                               }
-
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
-                       }
-               }
-               if (ninos > 0) {
-                       count = be64_to_cpu(dqp->q_core.d_icount);
-                       timer = be32_to_cpu(dqp->q_core.d_itimer);
-                       warns = be16_to_cpu(dqp->q_core.d_iwarns);
-                       warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
-                       hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
-                       if (!hardlimit)
-                               hardlimit = q->qi_ihardlimit;
-                       softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
-                       if (!softlimit)
-                               softlimit = q->qi_isoftlimit;
-
-                       if (hardlimit > 0ULL && count >= hardlimit) {
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
-                               goto error_return;
-                       }
-                       if (softlimit > 0ULL && count >= softlimit) {
-                               if  ((timer != 0 && get_seconds() > timer) ||
-                                    (warns != 0 && warns >= warnlimit)) {
-                                       xfs_quota_warn(mp, dqp,
-                                                      QUOTA_NL_ISOFTLONGWARN);
-                                       goto error_return;
-                               }
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
-                       }
-               }
-       }
-
-       /*
-        * Change the reservation, but not the actual usage.
-        * Note that q_res_bcount = q_core.d_bcount + resv
-        */
-       (*resbcountp) += (xfs_qcnt_t)nblks;
-       if (ninos != 0)
-               dqp->q_res_icount += (xfs_qcnt_t)ninos;
-
-       /*
-        * Note the reservation amount in the trans struct too,
-        * so that the transaction knows how much was reserved by
-        * it against this particular dquot.
-        * We don't do this when we are reserving for a delayed allocation,
-        * because we don't have the luxury of a transaction envelope then.
-        */
-       if (tp) {
-               ASSERT(tp->t_dqinfo);
-               ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-               if (nblks != 0)
-                       xfs_trans_mod_dquot(tp, dqp,
-                                           flags & XFS_QMOPT_RESBLK_MASK,
-                                           nblks);
-               if (ninos != 0)
-                       xfs_trans_mod_dquot(tp, dqp,
-                                           XFS_TRANS_DQ_RES_INOS,
-                                           ninos);
-       }
-       ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
-       ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
-       ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
-
-       xfs_dqunlock(dqp);
-       return 0;
-
-error_return:
-       xfs_dqunlock(dqp);
-       if (flags & XFS_QMOPT_ENOSPC)
-               return ENOSPC;
-       return EDQUOT;
-}
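The enforcement policy in xfs_trans_dqresv() reduces to: always refuse a would-be hard-limit breach, and refuse a soft-limit breach only once the grace timer has expired or the warning budget is spent. A distilled standalone sketch of that decision, not the kernel's actual helper:

#include <stdbool.h>
#include <time.h>

static bool would_exceed(unsigned long long resv_count, long nblks,
                         unsigned long long softlimit,
                         unsigned long long hardlimit,
                         time_t timer, unsigned warns, unsigned warnlimit)
{
        if (hardlimit && hardlimit <= resv_count + nblks)
                return true;    /* hard limit: always refuse */
        if (softlimit && softlimit <= resv_count + nblks)
                /* soft limit: refuse only after the grace period, or
                 * once the warning budget is exhausted */
                return (timer && time(NULL) > timer) ||
                       (warns && warns >= warnlimit);
        return false;
}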
-
-
-/*
- * Given dquot(s), make disk block and/or inode reservations against them.
- * The fact that this does the reservation against both the usr and
- * grp/prj quotas is important, because this follows a both-or-nothing
- * approach.
- *
- * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
- *        XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
- *        XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
- *        XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
- * dquots are unlocked on return, if they were not locked by caller.
- */
-int
-xfs_trans_reserve_quota_bydquots(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *udqp,
-       xfs_dquot_t     *gdqp,
-       long            nblks,
-       long            ninos,
-       uint            flags)
-{
-       int             resvd = 0, error;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       if (tp && tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-
-       ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-
-       if (udqp) {
-               error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
-                                       (flags & ~XFS_QMOPT_ENOSPC));
-               if (error)
-                       return error;
-               resvd = 1;
-       }
-
-       if (gdqp) {
-               error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
-               if (error) {
-                       /*
-                        * can't do it, so backout previous reservation
-                        */
-                       if (resvd) {
-                               flags |= XFS_QMOPT_FORCE_RES;
-                               xfs_trans_dqresv(tp, mp, udqp,
-                                                -nblks, -ninos, flags);
-                       }
-                       return error;
-               }
-       }
-
-       /*
-        * Didn't change anything critical, so, no need to log
-        */
-       return 0;
-}
-
-
-/*
- * Lock the dquot and change the reservation if we can.
- * This doesn't change the actual usage, just the reservation.
- * The inode sent in is locked.
- */
-int
-xfs_trans_reserve_quota_nblks(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       long                    nblks,
-       long                    ninos,
-       uint                    flags)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-       if (XFS_IS_PQUOTA_ON(mp))
-               flags |= XFS_QMOPT_ENOSPC;
-
-       ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
-       ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-                               XFS_TRANS_DQ_RES_RTBLKS ||
-              (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-                               XFS_TRANS_DQ_RES_BLKS);
-
-       /*
-        * Reserve nblks against these dquots, with trans as the mediator.
-        */
-       return xfs_trans_reserve_quota_bydquots(tp, mp,
-                                               ip->i_udquot, ip->i_gdquot,
-                                               nblks, ninos, flags);
-}
-
-/*
- * This routine is called to allocate a quotaoff log item.
- */
-xfs_qoff_logitem_t *
-xfs_trans_get_qoff_item(
-       xfs_trans_t             *tp,
-       xfs_qoff_logitem_t      *startqoff,
-       uint                    flags)
-{
-       xfs_qoff_logitem_t      *q;
-
-       ASSERT(tp != NULL);
-
-       q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
-       ASSERT(q != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &q->qql_item);
-       return q;
-}
-
-
-/*
- * This is called to mark the quotaoff logitem as needing
- * to be logged when the transaction is committed.  The logitem must
- * already be associated with the given transaction.
- */
-void
-xfs_trans_log_quotaoff_item(
-       xfs_trans_t             *tp,
-       xfs_qoff_logitem_t      *qlp)
-{
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-STATIC void
-xfs_trans_alloc_dqinfo(
-       xfs_trans_t     *tp)
-{
-       tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
-}
-
-void
-xfs_trans_free_dqinfo(
-       xfs_trans_t     *tp)
-{
-       if (!tp->t_dqinfo)
-               return;
-       kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
-       tp->t_dqinfo = NULL;
-}
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
deleted file mode 100644
index b83f76b..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of a uuid_t */
-typedef struct {
-       __be32  uu_timelow;
-       __be16  uu_timemid;
-       __be16  uu_timehi;
-       __be16  uu_clockseq;
-       __be16  uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it is just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
-       xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
-       fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-                  be16_to_cpu(uup->uu_timemid);
-       fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
-       int     i;
-       char    *cp = (char *)uuid;
-
-       if (uuid == NULL)
-               return 0;
-       /* implied check of version number here... */
-       for (i = 0; i < sizeof *uuid; i++)
-               if (*cp++) return 0;    /* not nil */
-       return 1;       /* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
-       return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
deleted file mode 100644
index 4732d71..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
-       unsigned char   __u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/time.h b/fs/xfs/time.h
new file mode 100644
index 0000000..387e695
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_TIME_H__
+#define __XFS_SUPPORT_TIME_H__
+
+#include <linux/sched.h>
+#include <linux/time.h>
+
+typedef struct timespec timespec_t;
+
+static inline void delay(long ticks)
+{
+       schedule_timeout_uninterruptible(ticks);
+}
+
+static inline void nanotime(struct timespec *tvp)
+{
+       *tvp = CURRENT_TIME;
+}
+
+#endif /* __XFS_SUPPORT_TIME_H__ */
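delay() parks the caller uninterruptibly for a tick count, so callers convert from wall-clock units first. A hypothetical usage sketch, assuming this header is already included (xfs_example_pause() is made up; msecs_to_jiffies() is the stock helper from <linux/jiffies.h>):

#include <linux/jiffies.h>

static void xfs_example_pause(void)
{
        timespec_t t0, t1;              /* typedef from this header */

        nanotime(&t0);
        delay(msecs_to_jiffies(10));    /* uninterruptible ~10 ms sleep */
        nanotime(&t1);
}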
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
new file mode 100644
index 0000000..b83f76b
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <xfs.h>
+
+/* IRIX interpretation of a uuid_t */
+typedef struct {
+       __be32  uu_timelow;
+       __be16  uu_timemid;
+       __be16  uu_timehi;
+       __be16  uu_clockseq;
+       __be16  uu_node[3];
+} xfs_uu_t;
+
+/*
+ * uuid_getnodeuniq - obtain the node unique fields of a UUID.
+ *
+ * This is not in any way a standard or condoned UUID function;
+ * it is just something that's needed for user-level file handles.
+ */
+void
+uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
+{
+       xfs_uu_t *uup = (xfs_uu_t *)uuid;
+
+       fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
+                  be16_to_cpu(uup->uu_timemid);
+       fsid[1] = be32_to_cpu(uup->uu_timelow);
+}
+
+int
+uuid_is_nil(uuid_t *uuid)
+{
+       int     i;
+       char    *cp = (char *)uuid;
+
+       if (uuid == NULL)
+               return 0;
+       /* implied check of version number here... */
+       for (i = 0; i < sizeof *uuid; i++)
+               if (*cp++) return 0;    /* not nil */
+       return 1;       /* is nil */
+}
+
+int
+uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
+{
+       return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
+}
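
Note the boolean-style return conventions above: 1 means "is nil"/"equal", 0
otherwise. A hypothetical helper built on them might look like:

    /* Hypothetical: does candidate b match a, where a nil a matches anything? */
    static int uuid_matches(uuid_t *a, uuid_t *b)
    {
        if (uuid_is_nil(a))
            return 1;
        return uuid_equal(a, b);
    }
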
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
new file mode 100644 (file)
index 0000000..4732d71
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_UUID_H__
+#define __XFS_SUPPORT_UUID_H__
+
+typedef struct {
+       unsigned char   __u_bits[16];
+} uuid_t;
+
+extern int uuid_is_nil(uuid_t *uuid);
+extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
+extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
+
+#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 53ec3ea9a625f40e36a16d7950e624e198836ee7..d8b11b7f94aae5bf7f591037d1fc07eaf79e7c25 100644 (file)
@@ -24,5 +24,6 @@
 #define XFS_BUF_LOCK_TRACKING 1
 #endif
 
-#include <linux-2.6/xfs_linux.h>
+#include "xfs_linux.h"
+
 #endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
new file mode 100644 (file)
index 0000000..b6c4b37
--- /dev/null
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+/*
+ * Locking scheme:
+ *  - all ACL updates are protected by inode->i_mutex, which is taken before
+ *    calling into this file.
+ */
+
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+       struct posix_acl_entry *acl_e;
+       struct posix_acl *acl;
+       struct xfs_acl_entry *ace;
+       int count, i;
+
+       count = be32_to_cpu(aclp->acl_cnt);
+       if (count > XFS_ACL_MAX_ENTRIES)
+               return ERR_PTR(-EFBIG);
+
+       acl = posix_acl_alloc(count, GFP_KERNEL);
+       if (!acl)
+               return ERR_PTR(-ENOMEM);
+
+       for (i = 0; i < count; i++) {
+               acl_e = &acl->a_entries[i];
+               ace = &aclp->acl_entry[i];
+
+               /*
+                * The tag is 32 bits on disk and 16 bits in core.
+                *
+                * Because every access to it goes through the core
+                * format first this is not a problem.
+                */
+               acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+               acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+               switch (acl_e->e_tag) {
+               case ACL_USER:
+               case ACL_GROUP:
+                       acl_e->e_id = be32_to_cpu(ace->ae_id);
+                       break;
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       acl_e->e_id = ACL_UNDEFINED_ID;
+                       break;
+               default:
+                       goto fail;
+               }
+       }
+       return acl;
+
+fail:
+       posix_acl_release(acl);
+       return ERR_PTR(-EINVAL);
+}
+
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+       const struct posix_acl_entry *acl_e;
+       struct xfs_acl_entry *ace;
+       int i;
+
+       aclp->acl_cnt = cpu_to_be32(acl->a_count);
+       for (i = 0; i < acl->a_count; i++) {
+               ace = &aclp->acl_entry[i];
+               acl_e = &acl->a_entries[i];
+
+               ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+               ace->ae_id = cpu_to_be32(acl_e->e_id);
+               ace->ae_perm = cpu_to_be16(acl_e->e_perm);
+       }
+}
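
xfs_acl_from_disk() and xfs_acl_to_disk() are endian-swapping inverses for the
tag and perm fields, so a round trip through the disk format must preserve an
in-core entry. A hedged sanity-check sketch (names hypothetical; note that
from_disk deliberately resets e_id to ACL_UNDEFINED_ID for the *_OBJ tags):

    /* Hypothetical round-trip check for the first entry. */
    static void check_acl_roundtrip(struct xfs_acl *disk, struct posix_acl *core)
    {
        struct posix_acl *copy;

        xfs_acl_to_disk(disk, core);        /* core -> big-endian disk format */
        copy = xfs_acl_from_disk(disk);     /* disk -> core format again */
        ASSERT(!IS_ERR(copy));
        ASSERT(copy->a_entries[0].e_tag == core->a_entries[0].e_tag);
        posix_acl_release(copy);
    }
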
+
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       struct posix_acl *acl;
+       struct xfs_acl *xfs_acl;
+       int len = sizeof(struct xfs_acl);
+       unsigned char *ea_name;
+       int error;
+
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
+       trace_xfs_get_acl(ip);
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               ea_name = SGI_ACL_FILE;
+               break;
+       case ACL_TYPE_DEFAULT:
+               ea_name = SGI_ACL_DEFAULT;
+               break;
+       default:
+               BUG();
+       }
+
+       /*
+        * If we have a cached ACL value just return it; no need to
+        * go out to the disk.
+        */
+
+       xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+       if (!xfs_acl)
+               return ERR_PTR(-ENOMEM);
+
+       error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+                                                       &len, ATTR_ROOT);
+       if (error) {
+               /*
+                * If the attribute doesn't exist make sure we have a negative
+                * cache entry, for any other error assume it is transient and
+                * leave the cache entry as ACL_NOT_CACHED.
+                */
+               if (error == -ENOATTR) {
+                       acl = NULL;
+                       goto out_update_cache;
+               }
+               goto out;
+       }
+
+       acl = xfs_acl_from_disk(xfs_acl);
+       if (IS_ERR(acl))
+               goto out;
+
+ out_update_cache:
+       set_cached_acl(inode, type, acl);
+ out:
+       kfree(xfs_acl);
+       return acl;
+}
+
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       unsigned char *ea_name;
+       int error;
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               ea_name = SGI_ACL_FILE;
+               break;
+       case ACL_TYPE_DEFAULT:
+               if (!S_ISDIR(inode->i_mode))
+                       return acl ? -EACCES : 0;
+               ea_name = SGI_ACL_DEFAULT;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (acl) {
+               struct xfs_acl *xfs_acl;
+               int len;
+
+               xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+               if (!xfs_acl)
+                       return -ENOMEM;
+
+               xfs_acl_to_disk(xfs_acl, acl);
+               len = sizeof(struct xfs_acl) -
+                       (sizeof(struct xfs_acl_entry) *
+                        (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+               error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
+                               len, ATTR_ROOT);
+
+               kfree(xfs_acl);
+       } else {
+               /*
+                * A NULL ACL argument means we want to remove the ACL.
+                */
+               error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+               /*
+                * If the attribute didn't exist to start with that's fine.
+                */
+               if (error == -ENOATTR)
+                       error = 0;
+       }
+
+       if (!error)
+               set_cached_acl(inode, type, acl);
+       return error;
+}
+
+static int
+xfs_set_mode(struct inode *inode, umode_t mode)
+{
+       int error = 0;
+
+       if (mode != inode->i_mode) {
+               struct iattr iattr;
+
+               iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
+               iattr.ia_mode = mode;
+               iattr.ia_ctime = current_fs_time(inode->i_sb);
+
+               error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+       }
+
+       return error;
+}
+
+static int
+xfs_acl_exists(struct inode *inode, unsigned char *name)
+{
+       int len = sizeof(struct xfs_acl);
+
+       return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+                           ATTR_ROOT|ATTR_KERNOVAL) == 0);
+}
+
+int
+posix_acl_access_exists(struct inode *inode)
+{
+       return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+int
+posix_acl_default_exists(struct inode *inode)
+{
+       if (!S_ISDIR(inode->i_mode))
+               return 0;
+       return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
+}
+
+/*
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
+{
+       umode_t mode = inode->i_mode;
+       int error = 0, inherit = 0;
+
+       if (S_ISDIR(inode->i_mode)) {
+               error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+               if (error)
+                       goto out;
+       }
+
+       error = posix_acl_create(&acl, GFP_KERNEL, &mode);
+       if (error < 0)
+               return error;
+
+       /*
+        * If posix_acl_create returns a positive value we need to
+        * inherit a permission that can't be represented using the Unix
+        * mode bits, so we actually need to set an ACL.
+        */
+       if (error > 0)
+               inherit = 1;
+
+       error = xfs_set_mode(inode, mode);
+       if (error)
+               goto out;
+
+       if (inherit)
+               error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+
+out:
+       posix_acl_release(acl);
+       return error;
+}
+
+int
+xfs_acl_chmod(struct inode *inode)
+{
+       struct posix_acl *acl;
+       int error;
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+       if (IS_ERR(acl) || !acl)
+               return PTR_ERR(acl);
+
+       error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+       if (error)
+               return error;
+
+       error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+       posix_acl_release(acl);
+       return error;
+}
+
+static int
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+               void *value, size_t size, int type)
+{
+       struct posix_acl *acl;
+       int error;
+
+       acl = xfs_get_acl(dentry->d_inode, type);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (acl == NULL)
+               return -ENODATA;
+
+       error = posix_acl_to_xattr(acl, value, size);
+       posix_acl_release(acl);
+
+       return error;
+}
+
+static int
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+               const void *value, size_t size, int flags, int type)
+{
+       struct inode *inode = dentry->d_inode;
+       struct posix_acl *acl = NULL;
+       int error = 0;
+
+       if (flags & XATTR_CREATE)
+               return -EINVAL;
+       if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+               return value ? -EACCES : 0;
+       if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+               return -EPERM;
+
+       if (!value)
+               goto set_acl;
+
+       acl = posix_acl_from_xattr(value, size);
+       if (!acl) {
+               /*
+                * acl_set_file(3) may request that we set default ACLs with
+                * zero length -- defend (gracefully) against that here.
+                */
+               goto out;
+       }
+       if (IS_ERR(acl)) {
+               error = PTR_ERR(acl);
+               goto out;
+       }
+
+       error = posix_acl_valid(acl);
+       if (error)
+               goto out_release;
+
+       error = -EINVAL;
+       if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+               goto out_release;
+
+       if (type == ACL_TYPE_ACCESS) {
+               umode_t mode = inode->i_mode;
+               error = posix_acl_equiv_mode(acl, &mode);
+
+               if (error <= 0) {
+                       posix_acl_release(acl);
+                       acl = NULL;
+
+                       if (error < 0)
+                               return error;
+               }
+
+               error = xfs_set_mode(inode, mode);
+               if (error)
+                       goto out_release;
+       }
+
+ set_acl:
+       error = xfs_set_acl(inode, type, acl);
+ out_release:
+       posix_acl_release(acl);
+ out:
+       return error;
+}
+
+const struct xattr_handler xfs_xattr_acl_access_handler = {
+       .prefix = POSIX_ACL_XATTR_ACCESS,
+       .flags  = ACL_TYPE_ACCESS,
+       .get    = xfs_xattr_acl_get,
+       .set    = xfs_xattr_acl_set,
+};
+
+const struct xattr_handler xfs_xattr_acl_default_handler = {
+       .prefix = POSIX_ACL_XATTR_DEFAULT,
+       .flags  = ACL_TYPE_DEFAULT,
+       .get    = xfs_xattr_acl_get,
+       .set    = xfs_xattr_acl_set,
+};
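
These handler tables plug into the generic VFS xattr layer. A hedged sketch of
how such handlers are typically exposed on a superblock (the real hookup lives
elsewhere in XFS; the array name here is illustrative):

    /* Illustrative registration only. */
    static const struct xattr_handler *example_xattr_handlers[] = {
        &xfs_xattr_acl_access_handler,
        &xfs_xattr_acl_default_handler,
        NULL,                   /* sentinel required by the VFS */
    };
    /* ...and at mount time:  sb->s_xattr = example_xattr_handlers;  */
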
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
new file mode 100644 (file)
index 0000000..63e971e
--- /dev/null
@@ -0,0 +1,1499 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_rw.h"
+#include "xfs_iomap.h"
+#include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+#include <linux/gfp.h>
+#include <linux/mpage.h>
+#include <linux/pagevec.h>
+#include <linux/writeback.h>
+
+
+/*
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC         37
+#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
+static wait_queue_head_t xfs_ioend_wq[NVSYNC];
+
+void __init
+xfs_ioend_init(void)
+{
+       int i;
+
+       for (i = 0; i < NVSYNC; i++)
+               init_waitqueue_head(&xfs_ioend_wq[i]);
+}
+
+void
+xfs_ioend_wait(
+       xfs_inode_t     *ip)
+{
+       wait_queue_head_t *wq = to_ioend_wq(ip);
+
+       wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
+}
+
+STATIC void
+xfs_ioend_wake(
+       xfs_inode_t     *ip)
+{
+       if (atomic_dec_and_test(&ip->i_iocount))
+               wake_up(to_ioend_wq(ip));
+}
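
The NVSYNC scheme above is a hashed wait queue: rather than embedding one
wait_queue_head_t per inode, the inode pointer is hashed (modulo a prime, since
a pointer is the key) into a small shared table, trading occasional false
wakeups for memory. The same pattern in miniature, with hypothetical names:

    #define EXAMPLE_NBUCKETS 37     /* prime, as above */
    static wait_queue_head_t example_wq[EXAMPLE_NBUCKETS];

    static wait_queue_head_t *example_wq_for(void *key)
    {
        return &example_wq[(unsigned long)key % EXAMPLE_NBUCKETS];
    }
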
+
+void
+xfs_count_page_state(
+       struct page             *page,
+       int                     *delalloc,
+       int                     *unwritten)
+{
+       struct buffer_head      *bh, *head;
+
+       *delalloc = *unwritten = 0;
+
+       bh = head = page_buffers(page);
+       do {
+               if (buffer_unwritten(bh))
+                       (*unwritten) = 1;
+               else if (buffer_delay(bh))
+                       (*delalloc) = 1;
+       } while ((bh = bh->b_this_page) != head);
+}
+
+STATIC struct block_device *
+xfs_find_bdev_for_inode(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+
+       if (XFS_IS_REALTIME_INODE(ip))
+               return mp->m_rtdev_targp->bt_bdev;
+       else
+               return mp->m_ddev_targp->bt_bdev;
+}
+
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory.  Do not use the ioend after this.
+ */
+STATIC void
+xfs_destroy_ioend(
+       xfs_ioend_t             *ioend)
+{
+       struct buffer_head      *bh, *next;
+       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+
+       for (bh = ioend->io_buffer_head; bh; bh = next) {
+               next = bh->b_private;
+               bh->b_end_io(bh, !ioend->io_error);
+       }
+
+       /*
+        * Volume managers supporting multiple paths can send back ENODEV
+        * when the final path disappears.  In this case continuing to fill
+        * the page cache with dirty data which cannot be written out is
+        * evil, so prevent that.
+        */
+       if (unlikely(ioend->io_error == -ENODEV)) {
+               xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
+                                     __FILE__, __LINE__);
+       }
+
+       xfs_ioend_wake(ip);
+       mempool_free(ioend, xfs_ioend_pool);
+}
+
+/*
+ * If the end of the current ioend is beyond the current EOF,
+ * return the new EOF value, otherwise zero.
+ */
+STATIC xfs_fsize_t
+xfs_ioend_new_eof(
+       xfs_ioend_t             *ioend)
+{
+       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
+       xfs_fsize_t             isize;
+       xfs_fsize_t             bsize;
+
+       bsize = ioend->io_offset + ioend->io_size;
+       isize = MAX(ip->i_size, ip->i_new_size);
+       isize = MIN(isize, bsize);
+       return isize > ip->i_d.di_size ? isize : 0;
+}
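
A worked instance of the clamping above, with hypothetical sizes in 1k units:
i_size = 96, i_new_size = 104 (an extending write is in flight), and the ioend
covers offset 96, size 8. Then bsize = 104, isize = MIN(MAX(96, 104), 104) =
104, and with an on-disk di_size of 96 the function returns 104 as the new EOF;
had di_size already been 104, it would return 0 (nothing to update).
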
+
+/*
+ * Update on-disk file size now that data has been written to disk.  The
+ * current in-memory file size is i_size.  If a write is beyond eof i_new_size
+ * will be the intended file size until i_size is updated.  If this write does
+ * not extend all the way to the valid file size then restrict this update to
+ * the end of the write.
+ *
+ * This function does not block as blocking on the inode lock in IO completion
+ * can lead to IO completion order dependency deadlocks. If it can't get the
+ * inode ilock it will return EAGAIN. Callers must handle this.
+ */
+STATIC int
+xfs_setfilesize(
+       xfs_ioend_t             *ioend)
+{
+       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
+       xfs_fsize_t             isize;
+
+       if (unlikely(ioend->io_error))
+               return 0;
+
+       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+               return EAGAIN;
+
+       isize = xfs_ioend_new_eof(ioend);
+       if (isize) {
+               trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+               ip->i_d.di_size = isize;
+               xfs_mark_inode_dirty(ip);
+       }
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return 0;
+}
+
+/*
+ * Schedule IO completion handling on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+       struct xfs_ioend        *ioend)
+{
+       if (atomic_dec_and_test(&ioend->io_remaining)) {
+               if (ioend->io_type == IO_UNWRITTEN)
+                       queue_work(xfsconvertd_workqueue, &ioend->io_work);
+               else
+                       queue_work(xfsdatad_workqueue, &ioend->io_work);
+       }
+}
+
+/*
+ * IO write completion.
+ */
+STATIC void
+xfs_end_io(
+       struct work_struct *work)
+{
+       xfs_ioend_t     *ioend = container_of(work, xfs_ioend_t, io_work);
+       struct xfs_inode *ip = XFS_I(ioend->io_inode);
+       int             error = 0;
+
+       /*
+        * For unwritten extents we need to issue transactions to convert a
+        * range to normal written extents after the data I/O has finished.
+        */
+       if (ioend->io_type == IO_UNWRITTEN &&
+           likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
+
+               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+                                                ioend->io_size);
+               if (error)
+                       ioend->io_error = error;
+       }
+
+       /*
+        * We might have to update the on-disk file size after extending
+        * writes.
+        */
+       error = xfs_setfilesize(ioend);
+       ASSERT(!error || error == EAGAIN);
+
+       /*
+        * If we didn't complete processing of the ioend, requeue it to the
+        * tail of the workqueue for another attempt later. Otherwise destroy
+        * it.
+        */
+       if (error == EAGAIN) {
+               atomic_inc(&ioend->io_remaining);
+               xfs_finish_ioend(ioend);
+               /* ensure we don't spin on blocked ioends */
+               delay(1);
+       } else {
+               if (ioend->io_iocb)
+                       aio_complete(ioend->io_iocb, ioend->io_result, 0);
+               xfs_destroy_ioend(ioend);
+       }
+}
+
+/*
+ * Call IO completion handling in caller context on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend_sync(
+       struct xfs_ioend        *ioend)
+{
+       if (atomic_dec_and_test(&ioend->io_remaining))
+               xfs_end_io(&ioend->io_work);
+}
+
+/*
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
+ */
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+       struct inode            *inode,
+       unsigned int            type)
+{
+       xfs_ioend_t             *ioend;
+
+       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
+
+       /*
+        * Set the count to 1 initially, which will prevent an I/O
+        * completion callback that happens before we have started
+        * all the I/O from calling the completion routine too early.
+        */
+       atomic_set(&ioend->io_remaining, 1);
+       ioend->io_error = 0;
+       ioend->io_list = NULL;
+       ioend->io_type = type;
+       ioend->io_inode = inode;
+       ioend->io_buffer_head = NULL;
+       ioend->io_buffer_tail = NULL;
+       atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
+       ioend->io_offset = 0;
+       ioend->io_size = 0;
+       ioend->io_iocb = NULL;
+       ioend->io_result = 0;
+
+       INIT_WORK(&ioend->io_work, xfs_end_io);
+       return ioend;
+}
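
Starting io_remaining at 1 is the usual reference-count bias: the submitter
holds one reference for the whole submission phase and drops it last, so the
completion handler cannot run while bios are still being attached. In outline:

    /*
     * Submission-side outline (see xfs_submit_ioend below):
     *   io_remaining = 1        -- bias held by the submitter
     *   atomic_inc() per bio    -- one reference per in-flight bio
     *   ...bios submitted...
     *   xfs_finish_ioend()      -- drop the bias; last dropper completes
     */
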
+
+STATIC int
+xfs_map_blocks(
+       struct inode            *inode,
+       loff_t                  offset,
+       struct xfs_bmbt_irec    *imap,
+       int                     type,
+       int                     nonblocking)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       ssize_t                 count = 1 << inode->i_blkbits;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     error = 0;
+       int                     bmapi_flags = XFS_BMAPI_ENTIRE;
+       int                     nimaps = 1;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (type == IO_UNWRITTEN)
+               bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+       if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+               if (nonblocking)
+                       return -XFS_ERROR(EAGAIN);
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+       }
+
+       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+              (ip->i_df.if_flags & XFS_IFEXTENTS));
+       ASSERT(offset <= mp->m_maxioffset);
+
+       if (offset + count > mp->m_maxioffset)
+               count = mp->m_maxioffset - offset;
+       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+                         bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       if (error)
+               return -XFS_ERROR(error);
+
+       if (type == IO_DELALLOC &&
+           (!nimaps || isnullstartblock(imap->br_startblock))) {
+               error = xfs_iomap_write_allocate(ip, offset, count, imap);
+               if (!error)
+                       trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+               return -XFS_ERROR(error);
+       }
+
+#ifdef DEBUG
+       if (type == IO_UNWRITTEN) {
+               ASSERT(nimaps);
+               ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+               ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+       }
+#endif
+       if (nimaps)
+               trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+       return 0;
+}
+
+STATIC int
+xfs_imap_valid(
+       struct inode            *inode,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       offset >>= inode->i_blkbits;
+
+       return offset >= imap->br_startoff &&
+               offset < imap->br_startoff + imap->br_blockcount;
+}
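
Worked numbers for the range check, assuming 4k blocks (i_blkbits == 12): a
byte offset of 8192 becomes file block 2, so an extent with br_startoff == 0
and br_blockcount == 4 covers it; offset 16384 becomes block 4, which equals
br_startoff + br_blockcount and is therefore just outside the mapping.
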
+
+/*
+ * BIO completion handler for buffered IO.
+ */
+STATIC void
+xfs_end_bio(
+       struct bio              *bio,
+       int                     error)
+{
+       xfs_ioend_t             *ioend = bio->bi_private;
+
+       ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+       ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
+
+       /* Toss bio and pass work off to an xfsdatad thread */
+       bio->bi_private = NULL;
+       bio->bi_end_io = NULL;
+       bio_put(bio);
+
+       xfs_finish_ioend(ioend);
+}
+
+STATIC void
+xfs_submit_ioend_bio(
+       struct writeback_control *wbc,
+       xfs_ioend_t             *ioend,
+       struct bio              *bio)
+{
+       atomic_inc(&ioend->io_remaining);
+       bio->bi_private = ioend;
+       bio->bi_end_io = xfs_end_bio;
+
+       /*
+        * If the I/O is beyond EOF we mark the inode dirty immediately
+        * but don't update the inode size until I/O completion.
+        */
+       if (xfs_ioend_new_eof(ioend))
+               xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
+
+       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
+}
+
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+       struct buffer_head      *bh)
+{
+       int                     nvecs = bio_get_nr_vecs(bh->b_bdev);
+       struct bio              *bio = bio_alloc(GFP_NOIO, nvecs);
+
+       ASSERT(bio->bi_private == NULL);
+       bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+       bio->bi_bdev = bh->b_bdev;
+       return bio;
+}
+
+STATIC void
+xfs_start_buffer_writeback(
+       struct buffer_head      *bh)
+{
+       ASSERT(buffer_mapped(bh));
+       ASSERT(buffer_locked(bh));
+       ASSERT(!buffer_delay(bh));
+       ASSERT(!buffer_unwritten(bh));
+
+       mark_buffer_async_write(bh);
+       set_buffer_uptodate(bh);
+       clear_buffer_dirty(bh);
+}
+
+STATIC void
+xfs_start_page_writeback(
+       struct page             *page,
+       int                     clear_dirty,
+       int                     buffers)
+{
+       ASSERT(PageLocked(page));
+       ASSERT(!PageWriteback(page));
+       if (clear_dirty)
+               clear_page_dirty_for_io(page);
+       set_page_writeback(page);
+       unlock_page(page);
+       /* If no buffers on the page are to be written, finish it here */
+       if (!buffers)
+               end_page_writeback(page);
+}
+
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+       return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+
+/*
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we go, then we can end up with a page that only has some buffers
+ * marked async write, and I/O completion can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * buffer_heads, and then submit them for I/O on the second pass.
+ */
+STATIC void
+xfs_submit_ioend(
+       struct writeback_control *wbc,
+       xfs_ioend_t             *ioend)
+{
+       xfs_ioend_t             *head = ioend;
+       xfs_ioend_t             *next;
+       struct buffer_head      *bh;
+       struct bio              *bio;
+       sector_t                lastblock = 0;
+
+       /* Pass 1 - start writeback */
+       do {
+               next = ioend->io_list;
+               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
+                       xfs_start_buffer_writeback(bh);
+       } while ((ioend = next) != NULL);
+
+       /* Pass 2 - submit I/O */
+       ioend = head;
+       do {
+               next = ioend->io_list;
+               bio = NULL;
+
+               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+
+                       if (!bio) {
+ retry:
+                               bio = xfs_alloc_ioend_bio(bh);
+                       } else if (bh->b_blocknr != lastblock + 1) {
+                               xfs_submit_ioend_bio(wbc, ioend, bio);
+                               goto retry;
+                       }
+
+                       if (bio_add_buffer(bio, bh) != bh->b_size) {
+                               xfs_submit_ioend_bio(wbc, ioend, bio);
+                               goto retry;
+                       }
+
+                       lastblock = bh->b_blocknr;
+               }
+               if (bio)
+                       xfs_submit_ioend_bio(wbc, ioend, bio);
+               xfs_finish_ioend(ioend);
+       } while ((ioend = next) != NULL);
+}
+
+/*
+ * Cancel submission of all buffer_heads so far in this endio.
+ * Toss the endio too.  Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ */
+STATIC void
+xfs_cancel_ioend(
+       xfs_ioend_t             *ioend)
+{
+       xfs_ioend_t             *next;
+       struct buffer_head      *bh, *next_bh;
+
+       do {
+               next = ioend->io_list;
+               bh = ioend->io_buffer_head;
+               do {
+                       next_bh = bh->b_private;
+                       clear_buffer_async_write(bh);
+                       unlock_buffer(bh);
+               } while ((bh = next_bh) != NULL);
+
+               xfs_ioend_wake(XFS_I(ioend->io_inode));
+               mempool_free(ioend, xfs_ioend_pool);
+       } while ((ioend = next) != NULL);
+}
+
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we finish off any current ioend and start another.
+ */
+STATIC void
+xfs_add_to_ioend(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       xfs_off_t               offset,
+       unsigned int            type,
+       xfs_ioend_t             **result,
+       int                     need_ioend)
+{
+       xfs_ioend_t             *ioend = *result;
+
+       if (!ioend || need_ioend || type != ioend->io_type) {
+               xfs_ioend_t     *previous = *result;
+
+               ioend = xfs_alloc_ioend(inode, type);
+               ioend->io_offset = offset;
+               ioend->io_buffer_head = bh;
+               ioend->io_buffer_tail = bh;
+               if (previous)
+                       previous->io_list = ioend;
+               *result = ioend;
+       } else {
+               ioend->io_buffer_tail->b_private = bh;
+               ioend->io_buffer_tail = bh;
+       }
+
+       bh->b_private = NULL;
+       ioend->io_size += bh->b_size;
+}
+
+STATIC void
+xfs_map_buffer(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       sector_t                bn;
+       struct xfs_mount        *m = XFS_I(inode)->i_mount;
+       xfs_off_t               iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
+       xfs_daddr_t             iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
+
+       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+       bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+             ((offset - iomap_offset) >> inode->i_blkbits);
+
+       ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
+
+       bh->b_blocknr = bn;
+       set_buffer_mapped(bh);
+}
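
The block-number arithmetic above converts iomap_bn, a daddr in 512-byte basic
blocks (BBSHIFT == 9), into units of the filesystem block size, then adds the
block offset of this buffer within the extent. With hypothetical 4k blocks
(i_blkbits == 12): iomap_bn = 80 gives 80 >> 3 = 10, offset - iomap_offset =
8192 gives 8192 >> 12 = 2, so bn = 12 -- b_blocknr is in b_size units.
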
+
+STATIC void
+xfs_map_at_offset(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+       xfs_map_buffer(inode, bh, imap, offset);
+       set_buffer_mapped(bh);
+       clear_buffer_delay(bh);
+       clear_buffer_unwritten(bh);
+}
+
+/*
+ * Test if a given page is suitable for writing as part of an unwritten
+ * or delayed allocate extent.
+ */
+STATIC int
+xfs_is_delayed_page(
+       struct page             *page,
+       unsigned int            type)
+{
+       if (PageWriteback(page))
+               return 0;
+
+       if (page->mapping && page_has_buffers(page)) {
+               struct buffer_head      *bh, *head;
+               int                     acceptable = 0;
+
+               bh = head = page_buffers(page);
+               do {
+                       if (buffer_unwritten(bh))
+                               acceptable = (type == IO_UNWRITTEN);
+                       else if (buffer_delay(bh))
+                               acceptable = (type == IO_DELALLOC);
+                       else if (buffer_dirty(bh) && buffer_mapped(bh))
+                               acceptable = (type == IO_OVERWRITE);
+                       else
+                               break;
+               } while ((bh = bh->b_this_page) != head);
+
+               if (acceptable)
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * except for the original page of a writepage, this is called on
+ * delalloc/unwritten pages only, for the original page it is possible
+ * that the page has no mapping at all.
+ */
+STATIC int
+xfs_convert_page(
+       struct inode            *inode,
+       struct page             *page,
+       loff_t                  tindex,
+       struct xfs_bmbt_irec    *imap,
+       xfs_ioend_t             **ioendp,
+       struct writeback_control *wbc)
+{
+       struct buffer_head      *bh, *head;
+       xfs_off_t               end_offset;
+       unsigned long           p_offset;
+       unsigned int            type;
+       int                     len, page_dirty;
+       int                     count = 0, done = 0, uptodate = 1;
+       xfs_off_t               offset = page_offset(page);
+
+       if (page->index != tindex)
+               goto fail;
+       if (!trylock_page(page))
+               goto fail;
+       if (PageWriteback(page))
+               goto fail_unlock_page;
+       if (page->mapping != inode->i_mapping)
+               goto fail_unlock_page;
+       if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+               goto fail_unlock_page;
+
+       /*
+        * page_dirty is initially a count of buffers on the page before
+        * EOF and is decremented as we move each into a cleanable state.
+        *
+        * Derivation:
+        *
+        * End offset is the highest offset that this page should represent.
+        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+        * hence give us the correct page_dirty count. On any other page,
+        * it will be zero and in that case we need page_dirty to be the
+        * count of buffers on the page.
+        */
+       end_offset = min_t(unsigned long long,
+                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                       i_size_read(inode));
+
+       len = 1 << inode->i_blkbits;
+       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+                                       PAGE_CACHE_SIZE);
+       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+       page_dirty = p_offset / len;
+
+       bh = head = page_buffers(page);
+       do {
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+               if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+                       done = 1;
+                       continue;
+               }
+
+               if (buffer_unwritten(bh) || buffer_delay(bh) ||
+                   buffer_mapped(bh)) {
+                       if (buffer_unwritten(bh))
+                               type = IO_UNWRITTEN;
+                       else if (buffer_delay(bh))
+                               type = IO_DELALLOC;
+                       else
+                               type = IO_OVERWRITE;
+
+                       if (!xfs_imap_valid(inode, imap, offset)) {
+                               done = 1;
+                               continue;
+                       }
+
+                       lock_buffer(bh);
+                       if (type != IO_OVERWRITE)
+                               xfs_map_at_offset(inode, bh, imap, offset);
+                       xfs_add_to_ioend(inode, bh, offset, type,
+                                        ioendp, done);
+
+                       page_dirty--;
+                       count++;
+               } else {
+                       done = 1;
+               }
+       } while (offset += len, (bh = bh->b_this_page) != head);
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       if (count) {
+               if (--wbc->nr_to_write <= 0 &&
+                   wbc->sync_mode == WB_SYNC_NONE)
+                       done = 1;
+       }
+       xfs_start_page_writeback(page, !page_dirty, count);
+
+       return done;
+ fail_unlock_page:
+       unlock_page(page);
+ fail:
+       return 1;
+}
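
A worked instance of the page_dirty derivation above, with hypothetical sizes
PAGE_CACHE_SIZE = 4096 and len = 512 (i_blkbits == 9): on an interior page,
end_offset & 4095 evaluates to 0, so p_offset becomes PAGE_CACHE_SIZE and
page_dirty = 4096 / 512 = 8, i.e. every buffer. On the last page with EOF 1300
bytes in, p_offset = roundup(1300, 512) = 1536 and page_dirty = 3, counting
only the buffers before EOF.
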
+
+/*
+ * Convert & write out a cluster of pages in the same extent as defined
+ * by mp and following the start page.
+ */
+STATIC void
+xfs_cluster_write(
+       struct inode            *inode,
+       pgoff_t                 tindex,
+       struct xfs_bmbt_irec    *imap,
+       xfs_ioend_t             **ioendp,
+       struct writeback_control *wbc,
+       pgoff_t                 tlast)
+{
+       struct pagevec          pvec;
+       int                     done = 0, i;
+
+       pagevec_init(&pvec, 0);
+       while (!done && tindex <= tlast) {
+               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+                       break;
+
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+                                       imap, ioendp, wbc);
+                       if (done)
+                               break;
+               }
+
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+}
+
+STATIC void
+xfs_vm_invalidatepage(
+       struct page             *page,
+       unsigned long           offset)
+{
+       trace_xfs_invalidatepage(page->mapping->host, page, offset);
+       block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see an ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+       struct page             *page)
+{
+       struct inode            *inode = page->mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct buffer_head      *bh, *head;
+       loff_t                  offset = page_offset(page);
+
+       if (!xfs_is_delayed_page(page, IO_DELALLOC))
+               goto out_invalidate;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               goto out_invalidate;
+
+       xfs_alert(ip->i_mount,
+               "page discard on page %p, inode 0x%llx, offset %llu.",
+                       page, ip->i_ino, offset);
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       bh = head = page_buffers(page);
+       do {
+               int             error;
+               xfs_fileoff_t   start_fsb;
+
+               if (!buffer_delay(bh))
+                       goto next_buffer;
+
+               start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+               error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
+               if (error) {
+                       /* something screwed, just bail */
+                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+                               xfs_alert(ip->i_mount,
+                       "page discard unable to remove delalloc mapping.");
+                       }
+                       break;
+               }
+next_buffer:
+               offset += 1 << inode->i_blkbits;
+
+       } while ((bh = bh->b_this_page) != head);
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+       xfs_vm_invalidatepage(page, 0);
+       return;
+}
+
+/*
+ * Write out a dirty page.
+ *
+ * For delalloc space on the page we need to allocate space and flush it.
+ * For unwritten space on the page we need to start the conversion to
+ * regular allocated space.
+ * For any other dirty buffer heads on the page we should flush them.
+ */
+STATIC int
+xfs_vm_writepage(
+       struct page             *page,
+       struct writeback_control *wbc)
+{
+       struct inode            *inode = page->mapping->host;
+       struct buffer_head      *bh, *head;
+       struct xfs_bmbt_irec    imap;
+       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
+       loff_t                  offset;
+       unsigned int            type;
+       __uint64_t              end_offset;
+       pgoff_t                 end_index, last_index;
+       ssize_t                 len;
+       int                     err, imap_valid = 0, uptodate = 1;
+       int                     count = 0;
+       int                     nonblocking = 0;
+
+       trace_xfs_writepage(inode, page, 0);
+
+       ASSERT(page_has_buffers(page));
+
+       /*
+        * Refuse to write the page out if we are called from reclaim context.
+        *
+        * This avoids stack overflows when called from deeply used stacks in
+        * random callers for direct reclaim or memcg reclaim.  We explicitly
+        * allow reclaim from kswapd as the stack usage there is relatively low.
+        *
+        * This should really be done by the core VM, but until that happens
+        * filesystems like XFS, btrfs and ext4 have to take care of this
+        * by themselves.
+        */
+       if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
+               goto redirty;
+
+       /*
+        * Given that we do not allow direct reclaim to call us, we should
+        * never be called while in a filesystem transaction.
+        */
+       if (WARN_ON(current->flags & PF_FSTRANS))
+               goto redirty;
+
+       /* Is this page beyond the end of the file? */
+       offset = i_size_read(inode);
+       end_index = offset >> PAGE_CACHE_SHIFT;
+       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
+       if (page->index >= end_index) {
+               if ((page->index >= end_index + 1) ||
+                   !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+                       unlock_page(page);
+                       return 0;
+               }
+       }
+
+       end_offset = min_t(unsigned long long,
+                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                       offset);
+       len = 1 << inode->i_blkbits;
+
+       bh = head = page_buffers(page);
+       offset = page_offset(page);
+       type = IO_OVERWRITE;
+
+       if (wbc->sync_mode == WB_SYNC_NONE)
+               nonblocking = 1;
+
+       do {
+               int new_ioend = 0;
+
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+
+               /*
+                * set_page_dirty dirties all buffers in a page, independent
+                * of their state.  The dirty state however is entirely
+                * meaningless for holes (!mapped && uptodate), so skip
+                * buffers covering holes here.
+                */
+               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+                       imap_valid = 0;
+                       continue;
+               }
+
+               if (buffer_unwritten(bh)) {
+                       if (type != IO_UNWRITTEN) {
+                               type = IO_UNWRITTEN;
+                               imap_valid = 0;
+                       }
+               } else if (buffer_delay(bh)) {
+                       if (type != IO_DELALLOC) {
+                               type = IO_DELALLOC;
+                               imap_valid = 0;
+                       }
+               } else if (buffer_uptodate(bh)) {
+                       if (type != IO_OVERWRITE) {
+                               type = IO_OVERWRITE;
+                               imap_valid = 0;
+                       }
+               } else {
+                       if (PageUptodate(page)) {
+                               ASSERT(buffer_mapped(bh));
+                               imap_valid = 0;
+                       }
+                       continue;
+               }
+
+               if (imap_valid)
+                       imap_valid = xfs_imap_valid(inode, &imap, offset);
+               if (!imap_valid) {
+                       /*
+                        * If we didn't have a valid mapping then we need to
+                        * put the new mapping into a separate ioend structure.
+                        * This ensures non-contiguous extents always have
+                        * separate ioends, which is particularly important
+                        * for unwritten extent conversion at I/O completion
+                        * time.
+                        */
+                       new_ioend = 1;
+                       err = xfs_map_blocks(inode, offset, &imap, type,
+                                            nonblocking);
+                       if (err)
+                               goto error;
+                       imap_valid = xfs_imap_valid(inode, &imap, offset);
+               }
+               if (imap_valid) {
+                       lock_buffer(bh);
+                       if (type != IO_OVERWRITE)
+                               xfs_map_at_offset(inode, bh, &imap, offset);
+                       xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+                                        new_ioend);
+                       count++;
+               }
+
+               if (!iohead)
+                       iohead = ioend;
+
+       } while (offset += len, ((bh = bh->b_this_page) != head));
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       xfs_start_page_writeback(page, 1, count);
+
+       if (ioend && imap_valid) {
+               xfs_off_t               end_index;
+
+               end_index = imap.br_startoff + imap.br_blockcount;
+
+               /* to bytes */
+               end_index <<= inode->i_blkbits;
+
+               /* to pages */
+               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+               /* check against file size */
+               if (end_index > last_index)
+                       end_index = last_index;
+
+               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+                                 wbc, end_index);
+       }
+
+       if (iohead)
+               xfs_submit_ioend(wbc, iohead);
+
+       return 0;
+
+error:
+       if (iohead)
+               xfs_cancel_ioend(iohead);
+
+       if (err == -EAGAIN)
+               goto redirty;
+
+       xfs_aops_discard_page(page);
+       ClearPageUptodate(page);
+       unlock_page(page);
+       return err;
+
+redirty:
+       redirty_page_for_writepage(wbc, page);
+       unlock_page(page);
+       return 0;
+}
+
+STATIC int
+xfs_vm_writepages(
+       struct address_space    *mapping,
+       struct writeback_control *wbc)
+{
+       xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+       return generic_writepages(mapping, wbc);
+}
+
+/*
+ * Called to move a page into cleanable state - and from there
+ * to be released. The page should already be clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns 1 if the page is ok to release, 0 otherwise.
+ */
+STATIC int
+xfs_vm_releasepage(
+       struct page             *page,
+       gfp_t                   gfp_mask)
+{
+       int                     delalloc, unwritten;
+
+       trace_xfs_releasepage(page->mapping->host, page, 0);
+
+       xfs_count_page_state(page, &delalloc, &unwritten);
+
+       if (WARN_ON(delalloc))
+               return 0;
+       if (WARN_ON(unwritten))
+               return 0;
+
+       return try_to_free_buffers(page);
+}
+
+STATIC int
+__xfs_get_blocks(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create,
+       int                     direct)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     error = 0;
+       int                     lockmode = 0;
+       struct xfs_bmbt_irec    imap;
+       int                     nimaps = 1;
+       xfs_off_t               offset;
+       ssize_t                 size;
+       int                     new = 0;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       offset = (xfs_off_t)iblock << inode->i_blkbits;
+       ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+       size = bh_result->b_size;
+
+       if (!create && direct && offset >= i_size_read(inode))
+               return 0;
+
+       if (create) {
+               lockmode = XFS_ILOCK_EXCL;
+               xfs_ilock(ip, lockmode);
+       } else {
+               lockmode = xfs_ilock_map_shared(ip);
+       }
+
+       ASSERT(offset <= mp->m_maxioffset);
+       if (offset + size > mp->m_maxioffset)
+               size = mp->m_maxioffset - offset;
+       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
+       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+                         XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
+       if (error)
+               goto out_unlock;
+
+       if (create &&
+           (!nimaps ||
+            (imap.br_startblock == HOLESTARTBLOCK ||
+             imap.br_startblock == DELAYSTARTBLOCK))) {
+               if (direct) {
+                       error = xfs_iomap_write_direct(ip, offset, size,
+                                                      &imap, nimaps);
+               } else {
+                       error = xfs_iomap_write_delay(ip, offset, size, &imap);
+               }
+               if (error)
+                       goto out_unlock;
+
+               trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+       } else if (nimaps) {
+               trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+       } else {
+               trace_xfs_get_blocks_notfound(ip, offset, size);
+               goto out_unlock;
+       }
+       xfs_iunlock(ip, lockmode);
+
+       if (imap.br_startblock != HOLESTARTBLOCK &&
+           imap.br_startblock != DELAYSTARTBLOCK) {
+               /*
+                * For unwritten extents do not report a disk address on
+                * the read case (treat as if we're reading into a hole).
+                */
+               if (create || !ISUNWRITTEN(&imap))
+                       xfs_map_buffer(inode, bh_result, &imap, offset);
+               if (create && ISUNWRITTEN(&imap)) {
+                       if (direct)
+                               bh_result->b_private = inode;
+                       set_buffer_unwritten(bh_result);
+               }
+       }
+
+       /*
+        * If this is a realtime file, data may be on a different device
+        * to that pointed to from the buffer_head b_bdev currently.
+        */
+       bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
+
+       /*
+        * If we previously allocated a block out beyond eof and we are now
+        * coming back to use it then we will need to flag it as new even if it
+        * has a disk address.
+        *
+        * With sub-block writes into unwritten extents we also need to mark
+        * the buffer as new so that the unwritten parts of the buffer gets
+        * correctly zeroed.
+        */
+       if (create &&
+           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
+            (offset >= i_size_read(inode)) ||
+            (new || ISUNWRITTEN(&imap))))
+               set_buffer_new(bh_result);
+
+       if (imap.br_startblock == DELAYSTARTBLOCK) {
+               BUG_ON(direct);
+               if (create) {
+                       set_buffer_uptodate(bh_result);
+                       set_buffer_mapped(bh_result);
+                       set_buffer_delay(bh_result);
+               }
+       }
+
+       /*
+        * If this is O_DIRECT or the mpage code calling, tell them how large
+        * the mapping is, so that we can avoid repeated get_blocks calls.
+        */
+       if (direct || size > (1 << inode->i_blkbits)) {
+               xfs_off_t               mapping_size;
+
+               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+               mapping_size <<= inode->i_blkbits;
+
+               ASSERT(mapping_size > 0);
+               if (mapping_size > size)
+                       mapping_size = size;
+               if (mapping_size > LONG_MAX)
+                       mapping_size = LONG_MAX;
+
+               bh_result->b_size = mapping_size;
+       }
+
+       return 0;
+
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+       return -error;
+}
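
The trailing mapping_size computation tells the direct-I/O and mpage callers
how far this mapping extends beyond iblock so they can avoid repeated
get_blocks calls. Hypothetical numbers with 4k blocks (i_blkbits == 12): for
imap covering [br_startoff 10, blockcount 8) and iblock = 12, mapping_size =
(10 + 8 - 12) << 12 = 24576 bytes, subsequently clamped to the caller's size
and to LONG_MAX.
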
+
+int
+xfs_get_blocks(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
+}
+
+STATIC int
+xfs_get_blocks_direct(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
+}
+
+/*
+ * Complete a direct I/O write request.
+ *
+ * If the private argument is non-NULL __xfs_get_blocks signals us that we
+ * need to issue a transaction to convert the range from unwritten to written
+ * extents.  In case this is regular synchronous I/O we just call xfs_end_io
+ * to do this and we are done.  But in case this was a successful AIO
+ * request this handler is called from interrupt context, from which we
+ * can't start transactions.  In that case offload the I/O completion to
+ * the workqueues we also use for buffered I/O completion.
+ */
+STATIC void
+xfs_end_io_direct_write(
+       struct kiocb            *iocb,
+       loff_t                  offset,
+       ssize_t                 size,
+       void                    *private,
+       int                     ret,
+       bool                    is_async)
+{
+       struct xfs_ioend        *ioend = iocb->private;
+
+       /*
+        * blockdev_direct_IO can return an error even after the I/O
+        * completion handler was called.  Thus we need to protect
+        * against double-freeing.
+        */
+       iocb->private = NULL;
+
+       ioend->io_offset = offset;
+       ioend->io_size = size;
+       if (private && size > 0)
+               ioend->io_type = IO_UNWRITTEN;
+
+       if (is_async) {
+               /*
+                * If we are converting an unwritten extent we need to delay
+                * the AIO completion until after the unwritten extent
+                * conversion has completed, otherwise do it ASAP.
+                */
+               if (ioend->io_type == IO_UNWRITTEN) {
+                       ioend->io_iocb = iocb;
+                       ioend->io_result = ret;
+               } else {
+                       aio_complete(iocb, ret, 0);
+               }
+               xfs_finish_ioend(ioend);
+       } else {
+               xfs_finish_ioend_sync(ioend);
+       }
+
+       /* XXX: probably should move into the real I/O completion handler */
+       inode_dio_done(ioend->io_inode);
+}
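
The deferral into xfs_finish_ioend() for the unwritten case follows the standard workqueue idiom for escaping interrupt context. A minimal sketch of that pattern, with hypothetical names (not XFS code):

        #include <linux/kernel.h>
        #include <linux/slab.h>
        #include <linux/workqueue.h>

        struct demo_ioend {
                struct work_struct      io_work;
                /* ... completion state ... */
        };

        static void demo_end_io_work(struct work_struct *work)
        {
                struct demo_ioend *ioend =
                        container_of(work, struct demo_ioend, io_work);

                /* process context: safe to start transactions here */
                kfree(ioend);
        }

        /* may be called from interrupt context */
        static void demo_complete(struct demo_ioend *ioend)
        {
                INIT_WORK(&ioend->io_work, demo_end_io_work);
                queue_work(system_wq, &ioend->io_work);
        }
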
+
+STATIC ssize_t
+xfs_vm_direct_IO(
+       int                     rw,
+       struct kiocb            *iocb,
+       const struct iovec      *iov,
+       loff_t                  offset,
+       unsigned long           nr_segs)
+{
+       struct inode            *inode = iocb->ki_filp->f_mapping->host;
+       struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
+       ssize_t                 ret;
+
+       if (rw & WRITE) {
+               iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
+
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+                                           offset, nr_segs,
+                                           xfs_get_blocks_direct,
+                                           xfs_end_io_direct_write, NULL, 0);
+               if (ret != -EIOCBQUEUED && iocb->private)
+                       xfs_destroy_ioend(iocb->private);
+       } else {
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+                                           offset, nr_segs,
+                                           xfs_get_blocks_direct,
+                                           NULL, NULL, 0);
+       }
+
+       return ret;
+}
+
+STATIC void
+xfs_vm_write_failed(
+       struct address_space    *mapping,
+       loff_t                  to)
+{
+       struct inode            *inode = mapping->host;
+
+       if (to > inode->i_size) {
+               /*
+                * punch out the delalloc blocks we have already allocated. We
+                * don't call xfs_setattr() to do this as we may be in the
+                * middle of a multi-iovec write and so the vfs inode->i_size
+                * will not match the xfs ip->i_size and so it will zero too
+                * much. Hence we just truncate the page cache to zero what is
+                * necessary and punch the delalloc blocks directly.
+                */
+               struct xfs_inode        *ip = XFS_I(inode);
+               xfs_fileoff_t           start_fsb;
+               xfs_fileoff_t           end_fsb;
+               int                     error;
+
+               truncate_pagecache(inode, to, inode->i_size);
+
+               /*
+                * Check if there are any blocks that are outside of i_size
+                * that need to be trimmed back.
+                */
+               start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+               end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+               if (end_fsb <= start_fsb)
+                       return;
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+                                                       end_fsb - start_fsb);
+               if (error) {
+                       /* something screwed, just bail */
+                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+                               xfs_alert(ip->i_mount,
+                       "xfs_vm_write_failed: unable to clean up ino %lld",
+                                               ip->i_ino);
+                       }
+               }
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+STATIC int
+xfs_vm_write_begin(
+       struct file             *file,
+       struct address_space    *mapping,
+       loff_t                  pos,
+       unsigned                len,
+       unsigned                flags,
+       struct page             **pagep,
+       void                    **fsdata)
+{
+       int                     ret;
+
+       ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
+                               pagep, xfs_get_blocks);
+       if (unlikely(ret))
+               xfs_vm_write_failed(mapping, pos + len);
+       return ret;
+}
+
+STATIC int
+xfs_vm_write_end(
+       struct file             *file,
+       struct address_space    *mapping,
+       loff_t                  pos,
+       unsigned                len,
+       unsigned                copied,
+       struct page             *page,
+       void                    *fsdata)
+{
+       int                     ret;
+
+       ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+       if (unlikely(ret < len))
+               xfs_vm_write_failed(mapping, pos + len);
+       return ret;
+}
+
+STATIC sector_t
+xfs_vm_bmap(
+       struct address_space    *mapping,
+       sector_t                block)
+{
+       struct inode            *inode = (struct inode *)mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       trace_xfs_vm_bmap(XFS_I(inode));
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+       return generic_block_bmap(mapping, block, xfs_get_blocks);
+}
+
+STATIC int
+xfs_vm_readpage(
+       struct file             *unused,
+       struct page             *page)
+{
+       return mpage_readpage(page, xfs_get_blocks);
+}
+
+STATIC int
+xfs_vm_readpages(
+       struct file             *unused,
+       struct address_space    *mapping,
+       struct list_head        *pages,
+       unsigned                nr_pages)
+{
+       return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
+}
+
+const struct address_space_operations xfs_address_space_operations = {
+       .readpage               = xfs_vm_readpage,
+       .readpages              = xfs_vm_readpages,
+       .writepage              = xfs_vm_writepage,
+       .writepages             = xfs_vm_writepages,
+       .releasepage            = xfs_vm_releasepage,
+       .invalidatepage         = xfs_vm_invalidatepage,
+       .write_begin            = xfs_vm_write_begin,
+       .write_end              = xfs_vm_write_end,
+       .bmap                   = xfs_vm_bmap,
+       .direct_IO              = xfs_vm_direct_IO,
+       .migratepage            = buffer_migrate_page,
+       .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
+};
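
For context, and not part of this hunk: at this point in the tree XFS attaches this table to each regular file's mapping during inode setup, roughly:

        inode->i_mapping->a_ops = &xfs_address_space_operations;
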
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
new file mode 100644 (file)
index 0000000..71f721e
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2005-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_AOPS_H__
+#define __XFS_AOPS_H__
+
+extern struct workqueue_struct *xfsdatad_workqueue;
+extern struct workqueue_struct *xfsconvertd_workqueue;
+extern mempool_t *xfs_ioend_pool;
+
+/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+       IO_DIRECT = 0,  /* special case for direct I/O ioends */
+       IO_DELALLOC,    /* mapping covers delalloc region */
+       IO_UNWRITTEN,   /* mapping covers allocated but uninitialized data */
+       IO_OVERWRITE,   /* mapping covers already allocated extent */
+};
+
+#define XFS_IO_TYPES \
+       { 0,                    "" }, \
+       { IO_DELALLOC,          "delalloc" }, \
+       { IO_UNWRITTEN,         "unwritten" }, \
+       { IO_OVERWRITE,         "overwrite" }
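
A key/string table of this shape is what the tracing code's __print_symbolic() expects; a sketch of how a tracepoint format string would consume it (as the XFS trace headers do at this time):

        TP_printk("... type %s ...",
                  __print_symbolic(__entry->type, XFS_IO_TYPES))
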
+
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bios at once.
+ */
+typedef struct xfs_ioend {
+       struct xfs_ioend        *io_list;       /* next ioend in chain */
+       unsigned int            io_type;        /* delalloc / unwritten */
+       int                     io_error;       /* I/O error code */
+       atomic_t                io_remaining;   /* hold count */
+       struct inode            *io_inode;      /* file being written to */
+       struct buffer_head      *io_buffer_head;/* buffer linked list head */
+       struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
+       size_t                  io_size;        /* size of the extent */
+       xfs_off_t               io_offset;      /* offset in the file */
+       struct work_struct      io_work;        /* xfsdatad work queue */
+       struct kiocb            *io_iocb;
+       int                     io_result;
+} xfs_ioend_t;
+
+extern const struct address_space_operations xfs_address_space_operations;
+extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+
+extern void xfs_ioend_init(void);
+extern void xfs_ioend_wait(struct xfs_inode *);
+
+extern void xfs_count_page_state(struct page *, int *, int *);
+
+#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
new file mode 100644 (file)
index 0000000..c57836d
--- /dev/null
@@ -0,0 +1,1876 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/bio.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/percpu.h>
+#include <linux/blkdev.h>
+#include <linux/hash.h>
+#include <linux/kthread.h>
+#include <linux/migrate.h>
+#include <linux/backing-dev.h>
+#include <linux/freezer.h>
+
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+
+static kmem_zone_t *xfs_buf_zone;
+STATIC int xfsbufd(void *);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
+
+static struct workqueue_struct *xfslogd_workqueue;
+struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
+
+#ifdef XFS_BUF_LOCK_TRACKING
+# define XB_SET_OWNER(bp)      ((bp)->b_last_holder = current->pid)
+# define XB_CLEAR_OWNER(bp)    ((bp)->b_last_holder = -1)
+# define XB_GET_OWNER(bp)      ((bp)->b_last_holder)
+#else
+# define XB_SET_OWNER(bp)      do { } while (0)
+# define XB_CLEAR_OWNER(bp)    do { } while (0)
+# define XB_GET_OWNER(bp)      do { } while (0)
+#endif
+
+#define xb_to_gfp(flags) \
+       ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
+         ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+
+#define xb_to_km(flags) \
+        (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+
+#define xfs_buf_allocate(flags) \
+       kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
+#define xfs_buf_deallocate(bp) \
+       kmem_zone_free(xfs_buf_zone, (bp));
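
Reading the two helper macros above, some example expansions:

        /* examples: */
        xb_to_gfp(XBF_DONT_BLOCK);      /* GFP_NOFS | __GFP_NOWARN */
        xb_to_gfp(0);                   /* GFP_KERNEL | __GFP_NOWARN */
        xb_to_km(XBF_DONT_BLOCK);       /* KM_NOFS */
        xb_to_km(0);                    /* KM_SLEEP */
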
+
+static inline int
+xfs_buf_is_vmapped(
+       struct xfs_buf  *bp)
+{
+       /*
+        * Return true if the buffer is vmapped.
+        *
+        * The XBF_MAPPED flag is set if the buffer should be mapped, but the
+        * code is clever enough to know it doesn't have to map a single page,
+        * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
+        */
+       return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
+}
+
+static inline int
+xfs_buf_vmap_len(
+       struct xfs_buf  *bp)
+{
+       return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+}
+
+/*
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
+ */
+STATIC void
+xfs_buf_lru_add(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+
+       spin_lock(&btp->bt_lru_lock);
+       if (list_empty(&bp->b_lru)) {
+               atomic_inc(&bp->b_hold);
+               list_add_tail(&bp->b_lru, &btp->bt_lru);
+               btp->bt_lru_nr++;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+
+       if (list_empty(&bp->b_lru))
+               return;
+
+       spin_lock(&btp->bt_lru_lock);
+       if (!list_empty(&bp->b_lru)) {
+               list_del_init(&bp->b_lru);
+               btp->bt_lru_nr--;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+       struct xfs_buf  *bp)
+{
+       bp->b_flags |= XBF_STALE;
+       atomic_set(&(bp)->b_lru_ref, 0);
+       if (!list_empty(&bp->b_lru)) {
+               struct xfs_buftarg *btp = bp->b_target;
+
+               spin_lock(&btp->bt_lru_lock);
+               if (!list_empty(&bp->b_lru)) {
+                       list_del_init(&bp->b_lru);
+                       btp->bt_lru_nr--;
+                       atomic_dec(&bp->b_hold);
+               }
+               spin_unlock(&btp->bt_lru_lock);
+       }
+       ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
+
+STATIC void
+_xfs_buf_initialize(
+       xfs_buf_t               *bp,
+       xfs_buftarg_t           *target,
+       xfs_off_t               range_base,
+       size_t                  range_length,
+       xfs_buf_flags_t         flags)
+{
+       /*
+        * We don't want certain flags to appear in b_flags.
+        */
+       flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
+
+       memset(bp, 0, sizeof(xfs_buf_t));
+       atomic_set(&bp->b_hold, 1);
+       atomic_set(&bp->b_lru_ref, 1);
+       init_completion(&bp->b_iowait);
+       INIT_LIST_HEAD(&bp->b_lru);
+       INIT_LIST_HEAD(&bp->b_list);
+       RB_CLEAR_NODE(&bp->b_rbnode);
+       sema_init(&bp->b_sema, 0); /* held, no waiters */
+       XB_SET_OWNER(bp);
+       bp->b_target = target;
+       bp->b_file_offset = range_base;
+       /*
+        * Set buffer_length and count_desired to the same value initially.
+        * I/O routines should use count_desired, which will be the same in
+        * most cases but may be reset (e.g. XFS recovery).
+        */
+       bp->b_buffer_length = bp->b_count_desired = range_length;
+       bp->b_flags = flags;
+       bp->b_bn = XFS_BUF_DADDR_NULL;
+       atomic_set(&bp->b_pin_count, 0);
+       init_waitqueue_head(&bp->b_waiters);
+
+       XFS_STATS_INC(xb_create);
+
+       trace_xfs_buf_init(bp, _RET_IP_);
+}
+
+/*
+ *     Allocate a page array capable of holding a specified number
+ *     of pages, and point the page buf at it.
+ *     of pages, and point the buffer at it.
+STATIC int
+_xfs_buf_get_pages(
+       xfs_buf_t               *bp,
+       int                     page_count,
+       xfs_buf_flags_t         flags)
+{
+       /* Make sure that we have a page list */
+       if (bp->b_pages == NULL) {
+               bp->b_offset = xfs_buf_poff(bp->b_file_offset);
+               bp->b_page_count = page_count;
+               if (page_count <= XB_PAGES) {
+                       bp->b_pages = bp->b_page_array;
+               } else {
+                       bp->b_pages = kmem_alloc(sizeof(struct page *) *
+                                       page_count, xb_to_km(flags));
+                       if (bp->b_pages == NULL)
+                               return -ENOMEM;
+               }
+               memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
+       }
+       return 0;
+}
+
+/*
+ *     Frees b_pages if it was allocated.
+ */
+STATIC void
+_xfs_buf_free_pages(
+       xfs_buf_t       *bp)
+{
+       if (bp->b_pages != bp->b_page_array) {
+               kmem_free(bp->b_pages);
+               bp->b_pages = NULL;
+       }
+}
+
+/*
+ *     Releases the specified buffer.
+ *
+ *     The modification state of any associated pages is left unchanged.
+ *     The buffer must not be on any hash - use xfs_buf_rele instead for
+ *     hashed and refcounted buffers.
+ */
+void
+xfs_buf_free(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_free(bp, _RET_IP_);
+
+       ASSERT(list_empty(&bp->b_lru));
+
+       if (bp->b_flags & _XBF_PAGES) {
+               uint            i;
+
+               if (xfs_buf_is_vmapped(bp))
+                       vm_unmap_ram(bp->b_addr - bp->b_offset,
+                                       bp->b_page_count);
+
+               for (i = 0; i < bp->b_page_count; i++) {
+                       struct page     *page = bp->b_pages[i];
+
+                       __free_page(page);
+               }
+       } else if (bp->b_flags & _XBF_KMEM)
+               kmem_free(bp->b_addr);
+       _xfs_buf_free_pages(bp);
+       xfs_buf_deallocate(bp);
+}
+
+/*
+ * Allocates all the pages for the buffer in question and builds its page list.
+ */
+STATIC int
+xfs_buf_allocate_memory(
+       xfs_buf_t               *bp,
+       uint                    flags)
+{
+       size_t                  size = bp->b_count_desired;
+       size_t                  nbytes, offset;
+       gfp_t                   gfp_mask = xb_to_gfp(flags);
+       unsigned short          page_count, i;
+       xfs_off_t               end;
+       int                     error;
+
+       /*
+        * for buffers that are contained within a single page, just allocate
+        * the memory from the heap - there's no need for the complexity of
+        * page arrays to keep allocation down to order 0.
+        */
+       if (bp->b_buffer_length < PAGE_SIZE) {
+               bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+               if (!bp->b_addr) {
+                       /* low memory - use alloc_page loop instead */
+                       goto use_alloc_page;
+               }
+
+               if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+                                                               PAGE_MASK) !=
+                   ((unsigned long)bp->b_addr & PAGE_MASK)) {
+                       /* b_addr spans two pages - use alloc_page instead */
+                       kmem_free(bp->b_addr);
+                       bp->b_addr = NULL;
+                       goto use_alloc_page;
+               }
+               bp->b_offset = offset_in_page(bp->b_addr);
+               bp->b_pages = bp->b_page_array;
+               bp->b_pages[0] = virt_to_page(bp->b_addr);
+               bp->b_page_count = 1;
+               bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+               return 0;
+       }
+
+use_alloc_page:
+       end = bp->b_file_offset + bp->b_buffer_length;
+       page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
+       error = _xfs_buf_get_pages(bp, page_count, flags);
+       if (unlikely(error))
+               return error;
+
+       offset = bp->b_offset;
+       bp->b_flags |= _XBF_PAGES;
+
+       for (i = 0; i < bp->b_page_count; i++) {
+               struct page     *page;
+               uint            retries = 0;
+retry:
+               page = alloc_page(gfp_mask);
+               if (unlikely(page == NULL)) {
+                       if (flags & XBF_READ_AHEAD) {
+                               bp->b_page_count = i;
+                               error = ENOMEM;
+                               goto out_free_pages;
+                       }
+
+                       /*
+                        * This could deadlock.
+                        *
+                        * But until all the XFS lowlevel code is revamped to
+                        * handle buffer allocation failures we can't do much.
+                        */
+                       if (!(++retries % 100))
+                               xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, gfp_mask);
+
+                       XFS_STATS_INC(xb_page_retries);
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto retry;
+               }
+
+               XFS_STATS_INC(xb_page_found);
+
+               nbytes = min_t(size_t, size, PAGE_SIZE - offset);
+               size -= nbytes;
+               bp->b_pages[i] = page;
+               offset = 0;
+       }
+       return 0;
+
+out_free_pages:
+       for (i = 0; i < bp->b_page_count; i++)
+               __free_page(bp->b_pages[i]);
+       return error;
+}
+
+/*
+ *     Map buffer into kernel address-space if necessary.
+ */
+STATIC int
+_xfs_buf_map_pages(
+       xfs_buf_t               *bp,
+       uint                    flags)
+{
+       ASSERT(bp->b_flags & _XBF_PAGES);
+       if (bp->b_page_count == 1) {
+               /* A single page buffer is always mappable */
+               bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+               bp->b_flags |= XBF_MAPPED;
+       } else if (flags & XBF_MAPPED) {
+               int retried = 0;
+
+               do {
+                       bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+                                               -1, PAGE_KERNEL);
+                       if (bp->b_addr)
+                               break;
+                       vm_unmap_aliases();
+               } while (retried++ <= 1);
+
+               if (!bp->b_addr)
+                       return -ENOMEM;
+               bp->b_addr += bp->b_offset;
+               bp->b_flags |= XBF_MAPPED;
+       }
+
+       return 0;
+}
+
+/*
+ *     Finding and Reading Buffers
+ */
+
+/*
+ *     Looks up, and creates if absent, a lockable buffer for
+ *     a given range of an inode.  The buffer is returned
+ *     locked.  If other overlapping buffers exist, they are
+ *     released before the new buffer is created and locked,
+ *     which may imply that this call will block until those buffers
+ *     are unlocked.  No I/O is implied by this call.
+ */
+xfs_buf_t *
+_xfs_buf_find(
+       xfs_buftarg_t           *btp,   /* block device target          */
+       xfs_off_t               ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       xfs_buf_flags_t         flags,
+       xfs_buf_t               *new_bp)
+{
+       xfs_off_t               range_base;
+       size_t                  range_length;
+       struct xfs_perag        *pag;
+       struct rb_node          **rbp;
+       struct rb_node          *parent;
+       xfs_buf_t               *bp;
+
+       range_base = (ioff << BBSHIFT);
+       range_length = (isize << BBSHIFT);
+
+       /* Check for IOs smaller than the sector size / not sector aligned */
+       ASSERT(!(range_length < (1 << btp->bt_sshift)));
+       ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
+
+       /* get tree root */
+       pag = xfs_perag_get(btp->bt_mount,
+                               xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+       /* walk tree */
+       spin_lock(&pag->pag_buf_lock);
+       rbp = &pag->pag_buf_tree.rb_node;
+       parent = NULL;
+       bp = NULL;
+       while (*rbp) {
+               parent = *rbp;
+               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+               if (range_base < bp->b_file_offset)
+                       rbp = &(*rbp)->rb_left;
+               else if (range_base > bp->b_file_offset)
+                       rbp = &(*rbp)->rb_right;
+               else {
+                       /*
+                        * found a block offset match. If the range doesn't
+                        * match, the only way this is allowed is if the buffer
+                        * in the cache is stale and the transaction that made
+                        * it stale has not yet committed. i.e. we are
+                        * reallocating a busy extent. Skip this buffer and
+                        * continue searching to the right for an exact match.
+                        */
+                       if (bp->b_buffer_length != range_length) {
+                               ASSERT(bp->b_flags & XBF_STALE);
+                               rbp = &(*rbp)->rb_right;
+                               continue;
+                       }
+                       atomic_inc(&bp->b_hold);
+                       goto found;
+               }
+       }
+
+       /* No match found */
+       if (new_bp) {
+               _xfs_buf_initialize(new_bp, btp, range_base,
+                               range_length, flags);
+               rb_link_node(&new_bp->b_rbnode, parent, rbp);
+               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+               /* the buffer keeps the perag reference until it is freed */
+               new_bp->b_pag = pag;
+               spin_unlock(&pag->pag_buf_lock);
+       } else {
+               XFS_STATS_INC(xb_miss_locked);
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
+       }
+       return new_bp;
+
+found:
+       spin_unlock(&pag->pag_buf_lock);
+       xfs_perag_put(pag);
+
+       if (!xfs_buf_trylock(bp)) {
+               if (flags & XBF_TRYLOCK) {
+                       xfs_buf_rele(bp);
+                       XFS_STATS_INC(xb_busy_locked);
+                       return NULL;
+               }
+               xfs_buf_lock(bp);
+               XFS_STATS_INC(xb_get_locked_waited);
+       }
+
+       /*
+        * if the buffer is stale, clear all the external state associated with
+        * it. We need to keep flags such as how we allocated the buffer memory
+        * intact here.
+        */
+       if (bp->b_flags & XBF_STALE) {
+               ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
+               bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
+       }
+
+       trace_xfs_buf_find(bp, flags, _RET_IP_);
+       XFS_STATS_INC(xb_get_locked);
+       return bp;
+}
+
+/*
+ *     Assembles a buffer covering the specified range.
+ *     Storage in memory for all portions of the buffer will be allocated,
+ *     although backing storage may not be.
+ */
+xfs_buf_t *
+xfs_buf_get(
+       xfs_buftarg_t           *target,/* target for buffer            */
+       xfs_off_t               ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       xfs_buf_flags_t         flags)
+{
+       xfs_buf_t               *bp, *new_bp;
+       int                     error = 0;
+
+       new_bp = xfs_buf_allocate(flags);
+       if (unlikely(!new_bp))
+               return NULL;
+
+       bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
+       if (bp == new_bp) {
+               error = xfs_buf_allocate_memory(bp, flags);
+               if (error)
+                       goto no_buffer;
+       } else {
+               xfs_buf_deallocate(new_bp);
+               if (unlikely(bp == NULL))
+                       return NULL;
+       }
+
+       if (!(bp->b_flags & XBF_MAPPED)) {
+               error = _xfs_buf_map_pages(bp, flags);
+               if (unlikely(error)) {
+                       xfs_warn(target->bt_mount,
+                               "%s: failed to map pages\n", __func__);
+                       goto no_buffer;
+               }
+       }
+
+       XFS_STATS_INC(xb_get);
+
+       /*
+        * Always fill in the block number now, the mapped cases can do
+        * their own overlay of this later.
+        */
+       bp->b_bn = ioff;
+       bp->b_count_desired = bp->b_buffer_length;
+
+       trace_xfs_buf_get(bp, flags, _RET_IP_);
+       return bp;
+
+ no_buffer:
+       if (flags & (XBF_LOCK | XBF_TRYLOCK))
+               xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+       return NULL;
+}
+
+STATIC int
+_xfs_buf_read(
+       xfs_buf_t               *bp,
+       xfs_buf_flags_t         flags)
+{
+       int                     status;
+
+       ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+       ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+
+       bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
+       bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
+
+       status = xfs_buf_iorequest(bp);
+       if (status || bp->b_error || (flags & XBF_ASYNC))
+               return status;
+       return xfs_buf_iowait(bp);
+}
+
+xfs_buf_t *
+xfs_buf_read(
+       xfs_buftarg_t           *target,
+       xfs_off_t               ioff,
+       size_t                  isize,
+       xfs_buf_flags_t         flags)
+{
+       xfs_buf_t               *bp;
+
+       flags |= XBF_READ;
+
+       bp = xfs_buf_get(target, ioff, isize, flags);
+       if (bp) {
+               trace_xfs_buf_read(bp, flags, _RET_IP_);
+
+               if (!XFS_BUF_ISDONE(bp)) {
+                       XFS_STATS_INC(xb_get_read);
+                       _xfs_buf_read(bp, flags);
+               } else if (flags & XBF_ASYNC) {
+                       /*
+                        * Read ahead call which is already satisfied,
+                        * drop the buffer
+                        */
+                       goto no_buffer;
+               } else {
+                       /* We do not want read in the flags */
+                       bp->b_flags &= ~XBF_READ;
+               }
+       }
+
+       return bp;
+
+ no_buffer:
+       if (flags & (XBF_LOCK | XBF_TRYLOCK))
+               xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+       return NULL;
+}
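
A typical caller pattern for the read path, as a hypothetical sketch (offsets and lengths here are in 512-byte basic blocks, per the BBSHIFT conversion in _xfs_buf_find):

        xfs_buf_t       *bp;
        int             error;

        bp = xfs_buf_read(target, blkno, numblks, XBF_LOCK);
        if (!bp)
                return ENOMEM;
        if (bp->b_error) {
                error = bp->b_error;
                xfs_buf_relse(bp);      /* unlock and drop reference */
                return error;
        }
        /* ... use bp->b_addr ... */
        xfs_buf_relse(bp);
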
+
+/*
+ *     If we are not low on memory then do the readahead in a
+ *     deadlock-safe manner.
+ */
+void
+xfs_buf_readahead(
+       xfs_buftarg_t           *target,
+       xfs_off_t               ioff,
+       size_t                  isize)
+{
+       if (bdi_read_congested(target->bt_bdi))
+               return;
+
+       xfs_buf_read(target, ioff, isize,
+                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+       struct xfs_mount        *mp,
+       struct xfs_buftarg      *target,
+       xfs_daddr_t             daddr,
+       size_t                  length,
+       int                     flags)
+{
+       xfs_buf_t               *bp;
+       int                     error;
+
+       bp = xfs_buf_get_uncached(target, length, flags);
+       if (!bp)
+               return NULL;
+
+       /* set up the buffer for a read IO */
+       XFS_BUF_SET_ADDR(bp, daddr);
+       XFS_BUF_READ(bp);
+
+       xfsbdstrat(mp, bp);
+       error = xfs_buf_iowait(bp);
+       if (error || bp->b_error) {
+               xfs_buf_relse(bp);
+               return NULL;
+       }
+       return bp;
+}
+
+xfs_buf_t *
+xfs_buf_get_empty(
+       size_t                  len,
+       xfs_buftarg_t           *target)
+{
+       xfs_buf_t               *bp;
+
+       bp = xfs_buf_allocate(0);
+       if (bp)
+               _xfs_buf_initialize(bp, target, 0, len, 0);
+       return bp;
+}
+
+/*
+ * Set a buffer that was allocated as an empty buffer and associated with
+ * external memory via xfs_buf_associate_memory() back to its empty state.
+ */
+void
+xfs_buf_set_empty(
+       struct xfs_buf          *bp,
+       size_t                  len)
+{
+       if (bp->b_pages)
+               _xfs_buf_free_pages(bp);
+
+       bp->b_pages = NULL;
+       bp->b_page_count = 0;
+       bp->b_addr = NULL;
+       bp->b_file_offset = 0;
+       bp->b_buffer_length = bp->b_count_desired = len;
+       bp->b_bn = XFS_BUF_DADDR_NULL;
+       bp->b_flags &= ~XBF_MAPPED;
+}
+
+static inline struct page *
+mem_to_page(
+       void                    *addr)
+{
+       if ((!is_vmalloc_addr(addr))) {
+               return virt_to_page(addr);
+       } else {
+               return vmalloc_to_page(addr);
+       }
+}
+
+int
+xfs_buf_associate_memory(
+       xfs_buf_t               *bp,
+       void                    *mem,
+       size_t                  len)
+{
+       int                     rval;
+       int                     i = 0;
+       unsigned long           pageaddr;
+       unsigned long           offset;
+       size_t                  buflen;
+       int                     page_count;
+
+       pageaddr = (unsigned long)mem & PAGE_MASK;
+       offset = (unsigned long)mem - pageaddr;
+       buflen = PAGE_ALIGN(len + offset);
+       page_count = buflen >> PAGE_SHIFT;
+
+       /* Free any previous set of page pointers */
+       if (bp->b_pages)
+               _xfs_buf_free_pages(bp);
+
+       bp->b_pages = NULL;
+       bp->b_addr = mem;
+
+       rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
+       if (rval)
+               return rval;
+
+       bp->b_offset = offset;
+
+       for (i = 0; i < bp->b_page_count; i++) {
+               bp->b_pages[i] = mem_to_page((void *)pageaddr);
+               pageaddr += PAGE_SIZE;
+       }
+
+       bp->b_count_desired = len;
+       bp->b_buffer_length = buflen;
+       bp->b_flags |= XBF_MAPPED;
+
+       return 0;
+}
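
A worked example of the page bookkeeping above, assuming 4 KiB pages:

        /*
         * mem = 0x...100 (256 bytes into its page), len = 9000:
         *   offset     = 256
         *   buflen     = PAGE_ALIGN(9000 + 256) = 12288
         *   page_count = 12288 >> PAGE_SHIFT    = 3
         */
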
+
+xfs_buf_t *
+xfs_buf_get_uncached(
+       struct xfs_buftarg      *target,
+       size_t                  len,
+       int                     flags)
+{
+       unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+       int                     error, i;
+       xfs_buf_t               *bp;
+
+       bp = xfs_buf_allocate(0);
+       if (unlikely(bp == NULL))
+               goto fail;
+       _xfs_buf_initialize(bp, target, 0, len, 0);
+
+       error = _xfs_buf_get_pages(bp, page_count, 0);
+       if (error)
+               goto fail_free_buf;
+
+       for (i = 0; i < page_count; i++) {
+               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
+               if (!bp->b_pages[i])
+                       goto fail_free_mem;
+       }
+       bp->b_flags |= _XBF_PAGES;
+
+       error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+       if (unlikely(error)) {
+               xfs_warn(target->bt_mount,
+                       "%s: failed to map pages\n", __func__);
+               goto fail_free_mem;
+       }
+
+       trace_xfs_buf_get_uncached(bp, _RET_IP_);
+       return bp;
+
+ fail_free_mem:
+       while (--i >= 0)
+               __free_page(bp->b_pages[i]);
+       _xfs_buf_free_pages(bp);
+ fail_free_buf:
+       xfs_buf_deallocate(bp);
+ fail:
+       return NULL;
+}
+
+/*
+ *     Increment reference count on buffer, to hold the buffer concurrently
+ *     with another thread which may release (free) the buffer asynchronously.
+ *     Must hold the buffer already to call this function.
+ */
+void
+xfs_buf_hold(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_hold(bp, _RET_IP_);
+       atomic_inc(&bp->b_hold);
+}
+
+/*
+ *     Releases a hold on the specified buffer.  If the
+ *     hold count is 1, calls xfs_buf_free.
+ */
+void
+xfs_buf_rele(
+       xfs_buf_t               *bp)
+{
+       struct xfs_perag        *pag = bp->b_pag;
+
+       trace_xfs_buf_rele(bp, _RET_IP_);
+
+       if (!pag) {
+               ASSERT(list_empty(&bp->b_lru));
+               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
+               if (atomic_dec_and_test(&bp->b_hold))
+                       xfs_buf_free(bp);
+               return;
+       }
+
+       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
+       ASSERT(atomic_read(&bp->b_hold) > 0);
+       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
+               if (!(bp->b_flags & XBF_STALE) &&
+                          atomic_read(&bp->b_lru_ref)) {
+                       xfs_buf_lru_add(bp);
+                       spin_unlock(&pag->pag_buf_lock);
+               } else {
+                       xfs_buf_lru_del(bp);
+                       ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
+                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+                       spin_unlock(&pag->pag_buf_lock);
+                       xfs_perag_put(pag);
+                       xfs_buf_free(bp);
+               }
+       }
+}
+
+
+/*
+ *     Lock a buffer object, if it is not already locked.
+ *
+ *     If we come across a stale, pinned, locked buffer, we know that we are
+ *     being asked to lock a buffer that has been reallocated. Because it is
+ *     pinned, we know that the log has not been pushed to disk and hence it
+ *     will still be locked.  Rather than continuing to have trylock attempts
+ *     fail until someone else pushes the log, push it ourselves before
+ *     returning.  This means that the xfsaild will not get stuck trying
+ *     to push on stale inode buffers.
+ */
+int
+xfs_buf_trylock(
+       struct xfs_buf          *bp)
+{
+       int                     locked;
+
+       locked = down_trylock(&bp->b_sema) == 0;
+       if (locked)
+               XB_SET_OWNER(bp);
+       else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+               xfs_log_force(bp->b_target->bt_mount, 0);
+
+       trace_xfs_buf_trylock(bp, _RET_IP_);
+       return locked;
+}
+
+/*
+ *     Lock a buffer object.
+ *
+ *     If we come across a stale, pinned, locked buffer, we know that we
+ *     are being asked to lock a buffer that has been reallocated. Because
+ *     it is pinned, we know that the log has not been pushed to disk and
+ *     hence it will still be locked. Rather than sleeping until someone
+ *     else pushes the log, push it ourselves before trying to get the lock.
+ */
+void
+xfs_buf_lock(
+       struct xfs_buf          *bp)
+{
+       trace_xfs_buf_lock(bp, _RET_IP_);
+
+       if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+               xfs_log_force(bp->b_target->bt_mount, 0);
+       down(&bp->b_sema);
+       XB_SET_OWNER(bp);
+
+       trace_xfs_buf_lock_done(bp, _RET_IP_);
+}
+
+/*
+ *     Releases the lock on the buffer object.
+ *     If the buffer is marked delwri but is not queued, do so before we
+ *     unlock the buffer as we need to set flags correctly.  We also need to
+ *     take a reference for the delwri queue because the unlocker is going to
+ *     drop theirs and they don't know we just queued it.
+ */
+void
+xfs_buf_unlock(
+       struct xfs_buf          *bp)
+{
+       if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
+               atomic_inc(&bp->b_hold);
+               bp->b_flags |= XBF_ASYNC;
+               xfs_buf_delwri_queue(bp, 0);
+       }
+
+       XB_CLEAR_OWNER(bp);
+       up(&bp->b_sema);
+
+       trace_xfs_buf_unlock(bp, _RET_IP_);
+}
+
+STATIC void
+xfs_buf_wait_unpin(
+       xfs_buf_t               *bp)
+{
+       DECLARE_WAITQUEUE       (wait, current);
+
+       if (atomic_read(&bp->b_pin_count) == 0)
+               return;
+
+       add_wait_queue(&bp->b_waiters, &wait);
+       for (;;) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (atomic_read(&bp->b_pin_count) == 0)
+                       break;
+               io_schedule();
+       }
+       remove_wait_queue(&bp->b_waiters, &wait);
+       set_current_state(TASK_RUNNING);
+}
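
The open-coded loop above is essentially the classic wait_event() pattern, except it sleeps via io_schedule() so the time is accounted as I/O wait; functionally it is close to:

        wait_event(bp->b_waiters, atomic_read(&bp->b_pin_count) == 0);
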
+
+/*
+ *     Buffer Utility Routines
+ */
+
+STATIC void
+xfs_buf_iodone_work(
+       struct work_struct      *work)
+{
+       xfs_buf_t               *bp =
+               container_of(work, xfs_buf_t, b_iodone_work);
+
+       if (bp->b_iodone)
+               (*(bp->b_iodone))(bp);
+       else if (bp->b_flags & XBF_ASYNC)
+               xfs_buf_relse(bp);
+}
+
+void
+xfs_buf_ioend(
+       xfs_buf_t               *bp,
+       int                     schedule)
+{
+       trace_xfs_buf_iodone(bp, _RET_IP_);
+
+       bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
+       if (bp->b_error == 0)
+               bp->b_flags |= XBF_DONE;
+
+       if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
+               if (schedule) {
+                       INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
+                       queue_work(xfslogd_workqueue, &bp->b_iodone_work);
+               } else {
+                       xfs_buf_iodone_work(&bp->b_iodone_work);
+               }
+       } else {
+               complete(&bp->b_iowait);
+       }
+}
+
+void
+xfs_buf_ioerror(
+       xfs_buf_t               *bp,
+       int                     error)
+{
+       ASSERT(error >= 0 && error <= 0xffff);
+       bp->b_error = (unsigned short)error;
+       trace_xfs_buf_ioerror(bp, error, _RET_IP_);
+}
+
+int
+xfs_bwrite(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       int                     error;
+
+       bp->b_flags |= XBF_WRITE;
+       bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
+
+       xfs_buf_delwri_dequeue(bp);
+       xfs_bdstrat_cb(bp);
+
+       error = xfs_buf_iowait(bp);
+       if (error)
+               xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+       xfs_buf_relse(bp);
+       return error;
+}
+
+void
+xfs_bdwrite(
+       void                    *mp,
+       struct xfs_buf          *bp)
+{
+       trace_xfs_buf_bdwrite(bp, _RET_IP_);
+
+       bp->b_flags &= ~XBF_READ;
+       bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
+
+       xfs_buf_delwri_queue(bp, 1);
+}
+
+/*
+ * Called when we want to stop a buffer from getting written or read.
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
+ * so that the proper iodone callbacks get called.
+ */
+STATIC int
+xfs_bioerror(
+       xfs_buf_t *bp)
+{
+#ifdef XFSERRORDEBUG
+       ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
+#endif
+
+       /*
+        * No need to wait until the buffer is unpinned; we aren't flushing it.
+        */
+       xfs_buf_ioerror(bp, EIO);
+
+       /*
+        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
+        */
+       XFS_BUF_UNREAD(bp);
+       XFS_BUF_UNDELAYWRITE(bp);
+       XFS_BUF_UNDONE(bp);
+       XFS_BUF_STALE(bp);
+
+       xfs_buf_ioend(bp, 0);
+
+       return EIO;
+}
+
+/*
+ * Same as xfs_bioerror, except that we are releasing the buffer
+ * here ourselves, and avoiding the xfs_buf_ioend call.
+ * This is meant for userdata errors; metadata bufs come with
+ * iodone functions attached, so that we can track down errors.
+ */
+STATIC int
+xfs_bioerror_relse(
+       struct xfs_buf  *bp)
+{
+       int64_t         fl = bp->b_flags;
+       /*
+        * No need to wait until the buffer is unpinned.
+        * We aren't flushing it.
+        *
+        * chunkhold expects B_DONE to be set, whether
+        * we actually finish the I/O or not. We don't want to
+        * change that interface.
+        */
+       XFS_BUF_UNREAD(bp);
+       XFS_BUF_UNDELAYWRITE(bp);
+       XFS_BUF_DONE(bp);
+       XFS_BUF_STALE(bp);
+       bp->b_iodone = NULL;
+       if (!(fl & XBF_ASYNC)) {
+               /*
+                * Mark b_error and B_ERROR _both_.
+                * Lots of chunkcache code assumes that.
+                * There's no reason to mark error for
+                * ASYNC buffers.
+                */
+               xfs_buf_ioerror(bp, EIO);
+               XFS_BUF_FINISH_IOWAIT(bp);
+       } else {
+               xfs_buf_relse(bp);
+       }
+
+       return EIO;
+}
+
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shut down the filesystem.
+ */
+int
+xfs_bdstrat_cb(
+       struct xfs_buf  *bp)
+{
+       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+               trace_xfs_bdstrat_shut(bp, _RET_IP_);
+               /*
+                * Metadata write that didn't get logged but
+                * written delayed anyway. These aren't associated
+                * with a transaction, and can be ignored.
+                */
+               if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
+                       return xfs_bioerror_relse(bp);
+               else
+                       return xfs_bioerror(bp);
+       }
+
+       xfs_buf_iorequest(bp);
+       return 0;
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem.  Typically user data goes through this
+ * path; one of the exceptions is the superblock.
+ */
+void
+xfsbdstrat(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               trace_xfs_bdstrat_shut(bp, _RET_IP_);
+               xfs_bioerror_relse(bp);
+               return;
+       }
+
+       xfs_buf_iorequest(bp);
+}
+
+STATIC void
+_xfs_buf_ioend(
+       xfs_buf_t               *bp,
+       int                     schedule)
+{
+       if (atomic_dec_and_test(&bp->b_io_remaining))
+               xfs_buf_ioend(bp, schedule);
+}
+
+STATIC void
+xfs_buf_bio_end_io(
+       struct bio              *bio,
+       int                     error)
+{
+       xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
+
+       xfs_buf_ioerror(bp, -error);
+
+       if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+               invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
+
+       _xfs_buf_ioend(bp, 1);
+       bio_put(bio);
+}
+
+STATIC void
+_xfs_buf_ioapply(
+       xfs_buf_t               *bp)
+{
+       int                     rw, map_i, total_nr_pages, nr_pages;
+       struct bio              *bio;
+       int                     offset = bp->b_offset;
+       int                     size = bp->b_count_desired;
+       sector_t                sector = bp->b_bn;
+
+       total_nr_pages = bp->b_page_count;
+       map_i = 0;
+
+       if (bp->b_flags & XBF_WRITE) {
+               if (bp->b_flags & XBF_SYNCIO)
+                       rw = WRITE_SYNC;
+               else
+                       rw = WRITE;
+               if (bp->b_flags & XBF_FUA)
+                       rw |= REQ_FUA;
+               if (bp->b_flags & XBF_FLUSH)
+                       rw |= REQ_FLUSH;
+       } else if (bp->b_flags & XBF_READ_AHEAD) {
+               rw = READA;
+       } else {
+               rw = READ;
+       }
+
+       /* we only use the buffer cache for meta-data */
+       rw |= REQ_META;
+
+next_chunk:
+       atomic_inc(&bp->b_io_remaining);
+       nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
+       if (nr_pages > total_nr_pages)
+               nr_pages = total_nr_pages;
+
+       bio = bio_alloc(GFP_NOIO, nr_pages);
+       bio->bi_bdev = bp->b_target->bt_bdev;
+       bio->bi_sector = sector;
+       bio->bi_end_io = xfs_buf_bio_end_io;
+       bio->bi_private = bp;
+
+       for (; size && nr_pages; nr_pages--, map_i++) {
+               int     rbytes, nbytes = PAGE_SIZE - offset;
+
+               if (nbytes > size)
+                       nbytes = size;
+
+               rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+               if (rbytes < nbytes)
+                       break;
+
+               offset = 0;
+               sector += nbytes >> BBSHIFT;
+               size -= nbytes;
+               total_nr_pages--;
+       }
+
+       if (likely(bio->bi_size)) {
+               if (xfs_buf_is_vmapped(bp)) {
+                       flush_kernel_vmap_range(bp->b_addr,
+                                               xfs_buf_vmap_len(bp));
+               }
+               submit_bio(rw, bio);
+               if (size)
+                       goto next_chunk;
+       } else {
+               xfs_buf_ioerror(bp, EIO);
+               bio_put(bio);
+       }
+}
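
Reading the flag mapping at the top of the function, some example translations from buffer flags to block-layer request flags:

        /*
         *   XBF_WRITE | XBF_SYNCIO | XBF_FUA -> WRITE_SYNC | REQ_FUA | REQ_META
         *   XBF_WRITE                        -> WRITE | REQ_META
         *   XBF_READ_AHEAD                   -> READA | REQ_META
         *   (plain read)                     -> READ | REQ_META
         */
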
+
+int
+xfs_buf_iorequest(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_iorequest(bp, _RET_IP_);
+
+       if (bp->b_flags & XBF_DELWRI) {
+               xfs_buf_delwri_queue(bp, 1);
+               return 0;
+       }
+
+       if (bp->b_flags & XBF_WRITE) {
+               xfs_buf_wait_unpin(bp);
+       }
+
+       xfs_buf_hold(bp);
+
+       /* Set the count to 1 initially; this stops an I/O
+        * completion callout that happens before we have started
+        * all the I/O from calling xfs_buf_ioend too early.
+        */
+       atomic_set(&bp->b_io_remaining, 1);
+       _xfs_buf_ioapply(bp);
+       _xfs_buf_ioend(bp, 0);
+
+       xfs_buf_rele(bp);
+       return 0;
+}
+
+/*
+ *     Waits for I/O to complete on the buffer supplied.
+ *     It returns immediately if no I/O is pending.
+ *     It returns the I/O error code, if any, or 0 if there was no error.
+ */
+int
+xfs_buf_iowait(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_iowait(bp, _RET_IP_);
+
+       wait_for_completion(&bp->b_iowait);
+
+       trace_xfs_buf_iowait_done(bp, _RET_IP_);
+       return bp->b_error;
+}
+
+xfs_caddr_t
+xfs_buf_offset(
+       xfs_buf_t               *bp,
+       size_t                  offset)
+{
+       struct page             *page;
+
+       if (bp->b_flags & XBF_MAPPED)
+               return bp->b_addr + offset;
+
+       offset += bp->b_offset;
+       page = bp->b_pages[offset >> PAGE_SHIFT];
+       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
+}
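
A worked example for the unmapped case, assuming 4 KiB pages and b_offset = 0, with offset = 5000:

        /*
         *   page index  = 5000 >> PAGE_SHIFT     = 1
         *   within page = 5000 & (PAGE_SIZE - 1) = 904
         */
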
+
+/*
+ *     Move data into or out of a buffer.
+ */
+void
+xfs_buf_iomove(
+       xfs_buf_t               *bp,    /* buffer to process            */
+       size_t                  boff,   /* starting buffer offset       */
+       size_t                  bsize,  /* length to copy               */
+       void                    *data,  /* data address                 */
+       xfs_buf_rw_t            mode)   /* read/write/zero flag         */
+{
+       size_t                  bend, cpoff, csize;
+       struct page             *page;
+
+       bend = boff + bsize;
+       while (boff < bend) {
+               page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
+               cpoff = xfs_buf_poff(boff + bp->b_offset);
+               csize = min_t(size_t,
+                             PAGE_SIZE-cpoff, bp->b_count_desired-boff);
+
+               ASSERT(((csize + cpoff) <= PAGE_SIZE));
+
+               switch (mode) {
+               case XBRW_ZERO:
+                       memset(page_address(page) + cpoff, 0, csize);
+                       break;
+               case XBRW_READ:
+                       memcpy(data, page_address(page) + cpoff, csize);
+                       break;
+               case XBRW_WRITE:
+                       memcpy(page_address(page) + cpoff, data, csize);
+               }
+
+               boff += csize;
+               data += csize;
+       }
+}
+
+/*
+ *     Handling of buffer targets (buftargs).
+ */
+
+/*
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
+ */
+void
+xfs_wait_buftarg(
+       struct xfs_buftarg      *btp)
+{
+       struct xfs_buf          *bp;
+
+restart:
+       spin_lock(&btp->bt_lru_lock);
+       while (!list_empty(&btp->bt_lru)) {
+               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+               if (atomic_read(&bp->b_hold) > 1) {
+                       spin_unlock(&btp->bt_lru_lock);
+                       delay(100);
+                       goto restart;
+               }
+               /*
+                * clear the LRU reference count so the buffer doesn't get
+                * ignored in xfs_buf_rele().
+                */
+               atomic_set(&bp->b_lru_ref, 0);
+               spin_unlock(&btp->bt_lru_lock);
+               xfs_buf_rele(bp);
+               spin_lock(&btp->bt_lru_lock);
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+int
+xfs_buftarg_shrink(
+       struct shrinker         *shrink,
+       struct shrink_control   *sc)
+{
+       struct xfs_buftarg      *btp = container_of(shrink,
+                                       struct xfs_buftarg, bt_shrinker);
+       struct xfs_buf          *bp;
+       int nr_to_scan = sc->nr_to_scan;
+       LIST_HEAD(dispose);
+
+       if (!nr_to_scan)
+               return btp->bt_lru_nr;
+
+       spin_lock(&btp->bt_lru_lock);
+       while (!list_empty(&btp->bt_lru)) {
+               if (nr_to_scan-- <= 0)
+                       break;
+
+               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+               /*
+                * Decrement the b_lru_ref count unless the value is already
+                * zero. If the value is already zero, we need to reclaim the
+                * buffer, otherwise it gets another trip through the LRU.
+                */
+               if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+                       list_move_tail(&bp->b_lru, &btp->bt_lru);
+                       continue;
+               }
+
+               /*
+                * remove the buffer from the LRU now to avoid needing another
+                * lock round trip inside xfs_buf_rele().
+                */
+               list_move(&bp->b_lru, &dispose);
+               btp->bt_lru_nr--;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+
+       while (!list_empty(&dispose)) {
+               bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+               list_del_init(&bp->b_lru);
+               xfs_buf_rele(bp);
+       }
+
+       return btp->bt_lru_nr;
+}
+
+void
+xfs_free_buftarg(
+       struct xfs_mount        *mp,
+       struct xfs_buftarg      *btp)
+{
+       unregister_shrinker(&btp->bt_shrinker);
+
+       xfs_flush_buftarg(btp, 1);
+       if (mp->m_flags & XFS_MOUNT_BARRIER)
+               xfs_blkdev_issue_flush(btp);
+
+       kthread_stop(btp->bt_task);
+       kmem_free(btp);
+}
+
+STATIC int
+xfs_setsize_buftarg_flags(
+       xfs_buftarg_t           *btp,
+       unsigned int            blocksize,
+       unsigned int            sectorsize,
+       int                     verbose)
+{
+       btp->bt_bsize = blocksize;
+       btp->bt_sshift = ffs(sectorsize) - 1;
+       btp->bt_smask = sectorsize - 1;
+
+       if (set_blocksize(btp->bt_bdev, sectorsize)) {
+               xfs_warn(btp->bt_mount,
+                       "Cannot set_blocksize to %u on device %s",
+                       sectorsize, xfs_buf_target_name(btp));
+               return EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ *     When allocating the initial buffer target we have not yet
+ *     read in the superblock, so we don't know what size sectors
+ *     are being used at this early stage.  Play safe.
+ */
+STATIC int
+xfs_setsize_buftarg_early(
+       xfs_buftarg_t           *btp,
+       struct block_device     *bdev)
+{
+       return xfs_setsize_buftarg_flags(btp,
+                       PAGE_SIZE, bdev_logical_block_size(bdev), 0);
+}
+
+int
+xfs_setsize_buftarg(
+       xfs_buftarg_t           *btp,
+       unsigned int            blocksize,
+       unsigned int            sectorsize)
+{
+       return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
+}
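+
+/*
+ * The shift/mask pair set up above makes sector alignment checks
+ * cheap. As an illustrative sketch (not a call site in this patch),
+ * with a 512 byte sector size bt_sshift is 9 and bt_smask is 0x1ff:
+ *
+ *     if (offset & btp->bt_smask)     // not sector aligned
+ *             return EINVAL;
+ *     nsectors = len >> btp->bt_sshift;       // bytes to sectors
+ */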
+
+STATIC int
+xfs_alloc_delwrite_queue(
+       xfs_buftarg_t           *btp,
+       const char              *fsname)
+{
+       INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+       spin_lock_init(&btp->bt_delwrite_lock);
+       btp->bt_flags = 0;
+       btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
+       if (IS_ERR(btp->bt_task))
+               return PTR_ERR(btp->bt_task);
+       return 0;
+}
+
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+       struct xfs_mount        *mp,
+       struct block_device     *bdev,
+       int                     external,
+       const char              *fsname)
+{
+       xfs_buftarg_t           *btp;
+
+       btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+       btp->bt_mount = mp;
+       btp->bt_dev =  bdev->bd_dev;
+       btp->bt_bdev = bdev;
+       btp->bt_bdi = blk_get_backing_dev_info(bdev);
+       if (!btp->bt_bdi)
+               goto error;
+
+       INIT_LIST_HEAD(&btp->bt_lru);
+       spin_lock_init(&btp->bt_lru_lock);
+       if (xfs_setsize_buftarg_early(btp, bdev))
+               goto error;
+       if (xfs_alloc_delwrite_queue(btp, fsname))
+               goto error;
+       btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+       btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+       register_shrinker(&btp->bt_shrinker);
+       return btp;
+
+error:
+       kmem_free(btp);
+       return NULL;
+}
+
+
+/*
+ *     Delayed write buffer handling
+ */
+STATIC void
+xfs_buf_delwri_queue(
+       xfs_buf_t               *bp,
+       int                     unlock)
+{
+       struct list_head        *dwq = &bp->b_target->bt_delwrite_queue;
+       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
+
+       trace_xfs_buf_delwri_queue(bp, _RET_IP_);
+
+       ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
+
+       spin_lock(dwlk);
+       /* If already in the queue, dequeue and place at tail */
+       if (!list_empty(&bp->b_list)) {
+               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+               if (unlock)
+                       atomic_dec(&bp->b_hold);
+               list_del(&bp->b_list);
+       }
+
+       if (list_empty(dwq)) {
+               /* start xfsbufd as it is about to have something to do */
+               wake_up_process(bp->b_target->bt_task);
+       }
+
+       bp->b_flags |= _XBF_DELWRI_Q;
+       list_add_tail(&bp->b_list, dwq);
+       bp->b_queuetime = jiffies;
+       spin_unlock(dwlk);
+
+       if (unlock)
+               xfs_buf_unlock(bp);
+}
+
+void
+xfs_buf_delwri_dequeue(
+       xfs_buf_t               *bp)
+{
+       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
+       int                     dequeued = 0;
+
+       spin_lock(dwlk);
+       if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
+               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+               list_del_init(&bp->b_list);
+               dequeued = 1;
+       }
+       bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+       spin_unlock(dwlk);
+
+       if (dequeued)
+               xfs_buf_rele(bp);
+
+       trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
+}
+
+/*
+ * If a delwri buffer needs to be pushed before it has aged out, then promote
+ * it to the head of the delwri queue so that it will be flushed on the next
+ * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
+ * than the age currently needed to flush the buffer. Hence the next time the
+ * xfsbufd sees it is guaranteed to be considered old enough to flush.
+ */
+void
+xfs_buf_delwri_promote(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+       long            age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
+
+       ASSERT(bp->b_flags & XBF_DELWRI);
+       ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+       /*
+        * Check the buffer age before locking the delayed write queue as we
+        * don't need to promote buffers that are already past the flush age.
+        */
+       if (bp->b_queuetime < jiffies - age)
+               return;
+       bp->b_queuetime = jiffies - age;
+       spin_lock(&btp->bt_delwrite_lock);
+       list_move(&bp->b_list, &btp->bt_delwrite_queue);
+       spin_unlock(&btp->bt_delwrite_lock);
+}
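+
+/*
+ * To make the promotion arithmetic concrete: assuming the usual
+ * default xfs_buf_age_centisecs of 1500 (15 seconds) and HZ=100, age
+ * works out to 1501 jiffies, so setting b_queuetime to jiffies - age
+ * guarantees the time_before() check in xfs_buf_delwri_split() sees
+ * the buffer as past its flush age on the very next xfsbufd pass.
+ */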
+
+STATIC void
+xfs_buf_runall_queues(
+       struct workqueue_struct *queue)
+{
+       flush_workqueue(queue);
+}
+
+/*
+ * Move as many buffers as specified to the supplied list,
+ * indicating if we skipped any buffers to prevent deadlocks.
+ */
+STATIC int
+xfs_buf_delwri_split(
+       xfs_buftarg_t   *target,
+       struct list_head *list,
+       unsigned long   age)
+{
+       xfs_buf_t       *bp, *n;
+       struct list_head *dwq = &target->bt_delwrite_queue;
+       spinlock_t      *dwlk = &target->bt_delwrite_lock;
+       int             skipped = 0;
+       int             force;
+
+       force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+       INIT_LIST_HEAD(list);
+       spin_lock(dwlk);
+       list_for_each_entry_safe(bp, n, dwq, b_list) {
+               ASSERT(bp->b_flags & XBF_DELWRI);
+
+               if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
+                       if (!force &&
+                           time_before(jiffies, bp->b_queuetime + age)) {
+                               xfs_buf_unlock(bp);
+                               break;
+                       }
+
+                       bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
+                       bp->b_flags |= XBF_WRITE;
+                       list_move_tail(&bp->b_list, list);
+                       trace_xfs_buf_delwri_split(bp, _RET_IP_);
+               } else
+                       skipped++;
+       }
+       spin_unlock(dwlk);
+
+       return skipped;
+}
+
+/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+ */
+static int
+xfs_buf_cmp(
+       void            *priv,
+       struct list_head *a,
+       struct list_head *b)
+{
+       struct xfs_buf  *ap = container_of(a, struct xfs_buf, b_list);
+       struct xfs_buf  *bp = container_of(b, struct xfs_buf, b_list);
+       xfs_daddr_t             diff;
+
+       diff = ap->b_bn - bp->b_bn;
+       if (diff < 0)
+               return -1;
+       if (diff > 0)
+               return 1;
+       return 0;
+}
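+
+/*
+ * A sketch of the failure mode the comment above alludes to: returning
+ * (int)(ap->b_bn - bp->b_bn) directly would truncate the 64 bit
+ * difference, so a difference of exactly 2^32 would compare as equal
+ * and list_sort() could order distinct block numbers arbitrarily.
+ * Hence the explicit sign tests on the full xfs_daddr_t value.
+ */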
+
+STATIC int
+xfsbufd(
+       void            *data)
+{
+       xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
+
+       current->flags |= PF_MEMALLOC;
+
+       set_freezable();
+
+       do {
+               long    age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+               long    tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
+               struct list_head tmp;
+               struct blk_plug plug;
+
+               if (unlikely(freezing(current))) {
+                       set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+                       refrigerator();
+               } else {
+                       clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+               }
+
+               /* sleep for a long time if there is nothing to do. */
+               if (list_empty(&target->bt_delwrite_queue))
+                       tout = MAX_SCHEDULE_TIMEOUT;
+               schedule_timeout_interruptible(tout);
+
+               xfs_buf_delwri_split(target, &tmp, age);
+               list_sort(NULL, &tmp, xfs_buf_cmp);
+
+               blk_start_plug(&plug);
+               while (!list_empty(&tmp)) {
+                       struct xfs_buf *bp;
+                       bp = list_first_entry(&tmp, struct xfs_buf, b_list);
+                       list_del_init(&bp->b_list);
+                       xfs_bdstrat_cb(bp);
+               }
+               blk_finish_plug(&plug);
+       } while (!kthread_should_stop());
+
+       return 0;
+}
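+
+/*
+ * The tunables above are expressed in centiseconds; one centisecond is
+ * 10ms, so multiplying by msecs_to_jiffies(10) converts the sysctl
+ * value to jiffies. Assuming the common defaults of 100 for the timer
+ * (1 second) and 1500 for the age (15 seconds), xfsbufd wakes about
+ * once a second and writes back buffers queued for at least 15 seconds.
+ */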
+
+/*
+ *     Go through all incore buffers, and release buffers if they belong to
+ *     the given device. This is used in filesystem error handling to
+ *     preserve the consistency of its metadata.
+ */
+int
+xfs_flush_buftarg(
+       xfs_buftarg_t   *target,
+       int             wait)
+{
+       xfs_buf_t       *bp;
+       int             pincount = 0;
+       LIST_HEAD(tmp_list);
+       LIST_HEAD(wait_list);
+       struct blk_plug plug;
+
+       xfs_buf_runall_queues(xfsconvertd_workqueue);
+       xfs_buf_runall_queues(xfsdatad_workqueue);
+       xfs_buf_runall_queues(xfslogd_workqueue);
+
+       set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+       pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
+
+       /*
+        * We have dropped the delayed write list lock, so now walk the
+        * temporary list. All I/O is issued asynchronously; if we need to
+        * wait for completion we do that after issuing all the I/O.
+        */
+       list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+       blk_start_plug(&plug);
+       while (!list_empty(&tmp_list)) {
+               bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
+               ASSERT(target == bp->b_target);
+               list_del_init(&bp->b_list);
+               if (wait) {
+                       bp->b_flags &= ~XBF_ASYNC;
+                       list_add(&bp->b_list, &wait_list);
+               }
+               xfs_bdstrat_cb(bp);
+       }
+       blk_finish_plug(&plug);
+
+       if (wait) {
+               /* Wait for IO to complete. */
+               while (!list_empty(&wait_list)) {
+                       bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
+
+                       list_del_init(&bp->b_list);
+                       xfs_buf_iowait(bp);
+                       xfs_buf_relse(bp);
+               }
+       }
+
+       return pincount;
+}
+
+int __init
+xfs_buf_init(void)
+{
+       xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
+                                               KM_ZONE_HWALIGN, NULL);
+       if (!xfs_buf_zone)
+               goto out;
+
+       xfslogd_workqueue = alloc_workqueue("xfslogd",
+                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
+       if (!xfslogd_workqueue)
+               goto out_free_buf_zone;
+
+       xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
+       if (!xfsdatad_workqueue)
+               goto out_destroy_xfslogd_workqueue;
+
+       xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
+                                               WQ_MEM_RECLAIM, 1);
+       if (!xfsconvertd_workqueue)
+               goto out_destroy_xfsdatad_workqueue;
+
+       return 0;
+
+ out_destroy_xfsdatad_workqueue:
+       destroy_workqueue(xfsdatad_workqueue);
+ out_destroy_xfslogd_workqueue:
+       destroy_workqueue(xfslogd_workqueue);
+ out_free_buf_zone:
+       kmem_zone_destroy(xfs_buf_zone);
+ out:
+       return -ENOMEM;
+}
+
+void
+xfs_buf_terminate(void)
+{
+       destroy_workqueue(xfsconvertd_workqueue);
+       destroy_workqueue(xfsdatad_workqueue);
+       destroy_workqueue(xfslogd_workqueue);
+       kmem_zone_destroy(xfs_buf_zone);
+}
+
+#ifdef CONFIG_KDB_MODULES
+struct list_head *
+xfs_get_buftarg_list(void)
+{
+       return &xfs_buftarg_list;
+}
+#endif
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
new file mode 100644 (file)
index 0000000..620972b
--- /dev/null
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_BUF_H__
+#define __XFS_BUF_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/uio.h>
+
+/*
+ *     Base types
+ */
+
+#define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
+
+#define xfs_buf_ctob(pp)       ((pp) * PAGE_CACHE_SIZE)
+#define xfs_buf_btoc(dd)       (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoct(dd)      ((dd) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_poff(aa)       ((aa) & ~PAGE_CACHE_MASK)
+
+typedef enum {
+       XBRW_READ = 1,                  /* transfer into target memory */
+       XBRW_WRITE = 2,                 /* transfer from target memory */
+       XBRW_ZERO = 3,                  /* Zero target memory */
+} xfs_buf_rw_t;
+
+#define XBF_READ       (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE      (1 << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
+#define XBF_MAPPED     (1 << 3) /* buffer mapped (b_addr valid) */
+#define XBF_ASYNC      (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE       (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_DELWRI     (1 << 6) /* buffer has dirty pages */
+#define XBF_STALE      (1 << 7) /* buffer has been staled, do not find it */
+
+/* I/O hints for the BIO layer */
+#define XBF_SYNCIO     (1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA                (1 << 11)/* force cache write through mode */
+#define XBF_FLUSH      (1 << 12)/* flush the disk cache before a write */
+
+/* flags used only as arguments to access routines */
+#define XBF_LOCK       (1 << 15)/* lock requested */
+#define XBF_TRYLOCK    (1 << 16)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
+
+/* flags used only internally */
+#define _XBF_PAGES     (1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM      (1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q  (1 << 22)/* buffer on delwri queue */
+
+typedef unsigned int xfs_buf_flags_t;
+
+#define XFS_BUF_FLAGS \
+       { XBF_READ,             "READ" }, \
+       { XBF_WRITE,            "WRITE" }, \
+       { XBF_READ_AHEAD,       "READ_AHEAD" }, \
+       { XBF_MAPPED,           "MAPPED" }, \
+       { XBF_ASYNC,            "ASYNC" }, \
+       { XBF_DONE,             "DONE" }, \
+       { XBF_DELWRI,           "DELWRI" }, \
+       { XBF_STALE,            "STALE" }, \
+       { XBF_SYNCIO,           "SYNCIO" }, \
+       { XBF_FUA,              "FUA" }, \
+       { XBF_FLUSH,            "FLUSH" }, \
+       { XBF_LOCK,             "LOCK" },       /* should never be set */\
+       { XBF_TRYLOCK,          "TRYLOCK" },    /* ditto */\
+       { XBF_DONT_BLOCK,       "DONT_BLOCK" }, /* ditto */\
+       { _XBF_PAGES,           "PAGES" }, \
+       { _XBF_KMEM,            "KMEM" }, \
+       { _XBF_DELWRI_Q,        "DELWRI_Q" }
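+
+/*
+ * The { value, "NAME" } pairs above exist for the tracing code: a
+ * tracepoint can hand XFS_BUF_FLAGS to __print_flags() to render
+ * b_flags symbolically, along the lines of:
+ *
+ *     __print_flags(bp->b_flags, "|", XFS_BUF_FLAGS)
+ *             => "WRITE|ASYNC|DELWRI"
+ */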
+
+typedef enum {
+       XBT_FORCE_SLEEP = 0,
+       XBT_FORCE_FLUSH = 1,
+} xfs_buftarg_flags_t;
+
+typedef struct xfs_buftarg {
+       dev_t                   bt_dev;
+       struct block_device     *bt_bdev;
+       struct backing_dev_info *bt_bdi;
+       struct xfs_mount        *bt_mount;
+       unsigned int            bt_bsize;
+       unsigned int            bt_sshift;
+       size_t                  bt_smask;
+
+       /* per device delwri queue */
+       struct task_struct      *bt_task;
+       struct list_head        bt_delwrite_queue;
+       spinlock_t              bt_delwrite_lock;
+       unsigned long           bt_flags;
+
+       /* LRU control structures */
+       struct shrinker         bt_shrinker;
+       struct list_head        bt_lru;
+       spinlock_t              bt_lru_lock;
+       unsigned int            bt_lru_nr;
+} xfs_buftarg_t;
+
+struct xfs_buf;
+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
+
+#define XB_PAGES       2
+
+typedef struct xfs_buf {
+       /*
+        * first cacheline holds all the fields needed for an uncontended cache
+        * hit to be fully processed. The semaphore straddles the cacheline
+        * boundary, but the counter and lock sit on the first cacheline,
+        * which is the only bit that is touched if we hit the semaphore
+        * fast-path on locking.
+        */
+       struct rb_node          b_rbnode;       /* rbtree node */
+       xfs_off_t               b_file_offset;  /* offset in file */
+       size_t                  b_buffer_length;/* size of buffer in bytes */
+       atomic_t                b_hold;         /* reference count */
+       atomic_t                b_lru_ref;      /* lru reclaim ref count */
+       xfs_buf_flags_t         b_flags;        /* status flags */
+       struct semaphore        b_sema;         /* semaphore for lockables */
+
+       struct list_head        b_lru;          /* lru list */
+       wait_queue_head_t       b_waiters;      /* unpin waiters */
+       struct list_head        b_list;
+       struct xfs_perag        *b_pag;         /* contains rbtree root */
+       xfs_buftarg_t           *b_target;      /* buffer target (device) */
+       xfs_daddr_t             b_bn;           /* block number for I/O */
+       size_t                  b_count_desired;/* desired transfer size */
+       void                    *b_addr;        /* virtual address of buffer */
+       struct work_struct      b_iodone_work;
+       xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
+       struct completion       b_iowait;       /* queue for I/O waiters */
+       void                    *b_fspriv;
+       struct xfs_trans        *b_transp;
+       struct page             **b_pages;      /* array of page pointers */
+       struct page             *b_page_array[XB_PAGES]; /* inline pages */
+       unsigned long           b_queuetime;    /* time buffer was queued */
+       atomic_t                b_pin_count;    /* pin count */
+       atomic_t                b_io_remaining; /* #outstanding I/O requests */
+       unsigned int            b_page_count;   /* size of page array */
+       unsigned int            b_offset;       /* page offset in first page */
+       unsigned short          b_error;        /* error code on I/O */
+#ifdef XFS_BUF_LOCK_TRACKING
+       int                     b_last_holder;
+#endif
+} xfs_buf_t;
+
+
+/* Finding and Reading Buffers */
+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t, xfs_buf_t *);
+#define xfs_incore(buftarg,blkno,len,lockit) \
+       _xfs_buf_find(buftarg, blkno, len, lockit, NULL)
+
+extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t);
+extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t);
+
+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
+extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
+extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
+extern void xfs_buf_hold(xfs_buf_t *);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+                               struct xfs_buftarg *target,
+                               xfs_daddr_t daddr, size_t length, int flags);
+
+/* Releasing Buffers */
+extern void xfs_buf_free(xfs_buf_t *);
+extern void xfs_buf_rele(xfs_buf_t *);
+
+/* Locking and Unlocking Buffers */
+extern int xfs_buf_trylock(xfs_buf_t *);
+extern void xfs_buf_lock(xfs_buf_t *);
+extern void xfs_buf_unlock(xfs_buf_t *);
+#define xfs_buf_islocked(bp) \
+       ((bp)->b_sema.count <= 0)
+
+/* Buffer Read and Write Routines */
+extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
+extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
+
+extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
+extern int xfs_bdstrat_cb(struct xfs_buf *);
+
+extern void xfs_buf_ioend(xfs_buf_t *, int);
+extern void xfs_buf_ioerror(xfs_buf_t *, int);
+extern int xfs_buf_iorequest(xfs_buf_t *);
+extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
+                               xfs_buf_rw_t);
+#define xfs_buf_zero(bp, off, len) \
+           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+
+static inline int xfs_buf_geterror(xfs_buf_t *bp)
+{
+       return bp ? bp->b_error : ENOMEM;
+}
+
+/* Buffer Utility Routines */
+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
+
+/* Delayed Write Buffer Routines */
+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
+extern void xfs_buf_delwri_promote(xfs_buf_t *);
+
+/* Buffer Daemon Setup Routines */
+extern int xfs_buf_init(void);
+extern void xfs_buf_terminate(void);
+
+static inline const char *
+xfs_buf_target_name(struct xfs_buftarg *target)
+{
+       static char __b[BDEVNAME_SIZE];
+
+       return bdevname(target->bt_bdev, __b);
+}
+
+
+#define XFS_BUF_ZEROFLAGS(bp) \
+       ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
+                           XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+
+void xfs_buf_stale(struct xfs_buf *bp);
+#define XFS_BUF_STALE(bp)      xfs_buf_stale(bp);
+#define XFS_BUF_UNSTALE(bp)    ((bp)->b_flags &= ~XBF_STALE)
+#define XFS_BUF_ISSTALE(bp)    ((bp)->b_flags & XBF_STALE)
+#define XFS_BUF_SUPER_STALE(bp)        do {                            \
+                                       XFS_BUF_STALE(bp);      \
+                                       xfs_buf_delwri_dequeue(bp);     \
+                                       XFS_BUF_DONE(bp);       \
+                               } while (0)
+
+#define XFS_BUF_DELAYWRITE(bp)         ((bp)->b_flags |= XBF_DELWRI)
+#define XFS_BUF_UNDELAYWRITE(bp)       xfs_buf_delwri_dequeue(bp)
+#define XFS_BUF_ISDELAYWRITE(bp)       ((bp)->b_flags & XBF_DELWRI)
+
+#define XFS_BUF_DONE(bp)       ((bp)->b_flags |= XBF_DONE)
+#define XFS_BUF_UNDONE(bp)     ((bp)->b_flags &= ~XBF_DONE)
+#define XFS_BUF_ISDONE(bp)     ((bp)->b_flags & XBF_DONE)
+
+#define XFS_BUF_ASYNC(bp)      ((bp)->b_flags |= XBF_ASYNC)
+#define XFS_BUF_UNASYNC(bp)    ((bp)->b_flags &= ~XBF_ASYNC)
+#define XFS_BUF_ISASYNC(bp)    ((bp)->b_flags & XBF_ASYNC)
+
+#define XFS_BUF_READ(bp)       ((bp)->b_flags |= XBF_READ)
+#define XFS_BUF_UNREAD(bp)     ((bp)->b_flags &= ~XBF_READ)
+#define XFS_BUF_ISREAD(bp)     ((bp)->b_flags & XBF_READ)
+
+#define XFS_BUF_WRITE(bp)      ((bp)->b_flags |= XBF_WRITE)
+#define XFS_BUF_UNWRITE(bp)    ((bp)->b_flags &= ~XBF_WRITE)
+#define XFS_BUF_ISWRITE(bp)    ((bp)->b_flags & XBF_WRITE)
+
+#define XFS_BUF_ADDR(bp)               ((bp)->b_bn)
+#define XFS_BUF_SET_ADDR(bp, bno)      ((bp)->b_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_OFFSET(bp)             ((bp)->b_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off)    ((bp)->b_file_offset = (off))
+#define XFS_BUF_COUNT(bp)              ((bp)->b_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt)     ((bp)->b_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp)               ((bp)->b_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt)      ((bp)->b_buffer_length = (cnt))
+
+static inline void
+xfs_buf_set_ref(
+       struct xfs_buf  *bp,
+       int             lru_ref)
+{
+       atomic_set(&bp->b_lru_ref, lru_ref);
+}
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)   xfs_buf_set_ref(bp, ref)
+#define XFS_BUF_SET_VTYPE(bp, type)            do { } while (0)
+
+static inline int xfs_buf_ispinned(struct xfs_buf *bp)
+{
+       return atomic_read(&bp->b_pin_count);
+}
+
+#define XFS_BUF_FINISH_IOWAIT(bp)      complete(&bp->b_iowait);
+
+static inline void xfs_buf_relse(xfs_buf_t *bp)
+{
+       xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+}
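+
+/*
+ * A minimal sketch of the usual pairing, assuming a buffer returned
+ * locked and held by xfs_buf_get():
+ *
+ *     bp = xfs_buf_get(target, blkno, len, flags);
+ *     if (bp) {
+ *             // ... read or modify bp->b_addr ...
+ *             xfs_buf_relse(bp);      // unlock, then drop the hold
+ *     }
+ */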
+
+/*
+ *     Handling of buftargs.
+ */
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+                       struct block_device *, int, const char *);
+extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
+extern void xfs_wait_buftarg(xfs_buftarg_t *);
+extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
+
+#ifdef CONFIG_KDB_MODULES
+extern struct list_head *xfs_get_buftarg_list(void);
+#endif
+
+#define xfs_getsize_buftarg(buftarg)   block_size((buftarg)->bt_bdev)
+#define xfs_readonly_buftarg(buftarg)  bdev_read_only((buftarg)->bt_bdev)
+
+#define xfs_binval(buftarg)            xfs_flush_buftarg(buftarg, 1)
+#define XFS_bflush(buftarg)            xfs_flush_buftarg(buftarg, 1)
+
+#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
new file mode 100644 (file)
index 0000000..244e797
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_discard.h"
+#include "xfs_trace.h"
+
+STATIC int
+xfs_trim_extents(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_fsblock_t           start,
+       xfs_fsblock_t           len,
+       xfs_fsblock_t           minlen,
+       __uint64_t              *blocks_trimmed)
+{
+       struct block_device     *bdev = mp->m_ddev_targp->bt_bdev;
+       struct xfs_btree_cur    *cur;
+       struct xfs_buf          *agbp;
+       struct xfs_perag        *pag;
+       int                     error;
+       int                     i;
+
+       pag = xfs_perag_get(mp, agno);
+
+       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       if (error || !agbp)
+               goto out_put_perag;
+
+       cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
+
+       /*
+        * Force out the log.  This means any transactions that might have freed
+        * space before we took the AGF buffer lock are now on disk, and the
+        * volatile disk cache is flushed.
+        */
+       xfs_log_force(mp, XFS_LOG_SYNC);
+
+       /*
+        * Look up the longest free extent in the AGF and start with it.
+        */
+       error = xfs_alloc_lookup_le(cur, 0,
+                                   XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+       if (error)
+               goto out_del_cursor;
+
+       /*
+        * Loop until we are done with all extents that are large
+        * enough to be worth discarding.
+        */
+       while (i) {
+               xfs_agblock_t fbno;
+               xfs_extlen_t flen;
+
+               error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
+               if (error)
+                       goto out_del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
+               ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+
+               /*
+                * Too small?  Give up.
+                */
+               if (flen < minlen) {
+                       trace_xfs_discard_toosmall(mp, agno, fbno, flen);
+                       goto out_del_cursor;
+               }
+
+               /*
+                * If the extent is entirely outside of the range we are
+                * supposed to discard, skip it.  Do not bother to trim
+                * down partially overlapping ranges for now.
+                */
+               if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
+                   XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
+                       trace_xfs_discard_exclude(mp, agno, fbno, flen);
+                       goto next_extent;
+               }
+
+               /*
+                * If any blocks in the range are still busy, skip the
+                * discard and try again the next time.
+                */
+               if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
+                       trace_xfs_discard_busy(mp, agno, fbno, flen);
+                       goto next_extent;
+               }
+
+               trace_xfs_discard_extent(mp, agno, fbno, flen);
+               error = -blkdev_issue_discard(bdev,
+                               XFS_AGB_TO_DADDR(mp, agno, fbno),
+                               XFS_FSB_TO_BB(mp, flen),
+                               GFP_NOFS, 0);
+               if (error)
+                       goto out_del_cursor;
+               *blocks_trimmed += flen;
+
+next_extent:
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto out_del_cursor;
+       }
+
+out_del_cursor:
+       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       xfs_buf_relse(agbp);
+out_put_perag:
+       xfs_perag_put(pag);
+       return error;
+}
+
+int
+xfs_ioc_trim(
+       struct xfs_mount                *mp,
+       struct fstrim_range __user      *urange)
+{
+       struct request_queue    *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
+       unsigned int            granularity = q->limits.discard_granularity;
+       struct fstrim_range     range;
+       xfs_fsblock_t           start, len, minlen;
+       xfs_agnumber_t          start_agno, end_agno, agno;
+       __uint64_t              blocks_trimmed = 0;
+       int                     error, last_error = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (!blk_queue_discard(q))
+               return -XFS_ERROR(EOPNOTSUPP);
+       if (copy_from_user(&range, urange, sizeof(range)))
+               return -XFS_ERROR(EFAULT);
+
+       /*
+        * Truncating down the len isn't actually quite correct, but using
+        * XFS_B_TO_FSB would mean we trivially get overflows for values
+        * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
+        * used by the fstrim application.  In the end it really doesn't
+        * matter as trimming blocks is an advisory interface.
+        */
+       start = XFS_B_TO_FSBT(mp, range.start);
+       len = XFS_B_TO_FSBT(mp, range.len);
+       minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
+
+       start_agno = XFS_FSB_TO_AGNO(mp, start);
+       if (start_agno >= mp->m_sb.sb_agcount)
+               return -XFS_ERROR(EINVAL);
+
+       end_agno = XFS_FSB_TO_AGNO(mp, start + len);
+       if (end_agno >= mp->m_sb.sb_agcount)
+               end_agno = mp->m_sb.sb_agcount - 1;
+
+       for (agno = start_agno; agno <= end_agno; agno++) {
+               error = -xfs_trim_extents(mp, agno, start, len, minlen,
+                                         &blocks_trimmed);
+               if (error)
+                       last_error = error;
+       }
+
+       if (last_error)
+               return last_error;
+
+       range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
+       if (copy_to_user(urange, &range, sizeof(range)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
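+/*
+ * xfs_ioc_trim() backs the generic FITRIM ioctl. A hedged userspace
+ * sketch, using the standard fstrim_range layout from <linux/fs.h>
+ * and assuming fd is any open file on the XFS mount:
+ *
+ *     struct fstrim_range r = {
+ *             .start  = 0,
+ *             .len    = ULLONG_MAX,   // the fstrim default noted above
+ *             .minlen = 0,
+ *     };
+ *     if (ioctl(fd, FITRIM, &r) == 0)
+ *             printf("%llu bytes trimmed\n",
+ *                    (unsigned long long)r.len);
+ */
+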
+int
+xfs_discard_extents(
+       struct xfs_mount        *mp,
+       struct list_head        *list)
+{
+       struct xfs_busy_extent  *busyp;
+       int                     error = 0;
+
+       list_for_each_entry(busyp, list, list) {
+               trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
+                                        busyp->length);
+
+               error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+                               XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+                               XFS_FSB_TO_BB(mp, busyp->length),
+                               GFP_NOFS, 0);
+               if (error && error != EOPNOTSUPP) {
+                       xfs_info(mp,
+        "discard failed for extent [0x%llx,%u], error %d",
+                                (unsigned long long)busyp->bno,
+                                busyp->length,
+                                error);
+                       return error;
+               }
+       }
+
+       return 0;
+}
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h
new file mode 100644 (file)
index 0000000..344879a
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef XFS_DISCARD_H
+#define XFS_DISCARD_H 1
+
+struct fstrim_range;
+struct list_head;
+
+extern int     xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
+extern int     xfs_discard_extents(struct xfs_mount *, struct list_head *);
+
+#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
new file mode 100644 (file)
index 0000000..db62959
--- /dev/null
@@ -0,0 +1,1454 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+
+/*
+   LOCK ORDER
+
+   inode lock              (ilock)
+   dquot hash-chain lock    (hashlock)
+   xqm dquot freelist lock  (freelistlock)
+   mount's dquot list lock  (mplistlock)
+   user dquot lock - lock ordering among dquots is based on the uid or gid
+   group dquot lock - similar to udquots. Between the two dquots, the udquot
+                     has to be locked first.
+   pin lock - the dquot lock must be held to take this lock.
+   flush lock - ditto.
+*/
+
+#ifdef DEBUG
+xfs_buftarg_t *xfs_dqerror_target;
+int xfs_do_dqerror;
+int xfs_dqreq_num;
+int xfs_dqerror_mod = 33;
+#endif
+
+static struct lock_class_key xfs_dquot_other_class;
+
+/*
+ * Allocate and initialize a dquot. We don't always allocate fresh memory;
+ * we try to reclaim a free dquot if the number of incore dquots is above
+ * a threshold.
+ * The only field inside the core that gets initialized at this point
+ * is the d_id field. The idea is to fill in the entire q_core
+ * when we read in the on disk dquot.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqinit(
+       xfs_mount_t  *mp,
+       xfs_dqid_t   id,
+       uint         type)
+{
+       xfs_dquot_t     *dqp;
+       boolean_t       brandnewdquot;
+
+       brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
+       dqp->dq_flags = type;
+       dqp->q_core.d_id = cpu_to_be32(id);
+       dqp->q_mount = mp;
+
+       /*
+        * No need to re-initialize these if this is a reclaimed dquot.
+        */
+       if (brandnewdquot) {
+               INIT_LIST_HEAD(&dqp->q_freelist);
+               mutex_init(&dqp->q_qlock);
+               init_waitqueue_head(&dqp->q_pinwait);
+
+               /*
+                * Because we want to use a counting completion, complete
+                * the flush completion once to allow a single access to
+                * the flush completion without blocking.
+                */
+               init_completion(&dqp->q_flush);
+               complete(&dqp->q_flush);
+
+               trace_xfs_dqinit(dqp);
+       } else {
+               /*
+                * Only the q_core portion was zeroed in dqreclaim_one().
+                * So, we need to reset others.
+                */
+               dqp->q_nrefs = 0;
+               dqp->q_blkno = 0;
+               INIT_LIST_HEAD(&dqp->q_mplist);
+               INIT_LIST_HEAD(&dqp->q_hashlist);
+               dqp->q_bufoffset = 0;
+               dqp->q_fileoffset = 0;
+               dqp->q_transp = NULL;
+               dqp->q_gdquot = NULL;
+               dqp->q_res_bcount = 0;
+               dqp->q_res_icount = 0;
+               dqp->q_res_rtbcount = 0;
+               atomic_set(&dqp->q_pincount, 0);
+               dqp->q_hash = NULL;
+               ASSERT(list_empty(&dqp->q_freelist));
+
+               trace_xfs_dqreuse(dqp);
+       }
+
+       /*
+        * In either case we need to make sure group quotas have a different
+        * lock class than user quotas, to make sure lockdep knows we can
+        * take locks of one of each at the same time.
+        */
+       if (!(type & XFS_DQ_USER))
+               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+
+       /*
+        * log item gets initialized later
+        */
+       return (dqp);
+}
+
+/*
+ * This is called to free all the memory associated with a dquot
+ */
+void
+xfs_qm_dqdestroy(
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(list_empty(&dqp->q_freelist));
+
+       mutex_destroy(&dqp->q_qlock);
+       kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+
+       atomic_dec(&xfs_Gqm->qm_totaldquots);
+}
+
+/*
+ * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
+ */
+STATIC void
+xfs_qm_dqinit_core(
+       xfs_dqid_t      id,
+       uint            type,
+       xfs_dqblk_t     *d)
+{
+       /*
+        * Caller has zeroed the entire dquot 'chunk' already.
+        */
+       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+       d->dd_diskdq.d_id = cpu_to_be32(id);
+       d->dd_diskdq.d_flags = type;
+}
+
+/*
+ * If default limits are in force, push them into the dquot now.
+ * We overwrite the dquot limits only if they are zero and this
+ * is not the root dquot.
+ */
+void
+xfs_qm_adjust_dqlimits(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *d)
+{
+       xfs_quotainfo_t         *q = mp->m_quotainfo;
+
+       ASSERT(d->d_id);
+
+       if (q->qi_bsoftlimit && !d->d_blk_softlimit)
+               d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+       if (q->qi_bhardlimit && !d->d_blk_hardlimit)
+               d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+       if (q->qi_isoftlimit && !d->d_ino_softlimit)
+               d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
+       if (q->qi_ihardlimit && !d->d_ino_hardlimit)
+               d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
+       if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
+               d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
+       if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
+               d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+}
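+
+/*
+ * To make the overwrite rule above concrete: if the administrator has
+ * set a filesystem-wide default block soft limit of, say, 1000 blocks,
+ * a dquot read from disk with d_blk_softlimit == 0 picks up 1000 here,
+ * while a dquot that already carries an explicit non-zero limit is
+ * left untouched.
+ */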
+
+/*
+ * Check the limits and timers of a dquot and start or reset timers
+ * if necessary.
+ * This gets called even when quota enforcement is OFF, which makes our
+ * life a little less complicated. (We just don't reject any quota
+ * reservations in that case.)
+ * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
+ * enforcement's off.
+ * In contrast, warnings are a little different in that they don't
+ * 'automatically' get started when limits get exceeded.  They do
+ * get reset to zero, however, when we find the count to be under
+ * the soft limit (they are only ever set non-zero via userspace).
+ */
+void
+xfs_qm_adjust_dqtimers(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *d)
+{
+       ASSERT(d->d_id);
+
+#ifdef DEBUG
+       if (d->d_blk_hardlimit)
+               ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
+                      be64_to_cpu(d->d_blk_hardlimit));
+       if (d->d_ino_hardlimit)
+               ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
+                      be64_to_cpu(d->d_ino_hardlimit));
+       if (d->d_rtb_hardlimit)
+               ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
+                      be64_to_cpu(d->d_rtb_hardlimit));
+#endif
+
+       if (!d->d_btimer) {
+               if ((d->d_blk_softlimit &&
+                    (be64_to_cpu(d->d_bcount) >=
+                     be64_to_cpu(d->d_blk_softlimit))) ||
+                   (d->d_blk_hardlimit &&
+                    (be64_to_cpu(d->d_bcount) >=
+                     be64_to_cpu(d->d_blk_hardlimit)))) {
+                       d->d_btimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_btimelimit);
+               } else {
+                       d->d_bwarns = 0;
+               }
+       } else {
+               if ((!d->d_blk_softlimit ||
+                    (be64_to_cpu(d->d_bcount) <
+                     be64_to_cpu(d->d_blk_softlimit))) &&
+                   (!d->d_blk_hardlimit ||
+                   (be64_to_cpu(d->d_bcount) <
+                    be64_to_cpu(d->d_blk_hardlimit)))) {
+                       d->d_btimer = 0;
+               }
+       }
+
+       if (!d->d_itimer) {
+               if ((d->d_ino_softlimit &&
+                    (be64_to_cpu(d->d_icount) >=
+                     be64_to_cpu(d->d_ino_softlimit))) ||
+                   (d->d_ino_hardlimit &&
+                    (be64_to_cpu(d->d_icount) >=
+                     be64_to_cpu(d->d_ino_hardlimit)))) {
+                       d->d_itimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_itimelimit);
+               } else {
+                       d->d_iwarns = 0;
+               }
+       } else {
+               if ((!d->d_ino_softlimit ||
+                    (be64_to_cpu(d->d_icount) <
+                     be64_to_cpu(d->d_ino_softlimit)))  &&
+                   (!d->d_ino_hardlimit ||
+                    (be64_to_cpu(d->d_icount) <
+                     be64_to_cpu(d->d_ino_hardlimit)))) {
+                       d->d_itimer = 0;
+               }
+       }
+
+       if (!d->d_rtbtimer) {
+               if ((d->d_rtb_softlimit &&
+                    (be64_to_cpu(d->d_rtbcount) >=
+                     be64_to_cpu(d->d_rtb_softlimit))) ||
+                   (d->d_rtb_hardlimit &&
+                    (be64_to_cpu(d->d_rtbcount) >=
+                     be64_to_cpu(d->d_rtb_hardlimit)))) {
+                       d->d_rtbtimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_rtbtimelimit);
+               } else {
+                       d->d_rtbwarns = 0;
+               }
+       } else {
+               if ((!d->d_rtb_softlimit ||
+                    (be64_to_cpu(d->d_rtbcount) <
+                     be64_to_cpu(d->d_rtb_softlimit))) &&
+                   (!d->d_rtb_hardlimit ||
+                    (be64_to_cpu(d->d_rtbcount) <
+                     be64_to_cpu(d->d_rtb_hardlimit)))) {
+                       d->d_rtbtimer = 0;
+               }
+       }
+}
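+
+/*
+ * Put differently, each timer records the end of a grace period.
+ * Assuming a block soft limit of 100, a hard limit of 200 and the
+ * default qi_btimelimit of seven days, the first time d_bcount crosses
+ * 100 the code above arms d_btimer to "now + 7 days"; if usage later
+ * drops back below the soft limit the timer is cleared again.
+ */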
+
+/*
+ * initialize a buffer full of dquots and log the whole thing
+ */
+STATIC void
+xfs_qm_init_dquot_blk(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,
+       uint            type,
+       xfs_buf_t       *bp)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       xfs_dqblk_t     *d;
+       int             curid, i;
+
+       ASSERT(tp);
+       ASSERT(xfs_buf_islocked(bp));
+
+       d = bp->b_addr;
+
+       /*
+        * ID of the first dquot in the block - id's are zero based.
+        */
+       curid = id - (id % q->qi_dqperchunk);
+       ASSERT(curid >= 0);
+       memset(d, 0, BBTOB(q->qi_dqchunklen));
+       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
+               xfs_qm_dqinit_core(curid, type, d);
+       xfs_trans_dquot_buf(tp, bp,
+                           (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
+                           ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
+                            XFS_BLF_GDQUOT_BUF)));
+       xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
+}
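+
+/*
+ * An arithmetic sketch of the curid computation above: with 4k
+ * filesystem blocks qi_dqperchunk works out to 30 dquots per chunk, so
+ * a request for id 95 initialises the chunk starting at curid 90
+ * (95 - 95 % 30), covering ids 90..119.
+ */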
+
+/*
+ * Allocate a block and fill it with dquots.
+ * This is called when the bmapi finds a hole.
+ */
+STATIC int
+xfs_qm_dqalloc(
+       xfs_trans_t     **tpp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *dqp,
+       xfs_inode_t     *quotip,
+       xfs_fileoff_t   offset_fsb,
+       xfs_buf_t       **O_bpp)
+{
+       xfs_fsblock_t   firstblock;
+       xfs_bmap_free_t flist;
+       xfs_bmbt_irec_t map;
+       int             nmaps, error, committed;
+       xfs_buf_t       *bp;
+       xfs_trans_t     *tp = *tpp;
+
+       ASSERT(tp != NULL);
+
+       trace_xfs_dqalloc(dqp);
+
+       /*
+        * Initialize the bmap freelist prior to calling bmapi code.
+        */
+       xfs_bmap_init(&flist, &firstblock);
+       xfs_ilock(quotip, XFS_ILOCK_EXCL);
+       /*
+        * Return if this type of quota was turned off while we didn't
+        * hold the inode lock.
+        */
+       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+               xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+               return (ESRCH);
+       }
+
+       xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
+       nmaps = 1;
+       if ((error = xfs_bmapi(tp, quotip,
+                             offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
+                             XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
+                             &firstblock,
+                             XFS_QM_DQALLOC_SPACE_RES(mp),
+                             &map, &nmaps, &flist))) {
+               goto error0;
+       }
+       ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
+       ASSERT(nmaps == 1);
+       ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+              (map.br_startblock != HOLESTARTBLOCK));
+
+       /*
+        * Keep track of the blkno to save a lookup later
+        */
+       dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+       /* now we can just get the buffer (there's nothing to read yet) */
+       bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+                              dqp->q_blkno,
+                              mp->m_quotainfo->qi_dqchunklen,
+                              0);
+       if (!bp || (error = xfs_buf_geterror(bp)))
+               goto error1;
+       /*
+        * Make a chunk of dquots out of this buffer and log
+        * the entire thing.
+        */
+       xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
+                             dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
+
+       /*
+        * xfs_bmap_finish() may commit the current transaction and
+        * start a second transaction if the freelist is not empty.
+        *
+        * Since we still want to modify this buffer, we need to
+        * ensure that the buffer is not released on commit of
+        * the first transaction and ensure the buffer is added to the
+        * second transaction.
+        *
+        * If there is only one transaction then don't stop the buffer
+        * from being released when it commits later on.
+        */
+
+       xfs_trans_bhold(tp, bp);
+
+       if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
+               goto error1;
+       }
+
+       if (committed) {
+               tp = *tpp;
+               xfs_trans_bjoin(tp, bp);
+       } else {
+               xfs_trans_bhold_release(tp, bp);
+       }
+
+       *O_bpp = bp;
+       return 0;
+
+      error1:
+       xfs_bmap_cancel(&flist);
+      error0:
+       xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+
+       return (error);
+}
+
+/*
+ * Maps a dquot to the buffer containing its on-disk version.
+ * This returns a ptr to the buffer containing the on-disk dquot
+ * in the bpp param, and a ptr to the on-disk dquot within that buffer
+ */
+STATIC int
+xfs_qm_dqtobp(
+       xfs_trans_t             **tpp,
+       xfs_dquot_t             *dqp,
+       xfs_disk_dquot_t        **O_ddpp,
+       xfs_buf_t               **O_bpp,
+       uint                    flags)
+{
+       xfs_bmbt_irec_t map;
+       int             nmaps = 1, error;
+       xfs_buf_t       *bp;
+       xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
+       xfs_mount_t     *mp = dqp->q_mount;
+       xfs_disk_dquot_t *ddq;
+       xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
+       xfs_trans_t     *tp = (tpp ? *tpp : NULL);
+
+       dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
+
+       xfs_ilock(quotip, XFS_ILOCK_SHARED);
+       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+               /*
+                * Return if this type of quota was turned off while we
+                * didn't hold the quota inode lock.
+                */
+               xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+               return ESRCH;
+       }
+
+       /*
+        * Find the block map; no allocations yet
+        */
+       error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+                         XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
+                         NULL, 0, &map, &nmaps, NULL);
+
+       xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+       if (error)
+               return error;
+
+       ASSERT(nmaps == 1);
+       ASSERT(map.br_blockcount == 1);
+
+       /*
+        * Offset of dquot in the (fixed sized) dquot chunk.
+        */
+       dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
+               sizeof(xfs_dqblk_t);
+
+       ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+       if (map.br_startblock == HOLESTARTBLOCK) {
+               /*
+                * We don't allocate unless we're asked to
+                */
+               if (!(flags & XFS_QMOPT_DQALLOC))
+                       return ENOENT;
+
+               ASSERT(tp);
+               error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
+                                       dqp->q_fileoffset, &bp);
+               if (error)
+                       return error;
+               tp = *tpp;
+       } else {
+               trace_xfs_dqtobp_read(dqp);
+
+               /*
+                * store the blkno etc so that we don't have to do the
+                * mapping all the time
+                */
+               dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+               error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                          dqp->q_blkno,
+                                          mp->m_quotainfo->qi_dqchunklen,
+                                          0, &bp);
+               if (error || !bp)
+                       return XFS_ERROR(error);
+       }
+
+       ASSERT(xfs_buf_islocked(bp));
+
+       /*
+        * calculate the location of the dquot inside the buffer.
+        */
+       ddq = bp->b_addr + dqp->q_bufoffset;
+
+       /*
+        * A simple sanity check in case we got a corrupted dquot...
+        */
+       error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
+                          flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
+                          "dqtobp");
+       if (error) {
+               if (!(flags & XFS_QMOPT_DQREPAIR)) {
+                       xfs_trans_brelse(tp, bp);
+                       return XFS_ERROR(EIO);
+               }
+       }
+
+       *O_bpp = bp;
+       *O_ddpp = ddq;
+
+       return (0);
+}
+
+
+/*
+ * Read in the on-disk dquot using dqtobp(), then copy it to an incore
+ * version and release the buffer immediately.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqread(
+       xfs_trans_t     **tpp,
+       xfs_dqid_t      id,
+       xfs_dquot_t     *dqp,   /* dquot to get filled in */
+       uint            flags)
+{
+       xfs_disk_dquot_t *ddqp;
+       xfs_buf_t        *bp;
+       int              error;
+       xfs_trans_t      *tp;
+
+       ASSERT(tpp);
+
+       trace_xfs_dqread(dqp);
+
+       /*
+        * get a pointer to the on-disk dquot and the buffer containing it
+        * dqp already knows its own type (GROUP/USER).
+        */
+       if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
+               return (error);
+       }
+       tp = *tpp;
+
+       /* copy everything from disk dquot to the incore dquot */
+       memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
+       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+       xfs_qm_dquot_logitem_init(dqp);
+
+       /*
+        * Reservation counters are defined as reservation plus current usage
+        * to avoid having to add every time.
+        */
+       dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
+       dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
+       dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
+
+       /* Mark the buf so that this will stay incore a little longer */
+       XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
+
+       /*
+        * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
+        * So we need to release with xfs_trans_brelse().
+        * The strategy here is identical to that of inodes; we lock
+        * the dquot in xfs_qm_dqget() before making it accessible to
+        * others. This is because dquots, like inodes, need a good level of
+        * concurrency, and we don't want to take locks on the entire buffers
+        * for dquot accesses.
+        * Note also that the dquot buffer may even be dirty at this point, if
+        * this particular dquot was repaired. We still aren't afraid to
+        * brelse it because we have the changes incore.
+        */
+       ASSERT(xfs_buf_islocked(bp));
+       xfs_trans_brelse(tp, bp);
+
+       return (error);
+}
+
+
+/*
+ * allocate an incore dquot from the kernel heap,
+ * and fill its core with quota information kept on disk.
+ * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
+ * if it wasn't already allocated.
+ */
+STATIC int
+xfs_qm_idtodq(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,      /* gid or uid, depending on type */
+       uint            type,    /* UDQUOT or GDQUOT */
+       uint            flags,   /* DQALLOC, DQREPAIR */
+       xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+       xfs_trans_t     *tp;
+       int             cancelflags = 0;
+
+       dqp = xfs_qm_dqinit(mp, id, type);
+       tp = NULL;
+       if (flags & XFS_QMOPT_DQALLOC) {
+               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+                               XFS_WRITE_LOG_RES(mp) +
+                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
+                               128,
+                               0,
+                               XFS_TRANS_PERM_LOG_RES,
+                               XFS_WRITE_LOG_COUNT);
+               if (error) {
+                       cancelflags = 0;
+                       goto error0;
+               }
+               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+       }
+
+       /*
+        * Read it from disk; xfs_dqread() takes care of
+        * all the necessary initialization of dquot's fields (locks, etc)
+        */
+       if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
+               /*
+                * This can happen if quotas got turned off (ESRCH),
+                * or if the dquot didn't exist on disk and we ask to
+                * allocate (ENOENT).
+                */
+               trace_xfs_dqread_fail(dqp);
+               cancelflags |= XFS_TRANS_ABORT;
+               goto error0;
+       }
+       if (tp) {
+               if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
+                       goto error1;
+       }
+
+       *O_dqpp = dqp;
+       return (0);
+
+ error0:
+       ASSERT(error);
+       if (tp)
+               xfs_trans_cancel(tp, cancelflags);
+ error1:
+       xfs_qm_dqdestroy(dqp);
+       *O_dqpp = NULL;
+       return (error);
+}
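+
+/*
+ * Summary sketch of the error contract above (restating the code, not
+ * adding to it):
+ *
+ *	reserve fails:	cancel with cancelflags == 0		(error0)
+ *	dqread fails:	cancel with XFS_TRANS_ABORT or'd in,
+ *			if a transaction exists at all		(error0)
+ *	commit fails:	the transaction is already gone; only
+ *			the dquot itself needs destroying	(error1)
+ */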
+
+/*
+ * Lookup a dquot in the incore dquot hashtable. We keep two separate
+ * hashtables for user and group dquots; these are global tables
+ * inside the XQM, not per-filesystem tables.
+ * The hash chain must be locked by caller, and it is left locked
+ * on return. Returning dquot is locked.
+ */
+STATIC int
+xfs_qm_dqlookup(
+       xfs_mount_t             *mp,
+       xfs_dqid_t              id,
+       xfs_dqhash_t            *qh,
+       xfs_dquot_t             **O_dqpp)
+{
+       xfs_dquot_t             *dqp;
+       uint                    flist_locked;
+
+       ASSERT(mutex_is_locked(&qh->qh_lock));
+
+       flist_locked = B_FALSE;
+
+       /*
+        * Traverse the hashchain looking for a match
+        */
+       list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
+               /*
+                * We already have the hashlock. We don't need the
+                * dqlock to look at the id field of the dquot, since the
+                * id can't be modified without the hashlock anyway.
+                */
+               if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
+                       trace_xfs_dqlookup_found(dqp);
+
+                       /*
+                        * All in core dquots must be on the dqlist of mp
+                        */
+                       ASSERT(!list_empty(&dqp->q_mplist));
+
+                       xfs_dqlock(dqp);
+                       if (dqp->q_nrefs == 0) {
+                               ASSERT(!list_empty(&dqp->q_freelist));
+                               if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+                                       trace_xfs_dqlookup_want(dqp);
+
+                                       /*
+                                        * We may have raced with dqreclaim_one()
+                                        * (and lost). So, flag that we don't
+                                        * want the dquot to be reclaimed.
+                                        */
+                                       dqp->dq_flags |= XFS_DQ_WANT;
+                                       xfs_dqunlock(dqp);
+                                       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+                                       xfs_dqlock(dqp);
+                                       dqp->dq_flags &= ~(XFS_DQ_WANT);
+                               }
+                               flist_locked = B_TRUE;
+                       }
+
+                       /*
+                        * id couldn't have changed; we had the hashlock all
+                        * along
+                        */
+                       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+
+                       if (flist_locked) {
+                               if (dqp->q_nrefs != 0) {
+                                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                                       flist_locked = B_FALSE;
+                               } else {
+                                       /* take it off the freelist */
+                                       trace_xfs_dqlookup_freelist(dqp);
+                                       list_del_init(&dqp->q_freelist);
+                                       xfs_Gqm->qm_dqfrlist_cnt--;
+                               }
+                       }
+
+                       XFS_DQHOLD(dqp);
+
+                       if (flist_locked)
+                               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                       /*
+                        * move the dquot to the front of the hashchain
+                        */
+                       ASSERT(mutex_is_locked(&qh->qh_lock));
+                       list_move(&dqp->q_hashlist, &qh->qh_list);
+                       trace_xfs_dqlookup_done(dqp);
+                       *O_dqpp = dqp;
+                       return 0;
+               }
+       }
+
+       *O_dqpp = NULL;
+       ASSERT(mutex_is_locked(&qh->qh_lock));
+       return (1);
+}
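+
+/*
+ * Caller-side sketch of the protocol above (this is what xfs_qm_dqget()
+ * below actually does): the chain lock is taken and released by the
+ * caller and held across the lookup; on a hit the returned dquot comes
+ * back locked, referenced, and off the freelist.
+ *
+ *	mutex_lock(&h->qh_lock);
+ *	if (xfs_qm_dqlookup(mp, id, h, &dqp) == 0) {
+ *		mutex_unlock(&h->qh_lock);
+ *		... use the locked dqp ...
+ *	} else {
+ *		mutex_unlock(&h->qh_lock);
+ *		... cache miss, go read from disk ...
+ *	}
+ */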
+
+/*
+ * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
+ * locked dquot, doing an allocation (if requested) as needed.
+ * When both an inode and an id are given, the inode's id takes precedence.
+ * That is, if the id changes while we don't hold the ilock inside this
+ * function, the new dquot is returned, not necessarily the one requested
+ * in the id argument.
+ */
+int
+xfs_qm_dqget(
+       xfs_mount_t     *mp,
+       xfs_inode_t     *ip,      /* locked inode (optional) */
+       xfs_dqid_t      id,       /* uid/projid/gid depending on type */
+       uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
+       uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
+       xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
+{
+       xfs_dquot_t     *dqp;
+       xfs_dqhash_t    *h;
+       uint            version;
+       int             error;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+       if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
+           (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
+           (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
+               return (ESRCH);
+       }
+       h = XFS_DQ_HASH(mp, id, type);
+
+#ifdef DEBUG
+       if (xfs_do_dqerror) {
+               if ((xfs_dqerror_target == mp->m_ddev_targp) &&
+                   (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
+                       xfs_debug(mp, "Returning error in dqget");
+                       return (EIO);
+               }
+       }
+#endif
+
+ again:
+
+#ifdef DEBUG
+       ASSERT(type == XFS_DQ_USER ||
+              type == XFS_DQ_PROJ ||
+              type == XFS_DQ_GROUP);
+       if (ip) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+               if (type == XFS_DQ_USER)
+                       ASSERT(ip->i_udquot == NULL);
+               else
+                       ASSERT(ip->i_gdquot == NULL);
+       }
+#endif
+       mutex_lock(&h->qh_lock);
+
+       /*
+        * Look in the cache (hashtable).
+        * The chain is kept locked during lookup.
+        */
+       if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+               XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
+               /*
+                * The dquot was found, moved to the front of the chain,
+                * taken off the freelist if it was on it, and locked
+                * at this point. Just unlock the hashchain and return.
+                */
+               ASSERT(*O_dqpp);
+               ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
+               mutex_unlock(&h->qh_lock);
+               trace_xfs_dqget_hit(*O_dqpp);
+               return (0);     /* success */
+       }
+       XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+
+       /*
+        * Dquot cache miss. We don't want to keep the inode lock across
+        * a (potential) disk read. Also we don't want to deal with the lock
+        * ordering between quotainode and this inode. OTOH, dropping the inode
+        * lock here means dealing with a chown that can happen before
+        * we re-acquire the lock.
+        */
+       if (ip)
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       /*
+        * Save the hashchain version stamp, and unlock the chain, so that
+        * we don't keep the lock across a disk read
+        */
+       version = h->qh_version;
+       mutex_unlock(&h->qh_lock);
+
+       /*
+        * Allocate the dquot on the kernel heap, and read the ondisk
+        * portion off the disk. Also, do all the necessary initialization
+        * This can return ENOENT if dquot didn't exist on disk and we didn't
+        * ask it to allocate; ESRCH if quotas got turned off suddenly.
+        */
+       if ((error = xfs_qm_idtodq(mp, id, type,
+                                 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
+                                          XFS_QMOPT_DOWARN),
+                                 &dqp))) {
+               if (ip)
+                       xfs_ilock(ip, XFS_ILOCK_EXCL);
+               return (error);
+       }
+
+       /*
+        * See if this is mount code calling to look at the overall quota limits
+        * which are stored in the id == 0 user or group's dquot.
+        * Since we may not have done a quotacheck by this point, just return
+        * the dquot without attaching it to any hashtables, lists, etc, or even
+        * taking a reference.
+        * The caller must dqdestroy this once done.
+        */
+       if (flags & XFS_QMOPT_DQSUSER) {
+               ASSERT(id == 0);
+               ASSERT(! ip);
+               goto dqret;
+       }
+
+       /*
+        * Dquot lock comes after hashlock in the lock ordering
+        */
+       if (ip) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+               /*
+                * A dquot could be attached to this inode by now, since
+                * we had dropped the ilock.
+                */
+               if (type == XFS_DQ_USER) {
+                       if (!XFS_IS_UQUOTA_ON(mp)) {
+                               /* inode stays locked on return */
+                               xfs_qm_dqdestroy(dqp);
+                               return XFS_ERROR(ESRCH);
+                       }
+                       if (ip->i_udquot) {
+                               xfs_qm_dqdestroy(dqp);
+                               dqp = ip->i_udquot;
+                               xfs_dqlock(dqp);
+                               goto dqret;
+                       }
+               } else {
+                       if (!XFS_IS_OQUOTA_ON(mp)) {
+                               /* inode stays locked on return */
+                               xfs_qm_dqdestroy(dqp);
+                               return XFS_ERROR(ESRCH);
+                       }
+                       if (ip->i_gdquot) {
+                               xfs_qm_dqdestroy(dqp);
+                               dqp = ip->i_gdquot;
+                               xfs_dqlock(dqp);
+                               goto dqret;
+                       }
+               }
+       }
+
+       /*
+        * Hashlock comes after ilock in lock order
+        */
+       mutex_lock(&h->qh_lock);
+       if (version != h->qh_version) {
+               xfs_dquot_t *tmpdqp;
+               /*
+                * Now, see if somebody else put the dquot in the
+                * hashtable before us. This can happen because we didn't
+                * keep the hashchain lock. We don't have to worry about
+                * lock order between the two dquots here since dqp isn't
+                * on any findable lists yet.
+                */
+               if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+                       /*
+                        * Duplicate found. Just throw away the new dquot
+                        * and start over.
+                        */
+                       xfs_qm_dqput(tmpdqp);
+                       mutex_unlock(&h->qh_lock);
+                       xfs_qm_dqdestroy(dqp);
+                       XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+                       goto again;
+               }
+       }
+
+       /*
+        * Put the dquot at the beginning of the hash-chain and mp's list
+        * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
+        */
+       ASSERT(mutex_is_locked(&h->qh_lock));
+       dqp->q_hash = h;
+       list_add(&dqp->q_hashlist, &h->qh_list);
+       h->qh_version++;
+
+       /*
+        * Attach this dquot to this filesystem's list of all dquots,
+        * kept inside the mount structure in m_quotainfo field
+        */
+       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
+
+       /*
+        * We return a locked dquot to the caller, with a reference taken
+        */
+       xfs_dqlock(dqp);
+       dqp->q_nrefs = 1;
+
+       list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
+       mp->m_quotainfo->qi_dquots++;
+       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+       mutex_unlock(&h->qh_lock);
+ dqret:
+       ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       trace_xfs_dqget_miss(dqp);
+       *O_dqpp = dqp;
+       return (0);
+}
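+
+/*
+ * A minimal usage sketch for the common case (error handling elided;
+ * no inode and no allocation involved): look up the user dquot for an
+ * id, inspect it, and drop the reference.
+ *
+ *	xfs_dquot_t	*dqp;
+ *
+ *	if (xfs_qm_dqget(mp, NULL, id, XFS_DQ_USER, 0, &dqp) == 0) {
+ *		... dqp is locked with one reference held ...
+ *		xfs_qm_dqput(dqp);	(drops the reference and unlocks)
+ *	}
+ */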
+
+
+/*
+ * Release a reference to the dquot (decrement ref-count)
+ * and unlock it. If there is a group quota attached to this
+ * dquot, carefully release that too without tripping over
+ * deadlocks'n'stuff.
+ */
+void
+xfs_qm_dqput(
+       xfs_dquot_t     *dqp)
+{
+       xfs_dquot_t     *gdqp;
+
+       ASSERT(dqp->q_nrefs > 0);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       trace_xfs_dqput(dqp);
+
+       if (dqp->q_nrefs != 1) {
+               dqp->q_nrefs--;
+               xfs_dqunlock(dqp);
+               return;
+       }
+
+       /*
+        * drop the dqlock and acquire the freelist and dqlock
+        * in the right order; but try to get it out-of-order first
+        */
+       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+               trace_xfs_dqput_wait(dqp);
+               xfs_dqunlock(dqp);
+               mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+               xfs_dqlock(dqp);
+       }
+
+       while (1) {
+               gdqp = NULL;
+
+               /* We can't depend on nrefs being == 1 here */
+               if (--dqp->q_nrefs == 0) {
+                       trace_xfs_dqput_free(dqp);
+
+                       list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+                       xfs_Gqm->qm_dqfrlist_cnt++;
+
+                       /*
+                        * If we just added a udquot to the freelist, then
+                        * we want to release the gdquot reference that
+                        * it (probably) has. Otherwise it'll keep the
+                        * gdquot from getting reclaimed.
+                        */
+                       if ((gdqp = dqp->q_gdquot)) {
+                               /*
+                                * Avoid a recursive dqput call
+                                */
+                               xfs_dqlock(gdqp);
+                               dqp->q_gdquot = NULL;
+                       }
+               }
+               xfs_dqunlock(dqp);
+
+               /*
+                * If we had a group quota inside the user quota as a hint,
+                * release it now.
+                */
+               if (! gdqp)
+                       break;
+               dqp = gdqp;
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+}
+
+/*
+ * Release a dquot. Flush it if dirty, then dqput() it.
+ * dquot must not be locked.
+ */
+void
+xfs_qm_dqrele(
+       xfs_dquot_t     *dqp)
+{
+       if (!dqp)
+               return;
+
+       trace_xfs_dqrele(dqp);
+
+       xfs_dqlock(dqp);
+       /*
+        * We don't care to flush it if the dquot is dirty here.
+        * That will create stutters that we want to avoid.
+        * Instead we do a delayed write when we try to reclaim
+        * a dirty dquot. Also xfs_sync will take part of the burden...
+        */
+       xfs_qm_dqput(dqp);
+}
+
+/*
+ * This is the dquot flushing I/O completion routine.  It is called
+ * from interrupt level when the buffer containing the dquot is
+ * flushed to disk.  It is responsible for removing the dquot logitem
+ * from the AIL if it has not been re-logged, and unlocking the dquot's
+ * flush lock. This behavior is very similar to that of inodes.
+ */
+STATIC void
+xfs_qm_dqflush_done(
+       struct xfs_buf          *bp,
+       struct xfs_log_item     *lip)
+{
+       xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
+       xfs_dquot_t             *dqp = qip->qli_dquot;
+       struct xfs_ail          *ailp = lip->li_ailp;
+
+       /*
+        * We only want to pull the item from the AIL if its
+        * location in the log has not changed since we started the flush.
+        * Thus, we only bother if the dquot's lsn has
+        * not changed. First we check the lsn outside the lock
+        * since it's cheaper, and then we recheck while
+        * holding the lock before removing the dquot from the AIL.
+        */
+       if ((lip->li_flags & XFS_LI_IN_AIL) &&
+           lip->li_lsn == qip->qli_flush_lsn) {
+
+               /* xfs_trans_ail_delete() drops the AIL lock. */
+               spin_lock(&ailp->xa_lock);
+               if (lip->li_lsn == qip->qli_flush_lsn)
+                       xfs_trans_ail_delete(ailp, lip);
+               else
+                       spin_unlock(&ailp->xa_lock);
+       }
+
+       /*
+        * Release the dq's flush lock since we're done with it.
+        */
+       xfs_dqfunlock(dqp);
+}
+
+/*
+ * Write a modified dquot to disk.
+ * The dquot must be locked and the flush lock too taken by caller.
+ * The flush lock will not be unlocked until the dquot reaches the disk,
+ * but the dquot is free to be unlocked and modified by the caller
+ * in the interim. Dquot is still locked on return. This behavior is
+ * identical to that of inodes.
+ */
+int
+xfs_qm_dqflush(
+       xfs_dquot_t             *dqp,
+       uint                    flags)
+{
+       struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_buf          *bp;
+       struct xfs_disk_dquot   *ddqp;
+       int                     error;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(!completion_done(&dqp->q_flush));
+
+       trace_xfs_dqflush(dqp);
+
+       /*
+        * If not dirty, or it's pinned and we are not supposed to block, nada.
+        */
+       if (!XFS_DQ_IS_DIRTY(dqp) ||
+           (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
+               xfs_dqfunlock(dqp);
+               return 0;
+       }
+       xfs_qm_dqunpin_wait(dqp);
+
+       /*
+        * This may have been unpinned because the filesystem is shutting
+        * down forcibly. If that's the case we must not write this dquot
+        * to disk, because the log record didn't make it to disk!
+        */
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               dqp->dq_flags &= ~XFS_DQ_DIRTY;
+               xfs_dqfunlock(dqp);
+               return XFS_ERROR(EIO);
+       }
+
+       /*
+        * Get the buffer containing the on-disk dquot
+        */
+       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+                                  mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+       if (error) {
+               ASSERT(error != ENOENT);
+               xfs_dqfunlock(dqp);
+               return error;
+       }
+
+       /*
+        * Calculate the location of the dquot inside the buffer.
+        */
+       ddqp = bp->b_addr + dqp->q_bufoffset;
+
+       /*
+        * A simple sanity check in case we got a corrupted dquot..
+        */
+       error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
+                          XFS_QMOPT_DOWARN, "dqflush (incore copy)");
+       if (error) {
+               xfs_buf_relse(bp);
+               xfs_dqfunlock(dqp);
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               return XFS_ERROR(EIO);
+       }
+
+       /* This is the only portion of data that needs to persist */
+       memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
+
+       /*
+        * Clear the dirty field and remember the flush lsn for later use.
+        */
+       dqp->dq_flags &= ~XFS_DQ_DIRTY;
+
+       xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
+                                       &dqp->q_logitem.qli_item.li_lsn);
+
+       /*
+        * Attach an iodone routine so that we can remove this dquot from the
+        * AIL and release the flush lock once the dquot is synced to disk.
+        */
+       xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
+                                 &dqp->q_logitem.qli_item);
+
+       /*
+        * If the buffer is pinned then push on the log so we won't
+        * get stuck waiting in the write for too long.
+        */
+       if (xfs_buf_ispinned(bp)) {
+               trace_xfs_dqflush_force(dqp);
+               xfs_log_force(mp, 0);
+       }
+
+       if (flags & SYNC_WAIT)
+               error = xfs_bwrite(mp, bp);
+       else
+               xfs_bdwrite(mp, bp);
+
+       trace_xfs_dqflush_done(dqp);
+
+       /*
+        * dqp is still locked, but caller is free to unlock it now.
+        */
+       return error;
+
+}
+
+int
+xfs_qm_dqlock_nowait(
+       xfs_dquot_t *dqp)
+{
+       return mutex_trylock(&dqp->q_qlock);
+}
+
+void
+xfs_dqlock(
+       xfs_dquot_t *dqp)
+{
+       mutex_lock(&dqp->q_qlock);
+}
+
+void
+xfs_dqunlock(
+       xfs_dquot_t *dqp)
+{
+       mutex_unlock(&(dqp->q_qlock));
+       if (dqp->q_logitem.qli_dquot == dqp) {
+               /* Once was dqp->q_mount, but might just have been cleared */
+               xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
+                                       (xfs_log_item_t*)&(dqp->q_logitem));
+       }
+}
+
+
+void
+xfs_dqunlock_nonotify(
+       xfs_dquot_t *dqp)
+{
+       mutex_unlock(&(dqp->q_qlock));
+}
+
+/*
+ * Lock two xfs_dquot structures.
+ *
+ * To avoid deadlocks we always lock the quota structure with
+ * the lower id first.
+ */
+void
+xfs_dqlock2(
+       xfs_dquot_t     *d1,
+       xfs_dquot_t     *d2)
+{
+       if (d1 && d2) {
+               ASSERT(d1 != d2);
+               if (be32_to_cpu(d1->q_core.d_id) >
+                   be32_to_cpu(d2->q_core.d_id)) {
+                       mutex_lock(&d2->q_qlock);
+                       mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
+               } else {
+                       mutex_lock(&d1->q_qlock);
+                       mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
+               }
+       } else if (d1) {
+               mutex_lock(&d1->q_qlock);
+       } else if (d2) {
+               mutex_lock(&d2->q_qlock);
+       }
+}
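+
+/*
+ * A sketch of why the ordering matters: two tasks locking the same pair
+ * in opposite argument order still take the mutexes lower-id-first, so
+ * neither can hold one dquot while waiting for the other:
+ *
+ *	task A:	xfs_dqlock2(udqp, gdqp);
+ *	task B:	xfs_dqlock2(gdqp, udqp);
+ *
+ * Both acquire the dquot with the smaller d_id first; the second lock
+ * uses mutex_lock_nested(..., XFS_QLOCK_NESTED) purely for lockdep.
+ */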
+
+
+/*
+ * Take a dquot out of the mount's dqlist as well as the hashlist.
+ * This is called via unmount as well as quotaoff, and the purge
+ * will always succeed unless there are soft (temp) references
+ * outstanding.
+ *
+ * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
+ * that we're returning! XXXsup - not cool.
+ */
+/* ARGSUSED */
+int
+xfs_qm_dqpurge(
+       xfs_dquot_t     *dqp)
+{
+       xfs_dqhash_t    *qh = dqp->q_hash;
+       xfs_mount_t     *mp = dqp->q_mount;
+
+       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
+       ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
+
+       xfs_dqlock(dqp);
+       /*
+        * We really can't afford to purge a dquot that is
+        * referenced, because these are hard refs.
+        * It shouldn't happen in general because we went thru _all_ inodes in
+        * dqrele_all_inodes before calling this and didn't let the mountlock go.
+        * However it is possible that we have dquots with temporary
+        * references that are not attached to an inode. e.g. see xfs_setattr().
+        */
+       if (dqp->q_nrefs != 0) {
+               xfs_dqunlock(dqp);
+               mutex_unlock(&dqp->q_hash->qh_lock);
+               return (1);
+       }
+
+       ASSERT(!list_empty(&dqp->q_freelist));
+
+       /*
+        * If we're turning off quotas, we have to make sure that, for
+        * example, we don't delete quota disk blocks while dquots are
+        * in the process of getting written to those disk blocks.
+        * This dquot might well be on AIL, and we can't leave it there
+        * if we're turning off quotas. Basically, we need this flush
+        * lock, and are willing to block on it.
+        */
+       if (!xfs_dqflock_nowait(dqp)) {
+               /*
+                * Block on the flush lock after nudging dquot buffer,
+                * if it is incore.
+                */
+               xfs_qm_dqflock_pushbuf_wait(dqp);
+       }
+
+       /*
+        * XXX If we're turning this type of quota off, we don't care
+        * about the dirty metadata sitting in this dquot. OTOH, if
+        * we're unmounting, we do care, so we flush it and wait.
+        */
+       if (XFS_DQ_IS_DIRTY(dqp)) {
+               int     error;
+
+               /* dqflush unlocks dqflock */
+               /*
+                * Given that dqpurge is a very rare occurrence, it is OK
+                * that we're holding the hashlist and mplist locks
+                * across the disk write. But, ... XXXsup
+                *
+                * We don't care about getting disk errors here. We need
+                * to purge this dquot anyway, so we go ahead regardless.
+                */
+               error = xfs_qm_dqflush(dqp, SYNC_WAIT);
+               if (error)
+                       xfs_warn(mp, "%s: dquot %p flush failed",
+                               __func__, dqp);
+               xfs_dqflock(dqp);
+       }
+       ASSERT(atomic_read(&dqp->q_pincount) == 0);
+       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+
+       list_del_init(&dqp->q_hashlist);
+       qh->qh_version++;
+       list_del_init(&dqp->q_mplist);
+       mp->m_quotainfo->qi_dqreclaims++;
+       mp->m_quotainfo->qi_dquots--;
+       /*
+        * XXX Move this to the front of the freelist, if we can get the
+        * freelist lock.
+        */
+       ASSERT(!list_empty(&dqp->q_freelist));
+
+       dqp->q_mount = NULL;
+       dqp->q_hash = NULL;
+       dqp->dq_flags = XFS_DQ_INACTIVE;
+       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+       xfs_dqfunlock(dqp);
+       xfs_dqunlock(dqp);
+       mutex_unlock(&qh->qh_lock);
+       return (0);
+}
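+
+/*
+ * Caller-side sketch (hedged; the actual purge loops live in the quota
+ * manager code): since the return value means purged/not-purged rather
+ * than an errno, a quotaoff or unmount sweep just counts the misses
+ * and retries later:
+ *
+ *	if (xfs_qm_dqpurge(dqp))
+ *		nmisses++;	(dquot had soft references, skipped)
+ */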
+
+
+/*
+ * Give the buffer a little push if it is incore and
+ * wait on the flush lock.
+ */
+void
+xfs_qm_dqflock_pushbuf_wait(
+       xfs_dquot_t     *dqp)
+{
+       xfs_mount_t     *mp = dqp->q_mount;
+       xfs_buf_t       *bp;
+
+       /*
+        * Check to see if the dquot has been flushed delayed
+        * write.  If so, grab its buffer and send it
+        * out immediately.  We'll be able to acquire
+        * the flush lock when the I/O completes.
+        */
+       bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
+                       mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+       if (!bp)
+               goto out_lock;
+
+       if (XFS_BUF_ISDELAYWRITE(bp)) {
+               if (xfs_buf_ispinned(bp))
+                       xfs_log_force(mp, 0);
+               xfs_buf_delwri_promote(bp);
+               wake_up_process(bp->b_target->bt_task);
+       }
+       xfs_buf_relse(bp);
+out_lock:
+       xfs_dqflock(dqp);
+}
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
new file mode 100644 (file)
index 0000000..34b7e94
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DQUOT_H__
+#define __XFS_DQUOT_H__
+
+/*
+ * Dquots are structures that hold quota information about a user or a group,
+ * much like inodes are for files. In fact, dquots share many characteristics
+ * with inodes. However, dquots can also be a centralized resource, relative
+ * to a collection of inodes. In this respect, dquots share some characteristics
+ * of the superblock.
+ * XFS dquots exploit both of those characteristics in their algorithms.
+ * They make every attempt not to be a bottleneck when quotas are on and
+ * to have minimal impact, if any, when quotas are off.
+ */
+
+/*
+ * The hash chain headers (hash buckets)
+ */
+typedef struct xfs_dqhash {
+       struct list_head  qh_list;
+       struct mutex      qh_lock;
+       uint              qh_version;   /* ever increasing version */
+       uint              qh_nelems;    /* number of dquots on the list */
+} xfs_dqhash_t;
+
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * The incore dquot structure
+ */
+typedef struct xfs_dquot {
+       uint             dq_flags;      /* various flags (XFS_DQ_*) */
+       struct list_head q_freelist;    /* global free list of dquots */
+       struct list_head q_mplist;      /* mount's list of dquots */
+       struct list_head q_hashlist;    /* global hash list of dquots */
+       xfs_dqhash_t    *q_hash;        /* the hashchain header */
+       struct xfs_mount*q_mount;       /* filesystem this relates to */
+       struct xfs_trans*q_transp;      /* trans this belongs to currently */
+       uint             q_nrefs;       /* # active refs from inodes */
+       xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
+       int              q_bufoffset;   /* off of dq in buffer (# dquots) */
+       xfs_fileoff_t    q_fileoffset;  /* offset in quotas file */
+
+       struct xfs_dquot*q_gdquot;      /* group dquot, hint only */
+       xfs_disk_dquot_t q_core;        /* actual usage & quotas */
+       xfs_dq_logitem_t q_logitem;     /* dquot log item */
+       xfs_qcnt_t       q_res_bcount;  /* total regular nblks used+reserved */
+       xfs_qcnt_t       q_res_icount;  /* total inos allocd+reserved */
+       xfs_qcnt_t       q_res_rtbcount;/* total realtime blks used+reserved */
+       struct mutex     q_qlock;       /* quota lock */
+       struct completion q_flush;      /* flush completion queue */
+       atomic_t          q_pincount;   /* dquot pin count */
+       wait_queue_head_t q_pinwait;    /* dquot pinning wait queue */
+} xfs_dquot_t;
+
+/*
+ * Lock hierarchy for q_qlock:
+ *     XFS_QLOCK_NORMAL is the implicit default,
+ *     XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
+ */
+enum {
+       XFS_QLOCK_NORMAL = 0,
+       XFS_QLOCK_NESTED,
+};
+
+#define XFS_DQHOLD(dqp)                ((dqp)->q_nrefs++)
+
+/*
+ * Manage the q_flush completion queue embedded in the dquot.  This completion
+ * queue synchronizes processes attempting to flush the in-core dquot back to
+ * disk.
+ */
+static inline void xfs_dqflock(xfs_dquot_t *dqp)
+{
+       wait_for_completion(&dqp->q_flush);
+}
+
+static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
+{
+       return try_wait_for_completion(&dqp->q_flush);
+}
+
+static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
+{
+       complete(&dqp->q_flush);
+}
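+
+/*
+ * A minimal sketch of the flush-lock protocol these helpers implement
+ * (mirroring xfs_qm_dquot_logitem_push() in xfs_dquot_item.c): take the
+ * dquot lock, try for the flush lock, and hand both to xfs_qm_dqflush(),
+ * which releases the flush lock from its iodone callback once the write
+ * reaches disk.
+ *
+ *	xfs_dqlock(dqp);
+ *	if (xfs_dqflock_nowait(dqp))
+ *		error = xfs_qm_dqflush(dqp, 0);
+ *	xfs_dqunlock(dqp);
+ */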
+
+#define XFS_DQ_IS_LOCKED(dqp)  (mutex_is_locked(&((dqp)->q_qlock)))
+#define XFS_DQ_IS_DIRTY(dqp)   ((dqp)->dq_flags & XFS_DQ_DIRTY)
+#define XFS_QM_ISUDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_USER)
+#define XFS_QM_ISPDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_PROJ)
+#define XFS_QM_ISGDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_GROUP)
+#define XFS_DQ_TO_QINF(dqp)    ((dqp)->q_mount->m_quotainfo)
+#define XFS_DQ_TO_QIP(dqp)     (XFS_QM_ISUDQ(dqp) ? \
+                                XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
+                                XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
+
+#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
+                                    (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
+                                    (XFS_IS_OQUOTA_ON((d)->q_mount))))
+
+extern void            xfs_qm_dqdestroy(xfs_dquot_t *);
+extern int             xfs_qm_dqflush(xfs_dquot_t *, uint);
+extern int             xfs_qm_dqpurge(xfs_dquot_t *);
+extern void            xfs_qm_dqunpin_wait(xfs_dquot_t *);
+extern int             xfs_qm_dqlock_nowait(xfs_dquot_t *);
+extern void            xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
+extern void            xfs_qm_adjust_dqtimers(xfs_mount_t *,
+                                       xfs_disk_dquot_t *);
+extern void            xfs_qm_adjust_dqlimits(xfs_mount_t *,
+                                       xfs_disk_dquot_t *);
+extern int             xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
+                                       xfs_dqid_t, uint, uint, xfs_dquot_t **);
+extern void            xfs_qm_dqput(xfs_dquot_t *);
+extern void            xfs_dqlock(xfs_dquot_t *);
+extern void            xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
+extern void            xfs_dqunlock(xfs_dquot_t *);
+extern void            xfs_dqunlock_nonotify(xfs_dquot_t *);
+
+#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
new file mode 100644 (file)
index 0000000..9e0e2fa
--- /dev/null
@@ -0,0 +1,529 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+
+static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
+{
+       return container_of(lip, struct xfs_dq_logitem, qli_item);
+}
+
+/*
+ * returns the number of iovecs needed to log the given dquot item.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_size(
+       struct xfs_log_item     *lip)
+{
+       /*
+        * we need only two iovecs, one for the format, one for the real thing
+        */
+       return 2;
+}
+
+/*
+ * fills in the vector of log iovecs for the given dquot log item.
+ */
+STATIC void
+xfs_qm_dquot_logitem_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_iovec    *logvec)
+{
+       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
+
+       logvec->i_addr = &qlip->qli_format;
+       logvec->i_len  = sizeof(xfs_dq_logformat_t);
+       logvec->i_type = XLOG_REG_TYPE_QFORMAT;
+       logvec++;
+       logvec->i_addr = &qlip->qli_dquot->q_core;
+       logvec->i_len  = sizeof(xfs_disk_dquot_t);
+       logvec->i_type = XLOG_REG_TYPE_DQUOT;
+
+       ASSERT(2 == lip->li_desc->lid_size);
+       qlip->qli_format.qlf_size = 2;
+
+}
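+
+/*
+ * Layout of the two iovecs built above, for reference:
+ *
+ *	logvec[0]: qli_format	(xfs_dq_logformat_t, XLOG_REG_TYPE_QFORMAT)
+ *	logvec[1]: q_core	(xfs_disk_dquot_t,   XLOG_REG_TYPE_DQUOT)
+ */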
+
+/*
+ * Increment the pin count of the given dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pin(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       atomic_inc(&dqp->q_pincount);
+}
+
+/*
+ * Decrement the pin count of the given dquot, and wake up
+ * anyone in xfs_qm_dqunpin_wait() if the count goes to 0.  The
+ * dquot must have been previously pinned with a call to
+ * xfs_qm_dquot_logitem_pin().
+ */
+STATIC void
+xfs_qm_dquot_logitem_unpin(
+       struct xfs_log_item     *lip,
+       int                     remove)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(atomic_read(&dqp->q_pincount) > 0);
+       if (atomic_dec_and_test(&dqp->q_pincount))
+               wake_up(&dqp->q_pinwait);
+}
+
+/*
+ * Given the logitem, this writes the corresponding dquot entry to disk
+ * asynchronously. This is called with the dquot entry securely locked;
+ * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
+ * at the end.
+ */
+STATIC void
+xfs_qm_dquot_logitem_push(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+       int                     error;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(!completion_done(&dqp->q_flush));
+
+       /*
+        * Since we were able to lock the dquot's flush lock and
+        * we found it on the AIL, the dquot must be dirty.  This
+        * is because the dquot is removed from the AIL while still
+        * holding the flush lock in xfs_dqflush_done().  Thus, if
+        * we found it in the AIL and were able to obtain the flush
+        * lock without sleeping, then there must not have been
+        * anyone in the process of flushing the dquot.
+        */
+       error = xfs_qm_dqflush(dqp, 0);
+       if (error)
+               xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
+                       __func__, error, dqp);
+       xfs_dqunlock(dqp);
+}
+
+STATIC xfs_lsn_t
+xfs_qm_dquot_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       /*
+        * We always re-log the entire dquot when it becomes dirty,
+        * so, the latest copy _is_ the only one that matters.
+        */
+       return lsn;
+}
+
+/*
+ * This is called to wait for the given dquot to be unpinned.
+ * Most of these pin/unpin routines are plagiarized from inode code.
+ */
+void
+xfs_qm_dqunpin_wait(
+       struct xfs_dquot        *dqp)
+{
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       if (atomic_read(&dqp->q_pincount) == 0)
+               return;
+
+       /*
+        * Give the log a push so we don't wait here too long.
+        */
+       xfs_log_force(dqp->q_mount, 0);
+       wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
+}
+
+/*
+ * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
+ * the dquot is locked by us, but the flush lock isn't. So, here we are
+ * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
+ * If so, we want to push it out to help us take this item off the AIL as soon
+ * as possible.
+ *
+ * We must not be holding the AIL lock at this point. Calling incore() to
+ * search the buffer cache can be a time consuming thing, and AIL lock is a
+ * spinlock.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pushbuf(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
+       struct xfs_dquot        *dqp = qlip->qli_dquot;
+       struct xfs_buf          *bp;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       /*
+        * If the flush lock isn't locked anymore, chances are that the
+        * dquot flush completed and the dquot was taken off the AIL.
+        * So, just get out.
+        */
+       if (completion_done(&dqp->q_flush) ||
+           !(lip->li_flags & XFS_LI_IN_AIL)) {
+               xfs_dqunlock(dqp);
+               return;
+       }
+
+       bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
+                       dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+       xfs_dqunlock(dqp);
+       if (!bp)
+               return;
+       if (XFS_BUF_ISDELAYWRITE(bp))
+               xfs_buf_delwri_promote(bp);
+       xfs_buf_relse(bp);
+}
+
+/*
+ * This is called to attempt to lock the dquot associated with this
+ * dquot log item.  Don't sleep on the dquot lock or the flush lock.
+ * If the flush lock is already held, indicating that the dquot has
+ * been or is in the process of being flushed, then see if we can
+ * find the dquot's buffer in the buffer cache without sleeping.  If
+ * we can and it is marked delayed write, then we want to send it out.
+ * We delay doing so until the push routine, though, to avoid sleeping
+ * in any device strategy routines.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_trylock(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       if (atomic_read(&dqp->q_pincount) > 0)
+               return XFS_ITEM_PINNED;
+
+       if (!xfs_qm_dqlock_nowait(dqp))
+               return XFS_ITEM_LOCKED;
+
+       if (!xfs_dqflock_nowait(dqp)) {
+               /*
+                * dquot has already been flushed to the backing buffer,
+                * leave it locked, pushbuf routine will unlock it.
+                */
+               return XFS_ITEM_PUSHBUF;
+       }
+
+       ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+       return XFS_ITEM_SUCCESS;
+}
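+
+/*
+ * Return-value summary for the AIL pusher (restating the code above):
+ *
+ *	pin count > 0		XFS_ITEM_PINNED   (needs a log force)
+ *	dquot lock contended	XFS_ITEM_LOCKED   (skip for now)
+ *	flush lock held		XFS_ITEM_PUSHBUF  (see pushbuf above)
+ *	both locks acquired	XFS_ITEM_SUCCESS  (push will flush it)
+ */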
+
+/*
+ * Unlock the dquot associated with the log item.
+ * Clear the fields of the dquot and dquot log item that
+ * are specific to the current transaction.  If the
+ * hold flags is set, do not unlock the dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_unlock(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       /*
+        * Clear the transaction pointer in the dquot
+        */
+       dqp->q_transp = NULL;
+
+       /*
+        * dquots are never 'held' from getting unlocked at the end of
+        * a transaction.  Their locking and unlocking is hidden inside the
+        * transaction layer, within trans_commit. Hence, no LI_HOLD flag
+        * for the logitem.
+        */
+       xfs_dqunlock(dqp);
+}
+
+/*
+ * This needs to stamp an lsn into the dquot, I think.
+ * RPCs that look at user dquots would then have to
+ * push on the dependency recorded in the dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+}
+
+/*
+ * This is the ops vector for dquots
+ */
+static struct xfs_item_ops xfs_dquot_item_ops = {
+       .iop_size       = xfs_qm_dquot_logitem_size,
+       .iop_format     = xfs_qm_dquot_logitem_format,
+       .iop_pin        = xfs_qm_dquot_logitem_pin,
+       .iop_unpin      = xfs_qm_dquot_logitem_unpin,
+       .iop_trylock    = xfs_qm_dquot_logitem_trylock,
+       .iop_unlock     = xfs_qm_dquot_logitem_unlock,
+       .iop_committed  = xfs_qm_dquot_logitem_committed,
+       .iop_push       = xfs_qm_dquot_logitem_push,
+       .iop_pushbuf    = xfs_qm_dquot_logitem_pushbuf,
+       .iop_committing = xfs_qm_dquot_logitem_committing
+};
+
+/*
+ * Initialize the dquot log item for a newly allocated dquot.
+ * The dquot isn't locked at this point, but it isn't on any of the lists
+ * either, so we don't care.
+ */
+void
+xfs_qm_dquot_logitem_init(
+       struct xfs_dquot        *dqp)
+{
+       struct xfs_dq_logitem   *lp = &dqp->q_logitem;
+
+       xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
+                                       &xfs_dquot_item_ops);
+       lp->qli_dquot = dqp;
+       lp->qli_format.qlf_type = XFS_LI_DQUOT;
+       lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
+       lp->qli_format.qlf_blkno = dqp->q_blkno;
+       lp->qli_format.qlf_len = 1;
+       /*
+        * This is just the offset of this dquot within its buffer
+        * (which is currently 1 FSB and probably won't change).
+        * Hence 32 bits for this offset should be just fine.
+        * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
+        * here, and recompute it at recovery time.
+        */
+       lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
+}
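+
+/*
+ * The alternative encoding mentioned above, sketched out (this is not
+ * what the code does today):
+ *
+ *	log time:	qlf_boffset = dqp->q_bufoffset / sizeof(xfs_dqblk_t);
+ *	recovery time:	bufoffset   = qlf_boffset * sizeof(xfs_dqblk_t);
+ */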
+
+/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
+
+static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
+{
+       return container_of(lip, struct xfs_qoff_logitem, qql_item);
+}
+
+
+/*
+ * This returns the number of iovecs needed to log the given quotaoff item.
+ * We only need 1 iovec for a quotaoff item.  It just logs the
+ * quotaoff_log_format structure.
+ */
+STATIC uint
+xfs_qm_qoff_logitem_size(
+       struct xfs_log_item     *lip)
+{
+       return 1;
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given quotaoff log item. We use only 1 iovec, and we point that
+ * at the quotaoff_log_format structure embedded in the quotaoff item.
+ * It is at this point that we assert that all of the extent
+ * slots in the quotaoff item have been filled.
+ */
+STATIC void
+xfs_qm_qoff_logitem_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_iovec    *log_vector)
+{
+       struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
+
+       ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
+
+       log_vector->i_addr = &qflip->qql_format;
+       log_vector->i_len = sizeof(xfs_qoff_logitem_t);
+       log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
+       qflip->qql_format.qf_size = 1;
+}
+
+/*
+ * Pinning has no meaning for a quotaoff item, so just return.
+ */
+STATIC void
+xfs_qm_qoff_logitem_pin(
+       struct xfs_log_item     *lip)
+{
+}
+
+/*
+ * Since pinning has no meaning for a quotaoff item, unpinning does
+ * not either.
+ */
+STATIC void
+xfs_qm_qoff_logitem_unpin(
+       struct xfs_log_item     *lip,
+       int                     remove)
+{
+}
+
+/*
+ * Quotaoff items have no locking; report them as locked so that
+ * the AIL code leaves them alone.
+ */
+STATIC uint
+xfs_qm_qoff_logitem_trylock(
+       struct xfs_log_item     *lip)
+{
+       return XFS_ITEM_LOCKED;
+}
+
+/*
+ * Quotaoff items have no locking, so there is nothing for
+ * unlock to do here.
+ */
+STATIC void
+xfs_qm_qoff_logitem_unlock(
+       struct xfs_log_item     *lip)
+{
+}
+
+/*
+ * The quotaoff-start-item is logged only once and cannot be moved in the log,
+ * so simply return the lsn at which it's been logged.
+ */
+STATIC xfs_lsn_t
+xfs_qm_qoff_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       return lsn;
+}
+
+/*
+ * There isn't much you can do to push on a quotaoff item.  It is simply
+ * stuck waiting for the log to be flushed to disk.
+ */
+STATIC void
+xfs_qm_qoff_logitem_push(
+       struct xfs_log_item     *lip)
+{
+}
+
+
+STATIC xfs_lsn_t
+xfs_qm_qoffend_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
+       struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
+       struct xfs_ail          *ailp = qfs->qql_item.li_ailp;
+
+       /*
+        * Delete the qoff-start logitem from the AIL.
+        * xfs_trans_ail_delete() drops the AIL lock.
+        */
+       spin_lock(&ailp->xa_lock);
+       xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
+
+       kmem_free(qfs);
+       kmem_free(qfe);
+       return (xfs_lsn_t)-1;
+}
+
+/*
+ * XXX rcc - don't know quite what to do with this.  I think we can
+ * just ignore it.  The only time that isn't the case is if we allow
+ * the client to somehow see that quotas have been turned off in which
+ * we can't allow that to get back until the quotaoff hits the disk.
+ * So how would that happen?  Also, do we need different routines for
+ * quotaoff start and quotaoff end?  I suspect the answer is yes but
+ * to be sure, I need to look at the recovery code and see how quota off
+ * recovery is handled (do we roll forward or back or do something else).
+ * If we roll forwards or backwards, then we need two separate routines,
+ * one that does nothing and one that stamps in the lsn that matters
+ * (truly makes the quotaoff irrevocable).  If we do something else,
+ * then maybe we don't need two.
+ */
+STATIC void
+xfs_qm_qoff_logitem_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               commit_lsn)
+{
+}
+
+static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+       .iop_size       = xfs_qm_qoff_logitem_size,
+       .iop_format     = xfs_qm_qoff_logitem_format,
+       .iop_pin        = xfs_qm_qoff_logitem_pin,
+       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
+       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
+       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
+       .iop_committed  = xfs_qm_qoffend_logitem_committed,
+       .iop_push       = xfs_qm_qoff_logitem_push,
+       .iop_committing = xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * This is the ops vector shared by all quotaoff-start log items.
+ */
+static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+       .iop_size       = xfs_qm_qoff_logitem_size,
+       .iop_format     = xfs_qm_qoff_logitem_format,
+       .iop_pin        = xfs_qm_qoff_logitem_pin,
+       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
+       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
+       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
+       .iop_committed  = xfs_qm_qoff_logitem_committed,
+       .iop_push       = xfs_qm_qoff_logitem_push,
+       .iop_committing = xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * Allocate and initialize a quotaoff item of the correct quota type(s).
+ */
+struct xfs_qoff_logitem *
+xfs_qm_qoff_logitem_init(
+       struct xfs_mount        *mp,
+       struct xfs_qoff_logitem *start,
+       uint                    flags)
+{
+       struct xfs_qoff_logitem *qf;
+
+       qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
+
+       xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
+                       &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
+       qf->qql_item.li_mountp = mp;
+       qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
+       qf->qql_format.qf_flags = flags;
+       qf->qql_start_lip = start;
+       return qf;
+}
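+
+/*
+ * A usage sketch of the start/end pairing (hedged; the actual
+ * sequencing lives in the quotaoff transaction code): the start item
+ * is created with start == NULL and logged first; once quotaoff
+ * completes, an end item is created pointing back at it, so that
+ * xfs_qm_qoffend_logitem_committed() above can pull the pinned start
+ * item off the AIL and free both.
+ *
+ *	qoffstart = xfs_qm_qoff_logitem_init(mp, NULL, flags);
+ *	...
+ *	qoffend   = xfs_qm_qoff_logitem_init(mp, qoffstart, flags);
+ */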
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
new file mode 100644 (file)
index 0000000..5acae2a
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DQUOT_ITEM_H__
+#define __XFS_DQUOT_ITEM_H__
+
+struct xfs_dquot;
+struct xfs_trans;
+struct xfs_mount;
+struct xfs_qoff_logitem;
+
+typedef struct xfs_dq_logitem {
+       xfs_log_item_t           qli_item;         /* common portion */
+       struct xfs_dquot        *qli_dquot;        /* dquot ptr */
+       xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
+       xfs_dq_logformat_t       qli_format;       /* logged structure */
+} xfs_dq_logitem_t;
+
+typedef struct xfs_qoff_logitem {
+       xfs_log_item_t           qql_item;      /* common portion */
+       struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+       xfs_qoff_logformat_t     qql_format;    /* logged structure */
+} xfs_qoff_logitem_t;
+
+
+extern void               xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
+                                       struct xfs_qoff_logitem *, uint);
+extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
+                                       struct xfs_qoff_logitem *, uint);
+extern void               xfs_trans_log_quotaoff_item(struct xfs_trans *,
+                                       struct xfs_qoff_logitem *);
+
+#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
new file mode 100644 (file)
index 0000000..75e5d32
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_mount.h"
+#include "xfs_export.h"
+#include "xfs_vnodeops.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+
+/*
+ * Note that we only accept fileids which are long enough rather than allow
+ * the parent generation number to default to zero.  XFS considers zero a
+ * valid generation number not an invalid/wildcard value.
+ */
+static int xfs_fileid_length(int fileid_type)
+{
+       switch (fileid_type) {
+       case FILEID_INO32_GEN:
+               return 2;
+       case FILEID_INO32_GEN_PARENT:
+               return 4;
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               return 3;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               return 6;
+       }
+       return 255; /* invalid */
+}
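+
+/*
+ * The lengths above are in 32 bit words.  Worked out: the plain
+ * FILEID_INO32_GEN handle is two words (ino + gen); adding a parent
+ * doubles that to four; the 64 bit variants need three (64 bit ino +
+ * gen) and six (64 bit ino + gen for both child and parent).
+ */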
+
+STATIC int
+xfs_fs_encode_fh(
+       struct dentry           *dentry,
+       __u32                   *fh,
+       int                     *max_len,
+       int                     connectable)
+{
+       struct fid              *fid = (struct fid *)fh;
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fh;
+       struct inode            *inode = dentry->d_inode;
+       int                     fileid_type;
+       int                     len;
+
+       /* Directories don't need their parent encoded, they have ".." */
+       if (S_ISDIR(inode->i_mode) || !connectable)
+               fileid_type = FILEID_INO32_GEN;
+       else
+               fileid_type = FILEID_INO32_GEN_PARENT;
+
+       /*
+        * If the filesystem may contain 64bit inode numbers, we need
+        * to use larger file handles that can represent them.
+        *
+        * While we may currently only be allocating inodes that fit into
+        * 32 bits, any large enough filesystem may already contain ones
+        * that do not, hence the slightly confusing looking conditional
+        * below.
+        */
+       if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
+           (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
+               fileid_type |= XFS_FILEID_TYPE_64FLAG;
+
+       /*
+        * Only encode if there is enough space given.  In practice
+        * this means we can't export a filesystem with 64bit inodes
+        * over NFSv2 with the subtree_check export option; the other
+        * seven combinations work.  The real answer is "don't use v2".
+        */
+       len = xfs_fileid_length(fileid_type);
+       if (*max_len < len) {
+               *max_len = len;
+               return 255;
+       }
+       *max_len = len;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+               spin_lock(&dentry->d_lock);
+               fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
+               fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
+               spin_unlock(&dentry->d_lock);
+               /*FALLTHRU*/
+       case FILEID_INO32_GEN:
+               fid->i32.ino = inode->i_ino;
+               fid->i32.gen = inode->i_generation;
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               spin_lock(&dentry->d_lock);
+               fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
+               fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
+               spin_unlock(&dentry->d_lock);
+               /*FALLTHRU*/
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               fid64->ino = inode->i_ino;
+               fid64->gen = inode->i_generation;
+               break;
+       }
+
+       return fileid_type;
+}
+
+STATIC struct inode *
+xfs_nfs_get_inode(
+       struct super_block      *sb,
+       u64                     ino,
+       u32                     generation)
+{
+       xfs_mount_t             *mp = XFS_M(sb);
+       xfs_inode_t             *ip;
+       int                     error;
+
+       /*
+        * NFS can sometimes send requests for ino 0.  Fail them gracefully.
+        */
+       if (ino == 0)
+               return ERR_PTR(-ESTALE);
+
+       /*
+        * XFS_IGET_UNTRUSTED means that an invalid inode number is just
+        * fine and not an indication of a corrupted filesystem, as clients
+        * can send invalid file handles which we have to handle gracefully.
+        */
+       error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
+       if (error) {
+               /*
+                * EINVAL means the inode cluster doesn't exist anymore.
+                * This implies the filehandle is stale, so we should
+                * translate it here.
+                * We don't use ESTALE directly down the chain to not
+                * confuse applications using bulkstat that expect EINVAL.
+                */
+               if (error == EINVAL || error == ENOENT)
+                       error = ESTALE;
+               return ERR_PTR(-error);
+       }
+
+       if (ip->i_d.di_gen != generation) {
+               IRELE(ip);
+               return ERR_PTR(-ESTALE);
+       }
+
+       return VFS_I(ip);
+}
+
+STATIC struct dentry *
+xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+                int fh_len, int fileid_type)
+{
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
+       struct inode            *inode = NULL;
+
+       if (fh_len < xfs_fileid_length(fileid_type))
+               return NULL;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+       case FILEID_INO32_GEN:
+               inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
+               break;
+       }
+
+       return d_obtain_alias(inode);
+}
+
+STATIC struct dentry *
+xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
+                int fh_len, int fileid_type)
+{
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
+       struct inode            *inode = NULL;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+               inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
+                                             fid->i32.parent_gen);
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
+                                             fid64->parent_gen);
+               break;
+       }
+
+       return d_obtain_alias(inode);
+}
+
+STATIC struct dentry *
+xfs_fs_get_parent(
+       struct dentry           *child)
+{
+       int                     error;
+       struct xfs_inode        *cip;
+
+       error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
+       if (unlikely(error))
+               return ERR_PTR(-error);
+
+       return d_obtain_alias(VFS_I(cip));
+}
+
+STATIC int
+xfs_fs_nfs_commit_metadata(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     error = 0;
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       if (xfs_ipincount(ip)) {
+               error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
+                               XFS_LOG_SYNC, NULL);
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       return error;
+}
+
+const struct export_operations xfs_export_operations = {
+       .encode_fh              = xfs_fs_encode_fh,
+       .fh_to_dentry           = xfs_fs_fh_to_dentry,
+       .fh_to_parent           = xfs_fs_fh_to_parent,
+       .get_parent             = xfs_fs_get_parent,
+       .commit_metadata        = xfs_fs_nfs_commit_metadata,
+};
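
For illustration only, not part of this patch: besides NFS serving, these
export operations also back the generic file-handle syscalls.  A minimal
userspace sketch, assuming a kernel/glibc new enough to provide
name_to_handle_at(2) and open_by_handle_at(2), which end up in
xfs_fs_encode_fh() and xfs_fs_fh_to_dentry() respectively:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
            struct file_handle *fh;
            int mount_id, mfd, fd;

            if (argc != 3)
                    return 1;
            fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
            fh->handle_bytes = MAX_HANDLE_SZ;

            /* kernel side: ->encode_fh, i.e. xfs_fs_encode_fh() */
            if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0) < 0) {
                    perror("name_to_handle_at");
                    return 1;
            }

            /* kernel side: ->fh_to_dentry; needs CAP_DAC_READ_SEARCH */
            mfd = open(argv[2], O_RDONLY);          /* the mount point */
            fd = open_by_handle_at(mfd, fh, O_RDONLY);
            if (fd < 0)
                    perror("open_by_handle_at");
            return fd < 0;
    }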
diff --git a/fs/xfs/xfs_export.h b/fs/xfs/xfs_export.h
new file mode 100644 (file)
index 0000000..3272b6a
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_EXPORT_H__
+#define __XFS_EXPORT_H__
+
+/*
+ * Common defines for code related to exporting XFS filesystems over NFS.
+ *
+ * The NFS fileid goes out on the wire as an array of
+ * 32bit unsigned ints in host order.  There are 5 possible
+ * formats.
+ *
+ * (1) fileid_type=0x00
+ *     (no fileid data; handled by the generic code)
+ *
+ * (2) fileid_type=0x01
+ *     inode-num
+ *     generation
+ *
+ * (3) fileid_type=0x02
+ *     inode-num
+ *     generation
+ *     parent-inode-num
+ *     parent-generation
+ *
+ * (4) fileid_type=0x81
+ *     inode-num-lo32
+ *     inode-num-hi32
+ *     generation
+ *
+ * (5) fileid_type=0x82
+ *     inode-num-lo32
+ *     inode-num-hi32
+ *     generation
+ *     parent-inode-num-lo32
+ *     parent-inode-num-hi32
+ *     parent-generation
+ *
+ * Note, the NFS filehandle also includes an fsid portion which
+ * may have an inode number in it.  That number is hardcoded to
+ * 32bits and there is no way for XFS to intercept it.  In
+ * practice this means when exporting an XFS filesystem with 64bit
+ * inodes you should either export the mountpoint (rather than
+ * a subdirectory) or use the "fsid" export option.
+ */
+
+struct xfs_fid64 {
+       u64 ino;
+       u32 gen;
+       u64 parent_ino;
+       u32 parent_gen;
+} __attribute__((packed));
+
+/* This flag goes on the wire.  Don't play with it. */
+#define XFS_FILEID_TYPE_64FLAG 0x80    /* NFS fileid has 64bit inodes */
+
+#endif /* __XFS_EXPORT_H__ */
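
A quick sanity check on the layout above (a standalone C11 sketch, not part
of this patch): the packed xfs_fid64 must come out to exactly the six 32-bit
words that xfs_fileid_length() reports for fileid type 0x82.

    #include <stdint.h>

    struct xfs_fid64_sketch {
            uint64_t ino;
            uint32_t gen;
            uint64_t parent_ino;
            uint32_t parent_gen;
    } __attribute__((packed));

    _Static_assert(sizeof(struct xfs_fid64_sketch) == 6 * sizeof(uint32_t),
                   "xfs_fid64 must pack to six 32-bit words");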
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
new file mode 100644 (file)
index 0000000..7f7b424
--- /dev/null
@@ -0,0 +1,1096 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_vnodeops.h"
+#include "xfs_da_btree.h"
+#include "xfs_ioctl.h"
+#include "xfs_trace.h"
+
+#include <linux/dcache.h>
+#include <linux/falloc.h>
+
+static const struct vm_operations_struct xfs_file_vm_ops;
+
+/*
+ * Locking primitives for read and write IO paths to ensure we consistently use
+ * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
+ */
+static inline void
+xfs_rw_ilock(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_lock(&VFS_I(ip)->i_mutex);
+       xfs_ilock(ip, type);
+}
+
+static inline void
+xfs_rw_iunlock(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       xfs_iunlock(ip, type);
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+static inline void
+xfs_rw_ilock_demote(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       xfs_ilock_demote(ip, type);
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+/*
+ *     xfs_iozero
+ *
+ *     xfs_iozero clears the specified range of the buffer supplied,
+ *     and marks all the affected blocks as valid and modified.  If
+ *     an affected block is not allocated, it will be allocated.  If
+ *     an affected block is not completely overwritten, and is not
+ *     valid before the operation, it will be read from disk before
+ *     being partially zeroed.
+ */
+STATIC int
+xfs_iozero(
+       struct xfs_inode        *ip,    /* inode                        */
+       loff_t                  pos,    /* offset in file               */
+       size_t                  count)  /* size of data to zero         */
+{
+       struct page             *page;
+       struct address_space    *mapping;
+       int                     status;
+
+       mapping = VFS_I(ip)->i_mapping;
+       do {
+               unsigned offset, bytes;
+               void *fsdata;
+
+               offset = (pos & (PAGE_CACHE_SIZE - 1)); /* Within page */
+               bytes = PAGE_CACHE_SIZE - offset;
+               if (bytes > count)
+                       bytes = count;
+
+               status = pagecache_write_begin(NULL, mapping, pos, bytes,
+                                       AOP_FLAG_UNINTERRUPTIBLE,
+                                       &page, &fsdata);
+               if (status)
+                       break;
+
+               zero_user(page, offset, bytes);
+
+               status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
+                                       page, fsdata);
+               WARN_ON(status <= 0); /* can't return less than zero! */
+               pos += bytes;
+               count -= bytes;
+               status = 0;
+       } while (count);
+
+       return (-status);
+}
+
+STATIC int
+xfs_file_fsync(
+       struct file             *file,
+       loff_t                  start,
+       loff_t                  end,
+       int                     datasync)
+{
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error = 0;
+       int                     log_flushed = 0;
+
+       trace_xfs_file_fsync(ip);
+
+       error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+       if (error)
+               return error;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       xfs_ioend_wait(ip);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+       if (mp->m_flags & XFS_MOUNT_BARRIER) {
+               /*
+                * If we have an RT and/or log subvolume we need to make sure
+                * to flush the write cache of the device used for file data
+                * first.  This is to ensure newly written file data makes
+                * first.  This is to ensure newly written file data make
+                * it to disk before logging the new inode size in case of
+                * an extending write.
+                */
+               if (XFS_IS_REALTIME_INODE(ip))
+                       xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+               else if (mp->m_logdev_targp != mp->m_ddev_targp)
+                       xfs_blkdev_issue_flush(mp->m_ddev_targp);
+       }
+
+       /*
+        * We always need to make sure that the required inode state is safe on
+        * disk.  The inode might be clean but we still might need to force the
+        * log because of committed transactions that haven't hit the disk yet.
+        * Likewise, there could be unflushed non-transactional changes to the
+        * inode core that have to go to disk and this requires us to issue
+        * a synchronous transaction to capture these changes correctly.
+        *
+        * This code relies on the assumption that if the i_update_core field
+        * of the inode is clear and the inode is unpinned then it is clean
+        * and no action is required.
+        */
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+       /*
+        * First check if the VFS inode is marked dirty.  All the dirtying
+        * of non-transactional updates now goes through mark_inode_dirty*,
+        * which allows us to distinguish between pure timestamp updates
+        * and i_size updates which need to be caught for fdatasync.
+        * After that also check for the dirty state in the XFS inode, which
+        * might get cleared when the inode gets written out via the AIL
+        * or xfs_iflush_cluster.
+        */
+       if (((inode->i_state & I_DIRTY_DATASYNC) ||
+           ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
+           ip->i_update_core) {
+               /*
+                * Kick off a transaction to log the inode core to get the
+                * updates.  The sync transaction will also force the log.
+                */
+               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+               error = xfs_trans_reserve(tp, 0,
+                               XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+               if (error) {
+                       xfs_trans_cancel(tp, 0);
+                       return -error;
+               }
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+               /*
+                * Note - it's possible that we might have pushed ourselves out
+                * of the way during trans_reserve which would flush the inode.
+                * But there's no guarantee that the inode buffer has actually
+                * gone out yet (it's delwri).  Plus the buffer could be pinned
+                * anyway if it's part of an inode in another recent
+                * transaction.  So we play it safe and fire off the
+                * transaction anyway.
+                */
+               xfs_trans_ijoin(tp, ip);
+               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+               xfs_trans_set_sync(tp);
+               error = _xfs_trans_commit(tp, 0, &log_flushed);
+
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       } else {
+               /*
+                * Timestamps/size haven't changed since last inode flush or
+                * inode transaction commit.  That means either nothing got
+                * written or a transaction committed which caught the updates.
+                * If the latter happened and the transaction hasn't hit the
+                * disk yet, the inode will still be pinned.  If it is,
+                * force the log.
+                */
+               if (xfs_ipincount(ip)) {
+                       error = _xfs_log_force_lsn(mp,
+                                       ip->i_itemp->ili_last_lsn,
+                                       XFS_LOG_SYNC, &log_flushed);
+               }
+               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       }
+
+       /*
+        * If we only have a single device, and the log force above was
+        * a no-op, we might have to flush the data device cache here.
+        * This can only happen for fdatasync/O_DSYNC if we were overwriting
+        * an already allocated file and thus do not have any metadata to
+        * commit.
+        */
+       if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
+           mp->m_logdev_targp == mp->m_ddev_targp &&
+           !XFS_IS_REALTIME_INODE(ip) &&
+           !log_flushed)
+               xfs_blkdev_issue_flush(mp->m_ddev_targp);
+
+       return -error;
+}
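
Both fsync(2) and fdatasync(2) land in xfs_file_fsync(); the datasync flag
is what lets pure timestamp updates (I_DIRTY_SYNC without I_DIRTY_DATASYNC)
skip the inode-core transaction above.  A trivial userspace sketch for
illustration:

    #include <unistd.h>

    /* data_only != 0 maps to datasync == 1 in xfs_file_fsync() */
    static int persist(int fd, int data_only)
    {
            return data_only ? fdatasync(fd) : fsync(fd);
    }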
+
+STATIC ssize_t
+xfs_file_aio_read(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos)
+{
+       struct file             *file = iocb->ki_filp;
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       size_t                  size = 0;
+       ssize_t                 ret = 0;
+       int                     ioflags = 0;
+       xfs_fsize_t             n;
+       unsigned long           seg;
+
+       XFS_STATS_INC(xs_read_calls);
+
+       BUG_ON(iocb->ki_pos != pos);
+
+       if (unlikely(file->f_flags & O_DIRECT))
+               ioflags |= IO_ISDIRECT;
+       if (file->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       /* START copy & waste from filemap.c */
+       for (seg = 0; seg < nr_segs; seg++) {
+               const struct iovec *iv = &iovp[seg];
+
+               /*
+                * If any segment has a negative length, or the cumulative
+                * length ever wraps negative then return -EINVAL.
+                */
+               size += iv->iov_len;
+               if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+                       return XFS_ERROR(-EINVAL);
+       }
+       /* END copy & waste from filemap.c */
+
+       if (unlikely(ioflags & IO_ISDIRECT)) {
+               xfs_buftarg_t   *target =
+                       XFS_IS_REALTIME_INODE(ip) ?
+                               mp->m_rtdev_targp : mp->m_ddev_targp;
+               if ((iocb->ki_pos & target->bt_smask) ||
+                   (size & target->bt_smask)) {
+                       if (iocb->ki_pos == ip->i_size)
+                               return 0;
+                       return -XFS_ERROR(EINVAL);
+               }
+       }
+
+       n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
+       if (n <= 0 || size == 0)
+               return 0;
+
+       if (n < size)
+               size = n;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       if (unlikely(ioflags & IO_ISDIRECT)) {
+               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+
+               if (inode->i_mapping->nrpages) {
+                       ret = -xfs_flushinval_pages(ip,
+                                       (iocb->ki_pos & PAGE_CACHE_MASK),
+                                       -1, FI_REMAPF_LOCKED);
+                       if (ret) {
+                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
+                               return ret;
+                       }
+               }
+               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+       } else
+               xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+
+       trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
+
+       ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
+       if (ret > 0)
+               XFS_STATS_ADD(xs_read_bytes, ret);
+
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       return ret;
+}
+
+STATIC ssize_t
+xfs_file_splice_read(
+       struct file             *infilp,
+       loff_t                  *ppos,
+       struct pipe_inode_info  *pipe,
+       size_t                  count,
+       unsigned int            flags)
+{
+       struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
+       int                     ioflags = 0;
+       ssize_t                 ret;
+
+       XFS_STATS_INC(xs_read_calls);
+
+       if (infilp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+
+       trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
+
+       ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+       if (ret > 0)
+               XFS_STATS_ADD(xs_read_bytes, ret);
+
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       return ret;
+}
+
+STATIC void
+xfs_aio_write_isize_update(
+       struct inode    *inode,
+       loff_t          *ppos,
+       ssize_t         bytes_written)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             isize = i_size_read(inode);
+
+       if (bytes_written > 0)
+               XFS_STATS_ADD(xs_write_bytes, bytes_written);
+
+       if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
+                                       *ppos > isize))
+               *ppos = isize;
+
+       if (*ppos > ip->i_size) {
+               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+               if (*ppos > ip->i_size)
+                       ip->i_size = *ppos;
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+/*
+ * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
+ * part of the I/O may have been written to disk before the error occurred.  In
+ * this case the on-disk file size may have been adjusted beyond the in-memory
+ * file size and now needs to be truncated back.
+ */
+STATIC void
+xfs_aio_write_newsize_update(
+       struct xfs_inode        *ip)
+{
+       if (ip->i_new_size) {
+               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+               ip->i_new_size = 0;
+               if (ip->i_d.di_size > ip->i_size)
+                       ip->i_d.di_size = ip->i_size;
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+/*
+ * xfs_file_splice_write() does not use xfs_rw_ilock() because
+ * generic_file_splice_write() takes the i_mutex itself. This, in theory,
+ * could cause lock inversions between the aio_write path and the splice path
+ * if someone is doing concurrent splice(2) based writes and write(2) based
+ * writes to the same inode. The only real way to fix this is to re-implement
+ * the generic code here with correct locking orders.
+ */
+STATIC ssize_t
+xfs_file_splice_write(
+       struct pipe_inode_info  *pipe,
+       struct file             *outfilp,
+       loff_t                  *ppos,
+       size_t                  count,
+       unsigned int            flags)
+{
+       struct inode            *inode = outfilp->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             new_size;
+       int                     ioflags = 0;
+       ssize_t                 ret;
+
+       XFS_STATS_INC(xs_write_calls);
+
+       if (outfilp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       new_size = *ppos + count;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (new_size > ip->i_size)
+               ip->i_new_size = new_size;
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
+
+       ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
+
+       xfs_aio_write_isize_update(inode, ppos, ret);
+       xfs_aio_write_newsize_update(ip);
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       return ret;
+}
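
For illustration, the userspace path into xfs_file_splice_write() is
splice(2) with a pipe as the input and a regular XFS file as the output
(a hedged sketch, error handling trimmed):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sys/types.h>

    static ssize_t pipe_to_file(int pipefd, int filefd, loff_t *off,
                                size_t len)
    {
            /* one side must be a pipe; *off advances on success */
            return splice(pipefd, NULL, filefd, off, len, SPLICE_F_MOVE);
    }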
+
+/*
+ * This routine is called to handle zeroing any space in the last
+ * block of the file that is beyond the EOF.  We do this since the
+ * size is being increased without writing anything to that block
+ * and we don't want anyone to read the garbage on the disk.
+ */
+STATIC int                             /* error (positive) */
+xfs_zero_last_block(
+       xfs_inode_t     *ip,
+       xfs_fsize_t     offset,
+       xfs_fsize_t     isize)
+{
+       xfs_fileoff_t   last_fsb;
+       xfs_mount_t     *mp = ip->i_mount;
+       int             nimaps;
+       int             zero_offset;
+       int             zero_len;
+       int             error = 0;
+       xfs_bmbt_irec_t imap;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       zero_offset = XFS_B_FSB_OFFSET(mp, isize);
+       if (zero_offset == 0) {
+               /*
+                * There are no extra bytes in the last block on disk to
+                * zero, so return.
+                */
+               return 0;
+       }
+
+       last_fsb = XFS_B_TO_FSBT(mp, isize);
+       nimaps = 1;
+       error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
+                         &nimaps, NULL);
+       if (error) {
+               return error;
+       }
+       ASSERT(nimaps > 0);
+       /*
+        * If the block underlying isize is just a hole, then there
+        * is nothing to zero.
+        */
+       if (imap.br_startblock == HOLESTARTBLOCK) {
+               return 0;
+       }
+       /*
+        * Zero the part of the last block beyond the EOF, and write it
+        * out sync.  We need to drop the ilock while we do this so we
+        * don't deadlock when the buffer cache calls back to us.
+        */
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       zero_len = mp->m_sb.sb_blocksize - zero_offset;
+       if (isize + zero_len > offset)
+               zero_len = offset - isize;
+       error = xfs_iozero(ip, isize, zero_len);
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       ASSERT(error >= 0);
+       return error;
+}
+
+/*
+ * Zero any on disk space between the current EOF and the new,
+ * larger EOF.  This handles the normal case of zeroing the remainder
+ * of the last block in the file and the unusual case of zeroing blocks
+ * out beyond the size of the file.  This second case only happens
+ * with fixed size extents and when the system crashes before the inode
+ * size was updated but after blocks were allocated.  If fill is set,
+ * then any holes in the range are filled and zeroed.  If not, the holes
+ * are left alone as holes.
+ */
+int                                    /* error (positive) */
+xfs_zero_eof(
+       xfs_inode_t     *ip,
+       xfs_off_t       offset,         /* starting I/O offset */
+       xfs_fsize_t     isize)          /* current inode size */
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       xfs_fileoff_t   start_zero_fsb;
+       xfs_fileoff_t   end_zero_fsb;
+       xfs_fileoff_t   zero_count_fsb;
+       xfs_fileoff_t   last_fsb;
+       xfs_fileoff_t   zero_off;
+       xfs_fsize_t     zero_len;
+       int             nimaps;
+       int             error = 0;
+       xfs_bmbt_irec_t imap;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+       ASSERT(offset > isize);
+
+       /*
+        * First handle zeroing the block on which isize resides.
+        * We only zero a part of that block so it is handled specially.
+        */
+       error = xfs_zero_last_block(ip, offset, isize);
+       if (error) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+               return error;
+       }
+
+       /*
+        * Calculate the range between the new size and the old
+        * where blocks needing to be zeroed may exist.  To get the
+        * block where the last byte in the file currently resides,
+        * we need to subtract one from the size and truncate back
+        * to a block boundary.  We subtract 1 in case the size is
+        * exactly on a block boundary.
+        */
+       last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
+       start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
+       end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
+       ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
+       if (last_fsb == end_zero_fsb) {
+               /*
+                * The size was only incremented on its last block.
+                * We took care of that above, so just return.
+                */
+               return 0;
+       }
+
+       ASSERT(start_zero_fsb <= end_zero_fsb);
+       while (start_zero_fsb <= end_zero_fsb) {
+               nimaps = 1;
+               zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+               error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
+                                 0, NULL, 0, &imap, &nimaps, NULL);
+               if (error) {
+                       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+                       return error;
+               }
+               ASSERT(nimaps > 0);
+
+               if (imap.br_state == XFS_EXT_UNWRITTEN ||
+                   imap.br_startblock == HOLESTARTBLOCK) {
+                       /*
+                        * This loop handles initializing pages that were
+                        * partially initialized by the code below this
+                        * loop. It basically zeroes the part of the page
+                        * that sits on a hole and sets the page as P_HOLE
+                        * and calls remapf if it is a mapped file.
+                        */
+                       start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+                       ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+                       continue;
+               }
+
+               /*
+                * There are blocks we need to zero.
+                * Drop the inode lock while we're doing the I/O.
+                * We'll still have the iolock to protect us.
+                */
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+               zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+               zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+
+               if ((zero_off + zero_len) > offset)
+                       zero_len = offset - zero_off;
+
+               error = xfs_iozero(ip, zero_off, zero_len);
+               if (error) {
+                       goto out_lock;
+               }
+
+               start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+               ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+       }
+
+       return 0;
+
+out_lock:
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       ASSERT(error >= 0);
+       return error;
+}
+
+/*
+ * Common pre-write limit and setup checks.
+ *
+ * Returns with iolock held according to @iolock.
+ */
+STATIC ssize_t
+xfs_file_aio_write_checks(
+       struct file             *file,
+       loff_t                  *pos,
+       size_t                  *count,
+       int                     *iolock)
+{
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             new_size;
+       int                     error = 0;
+
+       error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
+       if (error) {
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
+               *iolock = 0;
+               return error;
+       }
+
+       new_size = *pos + *count;
+       if (new_size > ip->i_size)
+               ip->i_new_size = new_size;
+
+       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+               file_update_time(file);
+
+       /*
+        * If the offset is beyond the size of the file, we need to zero any
+        * blocks that fall between the existing EOF and the start of this
+        * write.
+        */
+       if (*pos > ip->i_size)
+               error = -xfs_zero_eof(ip, *pos, ip->i_size);
+
+       xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       if (error)
+               return error;
+
+       /*
+        * If we're writing the file then make sure to clear the setuid and
+        * setgid bits if the process is not being run by root.  This keeps
+        * people from modifying setuid and setgid binaries.
+        */
+       return file_remove_suid(file);
+}
+
+/*
+ * xfs_file_dio_aio_write - handle direct IO writes
+ *
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
+ * By separating it from the buffered write path we remove all the
+ * tricky-to-follow locking changes and looping.
+ *
+ * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
+ * until we're sure the bytes at the new EOF have been zeroed and/or the cached
+ * pages are flushed out.
+ *
+ * In most cases the direct IO writes will be done holding IOLOCK_SHARED
+ * allowing them to be done in parallel with reads and other direct IO writes.
+ * However, if the IO is not aligned to filesystem blocks, the direct IO layer
+ * needs to do sub-block zeroing and that requires serialisation against other
+ * direct IOs to the same block. In this case we need to serialise the
+ * submission of the unaligned IOs so that we don't get racing block zeroing in
+ * the dio layer.  To avoid the problem with aio, we also need to wait for
+ * outstanding IOs to complete so that unwritten extent conversion is completed
+ * before we try to map the overlapping block. This is currently implemented by
+ * hitting it with a big hammer (i.e. xfs_ioend_wait()).
+ *
+ * Returns with locks held indicated by @iolock and errors indicated by
+ * negative return values.
+ */
+STATIC ssize_t
+xfs_file_dio_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos,
+       size_t                  ocount,
+       int                     *iolock)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       ssize_t                 ret = 0;
+       size_t                  count = ocount;
+       int                     unaligned_io = 0;
+       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
+                                       mp->m_rtdev_targp : mp->m_ddev_targp;
+
+       *iolock = 0;
+       if ((pos & target->bt_smask) || (count & target->bt_smask))
+               return -XFS_ERROR(EINVAL);
+
+       if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
+               unaligned_io = 1;
+
+       if (unaligned_io || mapping->nrpages || pos > ip->i_size)
+               *iolock = XFS_IOLOCK_EXCL;
+       else
+               *iolock = XFS_IOLOCK_SHARED;
+       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+       if (ret)
+               return ret;
+
+       if (mapping->nrpages) {
+               WARN_ON(*iolock != XFS_IOLOCK_EXCL);
+               ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
+                                                       FI_REMAPF_LOCKED);
+               if (ret)
+                       return ret;
+       }
+
+       /*
+        * If we are doing unaligned IO, wait for all other IO to drain;
+        * otherwise demote the lock if we had to flush cached pages.
+        */
+       if (unaligned_io)
+               xfs_ioend_wait(ip);
+       else if (*iolock == XFS_IOLOCK_EXCL) {
+               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               *iolock = XFS_IOLOCK_SHARED;
+       }
+
+       trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+       ret = generic_file_direct_write(iocb, iovp,
+                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
+
+       /* No fallback to buffered IO on errors for XFS. */
+       ASSERT(ret < 0 || ret == count);
+       return ret;
+}
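
The alignment contract enforced at the top of this function is visible from
userspace: O_DIRECT offsets, lengths and buffers must be sector-aligned or
the write fails with EINVAL.  A hedged sketch (512 is an assumption; the
real bound is target->bt_smask + 1):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    static ssize_t dio_write(const char *path, const void *src, size_t len)
    {
            void *buf;
            ssize_t ret;
            int fd = open(path, O_WRONLY | O_CREAT | O_DIRECT, 0644);

            if (fd < 0)
                    return -1;
            /* buffer, offset and length all need the same alignment */
            if (posix_memalign(&buf, 512, len)) {
                    close(fd);
                    return -1;
            }
            memcpy(buf, src, len);  /* len must be a multiple of 512 */
            ret = write(fd, buf, len);
            free(buf);
            close(fd);
            return ret;
    }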
+
+STATIC ssize_t
+xfs_file_buffered_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos,
+       size_t                  ocount,
+       int                     *iolock)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       ssize_t                 ret;
+       int                     enospc = 0;
+       size_t                  count = ocount;
+
+       *iolock = XFS_IOLOCK_EXCL;
+       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+       if (ret)
+               return ret;
+
+       /* We can write back this queue in page reclaim */
+       current->backing_dev_info = mapping->backing_dev_info;
+
+write_retry:
+       trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
+       ret = generic_file_buffered_write(iocb, iovp, nr_segs,
+                       pos, &iocb->ki_pos, count, ret);
+       /*
+        * If we just got an ENOSPC, flush the inode now that we aren't
+        * holding any page locks and retry *once*.
+        */
+       if (ret == -ENOSPC && !enospc) {
+               ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
+               if (ret)
+                       return ret;
+               enospc = 1;
+               goto write_retry;
+       }
+       current->backing_dev_info = NULL;
+       return ret;
+}
+
+STATIC ssize_t
+xfs_file_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       ssize_t                 ret;
+       int                     iolock;
+       size_t                  ocount = 0;
+
+       XFS_STATS_INC(xs_write_calls);
+
+       BUG_ON(iocb->ki_pos != pos);
+
+       ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
+       if (ret)
+               return ret;
+
+       if (ocount == 0)
+               return 0;
+
+       xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       if (unlikely(file->f_flags & O_DIRECT))
+               ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
+                                               ocount, &iolock);
+       else
+               ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
+                                               ocount, &iolock);
+
+       xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
+
+       if (ret <= 0)
+               goto out_unlock;
+
+       /* Handle various SYNC-type writes */
+       if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
+               loff_t end = pos + ret - 1;
+               int error;
+
+               xfs_rw_iunlock(ip, iolock);
+               error = xfs_file_fsync(file, pos, end,
+                                     (file->f_flags & __O_SYNC) ? 0 : 1);
+               xfs_rw_ilock(ip, iolock);
+               if (error)
+                       ret = error;
+       }
+
+out_unlock:
+       xfs_aio_write_newsize_update(ip);
+       xfs_rw_iunlock(ip, iolock);
+       return ret;
+}
+
+STATIC long
+xfs_file_fallocate(
+       struct file     *file,
+       int             mode,
+       loff_t          offset,
+       loff_t          len)
+{
+       struct inode    *inode = file->f_path.dentry->d_inode;
+       long            error;
+       loff_t          new_size = 0;
+       xfs_flock64_t   bf;
+       xfs_inode_t     *ip = XFS_I(inode);
+       int             cmd = XFS_IOC_RESVSP;
+       int             attr_flags = XFS_ATTR_NOLOCK;
+
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+               return -EOPNOTSUPP;
+
+       bf.l_whence = 0;
+       bf.l_start = offset;
+       bf.l_len = len;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       if (mode & FALLOC_FL_PUNCH_HOLE)
+               cmd = XFS_IOC_UNRESVSP;
+
+       /* check the new inode size is valid before allocating */
+       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+           offset + len > i_size_read(inode)) {
+               new_size = offset + len;
+               error = inode_newsize_ok(inode, new_size);
+               if (error)
+                       goto out_unlock;
+       }
+
+       if (file->f_flags & O_DSYNC)
+               attr_flags |= XFS_ATTR_SYNC;
+
+       error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
+       if (error)
+               goto out_unlock;
+
+       /* Change file size if needed */
+       if (new_size) {
+               struct iattr iattr;
+
+               iattr.ia_valid = ATTR_SIZE;
+               iattr.ia_size = new_size;
+               error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
+       }
+
+out_unlock:
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       return error;
+}
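
The two modes accepted above map one-to-one onto the fallocate(2) flags; a
minimal userspace sketch exercising both the preallocation (XFS_IOC_RESVSP)
and hole-punch (XFS_IOC_UNRESVSP) paths:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>

    static int prealloc_and_punch(int fd)
    {
            /* reserve 1 MiB of blocks without changing i_size ... */
            if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20))
                    return -1;
            /* ... then punch a 4 KiB hole at offset 64 KiB */
            return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                             64 << 10, 4 << 10);
    }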
+
+STATIC int
+xfs_file_open(
+       struct inode    *inode,
+       struct file     *file)
+{
+       if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+               return -EFBIG;
+       if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
+               return -EIO;
+       return 0;
+}
+
+STATIC int
+xfs_dir_open(
+       struct inode    *inode,
+       struct file     *file)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       int             mode;
+       int             error;
+
+       error = xfs_file_open(inode, file);
+       if (error)
+               return error;
+
+       /*
+        * If there are any blocks, read-ahead block 0 as we're almost
+        * certain to have the next operation be a read there.
+        */
+       mode = xfs_ilock_map_shared(ip);
+       if (ip->i_d.di_nextents > 0)
+               xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+       xfs_iunlock(ip, mode);
+       return 0;
+}
+
+STATIC int
+xfs_file_release(
+       struct inode    *inode,
+       struct file     *filp)
+{
+       return -xfs_release(XFS_I(inode));
+}
+
+STATIC int
+xfs_file_readdir(
+       struct file     *filp,
+       void            *dirent,
+       filldir_t       filldir)
+{
+       struct inode    *inode = filp->f_path.dentry->d_inode;
+       xfs_inode_t     *ip = XFS_I(inode);
+       int             error;
+       size_t          bufsize;
+
+       /*
+        * The Linux API doesn't pass the total size of the buffer we
+        * read into down to the filesystem.  With the filldir concept
+        * it's not needed for correct information, but the XFS dir2 leaf
+        * code wants an estimate of the buffer size to calculate its
+        * readahead window and size the buffers used for mapping to
+        * physical blocks.
+        *
+        * Try to give it an estimate that's good enough, maybe at some
+        * point we can change the ->readdir prototype to include the
+        * buffer size.  For now we use the current glibc buffer size.
+        */
+       bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
+
+       error = xfs_readdir(ip, dirent, bufsize,
+                               (xfs_off_t *)&filp->f_pos, filldir);
+       if (error)
+               return -error;
+       return 0;
+}
+
+STATIC int
+xfs_file_mmap(
+       struct file     *filp,
+       struct vm_area_struct *vma)
+{
+       vma->vm_ops = &xfs_file_vm_ops;
+       vma->vm_flags |= VM_CAN_NONLINEAR;
+
+       file_accessed(filp);
+       return 0;
+}
+
+/*
+ * An mmap()ed file has taken a write protection fault and is being made
+ * writable. We can set the page state up correctly for a writable
+ * page, which means we can do correct delalloc accounting (ENOSPC
+ * checking!) and unwritten extent mapping.
+ */
+STATIC int
+xfs_vm_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
+}
+
+const struct file_operations xfs_file_operations = {
+       .llseek         = generic_file_llseek,
+       .read           = do_sync_read,
+       .write          = do_sync_write,
+       .aio_read       = xfs_file_aio_read,
+       .aio_write      = xfs_file_aio_write,
+       .splice_read    = xfs_file_splice_read,
+       .splice_write   = xfs_file_splice_write,
+       .unlocked_ioctl = xfs_file_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = xfs_file_compat_ioctl,
+#endif
+       .mmap           = xfs_file_mmap,
+       .open           = xfs_file_open,
+       .release        = xfs_file_release,
+       .fsync          = xfs_file_fsync,
+       .fallocate      = xfs_file_fallocate,
+};
+
+const struct file_operations xfs_dir_file_operations = {
+       .open           = xfs_dir_open,
+       .read           = generic_read_dir,
+       .readdir        = xfs_file_readdir,
+       .llseek         = generic_file_llseek,
+       .unlocked_ioctl = xfs_file_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = xfs_file_compat_ioctl,
+#endif
+       .fsync          = xfs_file_fsync,
+};
+
+static const struct vm_operations_struct xfs_file_vm_ops = {
+       .fault          = filemap_fault,
+       .page_mkwrite   = xfs_vm_page_mkwrite,
+};
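
A store through a shared writable mapping is what drives the .page_mkwrite
hook above; for illustration, a sketch assuming the file is at least one
page long:

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static int dirty_first_page(const char *path)
    {
            char *p;
            int fd = open(path, O_RDWR);

            if (fd < 0)
                    return -1;
            p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            close(fd);      /* the mapping keeps the file referenced */
            if (p == MAP_FAILED)
                    return -1;
            p[0] = 1;       /* write-protection fault -> xfs_vm_page_mkwrite() */
            return munmap(p, 4096);
    }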
diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c
new file mode 100644 (file)
index 0000000..ed88ed1
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_vnodeops.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+
+/*
+ * Note: all filemap functions return negative error codes.  These
+ * need to be inverted before returning to the xfs core functions.
+ */
+void
+xfs_tosspages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       int             fiopt)
+{
+       /* can't toss partial tail pages, so mask them out */
+       last &= ~(PAGE_SIZE - 1);
+       truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
+}
+
+int
+xfs_flushinval_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       int             fiopt)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+       int             ret = 0;
+
+       trace_xfs_pagecache_inval(ip, first, last);
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+       ret = filemap_write_and_wait_range(mapping, first,
+                               last == -1 ? LLONG_MAX : last);
+       if (!ret)
+               truncate_inode_pages_range(mapping, first, last);
+       return -ret;
+}
+
+int
+xfs_flush_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       uint64_t        flags,
+       int             fiopt)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+       int             ret = 0;
+       int             ret2;
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+       ret = -filemap_fdatawrite_range(mapping, first,
+                               last == -1 ? LLONG_MAX : last);
+       if (flags & XBF_ASYNC)
+               return ret;
+       ret2 = xfs_wait_on_pages(ip, first, last);
+       if (!ret)
+               ret = ret2;
+       return ret;
+}
+
+int
+xfs_wait_on_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+
+       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+               return -filemap_fdatawait_range(mapping, first,
+                                       last == -1 ? ip->i_size - 1 : last);
+       }
+       return 0;
+}
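
The sign convention noted at the top of this file is easy to trip over: the
filemap helpers return negative errnos while the XFS core historically uses
positive ones, hence the leading minus signs on the filemap calls above.  A
one-line illustration:

    /* -(-EIO) == EIO: flip a Linux errno into XFS-positive form */
    static inline int xfs_positive_errno(int linux_err)
    {
            return -linux_err;
    }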
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
new file mode 100644 (file)
index 0000000..76e81cf
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sysctl.h"
+
+/*
+ * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
+ * since other XFS code uses these values.  Times are measured in centisecs
+ * (i.e. 100ths of a second).
+ */
+xfs_param_t xfs_params = {
+                         /*    MIN             DFLT            MAX     */
+       .sgid_inherit   = {     0,              0,              1       },
+       .symlink_mode   = {     0,              0,              1       },
+       .panic_mask     = {     0,              0,              255     },
+       .error_level    = {     0,              3,              11      },
+       .syncd_timer    = {     1*100,          30*100,         7200*100},
+       .stats_clear    = {     0,              0,              1       },
+       .inherit_sync   = {     0,              1,              1       },
+       .inherit_nodump = {     0,              1,              1       },
+       .inherit_noatim = {     0,              1,              1       },
+       .xfs_buf_timer  = {     100/2,          1*100,          30*100  },
+       .xfs_buf_age    = {     1*100,          15*100,         7200*100},
+       .inherit_nosym  = {     0,              0,              1       },
+       .rotorstep      = {     1,              1,              255     },
+       .inherit_nodfrg = {     0,              1,              1       },
+       .fstrm_timer    = {     1,              30*100,         3600*100},
+};
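
With CONFIG_SYSCTL enabled these tunables surface under /proc/sys/fs/xfs/
(the sysctl table itself lives elsewhere, not in this hunk).  A hedged
userspace sketch reading one of them:

    #include <stdio.h>

    static int read_xfs_error_level(void)
    {
            int level = -1;
            FILE *f = fopen("/proc/sys/fs/xfs/error_level", "r");

            if (f) {
                    if (fscanf(f, "%d", &level) != 1)
                            level = -1;
                    fclose(f);
            }
            return level;
    }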
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
new file mode 100644 (file)
index 0000000..f7ce7de
--- /dev/null
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ioctl.h"
+#include "xfs_rtalloc.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_bmap.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_dfrag.h"
+#include "xfs_fsops.h"
+#include "xfs_vnodeops.h"
+#include "xfs_discard.h"
+#include "xfs_quota.h"
+#include "xfs_inode_item.h"
+#include "xfs_export.h"
+#include "xfs_trace.h"
+
+#include <linux/capability.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/exportfs.h>
+
+/*
+ * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
+ * a file or fs handle.
+ *
+ * XFS_IOC_PATH_TO_FSHANDLE
+ *    returns fs handle for a mount point or path within that mount point
+ * XFS_IOC_FD_TO_HANDLE
+ *    returns full handle for a FD opened in user space
+ * XFS_IOC_PATH_TO_HANDLE
+ *    returns full handle for a path
+ */
+int
+xfs_find_handle(
+       unsigned int            cmd,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       int                     hsize;
+       xfs_handle_t            handle;
+       struct inode            *inode;
+       struct file             *file = NULL;
+       struct path             path;
+       int                     error;
+       struct xfs_inode        *ip;
+
+       if (cmd == XFS_IOC_FD_TO_HANDLE) {
+               file = fget(hreq->fd);
+               if (!file)
+                       return -EBADF;
+               inode = file->f_path.dentry->d_inode;
+       } else {
+               error = user_lpath((const char __user *)hreq->path, &path);
+               if (error)
+                       return error;
+               inode = path.dentry->d_inode;
+       }
+       ip = XFS_I(inode);
+
+       /*
+        * We can only generate handles for inodes residing on a XFS filesystem,
+        * and only for regular files, directories or symbolic links.
+        */
+       error = -EINVAL;
+       if (inode->i_sb->s_magic != XFS_SB_MAGIC)
+               goto out_put;
+
+       error = -EBADF;
+       if (!S_ISREG(inode->i_mode) &&
+           !S_ISDIR(inode->i_mode) &&
+           !S_ISLNK(inode->i_mode))
+               goto out_put;
+
+       memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
+
+       if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
+               /*
+                * This handle only contains an fsid, zero the rest.
+                */
+               memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
+               hsize = sizeof(xfs_fsid_t);
+       } else {
+               int             lock_mode;
+
+               lock_mode = xfs_ilock_map_shared(ip);
+               handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
+                                       sizeof(handle.ha_fid.fid_len);
+               handle.ha_fid.fid_pad = 0;
+               handle.ha_fid.fid_gen = ip->i_d.di_gen;
+               handle.ha_fid.fid_ino = ip->i_ino;
+               xfs_iunlock_map_shared(ip, lock_mode);
+
+               hsize = XFS_HSIZE(handle);
+       }
+
+       error = -EFAULT;
+       if (copy_to_user(hreq->ohandle, &handle, hsize) ||
+           copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
+               goto out_put;
+
+       error = 0;
+
+ out_put:
+       if (cmd == XFS_IOC_FD_TO_HANDLE)
+               fput(file);
+       else
+               path_put(&path);
+       return error;
+}
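
From userspace this is reached through the XFS handle ioctls; a hedged
sketch, assuming the xfs_fsop_handlereq_t layout installed by xfsprogs as
<xfs/xfs.h> (note that ohandlen is a pointer the kernel writes the handle
size through):

    #include <sys/ioctl.h>
    #include <xfs/xfs.h>    /* xfs_fsop_handlereq_t, XFS_IOC_* */

    static int fs_handle_for(int mountfd, const char *path,
                             void *hbuf, __u32 *hlen)
    {
            xfs_fsop_handlereq_t hreq = {
                    .path     = (void *)path,
                    .ohandle  = hbuf,
                    .ohandlen = hlen,
            };

            /* hbuf should be at least sizeof(xfs_handle_t) bytes */
            return ioctl(mountfd, XFS_IOC_PATH_TO_FSHANDLE, &hreq);
    }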
+
+/*
+ * No need to do permission checks on the various pathname components
+ * as the handle operations are privileged.
+ */
+STATIC int
+xfs_handle_acceptable(
+       void                    *context,
+       struct dentry           *dentry)
+{
+       return 1;
+}
+
+/*
+ * Convert userspace handle data into a dentry.
+ */
+struct dentry *
+xfs_handle_to_dentry(
+       struct file             *parfilp,
+       void __user             *uhandle,
+       u32                     hlen)
+{
+       xfs_handle_t            handle;
+       struct xfs_fid64        fid;
+
+       /*
+        * Only allow handle opens under a directory.
+        */
+       if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
+               return ERR_PTR(-ENOTDIR);
+
+       if (hlen != sizeof(xfs_handle_t))
+               return ERR_PTR(-EINVAL);
+       if (copy_from_user(&handle, uhandle, hlen))
+               return ERR_PTR(-EFAULT);
+       if (handle.ha_fid.fid_len !=
+           sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
+               return ERR_PTR(-EINVAL);
+
+       memset(&fid, 0, sizeof(struct fid));
+       fid.ino = handle.ha_fid.fid_ino;
+       fid.gen = handle.ha_fid.fid_gen;
+
+       return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
+                       FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
+                       xfs_handle_acceptable, NULL);
+}
+
+STATIC struct dentry *
+xfs_handlereq_to_dentry(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
+}
+
+int
+xfs_open_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       const struct cred       *cred = current_cred();
+       int                     error;
+       int                     fd;
+       int                     permflag;
+       struct file             *filp;
+       struct inode            *inode;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+       inode = dentry->d_inode;
+
+       /* Restrict xfs_open_by_handle to directories & regular files. */
+       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+               error = -XFS_ERROR(EPERM);
+               goto out_dput;
+       }
+
+#if BITS_PER_LONG != 32
+       hreq->oflags |= O_LARGEFILE;
+#endif
+
+       /*
+        * Put open permission in namei format: map O_RDONLY/O_WRONLY/O_RDWR
+        * (0/1/2) onto read/write bits (1/2/3) as in file->f_mode, and make
+        * O_TRUNC imply write permission.
+        */
+       permflag = hreq->oflags;
+       if ((permflag+1) & O_ACCMODE)
+               permflag++;
+       if (permflag & O_TRUNC)
+               permflag |= 2;
+
+       if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
+           (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out_dput;
+       }
+
+       if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+               error = -XFS_ERROR(EACCES);
+               goto out_dput;
+       }
+
+       /* Can't write directories. */
+       if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
+               error = -XFS_ERROR(EISDIR);
+               goto out_dput;
+       }
+
+       fd = get_unused_fd();
+       if (fd < 0) {
+               error = fd;
+               goto out_dput;
+       }
+
+       filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
+                          hreq->oflags, cred);
+       if (IS_ERR(filp)) {
+               put_unused_fd(fd);
+               return PTR_ERR(filp);
+       }
+
+       if (S_ISREG(inode->i_mode)) {
+               filp->f_flags |= O_NOATIME;
+               filp->f_mode |= FMODE_NOCMTIME;
+       }
+
+       fd_install(fd, filp);
+       return fd;
+
+ out_dput:
+       dput(dentry);
+       return error;
+}
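/*
 * Illustrative sketch, not part of the patch: reopening a file from a
 * previously obtained handle.  XFS_IOC_OPEN_BY_HANDLE requires
 * CAP_SYS_ADMIN and must be issued on a directory fd of the same
 * filesystem (enforced in xfs_handle_to_dentry() below).  Header
 * assumptions as in the earlier sketch.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int open_from_handle(int dirfd, void *hbuf, __u32 hlen)
{
        xfs_fsop_handlereq_t hreq;

        memset(&hreq, 0, sizeof(hreq));
        hreq.ihandle = hbuf;            /* from XFS_IOC_PATH_TO_HANDLE */
        hreq.ihandlen = hlen;
        hreq.oflags = O_RDONLY;

        /* on success the return value is the new file descriptor */
        return ioctl(dirfd, XFS_IOC_OPEN_BY_HANDLE, &hreq);
}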
+
+/*
+ * This is a copy from fs/namei.c:vfs_readlink(), except for removing its
+ * unused first argument.
+ */
+STATIC int
+do_readlink(
+       char __user             *buffer,
+       int                     buflen,
+       const char              *link)
+{
+       int                     len;
+
+       len = PTR_ERR(link);
+       if (IS_ERR(link))
+               goto out;
+
+       len = strlen(link);
+       if (len > (unsigned) buflen)
+               len = buflen;
+       if (copy_to_user(buffer, link, len))
+               len = -EFAULT;
+ out:
+       return len;
+}
+
+
+int
+xfs_readlink_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       struct dentry           *dentry;
+       __u32                   olen;
+       void                    *link;
+       int                     error;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       /* Restrict this handle operation to symlinks only. */
+       if (!S_ISLNK(dentry->d_inode->i_mode)) {
+               error = -XFS_ERROR(EINVAL);
+               goto out_dput;
+       }
+
+       if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out_dput;
+       }
+
+       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+       if (!link) {
+               error = -XFS_ERROR(ENOMEM);
+               goto out_dput;
+       }
+
+       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+       if (error)
+               goto out_kfree;
+       error = do_readlink(hreq->ohandle, olen, link);
+
+ out_kfree:
+       kfree(link);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_fssetdm_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       struct fsdmidata        fsd;
+       xfs_fsop_setdm_handlereq_t dmhreq;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_MKNOD))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out;
+       }
+
+       if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out;
+       }
+
+       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+                                fsd.fsd_dmstate);
+
+ out:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_attrlist_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error = -ENOMEM;
+       attrlist_cursor_kern_t  *cursor;
+       xfs_fsop_attrlist_handlereq_t al_hreq;
+       struct dentry           *dentry;
+       char                    *kbuf;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+       if (al_hreq.buflen > XATTR_LIST_MAX)
+               return -XFS_ERROR(EINVAL);
+
+       /*
+        * Reject flags, only allow namespaces.
+        */
+       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+               return -XFS_ERROR(EINVAL);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
+       if (!kbuf)
+               goto out_dput;
+
+       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+                                       al_hreq.flags, cursor);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
+               error = -EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+int
+xfs_attrmulti_attr_get(
+       struct inode            *inode,
+       unsigned char           *name,
+       unsigned char           __user *ubuf,
+       __uint32_t              *len,
+       __uint32_t              flags)
+{
+       unsigned char           *kbuf;
+       int                     error = EFAULT;
+
+       if (*len > XATTR_SIZE_MAX)
+               return EINVAL;
+       kbuf = kmalloc(*len, GFP_KERNEL);
+       if (!kbuf)
+               return ENOMEM;
+
+       error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(ubuf, kbuf, *len))
+               error = EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+       return error;
+}
+
+int
+xfs_attrmulti_attr_set(
+       struct inode            *inode,
+       unsigned char           *name,
+       const unsigned char     __user *ubuf,
+       __uint32_t              len,
+       __uint32_t              flags)
+{
+       unsigned char           *kbuf;
+       int                     error = EFAULT;
+
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+               return EPERM;
+       if (len > XATTR_SIZE_MAX)
+               return EINVAL;
+
+       kbuf = memdup_user(ubuf, len);
+       if (IS_ERR(kbuf))
+               return -PTR_ERR(kbuf);  /* positive errno, as above */
+
+       error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
+
+       kfree(kbuf);
+       return error;
+}
+
+int
+xfs_attrmulti_attr_remove(
+       struct inode            *inode,
+       unsigned char           *name,
+       __uint32_t              flags)
+{
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+               return EPERM;
+       return xfs_attr_remove(XFS_I(inode), name, flags);
+}
+
+STATIC int
+xfs_attrmulti_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       xfs_attr_multiop_t      *ops;
+       xfs_fsop_attrmulti_handlereq_t am_hreq;
+       struct dentry           *dentry;
+       unsigned int            i, size;
+       unsigned char           *attr_name;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       /* overflow check */
+       if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
+               return -E2BIG;
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = E2BIG;
+       size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
+       if (!size || size > 16 * PAGE_SIZE)
+               goto out_dput;
+
+       ops = memdup_user(am_hreq.ops, size);
+       if (IS_ERR(ops)) {
+               /* keep 'error' positive; it is negated on return below */
+               error = -PTR_ERR(ops);
+               goto out_dput;
+       }
+
+       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+       if (!attr_name)
+               goto out_kfree_ops;
+
+       error = 0;
+       for (i = 0; i < am_hreq.opcount; i++) {
+               ops[i].am_error = strncpy_from_user((char *)attr_name,
+                               ops[i].am_attrname, MAXNAMELEN);
+               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+                       error = ERANGE; /* negated to -ERANGE on return */
+               if (ops[i].am_error < 0)
+                       break;
+
+               switch (ops[i].am_opcode) {
+               case ATTR_OP_GET:
+                       ops[i].am_error = xfs_attrmulti_attr_get(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_attrvalue, &ops[i].am_length,
+                                       ops[i].am_flags);
+                       break;
+               case ATTR_OP_SET:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_set(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_attrvalue, ops[i].am_length,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               case ATTR_OP_REMOVE:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_remove(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               default:
+                       ops[i].am_error = EINVAL;
+               }
+       }
+
+       if (copy_to_user(am_hreq.ops, ops, size))
+               error = XFS_ERROR(EFAULT);
+
+       kfree(attr_name);
+ out_kfree_ops:
+       kfree(ops);
+ out_dput:
+       dput(dentry);
+       return -error;
+}
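/*
 * Illustrative sketch, not part of the patch: removing one user-namespace
 * attribute through XFS_IOC_ATTRMULTI_BY_HANDLE.  hbuf/hlen are a handle
 * as in the earlier sketches; note that am_error carries a positive errno
 * per operation.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int remove_attr(int dirfd, void *hbuf, __u32 hlen)
{
        xfs_attr_multiop_t op;
        xfs_fsop_attrmulti_handlereq_t am;

        memset(&op, 0, sizeof(op));
        op.am_opcode = ATTR_OP_REMOVE;
        op.am_attrname = (void *)"comment";     /* flags 0 = user namespace */

        memset(&am, 0, sizeof(am));
        am.hreq.ihandle = hbuf;
        am.hreq.ihandlen = hlen;
        am.opcount = 1;
        am.ops = &op;

        if (ioctl(dirfd, XFS_IOC_ATTRMULTI_BY_HANDLE, &am) < 0)
                return -1;
        return op.am_error;     /* 0 on success */
}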
+
+int
+xfs_ioc_space(
+       struct xfs_inode        *ip,
+       struct inode            *inode,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       xfs_flock64_t           *bf)
+{
+       int                     attr_flags = 0;
+       int                     error;
+
+       /*
+        * Only allow the sys admin to reserve space unless
+        * unwritten extents are enabled.
+        */
+       if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
+           !capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
+               return -XFS_ERROR(EPERM);
+
+       if (!(filp->f_mode & FMODE_WRITE))
+               return -XFS_ERROR(EBADF);
+
+       if (!S_ISREG(inode->i_mode))
+               return -XFS_ERROR(EINVAL);
+
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               attr_flags |= XFS_ATTR_NONBLOCK;
+
+       if (filp->f_flags & O_DSYNC)
+               attr_flags |= XFS_ATTR_SYNC;
+
+       if (ioflags & IO_INVIS)
+               attr_flags |= XFS_ATTR_DMI;
+
+       error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
+       return -error;
+}
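/*
 * Illustrative sketch, not part of the patch: preallocating a range with
 * XFS_IOC_RESVSP64.  l_whence/l_start/l_len follow lseek() conventions;
 * the remaining xfs_flock64 fields are unused here.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <xfs/xfs.h>

static int reserve_range(int fd, __s64 offset, __s64 len)
{
        xfs_flock64_t bf;

        memset(&bf, 0, sizeof(bf));
        bf.l_whence = SEEK_SET; /* l_start is an absolute offset */
        bf.l_start = offset;
        bf.l_len = len;

        /* fd must be open for writing, per the FMODE_WRITE check above */
        return ioctl(fd, XFS_IOC_RESVSP64, &bf);
}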
+
+STATIC int
+xfs_ioc_bulkstat(
+       xfs_mount_t             *mp,
+       unsigned int            cmd,
+       void                    __user *arg)
+{
+       xfs_fsop_bulkreq_t      bulkreq;
+       int                     count;  /* # of records returned */
+       xfs_ino_t               inlast; /* last inode number */
+       int                     done;
+       int                     error;
+
+       /*
+        * done = 1 if there are more stats to get and if bulkstat should be
+        * called again (unused here, but used in dmapi).
+        */
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+               return -XFS_ERROR(EFAULT);
+
+       if ((count = bulkreq.icount) <= 0)
+               return -XFS_ERROR(EINVAL);
+
+       if (bulkreq.ubuffer == NULL)
+               return -XFS_ERROR(EINVAL);
+
+       if (cmd == XFS_IOC_FSINUMBERS)
+               error = xfs_inumbers(mp, &inlast, &count,
+                                       bulkreq.ubuffer, xfs_inumbers_fmt);
+       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
+               error = xfs_bulkstat_single(mp, &inlast,
+                                               bulkreq.ubuffer, &done);
+       else    /* XFS_IOC_FSBULKSTAT */
+               error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
+                                    sizeof(xfs_bstat_t), bulkreq.ubuffer,
+                                    &done);
+
+       if (error)
+               return -error;
+
+       if (bulkreq.ocount != NULL) {
+               if (copy_to_user(bulkreq.lastip, &inlast,
+                                               sizeof(xfs_ino_t)))
+                       return -XFS_ERROR(EFAULT);
+
+               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+                       return -XFS_ERROR(EFAULT);
+       }
+
+       return 0;
+}
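/*
 * Illustrative sketch, not part of the patch: iterating over all inodes
 * with XFS_IOC_FSBULKSTAT (requires CAP_SYS_ADMIN, as checked above).
 * lastip is both the resume cookie and an output; *ocount reports how
 * many xfs_bstat_t records were returned.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int walk_inodes(int fsfd)
{
        xfs_bstat_t buf[64];
        __u64 lastino = 0;
        __s32 ocount = 0;
        xfs_fsop_bulkreq_t req;

        memset(&req, 0, sizeof(req));
        req.lastip = &lastino;
        req.icount = 64;        /* records per call */
        req.ubuffer = buf;
        req.ocount = &ocount;

        while (ioctl(fsfd, XFS_IOC_FSBULKSTAT, &req) == 0 && ocount > 0) {
                /* process buf[0 .. ocount - 1] here */
        }
        return 0;
}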
+
+STATIC int
+xfs_ioc_fsgeometry_v1(
+       xfs_mount_t             *mp,
+       void                    __user *arg)
+{
+       xfs_fsop_geom_t         fsgeo;
+       int                     error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 3);
+       if (error)
+               return -error;
+
+       /*
+        * Caller should have passed an argument of type
+        * xfs_fsop_geom_v1_t.  This is a proper subset of the
+        * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+        */
+       if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry(
+       xfs_mount_t             *mp,
+       void                    __user *arg)
+{
+       xfs_fsop_geom_t         fsgeo;
+       int                     error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 4);
+       if (error)
+               return -error;
+
+       if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
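/*
 * Illustrative sketch, not part of the patch: reading the geometry that
 * xfs_ioc_fsgeometry() fills in.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int print_geometry(int fsfd)
{
        xfs_fsop_geom_t geo;

        if (ioctl(fsfd, XFS_IOC_FSGEOMETRY, &geo) < 0)
                return -1;
        printf("%u AGs of %u blocks, %u-byte blocks\n",
               geo.agcount, geo.agblocks, geo.blocksize);
        return 0;
}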
+
+/*
+ * Linux extended inode flags interface.
+ */
+
+STATIC unsigned int
+xfs_merge_ioc_xflags(
+       unsigned int    flags,
+       unsigned int    start)
+{
+       unsigned int    xflags = start;
+
+       if (flags & FS_IMMUTABLE_FL)
+               xflags |= XFS_XFLAG_IMMUTABLE;
+       else
+               xflags &= ~XFS_XFLAG_IMMUTABLE;
+       if (flags & FS_APPEND_FL)
+               xflags |= XFS_XFLAG_APPEND;
+       else
+               xflags &= ~XFS_XFLAG_APPEND;
+       if (flags & FS_SYNC_FL)
+               xflags |= XFS_XFLAG_SYNC;
+       else
+               xflags &= ~XFS_XFLAG_SYNC;
+       if (flags & FS_NOATIME_FL)
+               xflags |= XFS_XFLAG_NOATIME;
+       else
+               xflags &= ~XFS_XFLAG_NOATIME;
+       if (flags & FS_NODUMP_FL)
+               xflags |= XFS_XFLAG_NODUMP;
+       else
+               xflags &= ~XFS_XFLAG_NODUMP;
+
+       return xflags;
+}
+
+STATIC unsigned int
+xfs_di2lxflags(
+       __uint16_t      di_flags)
+{
+       unsigned int    flags = 0;
+
+       if (di_flags & XFS_DIFLAG_IMMUTABLE)
+               flags |= FS_IMMUTABLE_FL;
+       if (di_flags & XFS_DIFLAG_APPEND)
+               flags |= FS_APPEND_FL;
+       if (di_flags & XFS_DIFLAG_SYNC)
+               flags |= FS_SYNC_FL;
+       if (di_flags & XFS_DIFLAG_NOATIME)
+               flags |= FS_NOATIME_FL;
+       if (di_flags & XFS_DIFLAG_NODUMP)
+               flags |= FS_NODUMP_FL;
+       return flags;
+}
+
+STATIC int
+xfs_ioc_fsgetxattr(
+       xfs_inode_t             *ip,
+       int                     attr,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+
+       memset(&fa, 0, sizeof(struct fsxattr));
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       fa.fsx_xflags = xfs_ip2xflags(ip);
+       fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
+       fa.fsx_projid = xfs_get_projid(ip);
+
+       if (attr) {
+               if (ip->i_afp) {
+                       if (ip->i_afp->if_flags & XFS_IFEXTENTS)
+                               fa.fsx_nextents = ip->i_afp->if_bytes /
+                                                       sizeof(xfs_bmbt_rec_t);
+                       else
+                               fa.fsx_nextents = ip->i_d.di_anextents;
+               } else
+                       fa.fsx_nextents = 0;
+       } else {
+               if (ip->i_df.if_flags & XFS_IFEXTENTS)
+                       fa.fsx_nextents = ip->i_df.if_bytes /
+                                               sizeof(xfs_bmbt_rec_t);
+               else
+                       fa.fsx_nextents = ip->i_d.di_nextents;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       if (copy_to_user(arg, &fa, sizeof(fa)))
+               return -EFAULT;
+       return 0;
+}
+
+STATIC void
+xfs_set_diflags(
+       struct xfs_inode        *ip,
+       unsigned int            xflags)
+{
+       unsigned int            di_flags;
+
+       /* can't set PREALLOC this way, just preserve it */
+       di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+       if (xflags & XFS_XFLAG_IMMUTABLE)
+               di_flags |= XFS_DIFLAG_IMMUTABLE;
+       if (xflags & XFS_XFLAG_APPEND)
+               di_flags |= XFS_DIFLAG_APPEND;
+       if (xflags & XFS_XFLAG_SYNC)
+               di_flags |= XFS_DIFLAG_SYNC;
+       if (xflags & XFS_XFLAG_NOATIME)
+               di_flags |= XFS_DIFLAG_NOATIME;
+       if (xflags & XFS_XFLAG_NODUMP)
+               di_flags |= XFS_DIFLAG_NODUMP;
+       if (xflags & XFS_XFLAG_PROJINHERIT)
+               di_flags |= XFS_DIFLAG_PROJINHERIT;
+       if (xflags & XFS_XFLAG_NODEFRAG)
+               di_flags |= XFS_DIFLAG_NODEFRAG;
+       if (xflags & XFS_XFLAG_FILESTREAM)
+               di_flags |= XFS_DIFLAG_FILESTREAM;
+       if (S_ISDIR(ip->i_d.di_mode)) {
+               if (xflags & XFS_XFLAG_RTINHERIT)
+                       di_flags |= XFS_DIFLAG_RTINHERIT;
+               if (xflags & XFS_XFLAG_NOSYMLINKS)
+                       di_flags |= XFS_DIFLAG_NOSYMLINKS;
+               if (xflags & XFS_XFLAG_EXTSZINHERIT)
+                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+       } else if (S_ISREG(ip->i_d.di_mode)) {
+               if (xflags & XFS_XFLAG_REALTIME)
+                       di_flags |= XFS_DIFLAG_REALTIME;
+               if (xflags & XFS_XFLAG_EXTSIZE)
+                       di_flags |= XFS_DIFLAG_EXTSIZE;
+       }
+
+       ip->i_d.di_flags = di_flags;
+}
+
+STATIC void
+xfs_diflags_to_linux(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = VFS_I(ip);
+       unsigned int            xflags = xfs_ip2xflags(ip);
+
+       if (xflags & XFS_XFLAG_IMMUTABLE)
+               inode->i_flags |= S_IMMUTABLE;
+       else
+               inode->i_flags &= ~S_IMMUTABLE;
+       if (xflags & XFS_XFLAG_APPEND)
+               inode->i_flags |= S_APPEND;
+       else
+               inode->i_flags &= ~S_APPEND;
+       if (xflags & XFS_XFLAG_SYNC)
+               inode->i_flags |= S_SYNC;
+       else
+               inode->i_flags &= ~S_SYNC;
+       if (xflags & XFS_XFLAG_NOATIME)
+               inode->i_flags |= S_NOATIME;
+       else
+               inode->i_flags &= ~S_NOATIME;
+}
+
+#define FSX_PROJID     1
+#define FSX_EXTSIZE    2
+#define FSX_XFLAGS     4
+#define FSX_NONBLOCK   8
+
+STATIC int
+xfs_ioctl_setattr(
+       xfs_inode_t             *ip,
+       struct fsxattr          *fa,
+       int                     mask)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       unsigned int            lock_flags = 0;
+       struct xfs_dquot        *udqp = NULL;
+       struct xfs_dquot        *gdqp = NULL;
+       struct xfs_dquot        *olddquot = NULL;
+       int                     code;
+
+       trace_xfs_ioctl_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       /*
+        * Disallow 32-bit project ids unless the projid32bit feature is enabled.
+        */
+       if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
+                       !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
+               return XFS_ERROR(EINVAL);
+
+       /*
+        * If disk quotas are on, we make sure that the dquots exist on disk,
+        * before we start any other transactions. Trying to do this later
+        * is messy. We don't care to take a readlock to look at the ids
+        * in inode here, because we can't hold it across the trans_reserve.
+        * If the IDs do change before we take the ilock, we're covered
+        * because the i_*dquot fields will get updated anyway.
+        */
+       if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
+               code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
+                                        ip->i_d.di_gid, fa->fsx_projid,
+                                        XFS_QMOPT_PQUOTA, &udqp, &gdqp);
+               if (code)
+                       return code;
+       }
+
+       /*
+        * For the other attributes, we acquire the inode lock and
+        * first do an error checking pass.
+        */
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+       code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+       if (code)
+               goto error_return;
+
+       lock_flags = XFS_ILOCK_EXCL;
+       xfs_ilock(ip, lock_flags);
+
+       /*
+        * CAP_FOWNER overrides the following restrictions:
+        *
+        * The user ID of the calling process must be equal
+        * to the file owner ID, except in cases where the
+        * CAP_FSETID capability is applicable.
+        */
+       if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+               code = XFS_ERROR(EPERM);
+               goto error_return;
+       }
+
+       /*
+        * Do a quota reservation only if projid is actually going to change.
+        */
+       if (mask & FSX_PROJID) {
+               if (XFS_IS_QUOTA_RUNNING(mp) &&
+                   XFS_IS_PQUOTA_ON(mp) &&
+                   xfs_get_projid(ip) != fa->fsx_projid) {
+                       ASSERT(tp);
+                       code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+                                               capable(CAP_FOWNER) ?
+                                               XFS_QMOPT_FORCE_RES : 0);
+                       if (code)       /* out of quota */
+                               goto error_return;
+               }
+       }
+
+       if (mask & FSX_EXTSIZE) {
+               /*
+                * Can't change extent size if any extents are allocated.
+                */
+               if (ip->i_d.di_nextents &&
+                   ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+                    fa->fsx_extsize)) {
+                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
+                       goto error_return;
+               }
+
+               /*
+                * Extent size must be a multiple of the appropriate block
+                * size, if set at all. It must also be smaller than the
+                * maximum extent size supported by the filesystem.
+                *
+                * Also, for non-realtime files, limit the extent size hint to
+                * half the size of the AGs in the filesystem so alignment
+                * doesn't result in extents larger than an AG.
+                */
+               if (fa->fsx_extsize != 0) {
+                       xfs_extlen_t    size;
+                       xfs_fsblock_t   extsize_fsb;
+
+                       extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
+                       if (extsize_fsb > MAXEXTLEN) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+
+                       if (XFS_IS_REALTIME_INODE(ip) ||
+                           ((mask & FSX_XFLAGS) &&
+                           (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
+                               size = mp->m_sb.sb_rextsize <<
+                                      mp->m_sb.sb_blocklog;
+                       } else {
+                               size = mp->m_sb.sb_blocksize;
+                               if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
+                                       code = XFS_ERROR(EINVAL);
+                                       goto error_return;
+                               }
+                       }
+
+                       if (fa->fsx_extsize % size) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+               }
+       }
+
+       if (mask & FSX_XFLAGS) {
+               /*
+                * Can't change realtime flag if any extents are allocated.
+                */
+               if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+                   (XFS_IS_REALTIME_INODE(ip)) !=
+                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
+                       goto error_return;
+               }
+
+               /*
+                * If realtime flag is set then must have realtime data.
+                */
+               if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                       if ((mp->m_sb.sb_rblocks == 0) ||
+                           (mp->m_sb.sb_rextsize == 0) ||
+                           (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+               }
+
+               /*
+                * Can't modify an immutable/append-only file unless
+                * we have appropriate permission.
+                */
+               if ((ip->i_d.di_flags &
+                               (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+                    (fa->fsx_xflags &
+                               (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+                   !capable(CAP_LINUX_IMMUTABLE)) {
+                       code = XFS_ERROR(EPERM);
+                       goto error_return;
+               }
+       }
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & FSX_PROJID) {
+               /*
+                * CAP_FSETID overrides the following restrictions:
+                *
+                * The set-user-ID and set-group-ID bits of a file will be
+                * cleared upon successful return from chown()
+                */
+               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+                   !capable(CAP_FSETID))
+                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+               /*
+                * Change the ownerships and register quota modifications
+                * in the transaction.
+                */
+               if (xfs_get_projid(ip) != fa->fsx_projid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+                               olddquot = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_gdquot, gdqp);
+                       }
+                       xfs_set_projid(ip, fa->fsx_projid);
+
+                       /*
+                        * We may have to rev the inode as well as
+                        * the superblock version number since projids didn't
+                        * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+                        */
+                       if (ip->i_d.di_version == 1)
+                               xfs_bump_ino_vers2(tp, ip);
+               }
+
+       }
+
+       if (mask & FSX_EXTSIZE)
+               ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+       if (mask & FSX_XFLAGS) {
+               xfs_set_diflags(ip, fa->fsx_xflags);
+               xfs_diflags_to_linux(ip);
+       }
+
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       /*
+        * If this is a synchronous mount, make sure that the
+        * transaction goes to disk before returning to the user.
+        * This is slightly sub-optimal in that truncates require
+        * two sync transactions instead of one for wsync filesystems.
+        * One for the truncate and one for the timestamps since we
+        * don't want to change the timestamps unless we're sure the
+        * truncate worked.  Truncates are less than 1% of the laddis
+        * mix so this probably isn't worth the trouble to optimize.
+        */
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+       code = xfs_trans_commit(tp, 0);
+       xfs_iunlock(ip, lock_flags);
+
+       /*
+        * Release any dquot(s) the inode had kept before chown.
+        */
+       xfs_qm_dqrele(olddquot);
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+
+       return code;
+
+ error_return:
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       xfs_trans_cancel(tp, 0);
+       if (lock_flags)
+               xfs_iunlock(ip, lock_flags);
+       return code;
+}
+
+STATIC int
+xfs_ioc_fssetxattr(
+       xfs_inode_t             *ip,
+       struct file             *filp,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+       unsigned int            mask;
+
+       if (copy_from_user(&fa, arg, sizeof(fa)))
+               return -EFAULT;
+
+       mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               mask |= FSX_NONBLOCK;
+
+       return -xfs_ioctl_setattr(ip, &fa, mask);
+}
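/*
 * Illustrative sketch, not part of the patch: setting an extent size hint
 * through the FSGETXATTR/FSSETXATTR pair.  The read-modify-write keeps the
 * other fsxattr fields that xfs_ioctl_setattr() also acts on unchanged.
 */
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int set_extsize_hint(int fd, __u32 bytes)
{
        struct fsxattr fa;

        if (ioctl(fd, XFS_IOC_FSGETXATTR, &fa) < 0)
                return -1;
        fa.fsx_xflags |= XFS_XFLAG_EXTSIZE;
        fa.fsx_extsize = bytes; /* must be a multiple of the block size */
        return ioctl(fd, XFS_IOC_FSSETXATTR, &fa);
}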
+
+STATIC int
+xfs_ioc_getxflags(
+       xfs_inode_t             *ip,
+       void                    __user *arg)
+{
+       unsigned int            flags;
+
+       flags = xfs_di2lxflags(ip->i_d.di_flags);
+       if (copy_to_user(arg, &flags, sizeof(flags)))
+               return -EFAULT;
+       return 0;
+}
+
+STATIC int
+xfs_ioc_setxflags(
+       xfs_inode_t             *ip,
+       struct file             *filp,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+       unsigned int            flags;
+       unsigned int            mask;
+
+       if (copy_from_user(&flags, arg, sizeof(flags)))
+               return -EFAULT;
+
+       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
+                     FS_NOATIME_FL | FS_NODUMP_FL |
+                     FS_SYNC_FL))
+               return -EOPNOTSUPP;
+
+       mask = FSX_XFLAGS;
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               mask |= FSX_NONBLOCK;
+       fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
+
+       return -xfs_ioctl_setattr(ip, &fa, mask);
+}
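/*
 * Illustrative sketch, not part of the patch: GETXFLAGS/SETXFLAGS carry
 * the generic FS_*_FL bits, i.e. the chattr-style interface.  Setting
 * FS_APPEND_FL or FS_IMMUTABLE_FL needs CAP_LINUX_IMMUTABLE.
 */
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int make_append_only(int fd)
{
        unsigned int flags;

        if (ioctl(fd, XFS_IOC_GETXFLAGS, &flags) < 0)
                return -1;
        flags |= FS_APPEND_FL;
        return ioctl(fd, XFS_IOC_SETXFLAGS, &flags);
}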
+
+STATIC int
+xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
+{
+       struct getbmap __user   *base = *ap;
+
+       /* copy only getbmap portion (not getbmapx) */
+       if (copy_to_user(base, bmv, sizeof(struct getbmap)))
+               return XFS_ERROR(EFAULT);
+
+       *ap += sizeof(struct getbmap);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_getbmap(
+       struct xfs_inode        *ip,
+       int                     ioflags,
+       unsigned int            cmd,
+       void                    __user *arg)
+{
+       struct getbmapx         bmx;
+       int                     error;
+
+       if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
+               return -XFS_ERROR(EFAULT);
+
+       if (bmx.bmv_count < 2)
+               return -XFS_ERROR(EINVAL);
+
+       bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
+       if (ioflags & IO_INVIS)
+               bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
+
+       error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
+                           (struct getbmap *)arg+1);
+       if (error)
+               return -error;
+
+       /* copy back header - only size of getbmap */
+       if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
+{
+       struct getbmapx __user  *base = *ap;
+
+       if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
+               return XFS_ERROR(EFAULT);
+
+       *ap += sizeof(struct getbmapx);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_getbmapx(
+       struct xfs_inode        *ip,
+       void                    __user *arg)
+{
+       struct getbmapx         bmx;
+       int                     error;
+
+       if (copy_from_user(&bmx, arg, sizeof(bmx)))
+               return -XFS_ERROR(EFAULT);
+
+       if (bmx.bmv_count < 2)
+               return -XFS_ERROR(EINVAL);
+
+       if (bmx.bmv_iflags & (~BMV_IF_VALID))
+               return -XFS_ERROR(EINVAL);
+
+       error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
+                           (struct getbmapx *)arg+1);
+       if (error)
+               return -error;
+
+       /* copy back header */
+       if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
+               return -XFS_ERROR(EFAULT);
+
+       return 0;
+}
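/*
 * Illustrative sketch, not part of the patch: XFS_IOC_GETBMAPX takes an
 * array whose first element is the request header; up to bmv_count - 1
 * extent records are written after it (hence the bmv_count < 2 check
 * above).  Offsets and lengths are in 512-byte units.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int dump_extents(int fd)
{
        struct getbmapx map[33];        /* header + up to 32 records */
        int i;

        memset(map, 0, sizeof(map));
        map[0].bmv_length = -1;         /* map to end of file */
        map[0].bmv_count = 33;

        if (ioctl(fd, XFS_IOC_GETBMAPX, map) < 0)
                return -1;
        for (i = 1; i <= map[0].bmv_entries; i++)
                printf("offset %lld, length %lld\n",
                       (long long)map[i].bmv_offset,
                       (long long)map[i].bmv_length);
        return 0;
}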
+
+/*
+ * Note: some of the ioctls return positive numbers as a
+ * byte count indicating success, such as readlink_by_handle.
+ * So we don't "sign flip" like most other routines.  This means
+ * true errors need to be returned as a negative value.
+ */
+long
+xfs_file_ioctl(
+       struct file             *filp,
+       unsigned int            cmd,
+       unsigned long           p)
+{
+       struct inode            *inode = filp->f_path.dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       void                    __user *arg = (void __user *)p;
+       int                     ioflags = 0;
+       int                     error;
+
+       if (filp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       trace_xfs_file_ioctl(ip);
+
+       switch (cmd) {
+       case FITRIM:
+               return xfs_ioc_trim(mp, arg);
+       case XFS_IOC_ALLOCSP:
+       case XFS_IOC_FREESP:
+       case XFS_IOC_RESVSP:
+       case XFS_IOC_UNRESVSP:
+       case XFS_IOC_ALLOCSP64:
+       case XFS_IOC_FREESP64:
+       case XFS_IOC_RESVSP64:
+       case XFS_IOC_UNRESVSP64:
+       case XFS_IOC_ZERO_RANGE: {
+               xfs_flock64_t           bf;
+
+               if (copy_from_user(&bf, arg, sizeof(bf)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+       }
+       case XFS_IOC_DIOINFO: {
+               struct dioattr  da;
+               xfs_buftarg_t   *target =
+                       XFS_IS_REALTIME_INODE(ip) ?
+                       mp->m_rtdev_targp : mp->m_ddev_targp;
+
+               da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+               da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
+
+               if (copy_to_user(arg, &da, sizeof(da)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_FSBULKSTAT_SINGLE:
+       case XFS_IOC_FSBULKSTAT:
+       case XFS_IOC_FSINUMBERS:
+               return xfs_ioc_bulkstat(mp, cmd, arg);
+
+       case XFS_IOC_FSGEOMETRY_V1:
+               return xfs_ioc_fsgeometry_v1(mp, arg);
+
+       case XFS_IOC_FSGEOMETRY:
+               return xfs_ioc_fsgeometry(mp, arg);
+
+       case XFS_IOC_GETVERSION:
+               return put_user(inode->i_generation, (int __user *)arg);
+
+       case XFS_IOC_FSGETXATTR:
+               return xfs_ioc_fsgetxattr(ip, 0, arg);
+       case XFS_IOC_FSGETXATTRA:
+               return xfs_ioc_fsgetxattr(ip, 1, arg);
+       case XFS_IOC_FSSETXATTR:
+               return xfs_ioc_fssetxattr(ip, filp, arg);
+       case XFS_IOC_GETXFLAGS:
+               return xfs_ioc_getxflags(ip, arg);
+       case XFS_IOC_SETXFLAGS:
+               return xfs_ioc_setxflags(ip, filp, arg);
+
+       case XFS_IOC_FSSETDM: {
+               struct fsdmidata        dmi;
+
+               if (copy_from_user(&dmi, arg, sizeof(dmi)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
+                               dmi.fsd_dmstate);
+               return -error;
+       }
+
+       case XFS_IOC_GETBMAP:
+       case XFS_IOC_GETBMAPA:
+               return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
+
+       case XFS_IOC_GETBMAPX:
+               return xfs_ioc_getbmapx(ip, arg);
+
+       case XFS_IOC_FD_TO_HANDLE:
+       case XFS_IOC_PATH_TO_HANDLE:
+       case XFS_IOC_PATH_TO_FSHANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(hreq)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_find_handle(cmd, &hreq);
+       }
+       case XFS_IOC_OPEN_BY_HANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_open_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_FSSETDM_BY_HANDLE:
+               return xfs_fssetdm_by_handle(filp, arg);
+
+       case XFS_IOC_READLINK_BY_HANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_readlink_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_ATTRLIST_BY_HANDLE:
+               return xfs_attrlist_by_handle(filp, arg);
+
+       case XFS_IOC_ATTRMULTI_BY_HANDLE:
+               return xfs_attrmulti_by_handle(filp, arg);
+
+       case XFS_IOC_SWAPEXT: {
+               struct xfs_swapext      sxp;
+
+               if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_swapext(&sxp);
+               return -error;
+       }
+
+       case XFS_IOC_FSCOUNTS: {
+               xfs_fsop_counts_t out;
+
+               error = xfs_fs_counts(mp, &out);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &out, sizeof(out)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_SET_RESBLKS: {
+               xfs_fsop_resblks_t inout;
+               __uint64_t         in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (mp->m_flags & XFS_MOUNT_RDONLY)
+                       return -XFS_ERROR(EROFS);
+
+               if (copy_from_user(&inout, arg, sizeof(inout)))
+                       return -XFS_ERROR(EFAULT);
+
+               /* input parameter is passed in resblks field of structure */
+               in = inout.resblks;
+               error = xfs_reserve_blocks(mp, &in, &inout);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &inout, sizeof(inout)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_GET_RESBLKS: {
+               xfs_fsop_resblks_t out;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               error = xfs_reserve_blocks(mp, NULL, &out);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &out, sizeof(out)))
+                       return -XFS_ERROR(EFAULT);
+
+               return 0;
+       }
+
+       case XFS_IOC_FSGROWFSDATA: {
+               xfs_growfs_data_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_data(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FSGROWFSLOG: {
+               xfs_growfs_log_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_log(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FSGROWFSRT: {
+               xfs_growfs_rt_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_rt(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_GOINGDOWN: {
+               __uint32_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (get_user(in, (__uint32_t __user *)arg))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_fs_goingdown(mp, in);
+               return -error;
+       }
+
+       case XFS_IOC_ERROR_INJECTION: {
+               xfs_error_injection_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_errortag_add(in.errtag, mp);
+               return -error;
+       }
+
+       case XFS_IOC_ERROR_CLEARALL:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               error = xfs_errortag_clearall(mp, 1);
+               return -error;
+
+       default:
+               return -ENOTTY;
+       }
+}
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
new file mode 100644
index 0000000..d56173b
--- /dev/null
+++ b/fs/xfs/xfs_ioctl.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL_H__
+#define __XFS_IOCTL_H__
+
+extern int
+xfs_ioc_space(
+       struct xfs_inode        *ip,
+       struct inode            *inode,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       xfs_flock64_t           *bf);
+
+extern int
+xfs_find_handle(
+       unsigned int            cmd,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_open_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_readlink_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_attrmulti_attr_get(
+       struct inode            *inode,
+       unsigned char           *name,
+       unsigned char           __user *ubuf,
+       __uint32_t              *len,
+       __uint32_t              flags);
+
+extern int
+xfs_attrmulti_attr_set(
+       struct inode            *inode,
+       unsigned char           *name,
+       const unsigned char     __user *ubuf,
+       __uint32_t              len,
+       __uint32_t              flags);
+
+extern int
+xfs_attrmulti_attr_remove(
+       struct inode            *inode,
+       unsigned char           *name,
+       __uint32_t              flags);
+
+extern struct dentry *
+xfs_handle_to_dentry(
+       struct file             *parfilp,
+       void __user             *uhandle,
+       u32                     hlen);
+
+extern long
+xfs_file_ioctl(
+       struct file             *filp,
+       unsigned int            cmd,
+       unsigned long           p);
+
+extern long
+xfs_file_compat_ioctl(
+       struct file             *file,
+       unsigned int            cmd,
+       unsigned long           arg);
+
+#endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
new file mode 100644
index 0000000..54e623b
--- /dev/null
+++ b/fs/xfs/xfs_ioctl32.c
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/compat.h>
+#include <linux/ioctl.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_vnode.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
+#include "xfs_dfrag.h"
+#include "xfs_vnodeops.h"
+#include "xfs_fsops.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_attr.h"
+#include "xfs_ioctl.h"
+#include "xfs_ioctl32.h"
+#include "xfs_trace.h"
+
+#define  _NATIVE_IOC(cmd, type) \
+         _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
+
+#ifdef BROKEN_X86_ALIGNMENT
+STATIC int
+xfs_compat_flock64_copyin(
+       xfs_flock64_t           *bf,
+       compat_xfs_flock64_t    __user *arg32)
+{
+       if (get_user(bf->l_type,        &arg32->l_type) ||
+           get_user(bf->l_whence,      &arg32->l_whence) ||
+           get_user(bf->l_start,       &arg32->l_start) ||
+           get_user(bf->l_len,         &arg32->l_len) ||
+           get_user(bf->l_sysid,       &arg32->l_sysid) ||
+           get_user(bf->l_pid,         &arg32->l_pid) ||
+           copy_from_user(bf->l_pad,   &arg32->l_pad,  4*sizeof(u32)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_ioc_fsgeometry_v1(
+       struct xfs_mount          *mp,
+       compat_xfs_fsop_geom_v1_t __user *arg32)
+{
+       xfs_fsop_geom_t           fsgeo;
+       int                       error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 3);
+       if (error)
+               return -error;
+       /* The 32-bit variant simply has some padding at the end */
+       if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_growfs_data_copyin(
+       struct xfs_growfs_data   *in,
+       compat_xfs_growfs_data_t __user *arg32)
+{
+       if (get_user(in->newblocks, &arg32->newblocks) ||
+           get_user(in->imaxpct,   &arg32->imaxpct))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_growfs_rt_copyin(
+       struct xfs_growfs_rt     *in,
+       compat_xfs_growfs_rt_t  __user *arg32)
+{
+       if (get_user(in->newblocks, &arg32->newblocks) ||
+           get_user(in->extsize,   &arg32->extsize))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt_compat(
+       void                    __user *ubuffer,
+       const xfs_inogrp_t      *buffer,
+       long                    count,
+       long                    *written)
+{
+       compat_xfs_inogrp_t     __user *p32 = ubuffer;
+       long                    i;
+
+       for (i = 0; i < count; i++) {
+               if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
+                   put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
+                   put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
+                       return -XFS_ERROR(EFAULT);
+       }
+       *written = count * sizeof(*p32);
+       return 0;
+}
+
+#else
+#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#endif /* BROKEN_X86_ALIGNMENT */
+
+STATIC int
+xfs_ioctl32_bstime_copyin(
+       xfs_bstime_t            *bstime,
+       compat_xfs_bstime_t     __user *bstime32)
+{
+       compat_time_t           sec32;  /* tv_sec differs on 64 vs. 32 */
+
+       if (get_user(sec32,             &bstime32->tv_sec)      ||
+           get_user(bstime->tv_nsec,   &bstime32->tv_nsec))
+               return -XFS_ERROR(EFAULT);
+       bstime->tv_sec = sec32;
+       return 0;
+}
+
+/*
+ * xfs_bstat_t has differing alignment on Intel, and bstime_t sizes differ
+ * everywhere.
+ */
+STATIC int
+xfs_ioctl32_bstat_copyin(
+       xfs_bstat_t             *bstat,
+       compat_xfs_bstat_t      __user *bstat32)
+{
+       if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
+           get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
+           get_user(bstat->bs_nlink,   &bstat32->bs_nlink)     ||
+           get_user(bstat->bs_uid,     &bstat32->bs_uid)       ||
+           get_user(bstat->bs_gid,     &bstat32->bs_gid)       ||
+           get_user(bstat->bs_rdev,    &bstat32->bs_rdev)      ||
+           get_user(bstat->bs_blksize, &bstat32->bs_blksize)   ||
+           get_user(bstat->bs_size,    &bstat32->bs_size)      ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
+           get_user(bstat->bs_blocks,  &bstat32->bs_blocks)    ||
+           get_user(bstat->bs_xflags,  &bstat32->bs_xflags)    ||
+           get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
+           get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
+           get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
+           get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
+           get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
+           get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
+           get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
+           get_user(bstat->bs_aextents, &bstat32->bs_aextents))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/* XFS_IOC_FSBULKSTAT and friends */
+
+STATIC int
+xfs_bstime_store_compat(
+       compat_xfs_bstime_t     __user *p32,
+       const xfs_bstime_t      *p)
+{
+       __s32                   sec32;
+
+       sec32 = p->tv_sec;
+       if (put_user(sec32, &p32->tv_sec) ||
+           put_user(p->tv_nsec, &p32->tv_nsec))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/* Return 0 on success or positive error (to xfs_bulkstat()) */
+STATIC int
+xfs_bulkstat_one_fmt_compat(
+       void                    __user *ubuffer,
+       int                     ubsize,
+       int                     *ubused,
+       const xfs_bstat_t       *buffer)
+{
+       compat_xfs_bstat_t      __user *p32 = ubuffer;
+
+       if (ubsize < sizeof(*p32))
+               return XFS_ERROR(ENOMEM);
+
+       if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
+           put_user(buffer->bs_mode,     &p32->bs_mode)        ||
+           put_user(buffer->bs_nlink,    &p32->bs_nlink)       ||
+           put_user(buffer->bs_uid,      &p32->bs_uid)         ||
+           put_user(buffer->bs_gid,      &p32->bs_gid)         ||
+           put_user(buffer->bs_rdev,     &p32->bs_rdev)        ||
+           put_user(buffer->bs_blksize,  &p32->bs_blksize)     ||
+           put_user(buffer->bs_size,     &p32->bs_size)        ||
+           xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
+           xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
+           xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
+           put_user(buffer->bs_blocks,   &p32->bs_blocks)      ||
+           put_user(buffer->bs_xflags,   &p32->bs_xflags)      ||
+           put_user(buffer->bs_extsize,  &p32->bs_extsize)     ||
+           put_user(buffer->bs_extents,  &p32->bs_extents)     ||
+           put_user(buffer->bs_gen,      &p32->bs_gen)         ||
+           put_user(buffer->bs_projid,   &p32->bs_projid)      ||
+           put_user(buffer->bs_projid_hi,      &p32->bs_projid_hi)     ||
+           put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
+           put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
+           put_user(buffer->bs_aextents, &p32->bs_aextents))
+               return XFS_ERROR(EFAULT);
+       if (ubused)
+               *ubused = sizeof(*p32);
+       return 0;
+}
+
+STATIC int
+xfs_bulkstat_one_compat(
+       xfs_mount_t     *mp,            /* mount point for filesystem */
+       xfs_ino_t       ino,            /* inode number to get data for */
+       void            __user *buffer, /* buffer to place output in */
+       int             ubsize,         /* size of buffer */
+       int             *ubused,        /* bytes used by me */
+       int             *stat)          /* BULKSTAT_RV_... */
+{
+       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
+                                   xfs_bulkstat_one_fmt_compat,
+                                   ubused, stat);
+}
+
+/* copied from xfs_ioctl.c */
+STATIC int
+xfs_compat_ioc_bulkstat(
+       xfs_mount_t               *mp,
+       unsigned int              cmd,
+       compat_xfs_fsop_bulkreq_t __user *p32)
+{
+       u32                     addr;
+       xfs_fsop_bulkreq_t      bulkreq;
+       int                     count;  /* # of records returned */
+       xfs_ino_t               inlast; /* last inode number */
+       int                     done;
+       int                     error;
+
+       /*
+        * done = 1 if there are more stats to get and if bulkstat
+        * should be called again (unused here, but used in dmapi)
+        */
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (get_user(addr, &p32->lastip))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.lastip = compat_ptr(addr);
+       if (get_user(bulkreq.icount, &p32->icount) ||
+           get_user(addr, &p32->ubuffer))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.ubuffer = compat_ptr(addr);
+       if (get_user(addr, &p32->ocount))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.ocount = compat_ptr(addr);
+
+       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+               return -XFS_ERROR(EFAULT);
+
+       count = bulkreq.icount;
+       if (count <= 0)
+               return -XFS_ERROR(EINVAL);
+
+       if (bulkreq.ubuffer == NULL)
+               return -XFS_ERROR(EINVAL);
+
+       if (cmd == XFS_IOC_FSINUMBERS_32) {
+               error = xfs_inumbers(mp, &inlast, &count,
+                               bulkreq.ubuffer, xfs_inumbers_fmt_compat);
+       } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
+               int res;
+
+               error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
+                               sizeof(compat_xfs_bstat_t), NULL, &res);
+       } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
+               error = xfs_bulkstat(mp, &inlast, &count,
+                       xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+                       bulkreq.ubuffer, &done);
+       } else
+               error = XFS_ERROR(EINVAL);
+       if (error)
+               return -error;
+
+       if (bulkreq.ocount != NULL) {
+               if (copy_to_user(bulkreq.lastip, &inlast,
+                                               sizeof(xfs_ino_t)))
+                       return -XFS_ERROR(EFAULT);
+
+               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+                       return -XFS_ERROR(EFAULT);
+       }
+
+       return 0;
+}
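+
+/*
+ * A minimal sketch of the pointer-widening idiom used above, with
+ * illustrative field names: 32-bit userspace hands us pointers as
+ * compat_uptr_t (a __u32), which must be fetched with get_user() and
+ * widened with compat_ptr() before the kernel may use them:
+ *
+ *         u32 addr;
+ *
+ *         if (get_user(addr, &req32->some_uptr))
+ *                 return -EFAULT;
+ *         req.some_ptr = compat_ptr(addr);
+ */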
+
+STATIC int
+xfs_compat_handlereq_copyin(
+       xfs_fsop_handlereq_t            *hreq,
+       compat_xfs_fsop_handlereq_t     __user *arg32)
+{
+       compat_xfs_fsop_handlereq_t     hreq32;
+
+       if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       hreq->fd = hreq32.fd;
+       hreq->path = compat_ptr(hreq32.path);
+       hreq->oflags = hreq32.oflags;
+       hreq->ihandle = compat_ptr(hreq32.ihandle);
+       hreq->ihandlen = hreq32.ihandlen;
+       hreq->ohandle = compat_ptr(hreq32.ohandle);
+       hreq->ohandlen = compat_ptr(hreq32.ohandlen);
+
+       return 0;
+}
+
+STATIC struct dentry *
+xfs_compat_handlereq_to_dentry(
+       struct file             *parfilp,
+       compat_xfs_fsop_handlereq_t *hreq)
+{
+       return xfs_handle_to_dentry(parfilp,
+                       compat_ptr(hreq->ihandle), hreq->ihandlen);
+}
+
+STATIC int
+xfs_compat_attrlist_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       attrlist_cursor_kern_t  *cursor;
+       compat_xfs_fsop_attrlist_handlereq_t al_hreq;
+       struct dentry           *dentry;
+       char                    *kbuf;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&al_hreq, arg,
+                          sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+       if (al_hreq.buflen > XATTR_LIST_MAX)
+               return -XFS_ERROR(EINVAL);
+
+       /*
+        * Reject flags, only allow namespaces.
+        */
+       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+               return -XFS_ERROR(EINVAL);
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = -ENOMEM;
+       kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+       if (!kbuf)
+               goto out_dput;
+
+       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+                                       al_hreq.flags, cursor);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
+               error = -EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_compat_attrmulti_by_handle(
+       struct file                             *parfilp,
+       void                                    __user *arg)
+{
+       int                                     error;
+       compat_xfs_attr_multiop_t               *ops;
+       compat_xfs_fsop_attrmulti_handlereq_t   am_hreq;
+       struct dentry                           *dentry;
+       unsigned int                            i, size;
+       unsigned char                           *attr_name;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&am_hreq, arg,
+                          sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       /* overflow check */
+       if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
+               return -E2BIG;
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = E2BIG;
+       size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
+       if (!size || size > 16 * PAGE_SIZE)
+               goto out_dput;
+
+       ops = memdup_user(compat_ptr(am_hreq.ops), size);
+       if (IS_ERR(ops)) {
+               error = -PTR_ERR(ops);  /* keep error positive; negated at return */
+               goto out_dput;
+       }
+
+       error = ENOMEM;
+       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+       if (!attr_name)
+               goto out_kfree_ops;
+
+       error = 0;
+       for (i = 0; i < am_hreq.opcount; i++) {
+               ops[i].am_error = strncpy_from_user((char *)attr_name,
+                               compat_ptr(ops[i].am_attrname),
+                               MAXNAMELEN);
+               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+                       error = ERANGE;
+               if (ops[i].am_error < 0)
+                       break;
+
+               switch (ops[i].am_opcode) {
+               case ATTR_OP_GET:
+                       ops[i].am_error = xfs_attrmulti_attr_get(
+                                       dentry->d_inode, attr_name,
+                                       compat_ptr(ops[i].am_attrvalue),
+                                       &ops[i].am_length, ops[i].am_flags);
+                       break;
+               case ATTR_OP_SET:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_set(
+                                       dentry->d_inode, attr_name,
+                                       compat_ptr(ops[i].am_attrvalue),
+                                       ops[i].am_length, ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               case ATTR_OP_REMOVE:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_remove(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               default:
+                       ops[i].am_error = EINVAL;
+               }
+       }
+
+       if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
+               error = XFS_ERROR(EFAULT);
+
+       kfree(attr_name);
+ out_kfree_ops:
+       kfree(ops);
+ out_dput:
+       dput(dentry);
+       return -error;
+}
+
+STATIC int
+xfs_compat_fssetdm_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       struct fsdmidata        fsd;
+       compat_xfs_fsop_setdm_handlereq_t dmhreq;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_MKNOD))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&dmhreq, arg,
+                          sizeof(compat_xfs_fsop_setdm_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out;
+       }
+
+       if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out;
+       }
+
+       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+                                fsd.fsd_dmstate);
+
+out:
+       dput(dentry);
+       return error;
+}
+
+long
+xfs_file_compat_ioctl(
+       struct file             *filp,
+       unsigned                cmd,
+       unsigned long           p)
+{
+       struct inode            *inode = filp->f_path.dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       void                    __user *arg = (void __user *)p;
+       int                     ioflags = 0;
+       int                     error;
+
+       if (filp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       trace_xfs_file_compat_ioctl(ip);
+
+       switch (cmd) {
+       /* No size or alignment issues on any arch */
+       case XFS_IOC_DIOINFO:
+       case XFS_IOC_FSGEOMETRY:
+       case XFS_IOC_FSGETXATTR:
+       case XFS_IOC_FSSETXATTR:
+       case XFS_IOC_FSGETXATTRA:
+       case XFS_IOC_FSSETDM:
+       case XFS_IOC_GETBMAP:
+       case XFS_IOC_GETBMAPA:
+       case XFS_IOC_GETBMAPX:
+       case XFS_IOC_FSCOUNTS:
+       case XFS_IOC_SET_RESBLKS:
+       case XFS_IOC_GET_RESBLKS:
+       case XFS_IOC_FSGROWFSLOG:
+       case XFS_IOC_GOINGDOWN:
+       case XFS_IOC_ERROR_INJECTION:
+       case XFS_IOC_ERROR_CLEARALL:
+               return xfs_file_ioctl(filp, cmd, p);
+#ifndef BROKEN_X86_ALIGNMENT
+       /* These are handled fine if there are no alignment issues */
+       case XFS_IOC_ALLOCSP:
+       case XFS_IOC_FREESP:
+       case XFS_IOC_RESVSP:
+       case XFS_IOC_UNRESVSP:
+       case XFS_IOC_ALLOCSP64:
+       case XFS_IOC_FREESP64:
+       case XFS_IOC_RESVSP64:
+       case XFS_IOC_UNRESVSP64:
+       case XFS_IOC_FSGEOMETRY_V1:
+       case XFS_IOC_FSGROWFSDATA:
+       case XFS_IOC_FSGROWFSRT:
+       case XFS_IOC_ZERO_RANGE:
+               return xfs_file_ioctl(filp, cmd, p);
+#else
+       case XFS_IOC_ALLOCSP_32:
+       case XFS_IOC_FREESP_32:
+       case XFS_IOC_ALLOCSP64_32:
+       case XFS_IOC_FREESP64_32:
+       case XFS_IOC_RESVSP_32:
+       case XFS_IOC_UNRESVSP_32:
+       case XFS_IOC_RESVSP64_32:
+       case XFS_IOC_UNRESVSP64_32:
+       case XFS_IOC_ZERO_RANGE_32: {
+               struct xfs_flock64      bf;
+
+               if (xfs_compat_flock64_copyin(&bf, arg))
+                       return -XFS_ERROR(EFAULT);
+               cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+       }
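+       /*
+        * A sketch of what _NATIVE_IOC() (defined near the top of this
+        * file) does: it rebuilds the ioctl number with the native
+        * argument size, roughly
+        *
+        *      _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
+        *
+        * so the rewritten command can be passed straight to the
+        * native handlers.
+        */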
+       case XFS_IOC_FSGEOMETRY_V1_32:
+               return xfs_compat_ioc_fsgeometry_v1(mp, arg);
+       case XFS_IOC_FSGROWFSDATA_32: {
+               struct xfs_growfs_data  in;
+
+               if (xfs_compat_growfs_data_copyin(&in, arg))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_growfs_data(mp, &in);
+               return -error;
+       }
+       case XFS_IOC_FSGROWFSRT_32: {
+               struct xfs_growfs_rt    in;
+
+               if (xfs_compat_growfs_rt_copyin(&in, arg))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_growfs_rt(mp, &in);
+               return -error;
+       }
+#endif
+       /* long changes size, but xfs only copies out 32 bits */
+       case XFS_IOC_GETXFLAGS_32:
+       case XFS_IOC_SETXFLAGS_32:
+       case XFS_IOC_GETVERSION_32:
+               cmd = _NATIVE_IOC(cmd, long);
+               return xfs_file_ioctl(filp, cmd, p);
+       case XFS_IOC_SWAPEXT_32: {
+               struct xfs_swapext        sxp;
+               struct compat_xfs_swapext __user *sxu = arg;
+
+               /* Bulk copy in up to the sx_stat field, then copy bstat */
+               if (copy_from_user(&sxp, sxu,
+                                  offsetof(struct xfs_swapext, sx_stat)) ||
+                   xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_swapext(&sxp);
+               return -error;
+       }
+       case XFS_IOC_FSBULKSTAT_32:
+       case XFS_IOC_FSBULKSTAT_SINGLE_32:
+       case XFS_IOC_FSINUMBERS_32:
+               return xfs_compat_ioc_bulkstat(mp, cmd, arg);
+       case XFS_IOC_FD_TO_HANDLE_32:
+       case XFS_IOC_PATH_TO_HANDLE_32:
+       case XFS_IOC_PATH_TO_FSHANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
+               return xfs_find_handle(cmd, &hreq);
+       }
+       case XFS_IOC_OPEN_BY_HANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_open_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_READLINK_BY_HANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_readlink_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_ATTRLIST_BY_HANDLE_32:
+               return xfs_compat_attrlist_by_handle(filp, arg);
+       case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
+               return xfs_compat_attrmulti_by_handle(filp, arg);
+       case XFS_IOC_FSSETDM_BY_HANDLE_32:
+               return xfs_compat_fssetdm_by_handle(filp, arg);
+       default:
+               return -XFS_ERROR(ENOIOCTLCMD);
+       }
+}
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
new file mode 100644 (file)
index 0000000..80f4060
--- /dev/null
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL32_H__
+#define __XFS_IOCTL32_H__
+
+#include <linux/compat.h>
+
+/*
+ * on 32-bit arches, ioctl argument structures may have different sizes
+ * and/or alignment.  We define compat structures which match the
+ * 32-bit sizes/alignments here, and their associated ioctl numbers.
+ *
+ * xfs_ioctl32.c contains routines to copy these structures in and out.
+ */
+
+/* stock kernel-level ioctls we support */
+#define XFS_IOC_GETXFLAGS_32   FS_IOC32_GETFLAGS
+#define XFS_IOC_SETXFLAGS_32   FS_IOC32_SETFLAGS
+#define XFS_IOC_GETVERSION_32  FS_IOC32_GETVERSION
+
+/*
+ * On Intel, even if sizes match, alignment and/or padding may differ.
+ */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#define BROKEN_X86_ALIGNMENT
+#define __compat_packed __attribute__((packed))
+#else
+#define __compat_packed
+#endif
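+
+/*
+ * Illustrative example (not part of any ABI): a __u64 that follows a
+ * __u32 is packed to a 4-byte boundary on i386, but padded out to an
+ * 8-byte boundary on x86_64 and ia64.  So for
+ *
+ *         struct example {
+ *                 __u32   a;
+ *                 __u64   b;
+ *         };
+ *
+ * a 32-bit caller sees a 12-byte structure where the native kernel
+ * sees 16 bytes, which is why the definitions below are tagged
+ * __compat_packed.
+ */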
+
+typedef struct compat_xfs_bstime {
+       compat_time_t   tv_sec;         /* seconds              */
+       __s32           tv_nsec;        /* and nanoseconds      */
+} compat_xfs_bstime_t;
+
+typedef struct compat_xfs_bstat {
+       __u64           bs_ino;         /* inode number                 */
+       __u16           bs_mode;        /* type and mode                */
+       __u16           bs_nlink;       /* number of links              */
+       __u32           bs_uid;         /* user id                      */
+       __u32           bs_gid;         /* group id                     */
+       __u32           bs_rdev;        /* device value                 */
+       __s32           bs_blksize;     /* block size                   */
+       __s64           bs_size;        /* file size                    */
+       compat_xfs_bstime_t bs_atime;   /* access time                  */
+       compat_xfs_bstime_t bs_mtime;   /* modify time                  */
+       compat_xfs_bstime_t bs_ctime;   /* inode change time            */
+       int64_t         bs_blocks;      /* number of blocks             */
+       __u32           bs_xflags;      /* extended flags               */
+       __s32           bs_extsize;     /* extent size                  */
+       __s32           bs_extents;     /* number of extents            */
+       __u32           bs_gen;         /* generation count             */
+       __u16           bs_projid_lo;   /* lower part of project id     */
+#define        bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
+       __u16           bs_projid_hi;   /* high part of project id      */
+       unsigned char   bs_pad[12];     /* pad space, unused            */
+       __u32           bs_dmevmask;    /* DMIG event mask              */
+       __u16           bs_dmstate;     /* DMIG state info              */
+       __u16           bs_aextents;    /* attribute number of extents  */
+} __compat_packed compat_xfs_bstat_t;
+
+typedef struct compat_xfs_fsop_bulkreq {
+       compat_uptr_t   lastip;         /* last inode # pointer         */
+       __s32           icount;         /* count of entries in buffer   */
+       compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
+       compat_uptr_t   ocount;         /* output count pointer         */
+} compat_xfs_fsop_bulkreq_t;
+
+#define XFS_IOC_FSBULKSTAT_32 \
+       _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+       _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+       _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
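+
+/*
+ * Note that _IOWR() encodes the argument size into the command
+ * number, so the 'X'/101..103 definitions above differ numerically
+ * from their native counterparts: with compat_uptr_t the request is
+ * 16 bytes here, versus 32 bytes with native 64-bit pointers.
+ */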
+
+typedef struct compat_xfs_fsop_handlereq {
+       __u32           fd;             /* fd for FD_TO_HANDLE          */
+       compat_uptr_t   path;           /* user pathname                */
+       __u32           oflags;         /* open flags                   */
+       compat_uptr_t   ihandle;        /* user supplied handle         */
+       __u32           ihandlen;       /* user supplied length         */
+       compat_uptr_t   ohandle;        /* user buffer for handle       */
+       compat_uptr_t   ohandlen;       /* user buffer length           */
+} compat_xfs_fsop_handlereq_t;
+
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+       _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+       _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+       _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+       _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+       _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+
+/* The bstat field in the swapext struct needs translation */
+typedef struct compat_xfs_swapext {
+       __int64_t               sx_version;     /* version */
+       __int64_t               sx_fdtarget;    /* fd of target file */
+       __int64_t               sx_fdtmp;       /* fd of tmp file */
+       xfs_off_t               sx_offset;      /* offset into file */
+       xfs_off_t               sx_length;      /* length from offset */
+       char                    sx_pad[16];     /* pad space, unused */
+       compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
+} __compat_packed compat_xfs_swapext_t;
+
+#define XFS_IOC_SWAPEXT_32     _IOWR('X', 109, struct compat_xfs_swapext)
+
+typedef struct compat_xfs_fsop_attrlist_handlereq {
+       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+       struct xfs_attrlist_cursor      pos; /* opaque cookie, list offset */
+       __u32                           flags;  /* which namespace to use */
+       __u32                           buflen; /* length of buffer supplied */
+       compat_uptr_t                   buffer; /* returned names */
+} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
+
+/* Note: actually this is read/write */
+#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
+       _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
+
+/* am_opcodes defined in xfs_fs.h */
+typedef struct compat_xfs_attr_multiop {
+       __u32           am_opcode;
+       __s32           am_error;
+       compat_uptr_t   am_attrname;
+       compat_uptr_t   am_attrvalue;
+       __u32           am_length;
+       __u32           am_flags;
+} compat_xfs_attr_multiop_t;
+
+typedef struct compat_xfs_fsop_attrmulti_handlereq {
+       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+       __u32                           opcount;/* count of following multiop */
+       /* ptr to compat_xfs_attr_multiop */
+       compat_uptr_t                   ops; /* attr_multi data */
+} compat_xfs_fsop_attrmulti_handlereq_t;
+
+#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
+       _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
+
+typedef struct compat_xfs_fsop_setdm_handlereq {
+       struct compat_xfs_fsop_handlereq hreq;  /* handle information   */
+       /* ptr to struct fsdmidata */
+       compat_uptr_t                   data;   /* DMAPI data   */
+} compat_xfs_fsop_setdm_handlereq_t;
+
+#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
+       _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
+
+#ifdef BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct compat_xfs_flock64 {
+       __s16           l_type;
+       __s16           l_whence;
+       __s64           l_start __attribute__((packed));
+                       /* len == 0 means until end of file */
+       __s64           l_len __attribute__((packed));
+       __s32           l_sysid;
+       __u32           l_pid;
+       __s32           l_pad[4];       /* reserve area */
+} compat_xfs_flock64_t;
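+
+/*
+ * Packing l_start and l_len individually (rather than the whole
+ * struct) reproduces the ia32 layout exactly: only the 64-bit
+ * members lose their natural 8-byte alignment there, while the 16-
+ * and 32-bit members keep theirs.
+ */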
+
+#define XFS_IOC_ALLOCSP_32     _IOW('X', 10, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP_32      _IOW('X', 11, struct compat_xfs_flock64)
+#define XFS_IOC_ALLOCSP64_32   _IOW('X', 36, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP64_32    _IOW('X', 37, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP_32      _IOW('X', 40, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP_32    _IOW('X', 41, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP64_32    _IOW('X', 42, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP64_32  _IOW('X', 43, struct compat_xfs_flock64)
+#define XFS_IOC_ZERO_RANGE_32  _IOW('X', 57, struct compat_xfs_flock64)
+
+typedef struct compat_xfs_fsop_geom_v1 {
+       __u32           blocksize;      /* filesystem (data) block size */
+       __u32           rtextsize;      /* realtime extent size         */
+       __u32           agblocks;       /* fsblocks in an AG            */
+       __u32           agcount;        /* number of allocation groups  */
+       __u32           logblocks;      /* fsblocks in the log          */
+       __u32           sectsize;       /* (data) sector size, bytes    */
+       __u32           inodesize;      /* inode size in bytes          */
+       __u32           imaxpct;        /* max allowed inode space(%)   */
+       __u64           datablocks;     /* fsblocks in data subvolume   */
+       __u64           rtblocks;       /* fsblocks in realtime subvol  */
+       __u64           rtextents;      /* rt extents in realtime subvol*/
+       __u64           logstart;       /* starting fsblock of the log  */
+       unsigned char   uuid[16];       /* unique id of the filesystem  */
+       __u32           sunit;          /* stripe unit, fsblocks        */
+       __u32           swidth;         /* stripe width, fsblocks       */
+       __s32           version;        /* structure version            */
+       __u32           flags;          /* superblock version flags     */
+       __u32           logsectsize;    /* log sector size, bytes       */
+       __u32           rtsectsize;     /* realtime sector size, bytes  */
+       __u32           dirblocksize;   /* directory block size, bytes  */
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+
+#define XFS_IOC_FSGEOMETRY_V1_32  \
+       _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
+
+typedef struct compat_xfs_inogrp {
+       __u64           xi_startino;    /* starting inode number        */
+       __s32           xi_alloccount;  /* # bits set in allocmask      */
+       __u64           xi_allocmask;   /* mask of allocated inodes     */
+} __attribute__((packed)) compat_xfs_inogrp_t;
+
+/* These growfs input structures have padding on the end, so must translate */
+typedef struct compat_xfs_growfs_data {
+       __u64           newblocks;      /* new data subvol size, fsblocks */
+       __u32           imaxpct;        /* new inode space percentage limit */
+} __attribute__((packed)) compat_xfs_growfs_data_t;
+
+typedef struct compat_xfs_growfs_rt {
+       __u64           newblocks;      /* new realtime size, fsblocks */
+       __u32           extsize;        /* new realtime extent size, fsblocks */
+} __attribute__((packed)) compat_xfs_growfs_rt_t;
+
+#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
+#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
+
+#endif /* BROKEN_X86_ALIGNMENT */
+
+#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
new file mode 100644 (file)
index 0000000..b9c172b
--- /dev/null
@@ -0,0 +1,1210 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_acl.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+
+#include <linux/capability.h>
+#include <linux/xattr.h>
+#include <linux/namei.h>
+#include <linux/posix_acl.h>
+#include <linux/security.h>
+#include <linux/fiemap.h>
+#include <linux/slab.h>
+
+/*
+ * Bring the timestamps in the XFS inode up to date.
+ *
+ * Used before writing the inode to disk.
+ */
+void
+xfs_synchronize_times(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+       ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+       ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
+       ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
+       ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
+       ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
+}
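+
+/*
+ * The __int32_t casts above reflect the on-disk format: XFS stores
+ * each timestamp as a pair of 32-bit seconds/nanoseconds values, so
+ * the 64-bit VFS timestamps are truncated on their way into the
+ * dinode.
+ */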
+
+/*
+ * If the Linux inode is valid, mark it dirty.
+ * Used when committing a dirty inode into a transaction so that
+ * the inode will get written back by the Linux code.
+ */
+void
+xfs_mark_inode_dirty_sync(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
+               mark_inode_dirty_sync(inode);
+}
+
+void
+xfs_mark_inode_dirty(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
+               mark_inode_dirty(inode);
+}
+
+/*
+ * Hook in SELinux.  This is not quite correct yet, what we really need
+ * here (as we do for default ACLs) is a mechanism by which creation of
+ * these attrs can be journalled at inode creation time (along with the
+ * inode, of course, such that log replay can't cause these to be lost).
+ */
+STATIC int
+xfs_init_security(
+       struct inode    *inode,
+       struct inode    *dir,
+       const struct qstr *qstr)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       size_t          length;
+       void            *value;
+       unsigned char   *name;
+       int             error;
+
+       error = security_inode_init_security(inode, dir, qstr, (char **)&name,
+                                            &value, &length);
+       if (error) {
+               if (error == -EOPNOTSUPP)
+                       return 0;
+               return -error;
+       }
+
+       error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
+
+       kfree(name);
+       kfree(value);
+       return error;
+}
+
+static void
+xfs_dentry_to_name(
+       struct xfs_name *namep,
+       struct dentry   *dentry)
+{
+       namep->name = dentry->d_name.name;
+       namep->len = dentry->d_name.len;
+}
+
+STATIC void
+xfs_cleanup_inode(
+       struct inode    *dir,
+       struct inode    *inode,
+       struct dentry   *dentry)
+{
+       struct xfs_name teardown;
+
+       /*
+        * Oh, the horror.  If we can't add the ACL or we fail in
+        * xfs_init_security we must back out.  ENOSPC can hit here,
+        * among other things.
+        */
+       xfs_dentry_to_name(&teardown, dentry);
+
+       xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
+       iput(inode);
+}
+
+STATIC int
+xfs_vn_mknod(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode,
+       dev_t           rdev)
+{
+       struct inode    *inode;
+       struct xfs_inode *ip = NULL;
+       struct posix_acl *default_acl = NULL;
+       struct xfs_name name;
+       int             error;
+
+       /*
+        * Irix uses Missed'em'V split, but doesn't want to see
+        * the upper 5 bits of (14bit) major.
+        */
+       if (S_ISCHR(mode) || S_ISBLK(mode)) {
+               if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
+                       return -EINVAL;
+               rdev = sysv_encode_dev(rdev);
+       } else {
+               rdev = 0;
+       }
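+
+       /*
+        * For reference, the SysV encoding assumed above (from
+        * <linux/kdev_t.h>) is MINOR | (MAJOR << 18) - an 18-bit minor
+        * and a 14-bit major - while the check restricts the major to
+        * 9 bits for Irix compatibility.
+        */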
+
+       if (IS_POSIXACL(dir)) {
+               default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+               if (IS_ERR(default_acl))
+                       return PTR_ERR(default_acl);
+
+               if (!default_acl)
+                       mode &= ~current_umask();
+       }
+
+       xfs_dentry_to_name(&name, dentry);
+       error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+       if (unlikely(error))
+               goto out_free_acl;
+
+       inode = VFS_I(ip);
+
+       error = xfs_init_security(inode, dir, &dentry->d_name);
+       if (unlikely(error))
+               goto out_cleanup_inode;
+
+       if (default_acl) {
+               error = -xfs_inherit_acl(inode, default_acl);
+               default_acl = NULL;
+               if (unlikely(error))
+                       goto out_cleanup_inode;
+       }
+
+       d_instantiate(dentry, inode);
+       return -error;
+
+ out_cleanup_inode:
+       xfs_cleanup_inode(dir, inode, dentry);
+ out_free_acl:
+       posix_acl_release(default_acl);
+       return -error;
+}
+
+STATIC int
+xfs_vn_create(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode,
+       struct nameidata *nd)
+{
+       return xfs_vn_mknod(dir, dentry, mode, 0);
+}
+
+STATIC int
+xfs_vn_mkdir(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode)
+{
+       return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
+}
+
+STATIC struct dentry *
+xfs_vn_lookup(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       struct nameidata *nd)
+{
+       struct xfs_inode *cip;
+       struct xfs_name name;
+       int             error;
+
+       if (dentry->d_name.len >= MAXNAMELEN)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       xfs_dentry_to_name(&name, dentry);
+       error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
+       if (unlikely(error)) {
+               if (unlikely(error != ENOENT))
+                       return ERR_PTR(-error);
+               d_add(dentry, NULL);
+               return NULL;
+       }
+
+       return d_splice_alias(VFS_I(cip), dentry);
+}
+
+STATIC struct dentry *
+xfs_vn_ci_lookup(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       struct nameidata *nd)
+{
+       struct xfs_inode *ip;
+       struct xfs_name xname;
+       struct xfs_name ci_name;
+       struct qstr     dname;
+       int             error;
+
+       if (dentry->d_name.len >= MAXNAMELEN)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       xfs_dentry_to_name(&xname, dentry);
+       error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+       if (unlikely(error)) {
+               if (unlikely(error != ENOENT))
+                       return ERR_PTR(-error);
+               /*
+                * Call d_add(dentry, NULL) here once d_drop_negative_children
+                * is called in xfs_vn_mknod (i.e. allow negative dentries
+                * with CI filesystems).
+                */
+               return NULL;
+       }
+
+       /* if exact match, just splice and exit */
+       if (!ci_name.name)
+               return d_splice_alias(VFS_I(ip), dentry);
+
+       /* else case-insensitive match... */
+       dname.name = ci_name.name;
+       dname.len = ci_name.len;
+       dentry = d_add_ci(dentry, VFS_I(ip), &dname);
+       kmem_free(ci_name.name);
+       return dentry;
+}
+
+STATIC int
+xfs_vn_link(
+       struct dentry   *old_dentry,
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct inode    *inode = old_dentry->d_inode;
+       struct xfs_name name;
+       int             error;
+
+       xfs_dentry_to_name(&name, dentry);
+
+       error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
+       if (unlikely(error))
+               return -error;
+
+       ihold(inode);
+       d_instantiate(dentry, inode);
+       return 0;
+}
+
+STATIC int
+xfs_vn_unlink(
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct xfs_name name;
+       int             error;
+
+       xfs_dentry_to_name(&name, dentry);
+
+       error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+       if (error)
+               return error;
+
+       /*
+        * With unlink, the VFS makes the dentry "negative": no inode,
+        * but still hashed. This is incompatible with case-insensitive
+        * mode, so invalidate (unhash) the dentry in CI-mode.
+        */
+       if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+               d_invalidate(dentry);
+       return 0;
+}
+
+STATIC int
+xfs_vn_symlink(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       const char      *symname)
+{
+       struct inode    *inode;
+       struct xfs_inode *cip = NULL;
+       struct xfs_name name;
+       int             error;
+       mode_t          mode;
+
+       mode = S_IFLNK |
+               (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
+       xfs_dentry_to_name(&name, dentry);
+
+       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
+       if (unlikely(error))
+               goto out;
+
+       inode = VFS_I(cip);
+
+       error = xfs_init_security(inode, dir, &dentry->d_name);
+       if (unlikely(error))
+               goto out_cleanup_inode;
+
+       d_instantiate(dentry, inode);
+       return 0;
+
+ out_cleanup_inode:
+       xfs_cleanup_inode(dir, inode, dentry);
+ out:
+       return -error;
+}
+
+STATIC int
+xfs_vn_rename(
+       struct inode    *odir,
+       struct dentry   *odentry,
+       struct inode    *ndir,
+       struct dentry   *ndentry)
+{
+       struct inode    *new_inode = ndentry->d_inode;
+       struct xfs_name oname;
+       struct xfs_name nname;
+
+       xfs_dentry_to_name(&oname, odentry);
+       xfs_dentry_to_name(&nname, ndentry);
+
+       return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+                          XFS_I(ndir), &nname, new_inode ?
+                                               XFS_I(new_inode) : NULL);
+}
+
+/*
+ * Careful here - this function can get called recursively, so we
+ * need to be very careful about how much stack we use.  The link
+ * buffer is kmalloced for this reason (rather than placed on the
+ * stack)...
+ */
+STATIC void *
+xfs_vn_follow_link(
+       struct dentry           *dentry,
+       struct nameidata        *nd)
+{
+       char                    *link;
+       int                     error = -ENOMEM;
+
+       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+       if (!link)
+               goto out_err;
+
+       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+       if (unlikely(error))
+               goto out_kfree;
+
+       nd_set_link(nd, link);
+       return NULL;
+
+ out_kfree:
+       kfree(link);
+ out_err:
+       nd_set_link(nd, ERR_PTR(error));
+       return NULL;
+}
+
+STATIC void
+xfs_vn_put_link(
+       struct dentry   *dentry,
+       struct nameidata *nd,
+       void            *p)
+{
+       char            *s = nd_get_link(nd);
+
+       if (!IS_ERR(s))
+               kfree(s);
+}
+
+STATIC int
+xfs_vn_getattr(
+       struct vfsmount         *mnt,
+       struct dentry           *dentry,
+       struct kstat            *stat)
+{
+       struct inode            *inode = dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+
+       trace_xfs_getattr(ip);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       stat->size = XFS_ISIZE(ip);
+       stat->dev = inode->i_sb->s_dev;
+       stat->mode = ip->i_d.di_mode;
+       stat->nlink = ip->i_d.di_nlink;
+       stat->uid = ip->i_d.di_uid;
+       stat->gid = ip->i_d.di_gid;
+       stat->ino = ip->i_ino;
+       stat->atime = inode->i_atime;
+       stat->mtime = inode->i_mtime;
+       stat->ctime = inode->i_ctime;
+       stat->blocks =
+               XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFBLK:
+       case S_IFCHR:
+               stat->blksize = BLKDEV_IOSIZE;
+               stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+                                  sysv_minor(ip->i_df.if_u2.if_rdev));
+               break;
+       default:
+               if (XFS_IS_REALTIME_INODE(ip)) {
+                       /*
+                        * If the file blocks are being allocated from a
+                        * realtime volume, then return the inode's realtime
+                        * extent size or the realtime volume's extent size.
+                        */
+                       stat->blksize =
+                               xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
+               } else
+                       stat->blksize = xfs_preferred_iosize(mp);
+               stat->rdev = 0;
+               break;
+       }
+
+       return 0;
+}
+
+int
+xfs_setattr_nonsize(
+       struct xfs_inode        *ip,
+       struct iattr            *iattr,
+       int                     flags)
+{
+       xfs_mount_t             *mp = ip->i_mount;
+       struct inode            *inode = VFS_I(ip);
+       int                     mask = iattr->ia_valid;
+       xfs_trans_t             *tp;
+       int                     error;
+       uid_t                   uid = 0, iuid = 0;
+       gid_t                   gid = 0, igid = 0;
+       struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
+       struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;
+
+       trace_xfs_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       error = -inode_change_ok(inode, iattr);
+       if (error)
+               return XFS_ERROR(error);
+
+       ASSERT((mask & ATTR_SIZE) == 0);
+
+       /*
+        * If disk quotas are on, we make sure that the dquots do exist on disk,
+        * before we start any other transactions. Trying to do this later
+        * is messy. We don't care to take a readlock to look at the ids
+        * in inode here, because we can't hold it across the trans_reserve.
+        * If the IDs do change before we take the ilock, we're covered
+        * because the i_*dquot fields will get updated anyway.
+        */
+       if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
+               uint    qflags = 0;
+
+               if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+                       uid = iattr->ia_uid;
+                       qflags |= XFS_QMOPT_UQUOTA;
+               } else {
+                       uid = ip->i_d.di_uid;
+               }
+               if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+                       gid = iattr->ia_gid;
+                       qflags |= XFS_QMOPT_GQUOTA;
+               } else {
+                       gid = ip->i_d.di_gid;
+               }
+
+               /*
+                * We take a reference when we initialize udqp and gdqp,
+                * so it is important that we never blindly double trip on
+                * the same variable. See xfs_create() for an example.
+                */
+               ASSERT(udqp == NULL);
+               ASSERT(gdqp == NULL);
+               error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
+                                        qflags, &udqp, &gdqp);
+               if (error)
+                       return error;
+       }
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+       if (error)
+               goto out_dqrele;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & (ATTR_UID|ATTR_GID)) {
+               /*
+                * These IDs could have changed since we last looked at them.
+                * But, we're assured that if the ownership did change
+                * while we didn't have the inode locked, inode's dquot(s)
+                * would have changed also.
+                */
+               iuid = ip->i_d.di_uid;
+               igid = ip->i_d.di_gid;
+               gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+               uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
+
+               /*
+                * Do a quota reservation only if uid/gid is actually
+                * going to change.
+                */
+               if (XFS_IS_QUOTA_RUNNING(mp) &&
+                   ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+                    (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+                       ASSERT(tp);
+                       error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+                                               capable(CAP_FOWNER) ?
+                                               XFS_QMOPT_FORCE_RES : 0);
+                       if (error)      /* out of quota */
+                               goto out_trans_cancel;
+               }
+       }
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & (ATTR_UID|ATTR_GID)) {
+               /*
+                * CAP_FSETID overrides the following restrictions:
+                *
+                * The set-user-ID and set-group-ID bits of a file will be
+                * cleared upon successful return from chown()
+                */
+               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+                   !capable(CAP_FSETID))
+                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+               /*
+                * Change the ownerships and register quota modifications
+                * in the transaction.
+                */
+               if (iuid != uid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+                               ASSERT(mask & ATTR_UID);
+                               ASSERT(udqp);
+                               olddquot1 = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_udquot, udqp);
+                       }
+                       ip->i_d.di_uid = uid;
+                       inode->i_uid = uid;
+               }
+               if (igid != gid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+                               ASSERT(!XFS_IS_PQUOTA_ON(mp));
+                               ASSERT(mask & ATTR_GID);
+                               ASSERT(gdqp);
+                               olddquot2 = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_gdquot, gdqp);
+                       }
+                       ip->i_d.di_gid = gid;
+                       inode->i_gid = gid;
+               }
+       }
+
+       /*
+        * Change file access modes.
+        */
+       if (mask & ATTR_MODE) {
+               umode_t mode = iattr->ia_mode;
+
+               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+                       mode &= ~S_ISGID;
+
+               ip->i_d.di_mode &= S_IFMT;
+               ip->i_d.di_mode |= mode & ~S_IFMT;
+
+               inode->i_mode &= S_IFMT;
+               inode->i_mode |= mode & ~S_IFMT;
+       }
+
+       /*
+        * Change file access or modified times.
+        */
+       if (mask & ATTR_ATIME) {
+               inode->i_atime = iattr->ia_atime;
+               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_CTIME) {
+               inode->i_ctime = iattr->ia_ctime;
+               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_MTIME) {
+               inode->i_mtime = iattr->ia_mtime;
+               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Release any dquot(s) the inode had kept before chown.
+        */
+       xfs_qm_dqrele(olddquot1);
+       xfs_qm_dqrele(olddquot2);
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+
+       if (error)
+               return XFS_ERROR(error);
+
+       /*
+        * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+        *           update.  We could avoid this with linked transactions
+        *           and passing down the transaction pointer all the way
+        *           to attr_set.  No previous user of the generic
+        *           Posix ACL code seems to care about this issue either.
+        */
+       if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+               error = -xfs_acl_chmod(inode);
+               if (error)
+                       return XFS_ERROR(error);
+       }
+
+       return 0;
+
+out_trans_cancel:
+       xfs_trans_cancel(tp, 0);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_dqrele:
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       return error;
+}
+
+/*
+ * Truncate file.  Must have write permission and not be a directory.
+ */
+int
+xfs_setattr_size(
+       struct xfs_inode        *ip,
+       struct iattr            *iattr,
+       int                     flags)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct inode            *inode = VFS_I(ip);
+       int                     mask = iattr->ia_valid;
+       struct xfs_trans        *tp;
+       int                     error;
+       uint                    lock_flags;
+       uint                    commit_flags = 0;
+
+       trace_xfs_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       error = -inode_change_ok(inode, iattr);
+       if (error)
+               return XFS_ERROR(error);
+
+       ASSERT(S_ISREG(ip->i_d.di_mode));
+       ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+                       ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
+                       ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+
+       lock_flags = XFS_ILOCK_EXCL;
+       if (!(flags & XFS_ATTR_NOLOCK))
+               lock_flags |= XFS_IOLOCK_EXCL;
+       xfs_ilock(ip, lock_flags);
+
+       /*
+        * Short-circuit the truncate case for zero-length files.
+        */
+       if (iattr->ia_size == 0 &&
+           ip->i_size == 0 && ip->i_d.di_nextents == 0) {
+               if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
+                       goto out_unlock;
+
+               /*
+                * Use the regular setattr path to update the timestamps.
+                */
+               xfs_iunlock(ip, lock_flags);
+               iattr->ia_valid &= ~ATTR_SIZE;
+               return xfs_setattr_nonsize(ip, iattr, 0);
+       }
+
+       /*
+        * Make sure that the dquots are attached to the inode.
+        */
+       error = xfs_qm_dqattach_locked(ip, 0);
+       if (error)
+               goto out_unlock;
+
+       /*
+        * Now we can make the changes.  Before we join the inode to the
+        * transaction, take care of the part of the truncation that must be
+        * done without the inode lock.  This needs to be done before joining
+        * the inode to the transaction, because the inode cannot be unlocked
+        * once it is a part of the transaction.
+        */
+       if (iattr->ia_size > ip->i_size) {
+               /*
+                * Do the first part of growing a file: zero any data in the
+                * last block that is beyond the old EOF.  We need to do this
+                * before the inode is joined to the transaction to modify
+                * i_size.
+                */
+               error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+               if (error)
+                       goto out_unlock;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       lock_flags &= ~XFS_ILOCK_EXCL;
+
+       /*
+        * We are going to log the inode size change in this transaction so
+        * any previous writes that are beyond the on disk EOF and the new
+        * EOF that have not been written out need to be written here.  If we
+        * do not write the data out, we expose ourselves to the null files
+        * problem.
+        *
+        * Only flush from the on disk size to the smaller of the in memory
+        * file size or the new size as that's the range we really care about
+        * here and prevents waiting for other data not within the range we
+        * care about here.
+        */
+       if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
+               error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
+                                       XBF_ASYNC, FI_NONE);
+               if (error)
+                       goto out_unlock;
+       }
+
+       /*
+        * Wait for all I/O to complete.
+        */
+       xfs_ioend_wait(ip);
+
+       error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+                                    xfs_get_blocks);
+       if (error)
+               goto out_unlock;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+                                XFS_TRANS_PERM_LOG_RES,
+                                XFS_ITRUNCATE_LOG_COUNT);
+       if (error)
+               goto out_trans_cancel;
+
+       truncate_setsize(inode, iattr->ia_size);
+
+       commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+       lock_flags |= XFS_ILOCK_EXCL;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Only change the c/mtime if we are changing the size or we are
+        * explicitly asked to change it.  This handles the semantic difference
+        * between truncate() and ftruncate() as implemented in the VFS.
+        *
+        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+        * special case where we need to update the times despite not having
+        * these flags set.  For all other operations the VFS set these flags
+        * explicitly if it wants a timestamp update.
+        */
+       if (iattr->ia_size != ip->i_size &&
+           (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+               iattr->ia_ctime = iattr->ia_mtime =
+                       current_fs_time(inode->i_sb);
+               mask |= ATTR_CTIME | ATTR_MTIME;
+       }
+
+       if (iattr->ia_size > ip->i_size) {
+               ip->i_d.di_size = iattr->ia_size;
+               ip->i_size = iattr->ia_size;
+       } else if (iattr->ia_size <= ip->i_size ||
+                  (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
+               error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
+               if (error)
+                       goto out_trans_abort;
+
+               /*
+                * Truncated "down", so we're removing references to old data
+                * here - if we delay flushing for a long time, we expose
+                * ourselves unduly to the notorious NULL files problem.  So,
+                * we mark this inode and flush it when the file is closed,
+                * and do not wait the usual (long) time for writeout.
+                */
+               xfs_iflags_set(ip, XFS_ITRUNCATED);
+       }
+
+       if (mask & ATTR_CTIME) {
+               inode->i_ctime = iattr->ia_ctime;
+               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_MTIME) {
+               inode->i_mtime = iattr->ia_mtime;
+               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+out_unlock:
+       if (lock_flags)
+               xfs_iunlock(ip, lock_flags);
+       return error;
+
+out_trans_abort:
+       commit_flags |= XFS_TRANS_ABORT;
+out_trans_cancel:
+       xfs_trans_cancel(tp, commit_flags);
+       goto out_unlock;
+}
+
+STATIC int
+xfs_vn_setattr(
+       struct dentry   *dentry,
+       struct iattr    *iattr)
+{
+       if (iattr->ia_valid & ATTR_SIZE)
+               return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
+       return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
+}
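+
+/*
+ * Note that xfs_setattr_size and xfs_setattr_nonsize follow the old
+ * XFS convention of returning positive errnos; the leading '-' above
+ * converts them to the negative values the VFS expects.
+ */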
+
+#define XFS_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+
+/*
+ * Call fiemap helper to fill in user data.
+ * Returns positive errors to xfs_getbmap.
+ */
+STATIC int
+xfs_fiemap_format(
+       void                    **arg,
+       struct getbmapx         *bmv,
+       int                     *full)
+{
+       int                     error;
+       struct fiemap_extent_info *fieinfo = *arg;
+       u32                     fiemap_flags = 0;
+       u64                     logical, physical, length;
+
+       /* Do nothing for a hole */
+       if (bmv->bmv_block == -1LL)
+               return 0;
+
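+       /* getbmap speaks in 512-byte basic blocks; convert to bytes for fiemap */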
+       logical = BBTOB(bmv->bmv_offset);
+       physical = BBTOB(bmv->bmv_block);
+       length = BBTOB(bmv->bmv_length);
+
+       if (bmv->bmv_oflags & BMV_OF_PREALLOC)
+               fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
+       else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
+               fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
+               physical = 0;   /* no block yet */
+       }
+       if (bmv->bmv_oflags & BMV_OF_LAST)
+               fiemap_flags |= FIEMAP_EXTENT_LAST;
+
+       error = fiemap_fill_next_extent(fieinfo, logical, physical,
+                                       length, fiemap_flags);
+       if (error > 0) {
+               error = 0;
+               *full = 1;      /* user array now full */
+       }
+
+       return -error;
+}
+
+STATIC int
+xfs_vn_fiemap(
+       struct inode            *inode,
+       struct fiemap_extent_info *fieinfo,
+       u64                     start,
+       u64                     length)
+{
+       xfs_inode_t             *ip = XFS_I(inode);
+       struct getbmapx         bm;
+       int                     error;
+
+       error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
+       if (error)
+               return error;
+
+       /* Set up bmap header for xfs internal routine */
+       bm.bmv_offset = BTOBB(start);
+       /* Special case for whole file */
+       if (length == FIEMAP_MAX_OFFSET)
+               bm.bmv_length = -1LL;
+       else
+               bm.bmv_length = BTOBB(length);
+
+       /* We add one because in getbmap world count includes the header */
+       bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
+                                       fieinfo->fi_extents_max + 1;
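+       /* clamp the record count so the getbmapx array stays within 16 pages */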
+       bm.bmv_count = min_t(__s32, bm.bmv_count,
+                            (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
+       bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
+               bm.bmv_iflags |= BMV_IF_ATTRFORK;
+       if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
+               bm.bmv_iflags |= BMV_IF_DELALLOC;
+
+       error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
+       if (error)
+               return -error;
+
+       return 0;
+}
+
+static const struct inode_operations xfs_inode_operations = {
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+       .fiemap                 = xfs_vn_fiemap,
+};
+
+static const struct inode_operations xfs_dir_inode_operations = {
+       .create                 = xfs_vn_create,
+       .lookup                 = xfs_vn_lookup,
+       .link                   = xfs_vn_link,
+       .unlink                 = xfs_vn_unlink,
+       .symlink                = xfs_vn_symlink,
+       .mkdir                  = xfs_vn_mkdir,
+       /*
+        * Yes, XFS uses the same method for rmdir and unlink.
+        *
+        * There are some subtle differences deeper in the code,
+        * but we use S_ISDIR to check for those.
+        */
+       .rmdir                  = xfs_vn_unlink,
+       .mknod                  = xfs_vn_mknod,
+       .rename                 = xfs_vn_rename,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_dir_ci_inode_operations = {
+       .create                 = xfs_vn_create,
+       .lookup                 = xfs_vn_ci_lookup,
+       .link                   = xfs_vn_link,
+       .unlink                 = xfs_vn_unlink,
+       .symlink                = xfs_vn_symlink,
+       .mkdir                  = xfs_vn_mkdir,
+       /*
+        * Yes, XFS uses the same method for rmdir and unlink.
+        *
+        * There are some subtle differences deeper in the code,
+        * but we use S_ISDIR to check for those.
+        */
+       .rmdir                  = xfs_vn_unlink,
+       .mknod                  = xfs_vn_mknod,
+       .rename                 = xfs_vn_rename,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_symlink_inode_operations = {
+       .readlink               = generic_readlink,
+       .follow_link            = xfs_vn_follow_link,
+       .put_link               = xfs_vn_put_link,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+STATIC void
+xfs_diflags_to_iflags(
+       struct inode            *inode,
+       struct xfs_inode        *ip)
+{
+       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+               inode->i_flags |= S_IMMUTABLE;
+       else
+               inode->i_flags &= ~S_IMMUTABLE;
+       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+               inode->i_flags |= S_APPEND;
+       else
+               inode->i_flags &= ~S_APPEND;
+       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+               inode->i_flags |= S_SYNC;
+       else
+               inode->i_flags &= ~S_SYNC;
+       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+               inode->i_flags |= S_NOATIME;
+       else
+               inode->i_flags &= ~S_NOATIME;
+}
+
+/*
+ * Initialize the Linux inode, set up the operation vectors and
+ * unlock the inode.
+ *
+ * When reading existing inodes from disk this is called directly
+ * from xfs_iget, when creating a new inode it is called from
+ * xfs_ialloc after setting up the inode.
+ *
+ * We are always called with an uninitialised linux inode here.
+ * We need to initialise the necessary fields and take a reference
+ * on it.
+ */
+void
+xfs_setup_inode(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = &ip->i_vnode;
+
+       inode->i_ino = ip->i_ino;
+       inode->i_state = I_NEW;
+
+       inode_sb_list_add(inode);
+       /* make the inode look hashed for the writeback code */
+       hlist_add_fake(&inode->i_hash);
+
+       inode->i_mode   = ip->i_d.di_mode;
+       inode->i_nlink  = ip->i_d.di_nlink;
+       inode->i_uid    = ip->i_d.di_uid;
+       inode->i_gid    = ip->i_d.di_gid;
+
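+       /*
+        * Special file device numbers are stored on disk in the old
+        * sysv dev_t encoding; decode them into a Linux dev_t here.
+        */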
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFBLK:
+       case S_IFCHR:
+               inode->i_rdev =
+                       MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+                             sysv_minor(ip->i_df.if_u2.if_rdev));
+               break;
+       default:
+               inode->i_rdev = 0;
+               break;
+       }
+
+       inode->i_generation = ip->i_d.di_gen;
+       i_size_write(inode, ip->i_d.di_size);
+       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
+       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
+       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
+       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
+       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
+       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
+       xfs_diflags_to_iflags(inode, ip);
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
+               inode->i_op = &xfs_inode_operations;
+               inode->i_fop = &xfs_file_operations;
+               inode->i_mapping->a_ops = &xfs_address_space_operations;
+               break;
+       case S_IFDIR:
+               if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+                       inode->i_op = &xfs_dir_ci_inode_operations;
+               else
+                       inode->i_op = &xfs_dir_inode_operations;
+               inode->i_fop = &xfs_dir_file_operations;
+               break;
+       case S_IFLNK:
+               inode->i_op = &xfs_symlink_inode_operations;
+               if (!(ip->i_df.if_flags & XFS_IFINLINE))
+                       inode->i_mapping->a_ops = &xfs_address_space_operations;
+               break;
+       default:
+               inode->i_op = &xfs_inode_operations;
+               init_special_inode(inode, inode->i_mode, inode->i_rdev);
+               break;
+       }
+
+       /*
+        * If there is no attribute fork no ACL can exist on this inode,
+        * and it can't have any file capabilities attached to it either.
+        */
+       if (!XFS_IFORK_Q(ip)) {
+               inode_has_no_xattr(inode);
+               cache_no_acl(inode);
+       }
+
+       xfs_iflags_clear(ip, XFS_INEW);
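+       /* make sure the XFS_INEW clear is visible before we unlock the inode */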
+       barrier();
+
+       unlock_new_inode(inode);
+}
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
new file mode 100644 (file)
index 0000000..ef41c92
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOPS_H__
+#define __XFS_IOPS_H__
+
+struct xfs_inode;
+
+extern const struct file_operations xfs_file_operations;
+extern const struct file_operations xfs_dir_file_operations;
+
+extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
+
+extern void xfs_setup_inode(struct xfs_inode *);
+
+#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
new file mode 100644 (file)
index 0000000..1e8a45e
--- /dev/null
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_LINUX__
+#define __XFS_LINUX__
+
+#include <linux/types.h>
+
+/*
+ * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
+ * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
+ */
+#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
+# define XFS_BIG_BLKNOS        1
+# define XFS_BIG_INUMS 1
+#else
+# define XFS_BIG_BLKNOS        0
+# define XFS_BIG_INUMS 0
+#endif
+
+#include "xfs_types.h"
+
+#include "kmem.h"
+#include "mrlock.h"
+#include "time.h"
+#include "uuid.h"
+
+#include <linux/semaphore.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/major.h>
+#include <linux/pagemap.h>
+#include <linux/vfs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/sort.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/ctype.h>
+#include <linux/writeback.h>
+#include <linux/capability.h>
+#include <linux/list_sort.h>
+
+#include <asm/page.h>
+#include <asm/div64.h>
+#include <asm/param.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+#include "xfs_vnode.h"
+#include "xfs_stats.h"
+#include "xfs_sysctl.h"
+#include "xfs_iops.h"
+#include "xfs_aops.h"
+#include "xfs_super.h"
+#include "xfs_buf.h"
+#include "xfs_message.h"
+
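+/*
+ * The XFS on-disk format is big-endian, so a big-endian host needs no
+ * byte swapping; XFS_NATIVE_HOST marks that case.
+ */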
+#ifdef __BIG_ENDIAN
+#define XFS_NATIVE_HOST 1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
+/*
+ * Feature macros (disable/enable)
+ */
+#ifdef CONFIG_SMP
+#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
+#else
+#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
+#endif
+
+#define irix_sgid_inherit      xfs_params.sgid_inherit.val
+#define irix_symlink_mode      xfs_params.symlink_mode.val
+#define xfs_panic_mask         xfs_params.panic_mask.val
+#define xfs_error_level                xfs_params.error_level.val
+#define xfs_syncd_centisecs    xfs_params.syncd_timer.val
+#define xfs_stats_clear                xfs_params.stats_clear.val
+#define xfs_inherit_sync       xfs_params.inherit_sync.val
+#define xfs_inherit_nodump     xfs_params.inherit_nodump.val
+#define xfs_inherit_noatime    xfs_params.inherit_noatim.val
+#define xfs_buf_timer_centisecs        xfs_params.xfs_buf_timer.val
+#define xfs_buf_age_centisecs  xfs_params.xfs_buf_age.val
+#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
+#define xfs_rotorstep          xfs_params.rotorstep.val
+#define xfs_inherit_nodefrag   xfs_params.inherit_nodfrg.val
+#define xfs_fstrm_centisecs    xfs_params.fstrm_timer.val
+
+#define current_cpu()          (raw_smp_processor_id())
+#define current_pid()          (current->pid)
+#define current_test_flags(f)  (current->flags & (f))
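+/* stash the old PF_* bits in *sp so set/clear pairs can nest and restore */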
+#define current_set_flags_nested(sp, f)                \
+               (*(sp) = current->flags, current->flags |= (f))
+#define current_clear_flags_nested(sp, f)      \
+               (*(sp) = current->flags, current->flags &= ~(f))
+#define current_restore_flags_nested(sp, f)    \
+               (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
+
+#define spinlock_destroy(lock)
+
+#define NBBY           8               /* number of bits per byte */
+
+/*
+ * Size of block device i/o is parameterized here.
+ * Currently the system supports page-sized i/o.
+ */
+#define        BLKDEV_IOSHIFT          PAGE_CACHE_SHIFT
+#define        BLKDEV_IOSIZE           (1<<BLKDEV_IOSHIFT)
+/* number of BB's per block device block */
+#define        BLKDEV_BB               BTOBB(BLKDEV_IOSIZE)
+
+#define ENOATTR                ENODATA         /* Attribute not found */
+#define EWRONGFS       EINVAL          /* Mount with wrong filesystem type */
+#define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
+
+#define SYNCHRONIZE()  barrier()
+#define __return_address __builtin_return_address(0)
+
+#define XFS_PROJID_DEFAULT     0
+#define MAXPATHLEN     1024
+
+#define MIN(a,b)       (min(a,b))
+#define MAX(a,b)       (max(a,b))
+#define howmany(x, y)  (((x)+((y)-1))/(y))
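+/* e.g. howmany(10, 4) == 3: three 4-unit blocks are needed to cover 10 units */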
+
+/*
+ * Various platform dependent calls that don't fit anywhere else
+ */
+#define xfs_sort(a,n,s,fn)     sort(a,n,s,fn,NULL)
+#define xfs_stack_trace()      dump_stack()
+
+
+/* Move the kernel do_div definition off to one side */
+
+#if defined __i386__
+/* For ia32 we need to pull some tricks to get past various versions
+ * of the compiler which do not like us using do_div in the middle
+ * of large functions.
+ */
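+/*
+ * For the 64-bit case the high word is reduced modulo b first, so the
+ * 32-bit divl below cannot overflow; divl leaves the low quotient in
+ * %eax and the remainder in %edx.
+ */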
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+       __u32   mod;
+
+       switch (n) {
+               case 4:
+                       mod = *(__u32 *)a % b;
+                       *(__u32 *)a = *(__u32 *)a / b;
+                       return mod;
+               case 8:
+                       {
+                       unsigned long __upper, __low, __high, __mod;
+                       __u64   c = *(__u64 *)a;
+                       __upper = __high = c >> 32;
+                       __low = c;
+                       if (__high) {
+                               __upper = __high % (b);
+                               __high = __high / (b);
+                       }
+                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+                       asm("":"=A" (c):"a" (__low),"d" (__high));
+                       *(__u64 *)a = c;
+                       return __mod;
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+       switch (n) {
+               case 4:
+                       return *(__u32 *)a % b;
+               case 8:
+                       {
+                       unsigned long __upper, __low, __high, __mod;
+                       __u64   c = *(__u64 *)a;
+                       __upper = __high = c >> 32;
+                       __low = c;
+                       if (__high) {
+                               __upper = __high % (b);
+                               __high = __high / (b);
+                       }
+                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+                       asm("":"=A" (c):"a" (__low),"d" (__high));
+                       return __mod;
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+#else
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+       __u32   mod;
+
+       switch (n) {
+               case 4:
+                       mod = *(__u32 *)a % b;
+                       *(__u32 *)a = *(__u32 *)a / b;
+                       return mod;
+               case 8:
+                       mod = do_div(*(__u64 *)a, b);
+                       return mod;
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+       switch (n) {
+               case 4:
+                       return *(__u32 *)a % b;
+               case 8:
+                       {
+                       __u64   c = *(__u64 *)a;
+                       return do_div(c, b);
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+#endif
+
+#undef do_div
+#define do_div(a, b)   xfs_do_div(&(a), (b), sizeof(a))
+#define do_mod(a, b)   xfs_do_mod(&(a), (b), sizeof(a))
+
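+/* round x up to a multiple of y; e.g. roundup_64(10, 4) == 12 */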
+static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+{
+       x += y - 1;
+       do_div(x, y);
+       return(x * y);
+}
+
+static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+{
+       x += y - 1;
+       do_div(x, y);
+       return x;
+}
+
+/* ARM old ABI has some weird alignment/padding */
+#if defined(__arm__) && !defined(__ARM_EABI__)
+#define __arch_pack __attribute__((packed))
+#else
+#define __arch_pack
+#endif
+
+#define ASSERT_ALWAYS(expr)    \
+       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef DEBUG
+#define ASSERT(expr)   ((void)0)
+
+#ifndef STATIC
+# define STATIC static noinline
+#endif
+
+#else /* DEBUG */
+
+#define ASSERT(expr)   \
+       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef STATIC
+# define STATIC noinline
+#endif
+
+#endif /* DEBUG */
+
+#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
new file mode 100644 (file)
index 0000000..bd672de
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+
+/*
+ * XFS logging functions
+ */
+static void
+__xfs_printk(
+       const char              *level,
+       const struct xfs_mount  *mp,
+       struct va_format        *vaf)
+{
+       if (mp && mp->m_fsname) {
+               printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+               return;
+       }
+       printk("%sXFS: %pV\n", level, vaf);
+}
+
+#define define_xfs_printk_level(func, kern_level)              \
+void func(const struct xfs_mount *mp, const char *fmt, ...)    \
+{                                                              \
+       struct va_format        vaf;                            \
+       va_list                 args;                           \
+                                                               \
+       va_start(args, fmt);                                    \
+                                                               \
+       vaf.fmt = fmt;                                          \
+       vaf.va = &args;                                         \
+                                                               \
+       __xfs_printk(kern_level, mp, &vaf);                     \
+       va_end(args);                                           \
+}
+
+define_xfs_printk_level(xfs_emerg, KERN_EMERG);
+define_xfs_printk_level(xfs_alert, KERN_ALERT);
+define_xfs_printk_level(xfs_crit, KERN_CRIT);
+define_xfs_printk_level(xfs_err, KERN_ERR);
+define_xfs_printk_level(xfs_warn, KERN_WARNING);
+define_xfs_printk_level(xfs_notice, KERN_NOTICE);
+define_xfs_printk_level(xfs_info, KERN_INFO);
+#ifdef DEBUG
+define_xfs_printk_level(xfs_debug, KERN_DEBUG);
+#endif
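+
+/*
+ * Example: xfs_warn(mp, "quotacheck needed") comes out as
+ * "XFS (<fsname>): quotacheck needed" at KERN_WARNING.
+ */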
+
+void
+xfs_alert_tag(
+       const struct xfs_mount  *mp,
+       int                     panic_tag,
+       const char              *fmt, ...)
+{
+       struct va_format        vaf;
+       va_list                 args;
+       int                     do_panic = 0;
+
+       if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
+               xfs_alert(mp, "Transforming an alert into a BUG.");
+               do_panic = 1;
+       }
+
+       va_start(args, fmt);
+
+       vaf.fmt = fmt;
+       vaf.va = &args;
+
+       __xfs_printk(KERN_ALERT, mp, &vaf);
+       va_end(args);
+
+       BUG_ON(do_panic);
+}
+
+void
+assfail(char *expr, char *file, int line)
+{
+       xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
+               expr, file, line);
+       BUG();
+}
+
+void
+xfs_hex_dump(void *p, int length)
+{
+       print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
+}
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
new file mode 100644 (file)
index 0000000..7fb7ea0
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef __XFS_MESSAGE_H
+#define __XFS_MESSAGE_H 1
+
+struct xfs_mount;
+
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
+                        const char *fmt, ...)
+        __attribute__ ((format (printf, 3, 4)));
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+
+#ifdef DEBUG
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+#else
+static inline void
+__attribute__ ((format (printf, 2, 3)))
+xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
+#endif
+
+extern void assfail(char *expr, char *f, int l);
+
+extern void xfs_hex_dump(void *p, int length);
+
+#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
new file mode 100644 (file)
index 0000000..9a0aa76
--- /dev/null
@@ -0,0 +1,2416 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_utils.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+/*
+ * The global quota manager. There is only one of these for the entire
+ * system, _not_ one per file system. XQM keeps track of the overall
+ * quota functionality, including maintaining the freelist and hash
+ * tables of dquots.
+ */
+struct mutex   xfs_Gqm_lock;
+struct xfs_qm  *xfs_Gqm;
+uint           ndquot;
+
+kmem_zone_t    *qm_dqzone;
+kmem_zone_t    *qm_dqtrxzone;
+
+STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
+STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
+
+STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
+STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
+STATIC int     xfs_qm_shake(struct shrinker *, struct shrink_control *);
+
+static struct shrinker xfs_qm_shaker = {
+       .shrink = xfs_qm_shake,
+       .seeks = DEFAULT_SEEKS,
+};
+
+/*
+ * Initialize the XQM structure.
+ * Note that there is not one quota manager per file system.
+ */
+STATIC struct xfs_qm *
+xfs_Gqm_init(void)
+{
+       xfs_dqhash_t    *udqhash, *gdqhash;
+       xfs_qm_t        *xqm;
+       size_t          hsize;
+       uint            i;
+
+       /*
+        * Initialize the dquot hash tables.
+        */
+       udqhash = kmem_zalloc_greedy(&hsize,
+                                    XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
+                                    XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
+       if (!udqhash)
+               goto out;
+
+       gdqhash = kmem_zalloc_large(hsize);
+       if (!gdqhash)
+               goto out_free_udqhash;
+
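+       /* convert bytes to bucket count; budget roughly 256 dquots per bucket */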
+       hsize /= sizeof(xfs_dqhash_t);
+       ndquot = hsize << 8;
+
+       xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
+       xqm->qm_dqhashmask = hsize - 1;
+       xqm->qm_usr_dqhtable = udqhash;
+       xqm->qm_grp_dqhtable = gdqhash;
+       ASSERT(xqm->qm_usr_dqhtable != NULL);
+       ASSERT(xqm->qm_grp_dqhtable != NULL);
+
+       for (i = 0; i < hsize; i++) {
+               xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
+               xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
+       }
+
+       /*
+        * Freelist of all dquots of all file systems
+        */
+       INIT_LIST_HEAD(&xqm->qm_dqfrlist);
+       xqm->qm_dqfrlist_cnt = 0;
+       mutex_init(&xqm->qm_dqfrlist_lock);
+
+       /*
+        * The dquot zone. We register our own low-memory callback.
+        */
+       if (!qm_dqzone) {
+               xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
+                                               "xfs_dquots");
+               qm_dqzone = xqm->qm_dqzone;
+       } else
+               xqm->qm_dqzone = qm_dqzone;
+
+       register_shrinker(&xfs_qm_shaker);
+
+       /*
+        * The t_dqinfo portion of transactions.
+        */
+       if (!qm_dqtrxzone) {
+               xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
+                                                  "xfs_dqtrx");
+               qm_dqtrxzone = xqm->qm_dqtrxzone;
+       } else
+               xqm->qm_dqtrxzone = qm_dqtrxzone;
+
+       atomic_set(&xqm->qm_totaldquots, 0);
+       xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
+       xqm->qm_nrefs = 0;
+       return xqm;
+
+ out_free_udqhash:
+       kmem_free_large(udqhash);
+ out:
+       return NULL;
+}
+
+/*
+ * Destroy the global quota manager when its reference count goes to zero.
+ */
+STATIC void
+xfs_qm_destroy(
+       struct xfs_qm   *xqm)
+{
+       struct xfs_dquot *dqp, *n;
+       int             hsize, i;
+
+       ASSERT(xqm != NULL);
+       ASSERT(xqm->qm_nrefs == 0);
+       unregister_shrinker(&xfs_qm_shaker);
+       hsize = xqm->qm_dqhashmask + 1;
+       for (i = 0; i < hsize; i++) {
+               xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
+               xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
+       }
+       kmem_free_large(xqm->qm_usr_dqhtable);
+       kmem_free_large(xqm->qm_grp_dqhtable);
+       xqm->qm_usr_dqhtable = NULL;
+       xqm->qm_grp_dqhtable = NULL;
+       xqm->qm_dqhashmask = 0;
+
+       /* frlist cleanup */
+       mutex_lock(&xqm->qm_dqfrlist_lock);
+       list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
+               xfs_dqlock(dqp);
+               list_del_init(&dqp->q_freelist);
+               xfs_Gqm->qm_dqfrlist_cnt--;
+               xfs_dqunlock(dqp);
+               xfs_qm_dqdestroy(dqp);
+       }
+       mutex_unlock(&xqm->qm_dqfrlist_lock);
+       mutex_destroy(&xqm->qm_dqfrlist_lock);
+       kmem_free(xqm);
+}
+
+/*
+ * Called at mount time to let XQM know that another file system is
+ * starting quotas. This isn't crucial information as the individual mount
+ * structures are pretty independent, but it helps the XQM keep a
+ * global view of what's going on.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_hold_quotafs_ref(
+       struct xfs_mount *mp)
+{
+       /*
+        * Need to lock the xfs_Gqm structure for things like this. For example,
+        * the structure could disappear between the entry to this routine and
+        * a HOLD operation if not locked.
+        */
+       mutex_lock(&xfs_Gqm_lock);
+
+       if (!xfs_Gqm) {
+               xfs_Gqm = xfs_Gqm_init();
+               if (!xfs_Gqm) {
+                       mutex_unlock(&xfs_Gqm_lock);
+                       return ENOMEM;
+               }
+       }
+
+       /*
+        * We can keep a list of all filesystems with quotas mounted for
+        * debugging and statistical purposes, but ...
+        * Just take a reference and get out.
+        */
+       xfs_Gqm->qm_nrefs++;
+       mutex_unlock(&xfs_Gqm_lock);
+
+       return 0;
+}
+
+
+/*
+ * Release the reference that a filesystem took at mount time,
+ * so that we know when we need to destroy the entire quota manager.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_rele_quotafs_ref(
+       struct xfs_mount *mp)
+{
+       xfs_dquot_t     *dqp, *n;
+
+       ASSERT(xfs_Gqm);
+       ASSERT(xfs_Gqm->qm_nrefs > 0);
+
+       /*
+        * Go thru the freelist and destroy all inactive dquots.
+        */
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+               xfs_dqlock(dqp);
+               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+                       ASSERT(dqp->q_mount == NULL);
+                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+                       ASSERT(list_empty(&dqp->q_hashlist));
+                       ASSERT(list_empty(&dqp->q_mplist));
+                       list_del_init(&dqp->q_freelist);
+                       xfs_Gqm->qm_dqfrlist_cnt--;
+                       xfs_dqunlock(dqp);
+                       xfs_qm_dqdestroy(dqp);
+               } else {
+                       xfs_dqunlock(dqp);
+               }
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       /*
+        * Destroy the entire XQM. If somebody mounts with quotaon, this'll
+        * be restarted.
+        */
+       mutex_lock(&xfs_Gqm_lock);
+       if (--xfs_Gqm->qm_nrefs == 0) {
+               xfs_qm_destroy(xfs_Gqm);
+               xfs_Gqm = NULL;
+       }
+       mutex_unlock(&xfs_Gqm_lock);
+}
+
+/*
+ * Just destroy the quotainfo structure.
+ */
+void
+xfs_qm_unmount(
+       struct xfs_mount        *mp)
+{
+       if (mp->m_quotainfo) {
+               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+               xfs_qm_destroy_quotainfo(mp);
+       }
+}
+
+
+/*
+ * This is called from xfs_mountfs to start quotas and initialize all
+ * necessary data structures like quotainfo.  This is also responsible for
+ * running a quotacheck as necessary.  We are guaranteed that the superblock
+ * is consistently read in at this point.
+ *
+ * If we fail here, the mount will continue with quota turned off. We don't
+ * need to indicate success or failure at all.
+ */
+void
+xfs_qm_mount_quotas(
+       xfs_mount_t     *mp)
+{
+       int             error = 0;
+       uint            sbf;
+
+       /*
+        * If quotas on realtime volumes is not supported, we disable
+        * quotas immediately.
+        */
+       if (mp->m_sb.sb_rextents) {
+               xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
+               mp->m_qflags = 0;
+               goto write_changes;
+       }
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * Allocate the quotainfo structure inside the mount struct, and
+        * create quotainode(s), and change/rev superblock if necessary.
+        */
+       error = xfs_qm_init_quotainfo(mp);
+       if (error) {
+               /*
+                * We must turn off quotas.
+                */
+               ASSERT(mp->m_quotainfo == NULL);
+               mp->m_qflags = 0;
+               goto write_changes;
+       }
+       /*
+        * If any of the quotas are not consistent, do a quotacheck.
+        */
+       if (XFS_QM_NEED_QUOTACHECK(mp)) {
+               error = xfs_qm_quotacheck(mp);
+               if (error) {
+                       /* Quotacheck failed and disabled quotas. */
+                       return;
+               }
+       }
+       /* 
+        * If one type of quotas is off, then it will lose its
+        * quotachecked status, since we won't be doing accounting for
+        * that type anymore.
+        */
+       if (!XFS_IS_UQUOTA_ON(mp))
+               mp->m_qflags &= ~XFS_UQUOTA_CHKD;
+       if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
+               mp->m_qflags &= ~XFS_OQUOTA_CHKD;
+
+ write_changes:
+       /*
+        * We actually don't have to acquire the m_sb_lock at all.
+        * This can only be called from mount, and that's single threaded. XXX
+        */
+       spin_lock(&mp->m_sb_lock);
+       sbf = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
+       spin_unlock(&mp->m_sb_lock);
+
+       if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
+               if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
+                       /*
+                        * We could only have been turning quotas off.
+                        * We aren't in very good shape actually because
+                        * the incore structures are convinced that quotas are
+                        * off, but the on disk superblock doesn't know that!
+                        */
+                       ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+                       xfs_alert(mp, "%s: Superblock update failed!",
+                               __func__);
+               }
+       }
+
+       if (error) {
+               xfs_warn(mp, "Failed to initialize disk quotas.");
+               return;
+       }
+}
+
+/*
+ * Called from the vfsops layer.
+ */
+void
+xfs_qm_unmount_quotas(
+       xfs_mount_t     *mp)
+{
+       /*
+        * Release the dquots that root inode, et al might be holding,
+        * before we flush quotas and blow away the quotainfo structure.
+        */
+       ASSERT(mp->m_rootip);
+       xfs_qm_dqdetach(mp->m_rootip);
+       if (mp->m_rbmip)
+               xfs_qm_dqdetach(mp->m_rbmip);
+       if (mp->m_rsumip)
+               xfs_qm_dqdetach(mp->m_rsumip);
+
+       /*
+        * Release the quota inodes.
+        */
+       if (mp->m_quotainfo) {
+               if (mp->m_quotainfo->qi_uquotaip) {
+                       IRELE(mp->m_quotainfo->qi_uquotaip);
+                       mp->m_quotainfo->qi_uquotaip = NULL;
+               }
+               if (mp->m_quotainfo->qi_gquotaip) {
+                       IRELE(mp->m_quotainfo->qi_gquotaip);
+                       mp->m_quotainfo->qi_gquotaip = NULL;
+               }
+       }
+}
+
+/*
+ * Flush all dquots of the given file system to disk. The dquots are
+ * _not_ purged from memory here, just their data written to disk.
+ */
+STATIC int
+xfs_qm_dqflush_all(
+       struct xfs_mount        *mp,
+       int                     sync_mode)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       int                     recl;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       if (!q)
+               return 0;
+again:
+       mutex_lock(&q->qi_dqlist_lock);
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               xfs_dqlock(dqp);
+               if (! XFS_DQ_IS_DIRTY(dqp)) {
+                       xfs_dqunlock(dqp);
+                       continue;
+               }
+
+               /* XXX a sentinel would be better */
+               recl = q->qi_dqreclaims;
+               if (!xfs_dqflock_nowait(dqp)) {
+                       /*
+                        * If we can't grab the flush lock then check
+                        * to see if the dquot has been flushed delayed
+                        * write.  If so, grab its buffer and send it
+                        * out immediately.  We'll be able to acquire
+                        * the flush lock when the I/O completes.
+                        */
+                       xfs_qm_dqflock_pushbuf_wait(dqp);
+               }
+               /*
+                * Let go of the mplist lock. We don't want to hold it
+                * across a disk write.
+                */
+               mutex_unlock(&q->qi_dqlist_lock);
+               error = xfs_qm_dqflush(dqp, sync_mode);
+               xfs_dqunlock(dqp);
+               if (error)
+                       return error;
+
+               mutex_lock(&q->qi_dqlist_lock);
+               if (recl != q->qi_dqreclaims) {
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       /* XXX restart limit */
+                       goto again;
+               }
+       }
+
+       mutex_unlock(&q->qi_dqlist_lock);
+       /* return ! busy */
+       return 0;
+}
+
+/*
+ * Release the group dquot pointers the user dquots may be
+ * carrying around as a hint. mplist is locked on entry and exit.
+ */
+STATIC void
+xfs_qm_detach_gdquots(
+       struct xfs_mount        *mp)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_dquot        *dqp, *gdqp;
+       int                     nrecl;
+
+ again:
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               xfs_dqlock(dqp);
+               if ((gdqp = dqp->q_gdquot)) {
+                       xfs_dqlock(gdqp);
+                       dqp->q_gdquot = NULL;
+               }
+               xfs_dqunlock(dqp);
+
+               if (gdqp) {
+                       /*
+                        * Can't hold the mplist lock across a dqput.
+                        * XXXmust convert to marker based iterations here.
+                        */
+                       nrecl = q->qi_dqreclaims;
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       xfs_qm_dqput(gdqp);
+
+                       mutex_lock(&q->qi_dqlist_lock);
+                       if (nrecl != q->qi_dqreclaims)
+                               goto again;
+               }
+       }
+}
+
+/*
+ * Go through all the incore dquots of this file system and take them
+ * off the mplist and hashlist, if the dquot type matches the dqtype
+ * parameter. This is used when turning off quota accounting for
+ * users and/or groups, as well as when the filesystem is unmounting.
+ */
+STATIC int
+xfs_qm_dqpurge_int(
+       struct xfs_mount        *mp,
+       uint                    flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_dquot        *dqp, *n;
+       uint                    dqtype;
+       int                     nrecl;
+       int                     nmisses;
+
+       if (!q)
+               return 0;
+
+       dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
+       dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
+       dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
+
+       mutex_lock(&q->qi_dqlist_lock);
+
+       /*
+        * In the first pass through all incore dquots of this filesystem,
+        * we release the group dquot pointers the user dquots may be
+        * carrying around as a hint. We need to do this irrespective of
+        * what's being turned off.
+        */
+       xfs_qm_detach_gdquots(mp);
+
+      again:
+       nmisses = 0;
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       /*
+        * Try to get rid of all of the unwanted dquots. The idea is to
+        * get them off mplist and hashlist, but leave them on freelist.
+        */
+       list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
+               /*
+                * It's OK to look at the type without taking dqlock here.
+                * We're holding the mplist lock here, and that's needed for
+                * a dqreclaim.
+                */
+               if ((dqp->dq_flags & dqtype) == 0)
+                       continue;
+
+               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+                       nrecl = q->qi_dqreclaims;
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       mutex_lock(&dqp->q_hash->qh_lock);
+                       mutex_lock(&q->qi_dqlist_lock);
+
+                       /*
+                        * XXXTheoretically, we can get into a very long
+                        * ping pong game here.
+                        * No one can be adding dquots to the mplist at
+                        * this point, but somebody might be taking things off.
+                        */
+                       if (nrecl != q->qi_dqreclaims) {
+                               mutex_unlock(&dqp->q_hash->qh_lock);
+                               goto again;
+                       }
+               }
+
+               /*
+                * Take the dquot off the mplist and hashlist. It may remain on
+                * freelist in INACTIVE state.
+                */
+               nmisses += xfs_qm_dqpurge(dqp);
+       }
+       mutex_unlock(&q->qi_dqlist_lock);
+       return nmisses;
+}
+
+int
+xfs_qm_dqpurge_all(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             ndquots;
+
+       /*
+        * Purge the dquot cache.
+        * None of the dquots should really be busy at this point.
+        */
+       if (mp->m_quotainfo) {
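+               /*
+                * dqpurge_int returns the number of dquots it could not
+                * purge; retry, backing off in proportion to that count.
+                */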
+               while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
+                       delay(ndquots * 10);
+               }
+       }
+       return 0;
+}
+
+STATIC int
+xfs_qm_dqattach_one(
+       xfs_inode_t     *ip,
+       xfs_dqid_t      id,
+       uint            type,
+       uint            doalloc,
+       xfs_dquot_t     *udqhint, /* hint */
+       xfs_dquot_t     **IO_idqpp)
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       error = 0;
+
+       /*
+        * See if we already have it in the inode itself. IO_idqpp is
+        * &i_udquot or &i_gdquot. This made the code look weird, but
+        * made the logic a lot simpler.
+        */
+       dqp = *IO_idqpp;
+       if (dqp) {
+               trace_xfs_dqattach_found(dqp);
+               return 0;
+       }
+
+       /*
+        * udqhint is the i_udquot field in inode, and is non-NULL only
+        * when the type arg is group/project. Its purpose is to save a
+        * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
+        * the user dquot.
+        */
+       if (udqhint) {
+               ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
+               xfs_dqlock(udqhint);
+
+               /*
+                * No need to take dqlock to look at the id.
+                *
+                * The ID can't change until it gets reclaimed, and it won't
+                * be reclaimed as long as we have a ref from inode and we
+                * hold the ilock.
+                */
+               dqp = udqhint->q_gdquot;
+               if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
+                       xfs_dqlock(dqp);
+                       XFS_DQHOLD(dqp);
+                       ASSERT(*IO_idqpp == NULL);
+                       *IO_idqpp = dqp;
+
+                       xfs_dqunlock(dqp);
+                       xfs_dqunlock(udqhint);
+                       return 0;
+               }
+
+               /*
+                * We can't hold a dquot lock when we call the dqget code.
+                * We'll deadlock in no time, because of (not conforming to)
+        * lock ordering - the inode lock comes before any dquot lock,
+                * and we may drop and reacquire the ilock in xfs_qm_dqget().
+                */
+               xfs_dqunlock(udqhint);
+       }
+
+       /*
+        * Find the dquot from somewhere. This bumps the
+        * reference count of dquot and returns it locked.
+        * This can return ENOENT if dquot didn't exist on
+        * disk and we didn't ask it to allocate;
+        * ESRCH if quotas got turned off suddenly.
+        */
+       error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+       if (error)
+               return error;
+
+       trace_xfs_dqattach_get(dqp);
+
+       /*
+        * dqget may have dropped and re-acquired the ilock, but it guarantees
+        * that the dquot returned is the one that should go in the inode.
+        */
+       *IO_idqpp = dqp;
+       xfs_dqunlock(dqp);
+       return 0;
+}
+
+
+/*
+ * Given a udquot and gdquot, attach a ptr to the group dquot in the
+ * udquot as a hint for future lookups. The idea sounds simple, but the
+ * execution isn't, because the udquot might have a group dquot attached
+ * already and getting rid of that gets us into lock ordering constraints.
+ * The process is complicated more by the fact that the dquots may or may not
+ * be locked on entry.
+ */
+STATIC void
+xfs_qm_dqattach_grouphint(
+       xfs_dquot_t     *udq,
+       xfs_dquot_t     *gdq)
+{
+       xfs_dquot_t     *tmp;
+
+       xfs_dqlock(udq);
+
+       if ((tmp = udq->q_gdquot)) {
+               if (tmp == gdq) {
+                       xfs_dqunlock(udq);
+                       return;
+               }
+
+               udq->q_gdquot = NULL;
+               /*
+                * We can't keep any dqlocks when calling dqrele,
+                * because the freelist lock comes before dqlocks.
+                */
+               xfs_dqunlock(udq);
+               /*
+                * we took a hard reference once upon a time in dqget,
+                * so give it back when the udquot no longer points at it
+                * dqput() does the unlocking of the dquot.
+                */
+               xfs_qm_dqrele(tmp);
+
+               xfs_dqlock(udq);
+               xfs_dqlock(gdq);
+
+       } else {
+               ASSERT(XFS_DQ_IS_LOCKED(udq));
+               xfs_dqlock(gdq);
+       }
+
+       ASSERT(XFS_DQ_IS_LOCKED(udq));
+       ASSERT(XFS_DQ_IS_LOCKED(gdq));
+       /*
+        * Somebody could have attached a gdquot here,
+        * when we dropped the uqlock. If so, just do nothing.
+        */
+       if (udq->q_gdquot == NULL) {
+               XFS_DQHOLD(gdq);
+               udq->q_gdquot = gdq;
+       }
+
+       xfs_dqunlock(gdq);
+       xfs_dqunlock(udq);
+}
+
+
+/*
+ * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
+ * into account.
+ * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
+ * Inode may get unlocked and relocked in here, and the caller must deal with
+ * the consequences.
+ */
+int
+xfs_qm_dqattach_locked(
+       xfs_inode_t     *ip,
+       uint            flags)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       uint            nquotas = 0;
+       int             error = 0;
+
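+       /*
+        * Nothing to attach if quotas are off, dquots are already
+        * attached, or this inode is itself one of the quota inodes.
+        */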
+       if (!XFS_IS_QUOTA_RUNNING(mp) ||
+           !XFS_IS_QUOTA_ON(mp) ||
+           !XFS_NOT_DQATTACHED(mp, ip) ||
+           ip->i_ino == mp->m_sb.sb_uquotino ||
+           ip->i_ino == mp->m_sb.sb_gquotino)
+               return 0;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       if (XFS_IS_UQUOTA_ON(mp)) {
+               error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               NULL, &ip->i_udquot);
+               if (error)
+                       goto done;
+               nquotas++;
+       }
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       if (XFS_IS_OQUOTA_ON(mp)) {
+               error = XFS_IS_GQUOTA_ON(mp) ?
+                       xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               ip->i_udquot, &ip->i_gdquot) :
+                       xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               ip->i_udquot, &ip->i_gdquot);
+               /*
+                * Don't worry about the udquot that we may have
+                * attached above. It'll get detached, if not already.
+                */
+               if (error)
+                       goto done;
+               nquotas++;
+       }
+
+       /*
+        * Attach this group quota to the user quota as a hint.
+        * This WON'T, in general, result in a thrash.
+        */
+       if (nquotas == 2) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+               ASSERT(ip->i_udquot);
+               ASSERT(ip->i_gdquot);
+
+               /*
+                * We may or may not have the i_udquot locked at this point,
+                * but this check is OK since we don't depend on the i_gdquot to
+                * be accurate 100% all the time. It is just a hint, and this
+                * will succeed in general.
+                */
+               if (ip->i_udquot->q_gdquot == ip->i_gdquot)
+                       goto done;
+               /*
+                * Attach i_gdquot to the gdquot hint inside the i_udquot.
+                */
+               xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
+       }
+
+ done:
+#ifdef DEBUG
+       if (!error) {
+               if (XFS_IS_UQUOTA_ON(mp))
+                       ASSERT(ip->i_udquot);
+               if (XFS_IS_OQUOTA_ON(mp))
+                       ASSERT(ip->i_gdquot);
+       }
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+#endif
+       return error;
+}
+
+int
+xfs_qm_dqattach(
+       struct xfs_inode        *ip,
+       uint                    flags)
+{
+       int                     error;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       error = xfs_qm_dqattach_locked(ip, flags);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       return error;
+}
+
+/*
+ * Release dquots (and their references) if any.
+ * The inode should be locked EXCL except when this is called by
+ * xfs_ireclaim.
+ */
+void
+xfs_qm_dqdetach(
+       xfs_inode_t     *ip)
+{
+       if (!(ip->i_udquot || ip->i_gdquot))
+               return;
+
+       trace_xfs_dquot_dqdetach(ip);
+
+       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
+       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
+       if (ip->i_udquot) {
+               xfs_qm_dqrele(ip->i_udquot);
+               ip->i_udquot = NULL;
+       }
+       if (ip->i_gdquot) {
+               xfs_qm_dqrele(ip->i_gdquot);
+               ip->i_gdquot = NULL;
+       }
+}
+
+int
+xfs_qm_sync(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       int                     recl, restarts;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       restarts = 0;
+
+  again:
+       mutex_lock(&q->qi_dqlist_lock);
+       /*
+        * dqpurge_all() also takes the mplist lock and iterates through all dquots
+        * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
+        * when we have the mplist lock, we know that dquots will be consistent
+        * as long as we have it locked.
+        */
+       if (!XFS_IS_QUOTA_ON(mp)) {
+               mutex_unlock(&q->qi_dqlist_lock);
+               return 0;
+       }
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               /*
+                * If this is vfs_sync calling, then skip the dquots that
+                * don't 'seem' to be dirty, i.e. don't acquire dqlock.
+                * This is very similar to what xfs_sync does with inodes.
+                */
+               if (flags & SYNC_TRYLOCK) {
+                       if (!XFS_DQ_IS_DIRTY(dqp))
+                               continue;
+                       if (!xfs_qm_dqlock_nowait(dqp))
+                               continue;
+               } else {
+                       xfs_dqlock(dqp);
+               }
+
+               /*
+                * Now, find out for sure if this dquot is dirty or not.
+                */
+               if (!XFS_DQ_IS_DIRTY(dqp)) {
+                       xfs_dqunlock(dqp);
+                       continue;
+               }
+
+               /* XXX a sentinel would be better */
+               recl = q->qi_dqreclaims;
+               if (!xfs_dqflock_nowait(dqp)) {
+                       if (flags & SYNC_TRYLOCK) {
+                               xfs_dqunlock(dqp);
+                               continue;
+                       }
+                       /*
+                        * If we can't grab the flush lock, the caller really
+                        * wanted us to give this our best shot, so see if we
+                        * can give a push to the buffer before we wait on the
+                        * flush lock. At this point, we know that even though
+                        * the dquot is being flushed, it has (new) dirty data.
+                        */
+                       xfs_qm_dqflock_pushbuf_wait(dqp);
+               }
+               /*
+                * Let go of the mplist lock. We don't want to hold it
+                * across a disk write.
+                */
+               mutex_unlock(&q->qi_dqlist_lock);
+               error = xfs_qm_dqflush(dqp, flags);
+               xfs_dqunlock(dqp);
+               if (error && XFS_FORCED_SHUTDOWN(mp))
+                       return 0;       /* Need to prevent umount failure */
+               else if (error)
+                       return error;
+
+               mutex_lock(&q->qi_dqlist_lock);
+               if (recl != q->qi_dqreclaims) {
+                       if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
+                               break;
+
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       goto again;
+               }
+       }
+
+       mutex_unlock(&q->qi_dqlist_lock);
+       return 0;
+}
+
+/*
+ * The hash chains and the mplist use the same xfs_dqhash structure as
+ * their list head, but we can take the mplist qh_lock and one of the
+ * hash qh_locks at the same time without any problem as they aren't
+ * related.
+ */
+static struct lock_class_key xfs_quota_mplist_class;
+
+/*
+ * This initializes all the quota information that's kept in the
+ * mount structure
+ */
+STATIC int
+xfs_qm_init_quotainfo(
+       xfs_mount_t     *mp)
+{
+       xfs_quotainfo_t *qinf;
+       int             error;
+       xfs_dquot_t     *dqp;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * Tell XQM that we exist as soon as possible.
+        */
+       if ((error = xfs_qm_hold_quotafs_ref(mp))) {
+               return error;
+       }
+
+       qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
+
+       /*
+        * See if quotainodes are set up, and if not, allocate them,
+        * and change the superblock accordingly.
+        */
+       if ((error = xfs_qm_init_quotainos(mp))) {
+               kmem_free(qinf);
+               mp->m_quotainfo = NULL;
+               return error;
+       }
+
+       INIT_LIST_HEAD(&qinf->qi_dqlist);
+       mutex_init(&qinf->qi_dqlist_lock);
+       lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
+
+       qinf->qi_dqreclaims = 0;
+
+       /* mutex used to serialize quotaoffs */
+       mutex_init(&qinf->qi_quotaofflock);
+
+       /* Precalc some constants */
+       qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+       ASSERT(qinf->qi_dqchunklen);
+       qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
+       do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
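+       /*
+        * E.g. with 4K filesystem blocks a chunk is 8 basic blocks
+        * (4096 bytes), and 4096 / sizeof(xfs_dqblk_t) = 4096 / 136
+        * gives 30 dquots per chunk.
+        */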
+
+       mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
+
+       /*
+        * We try to get the limits from the superuser's limits fields.
+        * This is quite hacky, but it is standard quota practice.
+        * We look at the USR dquot with id == 0 first, but if user quotas
+        * are not enabled we fall back to the GRP dquot with id == 0.
+        * We don't really care to keep separate default limits for user
+        * and group quotas, at least not at this point.
+        */
+       error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
+                            XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
+                            (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
+                               XFS_DQ_PROJ),
+                            XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
+                            &dqp);
+       if (!error) {
+               xfs_disk_dquot_t        *ddqp = &dqp->q_core;
+
+               /*
+                * The warnings and timers set the grace period given to
+                * a user or group before they can no longer perform any
+                * more writes. If it is zero, a default is used.
+                */
+               qinf->qi_btimelimit = ddqp->d_btimer ?
+                       be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
+               qinf->qi_itimelimit = ddqp->d_itimer ?
+                       be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
+               qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
+                       be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
+               qinf->qi_bwarnlimit = ddqp->d_bwarns ?
+                       be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
+               qinf->qi_iwarnlimit = ddqp->d_iwarns ?
+                       be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
+               qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
+                       be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
+               qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+               qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+               qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+               qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+               qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+               qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
+               /*
+                * We sent the XFS_QMOPT_DQSUSER flag to dqget because
+                * we don't want this dquot cached. We haven't done a
+                * quotacheck yet, and quotacheck doesn't like incore dquots.
+                */
+               xfs_qm_dqdestroy(dqp);
+       } else {
+               qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
+               qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
+               qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+               qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
+               qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+               qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
+       }
+
+       return 0;
+}
+
+
+/*
+ * Gets called when unmounting a filesystem or when all quotas get
+ * turned off.
+ * This purges the quota inodes, destroys locks and frees itself.
+ */
+void
+xfs_qm_destroy_quotainfo(
+       xfs_mount_t     *mp)
+{
+       xfs_quotainfo_t *qi;
+
+       qi = mp->m_quotainfo;
+       ASSERT(qi != NULL);
+       ASSERT(xfs_Gqm != NULL);
+
+       /*
+        * Release the reference that XQM kept, so that we know
+        * when the XQM structure should be freed. We cannot assume
+        * that xfs_Gqm is non-null after this point.
+        */
+       xfs_qm_rele_quotafs_ref(mp);
+
+       ASSERT(list_empty(&qi->qi_dqlist));
+       mutex_destroy(&qi->qi_dqlist_lock);
+
+       if (qi->qi_uquotaip) {
+               IRELE(qi->qi_uquotaip);
+               qi->qi_uquotaip = NULL; /* paranoia */
+       }
+       if (qi->qi_gquotaip) {
+               IRELE(qi->qi_gquotaip);
+               qi->qi_gquotaip = NULL;
+       }
+       mutex_destroy(&qi->qi_quotaofflock);
+       kmem_free(qi);
+       mp->m_quotainfo = NULL;
+}
+
+
+
+/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_list_init(
+       xfs_dqlist_t    *list,
+       char            *str,
+       int             n)
+{
+       mutex_init(&list->qh_lock);
+       INIT_LIST_HEAD(&list->qh_list);
+       list->qh_version = 0;
+       list->qh_nelems = 0;
+}
+
+STATIC void
+xfs_qm_list_destroy(
+       xfs_dqlist_t    *list)
+{
+       mutex_destroy(&(list->qh_lock));
+}
+
+/*
+ * Create an inode and return with a reference already taken, but unlocked.
+ * This is how we create quota inodes.
+ */
+STATIC int
+xfs_qm_qino_alloc(
+       xfs_mount_t     *mp,
+       xfs_inode_t     **ip,
+       __int64_t       sbfields,
+       uint            flags)
+{
+       xfs_trans_t     *tp;
+       int             error;
+       int             committed;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
+       if ((error = xfs_trans_reserve(tp,
+                                     XFS_QM_QINOCREATE_SPACE_RES(mp),
+                                     XFS_CREATE_LOG_RES(mp), 0,
+                                     XFS_TRANS_PERM_LOG_RES,
+                                     XFS_CREATE_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
+       if (error) {
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+                                XFS_TRANS_ABORT);
+               return error;
+       }
+
+       /*
+        * Make the changes in the superblock, and log those too.
+        * The sbfields arg may contain fields other than *QUOTINO;
+        * VERSIONNUM, for example.
+        */
+       spin_lock(&mp->m_sb_lock);
+       if (flags & XFS_QMOPT_SBVERSION) {
+               ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
+               ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                                  XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
+                      (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                       XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
+
+               xfs_sb_version_addquota(&mp->m_sb);
+               mp->m_sb.sb_uquotino = NULLFSINO;
+               mp->m_sb.sb_gquotino = NULLFSINO;
+
+               /* qflags will get updated _after_ quotacheck */
+               mp->m_sb.sb_qflags = 0;
+       }
+       if (flags & XFS_QMOPT_UQUOTA)
+               mp->m_sb.sb_uquotino = (*ip)->i_ino;
+       else
+               mp->m_sb.sb_gquotino = (*ip)->i_ino;
+       spin_unlock(&mp->m_sb_lock);
+       xfs_mod_sb(tp, sbfields);
+
+       if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
+               xfs_alert(mp, "%s failed (error %d)!", __func__, error);
+               return error;
+       }
+       return 0;
+}
+
+
+STATIC void
+xfs_qm_reset_dqcounts(
+       xfs_mount_t     *mp,
+       xfs_buf_t       *bp,
+       xfs_dqid_t      id,
+       uint            type)
+{
+       xfs_disk_dquot_t        *ddq;
+       int                     j;
+
+       trace_xfs_reset_dqcounts(bp, _RET_IP_);
+
+       /*
+        * Reset all counters and timers. They'll be
+        * started afresh by xfs_qm_quotacheck.
+        */
+#ifdef DEBUG
+       j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+       do_div(j, sizeof(xfs_dqblk_t));
+       ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
+#endif
+       ddq = bp->b_addr;
+       for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
+               /*
+                * Do a sanity check, and if needed, repair the dqblk. Don't
+                * output any warnings because it's perfectly possible to
+                * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
+                */
+               (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
+                                     "xfs_quotacheck");
+               ddq->d_bcount = 0;
+               ddq->d_icount = 0;
+               ddq->d_rtbcount = 0;
+               ddq->d_btimer = 0;
+               ddq->d_itimer = 0;
+               ddq->d_rtbtimer = 0;
+               ddq->d_bwarns = 0;
+               ddq->d_iwarns = 0;
+               ddq->d_rtbwarns = 0;
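+               /* advance by a whole on-disk dqblk, not just the core */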
+               ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+       }
+}
+
+STATIC int
+xfs_qm_dqiter_bufs(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      firstid,
+       xfs_fsblock_t   bno,
+       xfs_filblks_t   blkcnt,
+       uint            flags)
+{
+       xfs_buf_t       *bp;
+       int             error;
+       int             type;
+
+       ASSERT(blkcnt > 0);
+       type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
+               (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
+       error = 0;
+
+       /*
+        * The blkcnt arg can be a very big number, and might even be
+        * larger than the log itself. So, we have to break it up into
+        * manageable-sized transactions.
+        * Note that we don't start a permanent transaction here; we might
+        * not be able to get a log reservation for the whole thing up front,
+        * and we don't really care to either, because we just discard
+        * everything if we were to crash in the middle of this loop.
+        */
+       while (blkcnt--) {
+               error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+                             XFS_FSB_TO_DADDR(mp, bno),
+                             mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+               if (error)
+                       break;
+
+               xfs_qm_reset_dqcounts(mp, bp, firstid, type);
+               xfs_bdwrite(mp, bp);
+               /*
+                * Advance to the next block.
+                */
+               bno++;
+               firstid += mp->m_quotainfo->qi_dqperchunk;
+       }
+       return error;
+}
+
+/*
+ * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
+ * caller supplied function for every chunk of dquots that we find.
+ */
+STATIC int
+xfs_qm_dqiterate(
+       xfs_mount_t     *mp,
+       xfs_inode_t     *qip,
+       uint            flags)
+{
+       xfs_bmbt_irec_t         *map;
+       int                     i, nmaps;       /* number of map entries */
+       int                     error;          /* return value */
+       xfs_fileoff_t           lblkno;
+       xfs_filblks_t           maxlblkcnt;
+       xfs_dqid_t              firstid;
+       xfs_fsblock_t           rablkno;
+       xfs_filblks_t           rablkcnt;
+
+       error = 0;
+       /*
+        * This looks racy, but we can't keep an inode lock across a
+        * trans_reserve. However, this gets called during quotacheck, and
+        * that happens only at mount time, which is single threaded.
+        */
+       if (qip->i_d.di_nblocks == 0)
+               return 0;
+
+       map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
+
+       lblkno = 0;
+       maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+       do {
+               nmaps = XFS_DQITER_MAP_SIZE;
+               /*
+                * We aren't changing the inode itself. Just changing
+                * some of its data. No new blocks are added here, and
+                * the inode is never added to the transaction.
+                */
+               xfs_ilock(qip, XFS_ILOCK_SHARED);
+               error = xfs_bmapi(NULL, qip, lblkno,
+                                 maxlblkcnt - lblkno,
+                                 XFS_BMAPI_METADATA,
+                                 NULL,
+                                 0, map, &nmaps, NULL);
+               xfs_iunlock(qip, XFS_ILOCK_SHARED);
+               if (error)
+                       break;
+
+               ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
+               for (i = 0; i < nmaps; i++) {
+                       ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
+                       ASSERT(map[i].br_blockcount);
+
+                       lblkno += map[i].br_blockcount;
+
+                       if (map[i].br_startblock == HOLESTARTBLOCK)
+                               continue;
+
+                       firstid = (xfs_dqid_t) map[i].br_startoff *
+                               mp->m_quotainfo->qi_dqperchunk;
+                       /*
+                        * Do a read-ahead on the next extent.
+                        */
+                       if ((i+1 < nmaps) &&
+                           (map[i+1].br_startblock != HOLESTARTBLOCK)) {
+                               rablkcnt =  map[i+1].br_blockcount;
+                               rablkno = map[i+1].br_startblock;
+                               while (rablkcnt--) {
+                                       xfs_buf_readahead(mp->m_ddev_targp,
+                                              XFS_FSB_TO_DADDR(mp, rablkno),
+                                              mp->m_quotainfo->qi_dqchunklen);
+                                       rablkno++;
+                               }
+                       }
+                       /*
+                        * Iterate thru all the blks in the extent and
+                        * reset the counters of all the dquots inside them.
+                        */
+                       if ((error = xfs_qm_dqiter_bufs(mp,
+                                                      firstid,
+                                                      map[i].br_startblock,
+                                                      map[i].br_blockcount,
+                                                      flags))) {
+                               break;
+                       }
+               }
+
+               if (error)
+                       break;
+       } while (nmaps > 0);
+
+       kmem_free(map);
+
+       return error;
+}
+
+/*
+ * Called by dqusage_adjust while doing a quotacheck.
+ *
+ * Given the inode and a dquot id, this updates both the incore dquot as well
+ * as the buffer copy. This is so that once the quotacheck is done, we can
+ * just log all the buffers, as opposed to logging numerous updates to
+ * individual dquots.
+ */
+STATIC int
+xfs_qm_quotacheck_dqadjust(
+       struct xfs_inode        *ip,
+       xfs_dqid_t              id,
+       uint                    type,
+       xfs_qcnt_t              nblks,
+       xfs_qcnt_t              rtblks)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       error = xfs_qm_dqget(mp, ip, id, type,
+                            XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
+       if (error) {
+               /*
+                * Shouldn't be able to turn off quotas here.
+                */
+               ASSERT(error != ESRCH);
+               ASSERT(error != ENOENT);
+               return error;
+       }
+
+       trace_xfs_dqadjust(dqp);
+
+       /*
+        * Adjust the inode count and the block count to reflect this inode's
+        * resource usage.
+        */
+       be64_add_cpu(&dqp->q_core.d_icount, 1);
+       dqp->q_res_icount++;
+       if (nblks) {
+               be64_add_cpu(&dqp->q_core.d_bcount, nblks);
+               dqp->q_res_bcount += nblks;
+       }
+       if (rtblks) {
+               be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
+               dqp->q_res_rtbcount += rtblks;
+       }
+
+       /*
+        * Set default limits, adjust timers (since we changed usages)
+        *
+        * There are no timers for the default values set in the root dquot.
+        */
+       if (dqp->q_core.d_id) {
+               xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
+               xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
+       }
+
+       dqp->dq_flags |= XFS_DQ_DIRTY;
+       xfs_qm_dqput(dqp);
+       return 0;
+}
+
+STATIC int
+xfs_qm_get_rtblks(
+       xfs_inode_t     *ip,
+       xfs_qcnt_t      *O_rtblks)
+{
+       xfs_filblks_t   rtblks;                 /* total rt blks */
+       xfs_extnum_t    idx;                    /* extent record index */
+       xfs_ifork_t     *ifp;                   /* inode fork pointer */
+       xfs_extnum_t    nextents;               /* number of extent entries */
+       int             error;
+
+       ASSERT(XFS_IS_REALTIME_INODE(ip));
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
+                       return error;
+       }
+       rtblks = 0;
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       for (idx = 0; idx < nextents; idx++)
+               rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
+       *O_rtblks = (xfs_qcnt_t)rtblks;
+       return 0;
+}
+
+/*
+ * Callback routine supplied to bulkstat(). Given an inumber, find its
+ * dquots and update them to account for resources taken by that inode.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqusage_adjust(
+       xfs_mount_t     *mp,            /* mount point for filesystem */
+       xfs_ino_t       ino,            /* inode number to get data for */
+       void            __user *buffer, /* not used */
+       int             ubsize,         /* not used */
+       int             *ubused,        /* not used */
+       int             *res)           /* result code value */
+{
+       xfs_inode_t     *ip;
+       xfs_qcnt_t      nblks, rtblks = 0;
+       int             error;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * The rootino must have its resources accounted for, unlike the
+        * quota inodes.
+        */
+       if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+               *res = BULKSTAT_RV_NOTHING;
+               return XFS_ERROR(EINVAL);
+       }
+
+       /*
+        * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
+        * interface expects the inode to be exclusively locked because that's
+        * the case in all other instances. It's OK that we do this because
+        * quotacheck is done only at mount time.
+        */
+       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
+       if (error) {
+               *res = BULKSTAT_RV_NOTHING;
+               return error;
+       }
+
+       ASSERT(ip->i_delayed_blks == 0);
+
+       if (XFS_IS_REALTIME_INODE(ip)) {
+               /*
+                * Walk thru the extent list and count the realtime blocks.
+                */
+               error = xfs_qm_get_rtblks(ip, &rtblks);
+               if (error)
+                       goto error0;
+       }
+
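+       /*
+        * di_nblocks counts data device and realtime blocks together, so
+        * the difference is what this inode uses on the data device.
+        */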
+       nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
+
+       /*
+        * Add the (disk blocks and inode) resources occupied by this
+        * inode to its dquots. We do this adjustment in the incore dquot,
+        * and also copy the changes to its buffer.
+        * We don't care about putting these changes in a transaction
+        * envelope because if we crash in the middle of a 'quotacheck'
+        * we have to start from the beginning anyway.
+        * Once we're done, we'll log all the dquot bufs.
+        *
+        * The *QUOTA_ON checks below may look pretty racy, but quotachecks
+        * and quotaoffs don't race. (Quotachecks happen at mount time only).
+        */
+       if (XFS_IS_UQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
+                                                  XFS_DQ_USER, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       if (XFS_IS_GQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
+                                                  XFS_DQ_GROUP, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       if (XFS_IS_PQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
+                                                  XFS_DQ_PROJ, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       IRELE(ip);
+       *res = BULKSTAT_RV_DIDONE;
+       return 0;
+
+error0:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       IRELE(ip);
+       *res = BULKSTAT_RV_GIVEUP;
+       return error;
+}
+
+/*
+ * Walk thru all the filesystem inodes and construct a consistent view
+ * of the disk quota world. If the quotacheck fails, disable quotas.
+ */
+int
+xfs_qm_quotacheck(
+       xfs_mount_t     *mp)
+{
+       int             done, count, error;
+       xfs_ino_t       lastino;
+       size_t          structsz;
+       xfs_inode_t     *uip, *gip;
+       uint            flags;
+
+       count = INT_MAX;
+       structsz = 1;
+       lastino = 0;
+       flags = 0;
+
+       ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * There should be no cached dquots. The (simplistic) quotacheck
+        * algorithm doesn't like that.
+        */
+       ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
+
+       xfs_notice(mp, "Quotacheck needed: Please wait.");
+
+       /*
+        * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
+        * their counters to zero. We need a clean slate.
+        * We don't log our changes till later.
+        */
+       uip = mp->m_quotainfo->qi_uquotaip;
+       if (uip) {
+               error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
+               if (error)
+                       goto error_return;
+               flags |= XFS_UQUOTA_CHKD;
+       }
+
+       gip = mp->m_quotainfo->qi_gquotaip;
+       if (gip) {
+               error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
+                                       XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+               if (error)
+                       goto error_return;
+               flags |= XFS_OQUOTA_CHKD;
+       }
+
+       do {
+               /*
+                * Iterate thru all the inodes in the file system,
+                * adjusting the corresponding dquot counters in core.
+                */
+               error = xfs_bulkstat(mp, &lastino, &count,
+                                    xfs_qm_dqusage_adjust,
+                                    structsz, NULL, &done);
+               if (error)
+                       break;
+
+       } while (!done);
+
+       /*
+        * We've made all the changes that we need to make incore.
+        * Flush them down to disk buffers if everything was updated
+        * successfully.
+        */
+       if (!error)
+               error = xfs_qm_dqflush_all(mp, 0);
+
+       /*
+        * We can get this error if we couldn't do a dquot allocation inside
+        * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
+        * dirty dquots that might be cached; we just want to get rid of them
+        * and turn quotaoff. The dquots won't be attached to any of the inodes
+        * at this point (because we intentionally didn't in dqget_noattach).
+        */
+       if (error) {
+               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+               goto error_return;
+       }
+
+       /*
+        * We didn't log anything, because if we crashed, we'll have to
+        * start the quotacheck from scratch anyway. However, we must make
+        * sure that our dquot changes are secure before we put the
+        * quotacheck'd stamp on the superblock. So, here we do a synchronous
+        * flush.
+        */
+       XFS_bflush(mp->m_ddev_targp);
+
+       /*
+        * If one type of quota is off, then it will lose its
+        * quotachecked status, since we won't be doing accounting for
+        * that type anymore.
+        */
+       mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
+       mp->m_qflags |= flags;
+
+ error_return:
+       if (error) {
+               xfs_warn(mp,
+       "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
+                       error);
+               /*
+                * We must turn off quotas.
+                */
+               ASSERT(mp->m_quotainfo != NULL);
+               ASSERT(xfs_Gqm != NULL);
+               xfs_qm_destroy_quotainfo(mp);
+               if (xfs_mount_reset_sbqflags(mp)) {
+                       xfs_warn(mp,
+                               "Quotacheck: Failed to reset quota flags.");
+               }
+       } else
+               xfs_notice(mp, "Quotacheck: Done.");
+       return error;
+}
+
+/*
+ * This is called after the superblock has been read in and we're ready to
+ * iget the quota inodes.
+ */
+STATIC int
+xfs_qm_init_quotainos(
+       xfs_mount_t     *mp)
+{
+       xfs_inode_t     *uip, *gip;
+       int             error;
+       __int64_t       sbflags;
+       uint            flags;
+
+       ASSERT(mp->m_quotainfo);
+       uip = gip = NULL;
+       sbflags = 0;
+       flags = 0;
+
+       /*
+        * Get the uquota and gquota inodes
+        */
+       if (xfs_sb_version_hasquota(&mp->m_sb)) {
+               if (XFS_IS_UQUOTA_ON(mp) &&
+                   mp->m_sb.sb_uquotino != NULLFSINO) {
+                       ASSERT(mp->m_sb.sb_uquotino > 0);
+                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+                                            0, 0, &uip)))
+                               return XFS_ERROR(error);
+               }
+               if (XFS_IS_OQUOTA_ON(mp) &&
+                   mp->m_sb.sb_gquotino != NULLFSINO) {
+                       ASSERT(mp->m_sb.sb_gquotino > 0);
+                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+                                            0, 0, &gip))) {
+                               if (uip)
+                                       IRELE(uip);
+                               return XFS_ERROR(error);
+                       }
+               }
+       } else {
+               flags |= XFS_QMOPT_SBVERSION;
+               sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                           XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
+       }
+
+       /*
+        * Create the two inodes, if they don't exist already. The changes
+        * made above will get added to a transaction and logged in one of
+        * the qino_alloc calls below.  If the device is readonly,
+        * temporarily switch to read-write to do this.
+        */
+       if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
+               if ((error = xfs_qm_qino_alloc(mp, &uip,
+                                             sbflags | XFS_SB_UQUOTINO,
+                                             flags | XFS_QMOPT_UQUOTA)))
+                       return XFS_ERROR(error);
+
+               flags &= ~XFS_QMOPT_SBVERSION;
+       }
+       if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
+               flags |= (XFS_IS_GQUOTA_ON(mp) ?
+                               XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+               error = xfs_qm_qino_alloc(mp, &gip,
+                                         sbflags | XFS_SB_GQUOTINO, flags);
+               if (error) {
+                       if (uip)
+                               IRELE(uip);
+
+                       return XFS_ERROR(error);
+               }
+       }
+
+       mp->m_quotainfo->qi_uquotaip = uip;
+       mp->m_quotainfo->qi_gquotaip = gip;
+
+       return 0;
+}
+
+
+
+/*
+ * Just pop the least recently used dquot off the freelist and
+ * recycle it. The returned dquot is locked.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqreclaim_one(void)
+{
+       xfs_dquot_t     *dqpout;
+       xfs_dquot_t     *dqp;
+       int             restarts;
+       int             startagain;
+
+       restarts = 0;
+       dqpout = NULL;
+
+       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
+again:
+       startagain = 0;
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+               struct xfs_mount *mp = dqp->q_mount;
+               xfs_dqlock(dqp);
+
+               /*
+                * We are racing with dqlookup here. Naturally we don't
+                * want to reclaim a dquot that lookup wants. We release the
+                * freelist lock and start over, so that lookup will grab
+                * both the dquot and the freelistlock.
+                */
+               if (dqp->dq_flags & XFS_DQ_WANT) {
+                       ASSERT(!(dqp->dq_flags & XFS_DQ_INACTIVE));
+
+                       trace_xfs_dqreclaim_want(dqp);
+                       XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+                       restarts++;
+                       startagain = 1;
+                       goto dqunlock;
+               }
+
+               /*
+                * If the dquot is inactive, we are assured that it is
+                * not on the mplist or the hashlist, and that makes our
+                * life easier.
+                */
+               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+                       ASSERT(mp == NULL);
+                       ASSERT(!XFS_DQ_IS_DIRTY(dqp));
+                       ASSERT(list_empty(&dqp->q_hashlist));
+                       ASSERT(list_empty(&dqp->q_mplist));
+                       list_del_init(&dqp->q_freelist);
+                       xfs_Gqm->qm_dqfrlist_cnt--;
+                       dqpout = dqp;
+                       XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+                       goto dqunlock;
+               }
+
+               ASSERT(dqp->q_hash);
+               ASSERT(!list_empty(&dqp->q_mplist));
+
+               /*
+                * Try to grab the flush lock. If this dquot is in the process
+                * of getting flushed to disk, we don't want to reclaim it.
+                */
+               if (!xfs_dqflock_nowait(dqp))
+                       goto dqunlock;
+
+               /*
+                * We have the flush lock so we know that this is not in the
+                * process of being flushed. So, if this is dirty, flush it
+                * DELWRI so that we don't get a freelist infested with
+                * dirty dquots.
+                */
+               if (XFS_DQ_IS_DIRTY(dqp)) {
+                       int     error;
+
+                       trace_xfs_dqreclaim_dirty(dqp);
+
+                       /*
+                        * We flush it delayed write, so don't bother
+                        * releasing the freelist lock.
+                        */
+                       error = xfs_qm_dqflush(dqp, 0);
+                       if (error) {
+                               xfs_warn(mp, "%s: dquot %p flush failed",
+                                       __func__, dqp);
+                       }
+                       goto dqunlock;
+               }
+
+               /*
+                * We're trying to get the hashlock out of order. This races
+                * with dqlookup; so, we give up and go on to the next dquot if
+                * we couldn't get the hashlock. This way, we won't starve
+                * a dqlookup process that holds the hashlock that is
+                * waiting for the freelist lock.
+                */
+               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+                       restarts++;
+                       goto dqfunlock;
+               }
+
+               /*
+                * This races with dquot allocation code as well as dqflush_all
+                * and reclaim code. So, if we failed to grab the mplist lock,
+                * give up everything and start over.
+                */
+               if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
+                       restarts++;
+                       startagain = 1;
+                       goto qhunlock;
+               }
+
+               ASSERT(dqp->q_nrefs == 0);
+               list_del_init(&dqp->q_mplist);
+               mp->m_quotainfo->qi_dquots--;
+               mp->m_quotainfo->qi_dqreclaims++;
+               list_del_init(&dqp->q_hashlist);
+               dqp->q_hash->qh_version++;
+               list_del_init(&dqp->q_freelist);
+               xfs_Gqm->qm_dqfrlist_cnt--;
+               dqpout = dqp;
+               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
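+               /*
+                * Fall through the unlock labels, releasing the locks in
+                * the reverse order they were taken.
+                */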
+qhunlock:
+               mutex_unlock(&dqp->q_hash->qh_lock);
+dqfunlock:
+               xfs_dqfunlock(dqp);
+dqunlock:
+               xfs_dqunlock(dqp);
+               if (dqpout)
+                       break;
+               if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+                       break;
+               if (startagain) {
+                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                       goto again;
+               }
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+       return dqpout;
+}
+
+/*
+ * Traverse the freelist of dquots and attempt to reclaim a maximum of
+ * 'howmany' dquots. This operation races with dqlookup(), and attempts to
+ * favor the lookup function.
+ */
+STATIC int
+xfs_qm_shake_freelist(
+       int     howmany)
+{
+       int             nreclaimed = 0;
+       xfs_dquot_t     *dqp;
+
+       if (howmany <= 0)
+               return 0;
+
+       while (nreclaimed < howmany) {
+               dqp = xfs_qm_dqreclaim_one();
+               if (!dqp)
+                       return nreclaimed;
+               xfs_qm_dqdestroy(dqp);
+               nreclaimed++;
+       }
+       return nreclaimed;
+}
+
+/*
+ * The kmem_shake interface is invoked when memory is running low.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_shake(
+       struct shrinker *shrink,
+       struct shrink_control *sc)
+{
+       int     ndqused, nfree, n;
+       gfp_t gfp_mask = sc->gfp_mask;
+
+       if (!kmem_shake_allow(gfp_mask))
+               return 0;
+       if (!xfs_Gqm)
+               return 0;
+
+       nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
+       /* incore dquots in all f/s's */
+       ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
+
+       ASSERT(ndqused >= 0);
+
+       if (nfree <= ndqused && nfree < ndquot)
+               return 0;
+
+       ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
+       n = nfree - ndqused - ndquot;           /* # over target */
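+       /*
+        * E.g. with qm_dqfree_ratio == 2 (XFS_QM_DQFREE_RATIO), 100 dquots
+        * in use and 500 free, the target is 200 free dquots; anything
+        * beyond that and the ndquot high-water mark counts as excess.
+        */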
+
+       return xfs_qm_shake_freelist(MAX(nfree, n));
+}
+
+
+/*------------------------------------------------------------------*/
+
+/*
+ * Return a new incore dquot. Depending on the number of
+ * dquots in the system, we either allocate a new one on the kernel heap,
+ * or reclaim a free one.
+ * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
+ * to reclaim an existing one from the freelist.
+ */
+boolean_t
+xfs_qm_dqalloc_incore(
+       xfs_dquot_t **O_dqpp)
+{
+       xfs_dquot_t     *dqp;
+
+       /*
+        * Check against high water mark to see if we want to pop
+        * a nincompoop dquot off the freelist.
+        */
+       if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
+               /*
+                * Try to recycle a dquot from the freelist.
+                */
+               if ((dqp = xfs_qm_dqreclaim_one())) {
+                       XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+                       /*
+                        * Just zero the core here. The rest will get
+                        * reinitialized by the caller. XXX we shouldn't even
+                        * do this zero ...
+                        */
+                       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+                       *O_dqpp = dqp;
+                       return B_FALSE;
+               }
+               XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+       }
+
+       /*
+        * Allocate a brand new dquot on the kernel heap and return it
+        * to the caller to initialize.
+        */
+       ASSERT(xfs_Gqm->qm_dqzone != NULL);
+       *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+       atomic_inc(&xfs_Gqm->qm_totaldquots);
+
+       return B_TRUE;
+}
+
+
+/*
+ * Start a transaction and write the incore superblock changes to
+ * disk. The flags parameter indicates which fields have changed.
+ */
+int
+xfs_qm_write_sb_changes(
+       xfs_mount_t     *mp,
+       __int64_t       flags)
+{
+       xfs_trans_t     *tp;
+       int             error;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+       if ((error = xfs_trans_reserve(tp, 0,
+                                     mp->m_sb.sb_sectsize + 128, 0,
+                                     0,
+                                     XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       xfs_mod_sb(tp, flags);
+       error = xfs_trans_commit(tp, 0);
+
+       return error;
+}
+
+
+/* --------------- utility functions for vnodeops ---------------- */
+
+
+/*
+ * Given an inode, a uid, gid and prid, make sure that we have
+ * allocated relevant dquot(s) on disk, and that we won't exceed inode
+ * quotas by creating this file.
+ * This also attaches dquot(s) to the given inode after locking it,
+ * and returns the dquots corresponding to the uid and/or gid.
+ *
+ * in  : inode (unlocked)
+ * out : udquot, gdquot with references taken and unlocked
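+ *
+ * A NULL O_udqpp or O_gdqpp makes us drop the corresponding reference
+ * before returning (see the tail of this function).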
+ */
+int
+xfs_qm_vop_dqalloc(
+       struct xfs_inode        *ip,
+       uid_t                   uid,
+       gid_t                   gid,
+       prid_t                  prid,
+       uint                    flags,
+       struct xfs_dquot        **O_udqpp,
+       struct xfs_dquot        **O_gdqpp)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_dquot        *uq, *gq;
+       int                     error;
+       uint                    lockflags;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       lockflags = XFS_ILOCK_EXCL;
+       xfs_ilock(ip, lockflags);
+
+       if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
+               gid = ip->i_d.di_gid;
+
+       /*
+        * Attach the dquot(s) to this inode, doing a dquot allocation
+        * if necessary. The dquot(s) will not be locked.
+        */
+       if (XFS_NOT_DQATTACHED(mp, ip)) {
+               error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+               if (error) {
+                       xfs_iunlock(ip, lockflags);
+                       return error;
+               }
+       }
+
+       uq = gq = NULL;
+       if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
+               if (ip->i_d.di_uid != uid) {
+                       /*
+                        * What we need is the dquot that has this uid, and
+                        * if we send the inode to dqget, the uid of the inode
+                        * takes priority over what's sent in the uid argument.
+                        * We must unlock the inode here before calling dqget if
+                        * we're not sending the inode, because otherwise
+                        * we'll deadlock by doing trans_reserve while
+                        * holding ilock.
+                        */
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
+                                                XFS_DQ_USER,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &uq))) {
+                               ASSERT(error != ENOENT);
+                               return error;
+                       }
+                       /*
+                        * Get the ilock in the right order.
+                        */
+                       xfs_dqunlock(uq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       /*
+                        * Take an extra reference, because we'll return
+                        * this to the caller.
+                        */
+                       ASSERT(ip->i_udquot);
+                       uq = ip->i_udquot;
+                       xfs_dqlock(uq);
+                       XFS_DQHOLD(uq);
+                       xfs_dqunlock(uq);
+               }
+       }
+       if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
+               if (ip->i_d.di_gid != gid) {
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
+                                                XFS_DQ_GROUP,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &gq))) {
+                               if (uq)
+                                       xfs_qm_dqrele(uq);
+                               ASSERT(error != ENOENT);
+                               return error;
+                       }
+                       xfs_dqunlock(gq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       ASSERT(ip->i_gdquot);
+                       gq = ip->i_gdquot;
+                       xfs_dqlock(gq);
+                       XFS_DQHOLD(gq);
+                       xfs_dqunlock(gq);
+               }
+       } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+               if (xfs_get_projid(ip) != prid) {
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
+                                                XFS_DQ_PROJ,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &gq))) {
+                               if (uq)
+                                       xfs_qm_dqrele(uq);
+                               ASSERT(error != ENOENT);
+                               return error;
+                       }
+                       xfs_dqunlock(gq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       ASSERT(ip->i_gdquot);
+                       gq = ip->i_gdquot;
+                       xfs_dqlock(gq);
+                       XFS_DQHOLD(gq);
+                       xfs_dqunlock(gq);
+               }
+       }
+       if (uq)
+               trace_xfs_dquot_dqalloc(ip);
+
+       xfs_iunlock(ip, lockflags);
+       if (O_udqpp)
+               *O_udqpp = uq;
+       else if (uq)
+               xfs_qm_dqrele(uq);
+       if (O_gdqpp)
+               *O_gdqpp = gq;
+       else if (gq)
+               xfs_qm_dqrele(gq);
+       return 0;
+}
+
+/*
+ * Actually transfer ownership, and do dquot modifications.
+ * These were already reserved.
+ */
+xfs_dquot_t *
+xfs_qm_vop_chown(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       xfs_dquot_t     **IO_olddq,
+       xfs_dquot_t     *newdq)
+{
+       xfs_dquot_t     *prevdq;
+       uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
+                                XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+
+       /* old dquot */
+       prevdq = *IO_olddq;
+       ASSERT(prevdq);
+       ASSERT(prevdq != newdq);
+
+       xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
+       xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
+
+       /* the sparkling new dquot */
+       xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
+       xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+
+       /*
+        * Take an extra reference, because the inode
+        * is going to keep this dquot pointer even
+        * after the trans_commit.
+        */
+       xfs_dqlock(newdq);
+       XFS_DQHOLD(newdq);
+       xfs_dqunlock(newdq);
+       *IO_olddq = newdq;
+
+       return prevdq;
+}
+
+/*
+ * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
+ */
+int
+xfs_qm_vop_chown_reserve(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       xfs_dquot_t     *udqp,
+       xfs_dquot_t     *gdqp,
+       uint            flags)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       uint            delblks, blkflags, prjflags = 0;
+       xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
+       int             error;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       delblks = ip->i_delayed_blks;
+       delblksudq = delblksgdq = unresudq = unresgdq = NULL;
+       blkflags = XFS_IS_REALTIME_INODE(ip) ?
+                       XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
+
+       if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+           ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
+               delblksudq = udqp;
+               /*
+                * If there are delayed allocation blocks, then we have to
+                * unreserve those from the old dquot, and add them to the
+                * new dquot.
+                */
+               if (delblks) {
+                       ASSERT(ip->i_udquot);
+                       unresudq = ip->i_udquot;
+               }
+       }
+       if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
+               if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
+                    xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
+                       prjflags = XFS_QMOPT_ENOSPC;
+
+               if (prjflags ||
+                   (XFS_IS_GQUOTA_ON(ip->i_mount) &&
+                    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
+                       delblksgdq = gdqp;
+                       if (delblks) {
+                               ASSERT(ip->i_gdquot);
+                               unresgdq = ip->i_gdquot;
+                       }
+               }
+       }
+
+       if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+                               delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
+                               flags | blkflags | prjflags)))
+               return error;
+
+       /*
+        * Do the delayed blks reservations/unreservations now. Since these
+        * are done without the help of a transaction, if a reservation fails
+        * its previous reservations won't be automatically undone by trans
+        * code. So, we have to do it manually here.
+        */
+       if (delblks) {
+               /*
+                * Do the reservations first. Unreservation can't fail.
+                */
+               ASSERT(delblksudq || delblksgdq);
+               ASSERT(unresudq || unresgdq);
+               if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+                               delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
+                               flags | blkflags | prjflags)))
+                       return error;
+               xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+                               unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
+                               blkflags);
+       }
+
+       return 0;
+}
+
+int
+xfs_qm_vop_rename_dqattach(
+       struct xfs_inode        **i_tab)
+{
+       struct xfs_mount        *mp = i_tab[0]->i_mount;
+       int                     i;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       for (i = 0; (i < 4 && i_tab[i]); i++) {
+               struct xfs_inode        *ip = i_tab[i];
+               int                     error;
+
+               /*
+                * Watch out for duplicate entries in the table.
+                */
+               if (i == 0 || ip != i_tab[i-1]) {
+                       if (XFS_NOT_DQATTACHED(mp, ip)) {
+                               error = xfs_qm_dqattach(ip, 0);
+                               if (error)
+                                       return error;
+                       }
+               }
+       }
+       return 0;
+}
+
+void
+xfs_qm_vop_create_dqattach(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       struct xfs_dquot        *udqp,
+       struct xfs_dquot        *gdqp)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       if (udqp) {
+               xfs_dqlock(udqp);
+               XFS_DQHOLD(udqp);
+               xfs_dqunlock(udqp);
+               ASSERT(ip->i_udquot == NULL);
+               ip->i_udquot = udqp;
+               ASSERT(XFS_IS_UQUOTA_ON(mp));
+               ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
+               xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
+       }
+       if (gdqp) {
+               xfs_dqlock(gdqp);
+               XFS_DQHOLD(gdqp);
+               xfs_dqunlock(gdqp);
+               ASSERT(ip->i_gdquot == NULL);
+               ip->i_gdquot = gdqp;
+               ASSERT(XFS_IS_OQUOTA_ON(mp));
+               ASSERT((XFS_IS_GQUOTA_ON(mp) ?
+                       ip->i_d.di_gid : xfs_get_projid(ip)) ==
+                               be32_to_cpu(gdqp->q_core.d_id));
+               xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
+       }
+}
+
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
new file mode 100644 (file)
index 0000000..43b9abe
--- /dev/null
+++ b/fs/xfs/xfs_qm.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QM_H__
+#define __XFS_QM_H__
+
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_quota_priv.h"
+#include "xfs_qm_stats.h"
+
+struct xfs_qm;
+struct xfs_inode;
+
+extern uint            ndquot;
+extern struct mutex    xfs_Gqm_lock;
+extern struct xfs_qm   *xfs_Gqm;
+extern kmem_zone_t     *qm_dqzone;
+extern kmem_zone_t     *qm_dqtrxzone;
+
+/*
+ * Used in xfs_qm_sync, called by xfs_sync, to cap the number of times it
+ * can restart iterating over the mountpoint's dquot list in one call.
+ */
+#define XFS_QM_SYNC_MAX_RESTARTS       7
+
+/*
+ * Ditto, for xfs_qm_dqreclaim_one.
+ */
+#define XFS_QM_RECLAIM_MAX_RESTARTS    4
+
+/*
+ * Ideal ratio of free to in-use dquots. The quota manager makes an
+ * attempt to keep this balance.
+ */
+#define XFS_QM_DQFREE_RATIO            2
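+/* xfs_qm_shake() uses this ratio when sizing its freelist reclaim target */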
+
+/*
+ * Dquot hashtable constants/threshold values.
+ */
+#define XFS_QM_HASHSIZE_LOW            (PAGE_SIZE / sizeof(xfs_dqhash_t))
+#define XFS_QM_HASHSIZE_HIGH           ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
+
+/*
+ * This defines the unit of allocation of dquots.
+ * Currently, it is just one file system block, and a 4K block contains 30
+ * dquots (136 * 30 = 4080 bytes). It's probably not worth trying to make
+ * this more dynamic.
+ * XXXsup However, if this number is changed, we have to make sure that we don't
+ * implicitly assume that we do allocations in chunks of a single filesystem
+ * block in the dquot/xqm code.
+ */
+#define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
+
+typedef xfs_dqhash_t   xfs_dqlist_t;
+
+/*
+ * Quota Manager (global) structure. Lives only in core.
+ */
+typedef struct xfs_qm {
+       xfs_dqlist_t    *qm_usr_dqhtable;/* udquot hash table */
+       xfs_dqlist_t    *qm_grp_dqhtable;/* gdquot hash table */
+       uint             qm_dqhashmask;  /* # buckets in dq hashtab - 1 */
+       struct list_head qm_dqfrlist;    /* freelist of dquots */
+       struct mutex     qm_dqfrlist_lock;
+       int              qm_dqfrlist_cnt;
+       atomic_t         qm_totaldquots; /* total incore dquots */
+       uint             qm_nrefs;       /* file systems with quota on */
+       int              qm_dqfree_ratio;/* ratio of free to inuse dquots */
+       kmem_zone_t     *qm_dqzone;      /* dquot mem-alloc zone */
+       kmem_zone_t     *qm_dqtrxzone;   /* t_dqinfo of transactions */
+} xfs_qm_t;
+
+/*
+ * Various quota information for individual filesystems.
+ * The mount structure keeps a pointer to this.
+ */
+typedef struct xfs_quotainfo {
+       xfs_inode_t     *qi_uquotaip;    /* user quota inode */
+       xfs_inode_t     *qi_gquotaip;    /* group quota inode */
+       struct list_head qi_dqlist;      /* all dquots in filesys */
+       struct mutex     qi_dqlist_lock;
+       int              qi_dquots;
+       int              qi_dqreclaims;  /* a change here indicates
+                                           a removal in the dqlist */
+       time_t           qi_btimelimit;  /* limit for blks timer */
+       time_t           qi_itimelimit;  /* limit for inodes timer */
+       time_t           qi_rtbtimelimit;/* limit for rt blks timer */
+       xfs_qwarncnt_t   qi_bwarnlimit;  /* limit for blks warnings */
+       xfs_qwarncnt_t   qi_iwarnlimit;  /* limit for inodes warnings */
+       xfs_qwarncnt_t   qi_rtbwarnlimit;/* limit for rt blks warnings */
+       struct mutex     qi_quotaofflock;/* to serialize quotaoff */
+       xfs_filblks_t    qi_dqchunklen;  /* # BBs in a chunk of dqs */
+       uint             qi_dqperchunk;  /* # ondisk dqs in above chunk */
+       xfs_qcnt_t       qi_bhardlimit;  /* default data blk hard limit */
+       xfs_qcnt_t       qi_bsoftlimit;  /* default data blk soft limit */
+       xfs_qcnt_t       qi_ihardlimit;  /* default inode count hard limit */
+       xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
+       xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
+       xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
+} xfs_quotainfo_t;
+
+
+extern void    xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
+extern int     xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
+                       xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
+extern void    xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
+extern void    xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
+
+/*
+ * We keep the usr and grp dquots separately so that locking will be easier
+ * to do at commit time. All transactions that we know of at this point
+ * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
+ */
+#define XFS_QM_TRANS_MAXDQS            2
+typedef struct xfs_dquot_acct {
+       xfs_dqtrx_t     dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
+       xfs_dqtrx_t     dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
+} xfs_dquot_acct_t;
+
+/*
+ * Users are allowed to have a usage exceeding their softlimit for
+ * a period this long.
+ */
+#define XFS_QM_BTIMELIMIT      (7 * 24*60*60)          /* 1 week */
+#define XFS_QM_RTBTIMELIMIT    (7 * 24*60*60)          /* 1 week */
+#define XFS_QM_ITIMELIMIT      (7 * 24*60*60)          /* 1 week */
+
+#define XFS_QM_BWARNLIMIT      5
+#define XFS_QM_IWARNLIMIT      5
+#define XFS_QM_RTBWARNLIMIT    5
+
+extern void            xfs_qm_destroy_quotainfo(xfs_mount_t *);
+extern int             xfs_qm_quotacheck(xfs_mount_t *);
+extern int             xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
+
+/* dquot stuff */
+extern boolean_t       xfs_qm_dqalloc_incore(xfs_dquot_t **);
+extern int             xfs_qm_dqpurge_all(xfs_mount_t *, uint);
+extern void            xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
+
+/* quota ops */
+extern int             xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
+extern int             xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
+                                       fs_disk_quota_t *);
+extern int             xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
+                                       fs_disk_quota_t *);
+extern int             xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
+extern int             xfs_qm_scall_quotaon(xfs_mount_t *, uint);
+extern int             xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
+
+#endif /* __XFS_QM_H__ */
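For scale on the constants in this header: with 4096-byte pages, XFS_QM_HASHSIZE_LOW and XFS_QM_HASHSIZE_HIGH bound the dquot hash table at one to four pages of bucket heads, and the cluster-size comment's arithmetic yields 30 on-disk dquots per 4K block. A standalone sketch of that arithmetic; the 32-byte xfs_dqhash_t is an assumed size for illustration:

#include <stdio.h>

#define DEMO_PAGE_SIZE  4096
#define DEMO_DQHASH_SZ  32      /* assumed sizeof(xfs_dqhash_t) */
#define DEMO_DISK_DQUOT 136     /* on-disk dquot size, per the comment above */

int main(void)
{
        printf("HASHSIZE_LOW  = %d buckets\n",
               DEMO_PAGE_SIZE / DEMO_DQHASH_SZ);
        printf("HASHSIZE_HIGH = %d buckets\n",
               (DEMO_PAGE_SIZE * 4) / DEMO_DQHASH_SZ);
        printf("dquots per 4K block = %d (%d bytes used)\n",
               DEMO_PAGE_SIZE / DEMO_DISK_DQUOT,
               (DEMO_PAGE_SIZE / DEMO_DISK_DQUOT) * DEMO_DISK_DQUOT);
        return 0;
}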
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
new file mode 100644
index 0000000..a0a829a
--- /dev/null
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_qm.h"
+
+
+STATIC void
+xfs_fill_statvfs_from_dquot(
+       struct kstatfs          *statp,
+       xfs_disk_dquot_t        *dp)
+{
+       __uint64_t              limit;
+
+       limit = dp->d_blk_softlimit ?
+               be64_to_cpu(dp->d_blk_softlimit) :
+               be64_to_cpu(dp->d_blk_hardlimit);
+       if (limit && statp->f_blocks > limit) {
+               statp->f_blocks = limit;
+               statp->f_bfree = statp->f_bavail =
+                       (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
+                        (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
+       }
+
+       limit = dp->d_ino_softlimit ?
+               be64_to_cpu(dp->d_ino_softlimit) :
+               be64_to_cpu(dp->d_ino_hardlimit);
+       if (limit && statp->f_files > limit) {
+               statp->f_files = limit;
+               statp->f_ffree =
+                       (statp->f_files > be64_to_cpu(dp->d_icount)) ?
+                        (statp->f_files - be64_to_cpu(dp->d_icount)) : 0;
+       }
+}
+
+
+/*
+ * Directory tree accounting is implemented using project quotas, where
+ * the project identifier is inherited from parent directories.
+ * A statvfs (df, etc.) of a directory that is using project quota should
+ * return a statvfs of the project, not the entire filesystem.
+ * This makes such trees appear as if they are filesystems in themselves.
+ */
+void
+xfs_qm_statvfs(
+       xfs_inode_t             *ip,
+       struct kstatfs          *statp)
+{
+       xfs_mount_t             *mp = ip->i_mount;
+       xfs_dquot_t             *dqp;
+
+       if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
+               xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
+               xfs_qm_dqput(dqp);
+       }
+}
+
+int
+xfs_qm_newmount(
+       xfs_mount_t     *mp,
+       uint            *needquotamount,
+       uint            *quotaflags)
+{
+       uint            quotaondisk;
+       uint            uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
+
+       quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
+                               (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
+
+       if (quotaondisk) {
+               uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
+               pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
+               gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
+       }
+
+       /*
+        * If the device itself is read-only, we can't allow
+        * the user to change the state of quota on the mount -
+        * this would generate a transaction on the ro device,
+        * which would lead to an I/O error and shutdown.
+        */
+
+       if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
+           (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
+            (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
+           (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
+            (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
+           (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
+           xfs_dev_is_read_only(mp, "changing quota state")) {
+               xfs_warn(mp, "please mount with%s%s%s%s.",
+                       (!quotaondisk ? "out quota" : ""),
+                       (uquotaondisk ? " usrquota" : ""),
+                       (pquotaondisk ? " prjquota" : ""),
+                       (gquotaondisk ? " grpquota" : ""));
+               return XFS_ERROR(EPERM);
+       }
+
+       if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
+               /*
+                * Call mount_quotas at this point only if we won't have to do
+                * a quotacheck.
+                */
+               if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
+                       /*
+                        * If an error occurred, qm_mount_quotas code
+                        * has already disabled quotas. So, just finish
+                        * mounting, and get on with the boring life
+                        * without disk quotas.
+                        */
+                       xfs_qm_mount_quotas(mp);
+               } else {
+                       /*
+                        * Clear the quota flags, but remember them. This
+                        * is so that the quota code doesn't get invoked
+                        * before we're ready. This can happen when an
+                        * inode goes inactive and wants to free blocks,
+                        * or via xfs_log_mount_finish.
+                        */
+                       *needquotamount = B_TRUE;
+                       *quotaflags = mp->m_qflags;
+                       mp->m_qflags = 0;
+               }
+       }
+
+       return 0;
+}
+
+void __init
+xfs_qm_init(void)
+{
+       printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
+       mutex_init(&xfs_Gqm_lock);
+       xfs_qm_init_procfs();
+}
+
+void __exit
+xfs_qm_exit(void)
+{
+       xfs_qm_cleanup_procfs();
+       if (qm_dqzone)
+               kmem_zone_destroy(qm_dqzone);
+       if (qm_dqtrxzone)
+               kmem_zone_destroy(qm_dqtrxzone);
+}
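xfs_fill_statvfs_from_dquot() above applies one clamping rule twice, once for blocks and once for inodes: the soft limit wins when set, the hard limit is the fallback, and the free count is recomputed against current usage. A minimal standalone sketch of that rule, with illustrative names:

#include <stdint.h>

static void clamp_to_quota(uint64_t *total, uint64_t *avail,
                           uint64_t softlimit, uint64_t hardlimit,
                           uint64_t used)
{
        uint64_t limit = softlimit ? softlimit : hardlimit;

        if (limit && *total > limit) {
                *total = limit;
                *avail = (*total > used) ? *total - used : 0;
        }
}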
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
new file mode 100644
index 0000000..8671a0b
--- /dev/null
+++ b/fs/xfs/xfs_qm_stats.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_qm.h"
+
+struct xqmstats xqmstats;
+
+static int xqm_proc_show(struct seq_file *m, void *v)
+{
+       /* maximum; incore; ratio free to inuse; freelist */
+       seq_printf(m, "%d\t%d\t%d\t%u\n",
+                       ndquot,
+                       xfs_Gqm ? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
+                       xfs_Gqm ? xfs_Gqm->qm_dqfree_ratio : 0,
+                       xfs_Gqm ? xfs_Gqm->qm_dqfrlist_cnt : 0);
+       return 0;
+}
+
+static int xqm_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xqm_proc_show, NULL);
+}
+
+static const struct file_operations xqm_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xqm_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static int xqmstat_proc_show(struct seq_file *m, void *v)
+{
+       /* quota performance statistics */
+       seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
+                       xqmstats.xs_qm_dqreclaims,
+                       xqmstats.xs_qm_dqreclaim_misses,
+                       xqmstats.xs_qm_dquot_dups,
+                       xqmstats.xs_qm_dqcachemisses,
+                       xqmstats.xs_qm_dqcachehits,
+                       xqmstats.xs_qm_dqwants,
+                       xqmstats.xs_qm_dqshake_reclaims,
+                       xqmstats.xs_qm_dqinact_reclaims);
+       return 0;
+}
+
+static int xqmstat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xqmstat_proc_show, NULL);
+}
+
+static const struct file_operations xqmstat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xqmstat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+void
+xfs_qm_init_procfs(void)
+{
+       proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
+       proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
+}
+
+void
+xfs_qm_cleanup_procfs(void)
+{
+       remove_proc_entry("fs/xfs/xqm", NULL);
+       remove_proc_entry("fs/xfs/xqmstat", NULL);
+}
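Both proc files above follow the stock single_open() seq_file pattern. A minimal sketch of the same pattern for a hypothetical counter file; names and the proc path are illustrative, and the era-appropriate file_operations wiring mirrors the code above:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static unsigned int demo_events;        /* bumped elsewhere, like xqmstats */

static int demo_proc_show(struct seq_file *m, void *v)
{
        seq_printf(m, "events %u\n", demo_events);
        return 0;
}

static int demo_proc_open(struct inode *inode, struct file *file)
{
        return single_open(file, demo_proc_show, NULL);
}

static const struct file_operations demo_proc_fops = {
        .owner          = THIS_MODULE,
        .open           = demo_proc_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

/* registered with: proc_create("fs/demo/stat", 0, NULL, &demo_proc_fops); */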
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h
new file mode 100644
index 0000000..5b964fc
--- /dev/null
+++ b/fs/xfs/xfs_qm_stats.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QM_STATS_H__
+#define __XFS_QM_STATS_H__
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+/*
+ * XQM global statistics
+ */
+struct xqmstats {
+       __uint32_t              xs_qm_dqreclaims;
+       __uint32_t              xs_qm_dqreclaim_misses;
+       __uint32_t              xs_qm_dquot_dups;
+       __uint32_t              xs_qm_dqcachemisses;
+       __uint32_t              xs_qm_dqcachehits;
+       __uint32_t              xs_qm_dqwants;
+       __uint32_t              xs_qm_dqshake_reclaims;
+       __uint32_t              xs_qm_dqinact_reclaims;
+};
+
+extern struct xqmstats xqmstats;
+
+# define XQM_STATS_INC(count)  ( (count)++ )
+
+extern void xfs_qm_init_procfs(void);
+extern void xfs_qm_cleanup_procfs(void);
+
+#else
+
+# define XQM_STATS_INC(count)  do { } while (0)
+
+static inline void xfs_qm_init_procfs(void) { }
+static inline void xfs_qm_cleanup_procfs(void) { }
+
+#endif
+
+#endif /* __XFS_QM_STATS_H__ */
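Call sites bump these counters through the macro, so the increments disappear entirely in !CONFIG_PROC_FS or XFS_STATS_OFF builds. A hypothetical call site:

/* record a dquot cache hit; compiles to nothing when stats are off */
XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);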
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
new file mode 100644
index 0000000..609246f
--- /dev/null
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/capability.h>
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+STATIC int     xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
+STATIC int     xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+                                       uint);
+STATIC uint    xfs_qm_export_flags(uint);
+STATIC uint    xfs_qm_export_qtype_flags(uint);
+STATIC void    xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
+                                       fs_disk_quota_t *);
+
+
+/*
+ * Turn off quota accounting and/or enforcement for all udquots and/or
+ * gdquots. Called only at unmount time.
+ *
+ * This assumes that there are no dquots of this file system cached
+ * incore, and modifies the ondisk dquot directly. Therefore, for example,
+ * it is an error to call this twice, without purging the cache.
+ */
+int
+xfs_qm_scall_quotaoff(
+       xfs_mount_t             *mp,
+       uint                    flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       uint                    dqtype;
+       int                     error;
+       uint                    inactivate_flags;
+       xfs_qoff_logitem_t      *qoffstart;
+       int                     nculprits;
+
+       /*
+        * No file system can have quotas enabled on disk but not in core.
+        * Note that quota utilities (like quotaoff) _expect_
+        * errno == EEXIST here.
+        */
+       if ((mp->m_qflags & flags) == 0)
+               return XFS_ERROR(EEXIST);
+       error = 0;
+
+       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+
+       /*
+        * We don't want two quotaoffs messing each other up, so we
+        * serialize them. quotaoff isn't exactly a performance-critical
+        * operation.
+        * If quotaoff, then we must be dealing with the root filesystem.
+        */
+       ASSERT(q);
+       mutex_lock(&q->qi_quotaofflock);
+
+       /*
+        * If we're just turning off quota enforcement, change mp and go.
+        */
+       if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
+               mp->m_qflags &= ~(flags);
+
+               spin_lock(&mp->m_sb_lock);
+               mp->m_sb.sb_qflags = mp->m_qflags;
+               spin_unlock(&mp->m_sb_lock);
+               mutex_unlock(&q->qi_quotaofflock);
+
+               /* XXX what to do on error? Revert to the old incore values? */
+               error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
+               return (error);
+       }
+
+       dqtype = 0;
+       inactivate_flags = 0;
+       /*
+        * If accounting is off, we must turn enforcement off, clear the
+        * quota 'CHKD' certificate to make it known that we have to
+        * do a quotacheck the next time this quota is turned on.
+        */
+       if (flags & XFS_UQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_UQUOTA;
+               flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
+               inactivate_flags |= XFS_UQUOTA_ACTIVE;
+       }
+       if (flags & XFS_GQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_GQUOTA;
+               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+               inactivate_flags |= XFS_GQUOTA_ACTIVE;
+       } else if (flags & XFS_PQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_PQUOTA;
+               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+               inactivate_flags |= XFS_PQUOTA_ACTIVE;
+       }
+
+       /*
+        * Nothing to do?  Don't complain. This happens when we're just
+        * turning off quota enforcement.
+        */
+       if ((mp->m_qflags & flags) == 0)
+               goto out_unlock;
+
+       /*
+        * Write the LI_QUOTAOFF log record, and do SB changes atomically,
+        * and synchronously. If we fail to write, we should abort the
+        * operation as it cannot be recovered safely if we crash.
+        */
+       error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
+       if (error)
+               goto out_unlock;
+
+       /*
+        * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
+        * to take care of the race between dqget and quotaoff. We don't take
+        * any special locks to reset these bits. All processes need to check
+        * these bits *after* taking inode lock(s) to see if the particular
+        * quota type is in the process of being turned off. If *ACTIVE, it is
+        * guaranteed that all dquot structures and all quotainode ptrs will all
+        * stay valid as long as that inode is kept locked.
+        *
+        * There is no turning back after this.
+        */
+       mp->m_qflags &= ~inactivate_flags;
+
+       /*
+        * Give back all the dquot reference(s) held by inodes.
+        * Here we go thru every single incore inode in this file system, and
+        * do a dqrele on the i_udquot/i_gdquot that it may have.
+        * Essentially, as long as somebody has an inode locked, this guarantees
+        * that quotas will not be turned off. This is handy because in a
+        * transaction once we lock the inode(s) and check for quotaon, we can
+        * depend on the quota inodes (and other things) being valid as long as
+        * we keep the lock(s).
+        */
+       xfs_qm_dqrele_all_inodes(mp, flags);
+
+       /*
+        * Next we make the changes in the quota flag in the mount struct.
+        * This isn't protected by a particular lock directly, because we
+        * don't want to take a mrlock every time we depend on quotas being on.
+        */
+       mp->m_qflags &= ~(flags);
+
+       /*
+        * Go through all the dquots of this file system and purge them,
+        * according to what was turned off. We may not be able to get rid
+        * of all dquots, because dquots can have temporary references that
+        * are not attached to inodes. eg. xfs_setattr, xfs_create.
+        * So, if we couldn't purge all the dquots from the filesystem,
+        * we can't get rid of the incore data structures.
+        */
+       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
+               delay(10 * nculprits);
+
+       /*
+        * Transactions that had started before the ACTIVE state bit was cleared
+        * could have logged many dquots, so they'd have higher LSNs than
+        * the first QUOTAOFF log record does. If we happen to crash when
+        * the tail of the log has gone past the QUOTAOFF record, but
+        * before the last dquot modification, those dquots __will__
+        * recover, and that's not good.
+        *
+        * So, we have QUOTAOFF start and end logitems; the start
+        * logitem won't get overwritten until the end logitem appears...
+        */
+       error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
+       if (error) {
+               /* We're screwed now. Shutdown is the only option. */
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               goto out_unlock;
+       }
+
+       /*
+        * If quotas are completely disabled, close shop.
+        */
+       if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
+           ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
+               mutex_unlock(&q->qi_quotaofflock);
+               xfs_qm_destroy_quotainfo(mp);
+               return (0);
+       }
+
+       /*
+        * Release our quotainode references if we don't need them anymore.
+        */
+       if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
+               IRELE(q->qi_uquotaip);
+               q->qi_uquotaip = NULL;
+       }
+       if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
+               IRELE(q->qi_gquotaip);
+               q->qi_gquotaip = NULL;
+       }
+
+out_unlock:
+       mutex_unlock(&q->qi_quotaofflock);
+       return error;
+}
+
+STATIC int
+xfs_qm_scall_trunc_qfile(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino)
+{
+       struct xfs_inode        *ip;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       if (ino == NULLFSINO)
+               return 0;
+
+       error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
+       if (error)
+               return error;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
+       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+                                 XFS_TRANS_PERM_LOG_RES,
+                                 XFS_ITRUNCATE_LOG_COUNT);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+               goto out_put;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip);
+
+       error = xfs_itruncate_data(&tp, ip, 0);
+       if (error) {
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+                                    XFS_TRANS_ABORT);
+               goto out_unlock;
+       }
+
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+out_put:
+       IRELE(ip);
+       return error;
+}
+
+int
+xfs_qm_scall_trunc_qfiles(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             error = 0, error2 = 0;
+
+       if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
+               xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
+                       __func__, flags, mp->m_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+
+       if (flags & XFS_DQ_USER)
+               error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
+       if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
+               error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
+
+       return error ? error : error2;
+}
+
+/*
+ * Switch on (a given) quota enforcement for a filesystem.  This takes
+ * effect immediately.
+ * (Switching on quota accounting must be done at mount time.)
+ */
+int
+xfs_qm_scall_quotaon(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             error;
+       uint            qf;
+       __int64_t       sbflags;
+
+       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+       /*
+        * Switching on quota accounting must be done at mount time.
+        */
+       flags &= ~(XFS_ALL_QUOTA_ACCT);
+
+       sbflags = 0;
+
+       if (flags == 0) {
+               xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
+                       __func__, mp->m_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+
+       /* No fs can turn on quotas with a delayed effect */
+       ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
+
+       /*
+        * Can't enforce without accounting. We check the superblock
+        * qflags here instead of m_qflags because rootfs can have
+        * quota acct on ondisk without m_qflags' knowing.
+        */
+       if (((flags & XFS_UQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
+           (flags & XFS_UQUOTA_ENFD))
+           ||
+           ((flags & XFS_PQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
+           (flags & XFS_GQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
+           (flags & XFS_OQUOTA_ENFD))) {
+               xfs_debug(mp,
+                       "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
+                       __func__, flags, mp->m_sb.sb_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+       /*
+        * If everything's already up to date incore, don't waste time.
+        */
+       if ((mp->m_qflags & flags) == flags)
+               return XFS_ERROR(EEXIST);
+
+       /*
+        * Change sb_qflags on disk but not incore mp->qflags
+        * if this is the root filesystem.
+        */
+       spin_lock(&mp->m_sb_lock);
+       qf = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = qf | flags;
+       spin_unlock(&mp->m_sb_lock);
+
+       /*
+        * There's nothing to change if it's the same.
+        */
+       if ((qf & flags) == flags && sbflags == 0)
+               return XFS_ERROR(EEXIST);
+       sbflags |= XFS_SB_QFLAGS;
+
+       if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
+               return (error);
+       /*
+        * If we aren't trying to switch on quota enforcement, we are done.
+        */
+       if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
+            ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
+            ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
+           (flags & XFS_ALL_QUOTA_ENFD) == 0)
+               return (0);
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return XFS_ERROR(ESRCH);
+
+       /*
+        * Switch on quota enforcement in core.
+        */
+       mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
+       mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
+       mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
+
+       return (0);
+}
+
+
+/*
+ * Return quota status information, such as uquota-off, enforcements, etc.
+ */
+int
+xfs_qm_scall_getqstat(
+       struct xfs_mount        *mp,
+       struct fs_quota_stat    *out)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_inode        *uip, *gip;
+       boolean_t               tempuqip, tempgqip;
+
+       uip = gip = NULL;
+       tempuqip = tempgqip = B_FALSE;
+       memset(out, 0, sizeof(fs_quota_stat_t));
+
+       out->qs_version = FS_QSTAT_VERSION;
+       if (!xfs_sb_version_hasquota(&mp->m_sb)) {
+               out->qs_uquota.qfs_ino = NULLFSINO;
+               out->qs_gquota.qfs_ino = NULLFSINO;
+               return (0);
+       }
+       out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
+                                                       (XFS_ALL_QUOTA_ACCT|
+                                                        XFS_ALL_QUOTA_ENFD));
+       out->qs_pad = 0;
+       out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
+       out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+
+       if (q) {
+               uip = q->qi_uquotaip;
+               gip = q->qi_gquotaip;
+       }
+       if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
+               if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+                                       0, 0, &uip) == 0)
+                       tempuqip = B_TRUE;
+       }
+       if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
+               if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+                                       0, 0, &gip) == 0)
+                       tempgqip = B_TRUE;
+       }
+       if (uip) {
+               out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
+               out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
+               if (tempuqip)
+                       IRELE(uip);
+       }
+       if (gip) {
+               out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
+               out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
+               if (tempgqip)
+                       IRELE(gip);
+       }
+       if (q) {
+               out->qs_incoredqs = q->qi_dquots;
+               out->qs_btimelimit = q->qi_btimelimit;
+               out->qs_itimelimit = q->qi_itimelimit;
+               out->qs_rtbtimelimit = q->qi_rtbtimelimit;
+               out->qs_bwarnlimit = q->qi_bwarnlimit;
+               out->qs_iwarnlimit = q->qi_iwarnlimit;
+       }
+       return 0;
+}
+
+#define XFS_DQ_MASK \
+       (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
+
+/*
+ * Adjust quota limits, and start/stop timers accordingly.
+ */
+int
+xfs_qm_scall_setqlim(
+       xfs_mount_t             *mp,
+       xfs_dqid_t              id,
+       uint                    type,
+       fs_disk_quota_t         *newlim)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       xfs_disk_dquot_t        *ddq;
+       xfs_dquot_t             *dqp;
+       xfs_trans_t             *tp;
+       int                     error;
+       xfs_qcnt_t              hard, soft;
+
+       if (newlim->d_fieldmask & ~XFS_DQ_MASK)
+               return EINVAL;
+       if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
+               return 0;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
+                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return (error);
+       }
+
+       /*
+        * We don't want to race with a quotaoff, so take the quotaoff lock.
+        * (We don't hold an inode lock, so there's nothing else to stop
+        * a quotaoff from happening.) (XXX: this doesn't currently happen
+        * because we take the vfslock before calling xfs_qm_sysent.)
+        */
+       mutex_lock(&q->qi_quotaofflock);
+
+       /*
+        * Get the dquot (locked), and join it to the transaction.
+        * Allocate the dquot if this doesn't exist.
+        */
+       if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
+               xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+               ASSERT(error != ENOENT);
+               goto out_unlock;
+       }
+       xfs_trans_dqjoin(tp, dqp);
+       ddq = &dqp->q_core;
+
+       /*
+        * Make sure that hardlimits are >= soft limits before changing.
+        */
+       hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
+                       be64_to_cpu(ddq->d_blk_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
+                       be64_to_cpu(ddq->d_blk_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_blk_hardlimit = cpu_to_be64(hard);
+               ddq->d_blk_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_bhardlimit = hard;
+                       q->qi_bsoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
+       }
+       hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
+                       be64_to_cpu(ddq->d_rtb_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
+                       be64_to_cpu(ddq->d_rtb_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_rtb_hardlimit = cpu_to_be64(hard);
+               ddq->d_rtb_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_rtbhardlimit = hard;
+                       q->qi_rtbsoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
+       }
+
+       hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
+               (xfs_qcnt_t) newlim->d_ino_hardlimit :
+                       be64_to_cpu(ddq->d_ino_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
+               (xfs_qcnt_t) newlim->d_ino_softlimit :
+                       be64_to_cpu(ddq->d_ino_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_ino_hardlimit = cpu_to_be64(hard);
+               ddq->d_ino_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_ihardlimit = hard;
+                       q->qi_isoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
+       }
+
+       /*
+        * Update warnings counter(s) if requested
+        */
+       if (newlim->d_fieldmask & FS_DQ_BWARNS)
+               ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
+       if (newlim->d_fieldmask & FS_DQ_IWARNS)
+               ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
+       if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+               ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
+
+       if (id == 0) {
+               /*
+                * Timelimits for the super user set the relative time
+                * the other users can be over quota for this file system.
+                * If it is zero a default is used.  Ditto for the default
+                * soft and hard limit values (already done, above), and
+                * for warnings.
+                */
+               if (newlim->d_fieldmask & FS_DQ_BTIMER) {
+                       q->qi_btimelimit = newlim->d_btimer;
+                       ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_ITIMER) {
+                       q->qi_itimelimit = newlim->d_itimer;
+                       ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
+                       q->qi_rtbtimelimit = newlim->d_rtbtimer;
+                       ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_BWARNS)
+                       q->qi_bwarnlimit = newlim->d_bwarns;
+               if (newlim->d_fieldmask & FS_DQ_IWARNS)
+                       q->qi_iwarnlimit = newlim->d_iwarns;
+               if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+                       q->qi_rtbwarnlimit = newlim->d_rtbwarns;
+       } else {
+               /*
+                * If the user is now over quota, start the timelimit.
+                * The user will not be 'warned'.
+                * Note that we keep the timers ticking, whether enforcement
+                * is on or off. We don't really want to bother with iterating
+                * over all ondisk dquots and turning the timers on/off.
+                */
+               xfs_qm_adjust_dqtimers(mp, ddq);
+       }
+       dqp->dq_flags |= XFS_DQ_DIRTY;
+       xfs_trans_log_dquot(tp, dqp);
+
+       error = xfs_trans_commit(tp, 0);
+       xfs_qm_dqrele(dqp);
+
+ out_unlock:
+       mutex_unlock(&q->qi_quotaofflock);
+       return error;
+}
+
+int
+xfs_qm_scall_getquota(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,
+       uint            type,
+       fs_disk_quota_t *out)
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+
+       /*
+        * Try to get the dquot. We don't want it allocated on disk, so
+        * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
+        * exist, we'll get ENOENT back.
+        */
+       if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
+               return (error);
+       }
+
+       /*
+        * If everything's NULL, this dquot doesn't quite exist as far as
+        * our utility programs are concerned.
+        */
+       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+               xfs_qm_dqput(dqp);
+               return XFS_ERROR(ENOENT);
+       }
+       /*
+        * Convert the disk dquot to the exportable format
+        */
+       xfs_qm_export_dquot(mp, &dqp->q_core, out);
+       xfs_qm_dqput(dqp);
+       return (error ? XFS_ERROR(EFAULT) : 0);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff_end(
+       xfs_mount_t             *mp,
+       xfs_qoff_logitem_t      *startqoff,
+       uint                    flags)
+{
+       xfs_trans_t             *tp;
+       int                     error;
+       xfs_qoff_logitem_t      *qoffi;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
+
+       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
+                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return (error);
+       }
+
+       qoffi = xfs_trans_get_qoff_item(tp, startqoff,
+                                       flags & XFS_ALL_QUOTA_ACCT);
+       xfs_trans_log_quotaoff_item(tp, qoffi);
+
+       /*
+        * We have to make sure that the transaction is secure on disk before we
+        * return and actually stop quota accounting. So, make it synchronous.
+        * We don't care about quotaoff's performance.
+        */
+       xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+       return (error);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff(
+       xfs_mount_t            *mp,
+       xfs_qoff_logitem_t     **qoffstartp,
+       uint                   flags)
+{
+       xfs_trans_t            *tp;
+       int                     error;
+       xfs_qoff_logitem_t     *qoffi = NULL;
+       uint                    oldsbqflag = 0;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
+       if ((error = xfs_trans_reserve(tp, 0,
+                                     sizeof(xfs_qoff_logitem_t) * 2 +
+                                     mp->m_sb.sb_sectsize + 128,
+                                     0,
+                                     0,
+                                     XFS_DEFAULT_LOG_COUNT))) {
+               goto error0;
+       }
+
+       qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+       xfs_trans_log_quotaoff_item(tp, qoffi);
+
+       spin_lock(&mp->m_sb_lock);
+       oldsbqflag = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+       spin_unlock(&mp->m_sb_lock);
+
+       xfs_mod_sb(tp, XFS_SB_QFLAGS);
+
+       /*
+        * We have to make sure that the transaction is secure on disk before we
+        * return and actually stop quota accounting. So, make it synchronous.
+        * We don't care about quotaoff's performance.
+        */
+       xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+
+error0:
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               /*
+                * No one else is modifying sb_qflags, so this is OK.
+                * We still hold the quotaofflock.
+                */
+               spin_lock(&mp->m_sb_lock);
+               mp->m_sb.sb_qflags = oldsbqflag;
+               spin_unlock(&mp->m_sb_lock);
+       }
+       *qoffstartp = qoffi;
+       return (error);
+}
+
+
+/*
+ * Translate an internal style on-disk-dquot to the exportable format.
+ * The main differences are that the counters/limits are all in Basic
+ * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
+ * to be converted to the native endianness.
+ */
+STATIC void
+xfs_qm_export_dquot(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *src,
+       struct fs_disk_quota    *dst)
+{
+       memset(dst, 0, sizeof(*dst));
+       dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
+       dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
+       dst->d_id = be32_to_cpu(src->d_id);
+       dst->d_blk_hardlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
+       dst->d_blk_softlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
+       dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
+       dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
+       dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
+       dst->d_icount = be64_to_cpu(src->d_icount);
+       dst->d_btimer = be32_to_cpu(src->d_btimer);
+       dst->d_itimer = be32_to_cpu(src->d_itimer);
+       dst->d_iwarns = be16_to_cpu(src->d_iwarns);
+       dst->d_bwarns = be16_to_cpu(src->d_bwarns);
+       dst->d_rtb_hardlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
+       dst->d_rtb_softlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
+       dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
+       dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
+       dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
+
+       /*
+        * Internally, we don't reset all the timers when quota enforcement
+        * gets turned off. No need to confuse the user level code,
+        * so return zeroes in that case.
+        */
+       if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
+           (!XFS_IS_OQUOTA_ENFORCED(mp) &&
+                       (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
+               dst->d_btimer = 0;
+               dst->d_itimer = 0;
+               dst->d_rtbtimer = 0;
+       }
+
+#ifdef DEBUG
+       if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
+            (XFS_IS_OQUOTA_ENFORCED(mp) &&
+                       (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
+           dst->d_id != 0) {
+               if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
+                   (dst->d_blk_softlimit > 0)) {
+                       ASSERT(dst->d_btimer != 0);
+               }
+               if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
+                   (dst->d_ino_softlimit > 0)) {
+                       ASSERT(dst->d_itimer != 0);
+               }
+       }
+#endif
+}
+
+STATIC uint
+xfs_qm_export_qtype_flags(
+       uint flags)
+{
+       /*
+        * Exactly one of the quota type flags must be set.
+        */
+       ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
+               (FS_PROJ_QUOTA | FS_USER_QUOTA));
+       ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
+               (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
+       ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
+               (FS_USER_QUOTA | FS_GROUP_QUOTA));
+       ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
+
+       return (flags & XFS_DQ_USER) ?
+               FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
+                       FS_PROJ_QUOTA : FS_GROUP_QUOTA;
+}
+
+STATIC uint
+xfs_qm_export_flags(
+       uint flags)
+{
+       uint uflags;
+
+       uflags = 0;
+       if (flags & XFS_UQUOTA_ACCT)
+               uflags |= FS_QUOTA_UDQ_ACCT;
+       if (flags & XFS_PQUOTA_ACCT)
+               uflags |= FS_QUOTA_PDQ_ACCT;
+       if (flags & XFS_GQUOTA_ACCT)
+               uflags |= FS_QUOTA_GDQ_ACCT;
+       if (flags & XFS_UQUOTA_ENFD)
+               uflags |= FS_QUOTA_UDQ_ENFD;
+       if (flags & (XFS_OQUOTA_ENFD)) {
+               uflags |= (flags & XFS_GQUOTA_ACCT) ?
+                       FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
+       }
+       return (uflags);
+}
+
+
+STATIC int
+xfs_dqrele_inode(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       /* skip quota inodes */
+       if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
+           ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
+               ASSERT(ip->i_udquot == NULL);
+               ASSERT(ip->i_gdquot == NULL);
+               return 0;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+               xfs_qm_dqrele(ip->i_udquot);
+               ip->i_udquot = NULL;
+       }
+       if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+               xfs_qm_dqrele(ip->i_gdquot);
+               ip->i_gdquot = NULL;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return 0;
+}
+
+
+/*
+ * Go thru all the inodes in the file system, releasing their dquots.
+ *
+ * Note that the mount structure gets modified to indicate that quotas are off
+ * AFTER this, in the case of quotaoff.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+       struct xfs_mount *mp,
+       uint             flags)
+{
+       ASSERT(mp->m_quotainfo);
+       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
+}
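xfs_qm_scall_setqlim() above validates each (hard, soft) limit pair the same way before writing it: a hard limit of 0 means unlimited and always passes, otherwise hard must be >= soft or the pair is rejected and logged. A standalone sketch of that rule:

#include <stdbool.h>
#include <stdint.h>

/* returns true when the pair may be applied, mirroring the checks above */
static bool limits_acceptable(uint64_t hard, uint64_t soft)
{
        return hard == 0 || hard >= soft;
}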
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
new file mode 100644
index 0000000..94a3d92
--- /dev/null
+++ b/fs/xfs/xfs_quota_priv.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QUOTA_PRIV_H__
+#define __XFS_QUOTA_PRIV_H__
+
+/*
+ * Number of bmaps that we ask from bmapi when doing a quotacheck.
+ * We make this restriction to keep the memory usage to a minimum.
+ */
+#define XFS_DQITER_MAP_SIZE    10
+
+/*
+ * Hash into a bucket in the dquot hash table, based on <mp, id>.
+ */
+#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+                                (__psunsigned_t)(id)) & \
+                               (xfs_Gqm->qm_dqhashmask - 1))
+#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
+                                    (xfs_Gqm->qm_usr_dqhtable + \
+                                     XFS_DQ_HASHVAL(mp, id)) : \
+                                    (xfs_Gqm->qm_grp_dqhtable + \
+                                     XFS_DQ_HASHVAL(mp, id)))
+#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
+       !dqp->q_core.d_blk_hardlimit && \
+       !dqp->q_core.d_blk_softlimit && \
+       !dqp->q_core.d_rtb_hardlimit && \
+       !dqp->q_core.d_rtb_softlimit && \
+       !dqp->q_core.d_ino_hardlimit && \
+       !dqp->q_core.d_ino_softlimit && \
+       !dqp->q_core.d_bcount && \
+       !dqp->q_core.d_rtbcount && \
+       !dqp->q_core.d_icount)
+
+#define DQFLAGTO_TYPESTR(d)    (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
+                                (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
+                                (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
+
+#endif /* __XFS_QUOTA_PRIV_H__ */
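XFS_DQ_HASHVAL above mixes the mount pointer with the dquot id and masks the sum down to a bucket index, so equal ids on different filesystems land in different buckets. A userspace-flavored sketch of that computation; types are illustrative, and the real macro folds in the global xfs_Gqm table size:

#include <stdint.h>

/* nbuckets must be a power of two for the mask to work */
static unsigned int dq_hashval(const void *mp, uint32_t id,
                               unsigned int nbuckets)
{
        return ((uintptr_t)mp + id) & (nbuckets - 1);
}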
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
new file mode 100644
index 0000000..7e76f53
--- /dev/null
+++ b/fs/xfs/xfs_quotaops.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_qm.h"
+#include <linux/quota.h>
+
+
+STATIC int
+xfs_quota_type(int type)
+{
+       switch (type) {
+       case USRQUOTA:
+               return XFS_DQ_USER;
+       case GRPQUOTA:
+               return XFS_DQ_GROUP;
+       default:
+               return XFS_DQ_PROJ;
+       }
+}
+
+STATIC int
+xfs_fs_get_xstate(
+       struct super_block      *sb,
+       struct fs_quota_stat    *fqs)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       return -xfs_qm_scall_getqstat(mp, fqs);
+}
+
+STATIC int
+xfs_fs_set_xstate(
+       struct super_block      *sb,
+       unsigned int            uflags,
+       int                     op)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       unsigned int            flags = 0;
+
+       if (sb->s_flags & MS_RDONLY)
+               return -EROFS;
+       if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+
+       if (uflags & FS_QUOTA_UDQ_ACCT)
+               flags |= XFS_UQUOTA_ACCT;
+       if (uflags & FS_QUOTA_PDQ_ACCT)
+               flags |= XFS_PQUOTA_ACCT;
+       if (uflags & FS_QUOTA_GDQ_ACCT)
+               flags |= XFS_GQUOTA_ACCT;
+       if (uflags & FS_QUOTA_UDQ_ENFD)
+               flags |= XFS_UQUOTA_ENFD;
+       if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
+               flags |= XFS_OQUOTA_ENFD;
+
+       switch (op) {
+       case Q_XQUOTAON:
+               return -xfs_qm_scall_quotaon(mp, flags);
+       case Q_XQUOTAOFF:
+               if (!XFS_IS_QUOTA_ON(mp))
+                       return -EINVAL;
+               return -xfs_qm_scall_quotaoff(mp, flags);
+       case Q_XQUOTARM:
+               if (XFS_IS_QUOTA_ON(mp))
+                       return -EINVAL;
+               return -xfs_qm_scall_trunc_qfiles(mp, flags);
+       }
+
+       return -EINVAL;
+}
+
+STATIC int
+xfs_fs_get_dqblk(
+       struct super_block      *sb,
+       int                     type,
+       qid_t                   id,
+       struct fs_disk_quota    *fdq)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       if (!XFS_IS_QUOTA_ON(mp))
+               return -ESRCH;
+
+       return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
+}
+
+STATIC int
+xfs_fs_set_dqblk(
+       struct super_block      *sb,
+       int                     type,
+       qid_t                   id,
+       struct fs_disk_quota    *fdq)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (sb->s_flags & MS_RDONLY)
+               return -EROFS;
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       if (!XFS_IS_QUOTA_ON(mp))
+               return -ESRCH;
+
+       return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
+}
+
+const struct quotactl_ops xfs_quotactl_operations = {
+       .get_xstate             = xfs_fs_get_xstate,
+       .set_xstate             = xfs_fs_set_xstate,
+       .get_dqblk              = xfs_fs_get_dqblk,
+       .set_dqblk              = xfs_fs_set_dqblk,
+};
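These quotactl_ops back the XFS-specific quotactl(2) commands; the leading minus on each call converts XFS's positive errno returns into the negative values the VFS expects. A userspace sketch that exercises the get_xstate hook above; the header location and device path are assumptions (fs_quota_stat and Q_XGETQSTAT usually come from xfsprogs' <xfs/xqm.h>):

#include <stdio.h>
#include <sys/quota.h>
#include <xfs/xqm.h>

int main(void)
{
        struct fs_quota_stat qstat;

        /* /dev/sda1 is a placeholder for an XFS block device */
        if (quotactl(QCMD(Q_XGETQSTAT, USRQUOTA), "/dev/sda1", 0,
                     (void *)&qstat) != 0) {
                perror("quotactl");
                return 1;
        }
        printf("version %d flags 0x%x incore dquots %u\n",
               qstat.qs_version, qstat.qs_flags, qstat.qs_incoredqs);
        return 0;
}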
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
new file mode 100644
index 0000000..76fdc58
--- /dev/null
+++ b/fs/xfs/xfs_stats.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/proc_fs.h>
+
+DEFINE_PER_CPU(struct xfsstats, xfsstats);
+
+static int xfs_stat_proc_show(struct seq_file *m, void *v)
+{
+       int             c, i, j, val;
+       __uint64_t      xs_xstrat_bytes = 0;
+       __uint64_t      xs_write_bytes = 0;
+       __uint64_t      xs_read_bytes = 0;
+
+       static const struct xstats_entry {
+               char    *desc;
+               int     endpoint;
+       } xstats[] = {
+               { "extent_alloc",       XFSSTAT_END_EXTENT_ALLOC        },
+               { "abt",                XFSSTAT_END_ALLOC_BTREE         },
+               { "blk_map",            XFSSTAT_END_BLOCK_MAPPING       },
+               { "bmbt",               XFSSTAT_END_BLOCK_MAP_BTREE     },
+               { "dir",                XFSSTAT_END_DIRECTORY_OPS       },
+               { "trans",              XFSSTAT_END_TRANSACTIONS        },
+               { "ig",                 XFSSTAT_END_INODE_OPS           },
+               { "log",                XFSSTAT_END_LOG_OPS             },
+               { "push_ail",           XFSSTAT_END_TAIL_PUSHING        },
+               { "xstrat",             XFSSTAT_END_WRITE_CONVERT       },
+               { "rw",                 XFSSTAT_END_READ_WRITE_OPS      },
+               { "attr",               XFSSTAT_END_ATTRIBUTE_OPS       },
+               { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
+               { "vnodes",             XFSSTAT_END_VNODE_OPS           },
+               { "buf",                XFSSTAT_END_BUF                 },
+               { "abtb2",              XFSSTAT_END_ABTB_V2             },
+               { "abtc2",              XFSSTAT_END_ABTC_V2             },
+               { "bmbt2",              XFSSTAT_END_BMBT_V2             },
+               { "ibt2",               XFSSTAT_END_IBT_V2              },
+       };
+
+       /* Loop over all stats groups */
+       for (i = 0, j = 0; i < ARRAY_SIZE(xstats); i++) {
+               seq_printf(m, "%s", xstats[i].desc);
+               /* inner loop does each group */
+               while (j < xstats[i].endpoint) {
+                       val = 0;
+                       /* sum over all cpus */
+                       for_each_possible_cpu(c)
+                               val += *(((__u32*)&per_cpu(xfsstats, c) + j));
+                       seq_printf(m, " %u", val);
+                       j++;
+               }
+               seq_putc(m, '\n');
+       }
+       /* extra precision counters */
+       for_each_possible_cpu(i) {
+               xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
+               xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
+               xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
+       }
+
+       seq_printf(m, "xpc %Lu %Lu %Lu\n",
+                       xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+       seq_printf(m, "debug %u\n",
+#if defined(DEBUG)
+               1);
+#else
+               0);
+#endif
+       return 0;
+}
+
+static int xfs_stat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xfs_stat_proc_show, NULL);
+}
+
+static const struct file_operations xfs_stat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xfs_stat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+int
+xfs_init_procfs(void)
+{
+       if (!proc_mkdir("fs/xfs", NULL))
+               goto out;
+
+       if (!proc_create("fs/xfs/stat", 0, NULL,
+                        &xfs_stat_proc_fops))
+               goto out_remove_entry;
+       return 0;
+
+ out_remove_entry:
+       remove_proc_entry("fs/xfs", NULL);
+ out:
+       return -ENOMEM;
+}
+
+void
+xfs_cleanup_procfs(void)
+{
+       remove_proc_entry("fs/xfs/stat", NULL);
+       remove_proc_entry("fs/xfs", NULL);
+}
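/*
 * Illustrative sketch (not from this patch): consuming the file registered
 * above from userspace.  Each line of /proc/fs/xfs/stat is a group name
 * followed by its counters; the "xpc" line carries the three 64-bit byte
 * counters emitted at the end of xfs_stat_proc_show().
 */
#include <stdio.h>

int main(void)
{
	char line[1024];
	unsigned long long xstrat_bytes, write_bytes, read_bytes;
	FILE *f = fopen("/proc/fs/xfs/stat", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "xpc %llu %llu %llu", &xstrat_bytes,
			   &write_bytes, &read_bytes) == 3)
			printf("read %llu bytes, wrote %llu bytes\n",
			       read_bytes, write_bytes);
	}
	fclose(f);
	return 0;
}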
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
new file mode 100644 (file)
index 0000000..736854b
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_STATS_H__
+#define __XFS_STATS_H__
+
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+#include <linux/percpu.h>
+
+/*
+ * XFS global statistics
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC      4
+       __uint32_t              xs_allocx;
+       __uint32_t              xs_allocb;
+       __uint32_t              xs_freex;
+       __uint32_t              xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
+       __uint32_t              xs_abt_lookup;
+       __uint32_t              xs_abt_compare;
+       __uint32_t              xs_abt_insrec;
+       __uint32_t              xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
+       __uint32_t              xs_blk_mapr;
+       __uint32_t              xs_blk_mapw;
+       __uint32_t              xs_blk_unmap;
+       __uint32_t              xs_add_exlist;
+       __uint32_t              xs_del_exlist;
+       __uint32_t              xs_look_exlist;
+       __uint32_t              xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
+       __uint32_t              xs_bmbt_lookup;
+       __uint32_t              xs_bmbt_compare;
+       __uint32_t              xs_bmbt_insrec;
+       __uint32_t              xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
+       __uint32_t              xs_dir_lookup;
+       __uint32_t              xs_dir_create;
+       __uint32_t              xs_dir_remove;
+       __uint32_t              xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
+       __uint32_t              xs_trans_sync;
+       __uint32_t              xs_trans_async;
+       __uint32_t              xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
+       __uint32_t              xs_ig_attempts;
+       __uint32_t              xs_ig_found;
+       __uint32_t              xs_ig_frecycle;
+       __uint32_t              xs_ig_missed;
+       __uint32_t              xs_ig_dup;
+       __uint32_t              xs_ig_reclaims;
+       __uint32_t              xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
+       __uint32_t              xs_log_writes;
+       __uint32_t              xs_log_blocks;
+       __uint32_t              xs_log_noiclogs;
+       __uint32_t              xs_log_force;
+       __uint32_t              xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
+       __uint32_t              xs_try_logspace;
+       __uint32_t              xs_sleep_logspace;
+       __uint32_t              xs_push_ail;
+       __uint32_t              xs_push_ail_success;
+       __uint32_t              xs_push_ail_pushbuf;
+       __uint32_t              xs_push_ail_pinned;
+       __uint32_t              xs_push_ail_locked;
+       __uint32_t              xs_push_ail_flushing;
+       __uint32_t              xs_push_ail_restarts;
+       __uint32_t              xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
+       __uint32_t              xs_xstrat_quick;
+       __uint32_t              xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
+       __uint32_t              xs_write_calls;
+       __uint32_t              xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
+       __uint32_t              xs_attr_get;
+       __uint32_t              xs_attr_set;
+       __uint32_t              xs_attr_remove;
+       __uint32_t              xs_attr_list;
+# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_ATTRIBUTE_OPS+3)
+       __uint32_t              xs_iflush_count;
+       __uint32_t              xs_icluster_flushcnt;
+       __uint32_t              xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
+       __uint32_t              vn_active;      /* # vnodes not on free lists */
+       __uint32_t              vn_alloc;       /* # times vn_alloc called */
+       __uint32_t              vn_get;         /* # times vn_get called */
+       __uint32_t              vn_hold;        /* # times vn_hold called */
+       __uint32_t              vn_rele;        /* # times vn_rele called */
+       __uint32_t              vn_reclaim;     /* # times vn_reclaim called */
+       __uint32_t              vn_remove;      /* # times vn_remove called */
+       __uint32_t              vn_free;        /* # times vn_free called */
+#define XFSSTAT_END_BUF                        (XFSSTAT_END_VNODE_OPS+9)
+       __uint32_t              xb_get;
+       __uint32_t              xb_create;
+       __uint32_t              xb_get_locked;
+       __uint32_t              xb_get_locked_waited;
+       __uint32_t              xb_busy_locked;
+       __uint32_t              xb_miss_locked;
+       __uint32_t              xb_page_retries;
+       __uint32_t              xb_page_found;
+       __uint32_t              xb_get_read;
+/* Version 2 btree counters */
+#define XFSSTAT_END_ABTB_V2            (XFSSTAT_END_BUF+15)
+       __uint32_t              xs_abtb_2_lookup;
+       __uint32_t              xs_abtb_2_compare;
+       __uint32_t              xs_abtb_2_insrec;
+       __uint32_t              xs_abtb_2_delrec;
+       __uint32_t              xs_abtb_2_newroot;
+       __uint32_t              xs_abtb_2_killroot;
+       __uint32_t              xs_abtb_2_increment;
+       __uint32_t              xs_abtb_2_decrement;
+       __uint32_t              xs_abtb_2_lshift;
+       __uint32_t              xs_abtb_2_rshift;
+       __uint32_t              xs_abtb_2_split;
+       __uint32_t              xs_abtb_2_join;
+       __uint32_t              xs_abtb_2_alloc;
+       __uint32_t              xs_abtb_2_free;
+       __uint32_t              xs_abtb_2_moves;
+#define XFSSTAT_END_ABTC_V2            (XFSSTAT_END_ABTB_V2+15)
+       __uint32_t              xs_abtc_2_lookup;
+       __uint32_t              xs_abtc_2_compare;
+       __uint32_t              xs_abtc_2_insrec;
+       __uint32_t              xs_abtc_2_delrec;
+       __uint32_t              xs_abtc_2_newroot;
+       __uint32_t              xs_abtc_2_killroot;
+       __uint32_t              xs_abtc_2_increment;
+       __uint32_t              xs_abtc_2_decrement;
+       __uint32_t              xs_abtc_2_lshift;
+       __uint32_t              xs_abtc_2_rshift;
+       __uint32_t              xs_abtc_2_split;
+       __uint32_t              xs_abtc_2_join;
+       __uint32_t              xs_abtc_2_alloc;
+       __uint32_t              xs_abtc_2_free;
+       __uint32_t              xs_abtc_2_moves;
+#define XFSSTAT_END_BMBT_V2            (XFSSTAT_END_ABTC_V2+15)
+       __uint32_t              xs_bmbt_2_lookup;
+       __uint32_t              xs_bmbt_2_compare;
+       __uint32_t              xs_bmbt_2_insrec;
+       __uint32_t              xs_bmbt_2_delrec;
+       __uint32_t              xs_bmbt_2_newroot;
+       __uint32_t              xs_bmbt_2_killroot;
+       __uint32_t              xs_bmbt_2_increment;
+       __uint32_t              xs_bmbt_2_decrement;
+       __uint32_t              xs_bmbt_2_lshift;
+       __uint32_t              xs_bmbt_2_rshift;
+       __uint32_t              xs_bmbt_2_split;
+       __uint32_t              xs_bmbt_2_join;
+       __uint32_t              xs_bmbt_2_alloc;
+       __uint32_t              xs_bmbt_2_free;
+       __uint32_t              xs_bmbt_2_moves;
+#define XFSSTAT_END_IBT_V2             (XFSSTAT_END_BMBT_V2+15)
+       __uint32_t              xs_ibt_2_lookup;
+       __uint32_t              xs_ibt_2_compare;
+       __uint32_t              xs_ibt_2_insrec;
+       __uint32_t              xs_ibt_2_delrec;
+       __uint32_t              xs_ibt_2_newroot;
+       __uint32_t              xs_ibt_2_killroot;
+       __uint32_t              xs_ibt_2_increment;
+       __uint32_t              xs_ibt_2_decrement;
+       __uint32_t              xs_ibt_2_lshift;
+       __uint32_t              xs_ibt_2_rshift;
+       __uint32_t              xs_ibt_2_split;
+       __uint32_t              xs_ibt_2_join;
+       __uint32_t              xs_ibt_2_alloc;
+       __uint32_t              xs_ibt_2_free;
+       __uint32_t              xs_ibt_2_moves;
+/* Extra precision counters */
+       __uint64_t              xs_xstrat_bytes;
+       __uint64_t              xs_write_bytes;
+       __uint64_t              xs_read_bytes;
+};
+
+DECLARE_PER_CPU(struct xfsstats, xfsstats);
+
+/*
+ * We don't disable preemption; we're not too worried about poking the
+ * wrong CPU's stat for now, and the counters are aggregated before
+ * reporting anyway.
+ */
+#define XFS_STATS_INC(v)       (per_cpu(xfsstats, current_cpu()).v++)
+#define XFS_STATS_DEC(v)       (per_cpu(xfsstats, current_cpu()).v--)
+#define XFS_STATS_ADD(v, inc)  (per_cpu(xfsstats, current_cpu()).v += (inc))
+
+extern int xfs_init_procfs(void);
+extern void xfs_cleanup_procfs(void);
+
+
+#else  /* !CONFIG_PROC_FS */
+
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+
+static inline int xfs_init_procfs(void)
+{
+       return 0;
+}
+
+static inline void xfs_cleanup_procfs(void)
+{
+}
+
+#endif /* !CONFIG_PROC_FS */
+
+#endif /* __XFS_STATS_H__ */
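/*
 * Illustrative sketch (not from this patch): the XFSSTAT_END_* constants
 * above are cumulative __u32 offsets, so a group's counters occupy
 * flat-array indices [previous_end, this_end) when the struct is viewed as
 * an array of 32-bit words -- exactly how xfs_stat_proc_show() walks it.
 * A reduced, self-contained model:
 */
#include <stdio.h>
#include <stdint.h>

struct mini_stats {
#define MINI_END_EXTENT_ALLOC	4
	uint32_t allocx, allocb, freex, freeb;
#define MINI_END_ALLOC_BTREE	(MINI_END_EXTENT_ALLOC + 4)
	uint32_t abt_lookup, abt_compare, abt_insrec, abt_delrec;
};

int main(void)
{
	struct mini_stats s = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint32_t *flat = (uint32_t *)&s;
	int j;

	/* print the "abt" group the way the /proc loop would */
	for (j = MINI_END_EXTENT_ALLOC; j < MINI_END_ALLOC_BTREE; j++)
		printf(" %u", flat[j]);
	printf("\n");		/* prints " 5 6 7 8" */
	return 0;
}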
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
new file mode 100644 (file)
index 0000000..9a72dda
--- /dev/null
@@ -0,0 +1,1773 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_fsops.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_vnodeops.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
+#include "xfs_sync.h"
+#include "xfs_trace.h"
+
+#include <linux/namei.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/mempool.h>
+#include <linux/writeback.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/parser.h>
+
+static const struct super_operations xfs_super_operations;
+static kmem_zone_t *xfs_ioend_zone;
+mempool_t *xfs_ioend_pool;
+
+#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
+#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
+#define MNTOPT_LOGDEV  "logdev"        /* log device */
+#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
+#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
+#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
+#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
+#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
+#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
+#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
+#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
+#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
+#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
+#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
+#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
+#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
+#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
+#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
+#define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
+                                        * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
+#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
+#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
+#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
+#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
+#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
+                                        * in stat(). */
+#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
+#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
+#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
+#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
+#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
+#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
+#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
+#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
+#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
+#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
+#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
+#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
+#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
+#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
+#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
+#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
+#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
+#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
+
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+       Opt_barrier, Opt_nobarrier, Opt_err
+};
+
+static const match_table_t tokens = {
+       {Opt_barrier, "barrier"},
+       {Opt_nobarrier, "nobarrier"},
+       {Opt_err, NULL}
+};
+
+
+STATIC unsigned long
+suffix_strtoul(char *s, char **endp, unsigned int base)
+{
+       int     last, shift_left_factor = 0;
+       char    *value = s;
+
+       last = strlen(value) - 1;
+       if (value[last] == 'K' || value[last] == 'k') {
+               shift_left_factor = 10;
+               value[last] = '\0';
+       }
+       if (value[last] == 'M' || value[last] == 'm') {
+               shift_left_factor = 20;
+               value[last] = '\0';
+       }
+       if (value[last] == 'G' || value[last] == 'g') {
+               shift_left_factor = 30;
+               value[last] = '\0';
+       }
+
+       return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
+}
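/*
 * Worked examples for suffix_strtoul() (editorial; input values assumed):
 *	"128"  -> 128
 *	"32k"  -> 32 << 10 = 32768
 *	"16M"  -> 16 << 20 = 16777216
 *	"1g"   -> 1  << 30 = 1073741824
 * Note that the suffix byte is overwritten with '\0' in place, so callers
 * must pass a writable string (the mount option buffer is).
 */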
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure.  The caller takes care of them.
+ */
+STATIC int
+xfs_parseargs(
+       struct xfs_mount        *mp,
+       char                    *options)
+{
+       struct super_block      *sb = mp->m_super;
+       char                    *this_char, *value, *eov;
+       int                     dsunit = 0;
+       int                     dswidth = 0;
+       int                     iosize = 0;
+       __uint8_t               iosizelog = 0;
+
+       /*
+        * set up the mount name first so all the errors will refer to the
+        * correct device.
+        */
+       mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+       if (!mp->m_fsname)
+               return ENOMEM;
+       mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+       /*
+        * Copy binary VFS mount flags we are interested in.
+        */
+       if (sb->s_flags & MS_RDONLY)
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+       if (sb->s_flags & MS_DIRSYNC)
+               mp->m_flags |= XFS_MOUNT_DIRSYNC;
+       if (sb->s_flags & MS_SYNCHRONOUS)
+               mp->m_flags |= XFS_MOUNT_WSYNC;
+
+       /*
+        * Set some default flags that could be cleared by the mount option
+        * parsing.
+        */
+       mp->m_flags |= XFS_MOUNT_BARRIER;
+       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+       mp->m_flags |= XFS_MOUNT_DELAYLOG;
+
+       /*
+        * These can be overridden by the mount option parsing.
+        */
+       mp->m_logbufs = -1;
+       mp->m_logbsize = -1;
+
+       if (!options)
+               goto done;
+
+       while ((this_char = strsep(&options, ",")) != NULL) {
+               if (!*this_char)
+                       continue;
+               if ((value = strchr(this_char, '=')) != NULL)
+                       *value++ = 0;
+
+               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logbufs = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logbsize = suffix_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!mp->m_logname)
+                               return ENOMEM;
+               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
+                       xfs_warn(mp, "%s option not allowed on this system",
+                               this_char);
+                       return EINVAL;
+               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!mp->m_rtname)
+                               return ENOMEM;
+               } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       iosize = simple_strtoul(value, &eov, 10);
+                       iosizelog = ffs(iosize) - 1;
+               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       iosize = suffix_strtoul(value, &eov, 10);
+                       iosizelog = ffs(iosize) - 1;
+               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
+                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+                       mp->m_flags |= XFS_MOUNT_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
+                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+                       mp->m_flags &= ~XFS_MOUNT_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+                       mp->m_flags |= XFS_MOUNT_WSYNC;
+               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+                       mp->m_flags |= XFS_MOUNT_NORECOVERY;
+               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+                       mp->m_flags |= XFS_MOUNT_NOALIGN;
+               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+                       mp->m_flags |= XFS_MOUNT_SWALLOC;
+               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       dsunit = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       dswidth = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+#if !XFS_BIG_INUMS
+                       xfs_warn(mp, "%s option not allowed on this system",
+                               this_char);
+                       return EINVAL;
+#endif
+               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+                       mp->m_flags |= XFS_MOUNT_NOUUID;
+               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+                       mp->m_flags |= XFS_MOUNT_BARRIER;
+               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
+               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+                       mp->m_flags |= XFS_MOUNT_IKEEP;
+               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+                       mp->m_flags &= ~XFS_MOUNT_IKEEP;
+               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+                       mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
+               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+                       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+                       mp->m_flags |= XFS_MOUNT_ATTR2;
+               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+                       mp->m_flags &= ~XFS_MOUNT_ATTR2;
+                       mp->m_flags |= XFS_MOUNT_NOATTR2;
+               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+                       mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+                       mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                         XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                         XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                         XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
+                          !strcmp(this_char, MNTOPT_UQUOTA) ||
+                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
+                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                        XFS_UQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
+                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_UQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
+                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                        XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
+                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                        XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
+                       mp->m_flags |= XFS_MOUNT_DELAYLOG;
+               } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
+                       mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
+               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
+                       mp->m_flags |= XFS_MOUNT_DISCARD;
+               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
+                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
+               } else if (!strcmp(this_char, "ihashsize")) {
+                       xfs_warn(mp,
+       "ihashsize no longer used, option is deprecated.");
+               } else if (!strcmp(this_char, "osyncisdsync")) {
+                       xfs_warn(mp,
+       "osyncisdsync has no effect, option is deprecated.");
+               } else if (!strcmp(this_char, "osyncisosync")) {
+                       xfs_warn(mp,
+       "osyncisosync has no effect, option is deprecated.");
+               } else if (!strcmp(this_char, "irixsgid")) {
+                       xfs_warn(mp,
+       "irixsgid is now a sysctl(2) variable, option is deprecated.");
+               } else {
+                       xfs_warn(mp, "unknown mount option [%s].", this_char);
+                       return EINVAL;
+               }
+       }
+
+       /*
+        * no recovery flag requires a read-only mount
+        */
+       if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+           !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+               xfs_warn(mp, "no-recovery mounts must be read-only.");
+               return EINVAL;
+       }
+
+       if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
+               xfs_warn(mp,
+       "sunit and swidth options incompatible with the noalign option");
+               return EINVAL;
+       }
+
+       if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
+           !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
+               xfs_warn(mp,
+       "the discard option is incompatible with the nodelaylog option");
+               return EINVAL;
+       }
+
+#ifndef CONFIG_XFS_QUOTA
+       if (XFS_IS_QUOTA_RUNNING(mp)) {
+               xfs_warn(mp, "quota support not available in this kernel.");
+               return EINVAL;
+       }
+#endif
+
+       if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+           (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
+               xfs_warn(mp, "cannot mount with both project and group quota");
+               return EINVAL;
+       }
+
+       if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
+               xfs_warn(mp, "sunit and swidth must be specified together");
+               return EINVAL;
+       }
+
+       if (dsunit && (dswidth % dsunit != 0)) {
+               xfs_warn(mp,
+       "stripe width (%d) must be a multiple of the stripe unit (%d)",
+                       dswidth, dsunit);
+               return EINVAL;
+       }
+
+done:
+       if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+               /*
+                * At this point the superblock has not been read
+                * in, therefore we do not know the block size.
+                * Before the mount call ends we will convert
+                * these to FSBs.
+                */
+               if (dsunit) {
+                       mp->m_dalign = dsunit;
+                       mp->m_flags |= XFS_MOUNT_RETERR;
+               }
+
+               if (dswidth)
+                       mp->m_swidth = dswidth;
+       }
+
+       if (mp->m_logbufs != -1 &&
+           mp->m_logbufs != 0 &&
+           (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+            mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+               xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
+                       mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+               return XFS_ERROR(EINVAL);
+       }
+       if (mp->m_logbsize != -1 &&
+           mp->m_logbsize !=  0 &&
+           (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+            mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+            !is_power_of_2(mp->m_logbsize))) {
+               xfs_warn(mp,
+                       "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+                       mp->m_logbsize);
+               return XFS_ERROR(EINVAL);
+       }
+
+       if (iosizelog) {
+               if (iosizelog > XFS_MAX_IO_LOG ||
+                   iosizelog < XFS_MIN_IO_LOG) {
+                       xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
+                               iosizelog, XFS_MIN_IO_LOG,
+                               XFS_MAX_IO_LOG);
+                       return XFS_ERROR(EINVAL);
+               }
+
+               mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+               mp->m_readio_log = iosizelog;
+               mp->m_writeio_log = iosizelog;
+       }
+
+       return 0;
+}
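/*
 * Illustrative example (option string assumed): a mount such as
 *	mount -t xfs -o logbufs=8,logbsize=32k,usrquota /dev/sda1 /mnt
 * leaves xfs_parseargs() with m_logbufs = 8, m_logbsize = 32768, and
 * m_qflags containing XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | XFS_UQUOTA_ENFD,
 * while the defaults set above (barriers, delaylog, compat iosize) remain.
 */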
+
+struct proc_xfs_info {
+       int     flag;
+       char    *str;
+};
+
+STATIC int
+xfs_showargs(
+       struct xfs_mount        *mp,
+       struct seq_file         *m)
+{
+       static struct proc_xfs_info xfs_info_set[] = {
+               /* the few simple ones we can get from the mount struct */
+               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
+               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
+               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
+               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
+               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
+               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
+               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
+               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
+               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
+               { XFS_MOUNT_DELAYLOG,           "," MNTOPT_DELAYLOG },
+               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
+               { 0, NULL }
+       };
+       static struct proc_xfs_info xfs_info_unset[] = {
+               /* the few simple ones we can get from the mount struct */
+               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
+               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
+               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
+               { 0, NULL }
+       };
+       struct proc_xfs_info    *xfs_infop;
+
+       for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
+               if (mp->m_flags & xfs_infop->flag)
+                       seq_puts(m, xfs_infop->str);
+       }
+       for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
+               if (!(mp->m_flags & xfs_infop->flag))
+                       seq_puts(m, xfs_infop->str);
+       }
+
+       if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+                               (int)(1 << mp->m_writeio_log) >> 10);
+
+       if (mp->m_logbufs > 0)
+               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+       if (mp->m_logbsize > 0)
+               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+
+       if (mp->m_logname)
+               seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
+       if (mp->m_rtname)
+               seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
+
+       if (mp->m_dalign > 0)
+               seq_printf(m, "," MNTOPT_SUNIT "=%d",
+                               (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
+       if (mp->m_swidth > 0)
+               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+                               (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
+
+       if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
+               seq_puts(m, "," MNTOPT_USRQUOTA);
+       else if (mp->m_qflags & XFS_UQUOTA_ACCT)
+               seq_puts(m, "," MNTOPT_UQUOTANOENF);
+
+       /* Either project or group quotas can be active, not both */
+
+       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_PRJQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
+       } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_GRPQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
+       }
+
+       if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
+               seq_puts(m, "," MNTOPT_NOQUOTA);
+
+       return 0;
+}
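/*
 * Illustrative example (hypothetical mount): for a filesystem mounted as
 * sketched after xfs_parseargs(), the /proc/mounts line would read
 * something like
 *	/dev/sda1 /mnt xfs rw,attr2,delaylog,logbsize=32k,usrquota 0 0
 * -- only options differing from the defaults are emitted, each with the
 * leading comma supplied by the MNTOPT_* table entries.
 */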
+__uint64_t
+xfs_max_file_offset(
+       unsigned int            blockshift)
+{
+       unsigned int            pagefactor = 1;
+       unsigned int            bitshift = BITS_PER_LONG - 1;
+
+       /* Figure out the maximum file size; on Linux this can depend on
+        * the filesystem blocksize (on 32-bit platforms).
+        * __block_write_begin does this in an [unsigned] long...
+        *      page->index << (PAGE_CACHE_SHIFT - bbits)
+        * So, for page sized blocks (4K on 32 bit platforms),
+        * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
+        *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+        * but for smaller blocksizes it is less (bbits = log2 bsize).
+        * Note1: get_block_t takes a long (implicit cast from above)
+        * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
+        * can optionally convert the [unsigned] long from above into
+        * an [unsigned] long long.
+        */
+
+#if BITS_PER_LONG == 32
+# if defined(CONFIG_LBDAF)
+       ASSERT(sizeof(sector_t) == 8);
+       pagefactor = PAGE_CACHE_SIZE;
+       bitshift = BITS_PER_LONG;
+# else
+       pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+# endif
+#endif
+
+       return (((__uint64_t)pagefactor) << bitshift) - 1;
+}
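/*
 * Worked examples (editorial; PAGE_CACHE_SIZE = 4096, PAGE_CACHE_SHIFT = 12):
 *	64-bit:			pagefactor = 1, bitshift = 63
 *				-> 2^63 - 1
 *	32-bit, CONFIG_LBDAF:	pagefactor = 4096, bitshift = 32
 *				-> 2^44 - 1 (16 TiB - 1)
 *	32-bit, no LBDAF, 1k blocks (blockshift = 10):
 *				pagefactor = 4096 >> 2 = 1024, bitshift = 31
 *				-> 2^41 - 1 (2 TiB - 1)
 */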
+
+STATIC int
+xfs_blkdev_get(
+       xfs_mount_t             *mp,
+       const char              *name,
+       struct block_device     **bdevp)
+{
+       int                     error = 0;
+
+       *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+                                   mp);
+       if (IS_ERR(*bdevp)) {
+               error = PTR_ERR(*bdevp);
+               xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
+       }
+
+       return -error;
+}
+
+STATIC void
+xfs_blkdev_put(
+       struct block_device     *bdev)
+{
+       if (bdev)
+               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+}
+
+void
+xfs_blkdev_issue_flush(
+       xfs_buftarg_t           *buftarg)
+{
+       blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
+}
+
+STATIC void
+xfs_close_devices(
+       struct xfs_mount        *mp)
+{
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+               xfs_free_buftarg(mp, mp->m_logdev_targp);
+               xfs_blkdev_put(logdev);
+       }
+       if (mp->m_rtdev_targp) {
+               struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
+               xfs_blkdev_put(rtdev);
+       }
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
+}
+
+/*
+ * The file system configurations are:
+ *     (1) device (partition) with data and internal log.
+ *     (2) logical volume with data and log subvolumes.
+ *     (3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present.  The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ */
+STATIC int
+xfs_open_devices(
+       struct xfs_mount        *mp)
+{
+       struct block_device     *ddev = mp->m_super->s_bdev;
+       struct block_device     *logdev = NULL, *rtdev = NULL;
+       int                     error;
+
+       /*
+        * Open real time and log devices - order is important.
+        */
+       if (mp->m_logname) {
+               error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
+               if (error)
+                       goto out;
+       }
+
+       if (mp->m_rtname) {
+               error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
+               if (error)
+                       goto out_close_logdev;
+
+               if (rtdev == ddev || rtdev == logdev) {
+                       xfs_warn(mp,
+       "Cannot mount filesystem with identical rtdev and ddev/logdev.");
+                       error = EINVAL;
+                       goto out_close_rtdev;
+               }
+       }
+
+       /*
+        * Setup xfs_mount buffer target pointers
+        */
+       error = ENOMEM;
+       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
+       if (!mp->m_ddev_targp)
+               goto out_close_rtdev;
+
+       if (rtdev) {
+               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+                                                       mp->m_fsname);
+               if (!mp->m_rtdev_targp)
+                       goto out_free_ddev_targ;
+       }
+
+       if (logdev && logdev != ddev) {
+               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+                                                       mp->m_fsname);
+               if (!mp->m_logdev_targp)
+                       goto out_free_rtdev_targ;
+       } else {
+               mp->m_logdev_targp = mp->m_ddev_targp;
+       }
+
+       return 0;
+
+ out_free_rtdev_targ:
+       if (mp->m_rtdev_targp)
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
+ out_free_ddev_targ:
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
+ out_close_rtdev:
+       if (rtdev)
+               xfs_blkdev_put(rtdev);
+ out_close_logdev:
+       if (logdev && logdev != ddev)
+               xfs_blkdev_put(logdev);
+ out:
+       return error;
+}
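/*
 * Illustrative usage sketch (device names are placeholders): configurations
 * (2) and (3) above correspond to mounts such as
 *	mount -t xfs -o logdev=/dev/sdb1 /dev/sda1 /mnt
 *	mount -t xfs -o logdev=/dev/sdb1,rtdev=/dev/sdc1 /dev/sda1 /mnt
 * xfs_parseargs() stashes the names in m_logname/m_rtname, and this
 * function opens them exclusively via blkdev_get_by_path().
 */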
+
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ */
+STATIC int
+xfs_setup_devices(
+       struct xfs_mount        *mp)
+{
+       int                     error;
+
+       error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+                                   mp->m_sb.sb_sectsize);
+       if (error)
+               return error;
+
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               unsigned int    log_sector_size = BBSIZE;
+
+               if (xfs_sb_version_hassector(&mp->m_sb))
+                       log_sector_size = mp->m_sb.sb_logsectsize;
+               error = xfs_setsize_buftarg(mp->m_logdev_targp,
+                                           mp->m_sb.sb_blocksize,
+                                           log_sector_size);
+               if (error)
+                       return error;
+       }
+       if (mp->m_rtdev_targp) {
+               error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+                                           mp->m_sb.sb_blocksize,
+                                           mp->m_sb.sb_sectsize);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+/* Catch misguided souls that try to use this interface on XFS */
+STATIC struct inode *
+xfs_fs_alloc_inode(
+       struct super_block      *sb)
+{
+       BUG();
+       return NULL;
+}
+
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
+STATIC void
+xfs_fs_destroy_inode(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       trace_xfs_destroy_inode(ip);
+
+       XFS_STATS_INC(vn_reclaim);
+
+       /* bad inode, get out here ASAP */
+       if (is_bad_inode(inode))
+               goto out_reclaim;
+
+       xfs_ioend_wait(ip);
+
+       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+       /*
+        * We should never get here with one of the reclaim flags already set.
+        */
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+
+       /*
+        * We always use background reclaim here because even if the
+        * inode is clean, it still may be under IO and hence we have
+        * to take the flush lock. The background reclaim path handles
+        * this more efficiently than we can here, so simply let background
+        * reclaim tear down all inodes.
+        */
+out_reclaim:
+       xfs_inode_set_reclaim_tag(ip);
+}
+
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly initialise
+ * fields in the xfs inode that are left in the initialised state
+ * when freeing the inode.
+ */
+STATIC void
+xfs_fs_inode_init_once(
+       void                    *inode)
+{
+       struct xfs_inode        *ip = inode;
+
+       memset(ip, 0, sizeof(struct xfs_inode));
+
+       /* vfs inode */
+       inode_init_once(VFS_I(ip));
+
+       /* xfs inode */
+       atomic_set(&ip->i_iocount, 0);
+       atomic_set(&ip->i_pincount, 0);
+       spin_lock_init(&ip->i_flags_lock);
+       init_waitqueue_head(&ip->i_ipin_wait);
+       /*
+        * Because we want to use a counting completion, complete
+        * the flush completion once to allow a single access to
+        * the flush completion without blocking.
+        */
+       init_completion(&ip->i_flush);
+       complete(&ip->i_flush);
+
+       mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                    "xfsino", ip->i_ino);
+}
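/*
 * Illustrative kernel-style fragment (not from this patch, not standalone):
 * the counting-completion idiom set up above.  Completing once pre-loads
 * the count so exactly one waiter gets through without blocking; the flush
 * "lock" is then held until someone completes it again.
 */
struct completion flush;

init_completion(&flush);	/* count = 0 */
complete(&flush);		/* count = 1: flush lock is free */

wait_for_completion(&flush);	/* count 1 -> 0: lock taken, no sleep */
/* ... a second wait_for_completion() here would block ... */
complete(&flush);		/* count 0 -> 1: lock released */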
+
+/*
+ * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
+ * we catch unlogged VFS level updates to the inode.
+ *
+ * We need the barrier() to maintain correct ordering between unlogged
+ * updates and the transaction commit code that clears the i_update_core
+ * field. This requires all updates to be completed before marking the
+ * inode dirty.
+ */
+STATIC void
+xfs_fs_dirty_inode(
+       struct inode    *inode,
+       int             flags)
+{
+       barrier();
+       XFS_I(inode)->i_update_core = 1;
+}
+
+STATIC int
+xfs_log_inode(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               /* we need to return with the lock held shared */
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+               return error;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Note - it's possible that we might have pushed ourselves out of the
+        * way during trans_reserve which would flush the inode.  But there's
+        * no guarantee that the inode buffer has actually gone out yet (it's
+        * delwri).  Plus the buffer could be pinned anyway if it's part of
+        * an inode in another recent transaction.  So we play it safe and
+        * fire off the transaction anyway.
+        */
+       xfs_trans_ijoin(tp, ip);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       error = xfs_trans_commit(tp, 0);
+       xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+
+       return error;
+}
+
+STATIC int
+xfs_fs_write_inode(
+       struct inode            *inode,
+       struct writeback_control *wbc)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     error = EAGAIN;
+
+       trace_xfs_write_inode(ip);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       if (wbc->sync_mode == WB_SYNC_ALL) {
+               /*
+                * Make sure the inode has made it into the log.  Instead
+                * of forcing it all the way to stable storage using a
+                * synchronous transaction we let the log force inside the
+                * ->sync_fs call do that for us, which reduces the number
+                * of synchronous log forces dramatically.
+                */
+               xfs_ioend_wait(ip);
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+               if (ip->i_update_core) {
+                       error = xfs_log_inode(ip);
+                       if (error)
+                               goto out_unlock;
+               }
+       } else {
+               /*
+                * We make this non-blocking if the inode is contended,
+                * returning EAGAIN to indicate to the caller that we did
+                * not succeed.  This prevents the flush path from blocking
+                * on inodes that are inside another operation right now;
+                * they get caught later by xfs_sync.
+                */
+               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
+                       goto out;
+
+               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+                       goto out_unlock;
+
+               /*
+                * Now we have the flush lock and the inode is not pinned, we
+                * can check if the inode is really clean as we know that
+                * there are no pending transaction completions, it is not
+                * waiting on the delayed write queue and there is no IO in
+                * progress.
+                */
+               if (xfs_inode_clean(ip)) {
+                       xfs_ifunlock(ip);
+                       error = 0;
+                       goto out_unlock;
+               }
+               error = xfs_iflush(ip, SYNC_TRYLOCK);
+       }
+
+ out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ out:
+       /*
+        * if we failed to write out the inode then mark
+        * it dirty again so we'll try again later.
+        */
+       if (error)
+               xfs_mark_inode_dirty_sync(ip);
+       return -error;
+}
+
+STATIC void
+xfs_fs_evict_inode(
+       struct inode            *inode)
+{
+       xfs_inode_t             *ip = XFS_I(inode);
+
+       trace_xfs_evict_inode(ip);
+
+       truncate_inode_pages(&inode->i_data, 0);
+       end_writeback(inode);
+       XFS_STATS_INC(vn_rele);
+       XFS_STATS_INC(vn_remove);
+       XFS_STATS_DEC(vn_active);
+
+       /*
+        * The iolock is used by the file system to coordinate reads,
+        * writes, and block truncates.  Up to this point the lock
+        * protected concurrent accesses by users of the inode.  But
+        * from here forward we're doing some final processing of the
+        * inode because we're done with it, and although we reuse the
+        * iolock for protection it is really a distinct lock class
+        * (in the lockdep sense) from before.  To keep lockdep happy
+        * (and basically indicate what we are doing), we explicitly
+        * re-init the iolock here.
+        */
+       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+       lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+                       &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
+
+       xfs_inactive(ip);
+}
+
+STATIC void
+xfs_free_fsname(
+       struct xfs_mount        *mp)
+{
+       kfree(mp->m_fsname);
+       kfree(mp->m_rtname);
+       kfree(mp->m_logname);
+}
+
+STATIC void
+xfs_fs_put_super(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_syncd_stop(mp);
+
+       /*
+        * Blow away any referenced inode in the filestreams cache.
+        * This can and will cause log traffic as inodes go inactive
+        * here.
+        */
+       xfs_filestream_unmount(mp);
+
+       XFS_bflush(mp->m_ddev_targp);
+
+       xfs_unmountfs(mp);
+       xfs_freesb(mp);
+       xfs_icsb_destroy_counters(mp);
+       xfs_close_devices(mp);
+       xfs_free_fsname(mp);
+       kfree(mp);
+}
+
+STATIC int
+xfs_fs_sync_fs(
+       struct super_block      *sb,
+       int                     wait)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       int                     error;
+
+       /*
+        * Not much we can do for the first async pass.  Writing out the
+        * superblock would be counter-productive as we are going to redirty
+        * it when writing out other data and metadata (and writing out a
+        * single block is quite fast anyway).
+        *
+        * Try to asynchronously kick off quota syncing at least.
+        */
+       if (!wait) {
+               xfs_qm_sync(mp, SYNC_TRYLOCK);
+               return 0;
+       }
+
+       error = xfs_quiesce_data(mp);
+       if (error)
+               return -error;
+
+       if (laptop_mode) {
+               /*
+                * The disk must be active because we're syncing.
+                * We schedule xfssyncd now (now that the disk is
+                * active) instead of later (when it might not be).
+                */
+               flush_delayed_work_sync(&mp->m_sync_work);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_fs_statfs(
+       struct dentry           *dentry,
+       struct kstatfs          *statp)
+{
+       struct xfs_mount        *mp = XFS_M(dentry->d_sb);
+       xfs_sb_t                *sbp = &mp->m_sb;
+       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
+       __uint64_t              fakeinos, id;
+       xfs_extlen_t            lsize;
+       __int64_t               ffree;
+
+       statp->f_type = XFS_SB_MAGIC;
+       statp->f_namelen = MAXNAMELEN - 1;
+
+       id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
+       statp->f_fsid.val[0] = (u32)id;
+       statp->f_fsid.val[1] = (u32)(id >> 32);
+
+       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+
+       spin_lock(&mp->m_sb_lock);
+       statp->f_bsize = sbp->sb_blocksize;
+       lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
+       statp->f_blocks = sbp->sb_dblocks - lsize;
+       statp->f_bfree = statp->f_bavail =
+                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       fakeinos = statp->f_bfree << sbp->sb_inopblog;
+       statp->f_files =
+           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+       if (mp->m_maxicount)
+               statp->f_files = min_t(typeof(statp->f_files),
+                                       statp->f_files,
+                                       mp->m_maxicount);
+
+       /* make sure statp->f_ffree does not underflow */
+       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+       statp->f_ffree = max_t(__int64_t, ffree, 0);
+
+       spin_unlock(&mp->m_sb_lock);
+
+       if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+           ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+                             (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+               xfs_qm_statvfs(ip, statp);
+       return 0;
+}
+
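Because XFS allocates inodes dynamically, the f_files value computed above is
an estimate rather than a fixed limit: every free block could in principle hold
another (1 << sb_inopblog) inodes. A standalone sketch of that arithmetic with
made-up numbers (the MIN() against XFS_MAXINUMBER and the m_maxicount clamp are
elided):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t f_bfree = 1000000;	/* free blocks (assumed) */
	uint64_t sb_icount = 50000;	/* inodes allocated so far (assumed) */
	uint64_t sb_ifree = 1000;	/* of those, currently free (assumed) */
	int sb_inopblog = 4;		/* log2(inodes per block), assumed */

	/* every free block could hold 2^sb_inopblog more inodes */
	uint64_t fakeinos = f_bfree << sb_inopblog;
	uint64_t f_files = sb_icount + fakeinos;

	/* the max_t() guard above: don't let f_ffree underflow */
	int64_t ffree = (int64_t)f_files - (int64_t)(sb_icount - sb_ifree);
	if (ffree < 0)
		ffree = 0;

	printf("f_files=%llu f_ffree=%lld\n",
	       (unsigned long long)f_files, (long long)ffree);
	return 0;
}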
+STATIC void
+xfs_save_resvblks(struct xfs_mount *mp)
+{
+       __uint64_t resblks = 0;
+
+       mp->m_resblks_save = mp->m_resblks;
+       xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC void
+xfs_restore_resvblks(struct xfs_mount *mp)
+{
+       __uint64_t resblks;
+
+       if (mp->m_resblks_save) {
+               resblks = mp->m_resblks_save;
+               mp->m_resblks_save = 0;
+       } else
+               resblks = xfs_default_resblks(mp);
+
+       xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC int
+xfs_fs_remount(
+       struct super_block      *sb,
+       int                     *flags,
+       char                    *options)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       substring_t             args[MAX_OPT_ARGS];
+       char                    *p;
+       int                     error;
+
+       while ((p = strsep(&options, ",")) != NULL) {
+               int token;
+
+               if (!*p)
+                       continue;
+
+               token = match_token(p, tokens, args);
+               switch (token) {
+               case Opt_barrier:
+                       mp->m_flags |= XFS_MOUNT_BARRIER;
+                       break;
+               case Opt_nobarrier:
+                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                       break;
+               default:
+                       /*
+                        * Logically we would return an error here to prevent
+                        * users from believing they might have changed
+                        * mount options using remount which can't be changed.
+                        *
+                        * But unfortunately mount(8) adds all options from
+                        * mtab and fstab to the mount arguments in some cases
+                        * so we can't blindly reject options, but have to
+                        * check for each specified option if it actually
+                        * differs from the currently set option and only
+                        * reject it if that's the case.
+                        *
+                        * Until that is implemented we return success for
+                        * every remount request, and silently ignore all
+                        * options that we can't actually change.
+                        */
+#if 0
+                       xfs_info(mp,
+               "mount option \"%s\" not supported for remount\n", p);
+                       return -EINVAL;
+#else
+                       break;
+#endif
+               }
+       }
+
+       /* ro -> rw */
+       if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+               mp->m_flags &= ~XFS_MOUNT_RDONLY;
+
+               /*
+                * If this is the first remount to writeable state we
+                * might have some superblock changes to update.
+                */
+               if (mp->m_update_flags) {
+                       error = xfs_mount_log_sb(mp, mp->m_update_flags);
+                       if (error) {
+                               xfs_warn(mp, "failed to write sb changes");
+                               return error;
+                       }
+                       mp->m_update_flags = 0;
+               }
+
+               /*
+                * Fill out the reserve pool if it is empty. Use the stashed
+                * value if it is non-zero, otherwise go with the default.
+                */
+               xfs_restore_resvblks(mp);
+       }
+
+       /* rw -> ro */
+       if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+               /*
+                * After we have synced the data but before we sync the
+                * metadata, we need to free up the reserve block pool so that
+                * the used block count in the superblock on disk is correct at
+                * the end of the remount. Stash the current reserve pool size
+                * so that if we get remounted rw, we can return it to the same
+                * size.
+                */
+
+               xfs_quiesce_data(mp);
+               xfs_save_resvblks(mp);
+               xfs_quiesce_attr(mp);
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+       }
+
+       return 0;
+}
+
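The option walk above is driven by strsep() plus the kernel's match_token()
table. A standalone sketch of the same shape, with plain strcmp() standing in
for match_token() and only the two options remount actually honours; the option
string is an assumed example:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
	char opts[] = "rw,barrier,,noatime";	/* assumed remount string */
	char *options = opts, *p;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;	/* skip empty tokens, as above */
		if (strcmp(p, "barrier") == 0)
			printf("enable write barriers\n");
		else if (strcmp(p, "nobarrier") == 0)
			printf("disable write barriers\n");
		else
			printf("silently ignoring \"%s\"\n", p);
	}
	return 0;
}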
+/*
+ * Second stage of a freeze. The data is already frozen so we only
+ * need to take care of the metadata. Once that's done write a dummy
+ * record to dirty the log in case of a crash while frozen.
+ */
+STATIC int
+xfs_fs_freeze(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_save_resvblks(mp);
+       xfs_quiesce_attr(mp);
+       return -xfs_fs_log_dummy(mp);
+}
+
+STATIC int
+xfs_fs_unfreeze(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_restore_resvblks(mp);
+       return 0;
+}
+
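These two callbacks are reached from userspace through the FIFREEZE/FITHAW
ioctls, the usual path when taking block-level snapshots. A minimal sketch,
assuming /mnt/xfs is the mount point:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>		/* FIFREEZE, FITHAW */

int main(void)
{
	int fd = open("/mnt/xfs", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, FIFREEZE, 0) != 0)	/* blocks writers, ->freeze_fs */
		perror("FIFREEZE");
	/* ... snapshot the underlying block device here ... */
	if (ioctl(fd, FITHAW, 0) != 0)		/* ->unfreeze_fs */
		perror("FITHAW");
	close(fd);
	return 0;
}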
+STATIC int
+xfs_fs_show_options(
+       struct seq_file         *m,
+       struct vfsmount         *mnt)
+{
+       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ */
+STATIC int
+xfs_finish_flags(
+       struct xfs_mount        *mp)
+{
+       int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
+
+       /* Fail a mount where the logbuf is smaller than the log stripe */
+       if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+               if (mp->m_logbsize <= 0 &&
+                   mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
+                       mp->m_logbsize = mp->m_sb.sb_logsunit;
+               } else if (mp->m_logbsize > 0 &&
+                          mp->m_logbsize < mp->m_sb.sb_logsunit) {
+                       xfs_warn(mp,
+               "logbuf size must be greater than or equal to log stripe size");
+                       return XFS_ERROR(EINVAL);
+               }
+       } else {
+               /* Fail a mount if the logbuf is larger than 32K */
+               if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
+                       xfs_warn(mp,
+               "logbuf size for version 1 logs must be 16K or 32K");
+                       return XFS_ERROR(EINVAL);
+               }
+       }
+
+       /*
+        * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+        * told by noattr2 to turn it off
+        */
+       if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+           !(mp->m_flags & XFS_MOUNT_NOATTR2))
+               mp->m_flags |= XFS_MOUNT_ATTR2;
+
+       /*
+        * prohibit r/w mounts of read-only filesystems
+        */
+       if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+               xfs_warn(mp,
+                       "cannot mount a read-only filesystem as read-write");
+               return XFS_ERROR(EROFS);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_fs_fill_super(
+       struct super_block      *sb,
+       void                    *data,
+       int                     silent)
+{
+       struct inode            *root;
+       struct xfs_mount        *mp = NULL;
+       int                     flags = 0, error = ENOMEM;
+
+       mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+       if (!mp)
+               goto out;
+
+       spin_lock_init(&mp->m_sb_lock);
+       mutex_init(&mp->m_growlock);
+       atomic_set(&mp->m_active_trans, 0);
+
+       mp->m_super = sb;
+       sb->s_fs_info = mp;
+
+       error = xfs_parseargs(mp, (char *)data);
+       if (error)
+               goto out_free_fsname;
+
+       sb_min_blocksize(sb, BBSIZE);
+       sb->s_xattr = xfs_xattr_handlers;
+       sb->s_export_op = &xfs_export_operations;
+#ifdef CONFIG_XFS_QUOTA
+       sb->s_qcop = &xfs_quotactl_operations;
+#endif
+       sb->s_op = &xfs_super_operations;
+
+       if (silent)
+               flags |= XFS_MFSI_QUIET;
+
+       error = xfs_open_devices(mp);
+       if (error)
+               goto out_free_fsname;
+
+       error = xfs_icsb_init_counters(mp);
+       if (error)
+               goto out_close_devices;
+
+       error = xfs_readsb(mp, flags);
+       if (error)
+               goto out_destroy_counters;
+
+       error = xfs_finish_flags(mp);
+       if (error)
+               goto out_free_sb;
+
+       error = xfs_setup_devices(mp);
+       if (error)
+               goto out_free_sb;
+
+       error = xfs_filestream_mount(mp);
+       if (error)
+               goto out_free_sb;
+
+       /*
+        * we must configure the block size in the superblock before we run the
+        * full mount process, as the mount process can look up and cache inodes.
+        * For the same reason we must also initialise the syncd and register
+        * the inode cache shrinker so that inodes can be reclaimed during
+        * operations like a quotacheck that iterate all inodes in the
+        * filesystem.
+        */
+       sb->s_magic = XFS_SB_MAGIC;
+       sb->s_blocksize = mp->m_sb.sb_blocksize;
+       sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
+       sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+       sb->s_time_gran = 1;
+       set_posix_acl_flag(sb);
+
+       error = xfs_mountfs(mp);
+       if (error)
+               goto out_filestream_unmount;
+
+       error = xfs_syncd_init(mp);
+       if (error)
+               goto out_unmount;
+
+       root = igrab(VFS_I(mp->m_rootip));
+       if (!root) {
+               error = ENOENT;
+               goto out_syncd_stop;
+       }
+       if (is_bad_inode(root)) {
+               error = EINVAL;
+               goto out_syncd_stop;
+       }
+       sb->s_root = d_alloc_root(root);
+       if (!sb->s_root) {
+               error = ENOMEM;
+               goto out_iput;
+       }
+
+       return 0;
+
+ out_filestream_unmount:
+       xfs_filestream_unmount(mp);
+ out_free_sb:
+       xfs_freesb(mp);
+ out_destroy_counters:
+       xfs_icsb_destroy_counters(mp);
+ out_close_devices:
+       xfs_close_devices(mp);
+ out_free_fsname:
+       xfs_free_fsname(mp);
+       kfree(mp);
+ out:
+       return -error;
+
+ out_iput:
+       iput(root);
+ out_syncd_stop:
+       xfs_syncd_stop(mp);
+ out_unmount:
+       /*
+        * Blow away any referenced inode in the filestreams cache.
+        * This can and will cause log traffic as inodes go inactive
+        * here.
+        */
+       xfs_filestream_unmount(mp);
+
+       XFS_bflush(mp->m_ddev_targp);
+
+       xfs_unmountfs(mp);
+       goto out_free_sb;
+}
+
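xfs_fs_fill_super() (and xfs_init_zones() further down) use the classic kernel
goto-unwind idiom: each successfully acquired resource gets a cleanup label,
and the labels release resources in reverse acquisition order. A minimal
standalone sketch of the pattern, with malloc() standing in for the real setup
steps and all names illustrative:

#include <stdio.h>
#include <stdlib.h>

static int fill_super_sketch(void)
{
	void *devices, *counters;
	int error = 1;		/* stand-in for a positive errno */

	devices = malloc(32);
	if (!devices)
		goto out;
	counters = malloc(64);
	if (!counters)
		goto out_close_devices;

	printf("everything acquired\n");
	free(counters);
	free(devices);
	return 0;

 out_close_devices:
	free(devices);
 out:
	return error;
}

int main(void)
{
	return fill_super_sketch();
}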
+STATIC struct dentry *
+xfs_fs_mount(
+       struct file_system_type *fs_type,
+       int                     flags,
+       const char              *dev_name,
+       void                    *data)
+{
+       return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+}
+
+static int
+xfs_fs_nr_cached_objects(
+       struct super_block      *sb)
+{
+       return xfs_reclaim_inodes_count(XFS_M(sb));
+}
+
+static void
+xfs_fs_free_cached_objects(
+       struct super_block      *sb,
+       int                     nr_to_scan)
+{
+       xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
+}
+
+static const struct super_operations xfs_super_operations = {
+       .alloc_inode            = xfs_fs_alloc_inode,
+       .destroy_inode          = xfs_fs_destroy_inode,
+       .dirty_inode            = xfs_fs_dirty_inode,
+       .write_inode            = xfs_fs_write_inode,
+       .evict_inode            = xfs_fs_evict_inode,
+       .put_super              = xfs_fs_put_super,
+       .sync_fs                = xfs_fs_sync_fs,
+       .freeze_fs              = xfs_fs_freeze,
+       .unfreeze_fs            = xfs_fs_unfreeze,
+       .statfs                 = xfs_fs_statfs,
+       .remount_fs             = xfs_fs_remount,
+       .show_options           = xfs_fs_show_options,
+       .nr_cached_objects      = xfs_fs_nr_cached_objects,
+       .free_cached_objects    = xfs_fs_free_cached_objects,
+};
+
+static struct file_system_type xfs_fs_type = {
+       .owner                  = THIS_MODULE,
+       .name                   = "xfs",
+       .mount                  = xfs_fs_mount,
+       .kill_sb                = kill_block_super,
+       .fs_flags               = FS_REQUIRES_DEV,
+};
+
+STATIC int __init
+xfs_init_zones(void)
+{
+       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+       if (!xfs_ioend_zone)
+               goto out;
+
+       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+                                                 xfs_ioend_zone);
+       if (!xfs_ioend_pool)
+               goto out_destroy_ioend_zone;
+
+       xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+                                               "xfs_log_ticket");
+       if (!xfs_log_ticket_zone)
+               goto out_destroy_ioend_pool;
+
+       xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+                                               "xfs_bmap_free_item");
+       if (!xfs_bmap_free_item_zone)
+               goto out_destroy_log_ticket_zone;
+
+       xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+                                               "xfs_btree_cur");
+       if (!xfs_btree_cur_zone)
+               goto out_destroy_bmap_free_item_zone;
+
+       xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+                                               "xfs_da_state");
+       if (!xfs_da_state_zone)
+               goto out_destroy_btree_cur_zone;
+
+       xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+       if (!xfs_dabuf_zone)
+               goto out_destroy_da_state_zone;
+
+       xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+       if (!xfs_ifork_zone)
+               goto out_destroy_dabuf_zone;
+
+       xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+       if (!xfs_trans_zone)
+               goto out_destroy_ifork_zone;
+
+       xfs_log_item_desc_zone =
+               kmem_zone_init(sizeof(struct xfs_log_item_desc),
+                              "xfs_log_item_desc");
+       if (!xfs_log_item_desc_zone)
+               goto out_destroy_trans_zone;
+
+       /*
+        * The size of the zone allocated buf log item is the maximum
+        * size possible under XFS.  This wastes a little bit of memory,
+        * but it is much faster.
+        */
+       xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+                               (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
+                                 NBWORD) * sizeof(int))), "xfs_buf_item");
+       if (!xfs_buf_item_zone)
+               goto out_destroy_log_item_desc_zone;
+
+       xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+                       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+                                sizeof(xfs_extent_t))), "xfs_efd_item");
+       if (!xfs_efd_zone)
+               goto out_destroy_buf_item_zone;
+
+       xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+                       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+                               sizeof(xfs_extent_t))), "xfs_efi_item");
+       if (!xfs_efi_zone)
+               goto out_destroy_efd_zone;
+
+       xfs_inode_zone =
+               kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+                       KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
+                       xfs_fs_inode_init_once);
+       if (!xfs_inode_zone)
+               goto out_destroy_efi_zone;
+
+       xfs_ili_zone =
+               kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+                                       KM_ZONE_SPREAD, NULL);
+       if (!xfs_ili_zone)
+               goto out_destroy_inode_zone;
+
+       return 0;
+
+ out_destroy_inode_zone:
+       kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+       kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+       kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+       kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_log_item_desc_zone:
+       kmem_zone_destroy(xfs_log_item_desc_zone);
+ out_destroy_trans_zone:
+       kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+       kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+       kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+       kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+       kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+       kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+       kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+       mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+       kmem_zone_destroy(xfs_ioend_zone);
+ out:
+       return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_zones(void)
+{
+       kmem_zone_destroy(xfs_ili_zone);
+       kmem_zone_destroy(xfs_inode_zone);
+       kmem_zone_destroy(xfs_efi_zone);
+       kmem_zone_destroy(xfs_efd_zone);
+       kmem_zone_destroy(xfs_buf_item_zone);
+       kmem_zone_destroy(xfs_log_item_desc_zone);
+       kmem_zone_destroy(xfs_trans_zone);
+       kmem_zone_destroy(xfs_ifork_zone);
+       kmem_zone_destroy(xfs_dabuf_zone);
+       kmem_zone_destroy(xfs_da_state_zone);
+       kmem_zone_destroy(xfs_btree_cur_zone);
+       kmem_zone_destroy(xfs_bmap_free_item_zone);
+       kmem_zone_destroy(xfs_log_ticket_zone);
+       mempool_destroy(xfs_ioend_pool);
+       kmem_zone_destroy(xfs_ioend_zone);
+}
+
+STATIC int __init
+xfs_init_workqueues(void)
+{
+       /*
+        * max_active is set to 8 to give enough concurrency to allow
+        * multiple work operations on each CPU to run. This allows multiple
+        * filesystems to be running sync work concurrently, and scales with
+        * the number of CPUs in the system.
+        */
+       xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+       if (!xfs_syncd_wq)
+               goto out;
+
+       xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+       if (!xfs_ail_wq)
+               goto out_destroy_syncd;
+
+       return 0;
+
+out_destroy_syncd:
+       destroy_workqueue(xfs_syncd_wq);
+out:
+       return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_workqueues(void)
+{
+       destroy_workqueue(xfs_ail_wq);
+       destroy_workqueue(xfs_syncd_wq);
+}
+
+STATIC int __init
+init_xfs_fs(void)
+{
+       int                     error;
+
+       printk(KERN_INFO XFS_VERSION_STRING " with "
+                        XFS_BUILD_OPTIONS " enabled\n");
+
+       xfs_ioend_init();
+       xfs_dir_startup();
+
+       error = xfs_init_zones();
+       if (error)
+               goto out;
+
+       error = xfs_init_workqueues();
+       if (error)
+               goto out_destroy_zones;
+
+       error = xfs_mru_cache_init();
+       if (error)
+               goto out_destroy_wq;
+
+       error = xfs_filestream_init();
+       if (error)
+               goto out_mru_cache_uninit;
+
+       error = xfs_buf_init();
+       if (error)
+               goto out_filestream_uninit;
+
+       error = xfs_init_procfs();
+       if (error)
+               goto out_buf_terminate;
+
+       error = xfs_sysctl_register();
+       if (error)
+               goto out_cleanup_procfs;
+
+       vfs_initquota();
+
+       error = register_filesystem(&xfs_fs_type);
+       if (error)
+               goto out_sysctl_unregister;
+       return 0;
+
+ out_sysctl_unregister:
+       xfs_sysctl_unregister();
+ out_cleanup_procfs:
+       xfs_cleanup_procfs();
+ out_buf_terminate:
+       xfs_buf_terminate();
+ out_filestream_uninit:
+       xfs_filestream_uninit();
+ out_mru_cache_uninit:
+       xfs_mru_cache_uninit();
+ out_destroy_wq:
+       xfs_destroy_workqueues();
+ out_destroy_zones:
+       xfs_destroy_zones();
+ out:
+       return error;
+}
+
+STATIC void __exit
+exit_xfs_fs(void)
+{
+       vfs_exitquota();
+       unregister_filesystem(&xfs_fs_type);
+       xfs_sysctl_unregister();
+       xfs_cleanup_procfs();
+       xfs_buf_terminate();
+       xfs_filestream_uninit();
+       xfs_mru_cache_uninit();
+       xfs_destroy_workqueues();
+       xfs_destroy_zones();
+}
+
+module_init(init_xfs_fs);
+module_exit(exit_xfs_fs);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
+MODULE_LICENSE("GPL");
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
new file mode 100644 (file)
index 0000000..50a3266
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPER_H__
+#define __XFS_SUPER_H__
+
+#include <linux/exportfs.h>
+
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_qm_init(void);
+extern void xfs_qm_exit(void);
+# define vfs_initquota()       xfs_qm_init()
+# define vfs_exitquota()       xfs_qm_exit()
+#else
+# define vfs_initquota()       do { } while (0)
+# define vfs_exitquota()       do { } while (0)
+#endif
+
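When quota support is compiled out, vfs_initquota() and vfs_exitquota() become
do { } while (0) stubs. That shape, rather than an empty expansion, keeps the
macro a single statement so it parses correctly in unbraced if/else bodies. A
minimal illustration (FEATURE and feature_init() are made up for the example):

#include <stdio.h>

#define FEATURE 0

#if FEATURE
# define feature_init()	printf("feature on\n")
#else
# define feature_init()	do { } while (0)
#endif

int main(void)
{
	if (FEATURE)
		feature_init();		/* still one statement when disabled */
	else
		printf("feature compiled out\n");
	return 0;
}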
+#ifdef CONFIG_XFS_POSIX_ACL
+# define XFS_ACL_STRING                "ACLs, "
+# define set_posix_acl_flag(sb)        ((sb)->s_flags |= MS_POSIXACL)
+#else
+# define XFS_ACL_STRING
+# define set_posix_acl_flag(sb)        do { } while (0)
+#endif
+
+#define XFS_SECURITY_STRING    "security attributes, "
+
+#ifdef CONFIG_XFS_RT
+# define XFS_REALTIME_STRING   "realtime, "
+#else
+# define XFS_REALTIME_STRING
+#endif
+
+#if XFS_BIG_BLKNOS
+# if XFS_BIG_INUMS
+#  define XFS_BIGFS_STRING     "large block/inode numbers, "
+# else
+#  define XFS_BIGFS_STRING     "large block numbers, "
+# endif
+#else
+# define XFS_BIGFS_STRING
+#endif
+
+#ifdef DEBUG
+# define XFS_DBG_STRING                "debug"
+#else
+# define XFS_DBG_STRING                "no debug"
+#endif
+
+#define XFS_VERSION_STRING     "SGI XFS"
+#define XFS_BUILD_OPTIONS      XFS_ACL_STRING \
+                               XFS_SECURITY_STRING \
+                               XFS_REALTIME_STRING \
+                               XFS_BIGFS_STRING \
+                               XFS_DBG_STRING /* DBG must be last */
+
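XFS_BUILD_OPTIONS relies on C's compile-time concatenation of adjacent string
literals. Assuming, say, a build with ACLs and DEBUG on and the other options
off, it collapses into one string; a standalone sketch:

#include <stdio.h>

#define ACL_STRING	"ACLs, "
#define SEC_STRING	"security attributes, "
#define DBG_STRING	"debug"

/* adjacent literals concatenate: "ACLs, security attributes, debug" */
#define BUILD_OPTIONS	ACL_STRING SEC_STRING DBG_STRING

int main(void)
{
	printf("SGI XFS with %s enabled\n", BUILD_OPTIONS);
	return 0;
}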
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_buftarg;
+struct block_device;
+
+extern __uint64_t xfs_max_file_offset(unsigned int);
+
+extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
+
+extern const struct export_operations xfs_export_operations;
+extern const struct xattr_handler *xfs_xattr_handlers[];
+extern const struct quotactl_ops xfs_quotactl_operations;
+
+#define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
+
+#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
new file mode 100644 (file)
index 0000000..4604f90
--- /dev/null
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_dinode.h"
+#include "xfs_error.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
+#include "xfs_quota.h"
+#include "xfs_trace.h"
+#include "xfs_fsops.h"
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+struct workqueue_struct        *xfs_syncd_wq;  /* sync workqueue */
+
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade-off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH       32
+
+STATIC int
+xfs_inode_ag_walk_grab(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = VFS_I(ip);
+
+       ASSERT(rcu_read_lock_held());
+
+       /*
+        * check for stale RCU freed inode
+        *
+        * If the inode has been reallocated, it doesn't matter if it's not in
+        * the AG we are walking - we are walking for writeback, so if it
+        * passes all the "valid inode" checks and is dirty, then we'll write
+        * it back anyway.  If it has been reallocated and is still being
+        * initialised, the XFS_INEW check below will catch it.
+        */
+       spin_lock(&ip->i_flags_lock);
+       if (!ip->i_ino)
+               goto out_unlock_noent;
+
+       /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+       if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+               goto out_unlock_noent;
+       spin_unlock(&ip->i_flags_lock);
+
+       /* nothing to sync during shutdown */
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return EFSCORRUPTED;
+
+       /* If we can't grab the inode, it must be on its way to reclaim. */
+       if (!igrab(inode))
+               return ENOENT;
+
+       if (is_bad_inode(inode)) {
+               IRELE(ip);
+               return ENOENT;
+       }
+
+       /* inode is valid */
+       return 0;
+
+out_unlock_noent:
+       spin_unlock(&ip->i_flags_lock);
+       return ENOENT;
+}
+
+STATIC int
+xfs_inode_ag_walk(
+       struct xfs_mount        *mp,
+       struct xfs_perag        *pag,
+       int                     (*execute)(struct xfs_inode *ip,
+                                          struct xfs_perag *pag, int flags),
+       int                     flags)
+{
+       uint32_t                first_index;
+       int                     last_error = 0;
+       int                     skipped;
+       int                     done;
+       int                     nr_found;
+
+restart:
+       done = 0;
+       skipped = 0;
+       first_index = 0;
+       nr_found = 0;
+       do {
+               struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+               int             error = 0;
+               int             i;
+
+               rcu_read_lock();
+               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH);
+               if (!nr_found) {
+                       rcu_read_unlock();
+                       break;
+               }
+
+               /*
+                * Grab the inodes before we drop the lock.  If we found
+                * nothing, nr_found == 0 and the loop will be skipped.
+                */
+               for (i = 0; i < nr_found; i++) {
+                       struct xfs_inode *ip = batch[i];
+
+                       if (done || xfs_inode_ag_walk_grab(ip))
+                               batch[i] = NULL;
+
+                       /*
+                        * Update the index for the next lookup. Catch
+                        * overflows into the next AG range which can occur if
+                        * we have inodes in the last block of the AG and we
+                        * are currently pointing to the last inode.
+                        *
+                        * Because we may see inodes that are from the wrong AG
+                        * due to RCU freeing and reallocation, only update the
+                        * index if it lies in this AG. It was a race that led
+                        * us to see this inode, so another lookup from the
+                        * same index will not find it again.
+                        */
+                       if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+                               continue;
+                       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                       if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                               done = 1;
+               }
+
+               /* unlock now we've grabbed the inodes. */
+               rcu_read_unlock();
+
+               for (i = 0; i < nr_found; i++) {
+                       if (!batch[i])
+                               continue;
+                       error = execute(batch[i], pag, flags);
+                       IRELE(batch[i]);
+                       if (error == EAGAIN) {
+                               skipped++;
+                               continue;
+                       }
+                       if (error && last_error != EFSCORRUPTED)
+                               last_error = error;
+               }
+
+               /* bail out if the filesystem is corrupted.  */
+               if (error == EFSCORRUPTED)
+                       break;
+
+               cond_resched();
+
+       } while (nr_found && !done);
+
+       if (skipped) {
+               delay(1);
+               goto restart;
+       }
+       return last_error;
+}
+
+int
+xfs_inode_ag_iterator(
+       struct xfs_mount        *mp,
+       int                     (*execute)(struct xfs_inode *ip,
+                                          struct xfs_perag *pag, int flags),
+       int                     flags)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+
+       ag = 0;
+       while ((pag = xfs_perag_get(mp, ag))) {
+               ag = pag->pag_agno + 1;
+               error = xfs_inode_ag_walk(mp, pag, execute, flags);
+               xfs_perag_put(pag);
+               if (error) {
+                       last_error = error;
+                       if (error == EFSCORRUPTED)
+                               break;
+               }
+       }
+       return XFS_ERROR(last_error);
+}
+
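Callers hand xfs_inode_ag_iterator() an execute callback that is invoked once
per grabbed inode; the iterator owns traversal, batching and reference
counting, while the callback only does per-inode work. A standalone sketch of
that callback-iterator split, using generic objects instead of inodes (all
names and error handling illustrative):

#include <stdio.h>

struct obj { int id; };

static int walk(struct obj *objs, int n,
		int (*execute)(struct obj *o, int flags), int flags)
{
	int i, error, last_error = 0;

	for (i = 0; i < n; i++) {
		error = execute(&objs[i], flags);
		if (error)
			last_error = error;	/* remember it, keep walking */
	}
	return last_error;
}

static int print_obj(struct obj *o, int flags)
{
	printf("visiting object %d\n", o->id);
	return 0;
}

int main(void)
{
	struct obj objs[] = { { 1 }, { 2 }, { 3 } };

	return walk(objs, 3, print_obj, 0);
}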
+STATIC int
+xfs_sync_inode_data(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       struct inode            *inode = VFS_I(ip);
+       struct address_space *mapping = inode->i_mapping;
+       int                     error = 0;
+
+       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+               goto out_wait;
+
+       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+               if (flags & SYNC_TRYLOCK)
+                       goto out_wait;
+               xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       }
+
+       error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+                               0 : XBF_ASYNC, FI_NONE);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ out_wait:
+       if (flags & SYNC_WAIT)
+               xfs_ioend_wait(ip);
+       return error;
+}
+
+STATIC int
+xfs_sync_inode_attr(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       int                     error = 0;
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       if (xfs_inode_clean(ip))
+               goto out_unlock;
+       if (!xfs_iflock_nowait(ip)) {
+               if (!(flags & SYNC_WAIT))
+                       goto out_unlock;
+               xfs_iflock(ip);
+       }
+
+       if (xfs_inode_clean(ip)) {
+               xfs_ifunlock(ip);
+               goto out_unlock;
+       }
+
+       error = xfs_iflush(ip, flags);
+
+       /*
+        * We don't want to try again on non-blocking flushes that can't run
+        * again immediately. If an inode really must be written, then that's
+        * what the SYNC_WAIT flag is for.
+        */
+       if (error == EAGAIN) {
+               ASSERT(!(flags & SYNC_WAIT));
+               error = 0;
+       }
+
+ out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       return error;
+}
+
+/*
+ * Write out pagecache data for the whole filesystem.
+ */
+STATIC int
+xfs_sync_data(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       int                     error;
+
+       ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
+
+       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
+       if (error)
+               return XFS_ERROR(error);
+
+       xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
+       return 0;
+}
+
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ */
+STATIC int
+xfs_sync_attr(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       ASSERT((flags & ~SYNC_WAIT) == 0);
+
+       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
+}
+
+STATIC int
+xfs_sync_fsdata(
+       struct xfs_mount        *mp)
+{
+       struct xfs_buf          *bp;
+
+       /*
+        * If the buffer is pinned then push on the log so we won't get stuck
+        * waiting in the write for someone, maybe ourselves, to flush the log.
+        *
+        * Even though we just pushed the log above, we did not have the
+        * superblock buffer locked at that point so it can become pinned in
+        * between there and here.
+        */
+       bp = xfs_getsb(mp, 0);
+       if (xfs_buf_ispinned(bp))
+               xfs_log_force(mp, 0);
+
+       return xfs_bwrite(mp, bp);
+}
+
+/*
+ * When remounting a filesystem read-only or freezing the filesystem, we have
+ * two phases to execute. This first phase is syncing the data before we
+ * quiesce the filesystem, and the second is flushing all the inodes out after
+ * we've waited for all the transactions created by the first phase to
+ * complete. The second phase ensures that the inodes are written to their
+ * location on disk rather than just existing in transactions in the log. This
+ * means after a quiesce there is no log replay required to write the inodes to
+ * disk (this is the main difference between a sync and a quiesce).
+ */
+/*
+ * First stage of freeze - no writers will make progress now we are here,
+ * so we flush delwri and delalloc buffers here, then wait for all I/O to
+ * complete.  Data is frozen at that point. Metadata is not frozen;
+ * transactions can still occur here, so don't bother flushing the buftarg
+ * because it'll just get dirty again.
+ */
+int
+xfs_quiesce_data(
+       struct xfs_mount        *mp)
+{
+       int                     error, error2 = 0;
+
+       xfs_qm_sync(mp, SYNC_TRYLOCK);
+       xfs_qm_sync(mp, SYNC_WAIT);
+
+       /* force out the newly dirtied log buffers */
+       xfs_log_force(mp, XFS_LOG_SYNC);
+
+       /* write superblock and hoover up shutdown errors */
+       error = xfs_sync_fsdata(mp);
+
+       /* make sure all delwri buffers are written out */
+       xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+       /* mark the log as covered if needed */
+       if (xfs_log_need_covered(mp))
+               error2 = xfs_fs_log_dummy(mp);
+
+       /* flush data-only devices */
+       if (mp->m_rtdev_targp)
+               XFS_bflush(mp->m_rtdev_targp);
+
+       return error ? error : error2;
+}
+
+STATIC void
+xfs_quiesce_fs(
+       struct xfs_mount        *mp)
+{
+       int     count = 0, pincount;
+
+       xfs_reclaim_inodes(mp, 0);
+       xfs_flush_buftarg(mp->m_ddev_targp, 0);
+
+       /*
+        * This loop must run at least twice.  The first instance of the loop
+        * will flush most metadata, but that will generate more metadata
+        * (typically directory updates), which then must be flushed and
+        * logged before we can write the unmount record. We also do a sync
+        * reclaim of inodes to catch any that the above delwri flush skipped.
+        */
+       do {
+               xfs_reclaim_inodes(mp, SYNC_WAIT);
+               xfs_sync_attr(mp, SYNC_WAIT);
+               pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+               if (!pincount) {
+                       delay(50);
+                       count++;
+               }
+       } while (count < 2);
+}
+
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceeding.
+ */
+void
+xfs_quiesce_attr(
+       struct xfs_mount        *mp)
+{
+       int     error = 0;
+
+       /* wait for all modifications to complete */
+       while (atomic_read(&mp->m_active_trans) > 0)
+               delay(100);
+
+       /* flush inodes and push all remaining buffers out to disk */
+       xfs_quiesce_fs(mp);
+
+       /*
+        * Just warn here till VFS can correctly support
+        * read-only remount without racing.
+        */
+       WARN_ON(atomic_read(&mp->m_active_trans) != 0);
+
+       /* Push the superblock and write an unmount record */
+       error = xfs_log_sbcount(mp);
+       if (error)
+               xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
+                               "Frozen image may not be consistent.");
+       xfs_log_unmount_write(mp);
+       xfs_unmountfs_writesb(mp);
+}
+
+static void
+xfs_syncd_queue_sync(
+       struct xfs_mount        *mp)
+{
+       queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+                               msecs_to_jiffies(xfs_syncd_centisecs * 10));
+}
+
+/*
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas.  We might need to cover the log to indicate that the
+ * filesystem is idle and not frozen.
+ */
+STATIC void
+xfs_sync_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                       struct xfs_mount, m_sync_work);
+       int             error;
+
+       if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+               /* dgc: errors ignored here */
+               if (mp->m_super->s_frozen == SB_UNFROZEN &&
+                   xfs_log_need_covered(mp))
+                       error = xfs_fs_log_dummy(mp);
+               else
+                       xfs_log_force(mp, 0);
+               error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+               /* start pushing all the metadata that is currently dirty */
+               xfs_ail_push_all(mp->m_ail);
+       }
+
+       /* queue us up again */
+       xfs_syncd_queue_sync(mp);
+}
+
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have its own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+       struct xfs_mount        *mp)
+{
+       /*
+        * We can have inodes enter reclaim after we've shut down the syncd
+        * workqueue during unmount, so don't allow reclaim work to be queued
+        * during unmount.
+        */
+       if (!(mp->m_super->s_flags & MS_ACTIVE))
+               return;
+
+       rcu_read_lock();
+       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+               queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+       }
+       rcu_read_unlock();
+}
+
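A quick worked check of the two intervals, assuming the usual
xfs_syncd_centisecs default of 3000 (the 30s the comment above refers to):
sync work is queued every 3000 * 10 ms = 30s, and reclaim work every
3000 / 6 * 10 ms = 5s. A trivial standalone computation:

#include <stdio.h>

int main(void)
{
	int xfs_syncd_centisecs = 3000;		/* assumed default */

	printf("sync interval:    %d ms\n", xfs_syncd_centisecs * 10);
	printf("reclaim interval: %d ms\n", xfs_syncd_centisecs / 6 * 10);
	return 0;
}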
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                       struct xfs_mount, m_reclaim_work);
+
+       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+       xfs_syncd_queue_reclaim(mp);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations.  At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+
+       queue_work(xfs_syncd_wq, &mp->m_flush_work);
+       flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(work,
+                                       struct xfs_mount, m_flush_work);
+
+       xfs_sync_data(mp, SYNC_TRYLOCK);
+       xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
+}
+
+int
+xfs_syncd_init(
+       struct xfs_mount        *mp)
+{
+       INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+       INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+       INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+       xfs_syncd_queue_sync(mp);
+       xfs_syncd_queue_reclaim(mp);
+
+       return 0;
+}
+
+void
+xfs_syncd_stop(
+       struct xfs_mount        *mp)
+{
+       cancel_delayed_work_sync(&mp->m_sync_work);
+       cancel_delayed_work_sync(&mp->m_reclaim_work);
+       cancel_work_sync(&mp->m_flush_work);
+}
+
+void
+__xfs_inode_set_reclaim_tag(
+       struct xfs_perag        *pag,
+       struct xfs_inode        *ip)
+{
+       radix_tree_tag_set(&pag->pag_ici_root,
+                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+                          XFS_ICI_RECLAIM_TAG);
+
+       if (!pag->pag_ici_reclaimable) {
+               /* propagate the reclaim tag up into the perag radix tree */
+               spin_lock(&ip->i_mount->m_perag_lock);
+               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+                               XFS_ICI_RECLAIM_TAG);
+               spin_unlock(&ip->i_mount->m_perag_lock);
+
+               /* schedule periodic background inode reclaim */
+               xfs_syncd_queue_reclaim(ip->i_mount);
+
+               trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+                                                       -1, _RET_IP_);
+       }
+       pag->pag_ici_reclaimable++;
+}
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+       xfs_inode_t     *ip)
+{
+       struct xfs_mount *mp = ip->i_mount;
+       struct xfs_perag *pag;
+
+       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+       spin_lock(&pag->pag_ici_lock);
+       spin_lock(&ip->i_flags_lock);
+       __xfs_inode_set_reclaim_tag(pag, ip);
+       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+       spin_unlock(&ip->i_flags_lock);
+       spin_unlock(&pag->pag_ici_lock);
+       xfs_perag_put(pag);
+}
+
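The two-level tagging above is driven by a per-AG counter: only the 0 -> 1
transition of pag_ici_reclaimable tags the AG in the mount-wide tree (and
kicks background reclaim); later additions just bump the count. A minimal
sketch of that transition logic, with plain variables standing in for the
radix trees:

#include <stdio.h>

static int per_ag_reclaimable;	/* stands in for pag->pag_ici_reclaimable */
static int ag_tagged;		/* stands in for the m_perag_tree tag */

static void set_reclaim_tag(void)
{
	if (!per_ag_reclaimable) {
		ag_tagged = 1;	/* propagate only on the first inode */
		printf("AG tagged in the upper tree\n");
	}
	per_ag_reclaimable++;
}

int main(void)
{
	set_reclaim_tag();	/* tags the AG */
	set_reclaim_tag();	/* just increments the count */
	printf("reclaimable=%d tagged=%d\n", per_ag_reclaimable, ag_tagged);
	return 0;
}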
+STATIC void
+__xfs_inode_clear_reclaim(
+       xfs_perag_t     *pag,
+       xfs_inode_t     *ip)
+{
+       pag->pag_ici_reclaimable--;
+       if (!pag->pag_ici_reclaimable) {
+               /* clear the reclaim tag from the perag radix tree */
+               spin_lock(&ip->i_mount->m_perag_lock);
+               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+                               XFS_ICI_RECLAIM_TAG);
+               spin_unlock(&ip->i_mount->m_perag_lock);
+               trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+                                                       -1, _RET_IP_);
+       }
+}
+
+void
+__xfs_inode_clear_reclaim_tag(
+       xfs_mount_t     *mp,
+       xfs_perag_t     *pag,
+       xfs_inode_t     *ip)
+{
+       radix_tree_tag_clear(&pag->pag_ici_root,
+                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+       __xfs_inode_clear_reclaim(pag, ip);
+}
+
+/*
+ * Grab the inode for reclaim exclusively.
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+       struct xfs_inode        *ip,
+       int                     flags)
+{
+       ASSERT(rcu_read_lock_held());
+
+       /* quick check for stale RCU freed inode */
+       if (!ip->i_ino)
+               return 1;
+
+       /*
+        * do some unlocked checks first to avoid unnecessary lock traffic.
+        * The first is a flush lock check, the second is an already-in-reclaim
+        * check. Only do these checks if we are not going to block on locks.
+        */
+       if ((flags & SYNC_TRYLOCK) &&
+           (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+               return 1;
+       }
+
+       /*
+        * The radix tree lock here protects a thread in xfs_iget from racing
+        * with us starting reclaim on the inode.  Once we have the
+        * XFS_IRECLAIM flag set it will not touch us.
+        *
+        * Due to RCU lookup, we may find inodes that have been freed and only
+        * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
+        * aren't candidates for reclaim at all, so we must check that
+        * XFS_IRECLAIMABLE is set before proceeding to reclaim.
+        */
+       spin_lock(&ip->i_flags_lock);
+       if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
+           __xfs_iflags_test(ip, XFS_IRECLAIM)) {
+               /* not a reclaim candidate. */
+               spin_unlock(&ip->i_flags_lock);
+               return 1;
+       }
+       __xfs_iflags_set(ip, XFS_IRECLAIM);
+       spin_unlock(&ip->i_flags_lock);
+       return 0;
+}
+
+/*
+ * Inodes in different states need to be treated differently, and the return
+ * value of xfs_iflush is not sufficient to get this right. The following table
+ * lists the inode states and the reclaim actions necessary for non-blocking
+ * reclaim:
+ *
+ *     inode state          iflush ret         required action
+ *      ---------------      ----------         ---------------
+ *     bad                     -               reclaim
+ *     shutdown                EIO             unpin and reclaim
+ *     clean, unpinned         0               reclaim
+ *     stale, unpinned         0               reclaim
+ *     clean, pinned(*)        0               requeue
+ *     stale, pinned           EAGAIN          requeue
+ *     dirty, delwri ok        0               requeue
+ *     dirty, delwri blocked   EAGAIN          requeue
+ *     dirty, sync flush       0               reclaim
+ *
+ * (*) dgc: I don't think the clean, pinned state is possible but it gets
+ * handled anyway given the order of checks implemented.
+ *
+ * As can be seen from the table, the return value of xfs_iflush() is not
+ * sufficient to correctly decide the reclaim action here. The checks in
+ * xfs_iflush() might look like duplicates, but they are not.
+ *
+ * Also, because we get the flush lock first, we know that any inode that has
+ * been flushed delwri has had the flush completed by the time we check that
+ * the inode is clean. The clean inode check needs to be done before flushing
+ * the inode delwri otherwise we would loop forever requeuing clean inodes as
+ * we cannot tell apart a successful delwri flush and a clean inode from the
+ * return value of xfs_iflush().
+ *
+ * Note that because the inode is flushed delayed write by background
+ * writeback, the flush lock may already be held here and waiting on it can
+ * result in very long latencies. Hence for sync reclaims, where we wait on the
+ * flush lock, the caller should push out delayed write inodes first before
+ * trying to reclaim them to minimise the amount of time spent waiting. For
+ * background reclaim, we just requeue the inode for the next pass.
+ *
+ * Hence the order of actions after gaining the locks should be:
+ *     bad             => reclaim
+ *     shutdown        => unpin and reclaim
+ *     pinned, delwri  => requeue
+ *     pinned, sync    => unpin
+ *     stale           => reclaim
+ *     clean           => reclaim
+ *     dirty, delwri   => flush and requeue
+ *     dirty, sync     => flush, wait and reclaim
+ */
+STATIC int
+xfs_reclaim_inode(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     sync_mode)
+{
+       int     error;
+
+restart:
+       error = 0;
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (!xfs_iflock_nowait(ip)) {
+               if (!(sync_mode & SYNC_WAIT))
+                       goto out;
+               xfs_iflock(ip);
+       }
+
+       if (is_bad_inode(VFS_I(ip)))
+               goto reclaim;
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+               xfs_iunpin_wait(ip);
+               goto reclaim;
+       }
+       if (xfs_ipincount(ip)) {
+               if (!(sync_mode & SYNC_WAIT)) {
+                       xfs_ifunlock(ip);
+                       goto out;
+               }
+               xfs_iunpin_wait(ip);
+       }
+       if (xfs_iflags_test(ip, XFS_ISTALE))
+               goto reclaim;
+       if (xfs_inode_clean(ip))
+               goto reclaim;
+
+       /*
+        * Now we have an inode that needs flushing.
+        *
+        * We do a nonblocking flush here even if we are doing a SYNC_WAIT
+        * reclaim as we can deadlock with inode cluster removal.
+        * xfs_ifree_cluster() can lock the inode buffer before it locks the
+        * ip->i_lock, and we are doing the exact opposite here. As a result,
+        * doing a blocking xfs_itobp() to get the cluster buffer will result
+        * in an ABBA deadlock with xfs_ifree_cluster().
+        *
+        * As xfs_ifree_cluster() must gather all inodes that are active in the
+        * cache to mark them stale, if we hit this case we don't actually want
+        * to do IO here - we want the inode marked stale so we can simply
+        * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
+        * just unlock the inode, back off and try again. Hopefully the next
+        * pass through will see the stale flag set on the inode.
+        */
+       error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
+       if (sync_mode & SYNC_WAIT) {
+               if (error == EAGAIN) {
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                       /* backoff longer than in xfs_ifree_cluster */
+                       delay(2);
+                       goto restart;
+               }
+               xfs_iflock(ip);
+               goto reclaim;
+       }
+
+       /*
+        * When we have to flush an inode but don't have SYNC_WAIT set, we
+        * flush the inode out using a delwri buffer and wait for the next
+        * call into reclaim to find it in a clean state instead of waiting for
+        * it now. We also don't return errors here - if the error is transient
+        * then the next reclaim pass will flush the inode, and if the error
+        * is permanent then the next sync reclaim will reclaim the inode and
+        * pass on the error.
+        */
+       if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+               xfs_warn(ip->i_mount,
+                       "inode 0x%llx background reclaim flush failed with %d",
+                       (long long)ip->i_ino, error);
+       }
+out:
+       xfs_iflags_clear(ip, XFS_IRECLAIM);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       /*
+        * We could return EAGAIN here to make reclaim rescan the inode tree in
+        * a short while. However, this just burns CPU time scanning the tree
+        * waiting for IO to complete and xfssyncd never goes back to the idle
+        * state. Instead, return 0 to let the next scheduled background reclaim
+        * attempt to reclaim the inode again.
+        */
+       return 0;
+
+reclaim:
+       xfs_ifunlock(ip);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       XFS_STATS_INC(xs_ig_reclaims);
+       /*
+        * Remove the inode from the per-AG radix tree.
+        *
+        * Because radix_tree_delete won't complain even if the item was never
+        * added to the tree, assert that it's been there before to catch
+        * problems with the inode lifetime early on.
+        */
+       spin_lock(&pag->pag_ici_lock);
+       if (!radix_tree_delete(&pag->pag_ici_root,
+                               XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+               ASSERT(0);
+       __xfs_inode_clear_reclaim(pag, ip);
+       spin_unlock(&pag->pag_ici_lock);
+
+       /*
+        * Here we do an (almost) spurious inode lock in order to coordinate
+        * with inode cache radix tree lookups.  This is because the lookup
+        * can reference the inodes in the cache without taking references.
+        *
+        * We make that OK here by ensuring that we wait until the inode is
+        * unlocked after the lookup before we go ahead and free it.  We get
+        * both the ilock and the iolock because the code may need to drop the
+        * ilock but will still hold the iolock.
+        */
+       xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+       xfs_qm_dqdetach(ip);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+       xfs_inode_free(ip);
+       return error;
+}
+
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shutdown during the filesystem unmount reclaim walk will leak all
+ * the unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+       struct xfs_mount        *mp,
+       int                     flags,
+       int                     *nr_to_scan)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+       int                     trylock = flags & SYNC_TRYLOCK;
+       int                     skipped;
+
+restart:
+       ag = 0;
+       skipped = 0;
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               unsigned long   first_index = 0;
+               int             done = 0;
+               int             nr_found = 0;
+
+               ag = pag->pag_agno + 1;
+
+               if (trylock) {
+                       if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+                               skipped++;
+                               xfs_perag_put(pag);
+                               continue;
+                       }
+                       first_index = pag->pag_ici_reclaim_cursor;
+               } else
+                       mutex_lock(&pag->pag_ici_reclaim_lock);
+
+               do {
+                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+                       int     i;
+
+                       rcu_read_lock();
+                       nr_found = radix_tree_gang_lookup_tag(
+                                       &pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH,
+                                       XFS_ICI_RECLAIM_TAG);
+                       if (!nr_found) {
+                               done = 1;
+                               rcu_read_unlock();
+                               break;
+                       }
+
+                       /*
+                        * Grab the inodes before we drop the lock. If we found
+                        * nothing, nr_found == 0 and the loop will be skipped.
+                        */
+                       for (i = 0; i < nr_found; i++) {
+                               struct xfs_inode *ip = batch[i];
+
+                               if (done || xfs_reclaim_inode_grab(ip, flags))
+                                       batch[i] = NULL;
+
+                               /*
+                                * Update the index for the next lookup. Catch
+                                * overflows into the next AG range which can
+                                * occur if we have inodes in the last block of
+                                * the AG and we are currently pointing to the
+                                * last inode.
+                                *
+                                * Because we may see inodes that are from the
+                                * wrong AG due to RCU freeing and
+                                * reallocation, only update the index if it
+                                * lies in this AG. It was a race that led us
+                                * to see this inode, so another lookup from
+                                * the same index will not find it again.
+                                */
+                               if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
+                                                               pag->pag_agno)
+                                       continue;
+                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                                       done = 1;
+                       }
+
+                       /* unlock now that we've grabbed the inodes. */
+                       rcu_read_unlock();
+
+                       for (i = 0; i < nr_found; i++) {
+                               if (!batch[i])
+                                       continue;
+                               error = xfs_reclaim_inode(batch[i], pag, flags);
+                               if (error && last_error != EFSCORRUPTED)
+                                       last_error = error;
+                       }
+
+                       *nr_to_scan -= XFS_LOOKUP_BATCH;
+
+                       cond_resched();
+
+               } while (nr_found && !done && *nr_to_scan > 0);
+
+               if (trylock && !done)
+                       pag->pag_ici_reclaim_cursor = first_index;
+               else
+                       pag->pag_ici_reclaim_cursor = 0;
+               mutex_unlock(&pag->pag_ici_reclaim_lock);
+               xfs_perag_put(pag);
+       }
+
+       /*
+        * If we skipped any AG, and we still have scan count remaining, do
+        * another pass this time using blocking reclaim semantics (i.e.
+        * waiting on the reclaim locks and ignoring the reclaim cursors). This
+        * ensures that when we get more reclaimers than AGs we block rather
+        * than spin trying to execute reclaim.
+        */
+       if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
+               trylock = 0;
+               goto restart;
+       }
+       return XFS_ERROR(last_error);
+}
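
The overflow check on first_index above is easy to see in isolation. A
standalone demo of the same wrap detection, using a made-up 20-bit
inodes-per-AG mask (the real width comes from the superblock geometry):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t agino_mask = (1u << 20) - 1;   /* assumed AG ino bits */
            uint32_t last = agino_mask;             /* last inode in the AG */
            uint32_t next = (last + 1) & agino_mask;

            /* next went backwards, so the increment wrapped: AG is done */
            assert(next < last);
            printf("cursor wrapped %u -> %u, AG scan complete\n", last, next);
            return 0;
    }

This mirrors the "first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)" test: masking
off the AG bits makes the successor of the last inode compare below it.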
+
+int
+xfs_reclaim_inodes(
+       xfs_mount_t     *mp,
+       int             mode)
+{
+       int             nr_to_scan = INT_MAX;
+
+       return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+}
+
+/*
+ * Scan a certain number of inodes for reclaim.
+ *
+ * When called, we make sure that there is a background (fast) inode reclaim
+ * in progress, while we throttle the speed of reclaim by doing synchronous
+ * reclaim of inodes ourselves. That means if we come across dirty inodes, we
+ * wait for them to be cleaned, which we hope will not take very long because
+ * the background walker has already kicked the IO off on those dirty inodes.
+ */
+void
+xfs_reclaim_inodes_nr(
+       struct xfs_mount        *mp,
+       int                     nr_to_scan)
+{
+       /* kick background reclaimer and push the AIL */
+       xfs_syncd_queue_reclaim(mp);
+       xfs_ail_push_all(mp->m_ail);
+
+       xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
+}
+
+/*
+ * Return the number of reclaimable inodes in the filesystem for
+ * the shrinker to determine how much to reclaim.
+ */
+int
+xfs_reclaim_inodes_count(
+       struct xfs_mount        *mp)
+{
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          ag = 0;
+       int                     reclaimable = 0;
+
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               ag = pag->pag_agno + 1;
+               reclaimable += pag->pag_ici_reclaimable;
+               xfs_perag_put(pag);
+       }
+       return reclaimable;
+}
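
For context, the expected consumers of the two functions above are the
per-superblock shrinker callbacks; the sketch below is schematic (a kernel
fragment, not buildable on its own, and the hook names are assumptions based
on the VFS shrinker API of this kernel cycle, not part of this hunk):

    /* schematic: how the VFS shrinker would drive XFS inode reclaim */
    static int xfs_fs_nr_cached_objects(struct super_block *sb)
    {
            return xfs_reclaim_inodes_count(XFS_M(sb));
    }

    static void xfs_fs_free_cached_objects(struct super_block *sb, int nr)
    {
            xfs_reclaim_inodes_nr(XFS_M(sb), nr);
    }

The count callback sizes the problem for the shrinker; the free callback hands
the grant back as nr_to_scan, which is why xfs_reclaim_inodes_nr kicks
background reclaim and the AIL before doing its own throttled synchronous pass.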
+
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
new file mode 100644 (file)
index 0000000..941202e
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef XFS_SYNC_H
+#define XFS_SYNC_H 1
+
+struct xfs_mount;
+struct xfs_perag;
+
+#define SYNC_WAIT              0x0001  /* wait for i/o to complete */
+#define SYNC_TRYLOCK           0x0002  /* only try to lock inodes */
+
+extern struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
+
+int xfs_syncd_init(struct xfs_mount *mp);
+void xfs_syncd_stop(struct xfs_mount *mp);
+
+int xfs_quiesce_data(struct xfs_mount *mp);
+void xfs_quiesce_attr(struct xfs_mount *mp);
+
+void xfs_flush_inodes(struct xfs_inode *ip);
+
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
+int xfs_reclaim_inodes_count(struct xfs_mount *mp);
+void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
+
+void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
+void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
+                               struct xfs_inode *ip);
+
+int xfs_sync_inode_grab(struct xfs_inode *ip);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+       int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+       int flags);
+
+#endif /* XFS_SYNC_H */
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
new file mode 100644 (file)
index 0000000..ee2d2ad
--- /dev/null
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include "xfs_error.h"
+
+static struct ctl_table_header *xfs_table_header;
+
+#ifdef CONFIG_PROC_FS
+STATIC int
+xfs_stats_clear_proc_handler(
+       ctl_table       *ctl,
+       int             write,
+       void            __user *buffer,
+       size_t          *lenp,
+       loff_t          *ppos)
+{
+       int             c, ret, *valp = ctl->data;
+       __uint32_t      vn_active;
+
+       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+
+       if (!ret && write && *valp) {
+               xfs_notice(NULL, "Clearing xfsstats");
+               for_each_possible_cpu(c) {
+                       preempt_disable();
+                       /* save vn_active, it's a universal truth! */
+                       vn_active = per_cpu(xfsstats, c).vn_active;
+                       memset(&per_cpu(xfsstats, c), 0,
+                              sizeof(struct xfsstats));
+                       per_cpu(xfsstats, c).vn_active = vn_active;
+                       preempt_enable();
+               }
+               xfs_stats_clear = 0;
+       }
+
+       return ret;
+}
+
+STATIC int
+xfs_panic_mask_proc_handler(
+       ctl_table       *ctl,
+       int             write,
+       void            __user *buffer,
+       size_t          *lenp,
+       loff_t          *ppos)
+{
+       int             ret, *valp = ctl->data;
+
+       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+       if (!ret && write) {
+               xfs_panic_mask = *valp;
+#ifdef DEBUG
+               xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
+#endif
+       }
+       return ret;
+}
+#endif /* CONFIG_PROC_FS */
+
+static ctl_table xfs_table[] = {
+       {
+               .procname       = "irix_sgid_inherit",
+               .data           = &xfs_params.sgid_inherit.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.sgid_inherit.min,
+               .extra2         = &xfs_params.sgid_inherit.max
+       },
+       {
+               .procname       = "irix_symlink_mode",
+               .data           = &xfs_params.symlink_mode.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.symlink_mode.min,
+               .extra2         = &xfs_params.symlink_mode.max
+       },
+       {
+               .procname       = "panic_mask",
+               .data           = &xfs_params.panic_mask.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = xfs_panic_mask_proc_handler,
+               .extra1         = &xfs_params.panic_mask.min,
+               .extra2         = &xfs_params.panic_mask.max
+       },
+
+       {
+               .procname       = "error_level",
+               .data           = &xfs_params.error_level.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.error_level.min,
+               .extra2         = &xfs_params.error_level.max
+       },
+       {
+               .procname       = "xfssyncd_centisecs",
+               .data           = &xfs_params.syncd_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.syncd_timer.min,
+               .extra2         = &xfs_params.syncd_timer.max
+       },
+       {
+               .procname       = "inherit_sync",
+               .data           = &xfs_params.inherit_sync.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_sync.min,
+               .extra2         = &xfs_params.inherit_sync.max
+       },
+       {
+               .procname       = "inherit_nodump",
+               .data           = &xfs_params.inherit_nodump.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nodump.min,
+               .extra2         = &xfs_params.inherit_nodump.max
+       },
+       {
+               .procname       = "inherit_noatime",
+               .data           = &xfs_params.inherit_noatim.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_noatim.min,
+               .extra2         = &xfs_params.inherit_noatim.max
+       },
+       {
+               .procname       = "xfsbufd_centisecs",
+               .data           = &xfs_params.xfs_buf_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.xfs_buf_timer.min,
+               .extra2         = &xfs_params.xfs_buf_timer.max
+       },
+       {
+               .procname       = "age_buffer_centisecs",
+               .data           = &xfs_params.xfs_buf_age.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.xfs_buf_age.min,
+               .extra2         = &xfs_params.xfs_buf_age.max
+       },
+       {
+               .procname       = "inherit_nosymlinks",
+               .data           = &xfs_params.inherit_nosym.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nosym.min,
+               .extra2         = &xfs_params.inherit_nosym.max
+       },
+       {
+               .procname       = "rotorstep",
+               .data           = &xfs_params.rotorstep.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.rotorstep.min,
+               .extra2         = &xfs_params.rotorstep.max
+       },
+       {
+               .procname       = "inherit_nodefrag",
+               .data           = &xfs_params.inherit_nodfrg.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nodfrg.min,
+               .extra2         = &xfs_params.inherit_nodfrg.max
+       },
+       {
+               .procname       = "filestream_centisecs",
+               .data           = &xfs_params.fstrm_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.fstrm_timer.min,
+               .extra2         = &xfs_params.fstrm_timer.max,
+       },
+       /* please keep this the last entry */
+#ifdef CONFIG_PROC_FS
+       {
+               .procname       = "stats_clear",
+               .data           = &xfs_params.stats_clear.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = xfs_stats_clear_proc_handler,
+               .extra1         = &xfs_params.stats_clear.min,
+               .extra2         = &xfs_params.stats_clear.max
+       },
+#endif /* CONFIG_PROC_FS */
+
+       {}
+};
+
+static ctl_table xfs_dir_table[] = {
+       {
+               .procname       = "xfs",
+               .mode           = 0555,
+               .child          = xfs_table
+       },
+       {}
+};
+
+static ctl_table xfs_root_table[] = {
+       {
+               .procname       = "fs",
+               .mode           = 0555,
+               .child          = xfs_dir_table
+       },
+       {}
+};
+
+int
+xfs_sysctl_register(void)
+{
+       xfs_table_header = register_sysctl_table(xfs_root_table);
+       if (!xfs_table_header)
+               return -ENOMEM;
+       return 0;
+}
+
+void
+xfs_sysctl_unregister(void)
+{
+       unregister_sysctl_table(xfs_table_header);
+}
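
Once registered, each entry in xfs_table above appears as
/proc/sys/fs/xfs/<procname>. A minimal userspace check (the path assumes
procfs is mounted and CONFIG_SYSCTL is enabled; writing needs root):

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/fs/xfs/error_level", "r+");
            int level;

            if (!f) {
                    perror("fopen");        /* no XFS sysctls, or not root */
                    return 1;
            }
            if (fscanf(f, "%d", &level) == 1)
                    printf("error_level is %d\n", level);
            rewind(f);
            fprintf(f, "5\n");      /* proc_dointvec_minmax range-checks it */
            fclose(f);
            return 0;
    }
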
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
new file mode 100644 (file)
index 0000000..b9937d4
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SYSCTL_H__
+#define __XFS_SYSCTL_H__
+
+#include <linux/sysctl.h>
+
+/*
+ * Tunable xfs parameters
+ */
+
+typedef struct xfs_sysctl_val {
+       int min;
+       int val;
+       int max;
+} xfs_sysctl_val_t;
+
+typedef struct xfs_param {
+       xfs_sysctl_val_t sgid_inherit;  /* Inherit S_ISGID if process' GID is
+                                        * not a member of parent dir GID. */
+       xfs_sysctl_val_t symlink_mode;  /* Symlink create mode affected by umask */
+       xfs_sysctl_val_t panic_mask;    /* bitmask to cause panic on errors. */
+       xfs_sysctl_val_t error_level;   /* Degree of reporting for problems  */
+       xfs_sysctl_val_t syncd_timer;   /* Interval between xfssyncd wakeups */
+       xfs_sysctl_val_t stats_clear;   /* Reset all XFS statistics to zero. */
+       xfs_sysctl_val_t inherit_sync;  /* Inherit the "sync" inode flag. */
+       xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
+       xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
+       xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
+       xfs_sysctl_val_t xfs_buf_age;   /* Metadata buffer age before flush. */
+       xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
+       xfs_sysctl_val_t rotorstep;     /* inode32 AG rotoring control knob */
+       xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
+       xfs_sysctl_val_t fstrm_timer;   /* Filestream dir-AG assoc'n timeout. */
+} xfs_param_t;
+
+/*
+ * xfs_error_level:
+ *
+ * How much error reporting will be done when internal problems are
+ * encountered.  These problems normally return an EFSCORRUPTED to their
+ * caller, with no other information reported.
+ *
+ * 0   No error reports
+ * 1   Report EFSCORRUPTED errors that will cause a filesystem shutdown
+ * 5   Report all EFSCORRUPTED errors (all of the above errors, plus any
+ *     additional errors that are known to not cause shutdowns)
+ *
+ * xfs_panic_mask bit 0x8 turns the error reports into panics
+ */
+
+enum {
+       /* XFS_REFCACHE_SIZE = 1 */
+       /* XFS_REFCACHE_PURGE = 2 */
+       /* XFS_RESTRICT_CHOWN = 3 */
+       XFS_SGID_INHERIT = 4,
+       XFS_SYMLINK_MODE = 5,
+       XFS_PANIC_MASK = 6,
+       XFS_ERRLEVEL = 7,
+       XFS_SYNCD_TIMER = 8,
+       /* XFS_PROBE_DMAPI = 9 */
+       /* XFS_PROBE_IOOPS = 10 */
+       /* XFS_PROBE_QUOTA = 11 */
+       XFS_STATS_CLEAR = 12,
+       XFS_INHERIT_SYNC = 13,
+       XFS_INHERIT_NODUMP = 14,
+       XFS_INHERIT_NOATIME = 15,
+       XFS_BUF_TIMER = 16,
+       XFS_BUF_AGE = 17,
+       /* XFS_IO_BYPASS = 18 */
+       XFS_INHERIT_NOSYM = 19,
+       XFS_ROTORSTEP = 20,
+       XFS_INHERIT_NODFRG = 21,
+       XFS_FILESTREAM_TIMER = 22,
+};
+
+extern xfs_param_t     xfs_params;
+
+#ifdef CONFIG_SYSCTL
+extern int xfs_sysctl_register(void);
+extern void xfs_sysctl_unregister(void);
+#else
+# define xfs_sysctl_register()         (0)
+# define xfs_sysctl_unregister()       do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+#endif /* __XFS_SYSCTL_H__ */
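
The min/val/max triple exists so that the sysctl table's extra1/extra2
pointers can aim straight into the same structure as the value itself;
proc_dointvec_minmax then rejects out-of-range writes rather than clamping
them. A toy userspace rendition of that check (bounds invented for the demo):

    #include <stdio.h>

    struct sysctl_val {             /* mirrors xfs_sysctl_val_t */
            int min, val, max;
    };

    static int set_val(struct sysctl_val *v, int new)
    {
            if (new < v->min || new > v->max)
                    return -1;      /* the kernel returns -EINVAL here */
            v->val = new;
            return 0;
    }

    int main(void)
    {
            struct sysctl_val error_level = { 0, 3, 11 };   /* demo bounds */

            printf("set 5  -> %d\n", set_val(&error_level, 5));   /* ok */
            printf("set 99 -> %d\n", set_val(&error_level, 99));  /* fails */
            return 0;
    }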
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
new file mode 100644 (file)
index 0000000..9010ce8
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2009, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_mount.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_log_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_quota.h"
+#include "xfs_iomap.h"
+#include "xfs_aops.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_log_recover.h"
+#include "xfs_inode_item.h"
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
new file mode 100644 (file)
index 0000000..690fc7a
--- /dev/null
@@ -0,0 +1,1746 @@
+/*
+ * Copyright (c) 2009, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xfs
+
+#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_XFS_H
+
+#include <linux/tracepoint.h>
+
+struct xfs_agf;
+struct xfs_alloc_arg;
+struct xfs_attr_list_context;
+struct xfs_buf_log_item;
+struct xfs_da_args;
+struct xfs_da_node_entry;
+struct xfs_dquot;
+struct xlog_ticket;
+struct log;
+struct xlog_recover;
+struct xlog_recover_item;
+struct xfs_buf_log_format;
+struct xfs_inode_log_format;
+
+DECLARE_EVENT_CLASS(xfs_attr_list_class,
+       TP_PROTO(struct xfs_attr_list_context *ctx),
+       TP_ARGS(ctx),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(u32, hashval)
+               __field(u32, blkno)
+               __field(u32, offset)
+               __field(void *, alist)
+               __field(int, bufsize)
+               __field(int, count)
+               __field(int, firstu)
+               __field(int, dupcnt)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+               __entry->ino = ctx->dp->i_ino;
+               __entry->hashval = ctx->cursor->hashval;
+               __entry->blkno = ctx->cursor->blkno;
+               __entry->offset = ctx->cursor->offset;
+               __entry->alist = ctx->alist;
+               __entry->bufsize = ctx->bufsize;
+               __entry->count = ctx->count;
+               __entry->firstu = ctx->firstu;
+               __entry->flags = ctx->flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+                 "alist 0x%p size %u count %u firstu %u flags %d %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->ino,
+                  __entry->hashval,
+                  __entry->blkno,
+                  __entry->offset,
+                  __entry->dupcnt,
+                  __entry->alist,
+                  __entry->bufsize,
+                  __entry->count,
+                  __entry->firstu,
+                  __entry->flags,
+                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
+       )
+)
+
+#define DEFINE_ATTR_LIST_EVENT(name) \
+DEFINE_EVENT(xfs_attr_list_class, name, \
+       TP_PROTO(struct xfs_attr_list_context *ctx), \
+       TP_ARGS(ctx))
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
+
+DECLARE_EVENT_CLASS(xfs_perag_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+                unsigned long caller_ip),
+       TP_ARGS(mp, agno, refcount, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, refcount)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->refcount = refcount;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->refcount,
+                 (char *)__entry->caller_ip)
+);
+
+#define DEFINE_PERAG_REF_EVENT(name)   \
+DEFINE_EVENT(xfs_perag_class, name,    \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,       \
+                unsigned long caller_ip),                                      \
+       TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
+DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
+
+TRACE_EVENT(xfs_attr_list_node_descend,
+       TP_PROTO(struct xfs_attr_list_context *ctx,
+                struct xfs_da_node_entry *btree),
+       TP_ARGS(ctx, btree),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(u32, hashval)
+               __field(u32, blkno)
+               __field(u32, offset)
+               __field(void *, alist)
+               __field(int, bufsize)
+               __field(int, count)
+               __field(int, firstu)
+               __field(int, dupcnt)
+               __field(int, flags)
+               __field(u32, bt_hashval)
+               __field(u32, bt_before)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+               __entry->ino = ctx->dp->i_ino;
+               __entry->hashval = ctx->cursor->hashval;
+               __entry->blkno = ctx->cursor->blkno;
+               __entry->offset = ctx->cursor->offset;
+               __entry->alist = ctx->alist;
+               __entry->bufsize = ctx->bufsize;
+               __entry->count = ctx->count;
+               __entry->firstu = ctx->firstu;
+               __entry->flags = ctx->flags;
+               __entry->bt_hashval = be32_to_cpu(btree->hashval);
+               __entry->bt_before = be32_to_cpu(btree->before);
+       ),
+       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+                 "alist 0x%p size %u count %u firstu %u flags %d %s "
+                 "node hashval %u, node before %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->ino,
+                  __entry->hashval,
+                  __entry->blkno,
+                  __entry->offset,
+                  __entry->dupcnt,
+                  __entry->alist,
+                  __entry->bufsize,
+                  __entry->count,
+                  __entry->firstu,
+                  __entry->flags,
+                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
+                  __entry->bt_hashval,
+                  __entry->bt_before)
+);
+
+TRACE_EVENT(xfs_iext_insert,
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
+                struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
+       TP_ARGS(ip, idx, r, state, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_extnum_t, idx)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+               __field(xfs_exntst_t, state)
+               __field(int, bmap_state)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->idx = idx;
+               __entry->startoff = r->br_startoff;
+               __entry->startblock = r->br_startblock;
+               __entry->blockcount = r->br_blockcount;
+               __entry->state = r->br_state;
+               __entry->bmap_state = state;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+                 (long)__entry->idx,
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount,
+                 __entry->state,
+                 (char *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_bmap_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
+                unsigned long caller_ip),
+       TP_ARGS(ip, idx, state, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_extnum_t, idx)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+               __field(xfs_exntst_t, state)
+               __field(int, bmap_state)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               struct xfs_ifork        *ifp = (state & BMAP_ATTRFORK) ?
+                                               ip->i_afp : &ip->i_df;
+               struct xfs_bmbt_irec    r;
+
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->idx = idx;
+               __entry->startoff = r.br_startoff;
+               __entry->startblock = r.br_startblock;
+               __entry->blockcount = r.br_blockcount;
+               __entry->state = r.br_state;
+               __entry->bmap_state = state;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+                 (long)__entry->idx,
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount,
+                 __entry->state,
+                 (char *)__entry->caller_ip)
+)
+
+#define DEFINE_BMAP_EVENT(name) \
+DEFINE_EVENT(xfs_bmap_class, name, \
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
+                unsigned long caller_ip), \
+       TP_ARGS(ip, idx, state, caller_ip))
+DEFINE_BMAP_EVENT(xfs_iext_remove);
+DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
+DEFINE_BMAP_EVENT(xfs_bmap_post_update);
+DEFINE_BMAP_EVENT(xfs_extlist);
+
+DECLARE_EVENT_CLASS(xfs_buf_class,
+       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
+       TP_ARGS(bp, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(unsigned, flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->flags = bp->b_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_BUF_EVENT(name) \
+DEFINE_EVENT(xfs_buf_class, name, \
+       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
+       TP_ARGS(bp, caller_ip))
+DEFINE_BUF_EVENT(xfs_buf_init);
+DEFINE_BUF_EVENT(xfs_buf_free);
+DEFINE_BUF_EVENT(xfs_buf_hold);
+DEFINE_BUF_EVENT(xfs_buf_rele);
+DEFINE_BUF_EVENT(xfs_buf_iodone);
+DEFINE_BUF_EVENT(xfs_buf_iorequest);
+DEFINE_BUF_EVENT(xfs_buf_bawrite);
+DEFINE_BUF_EVENT(xfs_buf_bdwrite);
+DEFINE_BUF_EVENT(xfs_buf_lock);
+DEFINE_BUF_EVENT(xfs_buf_lock_done);
+DEFINE_BUF_EVENT(xfs_buf_trylock);
+DEFINE_BUF_EVENT(xfs_buf_unlock);
+DEFINE_BUF_EVENT(xfs_buf_iowait);
+DEFINE_BUF_EVENT(xfs_buf_iowait_done);
+DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_split);
+DEFINE_BUF_EVENT(xfs_buf_get_uncached);
+DEFINE_BUF_EVENT(xfs_bdstrat_shut);
+DEFINE_BUF_EVENT(xfs_buf_item_relse);
+DEFINE_BUF_EVENT(xfs_buf_item_iodone);
+DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
+DEFINE_BUF_EVENT(xfs_buf_error_relse);
+DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
+DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
+
+/* not really buffer traces, but the buf provides useful information */
+DEFINE_BUF_EVENT(xfs_btree_corrupt);
+DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
+DEFINE_BUF_EVENT(xfs_reset_dqcounts);
+DEFINE_BUF_EVENT(xfs_inode_item_push);
+
+/* pass flags explicitly */
+DECLARE_EVENT_CLASS(xfs_buf_flags_class,
+       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
+       TP_ARGS(bp, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(unsigned, flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->flags = flags;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_BUF_FLAGS_EVENT(name) \
+DEFINE_EVENT(xfs_buf_flags_class, name, \
+       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
+       TP_ARGS(bp, flags, caller_ip))
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
+
+TRACE_EVENT(xfs_buf_ioerror,
+       TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
+       TP_ARGS(bp, error, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(unsigned, flags)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(int, error)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->error = error;
+               __entry->flags = bp->b_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d error %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __entry->error,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_buf_item_class,
+       TP_PROTO(struct xfs_buf_log_item *bip),
+       TP_ARGS(bip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, buf_bno)
+               __field(size_t, buf_len)
+               __field(int, buf_hold)
+               __field(int, buf_pincount)
+               __field(int, buf_lockval)
+               __field(unsigned, buf_flags)
+               __field(unsigned, bli_recur)
+               __field(int, bli_refcount)
+               __field(unsigned, bli_flags)
+               __field(void *, li_desc)
+               __field(unsigned, li_flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = bip->bli_buf->b_target->bt_dev;
+               __entry->bli_flags = bip->bli_flags;
+               __entry->bli_recur = bip->bli_recur;
+               __entry->bli_refcount = atomic_read(&bip->bli_refcount);
+               __entry->buf_bno = bip->bli_buf->b_bn;
+               __entry->buf_len = bip->bli_buf->b_buffer_length;
+               __entry->buf_flags = bip->bli_buf->b_flags;
+               __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
+               __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
+               __entry->buf_lockval = bip->bli_buf->b_sema.count;
+               __entry->li_desc = bip->bli_item.li_desc;
+               __entry->li_flags = bip->bli_item.li_flags;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s recur %d refcount %d bliflags %s "
+                 "lidesc 0x%p liflags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->buf_bno,
+                 __entry->buf_len,
+                 __entry->buf_hold,
+                 __entry->buf_pincount,
+                 __entry->buf_lockval,
+                 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
+                 __entry->bli_recur,
+                 __entry->bli_refcount,
+                 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
+                 __entry->li_desc,
+                 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
+)
+
+#define DEFINE_BUF_ITEM_EVENT(name) \
+DEFINE_EVENT(xfs_buf_item_class, name, \
+       TP_PROTO(struct xfs_buf_log_item *bip), \
+       TP_ARGS(bip))
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
+
+DECLARE_EVENT_CLASS(xfs_lock_class,
+       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
+                unsigned long caller_ip),
+       TP_ARGS(ip, lock_flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, lock_flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->lock_flags = lock_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_LOCK_EVENT(name) \
+DEFINE_EVENT(xfs_lock_class, name, \
+       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
+                unsigned long caller_ip), \
+       TP_ARGS(ip, lock_flags, caller_ip))
+DEFINE_LOCK_EVENT(xfs_ilock);
+DEFINE_LOCK_EVENT(xfs_ilock_nowait);
+DEFINE_LOCK_EVENT(xfs_ilock_demote);
+DEFINE_LOCK_EVENT(xfs_iunlock);
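
Each DEFINE_EVENT above generates a trace_<name>() hook for the C code to
call, e.g. the locking paths would invoke trace_xfs_ilock(ip, lock_flags,
_RET_IP_). From userspace the event can then be switched on through tracefs;
a small sketch (the debugfs mount point is assumed, and root is required):

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/kernel/debug/tracing/"
                            "events/xfs/xfs_ilock/enable", "w");

            if (!f) {
                    perror("tracefs");      /* not mounted, or not root */
                    return 1;
            }
            fputs("1\n", f);
            fclose(f);
            return 0;       /* hits appear in .../tracing/trace */
    }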
+
+DECLARE_EVENT_CLASS(xfs_inode_class,
+       TP_PROTO(struct xfs_inode *ip),
+       TP_ARGS(ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino)
+)
+
+#define DEFINE_INODE_EVENT(name) \
+DEFINE_EVENT(xfs_inode_class, name, \
+       TP_PROTO(struct xfs_inode *ip), \
+       TP_ARGS(ip))
+DEFINE_INODE_EVENT(xfs_iget_skip);
+DEFINE_INODE_EVENT(xfs_iget_reclaim);
+DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
+DEFINE_INODE_EVENT(xfs_iget_hit);
+DEFINE_INODE_EVENT(xfs_iget_miss);
+
+DEFINE_INODE_EVENT(xfs_getattr);
+DEFINE_INODE_EVENT(xfs_setattr);
+DEFINE_INODE_EVENT(xfs_readlink);
+DEFINE_INODE_EVENT(xfs_alloc_file_space);
+DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_readdir);
+#ifdef CONFIG_XFS_POSIX_ACL
+DEFINE_INODE_EVENT(xfs_get_acl);
+#endif
+DEFINE_INODE_EVENT(xfs_vm_bmap);
+DEFINE_INODE_EVENT(xfs_file_ioctl);
+DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
+DEFINE_INODE_EVENT(xfs_ioctl_setattr);
+DEFINE_INODE_EVENT(xfs_file_fsync);
+DEFINE_INODE_EVENT(xfs_destroy_inode);
+DEFINE_INODE_EVENT(xfs_write_inode);
+DEFINE_INODE_EVENT(xfs_evict_inode);
+
+DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
+DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
+
+DECLARE_EVENT_CLASS(xfs_iref_class,
+       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
+       TP_ARGS(ip, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, count)
+               __field(int, pincount)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->count = atomic_read(&VFS_I(ip)->i_count);
+               __entry->pincount = atomic_read(&ip->i_pincount);
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->count,
+                 __entry->pincount,
+                 (char *)__entry->caller_ip)
+)
+
+#define DEFINE_IREF_EVENT(name) \
+DEFINE_EVENT(xfs_iref_class, name, \
+       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
+       TP_ARGS(ip, caller_ip))
+DEFINE_IREF_EVENT(xfs_ihold);
+DEFINE_IREF_EVENT(xfs_irele);
+DEFINE_IREF_EVENT(xfs_inode_pin);
+DEFINE_IREF_EVENT(xfs_inode_unpin);
+DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+
+DECLARE_EVENT_CLASS(xfs_namespace_class,
+       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
+       TP_ARGS(dp, name),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, dp_ino)
+               __dynamic_array(char, name, name->len)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(dp)->i_sb->s_dev;
+               __entry->dp_ino = dp->i_ino;
+               memcpy(__get_str(name), name->name, name->len);
+       ),
+       TP_printk("dev %d:%d dp ino 0x%llx name %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->dp_ino,
+                 __get_str(name))
+)
+
+#define DEFINE_NAMESPACE_EVENT(name) \
+DEFINE_EVENT(xfs_namespace_class, name, \
+       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
+       TP_ARGS(dp, name))
+DEFINE_NAMESPACE_EVENT(xfs_remove);
+DEFINE_NAMESPACE_EVENT(xfs_link);
+DEFINE_NAMESPACE_EVENT(xfs_lookup);
+DEFINE_NAMESPACE_EVENT(xfs_create);
+DEFINE_NAMESPACE_EVENT(xfs_symlink);
+
+TRACE_EVENT(xfs_rename,
+       TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
+                struct xfs_name *src_name, struct xfs_name *target_name),
+       TP_ARGS(src_dp, target_dp, src_name, target_name),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, src_dp_ino)
+               __field(xfs_ino_t, target_dp_ino)
+               __dynamic_array(char, src_name, src_name->len)
+               __dynamic_array(char, target_name, target_name->len)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
+               __entry->src_dp_ino = src_dp->i_ino;
+               __entry->target_dp_ino = target_dp->i_ino;
+               memcpy(__get_str(src_name), src_name->name, src_name->len);
+               memcpy(__get_str(target_name), target_name->name, target_name->len);
+       ),
+       TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
+                 " src name %s target name %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->src_dp_ino,
+                 __entry->target_dp_ino,
+                 __get_str(src_name),
+                 __get_str(target_name))
+)
+
+DECLARE_EVENT_CLASS(xfs_dquot_class,
+       TP_PROTO(struct xfs_dquot *dqp),
+       TP_ARGS(dqp),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(u32, id)
+               __field(unsigned, flags)
+               __field(unsigned, nrefs)
+               __field(unsigned long long, res_bcount)
+               __field(unsigned long long, bcount)
+               __field(unsigned long long, icount)
+               __field(unsigned long long, blk_hardlimit)
+               __field(unsigned long long, blk_softlimit)
+               __field(unsigned long long, ino_hardlimit)
+               __field(unsigned long long, ino_softlimit)
+       ),
+       TP_fast_assign(
+               __entry->dev = dqp->q_mount->m_super->s_dev;
+               __entry->id = be32_to_cpu(dqp->q_core.d_id);
+               __entry->flags = dqp->dq_flags;
+               __entry->nrefs = dqp->q_nrefs;
+               __entry->res_bcount = dqp->q_res_bcount;
+               __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
+               __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
+               __entry->blk_hardlimit =
+                       be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+               __entry->blk_softlimit =
+                       be64_to_cpu(dqp->q_core.d_blk_softlimit);
+               __entry->ino_hardlimit =
+                       be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+               __entry->ino_softlimit =
+                       be64_to_cpu(dqp->q_core.d_ino_softlimit);
+       ),
+       TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
+                 "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
+                 "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->id,
+                 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
+                 __entry->nrefs,
+                 __entry->res_bcount,
+                 __entry->bcount,
+                 __entry->blk_hardlimit,
+                 __entry->blk_softlimit,
+                 __entry->icount,
+                 __entry->ino_hardlimit,
+                 __entry->ino_softlimit)
+)
+
+#define DEFINE_DQUOT_EVENT(name) \
+DEFINE_EVENT(xfs_dquot_class, name, \
+       TP_PROTO(struct xfs_dquot *dqp), \
+       TP_ARGS(dqp))
+DEFINE_DQUOT_EVENT(xfs_dqadjust);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
+DEFINE_DQUOT_EVENT(xfs_dqattach_found);
+DEFINE_DQUOT_EVENT(xfs_dqattach_get);
+DEFINE_DQUOT_EVENT(xfs_dqinit);
+DEFINE_DQUOT_EVENT(xfs_dqreuse);
+DEFINE_DQUOT_EVENT(xfs_dqalloc);
+DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
+DEFINE_DQUOT_EVENT(xfs_dqread);
+DEFINE_DQUOT_EVENT(xfs_dqread_fail);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
+DEFINE_DQUOT_EVENT(xfs_dqget_hit);
+DEFINE_DQUOT_EVENT(xfs_dqget_miss);
+DEFINE_DQUOT_EVENT(xfs_dqput);
+DEFINE_DQUOT_EVENT(xfs_dqput_wait);
+DEFINE_DQUOT_EVENT(xfs_dqput_free);
+DEFINE_DQUOT_EVENT(xfs_dqrele);
+DEFINE_DQUOT_EVENT(xfs_dqflush);
+DEFINE_DQUOT_EVENT(xfs_dqflush_force);
+DEFINE_DQUOT_EVENT(xfs_dqflush_done);
+
+DECLARE_EVENT_CLASS(xfs_loggrant_class,
+       TP_PROTO(struct log *log, struct xlog_ticket *tic),
+       TP_ARGS(log, tic),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned, trans_type)
+               __field(char, ocnt)
+               __field(char, cnt)
+               __field(int, curr_res)
+               __field(int, unit_res)
+               __field(unsigned int, flags)
+               __field(int, reserveq)
+               __field(int, writeq)
+               __field(int, grant_reserve_cycle)
+               __field(int, grant_reserve_bytes)
+               __field(int, grant_write_cycle)
+               __field(int, grant_write_bytes)
+               __field(int, curr_cycle)
+               __field(int, curr_block)
+               __field(xfs_lsn_t, tail_lsn)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->trans_type = tic->t_trans_type;
+               __entry->ocnt = tic->t_ocnt;
+               __entry->cnt = tic->t_cnt;
+               __entry->curr_res = tic->t_curr_res;
+               __entry->unit_res = tic->t_unit_res;
+               __entry->flags = tic->t_flags;
+               __entry->reserveq = list_empty(&log->l_reserveq);
+               __entry->writeq = list_empty(&log->l_writeq);
+               xlog_crack_grant_head(&log->l_grant_reserve_head,
+                               &__entry->grant_reserve_cycle,
+                               &__entry->grant_reserve_bytes);
+               xlog_crack_grant_head(&log->l_grant_write_head,
+                               &__entry->grant_write_cycle,
+                               &__entry->grant_write_bytes);
+               __entry->curr_cycle = log->l_curr_cycle;
+               __entry->curr_block = log->l_curr_block;
+               __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
+       ),
+       TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
+                 "t_unit_res %u t_flags %s reserveq %s "
+                 "writeq %s grant_reserve_cycle %d "
+                 "grant_reserve_bytes %d grant_write_cycle %d "
+                 "grant_write_bytes %d curr_cycle %d curr_block %d "
+                 "tail_cycle %d tail_block %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
+                 __entry->ocnt,
+                 __entry->cnt,
+                 __entry->curr_res,
+                 __entry->unit_res,
+                 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
+                 __entry->reserveq ? "empty" : "active",
+                 __entry->writeq ? "empty" : "active",
+                 __entry->grant_reserve_cycle,
+                 __entry->grant_reserve_bytes,
+                 __entry->grant_write_cycle,
+                 __entry->grant_write_bytes,
+                 __entry->curr_cycle,
+                 __entry->curr_block,
+                 CYCLE_LSN(__entry->tail_lsn),
+                 BLOCK_LSN(__entry->tail_lsn)
+       )
+)
+
+#define DEFINE_LOGGRANT_EVENT(name) \
+DEFINE_EVENT(xfs_loggrant_class, name, \
+       TP_PROTO(struct log *log, struct xlog_ticket *tic), \
+       TP_ARGS(log, tic))
+DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
+DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
+DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
+DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
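+
+/*
+ * Illustrative sketch (not taken from this patch): each
+ * DEFINE_LOGGRANT_EVENT above generates a tracepoint sharing the
+ * xfs_loggrant_class record layout, which the log code fires as e.g.
+ *
+ *     trace_xfs_log_reserve(log, tic);
+ *
+ * with the struct log and xlog_ticket currently in hand.
+ */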
+
+DECLARE_EVENT_CLASS(xfs_file_class,
+       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
+       TP_ARGS(ip, count, offset, flags),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fsize_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count 0x%zx ioflags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count,
+                 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
+)
+
+#define DEFINE_RW_EVENT(name)          \
+DEFINE_EVENT(xfs_file_class, name,     \
+       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
+       TP_ARGS(ip, count, offset, flags))
+DEFINE_RW_EVENT(xfs_file_read);
+DEFINE_RW_EVENT(xfs_file_buffered_write);
+DEFINE_RW_EVENT(xfs_file_direct_write);
+DEFINE_RW_EVENT(xfs_file_splice_read);
+DEFINE_RW_EVENT(xfs_file_splice_write);
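+
+/*
+ * Example (minimal sketch, assuming the usual read-path arguments): the
+ * generated hook would be invoked as
+ *
+ *     trace_xfs_file_read(ip, count, offset, ioflags);
+ *
+ * capturing the inode, request size, file offset and ioflags at entry.
+ */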
+
+DECLARE_EVENT_CLASS(xfs_page_class,
+       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
+       TP_ARGS(inode, page, off),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(pgoff_t, pgoff)
+               __field(loff_t, size)
+               __field(unsigned long, offset)
+               __field(int, delalloc)
+               __field(int, unwritten)
+       ),
+       TP_fast_assign(
+               int delalloc = -1, unwritten = -1;
+
+               if (page_has_buffers(page))
+                       xfs_count_page_state(page, &delalloc, &unwritten);
+               __entry->dev = inode->i_sb->s_dev;
+               __entry->ino = XFS_I(inode)->i_ino;
+               __entry->pgoff = page_offset(page);
+               __entry->size = i_size_read(inode);
+               __entry->offset = off;
+               __entry->delalloc = delalloc;
+               __entry->unwritten = unwritten;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
+                 "delalloc %d unwritten %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->pgoff,
+                 __entry->size,
+                 __entry->offset,
+                 __entry->delalloc,
+                 __entry->unwritten)
+)
+
+#define DEFINE_PAGE_EVENT(name)                \
+DEFINE_EVENT(xfs_page_class, name,     \
+       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),    \
+       TP_ARGS(inode, page, off))
+DEFINE_PAGE_EVENT(xfs_writepage);
+DEFINE_PAGE_EVENT(xfs_releasepage);
+DEFINE_PAGE_EVENT(xfs_invalidatepage);
+
+DECLARE_EVENT_CLASS(xfs_imap_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
+                int type, struct xfs_bmbt_irec *irec),
+       TP_ARGS(ip, offset, count, type, irec),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(loff_t, size)
+               __field(loff_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+               __field(int, type)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+               __entry->type = type;
+               __entry->startoff = irec ? irec->br_startoff : 0;
+               __entry->startblock = irec ? irec->br_startblock : 0;
+               __entry->blockcount = irec ? irec->br_blockcount : 0;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count %zd type %s "
+                 "startoff 0x%llx startblock %lld blockcount 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count,
+                 __print_symbolic(__entry->type, XFS_IO_TYPES),
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount)
+)
+
+#define DEFINE_IOMAP_EVENT(name)       \
+DEFINE_EVENT(xfs_imap_class, name,     \
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
+                int type, struct xfs_bmbt_irec *irec),         \
+       TP_ARGS(ip, offset, count, type, irec))
+DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
+
+DECLARE_EVENT_CLASS(xfs_simple_io_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+       TP_ARGS(ip, offset, count),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(loff_t, isize)
+               __field(loff_t, disize)
+               __field(loff_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->isize = ip->i_size;
+               __entry->disize = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count %zd",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->isize,
+                 __entry->disize,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count)
+);
+
+#define DEFINE_SIMPLE_IO_EVENT(name)   \
+DEFINE_EVENT(xfs_simple_io_class, name,        \
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),        \
+       TP_ARGS(ip, offset, count))
+DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
+DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
+DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
+DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
+
+DECLARE_EVENT_CLASS(xfs_itrunc_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
+       TP_ARGS(ip, new_size),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fsize_t, new_size)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = new_size;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size)
+)
+
+#define DEFINE_ITRUNC_EVENT(name) \
+DEFINE_EVENT(xfs_itrunc_class, name, \
+       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
+       TP_ARGS(ip, new_size))
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
+
+TRACE_EVENT(xfs_pagecache_inval,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
+       TP_ARGS(ip, start, finish),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_off_t, start)
+               __field(xfs_off_t, finish)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->start = start;
+               __entry->finish = finish;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->start,
+                 __entry->finish)
+);
+
+TRACE_EVENT(xfs_bunmap,
+       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
+                int flags, unsigned long caller_ip),
+       TP_ARGS(ip, bno, len, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fileoff_t, bno)
+               __field(xfs_filblks_t, len)
+               __field(unsigned long, caller_ip)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->bno = bno;
+               __entry->len = len;
+               __entry->caller_ip = caller_ip;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
+                 "flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->bno,
+                 __entry->len,
+                 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
+                 (void *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_busy_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len),
+       TP_ARGS(mp, agno, agbno, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len)
+);
+#define DEFINE_BUSY_EVENT(name) \
+DEFINE_EVENT(xfs_busy_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_extlen_t len), \
+       TP_ARGS(mp, agno, agbno, len))
+DEFINE_BUSY_EVENT(xfs_alloc_busy);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
+
+TRACE_EVENT(xfs_alloc_busy_trim,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len,
+                xfs_agblock_t tbno, xfs_extlen_t tlen),
+       TP_ARGS(mp, agno, agbno, len, tbno, tlen),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(xfs_agblock_t, tbno)
+               __field(xfs_extlen_t, tlen)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->tbno = tbno;
+               __entry->tlen = tlen;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->tbno,
+                 __entry->tlen)
+);
+
+TRACE_EVENT(xfs_trans_commit_lsn,
+       TP_PROTO(struct xfs_trans *trans),
+       TP_ARGS(trans),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(struct xfs_trans *, tp)
+               __field(xfs_lsn_t, lsn)
+       ),
+       TP_fast_assign(
+               __entry->dev = trans->t_mountp->m_super->s_dev;
+               __entry->tp = trans;
+               __entry->lsn = trans->t_commit_lsn;
+       ),
+       TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tp,
+                 __entry->lsn)
+);
+
+TRACE_EVENT(xfs_agf,
+       TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
+                unsigned long caller_ip),
+       TP_ARGS(mp, agf, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, flags)
+               __field(__u32, length)
+               __field(__u32, bno_root)
+               __field(__u32, cnt_root)
+               __field(__u32, bno_level)
+               __field(__u32, cnt_level)
+               __field(__u32, flfirst)
+               __field(__u32, fllast)
+               __field(__u32, flcount)
+               __field(__u32, freeblks)
+               __field(__u32, longest)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = be32_to_cpu(agf->agf_seqno);
+               __entry->flags = flags;
+               __entry->length = be32_to_cpu(agf->agf_length);
+               __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
+               __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
+               __entry->bno_level =
+                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+               __entry->cnt_level =
+                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
+               __entry->flfirst = be32_to_cpu(agf->agf_flfirst);
+               __entry->fllast = be32_to_cpu(agf->agf_fllast);
+               __entry->flcount = be32_to_cpu(agf->agf_flcount);
+               __entry->freeblks = be32_to_cpu(agf->agf_freeblks);
+               __entry->longest = be32_to_cpu(agf->agf_longest);
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
+                 "levels b %u c %u flfirst %u fllast %u flcount %u "
+                 "freeblks %u longest %u caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
+                 __entry->length,
+                 __entry->bno_root,
+                 __entry->cnt_root,
+                 __entry->bno_level,
+                 __entry->cnt_level,
+                 __entry->flfirst,
+                 __entry->fllast,
+                 __entry->flcount,
+                 __entry->freeblks,
+                 __entry->longest,
+                 (void *)__entry->caller_ip)
+);
+
+TRACE_EVENT(xfs_free_extent,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+                xfs_extlen_t len, bool isfl, int haveleft, int haveright),
+       TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(int, isfl)
+               __field(int, haveleft)
+               __field(int, haveright)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->isfl = isfl;
+               __entry->haveleft = haveleft;
+               __entry->haveright = haveright;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->isfl,
+                 __entry->haveleft ?
+                       (__entry->haveright ? "both" : "left") :
+                       (__entry->haveright ? "right" : "none"))
+);
+
+DECLARE_EVENT_CLASS(xfs_alloc_class,
+       TP_PROTO(struct xfs_alloc_arg *args),
+       TP_ARGS(args),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, minlen)
+               __field(xfs_extlen_t, maxlen)
+               __field(xfs_extlen_t, mod)
+               __field(xfs_extlen_t, prod)
+               __field(xfs_extlen_t, minleft)
+               __field(xfs_extlen_t, total)
+               __field(xfs_extlen_t, alignment)
+               __field(xfs_extlen_t, minalignslop)
+               __field(xfs_extlen_t, len)
+               __field(short, type)
+               __field(short, otype)
+               __field(char, wasdel)
+               __field(char, wasfromfl)
+               __field(char, isfl)
+               __field(char, userdata)
+               __field(xfs_fsblock_t, firstblock)
+       ),
+       TP_fast_assign(
+               __entry->dev = args->mp->m_super->s_dev;
+               __entry->agno = args->agno;
+               __entry->agbno = args->agbno;
+               __entry->minlen = args->minlen;
+               __entry->maxlen = args->maxlen;
+               __entry->mod = args->mod;
+               __entry->prod = args->prod;
+               __entry->minleft = args->minleft;
+               __entry->total = args->total;
+               __entry->alignment = args->alignment;
+               __entry->minalignslop = args->minalignslop;
+               __entry->len = args->len;
+               __entry->type = args->type;
+               __entry->otype = args->otype;
+               __entry->wasdel = args->wasdel;
+               __entry->wasfromfl = args->wasfromfl;
+               __entry->isfl = args->isfl;
+               __entry->userdata = args->userdata;
+               __entry->firstblock = args->firstblock;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
+                 "prod %u minleft %u total %u alignment %u minalignslop %u "
+                 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
+                 "userdata %d firstblock 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->minlen,
+                 __entry->maxlen,
+                 __entry->mod,
+                 __entry->prod,
+                 __entry->minleft,
+                 __entry->total,
+                 __entry->alignment,
+                 __entry->minalignslop,
+                 __entry->len,
+                 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
+                 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
+                 __entry->wasdel,
+                 __entry->wasfromfl,
+                 __entry->isfl,
+                 __entry->userdata,
+                 (unsigned long long)__entry->firstblock)
+)
+
+#define DEFINE_ALLOC_EVENT(name) \
+DEFINE_EVENT(xfs_alloc_class, name, \
+       TP_PROTO(struct xfs_alloc_arg *args), \
+       TP_ARGS(args))
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
+
+DECLARE_EVENT_CLASS(xfs_dir2_class,
+       TP_PROTO(struct xfs_da_args *args),
+       TP_ARGS(args),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __dynamic_array(char, name, args->namelen)
+               __field(int, namelen)
+               __field(xfs_dahash_t, hashval)
+               __field(xfs_ino_t, inumber)
+               __field(int, op_flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               if (args->namelen)
+                       memcpy(__get_str(name), args->name, args->namelen);
+               __entry->namelen = args->namelen;
+               __entry->hashval = args->hashval;
+               __entry->inumber = args->inumber;
+               __entry->op_flags = args->op_flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
+                 "inumber 0x%llx op_flags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->namelen,
+                 __entry->namelen ? __get_str(name) : NULL,
+                 __entry->namelen,
+                 __entry->hashval,
+                 __entry->inumber,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
+)
+
+#define DEFINE_DIR2_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_class, name, \
+       TP_PROTO(struct xfs_da_args *args), \
+       TP_ARGS(args))
+DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
+DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
+
+DECLARE_EVENT_CLASS(xfs_dir2_space_class,
+       TP_PROTO(struct xfs_da_args *args, int idx),
+       TP_ARGS(args, idx),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, op_flags)
+               __field(int, idx)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               __entry->op_flags = args->op_flags;
+               __entry->idx = idx;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+                 __entry->idx)
+)
+
+#define DEFINE_DIR2_SPACE_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_space_class, name, \
+       TP_PROTO(struct xfs_da_args *args, int idx), \
+       TP_ARGS(args, idx))
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
+
+TRACE_EVENT(xfs_dir2_leafn_moveents,
+       TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
+       TP_ARGS(args, src_idx, dst_idx, count),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, op_flags)
+               __field(int, src_idx)
+               __field(int, dst_idx)
+               __field(int, count)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               __entry->op_flags = args->op_flags;
+               __entry->src_idx = src_idx;
+               __entry->dst_idx = dst_idx;
+               __entry->count = count;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx op_flags %s "
+                 "src_idx %d dst_idx %d count %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+                 __entry->src_idx,
+                 __entry->dst_idx,
+                 __entry->count)
+);
+
+#define XFS_SWAPEXT_INODES \
+       { 0,    "target" }, \
+       { 1,    "temp" }
+
+#define XFS_INODE_FORMAT_STR \
+       { 0,    "invalid" }, \
+       { 1,    "local" }, \
+       { 2,    "extent" }, \
+       { 3,    "btree" }
+
+DECLARE_EVENT_CLASS(xfs_swap_extent_class,
+       TP_PROTO(struct xfs_inode *ip, int which),
+       TP_ARGS(ip, which),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, which)
+               __field(xfs_ino_t, ino)
+               __field(int, format)
+               __field(int, nex)
+               __field(int, max_nex)
+               __field(int, broot_size)
+               __field(int, fork_off)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->which = which;
+               __entry->ino = ip->i_ino;
+               __entry->format = ip->i_d.di_format;
+               __entry->nex = ip->i_d.di_nextents;
+               __entry->max_nex = ip->i_df.if_ext_max;
+               __entry->broot_size = ip->i_df.if_broot_bytes;
+               __entry->fork_off = XFS_IFORK_BOFF(ip);
+       ),
+       TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
+                 "Max in-fork extents %d, broot size %d, fork offset %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
+                 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
+                 __entry->nex,
+                 __entry->max_nex,
+                 __entry->broot_size,
+                 __entry->fork_off)
+)
+
+#define DEFINE_SWAPEXT_EVENT(name) \
+DEFINE_EVENT(xfs_swap_extent_class, name, \
+       TP_PROTO(struct xfs_inode *ip, int which), \
+       TP_ARGS(ip, which))
+
+DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
+DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
+       TP_PROTO(struct log *log, struct xlog_recover *trans,
+               struct xlog_recover_item *item, int pass),
+       TP_ARGS(log, trans, item, pass),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned long, item)
+               __field(xlog_tid_t, tid)
+               __field(int, type)
+               __field(int, pass)
+               __field(int, count)
+               __field(int, total)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->item = (unsigned long)item;
+               __entry->tid = trans->r_log_tid;
+               __entry->type = ITEM_TYPE(item);
+               __entry->pass = pass;
+               __entry->count = item->ri_cnt;
+               __entry->total = item->ri_total;
+       ),
+       TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
+                 "item region count/total %d/%d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tid,
+                 __entry->pass,
+                 (void *)__entry->item,
+                 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+                 __entry->count,
+                 __entry->total)
+)
+
+#define DEFINE_LOG_RECOVER_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_item_class, name, \
+       TP_PROTO(struct log *log, struct xlog_recover *trans, \
+               struct xlog_recover_item *item, int pass), \
+       TP_ARGS(log, trans, item, pass))
+
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
+       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+       TP_ARGS(log, buf_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(__int64_t, blkno)
+               __field(unsigned short, len)
+               __field(unsigned short, flags)
+               __field(unsigned short, size)
+               __field(unsigned int, map_size)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->blkno = buf_f->blf_blkno;
+               __entry->len = buf_f->blf_len;
+               __entry->flags = buf_f->blf_flags;
+               __entry->size = buf_f->blf_size;
+               __entry->map_size = buf_f->blf_map_size;
+       ),
+       TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
+                       "map_size %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->blkno,
+                 __entry->len,
+                 __entry->flags,
+                 __entry->size,
+                 __entry->map_size)
+)
+
+#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
+       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+       TP_ARGS(log, buf_f))
+
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
+       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+       TP_ARGS(log, in_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(unsigned short, size)
+               __field(int, fields)
+               __field(unsigned short, asize)
+               __field(unsigned short, dsize)
+               __field(__int64_t, blkno)
+               __field(int, len)
+               __field(int, boffset)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->ino = in_f->ilf_ino;
+               __entry->size = in_f->ilf_size;
+               __entry->fields = in_f->ilf_fields;
+               __entry->asize = in_f->ilf_asize;
+               __entry->dsize = in_f->ilf_dsize;
+               __entry->blkno = in_f->ilf_blkno;
+               __entry->len = in_f->ilf_len;
+               __entry->boffset = in_f->ilf_boffset;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
+                       "dsize %d, blkno 0x%llx, len %d, boffset %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->fields,
+                 __entry->asize,
+                 __entry->dsize,
+                 __entry->blkno,
+                 __entry->len,
+                 __entry->boffset)
+)
+#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
+       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+       TP_ARGS(log, in_f))
+
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
+
+DECLARE_EVENT_CLASS(xfs_discard_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len),
+       TP_ARGS(mp, agno, agbno, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u\n",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len)
+)
+
+#define DEFINE_DISCARD_EVENT(name) \
+DEFINE_EVENT(xfs_discard_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_extlen_t len), \
+       TP_ARGS(mp, agno, agbno, len))
+DEFINE_DISCARD_EVENT(xfs_discard_extent);
+DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
+DEFINE_DISCARD_EVENT(xfs_discard_exclude);
+DEFINE_DISCARD_EVENT(xfs_discard_busy);
+
+#endif /* _TRACE_XFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE xfs_trace
+#include <trace/define_trace.h>
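+
+/*
+ * Usage note (illustrative, assumes ftrace/event tracing is enabled):
+ * the events defined here appear under
+ * /sys/kernel/debug/tracing/events/xfs/ and can be switched on
+ * individually, e.g. by writing '1' to events/xfs/xfs_log_reserve/enable
+ * and then reading the "trace" file.
+ */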
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
new file mode 100644 (file)
index 0000000..4d00ee6
--- /dev/null
@@ -0,0 +1,890 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+
+STATIC void    xfs_trans_alloc_dqinfo(xfs_trans_t *);
+
+/*
+ * Add the locked dquot to the transaction.
+ * The dquot must be locked, and it cannot be associated with any
+ * transaction.
+ */
+void
+xfs_trans_dqjoin(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(dqp->q_transp != tp);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(dqp->q_logitem.qli_dquot == dqp);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
+
+       /*
+        * Initialize d_transp so we can later determine if this dquot is
+        * associated with this transaction.
+        */
+       dqp->q_transp = tp;
+}
+
+
+/*
+ * This is called to mark the dquot as needing
+ * to be logged when the transaction is committed.  The dquot must
+ * already be associated with the given transaction.
+ * Note that it marks the entire transaction as dirty. In the ordinary
+ * case, this gets called via xfs_trans_commit, after the transaction
+ * is already dirty. However, there's nothing to stop this from getting
+ * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
+ * flag.
+ */
+void
+xfs_trans_log_dquot(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(dqp->q_transp == tp);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
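+
+/*
+ * Minimal usage sketch (assumes a dquot "dqp" and transaction "tp"):
+ *
+ *     xfs_dqlock(dqp);
+ *     xfs_trans_dqjoin(tp, dqp);
+ *     xfs_trans_log_dquot(tp, dqp);
+ *
+ * after which both the transaction and the dquot log item are dirty.
+ */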
+
+/*
+ * Carry forward whatever is left of the quota block reservation to
+ * the brand-new transaction.
+ */
+void
+xfs_trans_dup_dqinfo(
+       xfs_trans_t     *otp,
+       xfs_trans_t     *ntp)
+{
+       xfs_dqtrx_t     *oq, *nq;
+       int             i, j;
+       xfs_dqtrx_t     *oqa, *nqa;
+
+       if (!otp->t_dqinfo)
+               return;
+
+       xfs_trans_alloc_dqinfo(ntp);
+       oqa = otp->t_dqinfo->dqa_usrdquots;
+       nqa = ntp->t_dqinfo->dqa_usrdquots;
+
+       /*
+        * Because the quota blk reservation is carried forward,
+        * it is also necessary to carry forward the DQ_DIRTY flag.
+        */
+       if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
+               ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
+
+       for (j = 0; j < 2; j++) {
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       if (oqa[i].qt_dquot == NULL)
+                               break;
+                       oq = &oqa[i];
+                       nq = &nqa[i];
+
+                       nq->qt_dquot = oq->qt_dquot;
+                       nq->qt_bcount_delta = nq->qt_icount_delta = 0;
+                       nq->qt_rtbcount_delta = 0;
+
+                       /*
+                        * Transfer whatever is left of the reservations.
+                        */
+                       nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
+                       oq->qt_blk_res = oq->qt_blk_res_used;
+
+                       nq->qt_rtblk_res = oq->qt_rtblk_res -
+                               oq->qt_rtblk_res_used;
+                       oq->qt_rtblk_res = oq->qt_rtblk_res_used;
+
+                       nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
+                       oq->qt_ino_res = oq->qt_ino_res_used;
+
+               }
+               oqa = otp->t_dqinfo->dqa_grpdquots;
+               nqa = ntp->t_dqinfo->dqa_grpdquots;
+       }
+}
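+
+/*
+ * Worked example (assumed numbers): if the old transaction reserved
+ * qt_blk_res = 100 blocks and used qt_blk_res_used = 30, the new
+ * transaction inherits 100 - 30 = 70 reserved blocks while the old one
+ * is trimmed down to exactly the 30 it consumed.
+ */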
+
+/*
+ * Wrap around mod_dquot to account for both user and group quotas.
+ */
+void
+xfs_trans_mod_dquot_byino(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       uint            field,
+       long            delta)
+{
+       xfs_mount_t     *mp = tp->t_mountp;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) ||
+           !XFS_IS_QUOTA_ON(mp) ||
+           ip->i_ino == mp->m_sb.sb_uquotino ||
+           ip->i_ino == mp->m_sb.sb_gquotino)
+               return;
+
+       if (tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+
+       if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
+               (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
+       if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
+               (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
+}
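+
+/*
+ * Example (minimal sketch): after allocating nblks blocks to an inode,
+ * a caller would charge them to both user and group dquots with
+ *
+ *     xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, nblks);
+ */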
+
+STATIC xfs_dqtrx_t *
+xfs_trans_get_dqtrx(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       int             i;
+       xfs_dqtrx_t     *qa;
+
+       qa = XFS_QM_ISUDQ(dqp) ?
+               tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
+
+       for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+               if (qa[i].qt_dquot == NULL ||
+                   qa[i].qt_dquot == dqp)
+                       return &qa[i];
+       }
+
+       return NULL;
+}
+
+/*
+ * Make the changes in the transaction structure.
+ * The moral equivalent to xfs_trans_mod_sb().
+ * We don't touch any fields in the dquot, so we don't care
+ * if it's locked or not (most of the time it won't be).
+ */
+void
+xfs_trans_mod_dquot(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp,
+       uint            field,
+       long            delta)
+{
+       xfs_dqtrx_t     *qtrx;
+
+       ASSERT(tp);
+       ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+       qtrx = NULL;
+
+       if (tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+       /*
+        * Find either the first free slot or the slot that belongs
+        * to this dquot.
+        */
+       qtrx = xfs_trans_get_dqtrx(tp, dqp);
+       ASSERT(qtrx);
+       if (qtrx->qt_dquot == NULL)
+               qtrx->qt_dquot = dqp;
+
+       switch (field) {
+
+               /*
+                * regular disk blk reservation
+                */
+             case XFS_TRANS_DQ_RES_BLKS:
+               qtrx->qt_blk_res += (ulong)delta;
+               break;
+
+               /*
+                * inode reservation
+                */
+             case XFS_TRANS_DQ_RES_INOS:
+               qtrx->qt_ino_res += (ulong)delta;
+               break;
+
+               /*
+                * disk blocks used.
+                */
+             case XFS_TRANS_DQ_BCOUNT:
+               if (qtrx->qt_blk_res && delta > 0) {
+                       qtrx->qt_blk_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
+               }
+               qtrx->qt_bcount_delta += delta;
+               break;
+
+             case XFS_TRANS_DQ_DELBCOUNT:
+               qtrx->qt_delbcnt_delta += delta;
+               break;
+
+               /*
+                * Inode Count
+                */
+             case XFS_TRANS_DQ_ICOUNT:
+               if (qtrx->qt_ino_res && delta > 0) {
+                       qtrx->qt_ino_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
+               }
+               qtrx->qt_icount_delta += delta;
+               break;
+
+               /*
+                * rtblk reservation
+                */
+             case XFS_TRANS_DQ_RES_RTBLKS:
+               qtrx->qt_rtblk_res += (ulong)delta;
+               break;
+
+               /*
+                * rtblk count
+                */
+             case XFS_TRANS_DQ_RTBCOUNT:
+               if (qtrx->qt_rtblk_res && delta > 0) {
+                       qtrx->qt_rtblk_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
+               }
+               qtrx->qt_rtbcount_delta += delta;
+               break;
+
+             case XFS_TRANS_DQ_DELRTBCOUNT:
+               qtrx->qt_delrtb_delta += delta;
+               break;
+
+             default:
+               ASSERT(0);
+       }
+       tp->t_flags |= XFS_TRANS_DQ_DIRTY;
+}
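+
+/*
+ * Worked example (assumed numbers): a transaction that reserves 20
+ * blocks (XFS_TRANS_DQ_RES_BLKS, delta 20) and then allocates 8 of them
+ * (XFS_TRANS_DQ_BCOUNT, delta 8) ends up with qt_blk_res = 20,
+ * qt_blk_res_used = 8 and qt_bcount_delta = 8; the unused 12 blocks are
+ * given back when the deltas are applied at commit time.
+ */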
+
+
+/*
+ * Given an array of dqtrx structures, lock all the dquots associated
+ * and join them to the transaction, provided they have been modified.
+ * We know that the highest number of dquots of one type (usr OR grp)
+ * involved in a transaction is 2, and that usr and grp combined make 3.
+ * So, we don't attempt to make this very generic.
+ */
+STATIC void
+xfs_trans_dqlockedjoin(
+       xfs_trans_t     *tp,
+       xfs_dqtrx_t     *q)
+{
+       ASSERT(q[0].qt_dquot != NULL);
+       if (q[1].qt_dquot == NULL) {
+               xfs_dqlock(q[0].qt_dquot);
+               xfs_trans_dqjoin(tp, q[0].qt_dquot);
+       } else {
+               ASSERT(XFS_QM_TRANS_MAXDQS == 2);
+               xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
+               xfs_trans_dqjoin(tp, q[0].qt_dquot);
+               xfs_trans_dqjoin(tp, q[1].qt_dquot);
+       }
+}
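+
+/*
+ * Note: xfs_dqlock2() is expected to take the two dquot locks in a
+ * stable order, so two racing transactions touching the same user/group
+ * dquot pair cannot deadlock against each other.
+ */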
+
+
+/*
+ * Called by xfs_trans_commit() and similar in spirit to
+ * xfs_trans_apply_sb_deltas().
+ * Go through all the dquots belonging to this transaction and modify the
+ * INCORE dquot to reflect the actual usages.
+ * Unreserve just the reservations done by this transaction.
+ * dquot is still left locked at exit.
+ */
+void
+xfs_trans_apply_dquot_deltas(
+       xfs_trans_t             *tp)
+{
+       int                     i, j;
+       xfs_dquot_t             *dqp;
+       xfs_dqtrx_t             *qtrx, *qa;
+       xfs_disk_dquot_t        *d;
+       long                    totalbdelta;
+       long                    totalrtbdelta;
+
+       if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+               return;
+
+       ASSERT(tp->t_dqinfo);
+       qa = tp->t_dqinfo->dqa_usrdquots;
+       for (j = 0; j < 2; j++) {
+               if (qa[0].qt_dquot == NULL) {
+                       qa = tp->t_dqinfo->dqa_grpdquots;
+                       continue;
+               }
+
+               /*
+                * Lock all of the dquots and join them to the transaction.
+                */
+               xfs_trans_dqlockedjoin(tp, qa);
+
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       qtrx = &qa[i];
+                       /*
+                        * The array of dquots is filled
+                        * sequentially, not sparsely.
+                        */
+                       if ((dqp = qtrx->qt_dquot) == NULL)
+                               break;
+
+                       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+                       ASSERT(dqp->q_transp == tp);
+
+                       /*
+                        * adjust the actual number of blocks used
+                        */
+                       d = &dqp->q_core;
+
+                       /*
+                        * The issue here is that we sometimes skip the
+                        * block quota reservation on purpose, to be fair
+                        * to users when the amount is small. Delayed
+                        * allocations, on the other hand, do reserve, but
+                        * outside of a transaction, so we have no idea how
+                        * much was really reserved. Here we have
+                        * accumulated both delayed-allocation and
+                        * non-delayed blocks; the assumption is that the
+                        * delayed ones are always reserved (outside of a
+                        * transaction) while the others may or may not
+                        * carry a quota reservation.
+                        */
+                       totalbdelta = qtrx->qt_bcount_delta +
+                               qtrx->qt_delbcnt_delta;
+                       totalrtbdelta = qtrx->qt_rtbcount_delta +
+                               qtrx->qt_delrtb_delta;
+#ifdef DEBUG
+                       if (totalbdelta < 0)
+                               ASSERT(be64_to_cpu(d->d_bcount) >=
+                                      -totalbdelta);
+
+                       if (totalrtbdelta < 0)
+                               ASSERT(be64_to_cpu(d->d_rtbcount) >=
+                                      -totalrtbdelta);
+
+                       if (qtrx->qt_icount_delta < 0)
+                               ASSERT(be64_to_cpu(d->d_icount) >=
+                                      -qtrx->qt_icount_delta);
+#endif
+                       if (totalbdelta)
+                               be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
+
+                       if (qtrx->qt_icount_delta)
+                               be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
+
+                       if (totalrtbdelta)
+                               be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
+
+                       /*
+                        * Get any default limits in use.
+                        * Start/reset the timer(s) if needed.
+                        */
+                       if (d->d_id) {
+                               xfs_qm_adjust_dqlimits(tp->t_mountp, d);
+                               xfs_qm_adjust_dqtimers(tp->t_mountp, d);
+                       }
+
+                       dqp->dq_flags |= XFS_DQ_DIRTY;
+                       /*
+                        * add this to the list of items to get logged
+                        */
+                       xfs_trans_log_dquot(tp, dqp);
+                       /*
+                        * Take off what's left of the original reservation.
+                        * In case of delayed allocations, there's no
+                        * reservation that a transaction structure knows of.
+                        */
+                       if (qtrx->qt_blk_res != 0) {
+                               if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+                                       if (qtrx->qt_blk_res >
+                                           qtrx->qt_blk_res_used)
+                                               dqp->q_res_bcount -= (xfs_qcnt_t)
+                                                       (qtrx->qt_blk_res -
+                                                        qtrx->qt_blk_res_used);
+                                       else
+                                               dqp->q_res_bcount -= (xfs_qcnt_t)
+                                                       (qtrx->qt_blk_res_used -
+                                                        qtrx->qt_blk_res);
+                               }
+                       } else {
+                               /*
+                                * These blks were never reserved, either inside
+                                * a transaction or outside one (in a delayed
+                                * allocation). Also, this isn't always a
+                                * negative number since we sometimes
+                                * deliberately skip quota reservations.
+                                */
+                               if (qtrx->qt_bcount_delta) {
+                                       dqp->q_res_bcount +=
+                                             (xfs_qcnt_t)qtrx->qt_bcount_delta;
+                               }
+                       }
+                       /*
+                        * Adjust the RT reservation.
+                        */
+                       if (qtrx->qt_rtblk_res != 0) {
+                               if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
+                                       if (qtrx->qt_rtblk_res >
+                                           qtrx->qt_rtblk_res_used)
+                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
+                                                      (qtrx->qt_rtblk_res -
+                                                       qtrx->qt_rtblk_res_used);
+                                       else
+                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
+                                                      (qtrx->qt_rtblk_res_used -
+                                                       qtrx->qt_rtblk_res);
+                               }
+                       } else {
+                               if (qtrx->qt_rtbcount_delta)
+                                       dqp->q_res_rtbcount +=
+                                           (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
+                       }
+
+                       /*
+                        * Adjust the inode reservation.
+                        */
+                       if (qtrx->qt_ino_res != 0) {
+                               ASSERT(qtrx->qt_ino_res >=
+                                      qtrx->qt_ino_res_used);
+                               if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
+                                       dqp->q_res_icount -= (xfs_qcnt_t)
+                                               (qtrx->qt_ino_res -
+                                                qtrx->qt_ino_res_used);
+                       } else {
+                               if (qtrx->qt_icount_delta)
+                                       dqp->q_res_icount +=
+                                           (xfs_qcnt_t)qtrx->qt_icount_delta;
+                       }
+
+                       ASSERT(dqp->q_res_bcount >=
+                               be64_to_cpu(dqp->q_core.d_bcount));
+                       ASSERT(dqp->q_res_icount >=
+                               be64_to_cpu(dqp->q_core.d_icount));
+                       ASSERT(dqp->q_res_rtbcount >=
+                               be64_to_cpu(dqp->q_core.d_rtbcount));
+               }
+               /*
+                * Do the group quotas next
+                */
+               qa = tp->t_dqinfo->dqa_grpdquots;
+       }
+}
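+
+/*
+ * Worked example (assumed numbers): for a delayed-allocation write the
+ * transaction holds no block reservation (qt_blk_res == 0), so when 8
+ * delalloc blocks are finally accounted, the else-branch above simply
+ * bumps q_res_bcount by qt_bcount_delta = 8, keeping it >= d_bcount.
+ */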
+
+/*
+ * Release the reservations, and adjust the dquots accordingly.
+ * This is called only when the transaction is being aborted. If by
+ * any chance we have done dquot modifications incore (i.e. deltas) already,
+ * we simply throw those away, since that's the expected behavior
+ * when a transaction is curtailed without a commit.
+ */
+void
+xfs_trans_unreserve_and_mod_dquots(
+       xfs_trans_t             *tp)
+{
+       int                     i, j;
+       xfs_dquot_t             *dqp;
+       xfs_dqtrx_t             *qtrx, *qa;
+       boolean_t               locked;
+
+       if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+               return;
+
+       qa = tp->t_dqinfo->dqa_usrdquots;
+
+       for (j = 0; j < 2; j++) {
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       qtrx = &qa[i];
+                       /*
+                        * We assume that the array of dquots is filled
+                        * sequentially, not sparsely.
+                        */
+                       if ((dqp = qtrx->qt_dquot) == NULL)
+                               break;
+                       /*
+                        * Unreserve the original reservation. We don't care
+                        * about the "number of blocks used" field or the
+                        * deltas. We also don't bother to zero the fields.
+                        */
+                       locked = B_FALSE;
+                       if (qtrx->qt_blk_res) {
+                               xfs_dqlock(dqp);
+                               locked = B_TRUE;
+                               dqp->q_res_bcount -=
+                                       (xfs_qcnt_t)qtrx->qt_blk_res;
+                       }
+                       if (qtrx->qt_ino_res) {
+                               if (!locked) {
+                                       xfs_dqlock(dqp);
+                                       locked = B_TRUE;
+                               }
+                               dqp->q_res_icount -=
+                                       (xfs_qcnt_t)qtrx->qt_ino_res;
+                       }
+
+                       if (qtrx->qt_rtblk_res) {
+                               if (!locked) {
+                                       xfs_dqlock(dqp);
+                                       locked = B_TRUE;
+                               }
+                               dqp->q_res_rtbcount -=
+                                       (xfs_qcnt_t)qtrx->qt_rtblk_res;
+                       }
+                       if (locked)
+                               xfs_dqunlock(dqp);
+
+               }
+               qa = tp->t_dqinfo->dqa_grpdquots;
+       }
+}
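
The unreserve path above takes the dquot lock lazily: only once the first
non-zero reservation is found, and it unlocks exactly once at the end. As a
hedged, user-space sketch of that pattern (a pthread mutex standing in for
xfs_dqlock()/xfs_dqunlock(), plain longs for the q_res_* counters):

    #include <pthread.h>
    #include <stdbool.h>

    struct counters {
            pthread_mutex_t lock;
            long blk, ino, rtblk;
    };

    static void unreserve(struct counters *c, long blk_res,
                          long ino_res, long rtblk_res)
    {
            bool locked = false;

            if (blk_res) {
                    pthread_mutex_lock(&c->lock);
                    locked = true;
                    c->blk -= blk_res;
            }
            if (ino_res) {
                    if (!locked) {
                            pthread_mutex_lock(&c->lock);
                            locked = true;
                    }
                    c->ino -= ino_res;
            }
            if (rtblk_res) {
                    if (!locked) {
                            pthread_mutex_lock(&c->lock);
                            locked = true;
                    }
                    c->rtblk -= rtblk_res;
            }
            if (locked)
                    pthread_mutex_unlock(&c->lock);
    }
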
+
+STATIC void
+xfs_quota_warn(
+       struct xfs_mount        *mp,
+       struct xfs_dquot        *dqp,
+       int                     type)
+{
+       /* no warnings for project quotas - we just return ENOSPC later */
+       if (dqp->dq_flags & XFS_DQ_PROJ)
+               return;
+       quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
+                          be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
+                          type);
+}
+
+/*
+ * This reserves disk blocks and inodes against a dquot.
+ * Flags indicate if the dquot is to be locked here and also
+ * if the blk reservation is for RT or regular blocks.
+ * Sending in the XFS_QMOPT_FORCE_RES flag skips the quota check.
+ */
+STATIC int
+xfs_trans_dqresv(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *dqp,
+       long            nblks,
+       long            ninos,
+       uint            flags)
+{
+       xfs_qcnt_t      hardlimit;
+       xfs_qcnt_t      softlimit;
+       time_t          timer;
+       xfs_qwarncnt_t  warns;
+       xfs_qwarncnt_t  warnlimit;
+       xfs_qcnt_t      count;
+       xfs_qcnt_t      *resbcountp;
+       xfs_quotainfo_t *q = mp->m_quotainfo;
+
+
+       xfs_dqlock(dqp);
+
+       if (flags & XFS_TRANS_DQ_RES_BLKS) {
+               hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+               if (!hardlimit)
+                       hardlimit = q->qi_bhardlimit;
+               softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
+               if (!softlimit)
+                       softlimit = q->qi_bsoftlimit;
+               timer = be32_to_cpu(dqp->q_core.d_btimer);
+               warns = be16_to_cpu(dqp->q_core.d_bwarns);
+               warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
+               resbcountp = &dqp->q_res_bcount;
+       } else {
+               ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
+               hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
+               if (!hardlimit)
+                       hardlimit = q->qi_rtbhardlimit;
+               softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
+               if (!softlimit)
+                       softlimit = q->qi_rtbsoftlimit;
+               timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
+               warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
+               warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
+               resbcountp = &dqp->q_res_rtbcount;
+       }
+
+       if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
+           dqp->q_core.d_id &&
+           ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
+            (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
+             (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
+               if (nblks > 0) {
+                       /*
+                        * dquot is locked already. See if we'd go over the
+                        * hardlimit or exceed the timelimit if we allocate
+                        * nblks.
+                        */
+                       if (hardlimit > 0ULL &&
+                           hardlimit <= nblks + *resbcountp) {
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
+                               goto error_return;
+                       }
+                       if (softlimit > 0ULL &&
+                           softlimit <= nblks + *resbcountp) {
+                               if ((timer != 0 && get_seconds() > timer) ||
+                                   (warns != 0 && warns >= warnlimit)) {
+                                       xfs_quota_warn(mp, dqp,
+                                                      QUOTA_NL_BSOFTLONGWARN);
+                                       goto error_return;
+                               }
+
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
+                       }
+               }
+               if (ninos > 0) {
+                       count = be64_to_cpu(dqp->q_core.d_icount);
+                       timer = be32_to_cpu(dqp->q_core.d_itimer);
+                       warns = be16_to_cpu(dqp->q_core.d_iwarns);
+                       warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
+                       hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+                       if (!hardlimit)
+                               hardlimit = q->qi_ihardlimit;
+                       softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
+                       if (!softlimit)
+                               softlimit = q->qi_isoftlimit;
+
+                       if (hardlimit > 0ULL && count >= hardlimit) {
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
+                               goto error_return;
+                       }
+                       if (softlimit > 0ULL && count >= softlimit) {
+                               if  ((timer != 0 && get_seconds() > timer) ||
+                                    (warns != 0 && warns >= warnlimit)) {
+                                       xfs_quota_warn(mp, dqp,
+                                                      QUOTA_NL_ISOFTLONGWARN);
+                                       goto error_return;
+                               }
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
+                       }
+               }
+       }
+
+       /*
+        * Change the reservation, but not the actual usage.
+        * Note that q_res_bcount = q_core.d_bcount + resv
+        */
+       (*resbcountp) += (xfs_qcnt_t)nblks;
+       if (ninos != 0)
+               dqp->q_res_icount += (xfs_qcnt_t)ninos;
+
+       /*
+        * Note the reservation amount in the trans struct too, so that
+        * the transaction knows how much was reserved by it against this
+        * particular dquot.
+        * We don't do this when reserving for a delayed allocation,
+        * because we don't have the luxury of a transaction envelope then.
+        */
+       if (tp) {
+               ASSERT(tp->t_dqinfo);
+               ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+               if (nblks != 0)
+                       xfs_trans_mod_dquot(tp, dqp,
+                                           flags & XFS_QMOPT_RESBLK_MASK,
+                                           nblks);
+               if (ninos != 0)
+                       xfs_trans_mod_dquot(tp, dqp,
+                                           XFS_TRANS_DQ_RES_INOS,
+                                           ninos);
+       }
+       ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
+       ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
+       ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
+
+       xfs_dqunlock(dqp);
+       return 0;
+
+error_return:
+       xfs_dqunlock(dqp);
+       if (flags & XFS_QMOPT_ENOSPC)
+               return ENOSPC;
+       return EDQUOT;
+}
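
The enforcement block above distinguishes hard and soft limits: crossing the
hard limit always fails, while crossing the soft limit fails only once the
grace timer has expired or the warning budget is exhausted. A minimal
stand-alone rendering of that decision, with plain integers in place of the
dquot fields (the names here are illustrative, not the kernel's):

    #include <time.h>

    enum resv { RESV_OK, RESV_HARD, RESV_SOFT_EXPIRED };

    static enum resv check_limits(unsigned long long used, long nblks,
                                  unsigned long long softlimit,
                                  unsigned long long hardlimit,
                                  time_t timer, int warns, int warnlimit)
    {
            if (hardlimit > 0ULL && hardlimit <= used + nblks)
                    return RESV_HARD;       /* over the hard limit: refuse */
            if (softlimit > 0ULL && softlimit <= used + nblks) {
                    /* over the soft limit: refuse only if the grace period
                     * ran out or too many warnings were already issued */
                    if ((timer != 0 && time(NULL) > timer) ||
                        (warns != 0 && warns >= warnlimit))
                            return RESV_SOFT_EXPIRED;
            }
            return RESV_OK;
    }
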
+
+
+/*
+ * Given dquot(s), make disk block and/or inode reservations against them.
+ * The fact that this does the reservation against both the usr and
+ * grp/prj quotas is important, because this follows a both-or-nothing
+ * approach.
+ *
+ * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
+ *        XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
+ *        XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
+ *        XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
+ * dquots are unlocked on return, if they were not locked by caller.
+ */
+int
+xfs_trans_reserve_quota_bydquots(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *udqp,
+       xfs_dquot_t     *gdqp,
+       long            nblks,
+       long            ninos,
+       uint            flags)
+{
+       int             resvd = 0, error;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       if (tp && tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+
+       ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+
+       if (udqp) {
+               error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
+                                       (flags & ~XFS_QMOPT_ENOSPC));
+               if (error)
+                       return error;
+               resvd = 1;
+       }
+
+       if (gdqp) {
+               error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
+               if (error) {
+                       /*
+                        * Can't do it, so back out the previous reservation.
+                        */
+                       if (resvd) {
+                               flags |= XFS_QMOPT_FORCE_RES;
+                               xfs_trans_dqresv(tp, mp, udqp,
+                                                -nblks, -ninos, flags);
+                       }
+                       return error;
+               }
+       }
+
+       /*
+        * Didn't change anything critical, so no need to log.
+        */
+       return 0;
+}
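
The both-or-nothing behaviour is worth spelling out: the user reservation is
taken first, and if the group/project reservation then fails, the user side
is backed out with XFS_QMOPT_FORCE_RES so the rollback itself cannot fail.
A compilable sketch of the same shape (struct quota and the EDQUOT usage are
illustrative stand-ins, not the kernel's types):

    #include <errno.h>

    struct quota { long long used, limit; };

    static int reserve(struct quota *q, long nblks)
    {
            if (q->limit && q->used + nblks > q->limit)
                    return EDQUOT;
            q->used += nblks;
            return 0;
    }

    static int reserve_both(struct quota *uq, struct quota *gq, long nblks)
    {
            int error = reserve(uq, nblks);

            if (error)
                    return error;
            error = reserve(gq, nblks);
            if (error) {
                    uq->used -= nblks;      /* back out; cannot fail */
                    return error;
            }
            return 0;
    }
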
+
+
+/*
+ * Lock the dquot and change the reservation if we can.
+ * This doesn't change the actual usage, just the reservation.
+ * The inode sent in is locked.
+ */
+int
+xfs_trans_reserve_quota_nblks(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       long                    nblks,
+       long                    ninos,
+       uint                    flags)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+       if (XFS_IS_PQUOTA_ON(mp))
+               flags |= XFS_QMOPT_ENOSPC;
+
+       ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+       ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
+                               XFS_TRANS_DQ_RES_RTBLKS ||
+              (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
+                               XFS_TRANS_DQ_RES_BLKS);
+
+       /*
+        * Reserve nblks against these dquots, with trans as the mediator.
+        */
+       return xfs_trans_reserve_quota_bydquots(tp, mp,
+                                               ip->i_udquot, ip->i_gdquot,
+                                               nblks, ninos, flags);
+}
+
+/*
+ * This routine is called to allocate a quotaoff log item.
+ */
+xfs_qoff_logitem_t *
+xfs_trans_get_qoff_item(
+       xfs_trans_t             *tp,
+       xfs_qoff_logitem_t      *startqoff,
+       uint                    flags)
+{
+       xfs_qoff_logitem_t      *q;
+
+       ASSERT(tp != NULL);
+
+       q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
+       ASSERT(q != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &q->qql_item);
+       return q;
+}
+
+
+/*
+ * This is called to mark the quotaoff logitem as needing
+ * to be logged when the transaction is committed.  The logitem must
+ * already be associated with the given transaction.
+ */
+void
+xfs_trans_log_quotaoff_item(
+       xfs_trans_t             *tp,
+       xfs_qoff_logitem_t      *qlp)
+{
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
+
+STATIC void
+xfs_trans_alloc_dqinfo(
+       xfs_trans_t     *tp)
+{
+       tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+}
+
+void
+xfs_trans_free_dqinfo(
+       xfs_trans_t     *tp)
+{
+       if (!tp->t_dqinfo)
+               return;
+       kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+       tp->t_dqinfo = NULL;
+}
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
new file mode 100644 (file)
index 0000000..7c220b4
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_VNODE_H__
+#define __XFS_VNODE_H__
+
+#include "xfs_fs.h"
+
+struct file;
+struct xfs_inode;
+struct xfs_iomap;
+struct attrlist_cursor_kern;
+
+/*
+ * Return values for xfs_inactive.  A return value of
+ * VN_INACTIVE_NOCACHE implies that the file system behavior
+ * has disassociated its state and bhv_desc_t from the vnode.
+ */
+#define        VN_INACTIVE_CACHE       0
+#define        VN_INACTIVE_NOCACHE     1
+
+/*
+ * Flags for read/write calls - same values as IRIX
+ */
+#define IO_ISDIRECT    0x00004         /* bypass page cache */
+#define IO_INVIS       0x00020         /* don't update inode timestamps */
+
+#define XFS_IO_FLAGS \
+       { IO_ISDIRECT,  "DIRECT" }, \
+       { IO_INVIS,     "INVIS"}
+
+/*
+ * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
+ */
+#define FI_NONE                        0       /* none */
+#define FI_REMAPF              1       /* Do a remapf prior to the operation */
+#define FI_REMAPF_LOCKED       2       /* Do a remapf prior to the operation.
+                                          Prevent VM access to the pages until
+                                          the operation completes. */
+
+/*
+ * Some useful predicates.
+ */
+#define VN_MAPPED(vp)  mapping_mapped(vp->i_mapping)
+#define VN_CACHED(vp)  (vp->i_mapping->nrpages)
+#define VN_DIRTY(vp)   mapping_tagged(vp->i_mapping, \
+                                       PAGECACHE_TAG_DIRTY)
+
+
+#endif /* __XFS_VNODE_H__ */
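
The XFS_IO_FLAGS pairs above follow the kernel's { flag, "name" } table
convention consumed by the tracing macros. A quick user-space illustration
of decoding a flags word from such a table (the kernel side would use
__print_flags() in a trace event rather than printf):

    #include <stdio.h>

    #define IO_ISDIRECT 0x00004
    #define IO_INVIS    0x00020

    static const struct { int bit; const char *name; } io_flags[] = {
            { IO_ISDIRECT, "DIRECT" },
            { IO_INVIS,    "INVIS" },
    };

    int main(void)
    {
            int flags = IO_ISDIRECT | IO_INVIS;

            for (size_t i = 0; i < sizeof(io_flags) / sizeof(io_flags[0]); i++)
                    if (flags & io_flags[i].bit)
                            printf("%s ", io_flags[i].name);
            putchar('\n');  /* prints: DIRECT INVIS */
            return 0;
    }
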
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
new file mode 100644 (file)
index 0000000..87d3e03
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2008 Christoph Hellwig.
+ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_acl.h"
+#include "xfs_vnodeops.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+static int
+xfs_xattr_get(struct dentry *dentry, const char *name,
+               void *value, size_t size, int xflags)
+{
+       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+       int error, asize = size;
+
+       if (strcmp(name, "") == 0)
+               return -EINVAL;
+
+       /* Convert Linux syscall to XFS internal ATTR flags */
+       if (!size) {
+               xflags |= ATTR_KERNOVAL;
+               value = NULL;
+       }
+
+       error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
+       if (error)
+               return error;
+       return asize;
+}
+
+static int
+xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
+               size_t size, int flags, int xflags)
+{
+       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+
+       if (strcmp(name, "") == 0)
+               return -EINVAL;
+
+       /* Convert Linux syscall to XFS internal ATTR flags */
+       if (flags & XATTR_CREATE)
+               xflags |= ATTR_CREATE;
+       if (flags & XATTR_REPLACE)
+               xflags |= ATTR_REPLACE;
+
+       if (!value)
+               return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
+       return -xfs_attr_set(ip, (unsigned char *)name,
+                               (void *)value, size, xflags);
+}
+
+static const struct xattr_handler xfs_xattr_user_handler = {
+       .prefix = XATTR_USER_PREFIX,
+       .flags  = 0, /* no flags implies user namespace */
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+static const struct xattr_handler xfs_xattr_trusted_handler = {
+       .prefix = XATTR_TRUSTED_PREFIX,
+       .flags  = ATTR_ROOT,
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+static const struct xattr_handler xfs_xattr_security_handler = {
+       .prefix = XATTR_SECURITY_PREFIX,
+       .flags  = ATTR_SECURE,
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+const struct xattr_handler *xfs_xattr_handlers[] = {
+       &xfs_xattr_user_handler,
+       &xfs_xattr_trusted_handler,
+       &xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
+       &xfs_xattr_acl_access_handler,
+       &xfs_xattr_acl_default_handler,
+#endif
+       NULL
+};
+
+static unsigned int xfs_xattr_prefix_len(int flags)
+{
+       if (flags & XFS_ATTR_SECURE)
+               return sizeof("security");
+       else if (flags & XFS_ATTR_ROOT)
+               return sizeof("trusted");
+       else
+               return sizeof("user");
+}
+
+static const char *xfs_xattr_prefix(int flags)
+{
+       if (flags & XFS_ATTR_SECURE)
+               return xfs_xattr_security_handler.prefix;
+       else if (flags & XFS_ATTR_ROOT)
+               return xfs_xattr_trusted_handler.prefix;
+       else
+               return xfs_xattr_user_handler.prefix;
+}
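
A subtlety in xfs_xattr_prefix_len(): sizeof("security") counts the
terminating NUL, so it equals strlen("security.") -- and the handler
prefixes ("security.", "trusted.", "user.") each carry a trailing dot.
That is why the prefix copy in xfs_xattr_put_listent() below lands exactly
on the dot-separated form. A one-line check:

    #include <assert.h>
    #include <string.h>

    int main(void)
    {
            assert(sizeof("security") == strlen("security."));  /* 9 == 9 */
            assert(sizeof("trusted")  == strlen("trusted."));   /* 8 == 8 */
            assert(sizeof("user")     == strlen("user."));      /* 5 == 5 */
            return 0;
    }
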
+
+static int
+xfs_xattr_put_listent(
+       struct xfs_attr_list_context *context,
+       int             flags,
+       unsigned char   *name,
+       int             namelen,
+       int             valuelen,
+       unsigned char   *value)
+{
+       unsigned int prefix_len = xfs_xattr_prefix_len(flags);
+       char *offset;
+       int arraytop;
+
+       ASSERT(context->count >= 0);
+
+       /*
+        * Only show root namespace entries if we are actually allowed to
+        * see them.
+        */
+       if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+               return 0;
+
+       arraytop = context->count + prefix_len + namelen + 1;
+       if (arraytop > context->firstu) {
+               context->count = -1;    /* insufficient space */
+               return 1;
+       }
+       offset = (char *)context->alist + context->count;
+       strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+       offset += prefix_len;
+       strncpy(offset, (char *)name, namelen);                 /* real name */
+       offset += namelen;
+       *offset = '\0';
+       context->count += prefix_len + namelen + 1;
+       return 0;
+}
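
The callback packs each attribute as "prefix.name\0" back to back into the
caller's buffer, with context->count tracking the write offset and a count
of -1 signalling overflow. Roughly, in stand-alone form (put_entry() is an
illustrative helper, not a kernel function):

    #include <stdio.h>
    #include <string.h>

    static int put_entry(char *buf, size_t bufsize, size_t *count,
                         const char *prefix, const char *name)
    {
            size_t plen = strlen(prefix), nlen = strlen(name);

            if (*count + plen + nlen + 1 > bufsize)
                    return -1;              /* insufficient space */
            memcpy(buf + *count, prefix, plen);
            memcpy(buf + *count + plen, name, nlen + 1);
            *count += plen + nlen + 1;
            return 0;
    }

    int main(void)
    {
            char buf[64];
            size_t count = 0;

            put_entry(buf, sizeof(buf), &count, "user.", "comment");
            put_entry(buf, sizeof(buf), &count, "trusted.", "origin");
            for (size_t i = 0; i < count; i += strlen(buf + i) + 1)
                    puts(buf + i);  /* user.comment, trusted.origin */
            return 0;
    }
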
+
+static int
+xfs_xattr_put_listent_sizes(
+       struct xfs_attr_list_context *context,
+       int             flags,
+       unsigned char   *name,
+       int             namelen,
+       int             valuelen,
+       unsigned char   *value)
+{
+       context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
+       return 0;
+}
+
+static int
+list_one_attr(const char *name, const size_t len, void *data,
+               size_t size, ssize_t *result)
+{
+       char *p = data + *result;
+
+       *result += len;
+       if (!size)
+               return 0;
+       if (*result > size)
+               return -ERANGE;
+
+       strcpy(p, name);
+       return 0;
+}
+
+ssize_t
+xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+       struct xfs_attr_list_context context;
+       struct attrlist_cursor_kern cursor = { 0 };
+       struct inode            *inode = dentry->d_inode;
+       int                     error;
+
+       /*
+        * First read the regular on-disk attributes.
+        */
+       memset(&context, 0, sizeof(context));
+       context.dp = XFS_I(inode);
+       context.cursor = &cursor;
+       context.resynch = 1;
+       context.alist = data;
+       context.bufsize = size;
+       context.firstu = context.bufsize;
+
+       if (size)
+               context.put_listent = xfs_xattr_put_listent;
+       else
+               context.put_listent = xfs_xattr_put_listent_sizes;
+
+       xfs_attr_list_int(&context);
+       if (context.count < 0)
+               return -ERANGE;
+
+       /*
+        * Then add the two synthetic ACL attributes.
+        */
+       if (posix_acl_access_exists(inode)) {
+               error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
+                               strlen(POSIX_ACL_XATTR_ACCESS) + 1,
+                               data, size, &context.count);
+               if (error)
+                       return error;
+       }
+
+       if (posix_acl_default_exists(inode)) {
+               error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
+                               strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
+                               data, size, &context.count);
+               if (error)
+                       return error;
+       }
+
+       return context.count;
+}
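
From user space this pairs with the usual two-call listxattr() idiom: a
first call with size 0 (served by the sizes-only callback above) to learn
the required length, then a second call with a real buffer:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/xattr.h>

    int main(int argc, char **argv)
    {
            const char *path = argc > 1 ? argv[1] : ".";
            ssize_t len = listxattr(path, NULL, 0);
            char *buf;

            if (len <= 0)
                    return 0;       /* no attributes (or error) */
            buf = malloc(len);
            len = listxattr(path, buf, len);
            for (ssize_t i = 0; i < len; i += strlen(buf + i) + 1)
                    puts(buf + i);
            free(buf);
            return 0;
    }
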
index 4f76959397fa88a869dff276ba5ab0ccc9494071..f4c38d8c6674a3dd71ea08a47dc68e89e81e0d92 100644 (file)
@@ -143,7 +143,7 @@ __SYSCALL(__NR_pivot_root, sys_pivot_root)
 
 /* fs/nfsctl.c */
 #define __NR_nfsservctl 42
-__SC_COMP(__NR_nfsservctl, sys_nfsservctl, compat_sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
 
 /* fs/open.c */
 #define __NR3264_statfs 43
index 8779405e15a871eba64681021065ed1ab0e71853..c6e7523bf7652ab050285c2c02dad3715a010cab 100644 (file)
@@ -438,7 +438,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
                                 struct compat_timespec __user *tsp,
                                 const compat_sigset_t __user *sigmask,
                                 compat_size_t sigsetsize);
-asmlinkage long compat_sys_nfsservctl(int cmd, void *notused, void *notused2);
 asmlinkage long compat_sys_signalfd4(int ufd,
                                     const compat_sigset_t __user *sigmask,
                                     compat_size_t sigsetsize, int flags);
index 0c69ad825b39f656d3599d1c79dc16661772057f..3c9c54fd5690a6142cf155091a2e5546600037df 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *     connector.h
  * 
- * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * 2004-2005 Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
  * All rights reserved.
  * 
  * This program is free software; you can redistribute it and/or modify
index 178cdb4f1d4afe81a66e631872de4b587e95d1c4..c2bd68f2277a4b533f804d5dc3c6890a4485d95d 100644 (file)
@@ -2318,6 +2318,11 @@ extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*te
 extern struct inode * iget_locked(struct super_block *, unsigned long);
 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
 extern int insert_inode_locked(struct inode *);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
+#else
+static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
+#endif
 extern void unlock_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
 
index d464de53db4399c598ec3793f75290856b049255..464cff52686092c0f862f551b2db53ed3050b629 100644 (file)
@@ -47,6 +47,9 @@
  *  - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
  *    fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
  *  - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
  */
 
 #ifndef _LINUX_FUSE_H
@@ -78,7 +81,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 16
+#define FUSE_KERNEL_MINOR_VERSION 17
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -153,8 +156,10 @@ struct fuse_file_lock {
 /**
  * INIT request/reply flags
  *
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
  * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
  * FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -163,6 +168,7 @@ struct fuse_file_lock {
 #define FUSE_EXPORT_SUPPORT    (1 << 4)
 #define FUSE_BIG_WRITES                (1 << 5)
 #define FUSE_DONT_MASK         (1 << 6)
+#define FUSE_FLOCK_LOCKS       (1 << 10)
 
 /**
  * CUSE INIT request/reply flags
@@ -175,6 +181,7 @@ struct fuse_file_lock {
  * Release flags
  */
 #define FUSE_RELEASE_FLUSH     (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK      (1 << 1)
 
 /**
  * Getattr flags
index eec3bae164d451a38b23906a283af54965074ec0..8fc7dd1a57ff29cb96722a01290f07de6dd1d23a 100644 (file)
@@ -22,6 +22,7 @@ extern int            __set_personality(unsigned int);
  * These occupy the top three bytes.
  */
 enum {
+       UNAME26 =               0x0020000,
        ADDR_NO_RANDOMIZE =     0x0040000,      /* disable randomization of VA space */
        FDPIC_FUNCPTRS =        0x0080000,      /* userspace function ptrs point to descriptors
                                                 * (signal handling)
index 5e3e25a3c9c38d3c3c5be63d2b40cd740f62cfd7..63d2df43e61a1999206d15b6d06f3567b9ac8f9e 100644 (file)
@@ -14,6 +14,7 @@ struct platform_pwm_backlight_data {
        unsigned int pwm_period_ns;
        int (*init)(struct device *dev);
        int (*notify)(struct device *dev, int brightness);
+       void (*notify_after)(struct device *dev, int brightness);
        void (*exit)(struct device *dev);
        int (*check_fb)(struct device *dev, struct fb_info *info);
 };
index 9026b30238f32de96612bee16d8579022b8e8e10..218168a2b5e9c4fb94055b844b7326996f245447 100644 (file)
 #define  RIO_PEF_PROCESSOR             0x20000000      /* [I] Processor */
 #define  RIO_PEF_SWITCH                        0x10000000      /* [I] Switch */
 #define  RIO_PEF_MULTIPORT             0x08000000      /* [VI, 2.1] Multiport */
-#define  RIO_PEF_INB_MBOX              0x00f00000      /* [II] Mailboxes */
-#define  RIO_PEF_INB_MBOX0             0x00800000      /* [II] Mailbox 0 */
-#define  RIO_PEF_INB_MBOX1             0x00400000      /* [II] Mailbox 1 */
-#define  RIO_PEF_INB_MBOX2             0x00200000      /* [II] Mailbox 2 */
-#define  RIO_PEF_INB_MBOX3             0x00100000      /* [II] Mailbox 3 */
-#define  RIO_PEF_INB_DOORBELL          0x00080000      /* [II] Doorbells */
+#define  RIO_PEF_INB_MBOX              0x00f00000      /* [II, <= 1.2] Mailboxes */
+#define  RIO_PEF_INB_MBOX0             0x00800000      /* [II, <= 1.2] Mailbox 0 */
+#define  RIO_PEF_INB_MBOX1             0x00400000      /* [II, <= 1.2] Mailbox 1 */
+#define  RIO_PEF_INB_MBOX2             0x00200000      /* [II, <= 1.2] Mailbox 2 */
+#define  RIO_PEF_INB_MBOX3             0x00100000      /* [II, <= 1.2] Mailbox 3 */
+#define  RIO_PEF_INB_DOORBELL          0x00080000      /* [II, <= 1.2] Doorbells */
 #define  RIO_PEF_EXT_RT                        0x00000200      /* [III, 1.3] Extended route table support */
 #define  RIO_PEF_STD_RT                        0x00000100      /* [III, 1.3] Standard route table support */
 #define  RIO_PEF_CTLS                  0x00000010      /* [III] CTLS */
 #define        RIO_SWITCH_RT_LIMIT     0x34    /* [III, 1.3] Switch Route Table Destination ID Limit CAR */
 #define         RIO_RT_MAX_DESTID              0x0000ffff
 
-#define RIO_MBOX_CSR           0x40    /* [II] Mailbox CSR */
+#define RIO_MBOX_CSR           0x40    /* [II, <= 1.2] Mailbox CSR */
 #define  RIO_MBOX0_AVAIL               0x80000000      /* [II] Mbox 0 avail */
 #define  RIO_MBOX0_FULL                        0x40000000      /* [II] Mbox 0 full */
 #define  RIO_MBOX0_EMPTY               0x20000000      /* [II] Mbox 0 empty */
 #define  RIO_MBOX3_FAIL                        0x00000008      /* [II] Mbox 3 fail */
 #define  RIO_MBOX3_ERROR               0x00000004      /* [II] Mbox 3 error */
 
-#define RIO_WRITE_PORT_CSR     0x44    /* [I] Write Port CSR */
-#define RIO_DOORBELL_CSR       0x44    /* [II] Doorbell CSR */
+#define RIO_WRITE_PORT_CSR     0x44    /* [I, <= 1.2] Write Port CSR */
+#define RIO_DOORBELL_CSR       0x44    /* [II, <= 1.2] Doorbell CSR */
 #define  RIO_DOORBELL_AVAIL            0x80000000      /* [II] Doorbell avail */
 #define  RIO_DOORBELL_FULL             0x40000000      /* [II] Doorbell full */
 #define  RIO_DOORBELL_EMPTY            0x20000000      /* [II] Doorbell empty */
index 8c03b98df5f93d196d523c60f31a850ffe9a22e4..1ff0ec2a5e8d29a2a4519bb882900c56f39c0dab 100644 (file)
@@ -702,9 +702,6 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args);
 asmlinkage long sys_sysinfo(struct sysinfo __user *info);
 asmlinkage long sys_sysfs(int option,
                                unsigned long arg1, unsigned long arg2);
-asmlinkage long sys_nfsservctl(int cmd,
-                               struct nfsctl_arg __user *arg,
-                               void __user *res);
 asmlinkage long sys_syslog(int type, char __user *buf, int len);
 asmlinkage long sys_uselib(const char __user *library);
 asmlinkage long sys_ni_syscall(void);
index b004e557caa9c064c15b41b26bf773677f529913..2ef4385da6bf7a70ed60a8a01b7fdb54201c1840 100644 (file)
@@ -410,7 +410,28 @@ struct gps_event_hdr {
        u16 plen;
 } __attribute__ ((packed));
 
-/* platform data */
+/**
+ * struct ti_st_plat_data - platform data shared between the ST driver and
+ *     the platform-specific board file which adds the ST device.
+ * @nshutdown_gpio: Host's GPIO line to which the chip's BT_EN is connected.
+ * @dev_name: The UART/TTY name to which the chip is interfaced
+ *     (e.g. /dev/ttyS1).
+ * @flow_cntrl: Should always be 1, since the UART's CTS/RTS is used for PM
+ *     purposes.
+ * @baud_rate: The baud rate supported by the host UART controller; this is
+ *     shared with the chip via an HCI VS command from the user-space Init
+ *     Mgr application.
+ * @suspend:
+ * @resume: Legacy PM routines hooked into the platform-specific board file,
+ *     so as to take chip-host interface-specific action.
+ * @chip_enable:
+ * @chip_disable: Platform/interface-specific mux mode setting, GPIO
+ *     configuration, host-side PM disabling, etc. can be done here.
+ * @chip_asleep:
+ * @chip_awake: The chip's deep-sleep state is communicated to the
+ *     host-specific board-xx.c so it can take actions such as cutting UART
+ *     clocks when the chip is asleep or running the host faster when it is
+ *     awake.
+ */
 struct ti_st_plat_data {
        long nshutdown_gpio;
        unsigned char dev_name[UART_DEV_NAME_LEN]; /* uart name */
@@ -418,6 +439,10 @@ struct ti_st_plat_data {
        unsigned long baud_rate;
        int (*suspend)(struct platform_device *, pm_message_t);
        int (*resume)(struct platform_device *);
+       int (*chip_enable) (struct kim_data_s *);
+       int (*chip_disable) (struct kim_data_s *);
+       int (*chip_asleep) (struct kim_data_s *);
+       int (*chip_awake) (struct kim_data_s *);
 };
 
 #endif /* TI_WILINK_ST_H */
index 44bc0c5617e1c227df1b3699dbdac1ac7dbab191..5f2ede82b3d67e5223089f6db9cec489267991ef 100644 (file)
@@ -421,6 +421,8 @@ extern void tty_driver_flush_buffer(struct tty_struct *tty);
 extern void tty_throttle(struct tty_struct *tty);
 extern void tty_unthrottle(struct tty_struct *tty);
 extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
+extern void tty_driver_remove_tty(struct tty_driver *driver,
+                                 struct tty_struct *tty);
 extern void tty_shutdown(struct tty_struct *tty);
 extern void tty_free_termios(struct tty_struct *tty);
 extern int is_current_pgrp_orphaned(void);
index 9deeac85524078f0ef97b140f270c39391509f51..ecdaeb98b293727274b6511ee7ef523c00324564 100644 (file)
@@ -47,6 +47,9 @@
  *
  *     This routine is called synchronously when a particular tty device
  *     is closed for the last time freeing up the resources.
+ *     Note that tty_shutdown() is not called if ops->shutdown is defined.
+ *     This means one is responsible for calling ops->remove (e.g. via
+ *     tty_driver_remove_tty) and releasing tty->termios.
  *
  *
  * void (*cleanup)(struct tty_struct * tty);
index f1bfa12ea246209802624502c50da5380e66049e..2b8963ff0f359b4c6dab919e2ccc3c2c0324b5bb 100644 (file)
  *
  *     (thresh - thresh/DIRTY_FULL_SCOPE, thresh)
  *
- * The 1/16 region above the global dirty limit will be put to maximum pauses:
- *
- *     (limit, limit + limit/DIRTY_MAXPAUSE_AREA)
- *
- * The 1/16 region above the max-pause region, dirty exceeded bdi's will be put
- * to loops:
- *
- *     (limit + limit/DIRTY_MAXPAUSE_AREA, limit + limit/DIRTY_PASSGOOD_AREA)
- *
  * Further beyond, all dirtier tasks will enter a loop waiting (possibly long
  * time) for the dirty pages to drop, unless written enough pages.
  *
@@ -31,8 +22,6 @@
  */
 #define DIRTY_SCOPE            8
 #define DIRTY_FULL_SCOPE       (DIRTY_SCOPE / 2)
-#define DIRTY_MAXPAUSE_AREA            16
-#define DIRTY_PASSGOOD_AREA            8
 
 /*
  * 4MB minimal write chunk size
index d17f47fc9e31b9a3298b1911b5c46c9fb34d3400..408ae4882d2262d4d8b084701cdb530cb39cbfe3 100644 (file)
@@ -1865,6 +1865,9 @@ struct wiphy {
         * you need use set_wiphy_dev() (see below) */
        struct device dev;
 
+       /* protects ->resume, ->suspend sysfs callbacks against unregister hw */
+       bool registered;
+
        /* dir in debugfs: ieee80211/<wiphyname> */
        struct dentry *debugfsdir;
 
index 2de8fe9075963350f8159759c118dedc70db1b57..126c675f4f14e5b41afd30694615b84d5eedbcbc 100644 (file)
@@ -27,6 +27,12 @@ struct target_core_fabric_ops {
        int (*tpg_check_demo_mode_cache)(struct se_portal_group *);
        int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *);
        int (*tpg_check_prod_mode_write_protect)(struct se_portal_group *);
+       /*
+        * Optionally used by fabrics to allow demo-mode login, but not
+        * expose any TPG LUNs, and return 'not connected' in standard
+        * inquiry response
+        */
+       int (*tpg_check_demo_mode_login_only)(struct se_portal_group *);
        struct se_node_acl *(*tpg_alloc_fabric_acl)(
                                        struct se_portal_group *);
        void (*tpg_release_fabric_acl)(struct se_portal_group *,
index 2e9425889fa8874f742820b0f886f1cf0ee0a77c..9b956fa20308032c33c6b9550faacf9718b9fc6d 100644 (file)
@@ -1331,7 +1331,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
                if (!thread_fn)
                        return -EINVAL;
                handler = irq_default_primary_handler;
-               irqflags |= IRQF_ONESHOT;
        }
 
        action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
index 836a2ae0ac31c9120e362bad14254c3e2247eb25..28a40d8171b8e67488efc6f383736d5f7532c5ff 100644 (file)
@@ -1604,7 +1604,7 @@ static int __init printk_late_init(void)
        struct console *con;
 
        for_each_console(con) {
-               if (con->flags & CON_BOOT) {
+               if (!keep_bootcon && con->flags & CON_BOOT) {
                        printk(KERN_INFO "turn off boot console %s%d\n",
                                con->name, con->index);
                        unregister_console(con);
index dd948a1fca4c7e0b70ed7d5dc3902350c65d0c25..18ee1d2f647408a6ffef7494ffdfef2043749dcf 100644 (file)
@@ -37,6 +37,8 @@
 #include <linux/fs_struct.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
+#include <linux/version.h>
+#include <linux/ctype.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -44,6 +46,8 @@
 #include <linux/user_namespace.h>
 
 #include <linux/kmsg_dump.h>
+/* Move somewhere else to avoid recompiling? */
+#include <generated/utsrelease.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1161,6 +1165,34 @@ DECLARE_RWSEM(uts_sem);
 #define override_architecture(name)    0
 #endif
 
+/*
+ * Work around broken programs that cannot handle "Linux 3.0".
+ * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40.
+ */
+static int override_release(char __user *release, int len)
+{
+       int ret = 0;
+       char buf[len];
+
+       if (current->personality & UNAME26) {
+               char *rest = UTS_RELEASE;
+               int ndots = 0;
+               unsigned v;
+
+               while (*rest) {
+                       if (*rest == '.' && ++ndots >= 3)
+                               break;
+                       if (!isdigit(*rest) && *rest != '.')
+                               break;
+                       rest++;
+               }
+               v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+               snprintf(buf, len, "2.6.%u%s", v, rest);
+               ret = copy_to_user(release, buf, len);
+       }
+       return ret;
+}
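
Concretely: for a 3.1 kernel, (LINUX_VERSION_CODE >> 8) & 0xff yields the
minor number 1, so v becomes 41 and a "3.1-rc4" release is reported as
"2.6.41-rc4". A user-space rendering of the arithmetic (version_code is
built by hand here, mimicking KERNEL_VERSION(3, 1, 0)):

    #include <stdio.h>

    int main(void)
    {
            unsigned version_code = (3 << 16) | (1 << 8) | 0;
            unsigned v = ((version_code >> 8) & 0xff) + 40;

            printf("2.6.%u%s\n", v, "-rc4");    /* 2.6.41-rc4 */
            return 0;
    }
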
+
 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
 {
        int errno = 0;
@@ -1170,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
                errno = -EFAULT;
        up_read(&uts_sem);
 
+       if (!errno && override_release(name->release, sizeof(name->release)))
+               errno = -EFAULT;
        if (!errno && override_architecture(name))
                errno = -EFAULT;
        return errno;
@@ -1191,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
                error = -EFAULT;
        up_read(&uts_sem);
 
+       if (!error && override_release(name->release, sizeof(name->release)))
+               error = -EFAULT;
        if (!error && override_architecture(name))
                error = -EFAULT;
        return error;
@@ -1225,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
 
        if (!error && override_architecture(name))
                error = -EFAULT;
+       if (!error && override_release(name->release, sizeof(name->release)))
+               error = -EFAULT;
        return error ? -EFAULT : 0;
 }
 #endif
index 62cbc8877fef2564d2d7284d44959d08e64b08ce..a9a5de07c4f16e1310616cb9dd51903f710199ff 100644 (file)
@@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void)
        return -ENOSYS;
 }
 
-cond_syscall(sys_nfsservctl);
 cond_syscall(sys_quotactl);
 cond_syscall(sys32_quotactl);
 cond_syscall(sys_acct);
index 3b8e028b96014a088b6227859b9163e8bceddb5a..e8bffbe2ba4b3c078594f34f4594769dfc18f2c3 100644 (file)
@@ -1,6 +1,6 @@
 #include <linux/stat.h>
 #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
 #include <net/ip_vs.h>
index 4e4932a7b3608ac6e32175ee87481c1ac47a3572..362da653813da60e339d57c9d28262b0dc7c36cd 100644 (file)
@@ -1,6 +1,6 @@
 #include <linux/stat.h>
 #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
 #include <net/ip_vs.h>
index 930de943727117043c0274f35cbe6e70d5eb8160..ebd1e86bef1c2d150a34239f6583e2340b078ea1 100644 (file)
@@ -1841,29 +1841,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
  */
 static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 {
-       int lock_count = -1;
        struct mem_cgroup *iter, *failed = NULL;
        bool cond = true;
 
        for_each_mem_cgroup_tree_cond(iter, mem, cond) {
-               bool locked = iter->oom_lock;
-
-               iter->oom_lock = true;
-               if (lock_count == -1)
-                       lock_count = iter->oom_lock;
-               else if (lock_count != locked) {
+               if (iter->oom_lock) {
                        /*
                         * this subtree of our hierarchy is already locked
                         * so we cannot give a lock.
                         */
-                       lock_count = 0;
                        failed = iter;
                        cond = false;
-               }
+               } else
+                       iter->oom_lock = true;
        }
 
        if (!failed)
-               goto done;
+               return true;
 
        /*
         * OK, we failed to lock the whole subtree so we have to clean up
@@ -1877,8 +1871,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
                }
                iter->oom_lock = false;
        }
-done:
-       return lock_count;
+       return false;
 }
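
The rewritten mem_cgroup_oom_lock() is a try-lock over a tree: walk the
hierarchy, stop at the first member already locked, then unwind only the
members locked before the failure point. Reduced to a flat array of flags
(a hedged sketch; the real code walks the cgroup hierarchy iterator):

    #include <stdbool.h>
    #include <stddef.h>

    static bool lock_all(bool *locks, size_t n)
    {
            size_t failed = n;

            for (size_t i = 0; i < n; i++) {
                    if (locks[i]) {         /* already locked below us */
                            failed = i;
                            break;
                    }
                    locks[i] = true;
            }
            if (failed == n)
                    return true;            /* got the whole subtree */

            for (size_t i = 0; i < failed; i++)
                    locks[i] = false;       /* undo our partial locking */
            return false;
    }
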
 
 /*
@@ -2169,13 +2162,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
 
        /* Notify other cpus that system-wide "drain" is running */
        get_online_cpus();
-       /*
-        * Get a hint for avoiding draining charges on the current cpu,
-        * which must be exhausted by our charging.  It is not required that
-        * this be a precise check, so we use raw_smp_processor_id() instead of
-        * getcpu()/putcpu().
-        */
-       curcpu = raw_smp_processor_id();
+       curcpu = get_cpu();
        for_each_online_cpu(cpu) {
                struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                struct mem_cgroup *mem;
@@ -2192,6 +2179,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
                                schedule_work_on(cpu, &stock->work);
                }
        }
+       put_cpu();
 
        if (!sync)
                goto out;
index d1960744f881d34fe3f1c3412d717db86b8e5478..0e309cd1b5b9a2e2841a10205d7d91ad87083dc7 100644 (file)
@@ -754,21 +754,10 @@ static void balance_dirty_pages(struct address_space *mapping,
                 * 200ms is typically more than enough to curb heavy dirtiers;
                 * (b) the pause time limit makes the dirtiers more responsive.
                 */
-               if (nr_dirty < dirty_thresh +
-                              dirty_thresh / DIRTY_MAXPAUSE_AREA &&
+               if (nr_dirty < dirty_thresh &&
+                   bdi_dirty < (task_bdi_thresh + bdi_thresh) / 2 &&
                    time_after(jiffies, start_time + MAX_PAUSE))
                        break;
-               /*
-                * pass-good area. When some bdi gets blocked (eg. NFS server
-                * not responding), or write bandwidth dropped dramatically due
-                * to concurrent reads, or dirty threshold suddenly dropped and
-                * the dirty pages cannot be brought down anytime soon (eg. on
-                * slow USB stick), at least let go of the good bdi's.
-                */
-               if (nr_dirty < dirty_thresh +
-                              dirty_thresh / DIRTY_PASSGOOD_AREA &&
-                   bdi_dirty < bdi_thresh)
-                       break;
 
                /*
                 * Increase the delay for each loop, up to our previous
index 7ef69124fa3e5f4ef28baaed58a7e997d40155ab..b7719ec10dc5a998a102548cb38e9223c6e6be3c 100644 (file)
@@ -2283,7 +2283,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
                .mem_cgroup = mem,
                .memcg_record = rec,
        };
-       unsigned long start, end;
+       ktime_t start, end;
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2292,7 +2292,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
                                                      sc.may_writepage,
                                                      sc.gfp_mask);
 
-       start = sched_clock();
+       start = ktime_get();
        /*
         * NOTE: Although we can get the priority field, using it
         * here is not a good idea, since it limits the pages we can scan.
@@ -2301,10 +2301,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
         * the priority and make it zero.
         */
        shrink_zone(0, zone, &sc);
-       end = sched_clock();
+       end = ktime_get();
 
        if (rec)
-               rec->elapsed += end - start;
+               rec->elapsed += ktime_to_ns(ktime_sub(end, start));
        *scanned = sc.nr_scanned;
 
        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2319,7 +2319,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 {
        struct zonelist *zonelist;
        unsigned long nr_reclaimed;
-       unsigned long start, end;
+       ktime_t start, end;
        int nid;
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
@@ -2337,7 +2337,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                .gfp_mask = sc.gfp_mask,
        };
 
-       start = sched_clock();
+       start = ktime_get();
        /*
         * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
         * take care of from where we get pages. So the node where we start the
@@ -2352,9 +2352,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                            sc.gfp_mask);
 
        nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
-       end = sched_clock();
+       end = ktime_get();
        if (rec)
-               rec->elapsed += end - start;
+               rec->elapsed += ktime_to_ns(ktime_sub(end, start));
 
        trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -2529,6 +2529,9 @@ loop_again:
                                        high_wmark_pages(zone), 0, 0)) {
                                end_zone = i;
                                break;
+                       } else {
+                               /* If balanced, clear the congested flag */
+                               zone_clear_flag(zone, ZONE_CONGESTED);
                        }
                }
                if (i < 0)
index 5f27f8e302546371dec3b6ff24e62d389e4c41c2..f1f2f7bb6661e18c77276d20598e59caba8c34ed 100644 (file)
@@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
        if (unlikely(!skb))
                goto err_free;
 
+       skb_reset_network_header(skb);
+       skb_reset_transport_header(skb);
        return skb;
 
 err_free:
index 52cfd0c3ea71f19ef844531d055b4f96a0eb6e27..d07223c834af2a6b2bc45606d64d0093f30d9897 100644 (file)
@@ -558,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
        spin_unlock_irqrestore(&rq->lock, flags);
 
        skb_queue_walk_safe(&queue, skb, tmp) {
-               struct net_device *dev = skb->dev;
+               struct net_device *dev;
+
+               br2684_push(atmvcc, skb);
+               dev = skb->dev;
 
                dev->stats.rx_bytes -= skb->len;
                dev->stats.rx_packets--;
-
-               br2684_push(atmvcc, skb);
        }
 
        /* initialize netdev carrier state */
index 8add9b4999129bcbdf62122221bd4bfec0c686ac..117e0d161780b4a94d6350d0618a87d5937ffc6a 100644 (file)
@@ -494,9 +494,8 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
        BT_DBG("sk %p", sk);
 
        add_wait_queue(sk_sleep(sk), &wait);
+       set_current_state(TASK_INTERRUPTIBLE);
        while (sk->sk_state != state) {
-               set_current_state(TASK_INTERRUPTIBLE);
-
                if (!timeo) {
                        err = -EINPROGRESS;
                        break;
@@ -510,12 +509,13 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
                release_sock(sk);
                timeo = schedule_timeout(timeo);
                lock_sock(sk);
+               set_current_state(TASK_INTERRUPTIBLE);
 
                err = sock_error(sk);
                if (err)
                        break;
        }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);
        return err;
 }
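
These Bluetooth wait-loop fixes all apply the same rule: mark the task
TASK_INTERRUPTIBLE before testing the wake-up condition, and re-arm it
after every schedule_timeout(), so a wakeup racing with the check cannot
be lost. The closest user-space analogue is the condition-variable idiom,
where the predicate is always tested under the mutex and re-tested after
each wakeup (a sketch of the idiom, not the kernel API):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static bool connected;

    static void wait_connected(void)
    {
            pthread_mutex_lock(&lock);
            while (!connected)              /* re-check after each wakeup */
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);
    }

    static void set_connected(void)
    {
            pthread_mutex_lock(&lock);
            connected = true;
            pthread_cond_signal(&cond);
            pthread_mutex_unlock(&lock);
    }
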
index 8e6c06158f8ebf02268362d4286b4763813b4e0c..e7ee5314f39a1e6a611dd3f0bd59c10c012ae699 100644 (file)
@@ -155,6 +155,7 @@ struct bnep_session {
        unsigned int  role;
        unsigned long state;
        unsigned long flags;
+       atomic_t      terminate;
        struct task_struct *task;
 
        struct ethhdr eh;
index ca39fcf010ce3353e73312f4172f32c9ce12cb1f..d9edfe8bf9d677268cfc1f1e10b85f04d9f00f73 100644 (file)
@@ -484,9 +484,11 @@ static int bnep_session(void *arg)
 
        init_waitqueue_entry(&wait, current);
        add_wait_queue(sk_sleep(sk), &wait);
-       while (!kthread_should_stop()) {
+       while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
 
+               if (atomic_read(&s->terminate))
+                       break;
                /* RX */
                while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
                        skb_orphan(skb);
@@ -504,7 +506,7 @@ static int bnep_session(void *arg)
 
                schedule();
        }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);
 
        /* Cleanup session */
@@ -640,9 +642,10 @@ int bnep_del_connection(struct bnep_conndel_req *req)
        down_read(&bnep_session_sem);
 
        s = __bnep_get_session(req->dst);
-       if (s)
-               kthread_stop(s->task);
-       else
+       if (s) {
+               atomic_inc(&s->terminate);
+               wake_up_process(s->task);
+       } else
                err = -ENOENT;
 
        up_read(&bnep_session_sem);
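
The bnep, cmtp, and hidp hunks in this merge all drop kthread_stop() in
favour of an atomic terminate flag plus wake_up_process(), so the stop
request flows through the same wakeup-safe loop as the thread's other
events. The handshake, in outline (a hedged sketch using C11 atomics; the
kernel side uses atomic_t and wake_up_process()):

    #include <stdatomic.h>

    struct session {
            atomic_int terminate;
            /* task pointer, queues, ... */
    };

    /* controller side -- replaces kthread_stop(s->task) */
    static void session_shutdown(struct session *s)
    {
            atomic_fetch_add(&s->terminate, 1);
            /* kernel: wake_up_process(s->task); */
    }

    /* thread side -- checked at the top of the session loop */
    static int session_should_stop(struct session *s)
    {
            return atomic_load(&s->terminate) != 0;
    }
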
index 040f67b12978c17713e4040937127b0cc814de8e..50f0d135eb8f201daf8156c433f08a973bfa338f 100644 (file)
@@ -386,7 +386,8 @@ static void cmtp_reset_ctr(struct capi_ctr *ctrl)
 
        capi_ctr_down(ctrl);
 
-       kthread_stop(session->task);
+       atomic_inc(&session->terminate);
+       wake_up_process(session->task);
 }
 
 static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp)
index db43b54ac9afb91cfc78f911f8a15c05104fe7ed..c32638dddbf9409d685c3436eb1b1541d04d5a9e 100644 (file)
@@ -81,6 +81,7 @@ struct cmtp_session {
 
        char name[BTNAMSIZ];
 
+       atomic_t terminate;
        struct task_struct *task;
 
        wait_queue_head_t wait;
index c5b11af908be4fc3ab6c7a0cebb9639900feb162..521baa4fe835eb775f24e59dda205ae76d1f87ce 100644 (file)
@@ -292,9 +292,11 @@ static int cmtp_session(void *arg)
 
        init_waitqueue_entry(&wait, current);
        add_wait_queue(sk_sleep(sk), &wait);
-       while (!kthread_should_stop()) {
+       while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
 
+               if (atomic_read(&session->terminate))
+                       break;
                if (sk->sk_state != BT_CONNECTED)
                        break;
 
@@ -307,7 +309,7 @@ static int cmtp_session(void *arg)
 
                schedule();
        }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);
 
        down_write(&cmtp_session_sem);
@@ -380,16 +382,17 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
 
        if (!(session->flags & (1 << CMTP_LOOPBACK))) {
                err = cmtp_attach_device(session);
-               if (err < 0)
-                       goto detach;
+               if (err < 0) {
+                       atomic_inc(&session->terminate);
+                       wake_up_process(session->task);
+                       up_write(&cmtp_session_sem);
+                       return err;
+               }
        }
 
        up_write(&cmtp_session_sem);
        return 0;
 
-detach:
-       cmtp_detach_device(session);
-
 unlink:
        __cmtp_unlink_session(session);
 
@@ -414,7 +417,8 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
                skb_queue_purge(&session->transmit);
 
                /* Stop session thread */
-               kthread_stop(session->task);
+               atomic_inc(&session->terminate);
+               wake_up_process(session->task);
        } else
                err = -ENOENT;
 
index ec0bc3f60f2eea4216471746fdbea09c51b3fbcb..56943add45cc44707167f4877e27fe5630b00c95 100644 (file)
@@ -1209,7 +1209,6 @@ static void hci_cmd_timer(unsigned long arg)
 
        BT_ERR("%s command tx timeout", hdev->name);
        atomic_set(&hdev->cmd_cnt, 1);
-       clear_bit(HCI_RESET, &hdev->flags);
        tasklet_schedule(&hdev->cmd_task);
 }
 
@@ -1327,7 +1326,7 @@ int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr)
 
        entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
        if (!entry) {
-               return -ENOMEM;
+               err = -ENOMEM;
                goto err;
        }
 
@@ -2408,7 +2407,10 @@ static void hci_cmd_task(unsigned long arg)
                if (hdev->sent_cmd) {
                        atomic_dec(&hdev->cmd_cnt);
                        hci_send_frame(skb);
-                       mod_timer(&hdev->cmd_timer,
+                       if (test_bit(HCI_RESET, &hdev->flags))
+                               del_timer(&hdev->cmd_timer);
+                       else
+                               mod_timer(&hdev->cmd_timer,
                                  jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT));
                } else {
                        skb_queue_head(&hdev->cmd_q, skb);
index 43b4c2deb7cc05bdc875e3f7b23a999468f5c1a4..fb68f344c34a68e5a4334b751196680ac33ffdfe 100644 (file)
@@ -764,6 +764,7 @@ static int hidp_session(void *arg)
 
        up_write(&hidp_session_sem);
 
+       kfree(session->rd_data);
        kfree(session);
        return 0;
 }
@@ -841,7 +842,8 @@ static int hidp_setup_input(struct hidp_session *session,
 
        err = input_register_device(input);
        if (err < 0) {
-               hci_conn_put_device(session->conn);
+               input_free_device(input);
+               session->input = NULL;
                return err;
        }
 
@@ -1044,8 +1046,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
        }
 
        err = hid_add_device(session->hid);
-       if (err < 0)
-               goto err_add_device;
+       if (err < 0) {
+               atomic_inc(&session->terminate);
+               wake_up_process(session->task);
+               up_write(&hidp_session_sem);
+               return err;
+       }
 
        if (session->input) {
                hidp_send_ctrl_message(session,
@@ -1059,12 +1065,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
        up_write(&hidp_session_sem);
        return 0;
 
-err_add_device:
-       hid_destroy_device(session->hid);
-       session->hid = NULL;
-       atomic_inc(&session->terminate);
-       wake_up_process(session->task);
-
 unlink:
        hidp_del_timer(session);
 
@@ -1090,7 +1090,6 @@ purge:
 failed:
        up_write(&hidp_session_sem);
 
-       input_free_device(session->input);
        kfree(session);
        return err;
 }
index 3204ba8a701ca0d7c99eebfc629d22e51a007021..b3bdb482bbe6f5fdf2d197fc1035082f495c448b 100644 (file)
@@ -1159,9 +1159,8 @@ int __l2cap_wait_ack(struct sock *sk)
        int timeo = HZ/5;
 
        add_wait_queue(sk_sleep(sk), &wait);
-       while ((chan->unacked_frames > 0 && chan->conn)) {
-               set_current_state(TASK_INTERRUPTIBLE);
-
+       set_current_state(TASK_INTERRUPTIBLE);
+       while (chan->unacked_frames > 0 && chan->conn) {
                if (!timeo)
                        timeo = HZ/5;
 
@@ -1173,6 +1172,7 @@ int __l2cap_wait_ack(struct sock *sk)
                release_sock(sk);
                timeo = schedule_timeout(timeo);
                lock_sock(sk);
+               set_current_state(TASK_INTERRUPTIBLE);
 
                err = sock_error(sk);
                if (err)
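
The __l2cap_wait_ack() hunks move set_current_state(TASK_INTERRUPTIBLE) ahead of the condition test, and re-arm it after each wakeup, so a wakeup landing between the test and schedule_timeout() is not lost. The general shape of the idiom, sketched with a hypothetical condition():

    add_wait_queue(&wq, &wait);
    set_current_state(TASK_INTERRUPTIBLE);          /* arm before testing */
    while (!condition()) {
            /* A wakeup from here on only makes the task runnable;
             * schedule_timeout() then returns without sleeping. */
            timeo = schedule_timeout(timeo);
            set_current_state(TASK_INTERRUPTIBLE);  /* re-arm each pass */
    }
    __set_current_state(TASK_RUNNING);
    remove_wait_queue(&wq, &wait);
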
index 5c36b3e8739cb706f9aa47e74a0ed9c5f72b8e24..61f1f623091dbcd89992a64a635fc8de62963795 100644 (file)
@@ -235,30 +235,26 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
 
        lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 
-       if (sk->sk_state != BT_LISTEN) {
-               err = -EBADFD;
-               goto done;
-       }
-
        timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
        BT_DBG("sk %p timeo %ld", sk, timeo);
 
        /* Wait for an incoming connection. (wake-one). */
        add_wait_queue_exclusive(sk_sleep(sk), &wait);
-       while (!(nsk = bt_accept_dequeue(sk, newsock))) {
+       while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
-               if (!timeo) {
-                       err = -EAGAIN;
+
+               if (sk->sk_state != BT_LISTEN) {
+                       err = -EBADFD;
                        break;
                }
 
-               release_sock(sk);
-               timeo = schedule_timeout(timeo);
-               lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+               nsk = bt_accept_dequeue(sk, newsock);
+               if (nsk)
+                       break;
 
-               if (sk->sk_state != BT_LISTEN) {
-                       err = -EBADFD;
+               if (!timeo) {
+                       err = -EAGAIN;
                        break;
                }
 
@@ -266,8 +262,12 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
                        err = sock_intr_errno(timeo);
                        break;
                }
+
+               release_sock(sk);
+               timeo = schedule_timeout(timeo);
+               lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
        }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);
 
        if (err)
@@ -993,7 +993,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
        INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
 
        sk->sk_destruct = l2cap_sock_destruct;
-       sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
+       sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
 
        sock_reset_flag(sk, SOCK_ZAPPED);
 
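l2cap_sock_accept() above, and the rfcomm and sco accept paths further down, all get the same restructuring: the BT_LISTEN check moves inside the wait loop so a socket torn down by another thread while we slept is caught on the next pass, and the dequeue, timeout, and signal checks all run before the task actually sleeps. Condensed into one view, the resulting loop is:

    add_wait_queue_exclusive(sk_sleep(sk), &wait);
    while (1) {
            set_current_state(TASK_INTERRUPTIBLE);

            if (sk->sk_state != BT_LISTEN) {        /* re-checked every pass */
                    err = -EBADFD;
                    break;
            }

            nsk = bt_accept_dequeue(sk, newsock);
            if (nsk)
                    break;                          /* got a connection */

            if (!timeo) {
                    err = -EAGAIN;                  /* non-blocking or timed out */
                    break;
            }

            if (signal_pending(current)) {
                    err = sock_intr_errno(timeo);
                    break;
            }

            release_sock(sk);
            timeo = schedule_timeout(timeo);
            lock_sock(sk);
    }
    __set_current_state(TASK_RUNNING);
    remove_wait_queue(sk_sleep(sk), &wait);
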
index 5759bb7054f7f4aca8822271b7de4da8997af915..5ba3f6df665cda5748044aaf21133f620979ca55 100644 (file)
@@ -62,7 +62,6 @@ static DEFINE_MUTEX(rfcomm_mutex);
 #define rfcomm_lock()  mutex_lock(&rfcomm_mutex)
 #define rfcomm_unlock()        mutex_unlock(&rfcomm_mutex)
 
-static unsigned long rfcomm_event;
 
 static LIST_HEAD(session_list);
 
@@ -120,7 +119,6 @@ static inline void rfcomm_schedule(void)
 {
        if (!rfcomm_thread)
                return;
-       set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
        wake_up_process(rfcomm_thread);
 }
 
@@ -2038,19 +2036,18 @@ static int rfcomm_run(void *unused)
 
        rfcomm_add_listener(BDADDR_ANY);
 
-       while (!kthread_should_stop()) {
+       while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
-               if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) {
-                       /* No pending events. Let's sleep.
-                        * Incoming connections and data will wake us up. */
-                       schedule();
-               }
-               set_current_state(TASK_RUNNING);
+
+               if (kthread_should_stop())
+                       break;
 
                /* Process stuff */
-               clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
                rfcomm_process_sessions();
+
+               schedule();
        }
+       __set_current_state(TASK_RUNNING);
 
        rfcomm_kill_listener();
 
index 8f01e6b11a7037d487c5a32072d460d56760a64a..482722bbc7a052c67053a95ad357463a5bc641bf 100644 (file)
@@ -485,11 +485,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
 
        lock_sock(sk);
 
-       if (sk->sk_state != BT_LISTEN) {
-               err = -EBADFD;
-               goto done;
-       }
-
        if (sk->sk_type != SOCK_STREAM) {
                err = -EINVAL;
                goto done;
@@ -501,19 +496,20 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
 
        /* Wait for an incoming connection. (wake-one). */
        add_wait_queue_exclusive(sk_sleep(sk), &wait);
-       while (!(nsk = bt_accept_dequeue(sk, newsock))) {
+       while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
-               if (!timeo) {
-                       err = -EAGAIN;
+
+               if (sk->sk_state != BT_LISTEN) {
+                       err = -EBADFD;
                        break;
                }
 
-               release_sock(sk);
-               timeo = schedule_timeout(timeo);
-               lock_sock(sk);
+               nsk = bt_accept_dequeue(sk, newsock);
+               if (nsk)
+                       break;
 
-               if (sk->sk_state != BT_LISTEN) {
-                       err = -EBADFD;
+               if (!timeo) {
+                       err = -EAGAIN;
                        break;
                }
 
@@ -521,8 +517,12 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
                        err = sock_intr_errno(timeo);
                        break;
                }
+
+               release_sock(sk);
+               timeo = schedule_timeout(timeo);
+               lock_sock(sk);
        }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);
 
        if (err)
index 4c3621b5e0aa2344196767bdeb6be407918732b4..8270f05e3f1f27b0883020eede325df8d17154d0 100644 (file)
@@ -564,30 +564,26 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag
 
        lock_sock(sk);
 
-       if (sk->sk_state != BT_LISTEN) {
-               err = -EBADFD;
-               goto done;
-       }
-
        timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
        BT_DBG("sk %p timeo %ld", sk, timeo);
 
        /* Wait for an incoming connection. (wake-one). */
        add_wait_queue_exclusive(sk_sleep(sk), &wait);
-       while (!(ch = bt_accept_dequeue(sk, newsock))) {
+       while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
-               if (!timeo) {
-                       err = -EAGAIN;
+
+               if (sk->sk_state != BT_LISTEN) {
+                       err = -EBADFD;
                        break;
                }
 
-               release_sock(sk);
-               timeo = schedule_timeout(timeo);
-               lock_sock(sk);
+               ch = bt_accept_dequeue(sk, newsock);
+               if (ch)
+                       break;
 
-               if (sk->sk_state != BT_LISTEN) {
-                       err = -EBADFD;
+               if (!timeo) {
+                       err = -EAGAIN;
                        break;
                }
 
@@ -595,8 +591,12 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag
                        err = sock_intr_errno(timeo);
                        break;
                }
+
+               release_sock(sk);
+               timeo = schedule_timeout(timeo);
+               lock_sock(sk);
        }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);
 
        if (err)
index 2cdf0070419f368738464ad5972fb4a0018f7820..e73815456adf2763f72d15ee649e2a3b1d6052a5 100644 (file)
@@ -231,6 +231,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 int br_add_bridge(struct net *net, const char *name)
 {
        struct net_device *dev;
+       int res;
 
        dev = alloc_netdev(sizeof(struct net_bridge), name,
                           br_dev_setup);
@@ -240,7 +241,10 @@ int br_add_bridge(struct net *net, const char *name)
 
        dev_net_set(dev, net);
 
-       return register_netdev(dev);
+       res = register_netdev(dev);
+       if (res)
+               free_netdev(dev);
+       return res;
 }
 
 int br_del_bridge(struct net *net, const char *name)
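
The br_add_bridge() fix applies the standard net-device ownership rule: a device obtained from alloc_netdev() that fails register_netdev() still belongs to the caller and must be released with free_netdev(). A minimal sketch (my_priv and my_setup are hypothetical):

    struct net_device *dev;
    int err;

    dev = alloc_netdev(sizeof(struct my_priv), "mybr%d", my_setup);
    if (!dev)
            return -ENOMEM;

    err = register_netdev(dev);
    if (err) {
            free_netdev(dev);       /* never registered: still ours to free */
            return err;
    }
    /* From here on, teardown is unregister_netdev() + free_netdev(). */
    return 0;
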
index 2d85ca7111d3994f1c60941a4992da749d195b57..995cbe0ac0b2b1e74f2b4762d61a13102d9f66bb 100644 (file)
@@ -1456,7 +1456,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 {
        struct sk_buff *skb2;
        const struct ipv6hdr *ip6h;
-       struct icmp6hdr *icmp6h;
+       u8 icmp6_type;
        u8 nexthdr;
        unsigned len;
        int offset;
@@ -1502,9 +1502,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
        __skb_pull(skb2, offset);
        skb_reset_transport_header(skb2);
 
-       icmp6h = icmp6_hdr(skb2);
+       icmp6_type = icmp6_hdr(skb2)->icmp6_type;
 
-       switch (icmp6h->icmp6_type) {
+       switch (icmp6_type) {
        case ICMPV6_MGM_QUERY:
        case ICMPV6_MGM_REPORT:
        case ICMPV6_MGM_REDUCTION:
@@ -1520,16 +1520,23 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
                err = pskb_trim_rcsum(skb2, len);
                if (err)
                        goto out;
+               err = -EINVAL;
        }
 
+       ip6h = ipv6_hdr(skb2);
+
        switch (skb2->ip_summed) {
        case CHECKSUM_COMPLETE:
-               if (!csum_fold(skb2->csum))
+               if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len,
+                                       IPPROTO_ICMPV6, skb2->csum))
                        break;
                /*FALLTHROUGH*/
        case CHECKSUM_NONE:
-               skb2->csum = 0;
-               if (skb_checksum_complete(skb2))
+               skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+                                                       &ip6h->daddr,
+                                                       skb2->len,
+                                                       IPPROTO_ICMPV6, 0));
+               if (__skb_checksum_complete(skb2))
                        goto out;
        }
 
@@ -1537,7 +1544,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 
        BR_INPUT_SKB_CB(skb)->igmp = 1;
 
-       switch (icmp6h->icmp6_type) {
+       switch (icmp6_type) {
        case ICMPV6_MGM_REPORT:
            {
                struct mld_msg *mld;
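
The bridge MLD-snooping hunks validate the ICMPv6 checksum correctly: an ICMPv6 checksum covers an IPv6 pseudo-header (source, destination, payload length, next header) in addition to the message itself, so the code folds the pseudo-header in with csum_ipv6_magic() instead of trusting a bare skb->csum. Annotated, the two cases are:

    ip6h = ipv6_hdr(skb2);

    switch (skb2->ip_summed) {
    case CHECKSUM_COMPLETE:
            /* Hardware summed the payload; folding in the pseudo-
             * header must yield 0 for a valid packet. */
            if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len,
                                 IPPROTO_ICMPV6, skb2->csum))
                    break;
            /* FALLTHROUGH: treat a mismatch like no checksum at all */
    case CHECKSUM_NONE:
            /* Seed the sum with the pseudo-header, then let
             * __skb_checksum_complete() add the payload. */
            skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
                                                      &ip6h->daddr,
                                                      skb2->len,
                                                      IPPROTO_ICMPV6, 0));
            if (__skb_checksum_complete(skb2))
                    goto out;
    }
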
index 8fab9b0bb2036784e002a4b3bb0464db059bd5ad..1334d7e56f020d5fac869f528590842483b68178 100644 (file)
@@ -1319,11 +1319,15 @@ static void neigh_proxy_process(unsigned long arg)
 
                if (tdif <= 0) {
                        struct net_device *dev = skb->dev;
+
                        __skb_unlink(skb, &tbl->proxy_queue);
-                       if (tbl->proxy_redo && netif_running(dev))
+                       if (tbl->proxy_redo && netif_running(dev)) {
+                               rcu_read_lock();
                                tbl->proxy_redo(skb);
-                       else
+                               rcu_read_unlock();
+                       } else {
                                kfree_skb(skb);
+                       }
 
                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
index adf84dd8c7b5b44a215b7e79a3b3a838fe09b7d2..52622517e0d883c5315114ee16d9dd0ecf24eabf 100644 (file)
@@ -558,13 +558,14 @@ int __netpoll_rx(struct sk_buff *skb)
        if (skb_shared(skb))
                goto out;
 
-       iph = (struct iphdr *)skb->data;
        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                goto out;
+       iph = (struct iphdr *)skb->data;
        if (iph->ihl < 5 || iph->version != 4)
                goto out;
        if (!pskb_may_pull(skb, iph->ihl*4))
                goto out;
+       iph = (struct iphdr *)skb->data;
        if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
                goto out;
 
@@ -579,6 +580,7 @@ int __netpoll_rx(struct sk_buff *skb)
        if (pskb_trim_rcsum(skb, len))
                goto out;
 
+       iph = (struct iphdr *)skb->data;
        if (iph->protocol != IPPROTO_UDP)
                goto out;
 
index 283c0a26e03f9ba2e3e0532638b6810fb4f7f349..d577199eabd5c3f179acd8e3d7f6602837fb8b5b 100644 (file)
@@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
                        break;
                for (i=0; i<nsrcs; i++) {
                        /* skip inactive filters */
-                       if (pmc->sfcount[MCAST_INCLUDE] ||
+                       if (psf->sf_count[MCAST_INCLUDE] ||
                            pmc->sfcount[MCAST_EXCLUDE] !=
                            psf->sf_count[MCAST_EXCLUDE])
                                continue;
index 9cb191ecaba8e7992834242364c745287157c317..147ede38ab484e7447516c0499900a592b235a15 100644 (file)
@@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
 }
 
 static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
-                   char __user *optval, int __user *optlen)
+                   char __user *optval, int __user *optlen, unsigned flags)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        int len;
@@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 
                msg.msg_control = optval;
                msg.msg_controllen = len;
-               msg.msg_flags = 0;
+               msg.msg_flags = flags;
 
                lock_sock(sk);
                skb = np->pktoptions;
@@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
        if(level != SOL_IPV6)
                return -ENOPROTOOPT;
 
-       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
@@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
                return compat_mc_getsockopt(sk, level, optname, optval, optlen,
                        ipv6_getsockopt);
 
-       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
+                                MSG_CMSG_COMPAT);
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
index 3e6ebcdb4779761a5b35a362b9453d7017551f50..ee7839f4d6e3450c222f2b2677d3b9cf0709d21e 100644 (file)
@@ -1059,7 +1059,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
                        break;
                for (i=0; i<nsrcs; i++) {
                        /* skip inactive filters */
-                       if (pmc->mca_sfcount[MCAST_INCLUDE] ||
+                       if (psf->sf_count[MCAST_INCLUDE] ||
                            pmc->mca_sfcount[MCAST_EXCLUDE] !=
                            psf->sf_count[MCAST_EXCLUDE])
                                continue;
index 07bf1085458f24ee1b8f9487c150158dd587675f..00b15ac7a70256f8bb780007cd960f725cbe8159 100644 (file)
@@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
        if (skb->protocol != htons(ETH_P_IPV6))
                goto tx_error;
 
+       if (tos == 1)
+               tos = ipv6_get_dsfield(iph6);
+
        /* ISATAP (RFC4214) - must come before 6to4 */
        if (dev->priv_flags & IFF_ISATAP) {
                struct neighbour *neigh = NULL;
index 866f269183cf9a1532f317e3b9f52bc437a98213..acb44230b2512996ff754126f617942dca572683 100644 (file)
@@ -1012,7 +1012,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
        cancel_work_sync(&local->reconfig_filter);
 
        ieee80211_clear_tx_pending(local);
-       sta_info_stop(local);
        rate_control_deinitialize(local);
 
        if (skb_queue_len(&local->skb_queue) ||
@@ -1024,6 +1023,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 
        destroy_workqueue(local->workqueue);
        wiphy_unregister(local->hw.wiphy);
+       sta_info_stop(local);
        ieee80211_wep_free(local);
        ieee80211_led_exit(local);
        kfree(local->int_scan_req);
index 102fc212cd64093726dffa311ba41716d2cc0250..e051398fdf6baf4834bbe21bf13deac081594148 100644 (file)
@@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
        skb2->skb_iif = skb->dev->ifindex;
        skb2->dev = dev;
-       dev_queue_xmit(skb2);
-       err = 0;
+       err = dev_queue_xmit(skb2);
 
 out:
        if (err) {
index 24a77400b65e4e8fd63f825144bdc4891b5fced9..ffe92ca32f2acc031605e69d13614b6c8374bed3 100644 (file)
@@ -1965,8 +1965,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
         * used_address->name_len is initialized to UINT_MAX so that the first
         * destination address never matches.
         */
-       if (used_address && used_address->name_len == msg_sys->msg_namelen &&
-           !memcmp(&used_address->name, msg->msg_name,
+       if (used_address && msg_sys->msg_name &&
+           used_address->name_len == msg_sys->msg_namelen &&
+           !memcmp(&used_address->name, msg_sys->msg_name,
                    used_address->name_len)) {
                err = sock_sendmsg_nosec(sock, msg_sys, total_len);
                goto out_freectl;
@@ -1978,8 +1979,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
         */
        if (used_address && err >= 0) {
                used_address->name_len = msg_sys->msg_namelen;
-               memcpy(&used_address->name, msg->msg_name,
-                      used_address->name_len);
+               if (msg_sys->msg_name)
+                       memcpy(&used_address->name, msg_sys->msg_name,
+                              used_address->name_len);
        }
 
 out_freectl:
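
The __sys_sendmsg() hunks harden sendmmsg()'s destination-address cache on both sides: the comparison and the copy now use the kernel-side msg_sys, and both tolerate a NULL msg_name as passed by connected sockets. Condensing the two hunks into one sketch:

    if (used_address && msg_sys->msg_name &&
        used_address->name_len == msg_sys->msg_namelen &&
        !memcmp(&used_address->name, msg_sys->msg_name,
                used_address->name_len)) {
            /* same destination as the previous message:
             * skip the security re-check */
            err = sock_sendmsg_nosec(sock, msg_sys, total_len);
            goto out_freectl;
    }
    err = sock_sendmsg(sock, msg_sys, total_len);
    if (used_address && err >= 0) {
            used_address->name_len = msg_sys->msg_namelen;
            if (msg_sys->msg_name)          /* nothing to cache otherwise */
                    memcpy(&used_address->name, msg_sys->msg_name,
                           used_address->name_len);
    }
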
index 645437cfc464d505a3673870f8c0a6eb73287535..c14865172da70d0fd04918f503d0714042ec5e64 100644 (file)
@@ -616,6 +616,9 @@ int wiphy_register(struct wiphy *wiphy)
        if (res)
                goto out_rm_dev;
 
+       rtnl_lock();
+       rdev->wiphy.registered = true;
+       rtnl_unlock();
        return 0;
 
 out_rm_dev:
@@ -647,6 +650,10 @@ void wiphy_unregister(struct wiphy *wiphy)
 {
        struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
 
+       rtnl_lock();
+       rdev->wiphy.registered = false;
+       rtnl_unlock();
+
        rfkill_unregister(rdev->rfkill);
 
        /* protect the device list */
index c6e4ca6a7d2e0b1fcc6e450b1255a0140ae64cff..ff574597a8544030ef18b2e2186354ef46511bc3 100644 (file)
@@ -93,7 +93,8 @@ static int wiphy_suspend(struct device *dev, pm_message_t state)
 
        if (rdev->ops->suspend) {
                rtnl_lock();
-               ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan);
+               if (rdev->wiphy.registered)
+                       ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan);
                rtnl_unlock();
        }
 
@@ -112,7 +113,8 @@ static int wiphy_resume(struct device *dev)
 
        if (rdev->ops->resume) {
                rtnl_lock();
-               ret = rdev->ops->resume(&rdev->wiphy);
+               if (rdev->wiphy.registered)
+                       ret = rdev->ops->resume(&rdev->wiphy);
                rtnl_unlock();
        }
 
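cfg80211 gains a wiphy.registered flag, flipped under the RTNL, so the class-device suspend/resume hooks never call into a driver whose wiphy is being registered or torn down. Since the hooks take the same lock, the guard reduces to:

    rtnl_lock();
    if (rdev->wiphy.registered && rdev->ops->suspend)
            ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan);
    rtnl_unlock();
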
index 9d761c95eca2988e0ff227baa8619b684d6eb33a..3dfc47134e51e9cbee67aab2490360927ff7c0f4 100755 (executable)
@@ -2574,7 +2574,8 @@ sub process {
                                } else {
                                        $cast = $cast2;
                                }
-                               WARN("$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr);
+                               WARN("MINMAX",
+                                    "$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr);
                        }
                }
 
index eb2f1e64edf79b63069fc53399ce71d759ae8795..4594f334105110fefba64883186a2cd546641a82 100755 (executable)
@@ -1389,7 +1389,7 @@ sub vcs_exists {
        warn("$P: No supported VCS found.  Add --nogit to options?\n");
        warn("Using a git repository produces better results.\n");
        warn("Try Linus Torvalds' latest git repository using:\n");
-       warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git\n");
+       warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
        $printed_novcs = 1;
     }
     return 0;
index 502fc94994531118926bd8846befcd39a1d55325..7696d05b935629f354c81866fdb89e43222f415f 100644 (file)
@@ -3348,6 +3348,8 @@ static hda_nid_t get_unassigned_dac(struct hda_codec *codec, hda_nid_t pin,
 
 #define MAX_AUTO_DACS  5
 
+#define DAC_SLAVE_FLAG 0x8000  /* filled dac is a slave */
+
 /* fill analog DAC list from the widget tree */
 static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs)
 {
@@ -3370,16 +3372,26 @@ static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs)
 /* fill pin_dac_pair list from the pin and dac list */
 static int fill_dacs_for_pins(struct hda_codec *codec, hda_nid_t *pins,
                              int num_pins, hda_nid_t *dacs, int *rest,
-                             struct pin_dac_pair *filled, int type)
+                             struct pin_dac_pair *filled, int nums, 
+                             int type)
 {
-       int i, nums;
+       int i, start = nums;
 
-       nums = 0;
-       for (i = 0; i < num_pins; i++) {
+       for (i = 0; i < num_pins; i++, nums++) {
                filled[nums].pin = pins[i];
                filled[nums].type = type;
                filled[nums].dac = get_unassigned_dac(codec, pins[i], dacs, rest);
-               nums++;
+               if (filled[nums].dac) 
+                       continue;
+               if (filled[start].dac && get_connection_index(codec, pins[i], filled[start].dac) >= 0) {
+                       filled[nums].dac = filled[start].dac | DAC_SLAVE_FLAG;
+                       continue;
+               }
+               if (filled[0].dac && get_connection_index(codec, pins[i], filled[0].dac) >= 0) {
+                       filled[nums].dac = filled[0].dac | DAC_SLAVE_FLAG;
+                       continue;
+               }
+               snd_printdd("Failed to find a DAC for pin 0x%x\n", pins[i]);
        }
        return nums;
 }
@@ -3395,19 +3407,19 @@ static void cx_auto_parse_output(struct hda_codec *codec)
        rest = fill_cx_auto_dacs(codec, dacs);
        /* parse all analog output pins */
        nums = fill_dacs_for_pins(codec, cfg->line_out_pins, cfg->line_outs,
-                                 dacs, &rest, spec->dac_info,
-                                 AUTO_PIN_LINE_OUT);
-       nums += fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs,
-                                 dacs, &rest, spec->dac_info + nums,
-                                 AUTO_PIN_HP_OUT);
-       nums += fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs,
-                                 dacs, &rest, spec->dac_info + nums,
-                                 AUTO_PIN_SPEAKER_OUT);
+                         dacs, &rest, spec->dac_info, 0,
+                         AUTO_PIN_LINE_OUT);
+       nums = fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs,
+                         dacs, &rest, spec->dac_info, nums,
+                         AUTO_PIN_HP_OUT);
+       nums = fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs,
+                         dacs, &rest, spec->dac_info, nums,
+                         AUTO_PIN_SPEAKER_OUT);
        spec->dac_info_filled = nums;
        /* fill multiout struct */
        for (i = 0; i < nums; i++) {
                hda_nid_t dac = spec->dac_info[i].dac;
-               if (!dac)
+               if (!dac || (dac & DAC_SLAVE_FLAG))
                        continue;
                switch (spec->dac_info[i].type) {
                case AUTO_PIN_LINE_OUT:
@@ -3862,7 +3874,7 @@ static void cx_auto_parse_input(struct hda_codec *codec)
        }
        if (imux->num_items >= 2 && cfg->num_inputs == imux->num_items)
                cx_auto_check_auto_mic(codec);
-       if (imux->num_items > 1 && !spec->auto_mic) {
+       if (imux->num_items > 1) {
                for (i = 1; i < imux->num_items; i++) {
                        if (spec->imux_info[i].adc != spec->imux_info[0].adc) {
                                spec->adc_switching = 1;
@@ -4035,6 +4047,8 @@ static void cx_auto_init_output(struct hda_codec *codec)
                nid = spec->dac_info[i].dac;
                if (!nid)
                        nid = spec->multiout.dac_nids[0];
+               else if (nid & DAC_SLAVE_FLAG)
+                       nid &= ~DAC_SLAVE_FLAG;
                select_connection(codec, spec->dac_info[i].pin, nid);
        }
        if (spec->auto_mute) {
@@ -4167,9 +4181,11 @@ static int try_add_pb_volume(struct hda_codec *codec, hda_nid_t dac,
                             hda_nid_t pin, const char *name, int idx)
 {
        unsigned int caps;
-       caps = query_amp_caps(codec, dac, HDA_OUTPUT);
-       if (caps & AC_AMPCAP_NUM_STEPS)
-               return cx_auto_add_pb_volume(codec, dac, name, idx);
+       if (dac && !(dac & DAC_SLAVE_FLAG)) {
+               caps = query_amp_caps(codec, dac, HDA_OUTPUT);
+               if (caps & AC_AMPCAP_NUM_STEPS)
+                       return cx_auto_add_pb_volume(codec, dac, name, idx);
+       }
        caps = query_amp_caps(codec, pin, HDA_OUTPUT);
        if (caps & AC_AMPCAP_NUM_STEPS)
                return cx_auto_add_pb_volume(codec, pin, name, idx);
@@ -4191,8 +4207,7 @@ static int cx_auto_build_output_controls(struct hda_codec *codec)
        for (i = 0; i < spec->dac_info_filled; i++) {
                const char *label;
                int idx, type;
-               if (!spec->dac_info[i].dac)
-                       continue;
+               hda_nid_t dac = spec->dac_info[i].dac;
                type = spec->dac_info[i].type;
                if (type == AUTO_PIN_LINE_OUT)
                        type = spec->autocfg.line_out_type;
@@ -4211,7 +4226,7 @@ static int cx_auto_build_output_controls(struct hda_codec *codec)
                        idx = num_spk++;
                        break;
                }
-               err = try_add_pb_volume(codec, spec->dac_info[i].dac,
+               err = try_add_pb_volume(codec, dac,
                                        spec->dac_info[i].pin,
                                        label, idx);
                if (err < 0)
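
The Conexant auto-parser tags pins that borrow another pin's DAC by setting a spare bit (0x8000, outside the range of real widget NIDs) in the stored nid; every consumer must either skip such entries or mask the bit off before touching the hardware. The encode/decode pair in isolation:

    #define DAC_SLAVE_FLAG 0x8000           /* bit unused by real NIDs */

    /* mark: this pin shares the DAC of the 'master' entry */
    filled[nums].dac = filled[start].dac | DAC_SLAVE_FLAG;

    /* use: recover the shared nid before programming the codec */
    hda_nid_t dac = spec->dac_info[i].dac;
    if (dac & DAC_SLAVE_FLAG)
            dac &= ~DAC_SLAVE_FLAG;
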
index fcb11af9ad24f76a489f35472933238075b7bad5..7cabd731716395e7f5386425236660283f096fdf 100644 (file)
@@ -565,11 +565,11 @@ static void alc_hp_automute(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
 
-       if (!spec->automute)
-               return;
        spec->jack_present =
                detect_jacks(codec, ARRAY_SIZE(spec->autocfg.hp_pins),
                             spec->autocfg.hp_pins);
+       if (!spec->automute)
+               return;
        update_speakers(codec);
 }
 
@@ -578,11 +578,11 @@ static void alc_line_automute(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
 
-       if (!spec->automute || !spec->detect_line)
-               return;
        spec->line_jack_present =
                detect_jacks(codec, ARRAY_SIZE(spec->autocfg.line_out_pins),
                             spec->autocfg.line_out_pins);
+       if (!spec->automute || !spec->detect_line)
+               return;
        update_speakers(codec);
 }
 
@@ -3083,16 +3083,22 @@ static void alc_auto_init_multi_out(struct hda_codec *codec)
 static void alc_auto_init_extra_out(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t pin;
+       hda_nid_t pin, dac;
 
        pin = spec->autocfg.hp_pins[0];
-       if (pin)
-               alc_auto_set_output_and_unmute(codec, pin, PIN_HP,
-                                                 spec->multiout.hp_nid);
+       if (pin) {
+               dac = spec->multiout.hp_nid;
+               if (!dac)
+                       dac = spec->multiout.dac_nids[0];
+               alc_auto_set_output_and_unmute(codec, pin, PIN_HP, dac);
+       }
        pin = spec->autocfg.speaker_pins[0];
-       if (pin)
-               alc_auto_set_output_and_unmute(codec, pin, PIN_OUT,
-                                       spec->multiout.extra_out_nid[0]);
+       if (pin) {
+               dac = spec->multiout.extra_out_nid[0];
+               if (!dac)
+                       dac = spec->multiout.dac_nids[0];
+               alc_auto_set_output_and_unmute(codec, pin, PIN_OUT, dac);
+       }
 }
 
 /*
index d6651c033cb711a35c69dcde2320c213e50a5f5b..a118a0fb9d818ebd65dad18bfc0c3459d625d565 100644 (file)
@@ -56,7 +56,7 @@ static int bf5xx_ad193x_hw_params(struct snd_pcm_substream *substream,
 
        switch (params_rate(params)) {
        case 48000:
-               clk = 12288000;
+               clk = 24576000;
                break;
        }
 
index 2374ca5ffe68bacc6c37db16be249858fb17fad2..eedb6f5e5823499919e14611db21a747a2a023b6 100644 (file)
@@ -27,11 +27,6 @@ struct ad193x_priv {
        int sysclk;
 };
 
-/* ad193x register cache & default register settings */
-static const u8 ad193x_reg[AD193X_NUM_REGS] = {
-       0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0,
-};
-
 /*
  * AD193X volume/mute/de-emphasis etc. controls
  */
@@ -307,7 +302,8 @@ static int ad193x_hw_params(struct snd_pcm_substream *substream,
        snd_soc_write(codec, AD193X_PLL_CLK_CTRL0, reg);
 
        reg = snd_soc_read(codec, AD193X_DAC_CTRL2);
-       reg = (reg & (~AD193X_DAC_WORD_LEN_MASK)) | word_len;
+       reg = (reg & (~AD193X_DAC_WORD_LEN_MASK))
+               | (word_len << AD193X_DAC_WORD_LEN_SHFT);
        snd_soc_write(codec, AD193X_DAC_CTRL2, reg);
 
        reg = snd_soc_read(codec, AD193X_ADC_CTRL1);
@@ -389,9 +385,6 @@ static int ad193x_probe(struct snd_soc_codec *codec)
 
 static struct snd_soc_codec_driver soc_codec_dev_ad193x = {
        .probe =        ad193x_probe,
-       .reg_cache_default = ad193x_reg,
-       .reg_cache_size = AD193X_NUM_REGS,
-       .reg_word_size = sizeof(u16),
 };
 
 #if defined(CONFIG_SPI_MASTER)
index 9747b54978775cd43fe5d5af4f0234670ba2ff2f..cccc2e8e5fbd3e830a3a402ed565a2c981ad7f2b 100644 (file)
@@ -34,7 +34,8 @@
 #define AD193X_DAC_LEFT_HIGH    (1 << 3)
 #define AD193X_DAC_BCLK_INV     (1 << 7)
 #define AD193X_DAC_CTRL2        0x804
-#define AD193X_DAC_WORD_LEN_MASK       0xC
+#define AD193X_DAC_WORD_LEN_SHFT        3
+#define AD193X_DAC_WORD_LEN_MASK        0x18
 #define AD193X_DAC_MASTER_MUTE  1
 #define AD193X_DAC_CHNL_MUTE    0x805
 #define AD193X_DACL1_MUTE       0
@@ -63,7 +64,7 @@
 #define AD193X_ADC_CTRL1        0x80f
 #define AD193X_ADC_SERFMT_MASK         0x60
 #define AD193X_ADC_SERFMT_STEREO       (0 << 5)
-#define AD193X_ADC_SERFMT_TDM          (1 << 2)
+#define AD193X_ADC_SERFMT_TDM          (1 << 5)
 #define AD193X_ADC_SERFMT_AUX          (2 << 5)
 #define AD193X_ADC_WORD_LEN_MASK       0x3
 #define AD193X_ADC_CTRL2        0x810
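
The ad193x fix is a reminder that updating a register field takes both the correct mask and the correct shift; writing an unshifted value only works for fields that start at bit 0, and here the DAC word-length field sits at bits 4:3. The canonical read-modify-write:

    reg = snd_soc_read(codec, AD193X_DAC_CTRL2);
    reg &= ~AD193X_DAC_WORD_LEN_MASK;               /* clear bits 4:3 */
    reg |= word_len << AD193X_DAC_WORD_LEN_SHFT;    /* shift value into place */
    snd_soc_write(codec, AD193X_DAC_CTRL2, reg);
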
index 409d89d1f34c26fb44b58d979256541d798a5dc1..fbd7eb9e61ce197ae9c5bb22df308d618f9545eb 100644 (file)
@@ -857,6 +857,7 @@ static __devinit int sta32x_i2c_probe(struct i2c_client *i2c,
        ret = snd_soc_register_codec(&i2c->dev, &sta32x_codec, &sta32x_dai, 1);
        if (ret != 0) {
                dev_err(&i2c->dev, "Failed to register codec (%d)\n", ret);
+               kfree(sta32x);
                return ret;
        }
 
index 60d740ebeb5bb8b7502dd93b5b2ece605407d70a..1725550c293ed449d5ef29c8963f47d3a62cbd00 100644 (file)
@@ -2221,6 +2221,8 @@ static int sysclk_event(struct snd_soc_dapm_widget *w,
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                if (fll) {
+                       try_wait_for_completion(&wm8962->fll_lock);
+
                        snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1,
                                            WM8962_FLL_ENA, WM8962_FLL_ENA);
                        if (wm8962->irq) {
@@ -2927,10 +2929,6 @@ static int wm8962_set_bias_level(struct snd_soc_codec *codec,
                                            WM8962_BIAS_ENA | 0x180);
 
                        msleep(5);
-
-                       snd_soc_update_bits(codec, WM8962_CLOCKING2,
-                                           WM8962_CLKREG_OVD,
-                                           WM8962_CLKREG_OVD);
                }
 
                /* VMID 2*250k */
@@ -3288,6 +3286,8 @@ static int wm8962_set_fll(struct snd_soc_codec *codec, int fll_id, int source,
        snd_soc_write(codec, WM8962_FLL_CONTROL_7, fll_div.lambda);
        snd_soc_write(codec, WM8962_FLL_CONTROL_8, fll_div.n);
 
+       try_wait_for_completion(&wm8962->fll_lock);
+
        snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1,
                            WM8962_FLL_FRAC | WM8962_FLL_REFCLK_SRC_MASK |
                            WM8962_FLL_ENA, fll1);
@@ -3868,6 +3868,10 @@ static int wm8962_probe(struct snd_soc_codec *codec)
         */
        snd_soc_update_bits(codec, WM8962_CLOCKING2, WM8962_SYSCLK_ENA, 0);
 
+       /* Ensure we have soft control over all registers */
+       snd_soc_update_bits(codec, WM8962_CLOCKING2,
+                           WM8962_CLKREG_OVD, WM8962_CLKREG_OVD);
+
        regulator_bulk_disable(ARRAY_SIZE(wm8962->supplies), wm8962->supplies);
 
        if (pdata) {
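
The wm8962 hunks call try_wait_for_completion() immediately before arming the FLL so a completion left over from an earlier, failed lock attempt cannot satisfy the upcoming wait instantly. try_wait_for_completion() consumes a pending completion without ever blocking; in outline (the timeout value below is illustrative, not from the driver):

    /* drain any stale completion from a previous attempt */
    try_wait_for_completion(&wm8962->fll_lock);

    snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1,
                        WM8962_FLL_ENA, WM8962_FLL_ENA);

    /* any completion from here on belongs to *this* enable */
    if (!wait_for_completion_timeout(&wm8962->fll_lock,
                                     msecs_to_jiffies(250)))
            dev_err(codec->dev, "FLL lock timed out\n");
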
index ab8e9d1aaff0a0194138cb08a06a806ea8e0559e..0cdb9d1056712df48689c47eba7fa440fb73debe 100644 (file)
@@ -420,7 +420,7 @@ static const char *sidetone_hpf_text[] = {
 };
 
 static const struct soc_enum sidetone_hpf =
-       SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 6, sidetone_hpf_text);
+       SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 7, sidetone_hpf_text);
 
 static const char *hpf_mode_text[] = {
        "HiFi", "Custom", "Voice"
@@ -988,15 +988,10 @@ SND_SOC_DAPM_MICBIAS("MICB1", WM8996_POWER_MANAGEMENT_1, 8, 0),
 SND_SOC_DAPM_PGA("IN1L PGA", WM8996_POWER_MANAGEMENT_2, 5, 0, NULL, 0),
 SND_SOC_DAPM_PGA("IN1R PGA", WM8996_POWER_MANAGEMENT_2, 4, 0, NULL, 0),
 
-SND_SOC_DAPM_MUX("IN1L Mux", SND_SOC_NOPM, 0, 0, &in1_mux),
-SND_SOC_DAPM_MUX("IN1R Mux", SND_SOC_NOPM, 0, 0, &in1_mux),
-SND_SOC_DAPM_MUX("IN2L Mux", SND_SOC_NOPM, 0, 0, &in2_mux),
-SND_SOC_DAPM_MUX("IN2R Mux", SND_SOC_NOPM, 0, 0, &in2_mux),
-
-SND_SOC_DAPM_PGA("IN1L", WM8996_POWER_MANAGEMENT_7, 2, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN1R", WM8996_POWER_MANAGEMENT_7, 3, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN2L", WM8996_POWER_MANAGEMENT_7, 6, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN2R", WM8996_POWER_MANAGEMENT_7, 7, 0, NULL, 0),
+SND_SOC_DAPM_MUX("IN1L Mux", WM8996_POWER_MANAGEMENT_7, 2, 0, &in1_mux),
+SND_SOC_DAPM_MUX("IN1R Mux", WM8996_POWER_MANAGEMENT_7, 3, 0, &in1_mux),
+SND_SOC_DAPM_MUX("IN2L Mux", WM8996_POWER_MANAGEMENT_7, 6, 0, &in2_mux),
+SND_SOC_DAPM_MUX("IN2R Mux", WM8996_POWER_MANAGEMENT_7, 7, 0, &in2_mux),
 
 SND_SOC_DAPM_SUPPLY("DMIC2", WM8996_POWER_MANAGEMENT_7, 9, 0, NULL, 0),
 SND_SOC_DAPM_SUPPLY("DMIC1", WM8996_POWER_MANAGEMENT_7, 8, 0, NULL, 0),
@@ -1213,6 +1208,16 @@ static const struct snd_soc_dapm_route wm8996_dapm_routes[] = {
        { "AIF2RX0", NULL, "AIFCLK" },
        { "AIF2RX1", NULL, "AIFCLK" },
 
+       { "AIF1TX0", NULL, "AIFCLK" },
+       { "AIF1TX1", NULL, "AIFCLK" },
+       { "AIF1TX2", NULL, "AIFCLK" },
+       { "AIF1TX3", NULL, "AIFCLK" },
+       { "AIF1TX4", NULL, "AIFCLK" },
+       { "AIF1TX5", NULL, "AIFCLK" },
+
+       { "AIF2TX0", NULL, "AIFCLK" },
+       { "AIF2TX1", NULL, "AIFCLK" },
+
        { "DSP1RXL", NULL, "SYSDSPCLK" },
        { "DSP1RXR", NULL, "SYSDSPCLK" },
        { "DSP2RXL", NULL, "SYSDSPCLK" },
@@ -2106,6 +2111,9 @@ static int wm8996_set_fll(struct snd_soc_codec *codec, int fll_id, int source,
 
        snd_soc_write(codec, WM8996_FLL_EFS_1, fll_div.lambda);
 
+       /* Clear any pending completions (eg, from failed startups) */
+       try_wait_for_completion(&wm8996->fll_lock);
+
        snd_soc_update_bits(codec, WM8996_FLL_CONTROL_1,
                            WM8996_FLL_ENA, WM8996_FLL_ENA);
 
index 56efa0c1c9a9c746b70e19e9d3bc966adea0edeb..099614e16651bb78c78e0c805aa56c72c97fe9e9 100644 (file)
@@ -385,14 +385,14 @@ static int ep93xx_i2s_probe(struct platform_device *pdev)
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!res) {
                err = -ENODEV;
-               goto fail;
+               goto fail_free_info;
        }
 
        info->mem = request_mem_region(res->start, resource_size(res),
                                       pdev->name);
        if (!info->mem) {
                err = -EBUSY;
-               goto fail;
+               goto fail_free_info;
        }
 
        info->regs = ioremap(info->mem->start, resource_size(info->mem));
@@ -435,6 +435,7 @@ fail_unmap_mem:
        iounmap(info->regs);
 fail_release_mem:
        release_mem_region(info->mem->start, resource_size(info->mem));
+fail_free_info:
        kfree(info);
 fail:
        return err;
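
The ep93xx-i2s change is the usual probe() goto-ladder discipline: one label per acquired resource, placed in reverse acquisition order, so every failure path releases exactly what was taken and nothing more. A skeleton of the idiom, with hypothetical driver names:

    static int foo_probe(struct platform_device *pdev)
    {
            struct foo_info *info;
            struct resource *res;
            int err;

            info = kzalloc(sizeof(*info), GFP_KERNEL);
            if (!info)
                    return -ENOMEM;

            res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
            if (!res) {
                    err = -ENODEV;
                    goto fail_free_info;            /* only info to undo */
            }

            info->mem = request_mem_region(res->start, resource_size(res),
                                           pdev->name);
            if (!info->mem) {
                    err = -EBUSY;
                    goto fail_free_info;
            }

            info->regs = ioremap(info->mem->start, resource_size(info->mem));
            if (!info->regs) {
                    err = -ENXIO;
                    goto fail_release_mem;
            }
            return 0;

    fail_release_mem:
            release_mem_region(info->mem->start, resource_size(info->mem));
    fail_free_info:
            kfree(info);
            return err;
    }
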
index 732208c8c0b40c8c3e673ff3c6efd0a071b99e94..cb50598338e92afd2d10997d220a274fd264d378 100644 (file)
@@ -879,10 +879,12 @@ static struct device_node *find_ssi_node(struct device_node *dma_channel_np)
                 * assume that device_node pointers are a valid comparison.
                 */
                np = of_parse_phandle(ssi_np, "fsl,playback-dma", 0);
+               of_node_put(np);
                if (np == dma_channel_np)
                        return ssi_np;
 
                np = of_parse_phandle(ssi_np, "fsl,capture-dma", 0);
+               of_node_put(np);
                if (np == dma_channel_np)
                        return ssi_np;
        }
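
of_parse_phandle() returns its target node with the reference count raised, and the caller owns that reference. The fsl DMA fix drops it with of_node_put() straight after the lookup, which is safe here because only the pointer value is compared afterwards (and of_node_put(NULL) is a no-op):

    struct device_node *np;

    np = of_parse_phandle(ssi_np, "fsl,playback-dma", 0);
    of_node_put(np);                /* only the pointer value is used below */
    if (np == dma_channel_np)
            return ssi_np;
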
index a19297959587fb422f3efbbe207bc47b500f0168..358f0baaf71b2df230295cda7ccf9f4df2fa901a 100644 (file)
@@ -345,8 +345,10 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
        }
 
        machine_data = kzalloc(sizeof(struct mpc8610_hpcd_data), GFP_KERNEL);
-       if (!machine_data)
-               return -ENOMEM;
+       if (!machine_data) {
+               ret = -ENOMEM;
+               goto error_alloc;
+       }
 
        machine_data->dai[0].cpu_dai_name = dev_name(&ssi_pdev->dev);
        machine_data->dai[0].ops = &mpc8610_hpcd_ops;
@@ -494,7 +496,7 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
        ret = platform_device_add(sound_device);
        if (ret) {
                dev_err(&pdev->dev, "platform device add failed\n");
-               goto error;
+               goto error_sound;
        }
        dev_set_drvdata(&pdev->dev, sound_device);
 
@@ -502,14 +504,12 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
 
        return 0;
 
+error_sound:
+       platform_device_unregister(sound_device);
 error:
-       of_node_put(codec_np);
-
-       if (sound_device)
-               platform_device_unregister(sound_device);
-
        kfree(machine_data);
-
+error_alloc:
+       of_node_put(codec_np);
        return ret;
 }
 
index 8fa4d5f8eda1deff7c854cb557a199364fc4ce84..fcb862eb0c73420e1c3ecb65c240e8ed2a2569f2 100644 (file)
@@ -297,8 +297,10 @@ static int get_dma_channel(struct device_node *ssi_np,
         * dai->platform name should already point to an allocated buffer.
         */
        ret = of_address_to_resource(dma_channel_np, 0, &res);
-       if (ret)
+       if (ret) {
+               of_node_put(dma_channel_np);
                return ret;
+       }
        snprintf((char *)dai->platform_name, DAI_NAME_SIZE, "%llx.%s",
                 (unsigned long long) res.start, dma_channel_np->name);
 
index a33fc51f363be864ad2385e434dce97c409b5991..8f16cd37c2af9e74388ae8ebe2f23bce1405f82a 100644 (file)
@@ -424,7 +424,7 @@ static __devinit int kirkwood_i2s_dev_probe(struct platform_device *pdev)
        if (!priv->mem) {
                dev_err(&pdev->dev, "request_mem_region failed\n");
                err = -EBUSY;
-               goto error;
+               goto error_alloc;
        }
 
        priv->io = ioremap(priv->mem->start, SZ_16K);
index 30fe0d0efe1c7b5d290d5f247d271f6231340cf1..0aa475f92efaac9f01ad36d7fc2d334acdc1d0df 100644 (file)
@@ -514,7 +514,7 @@ static int ams_delta_cx20442_init(struct snd_soc_pcm_runtime *rtd)
        }
 
        /* Set codec bias level */
-       ams_delta_set_bias_level(card, SND_SOC_BIAS_STANDBY);
+       ams_delta_set_bias_level(card, dapm, SND_SOC_BIAS_STANDBY);
 
        /* Add hook switch - can be used to control the codec from userspace
         * even if line discipline fails */
@@ -649,7 +649,9 @@ static void __exit ams_delta_module_exit(void)
                        ams_delta_hook_switch_gpios);
 
        /* Keep modem power on */
-       ams_delta_set_bias_level(&ams_delta_audio_card, SND_SOC_BIAS_STANDBY);
+       ams_delta_set_bias_level(&ams_delta_audio_card,
+                                &ams_delta_audio_card.rtd[0].codec->dapm,
+                                SND_SOC_BIAS_STANDBY);
 
        platform_device_unregister(cx20442_platform_device);
        platform_device_unregister(ams_delta_audio_platform_device);
index b99091fc34eb48db7a27ca1c0e94790e99b8898d..65f980ef28708689f7e554b514cbd54a8b18b3a2 100644 (file)
@@ -185,6 +185,7 @@ config SND_SOC_SPEYSIDE
        select SND_SAMSUNG_I2S
        select SND_SOC_WM8996
        select SND_SOC_WM9081
+       select SND_SOC_WM1250_EV1
 
 config SND_SOC_SPEYSIDE_WM8962
        tristate "Audio support for Wolfson Speyside with WM8962"
index 241f55d0066070b299159c0f92f6f8836fed4ad5..c6c65892294e4bf8cee9829a1c6fb456e522aebd 100644 (file)
@@ -13,6 +13,7 @@
  *
  */
 
+#include <linux/types.h>
 #include <linux/gpio.h>
 
 #include <sound/soc.h>
index 1e574a5d440d0610e77d68c1d9582a2765719d01..bc8c1676459f781b56b28e5fbcadd9ad7732f4a4 100644 (file)
@@ -17,6 +17,7 @@
  *
  */
 
+#include <linux/types.h>
 #include <linux/gpio.h>
 
 #include <sound/soc.h>
index 0b9eb5f7ec4cdb02656f8b94fe0506fbc8b49626..72535f2daaf20612c28eef1fcadc577e96cd5ba2 100644 (file)
@@ -23,6 +23,9 @@ static int speyside_wm8962_set_bias_level(struct snd_soc_card *card,
        struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai;
        int ret;
 
+       if (dapm->dev != codec_dai->dev)
+               return 0;
+
        switch (level) {
        case SND_SOC_BIAS_PREPARE:
                if (dapm->bias_level == SND_SOC_BIAS_STANDBY) {
@@ -57,6 +60,9 @@ static int speyside_wm8962_set_bias_level_post(struct snd_soc_card *card,
        struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai;
        int ret;
 
+       if (dapm->dev != codec_dai->dev)
+               return 0;
+
        switch (level) {
        case SND_SOC_BIAS_STANDBY:
                ret = snd_soc_dai_set_sysclk(codec_dai, WM8962_SYSCLK_MCLK,
index 83ad8ca274903cff750166606c613db1c5f79df0..b085d8e87574b2a34d113117f623f76c562a2426 100644 (file)
@@ -1913,7 +1913,7 @@ struct snd_kcontrol *snd_soc_cnew(const struct snd_kcontrol_new *_template,
 
        if (prefix) {
                name_len = strlen(long_name) + strlen(prefix) + 2;
-               name = kmalloc(name_len, GFP_ATOMIC);
+               name = kmalloc(name_len, GFP_KERNEL);
                if (!name)
                        return NULL;
 
index cca490c80589db6e5c5f402da6dda77c988bf41e..a62f7dd4ba96bcd266cd98e468d3248fe87ee096 100644 (file)
@@ -205,6 +205,25 @@ static unsigned int snd_soc_16_8_read_i2c(struct snd_soc_codec *codec,
 #define snd_soc_16_8_read_i2c NULL
 #endif
 
+#if defined(CONFIG_SPI_MASTER)
+static unsigned int snd_soc_16_8_read_spi(struct snd_soc_codec *codec,
+                                         unsigned int r)
+{
+       struct spi_device *spi = codec->control_data;
+
+       const u16 reg = cpu_to_be16(r | 0x100);
+       u8 data;
+       int ret;
+
+       ret = spi_write_then_read(spi, &reg, 2, &data, 1);
+       if (ret < 0)
+               return 0;
+       return data;
+}
+#else
+#define snd_soc_16_8_read_spi NULL
+#endif
+
 static int snd_soc_16_8_write(struct snd_soc_codec *codec, unsigned int reg,
                              unsigned int value)
 {
@@ -295,6 +314,7 @@ static struct {
        int (*write)(struct snd_soc_codec *codec, unsigned int, unsigned int);
        unsigned int (*read)(struct snd_soc_codec *, unsigned int);
        unsigned int (*i2c_read)(struct snd_soc_codec *, unsigned int);
+       unsigned int (*spi_read)(struct snd_soc_codec *, unsigned int);
 } io_types[] = {
        {
                .addr_bits = 4, .data_bits = 12,
@@ -318,6 +338,7 @@ static struct {
                .addr_bits = 16, .data_bits = 8,
                .write = snd_soc_16_8_write,
                .i2c_read = snd_soc_16_8_read_i2c,
+               .spi_read = snd_soc_16_8_read_spi,
        },
        {
                .addr_bits = 16, .data_bits = 16,
@@ -383,6 +404,8 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec,
 #ifdef CONFIG_SPI_MASTER
                codec->hw_write = do_spi_write;
 #endif
+               if (io_types[i].spi_read)
+                       codec->hw_read = io_types[i].spi_read;
 
                codec->control_data = container_of(codec->dev,
                                                   struct spi_device,
index 7c17b98d584609c4a9fd78afe5c019a43728db1f..38b00131b2fe20cf395b70197a650d4e2e35e679 100644 (file)
@@ -327,7 +327,7 @@ int snd_soc_jack_add_gpios(struct snd_soc_jack *jack, int count,
                                              IRQF_TRIGGER_FALLING,
                                              gpios[i].name,
                                              &gpios[i]);
-               if (ret)
+               if (ret < 0)
                        goto err;
 
                if (gpios[i].wake) {
index b5759397afa342a89fe494b51d2d97e2a0d3e346..2879c883eebc2161d0c1ff3fdc14e764cde9ac33 100644 (file)
@@ -290,6 +290,9 @@ static int soc_pcm_close(struct snd_pcm_substream *substream)
        codec_dai->active--;
        codec->active--;
 
+       if (!cpu_dai->active && !codec_dai->active)
+               rtd->rate = 0;
+
        /* Muting the DAC suppresses artifacts caused during digital
         * shutdown, for example from stopping clocks.
         */
index 661373c2352a09039c924484b5c5ba677b80d0a4..be27f1d229af9df67fd8bb2dc269e3f2337b31a0 100644 (file)
@@ -319,7 +319,7 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd)
        snd_soc_dapm_force_enable_pin(dapm, "Mic Bias");
 
        /* FIXME: Calculate automatically based on DAPM routes? */
-       if (!machine_is_harmony() && !machine_is_ventana())
+       if (!machine_is_harmony())
                snd_soc_dapm_nc_pin(dapm, "IN1L");
        if (!machine_is_seaboard() && !machine_is_aebl())
                snd_soc_dapm_nc_pin(dapm, "IN1R");
@@ -395,7 +395,7 @@ static __devinit int tegra_wm8903_driver_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, card);
        snd_soc_card_set_drvdata(card, machine);
 
-       if (machine_is_harmony() || machine_is_ventana()) {
+       if (machine_is_harmony()) {
                card->dapm_routes = harmony_audio_map;
                card->num_dapm_routes = ARRAY_SIZE(harmony_audio_map);
        } else if (machine_is_seaboard()) {
index 94c2cf0a98b88e771ef4377fef83ea16deb72398..e8a03aceceb11fe88b5552a0952431866213ee42 100644 (file)
@@ -24,7 +24,7 @@
 
 # Set the following to `true' to make a unstripped, unoptimized
 # binary. Leave this set to `false' for production use.
-DEBUG ?=       false
+DEBUG ?=       true
 
 # make the build silent. Set this to something else to make it noisy again.
 V ?=           false
@@ -35,7 +35,7 @@ NLS ?=                true
 
 # Set the following to 'true' to build/install the
 # cpufreq-bench benchmarking tool
-CPUFRQ_BENCH ?= true
+CPUFREQ_BENCH ?= true
 
 # Prefix to the directories we're installing to
 DESTDIR ?=
@@ -137,9 +137,10 @@ CFLAGS +=  -pipe
 ifeq ($(strip $(NLS)),true)
        INSTALL_NLS += install-gmo
        COMPILE_NLS += create-gmo
+       CFLAGS += -DNLS
 endif
 
-ifeq ($(strip $(CPUFRQ_BENCH)),true)
+ifeq ($(strip $(CPUFREQ_BENCH)),true)
        INSTALL_BENCH += install-bench
        COMPILE_BENCH += compile-bench
 endif
index dbf13998462a3c3e02b2784dd10c24314d2c006b..3326217dd31158d24446213d709a3f8cb5b7002c 100644 (file)
@@ -1,10 +1,10 @@
 default: all
 
-centrino-decode: centrino-decode.c
-       $(CC) $(CFLAGS) -o centrino-decode centrino-decode.c
+centrino-decode: ../i386/centrino-decode.c
+       $(CC) $(CFLAGS) -o $@ $<
 
-powernow-k8-decode: powernow-k8-decode.c
-       $(CC) $(CFLAGS) -o powernow-k8-decode powernow-k8-decode.c
+powernow-k8-decode: ../i386/powernow-k8-decode.c
+       $(CC) $(CFLAGS) -o $@ $<
 
 all: centrino-decode powernow-k8-decode
 
diff --git a/tools/power/cpupower/debug/x86_64/centrino-decode.c b/tools/power/cpupower/debug/x86_64/centrino-decode.c
deleted file mode 120000 (symlink)
index 26fb3f1..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../i386/centrino-decode.c
\ No newline at end of file
diff --git a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c b/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c
deleted file mode 120000 (symlink)
index eb30c79..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../i386/powernow-k8-decode.c
\ No newline at end of file
index 3194811d58f55e83c02bc6c3fa7c1aa96fc922a5..bb60a8d1e45abb0e4685792ad02ca187893cac8a 100644 (file)
@@ -1,10 +1,10 @@
-.TH "cpufreq-info" "1" "0.1" "Mattia Dongili" ""
+.TH "cpupower-frequency-info" "1" "0.1" "Mattia Dongili" ""
 .SH "NAME"
 .LP 
-cpufreq\-info \- Utility to retrieve cpufreq kernel information
+cpupower frequency\-info \- Utility to retrieve cpufreq kernel information
 .SH "SYNTAX"
 .LP 
-cpufreq\-info [\fIoptions\fP]
+cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP]
 .SH "DESCRIPTION"
 .LP 
 A small tool which prints out cpufreq information helpful to developers and interested users.
index 26e3e13eee3b61c7427d579896e9f31f50c01440..685f469093ad200c93e2e9feb3e23dcaec863a5a 100644 (file)
@@ -1,13 +1,13 @@
-.TH "cpufreq-set" "1" "0.1" "Mattia Dongili" ""
+.TH "cpupower-frequency-set" "1" "0.1" "Mattia Dongili" ""
 .SH "NAME"
 .LP 
-cpufreq\-set \- A small tool which allows to modify cpufreq settings.
+cpupower frequency\-set \- A small tool which allows you to modify cpufreq settings.
 .SH "SYNTAX"
 .LP 
-cpufreq\-set [\fIoptions\fP]
+cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP]
 .SH "DESCRIPTION"
 .LP 
-cpufreq\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time.
+cpupower frequency\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time.
 .SH "OPTIONS"
 .LP 
 .TP 
index 78c20feab85c27447563ee29cd7927aa0cac6363..baf741d06e82543f3529f4f13769bfc1d196cffc 100644 (file)
@@ -3,7 +3,7 @@
 cpupower \- Shows and sets processor power related values
 .SH SYNOPSIS
 .ft B
-.B cpupower [ \-c cpulist ] subcommand [ARGS]
+.B cpupower [ \-c cpulist ] <command> [ARGS]
 
 .B cpupower \-v|\-\-version
 
@@ -13,24 +13,24 @@ cpupower \- Shows and sets processor power related values
 \fBcpupower \fP is a collection of tools to examine and tune power saving
 related features of your processor.
 
-The manpages of the subcommands (cpupower\-<subcommand>(1)) provide detailed
+The manpages of the commands (cpupower\-<command>(1)) provide detailed
 descriptions of supported features. Run \fBcpupower help\fP to get an overview
-of supported subcommands.
+of supported commands.
 
 .SH Options
 .PP
 \-\-help, \-h
 .RS 4
-Shows supported subcommands and general usage.
+Shows supported commands and general usage.
 .RE
 .PP
 \-\-cpu cpulist,  \-c cpulist
 .RS 4
 Only show or set values for specific cores.
-This option is not supported by all subcommands, details can be found in the
-manpages of the subcommands.
+This option is not supported by all commands, details can be found in the
+manpages of the commands.
 
-Some subcommands access all cores (typically the *\-set commands), some only
+Some commands access all cores (typically the *\-set commands), some only
 the first core (typically the *\-info commands) by default.
 
 The syntax for <cpulist> is based on how the kernel exports CPU bitmasks via
index c870ffba5219615c4d55823100c6854c66707f38..c10496fbe3c629c270d6b9ab406f587c8b4736c6 100644 (file)
@@ -8,11 +8,4 @@ extern int cmd_freq_info(int argc, const char **argv);
 extern int cmd_idle_info(int argc, const char **argv);
 extern int cmd_monitor(int argc, const char **argv);
 
-extern void set_help(void);
-extern void info_help(void);
-extern void freq_set_help(void);
-extern void freq_info_help(void);
-extern void idle_info_help(void);
-extern void monitor_help(void);
-
 #endif
index 5a1d25f056b3b60c0f547cc43314d03272fcfac7..28953c9a7bd5980ad600d8830c736119809e51e0 100644 (file)
@@ -510,37 +510,6 @@ static int get_latency(unsigned int cpu, unsigned int human)
        return 0;
 }
 
-void freq_info_help(void)
-{
-       printf(_("Usage: cpupower freqinfo [options]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -e, --debug          Prints out debug information [default]\n"));
-       printf(_("  -f, --freq           Get frequency the CPU currently runs at, according\n"
-              "                       to the cpufreq core *\n"));
-       printf(_("  -w, --hwfreq         Get frequency the CPU currently runs at, by reading\n"
-              "                       it from hardware (only available to root) *\n"));
-       printf(_("  -l, --hwlimits       Determine the minimum and maximum CPU frequency allowed *\n"));
-       printf(_("  -d, --driver         Determines the used cpufreq kernel driver *\n"));
-       printf(_("  -p, --policy         Gets the currently used cpufreq policy *\n"));
-       printf(_("  -g, --governors      Determines available cpufreq governors *\n"));
-       printf(_("  -r, --related-cpus   Determines which CPUs run at the same hardware frequency *\n"));
-       printf(_("  -a, --affected-cpus  Determines which CPUs need to have their frequency\n"
-                       "                       coordinated by software *\n"));
-       printf(_("  -s, --stats          Shows cpufreq statistics if available\n"));
-       printf(_("  -y, --latency        Determines the maximum latency on CPU frequency changes *\n"));
-       printf(_("  -b, --boost          Checks for turbo or boost modes  *\n"));
-       printf(_("  -o, --proc           Prints out information like provided by the /proc/cpufreq\n"
-              "                       interface in 2.4. and early 2.6. kernels\n"));
-       printf(_("  -m, --human          human-readable output for the -f, -w, -s and -y parameters\n"));
-       printf(_("  -h, --help           Prints out this screen\n"));
-
-       printf("\n");
-       printf(_("If no argument is given, full output about\n"
-              "cpufreq is printed which is useful e.g. for reporting bugs.\n\n"));
-       printf(_("By default info of CPU 0 is shown which can be overridden\n"
-                "with the cpupower --cpu main command option.\n"));
-}
-
 static struct option info_opts[] = {
        { .name = "debug",      .has_arg = no_argument,         .flag = NULL,   .val = 'e'},
        { .name = "boost",      .has_arg = no_argument,         .flag = NULL,   .val = 'b'},
@@ -556,7 +525,6 @@ static struct option info_opts[] = {
        { .name = "latency",    .has_arg = no_argument,         .flag = NULL,   .val = 'y'},
        { .name = "proc",       .has_arg = no_argument,         .flag = NULL,   .val = 'o'},
        { .name = "human",      .has_arg = no_argument,         .flag = NULL,   .val = 'm'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
        { },
 };
 
@@ -570,16 +538,12 @@ int cmd_freq_info(int argc, char **argv)
        int output_param = 0;
 
        do {
-               ret = getopt_long(argc, argv, "hoefwldpgrasmyb", info_opts, NULL);
+               ret = getopt_long(argc, argv, "oefwldpgrasmyb", info_opts, NULL);
                switch (ret) {
                case '?':
                        output_param = '?';
                        cont = 0;
                        break;
-               case 'h':
-                       output_param = 'h';
-                       cont = 0;
-                       break;
                case -1:
                        cont = 0;
                        break;
@@ -642,11 +606,7 @@ int cmd_freq_info(int argc, char **argv)
                return -EINVAL;
        case '?':
                printf(_("invalid or unknown argument\n"));
-               freq_info_help();
                return -EINVAL;
-       case 'h':
-               freq_info_help();
-               return EXIT_SUCCESS;
        case 'o':
                proc_cpufreq_output();
                return EXIT_SUCCESS;
index 5f783622bf31f89e2618ff5b84a50fb37221957b..dd1539eb8c63de3e8d166b24a5f79b65702a4685 100644 (file)
 
 #define NORM_FREQ_LEN 32
 
-void freq_set_help(void)
-{
-       printf(_("Usage: cpupower frequency-set [options]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -d FREQ, --min FREQ      new minimum CPU frequency the governor may select\n"));
-       printf(_("  -u FREQ, --max FREQ      new maximum CPU frequency the governor may select\n"));
-       printf(_("  -g GOV, --governor GOV   new cpufreq governor\n"));
-       printf(_("  -f FREQ, --freq FREQ     specific frequency to be set. Requires userspace\n"
-              "                           governor to be available and loaded\n"));
-       printf(_("  -r, --related            Switches all hardware-related CPUs\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf("\n");
-       printf(_("Notes:\n"
-              "1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"));
-       printf(_("2. The -f FREQ, --freq FREQ parameter cannot be combined with any other parameter\n"
-              "   except the -c CPU, --cpu CPU parameter\n"
-              "3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
-              "   by postfixing the value with the wanted unit name, without any space\n"
-              "   (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"));
-
-}
-
 static struct option set_opts[] = {
        { .name = "min",        .has_arg = required_argument,   .flag = NULL,   .val = 'd'},
        { .name = "max",        .has_arg = required_argument,   .flag = NULL,   .val = 'u'},
        { .name = "governor",   .has_arg = required_argument,   .flag = NULL,   .val = 'g'},
        { .name = "freq",       .has_arg = required_argument,   .flag = NULL,   .val = 'f'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
        { .name = "related",    .has_arg = no_argument,         .flag = NULL,   .val='r'},
        { },
 };
@@ -80,7 +57,6 @@ const struct freq_units def_units[] = {
 static void print_unknown_arg(void)
 {
        printf(_("invalid or unknown argument\n"));
-       freq_set_help();
 }
 
 static unsigned long string_to_frequency(const char *str)
@@ -231,14 +207,11 @@ int cmd_freq_set(int argc, char **argv)
 
        /* parameter parsing */
        do {
-               ret = getopt_long(argc, argv, "d:u:g:f:hr", set_opts, NULL);
+               ret = getopt_long(argc, argv, "d:u:g:f:r", set_opts, NULL);
                switch (ret) {
                case '?':
                        print_unknown_arg();
                        return -EINVAL;
-               case 'h':
-                       freq_set_help();
-                       return 0;
                case -1:
                        cont = 0;
                        break;
index 70da3574f1e99940a33dfa4348a35a046de2c5b8..b028267c1376a6c63c455bbdabeb5c36218aee2a 100644 (file)
@@ -139,30 +139,14 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
        }
 }
 
-/* --freq / -f */
-
-void idle_info_help(void)
-{
-       printf(_ ("Usage: cpupower idleinfo [options]\n"));
-       printf(_ ("Options:\n"));
-       printf(_ ("  -s, --silent         Only show general C-state information\n"));
-       printf(_ ("  -o, --proc           Prints out information like provided by the /proc/acpi/processor/*/power\n"
-              "                       interface in older kernels\n"));
-       printf(_ ("  -h, --help           Prints out this screen\n"));
-
-       printf("\n");
-}
-
 static struct option info_opts[] = {
        { .name = "silent",     .has_arg = no_argument, .flag = NULL,   .val = 's'},
        { .name = "proc",       .has_arg = no_argument, .flag = NULL,   .val = 'o'},
-       { .name = "help",       .has_arg = no_argument, .flag = NULL,   .val = 'h'},
        { },
 };
 
 static inline void cpuidle_exit(int fail)
 {
-       idle_info_help();
        exit(EXIT_FAILURE);
 }
 
@@ -174,7 +158,7 @@ int cmd_idle_info(int argc, char **argv)
        unsigned int cpu = 0;
 
        do {
-               ret = getopt_long(argc, argv, "hos", info_opts, NULL);
+               ret = getopt_long(argc, argv, "os", info_opts, NULL);
                if (ret == -1)
                        break;
                switch (ret) {
@@ -182,10 +166,6 @@ int cmd_idle_info(int argc, char **argv)
                        output_param = '?';
                        cont = 0;
                        break;
-               case 'h':
-                       output_param = 'h';
-                       cont = 0;
-                       break;
                case 's':
                        verbose = 0;
                        break;
@@ -211,8 +191,6 @@ int cmd_idle_info(int argc, char **argv)
        case '?':
                printf(_("invalid or unknown argument\n"));
                cpuidle_exit(EXIT_FAILURE);
-       case 'h':
-               cpuidle_exit(EXIT_SUCCESS);
        }
 
        /* Default is: show output of CPU 0 only */
index 85253cb7600ef8c7850bcd7332a6639be20b9bf3..3f68632c28c7bccc02f2d0a3266288e4a7bfd74a 100644 (file)
 #include "helpers/helpers.h"
 #include "helpers/sysfs.h"
 
-void info_help(void)
-{
-       printf(_("Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -b, --perf-bias    Gets CPU's power vs performance policy on some\n"
-              "                           Intel models [0-15], see manpage for details\n"));
-       printf(_("  -m, --sched-mc     Gets the kernel's multi core scheduler policy.\n"));
-       printf(_("  -s, --sched-smt    Gets the kernel's thread sibling scheduler policy.\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf(_("\nPassing no option will show all info, by default only on core 0\n"));
-       printf("\n");
-}
-
 static struct option set_opts[] = {
        { .name = "perf-bias",  .has_arg = optional_argument,   .flag = NULL,   .val = 'b'},
        { .name = "sched-mc",   .has_arg = optional_argument,   .flag = NULL,   .val = 'm'},
        { .name = "sched-smt",  .has_arg = optional_argument,   .flag = NULL,   .val = 's'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
        { },
 };
 
 static void print_wrong_arg_exit(void)
 {
        printf(_("invalid or unknown argument\n"));
-       info_help();
        exit(EXIT_FAILURE);
 }
 
@@ -64,11 +49,8 @@ int cmd_info(int argc, char **argv)
        textdomain(PACKAGE);
 
        /* parameter parsing */
-       while ((ret = getopt_long(argc, argv, "msbh", set_opts, NULL)) != -1) {
+       while ((ret = getopt_long(argc, argv, "msb", set_opts, NULL)) != -1) {
                switch (ret) {
-               case 'h':
-                       info_help();
-                       return 0;
                case 'b':
                        if (params.perf_bias)
                                print_wrong_arg_exit();
index bc1b391e46f0ba785ed6ae0bc11f18cc7d3678e3..dc4de37621117f7dc6fe8d47669e761063c9eed0 100644 (file)
 #include "helpers/sysfs.h"
 #include "helpers/bitmask.h"
 
-void set_help(void)
-{
-       printf(_("Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -b, --perf-bias [VAL]    Sets CPU's power vs performance policy on some\n"
-              "                           Intel models [0-15], see manpage for details\n"));
-       printf(_("  -m, --sched-mc  [VAL]    Sets the kernel's multi core scheduler policy.\n"));
-       printf(_("  -s, --sched-smt [VAL]    Sets the kernel's thread sibling scheduler policy.\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf("\n");
-}
-
 static struct option set_opts[] = {
        { .name = "perf-bias",  .has_arg = optional_argument,   .flag = NULL,   .val = 'b'},
        { .name = "sched-mc",   .has_arg = optional_argument,   .flag = NULL,   .val = 'm'},
        { .name = "sched-smt",  .has_arg = optional_argument,   .flag = NULL,   .val = 's'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
        { },
 };
 
 static void print_wrong_arg_exit(void)
 {
        printf(_("invalid or unknown argument\n"));
-       set_help();
        exit(EXIT_FAILURE);
 }
 
@@ -66,12 +52,9 @@ int cmd_set(int argc, char **argv)
 
        params.params = 0;
        /* parameter parsing */
-       while ((ret = getopt_long(argc, argv, "m:s:b:h",
+       while ((ret = getopt_long(argc, argv, "m:s:b:",
                                                set_opts, NULL)) != -1) {
                switch (ret) {
-               case 'h':
-                       set_help();
-                       return 0;
                case 'b':
                        if (params.perf_bias)
                                print_wrong_arg_exit();
@@ -110,10 +93,8 @@ int cmd_set(int argc, char **argv)
                }
        };
 
-       if (!params.params) {
-               set_help();
-               return -EINVAL;
-       }
+       if (!params.params)
+               print_wrong_arg_exit();
 
        if (params.sched_mc) {
                ret = sysfs_set_sched("mc", sched_mc);
index 5844ae0f786f2a09ca3b01016e338c03585d3af4..52bee591c1c565f3bf9760505c7a824122d2537c 100644 (file)
@@ -11,6 +11,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <errno.h>
 
 #include "builtin.h"
 #include "helpers/helpers.h"
 struct cmd_struct {
        const char *cmd;
        int (*main)(int, const char **);
-       void (*usage)(void);
        int needs_root;
 };
 
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
-int cmd_help(int argc, const char **argv);
+static int cmd_help(int argc, const char **argv);
 
 /* Global cpu_info object available for all binaries
  * Info only retrieved from CPU 0
@@ -44,55 +44,66 @@ int be_verbose;
 static void print_help(void);
 
 static struct cmd_struct commands[] = {
-       { "frequency-info",     cmd_freq_info,  freq_info_help, 0       },
-       { "frequency-set",      cmd_freq_set,   freq_set_help,  1       },
-       { "idle-info",          cmd_idle_info,  idle_info_help, 0       },
-       { "set",                cmd_set,        set_help,       1       },
-       { "info",               cmd_info,       info_help,      0       },
-       { "monitor",            cmd_monitor,    monitor_help,   0       },
-       { "help",               cmd_help,       print_help,     0       },
-       /*      { "bench",      cmd_bench,      NULL,           1       }, */
+       { "frequency-info",     cmd_freq_info,  0       },
+       { "frequency-set",      cmd_freq_set,   1       },
+       { "idle-info",          cmd_idle_info,  0       },
+       { "set",                cmd_set,        1       },
+       { "info",               cmd_info,       0       },
+       { "monitor",            cmd_monitor,    0       },
+       { "help",               cmd_help,       0       },
+       /*      { "bench",      cmd_bench,      1       }, */
 };
 
-int cmd_help(int argc, const char **argv)
-{
-       unsigned int i;
-
-       if (argc > 1) {
-               for (i = 0; i < ARRAY_SIZE(commands); i++) {
-                       struct cmd_struct *p = commands + i;
-                       if (strcmp(p->cmd, argv[1]))
-                               continue;
-                       if (p->usage) {
-                               p->usage();
-                               return EXIT_SUCCESS;
-                       }
-               }
-       }
-       print_help();
-       if (argc == 1)
-               return EXIT_SUCCESS; /* cpupower help */
-       return EXIT_FAILURE;
-}
-
 static void print_help(void)
 {
        unsigned int i;
 
 #ifdef DEBUG
-       printf(_("cpupower [ -d ][ -c cpulist ] subcommand [ARGS]\n"));
-       printf(_("  -d, --debug      May increase output (stderr) on some subcommands\n"));
+       printf(_("Usage:\tcpupower [-d|--debug] [-c|--cpu cpulist ] <command> [<args>]\n"));
 #else
-       printf(_("cpupower [ -c cpulist ] subcommand [ARGS]\n"));
+       printf(_("Usage:\tcpupower [-c|--cpu cpulist ] <command> [<args>]\n"));
 #endif
-       printf(_("cpupower --version\n"));
-       printf(_("Supported subcommands are:\n"));
+       printf(_("Supported commands are:\n"));
        for (i = 0; i < ARRAY_SIZE(commands); i++)
                printf("\t%s\n", commands[i].cmd);
-       printf(_("\nSome subcommands can make use of the -c cpulist option.\n"));
-       printf(_("Look at the general cpupower manpage how to use it\n"));
-       printf(_("and read up the subcommand's manpage whether it is supported.\n"));
-       printf(_("\nUse cpupower help subcommand for getting help for above subcommands.\n"));
+       printf(_("\nNot all commands can make use of the -c cpulist option.\n"));
+       printf(_("\nUse 'cpupower help <command>' for getting help for above commands.\n"));
+}
+
+static int print_man_page(const char *subpage)
+{
+       int len;
+       char *page;
+
+       len = 10; /* strlen("cpupower-") + 1 for the terminating NUL */
+       if (subpage != NULL)
+               len += strlen(subpage);
+
+       page = malloc(len);
+       if (!page)
+               return -ENOMEM;
+
+       sprintf(page, "cpupower");
+       if ((subpage != NULL) && strcmp(subpage, "help")) {
+               strcat(page, "-");
+               strcat(page, subpage);
+       }
+
+       execlp("man", "man", page, NULL);
+
+       /* reached only if execlp() itself failed, e.g. man(1) not installed */
+       return -EINVAL;
+}
+
+static int cmd_help(int argc, const char **argv)
+{
+       if (argc > 1) {
+               print_man_page(argv[1]); /* returns only if execlp() failed */
+               return EXIT_FAILURE;
+       }
+
+       print_help();
+       return EXIT_SUCCESS;
 }
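Illustrative behaviour of the new help dispatch (assuming the matching
cpupower-* manpages are installed):

  cpupower help frequency-set    # execs "man cpupower-frequency-set"
  cpupower help                  # prints the summary from print_help()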
 
 static void print_version(void)
index 592ee362b877c92083d736d7699d7cb6d3e3250a..2747e738efb04d3fa1a53e7686162240cfc7a7ec 100644 (file)
 #include "helpers/bitmask.h"
 
 /* Internationalization ****************************/
+#ifdef NLS
+
 #define _(String) gettext(String)
 #ifndef gettext_noop
 #define gettext_noop(String) String
 #endif
 #define N_(String) gettext_noop(String)
+
+#else /* !NLS */
+
+#define _(String) String
+#define N_(String) String
+
+#endif
 /* Internationalization ****************************/
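A minimal sketch of what the conditional macros buy, using a string that
appears elsewhere in this patch: with NLS defined, _() routes the literal
through gettext() for translation; without NLS it collapses to the plain
string, so the same call sites compile either way:

  /* NLS:  printf(gettext("invalid or unknown argument\n"));
   * !NLS: printf("invalid or unknown argument\n");          */
  printf(_("invalid or unknown argument\n"));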
 
 extern int run_as_root;
@@ -96,6 +105,9 @@ struct cpupower_topology {
                int pkg;
                int core;
                int cpu;
+
+               /* flags */
+               unsigned int is_online:1;
        } *core_info;
 };
 
index 55e2466674c636d767dc3efdb32cf5c145d3fc06..c6343024a61158d094c0a831d369b7d1bd8cdb45 100644 (file)
@@ -56,6 +56,56 @@ static unsigned int sysfs_write_file(const char *path,
        return (unsigned int) numwrite;
 }
 
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline
+ *     a negative errno value on error
+ */
+int sysfs_is_cpu_online(unsigned int cpu)
+{
+       char path[SYSFS_PATH_MAX];
+       int fd;
+       ssize_t numread;
+       unsigned long long value;
+       char linebuf[MAX_LINE_LEN];
+       char *endp;
+       struct stat statbuf;
+
+       snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+       if (stat(path, &statbuf) != 0)
+               return 0;
+
+       /*
+        * On a kernel without CONFIG_HOTPLUG_CPU the cpuX directory
+        * exists, but the cpuX/online file does not: treat as online.
+        */
+       snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+       if (stat(path, &statbuf) != 0)
+               return 1;
+
+       fd = open(path, O_RDONLY);
+       if (fd == -1)
+               return -errno;
+
+       numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+       if (numread < 1) {
+               close(fd);
+               return -EIO;
+       }
+       linebuf[numread] = '\0';
+       close(fd);
+
+       value = strtoull(linebuf, &endp, 0);
+       if (value > 1 || value < 0)
+               return -EINVAL;
+
+       return value;
+}
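A minimal caller sketch (hypothetical helper, not part of this patch)
showing how the tri-state return is meant to be consumed: negative values
are -errno and must not be conflated with "offline". Assumes <stdio.h>,
<string.h> and <errno.h> are included:

  static void report_cpu_state(unsigned int cpu)
  {
          int ret = sysfs_is_cpu_online(cpu);

          if (ret < 0)    /* -errno: sysfs access failed */
                  fprintf(stderr, "cpu%u: %s\n", cpu, strerror(-ret));
          else            /* 1 = online, 0 = offline */
                  printf("cpu%u is %s\n", cpu, ret ? "online" : "offline");
  }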
+
 /* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
 
 /*
index f9373e0906377d78235972950a00e42a6d0c3744..8cb797bbceb0b565a5f698e68c71b856dbe2b37c 100644 (file)
@@ -7,6 +7,8 @@
 
 extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
 
+extern int sysfs_is_cpu_online(unsigned int cpu);
+
 extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu,
                                                unsigned int idlestate);
 extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu,
index 385ee5c7570cc6551c64436cd8cf65a213080188..4eae2c47ba48d20d9e62505e023d0dd0a1c7862f 100644 (file)
@@ -41,6 +41,8 @@ struct cpuid_core_info {
        unsigned int pkg;
        unsigned int thread;
        unsigned int cpu;
+       /* flags */
+       unsigned int is_online:1;
 };
 
 static int __compare(const void *t1, const void *t2)
@@ -78,6 +80,8 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
                return -ENOMEM;
        cpu_top->pkgs = cpu_top->cores = 0;
        for (cpu = 0; cpu < cpus; cpu++) {
+               cpu_top->core_info[cpu].cpu = cpu;
+               cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
                cpu_top->core_info[cpu].pkg =
                        sysfs_topology_read_file(cpu, "physical_package_id");
                if ((int)cpu_top->core_info[cpu].pkg != -1 &&
@@ -85,7 +89,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
                        cpu_top->pkgs = cpu_top->core_info[cpu].pkg;
                cpu_top->core_info[cpu].core =
                        sysfs_topology_read_file(cpu, "core_id");
-               cpu_top->core_info[cpu].cpu = cpu;
        }
        cpu_top->pkgs++;
 
index d048b96a61553d8ab54ca908acd7edeced287354..bcd22a1a397083d1b6e5ed7d311fcbcf8e01dffb 100644 (file)
@@ -134,7 +134,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
        /* Assume idle state count is the same for all CPUs */
        cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
 
-       if (cpuidle_sysfs_monitor.hw_states_num == 0)
+       if (cpuidle_sysfs_monitor.hw_states_num <= 0)
                return NULL;
 
        for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
index ba4bf068380d9002bac92fd4d8370f2ebd20040c..0d6571e418db4b7eb73017f23be7041c64bbe56c 100644 (file)
@@ -43,6 +43,12 @@ static struct cpupower_topology cpu_top;
 /* ToDo: Document this in the manpage */
 static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
 
+static void print_wrong_arg_exit(void)
+{
+       printf(_("invalid or unknown argument\n"));
+       exit(EXIT_FAILURE);
+}
+
 long long timespec_diff_us(struct timespec start, struct timespec end)
 {
        struct timespec temp;
@@ -56,21 +62,6 @@ long long timespec_diff_us(struct timespec start, struct timespec end)
        return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000);
 }
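As a worked example of the return expression (illustrative values):
start = {10 s, 500000000 ns} and end = {12 s, 750000000 ns} give
temp = {2 s, 250000000 ns}, so the function returns
2 * 1000000 + 250000000 / 1000 = 2250000 microseconds.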
 
-void monitor_help(void)
-{
-       printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] command\n"));
-       printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] [ -i interval_sec ]\n"));
-       printf(_("cpupower monitor: -l\n"));
-       printf(_("\t command: pass an arbitrary command to measure specific workload\n"));
-       printf(_("\t -i: time intervall to measure for in seconds (default 1)\n"));
-       printf(_("\t -l: list available CPU sleep monitors (for use with -m)\n"));
-       printf(_("\t -m: show specific CPU sleep monitors only (in same order)\n"));
-       printf(_("\t -h: print this help\n"));
-       printf("\n");
-       printf(_("only one of: -l, -m are allowed\nIf none of them is passed,"));
-       printf(_(" all supported monitors are shown\n"));
-}
-
 void print_n_spaces(int n)
 {
        int x;
@@ -149,6 +140,10 @@ void print_results(int topology_depth, int cpu)
        unsigned long long result;
        cstate_t s;
 
+       /* CPUs may have been re-sorted by pkg value; use core_info[cpu].cpu, not the loop index */
+       if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu))
+               return;
+
        if (topology_depth > 2)
                printf("%4d|", cpu_top.core_info[cpu].pkg);
        if (topology_depth > 1)
@@ -190,9 +185,13 @@ void print_results(int topology_depth, int cpu)
                        }
                }
        }
-       /* cpu offline */
-       if (cpu_top.core_info[cpu].pkg == -1 ||
-           cpu_top.core_info[cpu].core == -1) {
+       /*
+        * The monitor could still provide useful data; for example,
+        * AMD HW counters partly sit in PCI config space.
+        * It's up to each monitor plug-in to check .is_online; this
+        * message is only informational.
+        */
+       if (!cpu_top.core_info[cpu].is_online) {
                printf(_(" *is offline\n"));
                return;
        } else
@@ -238,7 +237,6 @@ static void parse_monitor_param(char *param)
        if (hits == 0) {
                printf(_("No matching monitor found in %s, "
                         "try -l option\n"), param);
-               monitor_help();
                exit(EXIT_FAILURE);
        }
        /* Override detected/registered monitors array with the requested ones */
@@ -335,37 +333,27 @@ static void cmdline(int argc, char *argv[])
        int opt;
        progname = basename(argv[0]);
 
-       while ((opt = getopt(argc, argv, "+hli:m:")) != -1) {
+       while ((opt = getopt(argc, argv, "+li:m:")) != -1) {
                switch (opt) {
-               case 'h':
-                       monitor_help();
-                       exit(EXIT_SUCCESS);
                case 'l':
-                       if (mode) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode)
+                               print_wrong_arg_exit();
                        mode = list;
                        break;
                case 'i':
                        /* only allow -i with -m or no option */
-                       if (mode && mode != show) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode && mode != show)
+                               print_wrong_arg_exit();
                        interval = atoi(optarg);
                        break;
                case 'm':
-                       if (mode) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode)
+                               print_wrong_arg_exit();
                        mode = show;
                        show_monitors_param = optarg;
                        break;
                default:
-                       monitor_help();
-                       exit(EXIT_FAILURE);
+                       print_wrong_arg_exit();
                }
        }
        if (!mode)
@@ -385,6 +373,10 @@ int cmd_monitor(int argc, char **argv)
                return EXIT_FAILURE;
        }
 
+       /* Default is: monitor all CPUs */
+       if (bitmask_isallclear(cpus_chosen))
+               bitmask_setall(cpus_chosen);
+
        dprint("System has up to %d CPU cores\n", cpu_count);
 
        for (num = 0; all_monitors[num]; num++) {
index 63ca87a05e5ffba9400e8caf493c24b6fdb3e22d..5650ab5a2c206b05e23296ae035aae52b9a528ca 100644 (file)
 
 #define MSR_TSC        0x10
 
+#define MSR_AMD_HWCR 0xc0010015
+
 enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT };
 
 static int mperf_get_count_percent(unsigned int self_id, double *percent,
                                   unsigned int cpu);
 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
                                unsigned int cpu);
+static struct timespec time_start, time_end;
 
 static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
        {
@@ -54,19 +57,33 @@ static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
        },
 };
 
+enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF };
+static int max_freq_mode;
+/*
+ * The maximum frequency mperf ticks at (in C0), retrieved either via:
+ *   1) calculation after measurement, if the TSC is known to tick at the
+ *      mperf/P0 frequency, or
+ *   2) cpufreq's /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time.
+ * Method 1 is preferred, as it also works without the cpufreq subsystem
+ * (e.g. on Xen).
+ */
+static unsigned long max_frequency;
+
 static unsigned long long tsc_at_measure_start;
 static unsigned long long tsc_at_measure_end;
-static unsigned long max_frequency;
 static unsigned long long *mperf_previous_count;
 static unsigned long long *aperf_previous_count;
 static unsigned long long *mperf_current_count;
 static unsigned long long *aperf_current_count;
+
 /* valid flag for all CPUs. If a MSR read failed it will be zero */
 static int *is_valid;
 
 static int mperf_get_tsc(unsigned long long *tsc)
 {
-       return read_msr(0, MSR_TSC, tsc);
+       int ret;
+       ret = read_msr(0, MSR_TSC, tsc);
+       if (ret)
+               dprint("Reading TSC MSR failed, returning %llu\n", *tsc);
+       return ret;
 }
 
 static int mperf_init_stats(unsigned int cpu)
@@ -97,36 +114,11 @@ static int mperf_measure_stats(unsigned int cpu)
        return 0;
 }
 
-/*
- * get_average_perf()
- *
- * Returns the average performance (also considers boosted frequencies)
- *
- * Input:
- *   aperf_diff: Difference of the aperf register over a time period
- *   mperf_diff: Difference of the mperf register over the same time period
- *   max_freq:   Maximum frequency (P0)
- *
- * Returns:
- *   Average performance over the time period
- */
-static unsigned long get_average_perf(unsigned long long aperf_diff,
-                                     unsigned long long mperf_diff)
-{
-       unsigned int perf_percent = 0;
-       if (((unsigned long)(-1) / 100) < aperf_diff) {
-               int shift_count = 7;
-               aperf_diff >>= shift_count;
-               mperf_diff >>= shift_count;
-       }
-       perf_percent = (aperf_diff * 100) / mperf_diff;
-       return (max_frequency * perf_percent) / 100;
-}
-
 static int mperf_get_count_percent(unsigned int id, double *percent,
                                   unsigned int cpu)
 {
        unsigned long long aperf_diff, mperf_diff, tsc_diff;
+       unsigned long long timediff;
 
        if (!is_valid[cpu])
                return -1;
@@ -136,11 +128,19 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
 
        mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
        aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
-       tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
 
-       *percent = 100.0 * mperf_diff / tsc_diff;
-       dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n",
-              mperf_cstates[id].name, mperf_diff, tsc_diff);
+       if (max_freq_mode == MAX_FREQ_TSC_REF) {
+               tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+               *percent = 100.0 * mperf_diff / tsc_diff;
+               dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
+                      mperf_cstates[id].name, mperf_diff, tsc_diff);
+       } else if (max_freq_mode == MAX_FREQ_SYSFS) {
+               timediff = timespec_diff_us(time_start, time_end);
+               *percent = 100.0 * mperf_diff / timediff;
+               dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
+                      mperf_cstates[id].name, mperf_diff, timediff);
+       } else
+               return -1;
 
        if (id == Cx)
                *percent = 100.0 - *percent;
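Worked example with illustrative numbers: in TSC-ref mode, if mperf_diff
is 1.2e9 while tsc_diff is 3.0e9 over the measurement interval, C0
residency is 100.0 * 1.2e9 / 3.0e9 = 40%, and the Cx entry is reported
as 100 - 40 = 60%.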
@@ -154,7 +154,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
                                unsigned int cpu)
 {
-       unsigned long long aperf_diff, mperf_diff;
+       unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff;
 
        if (id != AVG_FREQ)
                return 1;
@@ -165,11 +165,21 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
        mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
        aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
 
-       /* Return MHz for now, might want to return KHz if column width is more
-          generic */
-       *count = get_average_perf(aperf_diff, mperf_diff) / 1000;
-       dprint("%s: %llu\n", mperf_cstates[id].name, *count);
+       if (max_freq_mode == MAX_FREQ_TSC_REF) {
+               /* Calculate max_freq from the TSC count: cycles / microseconds = MHz */
+               tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+               time_diff = timespec_diff_us(time_start, time_end);
+               max_frequency = tsc_diff / time_diff;
+       }
 
+       *count = max_frequency * ((double)aperf_diff / mperf_diff);
+       dprint("%s: Average freq based on %s maximum frequency:\n",
+              mperf_cstates[id].name,
+              (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read");
+       dprint("%max_frequency: %lu", max_frequency);
+       dprint("aperf_diff: %llu\n", aperf_diff);
+       dprint("mperf_diff: %llu\n", mperf_diff);
+       dprint("avg freq:   %llu\n", *count);
        return 0;
 }
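Worked example with illustrative numbers, TSC-ref mode: a tsc_diff of
2.6e9 cycles over a time_diff of 1e6 microseconds yields max_frequency
= 2600 (cycles per microsecond, i.e. MHz); with aperf_diff/mperf_diff
= 1.15, the reported average frequency is 2600 * 1.15 = 2990 MHz,
making boost above P0 visible.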
 
@@ -178,6 +188,7 @@ static int mperf_start(void)
        int cpu;
        unsigned long long dbg;
 
+       clock_gettime(CLOCK_REALTIME, &time_start);
        mperf_get_tsc(&tsc_at_measure_start);
 
        for (cpu = 0; cpu < cpu_count; cpu++)
@@ -193,32 +204,104 @@ static int mperf_stop(void)
        unsigned long long dbg;
        int cpu;
 
-       mperf_get_tsc(&tsc_at_measure_end);
-
        for (cpu = 0; cpu < cpu_count; cpu++)
                mperf_measure_stats(cpu);
 
+       mperf_get_tsc(&tsc_at_measure_end);
+       clock_gettime(CLOCK_REALTIME, &time_end);
+
        mperf_get_tsc(&dbg);
        dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
 
        return 0;
 }
 
-struct cpuidle_monitor mperf_monitor;
-
-struct cpuidle_monitor *mperf_register(void)
+/*
+ * The mperf register is defined to tick at P0 (maximum) frequency.
+ *
+ * Instead of reading out P0, which can be tricky to obtain from HW,
+ * we use the TSC if it reliably ticks at the P0/mperf frequency.
+ *
+ * Otherwise, fall back to:
+ * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
+ * on older Intel HW without the invariant TSC feature, or on AMD
+ * machines whose TSC does not tick at P0 (none exist yet, but this
+ * is still double-checked via MSR_AMD_HWCR).
+ *
+ * On such machines the user still gets useful mperf stats as long
+ * as the acpi-cpufreq driver is loaded.
+ */
+static int init_maxfreq_mode(void)
 {
+       int ret;
+       unsigned long long hwcr;
        unsigned long min;
 
-       if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
-               return NULL;
-
-       /* Assume min/max all the same on all cores */
+       if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
+               goto use_sysfs;
+
+       if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
+               /* MSR_AMD_HWCR (bit 24, TscFreqSel) tells us whether the
+                * TSC runs at P0/mperf frequency.
+                * A test whether hwcr is accessible/available would be:
+                * (cpupower_cpu_info.family > 0x10 ||
+                *  (cpupower_cpu_info.family == 0x10 &&
+                *   cpupower_cpu_info.model >= 0x2))
+                * This should hold for all aperf/mperf capable AMD
+                * machines and is therefore safe to test here.
+                * Compare with Linux kernel git commit acf01734b1747b1ec4
+                */
+               ret = read_msr(0, MSR_AMD_HWCR, &hwcr);
+               /*
+                * If the MSR read failed, assume a Xen system that did
+                * not explicitly provide access to it, and assume the
+                * TSC works.
+                */
+               if (ret != 0) {
+                       dprint("TSC read 0x%x failed - assume TSC working\n",
+                              MSR_AMD_HWCR);
+                       return 0;
+               } else if (1 & (hwcr >> 24)) {
+                       max_freq_mode = MAX_FREQ_TSC_REF;
+                       return 0;
+               }
+               /* otherwise fall through to the sysfs max frequency */
+       } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) {
+               /*
+                * On Intel we assume mperf (in C0) ticks at the same
+                * rate as the TSC.
+                */
+               max_freq_mode = MAX_FREQ_TSC_REF;
+               return 0;
+       }
+use_sysfs:
        if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
                dprint("Cannot retrieve max freq from cpufreq kernel "
                       "subsystem\n");
-               return NULL;
+               return -1;
        }
+       max_freq_mode = MAX_FREQ_SYSFS;
+       return 0;
+}
+
+/*
+ * This monitor provides:
+ *
+ * 1) The average frequency a CPU resided in
+ *    This always works if the CPU has aperf/mperf capabilities.
+ *
+ * 2) The C0 and Cx (any sleep state) time a CPU resided in
+ *    This works if the mperf timer stops ticking in sleep states,
+ *    which seems to be the case on all current HW.
+ *
+ * Both are retrieved directly from HW registers and are independent
+ * of kernel statistics.
+ */
+struct cpuidle_monitor mperf_monitor;
+struct cpuidle_monitor *mperf_register(void)
+{
+       if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+               return NULL;
+
+       if (init_maxfreq_mode())
+               return NULL;
 
        /* Free this at program termination */
        is_valid = calloc(cpu_count, sizeof(int));