git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
Merge branch 'akpm' (second patchbomb from Andrew Morton)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Aug 2014 22:57:47 +0000 (15:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Aug 2014 22:57:47 +0000 (15:57 -0700)
Merge more incoming from Andrew Morton:
 "Two new syscalls:

     memfd_create in "shm: add memfd_create() syscall"
     kexec_file_load in "kexec: implementation of new syscall kexec_file_load"

  And:

   - Most (all?) of the rest of MM

   - Lots of the usual misc bits

   - fs/autofs4

   - drivers/rtc

   - fs/nilfs

   - procfs

   - fork.c, exec.c

   - more in lib/

   - rapidio

   - Janitorial work in filesystems: fs/ufs, fs/reiserfs, fs/adfs,
     fs/cramfs, fs/romfs, fs/qnx6.

   - initrd/initramfs work

   - "file sealing" and the memfd_create() syscall, in tmpfs

   - add pci_zalloc_consistent, use it in lots of places

   - MAINTAINERS maintenance

   - kexec feature work"
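
The memfd_create() syscall announced above pairs with the new tmpfs file sealing. A minimal userspace sketch, assuming the x86_64 syscall number (319) and the sealing constants this series adds to include/uapi/linux/fcntl.h; there is no glibc wrapper at this point, so the raw syscall is used:

/* Minimal sketch: create a sealed memfd. Fallback definitions are
 * provided because pre-3.17 userspace headers lack these constants. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_memfd_create
#define __NR_memfd_create 319          /* x86_64, per this series */
#endif
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC        0x0001U
#define MFD_ALLOW_SEALING  0x0002U
#endif
#ifndef F_ADD_SEALS
#define F_ADD_SEALS     (1024 + 9)     /* F_LINUX_SPECIFIC_BASE + 9 */
#define F_SEAL_SHRINK   0x0002
#define F_SEAL_GROW     0x0004
#define F_SEAL_WRITE    0x0008
#endif

int main(void)
{
	int fd = syscall(__NR_memfd_create, "example",
			 MFD_CLOEXEC | MFD_ALLOW_SEALING);

	if (fd < 0 || ftruncate(fd, 4096) < 0) {
		perror("memfd_create/ftruncate");
		return 1;
	}
	/* After these seals the file can no longer grow, shrink, or be
	 * written; see "shm: add memfd_create() syscall" below. */
	if (fcntl(fd, F_ADD_SEALS,
		  F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE) < 0)
		perror("F_ADD_SEALS");
	close(fd);
	return 0;
}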

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (193 commits)
  MAINTAINERS: update nomadik patterns
  MAINTAINERS: update usb/gadget patterns
  MAINTAINERS: update DMA BUFFER SHARING patterns
  kexec: verify the signature of signed PE bzImage
  kexec: support kexec/kdump on EFI systems
  kexec: support for kexec on panic using new system call
  kexec-bzImage64: support for loading bzImage using 64bit entry
  kexec: load and relocate purgatory at kernel load time
  purgatory: core purgatory functionality
  purgatory/sha256: provide implementation of sha256 in purgatory context
  kexec: implementation of new syscall kexec_file_load
  kexec: new syscall kexec_file_load() declaration
  kexec: make kexec_segment user buffer pointer a union
  resource: provide new functions to walk through resources
  kexec: use common function for kimage_normal_alloc() and kimage_crash_alloc()
  kexec: move segment verification code in a separate function
  kexec: rename unusebale_pages to unusable_pages
  kernel: build bin2c based on config option CONFIG_BUILD_BIN2C
  bin2c: move bin2c in scripts/basic
  shm: wait for pins to be released when sealing
  ...
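
The kexec commits in this shortlog add the file-based kexec_file_load() syscall. A hedged sketch of a caller, assuming the x86_64 syscall number (320) assigned by this series and placeholder /boot paths; the call needs CAP_SYS_BOOT:

/* Sketch: load a kernel for kexec via the new file-based syscall.
 * The paths below are hypothetical; cmdline_len counts the NUL. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_kexec_file_load
#define __NR_kexec_file_load 320	/* x86_64, per this series */
#endif

int main(void)
{
	const char cmdline[] = "root=/dev/sda1 ro";
	int kernel_fd = open("/boot/bzImage", O_RDONLY);	/* placeholder */
	int initrd_fd = open("/boot/initrd.img", O_RDONLY);	/* placeholder */

	if (kernel_fd < 0 || initrd_fd < 0) {
		perror("open");
		return 1;
	}
	if (syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
		    sizeof(cmdline), cmdline, 0UL) < 0) {
		perror("kexec_file_load");	/* e.g. signature verification failure */
		return 1;
	}
	/* The loaded kernel is executed later via
	 * reboot(LINUX_REBOOT_CMD_KEXEC). */
	return 0;
}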

349 files changed:
CREDITS
Documentation/ABI/testing/sysfs-fs-nilfs2 [new file with mode: 0644]
Documentation/cgroups/memcg_test.txt
Documentation/devicetree/bindings/i2c/trivial-devices.txt
Documentation/oops-tracing.txt
Documentation/rapidio/tsi721.txt
Documentation/sysctl/kernel.txt
MAINTAINERS
arch/alpha/include/asm/Kbuild
arch/alpha/include/asm/scatterlist.h [deleted file]
arch/arm/Kconfig
arch/arm/include/asm/Kbuild
arch/arm/include/asm/scatterlist.h [deleted file]
arch/arm/mach-omap2/board-omap3touchbook.c
arch/arm/mach-omap2/mux.c
arch/arm/mach-pxa/balloon3.c
arch/arm/mach-pxa/viper.c
arch/arm/mach-s3c24xx/mach-jive.c
arch/arm/mach-w90x900/cpu.c
arch/arm64/Kconfig
arch/arm64/include/asm/page.h
arch/arm64/kernel/vdso.c
arch/cris/include/asm/Kbuild
arch/cris/include/asm/scatterlist.h [deleted file]
arch/frv/include/asm/Kbuild
arch/frv/include/asm/scatterlist.h [deleted file]
arch/ia64/Kconfig
arch/ia64/include/asm/Kbuild
arch/ia64/include/asm/page.h
arch/ia64/include/asm/scatterlist.h [deleted file]
arch/ia64/kernel/time.c
arch/ia64/mm/init.c
arch/m32r/include/asm/Kbuild
arch/m32r/include/asm/scatterlist.h [deleted file]
arch/m68k/Kconfig
arch/microblaze/include/asm/Kbuild
arch/microblaze/include/asm/scatterlist.h [deleted file]
arch/mips/Kconfig
arch/mn10300/include/asm/Kbuild
arch/mn10300/include/asm/scatterlist.h [deleted file]
arch/powerpc/Kconfig
arch/powerpc/include/asm/Kbuild
arch/powerpc/include/asm/page.h
arch/powerpc/include/asm/scatterlist.h [deleted file]
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/vdso.c
arch/powerpc/kernel/vio.c
arch/powerpc/mm/dma-noncoherent.c
arch/powerpc/platforms/44x/warp.c
arch/powerpc/platforms/52xx/efika.c
arch/powerpc/platforms/amigaone/setup.c
arch/powerpc/platforms/pseries/dlpar.c
arch/powerpc/platforms/pseries/mobility.c
arch/s390/Kconfig
arch/s390/include/asm/Kbuild
arch/s390/include/asm/page.h
arch/s390/include/asm/scatterlist.h [deleted file]
arch/s390/kernel/vdso.c
arch/score/include/asm/Kbuild
arch/score/include/asm/scatterlist.h [deleted file]
arch/sh/Kconfig
arch/sh/include/asm/page.h
arch/sh/kernel/vsyscall/vsyscall.c
arch/sparc/Kconfig
arch/sparc/include/asm/Kbuild
arch/sparc/include/asm/scatterlist.h [deleted file]
arch/tile/Kconfig
arch/tile/include/asm/hardwall.h
arch/tile/include/asm/page.h
arch/tile/kernel/hardwall.c
arch/tile/kernel/vdso.c
arch/um/include/asm/Kbuild
arch/um/include/asm/page.h
arch/x86/Kbuild
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/include/asm/Kbuild
arch/x86/include/asm/crash.h [new file with mode: 0644]
arch/x86/include/asm/kexec-bzimage64.h [new file with mode: 0644]
arch/x86/include/asm/kexec.h
arch/x86/include/asm/page.h
arch/x86/include/asm/page_64.h
arch/x86/include/asm/scatterlist.h [deleted file]
arch/x86/kernel/Makefile
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/crash.c
arch/x86/kernel/kexec-bzimage64.c [new file with mode: 0644]
arch/x86/kernel/machine_kexec_64.c
arch/x86/kvm/mmu_audit.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/purgatory/Makefile [new file with mode: 0644]
arch/x86/purgatory/entry64.S [new file with mode: 0644]
arch/x86/purgatory/purgatory.c [new file with mode: 0644]
arch/x86/purgatory/setup-x86_64.S [new file with mode: 0644]
arch/x86/purgatory/sha256.c [new file with mode: 0644]
arch/x86/purgatory/sha256.h [new file with mode: 0644]
arch/x86/purgatory/stack.S [new file with mode: 0644]
arch/x86/purgatory/string.c [new file with mode: 0644]
arch/x86/syscalls/syscall_32.tbl
arch/x86/syscalls/syscall_64.tbl
arch/x86/um/asm/elf.h
arch/x86/um/mem_64.c
arch/x86/vdso/vdso32-setup.c
crypto/zlib.c
drivers/atm/he.c
drivers/atm/idt77252.c
drivers/block/DAC960.c
drivers/block/cciss.c
drivers/block/skd_main.c
drivers/crypto/hifn_795x.c
drivers/firmware/efi/runtime-map.c
drivers/gpu/drm/i810/i810_dma.c
drivers/infiniband/hw/amso1100/c2.c
drivers/infiniband/hw/nes/nes_hw.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/media/common/saa7146/saa7146_core.c
drivers/media/common/saa7146/saa7146_fops.c
drivers/media/pci/bt8xx/bt878.c
drivers/media/pci/ngene/ngene-core.c
drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
drivers/media/usb/ttusb-dec/ttusb_dec.c
drivers/net/ethernet/amd/pcnet32.c
drivers/net/ethernet/atheros/atl1e/atl1e_main.c
drivers/net/ethernet/cisco/enic/vnic_dev.c
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/micrel/ksz884x.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
drivers/net/ethernet/qlogic/qlge/qlge_main.c
drivers/net/irda/vlsi_ir.c
drivers/net/wireless/ipw2x00/ipw2100.c
drivers/net/wireless/mwl8k.c
drivers/net/wireless/rtl818x/rtl8180/dev.c
drivers/net/wireless/rtlwifi/pci.c
drivers/parport/parport_ip32.c
drivers/rapidio/devices/tsi721.h
drivers/rapidio/devices/tsi721_dma.c
drivers/rapidio/rio.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/class.c
drivers/rtc/interface.c
drivers/rtc/rtc-ds1343.c
drivers/rtc/rtc-ds1742.c
drivers/rtc/rtc-efi-platform.c [new file with mode: 0644]
drivers/rtc/rtc-efi.c
drivers/rtc/rtc-isl12022.c
drivers/rtc/rtc-pcf85063.c [new file with mode: 0644]
drivers/rtc/rtc-pcf8563.c
drivers/rtc/rtc-tps65910.c
drivers/scsi/3w-sas.c
drivers/scsi/a100u2w.c
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/be2iscsi/be_mgmt.c
drivers/scsi/csiostor/csio_wr.c
drivers/scsi/eata.c
drivers/scsi/hpsa.c
drivers/scsi/megaraid/megaraid_mbox.c
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/mesh.c
drivers/scsi/mvumi.c
drivers/scsi/pm8001/pm8001_sas.c
drivers/scsi/pmcraid.c
drivers/scsi/scsi_sysfs.c
drivers/staging/rtl8192e/rtl8192e/rtl_core.c
drivers/staging/rtl8192ee/pci.c
drivers/staging/rtl8821ae/pci.c
drivers/staging/slicoss/slicoss.c
drivers/staging/vt6655/device_main.c
drivers/tty/synclink_gt.c
drivers/vme/bridges/vme_ca91cx42.c
drivers/vme/bridges/vme_tsi148.c
fs/adfs/adfs.h
fs/adfs/dir.c
fs/adfs/dir_fplus.c
fs/autofs4/autofs_i.h
fs/autofs4/expire.c
fs/autofs4/root.c
fs/befs/linuxvfs.c
fs/bfs/bfs.h
fs/bfs/dir.c
fs/bfs/inode.c
fs/coda/cache.c
fs/coda/coda_linux.c
fs/coda/dir.c
fs/coda/file.c
fs/coda/inode.c
fs/coda/pioctl.c
fs/coda/psdev.c
fs/coda/upcall.c
fs/cramfs/inode.c
fs/cramfs/uncompress.c
fs/dlm/debug_fs.c
fs/efs/namei.c
fs/exec.c
fs/exofs/ore_raid.c
fs/fcntl.c
fs/hpfs/dnode.c
fs/inode.c
fs/isofs/compress.c
fs/jffs2/compr_zlib.c
fs/minix/bitmap.c
fs/minix/inode.c
fs/nilfs2/Makefile
fs/nilfs2/nilfs.h
fs/nilfs2/super.c
fs/nilfs2/sysfs.c [new file with mode: 0644]
fs/nilfs2/sysfs.h [new file with mode: 0644]
fs/nilfs2/the_nilfs.c
fs/nilfs2/the_nilfs.h
fs/omfs/inode.c
fs/proc/base.c
fs/proc/fd.c
fs/proc/generic.c
fs/proc/internal.h
fs/proc/kcore.c
fs/proc/proc_sysctl.c
fs/proc/proc_tty.c
fs/proc/root.c
fs/proc/vmcore.c
fs/pstore/ram_core.c
fs/qnx6/Makefile
fs/qnx6/dir.c
fs/qnx6/inode.c
fs/qnx6/namei.c
fs/qnx6/qnx6.h
fs/qnx6/super_mmi.c
fs/ramfs/file-nommu.c
fs/reiserfs/dir.c
fs/reiserfs/do_balan.c
fs/reiserfs/file.c
fs/reiserfs/ibalance.c
fs/reiserfs/inode.c
fs/reiserfs/ioctl.c
fs/reiserfs/item_ops.c
fs/reiserfs/lbalance.c
fs/reiserfs/prints.c
fs/reiserfs/procfs.c
fs/reiserfs/stree.c
fs/reiserfs/super.c
fs/reiserfs/xattr.c
fs/reiserfs/xattr_acl.c
fs/reiserfs/xattr_security.c
fs/reiserfs/xattr_trusted.c
fs/reiserfs/xattr_user.c
fs/romfs/super.c
fs/ufs/Makefile
fs/ufs/inode.c
fs/ufs/super.c
fs/ufs/ufs.h
include/asm-generic/pci-dma-compat.h
include/linux/decompress/bunzip2.h
include/linux/decompress/generic.h
include/linux/decompress/inflate.h
include/linux/decompress/unlz4.h
include/linux/decompress/unlzma.h
include/linux/decompress/unlzo.h
include/linux/decompress/unxz.h
include/linux/efi.h
include/linux/fs.h
include/linux/ioport.h
include/linux/kernel.h
include/linux/kexec.h
include/linux/memcontrol.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/page_cgroup.h
include/linux/rio_drv.h
include/linux/scatterlist.h
include/linux/sched.h
include/linux/shm.h
include/linux/shmem_fs.h
include/linux/swap.h
include/linux/syscalls.h
include/linux/sysctl.h
include/linux/user_namespace.h
include/linux/zlib.h
include/scsi/scsi.h
include/uapi/linux/fcntl.h
include/uapi/linux/kexec.h
include/uapi/linux/memfd.h [new file with mode: 0644]
init/Kconfig
init/do_mounts.c
init/do_mounts_rd.c
init/initramfs.c
init/main.c
ipc/shm.c
kernel/Makefile
kernel/acct.c
kernel/bounds.c
kernel/events/uprobes.c
kernel/exit.c
kernel/fork.c
kernel/gcov/fs.c
kernel/kallsyms.c
kernel/kexec.c
kernel/panic.c
kernel/resource.c
kernel/sys_ni.c
kernel/test_kprobes.c
kernel/user_namespace.c
kernel/watchdog.c
lib/Kconfig
lib/decompress.c
lib/decompress_bunzip2.c
lib/decompress_inflate.c
lib/decompress_unlz4.c
lib/decompress_unlzma.c
lib/decompress_unlzo.c
lib/decompress_unxz.c
lib/idr.c
lib/kfifo.c
lib/rbtree.c
lib/scatterlist.c
mm/filemap.c
mm/huge_memory.c
mm/memcontrol.c
mm/memory.c
mm/migrate.c
mm/mmap.c
mm/nommu.c
mm/rmap.c
mm/shmem.c
mm/slab.c
mm/swap.c
mm/swap_state.c
mm/swapfile.c
mm/truncate.c
mm/util.c
mm/vmscan.c
mm/zswap.c
scripts/.gitignore
scripts/Makefile
scripts/basic/.gitignore
scripts/basic/Makefile
scripts/basic/bin2c.c [new file with mode: 0644]
scripts/bin2c.c [deleted file]
scripts/checkstack.pl
scripts/coccinelle/free/ifnullfree.cocci [new file with mode: 0644]
scripts/tags.sh
tools/testing/selftests/Makefile
tools/testing/selftests/memfd/.gitignore [new file with mode: 0644]
tools/testing/selftests/memfd/Makefile [new file with mode: 0644]
tools/testing/selftests/memfd/fuse_mnt.c [new file with mode: 0644]
tools/testing/selftests/memfd/fuse_test.c [new file with mode: 0644]
tools/testing/selftests/memfd/memfd_test.c [new file with mode: 0644]
tools/testing/selftests/memfd/run_fuse_test.sh [new file with mode: 0644]
tools/testing/selftests/ptrace/peeksiginfo.c

diff --git a/CREDITS b/CREDITS
index a80b66718f66550abee52ea2e13e1058917325c4..bb6278884f894878dc49e1c6722a0cbe3e89e82e 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -1381,6 +1381,9 @@ S: 17 rue Danton
 S: F - 94270 Le Kremlin-Bicêtre
 S: France
 
+N: Jack Hammer
+D: IBM ServeRAID RAID (ips) driver maintenance
+
 N: Greg Hankins
 E: gregh@cc.gatech.edu
 D: fixed keyboard driver to separate LED and locking status
@@ -1691,6 +1694,10 @@ S: Reading
 S: RG6 2NU
 S: United Kingdom
 
+N: Dave Jeffery
+E: dhjeffery@gmail.com
+D: SCSI hacks and IBM ServeRAID RAID driver maintenance
+
 N: Jakub Jelinek
 E: jakub@redhat.com
 W: http://sunsite.mff.cuni.cz/~jj
diff --git a/Documentation/ABI/testing/sysfs-fs-nilfs2 b/Documentation/ABI/testing/sysfs-fs-nilfs2
new file mode 100644 (file)
index 0000000..304ba84
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-nilfs2
@@ -0,0 +1,269 @@
+
+What:          /sys/fs/nilfs2/features/revision
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show current revision of NILFS file system driver.
+               This value informs about file system revision that
+               driver is ready to support.
+
+What:          /sys/fs/nilfs2/features/README
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Describe attributes of /sys/fs/nilfs2/features group.
+
+What:          /sys/fs/nilfs2/<device>/revision
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show NILFS file system revision on volume.
+               This value informs about metadata structures'
+               revision on mounted volume.
+
+What:          /sys/fs/nilfs2/<device>/blocksize
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show volume's block size in bytes.
+
+What:          /sys/fs/nilfs2/<device>/device_size
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show volume size in bytes.
+
+What:          /sys/fs/nilfs2/<device>/free_blocks
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show count of free blocks on volume.
+
+What:          /sys/fs/nilfs2/<device>/uuid
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show volume's UUID (Universally Unique Identifier).
+
+What:          /sys/fs/nilfs2/<device>/volume_name
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show volume's label.
+
+What:          /sys/fs/nilfs2/<device>/README
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Describe attributes of /sys/fs/nilfs2/<device> group.
+
+What:          /sys/fs/nilfs2/<device>/superblock/sb_write_time
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show last write time of super block in human-readable
+               format.
+
+What:          /sys/fs/nilfs2/<device>/superblock/sb_write_time_secs
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show last write time of super block in seconds.
+
+What:          /sys/fs/nilfs2/<device>/superblock/sb_write_count
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show current write count of super block.
+
+What:          /sys/fs/nilfs2/<device>/superblock/sb_update_frequency
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show/Set interval of periodical update of superblock
+               (in seconds).
+
+What:          /sys/fs/nilfs2/<device>/superblock/README
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Describe attributes of /sys/fs/nilfs2/<device>/superblock
+               group.
+
+What:          /sys/fs/nilfs2/<device>/segctor/last_pseg_block
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show start block number of the latest segment.
+
+What:          /sys/fs/nilfs2/<device>/segctor/last_seg_sequence
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show sequence value of the latest segment.
+
+What:          /sys/fs/nilfs2/<device>/segctor/last_seg_checkpoint
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show checkpoint number of the latest segment.
+
+What:          /sys/fs/nilfs2/<device>/segctor/current_seg_sequence
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show segment sequence counter.
+
+What:          /sys/fs/nilfs2/<device>/segctor/current_last_full_seg
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show index number of the latest full segment.
+
+What:          /sys/fs/nilfs2/<device>/segctor/next_full_seg
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show index number of the full segment index
+               to be used next.
+
+What:          /sys/fs/nilfs2/<device>/segctor/next_pseg_offset
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show offset of next partial segment in the current
+               full segment.
+
+What:          /sys/fs/nilfs2/<device>/segctor/next_checkpoint
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show next checkpoint number.
+
+What:          /sys/fs/nilfs2/<device>/segctor/last_seg_write_time
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show write time of the last segment in
+               human-readable format.
+
+What:          /sys/fs/nilfs2/<device>/segctor/last_seg_write_time_secs
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show write time of the last segment in seconds.
+
+What:          /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show write time of the last segment not for cleaner
+               operation in human-readable format.
+
+What:          /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time_secs
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show write time of the last segment not for cleaner
+               operation in seconds.
+
+What:          /sys/fs/nilfs2/<device>/segctor/dirty_data_blocks_count
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show number of dirty data blocks.
+
+What:          /sys/fs/nilfs2/<device>/segctor/README
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Describe attributes of /sys/fs/nilfs2/<device>/segctor
+               group.
+
+What:          /sys/fs/nilfs2/<device>/segments/segments_number
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show number of segments on a volume.
+
+What:          /sys/fs/nilfs2/<device>/segments/blocks_per_segment
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show number of blocks in segment.
+
+What:          /sys/fs/nilfs2/<device>/segments/clean_segments
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show count of clean segments.
+
+What:          /sys/fs/nilfs2/<device>/segments/dirty_segments
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show count of dirty segments.
+
+What:          /sys/fs/nilfs2/<device>/segments/README
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Describe attributes of /sys/fs/nilfs2/<device>/segments
+               group.
+
+What:          /sys/fs/nilfs2/<device>/checkpoints/checkpoints_number
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show number of checkpoints on volume.
+
+What:          /sys/fs/nilfs2/<device>/checkpoints/snapshots_number
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show number of snapshots on volume.
+
+What:          /sys/fs/nilfs2/<device>/checkpoints/last_seg_checkpoint
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show checkpoint number of the latest segment.
+
+What:          /sys/fs/nilfs2/<device>/checkpoints/next_checkpoint
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show next checkpoint number.
+
+What:          /sys/fs/nilfs2/<device>/checkpoints/README
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Describe attributes of /sys/fs/nilfs2/<device>/checkpoints
+               group.
+
+What:          /sys/fs/nilfs2/<device>/mounted_snapshots/README
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Describe content of /sys/fs/nilfs2/<device>/mounted_snapshots
+               group.
+
+What:          /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/inodes_count
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show number of inodes for snapshot.
+
+What:          /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/blocks_count
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Show number of blocks for snapshot.
+
+What:          /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/README
+Date:          April 2014
+Contact:       "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+               Describe attributes of /sys/fs/nilfs2/<device>/mounted_snapshots/<id>
+               group.
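+
All of the attributes above are ordinary sysfs text files (read-only, apart from sb_update_frequency). A minimal userspace sketch of reading one, assuming the nilfs2 driver is loaded so the group exists:

/* Read the revision supported by the NILFS2 driver from the new
 * /sys/fs/nilfs2 hierarchy documented above. */
#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/sys/fs/nilfs2/features/revision", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("driver supports NILFS2 revision %s", buf);
	fclose(f);
	return 0;
}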
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 80ac454704b80903ba20dc4c00785d1f1eed9bf0..8870b02121502430a420e129b61e7d887ee0c762 100644 (file)
@@ -24,64 +24,27 @@ Please note that implementation details can be changed.
 
    a page/swp_entry may be charged (usage += PAGE_SIZE) at
 
-       mem_cgroup_charge_anon()
-         Called at new page fault and Copy-On-Write.
-
-       mem_cgroup_try_charge_swapin()
-         Called at do_swap_page() (page fault on swap entry) and swapoff.
-         Followed by charge-commit-cancel protocol. (With swap accounting)
-         At commit, a charge recorded in swap_cgroup is removed.
-
-       mem_cgroup_charge_file()
-         Called at add_to_page_cache()
-
-       mem_cgroup_cache_charge_swapin()
-         Called at shmem's swapin.
-
-       mem_cgroup_prepare_migration()
-         Called before migration. "extra" charge is done and followed by
-         charge-commit-cancel protocol.
-         At commit, charge against oldpage or newpage will be committed.
+       mem_cgroup_try_charge()
 
 2. Uncharge
   a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
 
-       mem_cgroup_uncharge_page()
-         Called when an anonymous page is fully unmapped. I.e., mapcount goes
-         to 0. If the page is SwapCache, uncharge is delayed until
-         mem_cgroup_uncharge_swapcache().
-
-       mem_cgroup_uncharge_cache_page()
-         Called when a page-cache is deleted from radix-tree. If the page is
-         SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
-
-       mem_cgroup_uncharge_swapcache()
-         Called when SwapCache is removed from radix-tree. The charge itself
-         is moved to swap_cgroup. (If mem+swap controller is disabled, no
-         charge to swap occurs.)
+       mem_cgroup_uncharge()
+         Called when a page's refcount goes down to 0.
 
        mem_cgroup_uncharge_swap()
          Called when swp_entry's refcnt goes down to 0. A charge against swap
          disappears.
 
-       mem_cgroup_end_migration(old, new)
-       At success of migration old is uncharged (if necessary), a charge
-       to new page is committed. At failure, charge to old page is committed.
-
 3. charge-commit-cancel
-       In some case, we can't know this "charge" is valid or not at charging
-       (because of races).
-       To handle such case, there are charge-commit-cancel functions.
-               mem_cgroup_try_charge_XXX
-               mem_cgroup_commit_charge_XXX
-               mem_cgroup_cancel_charge_XXX
-       these are used in swap-in and migration.
+       Memcg pages are charged in two steps:
+               mem_cgroup_try_charge()
+               mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
 
        At try_charge(), there are no flags to say "this page is charged".
        at this point, usage += PAGE_SIZE.
 
-       At commit(), the function checks the page should be charged or not
-       and set flags or avoid charging.(usage -= PAGE_SIZE)
+       At commit(), the page is associated with the memcg.
 
        At cancel(), simply usage -= PAGE_SIZE.
 
@@ -91,18 +54,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
        Anonymous page is newly allocated at
                  - page fault into MAP_ANONYMOUS mapping.
                  - Copy-On-Write.
-       It is charged right after it's allocated before doing any page table
-       related operations. Of course, it's uncharged when another page is used
-       for the fault address.
-
-       At freeing anonymous page (by exit() or munmap()), zap_pte() is called
-       and pages for ptes are freed one by one.(see mm/memory.c). Uncharges
-       are done at page_remove_rmap() when page_mapcount() goes down to 0.
-
-       Another page freeing is by page-reclaim (vmscan.c) and anonymous
-       pages are swapped out. In this case, the page is marked as
-       PageSwapCache(). uncharge() routine doesn't uncharge the page marked
-       as SwapCache(). It's delayed until __delete_from_swap_cache().
 
        4.1 Swap-in.
        At swap-in, the page is taken from swap-cache. There are 2 cases.
@@ -111,41 +62,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
        (b) If the SwapCache has been mapped by processes, it has been
            charged already.
 
-       This swap-in is one of the most complicated work. In do_swap_page(),
-       following events occur when pte is unchanged.
-
-       (1) the page (SwapCache) is looked up.
-       (2) lock_page()
-       (3) try_charge_swapin()
-       (4) reuse_swap_page() (may call delete_swap_cache())
-       (5) commit_charge_swapin()
-       (6) swap_free().
-
-       Considering following situation for example.
-
-       (A) The page has not been charged before (2) and reuse_swap_page()
-           doesn't call delete_from_swap_cache().
-       (B) The page has not been charged before (2) and reuse_swap_page()
-           calls delete_from_swap_cache().
-       (C) The page has been charged before (2) and reuse_swap_page() doesn't
-           call delete_from_swap_cache().
-       (D) The page has been charged before (2) and reuse_swap_page() calls
-           delete_from_swap_cache().
-
-           memory.usage/memsw.usage changes to this page/swp_entry will be
-        Case          (A)      (B)       (C)     (D)
-         Event
-       Before (2)     0/ 1     0/ 1      1/ 1    1/ 1
-          ===========================================
-          (3)        +1/+1    +1/+1     +1/+1   +1/+1
-          (4)          -       0/ 0       -     -1/ 0
-          (5)         0/-1     0/ 0     -1/-1    0/ 0
-          (6)          -       0/-1       -      0/-1
-          ===========================================
-       Result         1/ 1     1/ 1      1/ 1    1/ 1
-
-       In any cases, charges to this page should be 1/ 1.
-
        4.2 Swap-out.
        At swap-out, typical state transition is below.
 
@@ -158,28 +74,20 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
            swp_entry's refcnt -= 1.
 
 
-       At (b), the page is marked as SwapCache and not uncharged.
-       At (d), the page is removed from SwapCache and a charge in page_cgroup
-       is moved to swap_cgroup.
-
        Finally, at task exit,
        (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
-       Here, a charge in swap_cgroup disappears.
 
 5. Page Cache
        Page Cache is charged at
        - add_to_page_cache_locked().
 
-       uncharged at
-       - __remove_from_page_cache().
-
        The logic is very clear. (About migration, see below)
        Note: __remove_from_page_cache() is called by remove_from_page_cache()
        and __remove_mapping().
 
 6. Shmem(tmpfs) Page Cache
-       Memcg's charge/uncharge have special handlers of shmem. The best way
-       to understand shmem's page state transition is to read mm/shmem.c.
+       The best way to understand shmem's page state transition is to read
+       mm/shmem.c.
        But brief explanation of the behavior of memcg around shmem will be
        helpful to understand the logic.
 
@@ -192,56 +100,10 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
        It's charged when...
        - A new page is added to shmem's radix-tree.
        - A swp page is read. (move a charge from swap_cgroup to page_cgroup)
-       It's uncharged when
-       - A page is removed from radix-tree and not SwapCache.
-       - When SwapCache is removed, a charge is moved to swap_cgroup.
-       - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
-         disappears.
 
 7. Page Migration
-       One of the most complicated functions is page-migration-handler.
-       Memcg has 2 routines. Assume that we are migrating a page's contents
-       from OLDPAGE to NEWPAGE.
-
-       Usual migration logic is..
-       (a) remove the page from LRU.
-       (b) allocate NEWPAGE (migration target)
-       (c) lock by lock_page().
-       (d) unmap all mappings.
-       (e-1) If necessary, replace entry in radix-tree.
-       (e-2) move contents of a page.
-       (f) map all mappings again.
-       (g) pushback the page to LRU.
-       (-) OLDPAGE will be freed.
-
-       Before (g), memcg should complete all necessary charge/uncharge to
-       NEWPAGE/OLDPAGE.
-
-       The point is....
-       - If OLDPAGE is anonymous, all charges will be dropped at (d) because
-          try_to_unmap() drops all mapcount and the page will not be
-         SwapCache.
-
-       - If OLDPAGE is SwapCache, charges will be kept at (g) because
-         __delete_from_swap_cache() isn't called at (e-1)
-
-       - If OLDPAGE is page-cache, charges will be kept at (g) because
-         remove_from_swap_cache() isn't called at (e-1)
-
-       memcg provides following hooks.
-
-       - mem_cgroup_prepare_migration(OLDPAGE)
-         Called after (b) to account a charge (usage += PAGE_SIZE) against
-         memcg which OLDPAGE belongs to.
-
-        - mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
-         Called after (f) before (g).
-         If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
-         charged, a charge by prepare_migration() is automatically canceled.
-         If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
-
-         But zap_pte() (by exit or munmap) can be called while migration,
-         we have to check if OLDPAGE/NEWPAGE is a valid page after commit().
+
+       mem_cgroup_migrate()
 
 8. LRU
         Each memcg has its own private LRU. Now, its handling is under global
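
The charge-commit-cancel protocol that this rewritten document describes reduces to one call-site pattern. A hedged kernel-side sketch using the try/commit/cancel signatures this series introduces; example_install_page() is a hypothetical stand-in for whatever step may fail between reservation and commit:

/* Two-step charging as described in section 3 above. */
#include <linux/memcontrol.h>
#include <linux/mm.h>

static int example_install_page(struct page *page);	/* hypothetical */

static int example_charge_new_page(struct page *page, struct mm_struct *mm,
				   gfp_t gfp)
{
	struct mem_cgroup *memcg;
	int err;

	/* try_charge(): usage += PAGE_SIZE, but no "charged" flag yet. */
	err = mem_cgroup_try_charge(page, mm, gfp, &memcg);
	if (err)
		return err;

	err = example_install_page(page);
	if (err) {
		/* cancel(): simply usage -= PAGE_SIZE. */
		mem_cgroup_cancel_charge(page, memcg);
		return err;
	}

	/* commit(): the page is associated with the memcg. */
	mem_cgroup_commit_charge(page, memcg, false);
	return 0;
}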
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
index 37803eb5521efb1b78ed6f0c9cf9d6865f4b1efe..6af570ec53b4b5b9ea96bcc42a722007265d96aa 100644 (file)
@@ -70,6 +70,7 @@ nuvoton,npct501               i2c trusted platform module (TPM)
 nxp,pca9556            Octal SMBus and I2C registered interface
 nxp,pca9557            8-bit I2C-bus and SMBus I/O port with reset
 nxp,pcf8563            Real-time clock/calendar
+nxp,pcf85063           Tiny Real-Time Clock
 ovti,ov5642            OV5642: Color CMOS QSXGA (5-megapixel) Image Sensor with OmniBSI and Embedded TrueFocus
 pericom,pt7c4338       Real-time Clock Module
 plx,pex8648            48-Lane, 12-Port PCI Express Gen 2 (5.0 GT/s) Switch
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index e3155995ddd878c59d60d3519f2addaac716a743..beefb9f82902569bc120797108dbfc12d01faa97 100644 (file)
@@ -268,6 +268,8 @@ characters, each representing a particular tainted value.
  14: 'E' if an unsigned module has been loaded in a kernel supporting
      module signature.
 
+ 15: 'L' if a soft lockup has previously occurred on the system.
+
 The primary reason for the 'Tainted: ' string is to tell kernel
 debuggers if this is a clean kernel or if anything unusual has
 occurred.  Tainting is permanent: even if an offending module is
diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt
index 335f3c6087dcb35d5285bed3d766bba8f9bc4470..626052f403bb3300899659eb86676f62a9702618 100644 (file)
@@ -20,13 +20,26 @@ II. Known problems
 
   None.
 
-III. To do
+III. DMA Engine Support
 
- Add DMA data transfers (non-messaging).
- Add inbound region (SRIO-to-PCIe) mapping.
+Tsi721 mport driver supports DMA data transfers between local system memory and
+remote RapidIO devices. This functionality is implemented according to the
+SLAVE mode API defined by the common Linux kernel DMA Engine framework.
+
+Depending on system requirements, RapidIO DMA operations can be included or
+excluded by setting the CONFIG_RAPIDIO_DMA_ENGINE option. The Tsi721 mport
+driver uses seven of the eight available BDMA channels to support DMA data
+transfers; one BDMA channel is reserved for the generation of maintenance
+read/write requests.
+
+If the Tsi721 mport driver has been built with RAPIDIO_DMA_ENGINE support
+included, the driver accepts a DMA-specific module parameter:
+  "dma_desc_per_channel" - defines the number of hardware buffer descriptors
+                           used by each BDMA channel of Tsi721 (default: 128).
 
 IV. Version History
 
+  1.1.0 - DMA operations re-worked to support data scatter/gather lists larger
+          than hardware buffer descriptors ring.
   1.0.0 - Initial driver release.
 
 V.  License
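
A module parameter such as dma_desc_per_channel is conventionally declared as sketched below; this is illustrative only, assuming the variable name matches the parameter name, and is not a verbatim excerpt from drivers/rapidio/devices/tsi721_dma.c:

/* Sketch of a "dma_desc_per_channel" module parameter declaration. */
#include <linux/module.h>

static int dma_desc_per_channel = 128;	/* default per the text above */
module_param(dma_desc_per_channel, int, S_IRUGO);
MODULE_PARM_DESC(dma_desc_per_channel,
		 "Number of hardware buffer descriptors per BDMA channel");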
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c14374e71775f62ba1ed72a856a702cb1d819e47..f79eb96663790f1c116f6a44d706af048bdedc88 100644 (file)
@@ -826,6 +826,7 @@ can be ORed together:
 4096 - An out-of-tree module has been loaded.
 8192 - An unsigned module has been loaded in a kernel supporting module
        signature.
+16384 - A soft lockup has previously occurred on the system.
 
 ==============================================================
 
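The new value is bit 14 of the taint bitmask (16384 == 1 << 14), reported as 'L' per the Documentation/oops-tracing.txt hunk above. A minimal userspace sketch that tests it via the tainted sysctl:

/* Check the soft-lockup taint bit via /proc/sys/kernel/tainted. */
#include <stdio.h>

int main(void)
{
	unsigned long tainted = 0;
	FILE *f = fopen("/proc/sys/kernel/tainted", "r");

	if (f && fscanf(f, "%lu", &tainted) == 1 && (tainted & (1UL << 14)))
		printf("a soft lockup has previously occurred\n");
	if (f)
		fclose(f);
	return 0;
}
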
diff --git a/MAINTAINERS b/MAINTAINERS
index e065c38816266d50c4011676402b722b68436e32..30873e781dfa9584d22c002ef0f41aa5e571cb75 100644 (file)
@@ -597,7 +597,7 @@ AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER
 M:     Thomas Dahlmann <dahlmann.thomas@arcor.de>
 L:     linux-geode@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
-F:     drivers/usb/gadget/amd5536udc.*
+F:     drivers/usb/gadget/udc/amd5536udc.*
 
 AMD GEODE PROCESSOR/CHIPSET SUPPORT
 P:     Andres Salomon <dilinger@queued.net>
@@ -621,7 +621,7 @@ AMD MICROCODE UPDATE SUPPORT
 M:     Andreas Herrmann <herrmann.der.user@googlemail.com>
 L:     amd64-microcode@amd64.org
 S:     Maintained
-F:     arch/x86/kernel/microcode_amd.c
+F:     arch/x86/kernel/cpu/microcode/amd*
 
 AMD XGBE DRIVER
 M:     Tom Lendacky <thomas.lendacky@amd.com>
@@ -911,7 +911,7 @@ L:  linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
 S:     Maintained
 F:     arch/arm/mach-prima2/
-F:     drivers/clk/clk-prima2.c
+F:     drivers/clk/sirf/
 F:     drivers/clocksource/timer-prima2.c
 F:     drivers/clocksource/timer-marco.c
 N:     [^a-z]sirf
@@ -1164,6 +1164,7 @@ M:        Linus Walleij <linus.walleij@linaro.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-nomadik/
+F:     drivers/pinctrl/nomadik/
 F:     drivers/i2c/busses/i2c-nomadik.c
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
 
@@ -1185,8 +1186,7 @@ F:        drivers/mmc/host/msm_sdcc.h
 F:     drivers/tty/serial/msm_serial.h
 F:     drivers/tty/serial/msm_serial.c
 F:     drivers/*/pm8???-*
-F:     drivers/mfd/ssbi/
-F:     include/linux/mfd/pm8xxx/
+F:     drivers/mfd/ssbi.c
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davidb/linux-msm.git
 S:     Maintained
 
@@ -1443,7 +1443,8 @@ F:        drivers/mfd/abx500*
 F:     drivers/mfd/ab8500*
 F:     drivers/mfd/dbx500*
 F:     drivers/mfd/db8500*
-F:     drivers/pinctrl/pinctrl-nomadik*
+F:     drivers/pinctrl/nomadik/pinctrl-ab*
+F:     drivers/pinctrl/nomadik/pinctrl-nomadik*
 F:     drivers/rtc/rtc-ab8500.c
 F:     drivers/rtc/rtc-pl031.c
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
@@ -1699,7 +1700,7 @@ ATMEL USBA UDC DRIVER
 M:     Nicolas Ferre <nicolas.ferre@atmel.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
-F:     drivers/usb/gadget/atmel_usba_udc.*
+F:     drivers/usb/gadget/udc/atmel_usba_udc.*
 
 ATMEL WIRELESS DRIVER
 M:     Simon Kelley <simon@thekelleys.org.uk>
@@ -1991,7 +1992,7 @@ F:        arch/arm/boot/dts/bcm113*
 F:     arch/arm/boot/dts/bcm216*
 F:     arch/arm/boot/dts/bcm281*
 F:     arch/arm/configs/bcm_defconfig
-F:     drivers/mmc/host/sdhci_bcm_kona.c
+F:     drivers/mmc/host/sdhci-bcm-kona.c
 F:     drivers/clocksource/bcm_kona_timer.c
 
 BROADCOM BCM2835 ARM ARCHITECTURE
@@ -2341,12 +2342,6 @@ L:       netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/ethernet/cirrus/ep93xx_eth.c
 
-CIRRUS LOGIC EP93XX OHCI USB HOST DRIVER
-M:     Lennert Buytenhek <kernel@wantstofly.org>
-L:     linux-usb@vger.kernel.org
-S:     Maintained
-F:     drivers/usb/host/ohci-ep93xx.c
-
 CIRRUS LOGIC AUDIO CODEC DRIVERS
 M:     Brian Austin <brian.austin@cirrus.com>
 M:     Paul Handrigan <Paul.Handrigan@cirrus.com>
@@ -2431,7 +2426,7 @@ W:        http://linux-cifs.samba.org/
 Q:     http://patchwork.ozlabs.org/project/linux-cifs-client/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git
 S:     Supported
-F:     Documentation/filesystems/cifs.txt
+F:     Documentation/filesystems/cifs/
 F:     fs/cifs/
 
 COMPACTPCI HOTPLUG CORE
@@ -2966,7 +2961,9 @@ L:        linux-media@vger.kernel.org
 L:     dri-devel@lists.freedesktop.org
 L:     linaro-mm-sig@lists.linaro.org
 F:     drivers/dma-buf/
-F:     include/linux/dma-buf* include/linux/reservation.h include/linux/*fence.h
+F:     include/linux/dma-buf*
+F:     include/linux/reservation.h
+F:     include/linux/*fence.h
 F:     Documentation/dma-buf-sharing.txt
 T:     git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git
 
@@ -3061,7 +3058,6 @@ L:        dri-devel@lists.freedesktop.org
 T:     git git://people.freedesktop.org/~agd5f/linux
 S:     Supported
 F:     drivers/gpu/drm/radeon/
-F:     include/drm/radeon*
 F:     include/uapi/drm/radeon*
 
 DRM PANEL DRIVERS
@@ -3255,26 +3251,12 @@ T:      git git://linuxtv.org/anttip/media_tree.git
 S:     Maintained
 F:     drivers/media/tuners/e4000*
 
-EATA-DMA SCSI DRIVER
-M:     Michael Neuffer <mike@i-Connect.Net>
-L:     linux-eata@i-connect.net
-L:     linux-scsi@vger.kernel.org
-S:     Maintained
-F:     drivers/scsi/eata*
-
 EATA ISA/EISA/PCI SCSI DRIVER
 M:     Dario Ballabio <ballabio_dario@emc.com>
 L:     linux-scsi@vger.kernel.org
 S:     Maintained
 F:     drivers/scsi/eata.c
 
-EATA-PIO SCSI DRIVER
-M:     Michael Neuffer <mike@i-Connect.Net>
-L:     linux-eata@i-connect.net
-L:     linux-scsi@vger.kernel.org
-S:     Maintained
-F:     drivers/scsi/eata_pio.*
-
 EC100 MEDIA DRIVER
 M:     Antti Palosaari <crope@iki.fi>
 L:     linux-media@vger.kernel.org
@@ -3449,7 +3431,7 @@ M:        Matt Fleming <matt.fleming@intel.com>
 L:     linux-efi@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
 S:     Maintained
-F:     Documentation/x86/efi-stub.txt
+F:     Documentation/efi-stub.txt
 F:     arch/ia64/kernel/efi.c
 F:     arch/x86/boot/compressed/eboot.[ch]
 F:     arch/x86/include/asm/efi.h
@@ -3836,7 +3818,7 @@ M:        Li Yang <leoli@freescale.com>
 L:     linux-usb@vger.kernel.org
 L:     linuxppc-dev@lists.ozlabs.org
 S:     Maintained
-F:     drivers/usb/gadget/fsl*
+F:     drivers/usb/gadget/udc/fsl*
 
 FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
 M:     Li Yang <leoli@freescale.com>
@@ -4525,10 +4507,7 @@ S:       Supported
 F:     drivers/scsi/ibmvscsi/ibmvfc*
 
 IBM ServeRAID RAID DRIVER
-P:     Jack Hammer
-M:     Dave Jeffery <ipslinux@adaptec.com>
-W:     http://www.developer.ibm.com/welcome/netfinity/serveraid.html
-S:     Supported
+S:     Orphan
 F:     drivers/scsi/ips.*
 
 ICH LPC AND GPIO DRIVER
@@ -4725,8 +4704,8 @@ F:        drivers/platform/x86/intel_menlow.c
 INTEL IA32 MICROCODE UPDATE SUPPORT
 M:     Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
 S:     Maintained
-F:     arch/x86/kernel/microcode_core.c
-F:     arch/x86/kernel/microcode_intel.c
+F:     arch/x86/kernel/cpu/microcode/core*
+F:     arch/x86/kernel/cpu/microcode/intel*
 
 INTEL I/OAT DMA DRIVER
 M:     Dan Williams <dan.j.williams@intel.com>
@@ -5185,7 +5164,6 @@ L:        linux-nfs@vger.kernel.org
 W:     http://nfs.sourceforge.net/
 S:     Supported
 F:     fs/nfsd/
-F:     include/linux/nfsd/
 F:     include/uapi/linux/nfsd/
 F:     fs/lockd/
 F:     fs/nfs_common/
@@ -5906,7 +5884,6 @@ F:        drivers/clocksource/metag_generic.c
 F:     drivers/irqchip/irq-metag.c
 F:     drivers/irqchip/irq-metag-ext.c
 F:     drivers/tty/metag_da.c
-F:     fs/imgdafs/
 
 MICROBLAZE ARCHITECTURE
 M:     Michal Simek <monstr@monstr.eu>
@@ -6997,9 +6974,9 @@ M:        Jamie Iles <jamie@jamieiles.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/jamieiles/linux-2.6-ji.git
 S:     Supported
+F:     arch/arm/boot/dts/picoxcell*
 F:     arch/arm/mach-picoxcell/
-F:     drivers/*/picoxcell*
-F:     drivers/*/*/picoxcell*
+F:     drivers/crypto/picoxcell*
 
 PIN CONTROL SUBSYSTEM
 M:     Linus Walleij <linus.walleij@linaro.org>
@@ -7224,7 +7201,7 @@ F:        drivers/ptp/*
 F:     include/linux/ptp_cl*
 
 PTRACE SUPPORT
-M:     Roland McGrath <roland@redhat.com>
+M:     Roland McGrath <roland@hack.frob.com>
 M:     Oleg Nesterov <oleg@redhat.com>
 S:     Maintained
 F:     include/asm-generic/syscall.h
@@ -7274,7 +7251,7 @@ S:        Maintained
 F:     arch/arm/mach-pxa/
 F:     drivers/pcmcia/pxa2xx*
 F:     drivers/spi/spi-pxa2xx*
-F:     drivers/usb/gadget/pxa2*
+F:     drivers/usb/gadget/udc/pxa2*
 F:     include/sound/pxa2xx-lib.h
 F:     sound/arm/pxa*
 F:     sound/soc/pxa/
@@ -7283,7 +7260,7 @@ PXA3xx NAND FLASH DRIVER
 M:     Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
 L:     linux-mtd@lists.infradead.org
 S:     Maintained
-F:     drivers/mtd/nand/pxa3xx-nand.c
+F:     drivers/mtd/nand/pxa3xx_nand.c
 
 MMP SUPPORT
 M:     Eric Miao <eric.y.miao@gmail.com>
@@ -9628,8 +9605,8 @@ USB WEBCAM GADGET
 M:     Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:     linux-usb@vger.kernel.org
 S:     Maintained
-F:     drivers/usb/gadget/*uvc*.c
-F:     drivers/usb/gadget/webcam.c
+F:     drivers/usb/gadget/function/*uvc*.c
+F:     drivers/usb/gadget/legacy/webcam.c
 
 USB WIRELESS RNDIS DRIVER (rndis_wlan)
 M:     Jussi Kivilinna <jussi.kivilinna@iki.fi>
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 96e54bed50889e4fe62c71ac6125ac6fe0dad5f2..e858aa0ad8af3cc4008b464914c264acd39e3980 100644 (file)
@@ -6,4 +6,5 @@ generic-y += exec.h
 generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/scatterlist.h b/arch/alpha/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 017d747..0000000
--- a/arch/alpha/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ALPHA_SCATTERLIST_H
-#define _ALPHA_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(_ALPHA_SCATTERLIST_H) */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 916cedbd7a674f7df326e43d29fc98bd95bf4d41..c49a775937db39912411a33be3d63a5d31eacfba 100644 (file)
@@ -83,6 +83,7 @@ config ARM
          <http://www.arm.linux.org.uk/>.
 
 config ARM_HAS_SG_CHAIN
+       select ARCH_HAS_SG_CHAIN
        bool
 
 config NEED_SG_DMA_LENGTH
@@ -1982,6 +1983,8 @@ config XIP_PHYS_ADDR
 config KEXEC
        bool "Kexec system call (EXPERIMENTAL)"
        depends on (!SMP || PM_SLEEP_SMP)
+       select CRYPTO
+       select CRYPTO_SHA256
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f5a35760198314e150c752290facc879b3c068e6..70cd84eb7fda0c294c004d83841462bb4df799a5 100644 (file)
@@ -22,6 +22,7 @@ generic-y += poll.h
 generic-y += preempt.h
 generic-y += resource.h
 generic-y += rwsem.h
+generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += sembuf.h
diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h
deleted file mode 100644 (file)
index cefdb8f..0000000
--- a/arch/arm/include/asm/scatterlist.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASMARM_SCATTERLIST_H
-#define _ASMARM_SCATTERLIST_H
-
-#ifdef CONFIG_ARM_HAS_SG_CHAIN
-#define ARCH_HAS_SG_CHAIN
-#endif
-
-#include <asm/memory.h>
-#include <asm/types.h>
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASMARM_SCATTERLIST_H */
diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c
index 7da48bc42bbfbb2007ac244a594c54299a2eb13d..70b904c010c682b54f59df863f6cbf900c5072ee 100644 (file)
@@ -336,7 +336,7 @@ static int __init early_touchbook_revision(char *p)
        if (!p)
                return 0;
 
-       return strict_strtoul(p, 10, &touchbook_revision);
+       return kstrtoul(p, 10, &touchbook_revision);
 }
 early_param("tbr", early_touchbook_revision);
 
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index f62f7537d899f0a0ed4da259445f488a41bc3c39..ac8a249779f222512e05c30b6a35ad6cdc344260 100644 (file)
@@ -681,29 +681,19 @@ static ssize_t omap_mux_dbg_signal_write(struct file *file,
                                         const char __user *user_buf,
                                         size_t count, loff_t *ppos)
 {
-       char buf[OMAP_MUX_MAX_ARG_CHAR];
        struct seq_file *seqf;
        struct omap_mux *m;
-       unsigned long val;
-       int buf_size, ret;
+       u16 val;
+       int ret;
        struct omap_mux_partition *partition;
 
        if (count > OMAP_MUX_MAX_ARG_CHAR)
                return -EINVAL;
 
-       memset(buf, 0, sizeof(buf));
-       buf_size = min(count, sizeof(buf) - 1);
-
-       if (copy_from_user(buf, user_buf, buf_size))
-               return -EFAULT;
-
-       ret = strict_strtoul(buf, 0x10, &val);
+       ret = kstrtou16_from_user(user_buf, count, 0x10, &val);
        if (ret < 0)
                return ret;
 
-       if (val > 0xffff)
-               return -EINVAL;
-
        seqf = file->private_data;
        m = seqf->private;
 
@@ -711,7 +701,7 @@ static ssize_t omap_mux_dbg_signal_write(struct file *file,
        if (!partition)
                return -ENODEV;
 
-       omap_mux_write(partition, (u16)val, m->reg_offset);
+       omap_mux_write(partition, val, m->reg_offset);
        *ppos += count;
 
        return count;
@@ -917,14 +907,14 @@ static void __init omap_mux_set_cmdline_signals(void)
 
        while ((token = strsep(&next_opt, ",")) != NULL) {
                char *keyval, *name;
-               unsigned long val;
+               u16 val;
 
                keyval = token;
                name = strsep(&keyval, "=");
                if (name) {
                        int res;
 
-                       res = strict_strtoul(keyval, 0x10, &val);
+                       res = kstrtou16(keyval, 0x10, &val);
                        if (res < 0)
                                continue;
 
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index 43596e0ed0515597ee786da610eaf22683e8f3f8..d897292712ebe722df54463a3db9a0bb04d36dc1 100644 (file)
@@ -90,7 +90,7 @@ int __init parse_balloon3_features(char *arg)
        if (!arg)
                return 0;
 
-       return strict_strtoul(arg, 0, &balloon3_features_present);
+       return kstrtoul(arg, 0, &balloon3_features_present);
 }
 early_param("balloon3_features", parse_balloon3_features);
 
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index 41f27f667ca89ccebdf0bd9b54b63f34922eb60a..de3b08073fe7604ececdd1e5dc259e0244a53201 100644 (file)
@@ -769,7 +769,7 @@ static unsigned long viper_tpm;
 
 static int __init viper_tpm_setup(char *str)
 {
-       return strict_strtoul(str, 10, &viper_tpm) >= 0;
+       return kstrtoul(str, 10, &viper_tpm) >= 0;
 }
 
 __setup("tpm=", viper_tpm_setup);
diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c
index e647b47244a947f0dff96d1d991af7e50908e5e6..7804d3c6991b769eb68aa630bd495bbd53c7e820 100644 (file)
@@ -242,7 +242,7 @@ static int __init jive_mtdset(char *options)
        if (options == NULL || options[0] == '\0')
                return 0;
 
-       if (strict_strtoul(options, 10, &set)) {
+       if (kstrtoul(options, 10, &set)) {
                printk(KERN_ERR "failed to parse mtdset=%s\n", options);
                return 0;
        }
diff --git a/arch/arm/mach-w90x900/cpu.c b/arch/arm/mach-w90x900/cpu.c
index b1eabaad50a5a2358dae8e98ffbd1d9f187aa73c..213230ee57d13cf19c2e6617eb5efd3bed1b1d45 100644 (file)
@@ -178,7 +178,8 @@ static int __init nuc900_set_cpufreq(char *str)
        if (!*str)
                return 0;
 
-       strict_strtoul(str, 0, &cpufreq);
+       if (kstrtoul(str, 0, &cpufreq))
+               return 0;
 
        nuc900_clock_source(NULL, "ext");
 
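The conversions in the hunks above share one pattern: strict_strtoul(), whose return value was easy to ignore and which needed a separate range test, gives way to width-specific kstrto*() helpers. A condensed kernel-style sketch of the new idiom; parse_hex_reg() is an illustrative name:

/* kstrtou16() parses and range-checks in one step; the old
 * strict_strtoul() callers needed an explicit "val > 0xffff" test. */
#include <linux/kernel.h>

static int parse_hex_reg(const char *str, u16 *reg)
{
	int ret = kstrtou16(str, 16, reg);	/* base 16, as in the hunks above */

	if (ret < 0)		/* -EINVAL or -ERANGE; *reg is left untouched */
		return ret;
	return 0;
}
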
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b0f9c9db95903f1368540a7678d2294a6dd773c5..fd4e81a4e1cee5fc4e564ad60f8b48d15b3c5f71 100644 (file)
@@ -1,6 +1,7 @@
 config ARM64
        def_bool y
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+       select ARCH_HAS_SG_CHAIN
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_USE_CMPXCHG_LOCKREF
        select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7a3f462133b0b83c57dd5de0fde19931b73ad877..22b16232bd609b7a916c4214df0e53819abe82fb 100644 (file)
@@ -28,9 +28,6 @@
 #define PAGE_SIZE              (_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK              (~(PAGE_SIZE-1))
 
-/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
-#define __HAVE_ARCH_GATE_AREA          1
-
 /*
  * The idmap and swapper page tables need some space reserved in the kernel
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a81a446a578628ea63e96405f7cbfb6eb6434151..32aeea083d93b2391122ad9d1c49c3925121e38b 100644 (file)
@@ -194,25 +194,6 @@ up_fail:
        return PTR_ERR(ret);
 }
 
-/*
- * We define AT_SYSINFO_EHDR, so we need these function stubs to keep
- * Linux happy.
- */
-int in_gate_area_no_mm(unsigned long addr)
-{
-       return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-       return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return NULL;
-}
-
 /*
  * Update the vDSO data page to keep in sync with kernel timekeeping.
  */
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index afff5105909d953af7ca3667c773350fb4bc434d..31742dfadff903d0dd7a953e79f4c8cd2d780379 100644 (file)
@@ -13,6 +13,7 @@ generic-y += linkage.h
 generic-y += mcs_spinlock.h
 generic-y += module.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
 generic-y += vga.h
 generic-y += xor.h
diff --git a/arch/cris/include/asm/scatterlist.h b/arch/cris/include/asm/scatterlist.h
deleted file mode 100644 (file)
index f11f8f4..0000000
--- a/arch/cris/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_CRIS_SCATTERLIST_H
-#define __ASM_CRIS_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(__ASM_CRIS_SCATTERLIST_H) */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 87b95eb8aee53e3f5280f3b985da8b0c6cc75192..5b73921b6e9d32f125366a07f0f1270b0d46e260 100644 (file)
@@ -5,4 +5,5 @@ generic-y += exec.h
 generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/scatterlist.h b/arch/frv/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 0e5eb30..0000000
--- a/arch/frv/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !_ASM_SCATTERLIST_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 44a6915ab13d474d53457e0d8c1a62838974b7ac..64aefb76bd69054a2f5d9f7627a950347a959d82 100644 (file)
@@ -28,6 +28,7 @@ config IA64
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_VIRT_CPU_ACCOUNTING
+       select ARCH_HAS_SG_CHAIN
        select VIRT_TO_BUS
        select ARCH_DISCARD_MEMBLOCK
        select GENERIC_IRQ_PROBE
@@ -548,6 +549,8 @@ source "drivers/sn/Kconfig"
 config KEXEC
        bool "kexec system call"
        depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+       select CRYPTO
+       select CRYPTO_SHA256
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 0da4aa2602ae01d9badaca0f960e8d6b4c16ae2b..e8317d2d6c8d4462fe2536e5863ba628e3e44194 100644 (file)
@@ -5,5 +5,6 @@ generic-y += hash.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
 generic-y += vtime.h
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index f1e1b2e3cdb3890cd8b35b0224eacbbafa94fe01..1f1bf144fe62b165cd7854c19c30717a9026d97d 100644 (file)
@@ -231,4 +231,6 @@ get_order (unsigned long size)
 #define PERCPU_ADDR            (-PERCPU_PAGE_SIZE)
 #define LOAD_OFFSET            (KERNEL_START - KERNEL_TR_PAGE_SIZE)
 
+#define __HAVE_ARCH_GATE_AREA  1
+
 #endif /* _ASM_IA64_PAGE_H */
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 08fd93b..0000000
--- a/arch/ia64/include/asm/scatterlist.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_IA64_SCATTERLIST_H
-#define _ASM_IA64_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 3e71ef85e4397f58a5bf392d8217088eab4eccd7..9a0104a38cd37ef3e1a4a1537cfe95c785d45dbf 100644 (file)
@@ -384,21 +384,6 @@ static struct irqaction timer_irqaction = {
        .name =         "timer"
 };
 
-static struct platform_device rtc_efi_dev = {
-       .name = "rtc-efi",
-       .id = -1,
-};
-
-static int __init rtc_init(void)
-{
-       if (platform_device_register(&rtc_efi_dev) < 0)
-               printk(KERN_ERR "unable to register rtc device...\n");
-
-       /* not necessarily an error */
-       return 0;
-}
-module_init(rtc_init);
-
 void read_persistent_clock(struct timespec *ts)
 {
        efi_gettimeofday(ts);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 892d43e32f3b5995936a24d91b4229cb38e78afb..6b3345758d3e3298ed8ee23b592d21adda88034d 100644 (file)
@@ -278,6 +278,37 @@ setup_gate (void)
        ia64_patch_gate();
 }
 
+static struct vm_area_struct gate_vma;
+
+static int __init gate_vma_init(void)
+{
+       gate_vma.vm_mm = NULL;
+       gate_vma.vm_start = FIXADDR_USER_START;
+       gate_vma.vm_end = FIXADDR_USER_END;
+       gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+       gate_vma.vm_page_prot = __P101;
+
+       return 0;
+}
+__initcall(gate_vma_init);
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+       return &gate_vma;
+}
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+       if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
+               return 1;
+       return 0;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+       return in_gate_area_no_mm(addr);
+}
+
 void ia64_mmu_init(void *my_cpu_data)
 {
        unsigned long pta, impl_va_bits;
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 67779a74b62dbbe2e5841253acf145867b67c9ae..accc10a3dc78f09bd7dab56a0907ed1be2925da9 100644 (file)
@@ -6,4 +6,5 @@ generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += module.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
diff --git a/arch/m32r/include/asm/scatterlist.h b/arch/m32r/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 7370b8b..0000000
--- a/arch/m32r/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_M32R_SCATTERLIST_H
-#define _ASM_M32R_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_M32R_SCATTERLIST_H */
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 87b7c7581b1dd5777642a3b87dd9002b7e68dc50..3ff8c9a25335b20eb4d3f7dfab3513fe584abf39 100644 (file)
@@ -91,6 +91,8 @@ config MMU_SUN3
 config KEXEC
        bool "kexec system call"
        depends on M68KCLASSIC
+       select CRYPTO
+       select CRYPTO_SHA256
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
index 35b3ecaf25d542a58bd3ee4ddb99d88e96f63f21..27a3acda6c1904a05167b96efb006dcdd56f83ce 100644 (file)
@@ -7,5 +7,6 @@ generic-y += exec.h
 generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += syscalls.h
 generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/scatterlist.h b/arch/microblaze/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 35d786f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/scatterlist.h>
index 900c7e5333b650666c374a8e378129b1c7eb43f1..df51e78a72cc1b045e5f2e3146be63a5a3ad1790 100644 (file)
@@ -2396,6 +2396,8 @@ source "kernel/Kconfig.preempt"
 
 config KEXEC
        bool "Kexec system call"
+       select CRYPTO
+       select CRYPTO_SHA256
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
index 654d5ba6e31077354f130f22a198c7b6adb45743..ecbd6676bd338c70a5ad9d1d2ece7b28f996dc21 100644 (file)
@@ -6,4 +6,5 @@ generic-y += exec.h
 generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/scatterlist.h b/arch/mn10300/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 7baa400..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/* MN10300 Scatterlist definitions
- *
- * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCATTERLIST_H */
index 80b94b0add1f494e600db71170a966c2271c5e58..a577609f8ed60229ed4d3fc26931bc23f955c599 100644 (file)
@@ -111,6 +111,7 @@ config PPC
        select HAVE_DMA_API_DEBUG
        select HAVE_OPROFILE
        select HAVE_DEBUG_KMEMLEAK
+       select ARCH_HAS_SG_CHAIN
        select GENERIC_ATOMIC64 if PPC32
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
        select HAVE_PERF_EVENTS
@@ -398,6 +399,8 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
 config KEXEC
        bool "kexec system call"
        depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
+       select CRYPTO
+       select CRYPTO_SHA256
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
index 3fb1bc432f4f6106f1a892ce02113ec68970af6a..7f23f162ce9c0c40a9e29eebba12cdd1db4c6704 100644 (file)
@@ -4,5 +4,6 @@ generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += rwsem.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
 generic-y += vtime.h
index 32e4e212b9c1a3c14e941d211dd5f5d2e2cf6aba..26fe1ae1521255b514d267acc0010833b5e93e8a 100644 (file)
@@ -48,9 +48,6 @@ extern unsigned int HPAGE_SHIFT;
 #define HUGE_MAX_HSTATE                (MMU_PAGE_COUNT-1)
 #endif
 
-/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
-#define __HAVE_ARCH_GATE_AREA          1
-
 /*
  * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
  * assign PAGE_MASK to a larger type it gets extended the way we want
diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
deleted file mode 100644 (file)
index de1f620..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_POWERPC_SCATTERLIST_H
-#define _ASM_POWERPC_SCATTERLIST_H
-/*
- * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/dma.h>
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_POWERPC_SCATTERLIST_H */
index d0225572faa19ac74b429c7806be5645277fb562..75d62d63fe684ad77b33efac9ac2a13d59222ac7 100644 (file)
@@ -149,13 +149,13 @@ static void check_smt_enabled(void)
                else if (!strcmp(smt_enabled_cmdline, "off"))
                        smt_enabled_at_boot = 0;
                else {
-                       long smt;
+                       int smt;
                        int rc;
 
-                       rc = strict_strtol(smt_enabled_cmdline, 10, &smt);
+                       rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
                        if (!rc)
                                smt_enabled_at_boot =
-                                       min(threads_per_core, (int)smt);
+                                       min(threads_per_core, smt);
                }
        } else {
                dn = of_find_node_by_path("/options");
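This hunk and several later ones (viodev, dlpar, and the x86 cache and MCE sysfs handlers) migrate callers from the deprecated strict_strto*() helpers to the kstrto*() family, which parse into a correctly sized variable and return 0 or a negative errno. A minimal sketch of the resulting pattern, using a hypothetical sysfs store handler (the handler name and device are illustrative):

static ssize_t threads_store(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        int val;
        int ret;

        /* kstrtoint() tolerates one trailing newline and fails with
         * -EINVAL or -ERANGE instead of silently truncating. */
        ret = kstrtoint(buf, 10, &val);
        if (ret)
                return ret;

        /* ... apply val ... */
        return count;
}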
index ce74c335a6a405b03e8400a72b74e1fc7227b56f..f174351842cf449cfcdcc9117ec183e951542af7 100644 (file)
@@ -840,19 +840,3 @@ static int __init vdso_init(void)
        return 0;
 }
 arch_initcall(vdso_init);
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-       return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-       return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return NULL;
-}
-
index 904c66128faeb7d31a8463532929d187254da7ed..5bfdab9047be2577443a77034e718ae56253546e 100644 (file)
@@ -977,7 +977,7 @@ static ssize_t viodev_cmo_desired_set(struct device *dev,
        size_t new_desired;
        int ret;
 
-       ret = strict_strtoul(buf, 10, &new_desired);
+       ret = kstrtoul(buf, 10, &new_desired);
        if (ret)
                return ret;
 
index 7b6c10750179d163ff98e091d8e5dac863f2a2a0..d85e86aac7fbeff7d4f0b248bd443d5ba5071df2 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/export.h>
 
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 
 #include "mmu_decl.h"
 
index 534574a97ec906d72e9f15fde252dac5a36ca514..3a104284b338f700d1ec56478dcb679859db514f 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/time.h>
 #include <asm/uic.h>
 #include <asm/ppc4xx.h>
+#include <asm/dma.h>
 
 
 static __initdata struct of_device_id warp_of_bus[] = {
index 6e19b0ad5d266e344de46149af1505de736e5c29..3feffde9128d1109b1a8e6623a43b6c5e24c8305 100644 (file)
@@ -13,6 +13,7 @@
 #include <generated/utsrelease.h>
 #include <linux/pci.h>
 #include <linux/of.h>
+#include <asm/dma.h>
 #include <asm/prom.h>
 #include <asm/time.h>
 #include <asm/machdep.h>
index 03aabc0e16ac2f212ec6d5ae6a78909333e606c6..2fe12046279ef53483f5b0c1659123dad1d7a0e4 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/i8259.h>
 #include <asm/time.h>
 #include <asm/udbg.h>
+#include <asm/dma.h>
 
 extern void __flush_disable_L1(void);
 
index 2d0b4d68a40a076f970fd458ab09d06785a43ff2..a2450b8a50a5eaaf3bbab9fc402bbf8997c00ed4 100644 (file)
@@ -400,10 +400,10 @@ out:
 static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 {
        struct device_node *dn, *parent;
-       unsigned long drc_index;
+       u32 drc_index;
        int rc;
 
-       rc = strict_strtoul(buf, 0, &drc_index);
+       rc = kstrtou32(buf, 0, &drc_index);
        if (rc)
                return -EINVAL;
 
index d146fef038b82823834b6e1ce48c55339b1721bb..e7cb6d4a871ae718d7e081f3aab041614bdd0420 100644 (file)
@@ -320,7 +320,7 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
        u64 streamid;
        int rc;
 
-       rc = strict_strtoull(buf, 0, &streamid);
+       rc = kstrtou64(buf, 0, &streamid);
        if (rc)
                return rc;
 
index 8ca60f8d5683a33efed589142848c400ba8b3a88..ab39ceb89ecfa3e4b8e1b3eb34e3787aedbefeaa 100644 (file)
@@ -48,6 +48,8 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 
 config KEXEC
        def_bool y
+       select CRYPTO
+       select CRYPTO_SHA256
 
 config AUDIT_ARCH
        def_bool y
@@ -145,6 +147,7 @@ config S390
        select TTY
        select VIRT_CPU_ACCOUNTING
        select VIRT_TO_BUS
+       select ARCH_HAS_SG_CHAIN
 
 config SCHED_OMIT_FRAME_POINTER
        def_bool y
index 57892a8a905584d79fd112a7c2e67a319a3f9e45..b3fea0722ff1f80e71809f45f61ad48b66805ea1 100644 (file)
@@ -4,4 +4,5 @@ generic-y += clkdev.h
 generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
index 114258eeaacdbbd796a07605e74c53d9610ba3be..7b2ac6e44166ac258ebd6481cb1691e77be53b71 100644 (file)
@@ -162,6 +162,4 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
-#define __HAVE_ARCH_GATE_AREA 1
-
 #endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 6d45ef6..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
index 613649096783401e0cf9acf73069dff48a35e438..0bbb7e027c5aae5778483db3fcdbe334f9a4243b 100644 (file)
@@ -316,18 +316,3 @@ static int __init vdso_init(void)
        return 0;
 }
 early_initcall(vdso_init);
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-       return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-       return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return NULL;
-}
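This stub (and its powerpc and tile twins elsewhere in this series) can go because __HAVE_ARCH_GATE_AREA is now opt-in; architectures that do not define it pick up a generic fallback instead. A sketch of that fallback, assuming the consolidated stubs live as static inlines in include/linux/mm.h:

#ifndef __HAVE_ARCH_GATE_AREA
static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
        return NULL;
}

static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
        return 0;
}

static inline int in_gate_area_no_mm(unsigned long addr)
{
        return 0;
}
#endif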
index 2f947aba4bd4d248984c1c865248e71fd3a0445c..aad209199f7e51852f40f71fb7dd1d2b11a36f16 100644 (file)
@@ -8,5 +8,6 @@ generic-y += cputime.h
 generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
 generic-y += xor.h
diff --git a/arch/score/include/asm/scatterlist.h b/arch/score/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 9f533b8..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_SCATTERLIST_H
-#define _ASM_SCORE_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCORE_SCATTERLIST_H */
index aa2df3eaeb297d32160bbfc4fc551aea4e28146c..453fa5c09550c592a9dae8c6dc7c24ebbbded80f 100644 (file)
@@ -595,6 +595,8 @@ source kernel/Kconfig.hz
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
        depends on SUPERH32 && MMU
+       select CRYPTO
+       select CRYPTO_SHA256
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
index 15d970328f717c929d25287a09b14af181e23d9c..fe20d14ae051a5892350185d55ce1adfc352e538 100644 (file)
@@ -186,11 +186,6 @@ typedef struct page *pgtable_t;
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
-/* vDSO support */
-#ifdef CONFIG_VSYSCALL
-#define __HAVE_ARCH_GATE_AREA
-#endif
-
 /*
  * Some drivers need to perform DMA into kmalloc'ed buffers
  * and so we have to increase the kmalloc minalign for this.
index 5ca579720a0992322099732c16faa79a1d1b9b75..ea2aa1393b8749b72b2ed2c3efd802f213bff342 100644 (file)
@@ -92,18 +92,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
        return NULL;
 }
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long address)
-{
-       return 0;
-}
-
-int in_gate_area_no_mm(unsigned long address)
-{
-       return 0;
-}
index 4692c90936f188b2a3faaff1d37e57edd8c0da30..a537816613f99d952bab734ca0e97134cda468ac 100644 (file)
@@ -42,6 +42,7 @@ config SPARC
        select MODULES_USE_ELF_RELA
        select ODD_RT_SIGACTION
        select OLD_SIGSUSPEND
+       select ARCH_HAS_SG_CHAIN
 
 config SPARC32
        def_bool !64BIT
index a45821818003fce80d501b59653ac35eedcc6875..cdd1b447bb6cac1f10cc2a5239e1a477f7e6bb06 100644 (file)
@@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h
 generic-y += module.h
 generic-y += mutex.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += types.h
diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 92bb638..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SPARC_SCATTERLIST_H
-#define _SPARC_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* !(_SPARC_SCATTERLIST_H) */
index 7fcd492adbfcfb030a34c2f4d34558fc9a20ad81..a3ffe2dd4832b18ff60e2716e770e1dfb7324d44 100644 (file)
@@ -191,6 +191,8 @@ source "kernel/Kconfig.hz"
 
 config KEXEC
        bool "kexec system call"
+       select CRYPTO
+       select CRYPTO_SHA256
        ---help---
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
index 2f572b6b7bc2b6efd259ba6880da4bbf02f5bc79..44d2765bde2bfb337e7552e3e6201f3bfe6fc2c5 100644 (file)
@@ -23,7 +23,7 @@
 struct proc_dir_entry;
 #ifdef CONFIG_HARDWALL
 void proc_tile_hardwall_init(struct proc_dir_entry *root);
-int proc_pid_hardwall(struct task_struct *task, char *buffer);
+int proc_pid_hardwall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task);
 #else
 static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
 #endif
index 67276800861833f674422a10cf47440f7864af3b..a213a8d84a95ac48a149de807558290c21dbe2cb 100644 (file)
 #define PAGE_MASK      (~(PAGE_SIZE - 1))
 #define HPAGE_MASK     (~(HPAGE_SIZE - 1))
 
-/*
- * We do define AT_SYSINFO_EHDR to support vDSO,
- * but don't use the gate mechanism.
- */
-#define __HAVE_ARCH_GATE_AREA          1
-
 /*
  * If the Kconfig doesn't specify, set a maximum zone order that
  * is enough so that we can create huge pages from small pages given
index 531f4c365351119eeb249904cc01fd60307931b4..aca6000bca75e5b6330aeb1b8871e285a9fbe6da 100644 (file)
@@ -947,15 +947,15 @@ static void hardwall_remove_proc(struct hardwall_info *info)
        remove_proc_entry(buf, info->type->proc_dir);
 }
 
-int proc_pid_hardwall(struct task_struct *task, char *buffer)
+int proc_pid_hardwall(struct seq_file *m, struct pid_namespace *ns,
+                     struct pid *pid, struct task_struct *task)
 {
        int i;
        int n = 0;
        for (i = 0; i < HARDWALL_TYPES; ++i) {
                struct hardwall_info *info = task->thread.hardwall[i].info;
                if (info)
-                       n += sprintf(&buffer[n], "%s: %d\n",
-                                    info->type->name, info->id);
+                       seq_printf(m, "%s: %d\n", info->type->name, info->id);
        }
        return n;
 }
index 1533af24106ea4653faa97588e73935999f74495..5bc51d7dfdcb0e493c156ebcf7c3ebc4d0525404 100644 (file)
@@ -121,21 +121,6 @@ const char *arch_vma_name(struct vm_area_struct *vma)
        return NULL;
 }
 
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long address)
-{
-       return 0;
-}
-
-int in_gate_area_no_mm(unsigned long address)
-{
-       return 0;
-}
-
 int setup_vdso_pages(void)
 {
        struct page **pagelist;
index a5e4b6068213f4f147639a9ba428b120d8e9c886..7bd64aa2e94a40599211460fc615fe0dd12c7680 100644 (file)
@@ -21,6 +21,7 @@ generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += switch_to.h
 generic-y += topology.h
index 5ff53d9185f7f81260f4b41f46226ed1683a836a..71c5d132062aa32074b3bda4a30db2ca08b86c5b 100644 (file)
@@ -119,4 +119,9 @@ extern unsigned long uml_physmem;
 #include <asm-generic/getorder.h>
 
 #endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_X86_32
+#define __HAVE_ARCH_GATE_AREA 1
+#endif
+
 #endif /* __UM_PAGE_H */
index e5287d8517aa18172c266d3cb98fa8af2b950bb1..61b6d51866f8680353044f5d7f5a3ea5cb19b7ac 100644 (file)
@@ -16,3 +16,7 @@ obj-$(CONFIG_IA32_EMULATION) += ia32/
 
 obj-y += platform/
 obj-y += net/
+
+ifeq ($(CONFIG_X86_64),y)
+obj-$(CONFIG_KEXEC) += purgatory/
+endif
index bf2405053af5dd013b1f0ae644eece67f2b9c31a..4aafd322e21e273e902870f2cdcbcc6cd0937d36 100644 (file)
@@ -96,6 +96,7 @@ config X86
        select IRQ_FORCED_THREADING
        select HAVE_BPF_JIT if X86_64
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       select ARCH_HAS_SG_CHAIN
        select CLKEVT_I8253
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select GENERIC_IOMAP
@@ -1581,6 +1582,9 @@ source kernel/Kconfig.hz
 
 config KEXEC
        bool "kexec system call"
+       select BUILD_BIN2C
+       select CRYPTO
+       select CRYPTO_SHA256
        ---help---
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
@@ -1595,6 +1599,28 @@ config KEXEC
          interface is strongly in flux, so no good recommendation can be
          made.
 
+config KEXEC_VERIFY_SIG
+       bool "Verify kernel signature during kexec_file_load() syscall"
+       depends on KEXEC
+       ---help---
+         This option makes kernel signature verification mandatory for
+         the kexec_file_load() syscall. If the signature of the kernel
+         image cannot be verified, kexec_file_load() will fail.
+
+         This option enforces signature verification at the generic
+         level. One also needs to enable signature verification for the
+         type of kernel image being loaded; for example, enable the
+         bzImage signature verification option to be able to load and
+         verify signed bzImages. Otherwise kernel loading will fail.
+
+config KEXEC_BZIMAGE_VERIFY_SIG
+       bool "Enable bzImage signature verification support"
+       depends on KEXEC_VERIFY_SIG
+       depends on SIGNED_PE_FILE_VERIFICATION
+       select SYSTEM_TRUSTED_KEYRING
+       ---help---
+         Enable bzImage signature verification support.
+
 config CRASH_DUMP
        bool "kernel crash dumps"
        depends on X86_64 || (X86_32 && HIGHMEM)
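For context, the kexec_file_load() syscall that this verification applies to can be invoked from user space roughly as follows; a hedged sketch (minimal error handling; the file paths and command line are illustrative, and 320 is the x86_64 syscall number):

#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_kexec_file_load
#define __NR_kexec_file_load 320        /* x86_64 */
#endif

int main(void)
{
        int kernel_fd = open("/boot/vmlinuz", O_RDONLY);
        int initrd_fd = open("/boot/initrd.img", O_RDONLY);
        const char cmdline[] = "root=/dev/sda1 ro";
        long ret;

        /* cmdline_len must count the trailing NUL */
        ret = syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
                      sizeof(cmdline), cmdline, 0UL);
        if (ret)
                perror("kexec_file_load");
        return ret ? 1 : 0;
}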
index c65fd9650467339ea848f6feb7f64eccc96a86f8..c1aa368878431fe0b551cb4704b6587fd8826145 100644 (file)
@@ -183,6 +183,14 @@ archscripts: scripts_basic
 archheaders:
        $(Q)$(MAKE) $(build)=arch/x86/syscalls all
 
+archprepare:
+ifeq ($(CONFIG_KEXEC),y)
+# Build only for 64bit. No loaders for 32bit yet.
+ ifeq ($(CONFIG_X86_64),y)
+       $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
+ endif
+endif
+
 ###
 # Kernel objects
 
index 3ca9762e1649d001e653468f98fff17a57065039..3bf000fab0aeb51b3fa23bfa1faca8b3a581febc 100644 (file)
@@ -5,6 +5,7 @@ genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
 
 generic-y += clkdev.h
-generic-y += early_ioremap.h
 generic-y += cputime.h
+generic-y += early_ioremap.h
 generic-y += mcs_spinlock.h
+generic-y += scatterlist.h
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
new file mode 100644 (file)
index 0000000..f498411
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _ASM_X86_CRASH_H
+#define _ASM_X86_CRASH_H
+
+int crash_load_segments(struct kimage *image);
+int crash_copy_backup_region(struct kimage *image);
+int crash_setup_memmap_entries(struct kimage *image,
+               struct boot_params *params);
+
+#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h
new file mode 100644 (file)
index 0000000..d1b5d19
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _ASM_KEXEC_BZIMAGE64_H
+#define _ASM_KEXEC_BZIMAGE64_H
+
+extern struct kexec_file_ops kexec_bzImage64_ops;
+
+#endif  /* _ASM_KEXEC_BZIMAGE64_H */
index 17483a492f1882f218849bedd1c26d8a970d433f..d2434c1cad0558e2664d8f7587ed4701d049edd5 100644 (file)
@@ -23,6 +23,9 @@
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
+#include <asm/bootparam.h>
+
+struct kimage;
 
 /*
  * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
 # define KEXEC_ARCH KEXEC_ARCH_X86_64
 #endif
 
+/* Memory to backup during crash kdump */
+#define KEXEC_BACKUP_SRC_START (0UL)
+#define KEXEC_BACKUP_SRC_END   (640 * 1024UL)  /* 640K */
+
 /*
  * CPU does not save ss and sp on stack if execution is already
  * running in kernel mode at the time of NMI occurrence. This code
@@ -160,6 +167,44 @@ struct kimage_arch {
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
+       /* Details of backup region */
+       unsigned long backup_src_start;
+       unsigned long backup_src_sz;
+
+       /* Physical address of backup segment */
+       unsigned long backup_load_addr;
+
+       /* Core ELF header buffer */
+       void *elf_headers;
+       unsigned long elf_headers_sz;
+       unsigned long elf_load_addr;
+};
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+/*
+ * The number and order of elements in this structure should match
+ * those in arch/x86/purgatory/entry64.S. If you make a change here,
+ * make the corresponding change in purgatory too.
+ */
+struct kexec_entry64_regs {
+       uint64_t rax;
+       uint64_t rcx;
+       uint64_t rdx;
+       uint64_t rbx;
+       uint64_t rsp;
+       uint64_t rbp;
+       uint64_t rsi;
+       uint64_t rdi;
+       uint64_t r8;
+       uint64_t r9;
+       uint64_t r10;
+       uint64_t r11;
+       uint64_t r12;
+       uint64_t r13;
+       uint64_t r14;
+       uint64_t r15;
+       uint64_t rip;
 };
 #endif
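For orientation, the loader fills this structure and patches it into purgatory before jumping there; an illustrative fragment (the variable names are hypothetical, and the symbol plumbing via kexec_purgatory_get_set_symbol() is omitted):

struct kexec_entry64_regs regs64 = { 0 };

regs64.rbx = 0;                         /* bootstrap processor is 0 */
regs64.rsi = bootparam_load_addr;       /* physical boot_params address */
regs64.rip = kernel_load_addr + 0x200;  /* 64-bit kernel entry point */
regs64.rsp = stack_top;                 /* scratch stack for purgatory */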
 
index 775873d3be55617a49e15c3460ae22a94dabad75..802dde30c92877588be2d068a692cf9ed4418b7d 100644 (file)
@@ -70,7 +70,6 @@ extern bool __virt_addr_valid(unsigned long kaddr);
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
-#define __HAVE_ARCH_GATE_AREA 1
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 
 #endif /* __KERNEL__ */
index 0f1ddee6a0ceb66f2d97fd42c4e53acd05300c3d..f408caf73430fb5875db71ada002d88b99cb4d1d 100644 (file)
@@ -39,4 +39,6 @@ void copy_page(void *to, void *from);
 
 #endif /* !__ASSEMBLY__ */
 
+#define __HAVE_ARCH_GATE_AREA 1
+
 #endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 4240878..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_X86_SCATTERLIST_H */
index bde3993624f1c49106aaf0a5c625a715d4fc58fe..b5ea75c4a4b411d14ae2bf810eb7d40bde9f8716 100644 (file)
@@ -118,4 +118,5 @@ ifeq ($(CONFIG_X86_64),y)
 
        obj-$(CONFIG_PCI_MMCONFIG)      += mmconf-fam10h_64.o
        obj-y                           += vsmp_64.o
+       obj-$(CONFIG_KEXEC)             += kexec-bzimage64.o
 endif
index 9c8f7394c612e7fa74d0ce356caa6d526dc02b0f..c7035073dfc17e28268237604bc3731fb78428aa 100644 (file)
@@ -461,7 +461,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 
        cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
 
-       if (strict_strtoul(buf, 10, &val) < 0)
+       if (kstrtoul(buf, 10, &val) < 0)
                return -EINVAL;
 
        err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
@@ -511,7 +511,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
        if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
                return -EINVAL;
 
-       if (strict_strtoul(buf, 16, &val) < 0)
+       if (kstrtoul(buf, 16, &val) < 0)
                return -EINVAL;
 
        if (amd_set_subcaches(cpu, val))
index 4fc57975acc1ced585c16193c5e6b7a9bcb96142..bd9ccda8087ff5f46ab30bc89225b8912ceaac40 100644 (file)
@@ -2136,7 +2136,7 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
 {
        u64 new;
 
-       if (strict_strtoull(buf, 0, &new) < 0)
+       if (kstrtou64(buf, 0, &new) < 0)
                return -EINVAL;
 
        attr_to_bank(attr)->ctl = new;
@@ -2174,7 +2174,7 @@ static ssize_t set_ignore_ce(struct device *s,
 {
        u64 new;
 
-       if (strict_strtoull(buf, 0, &new) < 0)
+       if (kstrtou64(buf, 0, &new) < 0)
                return -EINVAL;
 
        if (mca_cfg.ignore_ce ^ !!new) {
@@ -2198,7 +2198,7 @@ static ssize_t set_cmci_disabled(struct device *s,
 {
        u64 new;
 
-       if (strict_strtoull(buf, 0, &new) < 0)
+       if (kstrtou64(buf, 0, &new) < 0)
                return -EINVAL;
 
        if (mca_cfg.cmci_disabled ^ !!new) {
index 603df4f74640c827db1c889f793be31e1f853075..1e49f8f41276edac73dfc8cbea3417693c6aedd2 100644 (file)
@@ -353,7 +353,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
        if (!b->interrupt_capable)
                return -EINVAL;
 
-       if (strict_strtoul(buf, 0, &new) < 0)
+       if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;
 
        b->interrupt_enable = !!new;
@@ -372,7 +372,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
        struct thresh_restart tr;
        unsigned long new;
 
-       if (strict_strtoul(buf, 0, &new) < 0)
+       if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;
 
        if (new > THRESHOLD_MAX)
index 507de80665942b87a148e032fd8aeedfeb76c70b..0553a34fa0df9eacf9336ee9076b28802531240b 100644 (file)
@@ -4,9 +4,14 @@
  * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
  *
  * Copyright (C) IBM Corporation, 2004. All rights reserved.
+ * Copyright (C) Red Hat Inc., 2014. All rights reserved.
+ * Authors:
+ *      Vivek Goyal <vgoyal@redhat.com>
  *
  */
 
+#define pr_fmt(fmt)    "kexec: " fmt
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
@@ -16,6 +21,7 @@
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
 #include <asm/reboot.h>
 #include <asm/virtext.h>
 
+/* Alignment required for elf header segment */
+#define ELF_CORE_HEADER_ALIGN   4096
+
+/* This primarily represents number of split ranges due to exclusion */
+#define CRASH_MAX_RANGES       16
+
+struct crash_mem_range {
+       u64 start, end;
+};
+
+struct crash_mem {
+       unsigned int nr_ranges;
+       struct crash_mem_range ranges[CRASH_MAX_RANGES];
+};
+
+/* Misc data about ram ranges needed to prepare elf headers */
+struct crash_elf_data {
+       struct kimage *image;
+       /*
+        * Total number of ram ranges we have after various adjustments for
+        * GART, crash reserved region etc.
+        */
+       unsigned int max_nr_ranges;
+       unsigned long gart_start, gart_end;
+
+       /* Pointer to elf header */
+       void *ehdr;
+       /* Pointer to next phdr */
+       void *bufp;
+       struct crash_mem mem;
+};
+
+/* Used while preparing memory map entries for second kernel */
+struct crash_memmap_data {
+       struct boot_params *params;
+       /* Type of memory */
+       unsigned int type;
+};
+
 int in_crash_kexec;
 
 /*
@@ -39,6 +84,7 @@ int in_crash_kexec;
  */
 crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
 EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+unsigned long crash_zero_bytes;
 
 static inline void cpu_crash_vmclear_loaded_vmcss(void)
 {
@@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 #endif
        crash_save_cpu(regs, safe_smp_processor_id());
 }
+
+#ifdef CONFIG_X86_64
+
+static int get_nr_ram_ranges_callback(unsigned long start_pfn,
+                               unsigned long nr_pfn, void *arg)
+{
+       int *nr_ranges = arg;
+
+       (*nr_ranges)++;
+       return 0;
+}
+
+static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
+{
+       struct crash_elf_data *ced = arg;
+
+       ced->gart_start = start;
+       ced->gart_end = end;
+
+       /* Not expecting more than 1 gart aperture */
+       return 1;
+}
+
+
+/* Gather all the required information to prepare elf headers for ram regions */
+static void fill_up_crash_elf_data(struct crash_elf_data *ced,
+                                  struct kimage *image)
+{
+       unsigned int nr_ranges = 0;
+
+       ced->image = image;
+
+       walk_system_ram_range(0, -1, &nr_ranges,
+                               get_nr_ram_ranges_callback);
+
+       ced->max_nr_ranges = nr_ranges;
+
+       /*
+        * We don't create ELF headers for the GART aperture, as an
+        * attempt to dump this memory in the second kernel leads to a
+        * hang/crash. If a GART aperture is present, that region needs
+        * to be excluded, which could require an extra phdr.
+        */
+       walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
+                               ced, get_gart_ranges_callback);
+
+       /*
+        * If we have a GART region, excluding it could split a memory
+        * range, resulting in an extra header. Account for that.
+        */
+       if (ced->gart_end)
+               ced->max_nr_ranges++;
+
+       /* Exclusion of crash region could split memory ranges */
+       ced->max_nr_ranges++;
+
+       /* If crashk_low_res is not 0, another range split is possible */
+       if (crashk_low_res.end != 0)
+               ced->max_nr_ranges++;
+}
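A worked example of the budgeting above, with illustrative numbers:

/*
 * Say walk_system_ram_range() reports 10 ranges, a GART aperture is
 * present, and crashk_low_res is in use:
 *
 *   max_nr_ranges = 10    RAM ranges
 *                 +  1    GART exclusion may split one range
 *                 +  1    crashk_res exclusion may split one range
 *                 +  1    crashk_low_res exclusion may split one range
 *                 = 13
 */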
+
+static int exclude_mem_range(struct crash_mem *mem,
+               unsigned long long mstart, unsigned long long mend)
+{
+       int i, j;
+       unsigned long long start, end;
+       struct crash_mem_range temp_range = {0, 0};
+
+       for (i = 0; i < mem->nr_ranges; i++) {
+               start = mem->ranges[i].start;
+               end = mem->ranges[i].end;
+
+               if (mstart > end || mend < start)
+                       continue;
+
+               /* Truncate any area outside of range */
+               if (mstart < start)
+                       mstart = start;
+               if (mend > end)
+                       mend = end;
+
+               /* Found completely overlapping range */
+               if (mstart == start && mend == end) {
+                       mem->ranges[i].start = 0;
+                       mem->ranges[i].end = 0;
+                       if (i < mem->nr_ranges - 1) {
+                               /* Shift rest of the ranges to left */
+                               for (j = i; j < mem->nr_ranges - 1; j++) {
+                                       mem->ranges[j].start =
+                                               mem->ranges[j+1].start;
+                                       mem->ranges[j].end =
+                                                       mem->ranges[j+1].end;
+                               }
+                       }
+                       mem->nr_ranges--;
+                       return 0;
+               }
+
+               if (mstart > start && mend < end) {
+                       /* Split original range */
+                       mem->ranges[i].end = mstart - 1;
+                       temp_range.start = mend + 1;
+                       temp_range.end = end;
+               } else if (mstart != start)
+                       mem->ranges[i].end = mstart - 1;
+               else
+                       mem->ranges[i].start = mend + 1;
+               break;
+       }
+
+       /* If a split happened, add the new range to the array */
+       if (!temp_range.end)
+               return 0;
+
+       /* Split happened */
+       if (i == CRASH_MAX_RANGES - 1) {
+               pr_err("Too many crash ranges after split\n");
+               return -ENOMEM;
+       }
+
+       /* Location where new range should go */
+       j = i + 1;
+       if (j < mem->nr_ranges) {
+               /* Move over all ranges one slot towards the end */
+               for (i = mem->nr_ranges - 1; i >= j; i--)
+                       mem->ranges[i + 1] = mem->ranges[i];
+       }
+
+       mem->ranges[j].start = temp_range.start;
+       mem->ranges[j].end = temp_range.end;
+       mem->nr_ranges++;
+       return 0;
+}
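To make the splitting behaviour concrete, a hypothetical caller of the helper above:

struct crash_mem m = {
        .nr_ranges = 1,
        .ranges = { { .start = 0x0000, .end = 0xffff } },
};

/* Carving [0x4000, 0x7fff] out of [0x0000, 0xffff] splits it in two:
 * ranges[0] = [0x0000, 0x3fff], ranges[1] = [0x8000, 0xffff], and
 * m.nr_ranges becomes 2. */
exclude_mem_range(&m, 0x4000, 0x7fff);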
+
+/*
+ * Look for any unwanted ranges between mstart and mend and remove them.
+ * This might split ranges; the resulting ranges are put in ced->mem.ranges[].
+ */
+static int elf_header_exclude_ranges(struct crash_elf_data *ced,
+               unsigned long long mstart, unsigned long long mend)
+{
+       struct crash_mem *cmem = &ced->mem;
+       int ret = 0;
+
+       memset(cmem->ranges, 0, sizeof(cmem->ranges));
+
+       cmem->ranges[0].start = mstart;
+       cmem->ranges[0].end = mend;
+       cmem->nr_ranges = 1;
+
+       /* Exclude crashkernel region */
+       ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+       if (ret)
+               return ret;
+
+       ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+       if (ret)
+               return ret;
+
+       /* Exclude GART region */
+       if (ced->gart_end) {
+               ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
+               if (ret)
+                       return ret;
+       }
+
+       return ret;
+}
+
+static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
+{
+       struct crash_elf_data *ced = arg;
+       Elf64_Ehdr *ehdr;
+       Elf64_Phdr *phdr;
+       unsigned long mstart, mend;
+       struct kimage *image = ced->image;
+       struct crash_mem *cmem;
+       int ret, i;
+
+       ehdr = ced->ehdr;
+
+       /* Exclude unwanted mem ranges */
+       ret = elf_header_exclude_ranges(ced, start, end);
+       if (ret)
+               return ret;
+
+       /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
+       cmem = &ced->mem;
+
+       for (i = 0; i < cmem->nr_ranges; i++) {
+               mstart = cmem->ranges[i].start;
+               mend = cmem->ranges[i].end;
+
+               phdr = ced->bufp;
+               ced->bufp += sizeof(Elf64_Phdr);
+
+               phdr->p_type = PT_LOAD;
+               phdr->p_flags = PF_R|PF_W|PF_X;
+               phdr->p_offset  = mstart;
+
+               /*
+                * If a range matches backup region, adjust offset to backup
+                * segment.
+                */
+               if (mstart == image->arch.backup_src_start &&
+                   (mend - mstart + 1) == image->arch.backup_src_sz)
+                       phdr->p_offset = image->arch.backup_load_addr;
+
+               phdr->p_paddr = mstart;
+               phdr->p_vaddr = (unsigned long long) __va(mstart);
+               phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
+               phdr->p_align = 0;
+               ehdr->e_phnum++;
+               pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+                       phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
+                       ehdr->e_phnum, phdr->p_offset);
+       }
+
+       return ret;
+}
+
+static int prepare_elf64_headers(struct crash_elf_data *ced,
+               void **addr, unsigned long *sz)
+{
+       Elf64_Ehdr *ehdr;
+       Elf64_Phdr *phdr;
+       unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
+       unsigned char *buf, *bufp;
+       unsigned int cpu;
+       unsigned long long notes_addr;
+       int ret;
+
+       /* extra phdr for vmcoreinfo elf note */
+       nr_phdr = nr_cpus + 1;
+       nr_phdr += ced->max_nr_ranges;
+
+       /*
+        * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
+        * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
+        * I think this is required by tools like gdb. So the same physical
+        * memory will be mapped in two elf headers: one will contain the
+        * kernel text virtual addresses and the other __va(physical) addresses.
+        */
+
+       nr_phdr++;
+       elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
+       elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
+
+       buf = vzalloc(elf_sz);
+       if (!buf)
+               return -ENOMEM;
+
+       bufp = buf;
+       ehdr = (Elf64_Ehdr *)bufp;
+       bufp += sizeof(Elf64_Ehdr);
+       memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+       ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+       ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+       ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+       ehdr->e_ident[EI_OSABI] = ELF_OSABI;
+       memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+       ehdr->e_type = ET_CORE;
+       ehdr->e_machine = ELF_ARCH;
+       ehdr->e_version = EV_CURRENT;
+       ehdr->e_phoff = sizeof(Elf64_Ehdr);
+       ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+       ehdr->e_phentsize = sizeof(Elf64_Phdr);
+
+       /* Prepare one phdr of type PT_NOTE for each present cpu */
+       for_each_present_cpu(cpu) {
+               phdr = (Elf64_Phdr *)bufp;
+               bufp += sizeof(Elf64_Phdr);
+               phdr->p_type = PT_NOTE;
+               notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
+               phdr->p_offset = phdr->p_paddr = notes_addr;
+               phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
+               (ehdr->e_phnum)++;
+       }
+
+       /* Prepare one PT_NOTE header for vmcoreinfo */
+       phdr = (Elf64_Phdr *)bufp;
+       bufp += sizeof(Elf64_Phdr);
+       phdr->p_type = PT_NOTE;
+       phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
+       phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+       (ehdr->e_phnum)++;
+
+#ifdef CONFIG_X86_64
+       /* Prepare PT_LOAD type program header for kernel text region */
+       phdr = (Elf64_Phdr *)bufp;
+       bufp += sizeof(Elf64_Phdr);
+       phdr->p_type = PT_LOAD;
+       phdr->p_flags = PF_R|PF_W|PF_X;
+       phdr->p_vaddr = (Elf64_Addr)_text;
+       phdr->p_filesz = phdr->p_memsz = _end - _text;
+       phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
+       (ehdr->e_phnum)++;
+#endif
+
+       /* Prepare PT_LOAD headers for system ram chunks. */
+       ced->ehdr = ehdr;
+       ced->bufp = bufp;
+       ret = walk_system_ram_res(0, -1, ced,
+                       prepare_elf64_ram_headers_callback);
+       if (ret < 0)
+               return ret;
+
+       *addr = buf;
+       *sz = elf_sz;
+       return 0;
+}
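A sizing example for the header buffer, again with illustrative numbers:

/*
 * With 4 possible CPUs and max_nr_ranges = 13:
 *
 *   nr_phdr = 4 + 1      one PT_NOTE per cpu, plus vmcoreinfo
 *           + 13         RAM ranges
 *           + 1          kernel-text PT_LOAD
 *           = 19
 *   elf_sz  = 64 + 19 * 56   sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr)
 *           = 1128, which ALIGN() rounds up to 4096 before vzalloc().
 */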
+
+/* Prepare elf headers. Return addr and size */
+static int prepare_elf_headers(struct kimage *image, void **addr,
+                                       unsigned long *sz)
+{
+       struct crash_elf_data *ced;
+       int ret;
+
+       ced = kzalloc(sizeof(*ced), GFP_KERNEL);
+       if (!ced)
+               return -ENOMEM;
+
+       fill_up_crash_elf_data(ced, image);
+
+       /* By default prepare 64bit headers */
+       ret =  prepare_elf64_headers(ced, addr, sz);
+       kfree(ced);
+       return ret;
+}
+
+static int add_e820_entry(struct boot_params *params, struct e820entry *entry)
+{
+       unsigned int nr_e820_entries;
+
+       nr_e820_entries = params->e820_entries;
+       if (nr_e820_entries >= E820MAX)
+               return 1;
+
+       memcpy(&params->e820_map[nr_e820_entries], entry,
+                       sizeof(struct e820entry));
+       params->e820_entries++;
+       return 0;
+}
+
+static int memmap_entry_callback(u64 start, u64 end, void *arg)
+{
+       struct crash_memmap_data *cmd = arg;
+       struct boot_params *params = cmd->params;
+       struct e820entry ei;
+
+       ei.addr = start;
+       ei.size = end - start + 1;
+       ei.type = cmd->type;
+       add_e820_entry(params, &ei);
+
+       return 0;
+}
+
+static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
+                                unsigned long long mstart,
+                                unsigned long long mend)
+{
+       unsigned long start, end;
+       int ret = 0;
+
+       cmem->ranges[0].start = mstart;
+       cmem->ranges[0].end = mend;
+       cmem->nr_ranges = 1;
+
+       /* Exclude Backup region */
+       start = image->arch.backup_load_addr;
+       end = start + image->arch.backup_src_sz - 1;
+       ret = exclude_mem_range(cmem, start, end);
+       if (ret)
+               return ret;
+
+       /* Exclude elf header region */
+       start = image->arch.elf_load_addr;
+       end = start + image->arch.elf_headers_sz - 1;
+       return exclude_mem_range(cmem, start, end);
+}
+
+/* Prepare memory map for crash dump kernel */
+int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
+{
+       int i, ret = 0;
+       unsigned long flags;
+       struct e820entry ei;
+       struct crash_memmap_data cmd;
+       struct crash_mem *cmem;
+
+       cmem = vzalloc(sizeof(struct crash_mem));
+       if (!cmem)
+               return -ENOMEM;
+
+       memset(&cmd, 0, sizeof(struct crash_memmap_data));
+       cmd.params = params;
+
+       /* Add first 640K segment */
+       ei.addr = image->arch.backup_src_start;
+       ei.size = image->arch.backup_src_sz;
+       ei.type = E820_RAM;
+       add_e820_entry(params, &ei);
+
+       /* Add ACPI tables */
+       cmd.type = E820_ACPI;
+       flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+                      memmap_entry_callback);
+
+       /* Add ACPI Non-volatile Storage */
+       cmd.type = E820_NVS;
+       walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+                       memmap_entry_callback);
+
+       /* Add crashk_low_res region */
+       if (crashk_low_res.end) {
+               ei.addr = crashk_low_res.start;
+               ei.size = crashk_low_res.end - crashk_low_res.start + 1;
+               ei.type = E820_RAM;
+               add_e820_entry(params, &ei);
+       }
+
+       /* Exclude some ranges from crashk_res and add rest to memmap */
+       ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
+                                               crashk_res.end);
+       if (ret)
+               goto out;
+
+       for (i = 0; i < cmem->nr_ranges; i++) {
+               ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;
+
+               /* If entry is less than a page, skip it */
+               if (ei.size < PAGE_SIZE)
+                       continue;
+               ei.addr = cmem->ranges[i].start;
+               ei.type = E820_RAM;
+               add_e820_entry(params, &ei);
+       }
+
+out:
+       vfree(cmem);
+       return ret;
+}
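The memory map handed to the crash kernel then looks roughly like this (an illustrative layout, not taken from a real boot):

/*
 *   0x00000000 - 0x0009ffff   E820_RAM    the backed-up first 640K
 *   firmware ranges           E820_ACPI   copied via walk_iomem_res()
 *   firmware ranges           E820_NVS    copied via walk_iomem_res()
 *   crashk_low_res            E820_RAM    if configured
 *   crashk_res minus the backup and ELF-header segments, as E820_RAM
 */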
+
+static int determine_backup_region(u64 start, u64 end, void *arg)
+{
+       struct kimage *image = arg;
+
+       image->arch.backup_src_start = start;
+       image->arch.backup_src_sz = end - start + 1;
+
+       /* Expecting only one range for backup region */
+       return 1;
+}
+
+int crash_load_segments(struct kimage *image)
+{
+       unsigned long src_start, src_sz, elf_sz;
+       void *elf_addr;
+       int ret;
+
+       /*
+        * Determine and load a segment for the backup area. The first
+        * 640K of RAM is the backup source.
+        */
+
+       ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
+                               image, determine_backup_region);
+
+       /* Zero or positive return values are ok */
+       if (ret < 0)
+               return ret;
+
+       src_start = image->arch.backup_src_start;
+       src_sz = image->arch.backup_src_sz;
+
+       /* Add backup segment. */
+       if (src_sz) {
+               /*
+                * Ideally there is no source for backup segment. This is
+                * copied in purgatory after crash. Just add a zero filled
+                * segment for now to make sure checksum logic works fine.
+                */
+               ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
+                                      sizeof(crash_zero_bytes), src_sz,
+                                      PAGE_SIZE, 0, -1, 0,
+                                      &image->arch.backup_load_addr);
+               if (ret)
+                       return ret;
+               pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
+                        image->arch.backup_load_addr, src_start, src_sz);
+       }
+
+       /* Prepare elf headers and add a segment */
+       ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+       if (ret)
+               return ret;
+
+       image->arch.elf_headers = elf_addr;
+       image->arch.elf_headers_sz = elf_sz;
+
+       ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
+                       ELF_CORE_HEADER_ALIGN, 0, -1, 0,
+                       &image->arch.elf_load_addr);
+       if (ret) {
+               vfree((void *)image->arch.elf_headers);
+               return ret;
+       }
+       pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+                image->arch.elf_load_addr, elf_sz, elf_sz);
+
+       return ret;
+}
+
+#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
new file mode 100644 (file)
index 0000000..9642b9b
--- /dev/null
@@ -0,0 +1,553 @@
+/*
+ * Kexec bzImage loader
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ * Authors:
+ *      Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#define pr_fmt(fmt)    "kexec-bzImage64: " fmt
+
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/efi.h>
+#include <linux/verify_pefile.h>
+#include <keys/system_keyring.h>
+
+#include <asm/bootparam.h>
+#include <asm/setup.h>
+#include <asm/crash.h>
+#include <asm/efi.h>
+
+#define MAX_ELFCOREHDR_STR_LEN 30      /* elfcorehdr=0x<64bit-value> */
+
+/*
+ * Defines the lowest physical address for various segments. Not sure
+ * where exactly these limits came from. The current bzimage64 loader in
+ * kexec-tools uses these, so I am retaining them. They can be changed
+ * over time as we gain more insight.
+ */
+#define MIN_PURGATORY_ADDR     0x3000
+#define MIN_BOOTPARAM_ADDR     0x3000
+#define MIN_KERNEL_LOAD_ADDR   0x100000
+#define MIN_INITRD_LOAD_ADDR   0x1000000
+
+/*
+ * This is a placeholder for all bootloader-specific data structures,
+ * which get allocated in one call but freed much later, at cleanup
+ * time. Right now there is only one field, but it can grow as need be.
+ */
+struct bzimage64_data {
+       /*
+        * Temporary buffer to hold bootparams buffer. This should be
+        * freed once the bootparam segment has been loaded.
+        */
+       void *bootparams_buf;
+};
+
+static int setup_initrd(struct boot_params *params,
+               unsigned long initrd_load_addr, unsigned long initrd_len)
+{
+       params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL;
+       params->hdr.ramdisk_size = initrd_len & 0xffffffffUL;
+
+       params->ext_ramdisk_image = initrd_load_addr >> 32;
+       params->ext_ramdisk_size = initrd_len >> 32;
+
+       return 0;
+}
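The split mirrors the boot protocol's legacy 32-bit fields plus the newer 64-bit extension fields; a worked example:

/*
 * A 16 MiB initrd loaded at physical address 0x123456000:
 *
 *   hdr.ramdisk_image = 0x23456000    low 32 bits
 *   ext_ramdisk_image = 0x1           high 32 bits
 *   hdr.ramdisk_size  = 0x01000000
 *   ext_ramdisk_size  = 0x0
 */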
+
+static int setup_cmdline(struct kimage *image, struct boot_params *params,
+                        unsigned long bootparams_load_addr,
+                        unsigned long cmdline_offset, char *cmdline,
+                        unsigned long cmdline_len)
+{
+       char *cmdline_ptr = ((char *)params) + cmdline_offset;
+       unsigned long cmdline_ptr_phys, len;
+       uint32_t cmdline_low_32, cmdline_ext_32;
+
+       memcpy(cmdline_ptr, cmdline, cmdline_len);
+       if (image->type == KEXEC_TYPE_CRASH) {
+               len = sprintf(cmdline_ptr + cmdline_len - 1,
+                       " elfcorehdr=0x%lx", image->arch.elf_load_addr);
+               cmdline_len += len;
+       }
+       cmdline_ptr[cmdline_len - 1] = '\0';
+
+       pr_debug("Final command line is: %s\n", cmdline_ptr);
+       cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
+       cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
+       cmdline_ext_32 = cmdline_ptr_phys >> 32;
+
+       params->hdr.cmd_line_ptr = cmdline_low_32;
+       if (cmdline_ext_32)
+               params->ext_cmd_line_ptr = cmdline_ext_32;
+
+       return 0;
+}
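A worked example of the crash-dump append path above (the address is illustrative):

/*
 * cmdline = "root=/dev/sda1\0" (cmdline_len = 15) and
 * image->arch.elf_load_addr = 0x96000000:
 *
 *   sprintf() overwrites the NUL at offset 14 and appends
 *   " elfcorehdr=0x96000000" (22 characters), so cmdline_len becomes
 *   37 and the byte at offset 36 is the new terminating NUL.
 */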
+
+static int setup_e820_entries(struct boot_params *params)
+{
+       unsigned int nr_e820_entries;
+
+       nr_e820_entries = e820_saved.nr_map;
+
+       /* TODO: Pass entries more than E820MAX in bootparams setup data */
+       if (nr_e820_entries > E820MAX)
+               nr_e820_entries = E820MAX;
+
+       params->e820_entries = nr_e820_entries;
+       memcpy(&params->e820_map, &e820_saved.map,
+              nr_e820_entries * sizeof(struct e820entry));
+
+       return 0;
+}
+
+#ifdef CONFIG_EFI
+static int setup_efi_info_memmap(struct boot_params *params,
+                                 unsigned long params_load_addr,
+                                 unsigned int efi_map_offset,
+                                 unsigned int efi_map_sz)
+{
+       void *efi_map = (void *)params + efi_map_offset;
+       unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset;
+       struct efi_info *ei = &params->efi_info;
+
+       if (!efi_map_sz)
+               return 0;
+
+       efi_runtime_map_copy(efi_map, efi_map_sz);
+
+       ei->efi_memmap = efi_map_phys_addr & 0xffffffff;
+       ei->efi_memmap_hi = efi_map_phys_addr >> 32;
+       ei->efi_memmap_size = efi_map_sz;
+
+       return 0;
+}
+
+static int
+prepare_add_efi_setup_data(struct boot_params *params,
+                      unsigned long params_load_addr,
+                      unsigned int efi_setup_data_offset)
+{
+       unsigned long setup_data_phys;
+       struct setup_data *sd = (void *)params + efi_setup_data_offset;
+       struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
+
+       esd->fw_vendor = efi.fw_vendor;
+       esd->runtime = efi.runtime;
+       esd->tables = efi.config_table;
+       esd->smbios = efi.smbios;
+
+       sd->type = SETUP_EFI;
+       sd->len = sizeof(struct efi_setup_data);
+
+       /* Add setup data */
+       setup_data_phys = params_load_addr + efi_setup_data_offset;
+       sd->next = params->hdr.setup_data;
+       params->hdr.setup_data = setup_data_phys;
+
+       return 0;
+}
+
+static int
+setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
+               unsigned int efi_map_offset, unsigned int efi_map_sz,
+               unsigned int efi_setup_data_offset)
+{
+       struct efi_info *current_ei = &boot_params.efi_info;
+       struct efi_info *ei = &params->efi_info;
+
+       if (!current_ei->efi_memmap_size)
+               return 0;
+
+       /*
+        * If the 1:1 mapping is not enabled, the second kernel cannot set
+        * up EFI and use EFI runtime services. User space will have to
+        * pass acpi_rsdp=<addr> on the kernel command line to make the
+        * second kernel boot without EFI.
+        */
+       if (efi_enabled(EFI_OLD_MEMMAP))
+               return 0;
+
+       ei->efi_loader_signature = current_ei->efi_loader_signature;
+       ei->efi_systab = current_ei->efi_systab;
+       ei->efi_systab_hi = current_ei->efi_systab_hi;
+
+       ei->efi_memdesc_version = current_ei->efi_memdesc_version;
+       ei->efi_memdesc_size = efi_get_runtime_map_desc_size();
+
+       setup_efi_info_memmap(params, params_load_addr, efi_map_offset,
+                             efi_map_sz);
+       prepare_add_efi_setup_data(params, params_load_addr,
+                                  efi_setup_data_offset);
+       return 0;
+}
+#endif /* CONFIG_EFI */
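hdr.setup_data is a singly linked list threaded through physical addresses, and prepare_add_efi_setup_data() pushes the new node at the head. A hedged sketch of how a consumer might walk the chain; real early-boot code maps each node with early_memremap() rather than the direct-map dereference assumed here:

u64 pa = params->hdr.setup_data;

while (pa) {
        struct setup_data *sd = phys_to_virt(pa);       /* assumes direct map */

        if (sd->type == SETUP_EFI)
                break;          /* the node appended above */
        pa = sd->next;
}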
+
+static int
+setup_boot_parameters(struct kimage *image, struct boot_params *params,
+                     unsigned long params_load_addr,
+                     unsigned int efi_map_offset, unsigned int efi_map_sz,
+                     unsigned int efi_setup_data_offset)
+{
+       unsigned int nr_e820_entries;
+       unsigned long long mem_k, start, end;
+       int i, ret = 0;
+
+       /* Get subarch from existing bootparams */
+       params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
+
+       /* Copying screen_info will do? */
+       memcpy(&params->screen_info, &boot_params.screen_info,
+                               sizeof(struct screen_info));
+
+       /* Fill in memsize later */
+       params->screen_info.ext_mem_k = 0;
+       params->alt_mem_k = 0;
+
+       /* Default APM info */
+       memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
+
+       /* Default drive info */
+       memset(&params->hd0_info, 0, sizeof(params->hd0_info));
+       memset(&params->hd1_info, 0, sizeof(params->hd1_info));
+
+       /* Default sysdesc table */
+       params->sys_desc_table.length = 0;
+
+       if (image->type == KEXEC_TYPE_CRASH) {
+               ret = crash_setup_memmap_entries(image, params);
+               if (ret)
+                       return ret;
+       } else
+               setup_e820_entries(params);
+
+       nr_e820_entries = params->e820_entries;
+
+       for (i = 0; i < nr_e820_entries; i++) {
+               if (params->e820_map[i].type != E820_RAM)
+                       continue;
+               start = params->e820_map[i].addr;
+               end = params->e820_map[i].addr + params->e820_map[i].size - 1;
+
+               if ((start <= 0x100000) && end > 0x100000) {
+                       mem_k = (end >> 10) - (0x100000 >> 10);
+                       params->screen_info.ext_mem_k = mem_k;
+                       params->alt_mem_k = mem_k;
+                       if (mem_k > 0xfc00)
+                               params->screen_info.ext_mem_k = 0xfc00; /* 64M*/
+                       if (mem_k > 0xffffffff)
+                               params->alt_mem_k = 0xffffffff;
+               }
+       }
+
+#ifdef CONFIG_EFI
+       /* Setup EFI state */
+       setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
+                       efi_setup_data_offset);
+#endif
+
+       /* Setup EDD info */
+       memcpy(params->eddbuf, boot_params.eddbuf,
+                               EDDMAXNR * sizeof(struct edd_info));
+       params->eddbuf_entries = boot_params.eddbuf_entries;
+
+       memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
+              EDD_MBR_SIG_MAX * sizeof(unsigned int));
+
+       return ret;
+}
+
+int bzImage64_probe(const char *buf, unsigned long len)
+{
+       int ret = -ENOEXEC;
+       struct setup_header *header;
+
+       /* kernel should be at least two sectors long */
+       if (len < 2 * 512) {
+               pr_err("File is too short to be a bzImage\n");
+               return ret;
+       }
+
+       header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr));
+       if (memcmp((char *)&header->header, "HdrS", 4) != 0) {
+               pr_err("Not a bzImage\n");
+               return ret;
+       }
+
+       if (header->boot_flag != 0xAA55) {
+               pr_err("No x86 boot sector present\n");
+               return ret;
+       }
+
+       if (header->version < 0x020C) {
+               pr_err("Must be at least protocol version 2.12\n");
+               return ret;
+       }
+
+       if (!(header->loadflags & LOADED_HIGH)) {
+               pr_err("zImage not a bzImage\n");
+               return ret;
+       }
+
+       if (!(header->xloadflags & XLF_KERNEL_64)) {
+               pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n");
+               return ret;
+       }
+
+       if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) {
+               pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n");
+               return ret;
+       }
+
+       /*
+        * Can't handle 32-bit EFI, as it does not allow loading a kernel
+        * above 4G. This should be handled by a 32-bit bzImage loader.
+        */
+       if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) {
+               pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n");
+               return ret;
+       }
+
+       /* I've got a bzImage */
+       pr_debug("It's a relocatable bzImage64\n");
+       ret = 0;
+
+       return ret;
+}
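The same header fields can be sanity-checked outside the kernel; a hypothetical standalone sketch, with offsets taken from Documentation/x86/boot.txt (a little-endian host is assumed):

#include <stdint.h>
#include <string.h>

static int looks_like_bzimage64(const unsigned char *buf, unsigned long len)
{
        uint16_t boot_flag, version;

        if (len < 2 * 512)                      /* at least two sectors */
                return 0;
        if (memcmp(buf + 0x202, "HdrS", 4))     /* setup header magic */
                return 0;
        memcpy(&boot_flag, buf + 0x1fe, 2);
        if (boot_flag != 0xAA55)                /* boot sector mark */
                return 0;
        memcpy(&version, buf + 0x206, 2);
        if (version < 0x020C)                   /* need protocol >= 2.12 */
                return 0;
        return 1;
}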
+
+void *bzImage64_load(struct kimage *image, char *kernel,
+                    unsigned long kernel_len, char *initrd,
+                    unsigned long initrd_len, char *cmdline,
+                    unsigned long cmdline_len)
+{
+
+       struct setup_header *header;
+       int setup_sects, kern16_size, ret = 0;
+       unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
+       struct boot_params *params;
+       unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
+       unsigned long purgatory_load_addr;
+       unsigned long kernel_bufsz, kernel_memsz, kernel_align;
+       char *kernel_buf;
+       struct bzimage64_data *ldata;
+       struct kexec_entry64_regs regs64;
+       void *stack;
+       unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
+       unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
+
+       header = (struct setup_header *)(kernel + setup_hdr_offset);
+       setup_sects = header->setup_sects;
+       if (setup_sects == 0)
+               setup_sects = 4;
+
+       kern16_size = (setup_sects + 1) * 512;
+       if (kernel_len < kern16_size) {
+               pr_err("bzImage truncated\n");
+               return ERR_PTR(-ENOEXEC);
+       }
+
+       if (cmdline_len > header->cmdline_size) {
+               pr_err("Kernel command line too long\n");
+               return ERR_PTR(-EINVAL);
+       }
+
+       /*
+        * In case of crash dump, we will append elfcorehdr=<addr> to
+        * the command line. Make sure it does not overflow.
+        */
+       if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) {
+               pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n");
+               return ERR_PTR(-EINVAL);
+       }
+
+       /* Allocate and load backup region */
+       if (image->type == KEXEC_TYPE_CRASH) {
+               ret = crash_load_segments(image);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
+
+       /*
+        * Load purgatory. For the 64-bit entry point, purgatory code
+        * can be loaded anywhere.
+        */
+       ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
+                                  &purgatory_load_addr);
+       if (ret) {
+               pr_err("Loading purgatory failed\n");
+               return ERR_PTR(ret);
+       }
+
+       pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+
+       /*
+        * Load boot params, the command line and space for EFI data.
+        *
+        * Allocate memory for these data structures together so that
+        * they can all go in a single area/segment and we don't have
+        * to create a separate segment for each. This keeps things a
+        * little simpler.
+        */
+       efi_map_sz = efi_get_runtime_map_size();
+       efi_map_sz = ALIGN(efi_map_sz, 16);
+       params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
+                               MAX_ELFCOREHDR_STR_LEN;
+       params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
+       params_misc_sz = params_cmdline_sz + efi_map_sz +
+                               sizeof(struct setup_data) +
+                               sizeof(struct efi_setup_data);
+
+       params = kzalloc(params_misc_sz, GFP_KERNEL);
+       if (!params)
+               return ERR_PTR(-ENOMEM);
+       efi_map_offset = params_cmdline_sz;
+       efi_setup_data_offset = efi_map_offset + efi_map_sz;
+
+       /* Copy the setup header into bootparams. See Documentation/x86/boot.txt. */
+       setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
+
+       /* Is there a limit on setup header size? */
+       memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
+
+       ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
+                              params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
+                              ULONG_MAX, 1, &bootparam_load_addr);
+       if (ret)
+               goto out_free_params;
+       pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+                bootparam_load_addr, params_misc_sz, params_misc_sz);
+
+       /* Load kernel */
+       kernel_buf = kernel + kern16_size;
+       kernel_bufsz =  kernel_len - kern16_size;
+       kernel_memsz = PAGE_ALIGN(header->init_size);
+       kernel_align = header->kernel_alignment;
+
+       ret = kexec_add_buffer(image, kernel_buf,
+                              kernel_bufsz, kernel_memsz, kernel_align,
+                              MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
+                              &kernel_load_addr);
+       if (ret)
+               goto out_free_params;
+
+       pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+                kernel_load_addr, kernel_memsz, kernel_memsz);
+
+       /* Load initrd high */
+       if (initrd) {
+               ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
+                                      PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
+                                      ULONG_MAX, 1, &initrd_load_addr);
+               if (ret)
+                       goto out_free_params;
+
+               pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+                               initrd_load_addr, initrd_len, initrd_len);
+
+               setup_initrd(params, initrd_load_addr, initrd_len);
+       }
+
+       setup_cmdline(image, params, bootparam_load_addr,
+                     sizeof(struct boot_params), cmdline, cmdline_len);
+
+       /* Bootloader info. Do we need a separate ID for the kexec kernel loader? */
+       params->hdr.type_of_loader = 0x0D << 4;
+       params->hdr.loadflags = 0;
+
+       /* Setup purgatory regs for entry */
+       ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+                                            sizeof(regs64), 1);
+       if (ret)
+               goto out_free_params;
+
+       regs64.rbx = 0; /* Bootstrap Processor */
+       regs64.rsi = bootparam_load_addr;
+       regs64.rip = kernel_load_addr + 0x200;
+       stack = kexec_purgatory_get_symbol_addr(image, "stack_end");
+       if (IS_ERR(stack)) {
+               pr_err("Could not find address of symbol stack_end\n");
+               ret = -EINVAL;
+               goto out_free_params;
+       }
+
+       regs64.rsp = (unsigned long)stack;
+       ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+                                            sizeof(regs64), 0);
+       if (ret)
+               goto out_free_params;
+
+       ret = setup_boot_parameters(image, params, bootparam_load_addr,
+                                   efi_map_offset, efi_map_sz,
+                                   efi_setup_data_offset);
+       if (ret)
+               goto out_free_params;
+
+       /* Allocate loader specific data */
+       ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
+       if (!ldata) {
+               ret = -ENOMEM;
+               goto out_free_params;
+       }
+
+       /*
+        * Store a pointer to params so that it can be freed after the
+        * params segment has been loaded and its contents copied
+        * somewhere else.
+        */
+       ldata->bootparams_buf = params;
+       return ldata;
+
+out_free_params:
+       kfree(params);
+       return ERR_PTR(ret);
+}
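
The offsets computed in bzImage64_load() fully describe the single packed
segment that carries boot params, command line and EFI data. A sketch of
its layout (offsets relative to bootparam_load_addr; purely illustrative):

        /*
         * 0                params_cmdline_sz   efi_setup_data_offset
         * |                 |                   |
         * v                 v                   v
         * +--------+--------+-------------------+------------------+
         * | struct | cmdline, with room for     | setup_data +     |
         * | boot_  | elfcorehdr= | EFI runtime  | efi_setup_data   |
         * | params |            | map (16-byte  |                  |
         * |        |            | aligned)      |                  |
         * +--------+------------+---------------+------------------+
         *                       ^ efi_map_offset = params_cmdline_sz
         *                         efi_setup_data_offset
         *                           = efi_map_offset + efi_map_sz
         */
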
+
+/* This cleanup function is called after various segments have been loaded */
+int bzImage64_cleanup(void *loader_data)
+{
+       struct bzimage64_data *ldata = loader_data;
+
+       if (!ldata)
+               return 0;
+
+       kfree(ldata->bootparams_buf);
+       ldata->bootparams_buf = NULL;
+
+       return 0;
+}
+
+#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
+int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+       bool trusted;
+       int ret;
+
+       ret = verify_pefile_signature(kernel, kernel_len,
+                                     system_trusted_keyring, &trusted);
+       if (ret < 0)
+               return ret;
+       if (!trusted)
+               return -EKEYREJECTED;
+       return 0;
+}
+#endif
+
+struct kexec_file_ops kexec_bzImage64_ops = {
+       .probe = bzImage64_probe,
+       .load = bzImage64_load,
+       .cleanup = bzImage64_cleanup,
+#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
+       .verify_sig = bzImage64_verify_sig,
+#endif
+};
index 679cef0791cd842448216f4a6158cd2ae24fe0e0..8b04018e5d1f0732f1d1c3cf3ad092352d1409d6 100644 (file)
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)    "kexec: " fmt
+
 #include <linux/mm.h>
 #include <linux/kexec.h>
 #include <linux/string.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/debugreg.h>
+#include <asm/kexec-bzimage64.h>
+
+static struct kexec_file_ops *kexec_file_loaders[] = {
+               &kexec_bzImage64_ops,
+};
 
 static void free_transition_pgtable(struct kimage *image)
 {
@@ -171,6 +178,38 @@ static void load_segments(void)
                );
 }
 
+/* Update purgatory as needed after various image segments have been prepared */
+static int arch_update_purgatory(struct kimage *image)
+{
+       int ret = 0;
+
+       if (!image->file_mode)
+               return 0;
+
+       /* Setup copying of backup region */
+       if (image->type == KEXEC_TYPE_CRASH) {
+               ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+                               &image->arch.backup_load_addr,
+                               sizeof(image->arch.backup_load_addr), 0);
+               if (ret)
+                       return ret;
+
+               ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+                               &image->arch.backup_src_start,
+                               sizeof(image->arch.backup_src_start), 0);
+               if (ret)
+                       return ret;
+
+               ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+                               &image->arch.backup_src_sz,
+                               sizeof(image->arch.backup_src_sz), 0);
+               if (ret)
+                       return ret;
+       }
+
+       return ret;
+}
+
 int machine_kexec_prepare(struct kimage *image)
 {
        unsigned long start_pgtable;
@@ -184,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image)
        if (result)
                return result;
 
+       /* update purgatory as needed */
+       result = arch_update_purgatory(image);
+       if (result)
+               return result;
+
        return 0;
 }
 
@@ -283,3 +327,198 @@ void arch_crash_save_vmcoreinfo(void)
                              (unsigned long)&_text - __START_KERNEL);
 }
 
+/* arch-dependent functionality related to kexec file-based syscall */
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+                                 unsigned long buf_len)
+{
+       int i, ret = -ENOEXEC;
+       struct kexec_file_ops *fops;
+
+       for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+               fops = kexec_file_loaders[i];
+               if (!fops || !fops->probe)
+                       continue;
+
+               ret = fops->probe(buf, buf_len);
+               if (!ret) {
+                       image->fops = fops;
+                       return ret;
+               }
+       }
+
+       return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+       vfree(image->arch.elf_headers);
+       image->arch.elf_headers = NULL;
+
+       if (!image->fops || !image->fops->load)
+               return ERR_PTR(-ENOEXEC);
+
+       return image->fops->load(image, image->kernel_buf,
+                                image->kernel_buf_len, image->initrd_buf,
+                                image->initrd_buf_len, image->cmdline_buf,
+                                image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+       if (!image->fops || !image->fops->cleanup)
+               return 0;
+
+       return image->fops->cleanup(image->image_loader_data);
+}
+
+int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
+                                unsigned long kernel_len)
+{
+       if (!image->fops || !image->fops->verify_sig) {
+               pr_debug("kernel loader does not support signature verification.\n");
+               return -EKEYREJECTED;
+       }
+
+       return image->fops->verify_sig(kernel, kernel_len);
+}
+
+/*
+ * Apply purgatory relocations.
+ *
+ * ehdr: Pointer to the ELF headers.
+ * sechdrs: Pointer to the section headers.
+ * relsec: Section index of the SHT_RELA section.
+ *
+ * TODO: Some of this code belongs in generic code. Move it into kexec.c.
+ */
+int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
+                                    Elf64_Shdr *sechdrs, unsigned int relsec)
+{
+       unsigned int i;
+       Elf64_Rela *rel;
+       Elf64_Sym *sym;
+       void *location;
+       Elf64_Shdr *section, *symtabsec;
+       unsigned long address, sec_base, value;
+       const char *strtab, *name, *shstrtab;
+
+       /*
+        * ->sh_offset has been modified to keep the pointer to the
+        * section contents in memory.
+        */
+       rel = (void *)sechdrs[relsec].sh_offset;
+
+       /* Section to which relocations apply */
+       section = &sechdrs[sechdrs[relsec].sh_info];
+
+       pr_debug("Applying relocate section %u to %u\n", relsec,
+                sechdrs[relsec].sh_info);
+
+       /* Associated symbol table */
+       symtabsec = &sechdrs[sechdrs[relsec].sh_link];
+
+       /* String table */
+       if (symtabsec->sh_link >= ehdr->e_shnum) {
+               /* Invalid strtab section number */
+               pr_err("Invalid string table section index %d\n",
+                      symtabsec->sh_link);
+               return -ENOEXEC;
+       }
+
+       strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
+
+       /* section header string table */
+       shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
+
+       for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+
+               /*
+                * rel[i].r_offset contains the byte offset from the
+                * beginning of the section to the storage unit affected.
+                *
+                * This is the location to update (->sh_offset), i.e. the
+                * temporary buffer where the section is currently loaded.
+                * It will finally be loaded to a different address later,
+                * pointed to by ->sh_addr. kexec takes care of moving it
+                * (kexec_load_segment()).
+                */
+               location = (void *)(section->sh_offset + rel[i].r_offset);
+
+               /* Final address of the location */
+               address = section->sh_addr + rel[i].r_offset;
+
+               /*
+                * rel[i].r_info encodes the symbol table index against
+                * which the relocation must be made and the type of
+                * relocation to apply. The ELF64_R_SYM() and
+                * ELF64_R_TYPE() macros extract these, respectively.
+                */
+               sym = (Elf64_Sym *)symtabsec->sh_offset +
+                               ELF64_R_SYM(rel[i].r_info);
+
+               if (sym->st_name)
+                       name = strtab + sym->st_name;
+               else
+                       name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+               pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n",
+                        name, sym->st_info, sym->st_shndx, sym->st_value,
+                        sym->st_size);
+
+               if (sym->st_shndx == SHN_UNDEF) {
+                       pr_err("Undefined symbol: %s\n", name);
+                       return -ENOEXEC;
+               }
+
+               if (sym->st_shndx == SHN_COMMON) {
+                       pr_err("symbol '%s' in common section\n", name);
+                       return -ENOEXEC;
+               }
+
+               if (sym->st_shndx == SHN_ABS)
+                       sec_base = 0;
+               else if (sym->st_shndx >= ehdr->e_shnum) {
+                       pr_err("Invalid section %d for symbol %s\n",
+                              sym->st_shndx, name);
+                       return -ENOEXEC;
+               } else
+                       sec_base = sechdrs[sym->st_shndx].sh_addr;
+
+               value = sym->st_value;
+               value += sec_base;
+               value += rel[i].r_addend;
+
+               switch (ELF64_R_TYPE(rel[i].r_info)) {
+               case R_X86_64_NONE:
+                       break;
+               case R_X86_64_64:
+                       *(u64 *)location = value;
+                       break;
+               case R_X86_64_32:
+                       *(u32 *)location = value;
+                       if (value != *(u32 *)location)
+                               goto overflow;
+                       break;
+               case R_X86_64_32S:
+                       *(s32 *)location = value;
+                       if ((s64)value != *(s32 *)location)
+                               goto overflow;
+                       break;
+               case R_X86_64_PC32:
+                       value -= (u64)address;
+                       *(u32 *)location = value;
+                       break;
+               default:
+                       pr_err("Unknown rela relocation: %llu\n",
+                              ELF64_R_TYPE(rel[i].r_info));
+                       return -ENOEXEC;
+               }
+       }
+       return 0;
+
+overflow:
+       pr_err("Overflow in relocation type %d value 0x%lx\n",
+              (int)ELF64_R_TYPE(rel[i].r_info), value);
+       return -ENOEXEC;
+}
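
The switch above implements the standard x86_64 RELA formulas, where S is
the symbol value plus its section base, A is the addend and P is the final
address of the patched location. A compact restatement (illustrative
sketch; the case numbers are the ELF-defined relocation constants):

        #include <stdint.h>

        /* S = sec_base + sym->st_value, A = rel[i].r_addend, P = address */
        static uint64_t rela_value(uint64_t S, int64_t A, uint64_t P,
                                   uint32_t type)
        {
                switch (type) {
                case 1:  return S + A;          /* R_X86_64_64             */
                case 10:                        /* R_X86_64_32  (fits u32) */
                case 11: return S + A;          /* R_X86_64_32S (fits s32) */
                case 2:  return S + A - P;      /* R_X86_64_PC32           */
                default: return 0;              /* rejected with -ENOEXEC  */
                }
        }
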
index 1185fe7a7f47b053ba3c0fcb1b079d1614581e57..9ade5cfb5a4c26cf9d63f6f59519d50e94b5e2ae 100644 (file)
@@ -273,7 +273,7 @@ static int mmu_audit_set(const char *val, const struct kernel_param *kp)
        int ret;
        unsigned long enable;
 
-       ret = strict_strtoul(val, 10, &enable);
+       ret = kstrtoul(val, 10, &enable);
        if (ret < 0)
                return -EINVAL;
 
index ed161c6e278b06824377f0e9a982a196e92e2e1c..3968d67d366bc714231e2f4b2ba864eada762fe6 100644 (file)
@@ -1479,7 +1479,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
                return count;
        }
 
-       if (strict_strtol(optstr, 10, &input_arg) < 0) {
+       if (kstrtol(optstr, 10, &input_arg) < 0) {
                printk(KERN_DEBUG "%s is invalid\n", optstr);
                return -EINVAL;
        }
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
new file mode 100644 (file)
index 0000000..7fde9ee
--- /dev/null
@@ -0,0 +1,30 @@
+purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string.o
+
+targets += $(purgatory-y)
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
+targets += purgatory.ro
+
+# The default KBUILD_CFLAGS can have the -pg option set when FTRACE is
+# enabled. That in turn leaves undefined symbols like __fentry__ in
+# purgatory, and it is unclear how to relocate those. Like kexec-tools,
+# use custom flags.
+
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
+
+$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+               $(call if_changed,ld)
+
+targets += kexec-purgatory.c
+
+quiet_cmd_bin2c = BIN2C   $@
+      cmd_bin2c = cat $(obj)/purgatory.ro | $(objtree)/scripts/basic/bin2c kexec_purgatory > $(obj)/kexec-purgatory.c
+
+$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
+       $(call if_changed,bin2c)
+
+# No loaders for 32-bit yet.
+ifeq ($(CONFIG_X86_64),y)
+ obj-$(CONFIG_KEXEC)           += kexec-purgatory.o
+endif
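
The bin2c step turns the linked purgatory.ro into a C translation unit the
kernel can link directly. The generated kexec-purgatory.c is roughly of
the following shape (hedged; the exact formatting emitted by
scripts/basic/bin2c may differ, and bin2c writes an explicit byte count
where sizeof is shown here):

        const char kexec_purgatory[] =
                "\x7f\x45\x4c\x46"  /* ...remaining raw bytes of purgatory.ro */
                ;
        const int kexec_purgatory_size = sizeof(kexec_purgatory) - 1;
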
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
new file mode 100644 (file)
index 0000000..d1a4291
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2003,2004  Eric Biederman (ebiederm@xmission.com)
+ * Copyright (C) 2014  Red Hat Inc.
+
+ * Author(s): Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This code has been taken from kexec-tools.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+       .text
+       .balign 16
+       .code64
+       .globl entry64, entry64_regs
+
+
+entry64:
+       /* Setup a gdt that should be preserved */
+       lgdt gdt(%rip)
+
+       /* load the data segments */
+       movl    $0x18, %eax     /* data segment */
+       movl    %eax, %ds
+       movl    %eax, %es
+       movl    %eax, %ss
+       movl    %eax, %fs
+       movl    %eax, %gs
+
+       /* Setup new stack */
+       leaq    stack_init(%rip), %rsp
+       pushq   $0x10 /* CS */
+       leaq    new_cs_exit(%rip), %rax
+       pushq   %rax
+       lretq
+new_cs_exit:
+
+       /* Load the registers */
+       movq    rax(%rip), %rax
+       movq    rbx(%rip), %rbx
+       movq    rcx(%rip), %rcx
+       movq    rdx(%rip), %rdx
+       movq    rsi(%rip), %rsi
+       movq    rdi(%rip), %rdi
+       movq    rsp(%rip), %rsp
+       movq    rbp(%rip), %rbp
+       movq    r8(%rip), %r8
+       movq    r9(%rip), %r9
+       movq    r10(%rip), %r10
+       movq    r11(%rip), %r11
+       movq    r12(%rip), %r12
+       movq    r13(%rip), %r13
+       movq    r14(%rip), %r14
+       movq    r15(%rip), %r15
+
+       /* Jump to the new code... */
+       jmpq    *rip(%rip)
+
+       .section ".rodata"
+       .balign 4
+entry64_regs:
+rax:   .quad 0x0
+rcx:   .quad 0x0
+rdx:   .quad 0x0
+rbx:   .quad 0x0
+rsp:   .quad 0x0
+rbp:   .quad 0x0
+rsi:   .quad 0x0
+rdi:   .quad 0x0
+r8:    .quad 0x0
+r9:    .quad 0x0
+r10:   .quad 0x0
+r11:   .quad 0x0
+r12:   .quad 0x0
+r13:   .quad 0x0
+r14:   .quad 0x0
+r15:   .quad 0x0
+rip:   .quad 0x0
+       .size entry64_regs, . - entry64_regs
+
+       /* GDT */
+       .section ".rodata"
+       .balign 16
+gdt:
+       /* 0x00 unusable segment
+        * 0x08 unused
+        * so use them as the gdt ptr
+        */
+       .word gdt_end - gdt - 1
+       .quad gdt
+       .word 0, 0, 0
+
+       /* 0x10 4GB flat code segment */
+       .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
+
+       /* 0x18 4GB flat data segment */
+       .word 0xFFFF, 0x0000, 0x9200, 0x00CF
+gdt_end:
+stack: .quad   0, 0
+stack_init:
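
The entry64_regs block above is patched from the loader side with
kexec_purgatory_get_set_symbol() (see bzImage64_load() earlier in this
series), so its C-side mirror must match the .quad sequence exactly. A
hedged sketch of such a struct:

        /* Field order must match the .quad layout of entry64_regs above;
         * illustrative sketch, not necessarily the in-tree definition. */
        struct kexec_entry64_regs {
                uint64_t rax, rcx, rdx, rbx;
                uint64_t rsp, rbp, rsi, rdi;
                uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
                uint64_t rip;
        };
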
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
new file mode 100644 (file)
index 0000000..25e068b
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * purgatory: Runs between two kernels
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * Author:
+ *       Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include "sha256.h"
+#include "../boot/string.h"
+
+struct sha_region {
+       unsigned long start;
+       unsigned long len;
+};
+
+unsigned long backup_dest = 0;
+unsigned long backup_src = 0;
+unsigned long backup_sz = 0;
+
+u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+
+struct sha_region sha_regions[16] = {};
+
+/*
+ * On x86, the second kernel requires the first 640K of memory to boot.
+ * Copy the first 640K to a backup region in the reserved memory range
+ * so that the second kernel can use the first 640K.
+ */
+static int copy_backup_region(void)
+{
+       if (backup_dest)
+               memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
+
+       return 0;
+}
+
+int verify_sha256_digest(void)
+{
+       struct sha_region *ptr, *end;
+       u8 digest[SHA256_DIGEST_SIZE];
+       struct sha256_state sctx;
+
+       sha256_init(&sctx);
+       end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
+       for (ptr = sha_regions; ptr < end; ptr++)
+               sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
+
+       sha256_final(&sctx, digest);
+
+       if (memcmp(digest, sha256_digest, sizeof(digest)))
+               return 1;
+
+       return 0;
+}
+
+void purgatory(void)
+{
+       int ret;
+
+       ret = verify_sha256_digest();
+       if (ret) {
+               /* loop forever */
+               for (;;)
+                       ;
+       }
+       copy_backup_region();
+}
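
backup_dest, backup_src, backup_sz, sha256_digest and sha_regions are plain
globals here precisely so the loader can patch them by symbol name before
the reboot. A hedged sketch of the loader side (the actual call sites live
elsewhere in this series; image is the kimage being prepared, and the
region values are hypothetical):

        struct sha_region regions[16] = {
                { .start = 0x1000000, .len = 0x800000 }, /* hypothetical */
        };
        u8 digest[SHA256_DIGEST_SIZE];  /* computed over the loaded segments */

        /* last argument: 1 = get, 0 = set (see bzImage64_load() above) */
        kexec_purgatory_get_set_symbol(image, "sha_regions", regions,
                                       sizeof(regions), 0);
        kexec_purgatory_get_set_symbol(image, "sha256_digest", digest,
                                       sizeof(digest), 0);
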
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
new file mode 100644 (file)
index 0000000..fe3c91b
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * purgatory:  setup code
+ *
+ * Copyright (C) 2003,2004  Eric Biederman (ebiederm@xmission.com)
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * This code has been taken from kexec-tools.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+       .text
+       .globl purgatory_start
+       .balign 16
+purgatory_start:
+       .code64
+
+       /* Load a gdt so I know what the segment registers are */
+       lgdt    gdt(%rip)
+
+       /* load the data segments */
+       movl    $0x18, %eax     /* data segment */
+       movl    %eax, %ds
+       movl    %eax, %es
+       movl    %eax, %ss
+       movl    %eax, %fs
+       movl    %eax, %gs
+
+       /* Setup a stack */
+       leaq    lstack_end(%rip), %rsp
+
+       /* Call the C code */
+       call purgatory
+       jmp     entry64
+
+       .section ".rodata"
+       .balign 16
+gdt:   /* 0x00 unusable segment
+        * 0x08 unused
+        * so use them as the gdt ptr
+        */
+       .word   gdt_end - gdt - 1
+       .quad   gdt
+       .word   0, 0, 0
+
+       /* 0x10 4GB flat code segment */
+       .word   0xFFFF, 0x0000, 0x9A00, 0x00AF
+
+       /* 0x18 4GB flat data segment */
+       .word   0xFFFF, 0x0000, 0x9200, 0x00CF
+gdt_end:
+
+       .bss
+       .balign 4096
+lstack:
+       .skip 4096
+lstack_end:
diff --git a/arch/x86/purgatory/sha256.c b/arch/x86/purgatory/sha256.c
new file mode 100644 (file)
index 0000000..548ca67
--- /dev/null
@@ -0,0 +1,283 @@
+/*
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/bitops.h>
+#include <asm/byteorder.h>
+#include "sha256.h"
+#include "../boot/string.h"
+
+static inline u32 Ch(u32 x, u32 y, u32 z)
+{
+       return z ^ (x & (y ^ z));
+}
+
+static inline u32 Maj(u32 x, u32 y, u32 z)
+{
+       return (x & y) | (z & (x | y));
+}
+
+#define e0(x)       (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
+#define e1(x)       (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
+#define s0(x)       (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
+#define s1(x)       (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
+
+static inline void LOAD_OP(int I, u32 *W, const u8 *input)
+{
+       W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
+}
+
+static inline void BLEND_OP(int I, u32 *W)
+{
+       W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
+}
+
+static void sha256_transform(u32 *state, const u8 *input)
+{
+       u32 a, b, c, d, e, f, g, h, t1, t2;
+       u32 W[64];
+       int i;
+
+       /* load the input */
+       for (i = 0; i < 16; i++)
+               LOAD_OP(i, W, input);
+
+       /* now blend */
+       for (i = 16; i < 64; i++)
+               BLEND_OP(i, W);
+
+       /* load the state into our registers */
+       a = state[0];  b = state[1];  c = state[2];  d = state[3];
+       e = state[4];  f = state[5];  g = state[6];  h = state[7];
+
+       /* now iterate */
+       t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
+       t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+       t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
+       t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+       t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
+       t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+       t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
+       t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+       t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
+       t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+       t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
+       t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+       t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
+       t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+       t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
+       t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+       t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
+       t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+       t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
+       t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+       t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
+       t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+       t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
+       t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+       t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
+       t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+       t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
+       t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+       t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
+       t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+       t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
+       t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+
+       t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
+       t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+       t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
+       t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+       t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
+       t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+       t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
+       t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+       t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
+       t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+       t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
+       t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+       t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
+       t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+       t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
+       t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+
+       t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
+       t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+       t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
+       t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+       t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
+       t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+       t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
+       t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+       t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
+       t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+       t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
+       t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+       t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
+       t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+       t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
+       t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+
+       t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
+       t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+       t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
+       t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+       t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
+       t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+       t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
+       t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+       t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
+       t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+       t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
+       t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+       t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
+       t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+       t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
+       t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+
+       t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
+       t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+       t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
+       t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+       t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
+       t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+       t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
+       t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+       t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
+       t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+       t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
+       t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+       t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
+       t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+       t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
+       t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+
+       t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
+       t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+       t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
+       t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+       t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
+       t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+       t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
+       t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+       t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
+       t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+       t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
+       t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+       t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
+       t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+       t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
+       t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+
+       t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
+       t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+       t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
+       t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+       t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
+       t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+       t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
+       t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+       t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
+       t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+       t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
+       t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+       t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
+       t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+       t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
+       t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+
+       state[0] += a; state[1] += b; state[2] += c; state[3] += d;
+       state[4] += e; state[5] += f; state[6] += g; state[7] += h;
+
+       /* clear any sensitive info... */
+       a = b = c = d = e = f = g = h = t1 = t2 = 0;
+       memset(W, 0, 64 * sizeof(u32));
+}
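
All 64 manually unrolled rounds above follow one recurrence; inside
sha256_transform() they are equivalent to the compact loop below, assuming
a K[64] table holding the same round constants (0x428a2f98 ... 0xc67178f2):

        /* Equivalent loop form of the unrolled rounds (hedged sketch). */
        for (i = 0; i < 64; i++) {
                t1 = h + e1(e) + Ch(e, f, g) + K[i] + W[i];
                t2 = e0(a) + Maj(a, b, c);
                h = g; g = f; f = e; e = d + t1;
                d = c; c = b; b = a; a = t1 + t2;
        }
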
+
+int sha256_init(struct sha256_state *sctx)
+{
+       sctx->state[0] = SHA256_H0;
+       sctx->state[1] = SHA256_H1;
+       sctx->state[2] = SHA256_H2;
+       sctx->state[3] = SHA256_H3;
+       sctx->state[4] = SHA256_H4;
+       sctx->state[5] = SHA256_H5;
+       sctx->state[6] = SHA256_H6;
+       sctx->state[7] = SHA256_H7;
+       sctx->count = 0;
+
+       return 0;
+}
+
+int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+{
+       unsigned int partial, done;
+       const u8 *src;
+
+       partial = sctx->count & 0x3f;
+       sctx->count += len;
+       done = 0;
+       src = data;
+
+       if ((partial + len) > 63) {
+               if (partial) {
+                       done = -partial;
+                       memcpy(sctx->buf + partial, data, done + 64);
+                       src = sctx->buf;
+               }
+
+               do {
+                       sha256_transform(sctx->state, src);
+                       done += 64;
+                       src = data + done;
+               } while (done + 63 < len);
+
+               partial = 0;
+       }
+       memcpy(sctx->buf + partial, src, len - done);
+
+       return 0;
+}
+
+int sha256_final(struct sha256_state *sctx, u8 *out)
+{
+       __be32 *dst = (__be32 *)out;
+       __be64 bits;
+       unsigned int index, pad_len;
+       int i;
+       static const u8 padding[64] = { 0x80, };
+
+       /* Save number of bits */
+       bits = cpu_to_be64(sctx->count << 3);
+
+       /* Pad out to 56 mod 64. */
+       index = sctx->count & 0x3f;
+       pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+       sha256_update(sctx, padding, pad_len);
+
+       /* Append length (before padding) */
+       sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+       /* Store state in digest */
+       for (i = 0; i < 8; i++)
+               dst[i] = cpu_to_be32(sctx->state[i]);
+
+       /* Zeroize sensitive information. */
+       memset(sctx, 0, sizeof(*sctx));
+
+       return 0;
+}
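
Putting the three entry points together, the API is used exactly as
verify_sha256_digest() in purgatory.c uses it; a minimal sketch:

        static void sha256_buf(const u8 *data, unsigned int len,
                               u8 digest[SHA256_DIGEST_SIZE])
        {
                struct sha256_state sctx;

                sha256_init(&sctx);
                sha256_update(&sctx, data, len); /* may be called repeatedly */
                sha256_final(&sctx, digest);
        }
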
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
new file mode 100644 (file)
index 0000000..bd15a41
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ *  Copyright (C) 2014 Red Hat Inc.
+ *
+ *  Author: Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#ifndef SHA256_H
+#define SHA256_H
+
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+extern int sha256_init(struct sha256_state *sctx);
+extern int sha256_update(struct sha256_state *sctx, const u8 *input,
+                               unsigned int length);
+extern int sha256_final(struct sha256_state *sctx, u8 *hash);
+
+#endif /* SHA256_H */
diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
new file mode 100644 (file)
index 0000000..3cefba1
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * purgatory:  stack
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+       /* A stack for the loaded kernel.
+        * Separate and in the data section so it can be prepopulated.
+        */
+       .data
+       .balign 4096
+       .globl stack, stack_end
+
+stack:
+       .skip 4096
+stack_end:
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
new file mode 100644 (file)
index 0000000..d886b1f
--- /dev/null
@@ -0,0 +1,13 @@
+/*
+ * Simple string functions.
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * Author:
+ *       Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include "../boot/string.c"
index d1b4a119d4a5aa22c23c7722de632f921299d988..028b78168d85ac51d70c6b750d59fc23ae7cc028 100644 (file)
 353    i386    renameat2               sys_renameat2
 354    i386    seccomp                 sys_seccomp
 355    i386    getrandom               sys_getrandom
+356    i386    memfd_create            sys_memfd_create
index 252c804bb1aa7e2d74502d6a67fe67c8d06ae402..35dd922727b9a577a5de0108dd5f9d79cce4b9c1 100644 (file)
 316    common  renameat2               sys_renameat2
 317    common  seccomp                 sys_seccomp
 318    common  getrandom               sys_getrandom
+319    common  memfd_create            sys_memfd_create
+320    common  kexec_file_load         sys_kexec_file_load
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
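
With these table entries wired up, the new syscalls become reachable by
number. A hedged userspace sketch (glibc wrappers did not exist at the
time; kernel_fd, initrd_fd and cmdline are caller-provided):

        #include <unistd.h>
        #include <string.h>
        #include <sys/syscall.h>

        #define __NR_memfd_create    319   /* x86_64, per the table above */
        #define __NR_kexec_file_load 320

        static int make_memfd(void)
        {
                return syscall(__NR_memfd_create, "demo", 0U /* flags */);
        }

        static long load_kexec_kernel(int kernel_fd, int initrd_fd,
                                      const char *cmdline)
        {
                /* cmdline_len must count the trailing NUL */
                return syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
                               strlen(cmdline) + 1, cmdline, 0UL /* flags */);
        }
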
index 0feee2fd5077bbd90c9c6e914d1b386f12482120..25a1022dd793c52898cdeaee27aceb601d10f39d 100644 (file)
@@ -216,6 +216,5 @@ extern long elf_aux_hwcap;
 #define ELF_HWCAP (elf_aux_hwcap)
 
 #define SET_PERSONALITY(ex) do ; while(0)
-#define __HAVE_ARCH_GATE_AREA 1
 
 #endif
index c6492e75797b7bbceca4437810a59f6eb892a405..f8fecaddcc0df94a3b723f01018e09063ed7c632 100644 (file)
@@ -9,18 +9,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
        return NULL;
 }
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-       return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-       return 0;
-}
index e4f7781ee16280a1606349262dcb14b055f5599b..e904c270573bf58054d5fd83f2b9c77b0ee81347 100644 (file)
@@ -115,23 +115,6 @@ static __init int ia32_binfmt_init(void)
        return 0;
 }
 __initcall(ia32_binfmt_init);
-#endif
-
-#else  /* CONFIG_X86_32 */
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-       return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-       return 0;
-}
+#endif /* CONFIG_SYSCTL */
 
 #endif /* CONFIG_X86_64 */
index 06b62e5cdcc72a93281051a7c07b5090be9abeaa..c9ee681d57fdf5dd6fb39081d4af3ad02be39d97 100644 (file)
@@ -168,7 +168,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
        }
 
        ret = req->avail_out - stream->avail_out;
-       pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+       pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
                 stream->avail_in, stream->avail_out,
                 req->avail_in - stream->avail_in, ret);
        req->next_in = stream->next_in;
@@ -198,7 +198,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
        }
 
        ret = req->avail_out - stream->avail_out;
-       pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+       pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
                 stream->avail_in, stream->avail_out,
                 req->avail_in - stream->avail_in, ret);
        req->next_in = stream->next_in;
@@ -283,7 +283,7 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm,
        }
 
        ret = req->avail_out - stream->avail_out;
-       pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+       pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
                 stream->avail_in, stream->avail_out,
                 req->avail_in - stream->avail_in, ret);
        req->next_in = stream->next_in;
@@ -331,7 +331,7 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm,
        }
 
        ret = req->avail_out - stream->avail_out;
-       pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+       pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
                 stream->avail_in, stream->avail_out,
                 req->avail_in - stream->avail_in, ret);
        req->next_in = stream->next_in;
index aa6be2698669652ada3e9bbebf3ad597831b24ec..c39702bc279d44f747ae24ae07958d2899ac9bcd 100644 (file)
@@ -533,14 +533,13 @@ static void he_init_tx_lbfp(struct he_dev *he_dev)
 
 static int he_init_tpdrq(struct he_dev *he_dev)
 {
-       he_dev->tpdrq_base = pci_alloc_consistent(he_dev->pci_dev,
-               CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq), &he_dev->tpdrq_phys);
+       he_dev->tpdrq_base = pci_zalloc_consistent(he_dev->pci_dev,
+                                                  CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
+                                                  &he_dev->tpdrq_phys);
        if (he_dev->tpdrq_base == NULL) {
                hprintk("failed to alloc tpdrq\n");
                return -ENOMEM;
        }
-       memset(he_dev->tpdrq_base, 0,
-                               CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq));
 
        he_dev->tpdrq_tail = he_dev->tpdrq_base;
        he_dev->tpdrq_head = he_dev->tpdrq_base;
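
The conversions in this and the following hunks are all the same mechanical
pattern: pci_alloc_consistent() followed by memset() collapses into a
single call. The helper itself is introduced elsewhere in this series; a
hedged sketch of its shape, assuming the dma_zalloc_coherent() backend:

        static inline void *
        pci_zalloc_consistent(struct pci_dev *hwdev, size_t size,
                              dma_addr_t *dma_handle)
        {
                /* same as pci_alloc_consistent(), but returns zeroed memory */
                return dma_zalloc_coherent(hwdev ? &hwdev->dev : NULL,
                                           size, dma_handle, GFP_ATOMIC);
        }
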
@@ -804,13 +803,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
                goto out_free_rbpl_virt;
        }
 
-       he_dev->rbpl_base = pci_alloc_consistent(he_dev->pci_dev,
-               CONFIG_RBPL_SIZE * sizeof(struct he_rbp), &he_dev->rbpl_phys);
+       he_dev->rbpl_base = pci_zalloc_consistent(he_dev->pci_dev,
+                                                 CONFIG_RBPL_SIZE * sizeof(struct he_rbp),
+                                                 &he_dev->rbpl_phys);
        if (he_dev->rbpl_base == NULL) {
                hprintk("failed to alloc rbpl_base\n");
                goto out_destroy_rbpl_pool;
        }
-       memset(he_dev->rbpl_base, 0, CONFIG_RBPL_SIZE * sizeof(struct he_rbp));
 
        INIT_LIST_HEAD(&he_dev->rbpl_outstanding);
 
@@ -843,13 +842,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
 
        /* rx buffer ready queue */
 
-       he_dev->rbrq_base = pci_alloc_consistent(he_dev->pci_dev,
-               CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), &he_dev->rbrq_phys);
+       he_dev->rbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
+                                                 CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
+                                                 &he_dev->rbrq_phys);
        if (he_dev->rbrq_base == NULL) {
                hprintk("failed to allocate rbrq\n");
                goto out_free_rbpl;
        }
-       memset(he_dev->rbrq_base, 0, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq));
 
        he_dev->rbrq_head = he_dev->rbrq_base;
        he_writel(he_dev, he_dev->rbrq_phys, G0_RBRQ_ST + (group * 16));
@@ -867,13 +866,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
 
        /* tx buffer ready queue */
 
-       he_dev->tbrq_base = pci_alloc_consistent(he_dev->pci_dev,
-               CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), &he_dev->tbrq_phys);
+       he_dev->tbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
+                                                 CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
+                                                 &he_dev->tbrq_phys);
        if (he_dev->tbrq_base == NULL) {
                hprintk("failed to allocate tbrq\n");
                goto out_free_rbpq_base;
        }
-       memset(he_dev->tbrq_base, 0, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq));
 
        he_dev->tbrq_head = he_dev->tbrq_base;
 
@@ -1460,13 +1459,13 @@ static int he_start(struct atm_dev *dev)
 
        /* host status page */
 
-       he_dev->hsp = pci_alloc_consistent(he_dev->pci_dev,
-                               sizeof(struct he_hsp), &he_dev->hsp_phys);
+       he_dev->hsp = pci_zalloc_consistent(he_dev->pci_dev,
+                                           sizeof(struct he_hsp),
+                                           &he_dev->hsp_phys);
        if (he_dev->hsp == NULL) {
                hprintk("failed to allocate host status page\n");
                return -ENOMEM;
        }
-       memset(he_dev->hsp, 0, sizeof(struct he_hsp));
        he_writel(he_dev, he_dev->hsp_phys, HSP_BA);
 
        /* initialize framer */
index b621f56a36be5850b1abc4d1619b665fbce7ccf3..2b24ed0567281fe46f94e305b75859bfbd91ea75 100644 (file)
@@ -641,13 +641,11 @@ alloc_scq(struct idt77252_dev *card, int class)
        scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL);
        if (!scq)
                return NULL;
-       scq->base = pci_alloc_consistent(card->pcidev, SCQ_SIZE,
-                                        &scq->paddr);
+       scq->base = pci_zalloc_consistent(card->pcidev, SCQ_SIZE, &scq->paddr);
        if (scq->base == NULL) {
                kfree(scq);
                return NULL;
        }
-       memset(scq->base, 0, SCQ_SIZE);
 
        scq->next = scq->base;
        scq->last = scq->base + (SCQ_ENTRIES - 1);
@@ -972,13 +970,12 @@ init_rsq(struct idt77252_dev *card)
 {
        struct rsq_entry *rsqe;
 
-       card->rsq.base = pci_alloc_consistent(card->pcidev, RSQSIZE,
-                                             &card->rsq.paddr);
+       card->rsq.base = pci_zalloc_consistent(card->pcidev, RSQSIZE,
+                                              &card->rsq.paddr);
        if (card->rsq.base == NULL) {
                printk("%s: can't allocate RSQ.\n", card->name);
                return -1;
        }
-       memset(card->rsq.base, 0, RSQSIZE);
 
        card->rsq.last = card->rsq.base + RSQ_NUM_ENTRIES - 1;
        card->rsq.next = card->rsq.last;
@@ -3400,14 +3397,14 @@ static int init_card(struct atm_dev *dev)
        writel(0, SAR_REG_GP);
 
        /* Initialize RAW Cell Handle Register  */
-       card->raw_cell_hnd = pci_alloc_consistent(card->pcidev, 2 * sizeof(u32),
-                                                 &card->raw_cell_paddr);
+       card->raw_cell_hnd = pci_zalloc_consistent(card->pcidev,
+                                                  2 * sizeof(u32),
+                                                  &card->raw_cell_paddr);
        if (!card->raw_cell_hnd) {
                printk("%s: memory allocation failure.\n", card->name);
                deinit_card(card);
                return -1;
        }
-       memset(card->raw_cell_hnd, 0, 2 * sizeof(u32));
        writel(card->raw_cell_paddr, SAR_REG_RAWHND);
        IPRINTK("%s: raw cell handle is at 0x%p.\n", card->name,
                card->raw_cell_hnd);
index 125d8450573833fb3f71dee536fa376b4315f07c..811e11c82f32907a98a02a442ecb76cb1da471fb 100644 (file)
@@ -6741,11 +6741,11 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
        ErrorCode = -ENOMEM;
        if (DataTransferLength > 0)
          {
-           DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
-                               DataTransferLength, &DataTransferBufferDMA);
+           DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice,
+                                                       DataTransferLength,
+                                                       &DataTransferBufferDMA);
            if (DataTransferBuffer == NULL)
                break;
-           memset(DataTransferBuffer, 0, DataTransferLength);
          }
        else if (DataTransferLength < 0)
          {
@@ -6877,11 +6877,11 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
        ErrorCode = -ENOMEM;
        if (DataTransferLength > 0)
          {
-           DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
-                               DataTransferLength, &DataTransferBufferDMA);
+           DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice,
+                                                       DataTransferLength,
+                                                       &DataTransferBufferDMA);
            if (DataTransferBuffer == NULL)
                break;
-           memset(DataTransferBuffer, 0, DataTransferLength);
          }
        else if (DataTransferLength < 0)
          {
@@ -6899,14 +6899,14 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
        RequestSenseLength = UserCommand.RequestSenseLength;
        if (RequestSenseLength > 0)
          {
-           RequestSenseBuffer = pci_alloc_consistent(Controller->PCIDevice,
-                       RequestSenseLength, &RequestSenseBufferDMA);
+           RequestSenseBuffer = pci_zalloc_consistent(Controller->PCIDevice,
+                                                       RequestSenseLength,
+                                                       &RequestSenseBufferDMA);
            if (RequestSenseBuffer == NULL)
              {
                ErrorCode = -ENOMEM;
                goto Failure2;
              }
-           memset(RequestSenseBuffer, 0, RequestSenseLength);
          }
        spin_lock_irqsave(&Controller->queue_lock, flags);
        while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
index 4595c22f33f71e5b8305bb649b0df461658bb98f..ff20f192b0f67a77fc9e38092c457429e215f8a7 100644 (file)
@@ -1014,24 +1014,21 @@ static CommandList_struct *cmd_special_alloc(ctlr_info_t *h)
        u64bit temp64;
        dma_addr_t cmd_dma_handle, err_dma_handle;
 
-       c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
-               sizeof(CommandList_struct), &cmd_dma_handle);
+       c = pci_zalloc_consistent(h->pdev, sizeof(CommandList_struct),
+                                 &cmd_dma_handle);
        if (c == NULL)
                return NULL;
-       memset(c, 0, sizeof(CommandList_struct));
 
        c->cmdindex = -1;
 
-       c->err_info = (ErrorInfo_struct *)
-           pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
-                   &err_dma_handle);
+       c->err_info = pci_zalloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
+                                           &err_dma_handle);
 
        if (c->err_info == NULL) {
                pci_free_consistent(h->pdev,
                        sizeof(CommandList_struct), c, cmd_dma_handle);
                return NULL;
        }
-       memset(c->err_info, 0, sizeof(ErrorInfo_struct));
 
        INIT_LIST_HEAD(&c->list);
        c->busaddr = (__u32) cmd_dma_handle;
index 608532d3f8c910111093c2125e3aa385dbf2e604..f0a089df85cc22909a9104b26a83e920e48b0dc4 100644 (file)
@@ -4112,16 +4112,14 @@ static int skd_cons_skcomp(struct skd_device *skdev)
                 skdev->name, __func__, __LINE__,
                 nbytes, SKD_N_COMPLETION_ENTRY);
 
-       skcomp = pci_alloc_consistent(skdev->pdev, nbytes,
-                                     &skdev->cq_dma_address);
+       skcomp = pci_zalloc_consistent(skdev->pdev, nbytes,
+                                      &skdev->cq_dma_address);
 
        if (skcomp == NULL) {
                rc = -ENOMEM;
                goto err_out;
        }
 
-       memset(skcomp, 0, nbytes);
-
        skdev->skcomp_table = skcomp;
        skdev->skerr_table = (struct fit_comp_error_info *)((char *)skcomp +
                                                           sizeof(*skcomp) *
@@ -4304,15 +4302,14 @@ static int skd_cons_skspcl(struct skd_device *skdev)
 
                nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
 
-               skspcl->msg_buf = pci_alloc_consistent(skdev->pdev, nbytes,
-                                                      &skspcl->mb_dma_address);
+               skspcl->msg_buf =
+                       pci_zalloc_consistent(skdev->pdev, nbytes,
+                                             &skspcl->mb_dma_address);
                if (skspcl->msg_buf == NULL) {
                        rc = -ENOMEM;
                        goto err_out;
                }
 
-               memset(skspcl->msg_buf, 0, nbytes);
-
                skspcl->req.sg = kzalloc(sizeof(struct scatterlist) *
                                         SKD_N_SG_PER_SPECIAL, GFP_KERNEL);
                if (skspcl->req.sg == NULL) {
@@ -4353,25 +4350,21 @@ static int skd_cons_sksb(struct skd_device *skdev)
 
        nbytes = SKD_N_INTERNAL_BYTES;
 
-       skspcl->data_buf = pci_alloc_consistent(skdev->pdev, nbytes,
-                                               &skspcl->db_dma_address);
+       skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
+                                                &skspcl->db_dma_address);
        if (skspcl->data_buf == NULL) {
                rc = -ENOMEM;
                goto err_out;
        }
 
-       memset(skspcl->data_buf, 0, nbytes);
-
        nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
-       skspcl->msg_buf = pci_alloc_consistent(skdev->pdev, nbytes,
-                                              &skspcl->mb_dma_address);
+       skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
+                                               &skspcl->mb_dma_address);
        if (skspcl->msg_buf == NULL) {
                rc = -ENOMEM;
                goto err_out;
        }
 
-       memset(skspcl->msg_buf, 0, nbytes);
-
        skspcl->req.sksg_list = skd_cons_sg_list(skdev, 1,
                                                 &skspcl->req.sksg_dma_address);
        if (skspcl->req.sksg_list == NULL) {
index 12fea3e223481fcb19485a8a3d6220050a7e3587..8d2a7728434d05cd06250e2c6fb2d74a3336bc88 100644 (file)
@@ -2617,14 +2617,13 @@ static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                }
        }
 
-       dev->desc_virt = pci_alloc_consistent(pdev, sizeof(struct hifn_dma),
-                       &dev->desc_dma);
+       dev->desc_virt = pci_zalloc_consistent(pdev, sizeof(struct hifn_dma),
+                                              &dev->desc_dma);
        if (!dev->desc_virt) {
                dprintk("Failed to allocate descriptor rings.\n");
                err = -ENOMEM;
                goto err_out_unmap_bars;
        }
-       memset(dev->desc_virt, 0, sizeof(struct hifn_dma));
 
        dev->pdev = pdev;
        dev->irq = pdev->irq;
index 97cdd16a2169a17e741323cca3adc634c26c966e..018c29a2661553004fc5639a71ef5af9de9849b2 100644 (file)
@@ -138,6 +138,27 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
        return entry;
 }
 
+int efi_get_runtime_map_size(void)
+{
+       return nr_efi_runtime_map * efi_memdesc_size;
+}
+
+int efi_get_runtime_map_desc_size(void)
+{
+       return efi_memdesc_size;
+}
+
+int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+       size_t sz = efi_get_runtime_map_size();
+
+       if (sz > bufsz)
+               sz = bufsz;
+
+       memcpy(buf, efi_runtime_map, sz);
+       return 0;
+}
+
 void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
 {
        efi_runtime_map = map;
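Note: a minimal usage sketch for the three new accessors (the caller and its buffer management here are hypothetical, not part of this patch):

    /* Hypothetical caller: snapshot the EFI runtime map into a buffer. */
    static void *snapshot_runtime_map(size_t *out_sz)
    {
            size_t sz = efi_get_runtime_map_size();
            void *buf;

            if (!sz)
                    return NULL;
            buf = kmalloc(sz, GFP_KERNEL);
            if (!buf)
                    return NULL;
            efi_runtime_map_copy(buf, sz);  /* copies at most sz bytes */
            *out_sz = sz;
            return buf;
    }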
index e88bac1d781f7b5a34051d0fcb138f2efc11dc0d..bae897de9468952f882df012da4f055d53edf892 100644 (file)
@@ -393,15 +393,14 @@ static int i810_dma_initialize(struct drm_device *dev,
 
        /* Program Hardware Status Page */
        dev_priv->hw_status_page =
-           pci_alloc_consistent(dev->pdev, PAGE_SIZE,
-                                &dev_priv->dma_status_page);
+               pci_zalloc_consistent(dev->pdev, PAGE_SIZE,
+                                     &dev_priv->dma_status_page);
        if (!dev_priv->hw_status_page) {
                dev->dev_private = (void *)dev_priv;
                i810_dma_cleanup(dev);
                DRM_ERROR("Can not allocate hardware status page\n");
                return -ENOMEM;
        }
-       memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
        DRM_DEBUG("hw status page @ %p\n", dev_priv->hw_status_page);
 
        I810_WRITE(0x02080, dev_priv->dma_status_page);
index 00400c352c1a9ec33e7c291bcb997049686816c5..766a71ccefed20975fd9c6fc264f2ab69d071170 100644 (file)
@@ -604,16 +604,14 @@ static int c2_up(struct net_device *netdev)
        tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
 
        c2_port->mem_size = tx_size + rx_size;
-       c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
-                                           &c2_port->dma);
+       c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
+                                            &c2_port->dma);
        if (c2_port->mem == NULL) {
                pr_debug("Unable to allocate memory for "
                        "host descriptor rings\n");
                return -ENOMEM;
        }
 
-       memset(c2_port->mem, 0, c2_port->mem_size);
-
        /* Create the Rx host descriptor ring */
        if ((ret =
             c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
index 90200245c5ebfd5fe71000d17b78e52bfa8d4734..02120d340d50cffa62fa87bb31e4e22502d5e547 100644 (file)
@@ -1003,13 +1003,13 @@ int nes_init_cqp(struct nes_device *nesdev)
                        (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
                        sizeof(struct nes_hw_cqp_qp_context);
 
-       nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
-                       &nesdev->cqp_pbase);
+       nesdev->cqp_vbase = pci_zalloc_consistent(nesdev->pcidev,
+                                                 nesdev->cqp_mem_size,
+                                                 &nesdev->cqp_pbase);
        if (!nesdev->cqp_vbase) {
                nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n");
                return -ENOMEM;
        }
-       memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size);
 
        /* Allocate a twice the number of CQP requests as the SQ size */
        nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
@@ -1691,13 +1691,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
                        (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) +
                        sizeof(struct nes_hw_nic_qp_context);
 
-       nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size,
-                       &nesvnic->nic_pbase);
+       nesvnic->nic_vbase = pci_zalloc_consistent(nesdev->pcidev,
+                                                  nesvnic->nic_mem_size,
+                                                  &nesvnic->nic_pbase);
        if (!nesvnic->nic_vbase) {
                nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n");
                return -ENOMEM;
        }
-       memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size);
        nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n",
                        nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size);
 
index 218dd35742851f14f5381dc734b245a599ad86e4..fef067c959fcf23c9455f96c17e8d734d6a894f6 100644 (file)
@@ -1616,8 +1616,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
                                entries, nescq->cq_mem_size, nescq->hw_cq.cq_number);
 
                /* allocate the physical buffer space */
-               mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
-                               &nescq->hw_cq.cq_pbase);
+               mem = pci_zalloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
+                                           &nescq->hw_cq.cq_pbase);
                if (!mem) {
                        printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n");
                        nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
@@ -1625,7 +1625,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
                        return ERR_PTR(-ENOMEM);
                }
 
-               memset(mem, 0, nescq->cq_mem_size);
                nescq->hw_cq.cq_vbase = mem;
                nescq->hw_cq.cq_head = 0;
                nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n",
index 34b0d0ddeef3f4003928487420e22aac5fb39d73..97afee672d07e228a350d63c88b148234e5032cb 100644 (file)
@@ -421,23 +421,20 @@ static int saa7146_init_one(struct pci_dev *pci, const struct pci_device_id *ent
        err = -ENOMEM;
 
        /* get memory for various stuff */
-       dev->d_rps0.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM,
-                                                   &dev->d_rps0.dma_handle);
+       dev->d_rps0.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
+                                                    &dev->d_rps0.dma_handle);
        if (!dev->d_rps0.cpu_addr)
                goto err_free_irq;
-       memset(dev->d_rps0.cpu_addr, 0x0, SAA7146_RPS_MEM);
 
-       dev->d_rps1.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM,
-                                                   &dev->d_rps1.dma_handle);
+       dev->d_rps1.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
+                                                    &dev->d_rps1.dma_handle);
        if (!dev->d_rps1.cpu_addr)
                goto err_free_rps0;
-       memset(dev->d_rps1.cpu_addr, 0x0, SAA7146_RPS_MEM);
 
-       dev->d_i2c.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM,
-                                                  &dev->d_i2c.dma_handle);
+       dev->d_i2c.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
+                                                   &dev->d_i2c.dma_handle);
        if (!dev->d_i2c.cpu_addr)
                goto err_free_rps1;
-       memset(dev->d_i2c.cpu_addr, 0x0, SAA7146_RPS_MEM);
 
        /* the rest + print status message */
 
index d9e1d6395ed9499f2df0e681bfa04b46b3d1adfe..6c47f3fe9b0fc2ba2f8eb9263a2f0e9d0396d870 100644 (file)
@@ -520,14 +520,15 @@ int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv)
           configuration data) */
        dev->ext_vv_data = ext_vv;
 
-       vv->d_clipping.cpu_addr = pci_alloc_consistent(dev->pci, SAA7146_CLIPPING_MEM, &vv->d_clipping.dma_handle);
+       vv->d_clipping.cpu_addr =
+               pci_zalloc_consistent(dev->pci, SAA7146_CLIPPING_MEM,
+                                     &vv->d_clipping.dma_handle);
        if( NULL == vv->d_clipping.cpu_addr ) {
                ERR("out of memory. aborting.\n");
                kfree(vv);
                v4l2_ctrl_handler_free(hdl);
                return -1;
        }
-       memset(vv->d_clipping.cpu_addr, 0x0, SAA7146_CLIPPING_MEM);
 
        saa7146_video_uops.init(dev,vv);
        if (dev->ext_vv_data->capabilities & V4L2_CAP_VBI_CAPTURE)
index d0c281f41a0aa98f368ec37fed1748e31284cb62..11765835d7b2977ec3317863f05f2560b25eff1c 100644 (file)
@@ -101,28 +101,20 @@ static int bt878_mem_alloc(struct bt878 *bt)
        if (!bt->buf_cpu) {
                bt->buf_size = 128 * 1024;
 
-               bt->buf_cpu =
-                   pci_alloc_consistent(bt->dev, bt->buf_size,
-                                        &bt->buf_dma);
-
+               bt->buf_cpu = pci_zalloc_consistent(bt->dev, bt->buf_size,
+                                                   &bt->buf_dma);
                if (!bt->buf_cpu)
                        return -ENOMEM;
-
-               memset(bt->buf_cpu, 0, bt->buf_size);
        }
 
        if (!bt->risc_cpu) {
                bt->risc_size = PAGE_SIZE;
-               bt->risc_cpu =
-                   pci_alloc_consistent(bt->dev, bt->risc_size,
-                                        &bt->risc_dma);
-
+               bt->risc_cpu = pci_zalloc_consistent(bt->dev, bt->risc_size,
+                                                    &bt->risc_dma);
                if (!bt->risc_cpu) {
                        bt878_mem_free(bt);
                        return -ENOMEM;
                }
-
-               memset(bt->risc_cpu, 0, bt->risc_size);
        }
 
        return 0;
index 826228c3800ec3747e7a3cb11537f26751bea073..4930b55fd5f4f855399ebb6c828daece0c579ce8 100644 (file)
@@ -1075,12 +1075,11 @@ static int AllocCommonBuffers(struct ngene *dev)
        dev->ngenetohost = dev->FWInterfaceBuffer + 256;
        dev->EventBuffer = dev->FWInterfaceBuffer + 512;
 
-       dev->OverflowBuffer = pci_alloc_consistent(dev->pci_dev,
-                                                  OVERFLOW_BUFFER_SIZE,
-                                                  &dev->PAOverflowBuffer);
+       dev->OverflowBuffer = pci_zalloc_consistent(dev->pci_dev,
+                                                   OVERFLOW_BUFFER_SIZE,
+                                                   &dev->PAOverflowBuffer);
        if (!dev->OverflowBuffer)
                return -ENOMEM;
-       memset(dev->OverflowBuffer, 0, OVERFLOW_BUFFER_SIZE);
 
        for (i = STREAM_VIDEOIN1; i < MAX_STREAM; i++) {
                int type = dev->card_info->io_type[i];
index f166ffc9800a3b069c8792c387d7e840608d4720..cef7a00099ea8cfe6ec6501fe0a18ecb03280c52 100644 (file)
@@ -803,11 +803,9 @@ static int ttusb_alloc_iso_urbs(struct ttusb *ttusb)
 {
        int i;
 
-       ttusb->iso_buffer = pci_alloc_consistent(NULL,
-                                                ISO_FRAME_SIZE *
-                                                FRAMES_PER_ISO_BUF *
-                                                ISO_BUF_COUNT,
-                                                &ttusb->iso_dma_handle);
+       ttusb->iso_buffer = pci_zalloc_consistent(NULL,
+                                                 ISO_FRAME_SIZE * FRAMES_PER_ISO_BUF * ISO_BUF_COUNT,
+                                                 &ttusb->iso_dma_handle);
 
        if (!ttusb->iso_buffer) {
                dprintk("%s: pci_alloc_consistent - not enough memory\n",
@@ -815,9 +813,6 @@ static int ttusb_alloc_iso_urbs(struct ttusb *ttusb)
                return -ENOMEM;
        }
 
-       memset(ttusb->iso_buffer, 0,
-              ISO_FRAME_SIZE * FRAMES_PER_ISO_BUF * ISO_BUF_COUNT);
-
        for (i = 0; i < ISO_BUF_COUNT; i++) {
                struct urb *urb;
 
index 29724af9b9ab11fbb36c1871bacada0fde62409c..15ab584cf265615518489a091e4ab243095e7c22 100644 (file)
@@ -1151,11 +1151,9 @@ static int ttusb_dec_alloc_iso_urbs(struct ttusb_dec *dec)
 
        dprintk("%s\n", __func__);
 
-       dec->iso_buffer = pci_alloc_consistent(NULL,
-                                              ISO_FRAME_SIZE *
-                                              (FRAMES_PER_ISO_BUF *
-                                               ISO_BUF_COUNT),
-                                              &dec->iso_dma_handle);
+       dec->iso_buffer = pci_zalloc_consistent(NULL,
+                                               ISO_FRAME_SIZE * (FRAMES_PER_ISO_BUF * ISO_BUF_COUNT),
+                                               &dec->iso_dma_handle);
 
        if (!dec->iso_buffer) {
                dprintk("%s: pci_alloc_consistent - not enough memory\n",
@@ -1163,9 +1161,6 @@ static int ttusb_dec_alloc_iso_urbs(struct ttusb_dec *dec)
                return -ENOMEM;
        }
 
-       memset(dec->iso_buffer, 0,
-              ISO_FRAME_SIZE * (FRAMES_PER_ISO_BUF * ISO_BUF_COUNT));
-
        for (i = 0; i < ISO_BUF_COUNT; i++) {
                struct urb *urb;
 
index e7cc9174e364162c68652e8c083e1d66fd5eefa0..4a8fdc4721d5b0efe8d9e2bef8794508cac8391a 100644 (file)
@@ -481,37 +481,32 @@ static void pcnet32_realloc_tx_ring(struct net_device *dev,
        dma_addr_t *new_dma_addr_list;
        struct pcnet32_tx_head *new_tx_ring;
        struct sk_buff **new_skb_list;
+       unsigned int entries = BIT(size);
 
        pcnet32_purge_tx_ring(dev);
 
-       new_tx_ring = pci_alloc_consistent(lp->pci_dev,
-                                          sizeof(struct pcnet32_tx_head) *
-                                          (1 << size),
-                                          &new_ring_dma_addr);
-       if (new_tx_ring == NULL) {
-               netif_err(lp, drv, dev, "Consistent memory allocation failed\n");
+       new_tx_ring =
+               pci_zalloc_consistent(lp->pci_dev,
+                                     sizeof(struct pcnet32_tx_head) * entries,
+                                     &new_ring_dma_addr);
+       if (new_tx_ring == NULL)
                return;
-       }
-       memset(new_tx_ring, 0, sizeof(struct pcnet32_tx_head) * (1 << size));
 
-       new_dma_addr_list = kcalloc(1 << size, sizeof(dma_addr_t),
-                                   GFP_ATOMIC);
+       new_dma_addr_list = kcalloc(entries, sizeof(dma_addr_t), GFP_ATOMIC);
        if (!new_dma_addr_list)
                goto free_new_tx_ring;
 
-       new_skb_list = kcalloc(1 << size, sizeof(struct sk_buff *),
-                              GFP_ATOMIC);
+       new_skb_list = kcalloc(entries, sizeof(struct sk_buff *), GFP_ATOMIC);
        if (!new_skb_list)
                goto free_new_lists;
 
        kfree(lp->tx_skbuff);
        kfree(lp->tx_dma_addr);
        pci_free_consistent(lp->pci_dev,
-                           sizeof(struct pcnet32_tx_head) *
-                           lp->tx_ring_size, lp->tx_ring,
-                           lp->tx_ring_dma_addr);
+                           sizeof(struct pcnet32_tx_head) * lp->tx_ring_size,
+                           lp->tx_ring, lp->tx_ring_dma_addr);
 
-       lp->tx_ring_size = (1 << size);
+       lp->tx_ring_size = entries;
        lp->tx_mod_mask = lp->tx_ring_size - 1;
        lp->tx_len_bits = (size << 12);
        lp->tx_ring = new_tx_ring;
@@ -524,8 +519,7 @@ free_new_lists:
        kfree(new_dma_addr_list);
 free_new_tx_ring:
        pci_free_consistent(lp->pci_dev,
-                           sizeof(struct pcnet32_tx_head) *
-                           (1 << size),
+                           sizeof(struct pcnet32_tx_head) * entries,
                            new_tx_ring,
                            new_ring_dma_addr);
 }
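Note: BIT() comes from linux/bitops.h and expands to (1UL << (nr)), so the new 'entries' local is exactly the old '1 << size' expression, computed once instead of repeated at each use:

    #include <linux/bitops.h>

    unsigned int entries = BIT(4);  /* 1UL << 4 == 16 ring entries */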
@@ -549,17 +543,14 @@ static void pcnet32_realloc_rx_ring(struct net_device *dev,
        struct pcnet32_rx_head *new_rx_ring;
        struct sk_buff **new_skb_list;
        int new, overlap;
-       unsigned int entries = 1 << size;
+       unsigned int entries = BIT(size);
 
-       new_rx_ring = pci_alloc_consistent(lp->pci_dev,
-                                          sizeof(struct pcnet32_rx_head) *
-                                          entries,
-                                          &new_ring_dma_addr);
-       if (new_rx_ring == NULL) {
-               netif_err(lp, drv, dev, "Consistent memory allocation failed\n");
+       new_rx_ring =
+               pci_zalloc_consistent(lp->pci_dev,
+                                     sizeof(struct pcnet32_rx_head) * entries,
+                                     &new_ring_dma_addr);
+       if (new_rx_ring == NULL)
                return;
-       }
-       memset(new_rx_ring, 0, sizeof(struct pcnet32_rx_head) * entries);
 
        new_dma_addr_list = kcalloc(entries, sizeof(dma_addr_t), GFP_ATOMIC);
        if (!new_dma_addr_list)
index 4345332533adb5f9108fd7d9a1fbc7a138109a3c..316e0c3fe048cfbad5ee4e3441e7954eb01440ee 100644 (file)
@@ -831,17 +831,14 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter)
        /* real ring DMA buffer */
 
        size = adapter->ring_size;
-       adapter->ring_vir_addr = pci_alloc_consistent(pdev,
-                       adapter->ring_size, &adapter->ring_dma);
-
+       adapter->ring_vir_addr = pci_zalloc_consistent(pdev, adapter->ring_size,
+                                                      &adapter->ring_dma);
        if (adapter->ring_vir_addr == NULL) {
                netdev_err(adapter->netdev,
                           "pci_alloc_consistent failed, size = D%d\n", size);
                return -ENOMEM;
        }
 
-       memset(adapter->ring_vir_addr, 0, adapter->ring_size);
-
        rx_page_desc = rx_ring->rx_page_desc;
 
        /* Init TPD Ring */
index 5abc496bcf29e64d3106b6451739dbdda6a4e7cf..37472ce4fac310a933a1f332a5df451a1cdbcc5d 100644 (file)
@@ -432,14 +432,12 @@ int vnic_dev_fw_info(struct vnic_dev *vdev,
        int err = 0;
 
        if (!vdev->fw_info) {
-               vdev->fw_info = pci_alloc_consistent(vdev->pdev,
-                       sizeof(struct vnic_devcmd_fw_info),
-                       &vdev->fw_info_pa);
+               vdev->fw_info = pci_zalloc_consistent(vdev->pdev,
+                                                     sizeof(struct vnic_devcmd_fw_info),
+                                                     &vdev->fw_info_pa);
                if (!vdev->fw_info)
                        return -ENOMEM;
 
-               memset(vdev->fw_info, 0, sizeof(struct vnic_devcmd_fw_info));
-
                a0 = vdev->fw_info_pa;
                a1 = sizeof(struct vnic_devcmd_fw_info);
 
index 69693384b58ccfefd2bd9918112b8b08431f0368..59915144aabb18d4d70334406214255bb9439975 100644 (file)
@@ -1622,11 +1622,10 @@ static int sky2_alloc_buffers(struct sky2_port *sky2)
        if (!sky2->tx_ring)
                goto nomem;
 
-       sky2->rx_le = pci_alloc_consistent(hw->pdev, RX_LE_BYTES,
-                                          &sky2->rx_le_map);
+       sky2->rx_le = pci_zalloc_consistent(hw->pdev, RX_LE_BYTES,
+                                           &sky2->rx_le_map);
        if (!sky2->rx_le)
                goto nomem;
-       memset(sky2->rx_le, 0, RX_LE_BYTES);
 
        sky2->rx_ring = kcalloc(sky2->rx_pending, sizeof(struct rx_ring_info),
                                GFP_KERNEL);
index 064a48d0c368a267826e2f77bacb2da9fa366e1e..cd5f106306d95e7a1b2bc3c1465da8f0d57a1e83 100644 (file)
@@ -4409,14 +4409,13 @@ static int ksz_alloc_desc(struct dev_info *adapter)
                DESC_ALIGNMENT;
 
        adapter->desc_pool.alloc_virt =
-               pci_alloc_consistent(
-                       adapter->pdev, adapter->desc_pool.alloc_size,
-                       &adapter->desc_pool.dma_addr);
+               pci_zalloc_consistent(adapter->pdev,
+                                     adapter->desc_pool.alloc_size,
+                                     &adapter->desc_pool.dma_addr);
        if (adapter->desc_pool.alloc_virt == NULL) {
                adapter->desc_pool.alloc_size = 0;
                return 1;
        }
-       memset(adapter->desc_pool.alloc_virt, 0, adapter->desc_pool.alloc_size);
 
        /* Align to the next cache line boundary. */
        offset = (((ulong) adapter->desc_pool.alloc_virt % DESC_ALIGNMENT) ?
index 6f6be57f46901a0e5eb3280c146fc6193cdc5171..b8d5270359cd8a7109226c0be1731cbafa6d1423 100644 (file)
@@ -129,14 +129,12 @@ netxen_get_minidump_template(struct netxen_adapter *adapter)
                return NX_RCODE_INVALID_ARGS;
        }
 
-       addr = pci_alloc_consistent(adapter->pdev, size, &md_template_addr);
-
+       addr = pci_zalloc_consistent(adapter->pdev, size, &md_template_addr);
        if (!addr) {
                dev_err(&adapter->pdev->dev, "Unable to allocate dmable memory for template.\n");
                return -ENOMEM;
        }
 
-       memset(addr, 0, size);
        memset(&cmd, 0, sizeof(cmd));
        memset(&cmd.rsp, 1, sizeof(struct _cdrp_cmd));
        cmd.req.cmd = NX_CDRP_CMD_GET_TEMP_HDR;
index b40050e03a56f7524e19dc7c215165d0a0c75585..d836ace52277a3a5c8a24745d8edb75c151f77b2 100644 (file)
@@ -2727,23 +2727,22 @@ static void ql_free_shadow_space(struct ql_adapter *qdev)
 static int ql_alloc_shadow_space(struct ql_adapter *qdev)
 {
        qdev->rx_ring_shadow_reg_area =
-           pci_alloc_consistent(qdev->pdev,
-                                PAGE_SIZE, &qdev->rx_ring_shadow_reg_dma);
+               pci_zalloc_consistent(qdev->pdev, PAGE_SIZE,
+                                     &qdev->rx_ring_shadow_reg_dma);
        if (qdev->rx_ring_shadow_reg_area == NULL) {
                netif_err(qdev, ifup, qdev->ndev,
                          "Allocation of RX shadow space failed.\n");
                return -ENOMEM;
        }
-       memset(qdev->rx_ring_shadow_reg_area, 0, PAGE_SIZE);
+
        qdev->tx_ring_shadow_reg_area =
-           pci_alloc_consistent(qdev->pdev, PAGE_SIZE,
-                                &qdev->tx_ring_shadow_reg_dma);
+               pci_zalloc_consistent(qdev->pdev, PAGE_SIZE,
+                                     &qdev->tx_ring_shadow_reg_dma);
        if (qdev->tx_ring_shadow_reg_area == NULL) {
                netif_err(qdev, ifup, qdev->ndev,
                          "Allocation of TX shadow space failed.\n");
                goto err_wqp_sh_area;
        }
-       memset(qdev->tx_ring_shadow_reg_area, 0, PAGE_SIZE);
        return 0;
 
 err_wqp_sh_area:
index 485006604bbc0da7716e754485c945f064c5188e..58ef59469dd06eaed770fbd7c6b3f1cf25761053 100644 (file)
@@ -485,13 +485,13 @@ static int vlsi_create_hwif(vlsi_irda_dev_t *idev)
        idev->virtaddr = NULL;
        idev->busaddr = 0;
 
-       ringarea = pci_alloc_consistent(idev->pdev, HW_RING_AREA_SIZE, &idev->busaddr);
+       ringarea = pci_zalloc_consistent(idev->pdev, HW_RING_AREA_SIZE,
+                                        &idev->busaddr);
        if (!ringarea) {
                IRDA_ERROR("%s: insufficient memory for descriptor rings\n",
                           __func__);
                goto out;
        }
-       memset(ringarea, 0, HW_RING_AREA_SIZE);
 
        hwmap = (struct ring_descr_hw *)ringarea;
        idev->rx_ring = vlsi_alloc_ring(idev->pdev, hwmap, ringsize[1],
index dfc6dfc56d52fa8029beb5e25826c61908b02286..1ab8e500fb77d8adfb697976abf68cda28efe915 100644 (file)
@@ -3449,8 +3449,9 @@ static int ipw2100_msg_allocate(struct ipw2100_priv *priv)
                return -ENOMEM;
 
        for (i = 0; i < IPW_COMMAND_POOL_SIZE; i++) {
-               v = pci_alloc_consistent(priv->pci_dev,
-                                        sizeof(struct ipw2100_cmd_header), &p);
+               v = pci_zalloc_consistent(priv->pci_dev,
+                                         sizeof(struct ipw2100_cmd_header),
+                                         &p);
                if (!v) {
                        printk(KERN_ERR DRV_NAME ": "
                               "%s: PCI alloc failed for msg "
@@ -3459,8 +3460,6 @@ static int ipw2100_msg_allocate(struct ipw2100_priv *priv)
                        break;
                }
 
-               memset(v, 0, sizeof(struct ipw2100_cmd_header));
-
                priv->msg_buffers[i].type = COMMAND;
                priv->msg_buffers[i].info.c_struct.cmd =
                    (struct ipw2100_cmd_header *)v;
@@ -4336,16 +4335,12 @@ static int status_queue_allocate(struct ipw2100_priv *priv, int entries)
        IPW_DEBUG_INFO("enter\n");
 
        q->size = entries * sizeof(struct ipw2100_status);
-       q->drv =
-           (struct ipw2100_status *)pci_alloc_consistent(priv->pci_dev,
-                                                         q->size, &q->nic);
+       q->drv = pci_zalloc_consistent(priv->pci_dev, q->size, &q->nic);
        if (!q->drv) {
                IPW_DEBUG_WARNING("Can not allocate status queue.\n");
                return -ENOMEM;
        }
 
-       memset(q->drv, 0, q->size);
-
        IPW_DEBUG_INFO("exit\n");
 
        return 0;
@@ -4374,13 +4369,12 @@ static int bd_queue_allocate(struct ipw2100_priv *priv,
 
        q->entries = entries;
        q->size = entries * sizeof(struct ipw2100_bd);
-       q->drv = pci_alloc_consistent(priv->pci_dev, q->size, &q->nic);
+       q->drv = pci_zalloc_consistent(priv->pci_dev, q->size, &q->nic);
        if (!q->drv) {
                IPW_DEBUG_INFO
                    ("can't allocate shared memory for buffer descriptors\n");
                return -ENOMEM;
        }
-       memset(q->drv, 0, q->size);
 
        IPW_DEBUG_INFO("exit\n");
 
index 9a3d4d6724f70324328f8d00f4bc52159fc76622..fc6cb215e761c18ce5ec1d53dc218494017647e6 100644 (file)
@@ -1159,12 +1159,11 @@ static int mwl8k_rxq_init(struct ieee80211_hw *hw, int index)
 
        size = MWL8K_RX_DESCS * priv->rxd_ops->rxd_size;
 
-       rxq->rxd = pci_alloc_consistent(priv->pdev, size, &rxq->rxd_dma);
+       rxq->rxd = pci_zalloc_consistent(priv->pdev, size, &rxq->rxd_dma);
        if (rxq->rxd == NULL) {
                wiphy_err(hw->wiphy, "failed to alloc RX descriptors\n");
                return -ENOMEM;
        }
-       memset(rxq->rxd, 0, size);
 
        rxq->buf = kcalloc(MWL8K_RX_DESCS, sizeof(*rxq->buf), GFP_KERNEL);
        if (rxq->buf == NULL) {
@@ -1451,12 +1450,11 @@ static int mwl8k_txq_init(struct ieee80211_hw *hw, int index)
 
        size = MWL8K_TX_DESCS * sizeof(struct mwl8k_tx_desc);
 
-       txq->txd = pci_alloc_consistent(priv->pdev, size, &txq->txd_dma);
+       txq->txd = pci_zalloc_consistent(priv->pdev, size, &txq->txd_dma);
        if (txq->txd == NULL) {
                wiphy_err(hw->wiphy, "failed to alloc TX descriptors\n");
                return -ENOMEM;
        }
-       memset(txq->txd, 0, size);
 
        txq->skb = kcalloc(MWL8K_TX_DESCS, sizeof(*txq->skb), GFP_KERNEL);
        if (txq->skb == NULL) {
index 4b904f70818487fde779ffd0c524284f23972e84..fcc45e5bf50a88663d2946195d7b20b3ccfcb90d 100644 (file)
@@ -972,16 +972,13 @@ static int rtl8180_init_rx_ring(struct ieee80211_hw *dev)
        else
                priv->rx_ring_sz = sizeof(struct rtl8180_rx_desc);
 
-       priv->rx_ring = pci_alloc_consistent(priv->pdev,
-                                            priv->rx_ring_sz * 32,
-                                            &priv->rx_ring_dma);
-
+       priv->rx_ring = pci_zalloc_consistent(priv->pdev, priv->rx_ring_sz * 32,
+                                             &priv->rx_ring_dma);
        if (!priv->rx_ring || (unsigned long)priv->rx_ring & 0xFF) {
                wiphy_err(dev->wiphy, "Cannot allocate RX ring\n");
                return -ENOMEM;
        }
 
-       memset(priv->rx_ring, 0, priv->rx_ring_sz * 32);
        priv->rx_idx = 0;
 
        for (i = 0; i < 32; i++) {
@@ -1040,14 +1037,14 @@ static int rtl8180_init_tx_ring(struct ieee80211_hw *dev,
        dma_addr_t dma;
        int i;
 
-       ring = pci_alloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma);
+       ring = pci_zalloc_consistent(priv->pdev, sizeof(*ring) * entries,
+                                    &dma);
        if (!ring || (unsigned long)ring & 0xFF) {
                wiphy_err(dev->wiphy, "Cannot allocate TX ring (prio = %d)\n",
                          prio);
                return -ENOMEM;
        }
 
-       memset(ring, 0, sizeof(*ring)*entries);
        priv->tx_ring[prio].desc = ring;
        priv->tx_ring[prio].dma = dma;
        priv->tx_ring[prio].idx = 0;
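Note: both rtl8180 and rtlwifi keep the '(unsigned long)ring & 0xFF' test after the conversion: the descriptor ring must be 256-byte aligned, which a fresh pci_zalloc_consistent() allocation normally satisfies (coherent DMA allocations are at least page aligned); the check presumably guards a hardware constraint of these parts rather than the allocator.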
index dae55257f0e8bde44fb772173f7d573fceab6d65..67d1ee6edcad6e2748cade61a4127db661cfcadf 100644 (file)
@@ -1092,16 +1092,14 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
        u32 nextdescaddress;
        int i;
 
-       ring = pci_alloc_consistent(rtlpci->pdev,
-                                   sizeof(*ring) * entries, &dma);
-
+       ring = pci_zalloc_consistent(rtlpci->pdev, sizeof(*ring) * entries,
+                                    &dma);
        if (!ring || (unsigned long)ring & 0xFF) {
                RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
                         "Cannot allocate TX ring (prio = %d)\n", prio);
                return -ENOMEM;
        }
 
-       memset(ring, 0, sizeof(*ring) * entries);
        rtlpci->tx_ring[prio].desc = ring;
        rtlpci->tx_ring[prio].dma = dma;
        rtlpci->tx_ring[prio].idx = 0;
@@ -1139,10 +1137,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw)
        for (rx_queue_idx = 0; rx_queue_idx < RTL_PCI_MAX_RX_QUEUE;
             rx_queue_idx++) {
                rtlpci->rx_ring[rx_queue_idx].desc =
-                   pci_alloc_consistent(rtlpci->pdev,
-                                        sizeof(*rtlpci->rx_ring[rx_queue_idx].
-                                               desc) * rtlpci->rxringcount,
-                                        &rtlpci->rx_ring[rx_queue_idx].dma);
+                       pci_zalloc_consistent(rtlpci->pdev,
+                                             sizeof(*rtlpci->rx_ring[rx_queue_idx].desc) * rtlpci->rxringcount,
+                                             &rtlpci->rx_ring[rx_queue_idx].dma);
 
                if (!rtlpci->rx_ring[rx_queue_idx].desc ||
                    (unsigned long)rtlpci->rx_ring[rx_queue_idx].desc & 0xFF) {
@@ -1151,10 +1148,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw)
                        return -ENOMEM;
                }
 
-               memset(rtlpci->rx_ring[rx_queue_idx].desc, 0,
-                      sizeof(*rtlpci->rx_ring[rx_queue_idx].desc) *
-                      rtlpci->rxringcount);
-
                rtlpci->rx_ring[rx_queue_idx].idx = 0;
 
                /* If amsdu_8k is disabled, set buffersize to 4096. This
index c864f82bd37de9a369ed82b790676de29a4d59fb..30e981be14c237f9fefb94a0adc9cb04c5b71e23 100644 (file)
@@ -2204,7 +2204,7 @@ static int __init parport_ip32_init(void)
 {
        pr_info(PPIP32 "SGI IP32 built-in parallel port driver v0.6\n");
        this_port = parport_ip32_probe_port();
-       return IS_ERR(this_port) ? PTR_ERR(this_port) : 0;
+       return PTR_ERR_OR_ZERO(this_port);
 }
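Note: PTR_ERR_OR_ZERO() folds the IS_ERR()/PTR_ERR() dance into one call. Its definition in linux/err.h is essentially:

    static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
    {
            if (IS_ERR(ptr))
                    return PTR_ERR(ptr);
            else
                    return 0;
    }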
 
 /**
index 0305675270ee53ef168c15f5c93db0d06a1df87c..a7b42680a06a2f2ebfd539e5832a2d22552b9ac2 100644 (file)
@@ -644,27 +644,26 @@ enum tsi721_smsg_int_flag {
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 
-#define TSI721_BDMA_BD_RING_SZ 128
 #define TSI721_BDMA_MAX_BCOUNT (TSI721_DMAD_BCOUNT1 + 1)
 
 struct tsi721_tx_desc {
        struct dma_async_tx_descriptor  txd;
-       struct tsi721_dma_desc          *hw_desc;
        u16                             destid;
        /* low 64-bits of 66-bit RIO address */
        u64                             rio_addr;
        /* upper 2-bits of 66-bit RIO address */
        u8                              rio_addr_u;
-       u32                             bcount;
-       bool                            interrupt;
+       enum dma_rtype                  rtype;
        struct list_head                desc_node;
-       struct list_head                tx_list;
+       struct scatterlist              *sg;
+       unsigned int                    sg_len;
+       enum dma_status                 status;
 };
 
 struct tsi721_bdma_chan {
        int             id;
        void __iomem    *regs;
-       int             bd_num;         /* number of buffer descriptors */
+       int             bd_num;         /* number of HW buffer descriptors */
        void            *bd_base;       /* start of DMA descriptors */
        dma_addr_t      bd_phys;
        void            *sts_base;      /* start of DMA BD status FIFO */
@@ -680,7 +679,6 @@ struct tsi721_bdma_chan {
        struct list_head        active_list;
        struct list_head        queue;
        struct list_head        free_list;
-       dma_cookie_t            completed_cookie;
        struct tasklet_struct   tasklet;
        bool                    active;
 };
index 44341dc5b148301b6e185d2bbc4e5d0248b0a3c5..f64c5decb747a8be4425cca3e81da96200cf6812 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * DMA Engine support for Tsi721 PCIExpress-to-SRIO bridge
  *
- * Copyright 2011 Integrated Device Technology, Inc.
+ * Copyright (c) 2011-2014 Integrated Device Technology, Inc.
  * Alexandre Bounine <alexandre.bounine@idt.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -14,9 +14,8 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
  */
 
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/kfifo.h>
 #include <linux/delay.h>
+#include "../../dma/dmaengine.h"
 
 #include "tsi721.h"
 
+#define TSI721_DMA_TX_QUEUE_SZ 16      /* number of transaction descriptors */
+
+#ifdef CONFIG_PCI_MSI
+static irqreturn_t tsi721_bdma_msix(int irq, void *ptr);
+#endif
+static int tsi721_submit_sg(struct tsi721_tx_desc *desc);
+
+static unsigned int dma_desc_per_channel = 128;
+module_param(dma_desc_per_channel, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(dma_desc_per_channel,
+                "Number of DMA descriptors per channel (default: 128)");
+
 static inline struct tsi721_bdma_chan *to_tsi721_chan(struct dma_chan *chan)
 {
        return container_of(chan, struct tsi721_bdma_chan, dchan);
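Note: the new knob uses the stock module_param() machinery; S_IWUSR | S_IRUGO is the 0644 pairing, so once the driver is loaded the value appears under /sys/module/<module name>/parameters/dma_desc_per_channel, readable by everyone and writable by root. An equivalent declaration with octal permissions (a sketch, not a proposed change):

    static unsigned int dma_desc_per_channel = 128;
    module_param(dma_desc_per_channel, uint, 0644);
    MODULE_PARM_DESC(dma_desc_per_channel,
                     "Number of DMA descriptors per channel (default: 128)");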
@@ -59,7 +71,7 @@ struct tsi721_tx_desc *tsi721_dma_first_active(
                                struct tsi721_tx_desc, desc_node);
 }
 
-static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
+static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 {
        struct tsi721_dma_desc *bd_ptr;
        struct device *dev = bdma_chan->dchan.device->dev;
@@ -67,17 +79,23 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
        dma_addr_t      bd_phys;
        dma_addr_t      sts_phys;
        int             sts_size;
-       int             bd_num = bdma_chan->bd_num;
+#ifdef CONFIG_PCI_MSI
+       struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
+#endif
 
        dev_dbg(dev, "Init Block DMA Engine, CH%d\n", bdma_chan->id);
 
-       /* Allocate space for DMA descriptors */
+       /*
+        * Allocate space for DMA descriptors
+        * (add an extra element for link descriptor)
+        */
        bd_ptr = dma_zalloc_coherent(dev,
-                               bd_num * sizeof(struct tsi721_dma_desc),
+                               (bd_num + 1) * sizeof(struct tsi721_dma_desc),
                                &bd_phys, GFP_KERNEL);
        if (!bd_ptr)
                return -ENOMEM;
 
+       bdma_chan->bd_num = bd_num;
        bdma_chan->bd_phys = bd_phys;
        bdma_chan->bd_base = bd_ptr;
 
@@ -85,8 +103,8 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
                bd_ptr, (unsigned long long)bd_phys);
 
        /* Allocate space for descriptor status FIFO */
-       sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
-                                       bd_num : TSI721_DMA_MINSTSSZ;
+       sts_size = ((bd_num + 1) >= TSI721_DMA_MINSTSSZ) ?
+                                       (bd_num + 1) : TSI721_DMA_MINSTSSZ;
        sts_size = roundup_pow_of_two(sts_size);
        sts_ptr = dma_zalloc_coherent(dev,
                                     sts_size * sizeof(struct tsi721_dma_sts),
@@ -94,7 +112,7 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
        if (!sts_ptr) {
                /* Free space allocated for DMA descriptors */
                dma_free_coherent(dev,
-                                 bd_num * sizeof(struct tsi721_dma_desc),
+                                 (bd_num + 1) * sizeof(struct tsi721_dma_desc),
                                  bd_ptr, bd_phys);
                bdma_chan->bd_base = NULL;
                return -ENOMEM;
@@ -108,11 +126,11 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
                "desc status FIFO @ %p (phys = %llx) size=0x%x\n",
                sts_ptr, (unsigned long long)sts_phys, sts_size);
 
-       /* Initialize DMA descriptors ring */
-       bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
-       bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
+       /* Initialize DMA descriptors ring using added link descriptor */
+       bd_ptr[bd_num].type_id = cpu_to_le32(DTYPE3 << 29);
+       bd_ptr[bd_num].next_lo = cpu_to_le32((u64)bd_phys &
                                                 TSI721_DMAC_DPTRL_MASK);
-       bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
+       bd_ptr[bd_num].next_hi = cpu_to_le32((u64)bd_phys >> 32);
 
        /* Setup DMA descriptor pointers */
        iowrite32(((u64)bd_phys >> 32),
@@ -134,6 +152,55 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
 
        ioread32(bdma_chan->regs + TSI721_DMAC_INT);
 
+#ifdef CONFIG_PCI_MSI
+       /* Request interrupt service if we are in MSI-X mode */
+       if (priv->flags & TSI721_USING_MSIX) {
+               int rc, idx;
+
+               idx = TSI721_VECT_DMA0_DONE + bdma_chan->id;
+
+               rc = request_irq(priv->msix[idx].vector, tsi721_bdma_msix, 0,
+                                priv->msix[idx].irq_name, (void *)bdma_chan);
+
+               if (rc) {
+                       dev_dbg(dev, "Unable to get MSI-X for BDMA%d-DONE\n",
+                               bdma_chan->id);
+                       goto err_out;
+               }
+
+               idx = TSI721_VECT_DMA0_INT + bdma_chan->id;
+
+               rc = request_irq(priv->msix[idx].vector, tsi721_bdma_msix, 0,
+                               priv->msix[idx].irq_name, (void *)bdma_chan);
+
+               if (rc) {
+                       dev_dbg(dev, "Unable to get MSI-X for BDMA%d-INT\n",
+                               bdma_chan->id);
+                       free_irq(
+                               priv->msix[TSI721_VECT_DMA0_DONE +
+                                           bdma_chan->id].vector,
+                               (void *)bdma_chan);
+               }
+
+err_out:
+               if (rc) {
+                       /* Free space allocated for DMA descriptors */
+                       dma_free_coherent(dev,
+                               (bd_num + 1) * sizeof(struct tsi721_dma_desc),
+                               bd_ptr, bd_phys);
+                       bdma_chan->bd_base = NULL;
+
+                       /* Free space allocated for status descriptors */
+                       dma_free_coherent(dev,
+                               sts_size * sizeof(struct tsi721_dma_sts),
+                               sts_ptr, sts_phys);
+                       bdma_chan->sts_base = NULL;
+
+                       return -EIO;
+               }
+       }
+#endif /* CONFIG_PCI_MSI */
+
        /* Toggle DMA channel initialization */
        iowrite32(TSI721_DMAC_CTL_INIT, bdma_chan->regs + TSI721_DMAC_CTL);
        ioread32(bdma_chan->regs + TSI721_DMAC_CTL);
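Note: with the extra element, each channel now carries bd_num data descriptors plus one trailing DTYPE3 "link" descriptor whose next pointer is the ring's own base address, so the engine wraps in hardware:

    /*
     * Layout of the (bd_num + 1)-entry ring (sketch):
     *
     *   [0] [1] ... [bd_num - 1] [bd_num: DTYPE3 link -> phys of [0]]
     */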
@@ -147,6 +214,9 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
 static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *bdma_chan)
 {
        u32 ch_stat;
+#ifdef CONFIG_PCI_MSI
+       struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
+#endif
 
        if (bdma_chan->bd_base == NULL)
                return 0;
@@ -159,9 +229,18 @@ static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *bdma_chan)
        /* Put DMA channel into init state */
        iowrite32(TSI721_DMAC_CTL_INIT, bdma_chan->regs + TSI721_DMAC_CTL);
 
+#ifdef CONFIG_PCI_MSI
+       if (priv->flags & TSI721_USING_MSIX) {
+               free_irq(priv->msix[TSI721_VECT_DMA0_DONE +
+                                   bdma_chan->id].vector, (void *)bdma_chan);
+               free_irq(priv->msix[TSI721_VECT_DMA0_INT +
+                                   bdma_chan->id].vector, (void *)bdma_chan);
+       }
+#endif /* CONFIG_PCI_MSI */
+
        /* Free space allocated for DMA descriptors */
        dma_free_coherent(bdma_chan->dchan.device->dev,
-               bdma_chan->bd_num * sizeof(struct tsi721_dma_desc),
+               (bdma_chan->bd_num + 1) * sizeof(struct tsi721_dma_desc),
                bdma_chan->bd_base, bdma_chan->bd_phys);
        bdma_chan->bd_base = NULL;
 
@@ -243,8 +322,8 @@ static void tsi721_start_dma(struct tsi721_bdma_chan *bdma_chan)
        }
 
        dev_dbg(bdma_chan->dchan.device->dev,
-               "tx_chan: %p, chan: %d, regs: %p\n",
-               bdma_chan, bdma_chan->dchan.chan_id, bdma_chan->regs);
+               "%s: chan_%d (wrc=%d)\n", __func__, bdma_chan->id,
+               bdma_chan->wr_count_next);
 
        iowrite32(bdma_chan->wr_count_next,
                bdma_chan->regs + TSI721_DMAC_DWRCNT);
@@ -253,72 +332,19 @@ static void tsi721_start_dma(struct tsi721_bdma_chan *bdma_chan)
        bdma_chan->wr_count = bdma_chan->wr_count_next;
 }
 
-static void tsi721_desc_put(struct tsi721_bdma_chan *bdma_chan,
-                           struct tsi721_tx_desc *desc)
-{
-       dev_dbg(bdma_chan->dchan.device->dev,
-               "Put desc: %p into free list\n", desc);
-
-       if (desc) {
-               spin_lock_bh(&bdma_chan->lock);
-               list_splice_init(&desc->tx_list, &bdma_chan->free_list);
-               list_add(&desc->desc_node, &bdma_chan->free_list);
-               bdma_chan->wr_count_next = bdma_chan->wr_count;
-               spin_unlock_bh(&bdma_chan->lock);
-       }
-}
-
-static
-struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *bdma_chan)
-{
-       struct tsi721_tx_desc *tx_desc, *_tx_desc;
-       struct tsi721_tx_desc *ret = NULL;
-       int i;
-
-       spin_lock_bh(&bdma_chan->lock);
-       list_for_each_entry_safe(tx_desc, _tx_desc,
-                                &bdma_chan->free_list, desc_node) {
-               if (async_tx_test_ack(&tx_desc->txd)) {
-                       list_del(&tx_desc->desc_node);
-                       ret = tx_desc;
-                       break;
-               }
-               dev_dbg(bdma_chan->dchan.device->dev,
-                       "desc %p not ACKed\n", tx_desc);
-       }
-
-       if (ret == NULL) {
-               dev_dbg(bdma_chan->dchan.device->dev,
-                       "%s: unable to obtain tx descriptor\n", __func__);
-               goto err_out;
-       }
-
-       i = bdma_chan->wr_count_next % bdma_chan->bd_num;
-       if (i == bdma_chan->bd_num - 1) {
-               i = 0;
-               bdma_chan->wr_count_next++; /* skip link descriptor */
-       }
-
-       bdma_chan->wr_count_next++;
-       tx_desc->txd.phys = bdma_chan->bd_phys +
-                               i * sizeof(struct tsi721_dma_desc);
-       tx_desc->hw_desc = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[i];
-err_out:
-       spin_unlock_bh(&bdma_chan->lock);
-
-       return ret;
-}
-
 static int
-tsi721_desc_fill_init(struct tsi721_tx_desc *desc, struct scatterlist *sg,
-       enum dma_rtype rtype, u32 sys_size)
+tsi721_desc_fill_init(struct tsi721_tx_desc *desc,
+                     struct tsi721_dma_desc *bd_ptr,
+                     struct scatterlist *sg, u32 sys_size)
 {
-       struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
        u64 rio_addr;
 
+       if (bd_ptr == NULL)
+               return -EINVAL;
+
        /* Initialize DMA descriptor */
        bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
-                                       (rtype << 19) | desc->destid);
+                                     (desc->rtype << 19) | desc->destid);
        bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
                                     (sys_size << 26));
        rio_addr = (desc->rio_addr >> 2) |
@@ -335,51 +361,32 @@ tsi721_desc_fill_init(struct tsi721_tx_desc *desc, struct scatterlist *sg,
 }
 
 static int
-tsi721_desc_fill_end(struct tsi721_tx_desc *desc)
+tsi721_desc_fill_end(struct tsi721_dma_desc *bd_ptr, u32 bcount, bool interrupt)
 {
-       struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
+       if (bd_ptr == NULL)
+               return -EINVAL;
 
        /* Update DMA descriptor */
-       if (desc->interrupt)
+       if (interrupt)
                bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
-       bd_ptr->bcount |= cpu_to_le32(desc->bcount & TSI721_DMAD_BCOUNT1);
+       bd_ptr->bcount |= cpu_to_le32(bcount & TSI721_DMAD_BCOUNT1);
 
        return 0;
 }
 
-
-static void tsi721_dma_chain_complete(struct tsi721_bdma_chan *bdma_chan,
-                                     struct tsi721_tx_desc *desc)
+static void tsi721_dma_tx_err(struct tsi721_bdma_chan *bdma_chan,
+                             struct tsi721_tx_desc *desc)
 {
        struct dma_async_tx_descriptor *txd = &desc->txd;
        dma_async_tx_callback callback = txd->callback;
        void *param = txd->callback_param;
 
-       list_splice_init(&desc->tx_list, &bdma_chan->free_list);
        list_move(&desc->desc_node, &bdma_chan->free_list);
-       bdma_chan->completed_cookie = txd->cookie;
 
        if (callback)
                callback(param);
 }
 
-static void tsi721_dma_complete_all(struct tsi721_bdma_chan *bdma_chan)
-{
-       struct tsi721_tx_desc *desc, *_d;
-       LIST_HEAD(list);
-
-       BUG_ON(!tsi721_dma_is_idle(bdma_chan));
-
-       if (!list_empty(&bdma_chan->queue))
-               tsi721_start_dma(bdma_chan);
-
-       list_splice_init(&bdma_chan->active_list, &list);
-       list_splice_init(&bdma_chan->queue, &bdma_chan->active_list);
-
-       list_for_each_entry_safe(desc, _d, &list, desc_node)
-               tsi721_dma_chain_complete(bdma_chan, desc);
-}
-
 static void tsi721_clr_stat(struct tsi721_bdma_chan *bdma_chan)
 {
        u32 srd_ptr;
@@ -403,20 +410,159 @@ static void tsi721_clr_stat(struct tsi721_bdma_chan *bdma_chan)
        bdma_chan->sts_rdptr = srd_ptr;
 }
 
+/* Must be called with the channel spinlock held */
+static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
+{
+       struct dma_chan *dchan = desc->txd.chan;
+       struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
+       u32 sys_size;
+       u64 rio_addr;
+       dma_addr_t next_addr;
+       u32 bcount;
+       struct scatterlist *sg;
+       unsigned int i;
+       int err = 0;
+       struct tsi721_dma_desc *bd_ptr = NULL;
+       u32 idx, rd_idx;
+       u32 add_count = 0;
+
+       if (!tsi721_dma_is_idle(bdma_chan)) {
+               dev_err(bdma_chan->dchan.device->dev,
+                       "BUG: Attempt to use non-idle channel\n");
+               return -EIO;
+       }
+
+       /*
+        * Fill DMA channel's hardware buffer descriptors.
+        * (NOTE: RapidIO destination address is limited to 64 bits for now)
+        */
+       rio_addr = desc->rio_addr;
+       next_addr = -1;
+       bcount = 0;
+       sys_size = dma_to_mport(bdma_chan->dchan.device)->sys_size;
+
+       rd_idx = ioread32(bdma_chan->regs + TSI721_DMAC_DRDCNT);
+       rd_idx %= (bdma_chan->bd_num + 1);
+
+       idx = bdma_chan->wr_count_next % (bdma_chan->bd_num + 1);
+       if (idx == bdma_chan->bd_num) {
+               /* wrap around link descriptor */
+               idx = 0;
+               add_count++;
+       }
+
+       dev_dbg(dchan->device->dev, "%s: BD ring status: rdi=%d wri=%d\n",
+               __func__, rd_idx, idx);
+
+       for_each_sg(desc->sg, sg, desc->sg_len, i) {
+
+               dev_dbg(dchan->device->dev, "sg%d/%d addr: 0x%llx len: %d\n",
+                       i, desc->sg_len,
+                       (unsigned long long)sg_dma_address(sg), sg_dma_len(sg));
+
+               if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
+                       dev_err(dchan->device->dev,
+                               "%s: SG entry %d is too large\n", __func__, i);
+                       err = -EINVAL;
+                       break;
+               }
+
+               /*
+                * If this sg entry forms contiguous block with previous one,
+                * try to merge it into existing DMA descriptor
+                */
+               if (next_addr == sg_dma_address(sg) &&
+                   bcount + sg_dma_len(sg) <= TSI721_BDMA_MAX_BCOUNT) {
+                       /* Adjust byte count of the descriptor */
+                       bcount += sg_dma_len(sg);
+                       goto entry_done;
+               } else if (next_addr != -1) {
+                       /* Finalize descriptor using total byte count value */
+                       tsi721_desc_fill_end(bd_ptr, bcount, 0);
+                       dev_dbg(dchan->device->dev,
+                               "%s: prev desc final len: %d\n",
+                               __func__, bcount);
+               }
+
+               desc->rio_addr = rio_addr;
+
+               if (i && idx == rd_idx) {
+                       dev_dbg(dchan->device->dev,
+                               "%s: HW descriptor ring is full @ %d\n",
+                               __func__, i);
+                       desc->sg = sg;
+                       desc->sg_len -= i;
+                       break;
+               }
+
+               bd_ptr = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[idx];
+               err = tsi721_desc_fill_init(desc, bd_ptr, sg, sys_size);
+               if (err) {
+                       dev_err(dchan->device->dev,
+                               "Failed to build desc: err=%d\n", err);
+                       break;
+               }
+
+               dev_dbg(dchan->device->dev, "bd_ptr = %p did=%d raddr=0x%llx\n",
+                       bd_ptr, desc->destid, desc->rio_addr);
+
+               next_addr = sg_dma_address(sg);
+               bcount = sg_dma_len(sg);
+
+               add_count++;
+               if (++idx == bdma_chan->bd_num) {
+                       /* wrap around link descriptor */
+                       idx = 0;
+                       add_count++;
+               }
+
+entry_done:
+               if (sg_is_last(sg)) {
+                       tsi721_desc_fill_end(bd_ptr, bcount, 0);
+                       dev_dbg(dchan->device->dev, "%s: last desc final len: %d\n",
+                               __func__, bcount);
+                       desc->sg_len = 0;
+               } else {
+                       rio_addr += sg_dma_len(sg);
+                       next_addr += sg_dma_len(sg);
+               }
+       }
+
+       if (!err)
+               bdma_chan->wr_count_next += add_count;
+
+       return err;
+}
+
 static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan)
 {
-       if (list_empty(&bdma_chan->active_list) ||
-               list_is_singular(&bdma_chan->active_list)) {
-               dev_dbg(bdma_chan->dchan.device->dev,
-                       "%s: Active_list empty\n", __func__);
-               tsi721_dma_complete_all(bdma_chan);
-       } else {
-               dev_dbg(bdma_chan->dchan.device->dev,
-                       "%s: Active_list NOT empty\n", __func__);
-               tsi721_dma_chain_complete(bdma_chan,
-                                       tsi721_dma_first_active(bdma_chan));
-               tsi721_start_dma(bdma_chan);
+       struct tsi721_tx_desc *desc;
+       int err;
+
+       dev_dbg(bdma_chan->dchan.device->dev, "%s: Enter\n", __func__);
+
+       /*
+        * If there are any new transactions in the queue add them
+        * into the processing list
+        */
+       if (!list_empty(&bdma_chan->queue))
+               list_splice_init(&bdma_chan->queue, &bdma_chan->active_list);
+
+       /* Start new transaction (if available) */
+       if (!list_empty(&bdma_chan->active_list)) {
+               desc = tsi721_dma_first_active(bdma_chan);
+               err = tsi721_submit_sg(desc);
+               if (!err)
+                       tsi721_start_dma(bdma_chan);
+               else {
+                       tsi721_dma_tx_err(bdma_chan, desc);
+                       dev_dbg(bdma_chan->dchan.device->dev,
+                               "ERR: tsi721_submit_sg failed with err=%d\n",
+                               err);
+               }
        }
+
+       dev_dbg(bdma_chan->dchan.device->dev, "%s: Exit\n", __func__);
 }
 
 static void tsi721_dma_tasklet(unsigned long data)
@@ -444,8 +590,29 @@ static void tsi721_dma_tasklet(unsigned long data)
        }
 
        if (dmac_int & (TSI721_DMAC_INT_DONE | TSI721_DMAC_INT_IOFDONE)) {
+               struct tsi721_tx_desc *desc;
+
                tsi721_clr_stat(bdma_chan);
                spin_lock(&bdma_chan->lock);
+               desc = tsi721_dma_first_active(bdma_chan);
+
+               if (desc->sg_len == 0) {
+                       dma_async_tx_callback callback = NULL;
+                       void *param = NULL;
+
+                       desc->status = DMA_COMPLETE;
+                       dma_cookie_complete(&desc->txd);
+                       if (desc->txd.flags & DMA_PREP_INTERRUPT) {
+                               callback = desc->txd.callback;
+                               param = desc->txd.callback_param;
+                       }
+                       list_move(&desc->desc_node, &bdma_chan->free_list);
+                       spin_unlock(&bdma_chan->lock);
+                       if (callback)
+                               callback(param);
+                       spin_lock(&bdma_chan->lock);
+               }
+
                tsi721_advance_work(bdma_chan);
                spin_unlock(&bdma_chan->lock);
        }
@@ -460,21 +627,24 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
        struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(txd->chan);
        dma_cookie_t cookie;
 
-       spin_lock_bh(&bdma_chan->lock);
+       /* Check if the descriptor is detached from any lists */
+       if (!list_empty(&desc->desc_node)) {
+               dev_err(bdma_chan->dchan.device->dev,
+                       "%s: wrong state of descriptor %p\n", __func__, txd);
+               return -EIO;
+       }
 
-       cookie = txd->chan->cookie;
-       if (++cookie < 0)
-               cookie = 1;
-       txd->chan->cookie = cookie;
-       txd->cookie = cookie;
+       spin_lock_bh(&bdma_chan->lock);
 
-       if (list_empty(&bdma_chan->active_list)) {
-               list_add_tail(&desc->desc_node, &bdma_chan->active_list);
-               tsi721_start_dma(bdma_chan);
-       } else {
-               list_add_tail(&desc->desc_node, &bdma_chan->queue);
+       if (!bdma_chan->active) {
+               spin_unlock_bh(&bdma_chan->lock);
+               return -ENODEV;
        }
 
+       cookie = dma_cookie_assign(txd);
+       desc->status = DMA_IN_PROGRESS;
+       list_add_tail(&desc->desc_node, &bdma_chan->queue);
+
        spin_unlock_bh(&bdma_chan->lock);
        return cookie;
 }
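Note: the open-coded cookie arithmetic removed here is replaced by the common dmaengine helpers (hence the new #include of ../../dma/dmaengine.h above). Roughly, and simplified from that header:

    /* simplified sketch of drivers/dma/dmaengine.h, not verbatim */
    static inline dma_cookie_t
    dma_cookie_assign(struct dma_async_tx_descriptor *tx)
    {
            struct dma_chan *chan = tx->chan;
            dma_cookie_t cookie = chan->cookie + 1;

            if (cookie < DMA_MIN_COOKIE)    /* cookies stay positive */
                    cookie = DMA_MIN_COOKIE;
            tx->cookie = chan->cookie = cookie;
            return cookie;
    }

dma_cookie_init() resets chan->cookie and chan->completed_cookie to their starting value, and dma_cookie_complete() records tx->cookie as the channel's last completed cookie, which is what lets the per-channel completed_cookie field be dropped from tsi721.h above.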
@@ -482,115 +652,52 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
 static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
 {
        struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
-#ifdef CONFIG_PCI_MSI
-       struct tsi721_device *priv = to_tsi721(dchan->device);
-#endif
        struct tsi721_tx_desc *desc = NULL;
-       LIST_HEAD(tmp_list);
        int i;
-       int rc;
+
+       dev_dbg(dchan->device->dev, "%s: for channel %d\n",
+               __func__, bdma_chan->id);
 
        if (bdma_chan->bd_base)
-               return bdma_chan->bd_num - 1;
+               return TSI721_DMA_TX_QUEUE_SZ;
 
        /* Initialize BDMA channel */
-       if (tsi721_bdma_ch_init(bdma_chan)) {
+       if (tsi721_bdma_ch_init(bdma_chan, dma_desc_per_channel)) {
                dev_err(dchan->device->dev, "Unable to initialize data DMA"
                        " channel %d, aborting\n", bdma_chan->id);
-               return -ENOMEM;
+               return -ENODEV;
        }
 
-       /* Alocate matching number of logical descriptors */
-       desc = kcalloc((bdma_chan->bd_num - 1), sizeof(struct tsi721_tx_desc),
+       /* Allocate queue of transaction descriptors */
+       desc = kcalloc(TSI721_DMA_TX_QUEUE_SZ, sizeof(struct tsi721_tx_desc),
                        GFP_KERNEL);
        if (!desc) {
                dev_err(dchan->device->dev,
                        "Failed to allocate logical descriptors\n");
-               rc = -ENOMEM;
-               goto err_out;
+               tsi721_bdma_ch_free(bdma_chan);
+               return -ENOMEM;
        }
 
        bdma_chan->tx_desc = desc;
 
-       for (i = 0; i < bdma_chan->bd_num - 1; i++) {
+       for (i = 0; i < TSI721_DMA_TX_QUEUE_SZ; i++) {
                dma_async_tx_descriptor_init(&desc[i].txd, dchan);
                desc[i].txd.tx_submit = tsi721_tx_submit;
                desc[i].txd.flags = DMA_CTRL_ACK;
-               INIT_LIST_HEAD(&desc[i].tx_list);
-               list_add_tail(&desc[i].desc_node, &tmp_list);
+               list_add(&desc[i].desc_node, &bdma_chan->free_list);
        }
 
-       spin_lock_bh(&bdma_chan->lock);
-       list_splice(&tmp_list, &bdma_chan->free_list);
-       bdma_chan->completed_cookie = dchan->cookie = 1;
-       spin_unlock_bh(&bdma_chan->lock);
-
-#ifdef CONFIG_PCI_MSI
-       if (priv->flags & TSI721_USING_MSIX) {
-               /* Request interrupt service if we are in MSI-X mode */
-               rc = request_irq(
-                       priv->msix[TSI721_VECT_DMA0_DONE +
-                                  bdma_chan->id].vector,
-                       tsi721_bdma_msix, 0,
-                       priv->msix[TSI721_VECT_DMA0_DONE +
-                                  bdma_chan->id].irq_name,
-                       (void *)bdma_chan);
-
-               if (rc) {
-                       dev_dbg(dchan->device->dev,
-                               "Unable to allocate MSI-X interrupt for "
-                               "BDMA%d-DONE\n", bdma_chan->id);
-                       goto err_out;
-               }
-
-               rc = request_irq(priv->msix[TSI721_VECT_DMA0_INT +
-                                           bdma_chan->id].vector,
-                               tsi721_bdma_msix, 0,
-                               priv->msix[TSI721_VECT_DMA0_INT +
-                                          bdma_chan->id].irq_name,
-                               (void *)bdma_chan);
-
-               if (rc) {
-                       dev_dbg(dchan->device->dev,
-                               "Unable to allocate MSI-X interrupt for "
-                               "BDMA%d-INT\n", bdma_chan->id);
-                       free_irq(
-                               priv->msix[TSI721_VECT_DMA0_DONE +
-                                          bdma_chan->id].vector,
-                               (void *)bdma_chan);
-                       rc = -EIO;
-                       goto err_out;
-               }
-       }
-#endif /* CONFIG_PCI_MSI */
+       dma_cookie_init(dchan);
 
        bdma_chan->active = true;
        tsi721_bdma_interrupt_enable(bdma_chan, 1);
 
-       return bdma_chan->bd_num - 1;
-
-err_out:
-       kfree(desc);
-       tsi721_bdma_ch_free(bdma_chan);
-       return rc;
+       return TSI721_DMA_TX_QUEUE_SZ;
 }
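
Likewise, dma_cookie_init() replaces the manual "completed_cookie =
dchan->cookie = 1" initialization that the old allocation path performed;
again a sketch of the generic helper:

    static inline void dma_cookie_init(struct dma_chan *chan)
    {
            chan->cookie = DMA_MIN_COOKIE;
            chan->completed_cookie = DMA_MIN_COOKIE;
    }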
 
-static void tsi721_free_chan_resources(struct dma_chan *dchan)
+static void tsi721_sync_dma_irq(struct tsi721_bdma_chan *bdma_chan)
 {
-       struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
-       struct tsi721_device *priv = to_tsi721(dchan->device);
-       LIST_HEAD(list);
-
-       dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
-
-       if (bdma_chan->bd_base == NULL)
-               return;
-
-       BUG_ON(!list_empty(&bdma_chan->active_list));
-       BUG_ON(!list_empty(&bdma_chan->queue));
-
-       tsi721_bdma_interrupt_enable(bdma_chan, 0);
-       bdma_chan->active = false;
+       struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
 
 #ifdef CONFIG_PCI_MSI
        if (priv->flags & TSI721_USING_MSIX) {
@@ -601,64 +708,48 @@ static void tsi721_free_chan_resources(struct dma_chan *dchan)
        } else
 #endif
        synchronize_irq(priv->pdev->irq);
+}
 
-       tasklet_kill(&bdma_chan->tasklet);
+static void tsi721_free_chan_resources(struct dma_chan *dchan)
+{
+       struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
 
-       spin_lock_bh(&bdma_chan->lock);
-       list_splice_init(&bdma_chan->free_list, &list);
-       spin_unlock_bh(&bdma_chan->lock);
+       dev_dbg(dchan->device->dev, "%s: for channel %d\n",
+               __func__, bdma_chan->id);
 
-#ifdef CONFIG_PCI_MSI
-       if (priv->flags & TSI721_USING_MSIX) {
-               free_irq(priv->msix[TSI721_VECT_DMA0_DONE +
-                                   bdma_chan->id].vector, (void *)bdma_chan);
-               free_irq(priv->msix[TSI721_VECT_DMA0_INT +
-                                   bdma_chan->id].vector, (void *)bdma_chan);
-       }
-#endif /* CONFIG_PCI_MSI */
+       if (bdma_chan->bd_base == NULL)
+               return;
 
-       tsi721_bdma_ch_free(bdma_chan);
+       BUG_ON(!list_empty(&bdma_chan->active_list));
+       BUG_ON(!list_empty(&bdma_chan->queue));
+
+       tsi721_bdma_interrupt_enable(bdma_chan, 0);
+       bdma_chan->active = false;
+       tsi721_sync_dma_irq(bdma_chan);
+       tasklet_kill(&bdma_chan->tasklet);
+       INIT_LIST_HEAD(&bdma_chan->free_list);
        kfree(bdma_chan->tx_desc);
+       tsi721_bdma_ch_free(bdma_chan);
 }
 
 static
 enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
                                 struct dma_tx_state *txstate)
 {
-       struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
-       dma_cookie_t            last_used;
-       dma_cookie_t            last_completed;
-       int                     ret;
-
-       spin_lock_bh(&bdma_chan->lock);
-       last_completed = bdma_chan->completed_cookie;
-       last_used = dchan->cookie;
-       spin_unlock_bh(&bdma_chan->lock);
-
-       ret = dma_async_is_complete(cookie, last_completed, last_used);
-
-       dma_set_tx_state(txstate, last_completed, last_used, 0);
-
-       dev_dbg(dchan->device->dev,
-               "%s: exit, ret: %d, last_completed: %d, last_used: %d\n",
-               __func__, ret, last_completed, last_used);
-
-       return ret;
+       return dma_cookie_status(dchan, cookie, txstate);
 }
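
dma_cookie_status() folds together the cookie snapshot, dma_async_is_complete()
and dma_set_tx_state() calls that the driver used to open-code; approximately:

    static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
            dma_cookie_t cookie, struct dma_tx_state *state)
    {
            dma_cookie_t used, complete;

            used = chan->cookie;
            complete = chan->completed_cookie;
            barrier();
            if (state) {
                    state->last = complete;
                    state->used = used;
                    state->residue = 0;
            }
            return dma_async_is_complete(cookie, complete, used);
    }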
 
 static void tsi721_issue_pending(struct dma_chan *dchan)
 {
        struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
 
-       dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
+       dev_dbg(dchan->device->dev, "%s: Enter\n", __func__);
 
-       if (tsi721_dma_is_idle(bdma_chan)) {
+       if (tsi721_dma_is_idle(bdma_chan) && bdma_chan->active) {
                spin_lock_bh(&bdma_chan->lock);
                tsi721_advance_work(bdma_chan);
                spin_unlock_bh(&bdma_chan->lock);
-       } else
-               dev_dbg(dchan->device->dev,
-                       "%s: DMA channel still busy\n", __func__);
+       }
 }
 
 static
@@ -668,21 +759,19 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
                        void *tinfo)
 {
        struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
-       struct tsi721_tx_desc *desc = NULL;
-       struct tsi721_tx_desc *first = NULL;
-       struct scatterlist *sg;
+       struct tsi721_tx_desc *desc, *_d;
        struct rio_dma_ext *rext = tinfo;
-       u64 rio_addr = rext->rio_addr; /* limited to 64-bit rio_addr for now */
-       unsigned int i;
-       u32 sys_size = dma_to_mport(dchan->device)->sys_size;
        enum dma_rtype rtype;
-       dma_addr_t next_addr = -1;
+       struct dma_async_tx_descriptor *txd = NULL;
 
        if (!sgl || !sg_len) {
                dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
                return NULL;
        }
 
+       dev_dbg(dchan->device->dev, "%s: %s\n", __func__,
+               (dir == DMA_DEV_TO_MEM) ? "READ" : "WRITE");
+
        if (dir == DMA_DEV_TO_MEM)
                rtype = NREAD;
        else if (dir == DMA_MEM_TO_DEV) {
@@ -704,97 +793,26 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
                return NULL;
        }
 
-       for_each_sg(sgl, sg, sg_len, i) {
-               int err;
-
-               if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
-                       dev_err(dchan->device->dev,
-                               "%s: SG entry %d is too large\n", __func__, i);
-                       goto err_desc_put;
-               }
-
-               /*
-                * If this sg entry forms contiguous block with previous one,
-                * try to merge it into existing DMA descriptor
-                */
-               if (desc) {
-                       if (next_addr == sg_dma_address(sg) &&
-                           desc->bcount + sg_dma_len(sg) <=
-                                               TSI721_BDMA_MAX_BCOUNT) {
-                               /* Adjust byte count of the descriptor */
-                               desc->bcount += sg_dma_len(sg);
-                               goto entry_done;
-                       }
-
-                       /*
-                        * Finalize this descriptor using total
-                        * byte count value.
-                        */
-                       tsi721_desc_fill_end(desc);
-                       dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
-                               __func__, desc->bcount);
-               }
-
-               /*
-                * Obtain and initialize a new descriptor
-                */
-               desc = tsi721_desc_get(bdma_chan);
-               if (!desc) {
-                       dev_err(dchan->device->dev,
-                               "%s: Failed to get new descriptor for SG %d\n",
-                               __func__, i);
-                       goto err_desc_put;
-               }
-
-               desc->destid = rext->destid;
-               desc->rio_addr = rio_addr;
-               desc->rio_addr_u = 0;
-               desc->bcount = sg_dma_len(sg);
-
-               dev_dbg(dchan->device->dev,
-                       "sg%d desc: 0x%llx, addr: 0x%llx len: %d\n",
-                       i, (u64)desc->txd.phys,
-                       (unsigned long long)sg_dma_address(sg),
-                       sg_dma_len(sg));
-
-               dev_dbg(dchan->device->dev,
-                       "bd_ptr = %p did=%d raddr=0x%llx\n",
-                       desc->hw_desc, desc->destid, desc->rio_addr);
-
-               err = tsi721_desc_fill_init(desc, sg, rtype, sys_size);
-               if (err) {
-                       dev_err(dchan->device->dev,
-                               "Failed to build desc: %d\n", err);
-                       goto err_desc_put;
-               }
-
-               next_addr = sg_dma_address(sg);
-
-               if (!first)
-                       first = desc;
-               else
-                       list_add_tail(&desc->desc_node, &first->tx_list);
+       spin_lock_bh(&bdma_chan->lock);
 
-entry_done:
-               if (sg_is_last(sg)) {
-                       desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
-                       tsi721_desc_fill_end(desc);
-                       dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
-                               __func__, desc->bcount);
-               } else {
-                       rio_addr += sg_dma_len(sg);
-                       next_addr += sg_dma_len(sg);
+       list_for_each_entry_safe(desc, _d, &bdma_chan->free_list, desc_node) {
+               if (async_tx_test_ack(&desc->txd)) {
+                       list_del_init(&desc->desc_node);
+                       desc->destid = rext->destid;
+                       desc->rio_addr = rext->rio_addr;
+                       desc->rio_addr_u = 0;
+                       desc->rtype = rtype;
+                       desc->sg_len    = sg_len;
+                       desc->sg        = sgl;
+                       txd             = &desc->txd;
+                       txd->flags      = flags;
+                       break;
                }
        }
 
-       first->txd.cookie = -EBUSY;
-       desc->txd.flags = flags;
-
-       return &first->txd;
+       spin_unlock_bh(&bdma_chan->lock);
 
-err_desc_put:
-       tsi721_desc_put(bdma_chan, first);
-       return NULL;
+       return txd;
 }
 
 static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
@@ -802,23 +820,34 @@ static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
 {
        struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
        struct tsi721_tx_desc *desc, *_d;
+       u32 dmac_int;
        LIST_HEAD(list);
 
        dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
 
        if (cmd != DMA_TERMINATE_ALL)
-               return -ENXIO;
+               return -ENOSYS;
 
        spin_lock_bh(&bdma_chan->lock);
 
-       /* make sure to stop the transfer */
-       iowrite32(TSI721_DMAC_CTL_SUSP, bdma_chan->regs + TSI721_DMAC_CTL);
+       bdma_chan->active = false;
+
+       if (!tsi721_dma_is_idle(bdma_chan)) {
+               /* make sure to stop the transfer */
+               iowrite32(TSI721_DMAC_CTL_SUSP,
+                         bdma_chan->regs + TSI721_DMAC_CTL);
+
+               /* Wait until DMA channel stops */
+               do {
+                       dmac_int = ioread32(bdma_chan->regs + TSI721_DMAC_INT);
+               } while ((dmac_int & TSI721_DMAC_INT_SUSP) == 0);
+       }
 
        list_splice_init(&bdma_chan->active_list, &list);
        list_splice_init(&bdma_chan->queue, &list);
 
        list_for_each_entry_safe(desc, _d, &list, desc_node)
-               tsi721_dma_chain_complete(bdma_chan, desc);
+               tsi721_dma_tx_err(bdma_chan, desc);
 
        spin_unlock_bh(&bdma_chan->lock);
 
@@ -828,22 +857,18 @@ static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
 int tsi721_register_dma(struct tsi721_device *priv)
 {
        int i;
-       int nr_channels = TSI721_DMA_MAXCH;
+       int nr_channels = 0;
        int err;
        struct rio_mport *mport = priv->mport;
 
-       mport->dma.dev = &priv->pdev->dev;
-       mport->dma.chancnt = nr_channels;
-
        INIT_LIST_HEAD(&mport->dma.channels);
 
-       for (i = 0; i < nr_channels; i++) {
+       for (i = 0; i < TSI721_DMA_MAXCH; i++) {
                struct tsi721_bdma_chan *bdma_chan = &priv->bdma[i];
 
                if (i == TSI721_DMACH_MAINT)
                        continue;
 
-               bdma_chan->bd_num = TSI721_BDMA_BD_RING_SZ;
                bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i);
 
                bdma_chan->dchan.device = &mport->dma;
@@ -862,12 +887,15 @@ int tsi721_register_dma(struct tsi721_device *priv)
                             (unsigned long)bdma_chan);
                list_add_tail(&bdma_chan->dchan.device_node,
                              &mport->dma.channels);
+               nr_channels++;
        }
 
+       mport->dma.chancnt = nr_channels;
        dma_cap_zero(mport->dma.cap_mask);
        dma_cap_set(DMA_PRIVATE, mport->dma.cap_mask);
        dma_cap_set(DMA_SLAVE, mport->dma.cap_mask);
 
+       mport->dma.dev = &priv->pdev->dev;
        mport->dma.device_alloc_chan_resources = tsi721_alloc_chan_resources;
        mport->dma.device_free_chan_resources = tsi721_free_chan_resources;
        mport->dma.device_tx_status = tsi721_tx_status;
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index a54ba0494dd3e9105abea0368303712ff4b35c8f..d7b87c64b7cd261c6ec6a1b22ddce040747679d9 100644 (file)
@@ -1509,30 +1509,39 @@ EXPORT_SYMBOL_GPL(rio_route_clr_table);
 
 static bool rio_chan_filter(struct dma_chan *chan, void *arg)
 {
-       struct rio_dev *rdev = arg;
+       struct rio_mport *mport = arg;
 
        /* Check that DMA device belongs to the right MPORT */
-       return (rdev->net->hport ==
-               container_of(chan->device, struct rio_mport, dma));
+       return mport == container_of(chan->device, struct rio_mport, dma);
 }
 
 /**
- * rio_request_dma - request RapidIO capable DMA channel that supports
- *   specified target RapidIO device.
- * @rdev: RIO device control structure
+ * rio_request_mport_dma - request RapidIO capable DMA channel associated
+ *   with specified local RapidIO mport device.
+ * @mport: RIO mport to perform DMA data transfers
  *
  * Returns pointer to allocated DMA channel or NULL if failed.
  */
-struct dma_chan *rio_request_dma(struct rio_dev *rdev)
+struct dma_chan *rio_request_mport_dma(struct rio_mport *mport)
 {
        dma_cap_mask_t mask;
-       struct dma_chan *dchan;
 
        dma_cap_zero(mask);
        dma_cap_set(DMA_SLAVE, mask);
-       dchan = dma_request_channel(mask, rio_chan_filter, rdev);
+       return dma_request_channel(mask, rio_chan_filter, mport);
+}
+EXPORT_SYMBOL_GPL(rio_request_mport_dma);
 
-       return dchan;
+/**
+ * rio_request_dma - request RapidIO capable DMA channel that supports
+ *   specified target RapidIO device.
+ * @rdev: RIO device associated with DMA transfer
+ *
+ * Returns pointer to allocated DMA channel or NULL if failed.
+ */
+struct dma_chan *rio_request_dma(struct rio_dev *rdev)
+{
+       return rio_request_mport_dma(rdev->net->hport);
 }
 EXPORT_SYMBOL_GPL(rio_request_dma);
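
A minimal caller of the new mport-based request API might look like the
following (hypothetical snippet; "mport" is assumed to be a valid struct
rio_mport obtained by the caller):

    struct dma_chan *dchan;

    dchan = rio_request_mport_dma(mport);
    if (!dchan)
            return -ENODEV;
    /* ... prepare and submit transfers, see below ... */
    rio_release_dma(dchan);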
 
@@ -1547,10 +1556,10 @@ void rio_release_dma(struct dma_chan *dchan)
 EXPORT_SYMBOL_GPL(rio_release_dma);
 
 /**
- * rio_dma_prep_slave_sg - RapidIO specific wrapper
+ * rio_dma_prep_xfer - RapidIO specific wrapper
  *   for device_prep_slave_sg callback defined by DMAENGINE.
- * @rdev: RIO device control structure
  * @dchan: DMA channel to configure
+ * @destid: target RapidIO device destination ID
  * @data: RIO specific data descriptor
  * @direction: DMA data transfer direction (TO or FROM the device)
  * @flags: dmaengine defined flags
@@ -1560,11 +1569,10 @@ EXPORT_SYMBOL_GPL(rio_release_dma);
  * target RIO device.
  * Returns pointer to DMA transaction descriptor or NULL if failed.
  */
-struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
-       struct dma_chan *dchan, struct rio_dma_data *data,
+struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan,
+       u16 destid, struct rio_dma_data *data,
        enum dma_transfer_direction direction, unsigned long flags)
 {
-       struct dma_async_tx_descriptor *txd = NULL;
        struct rio_dma_ext rio_ext;
 
        if (dchan->device->device_prep_slave_sg == NULL) {
@@ -1572,15 +1580,35 @@ struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
                return NULL;
        }
 
-       rio_ext.destid = rdev->destid;
+       rio_ext.destid = destid;
        rio_ext.rio_addr_u = data->rio_addr_u;
        rio_ext.rio_addr = data->rio_addr;
        rio_ext.wr_type = data->wr_type;
 
-       txd = dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
-                                       direction, flags, &rio_ext);
+       return dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
+                                    direction, flags, &rio_ext);
+}
+EXPORT_SYMBOL_GPL(rio_dma_prep_xfer);
 
-       return txd;
+/**
+ * rio_dma_prep_slave_sg - RapidIO specific wrapper
+ *   for device_prep_slave_sg callback defined by DMAENGINE.
+ * @rdev: RIO device control structure
+ * @dchan: DMA channel to configure
+ * @data: RIO specific data descriptor
+ * @direction: DMA data transfer direction (TO or FROM the device)
+ * @flags: dmaengine defined flags
+ *
+ * Initializes RapidIO capable DMA channel for the specified data transfer.
+ * Uses DMA channel private extension to pass information related to remote
+ * target RIO device.
+ * Returns pointer to DMA transaction descriptor or NULL if failed.
+ */
+struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
+       struct dma_chan *dchan, struct rio_dma_data *data,
+       enum dma_transfer_direction direction, unsigned long flags)
+{
+       return rio_dma_prep_xfer(dchan, rdev->destid, data, direction, flags);
 }
 EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
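
Preparing and submitting a transfer through the new destid-based wrapper could
then look roughly like this (hypothetical values; "sgl" must already be
DMA-mapped by the caller):

    struct rio_dma_data data = {
            .sg         = sgl,
            .sg_len     = sg_len,
            .rio_addr   = 0x10000,          /* example target address */
            .rio_addr_u = 0,
            .wr_type    = RDW_DEFAULT,
    };
    struct dma_async_tx_descriptor *txd;

    txd = rio_dma_prep_xfer(dchan, destid, &data, DMA_MEM_TO_DEV,
                            DMA_PREP_INTERRUPT);
    if (txd) {
            dma_cookie_t cookie = dmaengine_submit(txd);

            dma_async_issue_pending(dchan);
            /* poll "cookie" later via dmaengine_tx_status(dchan, ...) */
    }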
 
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 0754f5c7cb3b9c9683555fc0275537700d1c03bc..a168e96142b95001f5ece9585cfff8a07c1bb489 100644 (file)
@@ -373,6 +373,14 @@ config RTC_DRV_PCF8563
          This driver can also be built as a module. If so, the module
          will be called rtc-pcf8563.
 
+config RTC_DRV_PCF85063
+       tristate "nxp PCF85063"
+       help
+         If you say yes here you get support for the PCF85063 RTC chip
+
+         This driver can also be built as a module. If so, the module
+         will be called rtc-pcf85063.
+
 config RTC_DRV_PCF8583
        tristate "Philips PCF8583"
        help
@@ -760,6 +768,15 @@ config RTC_DRV_DS1742
          This driver can also be built as a module. If so, the module
          will be called rtc-ds1742.
 
+config RTC_DRV_DS2404
+       tristate "Maxim/Dallas DS2404"
+       help
+         If you say yes here you get support for the
+         Dallas DS2404 RTC chip.
+
+         This driver can also be built as a module. If so, the module
+         will be called rtc-ds2404.
+
 config RTC_DRV_DA9052
        tristate "Dialog DA9052/DA9053 RTC"
        depends on PMIC_DA9052
@@ -789,7 +806,7 @@ config RTC_DRV_DA9063
 
 config RTC_DRV_EFI
        tristate "EFI RTC"
-       depends on IA64
+       depends on EFI
        help
          If you say yes here you will get support for the EFI
          Real Time Clock.
@@ -873,15 +890,6 @@ config RTC_DRV_V3020
          This driver can also be built as a module. If so, the module
          will be called rtc-v3020.
 
-config RTC_DRV_DS2404
-       tristate "Dallas DS2404"
-       help
-         If you say yes here you get support for the
-         Dallas DS2404 RTC chip.
-
-         This driver can also be built as a module. If so, the module
-         will be called rtc-ds2404.
-
 config RTC_DRV_WM831X
        tristate "Wolfson Microelectronics WM831x RTC"
        depends on MFD_WM831X
@@ -1349,6 +1357,7 @@ config RTC_DRV_SIRFSOC
 
 config RTC_DRV_MOXART
        tristate "MOXA ART RTC"
+       depends on ARCH_MOXART || COMPILE_TEST
        help
           If you say yes here you get support for the MOXA ART
           RTC module.
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 70347d041d10b86aaa2efb68ada1d56cf47225a4..56f061c7c815b9ad9a8ca1408484beccadf8a549 100644 (file)
@@ -10,6 +10,10 @@ obj-$(CONFIG_RTC_SYSTOHC)    += systohc.o
 obj-$(CONFIG_RTC_CLASS)                += rtc-core.o
 rtc-core-y                     := class.o interface.o
 
+ifdef CONFIG_RTC_DRV_EFI
+rtc-core-y                     += rtc-efi-platform.o
+endif
+
 rtc-core-$(CONFIG_RTC_INTF_DEV)        += rtc-dev.o
 rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o
 rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o
@@ -93,6 +97,7 @@ obj-$(CONFIG_RTC_DRV_PCAP)    += rtc-pcap.o
 obj-$(CONFIG_RTC_DRV_PCF2127)  += rtc-pcf2127.o
 obj-$(CONFIG_RTC_DRV_PCF8523)  += rtc-pcf8523.o
 obj-$(CONFIG_RTC_DRV_PCF8563)  += rtc-pcf8563.o
+obj-$(CONFIG_RTC_DRV_PCF85063) += rtc-pcf85063.o
 obj-$(CONFIG_RTC_DRV_PCF8583)  += rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PCF2123)  += rtc-pcf2123.o
 obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 589351ef75d03cd3c226389912e27316216df677..38e26be705be5f2c6ac37034d705639da8d3830d 100644 (file)
@@ -53,6 +53,7 @@ static int rtc_suspend(struct device *dev)
        struct rtc_device       *rtc = to_rtc_device(dev);
        struct rtc_time         tm;
        struct timespec         delta, delta_delta;
+       int err;
 
        if (has_persistent_clock())
                return 0;
@@ -61,7 +62,12 @@ static int rtc_suspend(struct device *dev)
                return 0;
 
        /* snapshot the current RTC and system time at suspend */
-       rtc_read_time(rtc, &tm);
+       err = rtc_read_time(rtc, &tm);
+       if (err < 0) {
+               pr_debug("%s:  fail to read rtc time\n", dev_name(&rtc->dev));
+               return 0;
+       }
+
        getnstimeofday(&old_system);
        rtc_tm_to_time(&tm, &old_rtc.tv_sec);
 
@@ -94,6 +100,7 @@ static int rtc_resume(struct device *dev)
        struct rtc_time         tm;
        struct timespec         new_system, new_rtc;
        struct timespec         sleep_time;
+       int err;
 
        if (has_persistent_clock())
                return 0;
@@ -104,7 +111,12 @@ static int rtc_resume(struct device *dev)
 
        /* snapshot the current rtc and system time at resume */
        getnstimeofday(&new_system);
-       rtc_read_time(rtc, &tm);
+       err = rtc_read_time(rtc, &tm);
+       if (err < 0) {
+               pr_debug("%s:  fail to read rtc time\n", dev_name(&rtc->dev));
+               return 0;
+       }
+
        if (rtc_valid_tm(&tm) != 0) {
                pr_debug("%s:  bogus resume time\n", dev_name(&rtc->dev));
                return 0;
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 5813fa52c3d43328c80bdb799d38d0b1c68d129d..5b2717f5dafa73e808f9bbf29387e223109588b2 100644 (file)
@@ -348,6 +348,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 
        /* Make sure we're not setting alarms in the past */
        err = __rtc_read_time(rtc, &tm);
+       if (err)
+               return err;
        rtc_tm_to_time(&tm, &now);
        if (scheduled <= now)
                return -ETIME;
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index c3719189dd96ed63bf686ab62297c181d1f1ed2d..ae9f997223b1f4aad22b6b500855cd5eb1f548b3 100644 (file)
@@ -4,6 +4,7 @@
  * Real Time Clock
  *
  * Author : Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>
+ *         Ankur Srivastava <sankurece@gmail.com> : DS1343 Nvram Support
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -45,6 +46,9 @@
 #define DS1343_CONTROL_REG     0x0F
 #define DS1343_STATUS_REG      0x10
 #define DS1343_TRICKLE_REG     0x11
+#define DS1343_NVRAM           0x20
+
+#define DS1343_NVRAM_LEN       96
 
 /* DS1343 Control Registers bits */
 #define DS1343_EOSC            0x80
@@ -149,6 +153,64 @@ static ssize_t ds1343_store_glitchfilter(struct device *dev,
 static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
                        ds1343_store_glitchfilter);
 
+static ssize_t ds1343_nvram_write(struct file *filp, struct kobject *kobj,
+                       struct bin_attribute *attr,
+                       char *buf, loff_t off, size_t count)
+{
+       int ret;
+       unsigned char address;
+       struct device *dev = kobj_to_dev(kobj);
+       struct ds1343_priv *priv = dev_get_drvdata(dev);
+
+       if (unlikely(!count))
+               return count;
+
+       if ((count + off) > DS1343_NVRAM_LEN)
+               count = DS1343_NVRAM_LEN - off;
+
+       address = DS1343_NVRAM + off;
+
+       ret = regmap_bulk_write(priv->map, address, buf, count);
+       if (ret < 0)
+               dev_err(&priv->spi->dev, "Error in nvram write %d", ret);
+
+       return (ret < 0) ? ret : count;
+}
+
+
+static ssize_t ds1343_nvram_read(struct file *filp, struct kobject *kobj,
+                               struct bin_attribute *attr,
+                               char *buf, loff_t off, size_t count)
+{
+       int ret;
+       unsigned char address;
+       struct device *dev = kobj_to_dev(kobj);
+       struct ds1343_priv *priv = dev_get_drvdata(dev);
+
+       if (unlikely(!count))
+               return count;
+
+       if ((count + off) > DS1343_NVRAM_LEN)
+               count = DS1343_NVRAM_LEN - off;
+
+       address = DS1343_NVRAM + off;
+
+       ret = regmap_bulk_read(priv->map, address, buf, count);
+       if (ret < 0)
+               dev_err(&priv->spi->dev, "Error in nvram read %d\n", ret);
+
+       return (ret < 0) ? ret : count;
+}
+
+
+static struct bin_attribute nvram_attr = {
+       .attr.name      = "nvram",
+       .attr.mode      = S_IRUGO | S_IWUSR,
+       .read           = ds1343_nvram_read,
+       .write          = ds1343_nvram_write,
+       .size           = DS1343_NVRAM_LEN,
+};
+
 static ssize_t ds1343_show_alarmstatus(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
@@ -274,12 +336,16 @@ static int ds1343_sysfs_register(struct device *dev)
        if (err)
                goto error1;
 
+       err = device_create_bin_file(dev, &nvram_attr);
+       if (err)
+               goto error2;
+
        if (priv->irq <= 0)
                return err;
 
        err = device_create_file(dev, &dev_attr_alarm_mode);
        if (err)
-               goto error2;
+               goto error3;
 
        err = device_create_file(dev, &dev_attr_alarm_status);
        if (!err)
@@ -287,6 +353,9 @@ static int ds1343_sysfs_register(struct device *dev)
 
        device_remove_file(dev, &dev_attr_alarm_mode);
 
+error3:
+       device_remove_bin_file(dev, &nvram_attr);
+
 error2:
        device_remove_file(dev, &dev_attr_trickle_charger);
 
@@ -302,6 +371,7 @@ static void ds1343_sysfs_unregister(struct device *dev)
 
        device_remove_file(dev, &dev_attr_glitch_filter);
        device_remove_file(dev, &dev_attr_trickle_charger);
+       device_remove_bin_file(dev, &nvram_attr);
 
        if (priv->irq <= 0)
                return;
@@ -684,6 +754,7 @@ static struct spi_driver ds1343_driver = {
 module_spi_driver(ds1343_driver);
 
 MODULE_DESCRIPTION("DS1343 RTC SPI Driver");
-MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>");
+MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>,"
+               "Ankur Srivastava <sankurece@gmail.com>");
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DS1343_DRV_VERSION);
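
The new nvram attribute is an ordinary sysfs binary file, so it can be
exercised from userspace without driver-specific tooling. A hedged sketch (the
sysfs path depends on how the SPI bus and chip select are numbered on a given
board):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[96];   /* DS1343_NVRAM_LEN */
            int fd = open("/sys/bus/spi/devices/spi0.0/nvram", O_RDONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            printf("read %zd bytes of nvram\n", read(fd, buf, sizeof(buf)));
            close(fd);
            return 0;
    }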
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index c6b2191a4128215795e4407e6b5b35e3860cee9f..9822715db8baf32ce41e597a5ff1b710eee4bbfa 100644 (file)
@@ -231,7 +231,7 @@ static struct platform_driver ds1742_rtc_driver = {
        .driver         = {
                .name   = "rtc-ds1742",
                .owner  = THIS_MODULE,
-               .of_match_table = ds1742_rtc_of_match,
+               .of_match_table = of_match_ptr(ds1742_rtc_of_match),
        },
 };
 
diff --git a/drivers/rtc/rtc-efi-platform.c b/drivers/rtc/rtc-efi-platform.c
new file mode 100644 (file)
index 0000000..b40fbe3
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Moved from arch/ia64/kernel/time.c
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *     Stephane Eranian <eranian@hpl.hp.com>
+ *     David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999-2000 VA Linux Systems
+ * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/platform_device.h>
+
+static struct platform_device rtc_efi_dev = {
+       .name = "rtc-efi",
+       .id = -1,
+};
+
+static int __init rtc_init(void)
+{
+       if (efi_enabled(EFI_RUNTIME_SERVICES))
+               if (platform_device_register(&rtc_efi_dev) < 0)
+                       pr_err("unable to register rtc device...\n");
+
+       /* not necessarily an error */
+       return 0;
+}
+module_init(rtc_init);
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index c4c38431012ecc5772209a2be69898feac968543..8225b89de810c88c794c4251d56f1ce613c906b2 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/stringify.h>
 #include <linux/time.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
@@ -48,8 +49,8 @@ compute_wday(efi_time_t *eft)
        int y;
        int ndays = 0;
 
-       if (eft->year < 1998) {
-               pr_err("EFI year < 1998, invalid date\n");
+       if (eft->year < EFI_RTC_EPOCH) {
+               pr_err("EFI year < " __stringify(EFI_RTC_EPOCH) ", invalid date\n");
                return -1;
        }
 
@@ -78,19 +79,36 @@ convert_to_efi_time(struct rtc_time *wtime, efi_time_t *eft)
        eft->timezone   = EFI_UNSPECIFIED_TIMEZONE;
 }
 
-static void
+static bool
 convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
 {
        memset(wtime, 0, sizeof(*wtime));
+
+       if (eft->second >= 60)
+               return false;
        wtime->tm_sec  = eft->second;
+
+       if (eft->minute >= 60)
+               return false;
        wtime->tm_min  = eft->minute;
+
+       if (eft->hour >= 24)
+               return false;
        wtime->tm_hour = eft->hour;
+
+       if (!eft->day || eft->day > 31)
+               return false;
        wtime->tm_mday = eft->day;
+
+       if (!eft->month || eft->month > 12)
+               return false;
        wtime->tm_mon  = eft->month - 1;
        wtime->tm_year = eft->year - 1900;
 
        /* day of the week [0-6], Sunday=0 */
        wtime->tm_wday = compute_wday(eft);
+       if (wtime->tm_wday < 0)
+               return false;
 
        /* day in the year [1-365] */
        wtime->tm_yday = compute_yday(eft);
@@ -106,6 +124,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
        default:
                wtime->tm_isdst = -1;
        }
+
+       return true;
 }
 
 static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
@@ -122,7 +142,8 @@ static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
        if (status != EFI_SUCCESS)
                return -EINVAL;
 
-       convert_from_efi_time(&eft, &wkalrm->time);
+       if (!convert_from_efi_time(&eft, &wkalrm->time))
+               return -EIO;
 
        return rtc_valid_tm(&wkalrm->time);
 }
@@ -163,7 +184,8 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
                return -EINVAL;
        }
 
-       convert_from_efi_time(&eft, tm);
+       if (!convert_from_efi_time(&eft, tm))
+               return -EIO;
 
        return rtc_valid_tm(tm);
 }
diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c
index 03b8911294288afb2ba29bd86811fd8723da0658..aa55f081c505c81bee8bf71ccfe49d85dc27fef7 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #define DRV_VERSION "0.1"
 
@@ -271,6 +273,13 @@ static int isl12022_probe(struct i2c_client *client,
        return PTR_ERR_OR_ZERO(isl12022->rtc);
 }
 
+#ifdef CONFIG_OF
+static struct of_device_id isl12022_dt_match[] = {
+       { .compatible = "isl,isl12022" },
+       { },
+};
+#endif
+
 static const struct i2c_device_id isl12022_id[] = {
        { "isl12022", 0 },
        { }
@@ -280,6 +289,9 @@ MODULE_DEVICE_TABLE(i2c, isl12022_id);
 static struct i2c_driver isl12022_driver = {
        .driver         = {
                .name   = "rtc-isl12022",
+#ifdef CONFIG_OF
+               .of_match_table = of_match_ptr(isl12022_dt_match),
+#endif
        },
        .probe          = isl12022_probe,
        .id_table       = isl12022_id,
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
new file mode 100644 (file)
index 0000000..6a12bf6
--- /dev/null
@@ -0,0 +1,204 @@
+/*
+ * An I2C driver for the PCF85063 RTC
+ * Copyright 2014 Rose Technology
+ *
+ * Author: Søren Andersen <san@rosetechnology.dk>
+ * Maintainers: http://www.nslu2-linux.org/
+ *
+ * based on the other drivers in this same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/i2c.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/module.h>
+
+#define DRV_VERSION "0.0.1"
+
+#define PCF85063_REG_CTRL1             0x00 /* status */
+#define PCF85063_REG_CTRL2             0x01
+
+#define PCF85063_REG_SC                        0x04 /* datetime */
+#define PCF85063_REG_MN                        0x05
+#define PCF85063_REG_HR                        0x06
+#define PCF85063_REG_DM                        0x07
+#define PCF85063_REG_DW                        0x08
+#define PCF85063_REG_MO                        0x09
+#define PCF85063_REG_YR                        0x0A
+
+#define PCF85063_MO_C                  0x80 /* century */
+
+static struct i2c_driver pcf85063_driver;
+
+struct pcf85063 {
+       struct rtc_device *rtc;
+       int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */
+       int voltage_low; /* indicates if a low_voltage was detected */
+};
+
+/*
+ * In the routines that deal directly with the pcf85063 hardware, we use
+ * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
+ */
+static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+       struct pcf85063 *pcf85063 = i2c_get_clientdata(client);
+       unsigned char buf[13] = { PCF85063_REG_CTRL1 };
+       struct i2c_msg msgs[] = {
+               {/* setup read ptr */
+                       .addr = client->addr,
+                       .len = 1,
+                       .buf = buf
+               },
+               {/* read status + date */
+                       .addr = client->addr,
+                       .flags = I2C_M_RD,
+                       .len = 13,
+                       .buf = buf
+               },
+       };
+
+       /* read registers */
+       if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
+               dev_err(&client->dev, "%s: read error\n", __func__);
+               return -EIO;
+       }
+
+       tm->tm_sec = bcd2bin(buf[PCF85063_REG_SC] & 0x7F);
+       tm->tm_min = bcd2bin(buf[PCF85063_REG_MN] & 0x7F);
+       tm->tm_hour = bcd2bin(buf[PCF85063_REG_HR] & 0x3F); /* rtc hr 0-23 */
+       tm->tm_mday = bcd2bin(buf[PCF85063_REG_DM] & 0x3F);
+       tm->tm_wday = buf[PCF85063_REG_DW] & 0x07;
+       tm->tm_mon = bcd2bin(buf[PCF85063_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
+       tm->tm_year = bcd2bin(buf[PCF85063_REG_YR]);
+       if (tm->tm_year < 70)
+               tm->tm_year += 100;     /* assume we are in 1970...2069 */
+       /* detect the polarity heuristically. see note above. */
+       pcf85063->c_polarity = (buf[PCF85063_REG_MO] & PCF85063_MO_C) ?
+               (tm->tm_year >= 100) : (tm->tm_year < 100);
+
+       /* the clock can give out an invalid datetime, but we cannot return
+        * -EINVAL, otherwise hwclock will refuse to set the time on bootup.
+        */
+       if (rtc_valid_tm(tm) < 0)
+               dev_err(&client->dev, "retrieved date/time is not valid.\n");
+
+       return 0;
+}
+
+static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+       int i = 0, err = 0;
+       unsigned char buf[11];
+
+       /* Control & status */
+       buf[PCF85063_REG_CTRL1] = 0;
+       buf[PCF85063_REG_CTRL2] = 5;
+
+       /* hours, minutes and seconds */
+       buf[PCF85063_REG_SC] = bin2bcd(tm->tm_sec) & 0x7F;
+
+       buf[PCF85063_REG_MN] = bin2bcd(tm->tm_min);
+       buf[PCF85063_REG_HR] = bin2bcd(tm->tm_hour);
+
+       /* Day of month, 1 - 31 */
+       buf[PCF85063_REG_DM] = bin2bcd(tm->tm_mday);
+
+       /* Day, 0 - 6 */
+       buf[PCF85063_REG_DW] = tm->tm_wday & 0x07;
+
+       /* month, 1 - 12 */
+       buf[PCF85063_REG_MO] = bin2bcd(tm->tm_mon + 1);
+
+       /* year and century */
+       buf[PCF85063_REG_YR] = bin2bcd(tm->tm_year % 100);
+
+       /* write register data */
+       for (i = 0; i < sizeof(buf); i++) {
+               unsigned char data[2] = { i, buf[i] };
+
+               err = i2c_master_send(client, data, sizeof(data));
+               if (err != sizeof(data)) {
+                       dev_err(&client->dev, "%s: err=%d addr=%02x, data=%02x\n",
+                                       __func__, err, data[0], data[1]);
+                       return -EIO;
+               }
+       }
+
+       return 0;
+}
+
+static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       return pcf85063_get_datetime(to_i2c_client(dev), tm);
+}
+
+static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       return pcf85063_set_datetime(to_i2c_client(dev), tm);
+}
+
+static const struct rtc_class_ops pcf85063_rtc_ops = {
+       .read_time      = pcf85063_rtc_read_time,
+       .set_time       = pcf85063_rtc_set_time
+};
+
+static int pcf85063_probe(struct i2c_client *client,
+                               const struct i2c_device_id *id)
+{
+       struct pcf85063 *pcf85063;
+
+       dev_dbg(&client->dev, "%s\n", __func__);
+
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+               return -ENODEV;
+
+       pcf85063 = devm_kzalloc(&client->dev, sizeof(struct pcf85063),
+                               GFP_KERNEL);
+       if (!pcf85063)
+               return -ENOMEM;
+
+       dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
+
+       i2c_set_clientdata(client, pcf85063);
+
+       pcf85063->rtc = devm_rtc_device_register(&client->dev,
+                               pcf85063_driver.driver.name,
+                               &pcf85063_rtc_ops, THIS_MODULE);
+
+       return PTR_ERR_OR_ZERO(pcf85063->rtc);
+}
+
+static const struct i2c_device_id pcf85063_id[] = {
+       { "pcf85063", 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, pcf85063_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id pcf85063_of_match[] = {
+       { .compatible = "nxp,pcf85063" },
+       {}
+};
+MODULE_DEVICE_TABLE(of, pcf85063_of_match);
+#endif
+
+static struct i2c_driver pcf85063_driver = {
+       .driver         = {
+               .name   = "rtc-pcf85063",
+               .owner  = THIS_MODULE,
+               .of_match_table = of_match_ptr(pcf85063_of_match),
+       },
+       .probe          = pcf85063_probe,
+       .id_table       = pcf85063_id,
+};
+
+module_i2c_driver(pcf85063_driver);
+
+MODULE_AUTHOR("Søren Andersen <san@rosetechnology.dk>");
+MODULE_DESCRIPTION("PCF85063 RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
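
Since the driver implements only read_time/set_time, it is exercised through
the standard RTC character device; for example (assuming the chip probes as
/dev/rtc0):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/rtc.h>

    int main(void)
    {
            struct rtc_time tm;
            int fd = open("/dev/rtc0", O_RDONLY);

            if (fd < 0 || ioctl(fd, RTC_RD_TIME, &tm) < 0) {
                    perror("rtc");
                    return 1;
            }
            printf("%04d-%02d-%02d %02d:%02d:%02d UTC\n",
                   tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
                   tm.tm_hour, tm.tm_min, tm.tm_sec);
            close(fd);
            return 0;
    }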
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 63b558c48196b4ab4bea5f9e3413072647e9420b..5a197d9dc7e727b881740b5715e80aa31155ce7e 100644 (file)
@@ -26,6 +26,8 @@
 
 #define PCF8563_REG_ST1                0x00 /* status */
 #define PCF8563_REG_ST2                0x01
+#define PCF8563_BIT_AIE                (1 << 1)
+#define PCF8563_BIT_AF         (1 << 3)
 
 #define PCF8563_REG_SC         0x02 /* datetime */
 #define PCF8563_REG_MN         0x03
@@ -36,9 +38,6 @@
 #define PCF8563_REG_YR         0x08
 
 #define PCF8563_REG_AMN                0x09 /* alarm */
-#define PCF8563_REG_AHR                0x0A
-#define PCF8563_REG_ADM                0x0B
-#define PCF8563_REG_ADW                0x0C
 
 #define PCF8563_REG_CLKO       0x0D /* clock out */
 #define PCF8563_REG_TMRC       0x0E /* timer control */
@@ -67,37 +66,133 @@ struct pcf8563 {
         */
        int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */
        int voltage_low; /* indicates if a low_voltage was detected */
+
+       struct i2c_client *client;
 };
 
-/*
- * In the routines that deal directly with the pcf8563 hardware, we use
- * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
- */
-static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf8563_read_block_data(struct i2c_client *client, unsigned char reg,
+                                  unsigned char length, unsigned char *buf)
 {
-       struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
-       unsigned char buf[13] = { PCF8563_REG_ST1 };
-
        struct i2c_msg msgs[] = {
                {/* setup read ptr */
                        .addr = client->addr,
                        .len = 1,
-                       .buf = buf
+                       .buf = &reg,
                },
-               {/* read status + date */
+               {
                        .addr = client->addr,
                        .flags = I2C_M_RD,
-                       .len = 13,
+                       .len = length,
                        .buf = buf
                },
        };
 
-       /* read registers */
        if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
                dev_err(&client->dev, "%s: read error\n", __func__);
                return -EIO;
        }
 
+       return 0;
+}
+
+static int pcf8563_write_block_data(struct i2c_client *client,
+                                  unsigned char reg, unsigned char length,
+                                  unsigned char *buf)
+{
+       int i, err;
+
+       for (i = 0; i < length; i++) {
+               unsigned char data[2] = { reg + i, buf[i] };
+
+               err = i2c_master_send(client, data, sizeof(data));
+               if (err != sizeof(data)) {
+                       dev_err(&client->dev,
+                               "%s: err=%d addr=%02x, data=%02x\n",
+                               __func__, err, data[0], data[1]);
+                       return -EIO;
+               }
+       }
+
+       return 0;
+}
+
+static int pcf8563_set_alarm_mode(struct i2c_client *client, bool on)
+{
+       unsigned char buf[2];
+       int err;
+
+       err = pcf8563_read_block_data(client, PCF8563_REG_ST2, 1, buf + 1);
+       if (err < 0)
+               return err;
+
+       if (on)
+               buf[1] |= PCF8563_BIT_AIE;
+       else
+               buf[1] &= ~PCF8563_BIT_AIE;
+
+       buf[1] &= ~PCF8563_BIT_AF;
+       buf[0] = PCF8563_REG_ST2;
+
+       err = pcf8563_write_block_data(client, PCF8563_REG_ST2, 1, buf + 1);
+       if (err < 0) {
+               dev_err(&client->dev, "%s: write error\n", __func__);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+static int pcf8563_get_alarm_mode(struct i2c_client *client, unsigned char *en,
+                                 unsigned char *pen)
+{
+       unsigned char buf;
+       int err;
+
+       err = pcf8563_read_block_data(client, PCF8563_REG_ST2, 1, &buf);
+       if (err)
+               return err;
+
+       if (en)
+               *en = !!(buf & PCF8563_BIT_AIE);
+       if (pen)
+               *pen = !!(buf & PCF8563_BIT_AF);
+
+       return 0;
+}
+
+static irqreturn_t pcf8563_irq(int irq, void *dev_id)
+{
+       struct pcf8563 *pcf8563 = i2c_get_clientdata(dev_id);
+       int err;
+       unsigned char pending;
+
+       err = pcf8563_get_alarm_mode(pcf8563->client, NULL, &pending);
+       if (err < 0)
+               return IRQ_NONE;        /* IRQ handlers must not return an errno */
+
+       if (pending) {
+               rtc_update_irq(pcf8563->rtc, 1, RTC_IRQF | RTC_AF);
+               pcf8563_set_alarm_mode(pcf8563->client, 1);
+               return IRQ_HANDLED;
+       }
+
+       return IRQ_NONE;
+}
+
+/*
+ * In the routines that deal directly with the pcf8563 hardware, we use
+ * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
+ */
+static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+       struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
+       unsigned char buf[9];
+       int err;
+
+       err = pcf8563_read_block_data(client, PCF8563_REG_ST1, 9, buf);
+       if (err)
+               return err;
+
        if (buf[PCF8563_REG_SC] & PCF8563_SC_LV) {
                pcf8563->voltage_low = 1;
                dev_info(&client->dev,
@@ -144,7 +239,7 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 {
        struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
-       int i, err;
+       int err;
        unsigned char buf[9];
 
        dev_dbg(&client->dev, "%s: secs=%d, mins=%d, hours=%d, "
@@ -170,19 +265,10 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 
        buf[PCF8563_REG_DW] = tm->tm_wday & 0x07;
 
-       /* write register's data */
-       for (i = 0; i < 7; i++) {
-               unsigned char data[2] = { PCF8563_REG_SC + i,
-                                               buf[PCF8563_REG_SC + i] };
-
-               err = i2c_master_send(client, data, sizeof(data));
-               if (err != sizeof(data)) {
-                       dev_err(&client->dev,
-                               "%s: err=%d addr=%02x, data=%02x\n",
-                               __func__, err, data[0], data[1]);
-                       return -EIO;
-               }
-       }
+       err = pcf8563_write_block_data(client, PCF8563_REG_SC,
+                               9 - PCF8563_REG_SC, buf + PCF8563_REG_SC);
+       if (err)
+               return err;
 
        return 0;
 }
@@ -235,16 +321,83 @@ static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
        return pcf8563_set_datetime(to_i2c_client(dev), tm);
 }
 
+static int pcf8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       unsigned char buf[4];
+       int err;
+
+       err = pcf8563_read_block_data(client, PCF8563_REG_AMN, 4, buf);
+       if (err)
+               return err;
+
+       dev_dbg(&client->dev,
+               "%s: raw data is min=%02x, hr=%02x, mday=%02x, wday=%02x\n",
+               __func__, buf[0], buf[1], buf[2], buf[3]);
+
+       tm->time.tm_min = bcd2bin(buf[0] & 0x7F);
+       tm->time.tm_hour = bcd2bin(buf[1] & 0x7F);
+       tm->time.tm_mday = bcd2bin(buf[2] & 0x1F);
+       tm->time.tm_wday = bcd2bin(buf[3] & 0x7);
+       tm->time.tm_mon = -1;
+       tm->time.tm_year = -1;
+       tm->time.tm_yday = -1;
+       tm->time.tm_isdst = -1;
+
+       err = pcf8563_get_alarm_mode(client, &tm->enabled, &tm->pending);
+       if (err < 0)
+               return err;
+
+       dev_dbg(&client->dev, "%s: tm is mins=%d, hours=%d, mday=%d, wday=%d,"
+               " enabled=%d, pending=%d\n", __func__, tm->time.tm_min,
+               tm->time.tm_hour, tm->time.tm_mday, tm->time.tm_wday,
+               tm->enabled, tm->pending);
+
+       return 0;
+}
+
+static int pcf8563_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       unsigned char buf[4];
+       int err;
+
+       dev_dbg(dev, "%s, min=%d hour=%d wday=%d mday=%d "
+               "enabled=%d pending=%d\n", __func__,
+               tm->time.tm_min, tm->time.tm_hour, tm->time.tm_wday,
+               tm->time.tm_mday, tm->enabled, tm->pending);
+
+       buf[0] = bin2bcd(tm->time.tm_min);
+       buf[1] = bin2bcd(tm->time.tm_hour);
+       buf[2] = bin2bcd(tm->time.tm_mday);
+       buf[3] = tm->time.tm_wday & 0x07;
+
+       err = pcf8563_write_block_data(client, PCF8563_REG_AMN, 4, buf);
+       if (err)
+               return err;
+
+       return pcf8563_set_alarm_mode(client, 1);
+}
+
+static int pcf8563_irq_enable(struct device *dev, unsigned int enabled)
+{
+       return pcf8563_set_alarm_mode(to_i2c_client(dev), !!enabled);
+}
+
 static const struct rtc_class_ops pcf8563_rtc_ops = {
        .ioctl          = pcf8563_rtc_ioctl,
        .read_time      = pcf8563_rtc_read_time,
        .set_time       = pcf8563_rtc_set_time,
+       .read_alarm     = pcf8563_rtc_read_alarm,
+       .set_alarm      = pcf8563_rtc_set_alarm,
+       .alarm_irq_enable = pcf8563_irq_enable,
 };
 
 static int pcf8563_probe(struct i2c_client *client,
                                const struct i2c_device_id *id)
 {
        struct pcf8563 *pcf8563;
+       int err;
 
        dev_dbg(&client->dev, "%s\n", __func__);
 
@@ -259,12 +412,30 @@ static int pcf8563_probe(struct i2c_client *client,
        dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
 
        i2c_set_clientdata(client, pcf8563);
+       pcf8563->client = client;
+       device_set_wakeup_capable(&client->dev, 1);
 
        pcf8563->rtc = devm_rtc_device_register(&client->dev,
                                pcf8563_driver.driver.name,
                                &pcf8563_rtc_ops, THIS_MODULE);
 
-       return PTR_ERR_OR_ZERO(pcf8563->rtc);
+       if (IS_ERR(pcf8563->rtc))
+               return PTR_ERR(pcf8563->rtc);
+
+       if (client->irq > 0) {
+               err = devm_request_threaded_irq(&client->dev, client->irq,
+                               NULL, pcf8563_irq,
+                               IRQF_SHARED|IRQF_ONESHOT|IRQF_TRIGGER_FALLING,
+                               pcf8563->rtc->name, client);
+               if (err) {
+                       dev_err(&client->dev, "unable to request IRQ %d\n",
+                                                               client->irq);
+                       return err;
+               }
+
+       }
+
+       return 0;
 }
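
With read_alarm/set_alarm wired up, the alarm becomes reachable through the
standard RTC_WKALM_* ioctls. A naive userspace sketch (no minute-rollover
handling; "fd" is an already-open /dev/rtcX):

    struct rtc_wkalrm alm = { .enabled = 1 };

    if (ioctl(fd, RTC_RD_TIME, &alm.time) == 0) {
            alm.time.tm_min += 1;           /* fire in about one minute */
            if (ioctl(fd, RTC_WKALM_SET, &alm) < 0)
                    perror("RTC_WKALM_SET");
    }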
 
 static const struct i2c_device_id pcf8563_id[] = {
diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c
index 7af00208d637141a8eb18a045b94b6cfa4f5e071..2583349fbde5e99b98776742fc774698f896e5dc 100644 (file)
@@ -258,6 +258,8 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
        if (ret < 0)
                return ret;
 
+       platform_set_drvdata(pdev, tps_rtc);
+
        irq  = platform_get_irq(pdev, 0);
        if (irq <= 0) {
                dev_warn(&pdev->dev, "Wake up is not possible as irq = %d\n",
@@ -283,8 +285,6 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
                return ret;
        }
 
-       platform_set_drvdata(pdev, tps_rtc);
-
        return 0;
 }
 
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 4de346017e9ff91b43aed80d48231ac60f23a4e3..6da6cec9a65191882f141453c3ef18d73cb208f4 100644 (file)
@@ -683,14 +683,13 @@ static int twl_allocate_memory(TW_Device_Extension *tw_dev, int size, int which)
        unsigned long *cpu_addr;
        int retval = 1;
 
-       cpu_addr = pci_alloc_consistent(tw_dev->tw_pci_dev, size*TW_Q_LENGTH, &dma_handle);
+       cpu_addr = pci_zalloc_consistent(tw_dev->tw_pci_dev, size * TW_Q_LENGTH,
+                                        &dma_handle);
        if (!cpu_addr) {
                TW_PRINTK(tw_dev->host, TW_DRIVER, 0x5, "Memory allocation failed");
                goto out;
        }
 
-       memset(cpu_addr, 0, size*TW_Q_LENGTH);
-
        for (i = 0; i < TW_Q_LENGTH; i++) {
                switch(which) {
                case 0:
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index 522570d297ca370e37e92279d734f9fd76701492..7e33a61c1ba45ed91db2b0fe86d4385eaac1e762 100644 (file)
@@ -1125,23 +1125,19 @@ static int inia100_probe_one(struct pci_dev *pdev,
 
        /* Get total memory needed for SCB */
        sz = ORC_MAXQUEUE * sizeof(struct orc_scb);
-       host->scb_virt = pci_alloc_consistent(pdev, sz,
-                       &host->scb_phys);
+       host->scb_virt = pci_zalloc_consistent(pdev, sz, &host->scb_phys);
        if (!host->scb_virt) {
                printk("inia100: SCB memory allocation error\n");
                goto out_host_put;
        }
-       memset(host->scb_virt, 0, sz);
 
        /* Get total memory needed for ESCB */
        sz = ORC_MAXQUEUE * sizeof(struct orc_extended_scb);
-       host->escb_virt = pci_alloc_consistent(pdev, sz,
-                       &host->escb_phys);
+       host->escb_virt = pci_zalloc_consistent(pdev, sz, &host->escb_phys);
        if (!host->escb_virt) {
                printk("inia100: ESCB memory allocation error\n");
                goto out_free_scb_array;
        }
-       memset(host->escb_virt, 0, sz);
 
        biosaddr = host->BIOScfg;
        biosaddr = (biosaddr << 4);
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 56467df3d6de668521fd2d578d52aa2578027a08..eb3e3e619155ea9929e2c140617d4b4ef2b888b9 100644 (file)
@@ -3538,10 +3538,9 @@ static int be_queue_alloc(struct beiscsi_hba *phba, struct be_queue_info *q,
        q->len = len;
        q->entry_size = entry_size;
        mem->size = len * entry_size;
-       mem->va = pci_alloc_consistent(phba->pcidev, mem->size, &mem->dma);
+       mem->va = pci_zalloc_consistent(phba->pcidev, mem->size, &mem->dma);
        if (!mem->va)
                return -ENOMEM;
-       memset(mem->va, 0, mem->size);
        return 0;
 }
 
@@ -4320,9 +4319,9 @@ static int beiscsi_get_boot_info(struct beiscsi_hba *phba)
                            "BM_%d : No boot session\n");
                return ret;
        }
-       nonemb_cmd.va = pci_alloc_consistent(phba->ctrl.pdev,
-                               sizeof(*session_resp),
-                               &nonemb_cmd.dma);
+       nonemb_cmd.va = pci_zalloc_consistent(phba->ctrl.pdev,
+                                             sizeof(*session_resp),
+                                             &nonemb_cmd.dma);
        if (nonemb_cmd.va == NULL) {
                beiscsi_log(phba, KERN_ERR,
                            BEISCSI_LOG_INIT | BEISCSI_LOG_CONFIG,
@@ -4332,7 +4331,6 @@ static int beiscsi_get_boot_info(struct beiscsi_hba *phba)
                return -ENOMEM;
        }
 
-       memset(nonemb_cmd.va, 0, sizeof(*session_resp));
        tag = mgmt_get_session_info(phba, s_handle,
                                    &nonemb_cmd);
        if (!tag) {
diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c
index a3e56487616c2b5ea37c25010fd6c9d58d12da19..665afcb74a56cdc1f9de12ea0bf21a973fc1cbc6 100644 (file)
@@ -900,13 +900,12 @@ free_cmd:
 static int mgmt_alloc_cmd_data(struct beiscsi_hba *phba, struct be_dma_mem *cmd,
                               int iscsi_cmd, int size)
 {
-       cmd->va = pci_alloc_consistent(phba->ctrl.pdev, size, &cmd->dma);
+       cmd->va = pci_zalloc_consistent(phba->ctrl.pdev, size, &cmd->dma);
        if (!cmd->va) {
                beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG,
                            "BG_%d : Failed to allocate memory for if info\n");
                return -ENOMEM;
        }
-       memset(cmd->va, 0, size);
        cmd->size = size;
        be_cmd_hdr_prepare(cmd->va, CMD_SUBSYSTEM_ISCSI, iscsi_cmd, size);
        return 0;
index 4255ce264abf9085040de66d5075b75e9125eb9c..773da14cfa145c703550e59a1f33c420544fedd4 100644 (file)
@@ -232,7 +232,7 @@ csio_wr_alloc_q(struct csio_hw *hw, uint32_t qsize, uint32_t wrsize,
 
        q = wrm->q_arr[free_idx];
 
-       q->vstart = pci_alloc_consistent(hw->pdev, qsz, &q->pstart);
+       q->vstart = pci_zalloc_consistent(hw->pdev, qsz, &q->pstart);
        if (!q->vstart) {
                csio_err(hw,
                         "Failed to allocate DMA memory for "
@@ -240,12 +240,6 @@ csio_wr_alloc_q(struct csio_hw *hw, uint32_t qsize, uint32_t wrsize,
                return -1;
        }
 
-       /*
-        * We need to zero out the contents, importantly for ingress,
-        * since we start with a generation bit of 1 for ingress.
-        */
-       memset(q->vstart, 0, qsz);
-
        q->type         = type;
        q->owner        = owner;
        q->pidx         = q->cidx = q->inc_idx = 0;
index 03372cff38f39b3caef6ff47b36cd04c5bf79c6c..813dd5c998e44ac6a3d8ba32280fc9bc57148c25 100644 (file)
@@ -1238,8 +1238,8 @@ static int port_detect(unsigned long port_base, unsigned int j,
                struct eata_config *cf;
                dma_addr_t cf_dma_addr;
 
-               cf = pci_alloc_consistent(pdev, sizeof(struct eata_config),
-                                         &cf_dma_addr);
+               cf = pci_zalloc_consistent(pdev, sizeof(struct eata_config),
+                                          &cf_dma_addr);
 
                if (!cf) {
                        printk
@@ -1249,7 +1249,6 @@ static int port_detect(unsigned long port_base, unsigned int j,
                }
 
                /* Set board configuration */
-               memset((char *)cf, 0, sizeof(struct eata_config));
                cf->len = (ushort) H2DEV16((ushort) 510);
                cf->ocena = 1;
 
index 8545d1826725bde1aee310f2f4c1469aa11e4c12..6b35d0dfe64c943a5a9caac8564fdfa8fc5c9a83 100644 (file)
@@ -4732,23 +4732,21 @@ static struct CommandList *cmd_special_alloc(struct ctlr_info *h)
        union u64bit temp64;
        dma_addr_t cmd_dma_handle, err_dma_handle;
 
-       c = pci_alloc_consistent(h->pdev, sizeof(*c), &cmd_dma_handle);
+       c = pci_zalloc_consistent(h->pdev, sizeof(*c), &cmd_dma_handle);
        if (c == NULL)
                return NULL;
-       memset(c, 0, sizeof(*c));
 
        c->cmd_type = CMD_SCSI;
        c->cmdindex = -1;
 
-       c->err_info = pci_alloc_consistent(h->pdev, sizeof(*c->err_info),
-                   &err_dma_handle);
+       c->err_info = pci_zalloc_consistent(h->pdev, sizeof(*c->err_info),
+                                           &err_dma_handle);
 
        if (c->err_info == NULL) {
                pci_free_consistent(h->pdev,
                        sizeof(*c), c, cmd_dma_handle);
                return NULL;
        }
-       memset(c->err_info, 0, sizeof(*c->err_info));
 
        INIT_LIST_HEAD(&c->list);
        c->busaddr = (u32) cmd_dma_handle;
index e2237a97cb9d314b485869cc37da7e3de8062531..531dce419c18103d3ad78b3a7fcc6329432cfe87 100644 (file)
@@ -998,8 +998,9 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
         * Allocate the common 16-byte aligned memory for the handshake
         * mailbox.
         */
-       raid_dev->una_mbox64 = pci_alloc_consistent(adapter->pdev,
-                       sizeof(mbox64_t), &raid_dev->una_mbox64_dma);
+       raid_dev->una_mbox64 = pci_zalloc_consistent(adapter->pdev,
+                                                    sizeof(mbox64_t),
+                                                    &raid_dev->una_mbox64_dma);
 
        if (!raid_dev->una_mbox64) {
                con_log(CL_ANN, (KERN_WARNING
@@ -1007,7 +1008,6 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
                        __LINE__));
                return -1;
        }
-       memset(raid_dev->una_mbox64, 0, sizeof(mbox64_t));
 
        /*
         * Align the mailbox at 16-byte boundary
@@ -1026,8 +1026,8 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
                        align;
 
        // Allocate memory for commands issued internally
-       adapter->ibuf = pci_alloc_consistent(pdev, MBOX_IBUF_SIZE,
-                               &adapter->ibuf_dma_h);
+       adapter->ibuf = pci_zalloc_consistent(pdev, MBOX_IBUF_SIZE,
+                                             &adapter->ibuf_dma_h);
        if (!adapter->ibuf) {
 
                con_log(CL_ANN, (KERN_WARNING
@@ -1036,7 +1036,6 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
 
                goto out_free_common_mbox;
        }
-       memset(adapter->ibuf, 0, MBOX_IBUF_SIZE);
 
        // Allocate memory for our SCSI Command Blocks and their associated
        // memory
@@ -2972,8 +2971,8 @@ megaraid_mbox_product_info(adapter_t *adapter)
         * Issue an ENQUIRY3 command to find out certain adapter parameters,
         * e.g., max channels, max commands etc.
         */
-       pinfo = pci_alloc_consistent(adapter->pdev, sizeof(mraid_pinfo_t),
-                       &pinfo_dma_h);
+       pinfo = pci_zalloc_consistent(adapter->pdev, sizeof(mraid_pinfo_t),
+                                     &pinfo_dma_h);
 
        if (pinfo == NULL) {
                con_log(CL_ANN, (KERN_WARNING
@@ -2982,7 +2981,6 @@ megaraid_mbox_product_info(adapter_t *adapter)
 
                return -1;
        }
-       memset(pinfo, 0, sizeof(mraid_pinfo_t));
 
        mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h;
        memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE);
index 112799b131a93253d1b06170e2d7e1d5a1d998c2..22a04e37b70acfa63b3f3cb7e61ddb31ef9ec209 100644 (file)
@@ -2038,9 +2038,9 @@ int megasas_sriov_start_heartbeat(struct megasas_instance *instance,
 
        if (initial) {
                instance->hb_host_mem =
-                       pci_alloc_consistent(instance->pdev,
-                                            sizeof(struct MR_CTRL_HB_HOST_MEM),
-                                            &instance->hb_host_mem_h);
+                       pci_zalloc_consistent(instance->pdev,
+                                             sizeof(struct MR_CTRL_HB_HOST_MEM),
+                                             &instance->hb_host_mem_h);
                if (!instance->hb_host_mem) {
                        printk(KERN_DEBUG "megasas: SR-IOV: Couldn't allocate"
                               " memory for heartbeat host memory for "
@@ -2048,8 +2048,6 @@ int megasas_sriov_start_heartbeat(struct megasas_instance *instance,
                        retval = -ENOMEM;
                        goto out;
                }
-               memset(instance->hb_host_mem, 0,
-                      sizeof(struct MR_CTRL_HB_HOST_MEM));
        }
 
        memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
index 7a6160f172ce1b44b980784992ce599fd8c56658..57a95e2c3442c7d3fa52001a610fb51663412259 100644 (file)
@@ -1915,14 +1915,12 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
        /* We use the PCI APIs for now until the generic one gets fixed
         * enough or until we get some macio-specific versions
         */
-       dma_cmd_space = pci_alloc_consistent(macio_get_pci_dev(mdev),
-                                            ms->dma_cmd_size,
-                                            &dma_cmd_bus);
+       dma_cmd_space = pci_zalloc_consistent(macio_get_pci_dev(mdev),
+                                             ms->dma_cmd_size, &dma_cmd_bus);
        if (dma_cmd_space == NULL) {
                printk(KERN_ERR "mesh: can't allocate DMA table\n");
                goto out_unmap;
        }
-       memset(dma_cmd_space, 0, ms->dma_cmd_size);
 
        ms->dma_cmds = (struct dbdma_cmd *) DBDMA_ALIGN(dma_cmd_space);
                ms->dma_cmd_space = dma_cmd_space;
index edbee8dc62c9ab546b5340ca06cbef62ba4cecf3..3e716b2f611ae119d6510a661cab846598c4f7d5 100644 (file)
@@ -142,8 +142,8 @@ static struct mvumi_res *mvumi_alloc_mem_resource(struct mvumi_hba *mhba,
 
        case RESOURCE_UNCACHED_MEMORY:
                size = round_up(size, 8);
-               res->virt_addr = pci_alloc_consistent(mhba->pdev, size,
-                                                       &res->bus_addr);
+               res->virt_addr = pci_zalloc_consistent(mhba->pdev, size,
+                                                      &res->bus_addr);
                if (!res->virt_addr) {
                        dev_err(&mhba->pdev->dev,
                                        "unable to allocate consistent mem,"
@@ -151,7 +151,6 @@ static struct mvumi_res *mvumi_alloc_mem_resource(struct mvumi_hba *mhba,
                        kfree(res);
                        return NULL;
                }
-               memset(res->virt_addr, 0, size);
                break;
 
        default:
@@ -258,12 +257,10 @@ static int mvumi_internal_cmd_sgl(struct mvumi_hba *mhba, struct mvumi_cmd *cmd,
        if (size == 0)
                return 0;
 
-       virt_addr = pci_alloc_consistent(mhba->pdev, size, &phy_addr);
+       virt_addr = pci_zalloc_consistent(mhba->pdev, size, &phy_addr);
        if (!virt_addr)
                return -1;
 
-       memset(virt_addr, 0, size);
-
        m_sg = (struct mvumi_sgl *) &cmd->frame->payload[0];
        cmd->frame->sg_counts = 1;
        cmd->data_buf = virt_addr;
index 34cea82917722fbe9ad0a295f31d37638d7cf308..76570e6a547d26df5cfaffa715c9f49310085467 100644 (file)
@@ -116,13 +116,12 @@ int pm8001_mem_alloc(struct pci_dev *pdev, void **virt_addr,
        u64 align_offset = 0;
        if (align)
                align_offset = (dma_addr_t)align - 1;
-       mem_virt_alloc =
-               pci_alloc_consistent(pdev, mem_size + align, &mem_dma_handle);
+       mem_virt_alloc = pci_zalloc_consistent(pdev, mem_size + align,
+                                              &mem_dma_handle);
        if (!mem_virt_alloc) {
                pm8001_printk("memory allocation error\n");
                return -1;
        }
-       memset((void *)mem_virt_alloc, 0, mem_size+align);
        *pphys_addr = mem_dma_handle;
        phys_align = (*pphys_addr + align_offset) & ~align_offset;
        *virt_addr = (void *)mem_virt_alloc + phys_align - *pphys_addr;
index 017f8b9554e53de6c64ae49f73d79e348a97dfd9..6f3275d020a044abd3607e9ca6b4f114dd45200f 100644 (file)
@@ -4213,9 +4213,9 @@ static ssize_t pmcraid_store_log_level(
 {
        struct Scsi_Host *shost;
        struct pmcraid_instance *pinstance;
-       unsigned long val;
+       u8 val;
 
-       if (strict_strtoul(buf, 10, &val))
+       if (kstrtou8(buf, 10, &val))
                return -EINVAL;
        /* log-level should be from 0 to 2 */
        if (val > 2)
index 406b3038bbad919e560fc3eebcd64caa1fe55a8c..8b4105a22ac2564b5dc64eef5c816c5df0eaf3a8 100644 (file)
@@ -910,9 +910,9 @@ sdev_store_queue_ramp_up_period(struct device *dev,
                                const char *buf, size_t count)
 {
        struct scsi_device *sdev = to_scsi_device(dev);
-       unsigned long period;
+       unsigned int period;
 
-       if (strict_strtoul(buf, 10, &period))
+       if (kstrtouint(buf, 10, &period))
                return -EINVAL;
 
        sdev->queue_ramp_up_period = msecs_to_jiffies(period);
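
Both sysfs-store hunks above replace the deprecated strict_strtoul() with the kstrto*() helpers (kstrtou8() and kstrtouint()), which parse directly into a variable of the target width and fail on overflow. A minimal sketch of the pattern:

    u8 val;

    /* fails with -EINVAL/-ERANGE on junk input or values above 255,
     * so the later "val > 2" range check sees a sanely typed number */
    if (kstrtou8(buf, 10, &val))
            return -EINVAL;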
index 2920e406030a8fd332a2636c36682a6f52a09284..5729cf678765e6646347bb62a45c08e72d1290f2 100644 (file)
@@ -2065,20 +2065,16 @@ static short rtl8192_alloc_rx_desc_ring(struct net_device *dev)
        int i, rx_queue_idx;
 
        for (rx_queue_idx = 0; rx_queue_idx < MAX_RX_QUEUE; rx_queue_idx++) {
-               priv->rx_ring[rx_queue_idx] = pci_alloc_consistent(priv->pdev,
-                                       sizeof(*priv->rx_ring[rx_queue_idx]) *
-                                       priv->rxringcount,
-                                       &priv->rx_ring_dma[rx_queue_idx]);
-
+               priv->rx_ring[rx_queue_idx] =
+                       pci_zalloc_consistent(priv->pdev,
+                                             sizeof(*priv->rx_ring[rx_queue_idx]) * priv->rxringcount,
+                                             &priv->rx_ring_dma[rx_queue_idx]);
                if (!priv->rx_ring[rx_queue_idx] ||
                    (unsigned long)priv->rx_ring[rx_queue_idx] & 0xFF) {
                        RT_TRACE(COMP_ERR, "Cannot allocate RX ring\n");
                        return -ENOMEM;
                }
 
-               memset(priv->rx_ring[rx_queue_idx], 0,
-                      sizeof(*priv->rx_ring[rx_queue_idx]) *
-                      priv->rxringcount);
                priv->rx_idx[rx_queue_idx] = 0;
 
                for (i = 0; i < priv->rxringcount; i++) {
@@ -2118,14 +2114,13 @@ static int rtl8192_alloc_tx_desc_ring(struct net_device *dev,
        dma_addr_t dma;
        int i;
 
-       ring = pci_alloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma);
+       ring = pci_zalloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma);
        if (!ring || (unsigned long)ring & 0xFF) {
                RT_TRACE(COMP_ERR, "Cannot allocate TX ring (prio = %d)\n",
                         prio);
                return -ENOMEM;
        }
 
-       memset(ring, 0, sizeof(*ring)*entries);
        priv->tx_ring[prio].desc = ring;
        priv->tx_ring[prio].dma = dma;
        priv->tx_ring[prio].idx = 0;
index f3abbcc9f3ba70a065d60b50627a2037716d6680..0215aef1eacc14004aa75f5d91f91f8f3ab40619 100644 (file)
@@ -1224,10 +1224,10 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
 
        /* alloc tx buffer desc for new trx flow*/
        if (rtlpriv->use_new_trx_flow) {
-               buffer_desc = pci_alloc_consistent(rtlpci->pdev,
-                                       sizeof(*buffer_desc) * entries,
-                                       &buffer_desc_dma);
-
+               buffer_desc =
+                       pci_zalloc_consistent(rtlpci->pdev,
+                                             sizeof(*buffer_desc) * entries,
+                                             &buffer_desc_dma);
                if (!buffer_desc || (unsigned long)buffer_desc & 0xFF) {
                        RT_TRACE(COMP_ERR, DBG_EMERG,
                                 ("Cannot allocate TX ring (prio = %d)\n",
@@ -1235,7 +1235,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
                        return -ENOMEM;
                }
 
-               memset(buffer_desc, 0, sizeof(*buffer_desc) * entries);
                rtlpci->tx_ring[prio].buffer_desc = buffer_desc;
                rtlpci->tx_ring[prio].buffer_desc_dma = buffer_desc_dma;
 
@@ -1245,16 +1244,14 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
        }
 
        /* alloc dma for this ring */
-       desc = pci_alloc_consistent(rtlpci->pdev,
-                                   sizeof(*desc) * entries, &desc_dma);
-
+       desc = pci_zalloc_consistent(rtlpci->pdev, sizeof(*desc) * entries,
+                                    &desc_dma);
        if (!desc || (unsigned long)desc & 0xFF) {
                RT_TRACE(COMP_ERR, DBG_EMERG,
                         ("Cannot allocate TX ring (prio = %d)\n", prio));
                return -ENOMEM;
        }
 
-       memset(desc, 0, sizeof(*desc) * entries);
        rtlpci->tx_ring[prio].desc = desc;
        rtlpci->tx_ring[prio].dma = desc_dma;
 
@@ -1290,11 +1287,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
                struct rtl_rx_buffer_desc *entry = NULL;
                /* alloc dma for this ring */
                rtlpci->rx_ring[rxring_idx].buffer_desc =
-                   pci_alloc_consistent(rtlpci->pdev,
-                                        sizeof(*rtlpci->rx_ring[rxring_idx].
-                                               buffer_desc) *
-                                               rtlpci->rxringcount,
-                                        &rtlpci->rx_ring[rxring_idx].dma);
+                       pci_zalloc_consistent(rtlpci->pdev,
+                                             sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) * rtlpci->rxringcount,
+                                             &rtlpci->rx_ring[rxring_idx].dma);
                if (!rtlpci->rx_ring[rxring_idx].buffer_desc ||
                    (unsigned long)rtlpci->rx_ring[rxring_idx].buffer_desc & 0xFF) {
                        RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1302,10 +1297,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
                        return -ENOMEM;
                }
 
-               memset(rtlpci->rx_ring[rxring_idx].buffer_desc, 0,
-                      sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) *
-                      rtlpci->rxringcount);
-
                /* init every desc in this ring */
                rtlpci->rx_ring[rxring_idx].idx = 0;
 
@@ -1320,19 +1311,15 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
                u8 tmp_one = 1;
                /* alloc dma for this ring */
                rtlpci->rx_ring[rxring_idx].desc =
-                   pci_alloc_consistent(rtlpci->pdev,
-                                        sizeof(*rtlpci->rx_ring[rxring_idx].
-                                       desc) * rtlpci->rxringcount,
-                                        &rtlpci->rx_ring[rxring_idx].dma);
+                       pci_zalloc_consistent(rtlpci->pdev,
+                                             sizeof(*rtlpci->rx_ring[rxring_idx].desc) * rtlpci->rxringcount,
+                                             &rtlpci->rx_ring[rxring_idx].dma);
                if (!rtlpci->rx_ring[rxring_idx].desc ||
                    (unsigned long)rtlpci->rx_ring[rxring_idx].desc & 0xFF) {
                        RT_TRACE(COMP_ERR, DBG_EMERG,
                                 ("Cannot allocate RX ring\n"));
                        return -ENOMEM;
                }
-               memset(rtlpci->rx_ring[rxring_idx].desc, 0,
-                      sizeof(*rtlpci->rx_ring[rxring_idx].desc) *
-                      rtlpci->rxringcount);
 
                /* init every desc in this ring */
                rtlpci->rx_ring[rxring_idx].idx = 0;
index f9847d1fbdebae821e8aaab5f32ae0109bc7dfe5..26d7b2fc852a4f07a54db7dc7465ced58b7d4a70 100644 (file)
@@ -1248,9 +1248,10 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
 
        /* alloc tx buffer desc for new trx flow*/
        if (rtlpriv->use_new_trx_flow) {
-               buffer_desc = pci_alloc_consistent(rtlpci->pdev,
-                                           sizeof(*buffer_desc) * entries,
-                                           &buffer_desc_dma);
+               buffer_desc =
+                       pci_zalloc_consistent(rtlpci->pdev,
+                                             sizeof(*buffer_desc) * entries,
+                                             &buffer_desc_dma);
 
                if (!buffer_desc || (unsigned long)buffer_desc & 0xFF) {
                        RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1259,7 +1260,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
                        return -ENOMEM;
                }
 
-               memset(buffer_desc, 0, sizeof(*buffer_desc) * entries);
                rtlpci->tx_ring[prio].buffer_desc = buffer_desc;
                rtlpci->tx_ring[prio].buffer_desc_dma = buffer_desc_dma;
 
@@ -1270,8 +1270,8 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
        }
 
        /* alloc dma for this ring */
-       desc = pci_alloc_consistent(rtlpci->pdev,
-                                   sizeof(*desc) * entries, &desc_dma);
+       desc = pci_zalloc_consistent(rtlpci->pdev, sizeof(*desc) * entries,
+                                    &desc_dma);
 
        if (!desc || (unsigned long)desc & 0xFF) {
                RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1279,7 +1279,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
                return -ENOMEM;
        }
 
-       memset(desc, 0, sizeof(*desc) * entries);
        rtlpci->tx_ring[prio].desc = desc;
        rtlpci->tx_ring[prio].dma = desc_dma;
 
@@ -1316,21 +1315,15 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
                struct rtl_rx_buffer_desc *entry = NULL;
                /* alloc dma for this ring */
                rtlpci->rx_ring[rxring_idx].buffer_desc =
-                   pci_alloc_consistent(rtlpci->pdev,
-                                        sizeof(*rtlpci->rx_ring[rxring_idx].
-                                               buffer_desc) *
-                                               rtlpci->rxringcount,
-                                        &rtlpci->rx_ring[rxring_idx].dma);
+                       pci_zalloc_consistent(rtlpci->pdev,
+                                             sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) * rtlpci->rxringcount,
+                                             &rtlpci->rx_ring[rxring_idx].dma);
                if (!rtlpci->rx_ring[rxring_idx].buffer_desc ||
                    (unsigned long)rtlpci->rx_ring[rxring_idx].buffer_desc & 0xFF) {
                        RT_TRACE(COMP_ERR, DBG_EMERG, ("Cannot allocate RX ring\n"));
                        return -ENOMEM;
                }
 
-               memset(rtlpci->rx_ring[rxring_idx].buffer_desc, 0,
-                      sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) *
-                      rtlpci->rxringcount);
-
                /* init every desc in this ring */
                rtlpci->rx_ring[rxring_idx].idx = 0;
                for (i = 0; i < rtlpci->rxringcount; i++) {
@@ -1344,10 +1337,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
                u8 tmp_one = 1;
                /* alloc dma for this ring */
                rtlpci->rx_ring[rxring_idx].desc =
-                   pci_alloc_consistent(rtlpci->pdev,
-                                        sizeof(*rtlpci->rx_ring[rxring_idx].
-                                               desc) * rtlpci->rxringcount,
-                                        &rtlpci->rx_ring[rxring_idx].dma);
+                       pci_zalloc_consistent(rtlpci->pdev,
+                                             sizeof(*rtlpci->rx_ring[rxring_idx].desc) * rtlpci->rxringcount,
+                                             &rtlpci->rx_ring[rxring_idx].dma);
                if (!rtlpci->rx_ring[rxring_idx].desc ||
                    (unsigned long)rtlpci->rx_ring[rxring_idx].desc & 0xFF) {
                        RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1355,10 +1347,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
                        return -ENOMEM;
                }
 
-               memset(rtlpci->rx_ring[rxring_idx].desc, 0,
-                      sizeof(*rtlpci->rx_ring[rxring_idx].desc) *
-                      rtlpci->rxringcount);
-
                /* init every desc in this ring */
                rtlpci->rx_ring[rxring_idx].idx = 0;
                for (i = 0; i < rtlpci->rxringcount; i++) {
index 50ece291fc6aef6040c08a869a0f0f6b353f155e..f35fa3dfe22c235c31411437b9d2120e7cc4a083 100644 (file)
@@ -1191,18 +1191,15 @@ static int slic_rspqueue_init(struct adapter *adapter)
        rspq->num_pages = SLIC_RSPQ_PAGES_GB;
 
        for (i = 0; i < rspq->num_pages; i++) {
-               rspq->vaddr[i] = pci_alloc_consistent(adapter->pcidev,
-                                                     PAGE_SIZE,
-                                                     &rspq->paddr[i]);
+               rspq->vaddr[i] = pci_zalloc_consistent(adapter->pcidev,
+                                                      PAGE_SIZE,
+                                                      &rspq->paddr[i]);
                if (!rspq->vaddr[i]) {
                        dev_err(&adapter->pcidev->dev,
                                "pci_alloc_consistent failed\n");
                        slic_rspqueue_free(adapter);
                        return -ENOMEM;
                }
-               /* FIXME:
-                * do we really need this assertions (4K PAGE_SIZE aligned addr)? */
-               memset(rspq->vaddr[i], 0, PAGE_SIZE);
 
                if (paddrh == 0) {
                        slic_reg32_write(&slic_regs->slic_rbar,
index c78d06eff7ea4501c4aa210fa18aed1b33de6a9c..0b583a37f5b3ddb9c8a902b1a67b86c4a4cdc004 100644 (file)
@@ -1111,25 +1111,17 @@ static bool device_init_rings(PSDevice pDevice)
        void *vir_pool;
 
        /*allocate all RD/TD rings a single pool*/
-       vir_pool = pci_alloc_consistent(pDevice->pcid,
-                                       pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) +
-                                       pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) +
-                                       pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) +
-                                       pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc),
-                                       &pDevice->pool_dma);
-
+       vir_pool = pci_zalloc_consistent(pDevice->pcid,
+                                        pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) +
+                                        pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) +
+                                        pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) +
+                                        pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc),
+                                        &pDevice->pool_dma);
        if (vir_pool == NULL) {
                DBG_PRT(MSG_LEVEL_ERR, KERN_ERR "%s : allocate desc dma memory failed\n", pDevice->dev->name);
                return false;
        }
 
-       memset(vir_pool, 0,
-              pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) +
-              pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) +
-              pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) +
-              pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc)
-               );
-
        pDevice->aRD0Ring = vir_pool;
        pDevice->aRD1Ring = vir_pool +
                pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc);
@@ -1138,13 +1130,12 @@ static bool device_init_rings(PSDevice pDevice)
        pDevice->rd1_pool_dma = pDevice->rd0_pool_dma +
                pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc);
 
-       pDevice->tx0_bufs = pci_alloc_consistent(pDevice->pcid,
-                                                pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ +
-                                                pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ +
-                                                CB_BEACON_BUF_SIZE +
-                                                CB_MAX_BUF_SIZE,
-                                                &pDevice->tx_bufs_dma0);
-
+       pDevice->tx0_bufs = pci_zalloc_consistent(pDevice->pcid,
+                                                 pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ +
+                                                 pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ +
+                                                 CB_BEACON_BUF_SIZE +
+                                                 CB_MAX_BUF_SIZE,
+                                                 &pDevice->tx_bufs_dma0);
        if (pDevice->tx0_bufs == NULL) {
                DBG_PRT(MSG_LEVEL_ERR, KERN_ERR "%s: allocate buf dma memory failed\n", pDevice->dev->name);
                pci_free_consistent(pDevice->pcid,
@@ -1157,13 +1148,6 @@ static bool device_init_rings(PSDevice pDevice)
                return false;
        }
 
-       memset(pDevice->tx0_bufs, 0,
-              pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ +
-              pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ +
-              CB_BEACON_BUF_SIZE +
-              CB_MAX_BUF_SIZE
-               );
-
        pDevice->td0_pool_dma = pDevice->rd1_pool_dma +
                pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc);
 
index ba1dbcdf46096439637ad0a6fa8e2603b19e4b0d..0e8c39b6ccd45643051b2ee2876a0eda59595034 100644 (file)
@@ -3383,12 +3383,11 @@ static int alloc_desc(struct slgt_info *info)
        unsigned int pbufs;
 
        /* allocate memory to hold descriptor lists */
-       info->bufs = pci_alloc_consistent(info->pdev, DESC_LIST_SIZE, &info->bufs_dma_addr);
+       info->bufs = pci_zalloc_consistent(info->pdev, DESC_LIST_SIZE,
+                                          &info->bufs_dma_addr);
        if (info->bufs == NULL)
                return -ENOMEM;
 
-       memset(info->bufs, 0, DESC_LIST_SIZE);
-
        info->rbufs = (struct slgt_desc*)info->bufs;
        info->tbufs = ((struct slgt_desc*)info->bufs) + info->rbuf_count;
 
index bfb2d3f06738481d302e7edb3550addbbb6a4691..18078ecbfcc6bfb5c68b173bd256ba9f63793244 100644 (file)
@@ -1555,16 +1555,14 @@ static int ca91cx42_crcsr_init(struct vme_bridge *ca91cx42_bridge,
        }
 
        /* Allocate mem for CR/CSR image */
-       bridge->crcsr_kernel = pci_alloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
-               &bridge->crcsr_bus);
+       bridge->crcsr_kernel = pci_zalloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
+                                                    &bridge->crcsr_bus);
        if (bridge->crcsr_kernel == NULL) {
                dev_err(&pdev->dev, "Failed to allocate memory for CR/CSR "
                        "image\n");
                return -ENOMEM;
        }
 
-       memset(bridge->crcsr_kernel, 0, VME_CRCSR_BUF_SIZE);
-
        crcsr_addr = slot * (512 * 1024);
        iowrite32(bridge->crcsr_bus - crcsr_addr, bridge->base + VCSR_TO);
 
index 61e706c0e00c6f6435d89ad5a23d9d887bd23bec..e07cfa8001bbf0ab2a253eae50b6290c3a60a984 100644 (file)
@@ -2275,16 +2275,14 @@ static int tsi148_crcsr_init(struct vme_bridge *tsi148_bridge,
        bridge = tsi148_bridge->driver_priv;
 
        /* Allocate mem for CR/CSR image */
-       bridge->crcsr_kernel = pci_alloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
-               &bridge->crcsr_bus);
+       bridge->crcsr_kernel = pci_zalloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
+                                                    &bridge->crcsr_bus);
        if (bridge->crcsr_kernel == NULL) {
                dev_err(tsi148_bridge->parent, "Failed to allocate memory for "
                        "CR/CSR image\n");
                return -ENOMEM;
        }
 
-       memset(bridge->crcsr_kernel, 0, VME_CRCSR_BUF_SIZE);
-
        reg_split(bridge->crcsr_bus, &crcsr_bus_high, &crcsr_bus_low);
 
        iowrite32be(crcsr_bus_high, bridge->base + TSI148_LCSR_CROU);
index c770337c4b458beefc707a91fb8c9f7315d0634b..24575d9d882d99e790f97d351ddfac369c616603 100644 (file)
@@ -153,6 +153,7 @@ extern int adfs_map_lookup(struct super_block *sb, unsigned int frag_id, unsigne
 extern unsigned int adfs_map_free(struct super_block *sb);
 
 /* Misc */
+__printf(3, 4)
 void __adfs_error(struct super_block *sb, const char *function,
                  const char *fmt, ...);
 #define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
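
The __printf(3, 4) annotation marks argument 3 of __adfs_error() as a printf-style format string consumed by the variadic arguments starting at position 4; in the kernel it expands to roughly:

    #define __printf(a, b) __attribute__((format(printf, a, b)))

With the annotation in place, gcc type-checks every adfs_error() call site at compile time, which is what flushes out the mismatched %lx/%X specifiers fixed in the next two hunks.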
index 0d138c0de293ed3512046058b5ff96cac8c744ad..51c279a29845e6870125469b18fd06ccdd99f64f 100644 (file)
@@ -138,7 +138,7 @@ adfs_dir_lookup_byname(struct inode *inode, struct qstr *name, struct object_inf
                goto out;
 
        if (ADFS_I(inode)->parent_id != dir.parent_id) {
-               adfs_error(sb, "parent directory changed under me! (%lx but got %lx)\n",
+               adfs_error(sb, "parent directory changed under me! (%lx but got %x)\n",
                           ADFS_I(inode)->parent_id, dir.parent_id);
                ret = -EIO;
                goto free_out;
index d9e3bee4e653ff2c6df48006ddcdb3119082233a..f2ba88ab4aed0e1e7baf934b95a6b9612e57baa4 100644 (file)
@@ -55,10 +55,10 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
        }
 
        size >>= sb->s_blocksize_bits;
-       if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) {
+       if (size > ARRAY_SIZE(dir->bh)) {
                /* this directory is too big for fixed bh set, must allocate */
                struct buffer_head **bh_fplus =
-                       kzalloc(size * sizeof(struct buffer_head *),
+                       kcalloc(size, sizeof(struct buffer_head *),
                                GFP_KERNEL);
                if (!bh_fplus) {
                        adfs_error(sb, "not enough memory for"
@@ -79,9 +79,8 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
 
                dir->bh_fplus[blk] = sb_bread(sb, block);
                if (!dir->bh_fplus[blk]) {
-                       adfs_error(sb,  "dir object %X failed read for"
-                                       " offset %d, mapped block %X",
-                                       id, blk, block);
+                       adfs_error(sb,  "dir object %x failed read for offset %d, mapped block %lX",
+                                  id, blk, block);
                        goto out;
                }
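
Apart from the message reformat, the hunk above also swaps an open-coded sizeof ratio for ARRAY_SIZE() and a multiplying kzalloc() for kcalloc(). The difference is that kcalloc() checks the count * size multiplication for overflow; a sketch using the hunk's own names:

    struct buffer_head **bh_fplus;

    /* kcalloc(n, size, flags): zeroed allocation whose n * size
     * multiplication is overflow-checked (NULL instead of wrapping) */
    bh_fplus = kcalloc(size, sizeof(*bh_fplus), GFP_KERNEL);
    if (!bh_fplus)
            return -ENOMEM;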
 
index acf32054edd87a6f7360cfc17b2a1f5ddd6ee85f..9e359fb20c0a5cec2b344fff4ef0b64844f0850d 100644 (file)
@@ -143,20 +143,6 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
        return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
 }
 
-/* Does a dentry have some pending activity? */
-static inline int autofs4_ispending(struct dentry *dentry)
-{
-       struct autofs_info *inf = autofs4_dentry_ino(dentry);
-
-       if (inf->flags & AUTOFS_INF_PENDING)
-               return 1;
-
-       if (inf->flags & AUTOFS_INF_EXPIRING)
-               return 1;
-
-       return 0;
-}
-
 struct inode *autofs4_get_inode(struct super_block *, umode_t);
 void autofs4_free_ino(struct autofs_info *);
 
@@ -191,55 +177,6 @@ extern const struct file_operations autofs4_root_operations;
 extern const struct dentry_operations autofs4_dentry_operations;
 
 /* VFS automount flags management functions */
-
-static inline void __managed_dentry_set_automount(struct dentry *dentry)
-{
-       dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-}
-
-static inline void managed_dentry_set_automount(struct dentry *dentry)
-{
-       spin_lock(&dentry->d_lock);
-       __managed_dentry_set_automount(dentry);
-       spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_clear_automount(struct dentry *dentry)
-{
-       dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
-}
-
-static inline void managed_dentry_clear_automount(struct dentry *dentry)
-{
-       spin_lock(&dentry->d_lock);
-       __managed_dentry_clear_automount(dentry);
-       spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_set_transit(struct dentry *dentry)
-{
-       dentry->d_flags |= DCACHE_MANAGE_TRANSIT;
-}
-
-static inline void managed_dentry_set_transit(struct dentry *dentry)
-{
-       spin_lock(&dentry->d_lock);
-       __managed_dentry_set_transit(dentry);
-       spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_clear_transit(struct dentry *dentry)
-{
-       dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT;
-}
-
-static inline void managed_dentry_clear_transit(struct dentry *dentry)
-{
-       spin_lock(&dentry->d_lock);
-       __managed_dentry_clear_transit(dentry);
-       spin_unlock(&dentry->d_lock);
-}
-
 static inline void __managed_dentry_set_managed(struct dentry *dentry)
 {
        dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
index 394e90b02c5e60783b828a2d39d7698806505930..a7be57e39be78791930c2dc362507d53e9ec6174 100644 (file)
@@ -333,7 +333,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
        if (ino->flags & AUTOFS_INF_PENDING)
                goto out;
        if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
-               struct autofs_info *ino = autofs4_dentry_ino(root);
                ino->flags |= AUTOFS_INF_EXPIRING;
                init_completion(&ino->expire_complete);
                spin_unlock(&sbi->fs_lock);
index cc87c1abac9710169cd56ad44a78cf7185da2834..cdb25ebccc4c49c2ac2f4a4c17d998ae99f14941 100644 (file)
@@ -166,8 +166,10 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
        const unsigned char *str = name->name;
        struct list_head *p, *head;
 
-       spin_lock(&sbi->lookup_lock);
        head = &sbi->active_list;
+       if (list_empty(head))
+               return NULL;
+       spin_lock(&sbi->lookup_lock);
        list_for_each(p, head) {
                struct autofs_info *ino;
                struct dentry *active;
@@ -218,8 +220,10 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
        const unsigned char *str = name->name;
        struct list_head *p, *head;
 
-       spin_lock(&sbi->lookup_lock);
        head = &sbi->expiring_list;
+       if (list_empty(head))
+               return NULL;
+       spin_lock(&sbi->lookup_lock);
        list_for_each(p, head) {
                struct autofs_info *ino;
                struct dentry *expiring;
@@ -373,7 +377,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
                 * this because the leaves of the directory tree under the
                 * mount never trigger mounts themselves (they have an autofs
                 * trigger mount mounted on them). But v4 pseudo direct mounts
-                * do need the leaves to to trigger mounts. In this case we
+                * do need the leaves to trigger mounts. In this case we
                 * have no choice but to use the list_empty() check and
                 * require user space behave.
                 */
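
The two lookup hunks above take an unlocked list_empty() peek before acquiring lookup_lock. The peek is racy by design, but a false "empty" only short-circuits a lookup whose caller already treats NULL as "nothing found", so skipping the lock in the common case costs nothing. The pattern:

    head = &sbi->active_list;
    if (list_empty(head))           /* unlocked peek; false "empty" is harmless */
            return NULL;
    spin_lock(&sbi->lookup_lock);   /* the real walk happens under the lock */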
index 0d6c07cc1149191d0f46e38e87142987e0fd41ad..4cf61ec6b7a8ee539e0caa63d548f8aa4222ad0f 100644 (file)
@@ -832,16 +832,14 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
                    (befs_super_block *) ((void *) bh->b_data + x86_sb_off);
        }
 
-       if (befs_load_sb(sb, disk_sb) != BEFS_OK)
+       if ((befs_load_sb(sb, disk_sb) != BEFS_OK) ||
+           (befs_check_sb(sb) != BEFS_OK))
                goto unacquire_bh;
 
        befs_dump_super_block(sb, disk_sb);
 
        brelse(bh);
 
-       if (befs_check_sb(sb) != BEFS_OK)
-               goto unacquire_priv_sbp;
-
        if( befs_sb->num_blocks > ~((sector_t)0) ) {
                befs_error(sb, "blocks count: %llu "
                        "is larger than the host can use",
index f7f87e233dd9d175cc9c3c8e921f381ff056d775..f40006db36dfdd68c573785cfee4003bf81457a0 100644 (file)
@@ -46,6 +46,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
 
 /* inode.c */
 extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
+extern void bfs_dump_imap(const char *, struct super_block *);
 
 /* file.c */
 extern const struct inode_operations bfs_file_inops;
index a399e6d9dc74d1a1d2fb225af24685886cd5ec59..08063ae0a17cd631b0c3194ea68043045f7cce05 100644 (file)
@@ -75,8 +75,6 @@ const struct file_operations bfs_dir_operations = {
        .llseek         = generic_file_llseek,
 };
 
-extern void dump_imap(const char *, struct super_block *);
-
 static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                                                bool excl)
 {
@@ -110,7 +108,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        BFS_I(inode)->i_eblock = 0;
        insert_inode_hash(inode);
         mark_inode_dirty(inode);
-       dump_imap("create", s);
+       bfs_dump_imap("create", s);
 
        err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
                                                        inode->i_ino);
index 7041ac35ace85ab7f91d96b21f0dde3c54840257..90bc079d9982928b7a9b5bcb6ad3efd6ebf1375f 100644 (file)
@@ -30,8 +30,6 @@ MODULE_LICENSE("GPL");
 #define dprintf(x...)
 #endif
 
-void dump_imap(const char *prefix, struct super_block *s);
-
 struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
 {
        struct bfs_inode *di;
@@ -194,7 +192,7 @@ static void bfs_evict_inode(struct inode *inode)
                        info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
                info->si_freei++;
                clear_bit(ino, info->si_imap);
-               dump_imap("delete_inode", s);
+               bfs_dump_imap("delete_inode", s);
         }
 
        /*
@@ -297,7 +295,7 @@ static const struct super_operations bfs_sops = {
        .statfs         = bfs_statfs,
 };
 
-void dump_imap(const char *prefix, struct super_block *s)
+void bfs_dump_imap(const char *prefix, struct super_block *s)
 {
 #ifdef DEBUG
        int i;
@@ -443,7 +441,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
        }
        brelse(bh);
        brelse(sbh);
-       dump_imap("read_super", s);
+       bfs_dump_imap("read_super", s);
        return 0;
 
 out3:
index 1da168c61d35c2194422fa24c5bca3235a1c19ca..278f8fdeb9efced93e00b50caa9ee27f437e5dcf 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/fs.h>
 #include <linux/stat.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/string.h>
 #include <linux/list.h>
 #include <linux/sched.h>
index 2849f41e72a209fb689c7b28915566ed8c70cde1..1326d38960db0de7dc234f44c981f7d7eeeaa498 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/fs.h>
 #include <linux/stat.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/string.h>
 
 #include <linux/coda.h>
index cd8a63238b1192edd1d8f64bd9037b7b85e85a1a..9c3dedc000d181945926c606ba26b17168737f99 100644 (file)
@@ -19,8 +19,7 @@
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/namei.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/coda.h>
 #include <linux/coda_psdev.h>
index 9e83b77902126a011e1505104bd707dbe89f809d..d244d743a23261c97551d4ee00a30399a2f30b68 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/coda.h>
 #include <linux/coda_psdev.h>
index fe3afb2de88088fe5200c50fdd9b7e0bceee77c1..b945410bfcd58d7b5469bf120c38e54c6b250fa2 100644 (file)
@@ -21,9 +21,7 @@
 #include <linux/vfs.h>
 #include <linux/slab.h>
 #include <linux/pid_namespace.h>
-
-#include <asm/uaccess.h>
-
+#include <linux/uaccess.h>
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
 
index 3f5de96bbb58f77a43ef8ddb99217e843d813f51..4326d172fc27fb697d5c94baf110f68b721401d4 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/string.h>
 #include <linux/namei.h>
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/coda.h>
 #include <linux/coda_psdev.h>
index 5c1e4242368bc11717a34dcd03a9f33dedb4217e..822629126e89fdd0ad325f75a8aa433febe4a59d 100644 (file)
@@ -40,7 +40,7 @@
 #include <linux/pid_namespace.h>
 #include <asm/io.h>
 #include <asm/poll.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <linux/coda.h>
 #include <linux/coda_psdev.h>
index 21fcf8dcb9cdb76c90e45eaba1bbb2a3d274f803..5bb6e27298a469d3a20bf3e54ef37e79d3435f76 100644 (file)
@@ -27,7 +27,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/vfs.h>
 
index ddcfe590b8a857a13faf7c5eefc4a9d7d8bcbdb9..355c522f3585a5e3a7667225681383ff85b93a48 100644 (file)
@@ -11,6 +11,8 @@
  * The actual compression is based on zlib, see the other files.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
@@ -21,7 +23,7 @@
 #include <linux/vfs.h>
 #include <linux/mutex.h>
 #include <uapi/linux/cramfs_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "internal.h"
 
@@ -153,7 +155,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
 
 static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
 static unsigned buffer_blocknr[READ_BUFFERS];
-static struct super_block * buffer_dev[READ_BUFFERS];
+static struct super_block *buffer_dev[READ_BUFFERS];
 static int next_buffer;
 
 /*
@@ -205,6 +207,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
 
        for (i = 0; i < BLKS_PER_BUF; i++) {
                struct page *page = pages[i];
+
                if (page) {
                        wait_on_page_locked(page);
                        if (!PageUptodate(page)) {
@@ -223,6 +226,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
        data = read_buffers[buffer];
        for (i = 0; i < BLKS_PER_BUF; i++) {
                struct page *page = pages[i];
+
                if (page) {
                        memcpy(data, kmap(page), PAGE_CACHE_SIZE);
                        kunmap(page);
@@ -237,6 +241,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
 static void cramfs_kill_sb(struct super_block *sb)
 {
        struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+
        kill_block_super(sb);
        kfree(sbi);
 }
@@ -277,7 +282,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
                /* check for wrong endianness */
                if (super.magic == CRAMFS_MAGIC_WEND) {
                        if (!silent)
-                               printk(KERN_ERR "cramfs: wrong endianness\n");
+                               pr_err("wrong endianness\n");
                        return -EINVAL;
                }
 
@@ -287,22 +292,22 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
                mutex_unlock(&read_mutex);
                if (super.magic != CRAMFS_MAGIC) {
                        if (super.magic == CRAMFS_MAGIC_WEND && !silent)
-                               printk(KERN_ERR "cramfs: wrong endianness\n");
+                               pr_err("wrong endianness\n");
                        else if (!silent)
-                               printk(KERN_ERR "cramfs: wrong magic\n");
+                               pr_err("wrong magic\n");
                        return -EINVAL;
                }
        }
 
        /* get feature flags first */
        if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) {
-               printk(KERN_ERR "cramfs: unsupported filesystem features\n");
+               pr_err("unsupported filesystem features\n");
                return -EINVAL;
        }
 
        /* Check that the root inode is in a sane state */
        if (!S_ISDIR(super.root.mode)) {
-               printk(KERN_ERR "cramfs: root is not a directory\n");
+               pr_err("root is not a directory\n");
                return -EINVAL;
        }
        /* correct strange, hard-coded permissions of mkcramfs */
@@ -310,23 +315,23 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
 
        root_offset = super.root.offset << 2;
        if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
-               sbi->size=super.size;
-               sbi->blocks=super.fsid.blocks;
-               sbi->files=super.fsid.files;
+               sbi->size = super.size;
+               sbi->blocks = super.fsid.blocks;
+               sbi->files = super.fsid.files;
        } else {
-               sbi->size=1<<28;
-               sbi->blocks=0;
-               sbi->files=0;
+               sbi->size = 1<<28;
+               sbi->blocks = 0;
+               sbi->files = 0;
        }
-       sbi->magic=super.magic;
-       sbi->flags=super.flags;
+       sbi->magic = super.magic;
+       sbi->flags = super.flags;
        if (root_offset == 0)
-               printk(KERN_INFO "cramfs: empty filesystem");
+               pr_info("empty filesystem");
        else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
                 ((root_offset != sizeof(struct cramfs_super)) &&
                  (root_offset != 512 + sizeof(struct cramfs_super))))
        {
-               printk(KERN_ERR "cramfs: bad root offset %lu\n", root_offset);
+               pr_err("bad root offset %lu\n", root_offset);
                return -EINVAL;
        }
 
@@ -425,7 +430,7 @@ static int cramfs_readdir(struct file *file, struct dir_context *ctx)
 /*
  * Lookup and fill in the inode data..
  */
-static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        unsigned int offset = 0;
        struct inode *inode = NULL;
@@ -483,7 +488,7 @@ out:
        return NULL;
 }
 
-static int cramfs_readpage(struct file *file, struct page * page)
+static int cramfs_readpage(struct file *file, struct page *page)
 {
        struct inode *inode = page->mapping->host;
        u32 maxblock;
@@ -511,7 +516,7 @@ static int cramfs_readpage(struct file *file, struct page * page)
                if (compr_len == 0)
                        ; /* hole */
                else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) {
-                       pr_err("cramfs: bad compressed blocksize %u\n",
+                       pr_err("bad compressed blocksize %u\n",
                                compr_len);
                        goto err;
                } else {
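
All of the printk(KERN_ERR "cramfs: ...") to pr_err() conversions in this file hinge on the pr_fmt() definition added at the top: when pr_fmt() is defined before the first include, every pr_*() call in the file picks up the prefix automatically. Roughly:

    /* before the first #include of the file: */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

    /* every pr_err()/pr_info() then expands with the prefix, e.g. */
    pr_err("wrong magic\n");        /* emits "cramfs: wrong magic" at KERN_ERR */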
index 1760c1b84d9787cc27cc23e22387147ad65129e7..ec4f1d4fdad062b2b5e0003adb77f05c3382cdbd 100644 (file)
@@ -15,6 +15,8 @@
  * then is used by multiple filesystems.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/vmalloc.h>
@@ -37,7 +39,7 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
 
        err = zlib_inflateReset(&stream);
        if (err != Z_OK) {
-               printk("zlib_inflateReset error %d\n", err);
+               pr_err("zlib_inflateReset error %d\n", err);
                zlib_inflateEnd(&stream);
                zlib_inflateInit(&stream);
        }
@@ -48,8 +50,8 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
        return stream.total_out;
 
 err:
-       printk("Error %d while decompressing!\n", err);
-       printk("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
+       pr_err("Error %d while decompressing!\n", err);
+       pr_err("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
        return -EIO;
 }
 
@@ -57,7 +59,7 @@ int cramfs_uncompress_init(void)
 {
        if (!initialized++) {
                stream.workspace = vmalloc(zlib_inflate_workspacesize());
-               if ( !stream.workspace ) {
+               if (!stream.workspace) {
                        initialized = 0;
                        return -ENOMEM;
                }
index 8d77ba7b17564676bf1053ef7258fd3f4898abf1..1323c568e3627a4b69a9e46026d5dbb29a48ac53 100644 (file)
@@ -718,16 +718,11 @@ static const struct file_operations waiters_fops = {
 
 void dlm_delete_debug_file(struct dlm_ls *ls)
 {
-       if (ls->ls_debug_rsb_dentry)
-               debugfs_remove(ls->ls_debug_rsb_dentry);
-       if (ls->ls_debug_waiters_dentry)
-               debugfs_remove(ls->ls_debug_waiters_dentry);
-       if (ls->ls_debug_locks_dentry)
-               debugfs_remove(ls->ls_debug_locks_dentry);
-       if (ls->ls_debug_all_dentry)
-               debugfs_remove(ls->ls_debug_all_dentry);
-       if (ls->ls_debug_toss_dentry)
-               debugfs_remove(ls->ls_debug_toss_dentry);
+       debugfs_remove(ls->ls_debug_rsb_dentry);
+       debugfs_remove(ls->ls_debug_waiters_dentry);
+       debugfs_remove(ls->ls_debug_locks_dentry);
+       debugfs_remove(ls->ls_debug_all_dentry);
+       debugfs_remove(ls->ls_debug_toss_dentry);
 }
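
The dlm simplification works because debugfs_remove() -- like kfree() -- tolerates being handed nothing, making the per-dentry guards redundant. Its entry check is essentially:

    void debugfs_remove(struct dentry *dentry)
    {
            if (IS_ERR_OR_NULL(dentry))
                    return;         /* NULL and ERR_PTR dentries are ignored */
            /* ... unlink and release the entry ... */
    }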
 
 int dlm_create_debug_file(struct dlm_ls *ls)
index 356c044e2cd302276f43c19031e74b8f678de35b..bbee8f063dfab1bba67b34fc07424a79c51ff47c 100644 (file)
@@ -12,7 +12,8 @@
 #include "efs.h"
 
 
-static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) {
+static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
+{
        struct buffer_head *bh;
 
        int                     slot, namelen;
@@ -40,10 +41,10 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
                if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) {
                        pr_err("%s(): invalid directory block\n", __func__);
                        brelse(bh);
-                       return(0);
+                       return 0;
                }
 
-               for(slot = 0; slot < dirblock->slots; slot++) {
+               for (slot = 0; slot < dirblock->slots; slot++) {
                        dirslot  = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot));
 
                        namelen  = dirslot->namelen;
@@ -52,12 +53,12 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
                        if ((namelen == len) && (!memcmp(name, nameptr, len))) {
                                inodenum = be32_to_cpu(dirslot->inode);
                                brelse(bh);
-                               return(inodenum);
+                               return inodenum;
                        }
                }
                brelse(bh);
        }
-       return(0);
+       return 0;
 }
 
 struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
index ab1f1200ce5d8b45534e0a3e0da47045116dec4e..a2b42a98c743b80941d7ae1f6caf9ca7158ca1ea 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm)
        if (!mm)
                goto err;
 
-       err = init_new_context(current, mm);
-       if (err)
-               goto err;
-
        err = __bprm_mm_init(bprm);
        if (err)
                goto err;
index 7f20f25c232c453d631b817ad382d27fea83fba3..84529b8a331b1402e3a4662a1f4ce90c4dc185b6 100644 (file)
@@ -116,7 +116,7 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width,
                        num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa,
                                                        pages_in_unit - i);
 
-                       __a1pa = kzalloc(num_a1pa * sizeof__a1pa, GFP_KERNEL);
+                       __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL);
                        if (unlikely(!__a1pa)) {
                                ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n",
                                           num_a1pa);
index 72c82f69b01b28594e56bb9518df6f211f0d51a9..22d1c3df61acfa61ae957ede4aedfa01a3e9ab7a 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
+#include <linux/shmem_fs.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
        case F_GETPIPE_SZ:
                err = pipe_fcntl(filp, cmd, arg);
                break;
+       case F_ADD_SEALS:
+       case F_GET_SEALS:
+               err = shmem_fcntl(filp, cmd, arg);
+               break;
        default:
                break;
        }
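
This is where the new file-sealing fcntls from the memfd_create() series get wired into the VFS: F_ADD_SEALS and F_GET_SEALS are handed off to shmem_fcntl(), which implements them for tmpfs-backed files. A userspace sketch; the constants and the x86_64 syscall number are copied from the uapi headers added in this merge, and no libc wrapper existed yet:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #define MFD_ALLOW_SEALING 0x0002U       /* <linux/memfd.h> */
    #define F_ADD_SEALS     (1024 + 9)      /* <linux/fcntl.h> */
    #define F_GET_SEALS     (1024 + 10)
    #define F_SEAL_SHRINK   0x0002
    #define F_SEAL_GROW     0x0004
    #define F_SEAL_WRITE    0x0008

    int main(void)
    {
            /* 319 == __NR_memfd_create on x86_64 in this series */
            int fd = syscall(319, "sealed-buf", MFD_ALLOW_SEALING);

            if (fd < 0 || ftruncate(fd, 4096) < 0)
                    return 1;
            /* forbid resizing and any further writes through fds or mappings */
            if (fcntl(fd, F_ADD_SEALS,
                      F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE) < 0)
                    return 1;
            return fcntl(fd, F_GET_SEALS) < 0;  /* read back the seal set */
    }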
index f36fc010fccbf1ce82ca129272705e24432fcb2c..2923a7bd82accf8018a771cf5953df7ab5331cda 100644 (file)
@@ -545,12 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
                        struct dnode *d1;
                        struct quad_buffer_head qbh1;
                        if (hpfs_sb(i->i_sb)->sb_chk)
-                           if (up != i->i_ino) {
-                               hpfs_error(i->i_sb,
-                                       "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
-                                       dno, up, (unsigned long)i->i_ino);
-                               return;
-                           }
+                               if (up != i->i_ino) {
+                                       hpfs_error(i->i_sb,
+                                                  "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
+                                                  dno, up,
+                                                  (unsigned long)i->i_ino);
+                                       return;
+                               }
                        if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
                                d1->up = cpu_to_le32(up);
                                d1->root_dnode = 1;
@@ -1061,8 +1062,8 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
                hpfs_brelse4(qbh);
                if (hpfs_sb(s)->sb_chk)
                        if (hpfs_stop_cycles(s, dno, &c1, &c2, "map_fnode_dirent #1")) {
-                       kfree(name2);
-                       return NULL;
+                               kfree(name2);
+                               return NULL;
                }
                goto go_down;
        }
index 5938f3928944c75ee2f52fdd38119ecfdf41f65f..26753ba7b6d656473f17158a1c04064fcb6bffdd 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        mapping->a_ops = &empty_aops;
        mapping->host = inode;
        mapping->flags = 0;
+       atomic_set(&mapping->i_mmap_writable, 0);
        mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
        mapping->private_data = NULL;
        mapping->backing_dev_info = &default_backing_dev_info;
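The new counter is what makes F_SEAL_WRITE enforceable: shared writable mappings increment it, and the sealing path can flip it negative to refuse both the seal (while writable mappings still exist) and new writable mmaps (once sealed). A sketch of the helpers the series adds to include/linux/fs.h, reconstructed from the patch series (signatures may differ in minor detail):

static inline int mapping_map_writable(struct address_space *mapping)
{
        return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
                0 : -EPERM;
}

static inline void mapping_unmap_writable(struct address_space *mapping)
{
        atomic_dec(&mapping->i_mmap_writable);
}

/* Deny new writable mappings; fails while any still exist. */
static inline int mapping_deny_writable(struct address_space *mapping)
{
        return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
                0 : -EBUSY;
}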
index 592e5115a561bc8b736ae9ba90cdbddb8a628b30..f311bf084015fd1450a859ebb31e0200ccd20ff3 100644 (file)
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -158,8 +158,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
                                               "zisofs: zisofs_inflate returned"
                                               " %d, inode = %lu,"
                                               " page idx = %d, bh idx = %d,"
-                                              " avail_in = %d,"
-                                              " avail_out = %d\n",
+                                              " avail_in = %ld,"
+                                              " avail_out = %ld\n",
                                               zerr, inode->i_ino, curpage,
                                               curbh, stream.avail_in,
                                               stream.avail_out);
index 0b9a1e44e833e691e2fc8dd2ca3135274113aad3..5698dae5d92dd5f7c3b8db3674348ae80fea45f0 100644 (file)
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 
        while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
                def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
-               def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out);
-               jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n",
+               def_strm.avail_in = min_t(unsigned long,
+                       (*sourcelen-def_strm.total_in), def_strm.avail_out);
+               jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n",
                          def_strm.avail_in, def_strm.avail_out);
                ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
-               jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n",
+               jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n",
                          def_strm.avail_in, def_strm.avail_out,
                          def_strm.total_in, def_strm.total_out);
                if (ret != Z_OK) {
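This hunk and the isofs one above fix the same class of printk warning: the kernel's private zlib (include/linux/zlib.h) declares the stream counters as uLong, i.e. unsigned long, unlike userspace zlib's uInt, so %d truncates on 64-bit builds. Roughly, with the field list abridged:

typedef struct z_stream_s {
        const Byte      *next_in;       /* next input byte */
        uLong           avail_in;       /* bytes available at next_in */
        uLong           total_in;
        Byte            *next_out;      /* next output byte goes here */
        uLong           avail_out;      /* free space at next_out */
        uLong           total_out;
        /* msg, state, workspace, ... */
} z_stream;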
index 4bc50dac8e979faf4f299d1538d5462329191fa1..742942a983be8c73cf9f3ea01639ccfe86367e18 100644 (file)
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -96,7 +96,7 @@ int minix_new_block(struct inode * inode)
 unsigned long minix_count_free_blocks(struct super_block *sb)
 {
        struct minix_sb_info *sbi = minix_sb(sb);
-       u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1);
+       u32 bits = sbi->s_nzones - sbi->s_firstdatazone + 1;
 
        return (count_free(sbi->s_zmap, sb->s_blocksize, bits)
                << sbi->s_log_zone_size);
index f007a3355570b38de89b57343407ccafad6cbe21..3f57af196a7de36343be9249d2e883a9b162310b 100644 (file)
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -267,12 +267,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
        block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize);
        if (sbi->s_imap_blocks < block) {
                printk("MINIX-fs: file system does not have enough "
-                               "imap blocks allocated.  Refusing to mount\n");
+                               "imap blocks allocated.  Refusing to mount.\n");
                goto out_no_bitmap;
        }
 
        block = minix_blocks_needed(
-                       (sbi->s_nzones - (sbi->s_firstdatazone + 1)),
+                       (sbi->s_nzones - sbi->s_firstdatazone + 1),
                        s->s_blocksize);
        if (sbi->s_zmap_blocks < block) {
                printk("MINIX-fs: file system does not have enough "
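The bitmap-size change in these two minix hunks fixes a genuine undercount rather than style: data zones run from s_firstdatazone through s_nzones - 1, and bit 0 of the zone bitmap is reserved, so s_nzones - s_firstdatazone + 1 bits are in use. A worked example with assumed numbers:

/* Hypothetical volume: s_nzones = 1000, s_firstdatazone = 10.
 * Data zones 10..999 -> 990 zones, plus reserved bit 0 -> 991 bits. */
u32 old_bits = 1000 - (10 + 1);         /* 989: two bits short */
u32 new_bits = 1000 - 10 + 1;           /* 991: correct */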
index 85c98737a146a0dd54d6eceec2eead73b29afad9..fc603e0431bb2b4973e46fe7c11b801ec654b5c5 100644 (file)
--- a/fs/nilfs2/Makefile
+++ b/fs/nilfs2/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o
 nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
        btnode.o bmap.o btree.o direct.o dat.o recovery.o \
        the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
-       ifile.o alloc.o gcinode.o ioctl.o
+       ifile.o alloc.o gcinode.o ioctl.o sysfs.o
index 9bc72dec3fa69afb83bc4d0d952be494d4af2f23..0696161bf59d24cb8471491c2a38c852e0594934 100644 (file)
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -320,6 +320,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
 int nilfs_init_gcinode(struct inode *inode);
 void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
 
+/* sysfs.c */
+int __init nilfs_sysfs_init(void);
+void nilfs_sysfs_exit(void);
+int nilfs_sysfs_create_device_group(struct super_block *);
+void nilfs_sysfs_delete_device_group(struct the_nilfs *);
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
+
 /*
  * Inodes and files operations
  */
index 8c532b2ca3aba1c1a4c6e162641e3eb7d009f91c..c519927b7b5e8c955cf223cbd40924f648376ed2 100644 (file)
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1452,13 +1452,19 @@ static int __init init_nilfs_fs(void)
        if (err)
                goto fail;
 
-       err = register_filesystem(&nilfs_fs_type);
+       err = nilfs_sysfs_init();
        if (err)
                goto free_cachep;
 
+       err = register_filesystem(&nilfs_fs_type);
+       if (err)
+               goto deinit_sysfs_entry;
+
        printk(KERN_INFO "NILFS version 2 loaded\n");
        return 0;
 
+deinit_sysfs_entry:
+       nilfs_sysfs_exit();
 free_cachep:
        nilfs_destroy_cachep();
 fail:
@@ -1468,6 +1474,7 @@ fail:
 static void __exit exit_nilfs_fs(void)
 {
        nilfs_destroy_cachep();
+       nilfs_sysfs_exit();
        unregister_filesystem(&nilfs_fs_type);
 }
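The ordering in init_nilfs_fs() matters: register_filesystem() stays last because the filesystem becomes mountable the instant it succeeds, and a mount immediately touches the sysfs kset set up just before it. The error path then unwinds in reverse with the usual goto-label idiom; schematically (hypothetical setup_*/teardown_* names for illustration):

static int __init init_example(void)
{
        int err;

        err = setup_a();                /* e.g. caches */
        if (err)
                goto fail;
        err = setup_b();                /* e.g. sysfs */
        if (err)
                goto undo_a;
        err = setup_c();                /* register_filesystem() goes last */
        if (err)
                goto undo_b;
        return 0;

undo_b:
        teardown_b();
undo_a:
        teardown_a();
fail:
        return err;
}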
 
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
new file mode 100644 (file)
index 0000000..bbb0dcc
--- /dev/null
+++ b/fs/nilfs2/sysfs.c
@@ -0,0 +1,1137 @@
+/*
+ * sysfs.c - sysfs support implementation.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#include <linux/kobject.h>
+
+#include "nilfs.h"
+#include "mdt.h"
+#include "sufile.h"
+#include "cpfile.h"
+#include "sysfs.h"
+
+/* /sys/fs/<nilfs>/ */
+static struct kset *nilfs_kset;
+
+#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
+               struct tm res; \
+               int count = 0; \
+               time_to_tm(time_t_val, 0, &res); \
+               res.tm_year += 1900; \
+               res.tm_mon += 1; \
+               count = scnprintf(buf, PAGE_SIZE, \
+                                   "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
+                                   res.tm_year, res.tm_mon, res.tm_mday, \
+                                   res.tm_hour, res.tm_min, res.tm_sec);\
+               count; \
+})
+
+#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
+static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
+                                       struct attribute *attr, char *buf) \
+{ \
+       struct the_nilfs *nilfs = container_of(kobj->parent, \
+                                               struct the_nilfs, \
+                                               ns_##parent_name##_kobj); \
+       struct nilfs_##name##_attr *a = container_of(attr, \
+                                               struct nilfs_##name##_attr, \
+                                               attr); \
+       return a->show ? a->show(a, nilfs, buf) : 0; \
+} \
+static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \
+                                        struct attribute *attr, \
+                                        const char *buf, size_t len) \
+{ \
+       struct the_nilfs *nilfs = container_of(kobj->parent, \
+                                               struct the_nilfs, \
+                                               ns_##parent_name##_kobj); \
+       struct nilfs_##name##_attr *a = container_of(attr, \
+                                               struct nilfs_##name##_attr, \
+                                               attr); \
+       return a->store ? a->store(a, nilfs, buf, len) : 0; \
+} \
+static const struct sysfs_ops nilfs_##name##_attr_ops = { \
+       .show   = nilfs_##name##_attr_show, \
+       .store  = nilfs_##name##_attr_store, \
+};
+
+#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
+static void nilfs_##name##_attr_release(struct kobject *kobj) \
+{ \
+       struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+       struct the_nilfs *nilfs = container_of(kobj->parent, \
+                                               struct the_nilfs, \
+                                               ns_##parent_name##_kobj); \
+       subgroups = nilfs->ns_##parent_name##_subgroups; \
+       complete(&subgroups->sg_##name##_kobj_unregister); \
+} \
+static struct kobj_type nilfs_##name##_ktype = { \
+       .default_attrs  = nilfs_##name##_attrs, \
+       .sysfs_ops      = &nilfs_##name##_attr_ops, \
+       .release        = nilfs_##name##_attr_release, \
+};
+
+#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \
+static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
+{ \
+       struct kobject *parent; \
+       struct kobject *kobj; \
+       struct completion *kobj_unregister; \
+       struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+       int err; \
+       subgroups = nilfs->ns_##parent_name##_subgroups; \
+       kobj = &subgroups->sg_##name##_kobj; \
+       kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \
+       parent = &nilfs->ns_##parent_name##_kobj; \
+       kobj->kset = nilfs_kset; \
+       init_completion(kobj_unregister); \
+       err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
+                                   #name); \
+       if (err) \
+               return err; \
+       return 0; \
+} \
+static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
+{ \
+       kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+}
+
+/************************************************************************
+ *                        NILFS snapshot attrs                          *
+ ************************************************************************/
+
+static ssize_t
+nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr,
+                                struct nilfs_root *root, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+                       (unsigned long long)atomic64_read(&root->inodes_count));
+}
+
+static ssize_t
+nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr,
+                                struct nilfs_root *root, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+                       (unsigned long long)atomic64_read(&root->blocks_count));
+}
+
+static const char snapshot_readme_str[] =
+       "The group contains details about mounted snapshot.\n\n"
+       "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n"
+       "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n";
+
+static ssize_t
+nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr,
+                           struct nilfs_root *root, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, snapshot_readme_str);
+}
+
+NILFS_SNAPSHOT_RO_ATTR(inodes_count);
+NILFS_SNAPSHOT_RO_ATTR(blocks_count);
+NILFS_SNAPSHOT_RO_ATTR(README);
+
+static struct attribute *nilfs_snapshot_attrs[] = {
+       NILFS_SNAPSHOT_ATTR_LIST(inodes_count),
+       NILFS_SNAPSHOT_ATTR_LIST(blocks_count),
+       NILFS_SNAPSHOT_ATTR_LIST(README),
+       NULL,
+};
+
+static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj,
+                                       struct attribute *attr, char *buf)
+{
+       struct nilfs_root *root =
+                       container_of(kobj, struct nilfs_root, snapshot_kobj);
+       struct nilfs_snapshot_attr *a =
+                       container_of(attr, struct nilfs_snapshot_attr, attr);
+
+       return a->show ? a->show(a, root, buf) : 0;
+}
+
+static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        const char *buf, size_t len)
+{
+       struct nilfs_root *root =
+                       container_of(kobj, struct nilfs_root, snapshot_kobj);
+       struct nilfs_snapshot_attr *a =
+                       container_of(attr, struct nilfs_snapshot_attr, attr);
+
+       return a->store ? a->store(a, root, buf, len) : 0;
+}
+
+static void nilfs_snapshot_attr_release(struct kobject *kobj)
+{
+       struct nilfs_root *root = container_of(kobj, struct nilfs_root,
+                                               snapshot_kobj);
+       complete(&root->snapshot_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_snapshot_attr_ops = {
+       .show   = nilfs_snapshot_attr_show,
+       .store  = nilfs_snapshot_attr_store,
+};
+
+static struct kobj_type nilfs_snapshot_ktype = {
+       .default_attrs  = nilfs_snapshot_attrs,
+       .sysfs_ops      = &nilfs_snapshot_attr_ops,
+       .release        = nilfs_snapshot_attr_release,
+};
+
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
+{
+       struct the_nilfs *nilfs;
+       struct kobject *parent;
+       int err;
+
+       nilfs = root->nilfs;
+       parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj;
+       root->snapshot_kobj.kset = nilfs_kset;
+       init_completion(&root->snapshot_kobj_unregister);
+
+       if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
+               err = kobject_init_and_add(&root->snapshot_kobj,
+                                           &nilfs_snapshot_ktype,
+                                           &nilfs->ns_dev_kobj,
+                                           "current_checkpoint");
+       } else {
+               err = kobject_init_and_add(&root->snapshot_kobj,
+                                           &nilfs_snapshot_ktype,
+                                           parent,
+                                           "%llu", root->cno);
+       }
+
+       if (err)
+               return err;
+
+       return 0;
+}
+
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
+{
+       kobject_del(&root->snapshot_kobj);
+}
+
+/************************************************************************
+ *                    NILFS mounted snapshots attrs                     *
+ ************************************************************************/
+
+static const char mounted_snapshots_readme_str[] =
+       "The mounted_snapshots group contains group for\n"
+       "every mounted snapshot.\n";
+
+static ssize_t
+nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr,
+                                   struct the_nilfs *nilfs, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, mounted_snapshots_readme_str);
+}
+
+NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README);
+
+static struct attribute *nilfs_mounted_snapshots_attrs[] = {
+       NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README),
+       NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev);
+
+/************************************************************************
+ *                      NILFS checkpoints attrs                         *
+ ************************************************************************/
+
+static ssize_t
+nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr,
+                                           struct the_nilfs *nilfs,
+                                           char *buf)
+{
+       __u64 ncheckpoints;
+       struct nilfs_cpstat cpstat;
+       int err;
+
+       down_read(&nilfs->ns_segctor_sem);
+       err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+       up_read(&nilfs->ns_segctor_sem);
+       if (err < 0) {
+               printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+                       err);
+               return err;
+       }
+
+       ncheckpoints = cpstat.cs_ncps;
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", ncheckpoints);
+}
+
+static ssize_t
+nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       __u64 nsnapshots;
+       struct nilfs_cpstat cpstat;
+       int err;
+
+       down_read(&nilfs->ns_segctor_sem);
+       err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+       up_read(&nilfs->ns_segctor_sem);
+       if (err < 0) {
+               printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+                       err);
+               return err;
+       }
+
+       nsnapshots = cpstat.cs_nsss;
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", nsnapshots);
+}
+
+static ssize_t
+nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+                                           struct the_nilfs *nilfs,
+                                           char *buf)
+{
+       __u64 last_cno;
+
+       spin_lock(&nilfs->ns_last_segment_lock);
+       last_cno = nilfs->ns_last_cno;
+       spin_unlock(&nilfs->ns_last_segment_lock);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       __u64 cno;
+
+       down_read(&nilfs->ns_sem);
+       cno = nilfs->ns_cno;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static const char checkpoints_readme_str[] =
+       "The checkpoints group contains attributes that describe\n"
+       "details about volume's checkpoints.\n\n"
+       "(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n"
+       "(2) snapshots_number\n\tshow number of snapshots on volume.\n\n"
+       "(3) last_seg_checkpoint\n"
+       "\tshow checkpoint number of the latest segment.\n\n"
+       "(4) next_checkpoint\n\tshow next checkpoint number.\n\n";
+
+static ssize_t
+nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr,
+                               struct the_nilfs *nilfs, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, checkpoints_readme_str);
+}
+
+NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number);
+NILFS_CHECKPOINTS_RO_ATTR(snapshots_number);
+NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(README);
+
+static struct attribute *nilfs_checkpoints_attrs[] = {
+       NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number),
+       NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number),
+       NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint),
+       NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint),
+       NILFS_CHECKPOINTS_ATTR_LIST(README),
+       NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(checkpoints, dev);
+NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev);
+NILFS_DEV_INT_GROUP_FNS(checkpoints, dev);
+
+/************************************************************************
+ *                        NILFS segments attrs                          *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr,
+                                    struct the_nilfs *nilfs,
+                                    char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_nsegments);
+}
+
+static ssize_t
+nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_blocks_per_segment);
+}
+
+static ssize_t
+nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr,
+                                   struct the_nilfs *nilfs,
+                                   char *buf)
+{
+       unsigned long ncleansegs;
+
+       down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+       ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
+       up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%lu\n", ncleansegs);
+}
+
+static ssize_t
+nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr,
+                                   struct the_nilfs *nilfs,
+                                   char *buf)
+{
+       struct nilfs_sustat sustat;
+       int err;
+
+       down_read(&nilfs->ns_segctor_sem);
+       err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat);
+       up_read(&nilfs->ns_segctor_sem);
+       if (err < 0) {
+               printk(KERN_ERR "NILFS: unable to get segment stat: err=%d\n",
+                       err);
+               return err;
+       }
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", sustat.ss_ndirtysegs);
+}
+
+static const char segments_readme_str[] =
+       "The segments group contains attributes that describe\n"
+       "details about volume's segments.\n\n"
+       "(1) segments_number\n\tshow number of segments on volume.\n\n"
+       "(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n"
+       "(3) clean_segments\n\tshow count of clean segments.\n\n"
+       "(4) dirty_segments\n\tshow count of dirty segments.\n\n";
+
+static ssize_t
+nilfs_segments_README_show(struct nilfs_segments_attr *attr,
+                           struct the_nilfs *nilfs,
+                           char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, segments_readme_str);
+}
+
+NILFS_SEGMENTS_RO_ATTR(segments_number);
+NILFS_SEGMENTS_RO_ATTR(blocks_per_segment);
+NILFS_SEGMENTS_RO_ATTR(clean_segments);
+NILFS_SEGMENTS_RO_ATTR(dirty_segments);
+NILFS_SEGMENTS_RO_ATTR(README);
+
+static struct attribute *nilfs_segments_attrs[] = {
+       NILFS_SEGMENTS_ATTR_LIST(segments_number),
+       NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment),
+       NILFS_SEGMENTS_ATTR_LIST(clean_segments),
+       NILFS_SEGMENTS_ATTR_LIST(dirty_segments),
+       NILFS_SEGMENTS_ATTR_LIST(README),
+       NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segments, dev);
+NILFS_DEV_INT_GROUP_TYPE(segments, dev);
+NILFS_DEV_INT_GROUP_FNS(segments, dev);
+
+/************************************************************************
+ *                        NILFS segctor attrs                           *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr,
+                                   struct the_nilfs *nilfs,
+                                   char *buf)
+{
+       sector_t last_pseg;
+
+       spin_lock(&nilfs->ns_last_segment_lock);
+       last_pseg = nilfs->ns_last_pseg;
+       spin_unlock(&nilfs->ns_last_segment_lock);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+                       (unsigned long long)last_pseg);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       u64 last_seq;
+
+       spin_lock(&nilfs->ns_last_segment_lock);
+       last_seq = nilfs->ns_last_seq;
+       spin_unlock(&nilfs->ns_last_segment_lock);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", last_seq);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       __u64 last_cno;
+
+       spin_lock(&nilfs->ns_last_segment_lock);
+       last_cno = nilfs->ns_last_cno;
+       spin_unlock(&nilfs->ns_last_segment_lock);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       u64 seg_seq;
+
+       down_read(&nilfs->ns_sem);
+       seg_seq = nilfs->ns_seg_seq;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", seg_seq);
+}
+
+static ssize_t
+nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr,
+                                        struct the_nilfs *nilfs,
+                                        char *buf)
+{
+       __u64 segnum;
+
+       down_read(&nilfs->ns_sem);
+       segnum = nilfs->ns_segnum;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", segnum);
+}
+
+static ssize_t
+nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr,
+                                struct the_nilfs *nilfs,
+                                char *buf)
+{
+       __u64 nextnum;
+
+       down_read(&nilfs->ns_sem);
+       nextnum = nilfs->ns_nextnum;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", nextnum);
+}
+
+static ssize_t
+nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       unsigned long pseg_offset;
+
+       down_read(&nilfs->ns_sem);
+       pseg_offset = nilfs->ns_pseg_offset;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%lu\n", pseg_offset);
+}
+
+static ssize_t
+nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       __u64 cno;
+
+       down_read(&nilfs->ns_sem);
+       cno = nilfs->ns_cno;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
+                                       struct the_nilfs *nilfs,
+                                       char *buf)
+{
+       time_t ctime;
+
+       down_read(&nilfs->ns_sem);
+       ctime = nilfs->ns_ctime;
+       up_read(&nilfs->ns_sem);
+
+       return NILFS_SHOW_TIME(ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
+                                           struct the_nilfs *nilfs,
+                                           char *buf)
+{
+       time_t ctime;
+
+       down_read(&nilfs->ns_sem);
+       ctime = nilfs->ns_ctime;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
+                                        struct the_nilfs *nilfs,
+                                        char *buf)
+{
+       time_t nongc_ctime;
+
+       down_read(&nilfs->ns_sem);
+       nongc_ctime = nilfs->ns_nongc_ctime;
+       up_read(&nilfs->ns_sem);
+
+       return NILFS_SHOW_TIME(nongc_ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
+                                               struct the_nilfs *nilfs,
+                                               char *buf)
+{
+       time_t nongc_ctime;
+
+       down_read(&nilfs->ns_sem);
+       nongc_ctime = nilfs->ns_nongc_ctime;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+                       (unsigned long long)nongc_ctime);
+}
+
+static ssize_t
+nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr,
+                                           struct the_nilfs *nilfs,
+                                           char *buf)
+{
+       u32 ndirtyblks;
+
+       down_read(&nilfs->ns_sem);
+       ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks);
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", ndirtyblks);
+}
+
+static const char segctor_readme_str[] =
+       "The segctor group contains attributes that describe\n"
+       "segctor thread activity details.\n\n"
+       "(1) last_pseg_block\n"
+       "\tshow start block number of the latest segment.\n\n"
+       "(2) last_seg_sequence\n"
+       "\tshow sequence value of the latest segment.\n\n"
+       "(3) last_seg_checkpoint\n"
+       "\tshow checkpoint number of the latest segment.\n\n"
+       "(4) current_seg_sequence\n\tshow segment sequence counter.\n\n"
+       "(5) current_last_full_seg\n"
+       "\tshow index number of the latest full segment.\n\n"
+       "(6) next_full_seg\n"
+       "\tshow index number of the full segment index to be used next.\n\n"
+       "(7) next_pseg_offset\n"
+       "\tshow offset of next partial segment in the current full segment.\n\n"
+       "(8) next_checkpoint\n\tshow next checkpoint number.\n\n"
+       "(9) last_seg_write_time\n"
+       "\tshow write time of the last segment in human-readable format.\n\n"
+       "(10) last_seg_write_time_secs\n"
+       "\tshow write time of the last segment in seconds.\n\n"
+       "(11) last_nongc_write_time\n"
+       "\tshow write time of the last segment not for cleaner operation "
+       "in human-readable format.\n\n"
+       "(12) last_nongc_write_time_secs\n"
+       "\tshow write time of the last segment not for cleaner operation "
+       "in seconds.\n\n"
+       "(13) dirty_data_blocks_count\n"
+       "\tshow number of dirty data blocks.\n\n";
+
+static ssize_t
+nilfs_segctor_README_show(struct nilfs_segctor_attr *attr,
+                         struct the_nilfs *nilfs, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, segctor_readme_str);
+}
+
+NILFS_SEGCTOR_RO_ATTR(last_pseg_block);
+NILFS_SEGCTOR_RO_ATTR(last_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(current_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(current_last_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_pseg_offset);
+NILFS_SEGCTOR_RO_ATTR(next_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count);
+NILFS_SEGCTOR_RO_ATTR(README);
+
+static struct attribute *nilfs_segctor_attrs[] = {
+       NILFS_SEGCTOR_ATTR_LIST(last_pseg_block),
+       NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence),
+       NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint),
+       NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence),
+       NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg),
+       NILFS_SEGCTOR_ATTR_LIST(next_full_seg),
+       NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset),
+       NILFS_SEGCTOR_ATTR_LIST(next_checkpoint),
+       NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time),
+       NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs),
+       NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time),
+       NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs),
+       NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count),
+       NILFS_SEGCTOR_ATTR_LIST(README),
+       NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segctor, dev);
+NILFS_DEV_INT_GROUP_TYPE(segctor, dev);
+NILFS_DEV_INT_GROUP_FNS(segctor, dev);
+
+/************************************************************************
+ *                        NILFS superblock attrs                        *
+ ************************************************************************/
+
+static ssize_t
+nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
+                                    struct the_nilfs *nilfs,
+                                    char *buf)
+{
+       time_t sbwtime;
+
+       down_read(&nilfs->ns_sem);
+       sbwtime = nilfs->ns_sbwtime;
+       up_read(&nilfs->ns_sem);
+
+       return NILFS_SHOW_TIME(sbwtime, buf);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
+                                        struct the_nilfs *nilfs,
+                                        char *buf)
+{
+       time_t sbwtime;
+
+       down_read(&nilfs->ns_sem);
+       sbwtime = nilfs->ns_sbwtime;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr,
+                                     struct the_nilfs *nilfs,
+                                     char *buf)
+{
+       unsigned sbwcount;
+
+       down_read(&nilfs->ns_sem);
+       sbwcount = nilfs->ns_sbwcount;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", sbwcount);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr,
+                                           struct the_nilfs *nilfs,
+                                           char *buf)
+{
+       unsigned sb_update_freq;
+
+       down_read(&nilfs->ns_sem);
+       sb_update_freq = nilfs->ns_sb_update_freq;
+       up_read(&nilfs->ns_sem);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", sb_update_freq);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr,
+                                           struct the_nilfs *nilfs,
+                                           const char *buf, size_t count)
+{
+       unsigned val;
+       int err;
+
+       err = kstrtouint(skip_spaces(buf), 0, &val);
+       if (err) {
+               printk(KERN_ERR "NILFS: unable to convert string: err=%d\n",
+                       err);
+               return err;
+       }
+
+       if (val < NILFS_SB_FREQ) {
+               val = NILFS_SB_FREQ;
+               printk(KERN_WARNING "NILFS: superblock update frequency cannot be lesser than 10 seconds\n");
+       }
+
+       down_write(&nilfs->ns_sem);
+       nilfs->ns_sb_update_freq = val;
+       up_write(&nilfs->ns_sem);
+
+       return count;
+}
+
+static const char sb_readme_str[] =
+       "The superblock group contains attributes that describe\n"
+       "superblock's details.\n\n"
+       "(1) sb_write_time\n\tshow previous write time of super block "
+       "in human-readable format.\n\n"
+       "(2) sb_write_time_secs\n\tshow previous write time of super block "
+       "in seconds.\n\n"
+       "(3) sb_write_count\n\tshow write count of super block.\n\n"
+       "(4) sb_update_frequency\n"
+       "\tshow/set interval of periodical update of superblock (in seconds).\n\n"
+       "\tYou can set preferable frequency of superblock update by command:\n\n"
+       "\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n";
+
+static ssize_t
+nilfs_superblock_README_show(struct nilfs_superblock_attr *attr,
+                               struct the_nilfs *nilfs, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, sb_readme_str);
+}
+
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_count);
+NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency);
+NILFS_SUPERBLOCK_RO_ATTR(README);
+
+static struct attribute *nilfs_superblock_attrs[] = {
+       NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time),
+       NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs),
+       NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count),
+       NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency),
+       NILFS_SUPERBLOCK_ATTR_LIST(README),
+       NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(superblock, dev);
+NILFS_DEV_INT_GROUP_TYPE(superblock, dev);
+NILFS_DEV_INT_GROUP_FNS(superblock, dev);
+
+/************************************************************************
+ *                        NILFS device attrs                            *
+ ************************************************************************/
+
+static
+ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
+                               struct the_nilfs *nilfs,
+                               char *buf)
+{
+       struct nilfs_super_block **sbp = nilfs->ns_sbp;
+       u32 major = le32_to_cpu(sbp[0]->s_rev_level);
+       u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
+
+       return snprintf(buf, PAGE_SIZE, "%d.%d\n", major, minor);
+}
+
+static
+ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr,
+                                struct the_nilfs *nilfs,
+                                char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%u\n", nilfs->ns_blocksize);
+}
+
+static
+ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
+                                   struct the_nilfs *nilfs,
+                                   char *buf)
+{
+       struct nilfs_super_block **sbp = nilfs->ns_sbp;
+       u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n", dev_size);
+}
+
+static
+ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr,
+                                  struct the_nilfs *nilfs,
+                                  char *buf)
+{
+       sector_t free_blocks = 0;
+
+       nilfs_count_free_blocks(nilfs, &free_blocks);
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+                       (unsigned long long)free_blocks);
+}
+
+static
+ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
+                           struct the_nilfs *nilfs,
+                           char *buf)
+{
+       struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+       return snprintf(buf, PAGE_SIZE, "%pUb\n", sbp[0]->s_uuid);
+}
+
+static
+ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
+                                   struct the_nilfs *nilfs,
+                                   char *buf)
+{
+       struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+       return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
+                        sbp[0]->s_volume_name);
+}
+
+static const char dev_readme_str[] =
+       "The <device> group contains attributes that describe file system\n"
+       "partition's details.\n\n"
+       "(1) revision\n\tshow NILFS file system revision.\n\n"
+       "(2) blocksize\n\tshow volume block size in bytes.\n\n"
+       "(3) device_size\n\tshow volume size in bytes.\n\n"
+       "(4) free_blocks\n\tshow count of free blocks on volume.\n\n"
+       "(5) uuid\n\tshow volume's UUID.\n\n"
+       "(6) volume_name\n\tshow volume's name.\n\n";
+
+static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr,
+                                    struct the_nilfs *nilfs,
+                                    char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, dev_readme_str);
+}
+
+NILFS_DEV_RO_ATTR(revision);
+NILFS_DEV_RO_ATTR(blocksize);
+NILFS_DEV_RO_ATTR(device_size);
+NILFS_DEV_RO_ATTR(free_blocks);
+NILFS_DEV_RO_ATTR(uuid);
+NILFS_DEV_RO_ATTR(volume_name);
+NILFS_DEV_RO_ATTR(README);
+
+static struct attribute *nilfs_dev_attrs[] = {
+       NILFS_DEV_ATTR_LIST(revision),
+       NILFS_DEV_ATTR_LIST(blocksize),
+       NILFS_DEV_ATTR_LIST(device_size),
+       NILFS_DEV_ATTR_LIST(free_blocks),
+       NILFS_DEV_ATTR_LIST(uuid),
+       NILFS_DEV_ATTR_LIST(volume_name),
+       NILFS_DEV_ATTR_LIST(README),
+       NULL,
+};
+
+static ssize_t nilfs_dev_attr_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf)
+{
+       struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+                                               ns_dev_kobj);
+       struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+                                               attr);
+
+       return a->show ? a->show(a, nilfs, buf) : 0;
+}
+
+static ssize_t nilfs_dev_attr_store(struct kobject *kobj,
+                                   struct attribute *attr,
+                                   const char *buf, size_t len)
+{
+       struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+                                               ns_dev_kobj);
+       struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+                                               attr);
+
+       return a->store ? a->store(a, nilfs, buf, len) : 0;
+}
+
+static void nilfs_dev_attr_release(struct kobject *kobj)
+{
+       struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+                                               ns_dev_kobj);
+       complete(&nilfs->ns_dev_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_dev_attr_ops = {
+       .show   = nilfs_dev_attr_show,
+       .store  = nilfs_dev_attr_store,
+};
+
+static struct kobj_type nilfs_dev_ktype = {
+       .default_attrs  = nilfs_dev_attrs,
+       .sysfs_ops      = &nilfs_dev_attr_ops,
+       .release        = nilfs_dev_attr_release,
+};
+
+int nilfs_sysfs_create_device_group(struct super_block *sb)
+{
+       struct the_nilfs *nilfs = sb->s_fs_info;
+       size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups);
+       int err;
+
+       nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL);
+       if (unlikely(!nilfs->ns_dev_subgroups)) {
+               err = -ENOMEM;
+               printk(KERN_ERR "NILFS: unable to allocate memory for device group\n");
+               goto failed_create_device_group;
+       }
+
+       nilfs->ns_dev_kobj.kset = nilfs_kset;
+       init_completion(&nilfs->ns_dev_kobj_unregister);
+       err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
+                                   "%s", sb->s_id);
+       if (err)
+               goto free_dev_subgroups;
+
+       err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
+       if (err)
+               goto cleanup_dev_kobject;
+
+       err = nilfs_sysfs_create_checkpoints_group(nilfs);
+       if (err)
+               goto delete_mounted_snapshots_group;
+
+       err = nilfs_sysfs_create_segments_group(nilfs);
+       if (err)
+               goto delete_checkpoints_group;
+
+       err = nilfs_sysfs_create_superblock_group(nilfs);
+       if (err)
+               goto delete_segments_group;
+
+       err = nilfs_sysfs_create_segctor_group(nilfs);
+       if (err)
+               goto delete_superblock_group;
+
+       return 0;
+
+delete_superblock_group:
+       nilfs_sysfs_delete_superblock_group(nilfs);
+
+delete_segments_group:
+       nilfs_sysfs_delete_segments_group(nilfs);
+
+delete_checkpoints_group:
+       nilfs_sysfs_delete_checkpoints_group(nilfs);
+
+delete_mounted_snapshots_group:
+       nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+
+cleanup_dev_kobject:
+       kobject_del(&nilfs->ns_dev_kobj);
+
+free_dev_subgroups:
+       kfree(nilfs->ns_dev_subgroups);
+
+failed_create_device_group:
+       return err;
+}
+
+void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
+{
+       nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+       nilfs_sysfs_delete_checkpoints_group(nilfs);
+       nilfs_sysfs_delete_segments_group(nilfs);
+       nilfs_sysfs_delete_superblock_group(nilfs);
+       nilfs_sysfs_delete_segctor_group(nilfs);
+       kobject_del(&nilfs->ns_dev_kobj);
+       kfree(nilfs->ns_dev_subgroups);
+}
+
+/************************************************************************
+ *                        NILFS feature attrs                           *
+ ************************************************************************/
+
+static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
+                                           struct attribute *attr, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%d.%d\n",
+                       NILFS_CURRENT_REV, NILFS_MINOR_REV);
+}
+
+static const char features_readme_str[] =
+       "The features group contains attributes that describe NILFS file\n"
+       "system driver features.\n\n"
+       "(1) revision\n\tshow current revision of NILFS file system driver.\n";
+
+static ssize_t nilfs_feature_README_show(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, features_readme_str);
+}
+
+NILFS_FEATURE_RO_ATTR(revision);
+NILFS_FEATURE_RO_ATTR(README);
+
+static struct attribute *nilfs_feature_attrs[] = {
+       NILFS_FEATURE_ATTR_LIST(revision),
+       NILFS_FEATURE_ATTR_LIST(README),
+       NULL,
+};
+
+static const struct attribute_group nilfs_feature_attr_group = {
+       .name = "features",
+       .attrs = nilfs_feature_attrs,
+};
+
+int __init nilfs_sysfs_init(void)
+{
+       int err;
+
+       nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj);
+       if (!nilfs_kset) {
+               err = -ENOMEM;
+               printk(KERN_ERR "NILFS: unable to create sysfs entry: err %d\n",
+                       err);
+               goto failed_sysfs_init;
+       }
+
+       err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+       if (unlikely(err)) {
+               printk(KERN_ERR "NILFS: unable to create feature group: err %d\n",
+                       err);
+               goto cleanup_sysfs_init;
+       }
+
+       return 0;
+
+cleanup_sysfs_init:
+       kset_unregister(nilfs_kset);
+
+failed_sysfs_init:
+       return err;
+}
+
+void nilfs_sysfs_exit(void)
+{
+       sysfs_remove_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+       kset_unregister(nilfs_kset);
+}
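Most of the boilerplate in sysfs.c above comes out of the three NILFS_DEV_INT_GROUP_* macros. As a reading aid, NILFS_DEV_INT_GROUP_OPS(checkpoints, dev) hand-expands to roughly the following (the _store variant is symmetric):

static ssize_t nilfs_checkpoints_attr_show(struct kobject *kobj,
                                           struct attribute *attr, char *buf)
{
        /* kobj is .../checkpoints; its parent is the <device> kobject
         * embedded in struct the_nilfs, so container_of() recovers it. */
        struct the_nilfs *nilfs = container_of(kobj->parent,
                                               struct the_nilfs,
                                               ns_dev_kobj);
        struct nilfs_checkpoints_attr *a =
                container_of(attr, struct nilfs_checkpoints_attr, attr);

        return a->show ? a->show(a, nilfs, buf) : 0;
}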
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
new file mode 100644 (file)
index 0000000..677e3a1
--- /dev/null
+++ b/fs/nilfs2/sysfs.h
@@ -0,0 +1,176 @@
+/*
+ * sysfs.h - sysfs support declarations.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#ifndef _NILFS_SYSFS_H
+#define _NILFS_SYSFS_H
+
+#include <linux/sysfs.h>
+
+#define NILFS_ROOT_GROUP_NAME  "nilfs2"
+
+/*
+ * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
+ * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
+ * @sg_superblock_kobj_unregister: completion state
+ * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
+ * @sg_segctor_kobj_unregister: completion state
+ * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
+ * @sg_mounted_snapshots_kobj_unregister: completion state
+ * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
+ * @sg_checkpoints_kobj_unregister: completion state
+ * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
+ * @sg_segments_kobj_unregister: completion state
+ */
+struct nilfs_sysfs_dev_subgroups {
+       /* /sys/fs/<nilfs>/<device>/superblock */
+       struct kobject sg_superblock_kobj;
+       struct completion sg_superblock_kobj_unregister;
+
+       /* /sys/fs/<nilfs>/<device>/segctor */
+       struct kobject sg_segctor_kobj;
+       struct completion sg_segctor_kobj_unregister;
+
+       /* /sys/fs/<nilfs>/<device>/mounted_snapshots */
+       struct kobject sg_mounted_snapshots_kobj;
+       struct completion sg_mounted_snapshots_kobj_unregister;
+
+       /* /sys/fs/<nilfs>/<device>/checkpoints */
+       struct kobject sg_checkpoints_kobj;
+       struct completion sg_checkpoints_kobj_unregister;
+
+       /* /sys/fs/<nilfs>/<device>/segments */
+       struct kobject sg_segments_kobj;
+       struct completion sg_segments_kobj_unregister;
+};
+
+#define NILFS_COMMON_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+       struct attribute attr; \
+       ssize_t (*show)(struct kobject *, struct attribute *, \
+                       char *); \
+       ssize_t (*store)(struct kobject *, struct attribute *, \
+                        const char *, size_t); \
+};
+
+NILFS_COMMON_ATTR_STRUCT(feature);
+
+#define NILFS_DEV_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+       struct attribute attr; \
+       ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+                       char *); \
+       ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+                        const char *, size_t); \
+};
+
+NILFS_DEV_ATTR_STRUCT(dev);
+NILFS_DEV_ATTR_STRUCT(segments);
+NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
+NILFS_DEV_ATTR_STRUCT(checkpoints);
+NILFS_DEV_ATTR_STRUCT(superblock);
+NILFS_DEV_ATTR_STRUCT(segctor);
+
+#define NILFS_CP_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+       struct attribute attr; \
+       ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+                       char *); \
+       ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+                        const char *, size_t); \
+};
+
+NILFS_CP_ATTR_STRUCT(snapshot);
+
+#define NILFS_ATTR(type, name, mode, show, store) \
+       static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
+               __ATTR(name, mode, show, store)
+
+#define NILFS_INFO_ATTR(type, name) \
+       NILFS_ATTR(type, name, 0444, NULL, NULL)
+#define NILFS_RO_ATTR(type, name) \
+       NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
+#define NILFS_RW_ATTR(type, name) \
+       NILFS_ATTR(type, name, 0644, \
+                   nilfs_##type##_##name##_show, \
+                   nilfs_##type##_##name##_store)
+
+#define NILFS_FEATURE_INFO_ATTR(name) \
+       NILFS_INFO_ATTR(feature, name)
+#define NILFS_FEATURE_RO_ATTR(name) \
+       NILFS_RO_ATTR(feature, name)
+#define NILFS_FEATURE_RW_ATTR(name) \
+       NILFS_RW_ATTR(feature, name)
+
+#define NILFS_DEV_INFO_ATTR(name) \
+       NILFS_INFO_ATTR(dev, name)
+#define NILFS_DEV_RO_ATTR(name) \
+       NILFS_RO_ATTR(dev, name)
+#define NILFS_DEV_RW_ATTR(name) \
+       NILFS_RW_ATTR(dev, name)
+
+#define NILFS_SEGMENTS_RO_ATTR(name) \
+       NILFS_RO_ATTR(segments, name)
+#define NILFS_SEGMENTS_RW_ATTR(name) \
+       NILFS_RW_ATTR(segs_info, name)
+
+#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
+       NILFS_RO_ATTR(mounted_snapshots, name)
+
+#define NILFS_CHECKPOINTS_RO_ATTR(name) \
+       NILFS_RO_ATTR(checkpoints, name)
+#define NILFS_CHECKPOINTS_RW_ATTR(name) \
+       NILFS_RW_ATTR(checkpoints, name)
+
+#define NILFS_SNAPSHOT_INFO_ATTR(name) \
+       NILFS_INFO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RO_ATTR(name) \
+       NILFS_RO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RW_ATTR(name) \
+       NILFS_RW_ATTR(snapshot, name)
+
+#define NILFS_SUPERBLOCK_RO_ATTR(name) \
+       NILFS_RO_ATTR(superblock, name)
+#define NILFS_SUPERBLOCK_RW_ATTR(name) \
+       NILFS_RW_ATTR(superblock, name)
+
+#define NILFS_SEGCTOR_INFO_ATTR(name) \
+       NILFS_INFO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RO_ATTR(name) \
+       NILFS_RO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RW_ATTR(name) \
+       NILFS_RW_ATTR(segctor, name)
+
+#define NILFS_FEATURE_ATTR_LIST(name) \
+       (&nilfs_feature_attr_##name.attr)
+#define NILFS_DEV_ATTR_LIST(name) \
+       (&nilfs_dev_attr_##name.attr)
+#define NILFS_SEGMENTS_ATTR_LIST(name) \
+       (&nilfs_segments_attr_##name.attr)
+#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
+       (&nilfs_mounted_snapshots_attr_##name.attr)
+#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
+       (&nilfs_checkpoints_attr_##name.attr)
+#define NILFS_SNAPSHOT_ATTR_LIST(name) \
+       (&nilfs_snapshot_attr_##name.attr)
+#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
+       (&nilfs_superblock_attr_##name.attr)
+#define NILFS_SEGCTOR_ATTR_LIST(name) \
+       (&nilfs_segctor_attr_##name.attr)
+
+#endif /* _NILFS_SYSFS_H */
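The attribute macros stack three deep, so one concrete expansion helps: NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency), used in sysfs.c above, hand-expands to the definition below. (Note in passing that NILFS_SEGMENTS_RW_ATTR expands through a nonexistent segs_info type; nothing uses it yet, so it still compiles.)

static struct nilfs_superblock_attr nilfs_superblock_attr_sb_update_frequency =
        __ATTR(sb_update_frequency, 0644,
               nilfs_superblock_sb_update_frequency_show,
               nilfs_superblock_sb_update_frequency_store);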
index 8ba8229ba076a0de225b1d95bd26d2c51ee656c3..9da25fe9ea616d6c78d27c918649d43486c37ed6 100644 (file)
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -85,6 +85,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
        nilfs->ns_cptree = RB_ROOT;
        spin_lock_init(&nilfs->ns_cptree_lock);
        init_rwsem(&nilfs->ns_segctor_sem);
+       nilfs->ns_sb_update_freq = NILFS_SB_FREQ;
 
        return nilfs;
 }
@@ -97,6 +98,7 @@ void destroy_nilfs(struct the_nilfs *nilfs)
 {
        might_sleep();
        if (nilfs_init(nilfs)) {
+               nilfs_sysfs_delete_device_group(nilfs);
                brelse(nilfs->ns_sbh[0]);
                brelse(nilfs->ns_sbh[1]);
        }
@@ -640,6 +642,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
        if (err)
                goto failed_sbh;
 
+       err = nilfs_sysfs_create_device_group(sb);
+       if (err)
+               goto failed_sbh;
+
        set_nilfs_init(nilfs);
        err = 0;
  out:
@@ -740,12 +746,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
 {
        struct rb_node **p, *parent;
        struct nilfs_root *root, *new;
+       int err;
 
        root = nilfs_lookup_root(nilfs, cno);
        if (root)
                return root;
 
-       new = kmalloc(sizeof(*root), GFP_KERNEL);
+       new = kzalloc(sizeof(*root), GFP_KERNEL);
        if (!new)
                return NULL;
 
@@ -782,6 +789,12 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
 
        spin_unlock(&nilfs->ns_cptree_lock);
 
+       err = nilfs_sysfs_create_snapshot_group(new);
+       if (err) {
+               kfree(new);
+               new = NULL;
+       }
+
        return new;
 }
 
@@ -790,6 +803,8 @@ void nilfs_put_root(struct nilfs_root *root)
        if (atomic_dec_and_test(&root->count)) {
                struct the_nilfs *nilfs = root->nilfs;
 
+               nilfs_sysfs_delete_snapshot_group(root);
+
                spin_lock(&nilfs->ns_cptree_lock);
                rb_erase(&root->rb_node, &nilfs->ns_cptree);
                spin_unlock(&nilfs->ns_cptree_lock);
index de8cc53b4a5c1f5e246ad77b540345184479c5cf..d01ead1bea9a738a5268908fe321eed19af3cc07 100644 (file)
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 
 struct nilfs_sc_info;
+struct nilfs_sysfs_dev_subgroups;
 
 /* the_nilfs struct */
 enum {
@@ -54,6 +55,7 @@ enum {
  * @ns_sbwcount: write count of super block
  * @ns_sbsize: size of valid data in super block
  * @ns_mount_state: file system state
+ * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
  * @ns_seg_seq: segment sequence counter
  * @ns_segnum: index number of the latest full segment.
  * @ns_nextnum: index number of the full segment index to be used next
@@ -95,6 +97,9 @@ enum {
  * @ns_inode_size: size of on-disk inode
  * @ns_first_ino: first not-special inode number
  * @ns_crc_seed: seed value of CRC32 calculation
+ * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
+ * @ns_dev_kobj_unregister: completion state
+ * @ns_dev_subgroups: <device> subgroups pointer
  */
 struct the_nilfs {
        unsigned long           ns_flags;
@@ -114,6 +119,7 @@ struct the_nilfs {
        unsigned                ns_sbwcount;
        unsigned                ns_sbsize;
        unsigned                ns_mount_state;
+       unsigned                ns_sb_update_freq;
 
        /*
         * Following fields are dedicated to a writable FS-instance.
@@ -188,6 +194,11 @@ struct the_nilfs {
        int                     ns_inode_size;
        int                     ns_first_ino;
        u32                     ns_crc_seed;
+
+       /* /sys/fs/<nilfs>/<device> */
+       struct kobject ns_dev_kobj;
+       struct completion ns_dev_kobj_unregister;
+       struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
 };
 
 #define THE_NILFS_FNS(bit, name)                                       \
@@ -232,6 +243,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
  * @ifile: inode file
  * @inodes_count: number of inodes
  * @blocks_count: number of blocks
+ * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
+ * @snapshot_kobj_unregister: completion state for kernel object
  */
 struct nilfs_root {
        __u64 cno;
@@ -243,6 +256,10 @@ struct nilfs_root {
 
        atomic64_t inodes_count;
        atomic64_t blocks_count;
+
+       /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
+       struct kobject snapshot_kobj;
+       struct completion snapshot_kobj_unregister;
 };
 
 /* Special checkpoint number */
@@ -254,7 +271,8 @@ struct nilfs_root {
 static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
 {
        u64 t = get_seconds();
-       return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
+       return t < nilfs->ns_sbwtime ||
+               t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
 }
 
 static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
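ns_sb_update_freq exists so the superblock write-back interval checked by nilfs_sb_need_update() above is no longer pinned to NILFS_SB_FREQ but can be tuned through the new device sysfs group. A store handler for such an attribute could look roughly like this (the handler signature, attribute name, and locking are assumptions; the real one is defined in fs/nilfs2/sysfs.c):

static ssize_t
nilfs_sb_update_frequency_store(struct the_nilfs *nilfs,	/* signature assumed */
				const char *buf, size_t count)
{
	unsigned int val;
	int err;

	err = kstrtouint(skip_spaces(buf), 0, &val);
	if (err)
		return err;
	if (val < 1)	/* a zero interval would rewrite the superblock constantly */
		return -EINVAL;

	down_write(&nilfs->ns_sem);	/* illustrative locking */
	nilfs->ns_sb_update_freq = val;
	up_write(&nilfs->ns_sem);

	return count;
}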
index ec58c765918365f1cb32555b745fb2503cf7253e..ba8819702c56719ae8a1a7b4596662a653a6ef0b 100644 (file)
@@ -321,7 +321,7 @@ static int omfs_get_imap(struct super_block *sb)
                goto out;
 
        sbi->s_imap_size = array_size;
-       sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
+       sbi->s_imap = kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL);
        if (!sbi->s_imap)
                goto nomem;
 
index 2d696b0c93bfbec6f1dd73ff4a8f71c9da569e4d..043c83cb51f935920cc013846efab7ba853c9077 100644 (file)
  */
 
 struct pid_entry {
-       char *name;
+       const char *name;
        int len;
        umode_t mode;
        const struct inode_operations *iop;
@@ -130,10 +130,6 @@ struct pid_entry {
                { .proc_get_link = get_link } )
 #define REG(NAME, MODE, fops)                          \
        NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
-#define INF(NAME, MODE, read)                          \
-       NOD(NAME, (S_IFREG|(MODE)),                     \
-               NULL, &proc_info_file_operations,       \
-               { .proc_read = read } )
 #define ONE(NAME, MODE, show)                          \
        NOD(NAME, (S_IFREG|(MODE)),                     \
                NULL, &proc_single_file_operations,     \
@@ -200,27 +196,32 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
        return result;
 }
 
-static int proc_pid_cmdline(struct task_struct *task, char *buffer)
+static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns,
+                           struct pid *pid, struct task_struct *task)
 {
-       return get_cmdline(task, buffer, PAGE_SIZE);
+       /*
+        * Rely on struct seq_operations::show() being called once
+        * per internal buffer allocation. See single_open(), traverse().
+        */
+       BUG_ON(m->size < PAGE_SIZE);
+       m->count += get_cmdline(task, m->buf, PAGE_SIZE);
+       return 0;
 }
 
-static int proc_pid_auxv(struct task_struct *task, char *buffer)
+static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
+                        struct pid *pid, struct task_struct *task)
 {
        struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
-       int res = PTR_ERR(mm);
        if (mm && !IS_ERR(mm)) {
                unsigned int nwords = 0;
                do {
                        nwords += 2;
                } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
-               res = nwords * sizeof(mm->saved_auxv[0]);
-               if (res > PAGE_SIZE)
-                       res = PAGE_SIZE;
-               memcpy(buffer, mm->saved_auxv, res);
+               seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
                mmput(mm);
-       }
-       return res;
+               return 0;
+       } else
+               return PTR_ERR(mm);
 }
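Every INF-to-ONE conversion in this file follows the same shape: a handler that formatted into a caller-supplied page buffer and returned a byte count becomes a seq_file show routine that emits through the seq_file and returns 0 or -errno. A toy before/after (the foo_* names are illustrative only):

/* Before: one raw page to fill, return value = bytes written. */
static int foo_read(struct task_struct *task, char *buffer)
{
	return sprintf(buffer, "%d\n", task->pid);
}

/* After: emit via seq_*; buffer sizing and retries are seq_file's job. */
static int foo_show(struct seq_file *m, struct pid_namespace *ns,
		    struct pid *pid, struct task_struct *task)
{
	seq_printf(m, "%d\n", task->pid);
	return 0;
}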
 
 
@@ -229,7 +230,8 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer)
  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
  * Returns the resolved symbol.  If that fails, simply return the address.
  */
-static int proc_pid_wchan(struct task_struct *task, char *buffer)
+static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
+                         struct pid *pid, struct task_struct *task)
 {
        unsigned long wchan;
        char symname[KSYM_NAME_LEN];
@@ -240,9 +242,9 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
                if (!ptrace_may_access(task, PTRACE_MODE_READ))
                        return 0;
                else
-                       return sprintf(buffer, "%lu", wchan);
+                       return seq_printf(m, "%lu", wchan);
        else
-               return sprintf(buffer, "%s", symname);
+               return seq_printf(m, "%s", symname);
 }
 #endif /* CONFIG_KALLSYMS */
 
@@ -304,9 +306,10 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
 /*
  * Provides /proc/PID/schedstat
  */
-static int proc_pid_schedstat(struct task_struct *task, char *buffer)
+static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
+                             struct pid *pid, struct task_struct *task)
 {
-       return sprintf(buffer, "%llu %llu %lu\n",
+       return seq_printf(m, "%llu %llu %lu\n",
                        (unsigned long long)task->se.sum_exec_runtime,
                        (unsigned long long)task->sched_info.run_delay,
                        task->sched_info.pcount);
@@ -404,7 +407,8 @@ static const struct file_operations proc_cpuset_operations = {
 };
 #endif
 
-static int proc_oom_score(struct task_struct *task, char *buffer)
+static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
+                         struct pid *pid, struct task_struct *task)
 {
        unsigned long totalpages = totalram_pages + total_swap_pages;
        unsigned long points = 0;
@@ -414,12 +418,12 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
                points = oom_badness(task, NULL, NULL, totalpages) *
                                                1000 / totalpages;
        read_unlock(&tasklist_lock);
-       return sprintf(buffer, "%lu\n", points);
+       return seq_printf(m, "%lu\n", points);
 }
 
 struct limit_names {
-       char *name;
-       char *unit;
+       const char *name;
+       const char *unit;
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
@@ -442,12 +446,11 @@ static const struct limit_names lnames[RLIM_NLIMITS] = {
 };
 
 /* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
+                          struct pid *pid, struct task_struct *task)
 {
        unsigned int i;
-       int count = 0;
        unsigned long flags;
-       char *bufptr = buffer;
 
        struct rlimit rlim[RLIM_NLIMITS];
 
@@ -459,35 +462,34 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
        /*
         * print the file header
         */
-       count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+       seq_printf(m, "%-25s %-20s %-20s %-10s\n",
                        "Limit", "Soft Limit", "Hard Limit", "Units");
 
        for (i = 0; i < RLIM_NLIMITS; i++) {
                if (rlim[i].rlim_cur == RLIM_INFINITY)
-                       count += sprintf(&bufptr[count], "%-25s %-20s ",
+                       seq_printf(m, "%-25s %-20s ",
                                         lnames[i].name, "unlimited");
                else
-                       count += sprintf(&bufptr[count], "%-25s %-20lu ",
+                       seq_printf(m, "%-25s %-20lu ",
                                         lnames[i].name, rlim[i].rlim_cur);
 
                if (rlim[i].rlim_max == RLIM_INFINITY)
-                       count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+                       seq_printf(m, "%-20s ", "unlimited");
                else
-                       count += sprintf(&bufptr[count], "%-20lu ",
-                                        rlim[i].rlim_max);
+                       seq_printf(m, "%-20lu ", rlim[i].rlim_max);
 
                if (lnames[i].unit)
-                       count += sprintf(&bufptr[count], "%-10s\n",
-                                        lnames[i].unit);
+                       seq_printf(m, "%-10s\n", lnames[i].unit);
                else
-                       count += sprintf(&bufptr[count], "\n");
+                       seq_putc(m, '\n');
        }
 
-       return count;
+       return 0;
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
-static int proc_pid_syscall(struct task_struct *task, char *buffer)
+static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
+                           struct pid *pid, struct task_struct *task)
 {
        long nr;
        unsigned long args[6], sp, pc;
@@ -496,11 +498,11 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
                return res;
 
        if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
-               res = sprintf(buffer, "running\n");
+               seq_puts(m, "running\n");
        else if (nr < 0)
-               res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+               seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
        else
-               res = sprintf(buffer,
+               seq_printf(m,
                       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
                       nr,
                       args[0], args[1], args[2], args[3], args[4], args[5],
@@ -598,43 +600,6 @@ static const struct inode_operations proc_def_inode_operations = {
        .setattr        = proc_setattr,
 };
 
-#define PROC_BLOCK_SIZE        (3*1024)                /* 4K page size but our output routines use some slack for overruns */
-
-static ssize_t proc_info_read(struct file * file, char __user * buf,
-                         size_t count, loff_t *ppos)
-{
-       struct inode * inode = file_inode(file);
-       unsigned long page;
-       ssize_t length;
-       struct task_struct *task = get_proc_task(inode);
-
-       length = -ESRCH;
-       if (!task)
-               goto out_no_task;
-
-       if (count > PROC_BLOCK_SIZE)
-               count = PROC_BLOCK_SIZE;
-
-       length = -ENOMEM;
-       if (!(page = __get_free_page(GFP_TEMPORARY)))
-               goto out;
-
-       length = PROC_I(inode)->op.proc_read(task, (char*)page);
-
-       if (length >= 0)
-               length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
-       free_page(page);
-out:
-       put_task_struct(task);
-out_no_task:
-       return length;
-}
-
-static const struct file_operations proc_info_file_operations = {
-       .read           = proc_info_read,
-       .llseek         = generic_file_llseek,
-};
-
 static int proc_single_show(struct seq_file *m, void *v)
 {
        struct inode *inode = m->private;
@@ -2056,7 +2021,7 @@ static int show_timer(struct seq_file *m, void *v)
        struct k_itimer *timer;
        struct timers_private *tp = m->private;
        int notify;
-       static char *nstr[] = {
+       static const char * const nstr[] = {
                [SIGEV_SIGNAL] = "signal",
                [SIGEV_NONE] = "none",
                [SIGEV_THREAD] = "thread",
@@ -2392,7 +2357,7 @@ static const struct file_operations proc_coredump_filter_operations = {
 #endif
 
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
 {
        struct task_io_accounting acct = task->ioac;
        unsigned long flags;
@@ -2416,7 +2381,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 
                unlock_task_sighand(task, &flags);
        }
-       result = sprintf(buffer,
+       result = seq_printf(m,
                        "rchar: %llu\n"
                        "wchar: %llu\n"
                        "syscr: %llu\n"
@@ -2436,20 +2401,22 @@ out_unlock:
        return result;
 }
 
-static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
+                                 struct pid *pid, struct task_struct *task)
 {
-       return do_io_accounting(task, buffer, 0);
+       return do_io_accounting(task, m, 0);
 }
 
-static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
+                                  struct pid *pid, struct task_struct *task)
 {
-       return do_io_accounting(task, buffer, 1);
+       return do_io_accounting(task, m, 1);
 }
 #endif /* CONFIG_TASK_IO_ACCOUNTING */
 
 #ifdef CONFIG_USER_NS
 static int proc_id_map_open(struct inode *inode, struct file *file,
-       struct seq_operations *seq_ops)
+       const struct seq_operations *seq_ops)
 {
        struct user_namespace *ns = NULL;
        struct task_struct *task;
@@ -2557,10 +2524,10 @@ static const struct pid_entry tgid_base_stuff[] = {
        DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
 #endif
        REG("environ",    S_IRUSR, proc_environ_operations),
-       INF("auxv",       S_IRUSR, proc_pid_auxv),
+       ONE("auxv",       S_IRUSR, proc_pid_auxv),
        ONE("status",     S_IRUGO, proc_pid_status),
        ONE("personality", S_IRUSR, proc_pid_personality),
-       INF("limits",     S_IRUGO, proc_pid_limits),
+       ONE("limits",     S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -2569,9 +2536,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #endif
        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
-       INF("syscall",    S_IRUSR, proc_pid_syscall),
+       ONE("syscall",    S_IRUSR, proc_pid_syscall),
 #endif
-       INF("cmdline",    S_IRUGO, proc_pid_cmdline),
+       ONE("cmdline",    S_IRUGO, proc_pid_cmdline),
        ONE("stat",       S_IRUGO, proc_tgid_stat),
        ONE("statm",      S_IRUGO, proc_pid_statm),
        REG("maps",       S_IRUGO, proc_pid_maps_operations),
@@ -2594,13 +2561,13 @@ static const struct pid_entry tgid_base_stuff[] = {
        DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
 #endif
 #ifdef CONFIG_KALLSYMS
-       INF("wchan",      S_IRUGO, proc_pid_wchan),
+       ONE("wchan",      S_IRUGO, proc_pid_wchan),
 #endif
 #ifdef CONFIG_STACKTRACE
        ONE("stack",      S_IRUSR, proc_pid_stack),
 #endif
 #ifdef CONFIG_SCHEDSTATS
-       INF("schedstat",  S_IRUGO, proc_pid_schedstat),
+       ONE("schedstat",  S_IRUGO, proc_pid_schedstat),
 #endif
 #ifdef CONFIG_LATENCYTOP
        REG("latency",  S_IRUGO, proc_lstats_operations),
@@ -2611,7 +2578,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_CGROUPS
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
 #endif
-       INF("oom_score",  S_IRUGO, proc_oom_score),
+       ONE("oom_score",  S_IRUGO, proc_oom_score),
        REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
@@ -2625,10 +2592,10 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-       INF("io",       S_IRUSR, proc_tgid_io_accounting),
+       ONE("io",       S_IRUSR, proc_tgid_io_accounting),
 #endif
 #ifdef CONFIG_HARDWALL
-       INF("hardwall",   S_IRUGO, proc_pid_hardwall),
+       ONE("hardwall",   S_IRUGO, proc_pid_hardwall),
 #endif
 #ifdef CONFIG_USER_NS
        REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -2780,12 +2747,12 @@ out:
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
-       int result = 0;
+       int result = -ENOENT;
        struct task_struct *task;
        unsigned tgid;
        struct pid_namespace *ns;
 
-       tgid = name_to_int(dentry);
+       tgid = name_to_int(&dentry->d_name);
        if (tgid == ~0U)
                goto out;
 
@@ -2896,18 +2863,18 @@ static const struct pid_entry tid_base_stuff[] = {
        DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
        DIR("ns",        S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
        REG("environ",   S_IRUSR, proc_environ_operations),
-       INF("auxv",      S_IRUSR, proc_pid_auxv),
+       ONE("auxv",      S_IRUSR, proc_pid_auxv),
        ONE("status",    S_IRUGO, proc_pid_status),
        ONE("personality", S_IRUSR, proc_pid_personality),
-       INF("limits",    S_IRUGO, proc_pid_limits),
+       ONE("limits",    S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
-       INF("syscall",   S_IRUSR, proc_pid_syscall),
+       ONE("syscall",   S_IRUSR, proc_pid_syscall),
 #endif
-       INF("cmdline",   S_IRUGO, proc_pid_cmdline),
+       ONE("cmdline",   S_IRUGO, proc_pid_cmdline),
        ONE("stat",      S_IRUGO, proc_tid_stat),
        ONE("statm",     S_IRUGO, proc_pid_statm),
        REG("maps",      S_IRUGO, proc_tid_maps_operations),
@@ -2932,13 +2899,13 @@ static const struct pid_entry tid_base_stuff[] = {
        DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
 #endif
 #ifdef CONFIG_KALLSYMS
-       INF("wchan",     S_IRUGO, proc_pid_wchan),
+       ONE("wchan",     S_IRUGO, proc_pid_wchan),
 #endif
 #ifdef CONFIG_STACKTRACE
        ONE("stack",      S_IRUSR, proc_pid_stack),
 #endif
 #ifdef CONFIG_SCHEDSTATS
-       INF("schedstat", S_IRUGO, proc_pid_schedstat),
+       ONE("schedstat", S_IRUGO, proc_pid_schedstat),
 #endif
 #ifdef CONFIG_LATENCYTOP
        REG("latency",  S_IRUGO, proc_lstats_operations),
@@ -2949,7 +2916,7 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_CGROUPS
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
 #endif
-       INF("oom_score", S_IRUGO, proc_oom_score),
+       ONE("oom_score", S_IRUGO, proc_oom_score),
        REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
@@ -2960,10 +2927,10 @@ static const struct pid_entry tid_base_stuff[] = {
        REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-       INF("io",       S_IRUSR, proc_tid_io_accounting),
+       ONE("io",       S_IRUSR, proc_tid_io_accounting),
 #endif
 #ifdef CONFIG_HARDWALL
-       INF("hardwall",   S_IRUGO, proc_pid_hardwall),
+       ONE("hardwall",   S_IRUGO, proc_pid_hardwall),
 #endif
 #ifdef CONFIG_USER_NS
        REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -3033,7 +3000,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
        if (!leader)
                goto out_no_task;
 
-       tid = name_to_int(dentry);
+       tid = name_to_int(&dentry->d_name);
        if (tid == ~0U)
                goto out;
 
index 0788d093f5d86ace99ad80f68f4d7bd924eff294..955bb55fab8cad3fbdfbd8e69f5536e6c2e53593 100644 (file)
@@ -206,7 +206,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
 {
        struct task_struct *task = get_proc_task(dir);
        int result = -ENOENT;
-       unsigned fd = name_to_int(dentry);
+       unsigned fd = name_to_int(&dentry->d_name);
 
        if (!task)
                goto out_no_task;
index b7f268eb5f45251ae1977c643540e081c6612546..317b72641ebf18cef26baace46000a0382f793b1 100644 (file)
@@ -27,7 +27,7 @@
 
 #include "internal.h"
 
-DEFINE_SPINLOCK(proc_subdir_lock);
+static DEFINE_SPINLOCK(proc_subdir_lock);
 
 static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
 {
@@ -330,28 +330,28 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
                                          nlink_t nlink)
 {
        struct proc_dir_entry *ent = NULL;
-       const char *fn = name;
-       unsigned int len;
-
-       /* make sure name is valid */
-       if (!name || !strlen(name))
-               goto out;
+       const char *fn;
+       struct qstr qstr;
 
        if (xlate_proc_name(name, parent, &fn) != 0)
                goto out;
+       qstr.name = fn;
+       qstr.len = strlen(fn);
+       if (qstr.len == 0 || qstr.len >= 256) {
+               WARN(1, "name len %u\n", qstr.len);
+               return NULL;
+       }
+       if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
+               WARN(1, "create '/proc/%s' by hand\n", qstr.name);
+               return NULL;
+       }
 
-       /* At this point there must not be any '/' characters beyond *fn */
-       if (strchr(fn, '/'))
-               goto out;
-
-       len = strlen(fn);
-
-       ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
+       ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
        if (!ent)
                goto out;
 
-       memcpy(ent->name, fn, len + 1);
-       ent->namelen = len;
+       memcpy(ent->name, fn, qstr.len + 1);
+       ent->namelen = qstr.len;
        ent->mode = mode;
        ent->nlink = nlink;
        atomic_set(&ent->count, 1);
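With this validation in place, a purely numeric name directly under /proc is rejected loudly, since it would collide with the PID directories that proc_root_lookup() (reordered in fs/proc/root.c below) now resolves first. Illustratively:

/* A driver doing this used to succeed and shadow, or be shadowed by, PIDs: */
struct proc_dir_entry *pde = proc_mkdir("1234", NULL);
/* now: WARN "create '/proc/1234' by hand" fires and pde == NULL */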
index 3ab6d14e71c544753e0558fd78022a63200ea644..a024cf7b260f2109911a902b446229951333df5b 100644 (file)
@@ -52,7 +52,6 @@ struct proc_dir_entry {
 
 union proc_op {
        int (*proc_get_link)(struct dentry *, struct path *);
-       int (*proc_read)(struct task_struct *task, char *page);
        int (*proc_show)(struct seq_file *m,
                struct pid_namespace *ns, struct pid *pid,
                struct task_struct *task);
@@ -112,10 +111,10 @@ static inline int task_dumpable(struct task_struct *task)
        return 0;
 }
 
-static inline unsigned name_to_int(struct dentry *dentry)
+static inline unsigned name_to_int(const struct qstr *qstr)
 {
-       const char *name = dentry->d_name.name;
-       int len = dentry->d_name.len;
+       const char *name = qstr->name;
+       int len = qstr->len;
        unsigned n = 0;
 
        if (len > 1 && *name == '0')
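Switching name_to_int() from a dentry to a qstr lets callers such as the rewritten __proc_create() run the same numeric check on names that have no dentry yet. A quick usage sketch (QSTR_INIT is the initializer from <linux/dcache.h>):

struct qstr q = QSTR_INIT("123", 3);
struct qstr bad = QSTR_INIT("0123", 4);

unsigned n = name_to_int(&q);	/* 123 */
n = name_to_int(&bad);		/* ~0U: the leading zero is rejected, see above */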
@@ -178,8 +177,6 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
 /*
  * generic.c
  */
-extern spinlock_t proc_subdir_lock;
-
 extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
 extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
                                     struct dentry *);
index 39e6ef32f0bd6a3483f8771606a88801eb56ccc6..6df8d0722c970ec57d19dda9dbd2806795c03863 100644 (file)
@@ -172,7 +172,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
 
        start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
        end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
-       end = ALIGN(end, PAGE_SIZE);
+       end = PAGE_ALIGN(end);
        /* overlap check (because we have to align the page) */
        list_for_each_entry(tmp, head, list) {
                if (tmp->type != KCORE_VMEMMAP)
index 71290463a1d3fa87a5132e21615792fe7fbaf9f4..f92d5dd578a4a5289aab9e29dcd37b09ee775c46 100644 (file)
@@ -632,7 +632,7 @@ out:
        return ret;
 }
 
-static int scan(struct ctl_table_header *head, ctl_table *table,
+static int scan(struct ctl_table_header *head, struct ctl_table *table,
                unsigned long *pos, struct file *file,
                struct dir_context *ctx)
 {
index cb761f01030028db501075eeadd71da259697b15..15f327bed8c6fea9ccdba944b3540524b6c6d065 100644 (file)
@@ -18,7 +18,7 @@
 /*
  * The /proc/tty directory inodes...
  */
-static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver;
+static struct proc_dir_entry *proc_tty_driver;
 
 /*
  * This is the handler for /proc/tty/drivers
@@ -176,7 +176,7 @@ void __init proc_tty_init(void)
 {
        if (!proc_mkdir("tty", NULL))
                return;
-       proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
+       proc_mkdir("tty/ldisc", NULL);  /* Preserved: it's userspace visible */
        /*
         * /proc/tty/driver/serial reveals the exact character counts for
         * serial links which is just too easy to abuse for inferring
index 5dbadecb234da01be9cc7eb5705e71363da7dfea..574bafc41f0b7c1f77160971ebb9ca6d5a8121e6 100644 (file)
@@ -199,10 +199,10 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
 
 static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
 {
-       if (!proc_lookup(dir, dentry, flags))
+       if (!proc_pid_lookup(dir, dentry, flags))
                return NULL;
        
-       return proc_pid_lookup(dir, dentry, flags);
+       return proc_lookup(dir, dentry, flags);
 }
 
 static int proc_root_readdir(struct file *file, struct dir_context *ctx)
index 382aa890e228cdaeda6a9038fad01c9474d20eda..a90d6d3541992552d30d2b4a2bac062d352f576d 100644 (file)
@@ -328,6 +328,82 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
  * virtually contiguous user-space in ELF layout.
  */
 #ifdef CONFIG_MMU
+/*
+ * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
+ * reported as not being ram with the zero page.
+ *
+ * @vma: vm_area_struct describing requested mapping
+ * @from: start remapping from
+ * @pfn: page frame number to start remapping to
+ * @size: remapping size
+ * @prot: protection bits
+ *
+ * Returns zero on success, -EAGAIN on failure.
+ */
+static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
+                                   unsigned long from, unsigned long pfn,
+                                   unsigned long size, pgprot_t prot)
+{
+       unsigned long map_size;
+       unsigned long pos_start, pos_end, pos;
+       unsigned long zeropage_pfn = my_zero_pfn(0);
+       size_t len = 0;
+
+       pos_start = pfn;
+       pos_end = pfn + (size >> PAGE_SHIFT);
+
+       for (pos = pos_start; pos < pos_end; ++pos) {
+               if (!pfn_is_ram(pos)) {
+                       /*
+                        * We hit a page which is not ram. Remap the continuous
+                        * region between pos_start and pos-1 and replace
+                        * the non-ram page at pos with the zero page.
+                        */
+                       if (pos > pos_start) {
+                               /* Remap continuous region */
+                               map_size = (pos - pos_start) << PAGE_SHIFT;
+                               if (remap_oldmem_pfn_range(vma, from + len,
+                                                          pos_start, map_size,
+                                                          prot))
+                                       goto fail;
+                               len += map_size;
+                       }
+                       /* Remap the zero page */
+                       if (remap_oldmem_pfn_range(vma, from + len,
+                                                  zeropage_pfn,
+                                                  PAGE_SIZE, prot))
+                               goto fail;
+                       len += PAGE_SIZE;
+                       pos_start = pos + 1;
+               }
+       }
+       if (pos > pos_start) {
+               /* Remap the rest */
+               map_size = (pos - pos_start) << PAGE_SHIFT;
+               if (remap_oldmem_pfn_range(vma, from + len, pos_start,
+                                          map_size, prot))
+                       goto fail;
+       }
+       return 0;
+fail:
+       do_munmap(vma->vm_mm, from, len);
+       return -EAGAIN;
+}
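A concrete trace of the loop above, for a mapping request starting at pfn 100 and covering four pages where only pfn 102 is reported as not RAM:

/*
 * pos = 100, 101: pfn_is_ram() is true, keep scanning
 * pos = 102:      remap [100..101] (map_size = 2 pages) at from + 0,
 *                 then map the shared zero page for 102; pos_start = 103
 * pos = 103:      RAM again, scanning resumes
 * loop exits with pos = 104 > pos_start = 103, so the trailing
 * "Remap the rest" block maps pfn 103 as the final page
 */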
+
+static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
+                           unsigned long from, unsigned long pfn,
+                           unsigned long size, pgprot_t prot)
+{
+       /*
+        * Check if oldmem_pfn_is_ram was registered to avoid
+        * looping over all pages without a reason.
+        */
+       if (oldmem_pfn_is_ram)
+               return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
+       else
+               return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
+}
+
 static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 {
        size_t size = vma->vm_end - vma->vm_start;
@@ -387,9 +463,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 
                        tsz = min_t(size_t, m->offset + m->size - start, size);
                        paddr = m->paddr + start - m->offset;
-                       if (remap_oldmem_pfn_range(vma, vma->vm_start + len,
-                                                  paddr >> PAGE_SHIFT, tsz,
-                                                  vma->vm_page_prot))
+                       if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
+                                                   paddr >> PAGE_SHIFT, tsz,
+                                                   vma->vm_page_prot))
                                goto fail;
                        size -= tsz;
                        start += tsz;
index 34a1e5aa848ce601f4d76911d0618f8ac6469cc3..9d7b9a83699e42c23993115d01e8ace172d46df2 100644 (file)
@@ -394,7 +394,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
 
        prot = pgprot_noncached(PAGE_KERNEL);
 
-       pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
+       pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
        if (!pages) {
                pr_err("%s: Failed to allocate array for %u pages\n",
                       __func__, page_count);
index 9dd06199afc952e6ab494a5d15797b6de1f2bc09..5e6bae6fae50bea6cd5462951c39d853fb2b682b 100644 (file)
@@ -5,3 +5,4 @@
 obj-$(CONFIG_QNX6FS_FS) += qnx6.o
 
 qnx6-objs := inode.o dir.o namei.o super_mmi.o
+ccflags-$(CONFIG_QNX6FS_DEBUG) += -DDEBUG
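Replacing the private QNX6DEBUG() wrapper with pr_debug() (see the fs/qnx6 hunks below) works because this Makefile line defines DEBUG whenever CONFIG_QNX6FS_DEBUG=y. Simplified from <linux/printk.h>, the mechanism is roughly:

#ifdef DEBUG
#define pr_debug(fmt, ...) \
	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
/* otherwise compiled out entirely, or routed through dynamic debug
 * when CONFIG_DYNAMIC_DEBUG=y */
#endif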
index 15b7d92ed60d681a32b848a33a5c5c698d9a9fc5..8d64bb5366bf0721bcde9416d5ff3d356c8a0d9b 100644 (file)
@@ -77,21 +77,20 @@ static int qnx6_dir_longfilename(struct inode *inode,
        if (de->de_size != 0xff) {
                /* error - long filename entries always have size 0xff
                   in direntry */
-               printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
-                               de->de_size);
+               pr_err("invalid direntry size (%i).\n", de->de_size);
                return 0;
        }
        lf = qnx6_longname(s, de, &page);
        if (IS_ERR(lf)) {
-               printk(KERN_ERR "qnx6:Error reading longname\n");
+               pr_err("Error reading longname\n");
                return 0;
        }
 
        lf_size = fs16_to_cpu(sbi, lf->lf_size);
 
        if (lf_size > QNX6_LONG_NAME_MAX) {
-               QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
-               printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size);
+               pr_debug("file %s\n", lf->lf_fname);
+               pr_err("Filename too long (%i)\n", lf_size);
                qnx6_put_page(page);
                return 0;
        }
@@ -100,10 +99,10 @@ static int qnx6_dir_longfilename(struct inode *inode,
           mmi 3g filesystem does not have that checksum */
        if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
                        qnx6_lfile_checksum(lf->lf_fname, lf_size))
-               printk(KERN_INFO "qnx6: long filename checksum error.\n");
+               pr_info("long filename checksum error.\n");
 
-       QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
-                                       lf_size, lf->lf_fname, de_inode));
+       pr_debug("qnx6_readdir:%.*s inode:%u\n",
+                lf_size, lf->lf_fname, de_inode);
        if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) {
                qnx6_put_page(page);
                return 0;
@@ -136,7 +135,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
                int i = start;
 
                if (IS_ERR(page)) {
-                       printk(KERN_ERR "qnx6_readdir: read failed\n");
+                       pr_err("%s(): read failed\n", __func__);
                        ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
                        return PTR_ERR(page);
                }
@@ -159,9 +158,9 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
                                        break;
                                }
                        } else {
-                               QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
-                                  " inode:%u\n", size, de->de_fname,
-                                                       no_inode));
+                               pr_debug("%s():%.*s inode:%u\n",
+                                        __func__, size, de->de_fname,
+                                        no_inode);
                                if (!dir_emit(ctx, de->de_fname, size,
                                      no_inode, DT_UNKNOWN)) {
                                        done = true;
@@ -259,8 +258,7 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
                                        if (ino)
                                                goto found;
                                } else
-                                       printk(KERN_ERR "qnx6: undefined "
-                                               "filename size in inode.\n");
+                                       pr_err("undefined filename size in inode.\n");
                        }
                        qnx6_put_page(page);
                }
index 65cdaab3ed49d554e4d618202e4d41d11718c714..44e73923670d59a0c7663804d8710fb3262c6106 100644 (file)
@@ -73,8 +73,8 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
 {
        unsigned phys;
 
-       QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
-                       inode->i_ino, (unsigned long)iblock));
+       pr_debug("qnx6_get_block inode=[%ld] iblock=[%ld]\n",
+                inode->i_ino, (unsigned long)iblock);
 
        phys = qnx6_block_map(inode, iblock);
        if (phys) {
@@ -87,7 +87,7 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
 static int qnx6_check_blockptr(__fs32 ptr)
 {
        if (ptr == ~(__fs32)0) {
-               printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
+               pr_err("hit unused blockpointer.\n");
                return 0;
        }
        return 1;
@@ -127,8 +127,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
        levelptr = no >> bitdelta;
 
        if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
-               printk(KERN_ERR "qnx6:Requested file block number (%u) too big.",
-                               no);
+               pr_err("Requested file block number (%u) too big.\n", no);
                return 0;
        }
 
@@ -137,8 +136,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
        for (i = 0; i < depth; i++) {
                bh = sb_bread(s, block);
                if (!bh) {
-                       printk(KERN_ERR "qnx6:Error reading block (%u)\n",
-                                       block);
+                       pr_err("Error reading block (%u)\n", block);
                        return 0;
                }
                bitdelta -= ptrbits;
@@ -207,26 +205,16 @@ void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
 {
        struct qnx6_sb_info *sbi = QNX6_SB(s);
 
-       QNX6DEBUG((KERN_INFO "magic: %08x\n",
-                               fs32_to_cpu(sbi, sb->sb_magic)));
-       QNX6DEBUG((KERN_INFO "checksum: %08x\n",
-                               fs32_to_cpu(sbi, sb->sb_checksum)));
-       QNX6DEBUG((KERN_INFO "serial: %llx\n",
-                               fs64_to_cpu(sbi, sb->sb_serial)));
-       QNX6DEBUG((KERN_INFO "flags: %08x\n",
-                               fs32_to_cpu(sbi, sb->sb_flags)));
-       QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
-                               fs32_to_cpu(sbi, sb->sb_blocksize)));
-       QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
-                               fs32_to_cpu(sbi, sb->sb_num_inodes)));
-       QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
-                               fs32_to_cpu(sbi, sb->sb_free_inodes)));
-       QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
-                               fs32_to_cpu(sbi, sb->sb_num_blocks)));
-       QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
-                               fs32_to_cpu(sbi, sb->sb_free_blocks)));
-       QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
-                               sb->Inode.levels));
+       pr_debug("magic: %08x\n", fs32_to_cpu(sbi, sb->sb_magic));
+       pr_debug("checksum: %08x\n", fs32_to_cpu(sbi, sb->sb_checksum));
+       pr_debug("serial: %llx\n", fs64_to_cpu(sbi, sb->sb_serial));
+       pr_debug("flags: %08x\n", fs32_to_cpu(sbi, sb->sb_flags));
+       pr_debug("blocksize: %08x\n", fs32_to_cpu(sbi, sb->sb_blocksize));
+       pr_debug("num_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_num_inodes));
+       pr_debug("free_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_free_inodes));
+       pr_debug("num_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_num_blocks));
+       pr_debug("free_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_free_blocks));
+       pr_debug("inode_levels: %02x\n", sb->Inode.levels);
 }
 #endif
 
@@ -277,7 +265,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
           start with the first superblock */
        bh = sb_bread(s, offset);
        if (!bh) {
-               printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+               pr_err("unable to read the first superblock\n");
                return NULL;
        }
        sb = (struct qnx6_super_block *)bh->b_data;
@@ -285,20 +273,16 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
                sbi->s_bytesex = BYTESEX_BE;
                if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
                        /* we got a big endian fs */
-                       QNX6DEBUG((KERN_INFO "qnx6: fs got different"
-                                       " endianness.\n"));
+                       pr_debug("fs got different endianness.\n");
                        return bh;
                } else
                        sbi->s_bytesex = BYTESEX_LE;
                if (!silent) {
                        if (offset == 0) {
-                               printk(KERN_ERR "qnx6: wrong signature (magic)"
-                                       " in superblock #1.\n");
+                               pr_err("wrong signature (magic) in superblock #1.\n");
                        } else {
-                               printk(KERN_INFO "qnx6: wrong signature (magic)"
-                                       " at position (0x%lx) - will try"
-                                       " alternative position (0x0000).\n",
-                                               offset * s->s_blocksize);
+                               pr_info("wrong signature (magic) at position (0x%lx) - will try alternative position (0x0000).\n",
+                                       offset * s->s_blocksize);
                        }
                }
                brelse(bh);
@@ -329,13 +313,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
 
        /* Superblock always is 512 Byte long */
        if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
-               printk(KERN_ERR "qnx6: unable to set blocksize\n");
+               pr_err("unable to set blocksize\n");
                goto outnobh;
        }
 
        /* parse the mount-options */
        if (!qnx6_parse_options((char *) data, s)) {
-               printk(KERN_ERR "qnx6: invalid mount options.\n");
+               pr_err("invalid mount options.\n");
                goto outnobh;
        }
        if (test_opt(s, MMI_FS)) {
@@ -355,7 +339,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
                /* try again without bootblock offset */
                bh1 = qnx6_check_first_superblock(s, 0, silent);
                if (!bh1) {
-                       printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+                       pr_err("unable to read the first superblock\n");
                        goto outnobh;
                }
                /* seems that no bootblock at partition start */
@@ -370,13 +354,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
        /* checksum check - start at byte 8 and end at byte 512 */
        if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
                        crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
-               printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+               pr_err("superblock #1 checksum error\n");
                goto out;
        }
 
        /* set new blocksize */
        if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
-               printk(KERN_ERR "qnx6: unable to set blocksize\n");
+               pr_err("unable to set blocksize\n");
                goto out;
        }
        /* blocksize invalidates bh - pull it back in */
@@ -398,21 +382,20 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
        /* next the second superblock */
        bh2 = sb_bread(s, offset);
        if (!bh2) {
-               printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+               pr_err("unable to read the second superblock\n");
                goto out;
        }
        sb2 = (struct qnx6_super_block *)bh2->b_data;
        if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
                if (!silent)
-                       printk(KERN_ERR "qnx6: wrong signature (magic)"
-                                       " in superblock #2.\n");
+                       pr_err("wrong signature (magic) in superblock #2.\n");
                goto out;
        }
 
        /* checksum check - start at byte 8 and end at byte 512 */
        if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
                                crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
-               printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
+               pr_err("superblock #2 checksum error\n");
                goto out;
        }
 
@@ -422,25 +405,24 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
                sbi->sb_buf = bh1;
                sbi->sb = (struct qnx6_super_block *)bh1->b_data;
                brelse(bh2);
-               printk(KERN_INFO "qnx6: superblock #1 active\n");
+               pr_info("superblock #1 active\n");
        } else {
                /* superblock #2 active */
                sbi->sb_buf = bh2;
                sbi->sb = (struct qnx6_super_block *)bh2->b_data;
                brelse(bh1);
-               printk(KERN_INFO "qnx6: superblock #2 active\n");
+               pr_info("superblock #2 active\n");
        }
 mmi_success:
        /* sanity check - limit maximum indirect pointer levels */
        if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
-               printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
-                       QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
+               pr_err("too many inode levels (max %i, sb %i)\n",
+                      QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
                goto out;
        }
        if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
-               printk(KERN_ERR "qnx6: too many longfilename levels"
-                               " (max %i, sb %i)\n",
-                       QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
+               pr_err("too many longfilename levels (max %i, sb %i)\n",
+                      QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
                goto out;
        }
        s->s_op = &qnx6_sops;
@@ -460,7 +442,7 @@ mmi_success:
        /* prefetch root inode */
        root = qnx6_iget(s, QNX6_ROOT_INO);
        if (IS_ERR(root)) {
-               printk(KERN_ERR "qnx6: get inode failed\n");
+               pr_err("get inode failed\n");
                ret = PTR_ERR(root);
                goto out2;
        }
@@ -474,7 +456,7 @@ mmi_success:
        errmsg = qnx6_checkroot(s);
        if (errmsg != NULL) {
                if (!silent)
-                       printk(KERN_ERR "qnx6: %s\n", errmsg);
+                       pr_err("%s\n", errmsg);
                goto out3;
        }
        return 0;
@@ -555,8 +537,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
        inode->i_mode = 0;
 
        if (ino == 0) {
-               printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
-                               "out of range\n",
+               pr_err("bad inode number on dev %s: %u is out of range\n",
                       sb->s_id, ino);
                iget_failed(inode);
                return ERR_PTR(-EIO);
@@ -566,8 +547,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
        mapping = sbi->inodes->i_mapping;
        page = read_mapping_page(mapping, n, NULL);
        if (IS_ERR(page)) {
-               printk(KERN_ERR "qnx6: major problem: unable to read inode from "
-                      "dev %s\n", sb->s_id);
+               pr_err("major problem: unable to read inode from dev %s\n",
+                      sb->s_id);
                iget_failed(inode);
                return ERR_CAST(page);
        }
@@ -689,7 +670,7 @@ static int __init init_qnx6_fs(void)
                return err;
        }
 
-       printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
+       pr_info("QNX6 filesystem 1.0.0 registered.\n");
        return 0;
 }
 
index 0561326a94f5d48551d68be18dccbb8ffe59fd38..6c1a323137dda33fb798c10ca3ef42ffee2460c3 100644 (file)
@@ -29,12 +29,12 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
                foundinode = qnx6_iget(dir->i_sb, ino);
                qnx6_put_page(page);
                if (IS_ERR(foundinode)) {
-                       QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
-                               " error %ld\n", PTR_ERR(foundinode)));
+                       pr_debug("lookup->iget -> error %ld\n",
+                                PTR_ERR(foundinode));
                        return ERR_CAST(foundinode);
                }
        } else {
-               QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
+               pr_debug("%s(): not found %s\n", __func__, name);
                return NULL;
        }
        d_add(dentry, foundinode);
index b00fcc960d374f28154b3efa9c7d9f8a4e5e7c88..d3fb2b698800298184e72db2489f491d91948ba9 100644 (file)
  *
  */
 
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 
@@ -19,12 +25,6 @@ typedef __u64 __bitwise __fs64;
 
 #include <linux/qnx6_fs.h>
 
-#ifdef CONFIG_QNX6FS_DEBUG
-#define QNX6DEBUG(X) printk X
-#else
-#define QNX6DEBUG(X) (void) 0
-#endif
-
 struct qnx6_sb_info {
        struct buffer_head      *sb_buf;        /* superblock buffer */
        struct qnx6_super_block *sb;            /* our superblock */
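With pr_fmt defined as at the top of this header, every pr_err()/pr_info()/pr_debug() call in the module is prefixed automatically, which is why the string changes throughout this patch can drop the hand-written "qnx6: " prefixes. For example:

pr_err("invalid mount options.\n");
/* expands to printk(KERN_ERR KBUILD_MODNAME ": " "invalid mount options.\n"),
 * i.e. it logs "qnx6: invalid mount options." */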
index 29c32cba62d6664b2120388e7858bfb57292fcfd..62aaf3e3126a6c18bb45abf5c48040393223b178 100644 (file)
@@ -44,15 +44,14 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
           start with the first superblock */
        bh1 = sb_bread(s, 0);
        if (!bh1) {
-               printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
+               pr_err("Unable to read first mmi superblock\n");
                return NULL;
        }
        sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
        sbi = QNX6_SB(s);
        if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
                if (!silent) {
-                       printk(KERN_ERR "qnx6: wrong signature (magic) in"
-                                       " superblock #1.\n");
+                       pr_err("wrong signature (magic) in superblock #1.\n");
                        goto out;
                }
        }
@@ -60,7 +59,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
        /* checksum check - start at byte 8 and end at byte 512 */
        if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
                                crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
-               printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+               pr_err("superblock #1 checksum error\n");
                goto out;
        }
 
@@ -70,7 +69,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
 
        /* set new blocksize */
        if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
-               printk(KERN_ERR "qnx6: unable to set blocksize\n");
+               pr_err("unable to set blocksize\n");
                goto out;
        }
        /* blocksize invalidates bh - pull it back in */
@@ -83,27 +82,26 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
        /* read second superblock */
        bh2 = sb_bread(s, offset);
        if (!bh2) {
-               printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+               pr_err("unable to read the second superblock\n");
                goto out;
        }
        sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
        if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
                if (!silent)
-                       printk(KERN_ERR "qnx6: wrong signature (magic) in"
-                                       " superblock #2.\n");
+                       pr_err("wrong signature (magic) in superblock #2.\n");
                goto out;
        }
 
        /* checksum check - start at byte 8 and end at byte 512 */
        if (fs32_to_cpu(sbi, sb2->sb_checksum)
                        != crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
-               printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+               pr_err("superblock #2 checksum error\n");
                goto out;
        }
 
        qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
        if (!qsb) {
-               printk(KERN_ERR "qnx6: unable to allocate memory.\n");
+               pr_err("unable to allocate memory.\n");
                goto out;
        }
 
@@ -119,7 +117,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
                sbi->sb_buf = bh1;
                sbi->sb = (struct qnx6_super_block *)bh1->b_data;
                brelse(bh2);
-               printk(KERN_INFO "qnx6: superblock #1 active\n");
+               pr_info("superblock #1 active\n");
        } else {
                /* superblock #2 active */
                qnx6_mmi_copy_sb(qsb, sb2);
@@ -131,7 +129,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
                sbi->sb_buf = bh2;
                sbi->sb = (struct qnx6_super_block *)bh2->b_data;
                brelse(bh1);
-               printk(KERN_INFO "qnx6: superblock #2 active\n");
+               pr_info("superblock #2 active\n");
        }
        kfree(qsb);
 
index dda012ad4208d3192521c80a082af5ff038f5b3b..bbafbde3471a65b0f0f509b0ba5ccc48dd1d2718 100644 (file)
@@ -222,7 +222,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
 
        /* gang-find the pages */
        ret = -ENOMEM;
-       pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
+       pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                goto out_free;
 
index d9f5a60dd59b4e3a393498368be7d16fd5ab14fd..0a7dc941aaf4dc7190b4ac3864765f6688b11683 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/stat.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 extern const struct reiserfs_key MIN_KEY;
 
index 54fdf196bfb29c736c0ecf89490a1688a5edaafe..5739cb99de7bdf7a1e3edf39a9e69d81016daa60 100644 (file)
@@ -10,7 +10,7 @@
  * and using buffers obtained after all above.
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/time.h>
 #include "reiserfs.h"
 #include <linux/buffer_head.h>
index db9e80ba53a0db5abe4910fa128bab1e6a2ee6ad..751dd3f4346b5dab98d7b4861cd24a617c148bae 100644 (file)
@@ -6,7 +6,7 @@
 #include "reiserfs.h"
 #include "acl.h"
 #include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
index 73231b1ebdbe0d2121f586c7d0500bb6393cd82c..b751eea32e20733f496a16a52c37cd0f8c2d06a4 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/string.h>
 #include <linux/time.h>
 #include "reiserfs.h"
index 63b2b0ec49e6afacd955abf9f172751768ee08ee..a7eec9888f10f1d2e3f6b0f92bbddba8aaea21cc 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
index 501ed6811a2bd2cc283d75b3cec03e31502d9ad0..6ec8a30a0911b953e0c79ddcbd1458c8fc13142a 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/mount.h>
 #include "reiserfs.h"
 #include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/pagemap.h>
 #include <linux/compat.h>
 
index cfaee912ee0935a66202dafc713dfc0d04e49365..aca73dd739066477a8f4eb6293122cfe84f3f055 100644 (file)
@@ -54,7 +54,7 @@ static void sd_print_item(struct item_head *ih, char *item)
        } else {
                struct stat_data *sd = (struct stat_data *)item;
 
-               printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd),
+               printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
                       (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
                       sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
        }
@@ -408,7 +408,7 @@ static void direntry_print_item(struct item_head *ih, char *item)
                        namebuf[namelen + 2] = 0;
                }
 
-               printk("%d:  %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
+               printk("%d:  %-15s%-15d%-15d%-15lld%-15lld(%s)\n",
                       i, namebuf,
                       deh_dir_id(deh), deh_objectid(deh),
                       GET_HASH_VALUE(deh_offset(deh)),
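The %Ld/%Lu to %lld/%llu conversions here and in the reiserfs files below replace a nonstandard format: the 'L' length modifier with integer conversions is a glibc-ism that the kernel's vsprintf happens to accept but checkpatch flags, while 'll' is the C99 form. For example:

u64 size;	/* some on-disk size */

printk("%llu\n", (unsigned long long)size);	/* C99, portable */
printk("%Lu\n",  (unsigned long long)size);	/* nonstandard, flagged */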
index d6744c8b24e12f6b2108109f05b31c0f88b5a2b7..814dda3ec998d770c9a0e50420599002b9353cc0 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/string.h>
 #include <linux/time.h>
 #include "reiserfs.h"
index c9b47e91baf8891406098ceb9b6692ba7cee101a..ae1dc841db3af85bfef8391449cbea7ef2a13428 100644 (file)
@@ -17,7 +17,7 @@ static char off_buf[80];
 static char *reiserfs_cpu_offset(struct cpu_key *key)
 {
        if (cpu_key_k_type(key) == TYPE_DIRENTRY)
-               sprintf(off_buf, "%Lu(%Lu)",
+               sprintf(off_buf, "%llu(%llu)",
                        (unsigned long long)
                        GET_HASH_VALUE(cpu_key_k_offset(key)),
                        (unsigned long long)
@@ -34,7 +34,7 @@ static char *le_offset(struct reiserfs_key *key)
 
        version = le_key_version(key);
        if (le_key_k_type(version, key) == TYPE_DIRENTRY)
-               sprintf(off_buf, "%Lu(%Lu)",
+               sprintf(off_buf, "%llu(%llu)",
                        (unsigned long long)
                        GET_HASH_VALUE(le_key_k_offset(version, key)),
                        (unsigned long long)
index 02b0b7d0f7d532e0ed0238161118ae627f5d9b7c..621b9f381fe1faed2925d1117e13ee4452129730 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/module.h>
 #include <linux/time.h>
 #include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "reiserfs.h"
 #include <linux/init.h>
 #include <linux/proc_fs.h>
index dd44468edc2b43927a9f04aa2fc050bb38b9454a..24cbe013240fa6559910942173bf7e89768ec49d 100644 (file)
@@ -2006,7 +2006,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
                                            &s_search_path) == POSITION_FOUND);
 
        RFALSE(file_size > ROUND_UP(new_file_size),
-              "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
+              "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
               new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
 
 update_and_out:
index a392cef6acc61e3d237bf8353a302bf4fdf46b67..709ea92d716f03ca78b7e45b6f2677ac50b3ba1d 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "reiserfs.h"
 #include "acl.h"
 #include "xattr.h"
@@ -331,7 +331,7 @@ static int finish_unfinished(struct super_block *s)
                         * not completed truncate found. New size was
                         * committed together with "save" link
                         */
-                       reiserfs_info(s, "Truncating %k to %Ld ..",
+                       reiserfs_info(s, "Truncating %k to %lld ..",
                                      INODE_PKEY(inode), inode->i_size);
 
                        /* don't update modification time */
@@ -1577,7 +1577,7 @@ static int read_super_block(struct super_block *s, int offset)
        rs = (struct reiserfs_super_block *)bh->b_data;
        if (sb_blocksize(rs) != s->s_blocksize) {
                reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
-                                "filesystem on (dev %s, block %Lu, size %lu)",
+                                "filesystem on (dev %s, block %llu, size %lu)",
                                 s->s_id,
                                 (unsigned long long)bh->b_blocknr,
                                 s->s_blocksize);
@@ -2441,8 +2441,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
        struct buffer_head tmp_bh, *bh;
 
        if (!current->journal_info) {
-               printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)"
-                       " cancelled because transaction is not started.\n",
+               printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n",
                        (unsigned long long)off, (unsigned long long)len);
                return -EIO;
        }
index ca416d099e7d5d25dc8a2061aa30a856b26a9ef4..7c36898af40266995be056b1a6b1b168be831e4b 100644 (file)
@@ -45,7 +45,7 @@
 #include <linux/xattr.h>
 #include "xattr.h"
 #include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <net/checksum.h>
 #include <linux/stat.h>
 #include <linux/quotaops.h>
@@ -84,6 +84,7 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 {
        int error;
+
        BUG_ON(!mutex_is_locked(&dir->i_mutex));
 
        mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -98,6 +99,7 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
 {
        int error;
+
        BUG_ON(!mutex_is_locked(&dir->i_mutex));
 
        mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -117,6 +119,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
 {
        struct dentry *privroot = REISERFS_SB(sb)->priv_root;
        struct dentry *xaroot;
+
        if (!privroot->d_inode)
                return ERR_PTR(-ENODATA);
 
@@ -127,6 +130,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
                xaroot = ERR_PTR(-ENODATA);
        else if (!xaroot->d_inode) {
                int err = -ENODATA;
+
                if (xattr_may_create(flags))
                        err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
                if (err) {
@@ -157,6 +161,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
        xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
        if (!IS_ERR(xadir) && !xadir->d_inode) {
                int err = -ENODATA;
+
                if (xattr_may_create(flags))
                        err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
                if (err) {
@@ -188,6 +193,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
 {
        struct reiserfs_dentry_buf *dbuf = buf;
        struct dentry *dentry;
+
        WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
 
        if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
@@ -218,6 +224,7 @@ static void
 cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
 {
        int i;
+
        for (i = 0; i < buf->count; i++)
                if (buf->dentries[i])
                        dput(buf->dentries[i]);
@@ -283,11 +290,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
                             4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
                struct reiserfs_transaction_handle th;
+
                reiserfs_write_lock(inode->i_sb);
                err = journal_begin(&th, inode->i_sb, blocks);
                reiserfs_write_unlock(inode->i_sb);
                if (!err) {
                        int jerror;
+
                        mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
                                          I_MUTEX_XATTR);
                        err = action(dir, data);
@@ -340,6 +349,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
 int reiserfs_delete_xattrs(struct inode *inode)
 {
        int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
+
        if (err)
                reiserfs_warning(inode->i_sb, "jdm-20004",
                                 "Couldn't delete all xattrs (%d)\n", err);
@@ -350,6 +360,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
 int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
 {
        int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
+
        if (err)
                reiserfs_warning(inode->i_sb, "jdm-20007",
                                 "Couldn't chown all xattrs (%d)\n", err);
@@ -439,6 +450,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
 static void update_ctime(struct inode *inode)
 {
        struct timespec now = current_fs_time(inode->i_sb);
+
        if (inode_unhashed(inode) || !inode->i_nlink ||
            timespec_equal(&inode->i_ctime, &now))
                return;
@@ -514,6 +526,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
                size_t chunk;
                size_t skip = 0;
                size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
+
                if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
                        chunk = PAGE_CACHE_SIZE;
                else
@@ -530,6 +543,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 
                if (file_pos == 0) {
                        struct reiserfs_xattr_header *rxh;
+
                        skip = file_pos = sizeof(struct reiserfs_xattr_header);
                        if (chunk + skip > PAGE_CACHE_SIZE)
                                chunk = PAGE_CACHE_SIZE - skip;
@@ -659,6 +673,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
                size_t chunk;
                char *data;
                size_t skip = 0;
+
                if (isize - file_pos > PAGE_CACHE_SIZE)
                        chunk = PAGE_CACHE_SIZE;
                else
@@ -792,6 +807,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 int reiserfs_removexattr(struct dentry *dentry, const char *name)
 {
        const struct xattr_handler *handler;
+
        handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
        if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
@@ -813,9 +829,11 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
 {
        struct listxattr_buf *b = (struct listxattr_buf *)buf;
        size_t size;
+
        if (name[0] != '.' ||
            (namelen != 1 && (name[1] != '.' || namelen != 2))) {
                const struct xattr_handler *handler;
+
                handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
                                                    name);
                if (!handler)   /* Unsupported xattr name */
@@ -885,6 +903,7 @@ static int create_privroot(struct dentry *dentry)
 {
        int err;
        struct inode *inode = dentry->d_parent->d_inode;
+
        WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
 
        err = xattr_mkdir(inode, dentry, 0700);
@@ -1015,6 +1034,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                mutex_lock(&privroot->d_inode->i_mutex);
                if (!REISERFS_SB(s)->xattr_root) {
                        struct dentry *dentry;
+
                        dentry = lookup_one_len(XAROOT_NAME, privroot,
                                                strlen(XAROOT_NAME));
                        if (!IS_ERR(dentry))
index 44503e29379030c891840f7df09a6a6fd76a98a5..4b34b9dc03dda9fffd8da5d3ab7221bd9ab139b7 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/posix_acl_xattr.h>
 #include "xattr.h"
 #include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
                            struct inode *inode, int type,
index 800a3cef6f62726294a9f70a02eeab111af361b2..e7f8939a4cb5786cc1c8df46996c8926afcc066a 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/slab.h>
 #include "xattr.h"
 #include <linux/security.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int
 security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
index a0035719f66baac61a6b1f7048bcd1f2d793ab30..5eeb0c48ba46deb225ec2cc94518746aebceabd3 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
 #include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int
 trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
index 8667491ae7c3ef864a1be3270325c19d6189b80a..e50eab0464711f6ebe363ca9f2bd7ff7767aab12 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
 #include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static int
 user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
index ef90e8bca95ac77e6808a300eb053d36911db523..e98dd88197d50241eec236575573bc5cab924aa4 100644 (file)
@@ -56,6 +56,8 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/fs.h>
@@ -380,7 +382,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
 eio:
        ret = -EIO;
 error:
-       printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos);
+       pr_err("read error for inode 0x%lx\n", pos);
        return ERR_PTR(ret);
 }
 
@@ -390,6 +392,7 @@ error:
 static struct inode *romfs_alloc_inode(struct super_block *sb)
 {
        struct romfs_inode_info *inode;
+
        inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
        return inode ? &inode->vfs_inode : NULL;
 }
@@ -400,6 +403,7 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
 static void romfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
+
        kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
 
@@ -507,15 +511,13 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
        if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 ||
            img_size < ROMFH_SIZE) {
                if (!silent)
-                       printk(KERN_WARNING "VFS:"
-                              " Can't find a romfs filesystem on dev %s.\n",
+                       pr_warn("VFS: Can't find a romfs filesystem on dev %s.\n",
                               sb->s_id);
                goto error_rsb_inval;
        }
 
        if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) {
-               printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n",
-                      sb->s_id);
+               pr_err("bad initial checksum on dev %s.\n", sb->s_id);
                goto error_rsb_inval;
        }
 
@@ -523,8 +525,8 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
 
        len = strnlen(rsb->name, ROMFS_MAXFN);
        if (!silent)
-               printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n",
-                      (unsigned) len, (unsigned) len, rsb->name, storage);
+               pr_notice("Mounting image '%*.*s' through %s\n",
+                         (unsigned) len, (unsigned) len, rsb->name, storage);
 
        kfree(rsb);
        rsb = NULL;
@@ -614,7 +616,7 @@ static int __init init_romfs_fs(void)
 {
        int ret;
 
-       printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n");
+       pr_info("ROMFS MTD (C) 2007 Red Hat, Inc.\n");
 
        romfs_inode_cachep =
                kmem_cache_create("romfs_i",
@@ -623,13 +625,12 @@ static int __init init_romfs_fs(void)
                                  romfs_i_init_once);
 
        if (!romfs_inode_cachep) {
-               printk(KERN_ERR
-                      "ROMFS error: Failed to initialise inode cache\n");
+               pr_err("Failed to initialise inode cache\n");
                return -ENOMEM;
        }
        ret = register_filesystem(&romfs_fs_type);
        if (ret) {
-               printk(KERN_ERR "ROMFS error: Failed to register filesystem\n");
+               pr_err("Failed to register filesystem\n");
                goto error_register;
        }
        return 0;
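[Note: the pr_fmt() definition added at the top of the romfs file is what lets the literal "ROMFS: " prefixes be dropped from each message: the pr_*() macros expand pr_fmt() around their format string at the call site. A simplified sketch of that machinery (not the exact include/linux/printk.h text):

/* simplified: how pr_err() picks up the per-file prefix */
#ifndef pr_fmt
#define pr_fmt(fmt) fmt
#endif

#define pr_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)

/*
 * With "#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt" in effect,
 * pr_err("read error for inode 0x%lx\n", pos) compiles to
 * printk(KERN_ERR "romfs: read error for inode 0x%lx\n", pos).
 */
]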
index dd39980437fce587ed86424467c0803e67059cea..4d0e02b022b302f0ad8233706fa743d1ee510dba 100644 (file)
@@ -6,3 +6,4 @@ obj-$(CONFIG_UFS_FS) += ufs.o
 
 ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
            namei.o super.o symlink.o truncate.o util.o
+ccflags-$(CONFIG_UFS_DEBUG)    += -DDEBUG
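[Note: the new ccflags line builds all of fs/ufs with -DDEBUG when CONFIG_UFS_DEBUG is set, which is what makes the pr_debug() conversions in the following hunks actually emit output; without DEBUG (or dynamic debug, omitted here) pr_debug() compiles to a no-op. A simplified sketch of that dependency:

/* simplified: pr_debug() is only live when DEBUG is defined */
#ifdef DEBUG
#define pr_debug(fmt, ...) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
]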
index 61e8a9b021ddffdfddce21905907b0fedf415b79..7c580c97990ee05dee7e5ad42a9c5754bfe4384d 100644 (file)
@@ -158,16 +158,16 @@ out:
 
 /**
  * ufs_inode_getfrag() - allocate new fragment(s)
- * @inode - pointer to inode
- * @fragment - number of `fragment' which hold pointer
+ * @inode: pointer to inode
+ * @fragment: number of `fragment' which hold pointer
  *   to new allocated fragment(s)
- * @new_fragment - number of new allocated fragment(s)
- * @required - how many fragment(s) we require
- * @err - we set it if something wrong
- * @phys - pointer to where we save physical number of new allocated fragments,
+ * @new_fragment: number of new allocated fragment(s)
+ * @required: how many fragment(s) we require
+ * @err: we set it if something wrong
+ * @phys: pointer to where we save physical number of new allocated fragments,
  *   NULL if we allocate not data(indirect blocks for example).
- * @new - we set it if we allocate new block
- * @locked_page - for ufs_new_fragments()
+ * @new: we set it if we allocate new block
+ * @locked_page: for ufs_new_fragments()
  */
 static struct buffer_head *
 ufs_inode_getfrag(struct inode *inode, u64 fragment,
@@ -315,16 +315,16 @@ repeat2:
 
 /**
  * ufs_inode_getblock() - allocate new block
- * @inode - pointer to inode
- * @bh - pointer to block which hold "pointer" to new allocated block
- * @fragment - number of `fragment' which hold pointer
+ * @inode: pointer to inode
+ * @bh: pointer to block which hold "pointer" to new allocated block
+ * @fragment: number of `fragment' which hold pointer
  *   to new allocated block
- * @new_fragment - number of new allocated fragment
+ * @new_fragment: number of new allocated fragment
  *  (block will hold this fragment and also uspi->s_fpb-1)
- * @err - see ufs_inode_getfrag()
- * @phys - see ufs_inode_getfrag()
- * @new - see ufs_inode_getfrag()
- * @locked_page - see ufs_inode_getfrag()
+ * @err: see ufs_inode_getfrag()
+ * @phys: see ufs_inode_getfrag()
+ * @new: see ufs_inode_getfrag()
+ * @locked_page: see ufs_inode_getfrag()
  */
 static struct buffer_head *
 ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
index b879f1ba34398c4b46c2f80f3f18b33c7447b4fb..da73801301d588bf14eb4e107e42609bedbfb51b 100644 (file)
@@ -65,7 +65,6 @@
  * Evgeniy Dushistov <dushistov@mail.ru>, 2007
  */
 
-
 #include <linux/exportfs.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
@@ -172,73 +171,73 @@ static void ufs_print_super_stuff(struct super_block *sb,
 {
        u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
 
-       printk("ufs_print_super_stuff\n");
-       printk("  magic:     0x%x\n", magic);
+       pr_debug("ufs_print_super_stuff\n");
+       pr_debug("  magic:     0x%x\n", magic);
        if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
-               printk("  fs_size:   %llu\n", (unsigned long long)
-                      fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
-               printk("  fs_dsize:  %llu\n", (unsigned long long)
-                      fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
-               printk("  bsize:         %u\n",
-                      fs32_to_cpu(sb, usb1->fs_bsize));
-               printk("  fsize:         %u\n",
-                      fs32_to_cpu(sb, usb1->fs_fsize));
-               printk("  fs_volname:  %s\n", usb2->fs_un.fs_u2.fs_volname);
-               printk("  fs_sblockloc: %llu\n", (unsigned long long)
-                      fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
-               printk("  cs_ndir(No of dirs):  %llu\n", (unsigned long long)
-                      fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
-               printk("  cs_nbfree(No of free blocks):  %llu\n",
-                      (unsigned long long)
-                      fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
-               printk(KERN_INFO"  cs_nifree(Num of free inodes): %llu\n",
-                      (unsigned long long)
-                      fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
-               printk(KERN_INFO"  cs_nffree(Num of free frags): %llu\n",
-                      (unsigned long long)
-                      fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
-               printk(KERN_INFO"  fs_maxsymlinklen: %u\n",
-                      fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
+               pr_debug("  fs_size:   %llu\n", (unsigned long long)
+                        fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
+               pr_debug("  fs_dsize:  %llu\n", (unsigned long long)
+                        fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
+               pr_debug("  bsize:         %u\n",
+                        fs32_to_cpu(sb, usb1->fs_bsize));
+               pr_debug("  fsize:         %u\n",
+                        fs32_to_cpu(sb, usb1->fs_fsize));
+               pr_debug("  fs_volname:  %s\n", usb2->fs_un.fs_u2.fs_volname);
+               pr_debug("  fs_sblockloc: %llu\n", (unsigned long long)
+                        fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
+               pr_debug("  cs_ndir(No of dirs):  %llu\n", (unsigned long long)
+                        fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
+               pr_debug("  cs_nbfree(No of free blocks):  %llu\n",
+                        (unsigned long long)
+                        fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
+               pr_info("  cs_nifree(Num of free inodes): %llu\n",
+                       (unsigned long long)
+                       fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
+               pr_info("  cs_nffree(Num of free frags): %llu\n",
+                       (unsigned long long)
+                       fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
+               pr_info("  fs_maxsymlinklen: %u\n",
+                       fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
        } else {
-               printk(" sblkno:      %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
-               printk(" cblkno:      %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
-               printk(" iblkno:      %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
-               printk(" dblkno:      %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
-               printk(" cgoffset:    %u\n",
-                      fs32_to_cpu(sb, usb1->fs_cgoffset));
-               printk(" ~cgmask:     0x%x\n",
-                      ~fs32_to_cpu(sb, usb1->fs_cgmask));
-               printk(" size:        %u\n", fs32_to_cpu(sb, usb1->fs_size));
-               printk(" dsize:       %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
-               printk(" ncg:         %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
-               printk(" bsize:       %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
-               printk(" fsize:       %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
-               printk(" frag:        %u\n", fs32_to_cpu(sb, usb1->fs_frag));
-               printk(" fragshift:   %u\n",
-                      fs32_to_cpu(sb, usb1->fs_fragshift));
-               printk(" ~fmask:      %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
-               printk(" fshift:      %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
-               printk(" sbsize:      %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
-               printk(" spc:         %u\n", fs32_to_cpu(sb, usb1->fs_spc));
-               printk(" cpg:         %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
-               printk(" ipg:         %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
-               printk(" fpg:         %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
-               printk(" csaddr:      %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
-               printk(" cssize:      %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
-               printk(" cgsize:      %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
-               printk(" fstodb:      %u\n",
-                      fs32_to_cpu(sb, usb1->fs_fsbtodb));
-               printk(" nrpos:       %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
-               printk(" ndir         %u\n",
-                      fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
-               printk(" nifree       %u\n",
-                      fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
-               printk(" nbfree       %u\n",
-                      fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
-               printk(" nffree       %u\n",
-                      fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
+               pr_debug(" sblkno:      %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
+               pr_debug(" cblkno:      %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
+               pr_debug(" iblkno:      %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
+               pr_debug(" dblkno:      %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
+               pr_debug(" cgoffset:    %u\n",
+                        fs32_to_cpu(sb, usb1->fs_cgoffset));
+               pr_debug(" ~cgmask:     0x%x\n",
+                        ~fs32_to_cpu(sb, usb1->fs_cgmask));
+               pr_debug(" size:        %u\n", fs32_to_cpu(sb, usb1->fs_size));
+               pr_debug(" dsize:       %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
+               pr_debug(" ncg:         %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
+               pr_debug(" bsize:       %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
+               pr_debug(" fsize:       %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
+               pr_debug(" frag:        %u\n", fs32_to_cpu(sb, usb1->fs_frag));
+               pr_debug(" fragshift:   %u\n",
+                        fs32_to_cpu(sb, usb1->fs_fragshift));
+               pr_debug(" ~fmask:      %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
+               pr_debug(" fshift:      %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
+               pr_debug(" sbsize:      %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
+               pr_debug(" spc:         %u\n", fs32_to_cpu(sb, usb1->fs_spc));
+               pr_debug(" cpg:         %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
+               pr_debug(" ipg:         %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
+               pr_debug(" fpg:         %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
+               pr_debug(" csaddr:      %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
+               pr_debug(" cssize:      %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
+               pr_debug(" cgsize:      %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
+               pr_debug(" fstodb:      %u\n",
+                        fs32_to_cpu(sb, usb1->fs_fsbtodb));
+               pr_debug(" nrpos:       %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
+               pr_debug(" ndir         %u\n",
+                        fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
+               pr_debug(" nifree       %u\n",
+                        fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
+               pr_debug(" nbfree       %u\n",
+                        fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
+               pr_debug(" nffree       %u\n",
+                        fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
        }
-       printk("\n");
+       pr_debug("\n");
 }
 
 /*
@@ -247,38 +246,38 @@ static void ufs_print_super_stuff(struct super_block *sb,
 static void ufs_print_cylinder_stuff(struct super_block *sb,
                                     struct ufs_cylinder_group *cg)
 {
-       printk("\nufs_print_cylinder_stuff\n");
-       printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
-       printk("  magic:        %x\n", fs32_to_cpu(sb, cg->cg_magic));
-       printk("  time:         %u\n", fs32_to_cpu(sb, cg->cg_time));
-       printk("  cgx:          %u\n", fs32_to_cpu(sb, cg->cg_cgx));
-       printk("  ncyl:         %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
-       printk("  niblk:        %u\n", fs16_to_cpu(sb, cg->cg_niblk));
-       printk("  ndblk:        %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
-       printk("  cs_ndir:      %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
-       printk("  cs_nbfree:    %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
-       printk("  cs_nifree:    %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
-       printk("  cs_nffree:    %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
-       printk("  rotor:        %u\n", fs32_to_cpu(sb, cg->cg_rotor));
-       printk("  frotor:       %u\n", fs32_to_cpu(sb, cg->cg_frotor));
-       printk("  irotor:       %u\n", fs32_to_cpu(sb, cg->cg_irotor));
-       printk("  frsum:        %u, %u, %u, %u, %u, %u, %u, %u\n",
+       pr_debug("\nufs_print_cylinder_stuff\n");
+       pr_debug("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
+       pr_debug("  magic:        %x\n", fs32_to_cpu(sb, cg->cg_magic));
+       pr_debug("  time:         %u\n", fs32_to_cpu(sb, cg->cg_time));
+       pr_debug("  cgx:          %u\n", fs32_to_cpu(sb, cg->cg_cgx));
+       pr_debug("  ncyl:         %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
+       pr_debug("  niblk:        %u\n", fs16_to_cpu(sb, cg->cg_niblk));
+       pr_debug("  ndblk:        %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
+       pr_debug("  cs_ndir:      %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
+       pr_debug("  cs_nbfree:    %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
+       pr_debug("  cs_nifree:    %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
+       pr_debug("  cs_nffree:    %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
+       pr_debug("  rotor:        %u\n", fs32_to_cpu(sb, cg->cg_rotor));
+       pr_debug("  frotor:       %u\n", fs32_to_cpu(sb, cg->cg_frotor));
+       pr_debug("  irotor:       %u\n", fs32_to_cpu(sb, cg->cg_irotor));
+       pr_debug("  frsum:        %u, %u, %u, %u, %u, %u, %u, %u\n",
            fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]),
            fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]),
            fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]),
            fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7]));
-       printk("  btotoff:      %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
-       printk("  boff:         %u\n", fs32_to_cpu(sb, cg->cg_boff));
-       printk("  iuseoff:      %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
-       printk("  freeoff:      %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
-       printk("  nextfreeoff:  %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
-       printk("  clustersumoff %u\n",
-              fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
-       printk("  clusteroff    %u\n",
-              fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
-       printk("  nclusterblks  %u\n",
-              fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
-       printk("\n");
+       pr_debug("  btotoff:      %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
+       pr_debug("  boff:         %u\n", fs32_to_cpu(sb, cg->cg_boff));
+       pr_debug("  iuseoff:      %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
+       pr_debug("  freeoff:      %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
+       pr_debug("  nextfreeoff:  %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
+       pr_debug("  clustersumoff %u\n",
+                fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
+       pr_debug("  clusteroff    %u\n",
+                fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
+       pr_debug("  nclusterblks  %u\n",
+                fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
+       pr_debug("\n");
 }
 #else
 #  define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
@@ -287,13 +286,12 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
 
 static const struct super_operations ufs_super_ops;
 
-static char error_buf[1024];
-
 void ufs_error (struct super_block * sb, const char * function,
        const char * fmt, ...)
 {
        struct ufs_sb_private_info * uspi;
        struct ufs_super_block_first * usb1;
+       struct va_format vaf;
        va_list args;
 
        uspi = UFS_SB(sb)->s_uspi;
@@ -305,20 +303,21 @@ void ufs_error (struct super_block * sb, const char * function,
                ufs_mark_sb_dirty(sb);
                sb->s_flags |= MS_RDONLY;
        }
-       va_start (args, fmt);
-       vsnprintf (error_buf, sizeof(error_buf), fmt, args);
-       va_end (args);
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
        switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
        case UFS_MOUNT_ONERROR_PANIC:
-               panic ("UFS-fs panic (device %s): %s: %s\n", 
-                       sb->s_id, function, error_buf);
+               panic("panic (device %s): %s: %pV\n",
+                     sb->s_id, function, &vaf);
 
        case UFS_MOUNT_ONERROR_LOCK:
        case UFS_MOUNT_ONERROR_UMOUNT:
        case UFS_MOUNT_ONERROR_REPAIR:
-               printk (KERN_CRIT "UFS-fs error (device %s): %s: %s\n",
-                       sb->s_id, function, error_buf);
-       }               
+               pr_crit("error (device %s): %s: %pV\n",
+                       sb->s_id, function, &vaf);
+       }
+       va_end(args);
 }
 
 void ufs_panic (struct super_block * sb, const char * function,
@@ -326,6 +325,7 @@ void ufs_panic (struct super_block * sb, const char * function,
 {
        struct ufs_sb_private_info * uspi;
        struct ufs_super_block_first * usb1;
+       struct va_format vaf;
        va_list args;
        
        uspi = UFS_SB(sb)->s_uspi;
@@ -336,24 +336,27 @@ void ufs_panic (struct super_block * sb, const char * function,
                ubh_mark_buffer_dirty(USPI_UBH(uspi));
                ufs_mark_sb_dirty(sb);
        }
-       va_start (args, fmt);
-       vsnprintf (error_buf, sizeof(error_buf), fmt, args);
-       va_end (args);
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
        sb->s_flags |= MS_RDONLY;
-       printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n",
-               sb->s_id, function, error_buf);
+       pr_crit("panic (device %s): %s: %pV\n",
+               sb->s_id, function, &vaf);
+       va_end(args);
 }
 
 void ufs_warning (struct super_block * sb, const char * function,
        const char * fmt, ...)
 {
+       struct va_format vaf;
        va_list args;
 
-       va_start (args, fmt);
-       vsnprintf (error_buf, sizeof(error_buf), fmt, args);
-       va_end (args);
-       printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n",
-               sb->s_id, function, error_buf);
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
+       pr_warn("(device %s): %s: %pV\n",
+               sb->s_id, function, &vaf);
+       va_end(args);
 }
 
 enum {
@@ -464,14 +467,12 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
                        ufs_set_opt (*mount_options, ONERROR_UMOUNT);
                        break;
                case Opt_onerror_repair:
-                       printk("UFS-fs: Unable to do repair on error, "
-                               "will lock lock instead\n");
+                       pr_err("Unable to do repair on error, will lock lock instead\n");
                        ufs_clear_opt (*mount_options, ONERROR);
                        ufs_set_opt (*mount_options, ONERROR_REPAIR);
                        break;
                default:
-                       printk("UFS-fs: Invalid option: \"%s\" "
-                                       "or missing value\n", p);
+                       pr_err("Invalid option: \"%s\" or missing value\n", p);
                        return 0;
                }
        }
@@ -788,8 +789,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 
 #ifndef CONFIG_UFS_FS_WRITE
        if (!(sb->s_flags & MS_RDONLY)) {
-               printk("ufs was compiled with read-only support, "
-                      "can't be mounted as read-write\n");
+               pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
                return -EROFS;
        }
 #endif
@@ -812,12 +812,12 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_mount_opt = 0;
        ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK);
        if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) {
-               printk("wrong mount options\n");
+               pr_err("wrong mount options\n");
                goto failed;
        }
        if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) {
                if (!silent)
-                       printk("You didn't specify the type of your ufs filesystem\n\n"
+                       pr_err("You didn't specify the type of your ufs filesystem\n\n"
                        "mount -t ufs -o ufstype="
                        "sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
                        ">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
@@ -868,7 +868,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
                break;
 
        case UFS_MOUNT_UFSTYPE_SUNOS:
-               UFSD(("ufstype=sunos\n"))
+               UFSD("ufstype=sunos\n");
                uspi->s_fsize = block_size = 1024;
                uspi->s_fmask = ~(1024 - 1);
                uspi->s_fshift = 10;
@@ -900,7 +900,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
                flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
                if (!(sb->s_flags & MS_RDONLY)) {
                        if (!silent)
-                               printk(KERN_INFO "ufstype=old is supported read-only\n");
+                               pr_info("ufstype=old is supported read-only\n");
                        sb->s_flags |= MS_RDONLY;
                }
                break;
@@ -916,7 +916,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
                flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
                if (!(sb->s_flags & MS_RDONLY)) {
                        if (!silent)
-                               printk(KERN_INFO "ufstype=nextstep is supported read-only\n");
+                               pr_info("ufstype=nextstep is supported read-only\n");
                        sb->s_flags |= MS_RDONLY;
                }
                break;
@@ -932,7 +932,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
                flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
                if (!(sb->s_flags & MS_RDONLY)) {
                        if (!silent)
-                               printk(KERN_INFO "ufstype=nextstep-cd is supported read-only\n");
+                               pr_info("ufstype=nextstep-cd is supported read-only\n");
                        sb->s_flags |= MS_RDONLY;
                }
                break;
@@ -948,7 +948,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
                flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
                if (!(sb->s_flags & MS_RDONLY)) {
                        if (!silent)
-                               printk(KERN_INFO "ufstype=openstep is supported read-only\n");
+                               pr_info("ufstype=openstep is supported read-only\n");
                        sb->s_flags |= MS_RDONLY;
                }
                break;
@@ -963,19 +963,19 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
                flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
                if (!(sb->s_flags & MS_RDONLY)) {
                        if (!silent)
-                               printk(KERN_INFO "ufstype=hp is supported read-only\n");
+                               pr_info("ufstype=hp is supported read-only\n");
                        sb->s_flags |= MS_RDONLY;
                }
                break;
        default:
                if (!silent)
-                       printk("unknown ufstype\n");
+                       pr_err("unknown ufstype\n");
                goto failed;
        }
        
 again: 
        if (!sb_set_blocksize(sb, block_size)) {
-               printk(KERN_ERR "UFS: failed to set blocksize\n");
+               pr_err("failed to set blocksize\n");
                goto failed;
        }
 
@@ -1034,7 +1034,7 @@ again:
                goto again;
        }
        if (!silent)
-               printk("ufs_read_super: bad magic number\n");
+               pr_err("%s(): bad magic number\n", __func__);
        goto failed;
 
 magic_found:
@@ -1048,33 +1048,33 @@ magic_found:
        uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
 
        if (!is_power_of_2(uspi->s_fsize)) {
-               printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n",
-                       uspi->s_fsize);
-                       goto failed;
+               pr_err("%s(): fragment size %u is not a power of 2\n",
+                      __func__, uspi->s_fsize);
+               goto failed;
        }
        if (uspi->s_fsize < 512) {
-               printk(KERN_ERR "ufs_read_super: fragment size %u is too small\n",
-                       uspi->s_fsize);
+               pr_err("%s(): fragment size %u is too small\n",
+                      __func__, uspi->s_fsize);
                goto failed;
        }
        if (uspi->s_fsize > 4096) {
-               printk(KERN_ERR "ufs_read_super: fragment size %u is too large\n",
-                       uspi->s_fsize);
+               pr_err("%s(): fragment size %u is too large\n",
+                      __func__, uspi->s_fsize);
                goto failed;
        }
        if (!is_power_of_2(uspi->s_bsize)) {
-               printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n",
-                       uspi->s_bsize);
+               pr_err("%s(): block size %u is not a power of 2\n",
+                      __func__, uspi->s_bsize);
                goto failed;
        }
        if (uspi->s_bsize < 4096) {
-               printk(KERN_ERR "ufs_read_super: block size %u is too small\n",
-                       uspi->s_bsize);
+               pr_err("%s(): block size %u is too small\n",
+                      __func__, uspi->s_bsize);
                goto failed;
        }
        if (uspi->s_bsize / uspi->s_fsize > 8) {
-               printk(KERN_ERR "ufs_read_super: too many fragments per block (%u)\n",
-                       uspi->s_bsize / uspi->s_fsize);
+               pr_err("%s(): too many fragments per block (%u)\n",
+                      __func__, uspi->s_bsize / uspi->s_fsize);
                goto failed;
        }
        if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) {
@@ -1113,20 +1113,21 @@ magic_found:
                        UFSD("fs is DEC OSF/1\n");
                        break;
                case UFS_FSACTIVE:
-                       printk("ufs_read_super: fs is active\n");
+                       pr_err("%s(): fs is active\n", __func__);
                        sb->s_flags |= MS_RDONLY;
                        break;
                case UFS_FSBAD:
-                       printk("ufs_read_super: fs is bad\n");
+                       pr_err("%s(): fs is bad\n", __func__);
                        sb->s_flags |= MS_RDONLY;
                        break;
                default:
-                       printk("ufs_read_super: can't grok fs_clean 0x%x\n", usb1->fs_clean);
+                       pr_err("%s(): can't grok fs_clean 0x%x\n",
+                              __func__, usb1->fs_clean);
                        sb->s_flags |= MS_RDONLY;
                        break;
                }
        } else {
-               printk("ufs_read_super: fs needs fsck\n");
+               pr_err("%s(): fs needs fsck\n", __func__);
                sb->s_flags |= MS_RDONLY;
        }
 
@@ -1299,7 +1300,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
        if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
                new_mount_opt |= ufstype;
        } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
-               printk("ufstype can't be changed during remount\n");
+               pr_err("ufstype can't be changed during remount\n");
                unlock_ufs(sb);
                return -EINVAL;
        }
@@ -1328,8 +1329,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
         * fs was mounted as ro, remounting rw
         */
 #ifndef CONFIG_UFS_FS_WRITE
-               printk("ufs was compiled with read-only support, "
-               "can't be mounted as read-write\n");
+               pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
                unlock_ufs(sb);
                return -EINVAL;
 #else
@@ -1338,12 +1338,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
                    ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
                    ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
                    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
-                       printk("this ufstype is read-only supported\n");
+                       pr_err("this ufstype is read-only supported\n");
                        unlock_ufs(sb);
                        return -EINVAL;
                }
                if (!ufs_read_cylinder_structures(sb)) {
-                       printk("failed during remounting\n");
+                       pr_err("failed during remounting\n");
                        unlock_ufs(sb);
                        return -EPERM;
                }
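[Note: the ufs_error()/ufs_panic()/ufs_warning() rework above drops the shared static error_buf, which raced between concurrent mounts, in favor of struct va_format and printk's %pV extension, which formats the va_list inside printk itself with no intermediate buffer; va_end() accordingly moves after the printk, since the arguments are consumed there. The same pattern in a minimal, hypothetical helper:

#include <linux/printk.h>
#include <stdarg.h>

/* hypothetical: forward a varargs message through one pr_warn() */
static void widget_warn(const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        pr_warn("widget: %pV\n", &vaf); /* args consumed here */
        va_end(args);
}
]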
index 343e6fc571e5b3976b6132d43dccb03b538d9380..2a07396d5f9eb623625f238d0d8b61c70301ae19 100644 (file)
@@ -1,6 +1,12 @@
 #ifndef _UFS_UFS_H
 #define _UFS_UFS_H 1
 
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #define UFS_MAX_GROUP_LOADED 8
 #define UFS_CGNO_EMPTY ((unsigned)-1)
 
@@ -71,9 +77,9 @@ struct ufs_inode_info {
  */
 #ifdef CONFIG_UFS_DEBUG
 #      define UFSD(f, a...)    {                                       \
-               printk ("UFSD (%s, %d): %s:",                           \
+               pr_debug("UFSD (%s, %d): %s:",                          \
                        __FILE__, __LINE__, __func__);          \
-               printk (f, ## a);                                       \
+               pr_debug(f, ## a);                                      \
        }
 #else
 #      define UFSD(f, a...)    /**/
index 1437b7da09b251ad79427af493ff4a6b688229f5..c110843fc53b3f80174496148df8798c866db1b5 100644 (file)
@@ -19,6 +19,14 @@ pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
        return dma_alloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, dma_handle, GFP_ATOMIC);
 }
 
+static inline void *
+pci_zalloc_consistent(struct pci_dev *hwdev, size_t size,
+                     dma_addr_t *dma_handle)
+{
+       return dma_zalloc_coherent(hwdev == NULL ? NULL : &hwdev->dev,
+                                  size, dma_handle, GFP_ATOMIC);
+}
+
 static inline void
 pci_free_consistent(struct pci_dev *hwdev, size_t size,
                    void *vaddr, dma_addr_t dma_handle)
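[Note: pci_zalloc_consistent() mirrors pci_alloc_consistent() but hands back zeroed memory via dma_zalloc_coherent(), so callers can drop the memset() that usually follows the allocation. A hypothetical driver fragment using it:

#include <linux/pci.h>

/* hypothetical: allocate a zeroed, DMA-coherent descriptor ring */
static int widget_alloc_ring(struct pci_dev *pdev, void **ring,
                             dma_addr_t *ring_dma)
{
        *ring = pci_zalloc_consistent(pdev, PAGE_SIZE, ring_dma);
        return *ring ? 0 : -ENOMEM;
}
]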
index 115272137a9c8cb3e2b8f1d62dfb010999c97087..4d683df898e6eace4661926628982c70249a5276 100644 (file)
@@ -1,10 +1,10 @@
 #ifndef DECOMPRESS_BUNZIP2_H
 #define DECOMPRESS_BUNZIP2_H
 
-int bunzip2(unsigned char *inbuf, int len,
-           int(*fill)(void*, unsigned int),
-           int(*flush)(void*, unsigned int),
+int bunzip2(unsigned char *inbuf, long len,
+           long (*fill)(void*, unsigned long),
+           long (*flush)(void*, unsigned long),
            unsigned char *output,
-           int *pos,
+           long *pos,
            void(*error)(char *x));
 #endif
index 0c7111a55a1aaf101dd5aefbef1f77036ed599d6..1fcfd64b5076eaa8a7830d840e098ce8b5fe9edc 100644 (file)
@@ -1,11 +1,11 @@
 #ifndef DECOMPRESS_GENERIC_H
 #define DECOMPRESS_GENERIC_H
 
-typedef int (*decompress_fn) (unsigned char *inbuf, int len,
-                             int(*fill)(void*, unsigned int),
-                             int(*flush)(void*, unsigned int),
+typedef int (*decompress_fn) (unsigned char *inbuf, long len,
+                             long (*fill)(void*, unsigned long),
+                             long (*flush)(void*, unsigned long),
                              unsigned char *outbuf,
-                             int *posp,
+                             long *posp,
                              void(*error)(char *x));
 
 /* inbuf   - input buffer
@@ -33,7 +33,7 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len,
 
 
 /* Utility routine to detect the decompression method */
-decompress_fn decompress_method(const unsigned char *inbuf, int len,
+decompress_fn decompress_method(const unsigned char *inbuf, long len,
                                const char **name);
 
 #endif
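[Note: the decompressor entry points and the decompress_fn typedef widen their length and position parameters from int to long so that inputs larger than INT_MAX bytes, such as very large initramfs images, no longer truncate; any fill or flush callback has to follow the new signature. A hypothetical callback matching it:

/* hypothetical flush callback under the new long-based signature */
static long widget_flush(void *src, unsigned long size)
{
        /* consume "size" bytes of decompressed data from src here */
        return size; /* bytes accepted; a short or negative return is an error */
}
]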
index 1d0aedef982206fae64706263b35024b9373e6b8..e4f411fdbd24b2461959a454e98dc158344c1ec0 100644 (file)
@@ -1,10 +1,10 @@
 #ifndef LINUX_DECOMPRESS_INFLATE_H
 #define LINUX_DECOMPRESS_INFLATE_H
 
-int gunzip(unsigned char *inbuf, int len,
-          int(*fill)(void*, unsigned int),
-          int(*flush)(void*, unsigned int),
+int gunzip(unsigned char *inbuf, long len,
+          long (*fill)(void*, unsigned long),
+          long (*flush)(void*, unsigned long),
           unsigned char *output,
-          int *pos,
+          long *pos,
           void(*error_fn)(char *x));
 #endif
index d5b68bf3ec92ae2c4c1d7db106e1a557b0daa344..3273c2f364961f7490684161818c4e6fa11d7f5e 100644 (file)
@@ -1,10 +1,10 @@
 #ifndef DECOMPRESS_UNLZ4_H
 #define DECOMPRESS_UNLZ4_H
 
-int unlz4(unsigned char *inbuf, int len,
-       int(*fill)(void*, unsigned int),
-       int(*flush)(void*, unsigned int),
+int unlz4(unsigned char *inbuf, long len,
+       long (*fill)(void*, unsigned long),
+       long (*flush)(void*, unsigned long),
        unsigned char *output,
-       int *pos,
+       long *pos,
        void(*error)(char *x));
 #endif
index 7796538f1bf4262cc5682a167e63627ea08a727d..8a891a1938403c654e546cc2bcb3eb131259a51b 100644 (file)
@@ -1,11 +1,11 @@
 #ifndef DECOMPRESS_UNLZMA_H
 #define DECOMPRESS_UNLZMA_H
 
-int unlzma(unsigned char *, int,
-          int(*fill)(void*, unsigned int),
-          int(*flush)(void*, unsigned int),
+int unlzma(unsigned char *, long,
+          long (*fill)(void*, unsigned long),
+          long (*flush)(void*, unsigned long),
           unsigned char *output,
-          int *posp,
+          long *posp,
           void(*error)(char *x)
        );
 
index 98722975251941deb1f25482d2277bc4640e8569..af18f95d65703e0b16594a288a01bb3a72c30a95 100644 (file)
@@ -1,10 +1,10 @@
 #ifndef DECOMPRESS_UNLZO_H
 #define DECOMPRESS_UNLZO_H
 
-int unlzo(unsigned char *inbuf, int len,
-       int(*fill)(void*, unsigned int),
-       int(*flush)(void*, unsigned int),
+int unlzo(unsigned char *inbuf, long len,
+       long (*fill)(void*, unsigned long),
+       long (*flush)(void*, unsigned long),
        unsigned char *output,
-       int *pos,
+       long *pos,
        void(*error)(char *x));
 #endif
index 41728fc6c8a1add9efd3ae97cdb7ccf1c2b1df56..f764e2a7201e35ce7d725254afdbb28c8664bab5 100644 (file)
@@ -1,10 +1,10 @@
 #ifndef DECOMPRESS_UNXZ_H
 #define DECOMPRESS_UNXZ_H
 
-int unxz(unsigned char *in, int in_size,
-        int (*fill)(void *dest, unsigned int size),
-        int (*flush)(void *src, unsigned int size),
-        unsigned char *out, int *in_used,
+int unxz(unsigned char *in, long in_size,
+        long (*fill)(void *dest, unsigned long size),
+        long (*flush)(void *src, unsigned long size),
+        unsigned char *out, long *in_used,
         void (*error)(char *x));
 
 #endif
index efc681fd58956a33a4590e13b60de3dd242a24c3..45cb4ffdea621281aa2068676b9cad9115e48c65 100644 (file)
@@ -1156,6 +1156,9 @@ int efivars_sysfs_init(void);
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_runtime_map_init(struct kobject *);
 void efi_runtime_map_setup(void *, int, u32);
+int efi_get_runtime_map_size(void);
+int efi_get_runtime_map_desc_size(void);
+int efi_runtime_map_copy(void *buf, size_t bufsz);
 #else
 static inline int efi_runtime_map_init(struct kobject *kobj)
 {
@@ -1164,6 +1167,22 @@ static inline int efi_runtime_map_init(struct kobject *kobj)
 
 static inline void
 efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
+
+static inline int efi_get_runtime_map_size(void)
+{
+       return 0;
+}
+
+static inline int efi_get_runtime_map_desc_size(void)
+{
+       return 0;
+}
+
+static inline int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+       return 0;
+}
+
 #endif
 
 /* prototypes shared between arch specific and generic stub code */
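[Note: the three new accessors let the kexec_file_load() work hand the firmware's saved runtime map to the next kernel; the CONFIG_EFI_RUNTIME_MAP=n stubs all return 0 so callers can treat "no map" uniformly. A hypothetical consumer, assuming efi_runtime_map_copy() returns negative on error:

#include <linux/efi.h>
#include <linux/slab.h>

/* hypothetical: snapshot the saved runtime map, NULL if none */
static void *widget_copy_runtime_map(void)
{
        int sz = efi_get_runtime_map_size();
        void *buf = sz ? kmalloc(sz, GFP_KERNEL) : NULL;

        if (buf && efi_runtime_map_copy(buf, sz) < 0) {
                kfree(buf);
                buf = NULL;
        }
        return buf;
}
]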
index 1ab6c6913040523cb4a097b3a0bef71ce4fe4dab..f0890e4a7c25755c9531cb5903aa8e084043d2c8 100644 (file)
@@ -387,7 +387,7 @@ struct address_space {
        struct inode            *host;          /* owner: inode, block_device */
        struct radix_tree_root  page_tree;      /* radix tree of all pages */
        spinlock_t              tree_lock;      /* and lock protecting it */
-       unsigned int            i_mmap_writable;/* count VM_SHARED mappings */
+       atomic_t                i_mmap_writable;/* count VM_SHARED mappings */
        struct rb_root          i_mmap;         /* tree of private and shared mappings */
        struct list_head        i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
        struct mutex            i_mmap_mutex;   /* protect tree, count, list */
@@ -470,10 +470,35 @@ static inline int mapping_mapped(struct address_space *mapping)
  * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
  * marks vma as VM_SHARED if it is shared, and the file was opened for
  * writing i.e. vma may be mprotected writable even if now readonly.
+ *
+ * If i_mmap_writable is negative, no new writable mappings are allowed. You
+ * can only deny writable mappings, if none exists right now.
  */
 static inline int mapping_writably_mapped(struct address_space *mapping)
 {
-       return mapping->i_mmap_writable != 0;
+       return atomic_read(&mapping->i_mmap_writable) > 0;
+}
+
+static inline int mapping_map_writable(struct address_space *mapping)
+{
+       return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
+               0 : -EPERM;
+}
+
+static inline void mapping_unmap_writable(struct address_space *mapping)
+{
+       atomic_dec(&mapping->i_mmap_writable);
+}
+
+static inline int mapping_deny_writable(struct address_space *mapping)
+{
+       return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
+               0 : -EBUSY;
+}
+
+static inline void mapping_allow_writable(struct address_space *mapping)
+{
+       atomic_inc(&mapping->i_mmap_writable);
 }
 
 /*
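[Note: i_mmap_writable changes from a plain count of VM_SHARED mappings to a counter with a sign convention: positive means writable mappings exist, negative means writes are denied. mapping_map_writable() and mapping_deny_writable() use the *_unless_* atomics so the two states exclude each other without a lock, which is the hook the memfd F_SEAL_WRITE work in this series needs. A sketch of the sealing side, with a hypothetical widget_seal_write():

#include <linux/fs.h>

/* hypothetical: refuse to seal while writable mappings exist */
static int widget_seal_write(struct address_space *mapping)
{
        int err = mapping_deny_writable(mapping); /* -EBUSY if mapped writable */

        if (err)
                return err;
        /* from here on, mapping_map_writable() fails with -EPERM */
        return 0;
}
]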
index 5e3a906cc089a721eaacdf390e3e30927fd37434..142ec544167cc91fae73b4e4f1d1c8f347b179f2 100644 (file)
@@ -237,6 +237,12 @@ extern int iomem_is_exclusive(u64 addr);
 extern int
 walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
                void *arg, int (*func)(unsigned long, unsigned long, void *));
+extern int
+walk_system_ram_res(u64 start, u64 end, void *arg,
+                   int (*func)(u64, u64, void *));
+extern int
+walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
+              int (*func)(u64, u64, void *));
 
 /* True if any part of r1 overlaps r2 */
 static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
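[Note: walk_system_ram_res() and walk_iomem_res() iterate matching resource entries as plain u64 start/end pairs rather than PFNs, invoking a callback per region; the kexec_file_load() code uses them to find memory holes for image segments. A hypothetical caller counting System RAM regions:

#include <linux/ioport.h>

static int widget_count_region(u64 start, u64 end, void *arg)
{
        (*(int *)arg)++;
        return 0; /* a non-zero return stops the walk */
}

/* hypothetical: number of System RAM resource entries */
static int widget_count_system_ram(void)
{
        int n = 0;

        walk_system_ram_res(0, (u64)-1, &n, widget_count_region);
        return n;
}
]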
index 3dc22abbc68a212023d90d54556a94d8026bcfcb..31ae66f34235e109361951a7c4b4759a45696207 100644 (file)
@@ -470,6 +470,7 @@ extern enum system_states {
 #define TAINT_FIRMWARE_WORKAROUND      11
 #define TAINT_OOT_MODULE               12
 #define TAINT_UNSIGNED_MODULE          13
+#define TAINT_SOFTLOCKUP               14
 
 extern const char hex_asc[];
 #define hex_asc_lo(x)  hex_asc[((x) & 0x0f)]
index a7564193004959ba12bc28361a4c32d76b8297c2..4b2a0e11cc5be7245f343ded8049f7d3e86f0855 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/ioport.h>
 #include <linux/elfcore.h>
 #include <linux/elf.h>
+#include <linux/module.h>
 #include <asm/kexec.h>
 
 /* Verify architecture specific macros are defined */
@@ -69,7 +70,18 @@ typedef unsigned long kimage_entry_t;
 #define IND_SOURCE       0x8
 
 struct kexec_segment {
-       void __user *buf;
+       /*
+        * This pointer can point to user memory if kexec_load() system
+        * call is used or will point to kernel memory if
+        * kexec_file_load() system call is used.
+        *
+        * Use ->buf when expecting to deal with user memory and use ->kbuf
+        * when expecting to deal with kernel memory.
+        */
+       union {
+               void __user *buf;
+               void *kbuf;
+       };
        size_t bufsz;
        unsigned long mem;
        size_t memsz;
@@ -84,6 +96,27 @@ struct compat_kexec_segment {
 };
 #endif
 
+struct kexec_sha_region {
+       unsigned long start;
+       unsigned long len;
+};
+
+struct purgatory_info {
+       /* Pointer to elf header of read only purgatory */
+       Elf_Ehdr *ehdr;
+
+       /* Pointer to purgatory sechdrs which are modifiable */
+       Elf_Shdr *sechdrs;
+       /*
+        * Temporary buffer location where purgatory is loaded and relocated
+        * This memory can be freed post image load
+        */
+       void *purgatory_buf;
+
+       /* Address where purgatory is finally loaded and is executed from */
+       unsigned long purgatory_load_addr;
+};
+
 struct kimage {
        kimage_entry_t head;
        kimage_entry_t *entry;
@@ -100,7 +133,7 @@ struct kimage {
 
        struct list_head control_pages;
        struct list_head dest_pages;
-       struct list_head unuseable_pages;
+       struct list_head unusable_pages;
 
        /* Address of next control page to allocate for crash kernels. */
        unsigned long control_page;
@@ -110,13 +143,63 @@ struct kimage {
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
        unsigned int preserve_context : 1;
+       /* If set, we are using file mode kexec syscall */
+       unsigned int file_mode:1;
 
 #ifdef ARCH_HAS_KIMAGE_ARCH
        struct kimage_arch arch;
 #endif
+
+       /* Additional fields for file based kexec syscall */
+       void *kernel_buf;
+       unsigned long kernel_buf_len;
+
+       void *initrd_buf;
+       unsigned long initrd_buf_len;
+
+       char *cmdline_buf;
+       unsigned long cmdline_buf_len;
+
+       /* File operations provided by image loader */
+       struct kexec_file_ops *fops;
+
+       /* Image loader handling the kernel can store a pointer here */
+       void *image_loader_data;
+
+       /* Information for loading purgatory */
+       struct purgatory_info purgatory_info;
 };
 
+/*
+ * Keeps track of buffer parameters as provided by caller for requesting
+ * memory placement of buffer.
+ */
+struct kexec_buf {
+       struct kimage *image;
+       char *buffer;
+       unsigned long bufsz;
+       unsigned long memsz;
+       unsigned long buf_align;
+       unsigned long buf_min;
+       unsigned long buf_max;
+       bool top_down;          /* allocate from top of memory hole */
+};
 
+typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
+typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
+                            unsigned long kernel_len, char *initrd,
+                            unsigned long initrd_len, char *cmdline,
+                            unsigned long cmdline_len);
+typedef int (kexec_cleanup_t)(void *loader_data);
+typedef int (kexec_verify_sig_t)(const char *kernel_buf,
+                                unsigned long kernel_len);
+
+struct kexec_file_ops {
+       kexec_probe_t *probe;
+       kexec_load_t *load;
+       kexec_cleanup_t *cleanup;
+       kexec_verify_sig_t *verify_sig;
+};
 
 /* kexec interface functions */
 extern void machine_kexec(struct kimage *image);
@@ -127,8 +210,21 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
                                        struct kexec_segment __user *segments,
                                        unsigned long flags);
 extern int kernel_kexec(void);
+extern int kexec_add_buffer(struct kimage *image, char *buffer,
+                           unsigned long bufsz, unsigned long memsz,
+                           unsigned long buf_align, unsigned long buf_min,
+                           unsigned long buf_max, bool top_down,
+                           unsigned long *load_addr);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
                                                unsigned int order);
+extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
+                               unsigned long max, int top_down,
+                               unsigned long *load_addr);
+extern int kexec_purgatory_get_set_symbol(struct kimage *image,
+                                         const char *name, void *buf,
+                                         unsigned int size, bool get_value);
+extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
+                                            const char *name);
 extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
@@ -177,6 +273,10 @@ extern int kexec_load_disabled;
 #define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
 #endif
 
+/* List of defined/legal kexec file flags */
+#define KEXEC_FILE_FLAGS       (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
+                                KEXEC_FILE_NO_INITRAMFS)
+
 #define VMCOREINFO_BYTES           (4096)
 #define VMCOREINFO_NOTE_NAME       "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
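Tying the pieces above together: an image loader supplies a struct kexec_file_ops whose load callback stages segments through kexec_add_buffer(). The skeleton below is a hypothetical loader, not one from this series; the names, the placement window, and the ERR_PTR error convention are assumptions modeled on the declarations above:

    /* Hypothetical loader skeleton for the file-based kexec path. */
    #include <linux/err.h>
    #include <linux/kexec.h>

    static int example_probe(const char *buf, unsigned long len)
    {
            return -ENOEXEC;        /* return 0 only if the image format matches */
    }

    static void *example_load(struct kimage *image, char *kernel,
                              unsigned long kernel_len, char *initrd,
                              unsigned long initrd_len, char *cmdline,
                              unsigned long cmdline_len)
    {
            unsigned long kernel_load_addr;
            int ret;

            /* Place the kernel in [16M, 1G), page aligned, bottom up. */
            ret = kexec_add_buffer(image, kernel, kernel_len, kernel_len,
                                   PAGE_SIZE, 16 * 1024 * 1024,
                                   1024 * 1024 * 1024, false,
                                   &kernel_load_addr);
            if (ret)
                    return ERR_PTR(ret);

            return NULL;    /* or a pointer stashed in image_loader_data */
    }

    static struct kexec_file_ops example_kexec_ops = {
            .probe = example_probe,
            .load  = example_load,
    };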
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca45e34fa04646296debbf5ae33d4d..e0752d204d9e8b8cb9ea658668c7d35f5806a754 100644 (file)
@@ -54,39 +54,20 @@ struct mem_cgroup_reclaim_cookie {
 };
 
 #ifdef CONFIG_MEMCG
-/*
- * All "charge" functions with gfp_mask should use GFP_KERNEL or
- * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
- * alloc memory but reclaims memory from all available zones. So, "where I want
- * memory from" bits of gfp_mask has no meaning. So any bits of that field is
- * available but adding a rule is better. charge functions' gfp_mask should
- * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
- * codes.
- * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
- */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+                         gfp_t gfp_mask, struct mem_cgroup **memcgp);
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+                             bool lrucare);
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
+void mem_cgroup_uncharge(struct page *page);
+void mem_cgroup_uncharge_list(struct list_head *page_list);
 
-extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
-                               gfp_t gfp_mask);
-/* for swap handling */
-extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-               struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
-extern void mem_cgroup_commit_charge_swapin(struct page *page,
-                                       struct mem_cgroup *memcg);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
-
-extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
-                                       gfp_t gfp_mask);
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+                       bool lrucare);
 
 struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
 
-/* For coalescing uncharge for reducing memcg' overhead*/
-extern void mem_cgroup_uncharge_start(void);
-extern void mem_cgroup_uncharge_end(void);
-
-extern void mem_cgroup_uncharge_page(struct page *page);
-extern void mem_cgroup_uncharge_cache_page(struct page *page);
-
 bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
                                  struct mem_cgroup *memcg);
 bool task_in_mem_cgroup(struct task_struct *task,
@@ -113,12 +94,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
 
 extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
 
-extern void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-                            struct mem_cgroup **memcgp);
-extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
-       struct page *oldpage, struct page *newpage, bool migration_ok);
-
 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
                                   struct mem_cgroup *,
                                   struct mem_cgroup_reclaim_cookie *);
@@ -133,8 +108,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
 void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                        struct task_struct *p);
-extern void mem_cgroup_replace_page_cache(struct page *oldpage,
-                                       struct page *newpage);
 
 static inline void mem_cgroup_oom_enable(void)
 {
@@ -233,46 +206,36 @@ void mem_cgroup_print_bad_page(struct page *page);
 #else /* CONFIG_MEMCG */
 struct mem_cgroup;
 
-static inline int mem_cgroup_charge_anon(struct page *page,
-                                       struct mm_struct *mm, gfp_t gfp_mask)
-{
-       return 0;
-}
-
-static inline int mem_cgroup_charge_file(struct page *page,
-                                       struct mm_struct *mm, gfp_t gfp_mask)
-{
-       return 0;
-}
-
-static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-               struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
+static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+                                       gfp_t gfp_mask,
+                                       struct mem_cgroup **memcgp)
 {
+       *memcgp = NULL;
        return 0;
 }
 
-static inline void mem_cgroup_commit_charge_swapin(struct page *page,
-                                         struct mem_cgroup *memcg)
-{
-}
-
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+static inline void mem_cgroup_commit_charge(struct page *page,
+                                           struct mem_cgroup *memcg,
+                                           bool lrucare)
 {
 }
 
-static inline void mem_cgroup_uncharge_start(void)
+static inline void mem_cgroup_cancel_charge(struct page *page,
+                                           struct mem_cgroup *memcg)
 {
 }
 
-static inline void mem_cgroup_uncharge_end(void)
+static inline void mem_cgroup_uncharge(struct page *page)
 {
 }
 
-static inline void mem_cgroup_uncharge_page(struct page *page)
+static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
 {
 }
 
-static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+static inline void mem_cgroup_migrate(struct page *oldpage,
+                                     struct page *newpage,
+                                     bool lrucare)
 {
 }
 
@@ -311,17 +274,6 @@ static inline struct cgroup_subsys_state
        return NULL;
 }
 
-static inline void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-                            struct mem_cgroup **memcgp)
-{
-}
-
-static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
-               struct page *oldpage, struct page *newpage, bool migration_ok)
-{
-}
-
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
                struct mem_cgroup *prev,
@@ -417,10 +369,6 @@ static inline
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
-static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
-                               struct page *newpage)
-{
-}
 #endif /* CONFIG_MEMCG */
 
 #if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
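The replacement charge API above is a three-step transaction: mem_cgroup_try_charge() reserves against the cgroup limit, mem_cgroup_commit_charge() binds the page once it is safely installed, and mem_cgroup_cancel_charge() rolls the reservation back on failure (the uprobes hunk later in this diff is a live caller). A hedged sketch of the caller pattern; example_install_page() is a hypothetical stand-in for the rmap/page-table work:

    static int example_install_page(struct page *page);    /* hypothetical helper */

    /* Illustrative caller of the new charge API (error paths simplified). */
    static int example_charge_new_page(struct page *page, struct mm_struct *mm)
    {
            struct mem_cgroup *memcg;
            int err;

            err = mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg);
            if (err)
                    return err;             /* over limit and reclaim failed */

            err = example_install_page(page);
            if (err) {
                    mem_cgroup_cancel_charge(page, memcg); /* roll back */
                    return err;
            }

            mem_cgroup_commit_charge(page, memcg, false);  /* false: not on LRU yet */
            return 0;
    }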
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e03dd29145a019a184fbf47fbdd252251b05acaa..8981cc882ed2eb0d2dfdebfbc5355e8c1cc2eb16 100644 (file)
@@ -2014,13 +2014,20 @@ static inline bool kernel_page_present(struct page *page) { return true; }
 #endif /* CONFIG_HIBERNATION */
 #endif
 
+#ifdef __HAVE_ARCH_GATE_AREA
 extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
-#ifdef __HAVE_ARCH_GATE_AREA
-int in_gate_area_no_mm(unsigned long addr);
-int in_gate_area(struct mm_struct *mm, unsigned long addr);
+extern int in_gate_area_no_mm(unsigned long addr);
+extern int in_gate_area(struct mm_struct *mm, unsigned long addr);
 #else
-int in_gate_area_no_mm(unsigned long addr);
-#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
+static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+       return NULL;
+}
+static inline int in_gate_area_no_mm(unsigned long addr) { return 0; }
+static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+       return 0;
+}
 #endif /* __HAVE_ARCH_GATE_AREA */
 
 #ifdef CONFIG_SYSCTL
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 796deac19fcfb4bb21d9e8ba9e22c7d4e15df156..6e0b286649f1fc9a981090f38a4cbbd9b0db2f40 100644 (file)
@@ -461,6 +461,7 @@ static inline void mm_init_cpumask(struct mm_struct *mm)
 #ifdef CONFIG_CPUMASK_OFFSTACK
        mm->cpu_vm_mask_var = &mm->cpumask_allocation;
 #endif
+       cpumask_clear(mm->cpu_vm_mask_var);
 }
 
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524716db9364d3f9a06f9357cacf36b2b418..5c831f1eca798d06e35c02c32e29353cc511d74e 100644 (file)
@@ -3,17 +3,15 @@
 
 enum {
        /* flags for mem_cgroup */
-       PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
-       PCG_USED, /* this object is in use. */
-       PCG_MIGRATION, /* under page migration */
-       __NR_PCG_FLAGS,
+       PCG_USED = 0x01,        /* This page is charged to a memcg */
+       PCG_MEM = 0x02,         /* This page holds a memory charge */
+       PCG_MEMSW = 0x04,       /* This page holds a memory+swap charge */
 };
 
-#ifndef __GENERATING_BOUNDS_H
-#include <generated/bounds.h>
+struct pglist_data;
 
 #ifdef CONFIG_MEMCG
-#include <linux/bit_spinlock.h>
+struct mem_cgroup;
 
 /*
  * Page Cgroup can be considered as an extended mem_map.
@@ -27,65 +25,30 @@ struct page_cgroup {
        struct mem_cgroup *mem_cgroup;
 };
 
-void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
+extern void pgdat_page_cgroup_init(struct pglist_data *pgdat);
 
 #ifdef CONFIG_SPARSEMEM
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
 {
 }
-extern void __init page_cgroup_init(void);
+extern void page_cgroup_init(void);
 #else
-void __init page_cgroup_init_flatmem(void);
-static inline void __init page_cgroup_init(void)
+extern void page_cgroup_init_flatmem(void);
+static inline void page_cgroup_init(void)
 {
 }
 #endif
 
 struct page_cgroup *lookup_page_cgroup(struct page *page);
-struct page *lookup_cgroup_page(struct page_cgroup *pc);
-
-#define TESTPCGFLAG(uname, lname)                      \
-static inline int PageCgroup##uname(struct page_cgroup *pc)    \
-       { return test_bit(PCG_##lname, &pc->flags); }
-
-#define SETPCGFLAG(uname, lname)                       \
-static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
-       { set_bit(PCG_##lname, &pc->flags);  }
-
-#define CLEARPCGFLAG(uname, lname)                     \
-static inline void ClearPageCgroup##uname(struct page_cgroup *pc)      \
-       { clear_bit(PCG_##lname, &pc->flags);  }
-
-#define TESTCLEARPCGFLAG(uname, lname)                 \
-static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)   \
-       { return test_and_clear_bit(PCG_##lname, &pc->flags);  }
-
-TESTPCGFLAG(Used, USED)
-CLEARPCGFLAG(Used, USED)
-SETPCGFLAG(Used, USED)
-
-SETPCGFLAG(Migration, MIGRATION)
-CLEARPCGFLAG(Migration, MIGRATION)
-TESTPCGFLAG(Migration, MIGRATION)
 
-static inline void lock_page_cgroup(struct page_cgroup *pc)
+static inline int PageCgroupUsed(struct page_cgroup *pc)
 {
-       /*
-        * Don't take this lock in IRQ context.
-        * This lock is for pc->mem_cgroup, USED, MIGRATION
-        */
-       bit_spin_lock(PCG_LOCK, &pc->flags);
+       return !!(pc->flags & PCG_USED);
 }
-
-static inline void unlock_page_cgroup(struct page_cgroup *pc)
-{
-       bit_spin_unlock(PCG_LOCK, &pc->flags);
-}
-
-#else /* CONFIG_MEMCG */
+#else /* !CONFIG_MEMCG */
 struct page_cgroup;
 
-static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
 {
 }
 
@@ -98,10 +61,9 @@ static inline void page_cgroup_init(void)
 {
 }
 
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
 {
 }
-
 #endif /* CONFIG_MEMCG */
 
 #include <linux/swap.h>
@@ -140,6 +102,4 @@ static inline void swap_cgroup_swapoff(int type)
 
 #endif /* CONFIG_MEMCG_SWAP */
 
-#endif /* !__GENERATING_BOUNDS_H */
-
 #endif /* __LINUX_PAGE_CGROUP_H */
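Worth noting in the hunk above: the PCG_* flags change from bit numbers flipped one at a time under the removed lock_page_cgroup() to plain mask values that the charge path publishes together. A sketch of the new style (illustrative only; the real writers live in mm/memcontrol.c):

    /* Illustrative: commit a charge by setting pc->flags in one store,
     * instead of set_bit() calls under the removed PCG_LOCK. */
    static void example_commit_flags(struct page_cgroup *pc,
                                     struct mem_cgroup *memcg, bool memsw)
    {
            pc->mem_cgroup = memcg;
            pc->flags = PCG_USED | PCG_MEM | (memsw ? PCG_MEMSW : 0);
    }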
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 5059994fe2970edf8942e537fd3fe81144c2ccb3..9fc2f213e74fa61d32b3840b5fb202f93f8f525a 100644 (file)
@@ -384,11 +384,16 @@ void rio_dev_put(struct rio_dev *);
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
+extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport);
 extern void rio_release_dma(struct dma_chan *dchan);
 extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
                struct rio_dev *rdev, struct dma_chan *dchan,
                struct rio_dma_data *data,
                enum dma_transfer_direction direction, unsigned long flags);
+extern struct dma_async_tx_descriptor *rio_dma_prep_xfer(
+               struct dma_chan *dchan, u16 destid,
+               struct rio_dma_data *data,
+               enum dma_transfer_direction direction, unsigned long flags);
 #endif
 
 /**
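The new rio_dma_prep_xfer() mirrors rio_dma_prep_slave_sg() but takes a raw destination ID rather than a struct rio_dev, so a channel obtained via the new rio_request_mport_dma() can target any endpoint. A hedged usage sketch; the destid value and DMA flags here are illustrative:

    /* Illustrative: prepare a DMA transfer to RapidIO endpoint 0x5.
     * 'data' (sg list, target rio_addr) is assumed set up by the caller. */
    static struct dma_async_tx_descriptor *
    example_prep(struct dma_chan *dchan, struct rio_dma_data *data)
    {
            return rio_dma_prep_xfer(dchan, 0x5, data, DMA_MEM_TO_DEV,
                                     DMA_PREP_INTERRUPT);
    }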
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index f4ec8bbcb372c9dcf2da17fd816937332912d3b1..ed8f9e70df9bcf72358ce9baf534c082699daa86 100644 (file)
@@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
                            struct scatterlist *sgl)
 {
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
        BUG();
 #endif
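Since sg_chain()'s guard is what changed here (a bare define becomes a proper Kconfig symbol), a quick reminder of its contract, sketched under the renamed symbol with illustrative sizes:

    /* Illustrative: chain scatterlist b onto a. a[] gives up its last
     * slot as the link entry, leaving 3 usable data entries. Only valid
     * when the architecture selects CONFIG_ARCH_HAS_SG_CHAIN. */
    static void example_chain(struct scatterlist a[4], struct scatterlist *b)
    {
            sg_chain(a, 4, b);      /* a[3] becomes a chain link to b */
    }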
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7c19d552dc3f734d44741cae25b6df60fe52336f..db2f6474e95e84b62b570c014cf569c1d24188bc 100644 (file)
@@ -33,6 +33,7 @@ struct sched_param {
 
 #include <linux/smp.h>
 #include <linux/sem.h>
+#include <linux/shm.h>
 #include <linux/signal.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
@@ -1385,6 +1386,7 @@ struct task_struct {
 #ifdef CONFIG_SYSVIPC
 /* ipc stuff */
        struct sysv_sem sysvsem;
+       struct sysv_shm sysvshm;
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 /* hung task detection */
@@ -1628,12 +1630,6 @@ struct task_struct {
        unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
-       struct memcg_batch_info {
-               int do_batch;   /* incremented when batch uncharge started */
-               struct mem_cgroup *memcg; /* target memcg of uncharge */
-               unsigned long nr_pages; /* uncharged usage */
-               unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
-       } memcg_batch;
        unsigned int memcg_kmem_skip_account;
        struct memcg_oom_info {
                struct mem_cgroup *memcg;
@@ -2967,15 +2963,10 @@ static inline void inc_syscw(struct task_struct *tsk)
 
 #ifdef CONFIG_MEMCG
 extern void mm_update_next_owner(struct mm_struct *mm);
-extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
 #else
 static inline void mm_update_next_owner(struct mm_struct *mm)
 {
 }
-
-static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-}
 #endif /* CONFIG_MEMCG */
 
 static inline unsigned long task_rlimit(const struct task_struct *tsk,
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 57d77709fbe2a1c2cb6f8a7f47604bf560b24b9f..6fb801686ad6cea28b6fb4b1538dad63bd61117c 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _LINUX_SHM_H_
 #define _LINUX_SHM_H_
 
+#include <linux/list.h>
 #include <asm/page.h>
 #include <uapi/linux/shm.h>
 #include <asm/shmparam.h>
@@ -20,6 +21,7 @@ struct shmid_kernel /* private to the kernel */
 
        /* The task created the shm object.  NULL if the task is dead. */
        struct task_struct      *shm_creator;
+       struct list_head        shm_clist;      /* list by creator */
 };
 
 /* shm_mode upper byte flags */
@@ -44,11 +46,20 @@ struct shmid_kernel /* private to the kernel */
 #define SHM_HUGE_1GB    (30 << SHM_HUGE_SHIFT)
 
 #ifdef CONFIG_SYSVIPC
+struct sysv_shm {
+       struct list_head shm_clist;
+};
+
 long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
              unsigned long shmlba);
-extern int is_file_shm_hugepages(struct file *file);
-extern void exit_shm(struct task_struct *task);
+int is_file_shm_hugepages(struct file *file);
+void exit_shm(struct task_struct *task);
+#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
 #else
+struct sysv_shm {
+       /* empty */
+};
+
 static inline long do_shmat(int shmid, char __user *shmaddr,
                            int shmflg, unsigned long *addr,
                            unsigned long shmlba)
@@ -62,6 +73,9 @@ static inline int is_file_shm_hugepages(struct file *file)
 static inline void exit_shm(struct task_struct *task)
 {
 }
+static inline void shm_init_task(struct task_struct *task)
+{
+}
 #endif
 
 #endif /* _LINUX_SHM_H_ */
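The new shm_clist above is what lets the rewritten exit_shm() (see the ipc/shm.c hunks later in this diff) walk only the segments the task created instead of scanning the whole namespace. shm_init_task() has to run for every new task; a hedged sketch of the hookup, whose real call site is in the fork path and not shown in this diff:

    /* Illustrative: task-setup code initializing the creator list. */
    static void example_task_ipc_init(struct task_struct *tsk)
    {
            shm_init_task(tsk);     /* INIT_LIST_HEAD(&tsk->sysvshm.shm_clist) */
    }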
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c2d29f9a34943cd4e8a680461d63e7a987..50777b5b1e4c3967d9f4fcc441b604fe6f0e0008 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __SHMEM_FS_H
 #define __SHMEM_FS_H
 
+#include <linux/file.h>
 #include <linux/swap.h>
 #include <linux/mempolicy.h>
 #include <linux/pagemap.h>
@@ -11,6 +12,7 @@
 
 struct shmem_inode_info {
        spinlock_t              lock;
+       unsigned int            seals;          /* shmem seals */
        unsigned long           flags;
        unsigned long           alloced;        /* data pages alloced to file */
        union {
@@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page(
                                        mapping_gfp_mask(mapping));
 }
 
+#ifdef CONFIG_TMPFS
+
+extern int shmem_add_seals(struct file *file, unsigned int seals);
+extern int shmem_get_seals(struct file *file);
+extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
+
+#else
+
+static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
+{
+       return -EINVAL;
+}
+
+#endif
+
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1eb64043c076fe97db25443feb7ddb5bfe32e38c..1b72060f093a6e74bb64d05cecf2d2894f987796 100644 (file)
@@ -320,6 +320,9 @@ extern void swap_setup(void);
 
 extern void add_page_to_unevictable_list(struct page *page);
 
+extern void lru_cache_add_active_or_unevictable(struct page *page,
+                                               struct vm_area_struct *vma);
+
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                        gfp_t gfp_mask, nodemask_t *mask);
@@ -378,9 +381,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 }
 #endif
 #ifdef CONFIG_MEMCG_SWAP
-extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
+extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
+extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
 #else
-static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+}
+static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
 {
 }
 #endif
@@ -440,7 +447,7 @@ extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
-extern void swapcache_free(swp_entry_t, struct page *page);
+extern void swapcache_free(swp_entry_t);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
@@ -504,7 +511,7 @@ static inline void swap_free(swp_entry_t swp)
 {
 }
 
-static inline void swapcache_free(swp_entry_t swp, struct page *page)
+static inline void swapcache_free(swp_entry_t swp)
 {
 }
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 701daff5d899ebabae0cdcabb5d2a381beffc38f..0f86d85a9ce44cc771bb7118eb7b4cbc28ae905d 100644 (file)
@@ -317,6 +317,10 @@ asmlinkage long sys_restart_syscall(void);
 asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
                                struct kexec_segment __user *segments,
                                unsigned long flags);
+asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd,
+                                   unsigned long cmdline_len,
+                                   const char __user *cmdline_ptr,
+                                   unsigned long flags);
 
 asmlinkage long sys_exit(int error_code);
 asmlinkage long sys_exit_group(int error_code);
@@ -802,6 +806,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
 asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
 asmlinkage long sys_eventfd(unsigned int count);
 asmlinkage long sys_eventfd2(unsigned int count, int flags);
+asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int);
 asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 14a8ff2de11e54907ff63d20ee23c7d4c29896fa..b7361f831226d97bab5ff8b4c86d9252e972d877 100644 (file)
@@ -34,8 +34,6 @@ struct ctl_table_root;
 struct ctl_table_header;
 struct ctl_dir;
 
-typedef struct ctl_table ctl_table;
-
 typedef int proc_handler (struct ctl_table *ctl, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos);
 
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4836ba3c1cd8266c294b9dfd28aa6d0d433db0d6..e95372654f091a822388054a1e89d6f008d6712c 100644 (file)
@@ -57,9 +57,9 @@ static inline void put_user_ns(struct user_namespace *ns)
 }
 
 struct seq_operations;
-extern struct seq_operations proc_uid_seq_operations;
-extern struct seq_operations proc_gid_seq_operations;
-extern struct seq_operations proc_projid_seq_operations;
+extern const struct seq_operations proc_uid_seq_operations;
+extern const struct seq_operations proc_gid_seq_operations;
+extern const struct seq_operations proc_projid_seq_operations;
 extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 197abb2a54c5d713dd1a909da735bee60ab28d12..92dbbd3f6c757178d66b0b0e3d9f05789a05e02e 100644 (file)
@@ -83,11 +83,11 @@ struct internal_state;
 
 typedef struct z_stream_s {
     const Byte *next_in;   /* next input byte */
-    uInt     avail_in;  /* number of bytes available at next_in */
+       uLong avail_in;  /* number of bytes available at next_in */
     uLong    total_in;  /* total nb of input bytes read so far */
 
     Byte    *next_out;  /* next output byte should be put there */
-    uInt     avail_out; /* remaining free space at next_out */
+       uLong avail_out; /* remaining free space at next_out */
     uLong    total_out; /* total nb of bytes output so far */
 
     char     *msg;      /* last error message, NULL if no error */
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index e6df23cae7bedabdbaf9b6b09156ab011f389cb4..261e708010da70174785c7a62b4f51f31f0a4164 100644 (file)
@@ -31,7 +31,7 @@ enum scsi_timeouts {
  * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
  * is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
  */
-#ifdef ARCH_HAS_SG_CHAIN
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
 #define SCSI_MAX_SG_CHAIN_SEGMENTS     2048
 #else
 #define SCSI_MAX_SG_CHAIN_SEGMENTS     SCSI_MAX_SG_SEGMENTS
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 074b886c6be0a0f3d0f4d493050c365153ad1917..beed138bd359382273cd9f9c114f44b7bd559841 100644 (file)
 #define F_SETPIPE_SZ   (F_LINUX_SPECIFIC_BASE + 7)
 #define F_GETPIPE_SZ   (F_LINUX_SPECIFIC_BASE + 8)
 
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS    (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS    (F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL    0x0001  /* prevent further seals from being set */
+#define F_SEAL_SHRINK  0x0002  /* prevent file from shrinking */
+#define F_SEAL_GROW    0x0004  /* prevent file from growing */
+#define F_SEAL_WRITE   0x0008  /* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
 /*
  * Types of directory notifications that may be requested.
  */
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index d6629d49a2433745d205cb3685dadeaffaeab61a..6925f5b42f890983220abd05939f74574f8d43c2 100644 (file)
 #define KEXEC_PRESERVE_CONTEXT 0x00000002
 #define KEXEC_ARCH_MASK                0xffff0000
 
+/*
+ * Kexec file load interface flags.
+ * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image.
+ * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
+ * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
+ *                           fd field.
+ */
+#define KEXEC_FILE_UNLOAD      0x00000001
+#define KEXEC_FILE_ON_CRASH    0x00000002
+#define KEXEC_FILE_NO_INITRAMFS        0x00000004
+
 /* These values match the ELF architecture values.
  * Unless there is a good reason that should continue to be the case.
  */
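For completeness, a hedged userspace sketch of driving the new syscall with these flags. The syscall number macro is assumed to come from the installed kernel headers, and the image paths are examples:

    /* Illustrative userspace invocation of kexec_file_load(2). */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main(void)
    {
            const char *cmdline = "console=ttyS0 root=/dev/sda1";
            int kernel_fd = open("/boot/bzImage", O_RDONLY);
            int initrd_fd = open("/boot/initrd.img", O_RDONLY);

            if (kernel_fd < 0 || initrd_fd < 0)
                    return 1;

            /* cmdline_len must count the terminating NUL; flags of 0
             * request a normal (non-crash) load. */
            if (syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
                        strlen(cmdline) + 1, cmdline, 0UL) < 0) {
                    perror("kexec_file_load");
                    return 1;
            }
            return 0;
    }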
diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
new file mode 100644 (file)
index 0000000..534e364
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef _UAPI_LINUX_MEMFD_H
+#define _UAPI_LINUX_MEMFD_H
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC            0x0001U
+#define MFD_ALLOW_SEALING      0x0002U
+
+#endif /* _UAPI_LINUX_MEMFD_H */
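And the memfd counterpart, combining memfd_create() with the sealing fcntls added earlier in this diff. Hedged: this goes through raw syscall(2) since no libc wrapper exists yet, and it assumes __NR_memfd_create and the F_*SEALS definitions are visible from the installed headers:

    /* Illustrative: create a sealable memfd, size it, then freeze its size. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/memfd.h>

    int main(void)
    {
            int fd = syscall(__NR_memfd_create, "example", MFD_ALLOW_SEALING);

            if (fd < 0 || ftruncate(fd, 4096) < 0)
                    return 1;

            /* No one can resize the file or add further seals now. */
            if (fcntl(fd, F_ADD_SEALS,
                      F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL) < 0) {
                    perror("F_ADD_SEALS");
                    return 1;
            }

            printf("seals: 0x%x\n", fcntl(fd, F_GET_SEALS));
            return 0;
    }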
diff --git a/init/Kconfig b/init/Kconfig
index a291b7ef473893891481f188b81f7dfce1550942..44f9ed3dae2286a80792918b79c50870317410c4 100644 (file)
@@ -783,8 +783,13 @@ endchoice
 
 endmenu # "RCU Subsystem"
 
+config BUILD_BIN2C
+       bool
+       default n
+
 config IKCONFIG
        tristate "Kernel .config support"
+       select BUILD_BIN2C
        ---help---
          This option enables the complete Linux kernel ".config" file
          contents to be saved in the kernel. It provides documentation
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 82f22885c87e995c3d18f6a0080bf639d99d9331..b6237c31b0e2469cac9b2287cecb6932ad49462e 100644 (file)
@@ -539,12 +539,6 @@ void __init prepare_namespace(void)
 {
        int is_floppy;
 
-       if (root_delay) {
-               printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
-                      root_delay);
-               ssleep(root_delay);
-       }
-
        /*
         * wait for the known devices to complete their probing
         *
@@ -571,6 +565,12 @@ void __init prepare_namespace(void)
        if (initrd_load())
                goto out;
 
+       if (root_delay) {
+               pr_info("Waiting %d sec before mounting root device...\n",
+                       root_delay);
+               ssleep(root_delay);
+       }
+
        /* wait for any asynchronous scanning to complete */
        if ((ROOT_DEV == 0) && root_wait) {
                printk(KERN_INFO "Waiting for root device %s...\n",
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index a8227022e3a02210afaab0cc628729f80ca07af8..e5d059e8aa11e78c736c75f452bc641abeec09b2 100644 (file)
@@ -311,9 +311,9 @@ static int exit_code;
 static int decompress_error;
 static int crd_infd, crd_outfd;
 
-static int __init compr_fill(void *buf, unsigned int len)
+static long __init compr_fill(void *buf, unsigned long len)
 {
-       int r = sys_read(crd_infd, buf, len);
+       long r = sys_read(crd_infd, buf, len);
        if (r < 0)
                printk(KERN_ERR "RAMDISK: error while reading compressed data");
        else if (r == 0)
@@ -321,13 +321,13 @@ static int __init compr_fill(void *buf, unsigned int len)
        return r;
 }
 
-static int __init compr_flush(void *window, unsigned int outcnt)
+static long __init compr_flush(void *window, unsigned long outcnt)
 {
-       int written = sys_write(crd_outfd, window, outcnt);
+       long written = sys_write(crd_outfd, window, outcnt);
        if (written != outcnt) {
                if (decompress_error == 0)
                        printk(KERN_ERR
-                              "RAMDISK: incomplete write (%d != %d)\n",
+                              "RAMDISK: incomplete write (%ld != %ld)\n",
                               written, outcnt);
                decompress_error = 1;
                return -1;
diff --git a/init/initramfs.c b/init/initramfs.c
index a8497fab1c3d64950370fb54401493e9eeac0621..bece48c3461edd55ba4a9ea5fc1b8662be6bda0f 100644 (file)
 #include <linux/syscalls.h>
 #include <linux/utime.h>
 
+static ssize_t __init xwrite(int fd, const char *p, size_t count)
+{
+       ssize_t out = 0;
+
+       /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
+       while (count) {
+               ssize_t rv = sys_write(fd, p, count);
+
+               if (rv < 0) {
+                       if (rv == -EINTR || rv == -EAGAIN)
+                               continue;
+                       return out ? out : rv;
+               } else if (rv == 0)
+                       break;
+
+               p += rv;
+               out += rv;
+               count -= rv;
+       }
+
+       return out;
+}
+
 static __initdata char *message;
 static void __init error(char *x)
 {
@@ -174,7 +197,7 @@ static __initdata enum state {
 } state, next_state;
 
 static __initdata char *victim;
-static __initdata unsigned count;
+static unsigned long count __initdata;
 static __initdata loff_t this_header, next_header;
 
 static inline void __init eat(unsigned n)
@@ -186,7 +209,7 @@ static inline void __init eat(unsigned n)
 
 static __initdata char *vcollected;
 static __initdata char *collected;
-static __initdata int remains;
+static long remains __initdata;
 static __initdata char *collect;
 
 static void __init read_into(char *buf, unsigned size, enum state next)
@@ -213,7 +236,7 @@ static int __init do_start(void)
 
 static int __init do_collect(void)
 {
-       unsigned n = remains;
+       unsigned long n = remains;
        if (count < n)
                n = count;
        memcpy(collect, victim, n);
@@ -346,7 +369,8 @@ static int __init do_name(void)
 static int __init do_copy(void)
 {
        if (count >= body_len) {
-               sys_write(wfd, victim, body_len);
+               if (xwrite(wfd, victim, body_len) != body_len)
+                       error("write error");
                sys_close(wfd);
                do_utime(vcollected, mtime);
                kfree(vcollected);
@@ -354,7 +378,8 @@ static int __init do_copy(void)
                state = SkipIt;
                return 0;
        } else {
-               sys_write(wfd, victim, count);
+               if (xwrite(wfd, victim, count) != count)
+                       error("write error");
                body_len -= count;
                eat(count);
                return 1;
@@ -384,7 +409,7 @@ static __initdata int (*actions[])(void) = {
        [Reset]         = do_reset,
 };
 
-static int __init write_buffer(char *buf, unsigned len)
+static long __init write_buffer(char *buf, unsigned long len)
 {
        count = len;
        victim = buf;
@@ -394,11 +419,11 @@ static int __init write_buffer(char *buf, unsigned len)
        return len - count;
 }
 
-static int __init flush_buffer(void *bufv, unsigned len)
+static long __init flush_buffer(void *bufv, unsigned long len)
 {
        char *buf = (char *) bufv;
-       int written;
-       int origLen = len;
+       long written;
+       long origLen = len;
        if (message)
                return -1;
        while ((written = write_buffer(buf, len)) < len && !message) {
@@ -417,13 +442,13 @@ static int __init flush_buffer(void *bufv, unsigned len)
        return origLen;
 }
 
-static unsigned my_inptr;   /* index of next byte to be processed in inbuf */
+static unsigned long my_inptr; /* index of next byte to be processed in inbuf */
 
 #include <linux/decompress/generic.h>
 
-static char * __init unpack_to_rootfs(char *buf, unsigned len)
+static char * __init unpack_to_rootfs(char *buf, unsigned long len)
 {
-       int written, res;
+       long written;
        decompress_fn decompress;
        const char *compress_name;
        static __initdata char msg_buf[64];
@@ -457,7 +482,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
                decompress = decompress_method(buf, len, &compress_name);
                pr_debug("Detected %s compressed data\n", compress_name);
                if (decompress) {
-                       res = decompress(buf, len, NULL, flush_buffer, NULL,
+                       int res = decompress(buf, len, NULL, flush_buffer, NULL,
                                   &my_inptr, error);
                        if (res)
                                error("decompressor failed");
@@ -603,8 +628,13 @@ static int __init populate_rootfs(void)
                fd = sys_open("/initrd.image",
                              O_WRONLY|O_CREAT, 0700);
                if (fd >= 0) {
-                       sys_write(fd, (char *)initrd_start,
-                                       initrd_end - initrd_start);
+                       ssize_t written = xwrite(fd, (char *)initrd_start,
+                                               initrd_end - initrd_start);
+
+                       if (written != initrd_end - initrd_start)
+                               pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
+                                      written, initrd_end - initrd_start);
+
                        sys_close(fd);
                        free_initrd();
                }
diff --git a/init/main.c b/init/main.c
index e8ae1fef0908965b0c5fff7ba7ab8f043e83a980..bb1aed928f21391b63493112c6bb1a1eae39077f 100644 (file)
@@ -6,7 +6,7 @@
  *  GK 2/5/95  -  Changed to support mounting root fs via NFS
  *  Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
  *  Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
- *  Simplified starting of init:  Michael A. Griffith <grif@acm.org> 
+ *  Simplified starting of init:  Michael A. Griffith <grif@acm.org>
  */
 
 #define DEBUG          /* Enable initcall_debug */
@@ -136,7 +136,7 @@ static char *ramdisk_execute_command;
  * Used to generate warnings if static_key manipulation functions are used
  * before jump_label_init is called.
  */
-bool static_key_initialized __read_mostly = false;
+bool static_key_initialized __read_mostly;
 EXPORT_SYMBOL_GPL(static_key_initialized);
 
 /*
@@ -159,8 +159,8 @@ static int __init set_reset_devices(char *str)
 
 __setup("reset_devices", set_reset_devices);
 
-static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
 static const char *panic_later, *panic_param;
 
 extern const struct obs_kernel_param __setup_start[], __setup_end[];
@@ -199,7 +199,6 @@ static int __init obsolete_checksetup(char *line)
  * still work even if initially too large, it will just take slightly longer
  */
 unsigned long loops_per_jiffy = (1<<12);
-
 EXPORT_SYMBOL(loops_per_jiffy);
 
 static int __init debug_kernel(char *str)
@@ -376,8 +375,8 @@ static void __init setup_command_line(char *command_line)
        initcall_command_line =
                memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
        static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
-       strcpy (saved_command_line, boot_command_line);
-       strcpy (static_command_line, command_line);
+       strcpy(saved_command_line, boot_command_line);
+       strcpy(static_command_line, command_line);
 }
 
 /*
@@ -445,8 +444,8 @@ void __init parse_early_options(char *cmdline)
 /* Arch code calls this early on, or if not, just before other parsing. */
 void __init parse_early_param(void)
 {
-       static __initdata int done = 0;
-       static __initdata char tmp_cmdline[COMMAND_LINE_SIZE];
+       static int done __initdata;
+       static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
 
        if (done)
                return;
@@ -500,7 +499,8 @@ static void __init mm_init(void)
 
 asmlinkage __visible void __init start_kernel(void)
 {
-       char * command_line, *after_dashes;
+       char *command_line;
+       char *after_dashes;
        extern const struct kernel_param __start___param[], __stop___param[];
 
        /*
@@ -572,7 +572,8 @@ asmlinkage __visible void __init start_kernel(void)
         * fragile until we cpu_idle() for the first time.
         */
        preempt_disable();
-       if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
+       if (WARN(!irqs_disabled(),
+                "Interrupts were enabled *very* early, fixing it\n"))
                local_irq_disable();
        idr_init_cache();
        rcu_init();
diff --git a/ipc/shm.c b/ipc/shm.c
index 89fc354156cb918342751f72e1017727877d86e0..7fc9f9f3a26b874c7fa558d22618273e9eb357d3 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -178,6 +178,7 @@ static void shm_rcu_free(struct rcu_head *head)
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
+       list_del(&s->shm_clist);
        ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
@@ -267,37 +268,6 @@ static void shm_close(struct vm_area_struct *vma)
        up_write(&shm_ids(ns).rwsem);
 }
 
-/* Called with ns->shm_ids(ns).rwsem locked */
-static int shm_try_destroy_current(int id, void *p, void *data)
-{
-       struct ipc_namespace *ns = data;
-       struct kern_ipc_perm *ipcp = p;
-       struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
-
-       if (shp->shm_creator != current)
-               return 0;
-
-       /*
-        * Mark it as orphaned to destroy the segment when
-        * kernel.shm_rmid_forced is changed.
-        * It is noop if the following shm_may_destroy() returns true.
-        */
-       shp->shm_creator = NULL;
-
-       /*
-        * Don't even try to destroy it.  If shm_rmid_forced=0 and IPC_RMID
-        * is not set, it shouldn't be deleted here.
-        */
-       if (!ns->shm_rmid_forced)
-               return 0;
-
-       if (shm_may_destroy(ns, shp)) {
-               shm_lock_by_ptr(shp);
-               shm_destroy(ns, shp);
-       }
-       return 0;
-}
-
 /* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_orphaned(int id, void *p, void *data)
 {
@@ -329,18 +299,50 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
        up_write(&shm_ids(ns).rwsem);
 }
 
-
+/* Locking assumes this will only be called with task == current */
 void exit_shm(struct task_struct *task)
 {
        struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+       struct shmid_kernel *shp, *n;
 
-       if (shm_ids(ns).in_use == 0)
+       if (list_empty(&task->sysvshm.shm_clist))
                return;
 
-       /* Destroy all already created segments, but not mapped yet */
+       /*
+        * If kernel.shm_rmid_forced is not set then only keep track of
+        * which shmids are orphaned, so that a later set of the sysctl
+        * can clean them up.
+        */
+       if (!ns->shm_rmid_forced) {
+               down_read(&shm_ids(ns).rwsem);
+               list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
+                       shp->shm_creator = NULL;
+               /*
+                * Only under read lock but we are only called on current
+                * so no entry on the list will be shared.
+                */
+               list_del(&task->sysvshm.shm_clist);
+               up_read(&shm_ids(ns).rwsem);
+               return;
+       }
+
+       /*
+        * Destroy all already created segments, that were not yet mapped,
+        * and mark any mapped as orphan to cover the sysctl toggling.
+        * Destroy is skipped if shm_may_destroy() returns false.
+        */
        down_write(&shm_ids(ns).rwsem);
-       if (shm_ids(ns).in_use)
-               idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
+       list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
+               shp->shm_creator = NULL;
+
+               if (shm_may_destroy(ns, shp)) {
+                       shm_lock_by_ptr(shp);
+                       shm_destroy(ns, shp);
+               }
+       }
+
+       /* Remove the list head from any segments still attached. */
+       list_del(&task->sysvshm.shm_clist);
        up_write(&shm_ids(ns).rwsem);
 }
 
@@ -561,6 +563,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        shp->shm_nattch = 0;
        shp->shm_file = file;
        shp->shm_creator = current;
+       list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
 
        /*
         * shmid gets reported as "inode#" in /proc/pid/maps.
diff --git a/kernel/Makefile b/kernel/Makefile
index 0026cf5317690c9df47d38222db5df16ab8e9bf1..dc5c77544fd69f6924adc25c3f4a8d3530708392 100644 (file)
@@ -105,7 +105,7 @@ targets += config_data.gz
 $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
        $(call if_changed,gzip)
 
-      filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;")
+      filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/basic/bin2c; echo "MAGIC_END;")
 targets += config_data.h
 $(obj)/config_data.h: $(obj)/config_data.gz FORCE
        $(call filechk,ikconfiggz)
diff --git a/kernel/acct.c b/kernel/acct.c
index a1844f14c6d6502e3d519e367397a4d70296e7e5..51793520566fade30ec645d0cdd74d31f4c369ed 100644 (file)
@@ -141,12 +141,12 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
        if (acct->active) {
                if (act < 0) {
                        acct->active = 0;
-                       printk(KERN_INFO "Process accounting paused\n");
+                       pr_info("Process accounting paused\n");
                }
        } else {
                if (act > 0) {
                        acct->active = 1;
-                       printk(KERN_INFO "Process accounting resumed\n");
+                       pr_info("Process accounting resumed\n");
                }
        }
 
@@ -261,6 +261,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
 
        if (name) {
                struct filename *tmp = getname(name);
+
                if (IS_ERR(tmp))
                        return PTR_ERR(tmp);
                error = acct_on(tmp);
@@ -376,7 +377,7 @@ static comp_t encode_comp_t(unsigned long value)
        return exp;
 }
 
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
 /*
  * encode an u64 into a comp2_t (24 bits)
  *
@@ -389,7 +390,7 @@ static comp_t encode_comp_t(unsigned long value)
 #define MANTSIZE2       20                      /* 20 bit mantissa. */
 #define EXPSIZE2        5                       /* 5 bit base 2 exponent. */
 #define MAXFRACT2       ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
-#define MAXEXP2         ((1 <<EXPSIZE2) - 1)    /* Maximum exponent. */
+#define MAXEXP2         ((1 << EXPSIZE2) - 1)    /* Maximum exponent. */
 
 static comp2_t encode_comp2_t(u64 value)
 {
@@ -420,7 +421,7 @@ static comp2_t encode_comp2_t(u64 value)
 }
 #endif
 
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
 /*
  * encode an u64 into a 32 bit IEEE float
  */
@@ -429,8 +430,9 @@ static u32 encode_float(u64 value)
        unsigned exp = 190;
        unsigned u;
 
-       if (value==0) return 0;
-       while ((s64)value > 0){
+       if (value == 0)
+               return 0;
+       while ((s64)value > 0) {
                value <<= 1;
                exp--;
        }
@@ -486,16 +488,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
        run_time -= current->group_leader->start_time;
        /* convert nsec -> AHZ */
        elapsed = nsec_to_AHZ(run_time);
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
        ac.ac_etime = encode_float(elapsed);
 #else
        ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
-                              (unsigned long) elapsed : (unsigned long) -1l);
+                               (unsigned long) elapsed : (unsigned long) -1l);
 #endif
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
        {
                /* new enlarged etime field */
                comp2_t etime = encode_comp2_t(elapsed);
+
                ac.ac_etime_hi = etime >> 16;
                ac.ac_etime_lo = (u16) etime;
        }
@@ -505,15 +508,15 @@ static void do_acct_process(struct bsd_acct_struct *acct,
        /* we really need to bite the bullet and change layout */
        ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
        ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
-#if ACCT_VERSION==2
+#if ACCT_VERSION == 2
        ac.ac_ahz = AHZ;
 #endif
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
        /* backward-compatible 16 bit fields */
        ac.ac_uid16 = ac.ac_uid;
        ac.ac_gid16 = ac.ac_gid;
 #endif
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
        ac.ac_pid = task_tgid_nr_ns(current, ns);
        rcu_read_lock();
        ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
@@ -574,6 +577,7 @@ void acct_collect(long exitcode, int group_dead)
 
        if (group_dead && current->mm) {
                struct vm_area_struct *vma;
+
                down_read(&current->mm->mmap_sem);
                vma = current->mm->mmap;
                while (vma) {
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 9fd4246b04b8298ec608dbba4d9ee9997e105fbc..e1d1d1952bfa37eaafd87486e03555051e16283c 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
-#include <linux/page_cgroup.h>
 #include <linux/log2.h>
 #include <linux/spinlock_types.h>
 
@@ -18,7 +17,6 @@ void foo(void)
        /* The enum constants to put into include/generated/bounds.h */
        DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
        DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
-       DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 #ifdef CONFIG_SMP
        DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
 #endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e8c13750133db07340c1a07f173da683bf..1d0af8a2c6469bda46438dbd8383cbf535d65077 100644 (file)
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        /* For mmu_notifiers */
        const unsigned long mmun_start = addr;
        const unsigned long mmun_end   = addr + PAGE_SIZE;
+       struct mem_cgroup *memcg;
+
+       err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
+       if (err)
+               return err;
 
        /* For try_to_free_swap() and munlock_vma_page() below */
        lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        get_page(kpage);
        page_add_new_anon_rmap(kpage, vma, addr);
+       mem_cgroup_commit_charge(kpage, memcg, false);
+       lru_cache_add_active_or_unevictable(kpage, vma);
 
        if (!PageAnon(page)) {
                dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        err = 0;
  unlock:
+       mem_cgroup_cancel_charge(kpage, memcg);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
        unlock_page(page);
        return err;
@@ -315,18 +323,11 @@ retry:
        if (!new_page)
                goto put_old;
 
-       if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
-               goto put_new;
-
        __SetPageUptodate(new_page);
        copy_highpage(new_page, old_page);
        copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
        ret = __replace_page(vma, vaddr, old_page, new_page);
-       if (ret)
-               mem_cgroup_uncharge_page(new_page);
-
-put_new:
        page_cache_release(new_page);
 put_old:
        put_page(old_page);
diff --git a/kernel/exit.c b/kernel/exit.c
index 88c6b3e425834e89943bb1a291a7a350333e3593..32c58f7433a3672c636687cf81c108eb0f20761e 100644 (file)
@@ -59,7 +59,7 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 
-static void exit_mm(struct task_struct * tsk);
+static void exit_mm(struct task_struct *tsk);
 
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
@@ -151,7 +151,7 @@ static void __exit_signal(struct task_struct *tsk)
        spin_unlock(&sighand->siglock);
 
        __cleanup_sighand(sighand);
-       clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
+       clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
        if (group_dead) {
                flush_sigqueue(&sig->shared_pending);
                tty_kref_put(tty);
@@ -168,7 +168,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 }
 
 
-void release_task(struct task_struct * p)
+void release_task(struct task_struct *p)
 {
        struct task_struct *leader;
        int zap_leader;
@@ -192,7 +192,8 @@ repeat:
         */
        zap_leader = 0;
        leader = p->group_leader;
-       if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
+       if (leader != p && thread_group_empty(leader)
+                       && leader->exit_state == EXIT_ZOMBIE) {
                /*
                 * If we were the last child thread and the leader has
                 * exited already, and the leader's parent ignores SIGCHLD,
@@ -241,7 +242,8 @@ struct pid *session_of_pgrp(struct pid *pgrp)
  *
  * "I ask you, have you ever known what it is to be an orphan?"
  */
-static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
+static int will_become_orphaned_pgrp(struct pid *pgrp,
+                                       struct task_struct *ignored_task)
 {
        struct task_struct *p;
 
@@ -294,9 +296,9 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
        struct task_struct *ignored_task = tsk;
 
        if (!parent)
-                /* exit: our father is in a different pgrp than
-                 * we are and we were the only connection outside.
-                 */
+               /* exit: our father is in a different pgrp than
+                * we are and we were the only connection outside.
+                */
                parent = tsk->real_parent;
        else
                /* reparent: our child is in a different pgrp than
@@ -405,7 +407,7 @@ assign_new_owner:
  * Turn us into a lazy TLB process if we
  * aren't already..
  */
-static void exit_mm(struct task_struct * tsk)
+static void exit_mm(struct task_struct *tsk)
 {
        struct mm_struct *mm = tsk->mm;
        struct core_state *core_state;
@@ -425,6 +427,7 @@ static void exit_mm(struct task_struct * tsk)
        core_state = mm->core_state;
        if (core_state) {
                struct core_thread self;
+
                up_read(&mm->mmap_sem);
 
                self.task = tsk;
@@ -566,6 +569,7 @@ static void forget_original_parent(struct task_struct *father)
 
        list_for_each_entry_safe(p, n, &father->children, sibling) {
                struct task_struct *t = p;
+
                do {
                        t->real_parent = reaper;
                        if (t->parent == father) {
@@ -599,7 +603,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
        /*
         * This does two things:
         *
-        * A.  Make init inherit all the child processes
+        * A.  Make init inherit all the child processes
         * B.  Check to see if any process groups have become orphaned
         *      as a result of our exiting, and if they have any stopped
         *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
@@ -649,9 +653,8 @@ static void check_stack_usage(void)
 
        spin_lock(&low_water_lock);
        if (free < lowest_to_date) {
-               printk(KERN_WARNING "%s (%d) used greatest stack depth: "
-                               "%lu bytes left\n",
-                               current->comm, task_pid_nr(current), free);
+               pr_warn("%s (%d) used greatest stack depth: %lu bytes left\n",
+                       current->comm, task_pid_nr(current), free);
                lowest_to_date = free;
        }
        spin_unlock(&low_water_lock);
@@ -692,8 +695,7 @@ void do_exit(long code)
         * leave this task alone and wait for reboot.
         */
        if (unlikely(tsk->flags & PF_EXITING)) {
-               printk(KERN_ALERT
-                       "Fixing recursive fault but reboot is needed!\n");
+               pr_alert("Fixing recursive fault but reboot is needed!\n");
                /*
                 * We can do this unlocked here. The futex code uses
                 * this flag just to verify whether the pi state
@@ -717,9 +719,9 @@ void do_exit(long code)
        raw_spin_unlock_wait(&tsk->pi_lock);
 
        if (unlikely(in_atomic()))
-               printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
-                               current->comm, task_pid_nr(current),
-                               preempt_count());
+               pr_info("note: %s[%d] exited with preempt_count %d\n",
+                       current->comm, task_pid_nr(current),
+                       preempt_count());
 
        acct_update_integrals(tsk);
        /* sync mm's RSS info before statistics gathering */
@@ -837,7 +839,6 @@ void do_exit(long code)
        for (;;)
                cpu_relax();    /* For when BUG is null */
 }
-
 EXPORT_SYMBOL_GPL(do_exit);
 
 void complete_and_exit(struct completion *comp, long code)
@@ -847,7 +848,6 @@ void complete_and_exit(struct completion *comp, long code)
 
        do_exit(code);
 }
-
 EXPORT_SYMBOL(complete_and_exit);
 
 SYSCALL_DEFINE1(exit, int, error_code)
@@ -870,6 +870,7 @@ do_group_exit(int exit_code)
                exit_code = sig->group_exit_code;
        else if (!thread_group_empty(current)) {
                struct sighand_struct *const sighand = current->sighand;
+
                spin_lock_irq(&sighand->siglock);
                if (signal_group_exit(sig))
                        /* Another thread got here before we took the lock.  */
@@ -1034,9 +1035,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
                 * as other threads in the parent group can be right
                 * here reaping other children at the same time.
                 *
-                * We use thread_group_cputime_adjusted() to get times for the thread
-                * group, which consolidates times for all threads in the
-                * group including the group leader.
+                * We use thread_group_cputime_adjusted() to get times for
+                * the thread group, which consolidates times for all threads
+                * in the group including the group leader.
                 */
                thread_group_cputime_adjusted(p, &tgutime, &tgstime);
                spin_lock_irq(&p->real_parent->sighand->siglock);
@@ -1418,6 +1419,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
 
        list_for_each_entry(p, &tsk->children, sibling) {
                int ret = wait_consider_task(wo, 0, p);
+
                if (ret)
                        return ret;
        }
@@ -1431,6 +1433,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 
        list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
                int ret = wait_consider_task(wo, 1, p);
+
                if (ret)
                        return ret;
        }
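
The exit.c hunks above mechanically convert raw printk(KERN_*) calls to the
pr_*() helpers. A minimal stand-alone sketch of the idiom, assuming the usual
pr_fmt prefix convention (exit.c itself defines no pr_fmt):

#define pr_fmt(fmt) "example: " fmt	/* must be defined before printk.h */

#include <linux/printk.h>

static void report_stack_depth(unsigned long free)
{
	/* Expands to printk(KERN_WARNING pr_fmt(fmt), ...): one line, greppable */
	pr_warn("used greatest stack depth: %lu bytes left\n", free);
}
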
diff --git a/kernel/fork.c b/kernel/fork.c
index fbd3497b221f781a47022192cd4e1f2fa74a5803..1380d8ace334be7f7d6b49d760ed5c6013e4c7bb 100644 (file)
@@ -374,12 +374,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         */
        down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
-       mm->locked_vm = 0;
-       mm->mmap = NULL;
-       mm->vmacache_seqnum = 0;
-       mm->map_count = 0;
-       cpumask_clear(mm_cpumask(mm));
-       mm->mm_rb = RB_ROOT;
+       mm->total_vm = oldmm->total_vm;
+       mm->shared_vm = oldmm->shared_vm;
+       mm->exec_vm = oldmm->exec_vm;
+       mm->stack_vm = oldmm->stack_vm;
+
        rb_link = &mm->mm_rb.rb_node;
        rb_parent = NULL;
        pprev = &mm->mmap;
@@ -430,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                                atomic_dec(&inode->i_writecount);
                        mutex_lock(&mapping->i_mmap_mutex);
                        if (tmp->vm_flags & VM_SHARED)
-                               mapping->i_mmap_writable++;
+                               atomic_inc(&mapping->i_mmap_writable);
                        flush_dcache_mmap_lock(mapping);
                        /* insert tmp into the share list, just after mpnt */
                        if (unlikely(tmp->vm_flags & VM_NONLINEAR))
@@ -536,19 +535,37 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
+static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+       mm->owner = p;
+#endif
+}
+
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 {
+       mm->mmap = NULL;
+       mm->mm_rb = RB_ROOT;
+       mm->vmacache_seqnum = 0;
        atomic_set(&mm->mm_users, 1);
        atomic_set(&mm->mm_count, 1);
        init_rwsem(&mm->mmap_sem);
        INIT_LIST_HEAD(&mm->mmlist);
        mm->core_state = NULL;
        atomic_long_set(&mm->nr_ptes, 0);
+       mm->map_count = 0;
+       mm->locked_vm = 0;
+       mm->pinned_vm = 0;
        memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
        spin_lock_init(&mm->page_table_lock);
+       mm_init_cpumask(mm);
        mm_init_aio(mm);
        mm_init_owner(mm, p);
+       mmu_notifier_mm_init(mm);
        clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+       mm->pmd_huge_pte = NULL;
+#endif
 
        if (current->mm) {
                mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -558,11 +575,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
                mm->def_flags = 0;
        }
 
-       if (likely(!mm_alloc_pgd(mm))) {
-               mmu_notifier_mm_init(mm);
-               return mm;
-       }
+       if (mm_alloc_pgd(mm))
+               goto fail_nopgd;
+
+       if (init_new_context(p, mm))
+               goto fail_nocontext;
+
+       return mm;
 
+fail_nocontext:
+       mm_free_pgd(mm);
+fail_nopgd:
        free_mm(mm);
        return NULL;
 }
@@ -596,7 +619,6 @@ struct mm_struct *mm_alloc(void)
                return NULL;
 
        memset(mm, 0, sizeof(*mm));
-       mm_init_cpumask(mm);
        return mm_init(mm, current);
 }
 
@@ -828,17 +850,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
                goto fail_nomem;
 
        memcpy(mm, oldmm, sizeof(*mm));
-       mm_init_cpumask(mm);
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-       mm->pmd_huge_pte = NULL;
-#endif
        if (!mm_init(mm, tsk))
                goto fail_nomem;
 
-       if (init_new_context(tsk, mm))
-               goto fail_nocontext;
-
        dup_mm_exe_file(oldmm, mm);
 
        err = dup_mmap(mm, oldmm);
@@ -860,15 +875,6 @@ free_pt:
 
 fail_nomem:
        return NULL;
-
-fail_nocontext:
-       /*
-        * If init_new_context() failed, we cannot use mmput() to free the mm
-        * because it calls destroy_context()
-        */
-       mm_free_pgd(mm);
-       free_mm(mm);
-       return NULL;
 }
 
 static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1140,13 +1146,6 @@ static void rt_mutex_init_task(struct task_struct *p)
 #endif
 }
 
-#ifdef CONFIG_MEMCG
-void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-       mm->owner = p;
-}
-#endif /* CONFIG_MEMCG */
-
 /*
  * Initialize POSIX timer handling for a single task.
  */
@@ -1346,10 +1345,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_MEMCG
-       p->memcg_batch.do_batch = 0;
-       p->memcg_batch.memcg = NULL;
-#endif
 #ifdef CONFIG_BCACHE
        p->sequential_io        = 0;
        p->sequential_io_avg    = 0;
@@ -1367,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (retval)
                goto bad_fork_cleanup_policy;
        /* copy all the process information */
+       shm_init_task(p);
        retval = copy_semundo(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_audit;
@@ -1918,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
                         */
                        exit_sem(current);
                }
+               if (unshare_flags & CLONE_NEWIPC) {
+                       /* Orphan segments in old ns (see sem above). */
+                       exit_shm(current);
+                       shm_init_task(current);
+               }
 
                if (new_nsproxy)
                        switch_task_namespaces(current, new_nsproxy);
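
The reworked mm_init() above uses the kernel's standard goto-ladder error
unwinding instead of a nested success path. A stand-alone sketch of the
pattern, with hypothetical widget_* names (not from the patch):

#include <stdlib.h>

struct widget { void *pgd; void *ctx; };

static struct widget *widget_create(void)
{
	struct widget *w = malloc(sizeof(*w));

	if (!w)
		return NULL;

	w->pgd = malloc(64);
	if (!w->pgd)
		goto fail_nopgd;

	w->ctx = malloc(64);
	if (!w->ctx)
		goto fail_noctx;

	return w;			/* fully initialized */

fail_noctx:
	free(w->pgd);			/* unwind in reverse order of setup */
fail_nopgd:
	free(w);
	return NULL;
}
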
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index 15ff01a7637912776f16f534e64d9e3f0d002afe..edf67c493a8e1132b79e24128c8cdea5ca85fe4d 100644 (file)
@@ -784,8 +784,7 @@ static __init int gcov_fs_init(void)
 
 err_remove:
        pr_err("init failed\n");
-       if (root_node.dentry)
-               debugfs_remove(root_node.dentry);
+       debugfs_remove(root_node.dentry);
 
        return rc;
 }
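
The dropped guard relies on debugfs_remove() being a no-op for a NULL (or
IS_ERR) dentry, the same convention kfree() follows. A small sketch of the
resulting unconditional-cleanup style (teardown() is illustrative):

#include <linux/debugfs.h>
#include <linux/slab.h>

static void teardown(struct dentry *dir, void *buf)
{
	debugfs_remove(dir);	/* safe even if dir is NULL */
	kfree(buf);		/* kfree(NULL) is likewise a no-op */
}
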
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index cb0cf37dac3a4e0d3c4dd0562e18ad0046f0ff24..ae51670878457d9b0db2498ee032d5735933e96f 100644 (file)
@@ -364,7 +364,7 @@ static int __sprint_symbol(char *buffer, unsigned long address,
        address += symbol_offset;
        name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
        if (!name)
-               return sprintf(buffer, "0x%lx", address);
+               return sprintf(buffer, "0x%lx", address - symbol_offset);
 
        if (name != buffer)
                strcpy(buffer, name);
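
The kallsyms one-liner matters because __sprint_symbol() biases the address
by symbol_offset before the lookup; on failure the old code printed the
biased value rather than the address the caller passed in. A sketch of the
arithmetic, with lookup() as a hypothetical stand-in for kallsyms_lookup():

#include <stdio.h>

static const char *lookup(unsigned long addr)
{
	(void)addr;
	return NULL;	/* stand-in: pretend the lookup failed */
}

static int print_symbol(char *buffer, unsigned long address,
			unsigned long symbol_offset)
{
	address += symbol_offset;	/* bias applied for the lookup */
	if (!lookup(address))
		/* report the caller's original, unbiased address */
		return sprintf(buffer, "0x%lx", address - symbol_offset);
	return 0;
}
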
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4b8f0c9258843246e9233b0a01f60455725feba9..0b49a0a5810200da0925350e3d55dd1fc70b8638 100644 (file)
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)    "kexec: " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
@@ -40,6 +42,9 @@
 #include <asm/io.h>
 #include <asm/sections.h>
 
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+
 /* Per cpu memory for storing cpu states in case of system crash. */
 note_buf_t __percpu *crash_notes;
 
@@ -52,6 +57,15 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 /* Flag to indicate we are going to kexec a new kernel */
 bool kexec_in_progress = false;
 
+/*
+ * Declare these symbols weak so that if architecture provides a purgatory,
+ * these will be overridden.
+ */
+char __weak kexec_purgatory[0];
+size_t __weak kexec_purgatory_size = 0;
+
+static int kexec_calculate_store_digests(struct kimage *image);
+
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
        .name  = "Crash kernel",
@@ -125,45 +139,27 @@ static struct page *kimage_alloc_page(struct kimage *image,
                                       gfp_t gfp_mask,
                                       unsigned long dest);
 
-static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
-                          unsigned long nr_segments,
-                          struct kexec_segment __user *segments)
+static int copy_user_segment_list(struct kimage *image,
+                                 unsigned long nr_segments,
+                                 struct kexec_segment __user *segments)
 {
+       int ret;
        size_t segment_bytes;
-       struct kimage *image;
-       unsigned long i;
-       int result;
-
-       /* Allocate a controlling structure */
-       result = -ENOMEM;
-       image = kzalloc(sizeof(*image), GFP_KERNEL);
-       if (!image)
-               goto out;
-
-       image->head = 0;
-       image->entry = &image->head;
-       image->last_entry = &image->head;
-       image->control_page = ~0; /* By default this does not apply */
-       image->start = entry;
-       image->type = KEXEC_TYPE_DEFAULT;
-
-       /* Initialize the list of control pages */
-       INIT_LIST_HEAD(&image->control_pages);
-
-       /* Initialize the list of destination pages */
-       INIT_LIST_HEAD(&image->dest_pages);
-
-       /* Initialize the list of unusable pages */
-       INIT_LIST_HEAD(&image->unuseable_pages);
 
        /* Read in the segments */
        image->nr_segments = nr_segments;
        segment_bytes = nr_segments * sizeof(*segments);
-       result = copy_from_user(image->segment, segments, segment_bytes);
-       if (result) {
-               result = -EFAULT;
-               goto out;
-       }
+       ret = copy_from_user(image->segment, segments, segment_bytes);
+       if (ret)
+               ret = -EFAULT;
+
+       return ret;
+}
+
+static int sanity_check_segment_list(struct kimage *image)
+{
+       int result, i;
+       unsigned long nr_segments = image->nr_segments;
 
        /*
         * Verify we have good destination addresses.  The caller is
@@ -185,9 +181,9 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
                mstart = image->segment[i].mem;
                mend   = mstart + image->segment[i].memsz;
                if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
-                       goto out;
+                       return result;
                if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
-                       goto out;
+                       return result;
        }
 
        /* Verify our destination addresses do not overlap.
@@ -208,7 +204,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
                        pend   = pstart + image->segment[j].memsz;
                        /* Do the segments overlap ? */
                        if ((mend > pstart) && (mstart < pend))
-                               goto out;
+                               return result;
                }
        }
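
The checks being consolidated into sanity_check_segment_list() boil down to
page alignment and pairwise non-overlap of the destination ranges. A
stand-alone sketch of those two tests, assuming 4 KiB pages:

#include <stdbool.h>

#define EX_PAGE_MASK (~4095UL)	/* assumption: 4 KiB pages */

static bool segments_ok(const unsigned long *mem,
			const unsigned long *memsz, int n)
{
	for (int i = 0; i < n; i++) {
		unsigned long s = mem[i], e = s + memsz[i];

		/* both ends must be page aligned */
		if ((s & ~EX_PAGE_MASK) || (e & ~EX_PAGE_MASK))
			return false;

		/* no overlap with any earlier segment */
		for (int j = 0; j < i; j++)
			if (e > mem[j] && s < mem[j] + memsz[j])
				return false;
	}
	return true;
}
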
 
@@ -220,130 +216,401 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
        result = -EINVAL;
        for (i = 0; i < nr_segments; i++) {
                if (image->segment[i].bufsz > image->segment[i].memsz)
-                       goto out;
+                       return result;
        }
 
-       result = 0;
-out:
-       if (result == 0)
-               *rimage = image;
-       else
-               kfree(image);
+       /*
+        * Verify we have good destination addresses.  Normally
+        * the caller is responsible for making certain we don't
+        * attempt to load the new image into invalid or reserved
+        * areas of RAM.  But crash kernels are preloaded into a
+        * reserved area of ram.  We must ensure the addresses
+        * are in the reserved area otherwise preloading the
+        * kernel could corrupt things.
+        */
 
-       return result;
+       if (image->type == KEXEC_TYPE_CRASH) {
+               result = -EADDRNOTAVAIL;
+               for (i = 0; i < nr_segments; i++) {
+                       unsigned long mstart, mend;
+
+                       mstart = image->segment[i].mem;
+                       mend = mstart + image->segment[i].memsz - 1;
+                       /* Ensure we are within the crash kernel limits */
+                       if ((mstart < crashk_res.start) ||
+                           (mend > crashk_res.end))
+                               return result;
+               }
+       }
+
+       return 0;
+}
 
+static struct kimage *do_kimage_alloc_init(void)
+{
+       struct kimage *image;
+
+       /* Allocate a controlling structure */
+       image = kzalloc(sizeof(*image), GFP_KERNEL);
+       if (!image)
+               return NULL;
+
+       image->head = 0;
+       image->entry = &image->head;
+       image->last_entry = &image->head;
+       image->control_page = ~0; /* By default this does not apply */
+       image->type = KEXEC_TYPE_DEFAULT;
+
+       /* Initialize the list of control pages */
+       INIT_LIST_HEAD(&image->control_pages);
+
+       /* Initialize the list of destination pages */
+       INIT_LIST_HEAD(&image->dest_pages);
+
+       /* Initialize the list of unusable pages */
+       INIT_LIST_HEAD(&image->unusable_pages);
+
+       return image;
 }
 
 static void kimage_free_page_list(struct list_head *list);
 
-static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
-                               unsigned long nr_segments,
-                               struct kexec_segment __user *segments)
+static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
+                            unsigned long nr_segments,
+                            struct kexec_segment __user *segments,
+                            unsigned long flags)
 {
-       int result;
+       int ret;
        struct kimage *image;
+       bool kexec_on_panic = flags & KEXEC_ON_CRASH;
+
+       if (kexec_on_panic) {
+               /* Verify we have a valid entry point */
+               if ((entry < crashk_res.start) || (entry > crashk_res.end))
+                       return -EADDRNOTAVAIL;
+       }
 
        /* Allocate and initialize a controlling structure */
-       image = NULL;
-       result = do_kimage_alloc(&image, entry, nr_segments, segments);
-       if (result)
-               goto out;
+       image = do_kimage_alloc_init();
+       if (!image)
+               return -ENOMEM;
+
+       image->start = entry;
+
+       ret = copy_user_segment_list(image, nr_segments, segments);
+       if (ret)
+               goto out_free_image;
+
+       ret = sanity_check_segment_list(image);
+       if (ret)
+               goto out_free_image;
+
+       /* Enable the special crash kernel control page allocation policy. */
+       if (kexec_on_panic) {
+               image->control_page = crashk_res.start;
+               image->type = KEXEC_TYPE_CRASH;
+       }
 
        /*
         * Find a location for the control code buffer, and add it to
         * the vector of segments so that its pages will also be
         * counted as destination pages.
         */
-       result = -ENOMEM;
+       ret = -ENOMEM;
        image->control_code_page = kimage_alloc_control_pages(image,
                                           get_order(KEXEC_CONTROL_PAGE_SIZE));
        if (!image->control_code_page) {
                pr_err("Could not allocate control_code_buffer\n");
-               goto out_free;
+               goto out_free_image;
        }
 
-       image->swap_page = kimage_alloc_control_pages(image, 0);
-       if (!image->swap_page) {
-               pr_err("Could not allocate swap buffer\n");
-               goto out_free;
+       if (!kexec_on_panic) {
+               image->swap_page = kimage_alloc_control_pages(image, 0);
+               if (!image->swap_page) {
+                       pr_err("Could not allocate swap buffer\n");
+                       goto out_free_control_pages;
+               }
        }
 
        *rimage = image;
        return 0;
-
-out_free:
+out_free_control_pages:
        kimage_free_page_list(&image->control_pages);
+out_free_image:
        kfree(image);
-out:
-       return result;
+       return ret;
 }
 
-static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
-                               unsigned long nr_segments,
-                               struct kexec_segment __user *segments)
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
 {
-       int result;
-       struct kimage *image;
-       unsigned long i;
+       struct fd f = fdget(fd);
+       int ret;
+       struct kstat stat;
+       loff_t pos;
+       ssize_t bytes = 0;
 
-       image = NULL;
-       /* Verify we have a valid entry point */
-       if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
-               result = -EADDRNOTAVAIL;
+       if (!f.file)
+               return -EBADF;
+
+       ret = vfs_getattr(&f.file->f_path, &stat);
+       if (ret)
+               goto out;
+
+       if (stat.size > INT_MAX) {
+               ret = -EFBIG;
                goto out;
        }
 
-       /* Allocate and initialize a controlling structure */
-       result = do_kimage_alloc(&image, entry, nr_segments, segments);
-       if (result)
+       /* Don't hand 0 to vmalloc, it whines. */
+       if (stat.size == 0) {
+               ret = -EINVAL;
                goto out;
+       }
 
-       /* Enable the special crash kernel control page
-        * allocation policy.
-        */
-       image->control_page = crashk_res.start;
-       image->type = KEXEC_TYPE_CRASH;
+       *buf = vmalloc(stat.size);
+       if (!*buf) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
-       /*
-        * Verify we have good destination addresses.  Normally
-        * the caller is responsible for making certain we don't
-        * attempt to load the new image into invalid or reserved
-        * areas of RAM.  But crash kernels are preloaded into a
-        * reserved area of ram.  We must ensure the addresses
-        * are in the reserved area otherwise preloading the
-        * kernel could corrupt things.
-        */
-       result = -EADDRNOTAVAIL;
-       for (i = 0; i < nr_segments; i++) {
-               unsigned long mstart, mend;
+       pos = 0;
+       while (pos < stat.size) {
+               bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+                                   stat.size - pos);
+               if (bytes < 0) {
+                       vfree(*buf);
+                       ret = bytes;
+                       goto out;
+               }
 
-               mstart = image->segment[i].mem;
-               mend = mstart + image->segment[i].memsz - 1;
-               /* Ensure we are within the crash kernel limits */
-               if ((mstart < crashk_res.start) || (mend > crashk_res.end))
-                       goto out_free;
+               if (bytes == 0)
+                       break;
+               pos += bytes;
+       }
+
+       if (pos != stat.size) {
+               ret = -EBADF;
+               vfree(*buf);
+               goto out;
        }
 
+       *buf_len = pos;
+out:
+       fdput(f);
+       return ret;
+}
+
+/* Architectures can provide this probe function */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+                                        unsigned long buf_len)
+{
+       return -ENOEXEC;
+}
+
+void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+{
+       return ERR_PTR(-ENOEXEC);
+}
+
+void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+}
+
+int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+                                       unsigned long buf_len)
+{
+       return -EKEYREJECTED;
+}
+
+/* Apply relocations of type RELA */
+int __weak
+arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+                                unsigned int relsec)
+{
+       pr_err("RELA relocation unsupported.\n");
+       return -ENOEXEC;
+}
+
+/* Apply relocations of type REL */
+int __weak
+arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+                            unsigned int relsec)
+{
+       pr_err("REL relocation unsupported.\n");
+       return -ENOEXEC;
+}
+
+/*
+ * Free up memory used by the kernel, initrd, and command line. These are
+ * temporary allocations that are no longer needed once the buffers have
+ * been loaded into separate segments and copied elsewhere.
+ */
+static void kimage_file_post_load_cleanup(struct kimage *image)
+{
+       struct purgatory_info *pi = &image->purgatory_info;
+
+       vfree(image->kernel_buf);
+       image->kernel_buf = NULL;
+
+       vfree(image->initrd_buf);
+       image->initrd_buf = NULL;
+
+       kfree(image->cmdline_buf);
+       image->cmdline_buf = NULL;
+
+       vfree(pi->purgatory_buf);
+       pi->purgatory_buf = NULL;
+
+       vfree(pi->sechdrs);
+       pi->sechdrs = NULL;
+
+       /* See if architecture has anything to cleanup post load */
+       arch_kimage_file_post_load_cleanup(image);
+
        /*
-        * Find a location for the control code buffer, and add
-        * the vector of segments so that it's pages will also be
-        * counted as destination pages.
+        * The call above should have reached the boot loader to free up
+        * any data stored in kimage->image_loader_data. It should now
+        * be safe to free it.
         */
-       result = -ENOMEM;
+       kfree(image->image_loader_data);
+       image->image_loader_data = NULL;
+}
+
+/*
+ * In file mode the list of segments is prepared by the kernel. Copy the
+ * relevant data from user space, do error checking, and prepare the list.
+ */
+static int
+kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
+                            const char __user *cmdline_ptr,
+                            unsigned long cmdline_len, unsigned long flags)
+{
+       int ret = 0;
+       void *ldata;
+
+       ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+                               &image->kernel_buf_len);
+       if (ret)
+               return ret;
+
+       /* Call arch image probe handlers */
+       ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+                                           image->kernel_buf_len);
+
+       if (ret)
+               goto out;
+
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+       ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+                                          image->kernel_buf_len);
+       if (ret) {
+               pr_debug("kernel signature verification failed.\n");
+               goto out;
+       }
+       pr_debug("kernel signature verification successful.\n");
+#endif
+       /* It is possible that no initramfs is being loaded */
+       if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
+               ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+                                       &image->initrd_buf_len);
+               if (ret)
+                       goto out;
+       }
+
+       if (cmdline_len) {
+               image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
+               if (!image->cmdline_buf) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
+                                    cmdline_len);
+               if (ret) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+
+               image->cmdline_buf_len = cmdline_len;
+
+               /* The command line must be a NUL-terminated string */
+               if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+                       ret = -EINVAL;
+                       goto out;
+               }
+       }
+
+       /* Call arch image load handlers */
+       ldata = arch_kexec_kernel_image_load(image);
+
+       if (IS_ERR(ldata)) {
+               ret = PTR_ERR(ldata);
+               goto out;
+       }
+
+       image->image_loader_data = ldata;
+out:
+       /* In case of error, free up all allocated memory in this function */
+       if (ret)
+               kimage_file_post_load_cleanup(image);
+       return ret;
+}
+
+static int
+kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
+                      int initrd_fd, const char __user *cmdline_ptr,
+                      unsigned long cmdline_len, unsigned long flags)
+{
+       int ret;
+       struct kimage *image;
+       bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
+
+       image = do_kimage_alloc_init();
+       if (!image)
+               return -ENOMEM;
+
+       image->file_mode = 1;
+
+       if (kexec_on_panic) {
+               /* Enable special crash kernel control page alloc policy. */
+               image->control_page = crashk_res.start;
+               image->type = KEXEC_TYPE_CRASH;
+       }
+
+       ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+                                          cmdline_ptr, cmdline_len, flags);
+       if (ret)
+               goto out_free_image;
+
+       ret = sanity_check_segment_list(image);
+       if (ret)
+               goto out_free_post_load_bufs;
+
+       ret = -ENOMEM;
        image->control_code_page = kimage_alloc_control_pages(image,
                                           get_order(KEXEC_CONTROL_PAGE_SIZE));
        if (!image->control_code_page) {
                pr_err("Could not allocate control_code_buffer\n");
-               goto out_free;
+               goto out_free_post_load_bufs;
+       }
+
+       if (!kexec_on_panic) {
+               image->swap_page = kimage_alloc_control_pages(image, 0);
+               if (!image->swap_page) {
+                       pr_err("Could not allocate swap buffer\n");
+                       goto out_free_control_pages;
+               }
        }
 
        *rimage = image;
        return 0;
-
-out_free:
+out_free_control_pages:
+       kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+       kimage_file_post_load_cleanup(image);
+out_free_image:
        kfree(image);
-out:
-       return result;
+       return ret;
 }
 
 static int kimage_is_destination_range(struct kimage *image,
@@ -609,7 +876,7 @@ static void kimage_free_extra_pages(struct kimage *image)
        kimage_free_page_list(&image->dest_pages);
 
        /* Walk through and free any unusable pages I have cached */
-       kimage_free_page_list(&image->unuseable_pages);
+       kimage_free_page_list(&image->unusable_pages);
 
 }
 static void kimage_terminate(struct kimage *image)
@@ -663,6 +930,14 @@ static void kimage_free(struct kimage *image)
 
        /* Free the kexec control pages... */
        kimage_free_page_list(&image->control_pages);
+
+       /*
+        * Free up any temporary buffers allocated. This might hit if
+        * Free up any temporary buffers still allocated. This path is
+        * hit when an error occurred long after the buffers were allocated.
+       if (image->file_mode)
+               kimage_file_post_load_cleanup(image);
+
        kfree(image);
 }
 
@@ -732,7 +1007,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
                /* If the page cannot be used file it away */
                if (page_to_pfn(page) >
                                (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
-                       list_add(&page->lru, &image->unuseable_pages);
+                       list_add(&page->lru, &image->unusable_pages);
                        continue;
                }
                addr = page_to_pfn(page) << PAGE_SHIFT;
@@ -791,10 +1066,14 @@ static int kimage_load_normal_segment(struct kimage *image,
        unsigned long maddr;
        size_t ubytes, mbytes;
        int result;
-       unsigned char __user *buf;
+       unsigned char __user *buf = NULL;
+       unsigned char *kbuf = NULL;
 
        result = 0;
-       buf = segment->buf;
+       if (image->file_mode)
+               kbuf = segment->kbuf;
+       else
+               buf = segment->buf;
        ubytes = segment->bufsz;
        mbytes = segment->memsz;
        maddr = segment->mem;
@@ -826,7 +1105,11 @@ static int kimage_load_normal_segment(struct kimage *image,
                                PAGE_SIZE - (maddr & ~PAGE_MASK));
                uchunk = min(ubytes, mchunk);
 
-               result = copy_from_user(ptr, buf, uchunk);
+               /* For file based kexec, source pages are in kernel memory */
+               if (image->file_mode)
+                       memcpy(ptr, kbuf, uchunk);
+               else
+                       result = copy_from_user(ptr, buf, uchunk);
                kunmap(page);
                if (result) {
                        result = -EFAULT;
@@ -834,7 +1117,10 @@ static int kimage_load_normal_segment(struct kimage *image,
                }
                ubytes -= uchunk;
                maddr  += mchunk;
-               buf    += mchunk;
+               if (image->file_mode)
+                       kbuf += mchunk;
+               else
+                       buf += mchunk;
                mbytes -= mchunk;
        }
 out:
@@ -851,10 +1137,14 @@ static int kimage_load_crash_segment(struct kimage *image,
        unsigned long maddr;
        size_t ubytes, mbytes;
        int result;
-       unsigned char __user *buf;
+       unsigned char __user *buf = NULL;
+       unsigned char *kbuf = NULL;
 
        result = 0;
-       buf = segment->buf;
+       if (image->file_mode)
+               kbuf = segment->kbuf;
+       else
+               buf = segment->buf;
        ubytes = segment->bufsz;
        mbytes = segment->memsz;
        maddr = segment->mem;
@@ -877,7 +1167,12 @@ static int kimage_load_crash_segment(struct kimage *image,
                        /* Zero the trailing part of the page */
                        memset(ptr + uchunk, 0, mchunk - uchunk);
                }
-               result = copy_from_user(ptr, buf, uchunk);
+
+               /* For file based kexec, source pages are in kernel memory */
+               if (image->file_mode)
+                       memcpy(ptr, kbuf, uchunk);
+               else
+                       result = copy_from_user(ptr, buf, uchunk);
                kexec_flush_icache_page(page);
                kunmap(page);
                if (result) {
@@ -886,7 +1181,10 @@ static int kimage_load_crash_segment(struct kimage *image,
                }
                ubytes -= uchunk;
                maddr  += mchunk;
-               buf    += mchunk;
+               if (image->file_mode)
+                       kbuf += mchunk;
+               else
+                       buf += mchunk;
                mbytes -= mchunk;
        }
 out:
@@ -986,16 +1284,16 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 
                /* Loading another kernel to reboot into */
                if ((flags & KEXEC_ON_CRASH) == 0)
-                       result = kimage_normal_alloc(&image, entry,
-                                                       nr_segments, segments);
+                       result = kimage_alloc_init(&image, entry, nr_segments,
+                                                  segments, flags);
                /* Loading another kernel to switch to if this one crashes */
                else if (flags & KEXEC_ON_CRASH) {
                        /* Free any current crash dump kernel before
                         * we corrupt it.
                         */
                        kimage_free(xchg(&kexec_crash_image, NULL));
-                       result = kimage_crash_alloc(&image, entry,
-                                                    nr_segments, segments);
+                       result = kimage_alloc_init(&image, entry, nr_segments,
+                                                  segments, flags);
                        crash_map_reserved_pages();
                }
                if (result)
@@ -1077,30 +1375,106 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
 }
 #endif
 
-void crash_kexec(struct pt_regs *regs)
+SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
+               unsigned long, cmdline_len, const char __user *, cmdline_ptr,
+               unsigned long, flags)
 {
-       /* Take the kexec_mutex here to prevent sys_kexec_load
-        * running on one cpu from replacing the crash kernel
-        * we are using after a panic on a different cpu.
-        *
-        * If the crash kernel was not located in a fixed area
-        * of memory the xchg(&kexec_crash_image) would be
-        * sufficient.  But since I reuse the memory...
-        */
-       if (mutex_trylock(&kexec_mutex)) {
-               if (kexec_crash_image) {
-                       struct pt_regs fixed_regs;
-
-                       crash_setup_regs(&fixed_regs, regs);
-                       crash_save_vmcoreinfo();
-                       machine_crash_shutdown(&fixed_regs);
-                       machine_kexec(kexec_crash_image);
-               }
-               mutex_unlock(&kexec_mutex);
-       }
-}
+       int ret = 0, i;
+       struct kimage **dest_image, *image;
 
-size_t crash_get_memory_size(void)
+       /* We only trust the superuser with rebooting the system. */
+       if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+               return -EPERM;
+
+       /* Make sure we have a legal set of flags */
+       if (flags != (flags & KEXEC_FILE_FLAGS))
+               return -EINVAL;
+
+       image = NULL;
+
+       if (!mutex_trylock(&kexec_mutex))
+               return -EBUSY;
+
+       dest_image = &kexec_image;
+       if (flags & KEXEC_FILE_ON_CRASH)
+               dest_image = &kexec_crash_image;
+
+       if (flags & KEXEC_FILE_UNLOAD)
+               goto exchange;
+
+       /*
+        * In the crash case, the new kernel is loaded into the reserved
+        * region, the same memory where an old crash kernel may be. Free any
+        * current crash dump kernel before we corrupt it.
+        */
+       if (flags & KEXEC_FILE_ON_CRASH)
+               kimage_free(xchg(&kexec_crash_image, NULL));
+
+       ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+                                    cmdline_len, flags);
+       if (ret)
+               goto out;
+
+       ret = machine_kexec_prepare(image);
+       if (ret)
+               goto out;
+
+       ret = kexec_calculate_store_digests(image);
+       if (ret)
+               goto out;
+
+       for (i = 0; i < image->nr_segments; i++) {
+               struct kexec_segment *ksegment;
+
+               ksegment = &image->segment[i];
+               pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+                        i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+                        ksegment->memsz);
+
+               ret = kimage_load_segment(image, &image->segment[i]);
+               if (ret)
+                       goto out;
+       }
+
+       kimage_terminate(image);
+
+       /*
+        * Free up any temporary buffers that are no longer needed now
+        * that the image has been loaded.
+        */
+       kimage_file_post_load_cleanup(image);
+exchange:
+       image = xchg(dest_image, image);
+out:
+       mutex_unlock(&kexec_mutex);
+       kimage_free(image);
+       return ret;
+}
+
+void crash_kexec(struct pt_regs *regs)
+{
+       /* Take the kexec_mutex here to prevent sys_kexec_load
+        * running on one cpu from replacing the crash kernel
+        * we are using after a panic on a different cpu.
+        *
+        * If the crash kernel was not located in a fixed area
+        * of memory the xchg(&kexec_crash_image) would be
+        * sufficient.  But since I reuse the memory...
+        */
+       if (mutex_trylock(&kexec_mutex)) {
+               if (kexec_crash_image) {
+                       struct pt_regs fixed_regs;
+
+                       crash_setup_regs(&fixed_regs, regs);
+                       crash_save_vmcoreinfo();
+                       machine_crash_shutdown(&fixed_regs);
+                       machine_kexec(kexec_crash_image);
+               }
+               mutex_unlock(&kexec_mutex);
+       }
+}
+
+size_t crash_get_memory_size(void)
 {
        size_t size = 0;
        mutex_lock(&kexec_mutex);
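
A hedged userspace sketch of invoking the new syscall: on headers that
predate it, __NR_kexec_file_load must be defined by hand (320 is the x86_64
number). The paths and command line are placeholders, and the caller needs
CAP_SYS_BOOT:

#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_kexec_file_load
#define __NR_kexec_file_load 320	/* x86_64; differs per architecture */
#endif

int main(void)
{
	const char cmdline[] = "root=/dev/sda1 ro";
	int kernel_fd = open("/boot/vmlinuz", O_RDONLY);
	int initrd_fd = open("/boot/initrd.img", O_RDONLY);

	if (kernel_fd < 0 || initrd_fd < 0)
		return 1;

	/* cmdline_len counts the trailing NUL; the kernel checks for it */
	if (syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
		    sizeof(cmdline), cmdline, 0UL) < 0) {
		perror("kexec_file_load");
		return 1;
	}
	return 0;
}
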
@@ -1632,6 +2006,683 @@ static int __init crash_save_vmcoreinfo_init(void)
 
 subsys_initcall(crash_save_vmcoreinfo_init);
 
+static int __kexec_add_segment(struct kimage *image, char *buf,
+                              unsigned long bufsz, unsigned long mem,
+                              unsigned long memsz)
+{
+       struct kexec_segment *ksegment;
+
+       ksegment = &image->segment[image->nr_segments];
+       ksegment->kbuf = buf;
+       ksegment->bufsz = bufsz;
+       ksegment->mem = mem;
+       ksegment->memsz = memsz;
+       image->nr_segments++;
+
+       return 0;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+                                   struct kexec_buf *kbuf)
+{
+       struct kimage *image = kbuf->image;
+       unsigned long temp_start, temp_end;
+
+       temp_end = min(end, kbuf->buf_max);
+       temp_start = temp_end - kbuf->memsz;
+
+       do {
+               /* align down start */
+               temp_start = temp_start & (~(kbuf->buf_align - 1));
+
+               if (temp_start < start || temp_start < kbuf->buf_min)
+                       return 0;
+
+               temp_end = temp_start + kbuf->memsz - 1;
+
+               /*
+                * Make sure this does not conflict with any of the
+                * existing segments.
+                */
+               if (kimage_is_destination_range(image, temp_start, temp_end)) {
+                       temp_start = temp_start - PAGE_SIZE;
+                       continue;
+               }
+
+               /* We found a suitable memory range */
+               break;
+       } while (1);
+
+       /* If we are here, we found a suitable memory range */
+       __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+                           kbuf->memsz);
+
+       /* Success, stop navigating through remaining System RAM ranges */
+       return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+                                    struct kexec_buf *kbuf)
+{
+       struct kimage *image = kbuf->image;
+       unsigned long temp_start, temp_end;
+
+       temp_start = max(start, kbuf->buf_min);
+
+       do {
+               temp_start = ALIGN(temp_start, kbuf->buf_align);
+               temp_end = temp_start + kbuf->memsz - 1;
+
+               if (temp_end > end || temp_end > kbuf->buf_max)
+                       return 0;
+               /*
+                * Make sure this does not conflict with any of the
+                * existing segments.
+                */
+               if (kimage_is_destination_range(image, temp_start, temp_end)) {
+                       temp_start = temp_start + PAGE_SIZE;
+                       continue;
+               }
+
+               /* We found a suitable memory range */
+               break;
+       } while (1);
+
+       /* If we are here, we found a suitable memory range */
+       __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+                           kbuf->memsz);
+
+       /* Success, stop navigating through remaining System RAM ranges */
+       return 1;
+}
+
+static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+{
+       struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+       unsigned long sz = end - start + 1;
+
+       /* Returning 0 moves on to the next memory range */
+       if (sz < kbuf->memsz)
+               return 0;
+
+       if (end < kbuf->buf_min || start > kbuf->buf_max)
+               return 0;
+
+       /*
+        * Allocate memory top down within the RAM range; otherwise
+        * allocate bottom up.
+        */
+       if (kbuf->top_down)
+               return locate_mem_hole_top_down(start, end, kbuf);
+       return locate_mem_hole_bottom_up(start, end, kbuf);
+}
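
The two walkers above differ only in direction: align the candidate, test it
against already-placed segments, and slide by one page on conflict. A
simplified top-down sketch, with conflicts() as a stand-in for
kimage_is_destination_range() and a 4 KiB page assumed:

#include <stdbool.h>

static bool conflicts(unsigned long s, unsigned long e)
{
	(void)s; (void)e;	/* stand-in: pretend nothing conflicts */
	return false;
}

static unsigned long hole_top_down(unsigned long lo, unsigned long hi,
				   unsigned long size, unsigned long align)
{
	unsigned long start = (hi - size + 1) & ~(align - 1);

	while (start >= lo) {
		if (!conflicts(start, start + size - 1))
			return start;	/* found a free range */
		start -= 4096;		/* slide down one page */
	}
	return 0;			/* no hole in [lo, hi] */
}
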
+
+/*
+ * Helper function for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
+                    unsigned long memsz, unsigned long buf_align,
+                    unsigned long buf_min, unsigned long buf_max,
+                    bool top_down, unsigned long *load_addr)
+{
+
+       struct kexec_segment *ksegment;
+       struct kexec_buf buf, *kbuf;
+       int ret;
+
+       /* Currently, adding a segment this way is allowed only in file mode */
+       if (!image->file_mode)
+               return -EINVAL;
+
+       if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+               return -EINVAL;
+
+       /*
+        * Make sure we are not trying to add a buffer after the control
+        * pages have been allocated. All segments must be placed before
+        * any control pages are allocated, because the control page
+        * allocation logic walks the list of segments to check for
+        * destination overlaps.
+        */
+       if (!list_empty(&image->control_pages)) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       memset(&buf, 0, sizeof(struct kexec_buf));
+       kbuf = &buf;
+       kbuf->image = image;
+       kbuf->buffer = buffer;
+       kbuf->bufsz = bufsz;
+
+       kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+       kbuf->buf_align = max(buf_align, PAGE_SIZE);
+       kbuf->buf_min = buf_min;
+       kbuf->buf_max = buf_max;
+       kbuf->top_down = top_down;
+
+       /* Walk the RAM ranges and allocate a suitable range for the buffer */
+       if (image->type == KEXEC_TYPE_CRASH)
+               ret = walk_iomem_res("Crash kernel",
+                                    IORESOURCE_MEM | IORESOURCE_BUSY,
+                                    crashk_res.start, crashk_res.end, kbuf,
+                                    locate_mem_hole_callback);
+       else
+               ret = walk_system_ram_res(0, -1, kbuf,
+                                         locate_mem_hole_callback);
+       if (ret != 1) {
+               /* A suitable memory range could not be found for buffer */
+               return -EADDRNOTAVAIL;
+       }
+
+       /* Found a suitable memory range */
+       ksegment = &image->segment[image->nr_segments - 1];
+       *load_addr = ksegment->mem;
+       return 0;
+}
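
A hedged sketch of how an architecture's image loader might call
kexec_add_buffer() to place a kernel buffer; the bounds and alignment are
illustrative, not values mandated by the interface:

/* Place buf somewhere above 16 MiB, searching top down. */
static int example_place_buffer(struct kimage *image, char *buf,
				unsigned long len, unsigned long *load_addr)
{
	return kexec_add_buffer(image, buf, len,
				len,		/* memsz; rounded up to a page */
				PAGE_SIZE,	/* minimum alignment */
				16 * 1024 * 1024,	/* buf_min (illustrative) */
				-1UL,		/* buf_max: no upper bound */
				true,		/* search top down */
				load_addr);
}
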
+
+/* Calculate and store the digest of segments */
+static int kexec_calculate_store_digests(struct kimage *image)
+{
+       struct crypto_shash *tfm;
+       struct shash_desc *desc;
+       int ret = 0, i, j, zero_buf_sz, sha_region_sz;
+       size_t desc_size, nullsz;
+       char *digest;
+       void *zero_buf;
+       struct kexec_sha_region *sha_regions;
+       struct purgatory_info *pi = &image->purgatory_info;
+
+       zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
+       zero_buf_sz = PAGE_SIZE;
+
+       tfm = crypto_alloc_shash("sha256", 0, 0);
+       if (IS_ERR(tfm)) {
+               ret = PTR_ERR(tfm);
+               goto out;
+       }
+
+       desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
+       desc = kzalloc(desc_size, GFP_KERNEL);
+       if (!desc) {
+               ret = -ENOMEM;
+               goto out_free_tfm;
+       }
+
+       sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
+       sha_regions = vzalloc(sha_region_sz);
+       if (!sha_regions)
+               goto out_free_desc;
+
+       desc->tfm   = tfm;
+       desc->flags = 0;
+
+       ret = crypto_shash_init(desc);
+       if (ret < 0)
+               goto out_free_sha_regions;
+
+       digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
+       if (!digest) {
+               ret = -ENOMEM;
+               goto out_free_sha_regions;
+       }
+
+       for (j = i = 0; i < image->nr_segments; i++) {
+               struct kexec_segment *ksegment;
+
+               ksegment = &image->segment[i];
+               /*
+                * Skip purgatory as it will be modified once we put digest
+                * info in purgatory.
+                */
+               if (ksegment->kbuf == pi->purgatory_buf)
+                       continue;
+
+               ret = crypto_shash_update(desc, ksegment->kbuf,
+                                         ksegment->bufsz);
+               if (ret)
+                       break;
+
+               /*
+                * Assume the rest of the buffer is zero-filled and
+                * update the digest accordingly.
+                */
+               nullsz = ksegment->memsz - ksegment->bufsz;
+               while (nullsz) {
+                       unsigned long bytes = nullsz;
+
+                       if (bytes > zero_buf_sz)
+                               bytes = zero_buf_sz;
+                       ret = crypto_shash_update(desc, zero_buf, bytes);
+                       if (ret)
+                               break;
+                       nullsz -= bytes;
+               }
+
+               if (ret)
+                       break;
+
+               sha_regions[j].start = ksegment->mem;
+               sha_regions[j].len = ksegment->memsz;
+               j++;
+       }
+
+       if (!ret) {
+               ret = crypto_shash_final(desc, digest);
+               if (ret)
+                       goto out_free_digest;
+               ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
+                                               sha_regions, sha_region_sz, 0);
+               if (ret)
+                       goto out_free_digest;
+
+               ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
+                                               digest, SHA256_DIGEST_SIZE, 0);
+               if (ret)
+                       goto out_free_digest;
+       }
+
+out_free_digest:
+       kfree(digest);
+out_free_sha_regions:
+       vfree(sha_regions);
+out_free_desc:
+       kfree(desc);
+out_free_tfm:
+       crypto_free_shash(tfm);
+out:
+       return ret;
+}
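
The digest loop above hashes each segment's populated bytes and then feeds
zeroes up to memsz, so the stored digest matches the zero-filled segment as
it will exist in memory. A stand-alone sketch, with hash_update() standing
in for crypto_shash_update():

#include <stddef.h>

static int hash_update(void *st, const void *p, size_t n)
{
	(void)st; (void)p; (void)n;	/* stand-in for crypto_shash_update() */
	return 0;
}

static int digest_segment(void *st, const char *kbuf,
			  size_t bufsz, size_t memsz)
{
	static const char zeros[256];
	size_t pad = memsz - bufsz;
	int ret = hash_update(st, kbuf, bufsz);

	while (!ret && pad) {		/* hash the zero-filled tail */
		size_t n = pad < sizeof(zeros) ? pad : sizeof(zeros);

		ret = hash_update(st, zeros, n);
		pad -= n;
	}
	return ret;
}
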
+
+/* Actually load purgatory. A lot of this code is taken from kexec-tools. */
+static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
+                                 unsigned long max, int top_down)
+{
+       struct purgatory_info *pi = &image->purgatory_info;
+       unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
+       unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
+       unsigned char *buf_addr, *src;
+       int i, ret = 0, entry_sidx = -1;
+       const Elf_Shdr *sechdrs_c;
+       Elf_Shdr *sechdrs = NULL;
+       void *purgatory_buf = NULL;
+
+       /*
+        * sechdrs_c points to the section headers in purgatory, which
+        * are read-only. No modifications allowed.
+        */
+       sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
+
+       /*
+        * We cannot modify sechdrs_c[] or its fields; it is read-only.
+        * Copy it to a local buffer where we can store some temporary
+        * data and free it at the end. We need to modify the ->sh_addr
+        * and ->sh_offset fields to keep track of the permanent and
+        * temporary locations of the sections.
+        */
+       sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+       if (!sechdrs)
+               return -ENOMEM;
+
+       memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+
+       /*
+        * There are multiple copies of the sections. The first copy is
+        * embedded in the kernel, in a read-only section. Some of these
+        * sections will be copied to a temporary buffer and relocated,
+        * and those sections will finally be copied to their destination
+        * at segment load time.
+        *
+        * Use ->sh_offset to reflect the section's address in memory. It
+        * points to the original read-only copy if the section is not
+        * allocatable; otherwise it points to the temporary copy that
+        * will be relocated.
+        *
+        * Use ->sh_addr to hold the final address of the section, where
+        * it will reside at execution time.
+        */
+       for (i = 0; i < pi->ehdr->e_shnum; i++) {
+               if (sechdrs[i].sh_type == SHT_NOBITS)
+                       continue;
+
+               sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
+                                               sechdrs[i].sh_offset;
+       }
+
+       /*
+        * Identify entry point section and make entry relative to section
+        * start.
+        */
+       entry = pi->ehdr->e_entry;
+       for (i = 0; i < pi->ehdr->e_shnum; i++) {
+               if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+                       continue;
+
+               if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
+                       continue;
+
+               /* Make entry section relative */
+               if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
+                   ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
+                    pi->ehdr->e_entry)) {
+                       entry_sidx = i;
+                       entry -= sechdrs[i].sh_addr;
+                       break;
+               }
+       }
+
+       /* Determine how much memory is needed to load relocatable object. */
+       buf_align = 1;
+       bss_align = 1;
+       buf_sz = 0;
+       bss_sz = 0;
+
+       for (i = 0; i < pi->ehdr->e_shnum; i++) {
+               if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+                       continue;
+
+               align = sechdrs[i].sh_addralign;
+               if (sechdrs[i].sh_type != SHT_NOBITS) {
+                       if (buf_align < align)
+                               buf_align = align;
+                       buf_sz = ALIGN(buf_sz, align);
+                       buf_sz += sechdrs[i].sh_size;
+               } else {
+                       /* bss section */
+                       if (bss_align < align)
+                               bss_align = align;
+                       bss_sz = ALIGN(bss_sz, align);
+                       bss_sz += sechdrs[i].sh_size;
+               }
+       }
+
+       /* Determine the bss padding required to align bss properly */
+       bss_pad = 0;
+       if (buf_sz & (bss_align - 1))
+               bss_pad = bss_align - (buf_sz & (bss_align - 1));
+
+       memsz = buf_sz + bss_pad + bss_sz;
+
+       /* Allocate buffer for purgatory */
+       purgatory_buf = vzalloc(buf_sz);
+       if (!purgatory_buf) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       if (buf_align < bss_align)
+               buf_align = bss_align;
+
+       /* Add buffer to segment list */
+       ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
+                               buf_align, min, max, top_down,
+                               &pi->purgatory_load_addr);
+       if (ret)
+               goto out;
+
+       /* Load SHF_ALLOC sections */
+       buf_addr = purgatory_buf;
+       load_addr = curr_load_addr = pi->purgatory_load_addr;
+       bss_addr = load_addr + buf_sz + bss_pad;
+
+       for (i = 0; i < pi->ehdr->e_shnum; i++) {
+               if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+                       continue;
+
+               align = sechdrs[i].sh_addralign;
+               if (sechdrs[i].sh_type != SHT_NOBITS) {
+                       curr_load_addr = ALIGN(curr_load_addr, align);
+                       offset = curr_load_addr - load_addr;
+                       /* We already modified ->sh_offset to keep the src addr */
+                       src = (char *) sechdrs[i].sh_offset;
+                       memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
+
+                       /* Store load address and source address of section */
+                       sechdrs[i].sh_addr = curr_load_addr;
+
+                       /*
+                        * This section got copied to temporary buffer. Update
+                        * ->sh_offset accordingly.
+                        */
+                       sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
+
+                       /* Advance to the next address */
+                       curr_load_addr += sechdrs[i].sh_size;
+               } else {
+                       bss_addr = ALIGN(bss_addr, align);
+                       sechdrs[i].sh_addr = bss_addr;
+                       bss_addr += sechdrs[i].sh_size;
+               }
+       }
+
+       /* Update entry point based on load address of text section */
+       if (entry_sidx >= 0)
+               entry += sechdrs[entry_sidx].sh_addr;
+
+       /* Make kernel jump to purgatory after shutdown */
+       image->start = entry;
+
+       /* Used later to get/set symbol values */
+       pi->sechdrs = sechdrs;
+
+       /*
+        * Used later to identify which section is purgatory, so that it
+        * can be skipped during checksumming.
+        */
+       pi->purgatory_buf = purgatory_buf;
+       return ret;
+out:
+       vfree(sechdrs);
+       vfree(purgatory_buf);
+       return ret;
+}
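
A worked instance of the bss_pad arithmetic used above, which keeps the bss
start aligned after the progbits bytes (the sizes are made up for the
example):

#include <stdio.h>

int main(void)
{
	unsigned long buf_sz = 0x1234;	/* SHF_ALLOC progbits bytes */
	unsigned long bss_align = 0x40;	/* largest bss sh_addralign */
	unsigned long bss_pad = 0;

	if (buf_sz & (bss_align - 1))
		bss_pad = bss_align - (buf_sz & (bss_align - 1));

	/* 0x1234 & 0x3f = 0x34, so bss_pad = 0x0c and the bss starts
	 * at load_addr + 0x1240, which is 0x40-aligned. */
	printf("bss_pad = 0x%lx\n", bss_pad);
	return 0;
}
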
+
+static int kexec_apply_relocations(struct kimage *image)
+{
+       int i, ret;
+       struct purgatory_info *pi = &image->purgatory_info;
+       Elf_Shdr *sechdrs = pi->sechdrs;
+
+       /* Apply relocations */
+       for (i = 0; i < pi->ehdr->e_shnum; i++) {
+               Elf_Shdr *section, *symtab;
+
+               if (sechdrs[i].sh_type != SHT_RELA &&
+                   sechdrs[i].sh_type != SHT_REL)
+                       continue;
+
+               /*
+                * For a section of type SHT_RELA/SHT_REL, ->sh_link
+                * contains the section header index of the associated
+                * symbol table, and ->sh_info contains the section header
+                * index of the section to which the relocations apply.
+                */
+               if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
+                   sechdrs[i].sh_link >= pi->ehdr->e_shnum)
+                       return -ENOEXEC;
+
+               section = &sechdrs[sechdrs[i].sh_info];
+               symtab = &sechdrs[sechdrs[i].sh_link];
+
+               if (!(section->sh_flags & SHF_ALLOC))
+                       continue;
+
+               /*
+                * symtab->sh_link contains the section header index of
+                * the associated string table.
+                */
+               if (symtab->sh_link >= pi->ehdr->e_shnum)
+                       /* Invalid section number? */
+                       continue;
+
+               /*
+                * The respective architecture must provide support for
+                * applying relocations of type SHT_RELA/SHT_REL.
+                */
+               if (sechdrs[i].sh_type == SHT_RELA)
+                       ret = arch_kexec_apply_relocations_add(pi->ehdr,
+                                                              sechdrs, i);
+               else if (sechdrs[i].sh_type == SHT_REL)
+                       ret = arch_kexec_apply_relocations(pi->ehdr,
+                                                          sechdrs, i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/* Load relocatable purgatory object and relocate it appropriately */
+int kexec_load_purgatory(struct kimage *image, unsigned long min,
+                        unsigned long max, int top_down,
+                        unsigned long *load_addr)
+{
+       struct purgatory_info *pi = &image->purgatory_info;
+       int ret;
+
+       if (kexec_purgatory_size <= 0)
+               return -EINVAL;
+
+       if (kexec_purgatory_size < sizeof(Elf_Ehdr))
+               return -ENOEXEC;
+
+       pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
+
+       if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
+           || pi->ehdr->e_type != ET_REL
+           || !elf_check_arch(pi->ehdr)
+           || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
+               return -ENOEXEC;
+
+       if (pi->ehdr->e_shoff >= kexec_purgatory_size
+           || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
+           kexec_purgatory_size - pi->ehdr->e_shoff))
+               return -ENOEXEC;
+
+       ret = __kexec_load_purgatory(image, min, max, top_down);
+       if (ret)
+               return ret;
+
+       ret = kexec_apply_relocations(image);
+       if (ret)
+               goto out;
+
+       *load_addr = pi->purgatory_load_addr;
+       return 0;
+out:
+       vfree(pi->sechdrs);
+       vfree(pi->purgatory_buf);
+       return ret;
+}
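
A hedged usage sketch: a file-based loader places purgatory in a suitable window and records where it landed. The bounds and top_down choice below are illustrative, not taken from this patch:

        unsigned long purgatory_load_addr;
        int ret;

        /* Search below 4G, preferring the highest fitting hole */
        ret = kexec_load_purgatory(image, 0x3000, 1UL << 32, 1,
                                   &purgatory_load_addr);
        if (ret)
                return ret;
        /* purgatory_load_addr then feeds the arch's entry/boot setup */
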
+
+static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
+                                           const char *name)
+{
+       Elf_Sym *syms;
+       Elf_Shdr *sechdrs;
+       Elf_Ehdr *ehdr;
+       int i, k;
+       const char *strtab;
+
+       if (!pi->sechdrs || !pi->ehdr)
+               return NULL;
+
+       sechdrs = pi->sechdrs;
+       ehdr = pi->ehdr;
+
+       for (i = 0; i < ehdr->e_shnum; i++) {
+               if (sechdrs[i].sh_type != SHT_SYMTAB)
+                       continue;
+
+               if (sechdrs[i].sh_link >= ehdr->e_shnum)
+                       /* Invalid strtab section number */
+                       continue;
+               strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
+               syms = (Elf_Sym *)sechdrs[i].sh_offset;
+
+               /* Go through symbols for a match */
+               for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
+                       if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
+                               continue;
+
+                       if (strcmp(strtab + syms[k].st_name, name) != 0)
+                               continue;
+
+                       if (syms[k].st_shndx == SHN_UNDEF ||
+                           syms[k].st_shndx >= ehdr->e_shnum) {
+                               pr_debug("Symbol: %s has bad section index %d.\n",
+                                               name, syms[k].st_shndx);
+                               return NULL;
+                       }
+
+                       /* Found the symbol we are looking for */
+                       return &syms[k];
+               }
+       }
+
+       return NULL;
+}
+
+void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
+{
+       struct purgatory_info *pi = &image->purgatory_info;
+       Elf_Sym *sym;
+       Elf_Shdr *sechdr;
+
+       sym = kexec_purgatory_find_symbol(pi, name);
+       if (!sym)
+               return ERR_PTR(-EINVAL);
+
+       sechdr = &pi->sechdrs[sym->st_shndx];
+
+       /*
+        * Return the address where the symbol will finally be loaded
+        * after kexec_load_segment().
+        */
+       return (void *)(sechdr->sh_addr + sym->st_value);
+}
+
+/*
+ * Get or set the value of a symbol. If "get_value" is true, the symbol
+ * value is returned in buf; otherwise the symbol value is set from the
+ * value in buf.
+ */
+int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
+                                  void *buf, unsigned int size, bool get_value)
+{
+       Elf_Sym *sym;
+       Elf_Shdr *sechdrs;
+       struct purgatory_info *pi = &image->purgatory_info;
+       char *sym_buf;
+
+       sym = kexec_purgatory_find_symbol(pi, name);
+       if (!sym)
+               return -EINVAL;
+
+       if (sym->st_size != size) {
+               pr_err("symbol %s size mismatch: expected %lu actual %u\n",
+                      name, (unsigned long)sym->st_size, size);
+               return -EINVAL;
+       }
+
+       sechdrs = pi->sechdrs;
+
+       if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
+               pr_err("symbol %s is in a bss section. Cannot %s\n", name,
+                      get_value ? "get" : "set");
+               return -EINVAL;
+       }
+
+       sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
+                                       sym->st_value;
+
+       if (get_value)
+               memcpy((void *)buf, sym_buf, size);
+       else
+               memcpy((void *)sym_buf, buf, size);
+
+       return 0;
+}
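
For example, a loader can patch a global exported by purgatory before the checksum is taken. The "stack" symbol and PURGATORY_STACK_TOP below are hypothetical names, used only for illustration:

        unsigned long stack = PURGATORY_STACK_TOP;      /* hypothetical */
        int ret;

        /* get_value == false: write the value into purgatory's copy */
        ret = kexec_purgatory_get_set_symbol(image, "stack", &stack,
                                             sizeof(stack), false);
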
+
 /*
  * Move into place and start executing a preloaded standalone
  * executable.  If nothing was preloaded return an error.
index 62e16cef9cc23e848f524ed650617741d69069bd..d09dc5c32c6740e41a5987cca0252ad86b43bdbd 100644 (file)
@@ -224,6 +224,7 @@ static const struct tnt tnts[] = {
        { TAINT_FIRMWARE_WORKAROUND,    'I', ' ' },
        { TAINT_OOT_MODULE,             'O', ' ' },
        { TAINT_UNSIGNED_MODULE,        'E', ' ' },
+       { TAINT_SOFTLOCKUP,             'L', ' ' },
 };
 
 /**
index 3c2237ac32dbedf56d10444a7feef59e6b35a561..da14b8d092961bf5680bf06004f980ad1ec43859 100644 (file)
@@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock);
 static struct resource *bootmem_resource_free;
 static DEFINE_SPINLOCK(bootmem_resource_lock);
 
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+static struct resource *next_resource(struct resource *p, bool sibling_only)
 {
-       struct resource *p = v;
-       (*pos)++;
+       /* Caller wants to traverse through siblings only */
+       if (sibling_only)
+               return p->sibling;
+
        if (p->child)
                return p->child;
        while (!p->sibling && p->parent)
@@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
        return p->sibling;
 }
 
+static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       struct resource *p = v;
+       (*pos)++;
+       return (void *)next_resource(p, false);
+}
+
 #ifdef CONFIG_PROC_FS
 
 enum { MAX_IORES_LEVEL = 5 };
@@ -322,16 +331,19 @@ int release_resource(struct resource *old)
 
 EXPORT_SYMBOL(release_resource);
 
-#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 /*
- * Finds the lowest memory reosurce exists within [res->start.res->end)
+ * Finds the lowest iomem resource existing within [res->start..res->end);
  * the caller must specify res->start, res->end, res->flags and "name".
  * If found, returns 0, res is overwritten, if not found, returns -1.
+ * This walks through the whole tree, not just the first-level
+ * children, unless first_level_children_only is true.
  */
-static int find_next_system_ram(struct resource *res, char *name)
+static int find_next_iomem_res(struct resource *res, char *name,
+                              bool first_level_children_only)
 {
        resource_size_t start, end;
        struct resource *p;
+       bool sibling_only = false;
 
        BUG_ON(!res);
 
@@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name)
        BUG_ON(start >= end);
 
        read_lock(&resource_lock);
-       for (p = iomem_resource.child; p ; p = p->sibling) {
-               /* system ram is just marked as IORESOURCE_MEM */
+
+       if (first_level_children_only) {
+               p = iomem_resource.child;
+               sibling_only = true;
+       } else
+               p = &iomem_resource;
+
+       while ((p = next_resource(p, sibling_only))) {
                if (p->flags != res->flags)
                        continue;
                if (name && strcmp(p->name, name))
@@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name)
                if ((p->end >= start) && (p->start < end))
                        break;
        }
+
        read_unlock(&resource_lock);
        if (!p)
                return -1;
@@ -364,6 +383,70 @@ static int find_next_system_ram(struct resource *res, char *name)
        return 0;
 }
 
+/*
+ * Walks through iomem resources and calls func() with matching resource
+ * ranges. This walks through the whole tree, not just the first-level
+ * children. All memory ranges that overlap [start, end] and also match
+ * flags and name are valid candidates.
+ *
+ * @name: name of resource
+ * @flags: resource flags
+ * @start: start addr
+ * @end: end addr
+ */
+int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
+               void *arg, int (*func)(u64, u64, void *))
+{
+       struct resource res;
+       u64 orig_end;
+       int ret = -1;
+
+       res.start = start;
+       res.end = end;
+       res.flags = flags;
+       orig_end = res.end;
+       while ((res.start < res.end) &&
+               (!find_next_iomem_res(&res, name, false))) {
+               ret = (*func)(res.start, res.end, arg);
+               if (ret)
+                       break;
+               res.start = res.end + 1;
+               res.end = orig_end;
+       }
+       return ret;
+}
+
+/*
+ * This function calls the callback against all memory ranges of
+ * "System RAM" which are marked as IORESOURCE_MEM and IORESOURCE_BUSY.
+ * For now this function is only for "System RAM", and it deals with
+ * full ranges and not pfns. If resources are not pfn-aligned, dealing
+ * with pfns can truncate ranges.
+ */
+int walk_system_ram_res(u64 start, u64 end, void *arg,
+                               int (*func)(u64, u64, void *))
+{
+       struct resource res;
+       u64 orig_end;
+       int ret = -1;
+
+       res.start = start;
+       res.end = end;
+       res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       orig_end = res.end;
+       while ((res.start < res.end) &&
+               (!find_next_iomem_res(&res, "System RAM", true))) {
+               ret = (*func)(res.start, res.end, arg);
+               if (ret)
+                       break;
+               res.start = res.end + 1;
+               res.end = orig_end;
+       }
+       return ret;
+}
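
A minimal illustrative caller of the new walker, totalling "System RAM"; count_ram_bytes() is not part of this patch:

        static int count_ram_bytes(u64 start, u64 end, void *arg)
        {
                u64 *total = arg;

                *total += end - start + 1;
                return 0;               /* non-zero stops the walk early */
        }

        u64 total = 0;

        walk_system_ram_res(0, (u64)-1, &total, count_ram_bytes);
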
+
+#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
+
 /*
  * This function calls callback against all memory range of "System RAM"
  * which are marked as IORESOURCE_MEM and IORESOURCE_BUSY.
@@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
        res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
        orig_end = res.end;
        while ((res.start < res.end) &&
-               (find_next_system_ram(&res, "System RAM") >= 0)) {
+               (find_next_iomem_res(&res, "System RAM", true) >= 0)) {
                pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
                end_pfn = (res.end + 1) >> PAGE_SHIFT;
                if (end_pfn > pfn)
index 2904a21059145b61e55a3fee9204a64fce48c9ea..391d4ddb6f4bb7e83593841ada8096f9d20eae54 100644 (file)
@@ -25,6 +25,7 @@ cond_syscall(sys_swapon);
 cond_syscall(sys_swapoff);
 cond_syscall(sys_kexec_load);
 cond_syscall(compat_sys_kexec_load);
+cond_syscall(sys_kexec_file_load);
 cond_syscall(sys_init_module);
 cond_syscall(sys_finit_module);
 cond_syscall(sys_delete_module);
@@ -197,6 +198,7 @@ cond_syscall(compat_sys_timerfd_settime);
 cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
+cond_syscall(sys_memfd_create);
 
 /* performance counters: */
 cond_syscall(sys_perf_event_open);
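
The new memfd_create() syscall wired up above can be exercised from userspace; a hedged sketch follows. The numeric fallbacks are the x86_64 values, inlined because headers of this vintage may not yet carry them, and error handling is trimmed for brevity:

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/syscall.h>

#ifndef __NR_memfd_create
#define __NR_memfd_create 319           /* x86_64 */
#endif
#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif
#ifndef F_ADD_SEALS
#define F_ADD_SEALS     1033
#define F_GET_SEALS     1034
#define F_SEAL_SHRINK   0x0002
#define F_SEAL_GROW     0x0004
#define F_SEAL_WRITE    0x0008
#endif

int main(void)
{
        int fd = syscall(__NR_memfd_create, "demo", MFD_ALLOW_SEALING);

        ftruncate(fd, 4096);
        write(fd, "hello", 5);
        /* Freeze the contents: no further grow/shrink/write allowed */
        fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE);
        printf("seals: %#x\n", fcntl(fd, F_GET_SEALS));
        return 0;
}
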
index 12d6ebbfdd836809d41fafbd55f1ddac369809a8..0dbab6d1acb422368d8fc8c04a8eda94ba9342eb 100644 (file)
@@ -14,6 +14,8 @@
  * the GNU General Public License for more details.
  */
 
+#define pr_fmt(fmt) "Kprobe smoke test: " fmt
+
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/random.h>
@@ -41,8 +43,7 @@ static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
 {
        if (preh_val != (rand1 / div_factor)) {
                handler_errors++;
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "incorrect value in post_handler\n");
+               pr_err("incorrect value in post_handler\n");
        }
        posth_val = preh_val + div_factor;
 }
@@ -59,8 +60,7 @@ static int test_kprobe(void)
 
        ret = register_kprobe(&kp);
        if (ret < 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "register_kprobe returned %d\n", ret);
+               pr_err("register_kprobe returned %d\n", ret);
                return ret;
        }
 
@@ -68,14 +68,12 @@ static int test_kprobe(void)
        unregister_kprobe(&kp);
 
        if (preh_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kprobe pre_handler not called\n");
+               pr_err("kprobe pre_handler not called\n");
                handler_errors++;
        }
 
        if (posth_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kprobe post_handler not called\n");
+               pr_err("kprobe post_handler not called\n");
                handler_errors++;
        }
 
@@ -98,8 +96,7 @@ static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
 {
        if (preh_val != (rand1 / div_factor) + 1) {
                handler_errors++;
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "incorrect value in post_handler2\n");
+               pr_err("incorrect value in post_handler2\n");
        }
        posth_val = preh_val + div_factor;
 }
@@ -120,8 +117,7 @@ static int test_kprobes(void)
        kp.flags = 0;
        ret = register_kprobes(kps, 2);
        if (ret < 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "register_kprobes returned %d\n", ret);
+               pr_err("register_kprobes returned %d\n", ret);
                return ret;
        }
 
@@ -130,14 +126,12 @@ static int test_kprobes(void)
        ret = target(rand1);
 
        if (preh_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kprobe pre_handler not called\n");
+               pr_err("kprobe pre_handler not called\n");
                handler_errors++;
        }
 
        if (posth_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kprobe post_handler not called\n");
+               pr_err("kprobe post_handler not called\n");
                handler_errors++;
        }
 
@@ -146,14 +140,12 @@ static int test_kprobes(void)
        ret = target2(rand1);
 
        if (preh_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kprobe pre_handler2 not called\n");
+               pr_err("kprobe pre_handler2 not called\n");
                handler_errors++;
        }
 
        if (posth_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kprobe post_handler2 not called\n");
+               pr_err("kprobe post_handler2 not called\n");
                handler_errors++;
        }
 
@@ -166,8 +158,7 @@ static u32 j_kprobe_target(u32 value)
 {
        if (value != rand1) {
                handler_errors++;
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "incorrect value in jprobe handler\n");
+               pr_err("incorrect value in jprobe handler\n");
        }
 
        jph_val = rand1;
@@ -186,16 +177,14 @@ static int test_jprobe(void)
 
        ret = register_jprobe(&jp);
        if (ret < 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "register_jprobe returned %d\n", ret);
+               pr_err("register_jprobe returned %d\n", ret);
                return ret;
        }
 
        ret = target(rand1);
        unregister_jprobe(&jp);
        if (jph_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "jprobe handler not called\n");
+               pr_err("jprobe handler not called\n");
                handler_errors++;
        }
 
@@ -217,24 +206,21 @@ static int test_jprobes(void)
        jp.kp.flags = 0;
        ret = register_jprobes(jps, 2);
        if (ret < 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "register_jprobes returned %d\n", ret);
+               pr_err("register_jprobes returned %d\n", ret);
                return ret;
        }
 
        jph_val = 0;
        ret = target(rand1);
        if (jph_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "jprobe handler not called\n");
+               pr_err("jprobe handler not called\n");
                handler_errors++;
        }
 
        jph_val = 0;
        ret = target2(rand1);
        if (jph_val == 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "jprobe handler2 not called\n");
+               pr_err("jprobe handler2 not called\n");
                handler_errors++;
        }
        unregister_jprobes(jps, 2);
@@ -256,13 +242,11 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
 
        if (ret != (rand1 / div_factor)) {
                handler_errors++;
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "incorrect value in kretprobe handler\n");
+               pr_err("incorrect value in kretprobe handler\n");
        }
        if (krph_val == 0) {
                handler_errors++;
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "call to kretprobe entry handler failed\n");
+               pr_err("call to kretprobe entry handler failed\n");
        }
 
        krph_val = rand1;
@@ -281,16 +265,14 @@ static int test_kretprobe(void)
 
        ret = register_kretprobe(&rp);
        if (ret < 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "register_kretprobe returned %d\n", ret);
+               pr_err("register_kretprobe returned %d\n", ret);
                return ret;
        }
 
        ret = target(rand1);
        unregister_kretprobe(&rp);
        if (krph_val != rand1) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kretprobe handler not called\n");
+               pr_err("kretprobe handler not called\n");
                handler_errors++;
        }
 
@@ -303,13 +285,11 @@ static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
 
        if (ret != (rand1 / div_factor) + 1) {
                handler_errors++;
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "incorrect value in kretprobe handler2\n");
+               pr_err("incorrect value in kretprobe handler2\n");
        }
        if (krph_val == 0) {
                handler_errors++;
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "call to kretprobe entry handler failed\n");
+               pr_err("call to kretprobe entry handler failed\n");
        }
 
        krph_val = rand1;
@@ -332,24 +312,21 @@ static int test_kretprobes(void)
        rp.kp.flags = 0;
        ret = register_kretprobes(rps, 2);
        if (ret < 0) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "register_kretprobe returned %d\n", ret);
+               pr_err("register_kretprobe returned %d\n", ret);
                return ret;
        }
 
        krph_val = 0;
        ret = target(rand1);
        if (krph_val != rand1) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kretprobe handler not called\n");
+               pr_err("kretprobe handler not called\n");
                handler_errors++;
        }
 
        krph_val = 0;
        ret = target2(rand1);
        if (krph_val != rand1) {
-               printk(KERN_ERR "Kprobe smoke test failed: "
-                               "kretprobe handler2 not called\n");
+               pr_err("kretprobe handler2 not called\n");
                handler_errors++;
        }
        unregister_kretprobes(rps, 2);
@@ -368,7 +345,7 @@ int init_test_probes(void)
                rand1 = prandom_u32();
        } while (rand1 <= div_factor);
 
-       printk(KERN_INFO "Kprobe smoke test started\n");
+       pr_info("started\n");
        num_tests++;
        ret = test_kprobe();
        if (ret < 0)
@@ -402,13 +379,11 @@ int init_test_probes(void)
 #endif /* CONFIG_KRETPROBES */
 
        if (errors)
-               printk(KERN_ERR "BUG: Kprobe smoke test: %d out of "
-                               "%d tests failed\n", errors, num_tests);
+               pr_err("BUG: %d out of %d tests failed\n", errors, num_tests);
        else if (handler_errors)
-               printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) "
-                               "running handlers\n", handler_errors);
+               pr_err("BUG: %d error(s) running handlers\n", handler_errors);
        else
-               printk(KERN_INFO "Kprobe smoke test passed successfully\n");
+               pr_info("passed successfully\n");
 
        return 0;
 }
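
How the conversion above works, simplified from include/linux/printk.h (this is background, not code from the patch): each pr_* helper pastes pr_fmt() around its format string at the call site, so a single #define prefixes every message in the file:

#define pr_fmt(fmt) "Kprobe smoke test: " fmt
#define pr_err(fmt, ...)  printk(KERN_ERR  pr_fmt(fmt), ##__VA_ARGS__)
#define pr_info(fmt, ...) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)

/* So pr_err("register_kprobe returned %d\n", ret) expands to
 * printk(KERN_ERR "Kprobe smoke test: register_kprobe returned %d\n", ret). */
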
index fcc02560fd6b8eba124f68c42d8da7ac3420a8c5..aa312b0dc3ec254a55bf77c37116170b301988e5 100644 (file)
@@ -526,21 +526,21 @@ static void m_stop(struct seq_file *seq, void *v)
        return;
 }
 
-struct seq_operations proc_uid_seq_operations = {
+const struct seq_operations proc_uid_seq_operations = {
        .start = uid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = uid_m_show,
 };
 
-struct seq_operations proc_gid_seq_operations = {
+const struct seq_operations proc_gid_seq_operations = {
        .start = gid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = gid_m_show,
 };
 
-struct seq_operations proc_projid_seq_operations = {
+const struct seq_operations proc_projid_seq_operations = {
        .start = projid_m_start,
        .stop = m_stop,
        .next = m_next,
index 51b29e9d2ba65a700c15cb71923bce1359ebfa41..a8d6914030fe6ec5aa9e08154ce29b480d2c0745 100644 (file)
@@ -368,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                        smp_mb__after_atomic();
                }
 
+               add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
                if (softlockup_panic)
                        panic("softlockup: hung tasks");
                __this_cpu_write(soft_watchdog_warn, true);
index df872659ddd3d699a3f204b50720c59ff0f29092..a5ce0c7f6c302058da57fe19bda51b9286cdd883 100644 (file)
@@ -508,4 +508,11 @@ config UCS2_STRING
 
 source "lib/fonts/Kconfig"
 
+#
+# sg chaining option
+#
+
+config ARCH_HAS_SG_CHAIN
+       def_bool n
+
 endmenu
index 86069d74c062d17ec094f44754ab197483bf910e..37f3c786348f21a233c7f8dc486d8ef3124e8dba 100644 (file)
@@ -54,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = {
        { {0, 0}, NULL, NULL }
 };
 
-decompress_fn __init decompress_method(const unsigned char *inbuf, int len,
+decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
                                const char **name)
 {
        const struct compress_format *cf;
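
All of the decompressor entry points below are widened the same way. A hedged sketch of a caller under the new API; read_source(), write_sink(), inbuf, in_len and error_fn are stand-ins, not names from this patch:

        /* Both callbacks now take and return long, so buffer sizes and
         * stream positions beyond 2GB no longer wrap. */
        static long my_fill(void *buf, unsigned long len)
        {
                return read_source(buf, len);   /* bytes read, -1 on error */
        }

        static long my_flush(void *buf, unsigned long len)
        {
                return write_sink(buf, len);    /* must return len on success */
        }

        const char *name;
        long pos = 0;
        int ret = -1;
        decompress_fn decomp = decompress_method(inbuf, in_len, &name);

        if (decomp)
                ret = decomp(inbuf, in_len, my_fill, my_flush,
                             NULL, &pos, error_fn);
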
index 31c5f7675fbfa270a70a005247c8a3773067f5c0..8290e0bef7eabd9f4284b9e2558f8a4e9afab6b0 100644 (file)
@@ -92,8 +92,8 @@ struct bunzip_data {
        /* State for interrupting output loop */
        int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
        /* I/O tracking data (file handles, buffers, positions, etc.) */
-       int (*fill)(void*, unsigned int);
-       int inbufCount, inbufPos /*, outbufPos*/;
+       long (*fill)(void*, unsigned long);
+       long inbufCount, inbufPos /*, outbufPos*/;
        unsigned char *inbuf /*,*outbuf*/;
        unsigned int inbufBitCount, inbufBits;
        /* The CRC values stored in the block header and calculated from the
@@ -617,7 +617,7 @@ decode_next_byte:
        goto decode_next_byte;
 }
 
-static int INIT nofill(void *buf, unsigned int len)
+static long INIT nofill(void *buf, unsigned long len)
 {
        return -1;
 }
@@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len)
 /* Allocate the structure, read file header.  If in_fd ==-1, inbuf must contain
    a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
    ignored, and data is read from file handle into temporary buffer. */
-static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
-                            int (*fill)(void*, unsigned int))
+static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
+                            long (*fill)(void*, unsigned long))
 {
        struct bunzip_data *bd;
        unsigned int i, j, c;
@@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
 
 /* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,
    not end of file.) */
-STATIC int INIT bunzip2(unsigned char *buf, int len,
-                       int(*fill)(void*, unsigned int),
-                       int(*flush)(void*, unsigned int),
+STATIC int INIT bunzip2(unsigned char *buf, long len,
+                       long (*fill)(void*, unsigned long),
+                       long (*flush)(void*, unsigned long),
                        unsigned char *outbuf,
-                       int *pos,
+                       long *pos,
                        void(*error)(char *x))
 {
        struct bunzip_data *bd;
@@ -743,11 +743,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int len,
-                       int(*fill)(void*, unsigned int),
-                       int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long len,
+                       long (*fill)(void*, unsigned long),
+                       long (*flush)(void*, unsigned long),
                        unsigned char *outbuf,
-                       int *pos,
+                       long *pos,
                        void(*error)(char *x))
 {
        return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
index 0edfd742a1542c19b9432b65e9ce0751306a76f2..d4c7891635ecc2b1fb70b9f4fc75a216c782fe0d 100644 (file)
 
 #define GZIP_IOBUF_SIZE (16*1024)
 
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
 {
        return -1;
 }
 
 /* Included from initramfs et al code */
-STATIC int INIT gunzip(unsigned char *buf, int len,
-                      int(*fill)(void*, unsigned int),
-                      int(*flush)(void*, unsigned int),
+STATIC int INIT gunzip(unsigned char *buf, long len,
+                      long (*fill)(void*, unsigned long),
+                      long (*flush)(void*, unsigned long),
                       unsigned char *out_buf,
-                      int *pos,
+                      long *pos,
                       void(*error)(char *x)) {
        u8 *zbuf;
        struct z_stream_s *strm;
@@ -142,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 
                /* Write any data generated */
                if (flush && strm->next_out > out_buf) {
-                       int l = strm->next_out - out_buf;
+                       long l = strm->next_out - out_buf;
                        if (l != flush(out_buf, l)) {
                                rc = -1;
                                error("write error");
index 7d1e83caf8ad8512c3a0d39e4f5e5b5c39de619f..40f66ebe57b77a0566460a2407bdd713d6e0b3fc 100644 (file)
 #define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
 #define ARCHIVE_MAGICNUMBER 0x184C2102
 
-STATIC inline int INIT unlz4(u8 *input, int in_len,
-                               int (*fill) (void *, unsigned int),
-                               int (*flush) (void *, unsigned int),
-                               u8 *output, int *posp,
+STATIC inline int INIT unlz4(u8 *input, long in_len,
+                               long (*fill)(void *, unsigned long),
+                               long (*flush)(void *, unsigned long),
+                               u8 *output, long *posp,
                                void (*error) (char *x))
 {
        int ret = -1;
@@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
        u8 *inp;
        u8 *inp_start;
        u8 *outp;
-       int size = in_len;
+       long size = in_len;
 #ifdef PREBOOT
        size_t out_len = get_unaligned_le32(input + in_len);
 #endif
@@ -83,13 +83,20 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
        if (posp)
                *posp = 0;
 
-       if (fill)
-               fill(inp, 4);
+       if (fill) {
+               size = fill(inp, 4);
+               if (size < 4) {
+                       error("data corrupted");
+                       goto exit_2;
+               }
+       }
 
        chunksize = get_unaligned_le32(inp);
        if (chunksize == ARCHIVE_MAGICNUMBER) {
-               inp += 4;
-               size -= 4;
+               if (!fill) {
+                       inp += 4;
+                       size -= 4;
+               }
        } else {
                error("invalid header");
                goto exit_2;
@@ -100,29 +107,44 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
 
        for (;;) {
 
-               if (fill)
-                       fill(inp, 4);
+               if (fill) {
+                       size = fill(inp, 4);
+                       if (size == 0)
+                               break;
+                       if (size < 4) {
+                               error("data corrupted");
+                               goto exit_2;
+                       }
+               }
 
                chunksize = get_unaligned_le32(inp);
                if (chunksize == ARCHIVE_MAGICNUMBER) {
-                       inp += 4;
-                       size -= 4;
+                       if (!fill) {
+                               inp += 4;
+                               size -= 4;
+                       }
                        if (posp)
                                *posp += 4;
                        continue;
                }
-               inp += 4;
-               size -= 4;
+
 
                if (posp)
                        *posp += 4;
 
-               if (fill) {
+               if (!fill) {
+                       inp += 4;
+                       size -= 4;
+               } else {
                        if (chunksize > lz4_compressbound(uncomp_chunksize)) {
                                error("chunk length is longer than allocated");
                                goto exit_2;
                        }
-                       fill(inp, chunksize);
+                       size = fill(inp, chunksize);
+                       if (size < chunksize) {
+                               error("data corrupted");
+                               goto exit_2;
+                       }
                }
 #ifdef PREBOOT
                if (out_len >= uncomp_chunksize) {
@@ -149,18 +171,17 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
                if (posp)
                        *posp += chunksize;
 
-               size -= chunksize;
+               if (!fill) {
+                       size -= chunksize;
 
-               if (size == 0)
-                       break;
-               else if (size < 0) {
-                       error("data corrupted");
-                       goto exit_2;
+                       if (size == 0)
+                               break;
+                       else if (size < 0) {
+                               error("data corrupted");
+                               goto exit_2;
+                       }
+                       inp += chunksize;
                }
-
-               inp += chunksize;
-               if (fill)
-                       inp = inp_start;
        }
 
        ret = 0;
@@ -175,11 +196,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
-                             int(*fill)(void*, unsigned int),
-                             int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+                             long (*fill)(void*, unsigned long),
+                             long (*flush)(void*, unsigned long),
                              unsigned char *output,
-                             int *posp,
+                             long *posp,
                              void(*error)(char *x)
        )
 {
index 32adb73a903811c4b05d49fea6f9401c13cf396e..0be83af62b884c3dbfa9f29f9630a5ecfd605e2f 100644 (file)
@@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size)
 #define LZMA_IOBUF_SIZE        0x10000
 
 struct rc {
-       int (*fill)(void*, unsigned int);
+       long (*fill)(void*, unsigned long);
        uint8_t *ptr;
        uint8_t *buffer;
        uint8_t *buffer_end;
-       int buffer_size;
+       long buffer_size;
        uint32_t code;
        uint32_t range;
        uint32_t bound;
@@ -82,7 +82,7 @@ struct rc {
 #define RC_MODEL_TOTAL_BITS 11
 
 
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
 {
        return -1;
 }
@@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc)
 
 /* Called once */
 static inline void INIT rc_init(struct rc *rc,
-                                      int (*fill)(void*, unsigned int),
-                                      char *buffer, int buffer_size)
+                                      long (*fill)(void*, unsigned long),
+                                      char *buffer, long buffer_size)
 {
        if (fill)
                rc->fill = fill;
@@ -280,7 +280,7 @@ struct writer {
        size_t buffer_pos;
        int bufsize;
        size_t global_pos;
-       int(*flush)(void*, unsigned int);
+       long (*flush)(void*, unsigned long);
        struct lzma_header *header;
 };
 
@@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
 
 
 
-STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
-                             int(*fill)(void*, unsigned int),
-                             int(*flush)(void*, unsigned int),
+STATIC inline int INIT unlzma(unsigned char *buf, long in_len,
+                             long (*fill)(void*, unsigned long),
+                             long (*flush)(void*, unsigned long),
                              unsigned char *output,
-                             int *posp,
+                             long *posp,
                              void(*error)(char *x)
        )
 {
@@ -667,11 +667,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
-                             int(*fill)(void*, unsigned int),
-                             int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+                             long (*fill)(void*, unsigned long),
+                             long (*flush)(void*, unsigned long),
                              unsigned char *output,
-                             int *posp,
+                             long *posp,
                              void(*error)(char *x)
        )
 {
index 960183d4258f2bffda4dfa421525d04cdd40a949..b94a31bdd87d15f34a7f4902eb6170f800c01206 100644 (file)
@@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = {
 #define HEADER_SIZE_MIN       (9 + 7     + 4 + 8     + 1       + 4)
 #define HEADER_SIZE_MAX       (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
 
-STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
+STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len)
 {
        int l;
        u8 *parse = input;
@@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
        return 1;
 }
 
-STATIC inline int INIT unlzo(u8 *input, int in_len,
-                               int (*fill) (void *, unsigned int),
-                               int (*flush) (void *, unsigned int),
-                               u8 *output, int *posp,
+STATIC int INIT unlzo(u8 *input, long in_len,
+                               long (*fill)(void *, unsigned long),
+                               long (*flush)(void *, unsigned long),
+                               u8 *output, long *posp,
                                void (*error) (char *x))
 {
        u8 r = 0;
-       int skip = 0;
+       long skip = 0;
        u32 src_len, dst_len;
        size_t tmp;
        u8 *in_buf, *in_buf_save, *out_buf;
index 9f34eb56854d64eeebd4dcfcc9993a64c6aa585a..b07a78340e9d315006a97194fbcccae0c4c16509 100644 (file)
@@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size)
  * both input and output buffers are available as a single chunk, i.e. when
  * fill() and flush() won't be used.
  */
-STATIC int INIT unxz(unsigned char *in, int in_size,
-                    int (*fill)(void *dest, unsigned int size),
-                    int (*flush)(void *src, unsigned int size),
-                    unsigned char *out, int *in_used,
+STATIC int INIT unxz(unsigned char *in, long in_size,
+                    long (*fill)(void *dest, unsigned long size),
+                    long (*flush)(void *src, unsigned long size),
+                    unsigned char *out, long *in_used,
                     void (*error)(char *x))
 {
        struct xz_buf b;
@@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size,
                                 * returned by xz_dec_run(), but probably
                                 * it's not too bad.
                                 */
-                               if (flush(b.out, b.out_pos) != (int)b.out_pos)
+                               if (flush(b.out, b.out_pos) != (long)b.out_pos)
                                        ret = XZ_BUF_ERROR;
 
                                b.out_pos = 0;
index 39158abebad175453b7f9f5a520300c4ac57edd8..50be3fa9b657d95d786b02c70f507ab27807bbd3 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -590,26 +590,27 @@ static void __idr_remove_all(struct idr *idp)
        struct idr_layer **paa = &pa[0];
 
        n = idp->layers * IDR_BITS;
-       p = idp->top;
+       *paa = idp->top;
        RCU_INIT_POINTER(idp->top, NULL);
        max = idr_max(idp->layers);
 
        id = 0;
        while (id >= 0 && id <= max) {
+               p = *paa;
                while (n > IDR_BITS && p) {
                        n -= IDR_BITS;
-                       *paa++ = p;
                        p = p->ary[(id >> n) & IDR_MASK];
+                       *++paa = p;
                }
 
                bt_mask = id;
                id += 1 << n;
                /* Get the highest bit that the above add changed from 0->1. */
                while (n < fls(id ^ bt_mask)) {
-                       if (p)
-                               free_layer(idp, p);
+                       if (*paa)
+                               free_layer(idp, *paa);
                        n += IDR_BITS;
-                       p = *--paa;
+                       --paa;
                }
        }
        idp->layers = 0;
@@ -692,15 +693,16 @@ int idr_for_each(struct idr *idp,
        struct idr_layer **paa = &pa[0];
 
        n = idp->layers * IDR_BITS;
-       p = rcu_dereference_raw(idp->top);
+       *paa = rcu_dereference_raw(idp->top);
        max = idr_max(idp->layers);
 
        id = 0;
        while (id >= 0 && id <= max) {
+               p = *paa;
                while (n > 0 && p) {
                        n -= IDR_BITS;
-                       *paa++ = p;
                        p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+                       *++paa = p;
                }
 
                if (p) {
@@ -712,7 +714,7 @@ int idr_for_each(struct idr *idp,
                id += 1 << n;
                while (n < fls(id)) {
                        n += IDR_BITS;
-                       p = *--paa;
+                       --paa;
                }
        }
 
@@ -740,17 +742,18 @@ void *idr_get_next(struct idr *idp, int *nextidp)
        int n, max;
 
        /* find first ent */
-       p = rcu_dereference_raw(idp->top);
+       p = *paa = rcu_dereference_raw(idp->top);
        if (!p)
                return NULL;
        n = (p->layer + 1) * IDR_BITS;
        max = idr_max(p->layer + 1);
 
        while (id >= 0 && id <= max) {
+               p = *paa;
                while (n > 0 && p) {
                        n -= IDR_BITS;
-                       *paa++ = p;
                        p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+                       *++paa = p;
                }
 
                if (p) {
@@ -768,7 +771,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
                id = round_up(id + 1, 1 << n);
                while (n < fls(id)) {
                        n += IDR_BITS;
-                       p = *--paa;
+                       --paa;
                }
        }
        return NULL;
index d79b9d222065bd9467e4e091ad850a2ce4d93c94..90ba1eb1df06e5283a2dad9132b53346f5106812 100644 (file)
@@ -561,8 +561,7 @@ EXPORT_SYMBOL(__kfifo_to_user_r);
 unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo,
        struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
 {
-       if (!nents)
-               BUG();
+       BUG_ON(!nents);
 
        len = __kfifo_max_r(len, recsize);
 
@@ -585,8 +584,7 @@ EXPORT_SYMBOL(__kfifo_dma_in_finish_r);
 unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo,
        struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
 {
-       if (!nents)
-               BUG();
+       BUG_ON(!nents);
 
        len = __kfifo_max_r(len, recsize);
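
The replacement is behavior-preserving: from include/asm-generic/bug.h, the generic definition is essentially

        #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)

so "if (!nents) BUG();" and "BUG_ON(!nents);" compile to the same thing, and the latter is the idiomatic spelling.
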
 
index 65f4effd117f4350bb5dde964eab219f67caf701..c16c81a3d430e84a8084de93e97900715a8e6b8e 100644 (file)
@@ -101,7 +101,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
                                 *      / \          / \
                                 *     p   u  -->   P   U
                                 *    /            /
-                                *   n            N
+                                *   n            n
                                 *
                                 * However, since g's parent might be red, and
                                 * 4) does not allow this, we need to recurse
index b4415fceb7e7706ef5de6a9d78c413cd826f25ee..9cdf62f8accdeaf777e8661ba51e3c08617ad06e 100644 (file)
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents);
  **/
 struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
 {
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
        struct scatterlist *ret = &sgl[nents - 1];
 #else
        struct scatterlist *sg, *ret = NULL;
@@ -255,7 +255,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
 
        if (nents == 0)
                return -EINVAL;
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
        if (WARN_ON_ONCE(nents > max_ents))
                return -EINVAL;
 #endif
index af19a6b079f5a5ae3a001079cb5b564260be1c22..f501b56ec2c6e2c3d3c662669afbffc9017797b5 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
 #include <linux/rmap.h>
@@ -233,7 +234,6 @@ void delete_from_page_cache(struct page *page)
        spin_lock_irq(&mapping->tree_lock);
        __delete_from_page_cache(page, NULL);
        spin_unlock_irq(&mapping->tree_lock);
-       mem_cgroup_uncharge_cache_page(page);
 
        if (freepage)
                freepage(page);
@@ -489,8 +489,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
                if (PageSwapBacked(new))
                        __inc_zone_page_state(new, NR_SHMEM);
                spin_unlock_irq(&mapping->tree_lock);
-               /* mem_cgroup codes must not be called under tree_lock */
-               mem_cgroup_replace_page_cache(old, new);
+               mem_cgroup_migrate(old, new, true);
                radix_tree_preload_end();
                if (freepage)
                        freepage(old);
@@ -548,19 +547,24 @@ static int __add_to_page_cache_locked(struct page *page,
                                      pgoff_t offset, gfp_t gfp_mask,
                                      void **shadowp)
 {
+       int huge = PageHuge(page);
+       struct mem_cgroup *memcg;
        int error;
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 
-       error = mem_cgroup_charge_file(page, current->mm,
-                                       gfp_mask & GFP_RECLAIM_MASK);
-       if (error)
-               return error;
+       if (!huge) {
+               error = mem_cgroup_try_charge(page, current->mm,
+                                             gfp_mask, &memcg);
+               if (error)
+                       return error;
+       }
 
        error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
        if (error) {
-               mem_cgroup_uncharge_cache_page(page);
+               if (!huge)
+                       mem_cgroup_cancel_charge(page, memcg);
                return error;
        }
 
@@ -575,13 +579,16 @@ static int __add_to_page_cache_locked(struct page *page,
                goto err_insert;
        __inc_zone_page_state(page, NR_FILE_PAGES);
        spin_unlock_irq(&mapping->tree_lock);
+       if (!huge)
+               mem_cgroup_commit_charge(page, memcg, false);
        trace_mm_filemap_add_to_page_cache(page);
        return 0;
 err_insert:
        page->mapping = NULL;
        /* Leave page->index set: truncation relies upon it */
        spin_unlock_irq(&mapping->tree_lock);
-       mem_cgroup_uncharge_cache_page(page);
+       if (!huge)
+               mem_cgroup_cancel_charge(page, memcg);
        page_cache_release(page);
        return error;
 }
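
The charging sequence above follows the transactional memcg API introduced by this series; schematically (the insert_into_cache() step is a stand-in, not literal code from the patch):

        struct mem_cgroup *memcg;
        int error;

        /* 1. Reserve: may reclaim or fail; page not yet visible anywhere */
        error = mem_cgroup_try_charge(page, current->mm, gfp_mask, &memcg);
        if (error)
                return error;

        /* 2. Make the page live (here: radix tree insertion) */
        error = insert_into_cache(page);        /* stand-in step */
        if (error) {
                /* 3a. Back out the reservation */
                mem_cgroup_cancel_charge(page, memcg);
                return error;
        }
        /* 3b. Commit once the page is established */
        mem_cgroup_commit_charge(page, memcg, false);
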
index 3630d577e9879e9d6dc6a80912e2eb88d5f1c959..d9a21d06b8623571cabe5f73532d02412a94ae9c 100644 (file)
@@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                                        unsigned long haddr, pmd_t *pmd,
                                        struct page *page)
 {
+       struct mem_cgroup *memcg;
        pgtable_t pgtable;
        spinlock_t *ptl;
 
        VM_BUG_ON_PAGE(!PageCompound(page), page);
+
+       if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+               return VM_FAULT_OOM;
+
        pgtable = pte_alloc_one(mm, haddr);
-       if (unlikely(!pgtable))
+       if (unlikely(!pgtable)) {
+               mem_cgroup_cancel_charge(page, memcg);
                return VM_FAULT_OOM;
+       }
 
        clear_huge_page(page, haddr, HPAGE_PMD_NR);
        /*
@@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
        ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_none(*pmd))) {
                spin_unlock(ptl);
-               mem_cgroup_uncharge_page(page);
+               mem_cgroup_cancel_charge(page, memcg);
                put_page(page);
                pte_free(mm, pgtable);
        } else {
@@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                entry = mk_huge_pmd(page, vma->vm_page_prot);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
                page_add_new_anon_rmap(page, vma, haddr);
+               mem_cgroup_commit_charge(page, memcg, false);
+               lru_cache_add_active_or_unevictable(page, vma);
                pgtable_trans_huge_deposit(mm, pmd, pgtable);
                set_pmd_at(mm, haddr, pmd, entry);
                add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
@@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
        }
-       if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) {
-               put_page(page);
-               count_vm_event(THP_FAULT_FALLBACK);
-               return VM_FAULT_FALLBACK;
-       }
        if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
-               mem_cgroup_uncharge_page(page);
                put_page(page);
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
                                        struct page *page,
                                        unsigned long haddr)
 {
+       struct mem_cgroup *memcg;
        spinlock_t *ptl;
        pgtable_t pgtable;
        pmd_t _pmd;
@@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
                                               __GFP_OTHER_NODE,
                                               vma, address, page_to_nid(page));
                if (unlikely(!pages[i] ||
-                            mem_cgroup_charge_anon(pages[i], mm,
-                                                      GFP_KERNEL))) {
+                            mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
+                                                  &memcg))) {
                        if (pages[i])
                                put_page(pages[i]);
-                       mem_cgroup_uncharge_start();
                        while (--i >= 0) {
-                               mem_cgroup_uncharge_page(pages[i]);
+                               memcg = (void *)page_private(pages[i]);
+                               set_page_private(pages[i], 0);
+                               mem_cgroup_cancel_charge(pages[i], memcg);
                                put_page(pages[i]);
                        }
-                       mem_cgroup_uncharge_end();
                        kfree(pages);
                        ret |= VM_FAULT_OOM;
                        goto out;
                }
+               set_page_private(pages[i], (unsigned long)memcg);
        }
 
        for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
                pte_t *pte, entry;
                entry = mk_pte(pages[i], vma->vm_page_prot);
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+               memcg = (void *)page_private(pages[i]);
+               set_page_private(pages[i], 0);
                page_add_new_anon_rmap(pages[i], vma, haddr);
+               mem_cgroup_commit_charge(pages[i], memcg, false);
+               lru_cache_add_active_or_unevictable(pages[i], vma);
                pte = pte_offset_map(&_pmd, haddr);
                VM_BUG_ON(!pte_none(*pte));
                set_pte_at(mm, haddr, pte, entry);
@@ -1065,12 +1074,12 @@ out:
 out_free_pages:
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-       mem_cgroup_uncharge_start();
        for (i = 0; i < HPAGE_PMD_NR; i++) {
-               mem_cgroup_uncharge_page(pages[i]);
+               memcg = (void *)page_private(pages[i]);
+               set_page_private(pages[i], 0);
+               mem_cgroup_cancel_charge(pages[i], memcg);
                put_page(pages[i]);
        }
-       mem_cgroup_uncharge_end();
        kfree(pages);
        goto out;
 }
@@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
        spinlock_t *ptl;
        int ret = 0;
        struct page *page = NULL, *new_page;
+       struct mem_cgroup *memcg;
        unsigned long haddr;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
@@ -1132,7 +1142,8 @@ alloc:
                goto out;
        }
 
-       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) {
+       if (unlikely(mem_cgroup_try_charge(new_page, mm,
+                                          GFP_TRANSHUGE, &memcg))) {
                put_page(new_page);
                if (page) {
                        split_huge_page(page);
@@ -1161,7 +1172,7 @@ alloc:
                put_user_huge_page(page);
        if (unlikely(!pmd_same(*pmd, orig_pmd))) {
                spin_unlock(ptl);
-               mem_cgroup_uncharge_page(new_page);
+               mem_cgroup_cancel_charge(new_page, memcg);
                put_page(new_page);
                goto out_mn;
        } else {
@@ -1170,6 +1181,8 @@ alloc:
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
                pmdp_clear_flush(vma, haddr, pmd);
                page_add_new_anon_rmap(new_page, vma, haddr);
+               mem_cgroup_commit_charge(new_page, memcg, false);
+               lru_cache_add_active_or_unevictable(new_page, vma);
                set_pmd_at(mm, haddr, pmd, entry);
                update_mmu_cache_pmd(vma, address, pmd);
                if (!page) {
@@ -2413,6 +2426,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        spinlock_t *pmd_ptl, *pte_ptl;
        int isolated;
        unsigned long hstart, hend;
+       struct mem_cgroup *memcg;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
 
@@ -2423,7 +2437,8 @@ static void collapse_huge_page(struct mm_struct *mm,
        if (!new_page)
                return;
 
-       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE)))
+       if (unlikely(mem_cgroup_try_charge(new_page, mm,
+                                          GFP_TRANSHUGE, &memcg)))
                return;
 
        /*
@@ -2510,6 +2525,8 @@ static void collapse_huge_page(struct mm_struct *mm,
        spin_lock(pmd_ptl);
        BUG_ON(!pmd_none(*pmd));
        page_add_new_anon_rmap(new_page, vma, address);
+       mem_cgroup_commit_charge(new_page, memcg, false);
+       lru_cache_add_active_or_unevictable(new_page, vma);
        pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, address, pmd, _pmd);
        update_mmu_cache_pmd(vma, address, pmd);
@@ -2523,7 +2540,7 @@ out_up_write:
        return;
 
 out:
-       mem_cgroup_uncharge_page(new_page);
+       mem_cgroup_cancel_charge(new_page, memcg);
        goto out_up_write;
 }
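
The batched fallback path above needs one memcg reservation per small page between try_charge and commit/cancel, so the patch parks the memcg pointer in each page's otherwise-unused page_private field. Schematically (fragments, mirroring the hunks above):

        /* after a successful mem_cgroup_try_charge() for pages[i] */
        set_page_private(pages[i], (unsigned long)memcg);

        /* later, at commit time (or cancel on the error paths) */
        memcg = (void *)page_private(pages[i]);
        set_page_private(pages[i], 0);
        mem_cgroup_commit_charge(pages[i], memcg, false);
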
 
index 90dc501eaf3fbcbc7a60efeb1a4b3072220c04dc..ec4dcf1b9562b6299f215e754768da18a36b156e 100644 (file)
@@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
 static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
                                       struct mem_cgroup_tree_per_zone *mctz)
 {
-       spin_lock(&mctz->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&mctz->lock, flags);
        __mem_cgroup_remove_exceeded(mz, mctz);
-       spin_unlock(&mctz->lock);
+       spin_unlock_irqrestore(&mctz->lock, flags);
 }
 
 
@@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
                 * mem is over its softlimit.
                 */
                if (excess || mz->on_tree) {
-                       spin_lock(&mctz->lock);
+                       unsigned long flags;
+
+                       spin_lock_irqsave(&mctz->lock, flags);
                        /* if on-tree, remove it */
                        if (mz->on_tree)
                                __mem_cgroup_remove_exceeded(mz, mctz);
@@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
                         * If excess is 0, no tree ops.
                         */
                        __mem_cgroup_insert_exceeded(mz, mctz, excess);
-                       spin_unlock(&mctz->lock);
+                       spin_unlock_irqrestore(&mctz->lock, flags);
                }
        }
 }
@@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 {
        struct mem_cgroup_per_zone *mz;
 
-       spin_lock(&mctz->lock);
+       spin_lock_irq(&mctz->lock);
        mz = __mem_cgroup_largest_soft_limit_node(mctz);
-       spin_unlock(&mctz->lock);
+       spin_unlock_irq(&mctz->lock);
        return mz;
 }
 
@@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
        return val;
 }
 
-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
-                                        bool charge)
-{
-       int val = (charge) ? 1 : -1;
-       this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
-}
-
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
                                            enum mem_cgroup_events_index idx)
 {
@@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
                                         struct page *page,
-                                        bool anon, int nr_pages)
+                                        int nr_pages)
 {
        /*
         * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
         * counted as CACHE even if it's on ANON LRU.
         */
-       if (anon)
+       if (PageAnon(page))
                __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
                                nr_pages);
        else
@@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
  */
 static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 {
-       preempt_disable();
        /* threshold event is triggered in finer grain than soft limit */
        if (unlikely(mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_THRESH))) {
@@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
                do_numainfo = mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_NUMAINFO);
 #endif
-               preempt_enable();
-
                mem_cgroup_threshold(memcg);
                if (unlikely(do_softlimit))
                        mem_cgroup_update_tree(memcg, page);
@@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
                if (unlikely(do_numainfo))
                        atomic_inc(&memcg->numainfo_events);
 #endif
-       } else
-               preempt_enable();
+       }
 }
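
The hunk above can drop the preempt_disable()/preempt_enable() pair because the per-cpu event counters are now always updated with IRQs disabled by the caller, which rules out preemption. A sketch of that caller-side discipline, mirroring the commit_charge path added later in this diff:

static void account_and_check(struct mem_cgroup *memcg, struct page *page,
			      int nr_pages)
{
	local_irq_disable();		/* per-cpu ops safe: no preemption */
	mem_cgroup_charge_statistics(memcg, page, nr_pages);
	memcg_check_events(memcg, page);
	local_irq_enable();
}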
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -1347,20 +1340,6 @@ out:
        return lruvec;
 }
 
-/*
- * Following LRU functions are allowed to be used without PCG_LOCK.
- * Operations are called by routine of global LRU independently from memcg.
- * What we have to take care of here is the validity of pc->mem_cgroup.
- *
- * Changes to pc->mem_cgroup happens when
- * 1. charge
- * 2. moving account
- * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
- * It is added to LRU before charge.
- * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
- * When moving account, the page is not on LRU. It's isolated.
- */
-
 /**
  * mem_cgroup_page_lruvec - return lruvec for adding an lru page
  * @page: the page
@@ -2261,22 +2240,14 @@ cleanup:
  *
  * Notes: Race condition
  *
- * We usually use lock_page_cgroup() for accessing page_cgroup member but
- * it tends to be costly. But considering some conditions, we doesn't need
- * to do so _always_.
+ * Charging occurs during page instantiation, while the page is
+ * unmapped and locked in page migration, or while the page table is
+ * locked in THP migration.  No race is possible.
  *
- * Considering "charge", lock_page_cgroup() is not required because all
- * file-stat operations happen after a page is attached to radix-tree. There
- * are no race with "charge".
+ * Uncharge happens only on pages with zero references; no race is possible.
  *
- * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup
- * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even
- * if there are race with "uncharge". Statistics itself is properly handled
- * by flags.
- *
- * Considering "move", this is an only case we see a race. To make the race
- * small, we check memcg->moving_account and detect there are possibility
- * of race or not. If there is, we take a lock.
+ * Charge moving between groups is protected by checking
+ * memcg->moving_account and taking the move_lock in the slowpath.
  */
 
 void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2551,17 +2522,8 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
-                                gfp_t gfp_mask,
-                                unsigned int nr_pages)
+static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+                     unsigned int nr_pages)
 {
        unsigned int batch = max(CHARGE_BATCH, nr_pages);
        int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2660,41 +2622,7 @@ done:
        return ret;
 }
 
-/**
- * mem_cgroup_try_charge_mm - try charging a mm
- * @mm: mm_struct to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns the charged mem_cgroup associated with the given mm_struct or
- * NULL the charge failed.
- */
-static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
-                                gfp_t gfp_mask,
-                                unsigned int nr_pages)
-
-{
-       struct mem_cgroup *memcg;
-       int ret;
-
-       memcg = get_mem_cgroup_from_mm(mm);
-       ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
-       css_put(&memcg->css);
-       if (ret == -EINTR)
-               memcg = root_mem_cgroup;
-       else if (ret)
-               memcg = NULL;
-
-       return memcg;
-}
-
-/*
- * Sometimes we have to undo a charge we got by try_charge(). This
- * function does that uncharge and puts the css refcount acquired by
- * try_charge().
- */
-static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
-                                      unsigned int nr_pages)
+static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
        unsigned long bytes = nr_pages * PAGE_SIZE;
 
@@ -2732,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
        return mem_cgroup_from_id(id);
 }
 
+/*
+ * try_get_mem_cgroup_from_page - look up page's memcg association
+ * @page: the page
+ *
+ * Look up, get a css reference, and return the memcg that owns @page.
+ *
+ * The page must be locked to prevent racing with swap-in and page
+ * cache charges.  If coming from an unlocked page table, the caller
+ * must ensure the page is on the LRU or this can race with charging.
+ */
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 {
        struct mem_cgroup *memcg = NULL;
@@ -2742,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
        VM_BUG_ON_PAGE(!PageLocked(page), page);
 
        pc = lookup_page_cgroup(page);
-       lock_page_cgroup(pc);
        if (PageCgroupUsed(pc)) {
                memcg = pc->mem_cgroup;
                if (memcg && !css_tryget_online(&memcg->css))
@@ -2756,23 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
                        memcg = NULL;
                rcu_read_unlock();
        }
-       unlock_page_cgroup(pc);
        return memcg;
 }
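
A minimal sketch of a caller honoring the locking contract documented above (page_memcg_get() is hypothetical, not part of this patch):

static struct mem_cgroup *page_memcg_get(struct page *page)
{
	struct mem_cgroup *memcg;

	lock_page(page);	/* excludes swap-in and page cache charging */
	memcg = try_get_mem_cgroup_from_page(page);	/* css ref or NULL */
	unlock_page(page);

	return memcg;	/* caller must css_put(&memcg->css) when done */
}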
 
-static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
-                                      struct page *page,
-                                      unsigned int nr_pages,
-                                      enum charge_type ctype,
-                                      bool lrucare)
+static void lock_page_lru(struct page *page, int *isolated)
+{
+       struct zone *zone = page_zone(page);
+
+       spin_lock_irq(&zone->lru_lock);
+       if (PageLRU(page)) {
+               struct lruvec *lruvec;
+
+               lruvec = mem_cgroup_page_lruvec(page, zone);
+               ClearPageLRU(page);
+               del_page_from_lru_list(page, lruvec, page_lru(page));
+               *isolated = 1;
+       } else
+               *isolated = 0;
+}
+
+static void unlock_page_lru(struct page *page, int isolated)
+{
+       struct zone *zone = page_zone(page);
+
+       if (isolated) {
+               struct lruvec *lruvec;
+
+               lruvec = mem_cgroup_page_lruvec(page, zone);
+               VM_BUG_ON_PAGE(PageLRU(page), page);
+               SetPageLRU(page);
+               add_page_to_lru_list(page, lruvec, page_lru(page));
+       }
+       spin_unlock_irq(&zone->lru_lock);
+}
+
+static void commit_charge(struct page *page, struct mem_cgroup *memcg,
+                         bool lrucare)
 {
        struct page_cgroup *pc = lookup_page_cgroup(page);
-       struct zone *uninitialized_var(zone);
-       struct lruvec *lruvec;
-       bool was_on_lru = false;
-       bool anon;
+       int isolated;
 
-       lock_page_cgroup(pc);
        VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
        /*
         * we don't need page_cgroup_lock for tail pages, because they are not
@@ -2783,44 +2743,28 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
         * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
         * may already be on some other mem_cgroup's LRU.  Take care of it.
         */
-       if (lrucare) {
-               zone = page_zone(page);
-               spin_lock_irq(&zone->lru_lock);
-               if (PageLRU(page)) {
-                       lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
-                       ClearPageLRU(page);
-                       del_page_from_lru_list(page, lruvec, page_lru(page));
-                       was_on_lru = true;
-               }
-       }
-
-       pc->mem_cgroup = memcg;
-       SetPageCgroupUsed(pc);
-
-       if (lrucare) {
-               if (was_on_lru) {
-                       lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
-                       VM_BUG_ON_PAGE(PageLRU(page), page);
-                       SetPageLRU(page);
-                       add_page_to_lru_list(page, lruvec, page_lru(page));
-               }
-               spin_unlock_irq(&zone->lru_lock);
-       }
-
-       if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
-               anon = true;
-       else
-               anon = false;
-
-       mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
-       unlock_page_cgroup(pc);
+       if (lrucare)
+               lock_page_lru(page, &isolated);
 
        /*
-        * "charge_statistics" updated event counter. Then, check it.
-        * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-        * if they exceeds softlimit.
+        * Nobody should be changing or seriously looking at
+        * pc->mem_cgroup and pc->flags at this point:
+        *
+        * - the page is uncharged
+        *
+        * - the page is off-LRU
+        *
+        * - an anonymous fault has exclusive page access, except for
+        *   a locked page table
+        *
+        * - a page cache insertion, a swapin fault, or a migration
+        *   have the page locked
         */
-       memcg_check_events(memcg, page);
+       pc->mem_cgroup = memcg;
+       pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
+
+       if (lrucare)
+               unlock_page_lru(page, isolated);
 }
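
The lrucare path above reduces to an isolate/update/putback bracket; a condensed sketch under simplified assumptions (set_page_memcg_lrucare() is illustrative only, and swap accounting is omitted):

static void set_page_memcg_lrucare(struct page *page, struct mem_cgroup *memcg)
{
	struct page_cgroup *pc = lookup_page_cgroup(page);
	int isolated;

	lock_page_lru(page, &isolated);	/* zone->lru_lock held, page off LRU */
	pc->mem_cgroup = memcg;		/* nobody can observe the page now */
	pc->flags = PCG_USED | PCG_MEM;	/* simplified: no PCG_MEMSW here */
	unlock_page_lru(page, isolated); /* re-linked under the new lruvec */
}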
 
 static DEFINE_MUTEX(set_limit_mutex);
@@ -2882,21 +2826,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
        if (ret)
                return ret;
 
-       ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
+       ret = try_charge(memcg, gfp, size >> PAGE_SHIFT);
        if (ret == -EINTR)  {
                /*
-                * mem_cgroup_try_charge() chosed to bypass to root due to
-                * OOM kill or fatal signal.  Since our only options are to
-                * either fail the allocation or charge it to this cgroup, do
-                * it as a temporary condition. But we can't fail. From a
-                * kmem/slab perspective, the cache has already been selected,
-                * by mem_cgroup_kmem_get_cache(), so it is too late to change
+                * try_charge() chose to bypass to root due to OOM kill or
+                * fatal signal.  Since our only options are to either fail
+                * the allocation or charge it to this cgroup, do it as a
+                * temporary condition. But we can't fail. From a kmem/slab
+                * perspective, the cache has already been selected, by
+                * mem_cgroup_kmem_get_cache(), so it is too late to change
                 * our minds.
                 *
                 * This condition will only trigger if the task entered
-                * memcg_charge_kmem in a sane state, but was OOM-killed during
-                * mem_cgroup_try_charge() above. Tasks that were already
-                * dying when the allocation triggers should have been already
+                * memcg_charge_kmem in a sane state, but was OOM-killed
+                * during try_charge() above. Tasks that were already dying
+                * when the allocation triggers should have been already
                 * directed to the root cgroup in memcontrol.h
                 */
                res_counter_charge_nofail(&memcg->res, size, &fail_res);
@@ -3447,7 +3391,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
 /*
  * Because tail pages are not marked as "used", set it. We're under
  * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -3468,7 +3411,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
        for (i = 1; i < HPAGE_PMD_NR; i++) {
                pc = head_pc + i;
                pc->mem_cgroup = memcg;
-               pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+               pc->flags = head_pc->flags;
        }
        __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
                       HPAGE_PMD_NR);
@@ -3498,7 +3441,6 @@ static int mem_cgroup_move_account(struct page *page,
 {
        unsigned long flags;
        int ret;
-       bool anon = PageAnon(page);
 
        VM_BUG_ON(from == to);
        VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -3512,15 +3454,21 @@ static int mem_cgroup_move_account(struct page *page,
        if (nr_pages > 1 && !PageTransHuge(page))
                goto out;
 
-       lock_page_cgroup(pc);
+       /*
+        * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
+        * of its source page while we change it: page migration takes
+        * both pages off the LRU, but page cache replacement doesn't.
+        */
+       if (!trylock_page(page))
+               goto out;
 
        ret = -EINVAL;
        if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
-               goto unlock;
+               goto out_unlock;
 
        move_lock_mem_cgroup(from, &flags);
 
-       if (!anon && page_mapped(page)) {
+       if (!PageAnon(page) && page_mapped(page)) {
                __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
                               nr_pages);
                __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
@@ -3534,20 +3482,25 @@ static int mem_cgroup_move_account(struct page *page,
                               nr_pages);
        }
 
-       mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
+       /*
+        * It is safe to change pc->mem_cgroup here because the page
+        * is referenced, charged, and isolated - we can't race with
+        * uncharging, charging, migration, or LRU putback.
+        */
 
        /* caller should have done css_get */
        pc->mem_cgroup = to;
-       mem_cgroup_charge_statistics(to, page, anon, nr_pages);
        move_unlock_mem_cgroup(from, &flags);
        ret = 0;
-unlock:
-       unlock_page_cgroup(pc);
-       /*
-        * check events
-        */
+
+       local_irq_disable();
+       mem_cgroup_charge_statistics(to, page, nr_pages);
        memcg_check_events(to, page);
+       mem_cgroup_charge_statistics(from, page, -nr_pages);
        memcg_check_events(from, page);
+       local_irq_enable();
+out_unlock:
+       unlock_page(page);
 out:
        return ret;
 }
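
Condensed, the serialization ladder the rewritten mem_cgroup_move_account() relies on looks like this (a sketch with error handling elided; move_account_sketch() is not patch code):

static int move_account_sketch(struct page *page, struct page_cgroup *pc,
			       struct mem_cgroup *from, struct mem_cgroup *to)
{
	unsigned long flags;

	if (!trylock_page(page))		/* excludes mem_cgroup_migrate() */
		return -EBUSY;
	move_lock_mem_cgroup(from, &flags);	/* excludes stat updaters */
	pc->mem_cgroup = to;	/* safe: page is referenced, charged, isolated */
	move_unlock_mem_cgroup(from, &flags);
	unlock_page(page);
	return 0;
}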
@@ -3618,742 +3571,136 @@ out:
        return ret;
 }
 
-int mem_cgroup_charge_anon(struct page *page,
-                             struct mm_struct *mm, gfp_t gfp_mask)
+#ifdef CONFIG_MEMCG_SWAP
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
+                                        bool charge)
 {
-       unsigned int nr_pages = 1;
-       struct mem_cgroup *memcg;
-
-       if (mem_cgroup_disabled())
-               return 0;
-
-       VM_BUG_ON_PAGE(page_mapped(page), page);
-       VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
-       VM_BUG_ON(!mm);
-
-       if (PageTransHuge(page)) {
-               nr_pages <<= compound_order(page);
-               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-       }
-
-       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
-       if (!memcg)
-               return -ENOMEM;
-       __mem_cgroup_commit_charge(memcg, page, nr_pages,
-                                  MEM_CGROUP_CHARGE_TYPE_ANON, false);
-       return 0;
+       int val = (charge) ? 1 : -1;
+       this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
 }
 
-/*
- * While swapping in (try_charge -> commit or cancel), the page is locked.
- * When try_charge() returns successfully, one memcg refcount has been
- * acquired without a struct page_cgroup attached. This refcount will be
- * consumed by "commit()" or released by "cancel()".
+/**
+ * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
+ * @entry: swap entry to be moved
+ * @from:  mem_cgroup which the entry is moved from
+ * @to:  mem_cgroup which the entry is moved to
+ *
+ * It succeeds only when the swap_cgroup's record for this entry is the same
+ * as the mem_cgroup's id of @from.
+ *
+ * Returns 0 on success, -EINVAL on failure.
+ *
+ * The caller must have charged to @to, IOW, called res_counter_charge() about
+ * both res and memsw, and called css_get().
  */
-static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-                                         struct page *page,
-                                         gfp_t mask,
-                                         struct mem_cgroup **memcgp)
+static int mem_cgroup_move_swap_account(swp_entry_t entry,
+                               struct mem_cgroup *from, struct mem_cgroup *to)
 {
-       struct mem_cgroup *memcg = NULL;
-       struct page_cgroup *pc;
-       int ret;
-
-       pc = lookup_page_cgroup(page);
-       /*
-        * Every swap fault against a single page tries to charge the
-        * page, bail as early as possible.  shmem_unuse() encounters
-        * already charged pages, too.  The USED bit is protected by
-        * the page lock, which serializes swap cache removal, which
-        * in turn serializes uncharging.
-        */
-       if (PageCgroupUsed(pc))
-               goto out;
-       if (do_swap_account)
-               memcg = try_get_mem_cgroup_from_page(page);
-       if (!memcg)
-               memcg = get_mem_cgroup_from_mm(mm);
-       ret = mem_cgroup_try_charge(memcg, mask, 1);
-       css_put(&memcg->css);
-       if (ret == -EINTR)
-               memcg = root_mem_cgroup;
-       else if (ret)
-               return ret;
-out:
-       *memcgp = memcg;
-       return 0;
-}
+       unsigned short old_id, new_id;
 
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
-                                gfp_t gfp_mask, struct mem_cgroup **memcgp)
-{
-       if (mem_cgroup_disabled()) {
-               *memcgp = NULL;
-               return 0;
-       }
-       /*
-        * A racing thread's fault, or swapoff, may have already
-        * updated the pte, and even removed page from swap cache: in
-        * those cases unuse_pte()'s pte_same() test will fail; but
-        * there's also a KSM case which does need to charge the page.
-        */
-       if (!PageSwapCache(page)) {
-               struct mem_cgroup *memcg;
+       old_id = mem_cgroup_id(from);
+       new_id = mem_cgroup_id(to);
 
-               memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
-               if (!memcg)
-                       return -ENOMEM;
-               *memcgp = memcg;
+       if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
+               mem_cgroup_swap_statistics(from, false);
+               mem_cgroup_swap_statistics(to, true);
+               /*
+                * This function is only called from task migration context now.
+                * It postpones res_counter and refcount handling till the end
+                * of task migration (mem_cgroup_clear_mc()) for performance
+                * improvement. But we cannot postpone css_get(to) because if
+                * the process that has been moved to @to does swap-in, the
+                * refcount of @to might be decreased to 0.
+                *
+                * We are in attach() phase, so the cgroup is guaranteed to be
+                * alive, so we can just call css_get().
+                */
+               css_get(&to->css);
                return 0;
        }
-       return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
+       return -EINVAL;
 }
-
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+#else
+static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
+                               struct mem_cgroup *from, struct mem_cgroup *to)
 {
-       if (mem_cgroup_disabled())
-               return;
-       if (!memcg)
-               return;
-       __mem_cgroup_cancel_charge(memcg, 1);
+       return -EINVAL;
 }
+#endif
 
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
-                                       enum charge_type ctype)
+#ifdef CONFIG_DEBUG_VM
+static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
 {
-       if (mem_cgroup_disabled())
-               return;
-       if (!memcg)
-               return;
+       struct page_cgroup *pc;
 
-       __mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
+       pc = lookup_page_cgroup(page);
        /*
-        * Now swap is on-memory. This means this page may be
-        * counted both as mem and swap....double count.
-        * Fix it by uncharging from memsw. Basically, this SwapCache is stable
-        * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
-        * may call delete_from_swap_cache() before reach here.
+        * Can be NULL while feeding pages into the page allocator for
+        * the first time, i.e. during boot or memory hotplug;
+        * or when mem_cgroup_disabled().
         */
-       if (do_swap_account && PageSwapCache(page)) {
-               swp_entry_t ent = {.val = page_private(page)};
-               mem_cgroup_uncharge_swap(ent);
-       }
+       if (likely(pc) && PageCgroupUsed(pc))
+               return pc;
+       return NULL;
 }
 
-void mem_cgroup_commit_charge_swapin(struct page *page,
-                                    struct mem_cgroup *memcg)
+bool mem_cgroup_bad_page_check(struct page *page)
 {
-       __mem_cgroup_commit_charge_swapin(page, memcg,
-                                         MEM_CGROUP_CHARGE_TYPE_ANON);
+       if (mem_cgroup_disabled())
+               return false;
+
+       return lookup_page_cgroup_used(page) != NULL;
 }
 
-int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
-                               gfp_t gfp_mask)
+void mem_cgroup_print_bad_page(struct page *page)
 {
-       enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-       struct mem_cgroup *memcg;
-       int ret;
-
-       if (mem_cgroup_disabled())
-               return 0;
-       if (PageCompound(page))
-               return 0;
+       struct page_cgroup *pc;
 
-       if (PageSwapCache(page)) { /* shmem */
-               ret = __mem_cgroup_try_charge_swapin(mm, page,
-                                                    gfp_mask, &memcg);
-               if (ret)
-                       return ret;
-               __mem_cgroup_commit_charge_swapin(page, memcg, type);
-               return 0;
+       pc = lookup_page_cgroup_used(page);
+       if (pc) {
+               pr_alert("pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
+                        pc, pc->flags, pc->mem_cgroup);
        }
-
-       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
-       if (!memcg)
-               return -ENOMEM;
-       __mem_cgroup_commit_charge(memcg, page, 1, type, false);
-       return 0;
 }
+#endif
 
-static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
-                                  unsigned int nr_pages,
-                                  const enum charge_type ctype)
+static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
+                               unsigned long long val)
 {
-       struct memcg_batch_info *batch = NULL;
-       bool uncharge_memsw = true;
-
-       /* If swapout, usage of swap doesn't decrease */
-       if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
-               uncharge_memsw = false;
+       int retry_count;
+       u64 memswlimit, memlimit;
+       int ret = 0;
+       int children = mem_cgroup_count_children(memcg);
+       u64 curusage, oldusage;
+       int enlarge;
 
-       batch = &current->memcg_batch;
-       /*
-        * Usually we do css_get() when we remember a memcg pointer. But in
-        * this case we keep res->usage until the end of a series of
-        * uncharges, so it's OK to ignore the memcg's refcnt.
-        */
-       if (!batch->memcg)
-               batch->memcg = memcg;
        /*
-        * do_batch > 0 when unmapping pages or inode invalidate/truncate.
-        * In those cases, all pages freed continuously can be expected to be in
-        * the same cgroup and we have a chance to coalesce uncharges.
-        * But we uncharge one by one if the task was killed by OOM (TIF_MEMDIE)
-        * because we want to do uncharge as soon as possible.
+        * To keep hierarchical_reclaim simple, how long we should retry
+        * depends on the caller. We set our retry count to be a function
+        * of the number of children we should visit in this loop.
         */
+       retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
 
-       if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
-               goto direct_uncharge;
+       oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 
-       if (nr_pages > 1)
-               goto direct_uncharge;
-
-       /*
-        * In typical case, batch->memcg == mem. This means we can
-        * merge a series of uncharges to an uncharge of res_counter.
-        * If not, we uncharge res_counter ony by one.
-        */
-       if (batch->memcg != memcg)
-               goto direct_uncharge;
-       /* remember freed charge and uncharge it later */
-       batch->nr_pages++;
-       if (uncharge_memsw)
-               batch->memsw_nr_pages++;
-       return;
-direct_uncharge:
-       res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
-       if (uncharge_memsw)
-               res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
-       if (unlikely(batch->memcg != memcg))
-               memcg_oom_recover(memcg);
-}
-
-/*
- * uncharge if !page_mapped(page)
- */
-static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
-                            bool end_migration)
-{
-       struct mem_cgroup *memcg = NULL;
-       unsigned int nr_pages = 1;
-       struct page_cgroup *pc;
-       bool anon;
-
-       if (mem_cgroup_disabled())
-               return NULL;
-
-       if (PageTransHuge(page)) {
-               nr_pages <<= compound_order(page);
-               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-       }
-       /*
-        * Check if our page_cgroup is valid
-        */
-       pc = lookup_page_cgroup(page);
-       if (unlikely(!PageCgroupUsed(pc)))
-               return NULL;
-
-       lock_page_cgroup(pc);
-
-       memcg = pc->mem_cgroup;
-
-       if (!PageCgroupUsed(pc))
-               goto unlock_out;
-
-       anon = PageAnon(page);
-
-       switch (ctype) {
-       case MEM_CGROUP_CHARGE_TYPE_ANON:
-               /*
-                * Generally PageAnon tells if it's the anon statistics to be
-                * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
-                * used before page reached the stage of being marked PageAnon.
-                */
-               anon = true;
-               /* fallthrough */
-       case MEM_CGROUP_CHARGE_TYPE_DROP:
-               /* See mem_cgroup_prepare_migration() */
-               if (page_mapped(page))
-                       goto unlock_out;
-               /*
-                * Pages under migration may not be uncharged.  But
-                * end_migration() /must/ be the one uncharging the
-                * unused post-migration page and so it has to call
-                * here with the migration bit still set.  See the
-                * res_counter handling below.
-                */
-               if (!end_migration && PageCgroupMigration(pc))
-                       goto unlock_out;
-               break;
-       case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
-               if (!PageAnon(page)) {  /* Shared memory */
-                       if (page->mapping && !page_is_file_cache(page))
-                               goto unlock_out;
-               } else if (page_mapped(page)) /* Anon */
-                               goto unlock_out;
-               break;
-       default:
-               break;
-       }
-
-       mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
-
-       ClearPageCgroupUsed(pc);
-       /*
-        * pc->mem_cgroup is not cleared here. It will be accessed when it's
-        * freed from LRU. This is safe because uncharged page is expected not
-        * to be reused (freed soon). Exception is SwapCache, it's handled by
-        * special functions.
-        */
-
-       unlock_page_cgroup(pc);
-       /*
-        * even after unlock, we have memcg->res.usage here and this memcg
-        * will never be freed, so it's safe to call css_get().
-        */
-       memcg_check_events(memcg, page);
-       if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
-               mem_cgroup_swap_statistics(memcg, true);
-               css_get(&memcg->css);
-       }
-       /*
-        * Migration does not charge the res_counter for the
-        * replacement page, so leave it alone when phasing out the
-        * page that is unused after the migration.
-        */
-       if (!end_migration)
-               mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
-
-       return memcg;
-
-unlock_out:
-       unlock_page_cgroup(pc);
-       return NULL;
-}
-
-void mem_cgroup_uncharge_page(struct page *page)
-{
-       /* early check. */
-       if (page_mapped(page))
-               return;
-       VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
-       /*
-        * If the page is in swap cache, uncharge should be deferred
-        * to the swap path, which also properly accounts swap usage
-        * and handles memcg lifetime.
-        *
-        * Note that this check is not stable and reclaim may add the
-        * page to swap cache at any time after this.  However, if the
-        * page is not in swap cache by the time page->mapcount hits
-        * 0, there won't be any page table references to the swap
-        * slot, and reclaim will free it and not actually write the
-        * page to disk.
-        */
-       if (PageSwapCache(page))
-               return;
-       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
-}
-
-void mem_cgroup_uncharge_cache_page(struct page *page)
-{
-       VM_BUG_ON_PAGE(page_mapped(page), page);
-       VM_BUG_ON_PAGE(page->mapping, page);
-       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
-}
-
-/*
- * Batch_start/batch_end is called in unmap_page_range/invalidate/truncate.
- * In those cases, pages are freed continuously and we can expect them to
- * be in the same memcg. All of these calls themselves limit the number of
- * pages freed at once, so uncharge_start/end() is called properly.
- * This may be called multiple (nested) times in one context.
- */
-
-void mem_cgroup_uncharge_start(void)
-{
-       current->memcg_batch.do_batch++;
-       /* We can do nest. */
-       if (current->memcg_batch.do_batch == 1) {
-               current->memcg_batch.memcg = NULL;
-               current->memcg_batch.nr_pages = 0;
-               current->memcg_batch.memsw_nr_pages = 0;
-       }
-}
-
-void mem_cgroup_uncharge_end(void)
-{
-       struct memcg_batch_info *batch = &current->memcg_batch;
-
-       if (!batch->do_batch)
-               return;
-
-       batch->do_batch--;
-       if (batch->do_batch) /* If stacked, do nothing. */
-               return;
-
-       if (!batch->memcg)
-               return;
-       /*
-        * This "batch->memcg" is valid without any css_get/put etc...
-        * because we hide charges behind us.
-        */
-       if (batch->nr_pages)
-               res_counter_uncharge(&batch->memcg->res,
-                                    batch->nr_pages * PAGE_SIZE);
-       if (batch->memsw_nr_pages)
-               res_counter_uncharge(&batch->memcg->memsw,
-                                    batch->memsw_nr_pages * PAGE_SIZE);
-       memcg_oom_recover(batch->memcg);
-       /* forget this pointer (for sanity check) */
-       batch->memcg = NULL;
-}
-
-#ifdef CONFIG_SWAP
-/*
- * called after __delete_from_swap_cache(); drops the "page" account.
- * memcg information is recorded in the swap_cgroup of "ent".
- */
-void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
-       struct mem_cgroup *memcg;
-       int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
-
-       if (!swapout) /* this was a swap cache but the swap is unused ! */
-               ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
-
-       memcg = __mem_cgroup_uncharge_common(page, ctype, false);
-
-       /*
-        * record memcg information,  if swapout && memcg != NULL,
-        * css_get() was called in uncharge().
-        */
-       if (do_swap_account && swapout && memcg)
-               swap_cgroup_record(ent, mem_cgroup_id(memcg));
-}
-#endif
-
-#ifdef CONFIG_MEMCG_SWAP
-/*
- * called from swap_entry_free(). remove record in swap_cgroup and
- * uncharge "memsw" account.
- */
-void mem_cgroup_uncharge_swap(swp_entry_t ent)
-{
-       struct mem_cgroup *memcg;
-       unsigned short id;
-
-       if (!do_swap_account)
-               return;
-
-       id = swap_cgroup_record(ent, 0);
-       rcu_read_lock();
-       memcg = mem_cgroup_lookup(id);
-       if (memcg) {
-               /*
-                * We uncharge this because swap is freed.  This memcg can
-                * be obsolete one. We avoid calling css_tryget_online().
-                */
-               res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
-               mem_cgroup_swap_statistics(memcg, false);
-               css_put(&memcg->css);
-       }
-       rcu_read_unlock();
-}
-
-/**
- * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
- * @entry: swap entry to be moved
- * @from:  mem_cgroup which the entry is moved from
- * @to:  mem_cgroup which the entry is moved to
- *
- * It succeeds only when the swap_cgroup's record for this entry is the same
- * as the mem_cgroup's id of @from.
- *
- * Returns 0 on success, -EINVAL on failure.
- *
- * The caller must have charged to @to, IOW, called res_counter_charge() about
- * both res and memsw, and called css_get().
- */
-static int mem_cgroup_move_swap_account(swp_entry_t entry,
-                               struct mem_cgroup *from, struct mem_cgroup *to)
-{
-       unsigned short old_id, new_id;
-
-       old_id = mem_cgroup_id(from);
-       new_id = mem_cgroup_id(to);
-
-       if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
-               mem_cgroup_swap_statistics(from, false);
-               mem_cgroup_swap_statistics(to, true);
-               /*
-                * This function is only called from task migration context now.
-                * It postpones res_counter and refcount handling till the end
-                * of task migration(mem_cgroup_clear_mc()) for performance
-                * improvement. But we cannot postpone css_get(to)  because if
-                * the process that has been moved to @to does swap-in, the
-                * refcount of @to might be decreased to 0.
-                *
-                * We are in attach() phase, so the cgroup is guaranteed to be
-                * alive, so we can just call css_get().
-                */
-               css_get(&to->css);
-               return 0;
-       }
-       return -EINVAL;
-}
-#else
-static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
-                               struct mem_cgroup *from, struct mem_cgroup *to)
-{
-       return -EINVAL;
-}
-#endif
-
-/*
- * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
- * page belongs to.
- */
-void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-                                 struct mem_cgroup **memcgp)
-{
-       struct mem_cgroup *memcg = NULL;
-       unsigned int nr_pages = 1;
-       struct page_cgroup *pc;
-       enum charge_type ctype;
-
-       *memcgp = NULL;
-
-       if (mem_cgroup_disabled())
-               return;
-
-       if (PageTransHuge(page))
-               nr_pages <<= compound_order(page);
-
-       pc = lookup_page_cgroup(page);
-       lock_page_cgroup(pc);
-       if (PageCgroupUsed(pc)) {
-               memcg = pc->mem_cgroup;
-               css_get(&memcg->css);
-               /*
-                * When migrating an anonymous page, its mapcount goes down
-                * to 0 and uncharge() will be called. But even if it's fully
-                * unmapped, migration may fail and the page would have to be
-                * charged again. We set the MIGRATION flag here and delay
-                * uncharge until end_migration() is called.
-                *
-                * Corner cases to think about:
-                * A)
-                * The old page was mapped as Anon and is unmapped-and-freed
-                * while migration is ongoing.
-                * If unmap finds the old page, its uncharge() will be delayed
-                * until end_migration(). If unmap finds the new page, it is
-                * uncharged when its mapcount goes from 1 to 0. If unmap code
-                * finds a swap_migration_entry, the new page will not be
-                * mapped and end_migration() will find it (mapcount==0).
-                *
-                * B)
-                * The old page was mapped but migration fails; the kernel
-                * remaps it. A charge for it is kept by the MIGRATION flag
-                * even if its mapcount goes down to 0, so we can remap
-                * successfully without charging it again.
-                *
-                * C)
-                * The "old" page is under lock_page() until the end of
-                * migration, so the old page itself will not be swapped out.
-                * If the new page is swapped out before end_migration, our
-                * hook into the usual swap-out path will catch the event.
-                */
-               if (PageAnon(page))
-                       SetPageCgroupMigration(pc);
-       }
-       unlock_page_cgroup(pc);
-       /*
-        * If the page is not charged at this point,
-        * we return here.
-        */
-       if (!memcg)
-               return;
-
-       *memcgp = memcg;
-       /*
-        * We charge the new page before it's used/mapped. So even if
-        * unlock_page() is called before end_migration, we can catch all
-        * events on this new page. If the new page is migrated but not
-        * remapped, its mapcount will finally be 0 and we call uncharge
-        * in end_migration().
-        */
-       if (PageAnon(page))
-               ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
-       else
-               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
-       /*
-        * The page is committed to the memcg, but it's not actually
-        * charged to the res_counter since we plan on replacing the
-        * old one and only one page is going to be left afterwards.
-        */
-       __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
-}
-
-/* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *memcg,
-       struct page *oldpage, struct page *newpage, bool migration_ok)
-{
-       struct page *used, *unused;
-       struct page_cgroup *pc;
-       bool anon;
-
-       if (!memcg)
-               return;
-
-       if (!migration_ok) {
-               used = oldpage;
-               unused = newpage;
-       } else {
-               used = newpage;
-               unused = oldpage;
-       }
-       anon = PageAnon(used);
-       __mem_cgroup_uncharge_common(unused,
-                                    anon ? MEM_CGROUP_CHARGE_TYPE_ANON
-                                    : MEM_CGROUP_CHARGE_TYPE_CACHE,
-                                    true);
-       css_put(&memcg->css);
-       /*
-        * We disallowed uncharging pages under migration because the
-        * page's mapcount goes down to zero temporarily.
-        * Clear the flag and check whether the page should be charged.
-        */
-       pc = lookup_page_cgroup(oldpage);
-       lock_page_cgroup(pc);
-       ClearPageCgroupMigration(pc);
-       unlock_page_cgroup(pc);
-
-       /*
-        * If a page is file cache, the radix-tree replacement is atomic
-        * and we can skip this check. When it was an Anon page, its mapcount
-        * went down to 0; but because we added the MIGRATION flag, it has
-        * not been uncharged yet. There are several cases, but the
-        * page->mapcount and USED bit checks in mem_cgroup_uncharge_page()
-        * do enough checking. (See prepare_charge() also.)
-        */
-       if (anon)
-               mem_cgroup_uncharge_page(used);
-}
-
-/*
- * When replacing page cache, the newpage is not under any memcg but is
- * already on the LRU. So this function doesn't touch the res_counter
- * but handles the LRU correctly. Both pages are locked, so we cannot
- * race with uncharge.
- */
-void mem_cgroup_replace_page_cache(struct page *oldpage,
-                                 struct page *newpage)
-{
-       struct mem_cgroup *memcg = NULL;
-       struct page_cgroup *pc;
-       enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-
-       if (mem_cgroup_disabled())
-               return;
-
-       pc = lookup_page_cgroup(oldpage);
-       /* fix accounting on old pages */
-       lock_page_cgroup(pc);
-       if (PageCgroupUsed(pc)) {
-               memcg = pc->mem_cgroup;
-               mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
-               ClearPageCgroupUsed(pc);
-       }
-       unlock_page_cgroup(pc);
-
-       /*
-        * When called from shmem_replace_page(), in some cases the
-        * oldpage has already been charged, and in some cases not.
-        */
-       if (!memcg)
-               return;
-       /*
-        * Even if newpage->mapping was NULL before starting replacement,
-        * the newpage may be on LRU(or pagevec for LRU) already. We lock
-        * LRU while we overwrite pc->mem_cgroup.
-        */
-       __mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
-}
-
-#ifdef CONFIG_DEBUG_VM
-static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
-{
-       struct page_cgroup *pc;
-
-       pc = lookup_page_cgroup(page);
-       /*
-        * Can be NULL while feeding pages into the page allocator for
-        * the first time, i.e. during boot or memory hotplug;
-        * or when mem_cgroup_disabled().
-        */
-       if (likely(pc) && PageCgroupUsed(pc))
-               return pc;
-       return NULL;
-}
-
-bool mem_cgroup_bad_page_check(struct page *page)
-{
-       if (mem_cgroup_disabled())
-               return false;
-
-       return lookup_page_cgroup_used(page) != NULL;
-}
-
-void mem_cgroup_print_bad_page(struct page *page)
-{
-       struct page_cgroup *pc;
-
-       pc = lookup_page_cgroup_used(page);
-       if (pc) {
-               pr_alert("pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
-                        pc, pc->flags, pc->mem_cgroup);
-       }
-}
-#endif
-
-static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
-                               unsigned long long val)
-{
-       int retry_count;
-       u64 memswlimit, memlimit;
-       int ret = 0;
-       int children = mem_cgroup_count_children(memcg);
-       u64 curusage, oldusage;
-       int enlarge;
-
-       /*
-        * For keeping hierarchical_reclaim simple, how long we should retry
-        * is depends on callers. We set our retry-count to be function
-        * of # of children which we should visit in this loop.
-        */
-       retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
-
-       oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
-
-       enlarge = 0;
-       while (retry_count) {
-               if (signal_pending(current)) {
-                       ret = -EINTR;
-                       break;
-               }
-               /*
-                * Rather than hide all in some function, I do this in
-                * open coded manner. You see what this really does.
-                * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
-                */
-               mutex_lock(&set_limit_mutex);
-               memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
-               if (memswlimit < val) {
-                       ret = -EINVAL;
-                       mutex_unlock(&set_limit_mutex);
-                       break;
-               }
+       enlarge = 0;
+       while (retry_count) {
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       break;
+               }
+               /*
+                * Rather than hiding all this in some function, do it
+                * open-coded so you can see what really happens: we must
+                * guarantee memcg->res.limit <= memcg->memsw.limit.
+                */
+               mutex_lock(&set_limit_mutex);
+               memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+               if (memswlimit < val) {
+                       ret = -EINVAL;
+                       mutex_unlock(&set_limit_mutex);
+                       break;
+               }
 
                memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
                if (memlimit < val)
@@ -4479,7 +3826,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                                    gfp_mask, &nr_scanned);
                nr_reclaimed += reclaimed;
                *total_scanned += nr_scanned;
-               spin_lock(&mctz->lock);
+               spin_lock_irq(&mctz->lock);
 
                /*
                 * If we failed to reclaim anything from this memory cgroup
@@ -4519,7 +3866,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                 */
                /* If excess == 0, no tree ops */
                __mem_cgroup_insert_exceeded(mz, mctz, excess);
-               spin_unlock(&mctz->lock);
+               spin_unlock_irq(&mctz->lock);
                css_put(&mz->memcg->css);
                loop++;
                /*
@@ -6319,20 +5666,19 @@ static int mem_cgroup_do_precharge(unsigned long count)
        int ret;
 
        /* Try a single bulk charge without reclaim first */
-       ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+       ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
        if (!ret) {
                mc.precharge += count;
                return ret;
        }
        if (ret == -EINTR) {
-               __mem_cgroup_cancel_charge(root_mem_cgroup, count);
+               cancel_charge(root_mem_cgroup, count);
                return ret;
        }
 
        /* Try charges one by one with reclaim */
        while (count--) {
-               ret = mem_cgroup_try_charge(mc.to,
-                                           GFP_KERNEL & ~__GFP_NORETRY, 1);
+               ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
                /*
                 * In case of failure, any residual charges against
                 * mc.to will be dropped by mem_cgroup_clear_mc()
@@ -6340,7 +5686,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
                 * bypassed to root right away or they'll be lost.
                 */
                if (ret == -EINTR)
-                       __mem_cgroup_cancel_charge(root_mem_cgroup, 1);
+                       cancel_charge(root_mem_cgroup, 1);
                if (ret)
                        return ret;
                mc.precharge++;
@@ -6482,9 +5828,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
        if (page) {
                pc = lookup_page_cgroup(page);
                /*
-                * Do only loose check w/o page_cgroup lock.
-                * mem_cgroup_move_account() checks the pc is valid or not under
-                * the lock.
+                * Do only loose check w/o serialization.
+                * mem_cgroup_move_account() checks the pc is valid or
+                * not under LRU exclusion.
                 */
                if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
                        ret = MC_TARGET_PAGE;
@@ -6609,7 +5955,7 @@ static void __mem_cgroup_clear_mc(void)
 
        /* we must uncharge all the leftover precharges from mc.to */
        if (mc.precharge) {
-               __mem_cgroup_cancel_charge(mc.to, mc.precharge);
+               cancel_charge(mc.to, mc.precharge);
                mc.precharge = 0;
        }
        /*
@@ -6617,7 +5963,7 @@ static void __mem_cgroup_clear_mc(void)
         * we must uncharge here.
         */
        if (mc.moved_charge) {
-               __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
+               cancel_charge(mc.from, mc.moved_charge);
                mc.moved_charge = 0;
        }
        /* we must fixup refcnts and charges */
@@ -6946,6 +6292,398 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+#ifdef CONFIG_MEMCG_SWAP
+/**
+ * mem_cgroup_swapout - transfer a memsw charge to swap
+ * @page: page whose memsw charge to transfer
+ * @entry: swap entry to move the charge to
+ *
+ * Transfer the memsw charge of @page to @entry.
+ */
+void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+       struct page_cgroup *pc;
+       unsigned short oldid;
+
+       VM_BUG_ON_PAGE(PageLRU(page), page);
+       VM_BUG_ON_PAGE(page_count(page), page);
+
+       if (!do_swap_account)
+               return;
+
+       pc = lookup_page_cgroup(page);
+
+       /* Readahead page, never charged */
+       if (!PageCgroupUsed(pc))
+               return;
+
+       VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+
+       oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+       VM_BUG_ON_PAGE(oldid, page);
+
+       pc->flags &= ~PCG_MEMSW;
+       css_get(&pc->mem_cgroup->css);
+       mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+}
+
+/**
+ * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * @entry: swap entry to uncharge
+ *
+ * Drop the memsw charge associated with @entry.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t entry)
+{
+       struct mem_cgroup *memcg;
+       unsigned short id;
+
+       if (!do_swap_account)
+               return;
+
+       id = swap_cgroup_record(entry, 0);
+       rcu_read_lock();
+       memcg = mem_cgroup_lookup(id);
+       if (memcg) {
+               res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+               mem_cgroup_swap_statistics(memcg, false);
+               css_put(&memcg->css);
+       }
+       rcu_read_unlock();
+}
+#endif
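
The two functions above bracket the lifetime of a swap_cgroup record. A commented walk-through of the implied sequence (illustrative only; the two calls really run in unrelated contexts, compressed into one function here):

static void swap_entry_lifecycle(struct page *page, swp_entry_t entry)
{
	/* Swapout path: transfer the page's memsw charge to the entry. */
	mem_cgroup_swapout(page, entry);   /* records mem_cgroup_id() */

	/* ... arbitrarily later, when the last swap reference is freed: */
	mem_cgroup_uncharge_swap(entry);   /* reads and clears the record,
					      uncharges memcg->memsw */
}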
+
+/**
+ * mem_cgroup_try_charge - try charging a page
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp_mask: reclaim mode
+ * @memcgp: charged memcg return
+ *
+ * Try to charge @page to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp_mask if necessary.
+ *
+ * Returns 0 on success, with *@memcgp pointing to the charged memcg.
+ * Otherwise, an error code is returned.
+ *
+ * After page->mapping has been set up, the caller must finalize the
+ * charge with mem_cgroup_commit_charge().  Or abort the transaction
+ * with mem_cgroup_cancel_charge() in case page instantiation fails.
+ */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+                         gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+       struct mem_cgroup *memcg = NULL;
+       unsigned int nr_pages = 1;
+       int ret = 0;
+
+       if (mem_cgroup_disabled())
+               goto out;
+
+       if (PageSwapCache(page)) {
+               struct page_cgroup *pc = lookup_page_cgroup(page);
+               /*
+                * Every swap fault against a single page tries to charge the
+                * page, bail as early as possible.  shmem_unuse() encounters
+                * already charged pages, too.  The USED bit is protected by
+                * the page lock, which serializes swap cache removal, which
+                * in turn serializes uncharging.
+                */
+               if (PageCgroupUsed(pc))
+                       goto out;
+       }
+
+       if (PageTransHuge(page)) {
+               nr_pages <<= compound_order(page);
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+       }
+
+       if (do_swap_account && PageSwapCache(page))
+               memcg = try_get_mem_cgroup_from_page(page);
+       if (!memcg)
+               memcg = get_mem_cgroup_from_mm(mm);
+
+       ret = try_charge(memcg, gfp_mask, nr_pages);
+
+       css_put(&memcg->css);
+
+       if (ret == -EINTR) {
+               memcg = root_mem_cgroup;
+               ret = 0;
+       }
+out:
+       *memcgp = memcg;
+       return ret;
+}
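
Per the comment above, callers commit only after page->mapping is set up, and pages that may already sit on the LRU (swap cache, shmem) must pass lrucare. A hypothetical swap-in-style caller (swapin_sketch() is not from this patch):

static int swapin_sketch(struct mm_struct *mm, struct page *swapcache_page)
{
	struct mem_cgroup *memcg;

	if (mem_cgroup_try_charge(swapcache_page, mm, GFP_KERNEL, &memcg))
		return -ENOMEM;
	/*
	 * A swapcache page may have been put on the LRU before it was
	 * charged, so commit with lrucare=true to re-link it correctly.
	 */
	mem_cgroup_commit_charge(swapcache_page, memcg, true);
	return 0;
}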
+
+/**
+ * mem_cgroup_commit_charge - commit a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ * @lrucare: page might be on LRU already
+ *
+ * Finalize a charge transaction started by mem_cgroup_try_charge(),
+ * after page->mapping has been set up.  This must happen atomically
+ * as part of the page instantiation, i.e. under the page table lock
+ * for anonymous pages, under the page lock for page and swap cache.
+ *
+ * In addition, the page must not be on the LRU during the commit, to
+ * prevent racing with task migration.  If it might be, use @lrucare.
+ *
+ * Use mem_cgroup_cancel_charge() to cancel the transaction instead.
+ */
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+                             bool lrucare)
+{
+       unsigned int nr_pages = 1;
+
+       VM_BUG_ON_PAGE(!page->mapping, page);
+       VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);
+
+       if (mem_cgroup_disabled())
+               return;
+       /*
+        * Swap faults will attempt to charge the same page multiple
+        * times.  But reuse_swap_page() might have removed the page
+        * from swapcache already, so we can't check PageSwapCache().
+        */
+       if (!memcg)
+               return;
+
+       commit_charge(page, memcg, lrucare);
+
+       if (PageTransHuge(page)) {
+               nr_pages <<= compound_order(page);
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+       }
+
+       local_irq_disable();
+       mem_cgroup_charge_statistics(memcg, page, nr_pages);
+       memcg_check_events(memcg, page);
+       local_irq_enable();
+
+       if (do_swap_account && PageSwapCache(page)) {
+               swp_entry_t entry = { .val = page_private(page) };
+               /*
+                * The swap entry might not get freed for a long time,
+                * let's not wait for it.  The page already received a
+                * memory+swap charge, drop the swap entry duplicate.
+                */
+               mem_cgroup_uncharge_swap(entry);
+       }
+}
+
+/**
+ * mem_cgroup_cancel_charge - cancel a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ *
+ * Cancel a charge transaction started by mem_cgroup_try_charge().
+ */
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
+{
+       unsigned int nr_pages = 1;
+
+       if (mem_cgroup_disabled())
+               return;
+       /*
+        * Swap faults will attempt to charge the same page multiple
+        * times.  But reuse_swap_page() might have removed the page
+        * from swapcache already, so we can't check PageSwapCache().
+        */
+       if (!memcg)
+               return;
+
+       if (PageTransHuge(page)) {
+               nr_pages <<= compound_order(page);
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+       }
+
+       cancel_charge(memcg, nr_pages);
+}
+
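Taken together, mem_cgroup_try_charge(), mem_cgroup_commit_charge() and
mem_cgroup_cancel_charge() form one charge transaction. A minimal
caller-side sketch, modelled on the do_anonymous_page() conversion
further down in this patch (allocation, locking and pte installation
elided):

	struct mem_cgroup *memcg;

	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
		goto oom;			/* charge refused */

	/* ... if installing the pte fails, roll the charge back: */
	mem_cgroup_cancel_charge(page, memcg);

	/* ... on success, once the rmap is in place, finalize it: */
	page_add_new_anon_rmap(page, vma, address);
	mem_cgroup_commit_charge(page, memcg, false);
	lru_cache_add_active_or_unevictable(page, vma);
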
+static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+                          unsigned long nr_mem, unsigned long nr_memsw,
+                          unsigned long nr_anon, unsigned long nr_file,
+                          unsigned long nr_huge, struct page *dummy_page)
+{
+       unsigned long flags;
+
+       if (nr_mem)
+               res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
+       if (nr_memsw)
+               res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
+
+       memcg_oom_recover(memcg);
+
+       local_irq_save(flags);
+       __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
+       __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
+       __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
+       __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
+       __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+       memcg_check_events(memcg, dummy_page);
+       local_irq_restore(flags);
+}
+
+static void uncharge_list(struct list_head *page_list)
+{
+       struct mem_cgroup *memcg = NULL;
+       unsigned long nr_memsw = 0;
+       unsigned long nr_anon = 0;
+       unsigned long nr_file = 0;
+       unsigned long nr_huge = 0;
+       unsigned long pgpgout = 0;
+       unsigned long nr_mem = 0;
+       struct list_head *next;
+       struct page *page;
+
+       next = page_list->next;
+       do {
+               unsigned int nr_pages = 1;
+               struct page_cgroup *pc;
+
+               page = list_entry(next, struct page, lru);
+               next = page->lru.next;
+
+               VM_BUG_ON_PAGE(PageLRU(page), page);
+               VM_BUG_ON_PAGE(page_count(page), page);
+
+               pc = lookup_page_cgroup(page);
+               if (!PageCgroupUsed(pc))
+                       continue;
+
+               /*
+                * Nobody should be changing or seriously looking at
+                * pc->mem_cgroup and pc->flags at this point, we have
+                * fully exclusive access to the page.
+                */
+
+               if (memcg != pc->mem_cgroup) {
+                       if (memcg) {
+                               uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+                                              nr_anon, nr_file, nr_huge, page);
+                               pgpgout = nr_mem = nr_memsw = 0;
+                               nr_anon = nr_file = nr_huge = 0;
+                       }
+                       memcg = pc->mem_cgroup;
+               }
+
+               if (PageTransHuge(page)) {
+                       nr_pages <<= compound_order(page);
+                       VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+                       nr_huge += nr_pages;
+               }
+
+               if (PageAnon(page))
+                       nr_anon += nr_pages;
+               else
+                       nr_file += nr_pages;
+
+               if (pc->flags & PCG_MEM)
+                       nr_mem += nr_pages;
+               if (pc->flags & PCG_MEMSW)
+                       nr_memsw += nr_pages;
+               pc->flags = 0;
+
+               pgpgout++;
+       } while (next != page_list);
+
+       if (memcg)
+               uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+                              nr_anon, nr_file, nr_huge, page);
+}
+
+/**
+ * mem_cgroup_uncharge - uncharge a page
+ * @page: page to uncharge
+ *
+ * Uncharge a page previously charged with mem_cgroup_try_charge() and
+ * mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge(struct page *page)
+{
+       struct page_cgroup *pc;
+
+       if (mem_cgroup_disabled())
+               return;
+
+       /* Don't touch page->lru of any random page, pre-check: */
+       pc = lookup_page_cgroup(page);
+       if (!PageCgroupUsed(pc))
+               return;
+
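+       /*
+        * page->lru is unused at this point; turn it into a
+        * single-entry list so the batched uncharge_list() path
+        * can be reused for one page.
+        */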
+       INIT_LIST_HEAD(&page->lru);
+       uncharge_list(&page->lru);
+}
+
+/**
+ * mem_cgroup_uncharge_list - uncharge a list of page
+ * @page_list: list of pages to uncharge
+ *
+ * Uncharge a list of pages previously charged with
+ * mem_cgroup_try_charge() and mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge_list(struct list_head *page_list)
+{
+       if (mem_cgroup_disabled())
+               return;
+
+       if (!list_empty(page_list))
+               uncharge_list(page_list);
+}
+
+/**
+ * mem_cgroup_migrate - migrate a charge to another page
+ * @oldpage: currently charged page
+ * @newpage: page to transfer the charge to
+ * @lrucare: both pages might be on the LRU already
+ *
+ * Migrate the charge from @oldpage to @newpage.
+ *
+ * Both pages must be locked, @newpage->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+                       bool lrucare)
+{
+       struct page_cgroup *pc;
+       int isolated;
+
+       VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+       VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+       VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage);
+       VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage);
+       VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
+       VM_BUG_ON_PAGE(PageTransHuge(oldpage) != PageTransHuge(newpage),
+                      newpage);
+
+       if (mem_cgroup_disabled())
+               return;
+
+       /* Page cache replacement: new page already charged? */
+       pc = lookup_page_cgroup(newpage);
+       if (PageCgroupUsed(pc))
+               return;
+
+       /* Re-entrant migration: old page already uncharged? */
+       pc = lookup_page_cgroup(oldpage);
+       if (!PageCgroupUsed(pc))
+               return;
+
+       VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
+       VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
+
+       if (lrucare)
+               lock_page_lru(oldpage, &isolated);
+
+       pc->flags = 0;
+
+       if (lrucare)
+               unlock_page_lru(oldpage, isolated);
+
+       commit_charge(newpage, pc->mem_cgroup, lrucare);
+}
+
 /*
  * subsys_initcall() for memory controller.
  *
index 5c55270729f7b45a1196e8fb4fc5a5374dcc6d9d..ab3537bcfed2334fd1636cfc8e74ff634e54c943 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
                details = NULL;
 
        BUG_ON(addr >= end);
-       mem_cgroup_uncharge_start();
        tlb_start_vma(tlb, vma);
        pgd = pgd_offset(vma->vm_mm, addr);
        do {
@@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
                next = zap_pud_range(tlb, vma, pgd, addr, next, details);
        } while (pgd++, addr = next, addr != end);
        tlb_end_vma(tlb, vma);
-       mem_cgroup_uncharge_end();
 }
 
 
@@ -2049,6 +2047,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *dirty_page = NULL;
        unsigned long mmun_start = 0;   /* For mmu_notifiers */
        unsigned long mmun_end = 0;     /* For mmu_notifiers */
+       struct mem_cgroup *memcg;
 
        old_page = vm_normal_page(vma, address, orig_pte);
        if (!old_page) {
@@ -2204,7 +2203,7 @@ gotten:
        }
        __SetPageUptodate(new_page);
 
-       if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
                goto oom_free_new;
 
        mmun_start  = address & PAGE_MASK;
@@ -2234,6 +2233,8 @@ gotten:
                 */
                ptep_clear_flush(vma, address, page_table);
                page_add_new_anon_rmap(new_page, vma, address);
+               mem_cgroup_commit_charge(new_page, memcg, false);
+               lru_cache_add_active_or_unevictable(new_page, vma);
                /*
                 * We call the notify macro here because, when using secondary
                 * mmu page tables (such as kvm shadow page tables), we want the
@@ -2271,7 +2272,7 @@ gotten:
                new_page = old_page;
                ret |= VM_FAULT_WRITE;
        } else
-               mem_cgroup_uncharge_page(new_page);
+               mem_cgroup_cancel_charge(new_page, memcg);
 
        if (new_page)
                page_cache_release(new_page);
@@ -2410,10 +2411,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        spinlock_t *ptl;
        struct page *page, *swapcache;
+       struct mem_cgroup *memcg;
        swp_entry_t entry;
        pte_t pte;
        int locked;
-       struct mem_cgroup *ptr;
        int exclusive = 0;
        int ret = 0;
 
@@ -2489,7 +2490,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out_page;
        }
 
-       if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
+       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
                ret = VM_FAULT_OOM;
                goto out_page;
        }
@@ -2514,10 +2515,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         * while the page is counted on swap but not yet in mapcount i.e.
         * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
         * must be called after the swap_free(), or it will never succeed.
-        * Because delete_from_swap_page() may be called by reuse_swap_page(),
-        * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
-        * in page->private. In this case, a record in swap_cgroup  is silently
-        * discarded at swap_free().
         */
 
        inc_mm_counter_fast(mm, MM_ANONPAGES);
@@ -2533,12 +2530,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        if (pte_swp_soft_dirty(orig_pte))
                pte = pte_mksoft_dirty(pte);
        set_pte_at(mm, address, page_table, pte);
-       if (page == swapcache)
+       if (page == swapcache) {
                do_page_add_anon_rmap(page, vma, address, exclusive);
-       else /* ksm created a completely new copy */
+               mem_cgroup_commit_charge(page, memcg, true);
+       } else { /* ksm created a completely new copy */
                page_add_new_anon_rmap(page, vma, address);
-       /* It's better to call commit-charge after rmap is established */
-       mem_cgroup_commit_charge_swapin(page, ptr);
+               mem_cgroup_commit_charge(page, memcg, false);
+               lru_cache_add_active_or_unevictable(page, vma);
+       }
 
        swap_free(entry);
        if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2571,7 +2570,7 @@ unlock:
 out:
        return ret;
 out_nomap:
-       mem_cgroup_cancel_charge_swapin(ptr);
+       mem_cgroup_cancel_charge(page, memcg);
        pte_unmap_unlock(page_table, ptl);
 out_page:
        unlock_page(page);
@@ -2627,6 +2626,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
                unsigned int flags)
 {
+       struct mem_cgroup *memcg;
        struct page *page;
        spinlock_t *ptl;
        pte_t entry;
@@ -2660,7 +2660,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         */
        __SetPageUptodate(page);
 
-       if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
+       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
                goto oom_free_page;
 
        entry = mk_pte(page, vma->vm_page_prot);
@@ -2673,6 +2673,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        inc_mm_counter_fast(mm, MM_ANONPAGES);
        page_add_new_anon_rmap(page, vma, address);
+       mem_cgroup_commit_charge(page, memcg, false);
+       lru_cache_add_active_or_unevictable(page, vma);
 setpte:
        set_pte_at(mm, address, page_table, entry);
 
@@ -2682,7 +2684,7 @@ unlock:
        pte_unmap_unlock(page_table, ptl);
        return 0;
 release:
-       mem_cgroup_uncharge_page(page);
+       mem_cgroup_cancel_charge(page, memcg);
        page_cache_release(page);
        goto unlock;
 oom_free_page:
@@ -2919,6 +2921,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
 {
        struct page *fault_page, *new_page;
+       struct mem_cgroup *memcg;
        spinlock_t *ptl;
        pte_t *pte;
        int ret;
@@ -2930,7 +2933,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!new_page)
                return VM_FAULT_OOM;
 
-       if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
+       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
                page_cache_release(new_page);
                return VM_FAULT_OOM;
        }
@@ -2950,12 +2953,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                goto uncharge_out;
        }
        do_set_pte(vma, address, new_page, pte, true, true);
+       mem_cgroup_commit_charge(new_page, memcg, false);
+       lru_cache_add_active_or_unevictable(new_page, vma);
        pte_unmap_unlock(pte, ptl);
        unlock_page(fault_page);
        page_cache_release(fault_page);
        return ret;
 uncharge_out:
-       mem_cgroup_uncharge_page(new_page);
+       mem_cgroup_cancel_charge(new_page, memcg);
        page_cache_release(new_page);
        return ret;
 }
@@ -3425,44 +3430,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-#if !defined(__HAVE_ARCH_GATE_AREA)
-
-#if defined(AT_SYSINFO_EHDR)
-static struct vm_area_struct gate_vma;
-
-static int __init gate_vma_init(void)
-{
-       gate_vma.vm_mm = NULL;
-       gate_vma.vm_start = FIXADDR_USER_START;
-       gate_vma.vm_end = FIXADDR_USER_END;
-       gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
-       gate_vma.vm_page_prot = __P101;
-
-       return 0;
-}
-__initcall(gate_vma_init);
-#endif
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef AT_SYSINFO_EHDR
-       return &gate_vma;
-#else
-       return NULL;
-#endif
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-#ifdef AT_SYSINFO_EHDR
-       if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
-               return 1;
-#endif
-       return 0;
-}
-
-#endif /* __HAVE_ARCH_GATE_AREA */
-
 static int __follow_pte(struct mm_struct *mm, unsigned long address,
                pte_t **ptepp, spinlock_t **ptlp)
 {
index be6dbf995c0cea7128fa58124057d8891cfa7933..f78ec9bd454dd04585d208bd065460c92f04c042 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -780,6 +780,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
        if (rc != MIGRATEPAGE_SUCCESS) {
                newpage->mapping = NULL;
        } else {
+               mem_cgroup_migrate(page, newpage, false);
                if (remap_swapcache)
                        remove_migration_ptes(page, newpage);
                page->mapping = NULL;
@@ -795,7 +796,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 {
        int rc = -EAGAIN;
        int remap_swapcache = 1;
-       struct mem_cgroup *mem;
        struct anon_vma *anon_vma = NULL;
 
        if (!trylock_page(page)) {
@@ -821,9 +821,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                lock_page(page);
        }
 
-       /* charge against new page */
-       mem_cgroup_prepare_migration(page, newpage, &mem);
-
        if (PageWriteback(page)) {
                /*
                 * Only in the case of a full synchronous migration is it
@@ -833,10 +830,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                 */
                if (mode != MIGRATE_SYNC) {
                        rc = -EBUSY;
-                       goto uncharge;
+                       goto out_unlock;
                }
                if (!force)
-                       goto uncharge;
+                       goto out_unlock;
                wait_on_page_writeback(page);
        }
        /*
@@ -872,7 +869,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                         */
                        remap_swapcache = 0;
                } else {
-                       goto uncharge;
+                       goto out_unlock;
                }
        }
 
@@ -885,7 +882,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                 * the page migration right away (protected by page lock).
                 */
                rc = balloon_page_migrate(newpage, page, mode);
-               goto uncharge;
+               goto out_unlock;
        }
 
        /*
@@ -904,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                VM_BUG_ON_PAGE(PageAnon(page), page);
                if (page_has_private(page)) {
                        try_to_free_buffers(page);
-                       goto uncharge;
+                       goto out_unlock;
                }
                goto skip_unmap;
        }
@@ -923,10 +920,7 @@ skip_unmap:
        if (anon_vma)
                put_anon_vma(anon_vma);
 
-uncharge:
-       mem_cgroup_end_migration(mem, page, newpage,
-                                (rc == MIGRATEPAGE_SUCCESS ||
-                                 rc == MIGRATEPAGE_BALLOON_SUCCESS));
+out_unlock:
        unlock_page(page);
 out:
        return rc;
@@ -1786,7 +1780,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        pg_data_t *pgdat = NODE_DATA(node);
        int isolated = 0;
        struct page *new_page = NULL;
-       struct mem_cgroup *memcg = NULL;
        int page_lru = page_is_file_cache(page);
        unsigned long mmun_start = address & HPAGE_PMD_MASK;
        unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
@@ -1852,15 +1845,6 @@ fail_putback:
                goto out_unlock;
        }
 
-       /*
-        * Traditional migration needs to prepare the memcg charge
-        * transaction early to prevent the old page from being
-        * uncharged when installing migration entries.  Here we can
-        * save the potential rollback and start the charge transfer
-        * only when migration is already known to end successfully.
-        */
-       mem_cgroup_prepare_migration(page, new_page, &memcg);
-
        orig_entry = *pmd;
        entry = mk_pmd(new_page, vma->vm_page_prot);
        entry = pmd_mkhuge(entry);
@@ -1888,14 +1872,10 @@ fail_putback:
                goto fail_putback;
        }
 
+       mem_cgroup_migrate(page, new_page, false);
+
        page_remove_rmap(page);
 
-       /*
-        * Finish the charge transaction under the page table lock to
-        * prevent split_huge_page() from dividing up the charge
-        * before it's fully transferred to the new page.
-        */
-       mem_cgroup_end_migration(memcg, page, new_page, true);
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
index 64c9d736155c7a546e6d133426a0861a63688ead..c1f2ea4a0b9960d39940c82f4ba329dad2df74df 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -221,7 +221,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
        if (vma->vm_flags & VM_DENYWRITE)
                atomic_inc(&file_inode(file)->i_writecount);
        if (vma->vm_flags & VM_SHARED)
-               mapping->i_mmap_writable--;
+               mapping_unmap_writable(mapping);
 
        flush_dcache_mmap_lock(mapping);
        if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -622,7 +622,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_dec(&file_inode(file)->i_writecount);
                if (vma->vm_flags & VM_SHARED)
-                       mapping->i_mmap_writable++;
+                       atomic_inc(&mapping->i_mmap_writable);
 
                flush_dcache_mmap_lock(mapping);
                if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -1577,6 +1577,17 @@ munmap_back:
                        if (error)
                                goto free_vma;
                }
+               if (vm_flags & VM_SHARED) {
+                       error = mapping_map_writable(file->f_mapping);
+                       if (error)
+                               goto allow_write_and_free_vma;
+               }
+
+               /* ->mmap() can change vma->vm_file, but must guarantee that
+                * vma_link() below can deny write-access if VM_DENYWRITE is set
+                * and map writably if VM_SHARED is set. This usually means the
+                * new file must not have been exposed to user-space, yet.
+                */
                vma->vm_file = get_file(file);
                error = file->f_op->mmap(file, vma);
                if (error)
@@ -1616,8 +1627,12 @@ munmap_back:
 
        vma_link(mm, vma, prev, rb_link, rb_parent);
        /* Once vma denies write, undo our temporary denial count */
-       if (vm_flags & VM_DENYWRITE)
-               allow_write_access(file);
+       if (file) {
+               if (vm_flags & VM_SHARED)
+                       mapping_unmap_writable(file->f_mapping);
+               if (vm_flags & VM_DENYWRITE)
+                       allow_write_access(file);
+       }
        file = vma->vm_file;
 out:
        perf_event_mmap(vma);
@@ -1646,14 +1661,17 @@ out:
        return addr;
 
 unmap_and_free_vma:
-       if (vm_flags & VM_DENYWRITE)
-               allow_write_access(file);
        vma->vm_file = NULL;
        fput(file);
 
        /* Undo any partial mapping done by a device driver. */
        unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
        charged = 0;
+       if (vm_flags & VM_SHARED)
+               mapping_unmap_writable(file->f_mapping);
+allow_write_and_free_vma:
+       if (vm_flags & VM_DENYWRITE)
+               allow_write_access(file);
 free_vma:
        kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
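The mapping_map_writable()/mapping_unmap_writable() and
mapping_deny_writable()/mapping_allow_writable() pairs used above are
defined on the fs.h side of this series and are not shown in this diff.
A sketch of their counting semantics on the now-atomic i_mmap_writable,
consistent with how they are paired here:

	/*
	 * i_mmap_writable > 0: that many writable shared mappings exist;
	 * i_mmap_writable < 0: writable mappings are currently denied
	 * (F_SEAL_WRITE). The two states exclude each other.
	 */
	static inline int mapping_map_writable(struct address_space *mapping)
	{
		return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
			0 : -EPERM;
	}

	static inline int mapping_deny_writable(struct address_space *mapping)
	{
		return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
			0 : -EBUSY;
	}

with mapping_unmap_writable() and mapping_allow_writable() as the plain
atomic_dec()/atomic_inc() counterparts.
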
index 4a852f6c5709dbda2a29561714b704972ec292f6..a881d9673c6b165e5336e83bf2ab75bf3c03d938 100644 (file)
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1981,11 +1981,6 @@ error:
        return -ENOMEM;
 }
 
-int in_gate_area_no_mm(unsigned long addr)
-{
-       return 0;
-}
-
 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        BUG();
index 22a4a7699cdbeb51e86c22ebbd4b1118693042f9..3e8491c504f8bedc432293484bfe1f31fa1e6f46 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1032,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page,
        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
                        hpage_nr_pages(page));
        __page_set_anon_rmap(page, vma, address, 1);
-
-       VM_BUG_ON_PAGE(PageLRU(page), page);
-       if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
-               SetPageActive(page);
-               lru_cache_add(page);
-               return;
-       }
-
-       if (!TestSetPageMlocked(page)) {
-               /*
-                * We use the irq-unsafe __mod_zone_page_stat because this
-                * counter is not modified from interrupt context, and the pte
-                * lock is held(spinlock), which implies preemption disabled.
-                */
-               __mod_zone_page_state(page_zone(page), NR_MLOCK,
-                                   hpage_nr_pages(page));
-               count_vm_event(UNEVICTABLE_PGMLOCKED);
-       }
-       add_page_to_unevictable_list(page);
 }
 
 /**
@@ -1108,7 +1089,6 @@ void page_remove_rmap(struct page *page)
        if (unlikely(PageHuge(page)))
                goto out;
        if (anon) {
-               mem_cgroup_uncharge_page(page);
                if (PageTransHuge(page))
                        __dec_zone_page_state(page,
                                              NR_ANON_TRANSPARENT_HUGEPAGES);
index 302d1cf7ad07c385ebfeb381dd42af542b4787a5..a42add14331c02171f5f3fbb30ed4e82b8d9fad3 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,6 +66,9 @@ static struct vfsmount *shm_mnt;
 #include <linux/highmem.h>
 #include <linux/seq_file.h>
 #include <linux/magic.h>
+#include <linux/syscalls.h>
+#include <linux/fcntl.h>
+#include <uapi/linux/memfd.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -419,7 +422,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        pvec.pages, indices);
                if (!pvec.nr)
                        break;
-               mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
@@ -447,7 +449,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                }
                pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                cond_resched();
                index++;
        }
@@ -495,7 +496,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        index = start;
                        continue;
                }
-               mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
@@ -531,7 +531,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                }
                pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                index++;
        }
 
@@ -551,6 +550,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = dentry->d_inode;
+       struct shmem_inode_info *info = SHMEM_I(inode);
        int error;
 
        error = inode_change_ok(inode, attr);
@@ -561,6 +561,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
                loff_t oldsize = inode->i_size;
                loff_t newsize = attr->ia_size;
 
+               /* protected by i_mutex */
+               if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
+                   (newsize > oldsize && (info->seals & F_SEAL_GROW)))
+                       return -EPERM;
+
                if (newsize != oldsize) {
                        error = shmem_reacct_size(SHMEM_I(inode)->flags,
                                        oldsize, newsize);
@@ -621,7 +626,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
        radswap = swp_to_radix_entry(swap);
        index = radix_tree_locate_item(&mapping->page_tree, radswap);
        if (index == -1)
-               return 0;
+               return -EAGAIN; /* tell shmem_unuse we found nothing */
 
        /*
         * Move _head_ to start search for next from here.
@@ -680,7 +685,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
                        spin_unlock(&info->lock);
                        swap_free(swap);
                }
-               error = 1;      /* not an error, but entry was found */
        }
        return error;
 }
@@ -692,7 +696,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 {
        struct list_head *this, *next;
        struct shmem_inode_info *info;
-       int found = 0;
+       struct mem_cgroup *memcg;
        int error = 0;
 
        /*
@@ -707,26 +711,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
         * the shmem_swaplist_mutex which might hold up shmem_writepage().
         * Charged back to the user (not to caller) when swap account is used.
         */
-       error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL);
+       error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
        if (error)
                goto out;
        /* No radix_tree_preload: swap entry keeps a place for page in tree */
+       error = -EAGAIN;
 
        mutex_lock(&shmem_swaplist_mutex);
        list_for_each_safe(this, next, &shmem_swaplist) {
                info = list_entry(this, struct shmem_inode_info, swaplist);
                if (info->swapped)
-                       found = shmem_unuse_inode(info, swap, &page);
+                       error = shmem_unuse_inode(info, swap, &page);
                else
                        list_del_init(&info->swaplist);
                cond_resched();
-               if (found)
+               if (error != -EAGAIN)
                        break;
+               /* found nothing in this inode: move on to search the next */
        }
        mutex_unlock(&shmem_swaplist_mutex);
 
-       if (found < 0)
-               error = found;
+       if (error) {
+               if (error != -ENOMEM)
+                       error = 0;
+               mem_cgroup_cancel_charge(page, memcg);
+       } else
+               mem_cgroup_commit_charge(page, memcg, true);
 out:
        unlock_page(page);
        page_cache_release(page);
@@ -830,7 +840,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
        }
 
        mutex_unlock(&shmem_swaplist_mutex);
-       swapcache_free(swap, NULL);
+       swapcache_free(swap);
 redirty:
        set_page_dirty(page);
        if (wbc->for_reclaim)
@@ -1003,7 +1013,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
                 */
                oldpage = newpage;
        } else {
-               mem_cgroup_replace_page_cache(oldpage, newpage);
+               mem_cgroup_migrate(oldpage, newpage, false);
                lru_cache_add_anon(newpage);
                *pagep = newpage;
        }
@@ -1030,6 +1040,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
        struct address_space *mapping = inode->i_mapping;
        struct shmem_inode_info *info;
        struct shmem_sb_info *sbinfo;
+       struct mem_cgroup *memcg;
        struct page *page;
        swp_entry_t swap;
        int error;
@@ -1108,8 +1119,7 @@ repeat:
                                goto failed;
                }
 
-               error = mem_cgroup_charge_file(page, current->mm,
-                                               gfp & GFP_RECLAIM_MASK);
+               error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
                                                swp_to_radix_entry(swap));
@@ -1125,12 +1135,16 @@ repeat:
                         * Reset swap.val? No, leave it so "failed" goes back to
                         * "repeat": reading a hole and writing should succeed.
                         */
-                       if (error)
+                       if (error) {
+                               mem_cgroup_cancel_charge(page, memcg);
                                delete_from_swap_cache(page);
+                       }
                }
                if (error)
                        goto failed;
 
+               mem_cgroup_commit_charge(page, memcg, true);
+
                spin_lock(&info->lock);
                info->swapped--;
                shmem_recalc_inode(inode);
@@ -1168,8 +1182,7 @@ repeat:
                if (sgp == SGP_WRITE)
                        __SetPageReferenced(page);
 
-               error = mem_cgroup_charge_file(page, current->mm,
-                                               gfp & GFP_RECLAIM_MASK);
+               error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
                if (error)
                        goto decused;
                error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
@@ -1179,9 +1192,10 @@ repeat:
                        radix_tree_preload_end();
                }
                if (error) {
-                       mem_cgroup_uncharge_cache_page(page);
+                       mem_cgroup_cancel_charge(page, memcg);
                        goto decused;
                }
+               mem_cgroup_commit_charge(page, memcg, false);
                lru_cache_add_anon(page);
 
                spin_lock(&info->lock);
@@ -1407,6 +1421,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
                info = SHMEM_I(inode);
                memset(info, 0, (char *)inode - (char *)info);
                spin_lock_init(&info->lock);
+               info->seals = F_SEAL_SEAL;
                info->flags = flags & VM_NORESERVE;
                INIT_LIST_HEAD(&info->swaplist);
                simple_xattrs_init(&info->xattrs);
@@ -1465,7 +1480,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
                        struct page **pagep, void **fsdata)
 {
        struct inode *inode = mapping->host;
+       struct shmem_inode_info *info = SHMEM_I(inode);
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+
+       /* i_mutex is held by caller */
+       if (unlikely(info->seals)) {
+               if (info->seals & F_SEAL_WRITE)
+                       return -EPERM;
+               if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
+                       return -EPERM;
+       }
+
        return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
 }
 
@@ -1803,11 +1828,233 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
        return offset;
 }
 
+/*
+ * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
+ * so reuse a tag which we firmly believe is never set or cleared on shmem.
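+ * (With the default 64 slots per radix_tree_node, one more tag means one
+ *  more 64-bit bitmap in each node; that is where the 8 bytes come from.)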
+ */
+#define SHMEM_TAG_PINNED        PAGECACHE_TAG_TOWRITE
+#define LAST_SCAN               4       /* about 150ms max */
+
+static void shmem_tag_pins(struct address_space *mapping)
+{
+       struct radix_tree_iter iter;
+       void **slot;
+       pgoff_t start;
+       struct page *page;
+
+       lru_add_drain();
+       start = 0;
+       rcu_read_lock();
+
+restart:
+       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+               page = radix_tree_deref_slot(slot);
+               if (!page || radix_tree_exception(page)) {
+                       if (radix_tree_deref_retry(page))
+                               goto restart;
+               } else if (page_count(page) - page_mapcount(page) > 1) {
+                       spin_lock_irq(&mapping->tree_lock);
+                       radix_tree_tag_set(&mapping->page_tree, iter.index,
+                                          SHMEM_TAG_PINNED);
+                       spin_unlock_irq(&mapping->tree_lock);
+               }
+
+               if (need_resched()) {
+                       cond_resched_rcu();
+                       start = iter.index + 1;
+                       goto restart;
+               }
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
+ * via get_user_pages(), drivers might have some pending I/O without any active
+ * user-space mappings (e.g., direct-IO, AIO). Therefore, we look at all pages
+ * and see whether they have an elevated ref-count. If so, we tag them and wait for
+ * them to be dropped.
+ * The caller must guarantee that no new user will acquire writable references
+ * to those pages to avoid races.
+ */
+static int shmem_wait_for_pins(struct address_space *mapping)
+{
+       struct radix_tree_iter iter;
+       void **slot;
+       pgoff_t start;
+       struct page *page;
+       int error, scan;
+
+       shmem_tag_pins(mapping);
+
+       error = 0;
+       for (scan = 0; scan <= LAST_SCAN; scan++) {
+               if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
+                       break;
+
+               if (!scan)
+                       lru_add_drain_all();
+               else if (schedule_timeout_killable((HZ << scan) / 200))
+                       scan = LAST_SCAN;
+
+               start = 0;
+               rcu_read_lock();
+restart:
+               radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
+                                          start, SHMEM_TAG_PINNED) {
+
+                       page = radix_tree_deref_slot(slot);
+                       if (radix_tree_exception(page)) {
+                               if (radix_tree_deref_retry(page))
+                                       goto restart;
+
+                               page = NULL;
+                       }
+
+                       if (page &&
+                           page_count(page) - page_mapcount(page) != 1) {
+                               if (scan < LAST_SCAN)
+                                       goto continue_resched;
+
+                               /*
+                                * On the last scan, we clean up all those tags
+                                * we inserted; but make a note that we still
+                                * found pages pinned.
+                                */
+                               error = -EBUSY;
+                       }
+
+                       spin_lock_irq(&mapping->tree_lock);
+                       radix_tree_tag_clear(&mapping->page_tree,
+                                            iter.index, SHMEM_TAG_PINNED);
+                       spin_unlock_irq(&mapping->tree_lock);
+continue_resched:
+                       if (need_resched()) {
+                               cond_resched_rcu();
+                               start = iter.index + 1;
+                               goto restart;
+                       }
+               }
+               rcu_read_unlock();
+       }
+
+       return error;
+}
+
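The "about 150ms max" note at LAST_SCAN follows from the loop above:
scan 0 only drains the LRU pagevecs, while scans 1 through 4 sleep
(HZ << scan) / 200 jiffies each. Assuming every killable sleep runs to
completion, that is 10 + 20 + 40 + 80 = 150 ms in total.
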
+#define F_ALL_SEALS (F_SEAL_SEAL | \
+                    F_SEAL_SHRINK | \
+                    F_SEAL_GROW | \
+                    F_SEAL_WRITE)
+
+int shmem_add_seals(struct file *file, unsigned int seals)
+{
+       struct inode *inode = file_inode(file);
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       int error;
+
+       /*
+        * SEALING
+        * Sealing allows multiple parties to share a shmem-file but restrict
+        * access to a specific subset of file operations. Seals can only be
+        * added, but never removed. This way, mutually untrusted parties can
+        * share common memory regions with a well-defined policy. A malicious
+        * peer can thus never perform unwanted operations on a shared object.
+        *
+        * Seals are only supported on special shmem-files and always affect
+        * the whole underlying inode. Once a seal is set, it may prevent some
+        * kinds of access to the file. Currently, the following seals are
+        * defined:
+        *   SEAL_SEAL: Prevent further seals from being set on this file
+        *   SEAL_SHRINK: Prevent the file from shrinking
+        *   SEAL_GROW: Prevent the file from growing
+        *   SEAL_WRITE: Prevent write access to the file
+        *
+        * As we don't require any trust relationship between two parties, we
+        * must prevent seals from being removed. Therefore, sealing a file
+        * only adds a given set of seals to the file, it never touches
+        * existing seals. Furthermore, the "setting seals"-operation can be
+        * sealed itself, which basically prevents any further seal from being
+        * added.
+        *
+        * Semantics of sealing are only defined on volatile files. Only
+        * anonymous shmem files support sealing. More importantly, seals are
+        * never written to disk. Therefore, there's no plan to support it on
+        * other file types.
+        */
+
+       if (file->f_op != &shmem_file_operations)
+               return -EINVAL;
+       if (!(file->f_mode & FMODE_WRITE))
+               return -EPERM;
+       if (seals & ~(unsigned int)F_ALL_SEALS)
+               return -EINVAL;
+
+       mutex_lock(&inode->i_mutex);
+
+       if (info->seals & F_SEAL_SEAL) {
+               error = -EPERM;
+               goto unlock;
+       }
+
+       if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
+               error = mapping_deny_writable(file->f_mapping);
+               if (error)
+                       goto unlock;
+
+               error = shmem_wait_for_pins(file->f_mapping);
+               if (error) {
+                       mapping_allow_writable(file->f_mapping);
+                       goto unlock;
+               }
+       }
+
+       info->seals |= seals;
+       error = 0;
+
+unlock:
+       mutex_unlock(&inode->i_mutex);
+       return error;
+}
+EXPORT_SYMBOL_GPL(shmem_add_seals);
+
+int shmem_get_seals(struct file *file)
+{
+       if (file->f_op != &shmem_file_operations)
+               return -EINVAL;
+
+       return SHMEM_I(file_inode(file))->seals;
+}
+EXPORT_SYMBOL_GPL(shmem_get_seals);
+
+long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       long error;
+
+       switch (cmd) {
+       case F_ADD_SEALS:
+               /* disallow the upper 32 bits */
+               if (arg > UINT_MAX)
+                       return -EINVAL;
+
+               error = shmem_add_seals(file, arg);
+               break;
+       case F_GET_SEALS:
+               error = shmem_get_seals(file);
+               break;
+       default:
+               error = -EINVAL;
+               break;
+       }
+
+       return error;
+}
+
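From user space, shmem_fcntl() above is reached through the new
F_ADD_SEALS and F_GET_SEALS fcntl() commands. A minimal sketch of their
use (this relies on the memfd_create() syscall added further down in
this file; no libc wrapper existed when this was merged, hence the raw
syscall(), and the header locations of the F_SEAL_*/MFD_* constants
vary with libc and kernel headers):

	#define _GNU_SOURCE
	#include <fcntl.h>              /* F_ADD_SEALS, F_GET_SEALS, F_SEAL_* */
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/memfd.h>        /* MFD_CLOEXEC, MFD_ALLOW_SEALING */

	int main(void)
	{
		int fd = syscall(__NR_memfd_create, "example",
				 MFD_CLOEXEC | MFD_ALLOW_SEALING);
		if (fd < 0)
			return 1;

		/* fix the size first, then forbid resizing and further sealing */
		if (ftruncate(fd, 4096) < 0)
			return 1;
		if (fcntl(fd, F_ADD_SEALS,
			  F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL) < 0)
			return 1;

		/* prints 0x7: F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW */
		printf("seals: 0x%x\n", fcntl(fd, F_GET_SEALS));
		return 0;
	}
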
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                                                         loff_t len)
 {
        struct inode *inode = file_inode(file);
        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+       struct shmem_inode_info *info = SHMEM_I(inode);
        struct shmem_falloc shmem_falloc;
        pgoff_t start, index, end;
        int error;
@@ -1823,6 +2070,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
                DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
+               /* protected by i_mutex */
+               if (info->seals & F_SEAL_WRITE) {
+                       error = -EPERM;
+                       goto out;
+               }
+
                shmem_falloc.waitq = &shmem_falloc_waitq;
                shmem_falloc.start = unmap_start >> PAGE_SHIFT;
                shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -1849,6 +2102,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
        if (error)
                goto out;
 
+       if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
+               error = -EPERM;
+               goto out;
+       }
+
        start = offset >> PAGE_CACHE_SHIFT;
        end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        /* Try to avoid a swapstorm if len is impossible to satisfy */
@@ -2584,6 +2842,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
        shmem_show_mpol(seq, sbinfo->mpol);
        return 0;
 }
+
+#define MFD_NAME_PREFIX "memfd:"
+#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
+#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
+
+#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
+
+SYSCALL_DEFINE2(memfd_create,
+               const char __user *, uname,
+               unsigned int, flags)
+{
+       struct shmem_inode_info *info;
+       struct file *file;
+       int fd, error;
+       char *name;
+       long len;
+
+       if (flags & ~(unsigned int)MFD_ALL_FLAGS)
+               return -EINVAL;
+
+       /* length includes terminating zero */
+       len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
+       if (len <= 0)
+               return -EFAULT;
+       if (len > MFD_NAME_MAX_LEN + 1)
+               return -EINVAL;
+
+       name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
+       if (!name)
+               return -ENOMEM;
+
+       strcpy(name, MFD_NAME_PREFIX);
+       if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
+               error = -EFAULT;
+               goto err_name;
+       }
+
+       /* terminating-zero may have changed after strnlen_user() returned */
+       if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
+               error = -EFAULT;
+               goto err_name;
+       }
+
+       fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
+       if (fd < 0) {
+               error = fd;
+               goto err_name;
+       }
+
+       file = shmem_file_setup(name, 0, VM_NORESERVE);
+       if (IS_ERR(file)) {
+               error = PTR_ERR(file);
+               goto err_fd;
+       }
+       info = SHMEM_I(file_inode(file));
+       file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
+       file->f_flags |= O_RDWR | O_LARGEFILE;
+       if (flags & MFD_ALLOW_SEALING)
+               info->seals &= ~F_SEAL_SEAL;
+
+       fd_install(fd, file);
+       kfree(name);
+       return fd;
+
+err_fd:
+       put_unused_fd(fd);
+err_name:
+       kfree(name);
+       return error;
+}
+
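With NAME_MAX at 255 and the six-character "memfd:" prefix,
MFD_NAME_MAX_LEN works out to 249 characters. Since strnlen_user()
counts the terminating zero, the largest acceptable return value is
250, which is exactly what the len > MFD_NAME_MAX_LEN + 1 check
enforces. The prefix itself is purely diagnostic: it is why such
descriptors show up as "memfd:<name>" in /proc/<pid>/maps and in the
/proc/<pid>/fd symlinks.
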
 #endif /* CONFIG_TMPFS */
 
 static void shmem_put_super(struct super_block *sb)
index 2e60bf3dedbb3925a015e1c66c0c871f03f28f6f..a467b308c682334254c53fdf12a6114aad1ce1fd 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -470,6 +470,8 @@ static struct kmem_cache kmem_cache_boot = {
        .name = "kmem_cache",
 };
 
+#define BAD_ALIEN_MAGIC 0x01020304ul
+
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -836,7 +838,7 @@ static int transfer_objects(struct array_cache *to,
 static inline struct alien_cache **alloc_alien_cache(int node,
                                                int limit, gfp_t gfp)
 {
-       return NULL;
+       return (struct alien_cache **)BAD_ALIEN_MAGIC;
 }
 
 static inline void free_alien_cache(struct alien_cache **ac_ptr)
index c789d01c9ec31db05803566bc23baee090908366..6b2dc3897cd575f94873df4e6bdaadebd7a1f2a1 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page)
                del_page_from_lru_list(page, lruvec, page_off_lru(page));
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        }
+       mem_cgroup_uncharge(page);
 }
 
 static void __put_single_page(struct page *page)
@@ -687,6 +688,40 @@ void add_page_to_unevictable_list(struct page *page)
        spin_unlock_irq(&zone->lru_lock);
 }
 
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page:  the page to be added to LRU
+ * @vma:   vma in which page is mapped for determining reclaimability
+ *
+ * Place @page on the active or unevictable LRU list, depending on its
+ * evictability.  Note that if the page is not evictable, it goes
+ * directly back onto its zone's unevictable list; it does NOT use a
+ * per-cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+                                        struct vm_area_struct *vma)
+{
+       VM_BUG_ON_PAGE(PageLRU(page), page);
+
+       if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+               SetPageActive(page);
+               lru_cache_add(page);
+               return;
+       }
+
+       if (!TestSetPageMlocked(page)) {
+               /*
+                * We use the irq-unsafe __mod_zone_page_state because this
+                * counter is not modified from interrupt context, and the pte
+                * lock is held (spinlock), which implies preemption disabled.
+                */
+               __mod_zone_page_state(page_zone(page), NR_MLOCK,
+                                   hpage_nr_pages(page));
+               count_vm_event(UNEVICTABLE_PGMLOCKED);
+       }
+       add_page_to_unevictable_list(page);
+}
+
 /*
  * If the page can not be invalidated, it is moved to the
  * inactive list to speed up its reclaim.  It is moved to the
@@ -913,6 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold)
        if (zone)
                spin_unlock_irqrestore(&zone->lru_lock, flags);
 
+       mem_cgroup_uncharge_list(&pages_to_free);
        free_hot_cold_page_list(&pages_to_free, cold);
 }
 EXPORT_SYMBOL(release_pages);
index 2972eee184a44c1dc1df3eeed766d7d3140e353b..3e0ec83d000cdf3f7a65e620dc6696f039dc8d98 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,6 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = {
 struct address_space swapper_spaces[MAX_SWAPFILES] = {
        [0 ... MAX_SWAPFILES - 1] = {
                .page_tree      = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
+               .i_mmap_writable = ATOMIC_INIT(0),
                .a_ops          = &swap_aops,
                .backing_dev_info = &swap_backing_dev_info,
        }
@@ -176,7 +177,7 @@ int add_to_swap(struct page *page, struct list_head *list)
 
        if (unlikely(PageTransHuge(page)))
                if (unlikely(split_huge_page_to_list(page, list))) {
-                       swapcache_free(entry, NULL);
+                       swapcache_free(entry);
                        return 0;
                }
 
@@ -202,7 +203,7 @@ int add_to_swap(struct page *page, struct list_head *list)
                 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
                 * clear SWAP_HAS_CACHE flag.
                 */
-               swapcache_free(entry, NULL);
+               swapcache_free(entry);
                return 0;
        }
 }
@@ -225,7 +226,7 @@ void delete_from_swap_cache(struct page *page)
        __delete_from_swap_cache(page);
        spin_unlock_irq(&address_space->tree_lock);
 
-       swapcache_free(entry, page);
+       swapcache_free(entry);
        page_cache_release(page);
 }
 
@@ -386,7 +387,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
                 * clear SWAP_HAS_CACHE flag.
                 */
-               swapcache_free(entry, NULL);
+               swapcache_free(entry);
        } while (err != -ENOMEM);
 
        if (new_page)
index 4c524f7bd0bfe69c23e2b28a13cad902ee3ac292..8798b2e0ac594a21e9ab624b7ce9373172eb12ae 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry)
 /*
  * Called after dropping swapcache to decrease refcnt to swap entries.
  */
-void swapcache_free(swp_entry_t entry, struct page *page)
+void swapcache_free(swp_entry_t entry)
 {
        struct swap_info_struct *p;
-       unsigned char count;
 
        p = swap_info_get(entry);
        if (p) {
-               count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
-               if (page)
-                       mem_cgroup_uncharge_swapcache(page, entry, count != 0);
+               swap_entry_free(p, entry, SWAP_HAS_CACHE);
                spin_unlock(&p->lock);
        }
 }
@@ -1106,15 +1103,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        if (unlikely(!page))
                return -ENOMEM;
 
-       if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
-                                        GFP_KERNEL, &memcg)) {
+       if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) {
                ret = -ENOMEM;
                goto out_nolock;
        }
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
-               mem_cgroup_cancel_charge_swapin(memcg);
+               mem_cgroup_cancel_charge(page, memcg);
                ret = 0;
                goto out;
        }
@@ -1124,11 +1120,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        get_page(page);
        set_pte_at(vma->vm_mm, addr, pte,
                   pte_mkold(mk_pte(page, vma->vm_page_prot)));
-       if (page == swapcache)
+       if (page == swapcache) {
                page_add_anon_rmap(page, vma, addr);
-       else /* ksm created a completely new copy */
+               mem_cgroup_commit_charge(page, memcg, true);
+       } else { /* ksm created a completely new copy */
                page_add_new_anon_rmap(page, vma, addr);
-       mem_cgroup_commit_charge_swapin(page, memcg);
+               mem_cgroup_commit_charge(page, memcg, false);
+               lru_cache_add_active_or_unevictable(page, vma);
+       }
        swap_free(entry);
        /*
         * Move the page to the active list so it is not
index eda2473071648cc47935dd9e21e9f57fd402a4dd..96d167372d89405372ef7cd544799e76fd99a886 100644 (file)
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
        while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
                        min(end - index, (pgoff_t)PAGEVEC_SIZE),
                        indices)) {
-               mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
@@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
                }
                pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                cond_resched();
                index++;
        }
@@ -369,7 +367,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
                        pagevec_release(&pvec);
                        break;
                }
-               mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
@@ -394,7 +391,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
                }
                pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                index++;
        }
        cleancache_invalidate_inode(mapping);
@@ -493,7 +489,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
        while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
                        min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
                        indices)) {
-               mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
@@ -522,7 +517,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
                }
                pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                cond_resched();
                index++;
        }
@@ -553,7 +547,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
        BUG_ON(page_has_private(page));
        __delete_from_page_cache(page, NULL);
        spin_unlock_irq(&mapping->tree_lock);
-       mem_cgroup_uncharge_cache_page(page);
 
        if (mapping->a_ops->freepage)
                mapping->a_ops->freepage(page);
@@ -602,7 +595,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
        while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
                        min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
                        indices)) {
-               mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
@@ -655,7 +647,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                }
                pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                cond_resched();
                index++;
        }
index 7b6608df2ee803d9d4345dc11290eb17100199f4..093c973f1697dd39545c6c93aeac8e9c3d60cf52 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -183,17 +183,14 @@ pid_t vm_is_stack(struct task_struct *task,
 
        if (in_group) {
                struct task_struct *t;
-               rcu_read_lock();
-               if (!pid_alive(task))
-                       goto done;
 
-               t = task;
-               do {
+               rcu_read_lock();
+               for_each_thread(task, t) {
                        if (vm_is_stack_for_task(t, vma)) {
                                ret = t->pid;
                                goto done;
                        }
-               } while_each_thread(task, t);
+               }
 done:
                rcu_read_unlock();
        }
index d2f65c856350eb179f83c20cde87d961d92eb28f..2836b5373b2e7623a1143a98bf3997fa11865731 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -577,9 +577,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 
        if (PageSwapCache(page)) {
                swp_entry_t swap = { .val = page_private(page) };
+               mem_cgroup_swapout(page, swap);
                __delete_from_swap_cache(page);
                spin_unlock_irq(&mapping->tree_lock);
-               swapcache_free(swap, page);
+               swapcache_free(swap);
        } else {
                void (*freepage)(struct page *);
                void *shadow = NULL;
@@ -600,7 +601,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                        shadow = workingset_eviction(mapping, page);
                __delete_from_page_cache(page, shadow);
                spin_unlock_irq(&mapping->tree_lock);
-               mem_cgroup_uncharge_cache_page(page);
 
                if (freepage != NULL)
                        freepage(page);
@@ -822,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
        cond_resched();
 
-       mem_cgroup_uncharge_start();
        while (!list_empty(page_list)) {
                struct address_space *mapping;
                struct page *page;
@@ -1133,11 +1132,12 @@ keep:
                VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
        }
 
+       mem_cgroup_uncharge_list(&free_pages);
        free_hot_cold_page_list(&free_pages, true);
 
        list_splice(&ret_pages, page_list);
        count_vm_events(PGACTIVATE, pgactivate);
-       mem_cgroup_uncharge_end();
+
        *ret_nr_dirty += nr_dirty;
        *ret_nr_congested += nr_congested;
        *ret_nr_unqueued_dirty += nr_unqueued_dirty;
@@ -1437,6 +1437,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 
                        if (unlikely(PageCompound(page))) {
                                spin_unlock_irq(&zone->lru_lock);
+                               mem_cgroup_uncharge(page);
                                (*get_compound_page_dtor(page))(page);
                                spin_lock_irq(&zone->lru_lock);
                        } else
@@ -1544,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
        spin_unlock_irq(&zone->lru_lock);
 
+       mem_cgroup_uncharge_list(&page_list);
        free_hot_cold_page_list(&page_list, true);
 
        /*
@@ -1658,6 +1660,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
 
                        if (unlikely(PageCompound(page))) {
                                spin_unlock_irq(&zone->lru_lock);
+                               mem_cgroup_uncharge(page);
                                (*get_compound_page_dtor(page))(page);
                                spin_lock_irq(&zone->lru_lock);
                        } else
@@ -1765,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
        __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
        spin_unlock_irq(&zone->lru_lock);
 
+       mem_cgroup_uncharge_list(&l_hold);
        free_hot_cold_page_list(&l_hold, true);
 }
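
A hedged sketch of the reworked memcg uncharge pattern visible in the
mm/truncate.c and mm/vmscan.c hunks above: rather than bracketing page
loops with mem_cgroup_uncharge_start()/mem_cgroup_uncharge_end(),
callers now batch pages on a list and uncharge the whole batch right
before handing it back to the allocator (surrounding reclaim context
elided):

	LIST_HEAD(free_pages);

	/* ... reclaim loop collects pages whose last reference is gone ... */

	mem_cgroup_uncharge_list(&free_pages);	/* uncharge the batch */
	free_hot_cold_page_list(&free_pages, true);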
 
index 032c21eeab2b0f05ebe73a9a7d1fd820651f3cef..ea064c1a09ba79003a5ed5e8990d1ce44f2e6122 100644 (file)
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -212,7 +212,7 @@ static int zswap_entry_cache_create(void)
        return zswap_entry_cache == NULL;
 }
 
-static void zswap_entry_cache_destory(void)
+static void __init zswap_entry_cache_destroy(void)
 {
        kmem_cache_destroy(zswap_entry_cache);
 }
@@ -507,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
                 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
                 * clear SWAP_HAS_CACHE flag.
                 */
-               swapcache_free(entry, NULL);
+               swapcache_free(entry);
        } while (err != -ENOMEM);
 
        if (new_page)
@@ -941,7 +941,7 @@ static int __init init_zswap(void)
 pcpufail:
        zswap_comp_exit();
 compfail:
-       zswap_entry_cache_destory();
+       zswap_entry_cache_destroy();
 cachefail:
        zpool_destroy_pool(zswap_pool);
 error:
index fb070fa1038feb51e47db9c4ec807ee328fe6364..5ecfe93f2028712afa413dba6f67c72e1ce0930e 100644 (file)
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -4,7 +4,6 @@
 conmakehash
 kallsyms
 pnmtologo
-bin2c
 unifdef
 ihex2fw
 recordmcount
index 890df5c6adfbc16769a4abcb27bb3860fc625e71..72902b5f27213696604f077bc8ad78e8f82d9bd0 100644 (file)
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -13,7 +13,6 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
 hostprogs-$(CONFIG_KALLSYMS)     += kallsyms
 hostprogs-$(CONFIG_LOGO)         += pnmtologo
 hostprogs-$(CONFIG_VT)           += conmakehash
-hostprogs-$(CONFIG_IKCONFIG)     += bin2c
 hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
 hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable
 hostprogs-$(CONFIG_ASN1)        += asn1_compiler
index a776371a350243330b14dc87b636146ba4ef2b16..9528ec9e5adc4a89e901f3e9cdfe6140f8452573 100644 (file)
--- a/scripts/basic/.gitignore
+++ b/scripts/basic/.gitignore
@@ -1 +1,2 @@
 fixdep
+bin2c
index 4fcef87bb8759894435a395224c7d92cd7a14214..ec10d9345bc2d9b978d8cae4d85bf2fd89ab121c 100644 (file)
--- a/scripts/basic/Makefile
+++ b/scripts/basic/Makefile
@@ -9,6 +9,7 @@
 # fixdep:       Used to generate dependency information during build process
 
 hostprogs-y    := fixdep
+hostprogs-$(CONFIG_BUILD_BIN2C)     += bin2c
 always         := $(hostprogs-y)
 
 # fixdep is needed to compile other host programs
diff --git a/scripts/basic/bin2c.c b/scripts/basic/bin2c.c
new file mode 100644 (file)
index 0000000..af187e6
--- /dev/null
+++ b/scripts/basic/bin2c.c
@@ -0,0 +1,35 @@
+/*
+ * Unloved program to convert a binary on stdin to a C include on stdout
+ *
+ * Jan 1999 Matt Mackall <mpm@selenic.com>
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ */
+
+#include <stdio.h>
+
+int main(int argc, char *argv[])
+{
+       int ch, total = 0;
+
+       if (argc > 1)
+               printf("const char %s[] %s=\n",
+                       argv[1], argc > 2 ? argv[2] : "");
+
+       do {
+               printf("\t\"");
+               while ((ch = getchar()) != EOF) {
+                       total++;
+                       printf("\\x%02x", ch);
+                       if (total % 16 == 0)
+                               break;
+               }
+               printf("\"\n");
+       } while (ch != EOF);
+
+       if (argc > 1)
+               printf("\t;\n\nconst int %s_size = %d;\n", argv[1], total);
+
+       return 0;
+}
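
A hedged usage sketch for the bin2c host tool added above (file and
variable names hypothetical): it reads a binary on stdin and writes a C
array definition to stdout, taking the array name from argv[1] and an
optional extra qualifier from argv[2]:

	/*   $ bin2c my_blob < my_blob.bin > my_blob.c   */

	/* my_blob.c then contains, roughly: */
	const char my_blob[] =
		"\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00"
		;
	const int my_blob_size = 16;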
diff --git a/scripts/bin2c.c b/scripts/bin2c.c
deleted file mode 100644 (file)
index 96dd2bc..0000000
--- a/scripts/bin2c.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Unloved program to convert a binary on stdin to a C include on stdout
- *
- * Jan 1999 Matt Mackall <mpm@selenic.com>
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- */
-
-#include <stdio.h>
-
-int main(int argc, char *argv[])
-{
-       int ch, total=0;
-
-       if (argc > 1)
-               printf("const char %s[] %s=\n",
-                       argv[1], argc > 2 ? argv[2] : "");
-
-       do {
-               printf("\t\"");
-               while ((ch = getchar()) != EOF)
-               {
-                       total++;
-                       printf("\\x%02x",ch);
-                       if (total % 16 == 0)
-                               break;
-               }
-               printf("\"\n");
-       } while (ch != EOF);
-
-       if (argc > 1)
-               printf("\t;\n\nconst int %s_size = %d;\n", argv[1], total);
-
-       return 0;
-}
index c05d586b1feeddd92d9d030d77e0b31b9ea0eb58..899b4230320e0f8e032ddedbd0eaff6add225f30 100755 (executable)
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -52,14 +52,12 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
                #8000008a:       20 1d           sub sp,4
                #80000ca8:       fa cd 05 b0     sub sp,sp,1456
                $re = qr/^.*sub.*sp.*,([0-9]{1,8})/o;
-       } elsif ($arch =~ /^i[3456]86$/) {
+       } elsif ($arch =~ /^x86(_64)?$/ || $arch =~ /^i[3456]86$/) {
                #c0105234:       81 ec ac 05 00 00       sub    $0x5ac,%esp
-               $re = qr/^.*[as][du][db]    \$(0x$x{1,8}),\%esp$/o;
-               $dre = qr/^.*[as][du][db]    (%.*),\%esp$/o;
-       } elsif ($arch eq 'x86_64') {
-               #    2f60:      48 81 ec e8 05 00 00    sub    $0x5e8,%rsp
-               $re = qr/^.*[as][du][db]    \$(0x$x{1,8}),\%rsp$/o;
-               $dre = qr/^.*[as][du][db]    (\%.*),\%rsp$/o;
+               # or
+               #    2f60:    48 81 ec e8 05 00 00       sub    $0x5e8,%rsp
+               $re = qr/^.*[as][du][db]    \$(0x$x{1,8}),\%(e|r)sp$/o;
+               $dre = qr/^.*[as][du][db]    (%.*),\%(e|r)sp$/o;
        } elsif ($arch eq 'ia64') {
                #e0000000044011fc:       01 0f fc 8c     adds r12=-384,r12
                $re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
diff --git a/scripts/coccinelle/free/ifnullfree.cocci b/scripts/coccinelle/free/ifnullfree.cocci
new file mode 100644 (file)
index 0000000..a42d70b
--- /dev/null
+++ b/scripts/coccinelle/free/ifnullfree.cocci
@@ -0,0 +1,53 @@
+/// NULL check before some freeing functions is not needed.
+///
+/// Based on checkpatch warning
+/// "kfree(NULL) is safe this check is probably not required"
+/// and kfreeaddr.cocci by Julia Lawall.
+///
+// Copyright: (C) 2014 Fabian Frederick.  GPLv2.
+// Comments: -
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual org
+virtual report
+virtual context
+
+@r2 depends on patch@
+expression E;
+@@
+- if (E)
+(
+-      kfree(E);
++ kfree(E);
+|
+-      debugfs_remove(E);
++ debugfs_remove(E);
+|
+-      debugfs_remove_recursive(E);
++ debugfs_remove_recursive(E);
+|
+-      usb_free_urb(E);
++ usb_free_urb(E);
+)
+
+@r depends on context || report || org @
+expression E;
+position p;
+@@
+
+* if (E)
+*      \(kfree@p\|debugfs_remove@p\|debugfs_remove_recursive@p\|usb_free_urb\)(E);
+
+@script:python depends on org@
+p << r.p;
+@@
+
+cocci.print_main("NULL check before that freeing function is not needed", p)
+
+@script:python depends on report@
+p << r.p;
+@@
+
+msg = "WARNING: NULL check before freeing functions like kfree, debugfs_remove, debugfs_remove_recursive or usb_free_urb is not needed. Maybe consider reorganizing relevant code to avoid passing NULL values."
+coccilib.report.print_report(p[0], msg)
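
In patch mode, the semantic patch above performs rewrites like the
following before/after illustration (hedged example; the listed freeing
functions all accept NULL, so the guard is redundant):

	/* before */
	if (dev->priv)
		kfree(dev->priv);

	/* after */
	kfree(dev->priv);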
index e6b011fe1d0d1dfd850f345f55f0a7f057d6435d..cbfd269a6011154b21cf2d76b2894915577396ec 100755 (executable)
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -168,6 +168,7 @@ exuberant()
        --extra=+f --c-kinds=+px                                \
        --regex-asm='/^(ENTRY|_GLOBAL)\(([^)]*)\).*/\2/'        \
        --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \
+       --regex-c='/^COMPAT_SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/compat_sys_\1/' \
        --regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/'               \
        --regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/'     \
        --regex-c++='/PAGEFLAG\(([^,)]*).*/Page\1/'                     \
@@ -231,6 +232,7 @@ emacs()
        all_target_sources | xargs $1 -a                        \
        --regex='/^\(ENTRY\|_GLOBAL\)(\([^)]*\)).*/\2/'         \
        --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/'   \
+       --regex='/^COMPAT_SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/compat_sys_\1/' \
        --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/'          \
        --regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' \
        --regex='/PAGEFLAG(\([^,)]*\).*/Page\1/'                        \
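
A hedged illustration of what the new tags regexes above pick up: a
compat syscall definition such as

	COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)

is now indexed under the tag compat_sys_ftruncate, mirroring how
SYSCALL_DEFINEn() entries are already tagged as sys_<name>.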
index d10f95ce2ea459d648814f30f0143bd37d3c292a..6fd2a440206964beb15e9579cb7a5652b518175a 100644 (file)
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -2,6 +2,7 @@ TARGETS = breakpoints
 TARGETS += cpu-hotplug
 TARGETS += efivarfs
 TARGETS += kcmp
+TARGETS += memfd
 TARGETS += memory-hotplug
 TARGETS += mqueue
 TARGETS += net
diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore
new file mode 100644 (file)
index 0000000..afe87c4
--- /dev/null
+++ b/tools/testing/selftests/memfd/.gitignore
@@ -0,0 +1,4 @@
+fuse_mnt
+fuse_test
+memfd_test
+memfd-test-file
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
new file mode 100644 (file)
index 0000000..6816c49
--- /dev/null
+++ b/tools/testing/selftests/memfd/Makefile
@@ -0,0 +1,41 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+       ARCH := X86
+endif
+ifeq ($(ARCH),x86_64)
+       ARCH := X86
+endif
+
+CFLAGS += -D_FILE_OFFSET_BITS=64
+CFLAGS += -I../../../../arch/x86/include/generated/uapi/
+CFLAGS += -I../../../../arch/x86/include/uapi/
+CFLAGS += -I../../../../include/uapi/
+CFLAGS += -I../../../../include/
+
+all:
+ifeq ($(ARCH),X86)
+       gcc $(CFLAGS) memfd_test.c -o memfd_test
+else
+       echo "Not an x86 target, can't build memfd selftest"
+endif
+
+run_tests: all
+ifeq ($(ARCH),X86)
+       gcc $(CFLAGS) memfd_test.c -o memfd_test
+endif
+       @./memfd_test || echo "memfd_test: [FAIL]"
+
+build_fuse:
+ifeq ($(ARCH),X86)
+       gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
+       gcc $(CFLAGS) fuse_test.c -o fuse_test
+else
+       echo "Not an x86 target, can't build memfd selftest"
+endif
+
+run_fuse: build_fuse
+       @./run_fuse_test.sh || echo "fuse_test: [FAIL]"
+
+clean:
+       $(RM) fuse_mnt fuse_test memfd_test
diff --git a/tools/testing/selftests/memfd/fuse_mnt.c b/tools/testing/selftests/memfd/fuse_mnt.c
new file mode 100644 (file)
index 0000000..feacf12
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_mnt.c
@@ -0,0 +1,110 @@
+/*
+ * memfd test file-system
+ * This file uses FUSE to create a dummy file-system with only one file /memfd.
+ * This file is read-only and takes 1s per read.
+ *
+ * This file-system is used by the memfd test-cases to force the kernel to pin
+ * pages during reads(). Due to the 1s delay of this file-system, this is a
+ * nice way to test race-conditions against get_user_pages() in the kernel.
+ *
+ * We use direct_io==1 to force the kernel to use direct-IO for this
+ * file-system.
+ */
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static const char memfd_content[] = "memfd-example-content";
+static const char memfd_path[] = "/memfd";
+
+static int memfd_getattr(const char *path, struct stat *st)
+{
+       memset(st, 0, sizeof(*st));
+
+       if (!strcmp(path, "/")) {
+               st->st_mode = S_IFDIR | 0755;
+               st->st_nlink = 2;
+       } else if (!strcmp(path, memfd_path)) {
+               st->st_mode = S_IFREG | 0444;
+               st->st_nlink = 1;
+               st->st_size = strlen(memfd_content);
+       } else {
+               return -ENOENT;
+       }
+
+       return 0;
+}
+
+static int memfd_readdir(const char *path,
+                        void *buf,
+                        fuse_fill_dir_t filler,
+                        off_t offset,
+                        struct fuse_file_info *fi)
+{
+       if (strcmp(path, "/"))
+               return -ENOENT;
+
+       filler(buf, ".", NULL, 0);
+       filler(buf, "..", NULL, 0);
+       filler(buf, memfd_path + 1, NULL, 0);
+
+       return 0;
+}
+
+static int memfd_open(const char *path, struct fuse_file_info *fi)
+{
+       if (strcmp(path, memfd_path))
+               return -ENOENT;
+
+       if ((fi->flags & 3) != O_RDONLY)
+               return -EACCES;
+
+       /* force direct-IO */
+       fi->direct_io = 1;
+
+       return 0;
+}
+
+static int memfd_read(const char *path,
+                     char *buf,
+                     size_t size,
+                     off_t offset,
+                     struct fuse_file_info *fi)
+{
+       size_t len;
+
+       if (strcmp(path, memfd_path) != 0)
+               return -ENOENT;
+
+       sleep(1);
+
+       len = strlen(memfd_content);
+       if (offset < len) {
+               if (offset + size > len)
+                       size = len - offset;
+
+               memcpy(buf, memfd_content + offset, size);
+       } else {
+               size = 0;
+       }
+
+       return size;
+}
+
+static struct fuse_operations memfd_ops = {
+       .getattr        = memfd_getattr,
+       .readdir        = memfd_readdir,
+       .open           = memfd_open,
+       .read           = memfd_read,
+};
+
+int main(int argc, char *argv[])
+{
+       return fuse_main(argc, argv, &memfd_ops, NULL);
+}
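
The helper above is exercised by run_fuse_test.sh, added at the end of
this series; a hedged transcript of what a manual session looks like:

	/*
	 *   $ mkdir mnt
	 *   $ ./fuse_mnt ./mnt
	 *   $ cat ./mnt/memfd      # direct-IO, each read() stalls ~1s
	 *   memfd-example-content
	 *   $ fusermount -u ./mnt
	 */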
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
new file mode 100644 (file)
index 0000000..67908b1
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -0,0 +1,311 @@
+/*
+ * memfd GUP test-case
+ * This tests memfd interactions with get_user_pages(). We require the
+ * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
+ * file-system delays _all_ reads by 1s and forces direct-IO. This means any
+ * read() on files in that file-system will pin the receive-buffer pages for at
+ * least 1s via get_user_pages().
+ *
+ * We use this trick to race ADD_SEALS against a write on a memfd object. The
+ * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
+ * the read() syscall with our memory-mapped memfd object as receive buffer to
+ * force the kernel to write into our memfd object.
+ */
+
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65535
+
+static int sys_memfd_create(const char *name,
+                           unsigned int flags)
+{
+       return syscall(__NR_memfd_create, name, flags);
+}
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+       int r, fd;
+
+       fd = sys_memfd_create(name, flags);
+       if (fd < 0) {
+               printf("memfd_create(\"%s\", %u) failed: %m\n",
+                      name, flags);
+               abort();
+       }
+
+       r = ftruncate(fd, sz);
+       if (r < 0) {
+               printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+               abort();
+       }
+
+       return fd;
+}
+
+static __u64 mfd_assert_get_seals(int fd)
+{
+       long r;
+
+       r = fcntl(fd, F_GET_SEALS);
+       if (r < 0) {
+               printf("GET_SEALS(%d) failed: %m\n", fd);
+               abort();
+       }
+
+       return r;
+}
+
+static void mfd_assert_has_seals(int fd, __u64 seals)
+{
+       __u64 s;
+
+       s = mfd_assert_get_seals(fd);
+       if (s != seals) {
+               printf("%llu != %llu = GET_SEALS(%d)\n",
+                      (unsigned long long)seals, (unsigned long long)s, fd);
+               abort();
+       }
+}
+
+static void mfd_assert_add_seals(int fd, __u64 seals)
+{
+       long r;
+       __u64 s;
+
+       s = mfd_assert_get_seals(fd);
+       r = fcntl(fd, F_ADD_SEALS, seals);
+       if (r < 0) {
+               printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
+                      fd, (unsigned long long)s, (unsigned long long)seals);
+               abort();
+       }
+}
+
+static int mfd_busy_add_seals(int fd, __u64 seals)
+{
+       long r;
+       __u64 s;
+
+       r = fcntl(fd, F_GET_SEALS);
+       if (r < 0)
+               s = 0;
+       else
+               s = r;
+
+       r = fcntl(fd, F_ADD_SEALS, seals);
+       if (r < 0 && errno != EBUSY) {
+               printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
+                      fd, (unsigned long long)s, (unsigned long long)seals);
+               abort();
+       }
+
+       return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+       void *p;
+
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ | PROT_WRITE,
+                MAP_SHARED,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+
+       return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+       void *p;
+
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ | PROT_WRITE,
+                MAP_PRIVATE,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+
+       return p;
+}
+
+static int global_mfd = -1;
+static void *global_p = NULL;
+
+static int sealing_thread_fn(void *arg)
+{
+       int r;
+
+       /*
+        * This thread first waits 200ms so any pending operation in the parent
+        * is correctly started. After that, it tries to seal @global_mfd as
+        * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
+        * that memory mapped object still ongoing.
+        * We then wait one more second and try sealing again. This time it
+        * must succeed as there shouldn't be anyone else pinning the pages.
+        */
+
+       /* wait 200ms for FUSE-request to be active */
+       usleep(200000);
+
+       /* unmap the mapping before sealing to avoid i_mmap_writable failures */
+       munmap(global_p, MFD_DEF_SIZE);
+
+       /* Try sealing the global file; expect EBUSY or success. Current
+        * kernels will never succeed, but in the future, kernels might
+        * implement page-replacements or other fancy ways to avoid racing
+        * writes. */
+       r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
+       if (r >= 0) {
+               printf("HURRAY! This kernel fixed GUP races!\n");
+       } else {
+               /* wait 1s more so the FUSE-request is done */
+               sleep(1);
+
+               /* try sealing the global file again */
+               mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
+       }
+
+       return 0;
+}
+
+static pid_t spawn_sealing_thread(void)
+{
+       uint8_t *stack;
+       pid_t pid;
+
+       stack = malloc(STACK_SIZE);
+       if (!stack) {
+               printf("malloc(STACK_SIZE) failed: %m\n");
+               abort();
+       }
+
+       pid = clone(sealing_thread_fn,
+                   stack + STACK_SIZE,
+                   SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
+                   NULL);
+       if (pid < 0) {
+               printf("clone() failed: %m\n");
+               abort();
+       }
+
+       return pid;
+}
+
+static void join_sealing_thread(pid_t pid)
+{
+       waitpid(pid, NULL, 0);
+}
+
+int main(int argc, char **argv)
+{
+       static const char zero[MFD_DEF_SIZE];
+       int fd, mfd, r;
+       void *p;
+       int was_sealed;
+       pid_t pid;
+
+       if (argc < 2) {
+               printf("error: please pass path to file in fuse_mnt mount-point\n");
+               abort();
+       }
+
+       /* open FUSE memfd file for GUP testing */
+       printf("opening: %s\n", argv[1]);
+       fd = open(argv[1], O_RDONLY | O_CLOEXEC);
+       if (fd < 0) {
+               printf("cannot open(\"%s\"): %m\n", argv[1]);
+               abort();
+       }
+
+       /* create new memfd-object */
+       mfd = mfd_assert_new("kern_memfd_fuse",
+                            MFD_DEF_SIZE,
+                            MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+       /* mmap memfd-object for writing */
+       p = mfd_assert_mmap_shared(mfd);
+
+       /* pass mfd+mapping to a separate sealing-thread which tries to seal
+        * the memfd objects with SEAL_WRITE while we write into it */
+       global_mfd = mfd;
+       global_p = p;
+       pid = spawn_sealing_thread();
+
+       /* Use read() on the FUSE file to read into our memory-mapped memfd
+        * object. This races the other thread which tries to seal the
+        * memfd-object.
+        * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
+        * This guarantees that the receive-buffer is pinned for 1s until the
+        * data is written into it. The racing ADD_SEALS should thus fail as
+        * the pages are still pinned. */
+       r = read(fd, p, MFD_DEF_SIZE);
+       if (r < 0) {
+               printf("read() failed: %m\n");
+               abort();
+       } else if (!r) {
+               printf("unexpected EOF on read()\n");
+               abort();
+       }
+
+       was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
+
+       /* Wait for sealing-thread to finish and verify that it
+        * successfully sealed the file after the second try. */
+       join_sealing_thread(pid);
+       mfd_assert_has_seals(mfd, F_SEAL_WRITE);
+
+       /* *IF* the memfd-object was sealed at the time our read() returned,
+        * then the kernel did a page-replacement or canceled the read() (or
+        * whatever magic it did..). In that case, the memfd object is still
+        * all zero.
+        * In case the memfd-object was *not* sealed, the read() was successful
+        * and the memfd object must *not* be all zero.
+        * Note that in real scenarios, there might be a mixture of both, but
+        * in this test-case, we have explicit 200ms delays which should be
+        * enough to avoid any in-flight writes. */
+
+       p = mfd_assert_mmap_private(mfd);
+       if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) {
+               printf("memfd sealed during read() but data not discarded\n");
+               abort();
+       } else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) {
+               printf("memfd sealed after read() but data discarded\n");
+               abort();
+       }
+
+       close(mfd);
+       close(fd);
+
+       printf("fuse: DONE\n");
+
+       return 0;
+}
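
Before the full suite below, a minimal self-contained sketch of the
sealing API these tests exercise (hedged example: assumes a kernel with
memfd_create() wired up and a libc without a wrapper, hence the raw
syscall(); the fallback constants mirror the uapi headers added by this
series):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#ifndef F_ADD_SEALS		/* from <linux/fcntl.h> */
	#define F_ADD_SEALS	1033
	#define F_GET_SEALS	1034
	#define F_SEAL_SEAL	0x0001
	#define F_SEAL_SHRINK	0x0002
	#define F_SEAL_GROW	0x0004
	#define F_SEAL_WRITE	0x0008
	#endif
	#ifndef MFD_CLOEXEC		/* from <linux/memfd.h> */
	#define MFD_CLOEXEC		0x0001U
	#define MFD_ALLOW_SEALING	0x0002U
	#endif

	int main(void)
	{
		int fd = syscall(__NR_memfd_create, "demo",
				 MFD_CLOEXEC | MFD_ALLOW_SEALING);

		if (fd < 0 || ftruncate(fd, 4096) < 0) {
			perror("memfd setup");
			return 1;
		}

		/* forbid resizing and writes, then lock the seal set */
		if (fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW |
					   F_SEAL_WRITE | F_SEAL_SEAL) < 0)
			perror("F_ADD_SEALS");

		if (write(fd, "x", 1) < 0)
			perror("write on sealed memfd");	/* expect EPERM */

		printf("seals: 0x%x\n", fcntl(fd, F_GET_SEALS));
		return 0;
	}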
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
new file mode 100644 (file)
index 0000000..3634c90
--- /dev/null
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -0,0 +1,913 @@
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65535
+
+static int sys_memfd_create(const char *name,
+                           unsigned int flags)
+{
+       return syscall(__NR_memfd_create, name, flags);
+}
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+       int r, fd;
+
+       fd = sys_memfd_create(name, flags);
+       if (fd < 0) {
+               printf("memfd_create(\"%s\", %u) failed: %m\n",
+                      name, flags);
+               abort();
+       }
+
+       r = ftruncate(fd, sz);
+       if (r < 0) {
+               printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+               abort();
+       }
+
+       return fd;
+}
+
+static void mfd_fail_new(const char *name, unsigned int flags)
+{
+       int r;
+
+       r = sys_memfd_create(name, flags);
+       if (r >= 0) {
+               printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
+                      name, flags);
+               close(r);
+               abort();
+       }
+}
+
+static __u64 mfd_assert_get_seals(int fd)
+{
+       long r;
+
+       r = fcntl(fd, F_GET_SEALS);
+       if (r < 0) {
+               printf("GET_SEALS(%d) failed: %m\n", fd);
+               abort();
+       }
+
+       return r;
+}
+
+static void mfd_assert_has_seals(int fd, __u64 seals)
+{
+       __u64 s;
+
+       s = mfd_assert_get_seals(fd);
+       if (s != seals) {
+               printf("%llu != %llu = GET_SEALS(%d)\n",
+                      (unsigned long long)seals, (unsigned long long)s, fd);
+               abort();
+       }
+}
+
+static void mfd_assert_add_seals(int fd, __u64 seals)
+{
+       long r;
+       __u64 s;
+
+       s = mfd_assert_get_seals(fd);
+       r = fcntl(fd, F_ADD_SEALS, seals);
+       if (r < 0) {
+               printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
+                      fd, (unsigned long long)s, (unsigned long long)seals);
+               abort();
+       }
+}
+
+static void mfd_fail_add_seals(int fd, __u64 seals)
+{
+       long r;
+       __u64 s;
+
+       r = fcntl(fd, F_GET_SEALS);
+       if (r < 0)
+               s = 0;
+       else
+               s = r;
+
+       r = fcntl(fd, F_ADD_SEALS, seals);
+       if (r >= 0) {
+               printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected\n",
+                      fd, (unsigned long long)s, (unsigned long long)seals);
+               abort();
+       }
+}
+
+static void mfd_assert_size(int fd, size_t size)
+{
+       struct stat st;
+       int r;
+
+       r = fstat(fd, &st);
+       if (r < 0) {
+               printf("fstat(%d) failed: %m\n", fd);
+               abort();
+       } else if (st.st_size != size) {
+               printf("wrong file size %lld, but expected %lld\n",
+                      (long long)st.st_size, (long long)size);
+               abort();
+       }
+}
+
+static int mfd_assert_dup(int fd)
+{
+       int r;
+
+       r = dup(fd);
+       if (r < 0) {
+               printf("dup(%d) failed: %m\n", fd);
+               abort();
+       }
+
+       return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+       void *p;
+
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ | PROT_WRITE,
+                MAP_SHARED,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+
+       return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+       void *p;
+
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ,
+                MAP_PRIVATE,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+
+       return p;
+}
+
+static int mfd_assert_open(int fd, int flags, mode_t mode)
+{
+       char buf[512];
+       int r;
+
+       sprintf(buf, "/proc/self/fd/%d", fd);
+       r = open(buf, flags, mode);
+       if (r < 0) {
+               printf("open(%s) failed: %m\n", buf);
+               abort();
+       }
+
+       return r;
+}
+
+static void mfd_fail_open(int fd, int flags, mode_t mode)
+{
+       char buf[512];
+       int r;
+
+       sprintf(buf, "/proc/self/fd/%d", fd);
+       r = open(buf, flags, mode);
+       if (r >= 0) {
+               printf("open(%s) didn't fail as expected\n");
+               abort();
+       }
+}
+
+static void mfd_assert_read(int fd)
+{
+       char buf[16];
+       void *p;
+       ssize_t l;
+
+       l = read(fd, buf, sizeof(buf));
+       if (l != sizeof(buf)) {
+               printf("read() failed: %m\n");
+               abort();
+       }
+
+       /* verify PROT_READ *is* allowed */
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ,
+                MAP_PRIVATE,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+       munmap(p, MFD_DEF_SIZE);
+
+       /* verify MAP_PRIVATE is *always* allowed (even writable) */
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ | PROT_WRITE,
+                MAP_PRIVATE,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+       munmap(p, MFD_DEF_SIZE);
+}
+
+static void mfd_assert_write(int fd)
+{
+       ssize_t l;
+       void *p;
+       int r;
+
+       /* verify write() succeeds */
+       l = write(fd, "\0\0\0\0", 4);
+       if (l != 4) {
+               printf("write() failed: %m\n");
+               abort();
+       }
+
+       /* verify PROT_READ | PROT_WRITE is allowed */
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ | PROT_WRITE,
+                MAP_SHARED,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+       *(char *)p = 0;
+       munmap(p, MFD_DEF_SIZE);
+
+       /* verify PROT_WRITE is allowed */
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_WRITE,
+                MAP_SHARED,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+       *(char *)p = 0;
+       munmap(p, MFD_DEF_SIZE);
+
+       /* verify PROT_READ with MAP_SHARED is allowed and a following
+        * mprotect(PROT_WRITE) allows writing */
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ,
+                MAP_SHARED,
+                fd,
+                0);
+       if (p == MAP_FAILED) {
+               printf("mmap() failed: %m\n");
+               abort();
+       }
+
+       r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
+       if (r < 0) {
+               printf("mprotect() failed: %m\n");
+               abort();
+       }
+
+       *(char *)p = 0;
+       munmap(p, MFD_DEF_SIZE);
+
+       /* verify PUNCH_HOLE works */
+       r = fallocate(fd,
+                     FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                     0,
+                     MFD_DEF_SIZE);
+       if (r < 0) {
+               printf("fallocate(PUNCH_HOLE) failed: %m\n");
+               abort();
+       }
+}
+
+static void mfd_fail_write(int fd)
+{
+       ssize_t l;
+       void *p;
+       int r;
+
+       /* verify write() fails */
+       l = write(fd, "data", 4);
+       if (l != -EPERM) {
+               printf("expected EPERM on write(), but got %d: %m\n", (int)l);
+               abort();
+       }
+
+       /* verify PROT_READ | PROT_WRITE is not allowed */
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ | PROT_WRITE,
+                MAP_SHARED,
+                fd,
+                0);
+       if (p != MAP_FAILED) {
+               printf("mmap() didn't fail as expected\n");
+               abort();
+       }
+
+       /* verify PROT_WRITE is not allowed */
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_WRITE,
+                MAP_SHARED,
+                fd,
+                0);
+       if (p != MAP_FAILED) {
+               printf("mmap() didn't fail as expected\n");
+               abort();
+       }
+
+       /* Verify PROT_READ with MAP_SHARED with a following mprotect is not
+        * allowed. Note that for r/w the kernel already prevents the mmap. */
+       p = mmap(NULL,
+                MFD_DEF_SIZE,
+                PROT_READ,
+                MAP_SHARED,
+                fd,
+                0);
+       if (p != MAP_FAILED) {
+               r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
+               if (r >= 0) {
+                       printf("mmap()+mprotect() didn't fail as expected\n");
+                       abort();
+               }
+       }
+
+       /* verify PUNCH_HOLE fails */
+       r = fallocate(fd,
+                     FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                     0,
+                     MFD_DEF_SIZE);
+       if (r >= 0) {
+               printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
+               abort();
+       }
+}
+
+static void mfd_assert_shrink(int fd)
+{
+       int r, fd2;
+
+       r = ftruncate(fd, MFD_DEF_SIZE / 2);
+       if (r < 0) {
+               printf("ftruncate(SHRINK) failed: %m\n");
+               abort();
+       }
+
+       mfd_assert_size(fd, MFD_DEF_SIZE / 2);
+
+       fd2 = mfd_assert_open(fd,
+                             O_RDWR | O_CREAT | O_TRUNC,
+                             S_IRUSR | S_IWUSR);
+       close(fd2);
+
+       mfd_assert_size(fd, 0);
+}
+
+static void mfd_fail_shrink(int fd)
+{
+       int r;
+
+       r = ftruncate(fd, MFD_DEF_SIZE / 2);
+       if (r >= 0) {
+               printf("ftruncate(SHRINK) didn't fail as expected\n");
+               abort();
+       }
+
+       mfd_fail_open(fd,
+                     O_RDWR | O_CREAT | O_TRUNC,
+                     S_IRUSR | S_IWUSR);
+}
+
+static void mfd_assert_grow(int fd)
+{
+       int r;
+
+       r = ftruncate(fd, MFD_DEF_SIZE * 2);
+       if (r < 0) {
+               printf("ftruncate(GROW) failed: %m\n");
+               abort();
+       }
+
+       mfd_assert_size(fd, MFD_DEF_SIZE * 2);
+
+       r = fallocate(fd,
+                     0,
+                     0,
+                     MFD_DEF_SIZE * 4);
+       if (r < 0) {
+               printf("fallocate(ALLOC) failed: %m\n");
+               abort();
+       }
+
+       mfd_assert_size(fd, MFD_DEF_SIZE * 4);
+}
+
+static void mfd_fail_grow(int fd)
+{
+       int r;
+
+       r = ftruncate(fd, MFD_DEF_SIZE * 2);
+       if (r >= 0) {
+               printf("ftruncate(GROW) didn't fail as expected\n");
+               abort();
+       }
+
+       r = fallocate(fd,
+                     0,
+                     0,
+                     MFD_DEF_SIZE * 4);
+       if (r >= 0) {
+               printf("fallocate(ALLOC) didn't fail as expected\n");
+               abort();
+       }
+}
+
+static void mfd_assert_grow_write(int fd)
+{
+       static char buf[MFD_DEF_SIZE * 8];
+       ssize_t l;
+
+       l = pwrite(fd, buf, sizeof(buf), 0);
+       if (l != sizeof(buf)) {
+               printf("pwrite() failed: %m\n");
+               abort();
+       }
+
+       mfd_assert_size(fd, MFD_DEF_SIZE * 8);
+}
+
+static void mfd_fail_grow_write(int fd)
+{
+       static char buf[MFD_DEF_SIZE * 8];
+       ssize_t l;
+
+       l = pwrite(fd, buf, sizeof(buf), 0);
+       if (l == sizeof(buf)) {
+               printf("pwrite() didn't fail as expected\n");
+               abort();
+       }
+}
+
+static int idle_thread_fn(void *arg)
+{
+       sigset_t set;
+       int sig;
+
+       /* dummy waiter; SIGTERM terminates us anyway */
+       sigemptyset(&set);
+       sigaddset(&set, SIGTERM);
+       sigwait(&set, &sig);
+
+       return 0;
+}
+
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+       uint8_t *stack;
+       pid_t pid;
+
+       stack = malloc(STACK_SIZE);
+       if (!stack) {
+               printf("malloc(STACK_SIZE) failed: %m\n");
+               abort();
+       }
+
+       pid = clone(idle_thread_fn,
+                   stack + STACK_SIZE,
+                   SIGCHLD | flags,
+                   NULL);
+       if (pid < 0) {
+               printf("clone() failed: %m\n");
+               abort();
+       }
+
+       return pid;
+}
+
+static void join_idle_thread(pid_t pid)
+{
+       kill(pid, SIGTERM);
+       waitpid(pid, NULL, 0);
+}
+
+/*
+ * Test memfd_create() syscall
+ * Verify syscall-argument validation, including name checks, flag validation
+ * and more.
+ */
+static void test_create(void)
+{
+       char buf[2048];
+       int fd;
+
+       /* test NULL name */
+       mfd_fail_new(NULL, 0);
+
+       /* test over-long name (not zero-terminated) */
+       memset(buf, 0xff, sizeof(buf));
+       mfd_fail_new(buf, 0);
+
+       /* test over-long zero-terminated name */
+       memset(buf, 0xff, sizeof(buf));
+       buf[sizeof(buf) - 1] = 0;
+       mfd_fail_new(buf, 0);
+
+       /* verify "" is a valid name */
+       fd = mfd_assert_new("", 0, 0);
+       close(fd);
+
+       /* verify invalid O_* open flags */
+       mfd_fail_new("", 0x0100);
+       mfd_fail_new("", ~MFD_CLOEXEC);
+       mfd_fail_new("", ~MFD_ALLOW_SEALING);
+       mfd_fail_new("", ~0);
+       mfd_fail_new("", 0x80000000U);
+
+       /* verify MFD_CLOEXEC is allowed */
+       fd = mfd_assert_new("", 0, MFD_CLOEXEC);
+       close(fd);
+
+       /* verify MFD_ALLOW_SEALING is allowed */
+       fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
+       close(fd);
+
+       /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
+       fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+       close(fd);
+}
+
+/*
+ * Test basic sealing
+ * A very basic sealing test to see whether setting/retrieving seals works.
+ */
+static void test_basic(void)
+{
+       int fd;
+
+       fd = mfd_assert_new("kern_memfd_basic",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+       /* add basic seals */
+       mfd_assert_has_seals(fd, 0);
+       mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+                                F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+                                F_SEAL_WRITE);
+
+       /* add them again */
+       mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+                                F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+                                F_SEAL_WRITE);
+
+       /* add more seals and seal against sealing */
+       mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
+       mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+                                F_SEAL_GROW |
+                                F_SEAL_WRITE |
+                                F_SEAL_SEAL);
+
+       /* verify that sealing no longer works */
+       mfd_fail_add_seals(fd, F_SEAL_GROW);
+       mfd_fail_add_seals(fd, 0);
+
+       close(fd);
+
+       /* verify sealing does not work without MFD_ALLOW_SEALING */
+       fd = mfd_assert_new("kern_memfd_basic",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC);
+       mfd_assert_has_seals(fd, F_SEAL_SEAL);
+       mfd_fail_add_seals(fd, F_SEAL_SHRINK |
+                              F_SEAL_GROW |
+                              F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_SEAL);
+       close(fd);
+}
+
+/*
+ * Test SEAL_WRITE
+ * Test whether SEAL_WRITE actually prevents modifications.
+ */
+static void test_seal_write(void)
+{
+       int fd;
+
+       fd = mfd_assert_new("kern_memfd_seal_write",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+       mfd_assert_has_seals(fd, 0);
+       mfd_assert_add_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+       mfd_assert_read(fd);
+       mfd_fail_write(fd);
+       mfd_assert_shrink(fd);
+       mfd_assert_grow(fd);
+       mfd_fail_grow_write(fd);
+
+       close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK
+ * Test whether SEAL_SHRINK actually prevents shrinking
+ */
+static void test_seal_shrink(void)
+{
+       int fd;
+
+       fd = mfd_assert_new("kern_memfd_seal_shrink",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+       mfd_assert_has_seals(fd, 0);
+       mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+       mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+
+       mfd_assert_read(fd);
+       mfd_assert_write(fd);
+       mfd_fail_shrink(fd);
+       mfd_assert_grow(fd);
+       mfd_assert_grow_write(fd);
+
+       close(fd);
+}
+
+/*
+ * Test SEAL_GROW
+ * Test whether SEAL_GROW actually prevents growing
+ */
+static void test_seal_grow(void)
+{
+       int fd;
+
+       fd = mfd_assert_new("kern_memfd_seal_grow",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+       mfd_assert_has_seals(fd, 0);
+       mfd_assert_add_seals(fd, F_SEAL_GROW);
+       mfd_assert_has_seals(fd, F_SEAL_GROW);
+
+       mfd_assert_read(fd);
+       mfd_assert_write(fd);
+       mfd_assert_shrink(fd);
+       mfd_fail_grow(fd);
+       mfd_fail_grow_write(fd);
+
+       close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK | SEAL_GROW
+ * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
+ */
+static void test_seal_resize(void)
+{
+       int fd;
+
+       fd = mfd_assert_new("kern_memfd_seal_resize",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+       mfd_assert_has_seals(fd, 0);
+       mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+       mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+
+       mfd_assert_read(fd);
+       mfd_assert_write(fd);
+       mfd_fail_shrink(fd);
+       mfd_fail_grow(fd);
+       mfd_fail_grow_write(fd);
+
+       close(fd);
+}
+
+/*
+ * Test sharing via dup()
+ * Test that seals are shared between dupped FDs and they're all equal.
+ */
+static void test_share_dup(void)
+{
+       int fd, fd2;
+
+       fd = mfd_assert_new("kern_memfd_share_dup",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+       mfd_assert_has_seals(fd, 0);
+
+       fd2 = mfd_assert_dup(fd);
+       mfd_assert_has_seals(fd2, 0);
+
+       mfd_assert_add_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+       mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+       mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+       mfd_assert_add_seals(fd, F_SEAL_SEAL);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+       mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+       mfd_fail_add_seals(fd, F_SEAL_GROW);
+       mfd_fail_add_seals(fd2, F_SEAL_GROW);
+       mfd_fail_add_seals(fd, F_SEAL_SEAL);
+       mfd_fail_add_seals(fd2, F_SEAL_SEAL);
+
+       close(fd2);
+
+       mfd_fail_add_seals(fd, F_SEAL_GROW);
+       close(fd);
+}
+
+/*
+ * Test sealing with active mmap()s
+ * Modifying seals is only allowed if no other mmap() refs exist.
+ */
+static void test_share_mmap(void)
+{
+       int fd;
+       void *p;
+
+       fd = mfd_assert_new("kern_memfd_share_mmap",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+       mfd_assert_has_seals(fd, 0);
+
+       /* shared/writable ref prevents sealing WRITE, but allows others */
+       p = mfd_assert_mmap_shared(fd);
+       mfd_fail_add_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, 0);
+       mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+       mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+       munmap(p, MFD_DEF_SIZE);
+
+       /* readable ref allows sealing */
+       p = mfd_assert_mmap_private(fd);
+       mfd_assert_add_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+       munmap(p, MFD_DEF_SIZE);
+
+       close(fd);
+}
+
+/*
+ * Test sealing with open(/proc/self/fd/%d)
+ * Via /proc we can get access to a separate file-context for the same memfd.
+ * This is *not* like dup(), but like a real separate open(). Make sure the
+ * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
+ */
+static void test_share_open(void)
+{
+       int fd, fd2;
+
+       fd = mfd_assert_new("kern_memfd_share_open",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+       mfd_assert_has_seals(fd, 0);
+
+       fd2 = mfd_assert_open(fd, O_RDWR, 0);
+       mfd_assert_add_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+       mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+       mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+       close(fd);
+       fd = mfd_assert_open(fd2, O_RDONLY, 0);
+
+       mfd_fail_add_seals(fd, F_SEAL_SEAL);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+       mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+       close(fd2);
+       fd2 = mfd_assert_open(fd, O_RDWR, 0);
+
+       mfd_assert_add_seals(fd2, F_SEAL_SEAL);
+       mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+       mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+       close(fd2);
+       close(fd);
+}
+
+/*
+ * Test sharing via fork()
+ * Test whether seal-modifications work as expected with forked children.
+ */
+static void test_share_fork(void)
+{
+       int fd;
+       pid_t pid;
+
+       fd = mfd_assert_new("kern_memfd_share_fork",
+                           MFD_DEF_SIZE,
+                           MFD_CLOEXEC | MFD_ALLOW_SEALING);
+       mfd_assert_has_seals(fd, 0);
+
+       pid = spawn_idle_thread(0);
+       mfd_assert_add_seals(fd, F_SEAL_SEAL);
+       mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+       mfd_fail_add_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+       join_idle_thread(pid);
+
+       mfd_fail_add_seals(fd, F_SEAL_WRITE);
+       mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+       close(fd);
+}
+
+int main(int argc, char **argv)
+{
+       pid_t pid;
+
+       printf("memfd: CREATE\n");
+       test_create();
+       printf("memfd: BASIC\n");
+       test_basic();
+
+       printf("memfd: SEAL-WRITE\n");
+       test_seal_write();
+       printf("memfd: SEAL-SHRINK\n");
+       test_seal_shrink();
+       printf("memfd: SEAL-GROW\n");
+       test_seal_grow();
+       printf("memfd: SEAL-RESIZE\n");
+       test_seal_resize();
+
+       printf("memfd: SHARE-DUP\n");
+       test_share_dup();
+       printf("memfd: SHARE-MMAP\n");
+       test_share_mmap();
+       printf("memfd: SHARE-OPEN\n");
+       test_share_open();
+       printf("memfd: SHARE-FORK\n");
+       test_share_fork();
+
+       /* Run test-suite in a multi-threaded environment with a shared
+        * file-table. */
+       pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
+       printf("memfd: SHARE-DUP (shared file-table)\n");
+       test_share_dup();
+       printf("memfd: SHARE-MMAP (shared file-table)\n");
+       test_share_mmap();
+       printf("memfd: SHARE-OPEN (shared file-table)\n");
+       test_share_open();
+       printf("memfd: SHARE-FORK (shared file-table)\n");
+       test_share_fork();
+       join_idle_thread(pid);
+
+       printf("memfd: DONE\n");
+
+       return 0;
+}
diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh
new file mode 100644 (file)
index 0000000..69b930e
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_fuse_test.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+if test -d "./mnt" ; then
+       fusermount -u ./mnt
+       rmdir ./mnt
+fi
+
+set -e
+
+mkdir mnt
+./fuse_mnt ./mnt
+./fuse_test ./mnt/memfd
+fusermount -u ./mnt
+rmdir ./mnt
index d46558b1f58d23f745fa953fe477e8d41c692e60..c34cd8ac8aaab6944fd4e68fd2c6f4f163a67aa7 100644 (file)
--- a/tools/testing/selftests/ptrace/peeksiginfo.c
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -31,6 +31,10 @@ static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
 #define TEST_SICODE_PRIV       -1
 #define TEST_SICODE_SHARE      -2
 
+#ifndef PAGE_SIZE
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#endif
+
 #define err(fmt, ...)                                          \
                fprintf(stderr,                                 \
                        "Error (%s:%d): " fmt,                  \