[mirror_ubuntu-hirsute-kernel.git] / fs / ext4 / file.c

/*
 *  linux/fs/ext4/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"

/*
 * Release hook for a regular file.  Unlike ext4_file_open(), which runs on
 * every open, release runs only once /all/ the files have been closed.
 *
 * Always returns 0.
 */
static int ext4_release_file(struct inode *inode, struct file *filp)
{
	int last_writer;

	/* Delayed allocation was deferred until close: do it now, once. */
	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
		ext4_alloc_da_blocks(inode);
		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
	}

	/*
	 * The block reservation is dropped only when this file was open for
	 * writing, it is the sole remaining writer on the inode, and no
	 * data blocks are still reserved.
	 */
	last_writer = (filp->f_mode & FMODE_WRITE) &&
		      atomic_read(&inode->i_writecount) == 1;
	if (last_writer && !EXT4_I(inode)->i_reserved_data_blocks) {
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode);
		up_write(&EXT4_I(inode)->i_data_sem);
	}

	/* Free any htree directory info hanging off the file. */
	if (is_dx(inode) && filp->private_data)
		ext4_htree_free_dir_info(filp->private_data);

	return 0;
}

/*
 * Sleep on the inode's ioend wait queue until its i_unwritten counter
 * reads zero.
 */
static void ext4_unwritten_wait(struct inode *inode)
{
	wait_queue_head_t *ioend_wq = ext4_ioend_wq(inode);

	wait_event(*ioend_wq, atomic_read(&EXT4_I(inode)->i_unwritten) == 0);
}

/*
 * Tell whether the IO described by @from at @pos is block-aligned or not.
 *
 * Ext4 uses unwritten extents when filling holes during direct IO and only
 * converts them to written once the IO has completed.  Until they are mapped
 * these blocks look like holes, so dio_zero_block() will assume it has to
 * zero parts of the first and/or last block.  Two AIO threads working on the
 * same unwritten block therefore have to be synchronized, otherwise one of
 * them zeroes the other's data and corrupts it.
 *
 * Returns 1 for an unaligned IO that starts below i_size, 0 otherwise.
 */
static int
ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
{
	int blockmask = inode->i_sb->s_blocksize - 1;

	/* IO that starts at or beyond i_size is never reported as unaligned. */
	if (pos >= i_size_read(inode))
		return 0;

	/* Any low bit set in the position or the iovec alignment counts. */
	return ((pos | iov_iter_alignment(from)) & blockmask) ? 1 : 0;
}

/*
 * Look up the blocks backing [pos, pos + length) without allocating.
 *
 * A mapped count equal to the requested count means that all of the blocks
 * have been preallocated, whether they are initialized or not.  To exclude
 * unwritten extents the EXT4_MAP_MAPPED flag must be set as well.
 *
 * Returns non-zero when both conditions hold, 0 otherwise.
 */
static int ext4_dio_range_mapped(struct inode *inode, loff_t pos,
				 size_t length)
{
	struct ext4_map_blocks map;
	unsigned int blkbits = inode->i_blkbits;
	int wanted, mapped;

	map.m_lblk = pos >> blkbits;
	map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
		- map.m_lblk;
	wanted = map.m_len;

	mapped = ext4_map_blocks(NULL, inode, &map, 0);

	return mapped == wanted && (map.m_flags & EXT4_MAP_MAPPED);
}

/*
 * write_iter handler for ext4 regular files.
 *
 * Runs the generic write checks and the write itself under the inode lock,
 * then syncs the written range via generic_write_sync() after dropping the
 * lock.  Returns the number of bytes written, or a negative error.
 */
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(iocb->ki_filp);
	struct blk_plug plug;
	int is_dio = iocb->ki_flags & IOCB_DIRECT;
	int serialized = 0;
	int overwrite = 0;
	ssize_t written;

	inode_lock(inode);
	written = generic_write_checks(iocb, from);
	if (written <= 0)
		goto unlock;

	/*
	 * Unaligned direct AIOs have to be serialized against each other:
	 * two of them zeroing partial blocks concurrently can corrupt data.
	 */
	if (is_dio && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
	    !is_sync_kiocb(iocb) &&
	    ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
		serialized = 1;
		ext4_unwritten_wait(inode);
	}

	/*
	 * Bitmap-format files have a size limit smaller than s_maxbytes,
	 * which applies to extent-mapped files only.
	 */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
			written = -EFBIG;
			goto unlock;
		}
		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
	}

	/* The overwrite hint is handed down through the iocb. */
	iocb->private = &overwrite;
	if (is_dio) {
		size_t length = iov_iter_count(from);
		loff_t pos = iocb->ki_pos;

		blk_start_plug(&plug);

		/* Work out whether this direct IO is a pure overwrite. */
		if (ext4_should_dioread_nolock(inode) && !serialized &&
		    !file->f_mapping->nrpages &&
		    pos + length <= i_size_read(inode) &&
		    ext4_dio_range_mapped(inode, pos, length))
			overwrite = 1;
	}

	written = __generic_file_write_iter(iocb, from);
	inode_unlock(inode);

	if (written > 0) {
		ssize_t err = generic_write_sync(file, iocb->ki_pos - written,
						 written);

		if (err < 0)
			written = err;
	}
	if (is_dio)
		blk_finish_plug(&plug);

	return written;

unlock:
	inode_unlock(inode);
	return written;
}

#ifdef CONFIG_FS_DAX
/*
 * Page fault handler for DAX mappings (also installed as .page_mkwrite).
 *
 * A write fault is additionally bracketed by sb_start_pagefault() /
 * sb_end_pagefault(), updates the file times and runs inside a journal
 * handle.  Every fault holds i_mmap_sem for reading around __dax_fault().
 * Returns VM_FAULT_SIGBUS when the journal handle cannot be started,
 * otherwise whatever __dax_fault() returns.
 */
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	bool is_write = vmf->flags & FAULT_FLAG_WRITE;
	handle_t *handle = NULL;
	int result;

	if (is_write) {
		sb_start_pagefault(sb);
		file_update_time(vma->vm_file);
	}
	down_read(&EXT4_I(inode)->i_mmap_sem);
	if (is_write)
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
						EXT4_DATA_TRANS_BLOCKS(sb));

	/* handle stays NULL for read faults, which is not an error pointer. */
	if (IS_ERR(handle))
		result = VM_FAULT_SIGBUS;
	else
		result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);

	if (is_write && !IS_ERR(handle))
		ext4_journal_stop(handle);
	up_read(&EXT4_I(inode)->i_mmap_sem);
	if (is_write)
		sb_end_pagefault(sb);

	return result;
}

/*
 * PMD-sized fault handler for DAX mappings.
 *
 * Follows the same protocol as ext4_dax_fault(), except that the journal
 * handle for a write fault is sized for PMD_SIZE / PAGE_SIZE pages and the
 * work is done by __dax_pmd_fault().  Returns VM_FAULT_SIGBUS when the
 * journal handle cannot be started.
 */
static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
						pmd_t *pmd, unsigned int flags)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	bool is_write = flags & FAULT_FLAG_WRITE;
	handle_t *handle = NULL;
	int result;

	if (is_write) {
		sb_start_pagefault(sb);
		file_update_time(vma->vm_file);
	}
	down_read(&EXT4_I(inode)->i_mmap_sem);
	if (is_write)
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
				ext4_chunk_trans_blocks(inode,
							PMD_SIZE / PAGE_SIZE));

	/* handle stays NULL for read faults, which is not an error pointer. */
	if (IS_ERR(handle))
		result = VM_FAULT_SIGBUS;
	else
		result = __dax_pmd_fault(vma, addr, pmd, flags,
				ext4_dax_mmap_get_block, NULL);

	if (is_write && !IS_ERR(handle))
		ext4_journal_stop(handle);
	up_read(&EXT4_I(inode)->i_mmap_sem);
	if (is_write)
		sb_end_pagefault(sb);

	return result;
}

/*
 * Write-fault handler for VM_MIXEDMAP mappings.  As in the ext4_dax_fault()
 * handler, we check for races against truncate.  Because we cycle through
 * i_mmap_sem, any hole punching that began before we were called has
 * finished by now; if it covered the part of the file we are working on,
 * our pte gets unmapped and the pte_same() check in wp_pfn_shared() fails.
 * The fault is then retried and things work out as desired.
 *
 * Returns VM_FAULT_SIGBUS when the faulting page offset lies at or beyond
 * the page count derived from i_size, otherwise the dax_pfn_mkwrite() result.
 */
static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
				struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	loff_t nr_pages;
	int ret;

	sb_start_pagefault(sb);
	file_update_time(vma->vm_file);
	down_read(&EXT4_I(inode)->i_mmap_sem);

	/* i_size rounded up to whole pages. */
	nr_pages = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	ret = (vmf->pgoff >= nr_pages) ? VM_FAULT_SIGBUS
				       : dax_pfn_mkwrite(vma, vmf);

	up_read(&EXT4_I(inode)->i_mmap_sem);
	sb_end_pagefault(sb);

	return ret;
}

/*
 * VM operations that ext4_file_mmap() installs on mappings of DAX inodes.
 * ext4_dax_fault() serves as both the .fault and the .page_mkwrite handler;
 * it tells the two apart by FAULT_FLAG_WRITE.
 */
static const struct vm_operations_struct ext4_dax_vm_ops = {
	.fault		= ext4_dax_fault,
	.pmd_fault	= ext4_dax_pmd_fault,
	.page_mkwrite	= ext4_dax_fault,
	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
};
#else
/* Without CONFIG_FS_DAX the name is just an alias for the regular table. */
#define ext4_dax_vm_ops	ext4_file_vm_ops
#endif

/* VM operations that ext4_file_mmap() installs on non-DAX mappings. */
static const struct vm_operations_struct ext4_file_vm_ops = {
	.fault		= ext4_filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite   = ext4_page_mkwrite,
};

static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_mapping->host;

	if (ext4_encrypted_inode(inode)) {
		int err = ext4_get_encryption_info(inode);
		if (err)
			return 0;
		if (ext4_encryption_info(inode) == NULL)
			return -ENOKEY;
	}
	file_accessed(file);
	if (IS_DAX(file_inode(file))) {
		vma->vm_ops = &ext4_dax_vm_ops;
		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
	} else {
		vma->vm_ops = &ext4_file_vm_ops;
	}
	return 0;
}

static int ext4_file_open(struct inode * inode, struct file * filp)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct vfsmount *mnt = filp->f_path.mnt;
	struct dentry *dir;
	struct path path;
	char buf[64], *cp;
	int ret;

	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
		     !(sb->s_flags & MS_RDONLY))) {
		sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
		/*
		 * Sample where the filesystem has been mounted and
		 * store it in the superblock for sysadmin convenience
		 * when trying to sort through large numbers of block
		 * devices or filesystem images.
		 */
		memset(buf, 0, sizeof(buf));
		path.mnt = mnt;
		path.dentry = mnt->mnt_root;
		cp = d_path(&path, buf, sizeof(buf));
		if (!IS_ERR(cp)) {
			handle_t *handle;
			int err;

			handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
			if (IS_ERR(handle))
				return PTR_ERR(handle);
			BUFFER_TRACE(sbi->s_sbh, "get_write_access");
			err = ext4_journal_get_write_access(handle, sbi->s_sbh);
			if (err) {
				ext4_journal_stop(handle);
				return err;
			}
			strlcpy(sbi->s_es->s_last_mounted, cp,
				sizeof(sbi->s_es->s_last_mounted));
			ext4_handle_dirty_super(handle, sb);
			ext4_journal_stop(handle);
		}
	}
	if (ext4_encrypted_inode(inode)) {
		ret = ext4_get_encryption_info(inode);
		if (ret)
			return -EACCES;
		if (ext4_encryption_info(inode) == NULL)
			return -ENOKEY;
	}

	dir = dget_parent(file_dentry(filp));
	if (ext4_encrypted_inode(d_inode(dir)) &&
	    !ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) {
		ext4_warning(inode->i_sb,
			     "Inconsistent encryption contexts: %lu/%lu\n",
			     (unsigned long) d_inode(dir)->i_ino,
			     (unsigned long) inode->i_ino);
		dput(dir);
		return -EPERM;
	}
	dput(dir);
	/*
	 * Set up the jbd2_inode if we are opening the inode for
	 * writing and the journal is present
	 */
	if (filp->f_mode & FMODE_WRITE) {
		ret = ext4_inode_attach_jinode(inode);
		if (ret < 0)
			return ret;
	}
	return dquot_file_open(inode, filp);
}

/*
 * Here we use ext4_map_blocks() to get a block mapping for an extent-based
 * file rather than ext4_ext_walk_space(), because this lets us implement
 * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped files in the same
 * function.  When the extent status tree has been fully implemented, it will
 * track all extent status for a file and we can directly use it to
 * retrieve the offset for SEEK_DATA/SEEK_HOLE.
 */

/*
 * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we need to look up
 * the page cache to check whether there is some data in [startoff, endoff]
 * because, if this range contains an unwritten extent, we treat that extent
 * as data or as a hole according to whether the page cache has data or not.
 *
 * @inode:   inode whose page cache is scanned
 * @whence:  SEEK_DATA or SEEK_HOLE
 * @end_blk: logical block that bounds the scan (callers pass the block just
 *           past the extent)
 * @offset:  in: byte offset to start from; out: offset of the data/hole
 *           that was found (updated only when 1 is returned)
 *
 * A buffer that is uptodate or unwritten counts as data, any other buffer
 * counts as a hole.  Pages without buffers are skipped.
 *
 * Returns 1 if the requested kind of region was found, 0 otherwise.
 */
static int ext4_find_unwritten_pgoff(struct inode *inode,
				     int whence,
				     ext4_lblk_t end_blk,
				     loff_t *offset)
{
	struct pagevec pvec;
	unsigned int blkbits;
	pgoff_t index;
	pgoff_t end;
	loff_t endoff;
	loff_t startoff;
	loff_t lastoff;
	int found = 0;

	blkbits = inode->i_sb->s_blocksize_bits;
	startoff = *offset;
	lastoff = startoff;
	endoff = (loff_t)end_blk << blkbits;

	index = startoff >> PAGE_SHIFT;
	end = endoff >> PAGE_SHIFT;

	pagevec_init(&pvec, 0);
	do {
		int i, num;
		unsigned long nr_pages;

		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
					  (pgoff_t)num);
		if (nr_pages == 0) {
			if (whence == SEEK_DATA)
				break;

			BUG_ON(whence != SEEK_HOLE);
			/*
			 * No cached pages are left.  If this is the first
			 * pass, or the scan has not reached the end offset
			 * yet, there is a hole at lastoff.  Report lastoff
			 * rather than the caller's starting offset: on a
			 * later pass the pages before lastoff held data.
			 */
			if (lastoff == startoff || lastoff < endoff) {
				found = 1;
				*offset = lastoff;
			}
			break;
		}

		/*
		 * If this is the first time to go into the loop and
		 * offset is smaller than the first page offset, it will be a
		 * hole at this offset.
		 */
		if (lastoff == startoff && whence == SEEK_HOLE &&
		    lastoff < page_offset(pvec.pages[0])) {
			found = 1;
			break;
		}

		/*
		 * NOTE(review): a gap between two cached pages inside one
		 * batch is not checked for here; only pages past 'end' end
		 * the SEEK_HOLE scan.  Confirm whether that is intended.
		 */
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			/*
			 * If the current offset is not beyond the end of given
			 * range, it will be a hole.
			 */
			if (lastoff < endoff && whence == SEEK_HOLE &&
			    page->index > end) {
				found = 1;
				*offset = lastoff;
				goto out;
			}

			lock_page(page);

			/* The page left this mapping while it was unlocked. */
			if (unlikely(page->mapping != inode->i_mapping)) {
				unlock_page(page);
				continue;
			}

			if (!page_has_buffers(page)) {
				unlock_page(page);
				continue;
			}

			/* Walk the page's buffers in file order. */
			lastoff = page_offset(page);
			bh = head = page_buffers(page);
			do {
				if (buffer_uptodate(bh) ||
				    buffer_unwritten(bh)) {
					if (whence == SEEK_DATA)
						found = 1;
				} else {
					if (whence == SEEK_HOLE)
						found = 1;
				}
				if (found) {
					/* Never report before the start. */
					*offset = max_t(loff_t,
						startoff, lastoff);
					unlock_page(page);
					goto out;
				}
				lastoff += bh->b_size;
				bh = bh->b_this_page;
			} while (bh != head);

			lastoff = page_offset(page) + PAGE_SIZE;
			unlock_page(page);
		}

		/*
		 * The no. of pages is less than our desired, that would be a
		 * hole in there.
		 */
		if (nr_pages < num && whence == SEEK_HOLE) {
			found = 1;
			*offset = lastoff;
			break;
		}

		index = pvec.pages[i - 1]->index + 1;
		pagevec_release(&pvec);
	} while (index <= end);

out:
	pagevec_release(&pvec);
	return found;
}

/*
 * ext4_seek_data() retrieves the offset for SEEK_DATA.
 *
 * Walks the extents from @offset up to i_size under the inode lock.
 * Returns the new file position set through vfs_setpos(), -ENXIO when
 * @offset is at or past i_size or no data follows it, or the error from
 * ext4_get_next_extent().
 */
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct extent_status es;
	ext4_lblk_t first_blk, cur_blk, last_blk;
	loff_t pos, size;
	int bits;
	int err;

	inode_lock(inode);

	size = i_size_read(inode);
	if (offset >= size) {
		err = -ENXIO;
		goto fail;
	}

	bits = inode->i_sb->s_blocksize_bits;
	first_blk = offset >> bits;
	cur_blk = first_blk;
	last_blk = size >> bits;
	pos = offset;

	for (;;) {
		err = ext4_get_next_extent(inode, cur_blk,
					   last_blk - cur_blk + 1, &es);
		/* No extent found -> no data */
		if (err == 0)
			err = -ENXIO;
		if (err < 0)
			goto fail;

		/* Jump to the extent, unless it covers the starting block. */
		cur_blk = es.es_lblk;
		if (cur_blk != first_blk)
			pos = (loff_t)cur_blk << bits;
		if (!ext4_es_is_unwritten(&es))
			break;

		/*
		 * An unwritten extent counts as data or as a hole depending
		 * on whether the page cache holds data for it.
		 */
		if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
					      es.es_lblk + es.es_len, &pos))
			break;

		cur_blk += es.es_len;
		pos = (loff_t)cur_blk << bits;
		cond_resched();

		if (cur_blk > last_blk)
			break;
	}

	inode_unlock(inode);

	if (pos > size)
		return -ENXIO;

	return vfs_setpos(file, pos, maxsize);

fail:
	inode_unlock(inode);
	return err;
}

/*
 * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
 *
 * Walks the extents from @offset up to i_size under the inode lock.
 * Returns the new file position set through vfs_setpos() (clamped to
 * i_size), -ENXIO when @offset is at or past i_size, or the error from
 * ext4_get_next_extent().
 */
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct extent_status es;
	ext4_lblk_t first_blk, cur_blk, last_blk;
	loff_t pos, size;
	int bits;
	int err;

	inode_lock(inode);

	size = i_size_read(inode);
	if (offset >= size) {
		err = -ENXIO;
		goto fail;
	}

	bits = inode->i_sb->s_blocksize_bits;
	first_blk = offset >> bits;
	cur_blk = first_blk;
	last_blk = size >> bits;
	pos = offset;

	for (;;) {
		err = ext4_get_next_extent(inode, cur_blk,
					   last_blk - cur_blk + 1, &es);
		if (err < 0)
			goto fail;

		/* Found a hole? */
		if (err == 0 || es.es_lblk > cur_blk) {
			if (cur_blk != first_blk)
				pos = (loff_t)cur_blk << bits;
			break;
		}

		/*
		 * An unwritten extent counts as data or as a hole depending
		 * on whether the page cache holds data for it.
		 */
		if (ext4_es_is_unwritten(&es) &&
		    ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
					      cur_blk + es.es_len, &pos))
			break;

		cur_blk += es.es_len;
		pos = (loff_t)cur_blk << bits;
		cond_resched();

		if (cur_blk > last_blk)
			break;
	}

	inode_unlock(inode);

	/* The end of the file is the last possible hole. */
	if (pos > size)
		pos = size;

	return vfs_setpos(file, pos, maxsize);

fail:
	inode_unlock(inode);
	return err;
}

/*
 * ext4_llseek() copes with the different maxbytes values of block-mapped
 * and extent-mapped files: it picks the limit that applies to the inode and
 * passes it to generic_file_llseek_size() or to the SEEK_DATA/SEEK_HOLE
 * helpers.  Any other whence value yields -EINVAL.
 */
loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t limit;

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		limit = inode->i_sb->s_maxbytes;
	else
		limit = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;

	switch (whence) {
	case SEEK_DATA:
		return ext4_seek_data(file, offset, limit);
	case SEEK_HOLE:
		return ext4_seek_hole(file, offset, limit);
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						limit, i_size_read(inode));
	default:
		break;
	}

	return -EINVAL;
}

/*
 * File operations table for ext4 regular files.  Reads and splice use the
 * generic helpers; llseek, write, mmap, open and release use the handlers
 * defined in this file.
 */
const struct file_operations ext4_file_operations = {
	.llseek		= ext4_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= ext4_file_write_iter,
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.mmap		= ext4_file_mmap,
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.fsync		= ext4_sync_file,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ext4_fallocate,
};

/*
 * Inode operations table for ext4 regular files.  Extended attribute
 * get/set/remove go through the generic_*xattr helpers; the remaining
 * entries are ext4-specific.
 */
const struct inode_operations ext4_file_inode_operations = {
	.setattr	= ext4_setattr,
	.getattr	= ext4_getattr,
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext4_listxattr,
	.removexattr	= generic_removexattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap		= ext4_fiemap,
};
Commit	Line	Data
	1	/*
	2	* linux/fs/ext4/file.c
	3	*
	4	* Copyright (C) 1992, 1993, 1994, 1995
	5	* Remy Card (card@masi.ibp.fr)
	6	* Laboratoire MASI - Institut Blaise Pascal
	7	* Universite Pierre et Marie Curie (Paris VI)
	8	*
	9	* from
	10	*
	11	* linux/fs/minix/file.c
	12	*
	13	* Copyright (C) 1991, 1992 Linus Torvalds
	14	*
	15	* ext4 fs regular file handling primitives
	16	*
	17	* 64-bit file support on 64-bit platforms by Jakub Jelinek
	18	* (jj@sunsite.ms.mff.cuni.cz)
	19	*/
	20
	21	#include <linux/time.h>
	22	#include <linux/fs.h>
	23	#include <linux/mount.h>
	24	#include <linux/path.h>
	25	#include <linux/dax.h>
	26	#include <linux/quotaops.h>
	27	#include <linux/pagevec.h>
	28	#include <linux/uio.h>
	29	#include "ext4.h"
	30	#include "ext4_jbd2.h"
	31	#include "xattr.h"
	32	#include "acl.h"
	33
	34	/*
	35	* Called when an inode is released. Note that this is different
	36	* from ext4_file_open: open gets called at every open, but release
	37	* gets called only when /all/ the files are closed.
	38	*/
	39	static int ext4_release_file(struct inode inode, struct file filp)
	40	{
	41	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
	42	ext4_alloc_da_blocks(inode);
	43	ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
	44	}
	45	/* if we are the last writer on the inode, drop the block reservation */
	46	if ((filp->f_mode & FMODE_WRITE) &&
	47	(atomic_read(&inode->i_writecount) == 1) &&
	48	!EXT4_I(inode)->i_reserved_data_blocks)
	49	{
	50	down_write(&EXT4_I(inode)->i_data_sem);
	51	ext4_discard_preallocations(inode);
	52	up_write(&EXT4_I(inode)->i_data_sem);
	53	}
	54	if (is_dx(inode) && filp->private_data)
	55	ext4_htree_free_dir_info(filp->private_data);
	56
	57	return 0;
	58	}
	59
	60	static void ext4_unwritten_wait(struct inode *inode)
	61	{
	62	wait_queue_head_t *wq = ext4_ioend_wq(inode);
	63
	64	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
	65	}
	66
	67	/*
	68	* This tests whether the IO in question is block-aligned or not.
	69	* Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
	70	* are converted to written only after the IO is complete. Until they are
	71	* mapped, these blocks appear as holes, so dio_zero_block() will assume that
	72	* it needs to zero out portions of the start and/or end block. If 2 AIO
	73	* threads are at work on the same unwritten block, they must be synchronized
	74	* or one thread will zero the other's data, causing corruption.
	75	*/
	76	static int
	77	ext4_unaligned_aio(struct inode inode, struct iov_iter from, loff_t pos)
	78	{
	79	struct super_block *sb = inode->i_sb;
	80	int blockmask = sb->s_blocksize - 1;
	81
	82	if (pos >= i_size_read(inode))
	83	return 0;
	84
	85	if ((pos \| iov_iter_alignment(from)) & blockmask)
	86	return 1;
	87
	88	return 0;
	89	}
	90
	91	static ssize_t
	92	ext4_file_write_iter(struct kiocb iocb, struct iov_iter from)
	93	{
	94	struct file *file = iocb->ki_filp;
	95	struct inode *inode = file_inode(iocb->ki_filp);
	96	struct blk_plug plug;
	97	int o_direct = iocb->ki_flags & IOCB_DIRECT;
	98	int unaligned_aio = 0;
	99	int overwrite = 0;
	100	ssize_t ret;
	101
	102	inode_lock(inode);
	103	ret = generic_write_checks(iocb, from);
	104	if (ret <= 0)
	105	goto out;
	106
	107	/*
	108	* Unaligned direct AIO must be serialized among each other as zeroing
	109	* of partial blocks of two competing unaligned AIOs can result in data
	110	* corruption.
	111	*/
	112	if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
	113	!is_sync_kiocb(iocb) &&
	114	ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
	115	unaligned_aio = 1;
	116	ext4_unwritten_wait(inode);
	117	}
	118
	119	/*
	120	* If we have encountered a bitmap-format file, the size limit
	121	* is smaller than s_maxbytes, which is for extent-mapped files.
	122	*/
	123	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
	124	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	125
	126	if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
	127	ret = -EFBIG;
	128	goto out;
	129	}
	130	iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
	131	}
	132
	133	iocb->private = &overwrite;
	134	if (o_direct) {
	135	size_t length = iov_iter_count(from);
	136	loff_t pos = iocb->ki_pos;
	137	blk_start_plug(&plug);
	138
	139	/* check whether we do a DIO overwrite or not */
	140	if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
	141	!file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
	142	struct ext4_map_blocks map;
	143	unsigned int blkbits = inode->i_blkbits;
	144	int err, len;
	145
	146	map.m_lblk = pos >> blkbits;
	147	map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
	148	- map.m_lblk;
	149	len = map.m_len;
	150
	151	err = ext4_map_blocks(NULL, inode, &map, 0);
	152	/*
	153	* 'err==len' means that all of blocks has
	154	* been preallocated no matter they are
	155	* initialized or not. For excluding
	156	* unwritten extents, we need to check
	157	* m_flags. There are two conditions that
	158	* indicate for initialized extents. 1) If we
	159	* hit extent cache, EXT4_MAP_MAPPED flag is
	160	* returned; 2) If we do a real lookup,
	161	* non-flags are returned. So we should check
	162	* these two conditions.
	163	*/
	164	if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
	165	overwrite = 1;
	166	}
	167	}
	168
	169	ret = __generic_file_write_iter(iocb, from);
	170	inode_unlock(inode);
	171
	172	if (ret > 0) {
	173	ssize_t err;
	174
	175	err = generic_write_sync(file, iocb->ki_pos - ret, ret);
	176	if (err < 0)
	177	ret = err;
	178	}
	179	if (o_direct)
	180	blk_finish_plug(&plug);
	181
	182	return ret;
	183
	184	out:
	185	inode_unlock(inode);
	186	return ret;
	187	}
	188
	189	#ifdef CONFIG_FS_DAX
	190	static int ext4_dax_fault(struct vm_area_struct vma, struct vm_fault vmf)
	191	{
	192	int result;
	193	handle_t *handle = NULL;
	194	struct inode *inode = file_inode(vma->vm_file);
	195	struct super_block *sb = inode->i_sb;
	196	bool write = vmf->flags & FAULT_FLAG_WRITE;
	197
	198	if (write) {
	199	sb_start_pagefault(sb);
	200	file_update_time(vma->vm_file);
	201	down_read(&EXT4_I(inode)->i_mmap_sem);
	202	handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
	203	EXT4_DATA_TRANS_BLOCKS(sb));
	204	} else
	205	down_read(&EXT4_I(inode)->i_mmap_sem);
	206
	207	if (IS_ERR(handle))
	208	result = VM_FAULT_SIGBUS;
	209	else
	210	result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
	211
	212	if (write) {
	213	if (!IS_ERR(handle))
	214	ext4_journal_stop(handle);
	215	up_read(&EXT4_I(inode)->i_mmap_sem);
	216	sb_end_pagefault(sb);
	217	} else
	218	up_read(&EXT4_I(inode)->i_mmap_sem);
	219
	220	return result;
	221	}
	222
	223	static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
	224	pmd_t *pmd, unsigned int flags)
	225	{
	226	int result;
	227	handle_t *handle = NULL;
	228	struct inode *inode = file_inode(vma->vm_file);
	229	struct super_block *sb = inode->i_sb;
	230	bool write = flags & FAULT_FLAG_WRITE;
	231
	232	if (write) {
	233	sb_start_pagefault(sb);
	234	file_update_time(vma->vm_file);
	235	down_read(&EXT4_I(inode)->i_mmap_sem);
	236	handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
	237	ext4_chunk_trans_blocks(inode,
	238	PMD_SIZE / PAGE_SIZE));
	239	} else
	240	down_read(&EXT4_I(inode)->i_mmap_sem);
	241
	242	if (IS_ERR(handle))
	243	result = VM_FAULT_SIGBUS;
	244	else
	245	result = __dax_pmd_fault(vma, addr, pmd, flags,
	246	ext4_dax_mmap_get_block, NULL);
	247
	248	if (write) {
	249	if (!IS_ERR(handle))
	250	ext4_journal_stop(handle);
	251	up_read(&EXT4_I(inode)->i_mmap_sem);
	252	sb_end_pagefault(sb);
	253	} else
	254	up_read(&EXT4_I(inode)->i_mmap_sem);
	255
	256	return result;
	257	}
	258
	259	/*
	260	* Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
	261	* handler we check for races agaist truncate. Note that since we cycle through
	262	* i_mmap_sem, we are sure that also any hole punching that began before we
	263	* were called is finished by now and so if it included part of the file we
	264	* are working on, our pte will get unmapped and the check for pte_same() in
	265	* wp_pfn_shared() fails. Thus fault gets retried and things work out as
	266	* desired.
	267	*/
	268	static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
	269	struct vm_fault *vmf)
	270	{
	271	struct inode *inode = file_inode(vma->vm_file);
	272	struct super_block *sb = inode->i_sb;
	273	loff_t size;
	274	int ret;
	275
	276	sb_start_pagefault(sb);
	277	file_update_time(vma->vm_file);
	278	down_read(&EXT4_I(inode)->i_mmap_sem);
	279	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	280	if (vmf->pgoff >= size)
	281	ret = VM_FAULT_SIGBUS;
	282	else
	283	ret = dax_pfn_mkwrite(vma, vmf);
	284	up_read(&EXT4_I(inode)->i_mmap_sem);
	285	sb_end_pagefault(sb);
	286
	287	return ret;
	288	}
	289
	290	static const struct vm_operations_struct ext4_dax_vm_ops = {
	291	.fault = ext4_dax_fault,
	292	.pmd_fault = ext4_dax_pmd_fault,
	293	.page_mkwrite = ext4_dax_fault,
	294	.pfn_mkwrite = ext4_dax_pfn_mkwrite,
	295	};
	296	#else
	297	#define ext4_dax_vm_ops ext4_file_vm_ops
	298	#endif
	299
	300	static const struct vm_operations_struct ext4_file_vm_ops = {
	301	.fault = ext4_filemap_fault,
	302	.map_pages = filemap_map_pages,
	303	.page_mkwrite = ext4_page_mkwrite,
	304	};
	305
	306	static int ext4_file_mmap(struct file file, struct vm_area_struct vma)
	307	{
	308	struct inode *inode = file->f_mapping->host;
	309
	310	if (ext4_encrypted_inode(inode)) {
	311	int err = ext4_get_encryption_info(inode);
	312	if (err)
	313	return 0;
	314	if (ext4_encryption_info(inode) == NULL)
	315	return -ENOKEY;
	316	}
	317	file_accessed(file);
	318	if (IS_DAX(file_inode(file))) {
	319	vma->vm_ops = &ext4_dax_vm_ops;
	320	vma->vm_flags \|= VM_MIXEDMAP \| VM_HUGEPAGE;
	321	} else {
	322	vma->vm_ops = &ext4_file_vm_ops;
	323	}
	324	return 0;
	325	}
	326
	327	static int ext4_file_open(struct inode * inode, struct file * filp)
	328	{
	329	struct super_block *sb = inode->i_sb;
	330	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	331	struct vfsmount *mnt = filp->f_path.mnt;
	332	struct dentry *dir;
	333	struct path path;
	334	char buf[64], *cp;
	335	int ret;
	336
	337	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
	338	!(sb->s_flags & MS_RDONLY))) {
	339	sbi->s_mount_flags \|= EXT4_MF_MNTDIR_SAMPLED;
	340	/*
	341	* Sample where the filesystem has been mounted and
	342	* store it in the superblock for sysadmin convenience
	343	* when trying to sort through large numbers of block
	344	* devices or filesystem images.
	345	*/
	346	memset(buf, 0, sizeof(buf));
	347	path.mnt = mnt;
	348	path.dentry = mnt->mnt_root;
	349	cp = d_path(&path, buf, sizeof(buf));
	350	if (!IS_ERR(cp)) {
	351	handle_t *handle;
	352	int err;
	353
	354	handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
	355	if (IS_ERR(handle))
	356	return PTR_ERR(handle);
	357	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
	358	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
	359	if (err) {
	360	ext4_journal_stop(handle);
	361	return err;
	362	}
	363	strlcpy(sbi->s_es->s_last_mounted, cp,
	364	sizeof(sbi->s_es->s_last_mounted));
	365	ext4_handle_dirty_super(handle, sb);
	366	ext4_journal_stop(handle);
	367	}
	368	}
	369	if (ext4_encrypted_inode(inode)) {
	370	ret = ext4_get_encryption_info(inode);
	371	if (ret)
	372	return -EACCES;
	373	if (ext4_encryption_info(inode) == NULL)
	374	return -ENOKEY;
	375	}
	376
	377	dir = dget_parent(file_dentry(filp));
	378	if (ext4_encrypted_inode(d_inode(dir)) &&
	379	!ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) {
	380	ext4_warning(inode->i_sb,
	381	"Inconsistent encryption contexts: %lu/%lu\n",
	382	(unsigned long) d_inode(dir)->i_ino,
	383	(unsigned long) inode->i_ino);
	384	dput(dir);
	385	return -EPERM;
	386	}
	387	dput(dir);
	388	/*
	389	* Set up the jbd2_inode if we are opening the inode for
	390	* writing and the journal is present
	391	*/
	392	if (filp->f_mode & FMODE_WRITE) {
	393	ret = ext4_inode_attach_jinode(inode);
	394	if (ret < 0)
	395	return ret;
	396	}
	397	return dquot_file_open(inode, filp);
	398	}
	399
	400	/*
	401	* Here we use ext4_map_blocks() to get a block mapping for a extent-based
	402	* file rather than ext4_ext_walk_space() because we can introduce
	403	* SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
	404	* function. When extent status tree has been fully implemented, it will
	405	* track all extent status for a file and we can directly use it to
	406	* retrieve the offset for SEEK_DATA/SEEK_HOLE.
	407	*/
	408
	409	/*
	410	* When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
	411	* lookup page cache to check whether or not there has some data between
	412	* [startoff, endoff] because, if this range contains an unwritten extent,
	413	* we determine this extent as a data or a hole according to whether the
	414	* page cache has data or not.
	415	*/
	416	static int ext4_find_unwritten_pgoff(struct inode *inode,
	417	int whence,
	418	ext4_lblk_t end_blk,
	419	loff_t *offset)
	420	{
	421	struct pagevec pvec;
	422	unsigned int blkbits;
	423	pgoff_t index;
	424	pgoff_t end;
	425	loff_t endoff;
	426	loff_t startoff;
	427	loff_t lastoff;
	428	int found = 0;
	429
	430	blkbits = inode->i_sb->s_blocksize_bits;
	431	startoff = *offset;
	432	lastoff = startoff;
	433	endoff = (loff_t)end_blk << blkbits;
	434
	435	index = startoff >> PAGE_SHIFT;
	436	end = endoff >> PAGE_SHIFT;
	437
	438	pagevec_init(&pvec, 0);
	439	do {
	440	int i, num;
	441	unsigned long nr_pages;
	442
	443	num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
	444	nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
	445	(pgoff_t)num);
	446	if (nr_pages == 0) {
	447	if (whence == SEEK_DATA)
	448	break;
	449
	450	BUG_ON(whence != SEEK_HOLE);
	451	/*
	452	* If this is the first time to go into the loop and
	453	* offset is not beyond the end offset, it will be a
	454	* hole at this offset
	455	*/
	456	if (lastoff == startoff \|\| lastoff < endoff)
	457	found = 1;
	458	break;
	459	}
	460
	461	/*
	462	* If this is the first time to go into the loop and
	463	* offset is smaller than the first page offset, it will be a
	464	* hole at this offset.
	465	*/
	466	if (lastoff == startoff && whence == SEEK_HOLE &&
	467	lastoff < page_offset(pvec.pages[0])) {
	468	found = 1;
	469	break;
	470	}
	471
	472	for (i = 0; i < nr_pages; i++) {
	473	struct page *page = pvec.pages[i];
	474	struct buffer_head bh, head;
	475
	476	/*
	477	* If the current offset is not beyond the end of given
	478	* range, it will be a hole.
	479	*/
	480	if (lastoff < endoff && whence == SEEK_HOLE &&
	481	page->index > end) {
	482	found = 1;
	483	*offset = lastoff;
	484	goto out;
	485	}
	486
	487	lock_page(page);
	488
	489	if (unlikely(page->mapping != inode->i_mapping)) {
	490	unlock_page(page);
	491	continue;
	492	}
	493
	494	if (!page_has_buffers(page)) {
	495	unlock_page(page);
	496	continue;
	497	}
	498
	499	if (page_has_buffers(page)) {
	500	lastoff = page_offset(page);
	501	bh = head = page_buffers(page);
	502	do {
	503	if (buffer_uptodate(bh) \|\|
	504	buffer_unwritten(bh)) {
	505	if (whence == SEEK_DATA)
	506	found = 1;
	507	} else {
	508	if (whence == SEEK_HOLE)
	509	found = 1;
	510	}
	511	if (found) {
	512	*offset = max_t(loff_t,
	513	startoff, lastoff);
	514	unlock_page(page);
	515	goto out;
	516	}
	517	lastoff += bh->b_size;
	518	bh = bh->b_this_page;
	519	} while (bh != head);
	520	}
	521
	522	lastoff = page_offset(page) + PAGE_SIZE;
	523	unlock_page(page);
	524	}
	525
	526	/*
	527	* The no. of pages is less than our desired, that would be a
	528	* hole in there.
	529	*/
	530	if (nr_pages < num && whence == SEEK_HOLE) {
	531	found = 1;
	532	*offset = lastoff;
	533	break;
	534	}
	535
	536	index = pvec.pages[i - 1]->index + 1;
	537	pagevec_release(&pvec);
	538	} while (index <= end);
	539
	540	out:
	541	pagevec_release(&pvec);
	542	return found;
	543	}
	544
	545	/*
	546	* ext4_seek_data() retrieves the offset for SEEK_DATA.
	547	*/
	548	static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
	549	{
	550	struct inode *inode = file->f_mapping->host;
	551	struct extent_status es;
	552	ext4_lblk_t start, last, end;
	553	loff_t dataoff, isize;
	554	int blkbits;
	555	int ret;
	556
	557	inode_lock(inode);
	558
	559	isize = i_size_read(inode);
	560	if (offset >= isize) {
	561	inode_unlock(inode);
	562	return -ENXIO;
	563	}
	564
	565	blkbits = inode->i_sb->s_blocksize_bits;
	566	start = offset >> blkbits;
	567	last = start;
	568	end = isize >> blkbits;
	569	dataoff = offset;
	570
	571	do {
	572	ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
	573	if (ret <= 0) {
	574	/* No extent found -> no data */
	575	if (ret == 0)
	576	ret = -ENXIO;
	577	inode_unlock(inode);
	578	return ret;
	579	}
	580
	581	last = es.es_lblk;
	582	if (last != start)
	583	dataoff = (loff_t)last << blkbits;
	584	if (!ext4_es_is_unwritten(&es))
	585	break;
	586
	587	/*
	588	* If there is a unwritten extent at this offset,
	589	* it will be as a data or a hole according to page
	590	* cache that has data or not.
	591	*/
	592	if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
	593	es.es_lblk + es.es_len, &dataoff))
	594	break;
	595	last += es.es_len;
	596	dataoff = (loff_t)last << blkbits;
	597	cond_resched();
	598	} while (last <= end);
	599
	600	inode_unlock(inode);
	601
	602	if (dataoff > isize)
	603	return -ENXIO;
	604
	605	return vfs_setpos(file, dataoff, maxsize);
	606	}
	607
	608	/*
	609	* ext4_seek_hole() retrieves the offset for SEEK_HOLE.
	610	*/
	611	static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
	612	{
	613	struct inode *inode = file->f_mapping->host;
	614	struct extent_status es;
	615	ext4_lblk_t start, last, end;
	616	loff_t holeoff, isize;
	617	int blkbits;
	618	int ret;
	619
	620	inode_lock(inode);
	621
	622	isize = i_size_read(inode);
	623	if (offset >= isize) {
	624	inode_unlock(inode);
	625	return -ENXIO;
	626	}
	627
	628	blkbits = inode->i_sb->s_blocksize_bits;
	629	start = offset >> blkbits;
	630	last = start;
	631	end = isize >> blkbits;
	632	holeoff = offset;
	633
	634	do {
	635	ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
	636	if (ret < 0) {
	637	inode_unlock(inode);
	638	return ret;
	639	}
	640	/* Found a hole? */
	641	if (ret == 0 \|\| es.es_lblk > last) {
	642	if (last != start)
	643	holeoff = (loff_t)last << blkbits;
	644	break;
	645	}
	646	/*
	647	* If there is a unwritten extent at this offset,
	648	* it will be as a data or a hole according to page
	649	* cache that has data or not.
	650	*/
	651	if (ext4_es_is_unwritten(&es) &&
	652	ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
	653	last + es.es_len, &holeoff))
	654	break;
	655
	656	last += es.es_len;
	657	holeoff = (loff_t)last << blkbits;
	658	cond_resched();
	659	} while (last <= end);
	660
	661	inode_unlock(inode);
	662
	663	if (holeoff > isize)
	664	holeoff = isize;
	665
	666	return vfs_setpos(file, holeoff, maxsize);
	667	}
	668
	669	/*
	670	* ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
	671	* by calling generic_file_llseek_size() with the appropriate maxbytes
	672	* value for each.
	673	*/
	674	loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
	675	{
	676	struct inode *inode = file->f_mapping->host;
	677	loff_t maxbytes;
	678
	679	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
	680	maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
	681	else
	682	maxbytes = inode->i_sb->s_maxbytes;
	683
	684	switch (whence) {
	685	case SEEK_SET:
	686	case SEEK_CUR:
	687	case SEEK_END:
	688	return generic_file_llseek_size(file, offset, whence,
	689	maxbytes, i_size_read(inode));
	690	case SEEK_DATA:
	691	return ext4_seek_data(file, offset, maxbytes);
	692	case SEEK_HOLE:
	693	return ext4_seek_hole(file, offset, maxbytes);
	694	}
	695
	696	return -EINVAL;
	697	}
	698
	699	const struct file_operations ext4_file_operations = {
	700	.llseek = ext4_llseek,
	701	.read_iter = generic_file_read_iter,
	702	.write_iter = ext4_file_write_iter,
	703	.unlocked_ioctl = ext4_ioctl,
	704	#ifdef CONFIG_COMPAT
	705	.compat_ioctl = ext4_compat_ioctl,
	706	#endif
	707	.mmap = ext4_file_mmap,
	708	.open = ext4_file_open,
	709	.release = ext4_release_file,
	710	.fsync = ext4_sync_file,
	711	.splice_read = generic_file_splice_read,
	712	.splice_write = iter_file_splice_write,
	713	.fallocate = ext4_fallocate,
	714	};
	715
	716	const struct inode_operations ext4_file_inode_operations = {
	717	.setattr = ext4_setattr,
	718	.getattr = ext4_getattr,
	719	.setxattr = generic_setxattr,
	720	.getxattr = generic_getxattr,
	721	.listxattr = ext4_listxattr,
	722	.removexattr = generic_removexattr,
	723	.get_acl = ext4_get_acl,
	724	.set_acl = ext4_set_acl,
	725	.fiemap = ext4_fiemap,
	726	};
	727