Implement fallocate FALLOC_FL_PUNCH_HOLE

author Tim Chase <tim@chase2k.com>

Wed, 20 Aug 2014 22:35:13 +0000 (17:35 -0500)

committer Brian Behlendorf <behlendorf1@llnl.gov>

Mon, 8 Sep 2014 20:52:25 +0000 (13:52 -0700)
author Tim Chase <tim@chase2k.com>
Wed, 20 Aug 2014 22:35:13 +0000 (17:35 -0500)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Mon, 8 Sep 2014 20:52:25 +0000 (13:52 -0700)
diff --git a/include/sys/zpl.h b/include/sys/zpl.h

index eb0e9f057a0e68c3fd21bb7e896b52c22aab34bc..2d82d1ada8316950e85e76a08c83e64cafc8d3ca 100644 (file)
--- a/include/sys/zpl.h
+++ b/include/sys/zpl.h
@@ -52,8 +52,10 @@ extern ssize_t zpl_read_common(struct inode *ip, const char *buf,
  extern ssize_t zpl_write_common(struct inode *ip, const char *buf,
      size_t len, loff_t *ppos, uio_seg_t segment, int flags,
      cred_t *cr);
+#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
  extern long zpl_fallocate_common(struct inode *ip, int mode,
      loff_t offset, loff_t len);
+#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */
  
  extern const struct address_space_operations zpl_address_space_operations;
  extern const struct file_operations zpl_file_operations;
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c

index 18b2564a270dda6f517ceeb7f242e830f609eacf..ddd997fae11aa388ec629a4f5ad2940a3a717d29 100644 (file)
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -2565,8 +2565,6 @@ top:
                 if (err)
                         goto out3;
  
-               truncate_setsize(ip, vap->va_size);
-
                 /*
                  * XXX - Note, we are not providing any open
                  * mode flags here (like FNDELAY), so we may
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c

index 5fcb9e930f745054e66cd3a25ca6dee23384efaa..f2e305f7af2c7190fb25e88812aff82d2036dc21 100644 (file)
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -1344,6 +1344,50 @@ zfs_extend(znode_t *zp, uint64_t end)
         return (0);
  }
  
+/*
+ * zfs_zero_partial_page - Zero len bytes, beginning at file offset
+ * start, in the page cache entry of zp that covers that range.  The
+ * range must not cross a page boundary.  Nothing is done when the
+ * page is not present in the page cache.
+ *
+ * Patterned on update_pages(), with arguments and semantics suited
+ * to zfs_freesp().  The caller must hold a range lock on the file
+ * covering the zeroed region so the ARC and page cache stay in sync.
+ */
+static void
+zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
+{
+       struct address_space *mapping = ZTOI(zp)->i_mapping;
+       int64_t page_off = start & (PAGE_CACHE_SIZE - 1);
+       struct page *page;
+       void *kaddr;
+
+       ASSERT((start & PAGE_CACHE_MASK) ==
+           ((start + len - 1) & PAGE_CACHE_MASK));
+
+       /* Look the page up by index; a miss leaves nothing to zero. */
+       page = find_lock_page(mapping,
+           (start & PAGE_CACHE_MASK) >> PAGE_CACHE_SHIFT);
+       if (page == NULL)
+               return;
+
+       if (mapping_writably_mapped(mapping))
+               flush_dcache_page(page);
+
+       kaddr = kmap(page);
+       bzero(kaddr + page_off, len);
+       kunmap(page);
+
+       if (mapping_writably_mapped(mapping))
+               flush_dcache_page(page);
+
+       mark_page_accessed(page);
+       SetPageUptodate(page);
+       ClearPageError(page);
+       unlock_page(page);
+       page_cache_release(page);
+}
+
  /*
   * Free space in a file.
   *
@@ -1378,6 +1422,40 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
  
         error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);
  
+       /*
+        * Zero partial page cache entries.  This must be done under a
+        * range lock in order to keep the ARC and page cache in sync.
+        */
+       if (zp->z_is_mapped) {
+               loff_t first_page, last_page, page_len;
+               loff_t first_page_offset, last_page_offset;
+
+               /* first possible full page in hole */
+               first_page = (off + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+               /* last page of hole */
+               last_page = (off + len) >> PAGE_CACHE_SHIFT;
+
+               /* offset of first_page */
+               first_page_offset = first_page << PAGE_CACHE_SHIFT;
+               /* offset of last_page */
+               last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+               if (first_page > last_page) {
+                       /* entire punched area within a single page */
+                       zfs_zero_partial_page(zp, off, len);
+               } else {
+                       /* beginning of punched area at the end of a page */
+                       page_len  = first_page_offset - off;
+                       if (page_len > 0)
+                               zfs_zero_partial_page(zp, off, page_len);
+
+                       /* end of punched area at the beginning of a page */
+                       page_len = off + len - last_page_offset;
+                       if (page_len > 0)
+                               zfs_zero_partial_page(zp, last_page_offset,
+                                   page_len);
+               }
+       }
         zfs_range_unlock(rl);
  
         return (error);
@@ -1479,8 +1557,7 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
                 error =  zfs_extend(zp, off+len);
                 if (error == 0 && log)
                         goto log;
-               else
-                       return (error);
+               goto out;
         }
  
         /*
@@ -1500,7 +1577,7 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
                         error = zfs_extend(zp, off+len);
         }
         if (error || !log)
-               return (error);
+               goto out;
  log:
         tx = dmu_tx_create(zsb->z_os);
         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
@@ -1508,7 +1585,7 @@ log:
         error = dmu_tx_assign(tx, TXG_WAIT);
         if (error) {
                 dmu_tx_abort(tx);
-               return (error);
+               goto out;
         }
  
         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16);
@@ -1522,8 +1599,40 @@ log:
         zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
  
         dmu_tx_commit(tx);
+
         zfs_inode_update(zp);
-       return (0);
+       error = 0;
+
+out:
+       /*
+        * Truncate the page cache - for file truncate operations, use
+        * the purpose-built API for truncations.  For punching operations,
+        * truncate only whole pages within the region; partial pages are
+        * zeroed under a range lock in zfs_free_range().
+        */
+       if (len == 0)
+               truncate_setsize(ZTOI(zp), off);
+       else if (zp->z_is_mapped) {
+               loff_t first_page, last_page;
+               loff_t first_page_offset, last_page_offset;
+
+               /* first possible full page in hole */
+               first_page = (off + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+               /* last page of hole */
+               last_page = (off + len) >> PAGE_CACHE_SHIFT;
+
+               /* offset of first_page */
+               first_page_offset = first_page << PAGE_CACHE_SHIFT;
+               /* offset of last_page */
+               last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+               /* truncate whole pages */
+               if (last_page_offset > first_page_offset) {
+                       truncate_inode_pages_range(ZTOI(zp)->i_mapping,
+                           first_page_offset, last_page_offset - 1);
+               }
+       }
+       return (error);
  }
  
  void
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c

index 5ea89232028ca844d3f20699edeb6189fdb7edeb..c72d5c9477397757cf1c8e22864ee1ee8a91fe87 100644 (file)
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -558,38 +558,60 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
  
  /*
   * The only flag combination which matches the behavior of zfs_space()
- * is FALLOC_FL_PUNCH_HOLE.  This flag was introduced in the 2.6.38 kernel.
+ * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.  The FALLOC_FL_PUNCH_HOLE
+ * flag was introduced in the 2.6.38 kernel.
+ *
+ * Any other mode yields -EOPNOTSUPP; a negative offset or a non-positive
+ * len yields -EINVAL.  A hole that starts beyond the current file size is
+ * a successful no-op, and a hole that runs past the end of the file is
+ * clamped to the file size.  Otherwise the negated zfs_space() error is
+ * returned.
   */
+#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
  long
  zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
  {
-       cred_t *cr = CRED();
         int error = -EOPNOTSUPP;
  
-       if (mode & FALLOC_FL_KEEP_SIZE)
-               return (-EOPNOTSUPP);
+#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
+       cred_t *cr = CRED();
+       flock64_t bf;
+       loff_t olen;
+
+       if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+               return (error);
  
-       crhold(cr);
+       if (offset < 0 || len <= 0)
+               return (-EINVAL);
  
-#ifdef FALLOC_FL_PUNCH_HOLE
-       if (mode & FALLOC_FL_PUNCH_HOLE) {
-               flock64_t bf;
+       spl_inode_lock(ip);
+       olen = i_size_read(ip);
  
-               bf.l_type = F_WRLCK;
-               bf.l_whence = 0;
-               bf.l_start = offset;
-               bf.l_len = len;
-               bf.l_pid = 0;
-
-               error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
+       if (offset > olen) {
+               spl_inode_unlock(ip);
+               return (0);
         }
-#endif /* FALLOC_FL_PUNCH_HOLE */
+       /* offset <= olen here, so this form of the check cannot overflow. */
+       if (len > olen - offset)
+               len = olen - offset;
+       bf.l_type = F_WRLCK;
+       bf.l_whence = 0;
+       bf.l_start = offset;
+       bf.l_len = len;
+       bf.l_pid = 0;
+
+       /* Hold the cred only now; the early returns above never drop it. */
+       crhold(cr);
+       error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
+       spl_inode_unlock(ip);
  
         crfree(cr);
+#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */
  
         ASSERT3S(error, <=, 0);
         return (error);
  }
+#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */
  
  #ifdef HAVE_FILE_FALLOCATE
  static long
author	Tim Chase <tim@chase2k.com>
	Wed, 20 Aug 2014 22:35:13 +0000 (17:35 -0500)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Mon, 8 Sep 2014 20:52:25 +0000 (13:52 -0700)
include/sys/zpl.h		patch \| blob \| blame \| history
module/zfs/zfs_vnops.c		patch \| blob \| blame \| history
module/zfs/zfs_znode.c		patch \| blob \| blame \| history
module/zfs/zpl_file.c		patch \| blob \| blame \| history