]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
ceph: record truncate size/seq for snap data writeback
author: Yan, Zheng <zyan@redhat.com>
Tue, 15 Nov 2016 08:04:37 +0000 (16:04 +0800)
committer: Ilya Dryomov <idryomov@gmail.com>
Mon, 12 Dec 2016 22:54:27 +0000 (23:54 +0100)
Dirty snapshot data needs to be flushed unconditionally. If the data
was created before a truncation, writeback should use the old truncate
size/seq.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
fs/ceph/addr.c
fs/ceph/snap.c
fs/ceph/super.h

index dbb5f7d6921635808eb19780ade7c1b3775254ef..7d69f25d0c901e9c52f89e1d3afa2554be8559bf 100644 (file)
@@ -474,7 +474,9 @@ out:
  * only snap context we are allowed to write back.
  */
 static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-                                                   loff_t *snap_size)
+                                                   loff_t *snap_size,
+                                                   u64 *truncate_size,
+                                                   u32 *truncate_seq)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_snap_context *snapc = NULL;
@@ -488,6 +490,10 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
                        snapc = ceph_get_snap_context(capsnap->context);
                        if (snap_size)
                                *snap_size = capsnap->size;
+                       if (truncate_size)
+                               *truncate_size = capsnap->truncate_size;
+                       if (truncate_seq)
+                               *truncate_seq = capsnap->truncate_seq;
                        break;
                }
        }
@@ -495,6 +501,10 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
                snapc = ceph_get_snap_context(ci->i_head_snapc);
                dout(" head snapc %p has %d dirty pages\n",
                     snapc, ci->i_wrbuffer_ref_head);
+               if (truncate_size)
+                       *truncate_size = capsnap->truncate_size;
+               if (truncate_seq)
+                       *truncate_seq = capsnap->truncate_seq;
        }
        spin_unlock(&ci->i_ceph_lock);
        return snapc;
@@ -537,7 +547,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
                dout("writepage %p page %p not dirty?\n", inode, page);
                goto out;
        }
-       oldest = get_oldest_context(inode, &snap_size);
+       oldest = get_oldest_context(inode, &snap_size,
+                                   &truncate_size, &truncate_seq);
        if (snapc->seq > oldest->seq) {
                dout("writepage %p page %p snapc %p not writeable - noop\n",
                     inode, page, snapc);
@@ -548,12 +559,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        }
        ceph_put_snap_context(oldest);
 
-       spin_lock(&ci->i_ceph_lock);
-       truncate_seq = ci->i_truncate_seq;
-       truncate_size = ci->i_truncate_size;
        if (snap_size == -1)
                snap_size = i_size_read(inode);
-       spin_unlock(&ci->i_ceph_lock);
 
        /* is this a partial page at end of file? */
        if (page_off >= snap_size) {
@@ -800,7 +807,8 @@ retry:
        /* find oldest snap context with dirty data */
        ceph_put_snap_context(snapc);
        snap_size = -1;
-       snapc = get_oldest_context(inode, &snap_size);
+       snapc = get_oldest_context(inode, &snap_size,
+                                  &truncate_size, &truncate_seq);
        if (!snapc) {
                /* hmm, why does writepages get called when there
                   is no dirty data? */
@@ -810,11 +818,7 @@ retry:
        dout(" oldest snapc is %p seq %lld (%d snaps)\n",
             snapc, snapc->seq, snapc->num_snaps);
 
-       spin_lock(&ci->i_ceph_lock);
-       truncate_seq = ci->i_truncate_seq;
-       truncate_size = ci->i_truncate_size;
        i_size = i_size_read(inode);
-       spin_unlock(&ci->i_ceph_lock);
 
        if (last_snapc && snapc != last_snapc) {
                /* if we switched to a newer snapc, restart our scan at the
@@ -1160,7 +1164,8 @@ out:
 static int context_is_writeable_or_written(struct inode *inode,
                                           struct ceph_snap_context *snapc)
 {
-       struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
+       struct ceph_snap_context *oldest = get_oldest_context(inode, NULL,
+                                                             NULL, NULL);
        int ret = !oldest || snapc->seq <= oldest->seq;
 
        ceph_put_snap_context(oldest);
@@ -1205,7 +1210,7 @@ retry_locked:
                 * this page is already dirty in another (older) snap
                 * context!  is it writeable now?
                 */
-               oldest = get_oldest_context(inode, NULL);
+               oldest = get_oldest_context(inode, NULL, NULL, NULL);
 
                if (snapc->seq > oldest->seq) {
                        ceph_put_snap_context(oldest);
index 9ff5219d849e942c8f3a6a480d57daa52efeb8e0..8f8b41c2ef0f7d472afad0e1abcb4801ac11b221 100644 (file)
@@ -593,6 +593,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
        capsnap->atime = inode->i_atime;
        capsnap->ctime = inode->i_ctime;
        capsnap->time_warp_seq = ci->i_time_warp_seq;
+       capsnap->truncate_size = ci->i_truncate_size;
+       capsnap->truncate_seq = ci->i_truncate_seq;
        if (capsnap->dirty_pages) {
                dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
                     "still has %d dirty pages\n", inode, capsnap,
index b07f55e55f6026e39cda94a25a8c905fa00adc51..1b31ec6d61b86e5482217b99bc791cdaa46a49f0 100644 (file)
@@ -181,6 +181,8 @@ struct ceph_cap_snap {
        u64 size;
        struct timespec mtime, atime, ctime;
        u64 time_warp_seq;
+       u64 truncate_size;
+       u32 truncate_seq;
        int writing;   /* a sync write is still in progress */
        int dirty_pages;     /* dirty pages awaiting writeback */
        bool inline_data;