git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/ceph/inode.c
ceph: avoid dereferencing invalid pointer during cached readdir
[mirror_ubuntu-bionic-kernel.git] / fs / ceph / inode.c
index 9e1bb79cc5c052b65280af65056a4ab201cf8457..63fcb9496160f634030e772d1ce685c3463535b6 100644 (file)
@@ -33,9 +33,7 @@
 
 static const struct inode_operations ceph_symlink_iops;
 
-static void ceph_invalidate_work(struct work_struct *work);
-static void ceph_writeback_work(struct work_struct *work);
-static void ceph_vmtruncate_work(struct work_struct *work);
+static void ceph_inode_work(struct work_struct *work);
 
 /*
  * find or create an inode, given the ceph ino number
@@ -506,10 +504,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ci->i_snap_realm_item);
        INIT_LIST_HEAD(&ci->i_snap_flush_item);
 
-       INIT_WORK(&ci->i_wb_work, ceph_writeback_work);
-       INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work);
-
-       INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
+       INIT_WORK(&ci->i_work, ceph_inode_work);
+       ci->i_work_mask = 0;
 
        ceph_fscache_inode_init(ci);
 
@@ -1084,6 +1080,27 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
 
        BUG_ON(d_inode(dn));
 
+       if (S_ISDIR(in->i_mode)) {
+               /* If inode is directory, d_splice_alias() below will remove
+                * 'realdn' from its origin parent. We need to ensure that
+                * origin parent's readdir cache will not reference 'realdn'
+                */
+               realdn = d_find_any_alias(in);
+               if (realdn) {
+                       struct ceph_dentry_info *di = ceph_dentry(realdn);
+                       spin_lock(&realdn->d_lock);
+
+                       realdn->d_op->d_prune(realdn);
+
+                       di->time = jiffies;
+                       di->lease_shared_gen = 0;
+                       di->offset = 0;
+
+                       spin_unlock(&realdn->d_lock);
+                       dput(realdn);
+               }
+       }
+
        /* dn must be unhashed */
        if (!d_unhashed(dn))
                d_drop(dn);
@@ -1299,8 +1316,8 @@ retry_lookup:
                if (!rinfo->head->is_target) {
                        dout("fill_trace null dentry\n");
                        if (d_really_is_positive(dn)) {
-                               ceph_dir_clear_ordered(dir);
                                dout("d_delete %p\n", dn);
+                               ceph_dir_clear_ordered(dir);
                                d_delete(dn);
                        } else if (have_lease) {
                                if (d_unhashed(dn))
@@ -1327,7 +1344,6 @@ retry_lookup:
                        dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
                             dn, d_inode(dn), ceph_vinop(d_inode(dn)),
                             ceph_vinop(in));
-                       ceph_dir_clear_ordered(dir);
                        d_invalidate(dn);
                        have_lease = false;
                }
@@ -1577,9 +1593,19 @@ retry_lookup:
                } else if (d_really_is_positive(dn) &&
                           (ceph_ino(d_inode(dn)) != tvino.ino ||
                            ceph_snap(d_inode(dn)) != tvino.snap)) {
+                       struct ceph_dentry_info *di = ceph_dentry(dn);
                        dout(" dn %p points to wrong inode %p\n",
                             dn, d_inode(dn));
-                       __ceph_dir_clear_ordered(ci);
+
+                       spin_lock(&dn->d_lock);
+                       if (di->offset > 0 &&
+                           di->lease_shared_gen ==
+                           atomic_read(&ci->i_shared_gen)) {
+                               __ceph_dir_clear_ordered(ci);
+                               di->offset = 0;
+                       }
+                       spin_unlock(&dn->d_lock);
+
                        d_delete(dn);
                        dput(dn);
                        goto retry_lookup;
@@ -1604,9 +1630,7 @@ retry_lookup:
                                 &req->r_caps_reservation);
                if (ret < 0) {
                        pr_err("fill_inode badness on %p\n", in);
-                       if (d_really_is_positive(dn))
-                               __ceph_dir_clear_ordered(ci);
-                       else
+                       if (d_really_is_negative(dn))
                                iput(in);
                        d_drop(dn);
                        err = ret;
@@ -1682,51 +1706,62 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size)
  */
 void ceph_queue_writeback(struct inode *inode)
 {
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask);
+
        ihold(inode);
-       if (queue_work(ceph_inode_to_client(inode)->wb_wq,
-                      &ceph_inode(inode)->i_wb_work)) {
+       if (queue_work(ceph_inode_to_client(inode)->inode_wq,
+                      &ci->i_work)) {
                dout("ceph_queue_writeback %p\n", inode);
        } else {
-               dout("ceph_queue_writeback %p failed\n", inode);
+               dout("ceph_queue_writeback %p already queued, mask=%lx\n",
+                    inode, ci->i_work_mask);
                iput(inode);
        }
 }
 
-static void ceph_writeback_work(struct work_struct *work)
-{
-       struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
-                                                 i_wb_work);
-       struct inode *inode = &ci->vfs_inode;
-
-       dout("writeback %p\n", inode);
-       filemap_fdatawrite(&inode->i_data);
-       iput(inode);
-}
-
 /*
  * queue an async invalidation
  */
 void ceph_queue_invalidate(struct inode *inode)
 {
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask);
+
        ihold(inode);
-       if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
-                      &ceph_inode(inode)->i_pg_inv_work)) {
+       if (queue_work(ceph_inode_to_client(inode)->inode_wq,
+                      &ceph_inode(inode)->i_work)) {
                dout("ceph_queue_invalidate %p\n", inode);
        } else {
-               dout("ceph_queue_invalidate %p failed\n", inode);
+               dout("ceph_queue_invalidate %p already queued, mask=%lx\n",
+                    inode, ci->i_work_mask);
                iput(inode);
        }
 }
 
 /*
- * Invalidate inode pages in a worker thread.  (This can't be done
- * in the message handler context.)
+ * Queue an async vmtruncate.  If we fail to queue work, we will handle
+ * the truncation the next time we call __ceph_do_pending_vmtruncate.
  */
-static void ceph_invalidate_work(struct work_struct *work)
+void ceph_queue_vmtruncate(struct inode *inode)
 {
-       struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
-                                                 i_pg_inv_work);
-       struct inode *inode = &ci->vfs_inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask);
+
+       ihold(inode);
+       if (queue_work(ceph_inode_to_client(inode)->inode_wq,
+                      &ci->i_work)) {
+               dout("ceph_queue_vmtruncate %p\n", inode);
+       } else {
+               dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n",
+                    inode, ci->i_work_mask);
+               iput(inode);
+       }
+}
+
+static void ceph_do_invalidate_pages(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        u32 orig_gen;
        int check = 0;
@@ -1778,44 +1813,6 @@ static void ceph_invalidate_work(struct work_struct *work)
 out:
        if (check)
                ceph_check_caps(ci, 0, NULL);
-       iput(inode);
-}
-
-
-/*
- * called by trunc_wq;
- *
- * We also truncate in a separate thread as well.
- */
-static void ceph_vmtruncate_work(struct work_struct *work)
-{
-       struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
-                                                 i_vmtruncate_work);
-       struct inode *inode = &ci->vfs_inode;
-
-       dout("vmtruncate_work %p\n", inode);
-       __ceph_do_pending_vmtruncate(inode);
-       iput(inode);
-}
-
-/*
- * Queue an async vmtruncate.  If we fail to queue work, we will handle
- * the truncation the next time we call __ceph_do_pending_vmtruncate.
- */
-void ceph_queue_vmtruncate(struct inode *inode)
-{
-       struct ceph_inode_info *ci = ceph_inode(inode);
-
-       ihold(inode);
-
-       if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
-                      &ci->i_vmtruncate_work)) {
-               dout("ceph_queue_vmtruncate %p\n", inode);
-       } else {
-               dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
-                    inode, ci->i_truncate_pending);
-               iput(inode);
-       }
 }
 
 /*
@@ -1890,6 +1887,25 @@ retry:
        wake_up_all(&ci->i_cap_wq);
 }
 
+static void ceph_inode_work(struct work_struct *work)
+{
+       struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+                                                i_work);
+       struct inode *inode = &ci->vfs_inode;
+
+       if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) {
+               dout("writeback %p\n", inode);
+               filemap_fdatawrite(&inode->i_data);
+       }
+       if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask))
+               ceph_do_invalidate_pages(inode);
+
+       if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask))
+               __ceph_do_pending_vmtruncate(inode);
+
+       iput(inode);
+}
+
 /*
  * symlinks
  */