git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/ceph/inode.c
ceph: quota: support for ceph.quota.max_bytes
[mirror_ubuntu-bionic-kernel.git] / fs / ceph / inode.c
index ab81652198c48e1e90a5545cb06089a6fa30da1a..80aad584e2a0a462501ba8161fd00d77c9b5ff77 100644 (file)
@@ -33,9 +33,7 @@
 
 static const struct inode_operations ceph_symlink_iops;
 
-static void ceph_invalidate_work(struct work_struct *work);
-static void ceph_writeback_work(struct work_struct *work);
-static void ceph_vmtruncate_work(struct work_struct *work);
+static void ceph_inode_work(struct work_struct *work);
 
 /*
  * find or create an inode, given the ceph ino number
@@ -441,6 +439,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        atomic64_set(&ci->i_complete_seq[1], 0);
        ci->i_symlink = NULL;
 
+       ci->i_max_bytes = 0;
+       ci->i_max_files = 0;
+
        memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
        RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL);
 
@@ -494,7 +495,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_wrbuffer_ref = 0;
        ci->i_wrbuffer_ref_head = 0;
        atomic_set(&ci->i_filelock_ref, 0);
-       ci->i_shared_gen = 0;
+       atomic_set(&ci->i_shared_gen, 0);
        ci->i_rdcache_gen = 0;
        ci->i_rdcache_revoking = 0;
 
@@ -506,10 +507,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ci->i_snap_realm_item);
        INIT_LIST_HEAD(&ci->i_snap_flush_item);
 
-       INIT_WORK(&ci->i_wb_work, ceph_writeback_work);
-       INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work);
-
-       INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
+       INIT_WORK(&ci->i_work, ceph_inode_work);
+       ci->i_work_mask = 0;
 
        ceph_fscache_inode_init(ci);
 
@@ -534,7 +533,7 @@ void ceph_destroy_inode(struct inode *inode)
 
        ceph_fscache_unregister_inode_cookie(ci);
 
-       ceph_queue_caps_release(inode);
+       __ceph_remove_caps(inode);
 
        /*
         * we may still have a snap_realm reference if there are stray
@@ -660,13 +659,15 @@ void ceph_fill_file_time(struct inode *inode, int issued,
                      CEPH_CAP_FILE_BUFFER|
                      CEPH_CAP_AUTH_EXCL|
                      CEPH_CAP_XATTR_EXCL)) {
-               if (timespec_compare(ctime, &inode->i_ctime) > 0) {
+               if (ci->i_version == 0 ||
+                   timespec_compare(ctime, &inode->i_ctime) > 0) {
                        dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
                             inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
                             ctime->tv_sec, ctime->tv_nsec);
                        inode->i_ctime = *ctime;
                }
-               if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
+               if (ci->i_version == 0 ||
+                   ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
                        /* the MDS did a utimes() */
                        dout("mtime %ld.%09ld -> %ld.%09ld "
                             "tw %d -> %d\n",
@@ -786,10 +787,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
        new_issued = ~issued & le32_to_cpu(info->cap.caps);
 
        /* update inode */
-       ci->i_version = le64_to_cpu(info->version);
        inode->i_rdev = le32_to_cpu(info->rdev);
        inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
+       ci->i_max_bytes = iinfo->max_bytes;
+       ci->i_max_files = iinfo->max_files;
+
        if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
            (issued & CEPH_CAP_AUTH_EXCL) == 0) {
                inode->i_mode = le32_to_cpu(info->mode);
@@ -857,6 +860,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
                xattr_blob = NULL;
        }
 
+       /* finally update i_version */
+       ci->i_version = le64_to_cpu(info->version);
+
        inode->i_mapping->a_ops = &ceph_aops;
 
        switch (inode->i_mode & S_IFMT) {
@@ -1041,7 +1047,7 @@ static void update_dentry_lease(struct dentry *dentry,
        if (ceph_snap(dir) != CEPH_NOSNAP)
                goto out_unlock;
 
-       di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
+       di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen);
 
        if (duration == 0)
                goto out_unlock;
@@ -1080,6 +1086,27 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
 
        BUG_ON(d_inode(dn));
 
+       if (S_ISDIR(in->i_mode)) {
+               /* If the inode is a directory, d_splice_alias() below will
+                * remove 'realdn' from its original parent. Make sure the
+                * original parent's readdir cache no longer references it.
+                */
+               realdn = d_find_any_alias(in);
+               if (realdn) {
+                       struct ceph_dentry_info *di = ceph_dentry(realdn);
+                       spin_lock(&realdn->d_lock);
+
+                       realdn->d_op->d_prune(realdn);
+
+                       di->time = jiffies;
+                       di->lease_shared_gen = 0;
+                       di->offset = 0;
+
+                       spin_unlock(&realdn->d_lock);
+                       dput(realdn);
+               }
+       }
+
        /* dn must be unhashed */
        if (!d_unhashed(dn))
                d_drop(dn);
@@ -1295,8 +1322,8 @@ retry_lookup:
                if (!rinfo->head->is_target) {
                        dout("fill_trace null dentry\n");
                        if (d_really_is_positive(dn)) {
-                               ceph_dir_clear_ordered(dir);
                                dout("d_delete %p\n", dn);
+                               ceph_dir_clear_ordered(dir);
                                d_delete(dn);
                        } else if (have_lease) {
                                if (d_unhashed(dn))
@@ -1323,7 +1350,6 @@ retry_lookup:
                        dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
                             dn, d_inode(dn), ceph_vinop(d_inode(dn)),
                             ceph_vinop(in));
-                       ceph_dir_clear_ordered(dir);
                        d_invalidate(dn);
                        have_lease = false;
                }
@@ -1573,9 +1599,19 @@ retry_lookup:
                } else if (d_really_is_positive(dn) &&
                           (ceph_ino(d_inode(dn)) != tvino.ino ||
                            ceph_snap(d_inode(dn)) != tvino.snap)) {
+                       struct ceph_dentry_info *di = ceph_dentry(dn);
                        dout(" dn %p points to wrong inode %p\n",
                             dn, d_inode(dn));
-                       __ceph_dir_clear_ordered(ci);
+
+                       spin_lock(&dn->d_lock);
+                       if (di->offset > 0 &&
+                           di->lease_shared_gen ==
+                           atomic_read(&ci->i_shared_gen)) {
+                               __ceph_dir_clear_ordered(ci);
+                               di->offset = 0;
+                       }
+                       spin_unlock(&dn->d_lock);
+
                        d_delete(dn);
                        dput(dn);
                        goto retry_lookup;
@@ -1600,9 +1636,7 @@ retry_lookup:
                                 &req->r_caps_reservation);
                if (ret < 0) {
                        pr_err("fill_inode badness on %p\n", in);
-                       if (d_really_is_positive(dn))
-                               __ceph_dir_clear_ordered(ci);
-                       else
+                       if (d_really_is_negative(dn))
                                iput(in);
                        d_drop(dn);
                        err = ret;
@@ -1678,51 +1712,62 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size)
  */
 void ceph_queue_writeback(struct inode *inode)
 {
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask);
+
        ihold(inode);
-       if (queue_work(ceph_inode_to_client(inode)->wb_wq,
-                      &ceph_inode(inode)->i_wb_work)) {
+       if (queue_work(ceph_inode_to_client(inode)->inode_wq,
+                      &ci->i_work)) {
                dout("ceph_queue_writeback %p\n", inode);
        } else {
-               dout("ceph_queue_writeback %p failed\n", inode);
+               dout("ceph_queue_writeback %p already queued, mask=%lx\n",
+                    inode, ci->i_work_mask);
                iput(inode);
        }
 }
 
-static void ceph_writeback_work(struct work_struct *work)
-{
-       struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
-                                                 i_wb_work);
-       struct inode *inode = &ci->vfs_inode;
-
-       dout("writeback %p\n", inode);
-       filemap_fdatawrite(&inode->i_data);
-       iput(inode);
-}
-
 /*
  * queue an async invalidation
  */
 void ceph_queue_invalidate(struct inode *inode)
 {
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask);
+
        ihold(inode);
-       if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
-                      &ceph_inode(inode)->i_pg_inv_work)) {
+       if (queue_work(ceph_inode_to_client(inode)->inode_wq,
+                      &ceph_inode(inode)->i_work)) {
                dout("ceph_queue_invalidate %p\n", inode);
        } else {
-               dout("ceph_queue_invalidate %p failed\n", inode);
+               dout("ceph_queue_invalidate %p already queued, mask=%lx\n",
+                    inode, ci->i_work_mask);
                iput(inode);
        }
 }
 
 /*
- * Invalidate inode pages in a worker thread.  (This can't be done
- * in the message handler context.)
+ * Queue an async vmtruncate.  If we fail to queue work, we will handle
+ * the truncation the next time we call __ceph_do_pending_vmtruncate.
  */
-static void ceph_invalidate_work(struct work_struct *work)
+void ceph_queue_vmtruncate(struct inode *inode)
 {
-       struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
-                                                 i_pg_inv_work);
-       struct inode *inode = &ci->vfs_inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask);
+
+       ihold(inode);
+       if (queue_work(ceph_inode_to_client(inode)->inode_wq,
+                      &ci->i_work)) {
+               dout("ceph_queue_vmtruncate %p\n", inode);
+       } else {
+               dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n",
+                    inode, ci->i_work_mask);
+               iput(inode);
+       }
+}
+
+static void ceph_do_invalidate_pages(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        u32 orig_gen;
        int check = 0;
@@ -1774,44 +1819,6 @@ static void ceph_invalidate_work(struct work_struct *work)
 out:
        if (check)
                ceph_check_caps(ci, 0, NULL);
-       iput(inode);
-}
-
-
-/*
- * called by trunc_wq;
- *
- * We also truncate in a separate thread as well.
- */
-static void ceph_vmtruncate_work(struct work_struct *work)
-{
-       struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
-                                                 i_vmtruncate_work);
-       struct inode *inode = &ci->vfs_inode;
-
-       dout("vmtruncate_work %p\n", inode);
-       __ceph_do_pending_vmtruncate(inode);
-       iput(inode);
-}
-
-/*
- * Queue an async vmtruncate.  If we fail to queue work, we will handle
- * the truncation the next time we call __ceph_do_pending_vmtruncate.
- */
-void ceph_queue_vmtruncate(struct inode *inode)
-{
-       struct ceph_inode_info *ci = ceph_inode(inode);
-
-       ihold(inode);
-
-       if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
-                      &ci->i_vmtruncate_work)) {
-               dout("ceph_queue_vmtruncate %p\n", inode);
-       } else {
-               dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
-                    inode, ci->i_truncate_pending);
-               iput(inode);
-       }
 }
 
 /*
@@ -1886,6 +1893,25 @@ retry:
        wake_up_all(&ci->i_cap_wq);
 }
 
+static void ceph_inode_work(struct work_struct *work)
+{
+       struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+                                                i_work);
+       struct inode *inode = &ci->vfs_inode;
+
+       if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) {
+               dout("writeback %p\n", inode);
+               filemap_fdatawrite(&inode->i_data);
+       }
+       if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask))
+               ceph_do_invalidate_pages(inode);
+
+       if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask))
+               __ceph_do_pending_vmtruncate(inode);
+
+       iput(inode);
+}
+
 /*
  * symlinks
  */
@@ -2124,6 +2150,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        if (err != 0)
                return err;
 
+       if ((attr->ia_valid & ATTR_SIZE) &&
+           ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
+               return -EDQUOT;
+
        err = __ceph_setattr(inode, attr);
 
        if (err >= 0 && (attr->ia_valid & ATTR_MODE))