X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=fs%2Fceph%2Finode.c;h=ae6723e80c7158c79f89f8aa43de239e2f596c36;hb=7370e8a442bcb90d0072b5baa3265bbe4324dc18;hp=ab81652198c48e1e90a5545cb06089a6fa30da1a;hpb=088fecfbb2c8a3890918fc614a7e1e1ccdd16c83;p=mirror_ubuntu-bionic-kernel.git diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ab81652198c4..ae6723e80c71 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -33,9 +33,7 @@ static const struct inode_operations ceph_symlink_iops; -static void ceph_invalidate_work(struct work_struct *work); -static void ceph_writeback_work(struct work_struct *work); -static void ceph_vmtruncate_work(struct work_struct *work); +static void ceph_inode_work(struct work_struct *work); /* * find or create an inode, given the ceph ino number @@ -441,6 +439,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb) atomic64_set(&ci->i_complete_seq[1], 0); ci->i_symlink = NULL; + ci->i_max_bytes = 0; + ci->i_max_files = 0; + memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); @@ -494,7 +495,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_wrbuffer_ref = 0; ci->i_wrbuffer_ref_head = 0; atomic_set(&ci->i_filelock_ref, 0); - ci->i_shared_gen = 0; + atomic_set(&ci->i_shared_gen, 0); ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; @@ -506,10 +507,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_snap_realm_item); INIT_LIST_HEAD(&ci->i_snap_flush_item); - INIT_WORK(&ci->i_wb_work, ceph_writeback_work); - INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work); - - INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work); + INIT_WORK(&ci->i_work, ceph_inode_work); + ci->i_work_mask = 0; ceph_fscache_inode_init(ci); @@ -534,7 +533,7 @@ void ceph_destroy_inode(struct inode *inode) ceph_fscache_unregister_inode_cookie(ci); - ceph_queue_caps_release(inode); + __ceph_remove_caps(inode); /* * we may still have a snap_realm reference if there are stray @@ -660,13 +659,15 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_FILE_BUFFER| CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { - if (timespec_compare(ctime, &inode->i_ctime) > 0) { + if (ci->i_version == 0 || + timespec_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode->i_ctime = *ctime; } - if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { + if (ci->i_version == 0 || + ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ dout("mtime %ld.%09ld -> %ld.%09ld " "tw %d -> %d\n", @@ -786,10 +787,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page, new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ - ci->i_version = le64_to_cpu(info->version); inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + ci->i_max_bytes = iinfo->max_bytes; + ci->i_max_files = iinfo->max_files; + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); @@ -857,6 +860,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, xattr_blob = NULL; } + /* finally update i_version */ + ci->i_version = le64_to_cpu(info->version); + inode->i_mapping->a_ops = &ceph_aops; switch (inode->i_mode & S_IFMT) { @@ -1041,7 +1047,7 @@ static void update_dentry_lease(struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) goto out_unlock; - di->lease_shared_gen = ceph_inode(dir)->i_shared_gen; + di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); if (duration == 0) goto out_unlock; @@ -1080,6 +1086,27 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) BUG_ON(d_inode(dn)); + if (S_ISDIR(in->i_mode)) { + /* If inode is directory, d_splice_alias() below will remove + * 'realdn' from its origin parent. We need to ensure that + * origin parent's readdir cache will not reference 'realdn' + */ + realdn = d_find_any_alias(in); + if (realdn) { + struct ceph_dentry_info *di = ceph_dentry(realdn); + spin_lock(&realdn->d_lock); + + realdn->d_op->d_prune(realdn); + + di->time = jiffies; + di->lease_shared_gen = 0; + di->offset = 0; + + spin_unlock(&realdn->d_lock); + dput(realdn); + } + } + /* dn must be unhashed */ if (!d_unhashed(dn)) d_drop(dn); @@ -1295,8 +1322,8 @@ retry_lookup: if (!rinfo->head->is_target) { dout("fill_trace null dentry\n"); if (d_really_is_positive(dn)) { - ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); + ceph_dir_clear_ordered(dir); d_delete(dn); } else if (have_lease) { if (d_unhashed(dn)) @@ -1323,7 +1350,6 @@ retry_lookup: dout(" %p links to %p %llx.%llx, not %llx.%llx\n", dn, d_inode(dn), ceph_vinop(d_inode(dn)), ceph_vinop(in)); - ceph_dir_clear_ordered(dir); d_invalidate(dn); have_lease = false; } @@ -1573,9 +1599,19 @@ retry_lookup: } else if (d_really_is_positive(dn) && (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { + struct ceph_dentry_info *di = ceph_dentry(dn); dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); - __ceph_dir_clear_ordered(ci); + + spin_lock(&dn->d_lock); + if (di->offset > 0 && + di->lease_shared_gen == + atomic_read(&ci->i_shared_gen)) { + __ceph_dir_clear_ordered(ci); + di->offset = 0; + } + spin_unlock(&dn->d_lock); + d_delete(dn); dput(dn); goto retry_lookup; @@ -1600,9 +1636,7 @@ retry_lookup: &req->r_caps_reservation); if (ret < 0) { pr_err("fill_inode badness on %p\n", in); - if (d_really_is_positive(dn)) - __ceph_dir_clear_ordered(ci); - else + if (d_really_is_negative(dn)) iput(in); d_drop(dn); err = ret; @@ -1678,51 +1712,62 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size) */ void ceph_queue_writeback(struct inode *inode) { + struct ceph_inode_info *ci = ceph_inode(inode); + set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask); + ihold(inode); - if (queue_work(ceph_inode_to_client(inode)->wb_wq, - &ceph_inode(inode)->i_wb_work)) { + if (queue_work(ceph_inode_to_client(inode)->inode_wq, + &ci->i_work)) { dout("ceph_queue_writeback %p\n", inode); } else { - dout("ceph_queue_writeback %p failed\n", inode); + dout("ceph_queue_writeback %p already queued, mask=%lx\n", + inode, ci->i_work_mask); iput(inode); } } -static void ceph_writeback_work(struct work_struct *work) -{ - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, - i_wb_work); - struct inode *inode = &ci->vfs_inode; - - dout("writeback %p\n", inode); - filemap_fdatawrite(&inode->i_data); - iput(inode); -} - /* * queue an async invalidation */ void ceph_queue_invalidate(struct inode *inode) { + struct ceph_inode_info *ci = ceph_inode(inode); + set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask); + ihold(inode); - if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, - &ceph_inode(inode)->i_pg_inv_work)) { + if (queue_work(ceph_inode_to_client(inode)->inode_wq, + &ceph_inode(inode)->i_work)) { dout("ceph_queue_invalidate %p\n", inode); } else { - dout("ceph_queue_invalidate %p failed\n", inode); + dout("ceph_queue_invalidate %p already queued, mask=%lx\n", + inode, ci->i_work_mask); iput(inode); } } /* - * Invalidate inode pages in a worker thread. (This can't be done - * in the message handler context.) + * Queue an async vmtruncate. If we fail to queue work, we will handle + * the truncation the next time we call __ceph_do_pending_vmtruncate. */ -static void ceph_invalidate_work(struct work_struct *work) +void ceph_queue_vmtruncate(struct inode *inode) { - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, - i_pg_inv_work); - struct inode *inode = &ci->vfs_inode; + struct ceph_inode_info *ci = ceph_inode(inode); + set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask); + + ihold(inode); + if (queue_work(ceph_inode_to_client(inode)->inode_wq, + &ci->i_work)) { + dout("ceph_queue_vmtruncate %p\n", inode); + } else { + dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n", + inode, ci->i_work_mask); + iput(inode); + } +} + +static void ceph_do_invalidate_pages(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); u32 orig_gen; int check = 0; @@ -1774,44 +1819,6 @@ static void ceph_invalidate_work(struct work_struct *work) out: if (check) ceph_check_caps(ci, 0, NULL); - iput(inode); -} - - -/* - * called by trunc_wq; - * - * We also truncate in a separate thread as well. - */ -static void ceph_vmtruncate_work(struct work_struct *work) -{ - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, - i_vmtruncate_work); - struct inode *inode = &ci->vfs_inode; - - dout("vmtruncate_work %p\n", inode); - __ceph_do_pending_vmtruncate(inode); - iput(inode); -} - -/* - * Queue an async vmtruncate. If we fail to queue work, we will handle - * the truncation the next time we call __ceph_do_pending_vmtruncate. - */ -void ceph_queue_vmtruncate(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - - ihold(inode); - - if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, - &ci->i_vmtruncate_work)) { - dout("ceph_queue_vmtruncate %p\n", inode); - } else { - dout("ceph_queue_vmtruncate %p failed, pending=%d\n", - inode, ci->i_truncate_pending); - iput(inode); - } } /* @@ -1886,6 +1893,25 @@ retry: wake_up_all(&ci->i_cap_wq); } +static void ceph_inode_work(struct work_struct *work) +{ + struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, + i_work); + struct inode *inode = &ci->vfs_inode; + + if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) { + dout("writeback %p\n", inode); + filemap_fdatawrite(&inode->i_data); + } + if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask)) + ceph_do_invalidate_pages(inode); + + if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask)) + __ceph_do_pending_vmtruncate(inode); + + iput(inode); +} + /* * symlinks */