]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 5 May 2014 22:17:02 +0000 (15:17 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 5 May 2014 22:17:02 +0000 (15:17 -0700)
Pull Ceph fixes from Sage Weil:
 "First, there is a critical fix for the new primary-affinity function
  that went into -rc1.

  The second batch of patches from Zheng fix a range of problems with
  directory fragmentation, readdir, and a few odds and ends for cephfs"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: reserve caps for file layout/lock MDS requests
  ceph: avoid releasing caps that are being used
  ceph: clear directory's completeness when creating file
  libceph: fix non-default values check in apply_primary_affinity()
  ceph: use fpos_cmp() to compare dentry positions
  ceph: check directory's completeness before emitting directory entry

fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/inode.c
fs/ceph/ioctl.c
fs/ceph/locks.c
fs/ceph/super.h
net/ceph/osdmap.c

index 2e5e648eb5c3dc3bd82bea5ce8dead051864cf75..c561b628ebce519d111d159f541b9df88242a5b1 100644 (file)
@@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
                        rel->seq = cpu_to_le32(cap->seq);
                        rel->issue_seq = cpu_to_le32(cap->issue_seq),
                        rel->mseq = cpu_to_le32(cap->mseq);
-                       rel->caps = cpu_to_le32(cap->issued);
+                       rel->caps = cpu_to_le32(cap->implemented);
                        rel->wanted = cpu_to_le32(cap->mds_wanted);
                        rel->dname_len = 0;
                        rel->dname_seq = 0;
index 766410a12c2cb209a224fcfd97f63a055fd20801..c29d6ae6887489c29902bec33c4118d4d807e9dc 100644 (file)
@@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 
        /* start at beginning? */
        if (ctx->pos == 2 || last == NULL ||
-           ctx->pos < ceph_dentry(last)->offset) {
+           fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
                if (list_empty(&parent->d_subdirs))
                        goto out_unlock;
                p = parent->d_subdirs.prev;
@@ -182,9 +182,16 @@ more:
        spin_unlock(&dentry->d_lock);
        spin_unlock(&parent->d_lock);
 
+       /* make sure a dentry wasn't dropped while we didn't have parent lock */
+       if (!ceph_dir_is_complete(dir)) {
+               dout(" lost dir complete on %p; falling back to mds\n", dir);
+               dput(dentry);
+               err = -EAGAIN;
+               goto out;
+       }
+
        dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
             dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
-       ctx->pos = di->offset;
        if (!dir_emit(ctx, dentry->d_name.name,
                      dentry->d_name.len,
                      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
@@ -198,19 +205,12 @@ more:
                return 0;
        }
 
+       ctx->pos = di->offset + 1;
+
        if (last)
                dput(last);
        last = dentry;
 
-       ctx->pos++;
-
-       /* make sure a dentry wasn't dropped while we didn't have parent lock */
-       if (!ceph_dir_is_complete(dir)) {
-               dout(" lost dir complete on %p; falling back to mds\n", dir);
-               err = -EAGAIN;
-               goto out;
-       }
-
        spin_lock(&parent->d_lock);
        p = p->prev;    /* advance to next dentry */
        goto more;
@@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
                err = __dcache_readdir(file, ctx, shared_gen);
                if (err != -EAGAIN)
                        return err;
+               frag = fpos_frag(ctx->pos);
+               off = fpos_off(ctx->pos);
        } else {
                spin_unlock(&ci->i_ceph_lock);
        }
@@ -446,7 +448,6 @@ more:
        if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
                dout(" marking %p complete\n", inode);
                __ceph_dir_set_complete(ci, fi->dir_release_count);
-               ci->i_max_offset = ctx->pos;
        }
        spin_unlock(&ci->i_ceph_lock);
 
@@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
                 * to do it here.
                 */
 
-               /* d_move screws up d_subdirs order */
-               ceph_dir_clear_complete(new_dir);
-
                d_move(old_dentry, new_dentry);
 
                /* ensure target dentry is invalidated, despite
                   rehashing bug in vfs_rename_dir */
                ceph_invalidate_dentry_lease(new_dentry);
+
+               /* d_move screws up sibling dentries' offsets */
+               ceph_dir_clear_complete(old_dir);
+               ceph_dir_clear_complete(new_dir);
+
        }
        ceph_mdsc_put_request(req);
        return err;
index 0b0728e5be2d7cba589a935159b88f9d26f0b2e9..233c6f96910abc78d2b120e4e30a44ba0009b88a 100644 (file)
@@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode,
            !__ceph_dir_is_complete(ci)) {
                dout(" marking %p complete (empty)\n", inode);
                __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
-               ci->i_max_offset = 2;
        }
 no_change:
        /* only update max_size on auth cap */
@@ -889,41 +888,6 @@ out_unlock:
        return;
 }
 
-/*
- * Set dentry's directory position based on the current dir's max, and
- * order it in d_subdirs, so that dcache_readdir behaves.
- *
- * Always called under directory's i_mutex.
- */
-static void ceph_set_dentry_offset(struct dentry *dn)
-{
-       struct dentry *dir = dn->d_parent;
-       struct inode *inode = dir->d_inode;
-       struct ceph_inode_info *ci;
-       struct ceph_dentry_info *di;
-
-       BUG_ON(!inode);
-
-       ci = ceph_inode(inode);
-       di = ceph_dentry(dn);
-
-       spin_lock(&ci->i_ceph_lock);
-       if (!__ceph_dir_is_complete(ci)) {
-               spin_unlock(&ci->i_ceph_lock);
-               return;
-       }
-       di->offset = ceph_inode(inode)->i_max_offset++;
-       spin_unlock(&ci->i_ceph_lock);
-
-       spin_lock(&dir->d_lock);
-       spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
-       list_move(&dn->d_u.d_child, &dir->d_subdirs);
-       dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
-            dn->d_u.d_child.prev, dn->d_u.d_child.next);
-       spin_unlock(&dn->d_lock);
-       spin_unlock(&dir->d_lock);
-}
-
 /*
  * splice a dentry to an inode.
  * caller must hold directory i_mutex for this to be safe.
@@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
  * the caller) if we fail.
  */
 static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
-                                   bool *prehash, bool set_offset)
+                                   bool *prehash)
 {
        struct dentry *realdn;
 
@@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
        }
        if ((!prehash || *prehash) && d_unhashed(dn))
                d_rehash(dn);
-       if (set_offset)
-               ceph_set_dentry_offset(dn);
 out:
        return dn;
 }
@@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 {
        struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
        struct inode *in = NULL;
-       struct ceph_mds_reply_inode *ininfo;
        struct ceph_vino vino;
        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
        int err = 0;
@@ -1161,6 +1122,9 @@ retry_lookup:
 
                /* rename? */
                if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
+                       struct inode *olddir = req->r_old_dentry_dir;
+                       BUG_ON(!olddir);
+
                        dout(" src %p '%.*s' dst %p '%.*s'\n",
                             req->r_old_dentry,
                             req->r_old_dentry->d_name.len,
@@ -1180,13 +1144,10 @@ retry_lookup:
                           rehashing bug in vfs_rename_dir */
                        ceph_invalidate_dentry_lease(dn);
 
-                       /*
-                        * d_move() puts the renamed dentry at the end of
-                        * d_subdirs.  We need to assign it an appropriate
-                        * directory offset so we can behave when dir is
-                        * complete.
-                        */
-                       ceph_set_dentry_offset(req->r_old_dentry);
+                       /* d_move screws up sibling dentries' offsets */
+                       ceph_dir_clear_complete(dir);
+                       ceph_dir_clear_complete(olddir);
+
                        dout("dn %p gets new offset %lld\n", req->r_old_dentry,
                             ceph_dentry(req->r_old_dentry)->offset);
 
@@ -1213,8 +1174,9 @@ retry_lookup:
 
                /* attach proper inode */
                if (!dn->d_inode) {
+                       ceph_dir_clear_complete(dir);
                        ihold(in);
-                       dn = splice_dentry(dn, in, &have_lease, true);
+                       dn = splice_dentry(dn, in, &have_lease);
                        if (IS_ERR(dn)) {
                                err = PTR_ERR(dn);
                                goto done;
@@ -1235,17 +1197,16 @@ retry_lookup:
                   (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
                    req->r_op == CEPH_MDS_OP_MKSNAP)) {
                struct dentry *dn = req->r_dentry;
+               struct inode *dir = req->r_locked_dir;
 
                /* fill out a snapdir LOOKUPSNAP dentry */
                BUG_ON(!dn);
-               BUG_ON(!req->r_locked_dir);
-               BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR);
-               ininfo = rinfo->targeti.in;
-               vino.ino = le64_to_cpu(ininfo->ino);
-               vino.snap = le64_to_cpu(ininfo->snapid);
+               BUG_ON(!dir);
+               BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
                dout(" linking snapped dir %p to dn %p\n", in, dn);
+               ceph_dir_clear_complete(dir);
                ihold(in);
-               dn = splice_dentry(dn, in, NULL, true);
+               dn = splice_dentry(dn, in, NULL);
                if (IS_ERR(dn)) {
                        err = PTR_ERR(dn);
                        goto done;
@@ -1407,7 +1368,7 @@ retry_lookup:
                }
 
                if (!dn->d_inode) {
-                       dn = splice_dentry(dn, in, NULL, false);
+                       dn = splice_dentry(dn, in, NULL);
                        if (IS_ERR(dn)) {
                                err = PTR_ERR(dn);
                                dn = NULL;
index fdf941b44ff103a2590a3804aa850e468ec980d6..a822a6e58290bbedfb0e363bf3bbb601075891fb 100644 (file)
@@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
                return PTR_ERR(req);
        req->r_inode = inode;
        ihold(inode);
+       req->r_num_caps = 1;
+
        req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
 
        req->r_args.setlayout.layout.fl_stripe_unit =
@@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
                return PTR_ERR(req);
        req->r_inode = inode;
        ihold(inode);
+       req->r_num_caps = 1;
 
        req->r_args.setlayout.layout.fl_stripe_unit =
                        cpu_to_le32(l.stripe_unit);
index d94ba0df9f4d195cabf677fcdcd41cc01096c7e7..191398852a2e8927b1ac193b53ceea623829caf1 100644 (file)
@@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
                return PTR_ERR(req);
        req->r_inode = inode;
        ihold(inode);
+       req->r_num_caps = 1;
 
        /* mds requires start and length rather than start and end */
        if (LLONG_MAX == fl->fl_end)
index 7866cd05a6bbee4afd2478f3737d0ccd8ac28975..ead05cc1f447562271578131ab25769257080915 100644 (file)
@@ -266,7 +266,6 @@ struct ceph_inode_info {
        struct timespec i_rctime;
        u64 i_rbytes, i_rfiles, i_rsubdirs;
        u64 i_files, i_subdirs;
-       u64 i_max_offset;  /* largest readdir offset, set with complete dir */
 
        struct rb_root i_fragtree;
        struct mutex i_fragtree_mutex;
index e632b5a52f5b89cb2e275b64494905cc7ebfc8e7..8b8a5a24b223ef268c28cf5e5ac5379314bba237 100644 (file)
@@ -1548,8 +1548,10 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
                return;
 
        for (i = 0; i < len; i++) {
-               if (osds[i] != CRUSH_ITEM_NONE &&
-                   osdmap->osd_primary_affinity[i] !=
+               int osd = osds[i];
+
+               if (osd != CRUSH_ITEM_NONE &&
+                   osdmap->osd_primary_affinity[osd] !=
                                        CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
                        break;
                }
@@ -1563,10 +1565,9 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
         * osd's pgs get rejected as primary.
         */
        for (i = 0; i < len; i++) {
-               int osd;
+               int osd = osds[i];
                u32 aff;
 
-               osd = osds[i];
                if (osd == CRUSH_ITEM_NONE)
                        continue;