]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
ceph: make seeky readdir more efficient
author: Yan, Zheng <zyan@redhat.com>
Wed, 5 Apr 2017 16:54:05 +0000 (12:54 -0400)
committer: Ilya Dryomov <idryomov@gmail.com>
Thu, 4 May 2017 07:19:20 +0000 (09:19 +0200)
The current cephfs client uses a string to indicate the start position
of readdir. The string is the last entry of the previous readdir reply.
This approach does not work for seeky readdir because we cannot
easily convert the new position to a string. For seeky readdir, the
mds needs to return dentries from the beginning, and the client keeps
retrying if the reply does not contain the dentry it wants.

In the current version of ceph, the mds sorts CDentry objects in its
cache in hash order. The client also uses the dentry hash to compose the
dir position. For seeky readdir, if the client passes the hash part of
the dir position to the mds, the mds can avoid replying with useless
dentries.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/dir.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
include/linux/ceph/ceph_fs.h

index 3e9ad501addfe92f171a40dffb93c65209819cbe..ae61cdf7d4891530d6a05cde73825f3b6b9b6481 100644 (file)
@@ -378,7 +378,11 @@ more:
                                ceph_mdsc_put_request(req);
                                return -ENOMEM;
                        }
+               } else if (is_hash_order(ctx->pos)) {
+                       req->r_args.readdir.offset_hash =
+                               cpu_to_le32(fpos_hash(ctx->pos));
                }
+
                req->r_dir_release_cnt = fi->dir_release_count;
                req->r_dir_ordered_cnt = fi->dir_ordered_count;
                req->r_readdir_cache_idx = fi->readdir_cache_idx;
index d3119fe3ab45fdbdb534651ef68194815dcc544b..dcce79b844064447af8e542fe34188a4f58e22d9 100644 (file)
@@ -1482,10 +1482,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
        if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
                return readdir_prepopulate_inodes_only(req, session);
 
-       if (rinfo->hash_order && req->r_path2) {
-               last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
-                                         req->r_path2, strlen(req->r_path2));
-               last_hash = ceph_frag_value(last_hash);
+       if (rinfo->hash_order) {
+               if (req->r_path2) {
+                       last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
+                                                 req->r_path2,
+                                                 strlen(req->r_path2));
+                       last_hash = ceph_frag_value(last_hash);
+               } else if (rinfo->offset_hash) {
+                       /* mds understands offset_hash */
+                       WARN_ON_ONCE(req->r_readdir_offset != 2);
+                       last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
+               }
        }
 
        if (rinfo->dir_dir &&
@@ -1510,7 +1517,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
        }
 
        if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
-           !(rinfo->hash_order && req->r_path2)) {
+           !(rinfo->hash_order && last_hash)) {
                /* note dir version at start of readdir so we can tell
                 * if any dentries get dropped */
                req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
index a22688873ec3e64668fe63709e44f34ad9d44cff..8cc4d4e8b0773f8c09e75df3f5eea34360e5df20 100644 (file)
@@ -189,6 +189,7 @@ static int parse_reply_info_dir(void **p, void *end,
                info->dir_end = !!(flags & CEPH_READDIR_FRAG_END);
                info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE);
                info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER);
+               info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH);
        }
        if (num == 0)
                goto done;
index bbebcd55d79e89dd86e62dffa6ada0ed000fa0c5..3e67dd2169fa12eeb5d77cdd8eea8a109dfdebec 100644 (file)
@@ -83,9 +83,10 @@ struct ceph_mds_reply_info_parsed {
                        struct ceph_mds_reply_dirfrag *dir_dir;
                        size_t                        dir_buf_size;
                        int                           dir_nr;
-                       bool                          dir_complete;
                        bool                          dir_end;
+                       bool                          dir_complete;
                        bool                          hash_order;
+                       bool                          offset_hash;
                        struct ceph_mds_reply_dir_entry  *dir_entries;
                };
 
index f4b2ee18f38cbd51d2ded16380e15da7697d9f89..1787e4a8e251c50c6d0c32d7ccf3019acac25c48 100644 (file)
@@ -365,6 +365,7 @@ extern const char *ceph_mds_op_name(int op);
 #define CEPH_READDIR_FRAG_END          (1<<0)
 #define CEPH_READDIR_FRAG_COMPLETE     (1<<8)
 #define CEPH_READDIR_HASH_ORDER                (1<<9)
+#define CEPH_READDIR_OFFSET_HASH       (1<<10)
 
 union ceph_mds_request_args {
        struct {
@@ -384,6 +385,7 @@ union ceph_mds_request_args {
                __le32 max_entries;          /* how many dentries to grab */
                __le32 max_bytes;
                __le16 flags;
+               __le32 offset_hash;
        } __attribute__ ((packed)) readdir;
        struct {
                __le32 mode;