]>
Commit | Line | Data |
---|---|---|
5db11c21 MM |
1 | /* |
2 | * (C) 2001 Clemson University and The University of Chicago | |
3 | * | |
4 | * See COPYING in top-level directory. | |
5 | */ | |
6 | ||
7 | #include "protocol.h" | |
575e9461 MM |
8 | #include "orangefs-kernel.h" |
9 | #include "orangefs-bufmap.h" | |
5db11c21 MM |
10 | |
11 | struct readdir_handle_s { | |
12 | int buffer_index; | |
8bb8aefd | 13 | struct orangefs_readdir_response_s readdir_response; |
5db11c21 MM |
14 | void *dents_buf; |
15 | }; | |
16 | ||
17 | /* | |
1808f8cc MM |
18 | * decode routine used by kmod to deal with the blob sent from |
19 | * userspace for readdirs. The blob contains zero or more of these | |
20 | * sub-blobs: | |
21 | * __u32 - represents length of the character string that follows. | |
22 | * string - between 1 and ORANGEFS_NAME_MAX bytes long. | |
23 | * padding - (if needed) to cause the __u32 plus the string to be | |
24 | * eight byte aligned. | |
25 | * khandle - sizeof(khandle) bytes. | |
5db11c21 | 26 | */ |
8092895f | 27 | static long decode_dirents(char *ptr, size_t size, |
8bb8aefd | 28 | struct orangefs_readdir_response_s *readdir) |
5db11c21 MM |
29 | { |
30 | int i; | |
8bb8aefd YL |
31 | struct orangefs_readdir_response_s *rd = |
32 | (struct orangefs_readdir_response_s *) ptr; | |
5db11c21 | 33 | char *buf = ptr; |
1808f8cc MM |
34 | int khandle_size = sizeof(struct orangefs_khandle); |
35 | size_t offset = offsetof(struct orangefs_readdir_response_s, | |
36 | dirent_array); | |
37 | /* 8 reflects eight byte alignment */ | |
38 | int smallest_blob = khandle_size + 8; | |
39 | __u32 len; | |
40 | int aligned_len; | |
41 | int sizeof_u32 = sizeof(__u32); | |
42 | long ret; | |
5db11c21 | 43 | |
1808f8cc MM |
44 | gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size); |
45 | ||
46 | /* size is = offset on empty dirs, > offset on non-empty dirs... */ | |
47 | if (size < offset) { | |
48 | gossip_err("%s: size:%zu: offset:%zu:\n", | |
49 | __func__, | |
50 | size, | |
51 | offset); | |
52 | ret = -EINVAL; | |
53 | goto out; | |
54 | } | |
55 | ||
56 | if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) { | |
57 | gossip_err("%s: size:%zu: dirent_outcount:%d:\n", | |
58 | __func__, | |
59 | size, | |
60 | readdir->orangefs_dirent_outcount); | |
61 | ret = -EINVAL; | |
62 | goto out; | |
63 | } | |
8092895f | 64 | |
5db11c21 | 65 | readdir->token = rd->token; |
8bb8aefd YL |
66 | readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount; |
67 | readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount, | |
5db11c21 MM |
68 | sizeof(*readdir->dirent_array), |
69 | GFP_KERNEL); | |
1808f8cc MM |
70 | if (readdir->dirent_array == NULL) { |
71 | gossip_err("%s: kcalloc failed.\n", __func__); | |
72 | ret = -ENOMEM; | |
73 | goto out; | |
74 | } | |
8092895f | 75 | |
1808f8cc MM |
76 | buf += offset; |
77 | size -= offset; | |
8092895f | 78 | |
8bb8aefd | 79 | for (i = 0; i < readdir->orangefs_dirent_outcount; i++) { |
1808f8cc MM |
80 | if (size < smallest_blob) { |
81 | gossip_err("%s: size:%zu: smallest_blob:%d:\n", | |
82 | __func__, | |
83 | size, | |
84 | smallest_blob); | |
85 | ret = -EINVAL; | |
86 | goto free; | |
87 | } | |
8092895f AV |
88 | |
89 | len = *(__u32 *)buf; | |
1808f8cc MM |
90 | if ((len < 1) || (len > ORANGEFS_NAME_MAX)) { |
91 | gossip_err("%s: len:%d:\n", __func__, len); | |
92 | ret = -EINVAL; | |
93 | goto free; | |
94 | } | |
95 | ||
96 | gossip_debug(GOSSIP_DIR_DEBUG, | |
97 | "%s: size:%zu: len:%d:\n", | |
98 | __func__, | |
99 | size, | |
100 | len); | |
101 | ||
102 | readdir->dirent_array[i].d_name = buf + sizeof_u32; | |
9be68b08 | 103 | readdir->dirent_array[i].d_length = len; |
8092895f | 104 | |
7d221485 | 105 | /* |
1808f8cc MM |
106 | * Calculate "aligned" length of this string and its |
107 | * associated __u32 descriptor. | |
108 | */ | |
109 | aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7; | |
110 | gossip_debug(GOSSIP_DIR_DEBUG, | |
111 | "%s: aligned_len:%d:\n", | |
112 | __func__, | |
113 | aligned_len); | |
114 | ||
115 | /* | |
116 | * The end of the blob should coincide with the end | |
117 | * of the last sub-blob. | |
7d221485 | 118 | */ |
1808f8cc MM |
119 | if (size < aligned_len + khandle_size) { |
120 | gossip_err("%s: ran off the end of the blob.\n", | |
121 | __func__); | |
122 | ret = -EINVAL; | |
123 | goto free; | |
124 | } | |
125 | size -= aligned_len + khandle_size; | |
8092895f | 126 | |
1808f8cc | 127 | buf += aligned_len; |
8092895f | 128 | |
5db11c21 | 129 | readdir->dirent_array[i].khandle = |
8bb8aefd | 130 | *(struct orangefs_khandle *) buf; |
1808f8cc | 131 | buf += khandle_size; |
5db11c21 | 132 | } |
1808f8cc MM |
133 | ret = buf - ptr; |
134 | gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret); | |
135 | goto out; | |
136 | ||
137 | free: | |
8092895f AV |
138 | kfree(readdir->dirent_array); |
139 | readdir->dirent_array = NULL; | |
1808f8cc MM |
140 | |
141 | out: | |
142 | return ret; | |
5db11c21 MM |
143 | } |
144 | ||
145 | static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, | |
8092895f | 146 | size_t size, int buffer_index) |
5db11c21 MM |
147 | { |
148 | long ret; | |
149 | ||
150 | if (buf == NULL) { | |
151 | gossip_err | |
152 | ("Invalid NULL buffer specified in readdir_handle_ctor\n"); | |
153 | return -ENOMEM; | |
154 | } | |
155 | if (buffer_index < 0) { | |
156 | gossip_err | |
157 | ("Invalid buffer index specified in readdir_handle_ctor\n"); | |
158 | return -EINVAL; | |
159 | } | |
160 | rhandle->buffer_index = buffer_index; | |
161 | rhandle->dents_buf = buf; | |
8092895f | 162 | ret = decode_dirents(buf, size, &rhandle->readdir_response); |
5db11c21 MM |
163 | if (ret < 0) { |
164 | gossip_err("Could not decode readdir from buffer %ld\n", ret); | |
165 | rhandle->buffer_index = -1; | |
166 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf); | |
167 | vfree(buf); | |
168 | rhandle->dents_buf = NULL; | |
169 | } | |
170 | return ret; | |
171 | } | |
172 | ||
82d37f19 | 173 | static void readdir_handle_dtor(struct readdir_handle_s *rhandle) |
5db11c21 MM |
174 | { |
175 | if (rhandle == NULL) | |
176 | return; | |
177 | ||
178 | /* kfree(NULL) is safe */ | |
179 | kfree(rhandle->readdir_response.dirent_array); | |
180 | rhandle->readdir_response.dirent_array = NULL; | |
181 | ||
182 | if (rhandle->buffer_index >= 0) { | |
82d37f19 | 183 | orangefs_readdir_index_put(rhandle->buffer_index); |
5db11c21 MM |
184 | rhandle->buffer_index = -1; |
185 | } | |
186 | if (rhandle->dents_buf) { | |
187 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", | |
188 | rhandle->dents_buf); | |
189 | vfree(rhandle->dents_buf); | |
190 | rhandle->dents_buf = NULL; | |
191 | } | |
192 | } | |
193 | ||
194 | /* | |
195 | * Read directory entries from an instance of an open directory. | |
5db11c21 | 196 | */ |
8bb8aefd | 197 | static int orangefs_readdir(struct file *file, struct dir_context *ctx) |
5db11c21 | 198 | { |
8bb8aefd | 199 | struct orangefs_bufmap *bufmap = NULL; |
5db11c21 MM |
200 | int ret = 0; |
201 | int buffer_index; | |
88309aae MM |
202 | /* |
203 | * ptoken supports Orangefs' distributed directory logic, added | |
204 | * in 2.9.2. | |
205 | */ | |
5db11c21 MM |
206 | __u64 *ptoken = file->private_data; |
207 | __u64 pos = 0; | |
208 | ino_t ino = 0; | |
209 | struct dentry *dentry = file->f_path.dentry; | |
8bb8aefd YL |
210 | struct orangefs_kernel_op_s *new_op = NULL; |
211 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode); | |
5db11c21 MM |
212 | int buffer_full = 0; |
213 | struct readdir_handle_s rhandle; | |
214 | int i = 0; | |
215 | int len = 0; | |
216 | ino_t current_ino = 0; | |
217 | char *current_entry = NULL; | |
218 | long bytes_decoded; | |
219 | ||
88309aae MM |
220 | gossip_debug(GOSSIP_DIR_DEBUG, |
221 | "%s: ctx->pos:%lld, ptoken = %llu\n", | |
222 | __func__, | |
223 | lld(ctx->pos), | |
224 | llu(*ptoken)); | |
5db11c21 MM |
225 | |
226 | pos = (__u64) ctx->pos; | |
227 | ||
228 | /* are we done? */ | |
8bb8aefd | 229 | if (pos == ORANGEFS_READDIR_END) { |
5db11c21 MM |
230 | gossip_debug(GOSSIP_DIR_DEBUG, |
231 | "Skipping to termination path\n"); | |
232 | return 0; | |
233 | } | |
234 | ||
235 | gossip_debug(GOSSIP_DIR_DEBUG, | |
8bb8aefd | 236 | "orangefs_readdir called on %s (pos=%llu)\n", |
5db11c21 MM |
237 | dentry->d_name.name, llu(pos)); |
238 | ||
239 | rhandle.buffer_index = -1; | |
240 | rhandle.dents_buf = NULL; | |
241 | memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); | |
242 | ||
8bb8aefd | 243 | new_op = op_alloc(ORANGEFS_VFS_OP_READDIR); |
5db11c21 MM |
244 | if (!new_op) |
245 | return -ENOMEM; | |
246 | ||
247 | new_op->uses_shared_memory = 1; | |
8bb8aefd | 248 | new_op->upcall.req.readdir.refn = orangefs_inode->refn; |
7d221485 MB |
249 | new_op->upcall.req.readdir.max_dirent_count = |
250 | ORANGEFS_MAX_DIRENT_COUNT_READDIR; | |
5db11c21 MM |
251 | |
252 | gossip_debug(GOSSIP_DIR_DEBUG, | |
253 | "%s: upcall.req.readdir.refn.khandle: %pU\n", | |
254 | __func__, | |
255 | &new_op->upcall.req.readdir.refn.khandle); | |
256 | ||
5db11c21 MM |
257 | new_op->upcall.req.readdir.token = *ptoken; |
258 | ||
259 | get_new_buffer_index: | |
7d221485 | 260 | ret = orangefs_readdir_index_get(&bufmap, &buffer_index); |
5db11c21 | 261 | if (ret < 0) { |
7d221485 | 262 | gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n", |
5db11c21 MM |
263 | ret); |
264 | goto out_free_op; | |
265 | } | |
266 | new_op->upcall.req.readdir.buf_index = buffer_index; | |
267 | ||
268 | ret = service_operation(new_op, | |
8bb8aefd | 269 | "orangefs_readdir", |
5db11c21 MM |
270 | get_interruptible_flag(dentry->d_inode)); |
271 | ||
272 | gossip_debug(GOSSIP_DIR_DEBUG, | |
273 | "Readdir downcall status is %d. ret:%d\n", | |
274 | new_op->downcall.status, | |
275 | ret); | |
276 | ||
277 | if (ret == -EAGAIN && op_state_purged(new_op)) { | |
278 | /* | |
279 | * readdir shared memory aread has been wiped due to | |
280 | * pvfs2-client-core restarting, so we must get a new | |
281 | * index into the shared memory. | |
282 | */ | |
283 | gossip_debug(GOSSIP_DIR_DEBUG, | |
284 | "%s: Getting new buffer_index for retry of readdir..\n", | |
285 | __func__); | |
82d37f19 | 286 | orangefs_readdir_index_put(buffer_index); |
5db11c21 MM |
287 | goto get_new_buffer_index; |
288 | } | |
289 | ||
290 | if (ret == -EIO && op_state_purged(new_op)) { | |
291 | gossip_err("%s: Client is down. Aborting readdir call.\n", | |
292 | __func__); | |
82d37f19 | 293 | orangefs_readdir_index_put(buffer_index); |
5db11c21 MM |
294 | goto out_free_op; |
295 | } | |
296 | ||
297 | if (ret < 0 || new_op->downcall.status != 0) { | |
298 | gossip_debug(GOSSIP_DIR_DEBUG, | |
299 | "Readdir request failed. Status:%d\n", | |
300 | new_op->downcall.status); | |
82d37f19 | 301 | orangefs_readdir_index_put(buffer_index); |
5db11c21 MM |
302 | if (ret >= 0) |
303 | ret = new_op->downcall.status; | |
304 | goto out_free_op; | |
305 | } | |
306 | ||
307 | bytes_decoded = | |
308 | readdir_handle_ctor(&rhandle, | |
309 | new_op->downcall.trailer_buf, | |
8092895f | 310 | new_op->downcall.trailer_size, |
5db11c21 MM |
311 | buffer_index); |
312 | if (bytes_decoded < 0) { | |
8bb8aefd | 313 | gossip_err("orangefs_readdir: Could not decode trailer buffer into a readdir response %d\n", |
5db11c21 MM |
314 | ret); |
315 | ret = bytes_decoded; | |
82d37f19 | 316 | orangefs_readdir_index_put(buffer_index); |
5db11c21 MM |
317 | goto out_free_op; |
318 | } | |
319 | ||
320 | if (bytes_decoded != new_op->downcall.trailer_size) { | |
8bb8aefd | 321 | gossip_err("orangefs_readdir: # bytes decoded (%ld) " |
88309aae MM |
322 | "!= trailer size (%ld)\n", |
323 | bytes_decoded, | |
324 | (long)new_op->downcall.trailer_size); | |
5db11c21 MM |
325 | ret = -EINVAL; |
326 | goto out_destroy_handle; | |
327 | } | |
328 | ||
88309aae | 329 | /* |
8bb8aefd | 330 | * orangefs doesn't actually store dot and dot-dot, but |
88309aae MM |
331 | * we need to have them represented. |
332 | */ | |
5db11c21 MM |
333 | if (pos == 0) { |
334 | ino = get_ino_from_khandle(dentry->d_inode); | |
335 | gossip_debug(GOSSIP_DIR_DEBUG, | |
336 | "%s: calling dir_emit of \".\" with pos = %llu\n", | |
337 | __func__, | |
338 | llu(pos)); | |
339 | ret = dir_emit(ctx, ".", 1, ino, DT_DIR); | |
88309aae | 340 | pos += 1; |
5db11c21 MM |
341 | } |
342 | ||
343 | if (pos == 1) { | |
344 | ino = get_parent_ino_from_dentry(dentry); | |
345 | gossip_debug(GOSSIP_DIR_DEBUG, | |
346 | "%s: calling dir_emit of \"..\" with pos = %llu\n", | |
347 | __func__, | |
348 | llu(pos)); | |
349 | ret = dir_emit(ctx, "..", 2, ino, DT_DIR); | |
88309aae | 350 | pos += 1; |
5db11c21 MM |
351 | } |
352 | ||
88309aae | 353 | /* |
8bb8aefd | 354 | * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around |
88309aae MM |
355 | * to prevent "finding" dot and dot-dot on any iteration |
356 | * other than the first. | |
357 | */ | |
8bb8aefd | 358 | if (ctx->pos == ORANGEFS_ITERATE_NEXT) |
88309aae MM |
359 | ctx->pos = 0; |
360 | ||
361 | for (i = ctx->pos; | |
8bb8aefd | 362 | i < rhandle.readdir_response.orangefs_dirent_outcount; |
88309aae | 363 | i++) { |
5db11c21 MM |
364 | len = rhandle.readdir_response.dirent_array[i].d_length; |
365 | current_entry = rhandle.readdir_response.dirent_array[i].d_name; | |
8bb8aefd | 366 | current_ino = orangefs_khandle_to_ino( |
5db11c21 MM |
367 | &(rhandle.readdir_response.dirent_array[i].khandle)); |
368 | ||
369 | gossip_debug(GOSSIP_DIR_DEBUG, | |
88309aae MM |
370 | "calling dir_emit for %s with len %d" |
371 | ", ctx->pos %ld\n", | |
5db11c21 MM |
372 | current_entry, |
373 | len, | |
88309aae MM |
374 | (unsigned long)ctx->pos); |
375 | /* | |
376 | * type is unknown. We don't return object type | |
377 | * in the dirent_array. This leaves getdents | |
378 | * clueless about type. | |
379 | */ | |
5db11c21 MM |
380 | ret = |
381 | dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); | |
88309aae MM |
382 | if (!ret) |
383 | break; | |
5db11c21 | 384 | ctx->pos++; |
88309aae | 385 | gossip_debug(GOSSIP_DIR_DEBUG, |
5db11c21 MM |
386 | "%s: ctx->pos:%lld\n", |
387 | __func__, | |
388 | lld(ctx->pos)); | |
389 | ||
5db11c21 MM |
390 | } |
391 | ||
54804949 | 392 | /* |
88309aae MM |
393 | * we ran all the way through the last batch, set up for |
394 | * getting another batch... | |
395 | */ | |
396 | if (ret) { | |
5db11c21 | 397 | *ptoken = rhandle.readdir_response.token; |
8bb8aefd | 398 | ctx->pos = ORANGEFS_ITERATE_NEXT; |
5db11c21 MM |
399 | } |
400 | ||
401 | /* | |
402 | * Did we hit the end of the directory? | |
403 | */ | |
8bb8aefd | 404 | if (rhandle.readdir_response.token == ORANGEFS_READDIR_END && |
5db11c21 | 405 | !buffer_full) { |
88309aae | 406 | gossip_debug(GOSSIP_DIR_DEBUG, |
8bb8aefd YL |
407 | "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n"); |
408 | ctx->pos = ORANGEFS_READDIR_END; | |
5db11c21 MM |
409 | } |
410 | ||
5db11c21 | 411 | out_destroy_handle: |
82d37f19 | 412 | readdir_handle_dtor(&rhandle); |
5db11c21 MM |
413 | out_free_op: |
414 | op_release(new_op); | |
8bb8aefd | 415 | gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret); |
5db11c21 MM |
416 | return ret; |
417 | } | |
418 | ||
8bb8aefd | 419 | static int orangefs_dir_open(struct inode *inode, struct file *file) |
5db11c21 MM |
420 | { |
421 | __u64 *ptoken; | |
422 | ||
423 | file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); | |
424 | if (!file->private_data) | |
425 | return -ENOMEM; | |
426 | ||
427 | ptoken = file->private_data; | |
8bb8aefd | 428 | *ptoken = ORANGEFS_READDIR_START; |
5db11c21 MM |
429 | return 0; |
430 | } | |
431 | ||
8bb8aefd | 432 | static int orangefs_dir_release(struct inode *inode, struct file *file) |
5db11c21 | 433 | { |
8bb8aefd | 434 | orangefs_flush_inode(inode); |
5db11c21 MM |
435 | kfree(file->private_data); |
436 | return 0; | |
437 | } | |
438 | ||
8bb8aefd YL |
439 | /** ORANGEFS implementation of VFS directory operations */ |
440 | const struct file_operations orangefs_dir_operations = { | |
5db11c21 | 441 | .read = generic_read_dir, |
8bb8aefd YL |
442 | .iterate = orangefs_readdir, |
443 | .open = orangefs_dir_open, | |
444 | .release = orangefs_dir_release, | |
5db11c21 | 445 | }; |