/*
 * Copyright 2012 Xyratex Technology Limited
 *
 * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com>
 *
 */
7 | ||
8 | #define DEBUG_SUBSYSTEM S_LLITE | |
9 | ||
10 | #include <linux/fs.h> | |
11 | #include <linux/sched.h> | |
12 | #include <linux/mm.h> | |
13 | #include <obd_support.h> | |
14 | #include <lustre_lite.h> | |
15 | #include <lustre_dlm.h> | |
16 | #include <lustre_ver.h> | |
17 | #include "llite_internal.h" | |
18 | ||
/* If we ever have hundreds of extended attributes, we might want to consider
 * using a hash or a tree structure instead of list for faster lookups.
 */
struct ll_xattr_entry {
        struct list_head        xe_list;    /* protected with
                                             * lli_xattrs_list_rwsem */
        char                    *xe_name;   /* xattr name, \0-terminated */
        char                    *xe_value;  /* xattr value */
        unsigned                xe_namelen; /* strlen(xe_name) + 1 */
        unsigned                xe_vallen;  /* xattr value length */
};

/* Slab cache for ll_xattr_entry objects, set up by ll_xattr_init()
 * via lu_kmem_init() and torn down by ll_xattr_fini(). */
static struct kmem_cache *xattr_kmem;
static struct lu_kmem_descr xattr_caches[] = {
        {
                .ckd_cache = &xattr_kmem,
                .ckd_name  = "xattr_kmem",
                .ckd_size  = sizeof(struct ll_xattr_entry)
        },
        {
                .ckd_cache = NULL /* terminator for lu_kmem_init() */
        }
};
42 | ||
/**
 * Create the slab cache backing all per-inode xattr caches.
 *
 * \retval 0 on success, negative error from lu_kmem_init() otherwise
 */
int ll_xattr_init(void)
{
        return lu_kmem_init(xattr_caches);
}
47 | ||
/**
 * Destroy the slab cache created by ll_xattr_init().
 */
void ll_xattr_fini(void)
{
        lu_kmem_fini(xattr_caches);
}
52 | ||
/**
 * Initializes xattr cache for an inode.
 *
 * This initializes the xattr list and marks cache presence
 * by setting LLIF_XATTR_CACHE in lli_flags.
 *
 * NOTE(review): the only caller visible here (ll_xattr_cache_refill())
 * holds lli_xattrs_list_rwsem for write -- presumably a requirement
 * for any new caller as well; confirm before adding one.
 */
static void ll_xattr_cache_init(struct ll_inode_info *lli)
{
        LASSERT(lli != NULL);

        INIT_LIST_HEAD(&lli->lli_xattrs);
        lli->lli_flags |= LLIF_XATTR_CACHE;
}
67 | ||
68 | /** | |
69 | * This looks for a specific extended attribute. | |
70 | * | |
71 | * Find in @cache and return @xattr_name attribute in @xattr, | |
72 | * for the NULL @xattr_name return the first cached @xattr. | |
73 | * | |
74 | * \retval 0 success | |
75 | * \retval -ENODATA if not found | |
76 | */ | |
77 | static int ll_xattr_cache_find(struct list_head *cache, | |
78 | const char *xattr_name, | |
79 | struct ll_xattr_entry **xattr) | |
80 | { | |
81 | struct ll_xattr_entry *entry; | |
82 | ||
83 | ||
84 | ||
85 | list_for_each_entry(entry, cache, xe_list) { | |
86 | /* xattr_name == NULL means look for any entry */ | |
87 | if (xattr_name == NULL || | |
88 | strcmp(xattr_name, entry->xe_name) == 0) { | |
89 | *xattr = entry; | |
90 | CDEBUG(D_CACHE, "find: [%s]=%.*s\n", | |
91 | entry->xe_name, entry->xe_vallen, | |
92 | entry->xe_value); | |
93 | return 0; | |
94 | } | |
95 | } | |
96 | ||
97 | return -ENODATA; | |
98 | } | |
99 | ||
100 | /** | |
e93a3082 | 101 | * This adds an xattr. |
7fc1f831 AP |
102 | * |
103 | * Add @xattr_name attr with @xattr_val value and @xattr_val_len length, | |
7fc1f831 AP |
104 | * |
105 | * \retval 0 success | |
106 | * \retval -ENOMEM if no memory could be allocated for the cached attr | |
e93a3082 | 107 | * \retval -EPROTO if duplicate xattr is being added |
7fc1f831 AP |
108 | */ |
109 | static int ll_xattr_cache_add(struct list_head *cache, | |
110 | const char *xattr_name, | |
111 | const char *xattr_val, | |
112 | unsigned xattr_val_len) | |
113 | { | |
114 | struct ll_xattr_entry *xattr; | |
115 | ||
116 | ||
117 | ||
118 | if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { | |
e93a3082 AP |
119 | CDEBUG(D_CACHE, "duplicate xattr: [%s]\n", xattr_name); |
120 | return -EPROTO; | |
7fc1f831 AP |
121 | } |
122 | ||
123 | OBD_SLAB_ALLOC_PTR_GFP(xattr, xattr_kmem, __GFP_IO); | |
124 | if (xattr == NULL) { | |
125 | CDEBUG(D_CACHE, "failed to allocate xattr\n"); | |
126 | return -ENOMEM; | |
127 | } | |
128 | ||
129 | xattr->xe_namelen = strlen(xattr_name) + 1; | |
130 | ||
131 | OBD_ALLOC(xattr->xe_name, xattr->xe_namelen); | |
132 | if (!xattr->xe_name) { | |
133 | CDEBUG(D_CACHE, "failed to alloc xattr name %u\n", | |
134 | xattr->xe_namelen); | |
135 | goto err_name; | |
136 | } | |
137 | OBD_ALLOC(xattr->xe_value, xattr_val_len); | |
138 | if (!xattr->xe_value) { | |
139 | CDEBUG(D_CACHE, "failed to alloc xattr value %d\n", | |
140 | xattr_val_len); | |
141 | goto err_value; | |
142 | } | |
143 | ||
144 | memcpy(xattr->xe_name, xattr_name, xattr->xe_namelen); | |
145 | memcpy(xattr->xe_value, xattr_val, xattr_val_len); | |
146 | xattr->xe_vallen = xattr_val_len; | |
147 | list_add(&xattr->xe_list, cache); | |
148 | ||
149 | CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name, | |
150 | xattr_val_len, xattr_val); | |
151 | ||
152 | return 0; | |
153 | err_value: | |
154 | OBD_FREE(xattr->xe_name, xattr->xe_namelen); | |
155 | err_name: | |
156 | OBD_SLAB_FREE_PTR(xattr, xattr_kmem); | |
157 | ||
158 | return -ENOMEM; | |
159 | } | |
160 | ||
161 | /** | |
162 | * This removes an extended attribute from cache. | |
163 | * | |
164 | * Remove @xattr_name attribute from @cache. | |
165 | * | |
166 | * \retval 0 success | |
167 | * \retval -ENODATA if @xattr_name is not cached | |
168 | */ | |
169 | static int ll_xattr_cache_del(struct list_head *cache, | |
170 | const char *xattr_name) | |
171 | { | |
172 | struct ll_xattr_entry *xattr; | |
173 | ||
174 | ||
175 | ||
176 | CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name); | |
177 | ||
178 | if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { | |
179 | list_del(&xattr->xe_list); | |
180 | OBD_FREE(xattr->xe_name, xattr->xe_namelen); | |
181 | OBD_FREE(xattr->xe_value, xattr->xe_vallen); | |
182 | OBD_SLAB_FREE_PTR(xattr, xattr_kmem); | |
183 | ||
184 | return 0; | |
185 | } | |
186 | ||
187 | return -ENODATA; | |
188 | } | |
189 | ||
/**
 * This iterates cached extended attributes.
 *
 * Walk over cached attributes in @cache and
 * fill in @xld_buffer or only calculate buffer
 * size if @xld_buffer is NULL.
 *
 * \retval >= 0   buffer list size
 * \retval -ERANGE if the list cannot fit into the @xld_size buffer
 *                 (the code returns -ERANGE, not -ENODATA as the old
 *                 comment claimed)
 */
static int ll_xattr_cache_list(struct list_head *cache,
                               char *xld_buffer,
                               int xld_size)
{
        struct ll_xattr_entry *xattr, *tmp;
        int xld_tail = 0;

        list_for_each_entry_safe(xattr, tmp, cache, xe_list) {
                CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n",
                       xld_buffer, xld_tail, xattr->xe_name);

                if (xld_buffer) {
                        xld_size -= xattr->xe_namelen;
                        if (xld_size < 0)
                                break;
                        memcpy(&xld_buffer[xld_tail],
                               xattr->xe_name, xattr->xe_namelen);
                }
                /* with a NULL buffer this just totals the needed size */
                xld_tail += xattr->xe_namelen;
        }

        /* xld_size only goes negative when a copy ran out of room above */
        if (xld_size < 0)
                return -ERANGE;

        return xld_tail;
}
228 | ||
229 | /** | |
230 | * Check if the xattr cache is initialized (filled). | |
231 | * | |
232 | * \retval 0 @cache is not initialized | |
233 | * \retval 1 @cache is initialized | |
234 | */ | |
235 | int ll_xattr_cache_valid(struct ll_inode_info *lli) | |
236 | { | |
237 | return !!(lli->lli_flags & LLIF_XATTR_CACHE); | |
238 | } | |
239 | ||
/**
 * This finalizes the xattr cache.
 *
 * Free all xattr memory. @lli is the inode info pointer.
 *
 * NOTE(review): both visible callers (ll_xattr_cache_destroy() and
 * ll_xattr_cache_refill()) hold lli_xattrs_list_rwsem for write;
 * presumably that is a precondition here -- confirm for new callers.
 *
 * \retval 0 no error occurred
 */
static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
{
        if (!ll_xattr_cache_valid(lli))
                return 0;

        /* repeatedly delete the first entry until the list drains */
        while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0)
                ; /* empty loop */
        lli->lli_flags &= ~LLIF_XATTR_CACHE;

        return 0;
}
260 | ||
/**
 * Tear down @inode's xattr cache under the list write lock.
 *
 * Locked wrapper around ll_xattr_cache_destroy_locked().
 *
 * \retval 0 no error occurred
 */
int ll_xattr_cache_destroy(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        int rc;

        down_write(&lli->lli_xattrs_list_rwsem);
        rc = ll_xattr_cache_destroy_locked(lli);
        up_write(&lli->lli_xattrs_list_rwsem);

        return rc;
}
274 | ||
/**
 * Match or enqueue a PR lock.
 *
 * Find or request an LDLM lock with xattr data.
 * Since LDLM does not provide API for atomic match_or_enqueue,
 * the function handles it with a separate enq lock.
 * If successful, the function exits with the list lock held
 * (lli_xattrs_list_rwsem taken for write).
 *
 * On the enqueue path *@req is set to the intent request; on a local
 * match it is left untouched (NULL from the caller), which
 * ll_xattr_cache_refill() treats as "parallel refill" detection.
 *
 * \retval 0       no error occurred
 * \retval -ENOMEM not enough memory
 */
static int ll_xattr_find_get_lock(struct inode *inode,
                                  struct lookup_intent *oit,
                                  struct ptlrpc_request **req)
{
        ldlm_mode_t mode;
        struct lustre_handle lockh = { 0 };
        struct md_op_data *op_data;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
                                           .ei_mode = it_to_lock_mode(oit),
                                           .ei_cb_bl = ll_md_blocking_ast,
                                           .ei_cb_cp = ldlm_completion_ast };
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct obd_export *exp = sbi->ll_md_exp;
        int rc;

        /* serialize concurrent match-or-enqueue attempts for this inode */
        mutex_lock(&lli->lli_xattrs_enq_lock);
        /* Try matching first. */
        mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0, LCK_PR);
        if (mode != 0) {
                /* fake oit in mdc_revalidate_lock() manner */
                oit->d.lustre.it_lock_handle = lockh.cookie;
                oit->d.lustre.it_lock_mode = mode;
                goto out;
        }

        /* Enqueue if the lock isn't cached locally. */
        op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
                                     LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data)) {
                mutex_unlock(&lli->lli_xattrs_enq_lock);
                return PTR_ERR(op_data);
        }

        /* request both the xattr names and the values in one intent */
        op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;

        rc = md_enqueue(exp, &einfo, oit, op_data, &lockh, NULL, 0, NULL, 0);
        ll_finish_md_op_data(op_data);

        if (rc < 0) {
                CDEBUG(D_CACHE,
                       "md_intent_lock failed with %d for fid "DFID"\n",
                       rc, PFID(ll_inode2fid(inode)));
                mutex_unlock(&lli->lli_xattrs_enq_lock);
                return rc;
        }

        *req = (struct ptlrpc_request *)oit->d.lustre.it_data;
out:
        /* take the list lock before dropping the enq mutex so a parallel
         * refiller cannot slip in between match and list access */
        down_write(&lli->lli_xattrs_list_rwsem);
        mutex_unlock(&lli->lli_xattrs_enq_lock);

        return 0;
}
342 | ||
/**
 * Refill the xattr cache.
 *
 * Fetch and cache the whole of xattrs for @inode, acquiring
 * a read or a write xattr lock depending on operation in @oit.
 * Intent is dropped on exit unless the operation is setxattr.
 *
 * On success (rc == 0) the function returns with
 * lli_xattrs_list_rwsem held for write; on any failure the rwsem
 * is released before returning.
 *
 * \retval 0       no error occurred
 * \retval -EPROTO network protocol error
 * \retval -ENOMEM not enough memory for the cache
 */
static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
{
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ptlrpc_request *req = NULL;
        const char *xdata, *xval, *xtail, *xvtail;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct mdt_body *body;
        __u32 *xsizes;
        int rc = 0, i;

        /* on success this returns with lli_xattrs_list_rwsem write-held */
        rc = ll_xattr_find_get_lock(inode, oit, &req);
        if (rc)
                GOTO(out_no_unlock, rc);

        /* Do we have the data at this point? */
        if (ll_xattr_cache_valid(lli)) {
                ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1);
                GOTO(out_maybe_drop, rc = 0);
        }

        /* Matched but no cache? Cancelled on error by a parallel refill. */
        if (unlikely(req == NULL)) {
                CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n");
                GOTO(out_maybe_drop, rc = -EIO);
        }

        if (oit->d.lustre.it_status < 0) {
                CDEBUG(D_CACHE, "getxattr intent returned %d for fid "DFID"\n",
                       oit->d.lustre.it_status, PFID(ll_inode2fid(inode)));
                rc = oit->d.lustre.it_status;
                /* xattr data is so large that we don't want to cache it */
                if (rc == -ERANGE)
                        rc = -EAGAIN;
                GOTO(out_destroy, rc);
        }

        body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
        if (body == NULL) {
                CERROR("no MDT BODY in the refill xattr reply\n");
                GOTO(out_destroy, rc = -EPROTO);
        }
        /* do not need swab xattr data */
        xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
                                             body->eadatasize);
        xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS,
                                            body->aclsize);
        xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS,
                                              body->max_mdsize * sizeof(__u32));
        if (xdata == NULL || xval == NULL || xsizes == NULL) {
                CERROR("wrong setxattr reply\n");
                GOTO(out_destroy, rc = -EPROTO);
        }

        /* xtail/xvtail bound the \0-separated name blob and value blob */
        xtail = xdata + body->eadatasize;
        xvtail = xval + body->aclsize;

        CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail);

        ll_xattr_cache_init(lli);

        for (i = 0; i < body->max_mdsize; i++) {
                CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval);
                /* Perform consistency checks: attr names and vals in pill */
                if (memchr(xdata, 0, xtail - xdata) == NULL) {
                        CERROR("xattr protocol violation (names are broken)\n");
                        rc = -EPROTO;
                } else if (xval + *xsizes > xvtail) {
                        CERROR("xattr protocol violation (vals are broken)\n");
                        rc = -EPROTO;
                } else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) {
                        /* fault-injection point for testing the ENOMEM path */
                        rc = -ENOMEM;
                } else if (!strcmp(xdata, XATTR_NAME_ACL_ACCESS)) {
                        /* Filter out ACL ACCESS since it's cached separately */
                        CDEBUG(D_CACHE, "not caching %s\n",
                               XATTR_NAME_ACL_ACCESS);
                        rc = 0;
                } else {
                        rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval,
                                                *xsizes);
                }
                if (rc < 0) {
                        ll_xattr_cache_destroy_locked(lli);
                        GOTO(out_destroy, rc);
                }
                xdata += strlen(xdata) + 1;
                xval += *xsizes;
                xsizes++;
        }

        if (xdata != xtail || xval != xvtail)
                CERROR("a hole in xattr data\n");

        ll_set_lock_data(sbi->ll_md_exp, inode, oit, NULL);

        GOTO(out_maybe_drop, rc);
out_maybe_drop:

        ll_intent_drop_lock(oit);

        /* keep the write lock for the caller only on success */
        if (rc != 0)
                up_write(&lli->lli_xattrs_list_rwsem);
out_no_unlock:
        ptlrpc_req_finished(req);

        return rc;

out_destroy:
        up_write(&lli->lli_xattrs_list_rwsem);

        /* drop the lock so a parallel getxattr sees no stale cache */
        ldlm_lock_decref_and_cancel((struct lustre_handle *)
                                    &oit->d.lustre.it_lock_handle,
                                    oit->d.lustre.it_lock_mode);

        goto out_no_unlock;
}
471 | ||
/**
 * Get an xattr value or list xattrs using the write-through cache.
 *
 * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or
 * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode.
 * The resulting value/list is stored in @buffer if the former
 * is not larger than @size.  A zero @size only queries the
 * required length (returned in rc).
 *
 * \retval 0        no error occurred
 * \retval -EPROTO  network protocol error
 * \retval -ENOMEM  not enough memory for the cache
 * \retval -ERANGE  the buffer is not large enough
 * \retval -ENODATA no such attr or the list is empty
 */
int ll_xattr_cache_get(struct inode *inode,
                       const char *name,
                       char *buffer,
                       size_t size,
                       __u64 valid)
{
        struct lookup_intent oit = { .it_op = IT_GETXATTR };
        struct ll_inode_info *lli = ll_i2info(inode);
        int rc = 0;

        /* exactly one of get-value / list-names must be requested */
        LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS));

        down_read(&lli->lli_xattrs_list_rwsem);
        if (!ll_xattr_cache_valid(lli)) {
                up_read(&lli->lli_xattrs_list_rwsem);
                rc = ll_xattr_cache_refill(inode, &oit);
                if (rc)
                        return rc;
                /* a successful refill returns with the rwsem held for
                 * write; keep only the read access needed below */
                downgrade_write(&lli->lli_xattrs_list_rwsem);
        } else {
                ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1);
        }

        if (valid & OBD_MD_FLXATTR) {
                struct ll_xattr_entry *xattr;

                rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &xattr);
                if (rc == 0) {
                        rc = xattr->xe_vallen;
                        /* zero size means we are only requested size in rc */
                        if (size != 0) {
                                if (size >= xattr->xe_vallen)
                                        memcpy(buffer, xattr->xe_value,
                                               xattr->xe_vallen);
                                else
                                        rc = -ERANGE;
                        }
                }
        } else if (valid & OBD_MD_FLXATTRLS) {
                rc = ll_xattr_cache_list(&lli->lli_xattrs,
                                         size ? buffer : NULL, size);
        }

        GOTO(out, rc);
out:
        up_read(&lli->lli_xattrs_list_rwsem);

        return rc;
}