]>
Commit | Line | Data |
---|---|---|
7fc1f831 AP |
1 | /* |
2 | * Copyright 2012 Xyratex Technology Limited | |
3 | * | |
4 | * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com> | |
5 | * | |
6 | */ | |
7 | ||
8 | #define DEBUG_SUBSYSTEM S_LLITE | |
9 | ||
10 | #include <linux/fs.h> | |
11 | #include <linux/sched.h> | |
12 | #include <linux/mm.h> | |
67a235f5 GKH |
13 | #include "../include/obd_support.h" |
14 | #include "../include/lustre_lite.h" | |
15 | #include "../include/lustre_dlm.h" | |
16 | #include "../include/lustre_ver.h" | |
7fc1f831 AP |
17 | #include "llite_internal.h" |
18 | ||
/* If we ever have hundreds of extended attributes, we might want to consider
 * using a hash or a tree structure instead of list for faster lookups.
 */
struct ll_xattr_entry {
	struct list_head	xe_list;    /* protected with
					     * lli_xattrs_list_rwsem */
	char			*xe_name;   /* xattr name, \0-terminated */
	char			*xe_value;  /* xattr value */
	unsigned		xe_namelen; /* strlen(xe_name) + 1 */
	unsigned		xe_vallen;  /* xattr value length */
};
30 | ||
/* Slab cache for struct ll_xattr_entry objects; created/destroyed through
 * the lu_kmem descriptor table by ll_xattr_init()/ll_xattr_fini().
 * The NULL-terminated sentinel entry marks the end of the table.
 */
static struct kmem_cache *xattr_kmem;
static struct lu_kmem_descr xattr_caches[] = {
	{
		.ckd_cache = &xattr_kmem,
		.ckd_name  = "xattr_kmem",
		.ckd_size  = sizeof(struct ll_xattr_entry)
	},
	{
		.ckd_cache = NULL
	}
};
42 | ||
/**
 * Set up the xattr entry slab cache described by xattr_caches.
 *
 * \retval whatever lu_kmem_init() returns (0 on success)
 */
int ll_xattr_init(void)
{
	return lu_kmem_init(xattr_caches);
}
47 | ||
/**
 * Tear down the xattr entry slab cache created by ll_xattr_init().
 */
void ll_xattr_fini(void)
{
	lu_kmem_fini(xattr_caches);
}
52 | ||
53 | /** | |
54 | * Initializes xattr cache for an inode. | |
55 | * | |
56 | * This initializes the xattr list and marks cache presence. | |
57 | */ | |
58 | static void ll_xattr_cache_init(struct ll_inode_info *lli) | |
59 | { | |
60 | ||
61 | ||
62 | LASSERT(lli != NULL); | |
63 | ||
64 | INIT_LIST_HEAD(&lli->lli_xattrs); | |
65 | lli->lli_flags |= LLIF_XATTR_CACHE; | |
66 | } | |
67 | ||
68 | /** | |
69 | * This looks for a specific extended attribute. | |
70 | * | |
71 | * Find in @cache and return @xattr_name attribute in @xattr, | |
72 | * for the NULL @xattr_name return the first cached @xattr. | |
73 | * | |
74 | * \retval 0 success | |
75 | * \retval -ENODATA if not found | |
76 | */ | |
77 | static int ll_xattr_cache_find(struct list_head *cache, | |
78 | const char *xattr_name, | |
79 | struct ll_xattr_entry **xattr) | |
80 | { | |
81 | struct ll_xattr_entry *entry; | |
82 | ||
83 | ||
84 | ||
85 | list_for_each_entry(entry, cache, xe_list) { | |
86 | /* xattr_name == NULL means look for any entry */ | |
87 | if (xattr_name == NULL || | |
88 | strcmp(xattr_name, entry->xe_name) == 0) { | |
89 | *xattr = entry; | |
90 | CDEBUG(D_CACHE, "find: [%s]=%.*s\n", | |
91 | entry->xe_name, entry->xe_vallen, | |
92 | entry->xe_value); | |
93 | return 0; | |
94 | } | |
95 | } | |
96 | ||
97 | return -ENODATA; | |
98 | } | |
99 | ||
100 | /** | |
e93a3082 | 101 | * This adds an xattr. |
7fc1f831 AP |
102 | * |
103 | * Add @xattr_name attr with @xattr_val value and @xattr_val_len length, | |
7fc1f831 AP |
104 | * |
105 | * \retval 0 success | |
106 | * \retval -ENOMEM if no memory could be allocated for the cached attr | |
e93a3082 | 107 | * \retval -EPROTO if duplicate xattr is being added |
7fc1f831 AP |
108 | */ |
109 | static int ll_xattr_cache_add(struct list_head *cache, | |
110 | const char *xattr_name, | |
111 | const char *xattr_val, | |
112 | unsigned xattr_val_len) | |
113 | { | |
114 | struct ll_xattr_entry *xattr; | |
115 | ||
116 | ||
117 | ||
118 | if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { | |
e93a3082 AP |
119 | CDEBUG(D_CACHE, "duplicate xattr: [%s]\n", xattr_name); |
120 | return -EPROTO; | |
7fc1f831 AP |
121 | } |
122 | ||
0be19afa | 123 | OBD_SLAB_ALLOC_PTR_GFP(xattr, xattr_kmem, GFP_NOFS); |
7fc1f831 AP |
124 | if (xattr == NULL) { |
125 | CDEBUG(D_CACHE, "failed to allocate xattr\n"); | |
126 | return -ENOMEM; | |
127 | } | |
128 | ||
129 | xattr->xe_namelen = strlen(xattr_name) + 1; | |
130 | ||
496a51bd | 131 | xattr->xe_name = kzalloc(xattr->xe_namelen, GFP_NOFS); |
7fc1f831 AP |
132 | if (!xattr->xe_name) { |
133 | CDEBUG(D_CACHE, "failed to alloc xattr name %u\n", | |
134 | xattr->xe_namelen); | |
135 | goto err_name; | |
136 | } | |
496a51bd | 137 | xattr->xe_value = kzalloc(xattr_val_len, GFP_NOFS); |
7fc1f831 AP |
138 | if (!xattr->xe_value) { |
139 | CDEBUG(D_CACHE, "failed to alloc xattr value %d\n", | |
140 | xattr_val_len); | |
141 | goto err_value; | |
142 | } | |
143 | ||
144 | memcpy(xattr->xe_name, xattr_name, xattr->xe_namelen); | |
145 | memcpy(xattr->xe_value, xattr_val, xattr_val_len); | |
146 | xattr->xe_vallen = xattr_val_len; | |
147 | list_add(&xattr->xe_list, cache); | |
148 | ||
149 | CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name, | |
150 | xattr_val_len, xattr_val); | |
151 | ||
152 | return 0; | |
153 | err_value: | |
154 | OBD_FREE(xattr->xe_name, xattr->xe_namelen); | |
155 | err_name: | |
156 | OBD_SLAB_FREE_PTR(xattr, xattr_kmem); | |
157 | ||
158 | return -ENOMEM; | |
159 | } | |
160 | ||
161 | /** | |
162 | * This removes an extended attribute from cache. | |
163 | * | |
164 | * Remove @xattr_name attribute from @cache. | |
165 | * | |
166 | * \retval 0 success | |
167 | * \retval -ENODATA if @xattr_name is not cached | |
168 | */ | |
169 | static int ll_xattr_cache_del(struct list_head *cache, | |
170 | const char *xattr_name) | |
171 | { | |
172 | struct ll_xattr_entry *xattr; | |
173 | ||
174 | ||
175 | ||
176 | CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name); | |
177 | ||
178 | if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { | |
179 | list_del(&xattr->xe_list); | |
180 | OBD_FREE(xattr->xe_name, xattr->xe_namelen); | |
181 | OBD_FREE(xattr->xe_value, xattr->xe_vallen); | |
182 | OBD_SLAB_FREE_PTR(xattr, xattr_kmem); | |
183 | ||
184 | return 0; | |
185 | } | |
186 | ||
187 | return -ENODATA; | |
188 | } | |
189 | ||
/**
 * This iterates cached extended attributes.
 *
 * Walk over cached attributes in @cache and
 * fill in @xld_buffer or only calculate buffer
 * size if @xld_buffer is NULL.
 *
 * \retval >= 0     buffer list size
 * \retval -ERANGE  if the names do not fit into the @xld_size buffer
 */
static int ll_xattr_cache_list(struct list_head *cache,
			       char *xld_buffer,
			       int xld_size)
{
	struct ll_xattr_entry *xattr, *tmp;
	int xld_tail = 0;

	list_for_each_entry_safe(xattr, tmp, cache, xe_list) {
		CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n",
		       xld_buffer, xld_tail, xattr->xe_name);

		if (xld_buffer) {
			/* Copy only while the name (incl. its NUL) fits;
			 * a negative xld_size after the subtraction means
			 * the caller's buffer is too small. */
			xld_size -= xattr->xe_namelen;
			if (xld_size < 0)
				break;
			memcpy(&xld_buffer[xld_tail],
			       xattr->xe_name, xattr->xe_namelen);
		}
		/* xld_tail keeps accumulating even in size-probe mode
		 * (NULL buffer) so the total required size is returned. */
		xld_tail += xattr->xe_namelen;
	}

	if (xld_size < 0)
		return -ERANGE;

	return xld_tail;
}
228 | ||
229 | /** | |
230 | * Check if the xattr cache is initialized (filled). | |
231 | * | |
232 | * \retval 0 @cache is not initialized | |
233 | * \retval 1 @cache is initialized | |
234 | */ | |
2d95f10e | 235 | static int ll_xattr_cache_valid(struct ll_inode_info *lli) |
7fc1f831 AP |
236 | { |
237 | return !!(lli->lli_flags & LLIF_XATTR_CACHE); | |
238 | } | |
239 | ||
240 | /** | |
241 | * This finalizes the xattr cache. | |
242 | * | |
243 | * Free all xattr memory. @lli is the inode info pointer. | |
244 | * | |
d0a0acc3 | 245 | * \retval 0 no error occurred |
7fc1f831 AP |
246 | */ |
247 | static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) | |
248 | { | |
249 | ||
250 | ||
251 | if (!ll_xattr_cache_valid(lli)) | |
252 | return 0; | |
253 | ||
254 | while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0) | |
255 | ; /* empty loop */ | |
256 | lli->lli_flags &= ~LLIF_XATTR_CACHE; | |
257 | ||
258 | return 0; | |
259 | } | |
260 | ||
261 | int ll_xattr_cache_destroy(struct inode *inode) | |
262 | { | |
263 | struct ll_inode_info *lli = ll_i2info(inode); | |
264 | int rc; | |
265 | ||
266 | ||
267 | ||
268 | down_write(&lli->lli_xattrs_list_rwsem); | |
269 | rc = ll_xattr_cache_destroy_locked(lli); | |
270 | up_write(&lli->lli_xattrs_list_rwsem); | |
271 | ||
272 | return rc; | |
273 | } | |
274 | ||
/**
 * Match or enqueue a PR lock.
 *
 * Find or request an LDLM lock with xattr data.
 * Since LDLM does not provide API for atomic match_or_enqueue,
 * the function handles it with a separate enq lock.
 * If successful, the function exits with the list lock held.
 *
 * \retval 0       no error occurred
 * \retval -ENOMEM not enough memory
 */
static int ll_xattr_find_get_lock(struct inode *inode,
				  struct lookup_intent *oit,
				  struct ptlrpc_request **req)
{
	ldlm_mode_t mode;
	struct lustre_handle lockh = { 0 };
	struct md_op_data *op_data;
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
					   .ei_mode = it_to_lock_mode(oit),
					   .ei_cb_bl = ll_md_blocking_ast,
					   .ei_cb_cp = ldlm_completion_ast };
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obd_export *exp = sbi->ll_md_exp;
	int rc;

	/* lli_xattrs_enq_lock serializes concurrent match/enqueue attempts
	 * so only one thread at a time races for the xattr lock. */
	mutex_lock(&lli->lli_xattrs_enq_lock);
	/* Try matching first. */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0, LCK_PR);
	if (mode != 0) {
		/* fake oit in mdc_revalidate_lock() manner */
		oit->d.lustre.it_lock_handle = lockh.cookie;
		oit->d.lustre.it_lock_mode = mode;
		/* NOTE(review): on the match path *req stays NULL - callers
		 * apparently treat that as "lock matched, no reply data". */
		goto out;
	}

	/* Enqueue if the lock isn't cached locally. */
	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		mutex_unlock(&lli->lli_xattrs_enq_lock);
		return PTR_ERR(op_data);
	}

	op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;

	rc = md_enqueue(exp, &einfo, oit, op_data, &lockh, NULL, 0, NULL, 0);
	ll_finish_md_op_data(op_data);

	if (rc < 0) {
		CDEBUG(D_CACHE,
		       "md_intent_lock failed with %d for fid "DFID"\n",
		       rc, PFID(ll_inode2fid(inode)));
		mutex_unlock(&lli->lli_xattrs_enq_lock);
		return rc;
	}

	*req = (struct ptlrpc_request *)oit->d.lustre.it_data;
out:
	/* Take the list write lock BEFORE dropping the enq mutex so the
	 * caller owns the cache exclusively on successful return. */
	down_write(&lli->lli_xattrs_list_rwsem);
	mutex_unlock(&lli->lli_xattrs_enq_lock);

	return 0;
}
342 | ||
343 | /** | |
344 | * Refill the xattr cache. | |
345 | * | |
346 | * Fetch and cache the whole of xattrs for @inode, acquiring | |
347 | * a read or a write xattr lock depending on operation in @oit. | |
348 | * Intent is dropped on exit unless the operation is setxattr. | |
349 | * | |
d0a0acc3 | 350 | * \retval 0 no error occurred |
7fc1f831 AP |
351 | * \retval -EPROTO network protocol error |
352 | * \retval -ENOMEM not enough memory for the cache | |
353 | */ | |
354 | static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit) | |
355 | { | |
356 | struct ll_sb_info *sbi = ll_i2sbi(inode); | |
357 | struct ptlrpc_request *req = NULL; | |
358 | const char *xdata, *xval, *xtail, *xvtail; | |
359 | struct ll_inode_info *lli = ll_i2info(inode); | |
360 | struct mdt_body *body; | |
361 | __u32 *xsizes; | |
362 | int rc = 0, i; | |
363 | ||
364 | ||
365 | ||
366 | rc = ll_xattr_find_get_lock(inode, oit, &req); | |
367 | if (rc) | |
34e1f2bb | 368 | goto out_no_unlock; |
7fc1f831 AP |
369 | |
370 | /* Do we have the data at this point? */ | |
371 | if (ll_xattr_cache_valid(lli)) { | |
372 | ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1); | |
34e1f2bb JL |
373 | rc = 0; |
374 | goto out_maybe_drop; | |
7fc1f831 AP |
375 | } |
376 | ||
377 | /* Matched but no cache? Cancelled on error by a parallel refill. */ | |
378 | if (unlikely(req == NULL)) { | |
379 | CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n"); | |
34e1f2bb JL |
380 | rc = -EIO; |
381 | goto out_maybe_drop; | |
7fc1f831 AP |
382 | } |
383 | ||
384 | if (oit->d.lustre.it_status < 0) { | |
385 | CDEBUG(D_CACHE, "getxattr intent returned %d for fid "DFID"\n", | |
386 | oit->d.lustre.it_status, PFID(ll_inode2fid(inode))); | |
e93a3082 AP |
387 | rc = oit->d.lustre.it_status; |
388 | /* xattr data is so large that we don't want to cache it */ | |
389 | if (rc == -ERANGE) | |
390 | rc = -EAGAIN; | |
34e1f2bb | 391 | goto out_destroy; |
7fc1f831 AP |
392 | } |
393 | ||
394 | body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); | |
395 | if (body == NULL) { | |
396 | CERROR("no MDT BODY in the refill xattr reply\n"); | |
34e1f2bb JL |
397 | rc = -EPROTO; |
398 | goto out_destroy; | |
7fc1f831 AP |
399 | } |
400 | /* do not need swab xattr data */ | |
401 | xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, | |
402 | body->eadatasize); | |
403 | xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS, | |
404 | body->aclsize); | |
405 | xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS, | |
406 | body->max_mdsize * sizeof(__u32)); | |
407 | if (xdata == NULL || xval == NULL || xsizes == NULL) { | |
408 | CERROR("wrong setxattr reply\n"); | |
34e1f2bb JL |
409 | rc = -EPROTO; |
410 | goto out_destroy; | |
7fc1f831 AP |
411 | } |
412 | ||
413 | xtail = xdata + body->eadatasize; | |
414 | xvtail = xval + body->aclsize; | |
415 | ||
416 | CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail); | |
417 | ||
418 | ll_xattr_cache_init(lli); | |
419 | ||
420 | for (i = 0; i < body->max_mdsize; i++) { | |
421 | CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval); | |
422 | /* Perform consistency checks: attr names and vals in pill */ | |
423 | if (memchr(xdata, 0, xtail - xdata) == NULL) { | |
424 | CERROR("xattr protocol violation (names are broken)\n"); | |
425 | rc = -EPROTO; | |
426 | } else if (xval + *xsizes > xvtail) { | |
427 | CERROR("xattr protocol violation (vals are broken)\n"); | |
428 | rc = -EPROTO; | |
429 | } else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) { | |
430 | rc = -ENOMEM; | |
e93a3082 AP |
431 | } else if (!strcmp(xdata, XATTR_NAME_ACL_ACCESS)) { |
432 | /* Filter out ACL ACCESS since it's cached separately */ | |
433 | CDEBUG(D_CACHE, "not caching %s\n", | |
434 | XATTR_NAME_ACL_ACCESS); | |
435 | rc = 0; | |
7fc1f831 AP |
436 | } else { |
437 | rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval, | |
438 | *xsizes); | |
439 | } | |
440 | if (rc < 0) { | |
441 | ll_xattr_cache_destroy_locked(lli); | |
34e1f2bb | 442 | goto out_destroy; |
7fc1f831 AP |
443 | } |
444 | xdata += strlen(xdata) + 1; | |
445 | xval += *xsizes; | |
446 | xsizes++; | |
447 | } | |
448 | ||
449 | if (xdata != xtail || xval != xvtail) | |
450 | CERROR("a hole in xattr data\n"); | |
451 | ||
452 | ll_set_lock_data(sbi->ll_md_exp, inode, oit, NULL); | |
453 | ||
34e1f2bb | 454 | goto out_maybe_drop; |
7fc1f831 | 455 | out_maybe_drop: |
e93a3082 | 456 | |
7fc1f831 AP |
457 | ll_intent_drop_lock(oit); |
458 | ||
459 | if (rc != 0) | |
460 | up_write(&lli->lli_xattrs_list_rwsem); | |
461 | out_no_unlock: | |
462 | ptlrpc_req_finished(req); | |
463 | ||
464 | return rc; | |
465 | ||
466 | out_destroy: | |
467 | up_write(&lli->lli_xattrs_list_rwsem); | |
468 | ||
469 | ldlm_lock_decref_and_cancel((struct lustre_handle *) | |
470 | &oit->d.lustre.it_lock_handle, | |
471 | oit->d.lustre.it_lock_mode); | |
472 | ||
473 | goto out_no_unlock; | |
474 | } | |
475 | ||
476 | /** | |
477 | * Get an xattr value or list xattrs using the write-through cache. | |
478 | * | |
479 | * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or | |
480 | * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode. | |
481 | * The resulting value/list is stored in @buffer if the former | |
482 | * is not larger than @size. | |
483 | * | |
d0a0acc3 | 484 | * \retval 0 no error occurred |
7fc1f831 AP |
485 | * \retval -EPROTO network protocol error |
486 | * \retval -ENOMEM not enough memory for the cache | |
487 | * \retval -ERANGE the buffer is not large enough | |
488 | * \retval -ENODATA no such attr or the list is empty | |
489 | */ | |
490 | int ll_xattr_cache_get(struct inode *inode, | |
491 | const char *name, | |
492 | char *buffer, | |
493 | size_t size, | |
494 | __u64 valid) | |
495 | { | |
496 | struct lookup_intent oit = { .it_op = IT_GETXATTR }; | |
497 | struct ll_inode_info *lli = ll_i2info(inode); | |
498 | int rc = 0; | |
499 | ||
500 | ||
501 | ||
502 | LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS)); | |
503 | ||
504 | down_read(&lli->lli_xattrs_list_rwsem); | |
505 | if (!ll_xattr_cache_valid(lli)) { | |
506 | up_read(&lli->lli_xattrs_list_rwsem); | |
507 | rc = ll_xattr_cache_refill(inode, &oit); | |
508 | if (rc) | |
509 | return rc; | |
510 | downgrade_write(&lli->lli_xattrs_list_rwsem); | |
511 | } else { | |
512 | ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1); | |
513 | } | |
514 | ||
515 | if (valid & OBD_MD_FLXATTR) { | |
516 | struct ll_xattr_entry *xattr; | |
517 | ||
518 | rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &xattr); | |
519 | if (rc == 0) { | |
520 | rc = xattr->xe_vallen; | |
521 | /* zero size means we are only requested size in rc */ | |
522 | if (size != 0) { | |
523 | if (size >= xattr->xe_vallen) | |
524 | memcpy(buffer, xattr->xe_value, | |
525 | xattr->xe_vallen); | |
526 | else | |
527 | rc = -ERANGE; | |
528 | } | |
529 | } | |
530 | } else if (valid & OBD_MD_FLXATTRLS) { | |
531 | rc = ll_xattr_cache_list(&lli->lli_xattrs, | |
532 | size ? buffer : NULL, size); | |
533 | } | |
534 | ||
34e1f2bb | 535 | goto out; |
7fc1f831 AP |
536 | out: |
537 | up_read(&lli->lli_xattrs_list_rwsem); | |
538 | ||
539 | return rc; | |
540 | } |