]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - fs/ceph/xattr.c
ceph: add acl for cephfs
[mirror_ubuntu-bionic-kernel.git] / fs / ceph / xattr.c
CommitLineData
3d14c5d2
YS
1#include <linux/ceph/ceph_debug.h>
2
355da1eb 3#include "super.h"
3d14c5d2
YS
4#include "mds_client.h"
5
6#include <linux/ceph/decode.h>
355da1eb
SW
7
8#include <linux/xattr.h>
5a0e3ad6 9#include <linux/slab.h>
355da1eb 10
22891907
AE
11#define XATTR_CEPH_PREFIX "ceph."
12#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
13
7221fe4c
GZ
14/*
15 * List of handlers for synthetic system.* attributes. Other
16 * attributes are handled directly.
17 */
const struct xattr_handler *ceph_xattr_handlers[] = {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
	/* POSIX ACL handlers are only present when ACL support is built in */
	&ceph_xattr_acl_access_handler,
	&ceph_xattr_acl_default_handler,
#endif
	NULL,	/* table terminator */
};
25
355da1eb
SW
26static bool ceph_is_valid_xattr(const char *name)
27{
22891907 28 return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
1a756278 29 !strncmp(name, XATTR_SECURITY_PREFIX,
355da1eb 30 XATTR_SECURITY_PREFIX_LEN) ||
7221fe4c 31 !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
355da1eb
SW
32 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
33 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
34}
35
36/*
37 * These define virtual xattrs exposing the recursive directory
38 * statistics and layout metadata.
39 */
struct ceph_vxattr {
	/* full attribute name; a NULL name terminates a vxattr table */
	char *name;
	/* strlen(name) + 1 (for '\0') */
	size_t name_size;
	/* formats the attribute value into val, given size bytes of room */
	size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
			      size_t size);
	/* readonly: set/remove requests are refused with -EOPNOTSUPP */
	bool readonly;
	/* hidden: left out of listxattr output and of the name-size totals */
	bool hidden;
	/* optional; when set and it returns false the vxattr is skipped */
	bool (*exists_cb)(struct ceph_inode_info *ci);
};
48
32ab0bd7
SW
49/* layouts */
50
51static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
52{
53 size_t s;
54 char *p = (char *)&ci->i_layout;
55
56 for (s = 0; s < sizeof(ci->i_layout); s++, p++)
57 if (*p)
58 return true;
59 return false;
60}
61
62static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
63 size_t size)
64{
65 int ret;
66 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
67 struct ceph_osd_client *osdc = &fsc->client->osdc;
68 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
69 const char *pool_name;
70
71 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
72 down_read(&osdc->map_sem);
73 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
74 if (pool_name)
75 ret = snprintf(val, size,
76 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
77 (unsigned long long)ceph_file_layout_su(ci->i_layout),
78 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
79 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
80 pool_name);
81 else
82 ret = snprintf(val, size,
83 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
84 (unsigned long long)ceph_file_layout_su(ci->i_layout),
85 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
86 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
87 (unsigned long long)pool);
88
89 up_read(&osdc->map_sem);
90 return ret;
91}
92
695b7119
SW
93static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
94 char *val, size_t size)
95{
96 return snprintf(val, size, "%lld",
97 (unsigned long long)ceph_file_layout_su(ci->i_layout));
98}
99
100static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
101 char *val, size_t size)
102{
103 return snprintf(val, size, "%lld",
104 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
105}
106
107static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
108 char *val, size_t size)
109{
110 return snprintf(val, size, "%lld",
111 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
112}
113
114static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
115 char *val, size_t size)
116{
117 int ret;
118 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
119 struct ceph_osd_client *osdc = &fsc->client->osdc;
120 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
121 const char *pool_name;
122
123 down_read(&osdc->map_sem);
124 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
125 if (pool_name)
126 ret = snprintf(val, size, "%s", pool_name);
127 else
128 ret = snprintf(val, size, "%lld", (unsigned long long)pool);
129 up_read(&osdc->map_sem);
130 return ret;
131}
132
355da1eb
SW
133/* directories */
134
aa4066ed 135static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
355da1eb
SW
136 size_t size)
137{
138 return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
139}
140
aa4066ed 141static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
355da1eb
SW
142 size_t size)
143{
144 return snprintf(val, size, "%lld", ci->i_files);
145}
146
aa4066ed 147static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
355da1eb
SW
148 size_t size)
149{
150 return snprintf(val, size, "%lld", ci->i_subdirs);
151}
152
aa4066ed 153static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
355da1eb
SW
154 size_t size)
155{
156 return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
157}
158
aa4066ed 159static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
355da1eb
SW
160 size_t size)
161{
162 return snprintf(val, size, "%lld", ci->i_rfiles);
163}
164
aa4066ed 165static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
355da1eb
SW
166 size_t size)
167{
168 return snprintf(val, size, "%lld", ci->i_rsubdirs);
169}
170
aa4066ed 171static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
355da1eb
SW
172 size_t size)
173{
174 return snprintf(val, size, "%lld", ci->i_rbytes);
175}
176
aa4066ed 177static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
355da1eb
SW
178 size_t size)
179{
3489b42a 180 return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
355da1eb
SW
181 (long)ci->i_rctime.tv_nsec);
182}
183
32ab0bd7 184
/* Build "ceph.<type>.<name>" and "ceph.<type>.<name>.<name2>" literals. */
#define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2)				\
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

/*
 * Table entry for a read-only, listed vxattr "ceph.<type>.<name>" served
 * by ceph_vxattrcb_<type>_<name>.
 */
#define XATTR_NAME_CEPH(_type, _name)					\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)),	\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.readonly = true,					\
		.hidden = false,					\
		.exists_cb = NULL,					\
	}

/*
 * Table entry for a writable, hidden vxattr "ceph.<type>.<name>.<field>"
 * served by ceph_vxattrcb_<name>_<field>; it only exists while the inode
 * has a layout.
 */
#define XATTR_LAYOUT_FIELD(_type, _name, _field)			\
	{								\
		.name = CEPH_XATTR_NAME2(_type, _name, _field),		\
		.name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
		.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field,	\
		.readonly = false,					\
		.hidden = true,						\
		.exists_cb = ceph_vxattrcb_layout_exists,		\
	}
eb788084 207
881a5fa2 208static struct ceph_vxattr ceph_dir_vxattrs[] = {
1f08f2b0
SW
209 {
210 .name = "ceph.dir.layout",
211 .name_size = sizeof("ceph.dir.layout"),
212 .getxattr_cb = ceph_vxattrcb_layout,
213 .readonly = false,
214 .hidden = false,
215 .exists_cb = ceph_vxattrcb_layout_exists,
216 },
695b7119
SW
217 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
218 XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
219 XATTR_LAYOUT_FIELD(dir, layout, object_size),
220 XATTR_LAYOUT_FIELD(dir, layout, pool),
eb788084
AE
221 XATTR_NAME_CEPH(dir, entries),
222 XATTR_NAME_CEPH(dir, files),
223 XATTR_NAME_CEPH(dir, subdirs),
224 XATTR_NAME_CEPH(dir, rentries),
225 XATTR_NAME_CEPH(dir, rfiles),
226 XATTR_NAME_CEPH(dir, rsubdirs),
227 XATTR_NAME_CEPH(dir, rbytes),
228 XATTR_NAME_CEPH(dir, rctime),
2c3dd4ff 229 { .name = NULL, 0 } /* Required table terminator */
355da1eb 230};
3ce6cd12 231static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
355da1eb
SW
232
233/* files */
234
881a5fa2 235static struct ceph_vxattr ceph_file_vxattrs[] = {
32ab0bd7
SW
236 {
237 .name = "ceph.file.layout",
238 .name_size = sizeof("ceph.file.layout"),
239 .getxattr_cb = ceph_vxattrcb_layout,
240 .readonly = false,
241 .hidden = false,
242 .exists_cb = ceph_vxattrcb_layout_exists,
243 },
695b7119
SW
244 XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
245 XATTR_LAYOUT_FIELD(file, layout, stripe_count),
246 XATTR_LAYOUT_FIELD(file, layout, object_size),
247 XATTR_LAYOUT_FIELD(file, layout, pool),
2c3dd4ff 248 { .name = NULL, 0 } /* Required table terminator */
355da1eb 249};
3ce6cd12 250static size_t ceph_file_vxattrs_name_size; /* total size of all names */
355da1eb 251
881a5fa2 252static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
355da1eb
SW
253{
254 if (S_ISDIR(inode->i_mode))
255 return ceph_dir_vxattrs;
256 else if (S_ISREG(inode->i_mode))
257 return ceph_file_vxattrs;
258 return NULL;
259}
260
3ce6cd12
AE
261static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
262{
263 if (vxattrs == ceph_dir_vxattrs)
264 return ceph_dir_vxattrs_name_size;
265 if (vxattrs == ceph_file_vxattrs)
266 return ceph_file_vxattrs_name_size;
267 BUG();
268
269 return 0;
270}
271
272/*
273 * Compute the aggregate size (including terminating '\0') of all
274 * virtual extended attribute names in the given vxattr table.
275 */
276static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
277{
278 struct ceph_vxattr *vxattr;
279 size_t size = 0;
280
281 for (vxattr = vxattrs; vxattr->name; vxattr++)
8860147a
SW
282 if (!vxattr->hidden)
283 size += vxattr->name_size;
3ce6cd12
AE
284
285 return size;
286}
287
288/* Routines called at initialization and exit time */
289
290void __init ceph_xattr_init(void)
291{
292 ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
293 ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
294}
295
296void ceph_xattr_exit(void)
297{
298 ceph_dir_vxattrs_name_size = 0;
299 ceph_file_vxattrs_name_size = 0;
300}
301
881a5fa2 302static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
355da1eb
SW
303 const char *name)
304{
881a5fa2 305 struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
06476a69
AE
306
307 if (vxattr) {
308 while (vxattr->name) {
309 if (!strcmp(vxattr->name, name))
310 return vxattr;
311 vxattr++;
312 }
313 }
314
355da1eb
SW
315 return NULL;
316}
317
318static int __set_xattr(struct ceph_inode_info *ci,
319 const char *name, int name_len,
320 const char *val, int val_len,
321 int dirty,
322 int should_free_name, int should_free_val,
323 struct ceph_inode_xattr **newxattr)
324{
325 struct rb_node **p;
326 struct rb_node *parent = NULL;
327 struct ceph_inode_xattr *xattr = NULL;
328 int c;
329 int new = 0;
330
331 p = &ci->i_xattrs.index.rb_node;
332 while (*p) {
333 parent = *p;
334 xattr = rb_entry(parent, struct ceph_inode_xattr, node);
335 c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
336 if (c < 0)
337 p = &(*p)->rb_left;
338 else if (c > 0)
339 p = &(*p)->rb_right;
340 else {
341 if (name_len == xattr->name_len)
342 break;
343 else if (name_len < xattr->name_len)
344 p = &(*p)->rb_left;
345 else
346 p = &(*p)->rb_right;
347 }
348 xattr = NULL;
349 }
350
351 if (!xattr) {
352 new = 1;
353 xattr = *newxattr;
354 xattr->name = name;
355 xattr->name_len = name_len;
356 xattr->should_free_name = should_free_name;
357
358 ci->i_xattrs.count++;
359 dout("__set_xattr count=%d\n", ci->i_xattrs.count);
360 } else {
361 kfree(*newxattr);
362 *newxattr = NULL;
363 if (xattr->should_free_val)
364 kfree((void *)xattr->val);
365
366 if (should_free_name) {
367 kfree((void *)name);
368 name = xattr->name;
369 }
370 ci->i_xattrs.names_size -= xattr->name_len;
371 ci->i_xattrs.vals_size -= xattr->val_len;
372 }
355da1eb
SW
373 ci->i_xattrs.names_size += name_len;
374 ci->i_xattrs.vals_size += val_len;
375 if (val)
376 xattr->val = val;
377 else
378 xattr->val = "";
379
380 xattr->val_len = val_len;
381 xattr->dirty = dirty;
382 xattr->should_free_val = (val && should_free_val);
383
384 if (new) {
385 rb_link_node(&xattr->node, parent, p);
386 rb_insert_color(&xattr->node, &ci->i_xattrs.index);
387 dout("__set_xattr_val p=%p\n", p);
388 }
389
390 dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
391 ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
392
393 return 0;
394}
395
396static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
397 const char *name)
398{
399 struct rb_node **p;
400 struct rb_node *parent = NULL;
401 struct ceph_inode_xattr *xattr = NULL;
17db143f 402 int name_len = strlen(name);
355da1eb
SW
403 int c;
404
405 p = &ci->i_xattrs.index.rb_node;
406 while (*p) {
407 parent = *p;
408 xattr = rb_entry(parent, struct ceph_inode_xattr, node);
409 c = strncmp(name, xattr->name, xattr->name_len);
17db143f
SW
410 if (c == 0 && name_len > xattr->name_len)
411 c = 1;
355da1eb
SW
412 if (c < 0)
413 p = &(*p)->rb_left;
414 else if (c > 0)
415 p = &(*p)->rb_right;
416 else {
417 dout("__get_xattr %s: found %.*s\n", name,
418 xattr->val_len, xattr->val);
419 return xattr;
420 }
421 }
422
423 dout("__get_xattr %s: not found\n", name);
424
425 return NULL;
426}
427
428static void __free_xattr(struct ceph_inode_xattr *xattr)
429{
430 BUG_ON(!xattr);
431
432 if (xattr->should_free_name)
433 kfree((void *)xattr->name);
434 if (xattr->should_free_val)
435 kfree((void *)xattr->val);
436
437 kfree(xattr);
438}
439
440static int __remove_xattr(struct ceph_inode_info *ci,
441 struct ceph_inode_xattr *xattr)
442{
443 if (!xattr)
444 return -EOPNOTSUPP;
445
446 rb_erase(&xattr->node, &ci->i_xattrs.index);
447
448 if (xattr->should_free_name)
449 kfree((void *)xattr->name);
450 if (xattr->should_free_val)
451 kfree((void *)xattr->val);
452
453 ci->i_xattrs.names_size -= xattr->name_len;
454 ci->i_xattrs.vals_size -= xattr->val_len;
455 ci->i_xattrs.count--;
456 kfree(xattr);
457
458 return 0;
459}
460
/*
 * Look @name up in the inode's xattr index and remove it.
 * Returns whatever __remove_xattr() returns (0 on success, an error when
 * no such attribute is indexed).
 */
static int __remove_xattr_by_name(struct ceph_inode_info *ci,
			   const char *name)
{
	struct ceph_inode_xattr *xattr = __get_xattr(ci, name);

	return __remove_xattr(ci, xattr);
}
473
474static char *__copy_xattr_names(struct ceph_inode_info *ci,
475 char *dest)
476{
477 struct rb_node *p;
478 struct ceph_inode_xattr *xattr = NULL;
479
480 p = rb_first(&ci->i_xattrs.index);
481 dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
482
483 while (p) {
484 xattr = rb_entry(p, struct ceph_inode_xattr, node);
485 memcpy(dest, xattr->name, xattr->name_len);
486 dest[xattr->name_len] = '\0';
487
488 dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
489 xattr->name_len, ci->i_xattrs.names_size);
490
491 dest += xattr->name_len + 1;
492 p = rb_next(p);
493 }
494
495 return dest;
496}
497
498void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
499{
500 struct rb_node *p, *tmp;
501 struct ceph_inode_xattr *xattr = NULL;
502
503 p = rb_first(&ci->i_xattrs.index);
504
505 dout("__ceph_destroy_xattrs p=%p\n", p);
506
507 while (p) {
508 xattr = rb_entry(p, struct ceph_inode_xattr, node);
509 tmp = p;
510 p = rb_next(tmp);
511 dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
512 xattr->name_len, xattr->name);
513 rb_erase(tmp, &ci->i_xattrs.index);
514
515 __free_xattr(xattr);
516 }
517
518 ci->i_xattrs.names_size = 0;
519 ci->i_xattrs.vals_size = 0;
520 ci->i_xattrs.index_version = 0;
521 ci->i_xattrs.count = 0;
522 ci->i_xattrs.index = RB_ROOT;
523}
524
525static int __build_xattrs(struct inode *inode)
be655596
SW
526 __releases(ci->i_ceph_lock)
527 __acquires(ci->i_ceph_lock)
355da1eb
SW
528{
529 u32 namelen;
530 u32 numattr = 0;
531 void *p, *end;
532 u32 len;
533 const char *name, *val;
534 struct ceph_inode_info *ci = ceph_inode(inode);
535 int xattr_version;
536 struct ceph_inode_xattr **xattrs = NULL;
63ff78b2 537 int err = 0;
355da1eb
SW
538 int i;
539
540 dout("__build_xattrs() len=%d\n",
541 ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
542
543 if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
544 return 0; /* already built */
545
546 __ceph_destroy_xattrs(ci);
547
548start:
549 /* updated internal xattr rb tree */
550 if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
551 p = ci->i_xattrs.blob->vec.iov_base;
552 end = p + ci->i_xattrs.blob->vec.iov_len;
553 ceph_decode_32_safe(&p, end, numattr, bad);
554 xattr_version = ci->i_xattrs.version;
be655596 555 spin_unlock(&ci->i_ceph_lock);
355da1eb
SW
556
557 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
558 GFP_NOFS);
559 err = -ENOMEM;
560 if (!xattrs)
561 goto bad_lock;
562 memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
563 for (i = 0; i < numattr; i++) {
564 xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
565 GFP_NOFS);
566 if (!xattrs[i])
567 goto bad_lock;
568 }
569
be655596 570 spin_lock(&ci->i_ceph_lock);
355da1eb
SW
571 if (ci->i_xattrs.version != xattr_version) {
572 /* lost a race, retry */
573 for (i = 0; i < numattr; i++)
574 kfree(xattrs[i]);
575 kfree(xattrs);
21ec6ffa 576 xattrs = NULL;
355da1eb
SW
577 goto start;
578 }
579 err = -EIO;
580 while (numattr--) {
581 ceph_decode_32_safe(&p, end, len, bad);
582 namelen = len;
583 name = p;
584 p += len;
585 ceph_decode_32_safe(&p, end, len, bad);
586 val = p;
587 p += len;
588
589 err = __set_xattr(ci, name, namelen, val, len,
590 0, 0, 0, &xattrs[numattr]);
591
592 if (err < 0)
593 goto bad;
594 }
595 kfree(xattrs);
596 }
597 ci->i_xattrs.index_version = ci->i_xattrs.version;
598 ci->i_xattrs.dirty = false;
599
600 return err;
601bad_lock:
be655596 602 spin_lock(&ci->i_ceph_lock);
355da1eb
SW
603bad:
604 if (xattrs) {
605 for (i = 0; i < numattr; i++)
606 kfree(xattrs[i]);
607 kfree(xattrs);
608 }
609 ci->i_xattrs.names_size = 0;
610 return err;
611}
612
613static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
614 int val_size)
615{
616 /*
617 * 4 bytes for the length, and additional 4 bytes per each xattr name,
618 * 4 bytes per each value
619 */
620 int size = 4 + ci->i_xattrs.count*(4 + 4) +
621 ci->i_xattrs.names_size +
622 ci->i_xattrs.vals_size;
623 dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
624 ci->i_xattrs.count, ci->i_xattrs.names_size,
625 ci->i_xattrs.vals_size);
626
627 if (name_size)
628 size += 4 + 4 + name_size + val_size;
629
630 return size;
631}
632
633/*
634 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
635 * and swap into place.
636 */
637void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
638{
639 struct rb_node *p;
640 struct ceph_inode_xattr *xattr = NULL;
641 void *dest;
642
643 dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
644 if (ci->i_xattrs.dirty) {
645 int need = __get_required_blob_size(ci, 0, 0);
646
647 BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
648
649 p = rb_first(&ci->i_xattrs.index);
650 dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
651
652 ceph_encode_32(&dest, ci->i_xattrs.count);
653 while (p) {
654 xattr = rb_entry(p, struct ceph_inode_xattr, node);
655
656 ceph_encode_32(&dest, xattr->name_len);
657 memcpy(dest, xattr->name, xattr->name_len);
658 dest += xattr->name_len;
659 ceph_encode_32(&dest, xattr->val_len);
660 memcpy(dest, xattr->val, xattr->val_len);
661 dest += xattr->val_len;
662
663 p = rb_next(p);
664 }
665
666 /* adjust buffer len; it may be larger than we need */
667 ci->i_xattrs.prealloc_blob->vec.iov_len =
668 dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
669
b6c1d5b8
SW
670 if (ci->i_xattrs.blob)
671 ceph_buffer_put(ci->i_xattrs.blob);
355da1eb
SW
672 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
673 ci->i_xattrs.prealloc_blob = NULL;
674 ci->i_xattrs.dirty = false;
4a625be4 675 ci->i_xattrs.version++;
355da1eb
SW
676 }
677}
678
7221fe4c 679ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
355da1eb
SW
680 size_t size)
681{
355da1eb 682 struct ceph_inode_info *ci = ceph_inode(inode);
355da1eb
SW
683 int err;
684 struct ceph_inode_xattr *xattr;
881a5fa2 685 struct ceph_vxattr *vxattr = NULL;
355da1eb
SW
686
687 if (!ceph_is_valid_xattr(name))
688 return -ENODATA;
689
0bee82fb
SW
690 /* let's see if a virtual xattr was requested */
691 vxattr = ceph_match_vxattr(inode, name);
692 if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
693 err = vxattr->getxattr_cb(ci, value, size);
a1dc1937 694 return err;
0bee82fb
SW
695 }
696
a1dc1937 697 spin_lock(&ci->i_ceph_lock);
698 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
699 ci->i_xattrs.version, ci->i_xattrs.index_version);
700
355da1eb
SW
701 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
702 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
703 goto get_xattr;
704 } else {
be655596 705 spin_unlock(&ci->i_ceph_lock);
355da1eb
SW
706 /* get xattrs from mds (if we don't already have them) */
707 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
708 if (err)
709 return err;
710 }
711
be655596 712 spin_lock(&ci->i_ceph_lock);
355da1eb 713
355da1eb
SW
714 err = __build_xattrs(inode);
715 if (err < 0)
716 goto out;
717
718get_xattr:
719 err = -ENODATA; /* == ENOATTR */
720 xattr = __get_xattr(ci, name);
0bee82fb 721 if (!xattr)
355da1eb 722 goto out;
355da1eb
SW
723
724 err = -ERANGE;
725 if (size && size < xattr->val_len)
726 goto out;
727
728 err = xattr->val_len;
729 if (size == 0)
730 goto out;
731
732 memcpy(value, xattr->val, xattr->val_len);
733
734out:
be655596 735 spin_unlock(&ci->i_ceph_lock);
355da1eb
SW
736 return err;
737}
738
7221fe4c
GZ
739ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
740 size_t size)
741{
742 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
743 return generic_getxattr(dentry, name, value, size);
744
745 return __ceph_getxattr(dentry->d_inode, name, value, size);
746}
747
355da1eb
SW
748ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
749{
750 struct inode *inode = dentry->d_inode;
751 struct ceph_inode_info *ci = ceph_inode(inode);
881a5fa2 752 struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
355da1eb
SW
753 u32 vir_namelen = 0;
754 u32 namelen;
755 int err;
756 u32 len;
757 int i;
758
be655596 759 spin_lock(&ci->i_ceph_lock);
355da1eb
SW
760 dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
761 ci->i_xattrs.version, ci->i_xattrs.index_version);
762
763 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
bddfa3cc 764 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
355da1eb
SW
765 goto list_xattr;
766 } else {
be655596 767 spin_unlock(&ci->i_ceph_lock);
355da1eb
SW
768 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
769 if (err)
770 return err;
771 }
772
be655596 773 spin_lock(&ci->i_ceph_lock);
355da1eb
SW
774
775 err = __build_xattrs(inode);
776 if (err < 0)
777 goto out;
778
779list_xattr:
3ce6cd12
AE
780 /*
781 * Start with virtual dir xattr names (if any) (including
782 * terminating '\0' characters for each).
783 */
784 vir_namelen = ceph_vxattrs_name_size(vxattrs);
785
355da1eb 786 /* adding 1 byte per each variable due to the null termination */
b65917dd 787 namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
355da1eb 788 err = -ERANGE;
b65917dd 789 if (size && vir_namelen + namelen > size)
355da1eb
SW
790 goto out;
791
b65917dd 792 err = namelen + vir_namelen;
355da1eb
SW
793 if (size == 0)
794 goto out;
795
796 names = __copy_xattr_names(ci, names);
797
798 /* virtual xattr names, too */
b65917dd
SW
799 err = namelen;
800 if (vxattrs) {
355da1eb 801 for (i = 0; vxattrs[i].name; i++) {
b65917dd
SW
802 if (!vxattrs[i].hidden &&
803 !(vxattrs[i].exists_cb &&
804 !vxattrs[i].exists_cb(ci))) {
805 len = sprintf(names, "%s", vxattrs[i].name);
806 names += len + 1;
807 err += len + 1;
808 }
355da1eb 809 }
b65917dd 810 }
355da1eb
SW
811
812out:
be655596 813 spin_unlock(&ci->i_ceph_lock);
355da1eb
SW
814 return err;
815}
816
817static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
818 const char *value, size_t size, int flags)
819{
3d14c5d2 820 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
355da1eb
SW
821 struct inode *inode = dentry->d_inode;
822 struct ceph_inode_info *ci = ceph_inode(inode);
5f21c96d 823 struct inode *parent_inode;
355da1eb 824 struct ceph_mds_request *req;
3d14c5d2 825 struct ceph_mds_client *mdsc = fsc->mdsc;
355da1eb
SW
826 int err;
827 int i, nr_pages;
828 struct page **pages = NULL;
829 void *kaddr;
830
831 /* copy value into some pages */
832 nr_pages = calc_pages_for(0, size);
833 if (nr_pages) {
834 pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
835 if (!pages)
836 return -ENOMEM;
837 err = -ENOMEM;
838 for (i = 0; i < nr_pages; i++) {
31459fe4 839 pages[i] = __page_cache_alloc(GFP_NOFS);
355da1eb
SW
840 if (!pages[i]) {
841 nr_pages = i;
842 goto out;
843 }
844 kaddr = kmap(pages[i]);
845 memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
846 min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
847 }
848 }
849
850 dout("setxattr value=%.*s\n", (int)size, value);
851
852 /* do request */
853 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
854 USE_AUTH_MDS);
60d87733
JL
855 if (IS_ERR(req)) {
856 err = PTR_ERR(req);
857 goto out;
858 }
70b666c3
SW
859 req->r_inode = inode;
860 ihold(inode);
355da1eb
SW
861 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
862 req->r_num_caps = 1;
863 req->r_args.setxattr.flags = cpu_to_le32(flags);
864 req->r_path2 = kstrdup(name, GFP_NOFS);
865
866 req->r_pages = pages;
867 req->r_num_pages = nr_pages;
868 req->r_data_len = size;
869
870 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
5f21c96d 871 parent_inode = ceph_get_dentry_parent_inode(dentry);
355da1eb 872 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
5f21c96d 873 iput(parent_inode);
355da1eb
SW
874 ceph_mdsc_put_request(req);
875 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
876
877out:
878 if (pages) {
879 for (i = 0; i < nr_pages; i++)
880 __free_page(pages[i]);
881 kfree(pages);
882 }
883 return err;
884}
885
7221fe4c
GZ
886int __ceph_setxattr(struct dentry *dentry, const char *name,
887 const void *value, size_t size, int flags)
355da1eb
SW
888{
889 struct inode *inode = dentry->d_inode;
881a5fa2 890 struct ceph_vxattr *vxattr;
355da1eb 891 struct ceph_inode_info *ci = ceph_inode(inode);
18fa8b3f 892 int issued;
355da1eb 893 int err;
18fa8b3f 894 int dirty;
355da1eb
SW
895 int name_len = strlen(name);
896 int val_len = size;
897 char *newname = NULL;
898 char *newval = NULL;
899 struct ceph_inode_xattr *xattr = NULL;
355da1eb
SW
900 int required_blob_size;
901
355da1eb
SW
902 if (!ceph_is_valid_xattr(name))
903 return -EOPNOTSUPP;
904
06476a69
AE
905 vxattr = ceph_match_vxattr(inode, name);
906 if (vxattr && vxattr->readonly)
907 return -EOPNOTSUPP;
355da1eb 908
3adf654d
SW
909 /* pass any unhandled ceph.* xattrs through to the MDS */
910 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
911 goto do_sync_unlocked;
912
355da1eb
SW
913 /* preallocate memory for xattr name, value, index node */
914 err = -ENOMEM;
61413c2f 915 newname = kmemdup(name, name_len + 1, GFP_NOFS);
355da1eb
SW
916 if (!newname)
917 goto out;
355da1eb
SW
918
919 if (val_len) {
b829c195 920 newval = kmemdup(value, val_len, GFP_NOFS);
355da1eb
SW
921 if (!newval)
922 goto out;
355da1eb
SW
923 }
924
925 xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
926 if (!xattr)
927 goto out;
928
be655596 929 spin_lock(&ci->i_ceph_lock);
355da1eb
SW
930retry:
931 issued = __ceph_caps_issued(ci, NULL);
18fa8b3f 932 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
355da1eb
SW
933 if (!(issued & CEPH_CAP_XATTR_EXCL))
934 goto do_sync;
935 __build_xattrs(inode);
936
937 required_blob_size = __get_required_blob_size(ci, name_len, val_len);
938
939 if (!ci->i_xattrs.prealloc_blob ||
940 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
18fa8b3f 941 struct ceph_buffer *blob;
355da1eb 942
be655596 943 spin_unlock(&ci->i_ceph_lock);
355da1eb 944 dout(" preaallocating new blob size=%d\n", required_blob_size);
b6c1d5b8 945 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
355da1eb
SW
946 if (!blob)
947 goto out;
be655596 948 spin_lock(&ci->i_ceph_lock);
b6c1d5b8
SW
949 if (ci->i_xattrs.prealloc_blob)
950 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
355da1eb
SW
951 ci->i_xattrs.prealloc_blob = blob;
952 goto retry;
953 }
954
355da1eb
SW
955 err = __set_xattr(ci, newname, name_len, newval,
956 val_len, 1, 1, 1, &xattr);
18fa8b3f 957
fca65b4a 958 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
355da1eb
SW
959 ci->i_xattrs.dirty = true;
960 inode->i_ctime = CURRENT_TIME;
18fa8b3f 961
be655596 962 spin_unlock(&ci->i_ceph_lock);
fca65b4a
SW
963 if (dirty)
964 __mark_inode_dirty(inode, dirty);
355da1eb
SW
965 return err;
966
967do_sync:
be655596 968 spin_unlock(&ci->i_ceph_lock);
3adf654d 969do_sync_unlocked:
355da1eb
SW
970 err = ceph_sync_setxattr(dentry, name, value, size, flags);
971out:
972 kfree(newname);
973 kfree(newval);
974 kfree(xattr);
975 return err;
976}
977
7221fe4c
GZ
978int ceph_setxattr(struct dentry *dentry, const char *name,
979 const void *value, size_t size, int flags)
980{
981 if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
982 return -EROFS;
983
984 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
985 return generic_setxattr(dentry, name, value, size, flags);
986
987 return __ceph_setxattr(dentry, name, value, size, flags);
988}
989
355da1eb
SW
990static int ceph_send_removexattr(struct dentry *dentry, const char *name)
991{
3d14c5d2
YS
992 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
993 struct ceph_mds_client *mdsc = fsc->mdsc;
355da1eb 994 struct inode *inode = dentry->d_inode;
5f21c96d 995 struct inode *parent_inode;
355da1eb
SW
996 struct ceph_mds_request *req;
997 int err;
998
999 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
1000 USE_AUTH_MDS);
1001 if (IS_ERR(req))
1002 return PTR_ERR(req);
70b666c3
SW
1003 req->r_inode = inode;
1004 ihold(inode);
355da1eb
SW
1005 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1006 req->r_num_caps = 1;
1007 req->r_path2 = kstrdup(name, GFP_NOFS);
1008
5f21c96d 1009 parent_inode = ceph_get_dentry_parent_inode(dentry);
355da1eb 1010 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
5f21c96d 1011 iput(parent_inode);
355da1eb
SW
1012 ceph_mdsc_put_request(req);
1013 return err;
1014}
1015
7221fe4c 1016int __ceph_removexattr(struct dentry *dentry, const char *name)
355da1eb
SW
1017{
1018 struct inode *inode = dentry->d_inode;
881a5fa2 1019 struct ceph_vxattr *vxattr;
355da1eb 1020 struct ceph_inode_info *ci = ceph_inode(inode);
355da1eb
SW
1021 int issued;
1022 int err;
83eb26af 1023 int required_blob_size;
fca65b4a 1024 int dirty;
355da1eb 1025
355da1eb
SW
1026 if (!ceph_is_valid_xattr(name))
1027 return -EOPNOTSUPP;
1028
06476a69
AE
1029 vxattr = ceph_match_vxattr(inode, name);
1030 if (vxattr && vxattr->readonly)
1031 return -EOPNOTSUPP;
355da1eb 1032
d421acb1
SW
1033 /* pass any unhandled ceph.* xattrs through to the MDS */
1034 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1035 goto do_sync_unlocked;
1036
83eb26af 1037 err = -ENOMEM;
be655596 1038 spin_lock(&ci->i_ceph_lock);
83eb26af 1039retry:
355da1eb
SW
1040 issued = __ceph_caps_issued(ci, NULL);
1041 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
1042
1043 if (!(issued & CEPH_CAP_XATTR_EXCL))
1044 goto do_sync;
18fa8b3f 1045 __build_xattrs(inode);
355da1eb 1046
83eb26af
AE
1047 required_blob_size = __get_required_blob_size(ci, 0, 0);
1048
1049 if (!ci->i_xattrs.prealloc_blob ||
1050 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1051 struct ceph_buffer *blob;
1052
1053 spin_unlock(&ci->i_ceph_lock);
1054 dout(" preaallocating new blob size=%d\n", required_blob_size);
1055 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1056 if (!blob)
1057 goto out;
1058 spin_lock(&ci->i_ceph_lock);
1059 if (ci->i_xattrs.prealloc_blob)
1060 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
1061 ci->i_xattrs.prealloc_blob = blob;
1062 goto retry;
1063 }
1064
355da1eb 1065 err = __remove_xattr_by_name(ceph_inode(inode), name);
18fa8b3f 1066
fca65b4a 1067 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
355da1eb
SW
1068 ci->i_xattrs.dirty = true;
1069 inode->i_ctime = CURRENT_TIME;
be655596 1070 spin_unlock(&ci->i_ceph_lock);
fca65b4a
SW
1071 if (dirty)
1072 __mark_inode_dirty(inode, dirty);
355da1eb
SW
1073 return err;
1074do_sync:
be655596 1075 spin_unlock(&ci->i_ceph_lock);
d421acb1 1076do_sync_unlocked:
355da1eb 1077 err = ceph_send_removexattr(dentry, name);
83eb26af 1078out:
355da1eb
SW
1079 return err;
1080}
1081
7221fe4c
GZ
1082int ceph_removexattr(struct dentry *dentry, const char *name)
1083{
1084 if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
1085 return -EROFS;
1086
1087 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1088 return generic_removexattr(dentry, name);
1089
1090 return __ceph_removexattr(dentry, name);
1091}