]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/zpl_xattr.c
Linux 3.11 compat: fops->iterate()
[mirror_zfs.git] / module / zfs / zpl_xattr.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23 *
24 * Extended attributes (xattr) on Solaris are implemented as files
25 * which exist in a hidden xattr directory. These extended attributes
26 * can be accessed using the attropen() system call which opens
27 * the extended attribute. It can then be manipulated just like
28 * a standard file descriptor. This has a couple advantages such
29 * as practically no size limit on the file, and the extended
30 * attributes permissions may differ from those of the parent file.
31 * This interface is really quite clever, but it's also completely
32 * different than what is supported on Linux. It also comes with a
33 * steep performance penalty when accessing small xattrs because they
34 * are not stored with the parent file.
35 *
36 * Under Linux extended attributes are manipulated by the system
37 * calls getxattr(2), setxattr(2), and listxattr(2). They consider
38 * extended attributes to be name/value pairs where the name is a
39 * NULL terminated string. The name must also include one of the
40 * following namespace prefixes:
41 *
42 * user - No restrictions and is available to user applications.
43 * trusted - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44 * system - Used for access control lists (system.nfs4_acl, etc).
45 * security - Used by SELinux to store a file's security context.
46 *
47 * The value under Linux is limited to 65536 bytes of binary data.
48 * In practice, individual xattrs tend to be much smaller than this
49 * and are typically less than 100 bytes. A good example of this
50 * are the security.selinux xattrs which are less than 100 bytes and
51 * exist for every file when xattr labeling is enabled.
52 *
53 * The Linux xattr implementation has been written to take advantage of
54 * this typical usage. When the dataset property 'xattr=sa' is set,
55 * then xattrs will be preferentially stored as System Attributes (SA).
56 * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57 * up to 64k of xattrs to be stored in the spill block. If additional
58 * xattr space is required, which is unlikely under Linux, they will
59 * be stored using the traditional directory approach.
60 *
61 * This optimization results in roughly a 3x performance improvement
62 * when accessing xattrs because it avoids the need to perform a seek
63 * for every xattr value. When multiple xattrs are stored per-file
64 * the performance improvements are even greater because all of the
65 * xattrs stored in the spill block will be cached.
66 *
67 * However, by default SA based xattrs are disabled in the Linux port
68 * to maximize compatibility with other implementations. If you do
69 * enable SA based xattrs then they will not be visible on platforms
70 * which do not support this feature.
71 *
72 * NOTE: One additional consequence of the xattr directory implementation
73 * is that when an extended attribute is manipulated an inode is created.
74 * This inode will exist in the Linux inode cache but there will be no
75 * associated entry in the dentry cache which references it. This is
76 * safe but it may result in some confusion. Enabling SA based xattrs
77 * largely avoids the issue except in the overflow case.
78 */
79
80 #include <sys/zfs_vfsops.h>
81 #include <sys/zfs_vnops.h>
82 #include <sys/zfs_znode.h>
83 #include <sys/zap.h>
84 #include <sys/vfs.h>
85 #include <sys/zpl.h>
86
/*
 * Accumulator passed through the xattr name listing helpers.
 *
 * zpl_xattr_list() initializes this structure positionally, so the
 * member order below must not be changed.
 */
typedef struct xattr_filldir {
	size_t size;		/* capacity of 'buf' in bytes */
	size_t offset;		/* bytes written (or required) so far */
	char *buf;		/* output buffer, NULL to only compute size */
	struct inode *inode;	/* inode whose xattr names are listed */
} xattr_filldir_t;
93
94 static int
95 zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
96 {
97 if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
98 if (!(ITOZSB(xf->inode)->z_flags & ZSB_XATTR))
99 return (0);
100
101 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
102 if (!capable(CAP_SYS_ADMIN))
103 return (0);
104
105 /* When xf->buf is NULL only calculate the required size. */
106 if (xf->buf) {
107 if (xf->offset + name_len + 1 > xf->size)
108 return (-ERANGE);
109
110 memcpy(xf->buf + xf->offset, name, name_len);
111 xf->buf[xf->offset + name_len] = '\0';
112 }
113
114 xf->offset += (name_len + 1);
115
116 return (0);
117 }
118
119 /*
120 * Read as many directory entry names as will fit in to the provided buffer,
121 * or when no buffer is provided calculate the required buffer size.
122 */
123 int
124 zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
125 {
126 zap_cursor_t zc;
127 zap_attribute_t zap;
128 int error;
129
130 zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
131
132 while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) {
133
134 if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
135 error = -ENXIO;
136 break;
137 }
138
139 error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
140 if (error)
141 break;
142
143 zap_cursor_advance(&zc);
144 }
145
146 zap_cursor_fini(&zc);
147
148 if (error == -ENOENT)
149 error = 0;
150
151 return (error);
152 }
153
154 static ssize_t
155 zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
156 {
157 struct inode *ip = xf->inode;
158 struct inode *dxip = NULL;
159 int error;
160
161 /* Lookup the xattr directory */
162 error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
163 if (error) {
164 if (error == -ENOENT)
165 error = 0;
166
167 return (error);
168 }
169
170 error = zpl_xattr_readdir(dxip, xf);
171 iput(dxip);
172
173 return (error);
174 }
175
176 static ssize_t
177 zpl_xattr_list_sa(xattr_filldir_t *xf)
178 {
179 znode_t *zp = ITOZ(xf->inode);
180 nvpair_t *nvp = NULL;
181 int error = 0;
182
183 mutex_enter(&zp->z_lock);
184 if (zp->z_xattr_cached == NULL)
185 error = -zfs_sa_get_xattr(zp);
186 mutex_exit(&zp->z_lock);
187
188 if (error)
189 return (error);
190
191 ASSERT(zp->z_xattr_cached);
192
193 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
194 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
195
196 error = zpl_xattr_filldir(xf, nvpair_name(nvp),
197 strlen(nvpair_name(nvp)));
198 if (error)
199 return (error);
200 }
201
202 return (0);
203 }
204
205 ssize_t
206 zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
207 {
208 znode_t *zp = ITOZ(dentry->d_inode);
209 zfs_sb_t *zsb = ZTOZSB(zp);
210 xattr_filldir_t xf = { buffer_size, 0, buffer, dentry->d_inode };
211 cred_t *cr = CRED();
212 int error = 0;
213
214 crhold(cr);
215 rw_enter(&zp->z_xattr_lock, RW_READER);
216
217 if (zsb->z_use_sa && zp->z_is_sa) {
218 error = zpl_xattr_list_sa(&xf);
219 if (error)
220 goto out;
221 }
222
223 error = zpl_xattr_list_dir(&xf, cr);
224 if (error)
225 goto out;
226
227 error = xf.offset;
228 out:
229
230 rw_exit(&zp->z_xattr_lock);
231 crfree(cr);
232
233 return (error);
234 }
235
236 static int
237 zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
238 size_t size, cred_t *cr)
239 {
240 struct inode *dxip = NULL;
241 struct inode *xip = NULL;
242 int error;
243
244 /* Lookup the xattr directory */
245 error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
246 if (error)
247 goto out;
248
249 /* Lookup a specific xattr name in the directory */
250 error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
251 if (error)
252 goto out;
253
254 if (!size) {
255 error = i_size_read(xip);
256 goto out;
257 }
258
259 if (size < i_size_read(xip)) {
260 error = -ERANGE;
261 goto out;
262 }
263
264 error = zpl_read_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr);
265 out:
266 if (xip)
267 iput(xip);
268
269 if (dxip)
270 iput(dxip);
271
272 return (error);
273 }
274
275 static int
276 zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
277 {
278 znode_t *zp = ITOZ(ip);
279 uchar_t *nv_value;
280 uint_t nv_size;
281 int error = 0;
282
283 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
284
285 mutex_enter(&zp->z_lock);
286 if (zp->z_xattr_cached == NULL)
287 error = -zfs_sa_get_xattr(zp);
288 mutex_exit(&zp->z_lock);
289
290 if (error)
291 return (error);
292
293 ASSERT(zp->z_xattr_cached);
294 error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
295 &nv_value, &nv_size);
296 if (error)
297 return (error);
298
299 if (!size)
300 return (nv_size);
301
302 if (size < nv_size)
303 return (-ERANGE);
304
305 memcpy(value, nv_value, nv_size);
306
307 return (nv_size);
308 }
309
310 static int
311 __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
312 cred_t *cr)
313 {
314 znode_t *zp = ITOZ(ip);
315 zfs_sb_t *zsb = ZTOZSB(zp);
316 int error;
317
318 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
319
320 if (zsb->z_use_sa && zp->z_is_sa) {
321 error = zpl_xattr_get_sa(ip, name, value, size);
322 if (error != -ENOENT)
323 goto out;
324 }
325
326 error = zpl_xattr_get_dir(ip, name, value, size, cr);
327 out:
328 if (error == -ENOENT)
329 error = -ENODATA;
330
331 return (error);
332 }
333
334 static int
335 zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
336 {
337 znode_t *zp = ITOZ(ip);
338 cred_t *cr = CRED();
339 int error;
340
341 crhold(cr);
342 rw_enter(&zp->z_xattr_lock, RW_READER);
343 error = __zpl_xattr_get(ip, name, value, size, cr);
344 rw_exit(&zp->z_xattr_lock);
345 crfree(cr);
346
347 return (error);
348 }
349
350 static int
351 zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
352 size_t size, int flags, cred_t *cr)
353 {
354 struct inode *dxip = NULL;
355 struct inode *xip = NULL;
356 vattr_t *vap = NULL;
357 ssize_t wrote;
358 int error;
359 const int xattr_mode = S_IFREG | 0644;
360
361 /* Lookup the xattr directory and create it if required. */
362 error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR | CREATE_XATTR_DIR,
363 cr, NULL, NULL);
364 if (error)
365 goto out;
366
367 /* Lookup a specific xattr name in the directory */
368 error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
369 if (error && (error != -ENOENT))
370 goto out;
371
372 error = 0;
373
374 /* Remove a specific name xattr when value is set to NULL. */
375 if (value == NULL) {
376 if (xip)
377 error = -zfs_remove(dxip, (char *)name, cr);
378
379 goto out;
380 }
381
382 /* Lookup failed create a new xattr. */
383 if (xip == NULL) {
384 vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
385 vap->va_mode = xattr_mode;
386 vap->va_mask = ATTR_MODE;
387 vap->va_uid = crgetfsuid(cr);
388 vap->va_gid = crgetfsgid(cr);
389
390 error = -zfs_create(dxip, (char *)name, vap, 0, 0644, &xip,
391 cr, 0, NULL);
392 if (error)
393 goto out;
394 }
395
396 ASSERT(xip != NULL);
397
398 error = -zfs_freesp(ITOZ(xip), 0, 0, xattr_mode, TRUE);
399 if (error)
400 goto out;
401
402 wrote = zpl_write_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr);
403 if (wrote < 0)
404 error = wrote;
405
406 out:
407 if (vap)
408 kmem_free(vap, sizeof(vattr_t));
409
410 if (xip)
411 iput(xip);
412
413 if (dxip)
414 iput(dxip);
415
416 if (error == -ENOENT)
417 error = -ENODATA;
418
419 ASSERT3S(error, <=, 0);
420
421 return (error);
422 }
423
424 static int
425 zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
426 size_t size, int flags, cred_t *cr)
427 {
428 znode_t *zp = ITOZ(ip);
429 nvlist_t *nvl;
430 size_t sa_size;
431 int error;
432
433 ASSERT(zp->z_xattr_cached);
434 nvl = zp->z_xattr_cached;
435
436 if (value == NULL) {
437 error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
438 if (error == -ENOENT)
439 error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
440 } else {
441 /* Limited to 32k to keep nvpair memory allocations small */
442 if (size > DXATTR_MAX_ENTRY_SIZE)
443 return (-EFBIG);
444
445 /* Prevent the DXATTR SA from consuming the entire SA region */
446 error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
447 if (error)
448 return (error);
449
450 if (sa_size > DXATTR_MAX_SA_SIZE)
451 return (-EFBIG);
452
453 error = -nvlist_add_byte_array(nvl, name,
454 (uchar_t *)value, size);
455 if (error)
456 return (error);
457 }
458
459 /* Update the SA for additions, modifications, and removals. */
460 if (!error)
461 error = -zfs_sa_set_xattr(zp);
462
463 ASSERT3S(error, <=, 0);
464
465 return (error);
466 }
467
468 static int
469 zpl_xattr_set(struct inode *ip, const char *name, const void *value,
470 size_t size, int flags)
471 {
472 znode_t *zp = ITOZ(ip);
473 zfs_sb_t *zsb = ZTOZSB(zp);
474 cred_t *cr = CRED();
475 int error;
476
477 crhold(cr);
478 rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);
479
480 /*
481 * Before setting the xattr check to see if it already exists.
482 * This is done to ensure the following optional flags are honored.
483 *
484 * XATTR_CREATE: fail if xattr already exists
485 * XATTR_REPLACE: fail if xattr does not exist
486 */
487 error = __zpl_xattr_get(ip, name, NULL, 0, cr);
488 if (error < 0) {
489 if (error != -ENODATA)
490 goto out;
491
492 if ((error == -ENODATA) && (flags & XATTR_REPLACE))
493 goto out;
494 } else {
495 error = -EEXIST;
496 if (flags & XATTR_CREATE)
497 goto out;
498 }
499
500 /* Preferentially store the xattr as a SA for better performance */
501 if (zsb->z_use_sa && zsb->z_xattr_sa && zp->z_is_sa) {
502 error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
503 if (error == 0)
504 goto out;
505 }
506
507 error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
508 out:
509 rw_exit(&ITOZ(ip)->z_xattr_lock);
510 crfree(cr);
511 ASSERT3S(error, <=, 0);
512
513 return (error);
514 }
515
516 static int
517 __zpl_xattr_user_get(struct inode *ip, const char *name,
518 void *value, size_t size)
519 {
520 char *xattr_name;
521 int error;
522
523 if (strcmp(name, "") == 0)
524 return -EINVAL;
525
526 if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
527 return -EOPNOTSUPP;
528
529 xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
530 error = zpl_xattr_get(ip, xattr_name, value, size);
531 strfree(xattr_name);
532
533 return (error);
534 }
535 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
536
537 static int
538 __zpl_xattr_user_set(struct inode *ip, const char *name,
539 const void *value, size_t size, int flags)
540 {
541 char *xattr_name;
542 int error;
543
544 if (strcmp(name, "") == 0)
545 return -EINVAL;
546
547 if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
548 return -EOPNOTSUPP;
549
550 xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
551 error = zpl_xattr_set(ip, xattr_name, value, size, flags);
552 strfree(xattr_name);
553
554 return (error);
555 }
556 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
557
558 xattr_handler_t zpl_xattr_user_handler = {
559 .prefix = XATTR_USER_PREFIX,
560 .get = zpl_xattr_user_get,
561 .set = zpl_xattr_user_set,
562 };
563
564 static int
565 __zpl_xattr_trusted_get(struct inode *ip, const char *name,
566 void *value, size_t size)
567 {
568 char *xattr_name;
569 int error;
570
571 if (!capable(CAP_SYS_ADMIN))
572 return -EACCES;
573
574 if (strcmp(name, "") == 0)
575 return -EINVAL;
576
577 xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
578 error = zpl_xattr_get(ip, xattr_name, value, size);
579 strfree(xattr_name);
580
581 return (error);
582 }
583 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
584
585 static int
586 __zpl_xattr_trusted_set(struct inode *ip, const char *name,
587 const void *value, size_t size, int flags)
588 {
589 char *xattr_name;
590 int error;
591
592 if (!capable(CAP_SYS_ADMIN))
593 return -EACCES;
594
595 if (strcmp(name, "") == 0)
596 return -EINVAL;
597
598 xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
599 error = zpl_xattr_set(ip, xattr_name, value, size, flags);
600 strfree(xattr_name);
601
602 return (error);
603 }
604 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
605
606 xattr_handler_t zpl_xattr_trusted_handler = {
607 .prefix = XATTR_TRUSTED_PREFIX,
608 .get = zpl_xattr_trusted_get,
609 .set = zpl_xattr_trusted_set,
610 };
611
612 static int
613 __zpl_xattr_security_get(struct inode *ip, const char *name,
614 void *value, size_t size)
615 {
616 char *xattr_name;
617 int error;
618
619 if (strcmp(name, "") == 0)
620 return -EINVAL;
621
622 xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
623 error = zpl_xattr_get(ip, xattr_name, value, size);
624 strfree(xattr_name);
625
626 return (error);
627 }
628 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
629
630 static int
631 __zpl_xattr_security_set(struct inode *ip, const char *name,
632 const void *value, size_t size, int flags)
633 {
634 char *xattr_name;
635 int error;
636
637 if (strcmp(name, "") == 0)
638 return -EINVAL;
639
640 xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
641 error = zpl_xattr_set(ip, xattr_name, value, size, flags);
642 strfree(xattr_name);
643
644 return (error);
645 }
646 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
647
648 #ifdef HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY
649 static int
650 __zpl_xattr_security_init(struct inode *ip, const struct xattr *xattrs,
651 void *fs_info)
652 {
653 const struct xattr *xattr;
654 int error = 0;
655
656 for (xattr = xattrs; xattr->name != NULL; xattr++) {
657 error = __zpl_xattr_security_set(ip,
658 xattr->name, xattr->value, xattr->value_len, 0);
659
660 if (error < 0)
661 break;
662 }
663
664 return (error);
665 }
666
667 int
668 zpl_xattr_security_init(struct inode *ip, struct inode *dip,
669 const struct qstr *qstr)
670 {
671 return security_inode_init_security(ip, dip, qstr,
672 &__zpl_xattr_security_init, NULL);
673 }
674
675 #else
/*
 * Initialize the security xattr of 'ip' when the callback based
 * security_inode_init_security() interface is not available.
 *
 * A single name/value pair is obtained from
 * zpl_security_inode_init_security() and stored through
 * __zpl_xattr_security_set().  The name and value are released with
 * kfree() afterwards.
 *
 * Returns 0 on success or when the request reports -EOPNOTSUPP,
 * otherwise a negative error.
 */
int
zpl_xattr_security_init(struct inode *ip, struct inode *dip,
    const struct qstr *qstr)
{
	char *name;
	void *value;
	size_t len;
	int rc;

	rc = zpl_security_inode_init_security(ip, dip, qstr,
	    &name, &value, &len);
	if (rc == -EOPNOTSUPP)
		return (0);
	if (rc)
		return (rc);

	rc = __zpl_xattr_security_set(ip, name, value, len, 0);

	kfree(name);
	kfree(value);

	return (rc);
}
700 #endif /* HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY */
701
702 xattr_handler_t zpl_xattr_security_handler = {
703 .prefix = XATTR_SECURITY_PREFIX,
704 .get = zpl_xattr_security_get,
705 .set = zpl_xattr_security_set,
706 };
707
708 xattr_handler_t *zpl_xattr_handlers[] = {
709 &zpl_xattr_security_handler,
710 &zpl_xattr_trusted_handler,
711 &zpl_xattr_user_handler,
712 #ifdef HAVE_POSIX_ACLS
713 &zpl_xattr_acl_access_handler,
714 &zpl_xattr_acl_default_handler,
715 #endif /* HAVE_POSIX_ACLS */
716 NULL
717 };