4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
25 #include <sys/zfs_context.h>
29 #include <sys/refcount.h>
30 #include <sys/nvpair.h>
32 #include <sys/kidmap.h>
34 #include <sys/zfs_vfsops.h>
35 #include <sys/zfs_znode.h>
37 #include <sys/zfs_fuid.h>
/*
 * FUID Domain table(s).
 *
 * The FUID table is stored as a packed nvlist of an array
 * of nvlists which contain an index, domain string and offset.
 *
 * During file system initialization the nvlist(s) are read and
 * two AVL trees are created.  One tree is keyed by the index number
 * and the other by the domain string.  Nodes are never removed from
 * trees, but new entries may be added.  If a new entry is added then
 * the zsb->z_fuid_dirty flag is set to true and the caller will then
 * be responsible for calling zfs_fuid_sync() to sync the changes to disk.
 */
/*
 * nvpair names used in the packed FUID table.  FUID_NVP_ARRAY names the
 * nvlist array itself; the other three name the members written into each
 * array element by zfs_fuid_sync().
 */
#define	FUID_IDX	"fuid_idx"
#define	FUID_DOMAIN	"fuid_domain"
#define	FUID_OFFSET	"fuid_offset"
#define	FUID_NVP_ARRAY	"fuid_nvlist"
59 typedef struct fuid_domain
{
/*
 * Shared empty domain string.  Handed out for the empty ("nobody") domain
 * and when an index has no entry in the table.
 */
static char *nulldomain = "";
69 * Compare two indexes.
72 idx_compare(const void *arg1
, const void *arg2
)
74 const fuid_domain_t
*node1
= (const fuid_domain_t
*)arg1
;
75 const fuid_domain_t
*node2
= (const fuid_domain_t
*)arg2
;
77 return (AVL_CMP(node1
->f_idx
, node2
->f_idx
));
81 * Compare two domain strings.
84 domain_compare(const void *arg1
, const void *arg2
)
86 const fuid_domain_t
*node1
= (const fuid_domain_t
*)arg1
;
87 const fuid_domain_t
*node2
= (const fuid_domain_t
*)arg2
;
90 val
= strcmp(node1
->f_ksid
->kd_name
, node2
->f_ksid
->kd_name
);
92 return (AVL_ISIGN(val
));
96 zfs_fuid_avl_tree_create(avl_tree_t
*idx_tree
, avl_tree_t
*domain_tree
)
98 avl_create(idx_tree
, idx_compare
,
99 sizeof (fuid_domain_t
), offsetof(fuid_domain_t
, f_idxnode
));
100 avl_create(domain_tree
, domain_compare
,
101 sizeof (fuid_domain_t
), offsetof(fuid_domain_t
, f_domnode
));
105 * load initial fuid domain and idx trees. This function is used by
106 * both the kernel and zdb.
109 zfs_fuid_table_load(objset_t
*os
, uint64_t fuid_obj
, avl_tree_t
*idx_tree
,
110 avl_tree_t
*domain_tree
)
115 ASSERT(fuid_obj
!= 0);
116 VERIFY(0 == dmu_bonus_hold(os
, fuid_obj
,
118 fuid_size
= *(uint64_t *)db
->db_data
;
119 dmu_buf_rele(db
, FTAG
);
123 nvlist_t
*nvp
= NULL
;
128 packed
= kmem_alloc(fuid_size
, KM_SLEEP
);
129 VERIFY(dmu_read(os
, fuid_obj
, 0,
130 fuid_size
, packed
, DMU_READ_PREFETCH
) == 0);
131 VERIFY(nvlist_unpack(packed
, fuid_size
,
133 VERIFY(nvlist_lookup_nvlist_array(nvp
, FUID_NVP_ARRAY
,
134 &fuidnvp
, &count
) == 0);
136 for (i
= 0; i
!= count
; i
++) {
137 fuid_domain_t
*domnode
;
141 VERIFY(nvlist_lookup_string(fuidnvp
[i
], FUID_DOMAIN
,
143 VERIFY(nvlist_lookup_uint64(fuidnvp
[i
], FUID_IDX
,
146 domnode
= kmem_alloc(sizeof (fuid_domain_t
), KM_SLEEP
);
148 domnode
->f_idx
= idx
;
149 domnode
->f_ksid
= ksid_lookupdomain(domain
);
150 avl_add(idx_tree
, domnode
);
151 avl_add(domain_tree
, domnode
);
154 kmem_free(packed
, fuid_size
);
160 zfs_fuid_table_destroy(avl_tree_t
*idx_tree
, avl_tree_t
*domain_tree
)
162 fuid_domain_t
*domnode
;
166 while ((domnode
= avl_destroy_nodes(domain_tree
, &cookie
)))
167 ksiddomain_rele(domnode
->f_ksid
);
169 avl_destroy(domain_tree
);
171 while ((domnode
= avl_destroy_nodes(idx_tree
, &cookie
)))
172 kmem_free(domnode
, sizeof (fuid_domain_t
));
173 avl_destroy(idx_tree
);
177 zfs_fuid_idx_domain(avl_tree_t
*idx_tree
, uint32_t idx
)
179 fuid_domain_t searchnode
, *findnode
;
182 searchnode
.f_idx
= idx
;
184 findnode
= avl_find(idx_tree
, &searchnode
, &loc
);
186 return (findnode
? findnode
->f_ksid
->kd_name
: nulldomain
);
191 * Load the fuid table(s) into memory.
194 zfs_fuid_init(zfs_sb_t
*zsb
)
196 rw_enter(&zsb
->z_fuid_lock
, RW_WRITER
);
198 if (zsb
->z_fuid_loaded
) {
199 rw_exit(&zsb
->z_fuid_lock
);
203 zfs_fuid_avl_tree_create(&zsb
->z_fuid_idx
, &zsb
->z_fuid_domain
);
205 (void) zap_lookup(zsb
->z_os
, MASTER_NODE_OBJ
,
206 ZFS_FUID_TABLES
, 8, 1, &zsb
->z_fuid_obj
);
207 if (zsb
->z_fuid_obj
!= 0) {
208 zsb
->z_fuid_size
= zfs_fuid_table_load(zsb
->z_os
,
209 zsb
->z_fuid_obj
, &zsb
->z_fuid_idx
,
210 &zsb
->z_fuid_domain
);
213 zsb
->z_fuid_loaded
= B_TRUE
;
214 rw_exit(&zsb
->z_fuid_lock
);
218 * sync out AVL trees to persistent storage.
221 zfs_fuid_sync(zfs_sb_t
*zsb
, dmu_tx_t
*tx
)
228 fuid_domain_t
*domnode
;
232 if (!zsb
->z_fuid_dirty
) {
236 rw_enter(&zsb
->z_fuid_lock
, RW_WRITER
);
239 * First see if table needs to be created?
241 if (zsb
->z_fuid_obj
== 0) {
242 zsb
->z_fuid_obj
= dmu_object_alloc(zsb
->z_os
,
243 DMU_OT_FUID
, 1 << 14, DMU_OT_FUID_SIZE
,
244 sizeof (uint64_t), tx
);
245 VERIFY(zap_add(zsb
->z_os
, MASTER_NODE_OBJ
,
246 ZFS_FUID_TABLES
, sizeof (uint64_t), 1,
247 &zsb
->z_fuid_obj
, tx
) == 0);
250 VERIFY(nvlist_alloc(&nvp
, NV_UNIQUE_NAME
, KM_SLEEP
) == 0);
252 numnodes
= avl_numnodes(&zsb
->z_fuid_idx
);
253 fuids
= kmem_alloc(numnodes
* sizeof (void *), KM_SLEEP
);
254 for (i
= 0, domnode
= avl_first(&zsb
->z_fuid_domain
); domnode
; i
++,
255 domnode
= AVL_NEXT(&zsb
->z_fuid_domain
, domnode
)) {
256 VERIFY(nvlist_alloc(&fuids
[i
], NV_UNIQUE_NAME
, KM_SLEEP
) == 0);
257 VERIFY(nvlist_add_uint64(fuids
[i
], FUID_IDX
,
258 domnode
->f_idx
) == 0);
259 VERIFY(nvlist_add_uint64(fuids
[i
], FUID_OFFSET
, 0) == 0);
260 VERIFY(nvlist_add_string(fuids
[i
], FUID_DOMAIN
,
261 domnode
->f_ksid
->kd_name
) == 0);
263 VERIFY(nvlist_add_nvlist_array(nvp
, FUID_NVP_ARRAY
,
264 fuids
, numnodes
) == 0);
265 for (i
= 0; i
!= numnodes
; i
++)
266 nvlist_free(fuids
[i
]);
267 kmem_free(fuids
, numnodes
* sizeof (void *));
268 VERIFY(nvlist_size(nvp
, &nvsize
, NV_ENCODE_XDR
) == 0);
269 packed
= kmem_alloc(nvsize
, KM_SLEEP
);
270 VERIFY(nvlist_pack(nvp
, &packed
, &nvsize
,
271 NV_ENCODE_XDR
, KM_SLEEP
) == 0);
273 zsb
->z_fuid_size
= nvsize
;
274 dmu_write(zsb
->z_os
, zsb
->z_fuid_obj
, 0, zsb
->z_fuid_size
, packed
, tx
);
275 kmem_free(packed
, zsb
->z_fuid_size
);
276 VERIFY(0 == dmu_bonus_hold(zsb
->z_os
, zsb
->z_fuid_obj
,
278 dmu_buf_will_dirty(db
, tx
);
279 *(uint64_t *)db
->db_data
= zsb
->z_fuid_size
;
280 dmu_buf_rele(db
, FTAG
);
282 zsb
->z_fuid_dirty
= B_FALSE
;
283 rw_exit(&zsb
->z_fuid_lock
);
287 * Query domain table for a given domain.
289 * If domain isn't found and addok is set, it is added to AVL trees and
290 * the zsb->z_fuid_dirty flag will be set to TRUE. It will then be
291 * necessary for the caller or another thread to detect the dirty table
292 * and sync out the changes.
295 zfs_fuid_find_by_domain(zfs_sb_t
*zsb
, const char *domain
,
296 char **retdomain
, boolean_t addok
)
298 fuid_domain_t searchnode
, *findnode
;
300 krw_t rw
= RW_READER
;
303 * If the dummy "nobody" domain then return an index of 0
304 * to cause the created FUID to be a standard POSIX id
305 * for the user nobody.
307 if (domain
[0] == '\0') {
309 *retdomain
= nulldomain
;
313 searchnode
.f_ksid
= ksid_lookupdomain(domain
);
315 *retdomain
= searchnode
.f_ksid
->kd_name
;
316 if (!zsb
->z_fuid_loaded
)
320 rw_enter(&zsb
->z_fuid_lock
, rw
);
321 findnode
= avl_find(&zsb
->z_fuid_domain
, &searchnode
, &loc
);
324 rw_exit(&zsb
->z_fuid_lock
);
325 ksiddomain_rele(searchnode
.f_ksid
);
326 return (findnode
->f_idx
);
328 fuid_domain_t
*domnode
;
331 if (rw
== RW_READER
&& !rw_tryupgrade(&zsb
->z_fuid_lock
)) {
332 rw_exit(&zsb
->z_fuid_lock
);
337 domnode
= kmem_alloc(sizeof (fuid_domain_t
), KM_SLEEP
);
338 domnode
->f_ksid
= searchnode
.f_ksid
;
340 retidx
= domnode
->f_idx
= avl_numnodes(&zsb
->z_fuid_idx
) + 1;
342 avl_add(&zsb
->z_fuid_domain
, domnode
);
343 avl_add(&zsb
->z_fuid_idx
, domnode
);
344 zsb
->z_fuid_dirty
= B_TRUE
;
345 rw_exit(&zsb
->z_fuid_lock
);
348 rw_exit(&zsb
->z_fuid_lock
);
354 * Query domain table by index, returning domain string
356 * Returns a pointer from an avl node of the domain string.
360 zfs_fuid_find_by_idx(zfs_sb_t
*zsb
, uint32_t idx
)
364 if (idx
== 0 || !zsb
->z_use_fuids
)
367 if (!zsb
->z_fuid_loaded
)
370 rw_enter(&zsb
->z_fuid_lock
, RW_READER
);
372 if (zsb
->z_fuid_obj
|| zsb
->z_fuid_dirty
)
373 domain
= zfs_fuid_idx_domain(&zsb
->z_fuid_idx
, idx
);
376 rw_exit(&zsb
->z_fuid_lock
);
383 zfs_fuid_map_ids(znode_t
*zp
, cred_t
*cr
, uid_t
*uidp
, uid_t
*gidp
)
385 *uidp
= zfs_fuid_map_id(ZTOZSB(zp
), KUID_TO_SUID(ZTOI(zp
)->i_uid
),
387 *gidp
= zfs_fuid_map_id(ZTOZSB(zp
), KGID_TO_SGID(ZTOI(zp
)->i_gid
),
392 zfs_fuid_map_id(zfs_sb_t
*zsb
, uint64_t fuid
,
393 cred_t
*cr
, zfs_fuid_type_t type
)
396 uint32_t index
= FUID_INDEX(fuid
);
403 domain
= zfs_fuid_find_by_idx(zsb
, index
);
404 ASSERT(domain
!= NULL
);
406 if (type
== ZFS_OWNER
|| type
== ZFS_ACE_USER
) {
407 (void) kidmap_getuidbysid(crgetzone(cr
), domain
,
408 FUID_RID(fuid
), &id
);
410 (void) kidmap_getgidbysid(crgetzone(cr
), domain
,
411 FUID_RID(fuid
), &id
);
416 * The Linux port only supports POSIX IDs, use the passed id.
419 #endif /* HAVE_KSID */
423 * Add a FUID node to the list of fuid's being created for this
426 * If ACL has multiple domains, then keep only one copy of each unique
430 zfs_fuid_node_add(zfs_fuid_info_t
**fuidpp
, const char *domain
, uint32_t rid
,
431 uint64_t idx
, uint64_t id
, zfs_fuid_type_t type
)
434 zfs_fuid_domain_t
*fuid_domain
;
435 zfs_fuid_info_t
*fuidp
;
437 boolean_t found
= B_FALSE
;
440 *fuidpp
= zfs_fuid_info_alloc();
444 * First find fuid domain index in linked list
446 * If one isn't found then create an entry.
449 for (fuididx
= 1, fuid_domain
= list_head(&fuidp
->z_domains
);
450 fuid_domain
; fuid_domain
= list_next(&fuidp
->z_domains
,
451 fuid_domain
), fuididx
++) {
452 if (idx
== fuid_domain
->z_domidx
) {
459 fuid_domain
= kmem_alloc(sizeof (zfs_fuid_domain_t
), KM_SLEEP
);
460 fuid_domain
->z_domain
= domain
;
461 fuid_domain
->z_domidx
= idx
;
462 list_insert_tail(&fuidp
->z_domains
, fuid_domain
);
463 fuidp
->z_domain_str_sz
+= strlen(domain
) + 1;
464 fuidp
->z_domain_cnt
++;
467 if (type
== ZFS_ACE_USER
|| type
== ZFS_ACE_GROUP
) {
470 * Now allocate fuid entry and add it on the end of the list
473 fuid
= kmem_alloc(sizeof (zfs_fuid_t
), KM_SLEEP
);
475 fuid
->z_domidx
= idx
;
476 fuid
->z_logfuid
= FUID_ENCODE(fuididx
, rid
);
478 list_insert_tail(&fuidp
->z_fuids
, fuid
);
481 if (type
== ZFS_OWNER
)
482 fuidp
->z_fuid_owner
= FUID_ENCODE(fuididx
, rid
);
484 fuidp
->z_fuid_group
= FUID_ENCODE(fuididx
, rid
);
#ifdef HAVE_KSID
/*
 * Create a file system FUID, based on information in the users cred
 *
 * If cred contains KSID_OWNER then it should be used to determine
 * the uid otherwise cred's uid will be used. By default cred's gid
 * is used unless it's an ephemeral ID in which case KSID_GROUP will
 * be used if it exists.
 *
 * type must be ZFS_OWNER or ZFS_GROUP.  When a FUID is created, the
 * domain is added to the file system's table if needed and a node is
 * recorded in *fuidp through zfs_fuid_node_add().
 */
uint64_t
zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type,
    cred_t *cr, zfs_fuid_info_t **fuidp)
{
	uint64_t	idx;
	ksid_t		*ksid;
	uint32_t	rid;
	char		*kdomain;
	const char	*domain;
	uid_t		id;
	boolean_t	owner;

	VERIFY(type == ZFS_OWNER || type == ZFS_GROUP);
	owner = (type == ZFS_OWNER);

	ksid = crgetsid(cr, owner ? KSID_OWNER : KSID_GROUP);

	if (!zsb->z_use_fuids || (ksid == NULL)) {
		/* No SID available, or FUIDs unsupported: plain POSIX id. */
		id = owner ? crgetuid(cr) : crgetgid(cr);

		if (IS_EPHEMERAL(id))
			return (owner ? UID_NOBODY : GID_NOBODY);

		return ((uint64_t)id);
	}

	/*
	 * ksid is present and FUID is supported
	 */
	id = owner ? ksid_getid(ksid) : crgetgid(cr);

	if (!IS_EPHEMERAL(id))
		return ((uint64_t)id);

	if (!owner)
		id = ksid_getid(ksid);

	rid = ksid_getrid(ksid);
	domain = ksid_getdomain(ksid);

	idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE);

	zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);

	return (FUID_ENCODE(idx, rid));
}
#endif /* HAVE_KSID */
544 * Create a file system FUID for an ACL ace
545 * or a chown/chgrp of the file.
546 * This is similar to zfs_fuid_create_cred, except that
547 * we can't find the domain + rid information in the
548 * cred. Instead we have to query Winchester for the
551 * During replay operations the domain+rid information is
552 * found in the zfs_fuid_info_t that the replay code has
553 * attached to the zsb of the file system.
556 zfs_fuid_create(zfs_sb_t
*zsb
, uint64_t id
, cred_t
*cr
,
557 zfs_fuid_type_t type
, zfs_fuid_info_t
**fuidpp
)
562 uint32_t fuid_idx
= FUID_INDEX(id
);
566 zfs_fuid_t
*zfuid
= NULL
;
567 zfs_fuid_info_t
*fuidp
= NULL
;
570 * If POSIX ID, or entry is already a FUID then
573 * We may also be handed an already FUID'ized id via
577 if (!zsb
->z_use_fuids
|| !IS_EPHEMERAL(id
) || fuid_idx
!= 0)
581 fuidp
= zsb
->z_fuid_replay
;
584 * If we are passed an ephemeral id, but no
585 * fuid_info was logged then return NOBODY.
586 * This is most likely a result of idmap service
587 * not being available.
592 VERIFY3U(type
, >=, ZFS_OWNER
);
593 VERIFY3U(type
, <=, ZFS_ACE_GROUP
);
598 zfuid
= list_head(&fuidp
->z_fuids
);
599 rid
= FUID_RID(zfuid
->z_logfuid
);
600 idx
= FUID_INDEX(zfuid
->z_logfuid
);
603 rid
= FUID_RID(fuidp
->z_fuid_owner
);
604 idx
= FUID_INDEX(fuidp
->z_fuid_owner
);
607 rid
= FUID_RID(fuidp
->z_fuid_group
);
608 idx
= FUID_INDEX(fuidp
->z_fuid_group
);
611 domain
= fuidp
->z_domain_table
[idx
- 1];
613 if (type
== ZFS_OWNER
|| type
== ZFS_ACE_USER
)
614 status
= kidmap_getsidbyuid(crgetzone(cr
), id
,
617 status
= kidmap_getsidbygid(crgetzone(cr
), id
,
622 * When returning nobody we will need to
623 * make a dummy fuid table entry for logging
631 idx
= zfs_fuid_find_by_domain(zsb
, domain
, &kdomain
, B_TRUE
);
634 zfs_fuid_node_add(fuidpp
, kdomain
,
636 else if (zfuid
!= NULL
) {
637 list_remove(&fuidp
->z_fuids
, zfuid
);
638 kmem_free(zfuid
, sizeof (zfs_fuid_t
));
640 return (FUID_ENCODE(idx
, rid
));
643 * The Linux port only supports POSIX IDs, use the passed id.
650 zfs_fuid_destroy(zfs_sb_t
*zsb
)
652 rw_enter(&zsb
->z_fuid_lock
, RW_WRITER
);
653 if (!zsb
->z_fuid_loaded
) {
654 rw_exit(&zsb
->z_fuid_lock
);
657 zfs_fuid_table_destroy(&zsb
->z_fuid_idx
, &zsb
->z_fuid_domain
);
658 rw_exit(&zsb
->z_fuid_lock
);
662 * Allocate zfs_fuid_info for tracking FUIDs created during
663 * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR()
666 zfs_fuid_info_alloc(void)
668 zfs_fuid_info_t
*fuidp
;
670 fuidp
= kmem_zalloc(sizeof (zfs_fuid_info_t
), KM_SLEEP
);
671 list_create(&fuidp
->z_domains
, sizeof (zfs_fuid_domain_t
),
672 offsetof(zfs_fuid_domain_t
, z_next
));
673 list_create(&fuidp
->z_fuids
, sizeof (zfs_fuid_t
),
674 offsetof(zfs_fuid_t
, z_next
));
679 * Release all memory associated with zfs_fuid_info_t
682 zfs_fuid_info_free(zfs_fuid_info_t
*fuidp
)
685 zfs_fuid_domain_t
*zdomain
;
687 while ((zfuid
= list_head(&fuidp
->z_fuids
)) != NULL
) {
688 list_remove(&fuidp
->z_fuids
, zfuid
);
689 kmem_free(zfuid
, sizeof (zfs_fuid_t
));
692 if (fuidp
->z_domain_table
!= NULL
)
693 kmem_free(fuidp
->z_domain_table
,
694 (sizeof (char **)) * fuidp
->z_domain_cnt
);
696 while ((zdomain
= list_head(&fuidp
->z_domains
)) != NULL
) {
697 list_remove(&fuidp
->z_domains
, zdomain
);
698 kmem_free(zdomain
, sizeof (zfs_fuid_domain_t
));
701 kmem_free(fuidp
, sizeof (zfs_fuid_info_t
));
705 * Check to see if id is a groupmember. If cred
706 * has ksid info then sidlist is checked first
707 * and if still not found then POSIX groups are checked
709 * Will use a straight FUID compare when possible.
712 zfs_groupmember(zfs_sb_t
*zsb
, uint64_t id
, cred_t
*cr
)
715 ksid_t
*ksid
= crgetsid(cr
, KSID_GROUP
);
716 ksidlist_t
*ksidlist
= crgetsidlist(cr
);
719 if (ksid
&& ksidlist
) {
722 uint32_t idx
= FUID_INDEX(id
);
723 uint32_t rid
= FUID_RID(id
);
725 ksid_groups
= ksidlist
->ksl_sids
;
727 for (i
= 0; i
!= ksidlist
->ksl_nsid
; i
++) {
729 if (id
!= IDMAP_WK_CREATOR_GROUP_GID
&&
730 id
== ksid_groups
[i
].ks_id
) {
736 domain
= zfs_fuid_find_by_idx(zsb
, idx
);
737 ASSERT(domain
!= NULL
);
740 IDMAP_WK_CREATOR_SID_AUTHORITY
) == 0)
744 ksid_groups
[i
].ks_domain
->kd_name
) == 0) &&
745 rid
== ksid_groups
[i
].ks_rid
)
752 * Not found in ksidlist, check posix groups
754 gid
= zfs_fuid_map_id(zsb
, id
, cr
, ZFS_GROUP
);
755 return (groupmember(gid
, cr
));
762 zfs_fuid_txhold(zfs_sb_t
*zsb
, dmu_tx_t
*tx
)
764 if (zsb
->z_fuid_obj
== 0) {
765 dmu_tx_hold_bonus(tx
, DMU_NEW_OBJECT
);
766 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
, 0,
767 FUID_SIZE_ESTIMATE(zsb
));
768 dmu_tx_hold_zap(tx
, MASTER_NODE_OBJ
, FALSE
, NULL
);
770 dmu_tx_hold_bonus(tx
, zsb
->z_fuid_obj
);
771 dmu_tx_hold_write(tx
, zsb
->z_fuid_obj
, 0,
772 FUID_SIZE_ESTIMATE(zsb
));