4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/zfs_context.h>
27 #include <sys/sunddi.h>
31 #include <sys/refcount.h>
32 #include <sys/nvpair.h>
34 #include <sys/kidmap.h>
36 #include <sys/zfs_vfsops.h>
37 #include <sys/zfs_znode.h>
39 #include <sys/zfs_fuid.h>
/*
 * FUID Domain table(s).
 *
 * The FUID table is stored as a packed nvlist of an array
 * of nvlists which contain an index, domain string and offset.
 *
 * During file system initialization the nvlist(s) are read and
 * two AVL trees are created.  One tree is keyed by the index number
 * and the other by the domain string.  Nodes are never removed from
 * the trees, but new entries may be added.  If a new entry is added then
 * the zfsvfs->z_fuid_dirty flag is set to true and the caller will then
 * be responsible for calling zfs_fuid_sync() to sync the changes to disk.
 */
/*
 * Names of the nvpairs used in the packed FUID table: the outer nvlist
 * holds one nvlist array (FUID_NVP_ARRAY) whose elements each carry an
 * index, a domain string and an offset.
 */
#define	FUID_IDX	"fuid_idx"
#define	FUID_DOMAIN	"fuid_domain"
#define	FUID_OFFSET	"fuid_offset"
#define	FUID_NVP_ARRAY	"fuid_nvlist"
61 typedef struct fuid_domain
{
/* Empty domain string handed back when no real domain applies. */
static char *nulldomain = "";
71 * Compare two indexes.
74 idx_compare(const void *arg1
, const void *arg2
)
76 const fuid_domain_t
*node1
= arg1
;
77 const fuid_domain_t
*node2
= arg2
;
79 if (node1
->f_idx
< node2
->f_idx
)
81 else if (node1
->f_idx
> node2
->f_idx
)
87 * Compare two domain strings.
90 domain_compare(const void *arg1
, const void *arg2
)
92 const fuid_domain_t
*node1
= arg1
;
93 const fuid_domain_t
*node2
= arg2
;
96 val
= strcmp(node1
->f_ksid
->kd_name
, node2
->f_ksid
->kd_name
);
99 return (val
> 0 ? 1 : -1);
103 zfs_fuid_avl_tree_create(avl_tree_t
*idx_tree
, avl_tree_t
*domain_tree
)
105 avl_create(idx_tree
, idx_compare
,
106 sizeof (fuid_domain_t
), offsetof(fuid_domain_t
, f_idxnode
));
107 avl_create(domain_tree
, domain_compare
,
108 sizeof (fuid_domain_t
), offsetof(fuid_domain_t
, f_domnode
));
112 * load initial fuid domain and idx trees. This function is used by
113 * both the kernel and zdb.
116 zfs_fuid_table_load(objset_t
*os
, uint64_t fuid_obj
, avl_tree_t
*idx_tree
,
117 avl_tree_t
*domain_tree
)
122 ASSERT(fuid_obj
!= 0);
123 VERIFY(0 == dmu_bonus_hold(os
, fuid_obj
,
125 fuid_size
= *(uint64_t *)db
->db_data
;
126 dmu_buf_rele(db
, FTAG
);
130 nvlist_t
*nvp
= NULL
;
135 packed
= kmem_alloc(fuid_size
, KM_SLEEP
);
136 VERIFY(dmu_read(os
, fuid_obj
, 0,
137 fuid_size
, packed
, DMU_READ_PREFETCH
) == 0);
138 VERIFY(nvlist_unpack(packed
, fuid_size
,
140 VERIFY(nvlist_lookup_nvlist_array(nvp
, FUID_NVP_ARRAY
,
141 &fuidnvp
, &count
) == 0);
143 for (i
= 0; i
!= count
; i
++) {
144 fuid_domain_t
*domnode
;
148 VERIFY(nvlist_lookup_string(fuidnvp
[i
], FUID_DOMAIN
,
150 VERIFY(nvlist_lookup_uint64(fuidnvp
[i
], FUID_IDX
,
153 domnode
= kmem_alloc(sizeof (fuid_domain_t
), KM_SLEEP
);
155 domnode
->f_idx
= idx
;
156 domnode
->f_ksid
= ksid_lookupdomain(domain
);
157 avl_add(idx_tree
, domnode
);
158 avl_add(domain_tree
, domnode
);
161 kmem_free(packed
, fuid_size
);
167 zfs_fuid_table_destroy(avl_tree_t
*idx_tree
, avl_tree_t
*domain_tree
)
169 fuid_domain_t
*domnode
;
173 while (domnode
= avl_destroy_nodes(domain_tree
, &cookie
))
174 ksiddomain_rele(domnode
->f_ksid
);
176 avl_destroy(domain_tree
);
178 while (domnode
= avl_destroy_nodes(idx_tree
, &cookie
))
179 kmem_free(domnode
, sizeof (fuid_domain_t
));
180 avl_destroy(idx_tree
);
184 zfs_fuid_idx_domain(avl_tree_t
*idx_tree
, uint32_t idx
)
186 fuid_domain_t searchnode
, *findnode
;
189 searchnode
.f_idx
= idx
;
191 findnode
= avl_find(idx_tree
, &searchnode
, &loc
);
193 return (findnode
? findnode
->f_ksid
->kd_name
: nulldomain
);
198 * Load the fuid table(s) into memory.
201 zfs_fuid_init(zfsvfs_t
*zfsvfs
)
203 rw_enter(&zfsvfs
->z_fuid_lock
, RW_WRITER
);
205 if (zfsvfs
->z_fuid_loaded
) {
206 rw_exit(&zfsvfs
->z_fuid_lock
);
210 zfs_fuid_avl_tree_create(&zfsvfs
->z_fuid_idx
, &zfsvfs
->z_fuid_domain
);
212 (void) zap_lookup(zfsvfs
->z_os
, MASTER_NODE_OBJ
,
213 ZFS_FUID_TABLES
, 8, 1, &zfsvfs
->z_fuid_obj
);
214 if (zfsvfs
->z_fuid_obj
!= 0) {
215 zfsvfs
->z_fuid_size
= zfs_fuid_table_load(zfsvfs
->z_os
,
216 zfsvfs
->z_fuid_obj
, &zfsvfs
->z_fuid_idx
,
217 &zfsvfs
->z_fuid_domain
);
220 zfsvfs
->z_fuid_loaded
= B_TRUE
;
221 rw_exit(&zfsvfs
->z_fuid_lock
);
225 * sync out AVL trees to persistent storage.
228 zfs_fuid_sync(zfsvfs_t
*zfsvfs
, dmu_tx_t
*tx
)
235 fuid_domain_t
*domnode
;
239 if (!zfsvfs
->z_fuid_dirty
) {
243 rw_enter(&zfsvfs
->z_fuid_lock
, RW_WRITER
);
246 * First see if table needs to be created?
248 if (zfsvfs
->z_fuid_obj
== 0) {
249 zfsvfs
->z_fuid_obj
= dmu_object_alloc(zfsvfs
->z_os
,
250 DMU_OT_FUID
, 1 << 14, DMU_OT_FUID_SIZE
,
251 sizeof (uint64_t), tx
);
252 VERIFY(zap_add(zfsvfs
->z_os
, MASTER_NODE_OBJ
,
253 ZFS_FUID_TABLES
, sizeof (uint64_t), 1,
254 &zfsvfs
->z_fuid_obj
, tx
) == 0);
257 VERIFY(nvlist_alloc(&nvp
, NV_UNIQUE_NAME
, KM_SLEEP
) == 0);
259 numnodes
= avl_numnodes(&zfsvfs
->z_fuid_idx
);
260 fuids
= kmem_alloc(numnodes
* sizeof (void *), KM_SLEEP
);
261 for (i
= 0, domnode
= avl_first(&zfsvfs
->z_fuid_domain
); domnode
; i
++,
262 domnode
= AVL_NEXT(&zfsvfs
->z_fuid_domain
, domnode
)) {
263 VERIFY(nvlist_alloc(&fuids
[i
], NV_UNIQUE_NAME
, KM_SLEEP
) == 0);
264 VERIFY(nvlist_add_uint64(fuids
[i
], FUID_IDX
,
265 domnode
->f_idx
) == 0);
266 VERIFY(nvlist_add_uint64(fuids
[i
], FUID_OFFSET
, 0) == 0);
267 VERIFY(nvlist_add_string(fuids
[i
], FUID_DOMAIN
,
268 domnode
->f_ksid
->kd_name
) == 0);
270 VERIFY(nvlist_add_nvlist_array(nvp
, FUID_NVP_ARRAY
,
271 fuids
, numnodes
) == 0);
272 for (i
= 0; i
!= numnodes
; i
++)
273 nvlist_free(fuids
[i
]);
274 kmem_free(fuids
, numnodes
* sizeof (void *));
275 VERIFY(nvlist_size(nvp
, &nvsize
, NV_ENCODE_XDR
) == 0);
276 packed
= kmem_alloc(nvsize
, KM_SLEEP
);
277 VERIFY(nvlist_pack(nvp
, &packed
, &nvsize
,
278 NV_ENCODE_XDR
, KM_SLEEP
) == 0);
280 zfsvfs
->z_fuid_size
= nvsize
;
281 dmu_write(zfsvfs
->z_os
, zfsvfs
->z_fuid_obj
, 0,
282 zfsvfs
->z_fuid_size
, packed
, tx
);
283 kmem_free(packed
, zfsvfs
->z_fuid_size
);
284 VERIFY(0 == dmu_bonus_hold(zfsvfs
->z_os
, zfsvfs
->z_fuid_obj
,
286 dmu_buf_will_dirty(db
, tx
);
287 *(uint64_t *)db
->db_data
= zfsvfs
->z_fuid_size
;
288 dmu_buf_rele(db
, FTAG
);
290 zfsvfs
->z_fuid_dirty
= B_FALSE
;
291 rw_exit(&zfsvfs
->z_fuid_lock
);
295 * Query domain table for a given domain.
297 * If domain isn't found and addok is set, it is added to AVL trees and
298 * the zfsvfs->z_fuid_dirty flag will be set to TRUE. It will then be
299 * necessary for the caller or another thread to detect the dirty table
300 * and sync out the changes.
303 zfs_fuid_find_by_domain(zfsvfs_t
*zfsvfs
, const char *domain
,
304 char **retdomain
, boolean_t addok
)
306 fuid_domain_t searchnode
, *findnode
;
308 krw_t rw
= RW_READER
;
311 * If the dummy "nobody" domain then return an index of 0
312 * to cause the created FUID to be a standard POSIX id
313 * for the user nobody.
315 if (domain
[0] == '\0') {
317 *retdomain
= nulldomain
;
321 searchnode
.f_ksid
= ksid_lookupdomain(domain
);
323 *retdomain
= searchnode
.f_ksid
->kd_name
;
324 if (!zfsvfs
->z_fuid_loaded
)
325 zfs_fuid_init(zfsvfs
);
328 rw_enter(&zfsvfs
->z_fuid_lock
, rw
);
329 findnode
= avl_find(&zfsvfs
->z_fuid_domain
, &searchnode
, &loc
);
332 rw_exit(&zfsvfs
->z_fuid_lock
);
333 ksiddomain_rele(searchnode
.f_ksid
);
334 return (findnode
->f_idx
);
336 fuid_domain_t
*domnode
;
339 if (rw
== RW_READER
&& !rw_tryupgrade(&zfsvfs
->z_fuid_lock
)) {
340 rw_exit(&zfsvfs
->z_fuid_lock
);
345 domnode
= kmem_alloc(sizeof (fuid_domain_t
), KM_SLEEP
);
346 domnode
->f_ksid
= searchnode
.f_ksid
;
348 retidx
= domnode
->f_idx
= avl_numnodes(&zfsvfs
->z_fuid_idx
) + 1;
350 avl_add(&zfsvfs
->z_fuid_domain
, domnode
);
351 avl_add(&zfsvfs
->z_fuid_idx
, domnode
);
352 zfsvfs
->z_fuid_dirty
= B_TRUE
;
353 rw_exit(&zfsvfs
->z_fuid_lock
);
361 * Query domain table by index, returning domain string
363 * Returns a pointer from an avl node of the domain string.
367 zfs_fuid_find_by_idx(zfsvfs_t
*zfsvfs
, uint32_t idx
)
371 if (idx
== 0 || !zfsvfs
->z_use_fuids
)
374 if (!zfsvfs
->z_fuid_loaded
)
375 zfs_fuid_init(zfsvfs
);
377 rw_enter(&zfsvfs
->z_fuid_lock
, RW_READER
);
379 if (zfsvfs
->z_fuid_obj
)
380 domain
= zfs_fuid_idx_domain(&zfsvfs
->z_fuid_idx
, idx
);
383 rw_exit(&zfsvfs
->z_fuid_lock
);
390 zfs_fuid_map_ids(znode_t
*zp
, cred_t
*cr
, uid_t
*uidp
, uid_t
*gidp
)
392 *uidp
= zfs_fuid_map_id(zp
->z_zfsvfs
, zp
->z_phys
->zp_uid
,
394 *gidp
= zfs_fuid_map_id(zp
->z_zfsvfs
, zp
->z_phys
->zp_gid
,
399 zfs_fuid_map_id(zfsvfs_t
*zfsvfs
, uint64_t fuid
,
400 cred_t
*cr
, zfs_fuid_type_t type
)
402 uint32_t index
= FUID_INDEX(fuid
);
409 domain
= zfs_fuid_find_by_idx(zfsvfs
, index
);
410 ASSERT(domain
!= NULL
);
412 if (type
== ZFS_OWNER
|| type
== ZFS_ACE_USER
) {
413 (void) kidmap_getuidbysid(crgetzone(cr
), domain
,
414 FUID_RID(fuid
), &id
);
416 (void) kidmap_getgidbysid(crgetzone(cr
), domain
,
417 FUID_RID(fuid
), &id
);
423 * Add a FUID node to the list of fuid's being created for this
426 * If ACL has multiple domains, then keep only one copy of each unique
430 zfs_fuid_node_add(zfs_fuid_info_t
**fuidpp
, const char *domain
, uint32_t rid
,
431 uint64_t idx
, uint64_t id
, zfs_fuid_type_t type
)
434 zfs_fuid_domain_t
*fuid_domain
;
435 zfs_fuid_info_t
*fuidp
;
437 boolean_t found
= B_FALSE
;
440 *fuidpp
= zfs_fuid_info_alloc();
444 * First find fuid domain index in linked list
446 * If one isn't found then create an entry.
449 for (fuididx
= 1, fuid_domain
= list_head(&fuidp
->z_domains
);
450 fuid_domain
; fuid_domain
= list_next(&fuidp
->z_domains
,
451 fuid_domain
), fuididx
++) {
452 if (idx
== fuid_domain
->z_domidx
) {
459 fuid_domain
= kmem_alloc(sizeof (zfs_fuid_domain_t
), KM_SLEEP
);
460 fuid_domain
->z_domain
= domain
;
461 fuid_domain
->z_domidx
= idx
;
462 list_insert_tail(&fuidp
->z_domains
, fuid_domain
);
463 fuidp
->z_domain_str_sz
+= strlen(domain
) + 1;
464 fuidp
->z_domain_cnt
++;
467 if (type
== ZFS_ACE_USER
|| type
== ZFS_ACE_GROUP
) {
470 * Now allocate fuid entry and add it on the end of the list
473 fuid
= kmem_alloc(sizeof (zfs_fuid_t
), KM_SLEEP
);
475 fuid
->z_domidx
= idx
;
476 fuid
->z_logfuid
= FUID_ENCODE(fuididx
, rid
);
478 list_insert_tail(&fuidp
->z_fuids
, fuid
);
481 if (type
== ZFS_OWNER
)
482 fuidp
->z_fuid_owner
= FUID_ENCODE(fuididx
, rid
);
484 fuidp
->z_fuid_group
= FUID_ENCODE(fuididx
, rid
);
489 * Create a file system FUID, based on information in the users cred
492 zfs_fuid_create_cred(zfsvfs_t
*zfsvfs
, zfs_fuid_type_t type
,
493 cred_t
*cr
, zfs_fuid_info_t
**fuidp
)
502 VERIFY(type
== ZFS_OWNER
|| type
== ZFS_GROUP
);
504 ksid
= crgetsid(cr
, (type
== ZFS_OWNER
) ? KSID_OWNER
: KSID_GROUP
);
506 id
= ksid_getid(ksid
);
508 if (type
== ZFS_OWNER
)
514 if (!zfsvfs
->z_use_fuids
|| (!IS_EPHEMERAL(id
)))
515 return ((uint64_t)id
);
517 rid
= ksid_getrid(ksid
);
518 domain
= ksid_getdomain(ksid
);
520 idx
= zfs_fuid_find_by_domain(zfsvfs
, domain
, &kdomain
, B_TRUE
);
522 zfs_fuid_node_add(fuidp
, kdomain
, rid
, idx
, id
, type
);
524 return (FUID_ENCODE(idx
, rid
));
528 * Create a file system FUID for an ACL ace
529 * or a chown/chgrp of the file.
530 * This is similar to zfs_fuid_create_cred, except that
531 * we can't find the domain + rid information in the
532 * cred. Instead we have to query Winchester for the
535 * During replay operations the domain+rid information is
536 * found in the zfs_fuid_info_t that the replay code has
537 * attached to the zfsvfs of the file system.
540 zfs_fuid_create(zfsvfs_t
*zfsvfs
, uint64_t id
, cred_t
*cr
,
541 zfs_fuid_type_t type
, zfs_fuid_info_t
**fuidpp
)
545 uint32_t fuid_idx
= FUID_INDEX(id
);
549 zfs_fuid_t
*zfuid
= NULL
;
550 zfs_fuid_info_t
*fuidp
;
553 * If POSIX ID, or entry is already a FUID then
556 * We may also be handed an already FUID'ized id via
560 if (!zfsvfs
->z_use_fuids
|| !IS_EPHEMERAL(id
) || fuid_idx
!= 0)
563 if (zfsvfs
->z_replay
) {
564 fuidp
= zfsvfs
->z_fuid_replay
;
567 * If we are passed an ephemeral id, but no
568 * fuid_info was logged then return NOBODY.
569 * This is most likely a result of idmap service
570 * not being available.
578 zfuid
= list_head(&fuidp
->z_fuids
);
579 rid
= FUID_RID(zfuid
->z_logfuid
);
580 idx
= FUID_INDEX(zfuid
->z_logfuid
);
583 rid
= FUID_RID(fuidp
->z_fuid_owner
);
584 idx
= FUID_INDEX(fuidp
->z_fuid_owner
);
587 rid
= FUID_RID(fuidp
->z_fuid_group
);
588 idx
= FUID_INDEX(fuidp
->z_fuid_group
);
591 domain
= fuidp
->z_domain_table
[idx
-1];
593 if (type
== ZFS_OWNER
|| type
== ZFS_ACE_USER
)
594 status
= kidmap_getsidbyuid(crgetzone(cr
), id
,
597 status
= kidmap_getsidbygid(crgetzone(cr
), id
,
602 * When returning nobody we will need to
603 * make a dummy fuid table entry for logging
611 idx
= zfs_fuid_find_by_domain(zfsvfs
, domain
, &kdomain
, B_TRUE
);
613 if (!zfsvfs
->z_replay
)
614 zfs_fuid_node_add(fuidpp
, kdomain
,
616 else if (zfuid
!= NULL
) {
617 list_remove(&fuidp
->z_fuids
, zfuid
);
618 kmem_free(zfuid
, sizeof (zfs_fuid_t
));
620 return (FUID_ENCODE(idx
, rid
));
624 zfs_fuid_destroy(zfsvfs_t
*zfsvfs
)
626 rw_enter(&zfsvfs
->z_fuid_lock
, RW_WRITER
);
627 if (!zfsvfs
->z_fuid_loaded
) {
628 rw_exit(&zfsvfs
->z_fuid_lock
);
631 zfs_fuid_table_destroy(&zfsvfs
->z_fuid_idx
, &zfsvfs
->z_fuid_domain
);
632 rw_exit(&zfsvfs
->z_fuid_lock
);
636 * Allocate zfs_fuid_info for tracking FUIDs created during
637 * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR()
640 zfs_fuid_info_alloc(void)
642 zfs_fuid_info_t
*fuidp
;
644 fuidp
= kmem_zalloc(sizeof (zfs_fuid_info_t
), KM_SLEEP
);
645 list_create(&fuidp
->z_domains
, sizeof (zfs_fuid_domain_t
),
646 offsetof(zfs_fuid_domain_t
, z_next
));
647 list_create(&fuidp
->z_fuids
, sizeof (zfs_fuid_t
),
648 offsetof(zfs_fuid_t
, z_next
));
653 * Release all memory associated with zfs_fuid_info_t
656 zfs_fuid_info_free(zfs_fuid_info_t
*fuidp
)
659 zfs_fuid_domain_t
*zdomain
;
661 while ((zfuid
= list_head(&fuidp
->z_fuids
)) != NULL
) {
662 list_remove(&fuidp
->z_fuids
, zfuid
);
663 kmem_free(zfuid
, sizeof (zfs_fuid_t
));
666 if (fuidp
->z_domain_table
!= NULL
)
667 kmem_free(fuidp
->z_domain_table
,
668 (sizeof (char **)) * fuidp
->z_domain_cnt
);
670 while ((zdomain
= list_head(&fuidp
->z_domains
)) != NULL
) {
671 list_remove(&fuidp
->z_domains
, zdomain
);
672 kmem_free(zdomain
, sizeof (zfs_fuid_domain_t
));
675 kmem_free(fuidp
, sizeof (zfs_fuid_info_t
));
679 * Check to see if id is a groupmember. If cred
680 * has ksid info then sidlist is checked first
681 * and if still not found then POSIX groups are checked
683 * Will use a straight FUID compare when possible.
686 zfs_groupmember(zfsvfs_t
*zfsvfs
, uint64_t id
, cred_t
*cr
)
688 ksid_t
*ksid
= crgetsid(cr
, KSID_GROUP
);
689 ksidlist_t
*ksidlist
= crgetsidlist(cr
);
692 if (ksid
&& ksidlist
) {
695 uint32_t idx
= FUID_INDEX(id
);
696 uint32_t rid
= FUID_RID(id
);
698 ksid_groups
= ksidlist
->ksl_sids
;
700 for (i
= 0; i
!= ksidlist
->ksl_nsid
; i
++) {
702 if (id
!= IDMAP_WK_CREATOR_GROUP_GID
&&
703 id
== ksid_groups
[i
].ks_id
) {
709 domain
= zfs_fuid_find_by_idx(zfsvfs
, idx
);
710 ASSERT(domain
!= NULL
);
713 IDMAP_WK_CREATOR_SID_AUTHORITY
) == 0)
717 ksid_groups
[i
].ks_domain
->kd_name
) == 0) &&
718 rid
== ksid_groups
[i
].ks_rid
)
725 * Not found in ksidlist, check posix groups
727 gid
= zfs_fuid_map_id(zfsvfs
, id
, cr
, ZFS_GROUP
);
728 return (groupmember(gid
, cr
));
732 zfs_fuid_txhold(zfsvfs_t
*zfsvfs
, dmu_tx_t
*tx
)
734 if (zfsvfs
->z_fuid_obj
== 0) {
735 dmu_tx_hold_bonus(tx
, DMU_NEW_OBJECT
);
736 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
, 0,
737 FUID_SIZE_ESTIMATE(zfsvfs
));
738 dmu_tx_hold_zap(tx
, MASTER_NODE_OBJ
, FALSE
, NULL
);
740 dmu_tx_hold_bonus(tx
, zfsvfs
->z_fuid_obj
);
741 dmu_tx_hold_write(tx
, zfsvfs
->z_fuid_obj
, 0,
742 FUID_SIZE_ESTIMATE(zfsvfs
));