Performance optimization of AVL tree comparator functions
[mirror_zfs.git] / module / zfs / zfs_znode.c
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
19d55079 23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
34dc7c2f
BB
24 */
25
26/* Portions Copyright 2007 Jeremy Teo */
27
34dc7c2f
BB
28#ifdef _KERNEL
29#include <sys/types.h>
30#include <sys/param.h>
31#include <sys/time.h>
32#include <sys/systm.h>
33#include <sys/sysmacros.h>
34#include <sys/resource.h>
35#include <sys/mntent.h>
36#include <sys/mkdev.h>
37#include <sys/u8_textprep.h>
38#include <sys/dsl_dataset.h>
39#include <sys/vfs.h>
40#include <sys/vfs_opreg.h>
41#include <sys/vnode.h>
42#include <sys/file.h>
43#include <sys/kmem.h>
44#include <sys/errno.h>
45#include <sys/unistd.h>
46#include <sys/mode.h>
47#include <sys/atomic.h>
48#include <vm/pvn.h>
49#include "fs/fs_subr.h"
50#include <sys/zfs_dir.h>
51#include <sys/zfs_acl.h>
52#include <sys/zfs_ioctl.h>
53#include <sys/zfs_rlock.h>
54#include <sys/zfs_fuid.h>
3558fd73 55#include <sys/zfs_vnops.h>
ebe7e575 56#include <sys/zfs_ctldir.h>
428870ff 57#include <sys/dnode.h>
34dc7c2f
BB
58#include <sys/fs/zfs.h>
59#include <sys/kidmap.h>
3558fd73 60#include <sys/zpl.h>
34dc7c2f
BB
61#endif /* _KERNEL */
62
63#include <sys/dmu.h>
f1512ee6 64#include <sys/dmu_objset.h>
50c957f7 65#include <sys/dmu_tx.h>
34dc7c2f
BB
66#include <sys/refcount.h>
67#include <sys/stat.h>
68#include <sys/zap.h>
69#include <sys/zfs_znode.h>
428870ff
BB
70#include <sys/sa.h>
71#include <sys/zfs_sa.h>
572e2857 72#include <sys/zfs_stat.h>
34dc7c2f
BB
73
74#include "zfs_prop.h"
428870ff 75#include "zfs_comutil.h"
34dc7c2f 76
b128c09f
BB
77/*
78 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
79 * turned on when DEBUG is also defined.
80 */
81#ifdef DEBUG
82#define ZNODE_STATS
83#endif /* DEBUG */
84
85#ifdef ZNODE_STATS
86#define ZNODE_STAT_ADD(stat) ((stat)++)
87#else
88#define ZNODE_STAT_ADD(stat) /* nothing */
89#endif /* ZNODE_STATS */
90
34dc7c2f
BB
91/*
92 * Functions needed for userland (i.e. libzpool) are not put under
93 * #ifdef _KERNEL; the rest of the functions have dependencies
94 * (such as VFS logic) that will not compile easily in userland.
95 */
96#ifdef _KERNEL
9babb374 97
b128c09f 98static kmem_cache_t *znode_cache = NULL;
c96c36fa 99static kmem_cache_t *znode_hold_cache = NULL;
0720116d 100unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
34dc7c2f 101
34dc7c2f
BB
102/*ARGSUSED*/
103static int
b128c09f 104zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
34dc7c2f
BB
105{
106 znode_t *zp = buf;
107
3558fd73 108 inode_init_once(ZTOI(zp));
b128c09f
BB
109 list_link_init(&zp->z_link_node);
110
34dc7c2f 111 mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
34dc7c2f 112 rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
448d7aaa 113 rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
34dc7c2f 114 mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
82a37189 115 rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
34dc7c2f 116
d88895a0 117 zfs_rlock_init(&zp->z_range_lock);
34dc7c2f 118
b128c09f 119 zp->z_dirlocks = NULL;
45d1cae3 120 zp->z_acl_cached = NULL;
82a37189 121 zp->z_xattr_cached = NULL;
572e2857 122 zp->z_moved = 0;
34dc7c2f
BB
123 return (0);
124}
125
126/*ARGSUSED*/
127static void
b128c09f 128zfs_znode_cache_destructor(void *buf, void *arg)
34dc7c2f
BB
129{
130 znode_t *zp = buf;
131
b128c09f 132 ASSERT(!list_link_active(&zp->z_link_node));
34dc7c2f 133 mutex_destroy(&zp->z_lock);
34dc7c2f
BB
134 rw_destroy(&zp->z_parent_lock);
135 rw_destroy(&zp->z_name_lock);
136 mutex_destroy(&zp->z_acl_lock);
82a37189 137 rw_destroy(&zp->z_xattr_lock);
d88895a0 138 zfs_rlock_destroy(&zp->z_range_lock);
34dc7c2f 139
b128c09f 140 ASSERT(zp->z_dirlocks == NULL);
45d1cae3 141 ASSERT(zp->z_acl_cached == NULL);
82a37189 142 ASSERT(zp->z_xattr_cached == NULL);
b128c09f
BB
143}
144
c96c36fa
BB
145static int
146zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
147{
148 znode_hold_t *zh = buf;
149
150 mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
151 refcount_create(&zh->zh_refcount);
152 zh->zh_obj = ZFS_NO_OBJECT;
153
154 return (0);
155}
156
157static void
158zfs_znode_hold_cache_destructor(void *buf, void *arg)
159{
160 znode_hold_t *zh = buf;
161
162 mutex_destroy(&zh->zh_lock);
163 refcount_destroy(&zh->zh_refcount);
164}
165
34dc7c2f
BB
166void
167zfs_znode_init(void)
168{
169 /*
5074bfe8
TC
170 * Initialize zcache. The KMC_SLAB hint is used so that the cache is
171 * backed by kmalloc() on the Linux slab, which is required for any
172 * wait_on_bit() operations on the related inode to work properly.
34dc7c2f
BB
173 */
174 ASSERT(znode_cache == NULL);
175 znode_cache = kmem_cache_create("zfs_znode_cache",
176 sizeof (znode_t), 0, zfs_znode_cache_constructor,
5074bfe8 177 zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);
c96c36fa
BB
178
179 ASSERT(znode_hold_cache == NULL);
180 znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
181 sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
182 zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
34dc7c2f
BB
183}
184
185void
186zfs_znode_fini(void)
187{
34dc7c2f
BB
188 /*
189 * Cleanup zcache
190 */
191 if (znode_cache)
192 kmem_cache_destroy(znode_cache);
193 znode_cache = NULL;
c96c36fa
BB
194
195 if (znode_hold_cache)
196 kmem_cache_destroy(znode_hold_cache);
197 znode_hold_cache = NULL;
198}
199
200/*
201 * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
202 * serialize access to a znode and its SA buffer while the object is being
203 * created or destroyed. This kind of locking would normally reside in the
204 * znode itself but in this case that's impossible because the znode and SA
205 * buffer may not yet exist. Therefore the locking is handled externally
206 * with an array of mutexes and AVL trees which contain per-object locks.
207 *
208 * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
209 * into the correct AVL tree and finally the per-object lock is held. In
210 * zfs_znode_hold_exit() the process is reversed. The per-object lock is
211 * released, removed from the AVL tree and destroyed if there are no waiters.
212 *
213 * This scheme has two important properties:
214 *
215 * 1) No memory allocations are performed while holding one of the z_hold_locks.
216 * This ensures evict(), which can be called from direct memory reclaim, will
217 * never block waiting on a z_hold_locks which just happens to have hashed
218 * to the same index.
219 *
220 * 2) All locks used to serialize access to an object are per-object and never
221 * shared. This minimizes lock contention without creating a large number
222 * of dedicated locks.
223 *
224 * On the downside it does require znode_hold_t structures to be frequently
225 * allocated and freed. However, because these are backed by a kmem cache
226 * and are very short lived, this cost is minimal.
227 */
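/*
 * Illustrative usage, mirroring how zfs_zget() and zfs_mknode() below use
 * these helpers:
 *
 *	zh = zfs_znode_hold_enter(zsb, obj);
 *	... create, look up, or destroy the znode / SA buffer for obj ...
 *	zfs_znode_hold_exit(zsb, zh);
 */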
228int
229zfs_znode_hold_compare(const void *a, const void *b)
230{
ee36c709
GN
231 const znode_hold_t *zh_a = (const znode_hold_t *)a;
232 const znode_hold_t *zh_b = (const znode_hold_t *)b;
233
234 return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj));
c96c36fa
BB
235}
236
237boolean_t
238zfs_znode_held(zfs_sb_t *zsb, uint64_t obj)
239{
240 znode_hold_t *zh, search;
241 int i = ZFS_OBJ_HASH(zsb, obj);
37c56346 242 boolean_t held;
c96c36fa
BB
243
244 search.zh_obj = obj;
245
246 mutex_enter(&zsb->z_hold_locks[i]);
247 zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
37c56346 248 held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE;
c96c36fa
BB
249 mutex_exit(&zsb->z_hold_locks[i]);
250
37c56346 251 return (held);
c96c36fa
BB
252}
253
254static znode_hold_t *
255zfs_znode_hold_enter(zfs_sb_t *zsb, uint64_t obj)
256{
257 znode_hold_t *zh, *zh_new, search;
258 int i = ZFS_OBJ_HASH(zsb, obj);
259 boolean_t found = B_FALSE;
260
261 zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
262 zh_new->zh_obj = obj;
263 search.zh_obj = obj;
264
265 mutex_enter(&zsb->z_hold_locks[i]);
266 zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
267 if (likely(zh == NULL)) {
268 zh = zh_new;
269 avl_add(&zsb->z_hold_trees[i], zh);
270 } else {
271 ASSERT3U(zh->zh_obj, ==, obj);
272 found = B_TRUE;
273 }
274 refcount_add(&zh->zh_refcount, NULL);
275 mutex_exit(&zsb->z_hold_locks[i]);
276
277 if (found == B_TRUE)
278 kmem_cache_free(znode_hold_cache, zh_new);
279
280 ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
281 ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
282 mutex_enter(&zh->zh_lock);
283
284 return (zh);
285}
286
287static void
288zfs_znode_hold_exit(zfs_sb_t *zsb, znode_hold_t *zh)
289{
290 int i = ZFS_OBJ_HASH(zsb, zh->zh_obj);
291 boolean_t remove = B_FALSE;
292
293 ASSERT(zfs_znode_held(zsb, zh->zh_obj));
294 ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
295 mutex_exit(&zh->zh_lock);
296
297 mutex_enter(&zsb->z_hold_locks[i]);
298 if (refcount_remove(&zh->zh_refcount, NULL) == 0) {
299 avl_remove(&zsb->z_hold_trees[i], zh);
300 remove = B_TRUE;
301 }
302 mutex_exit(&zsb->z_hold_locks[i]);
303
304 if (remove == B_TRUE)
305 kmem_cache_free(znode_hold_cache, zh);
34dc7c2f
BB
306}
307
34dc7c2f 308int
3558fd73 309zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
34dc7c2f 310{
3c9609b3 311#ifdef HAVE_SMB_SHARE
9babb374
BB
312 zfs_acl_ids_t acl_ids;
313 vattr_t vattr;
314 znode_t *sharezp;
315 vnode_t *vp;
316 znode_t *zp;
317 int error;
34dc7c2f 318
9babb374 319 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
3558fd73 320 vattr.va_mode = S_IFDIR | 0555;
9babb374
BB
321 vattr.va_uid = crgetuid(kcred);
322 vattr.va_gid = crgetgid(kcred);
34dc7c2f 323
79c76d5b 324 sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
572e2857 325 sharezp->z_moved = 0;
9babb374
BB
326 sharezp->z_unlinked = 0;
327 sharezp->z_atime_dirty = 0;
328 sharezp->z_zfsvfs = zfsvfs;
428870ff 329 sharezp->z_is_sa = zfsvfs->z_use_sa;
34dc7c2f 330
9babb374
BB
331 vp = ZTOV(sharezp);
332 vn_reinit(vp);
333 vp->v_type = VDIR;
34dc7c2f 334
9babb374
BB
335 VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
336 kcred, NULL, &acl_ids));
428870ff 337 zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
9babb374
BB
338 ASSERT3P(zp, ==, sharezp);
339 ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
340 POINTER_INVALIDATE(&sharezp->z_zfsvfs);
341 error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
342 ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
343 zfsvfs->z_shares_dir = sharezp->z_id;
344
345 zfs_acl_ids_free(&acl_ids);
3558fd73 346 // ZTOV(sharezp)->v_count = 0;
428870ff 347 sa_handle_destroy(sharezp->z_sa_hdl);
9babb374 348 kmem_cache_free(znode_cache, sharezp);
34dc7c2f 349
9babb374 350 return (error);
9ee7fac5
BB
351#else
352 return (0);
3c9609b3 353#endif /* HAVE_SMB_SHARE */
34dc7c2f
BB
354}
355
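/*
 * Initialize the znode's SA handle: use the caller-supplied handle if one
 * was given, otherwise create a shared handle from the dbuf, and record
 * whether the object keeps its attributes in a DMU_OT_SA bonus buffer.
 */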
34dc7c2f 356static void
3558fd73 357zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp,
428870ff 358 dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
34dc7c2f 359{
c96c36fa 360 ASSERT(zfs_znode_held(zsb, zp->z_id));
34dc7c2f
BB
361
362 mutex_enter(&zp->z_lock);
363
428870ff
BB
364 ASSERT(zp->z_sa_hdl == NULL);
365 ASSERT(zp->z_acl_cached == NULL);
366 if (sa_hdl == NULL) {
3558fd73 367 VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, zp,
428870ff
BB
368 SA_HDL_SHARED, &zp->z_sa_hdl));
369 } else {
370 zp->z_sa_hdl = sa_hdl;
371 sa_set_userp(sa_hdl, zp);
372 }
34dc7c2f 373
428870ff 374 zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
34dc7c2f 375
34dc7c2f 376 mutex_exit(&zp->z_lock);
34dc7c2f
BB
377}
378
379void
380zfs_znode_dmu_fini(znode_t *zp)
381{
c96c36fa 382 ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
3558fd73 383 RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
428870ff
BB
384
385 sa_handle_destroy(zp->z_sa_hdl);
386 zp->z_sa_hdl = NULL;
34dc7c2f
BB
387}
388
389/*
3558fd73
BB
390 * Called by new_inode() to allocate a new inode.
391 */
392int
393zfs_inode_alloc(struct super_block *sb, struct inode **ip)
394{
395 znode_t *zp;
396
79c76d5b 397 zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
3558fd73
BB
398 *ip = ZTOI(zp);
399
400 return (0);
401}
402
403/*
404 * Called in multiple places when an inode should be destroyed.
405 */
406void
407zfs_inode_destroy(struct inode *ip)
408{
409 znode_t *zp = ITOZ(ip);
410 zfs_sb_t *zsb = ZTOZSB(zp);
411
412 mutex_enter(&zsb->z_znodes_lock);
7b3e34ba
BB
413 if (list_link_active(&zp->z_link_node)) {
414 list_remove(&zsb->z_all_znodes, zp);
415 zsb->z_nr_znodes--;
416 }
3558fd73
BB
417 mutex_exit(&zsb->z_znodes_lock);
418
419 if (zp->z_acl_cached) {
420 zfs_acl_free(zp->z_acl_cached);
421 zp->z_acl_cached = NULL;
422 }
423
82a37189
BB
424 if (zp->z_xattr_cached) {
425 nvlist_free(zp->z_xattr_cached);
426 zp->z_xattr_cached = NULL;
427 }
428
3558fd73
BB
429 kmem_cache_free(znode_cache, zp);
430}
431
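/*
 * Assign the inode, file, and address-space operation vectors based on the
 * file type. For device nodes the rdev is read from the SA; an invalid mode
 * is logged and the inode is treated as a regular file so we can continue.
 */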
432static void
433zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
434{
aa6d8c10 435 uint64_t rdev = 0;
3558fd73
BB
436
437 switch (ip->i_mode & S_IFMT) {
438 case S_IFREG:
439 ip->i_op = &zpl_inode_operations;
440 ip->i_fop = &zpl_file_operations;
441 ip->i_mapping->a_ops = &zpl_address_space_operations;
442 break;
443
444 case S_IFDIR:
445 ip->i_op = &zpl_dir_inode_operations;
446 ip->i_fop = &zpl_dir_file_operations;
447 ITOZ(ip)->z_zn_prefetch = B_TRUE;
448 break;
449
450 case S_IFLNK:
451 ip->i_op = &zpl_symlink_inode_operations;
452 break;
453
aa6d8c10
NB
454 /*
455 * rdev is only stored in the SA for device files.
456 */
3558fd73
BB
457 case S_IFCHR:
458 case S_IFBLK:
53b1d979
BB
459 sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb), &rdev,
460 sizeof (rdev));
aa6d8c10
NB
461 /*FALLTHROUGH*/
462 case S_IFIFO:
463 case S_IFSOCK:
3558fd73
BB
464 init_special_inode(ip, ip->i_mode, rdev);
465 ip->i_op = &zpl_special_inode_operations;
466 break;
467
468 default:
53b1d979
BB
469 zfs_panic_recover("inode %llu has invalid mode: 0x%x\n",
470 (u_longlong_t)ip->i_ino, ip->i_mode);
471
472 /* Assume the inode is a file and attempt to continue */
473 ip->i_mode = S_IFREG | 0644;
474 ip->i_op = &zpl_inode_operations;
475 ip->i_fop = &zpl_file_operations;
476 ip->i_mapping->a_ops = &zpl_address_space_operations;
477 break;
3558fd73
BB
478 }
479}
480
704cd075
CC
481void
482zfs_set_inode_flags(znode_t *zp, struct inode *ip)
483{
484 /*
485 * Linux and Solaris have different sets of file attributes, so we
486 * restrict this conversion to the intersection of the two.
487 */
488
489 if (zp->z_pflags & ZFS_IMMUTABLE)
490 ip->i_flags |= S_IMMUTABLE;
491 else
492 ip->i_flags &= ~S_IMMUTABLE;
493
494 if (zp->z_pflags & ZFS_APPENDONLY)
495 ip->i_flags |= S_APPEND;
496 else
497 ip->i_flags &= ~S_APPEND;
498}
499
500/*
501 * Update the embedded inode given the znode. We should work toward
502 * eliminating this function as soon as possible by removing values
503 * which are duplicated between the znode and inode. If the generic
504 * inode has the correct field it should be used, and the ZFS code
505 * updated to access the inode. This can be done incrementally.
506 */
507static void
508zfs_inode_update_impl(znode_t *zp, boolean_t new)
509{
510 zfs_sb_t *zsb;
511 struct inode *ip;
512 uint32_t blksize;
513 u_longlong_t i_blocks;
514 uint64_t atime[2], mtime[2], ctime[2];
515
516 ASSERT(zp != NULL);
517 zsb = ZTOZSB(zp);
518 ip = ZTOI(zp);
519
520 /* Skip .zfs control nodes which do not exist on disk. */
521 if (zfsctl_is_node(ip))
522 return;
523
524 sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16);
525 sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16);
526 sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16);
527
528 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
529
530 spin_lock(&ip->i_lock);
704cd075
CC
531 ip->i_mode = zp->z_mode;
532 zfs_set_inode_flags(zp, ip);
704cd075
CC
533 ip->i_blocks = i_blocks;
534
535 /*
536 * Only read atime from SA if this is a newly created inode (or rezget),
537 * otherwise i_atime might be dirty.
538 */
539 if (new)
540 ZFS_TIME_DECODE(&ip->i_atime, atime);
541 ZFS_TIME_DECODE(&ip->i_mtime, mtime);
542 ZFS_TIME_DECODE(&ip->i_ctime, ctime);
543
544 i_size_write(ip, zp->z_size);
545 spin_unlock(&ip->i_lock);
546}
547
548static void
549zfs_inode_update_new(znode_t *zp)
550{
551 zfs_inode_update_impl(zp, B_TRUE);
552}
553
554void
555zfs_inode_update(znode_t *zp)
556{
557 zfs_inode_update_impl(zp, B_FALSE);
558}
559
3558fd73
BB
560/*
561 * Construct a znode+inode and initialize.
34dc7c2f
BB
562 *
563 * This does not do a call to dmu_set_user(); that is
564 * up to the caller to do, in case you don't want to
565 * return the znode.
566 */
567static znode_t *
3558fd73 568zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
31b6111f 569 dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
34dc7c2f
BB
570{
571 znode_t *zp;
3558fd73 572 struct inode *ip;
7f89ae6b 573 uint64_t mode;
428870ff 574 uint64_t parent;
278f2236 575 uint64_t tmp_gen;
dfbc8630 576 uint64_t links;
2c6abf15 577 uint64_t z_uid, z_gid;
0df9673f 578 sa_bulk_attr_t bulk[8];
428870ff 579 int count = 0;
34dc7c2f 580
3558fd73 581 ASSERT(zsb != NULL);
34dc7c2f 582
3558fd73
BB
583 ip = new_inode(zsb->z_sb);
584 if (ip == NULL)
585 return (NULL);
7304b6e5 586
3558fd73 587 zp = ITOZ(ip);
34dc7c2f 588 ASSERT(zp->z_dirlocks == NULL);
ebe7e575
BB
589 ASSERT3P(zp->z_acl_cached, ==, NULL);
590 ASSERT3P(zp->z_xattr_cached, ==, NULL);
572e2857 591 zp->z_moved = 0;
428870ff 592 zp->z_sa_hdl = NULL;
34dc7c2f
BB
593 zp->z_unlinked = 0;
594 zp->z_atime_dirty = 0;
595 zp->z_mapcnt = 0;
34dc7c2f
BB
596 zp->z_id = db->db_object;
597 zp->z_blksz = blksz;
598 zp->z_seq = 0x7A4653;
599 zp->z_sync_cnt = 0;
ebe7e575
BB
600 zp->z_is_mapped = B_FALSE;
601 zp->z_is_ctldir = B_FALSE;
7b3e34ba 602 zp->z_is_stale = B_FALSE;
d88895a0
CC
603 zp->z_range_lock.zr_size = &zp->z_size;
604 zp->z_range_lock.zr_blksz = &zp->z_blksz;
605 zp->z_range_lock.zr_max_blksz = &ZTOZSB(zp)->z_max_blksz;
34dc7c2f 606
3558fd73
BB
607 zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
608
7f89ae6b 609 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
278f2236 610 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &tmp_gen, 8);
3558fd73 611 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8);
dfbc8630 612 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &links, 8);
3558fd73 613 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
428870ff 614 &zp->z_pflags, 8);
3558fd73 615 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
7304b6e5 616 &parent, 8);
2c6abf15
NB
617 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &z_uid, 8);
618 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &z_gid, 8);
428870ff 619
278f2236
NB
620 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 ||
621 tmp_gen == 0) {
622
428870ff
BB
623 if (hdl == NULL)
624 sa_handle_destroy(zp->z_sa_hdl);
07d63f0c 625 zp->z_sa_hdl = NULL;
3558fd73 626 goto error;
34dc7c2f 627 }
7304b6e5 628
7f89ae6b 629 zp->z_mode = mode;
278f2236 630 ip->i_generation = (uint32_t)tmp_gen;
ba2fe6af 631 ip->i_blkbits = SPA_MINBLOCKSHIFT;
dfbc8630 632 set_nlink(ip, (uint32_t)links);
2c6abf15
NB
633 zfs_uid_write(ip, z_uid);
634 zfs_gid_write(ip, z_gid);
7f89ae6b 635
3558fd73 636 ip->i_ino = obj;
704cd075 637 zfs_inode_update_new(zp);
3558fd73
BB
638 zfs_inode_set_ops(zsb, ip);
639
7b3e34ba
BB
640 /*
641 * The only way insert_inode_locked() can fail is if the ip->i_ino
642 * number is already hashed for this super block. This can never
643 * happen because the inode numbers map 1:1 with the object numbers.
644 *
645 * The one exception is rolling back a mounted file system, but in
646 * this case all the active inodes are unhashed during the rollback.
647 */
648 VERIFY3S(insert_inode_locked(ip), ==, 0);
c85b224f 649
3558fd73
BB
650 mutex_enter(&zsb->z_znodes_lock);
651 list_insert_tail(&zsb->z_all_znodes, zp);
ab26409d 652 zsb->z_nr_znodes++;
b128c09f 653 membar_producer();
3558fd73 654 mutex_exit(&zsb->z_znodes_lock);
b128c09f 655
3558fd73 656 unlock_new_inode(ip);
34dc7c2f 657 return (zp);
3558fd73
BB
658
659error:
3558fd73 660 iput(ip);
d1d7e268 661 return (NULL);
34dc7c2f
BB
662}
663
1e8db771
BB
664/*
665 * Safely mark an inode dirty. Inodes which are part of a read-only
666 * file system or snapshot may not be dirtied.
667 */
668void
669zfs_mark_inode_dirty(struct inode *ip)
670{
671 zfs_sb_t *zsb = ITOZSB(ip);
672
673 if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os))
674 return;
675
676 mark_inode_dirty(ip);
677}
678
428870ff
BB
679static uint64_t empty_xattr;
680static uint64_t pad[4];
681static zfs_acl_phys_t acl_phys;
34dc7c2f
BB
682/*
683 * Create a new DMU object to hold a zfs znode.
684 *
685 * IN: dzp - parent directory for new znode
686 * vap - file attributes for new znode
687 * tx - dmu transaction id for zap operations
688 * cr - credentials of caller
689 * flag - flags:
690 * IS_ROOT_NODE - new object will be root
691 * IS_XATTR - new object is an attribute
34dc7c2f
BB
692 * bonuslen - length of bonus buffer
693 * setaclp - File/Dir initial ACL
694 * fuidp - Tracks fuid allocation.
695 *
696 * OUT: zpp - allocated znode
697 *
698 */
699void
700zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
428870ff 701 uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
34dc7c2f 702{
428870ff
BB
703 uint64_t crtime[2], atime[2], mtime[2], ctime[2];
704 uint64_t mode, size, links, parent, pflags;
572e2857 705 uint64_t dzp_pflags = 0;
428870ff 706 uint64_t rdev = 0;
3558fd73 707 zfs_sb_t *zsb = ZTOZSB(dzp);
428870ff 708 dmu_buf_t *db;
34dc7c2f
BB
709 timestruc_t now;
710 uint64_t gen, obj;
428870ff 711 int bonuslen;
50c957f7 712 int dnodesize;
428870ff
BB
713 sa_handle_t *sa_hdl;
714 dmu_object_type_t obj_type;
f30484af 715 sa_bulk_attr_t *sa_attrs;
428870ff
BB
716 int cnt = 0;
717 zfs_acl_locator_cb_t locate = { 0 };
c96c36fa 718 znode_hold_t *zh;
34dc7c2f 719
3558fd73 720 if (zsb->z_replay) {
34dc7c2f 721 obj = vap->va_nodeid;
34dc7c2f
BB
722 now = vap->va_ctime; /* see zfs_replay_create() */
723 gen = vap->va_nblocks; /* ditto */
50c957f7 724 dnodesize = vap->va_fsid; /* ditto */
34dc7c2f
BB
725 } else {
726 obj = 0;
727 gethrestime(&now);
728 gen = dmu_tx_get_txg(tx);
50c957f7 729 dnodesize = dmu_objset_dnodesize(zsb->z_os);
34dc7c2f
BB
730 }
731
50c957f7
NB
732 if (dnodesize == 0)
733 dnodesize = DNODE_MIN_SIZE;
734
3558fd73 735 obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
50c957f7 736
428870ff 737 bonuslen = (obj_type == DMU_OT_SA) ?
50c957f7 738 DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
428870ff 739
34dc7c2f
BB
740 /*
741 * Create a new DMU object.
742 */
743 /*
744 * There's currently no mechanism for pre-reading the blocks that will
572e2857 745 * be needed to allocate a new object, so we accept the small chance
34dc7c2f
BB
746 * that there will be an i/o error and we will fail one of the
747 * assertions below.
748 */
3558fd73
BB
749 if (S_ISDIR(vap->va_mode)) {
750 if (zsb->z_replay) {
50c957f7 751 VERIFY0(zap_create_claim_norm_dnsize(zsb->z_os, obj,
3558fd73 752 zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
50c957f7 753 obj_type, bonuslen, dnodesize, tx));
34dc7c2f 754 } else {
50c957f7 755 obj = zap_create_norm_dnsize(zsb->z_os,
3558fd73 756 zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
50c957f7 757 obj_type, bonuslen, dnodesize, tx);
34dc7c2f
BB
758 }
759 } else {
3558fd73 760 if (zsb->z_replay) {
50c957f7 761 VERIFY0(dmu_object_claim_dnsize(zsb->z_os, obj,
34dc7c2f 762 DMU_OT_PLAIN_FILE_CONTENTS, 0,
50c957f7 763 obj_type, bonuslen, dnodesize, tx));
34dc7c2f 764 } else {
50c957f7 765 obj = dmu_object_alloc_dnsize(zsb->z_os,
34dc7c2f 766 DMU_OT_PLAIN_FILE_CONTENTS, 0,
50c957f7 767 obj_type, bonuslen, dnodesize, tx);
34dc7c2f
BB
768 }
769 }
34dc7c2f 770
c96c36fa 771 zh = zfs_znode_hold_enter(zsb, obj);
3558fd73 772 VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));
34dc7c2f
BB
773
774 /*
775 * If this is the root, fix up the half-initialized parent pointer
776 * to reference the just-allocated physical data area.
777 */
778 if (flag & IS_ROOT_NODE) {
34dc7c2f 779 dzp->z_id = obj;
428870ff
BB
780 } else {
781 dzp_pflags = dzp->z_pflags;
34dc7c2f
BB
782 }
783
784 /*
785 * If parent is an xattr, so am I.
786 */
428870ff 787 if (dzp_pflags & ZFS_XATTR) {
34dc7c2f 788 flag |= IS_XATTR;
34dc7c2f
BB
789 }
790
3558fd73 791 if (zsb->z_use_fuids)
428870ff
BB
792 pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
793 else
794 pflags = 0;
34dc7c2f 795
3558fd73 796 if (S_ISDIR(vap->va_mode)) {
428870ff 797 size = 2; /* contents ("." and "..") */
dfbc8630 798 links = 2;
428870ff 799 } else {
dfbc8630
CD
800 size = 0;
801 links = 1;
34dc7c2f
BB
802 }
803
aa6d8c10 804 if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
dc1d7665 805 rdev = vap->va_rdev;
428870ff
BB
806
807 parent = dzp->z_id;
808 mode = acl_ids->z_mode;
34dc7c2f 809 if (flag & IS_XATTR)
428870ff 810 pflags |= ZFS_XATTR;
34dc7c2f 811
428870ff
BB
812 /*
813 * No execs denied will be determined when zfs_mode_compute() is called.
814 */
815 pflags |= acl_ids->z_aclp->z_hints &
816 (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
817 ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
34dc7c2f 818
428870ff
BB
819 ZFS_TIME_ENCODE(&now, crtime);
820 ZFS_TIME_ENCODE(&now, ctime);
34dc7c2f 821
3558fd73 822 if (vap->va_mask & ATTR_ATIME) {
428870ff 823 ZFS_TIME_ENCODE(&vap->va_atime, atime);
34dc7c2f 824 } else {
428870ff 825 ZFS_TIME_ENCODE(&now, atime);
34dc7c2f
BB
826 }
827
3558fd73 828 if (vap->va_mask & ATTR_MTIME) {
428870ff
BB
829 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
830 } else {
831 ZFS_TIME_ENCODE(&now, mtime);
832 }
833
834 /* Now add in all of the "SA" attributes */
3558fd73 835 VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED,
428870ff
BB
836 &sa_hdl));
837
838 /*
839 * Setup the array of attributes to be replaced/set on the new file
840 *
841 * The order for DMU_OT_ZNODE is critical since it needs to be constructed
842 * in the old znode_phys_t format. Don't change this ordering.
843 */
79c76d5b 844 sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
428870ff
BB
845
846 if (obj_type == DMU_OT_ZNODE) {
3558fd73 847 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
428870ff 848 NULL, &atime, 16);
3558fd73 849 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
428870ff 850 NULL, &mtime, 16);
3558fd73 851 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
428870ff 852 NULL, &ctime, 16);
3558fd73 853 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
428870ff 854 NULL, &crtime, 16);
3558fd73 855 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
428870ff 856 NULL, &gen, 8);
3558fd73 857 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
428870ff 858 NULL, &mode, 8);
3558fd73 859 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
428870ff 860 NULL, &size, 8);
3558fd73 861 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
428870ff 862 NULL, &parent, 8);
34dc7c2f 863 } else {
3558fd73 864 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
428870ff 865 NULL, &mode, 8);
3558fd73 866 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
428870ff 867 NULL, &size, 8);
3558fd73 868 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
428870ff 869 NULL, &gen, 8);
3558fd73
BB
870 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb),
871 NULL, &acl_ids->z_fuid, 8);
872 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb),
873 NULL, &acl_ids->z_fgid, 8);
874 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
428870ff 875 NULL, &parent, 8);
3558fd73 876 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
428870ff 877 NULL, &pflags, 8);
3558fd73 878 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
428870ff 879 NULL, &atime, 16);
3558fd73 880 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
428870ff 881 NULL, &mtime, 16);
3558fd73 882 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
428870ff 883 NULL, &ctime, 16);
3558fd73 884 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
428870ff
BB
885 NULL, &crtime, 16);
886 }
887
3558fd73 888 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8);
428870ff
BB
889
890 if (obj_type == DMU_OT_ZNODE) {
3558fd73 891 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL,
428870ff 892 &empty_xattr, 8);
34dc7c2f 893 }
428870ff 894 if (obj_type == DMU_OT_ZNODE ||
aa6d8c10 895 (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
3558fd73 896 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
428870ff 897 NULL, &rdev, 8);
428870ff
BB
898 }
899 if (obj_type == DMU_OT_ZNODE) {
3558fd73 900 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
428870ff 901 NULL, &pflags, 8);
3558fd73 902 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL,
428870ff 903 &acl_ids->z_fuid, 8);
3558fd73 904 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL,
428870ff 905 &acl_ids->z_fgid, 8);
3558fd73 906 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad,
428870ff 907 sizeof (uint64_t) * 4);
3558fd73 908 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL,
428870ff
BB
909 &acl_phys, sizeof (zfs_acl_phys_t));
910 } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
3558fd73 911 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL,
428870ff
BB
912 &acl_ids->z_aclp->z_acl_count, 8);
913 locate.cb_aclp = acl_ids->z_aclp;
3558fd73 914 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb),
428870ff
BB
915 zfs_acl_data_locator, &locate,
916 acl_ids->z_aclp->z_acl_bytes);
917 mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
918 acl_ids->z_fuid, acl_ids->z_fgid);
919 }
920
921 VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
34dc7c2f 922
34dc7c2f 923 if (!(flag & IS_ROOT_NODE)) {
31b6111f 924 *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl);
7b3e34ba
BB
925 VERIFY(*zpp != NULL);
926 VERIFY(dzp != NULL);
34dc7c2f
BB
927 } else {
928 /*
929 * If we are creating the root node, the "parent" we
930 * passed in is the znode for the root.
931 */
932 *zpp = dzp;
428870ff
BB
933
934 (*zpp)->z_sa_hdl = sa_hdl;
34dc7c2f 935 }
428870ff
BB
936
937 (*zpp)->z_pflags = pflags;
938 (*zpp)->z_mode = mode;
50c957f7 939 (*zpp)->z_dnodesize = dnodesize;
428870ff 940
428870ff
BB
941 if (obj_type == DMU_OT_ZNODE ||
942 acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
b0bc7a84 943 VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
428870ff 944 }
d1d7e268 945 kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
c96c36fa 946 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
947}
948
5484965a 949/*
d3cc8b15
WA
950 * Update in-core attributes. It is assumed the caller will be doing an
951 * sa_bulk_update to push the changes out.
5484965a
BB
952 */
953void
954zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
955{
956 xoptattr_t *xoap;
957
958 xoap = xva_getxoptattr(xvap);
959 ASSERT(xoap);
960
961 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
962 uint64_t times[2];
963 ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
964 (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
965 &times, sizeof (times), tx);
966 XVA_SET_RTN(xvap, XAT_CREATETIME);
967 }
968 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
969 ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
970 zp->z_pflags, tx);
971 XVA_SET_RTN(xvap, XAT_READONLY);
972 }
973 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
974 ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
975 zp->z_pflags, tx);
976 XVA_SET_RTN(xvap, XAT_HIDDEN);
977 }
978 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
979 ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
980 zp->z_pflags, tx);
981 XVA_SET_RTN(xvap, XAT_SYSTEM);
982 }
983 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
984 ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
985 zp->z_pflags, tx);
986 XVA_SET_RTN(xvap, XAT_ARCHIVE);
987 }
988 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
989 ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
990 zp->z_pflags, tx);
991 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
992 }
993 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
994 ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
995 zp->z_pflags, tx);
996 XVA_SET_RTN(xvap, XAT_NOUNLINK);
997 }
998 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
999 ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
1000 zp->z_pflags, tx);
1001 XVA_SET_RTN(xvap, XAT_APPENDONLY);
1002 }
1003 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1004 ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
1005 zp->z_pflags, tx);
1006 XVA_SET_RTN(xvap, XAT_NODUMP);
1007 }
1008 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1009 ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
1010 zp->z_pflags, tx);
1011 XVA_SET_RTN(xvap, XAT_OPAQUE);
1012 }
1013 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1014 ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
1015 xoap->xoa_av_quarantined, zp->z_pflags, tx);
1016 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1017 }
1018 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1019 ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
1020 zp->z_pflags, tx);
1021 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1022 }
1023 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
1024 zfs_sa_set_scanstamp(zp, xvap, tx);
1025 XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
1026 }
1027 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1028 ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
1029 zp->z_pflags, tx);
1030 XVA_SET_RTN(xvap, XAT_REPARSE);
1031 }
1032 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1033 ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
1034 zp->z_pflags, tx);
1035 XVA_SET_RTN(xvap, XAT_OFFLINE);
1036 }
1037 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1038 ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
1039 zp->z_pflags, tx);
1040 XVA_SET_RTN(xvap, XAT_SPARSE);
1041 }
1042}
1043
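/*
 * Look up the znode for object obj_num and return it with a reference held
 * on its inode. If the znode is not already in memory it is constructed
 * from the object's SA/bonus buffer; unlinked files return ENOENT.
 */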
34dc7c2f 1044int
3558fd73 1045zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
34dc7c2f
BB
1046{
1047 dmu_object_info_t doi;
1048 dmu_buf_t *db;
1049 znode_t *zp;
c96c36fa 1050 znode_hold_t *zh;
34dc7c2f 1051 int err;
428870ff 1052 sa_handle_t *hdl;
34dc7c2f
BB
1053
1054 *zpp = NULL;
1055
6f9548c4 1056again:
c96c36fa 1057 zh = zfs_znode_hold_enter(zsb, obj_num);
34dc7c2f 1058
3558fd73 1059 err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
34dc7c2f 1060 if (err) {
c96c36fa 1061 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1062 return (err);
1063 }
1064
1065 dmu_object_info_from_db(db, &doi);
428870ff
BB
1066 if (doi.doi_bonus_type != DMU_OT_SA &&
1067 (doi.doi_bonus_type != DMU_OT_ZNODE ||
1068 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1069 doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1070 sa_buf_rele(db, NULL);
c96c36fa 1071 zfs_znode_hold_exit(zsb, zh);
2e528b49 1072 return (SET_ERROR(EINVAL));
34dc7c2f
BB
1073 }
1074
428870ff
BB
1075 hdl = dmu_buf_get_user(db);
1076 if (hdl != NULL) {
36df2843 1077 zp = sa_get_userdata(hdl);
34dc7c2f 1078
8ac67298 1079
34dc7c2f 1080 /*
428870ff
BB
1081 * Since "SA" does immediate eviction we
1082 * should never find a sa handle that doesn't
1083 * know about the znode.
34dc7c2f 1084 */
428870ff
BB
1085
1086 ASSERT3P(zp, !=, NULL);
1087
1088 mutex_enter(&zp->z_lock);
34dc7c2f
BB
1089 ASSERT3U(zp->z_id, ==, obj_num);
1090 if (zp->z_unlinked) {
2e528b49 1091 err = SET_ERROR(ENOENT);
34dc7c2f 1092 } else {
6f9548c4
RY
1093 /*
1094 * If igrab() returns NULL the VFS has independently
1095 * determined the inode should be evicted and has
1096 * called iput_final() to start the eviction process.
1097 * The SA handle is still valid but because the VFS
1098 * requires that the eviction succeed we must drop
1099 * our locks and references to allow the eviction to
1100 * complete. The zfs_zget() may then be retried.
1101 *
1102 * This unlikely case could be optimized by registering
1103 * a sops->drop_inode() callback. The callback would
1104 * need to detect the active SA hold thereby informing
1105 * the VFS that this inode should not be evicted.
1106 */
1107 if (igrab(ZTOI(zp)) == NULL) {
1108 mutex_exit(&zp->z_lock);
1109 sa_buf_rele(db, NULL);
c96c36fa 1110 zfs_znode_hold_exit(zsb, zh);
6102d037 1111 /* inode might need this to finish evict */
1112 cond_resched();
6f9548c4
RY
1113 goto again;
1114 }
34dc7c2f
BB
1115 *zpp = zp;
1116 err = 0;
1117 }
34dc7c2f 1118 mutex_exit(&zp->z_lock);
f3ad9cd6 1119 sa_buf_rele(db, NULL);
c96c36fa 1120 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1121 return (err);
1122 }
1123
1124 /*
3558fd73 1125 * Not found; create a new znode/vnode, but only if the file exists.
428870ff
BB
1126 *
1127 * There is a small window where zfs_vget() could
1128 * find this object while a file create is still in
1129 * progress. This is checked for in zfs_znode_alloc().
1130 *
1131 * If zfs_znode_alloc() fails it will drop the hold on the
1132 * bonus buffer.
34dc7c2f 1133 */
3558fd73 1134 zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
31b6111f 1135 doi.doi_bonus_type, obj_num, NULL);
428870ff 1136 if (zp == NULL) {
2e528b49 1137 err = SET_ERROR(ENOENT);
428870ff
BB
1138 } else {
1139 *zpp = zp;
1140 }
c96c36fa 1141 zfs_znode_hold_exit(zsb, zh);
428870ff 1142 return (err);
34dc7c2f
BB
1143}
1144
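/*
 * Re-read an existing znode's attributes from disk and refresh the in-core
 * inode (for example after a rollback). Returns EIO if the object's
 * generation no longer matches the inode.
 */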
1145int
1146zfs_rezget(znode_t *zp)
1147{
3558fd73 1148 zfs_sb_t *zsb = ZTOZSB(zp);
34dc7c2f
BB
1149 dmu_object_info_t doi;
1150 dmu_buf_t *db;
1151 uint64_t obj_num = zp->z_id;
428870ff 1152 uint64_t mode;
dfbc8630 1153 uint64_t links;
0df9673f 1154 sa_bulk_attr_t bulk[7];
34dc7c2f 1155 int err;
428870ff
BB
1156 int count = 0;
1157 uint64_t gen;
2c6abf15 1158 uint64_t z_uid, z_gid;
c96c36fa 1159 znode_hold_t *zh;
34dc7c2f 1160
cbecb4fb
CC
1161 /*
1162 * Skip ctldir znodes, otherwise they will always get invalidated. This
1163 * will cause funny behaviour for the mounted snapdirs. Especially for
1164 * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent
1165 * anyone from automounting it again as long as someone is still using the
1166 * detached mount.
1167 */
1168 if (zp->z_is_ctldir)
1169 return (0);
1170
c96c36fa 1171 zh = zfs_znode_hold_enter(zsb, obj_num);
34dc7c2f 1172
428870ff
BB
1173 mutex_enter(&zp->z_acl_lock);
1174 if (zp->z_acl_cached) {
1175 zfs_acl_free(zp->z_acl_cached);
1176 zp->z_acl_cached = NULL;
1177 }
428870ff 1178 mutex_exit(&zp->z_acl_lock);
7b3e34ba 1179
228b461b 1180 rw_enter(&zp->z_xattr_lock, RW_WRITER);
7b3e34ba
BB
1181 if (zp->z_xattr_cached) {
1182 nvlist_free(zp->z_xattr_cached);
1183 zp->z_xattr_cached = NULL;
1184 }
7b3e34ba
BB
1185 rw_exit(&zp->z_xattr_lock);
1186
428870ff 1187 ASSERT(zp->z_sa_hdl == NULL);
3558fd73 1188 err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
34dc7c2f 1189 if (err) {
c96c36fa 1190 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1191 return (err);
1192 }
1193
1194 dmu_object_info_from_db(db, &doi);
428870ff
BB
1195 if (doi.doi_bonus_type != DMU_OT_SA &&
1196 (doi.doi_bonus_type != DMU_OT_ZNODE ||
1197 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1198 doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1199 sa_buf_rele(db, NULL);
c96c36fa 1200 zfs_znode_hold_exit(zsb, zh);
2e528b49 1201 return (SET_ERROR(EINVAL));
34dc7c2f
BB
1202 }
1203
3558fd73 1204 zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL);
428870ff
BB
1205
1206 /* reload cached values */
3558fd73 1207 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL,
428870ff 1208 &gen, sizeof (gen));
3558fd73 1209 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
428870ff 1210 &zp->z_size, sizeof (zp->z_size));
3558fd73 1211 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
dfbc8630 1212 &links, sizeof (links));
3558fd73 1213 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
428870ff 1214 &zp->z_pflags, sizeof (zp->z_pflags));
3558fd73 1215 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL,
2c6abf15 1216 &z_uid, sizeof (z_uid));
3558fd73 1217 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL,
2c6abf15 1218 &z_gid, sizeof (z_gid));
3558fd73 1219 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL,
428870ff
BB
1220 &mode, sizeof (mode));
1221
428870ff
BB
1222 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
1223 zfs_znode_dmu_fini(zp);
c96c36fa 1224 zfs_znode_hold_exit(zsb, zh);
2e528b49 1225 return (SET_ERROR(EIO));
428870ff
BB
1226 }
1227
572e2857 1228 zp->z_mode = mode;
2c6abf15
NB
1229 zfs_uid_write(ZTOI(zp), z_uid);
1230 zfs_gid_write(ZTOI(zp), z_gid);
572e2857 1231
278f2236 1232 if (gen != ZTOI(zp)->i_generation) {
428870ff 1233 zfs_znode_dmu_fini(zp);
c96c36fa 1234 zfs_znode_hold_exit(zsb, zh);
2e528b49 1235 return (SET_ERROR(EIO));
34dc7c2f
BB
1236 }
1237
dfbc8630
CD
1238 zp->z_unlinked = (ZTOI(zp)->i_nlink == 0);
1239 set_nlink(ZTOI(zp), (uint32_t)links);
1240
34dc7c2f 1241 zp->z_blksz = doi.doi_data_block_size;
704cd075
CC
1242 zp->z_atime_dirty = 0;
1243 zfs_inode_update_new(zp);
34dc7c2f 1244
c96c36fa 1245 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1246
1247 return (0);
1248}
1249
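/*
 * Free the on-disk object backing the znode (and any external ACL object)
 * in the given transaction, then release the znode's SA handle.
 */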
1250void
1251zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1252{
3558fd73
BB
1253 zfs_sb_t *zsb = ZTOZSB(zp);
1254 objset_t *os = zsb->z_os;
34dc7c2f 1255 uint64_t obj = zp->z_id;
572e2857 1256 uint64_t acl_obj = zfs_external_acl(zp);
c96c36fa 1257 znode_hold_t *zh;
34dc7c2f 1258
c96c36fa 1259 zh = zfs_znode_hold_enter(zsb, obj);
572e2857
BB
1260 if (acl_obj) {
1261 VERIFY(!zp->z_is_sa);
b128c09f 1262 VERIFY(0 == dmu_object_free(os, acl_obj, tx));
572e2857 1263 }
b128c09f 1264 VERIFY(0 == dmu_object_free(os, obj, tx));
34dc7c2f 1265 zfs_znode_dmu_fini(zp);
c96c36fa 1266 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1267}
1268
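/*
 * Called when the inode is released: if the file has been unlinked and this
 * was the last reference, remove it from the file system; otherwise just
 * tear down the znode's DMU/SA state.
 */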
1269void
1270zfs_zinactive(znode_t *zp)
1271{
3558fd73 1272 zfs_sb_t *zsb = ZTOZSB(zp);
34dc7c2f 1273 uint64_t z_id = zp->z_id;
c96c36fa 1274 znode_hold_t *zh;
34dc7c2f 1275
428870ff 1276 ASSERT(zp->z_sa_hdl);
34dc7c2f
BB
1277
1278 /*
d6bd8eaa 1279 * Don't allow a zfs_zget() while we're trying to release this znode.
34dc7c2f 1280 */
c96c36fa 1281 zh = zfs_znode_hold_enter(zsb, z_id);
d6bd8eaa 1282
34dc7c2f 1283 mutex_enter(&zp->z_lock);
34dc7c2f
BB
1284
1285 /*
1286 * If this was the last reference to a file with no links,
1287 * remove the file from the file system.
1288 */
1289 if (zp->z_unlinked) {
1290 mutex_exit(&zp->z_lock);
c96c36fa 1291 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1292 zfs_rmnode(zp);
1293 return;
1294 }
428870ff 1295
34dc7c2f
BB
1296 mutex_exit(&zp->z_lock);
1297 zfs_znode_dmu_fini(zp);
d6bd8eaa 1298
c96c36fa 1299 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1300}
1301
6d111134
TC
1302static inline int
1303zfs_compare_timespec(struct timespec *t1, struct timespec *t2)
1304{
1305 if (t1->tv_sec < t2->tv_sec)
1306 return (-1);
1307
1308 if (t1->tv_sec > t2->tv_sec)
1309 return (1);
1310
1311 return (t1->tv_nsec - t2->tv_nsec);
1312}
1313
6d111134
TC
1314/*
1315 * Prepare to update znode time stamps.
1316 *
1317 * IN: zp - znode requiring timestamp update
0df9673f 1318 * flag - ATTR_MTIME, ATTR_CTIME flags
6d111134 1319 *
0df9673f 1320 * OUT: zp - z_seq
6d111134
TC
1321 * mtime - new mtime
1322 * ctime - new ctime
1323 *
0df9673f
CC
1324 * Note: We don't update atime here, because we rely on Linux VFS to do
1325 * atime updating.
6d111134 1326 */
34dc7c2f 1327void
428870ff 1328zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
0df9673f 1329 uint64_t ctime[2])
34dc7c2f
BB
1330{
1331 timestruc_t now;
1332
34dc7c2f
BB
1333 gethrestime(&now);
1334
0df9673f 1335 zp->z_seq++;
34dc7c2f 1336
3558fd73 1337 if (flag & ATTR_MTIME) {
428870ff 1338 ZFS_TIME_ENCODE(&now, mtime);
3558fd73 1339 if (ZTOZSB(zp)->z_use_fuids) {
428870ff
BB
1340 zp->z_pflags |= (ZFS_ARCHIVE |
1341 ZFS_AV_MODIFIED);
1342 }
34dc7c2f
BB
1343 }
1344
3558fd73 1345 if (flag & ATTR_CTIME) {
428870ff 1346 ZFS_TIME_ENCODE(&now, ctime);
3558fd73 1347 if (ZTOZSB(zp)->z_use_fuids)
428870ff 1348 zp->z_pflags |= ZFS_ARCHIVE;
34dc7c2f
BB
1349 }
1350}
1351
34dc7c2f
BB
1352/*
1353 * Grow the block size for a file.
1354 *
1355 * IN: zp - znode of file whose block size is to be grown.
1356 * size - requested block size
1357 * tx - open transaction.
1358 *
1359 * NOTE: this function assumes that the znode is write locked.
1360 */
1361void
1362zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1363{
1364 int error;
1365 u_longlong_t dummy;
1366
1367 if (size <= zp->z_blksz)
1368 return;
1369 /*
1370 * If the file size is already greater than the current blocksize,
1371 * we will not grow. If there is more than one block in a file,
1372 * the blocksize cannot change.
1373 */
428870ff 1374 if (zp->z_blksz && zp->z_size > zp->z_blksz)
34dc7c2f
BB
1375 return;
1376
3558fd73 1377 error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id,
34dc7c2f 1378 size, 0, tx);
428870ff 1379
34dc7c2f
BB
1380 if (error == ENOTSUP)
1381 return;
c99c9001 1382 ASSERT0(error);
34dc7c2f
BB
1383
1384 /* What blocksize did we actually get? */
428870ff 1385 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
34dc7c2f
BB
1386}
1387
34dc7c2f 1388/*
b128c09f 1389 * Increase the file length
34dc7c2f
BB
1390 *
1391 * IN: zp - znode of file to extend.
b128c09f 1392 * end - new end-of-file
34dc7c2f 1393 *
19d55079 1394 * RETURN: 0 on success, error code on failure
34dc7c2f 1395 */
b128c09f
BB
1396static int
1397zfs_extend(znode_t *zp, uint64_t end)
34dc7c2f 1398{
3558fd73 1399 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f 1400 dmu_tx_t *tx;
34dc7c2f 1401 rl_t *rl;
b128c09f 1402 uint64_t newblksz;
34dc7c2f
BB
1403 int error;
1404
34dc7c2f 1405 /*
b128c09f 1406 * We will change zp_size, lock the whole file.
34dc7c2f 1407 */
d88895a0 1408 rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
34dc7c2f
BB
1409
1410 /*
1411 * Nothing to do if file already at desired length.
1412 */
428870ff 1413 if (end <= zp->z_size) {
34dc7c2f
BB
1414 zfs_range_unlock(rl);
1415 return (0);
1416 }
3558fd73 1417 tx = dmu_tx_create(zsb->z_os);
428870ff
BB
1418 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1419 zfs_sa_upgrade_txholds(tx, zp);
b128c09f 1420 if (end > zp->z_blksz &&
3558fd73 1421 (!ISP2(zp->z_blksz) || zp->z_blksz < zsb->z_max_blksz)) {
34dc7c2f
BB
1422 /*
1423 * We are growing the file past the current block size.
1424 */
3558fd73 1425 if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) {
f1512ee6
MA
1426 /*
1427 * File's blocksize is already larger than the
1428 * "recordsize" property. Only let it grow to
1429 * the next power of 2.
1430 */
34dc7c2f 1431 ASSERT(!ISP2(zp->z_blksz));
f1512ee6 1432 newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
34dc7c2f 1433 } else {
3558fd73 1434 newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz);
34dc7c2f 1435 }
b128c09f
BB
1436 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
1437 } else {
1438 newblksz = 0;
34dc7c2f
BB
1439 }
1440
384f8a09 1441 error = dmu_tx_assign(tx, TXG_WAIT);
34dc7c2f 1442 if (error) {
34dc7c2f
BB
1443 dmu_tx_abort(tx);
1444 zfs_range_unlock(rl);
1445 return (error);
1446 }
1447
b128c09f
BB
1448 if (newblksz)
1449 zfs_grow_blocksize(zp, newblksz, tx);
34dc7c2f 1450
428870ff
BB
1451 zp->z_size = end;
1452
3558fd73 1453 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
428870ff 1454 &zp->z_size, sizeof (zp->z_size), tx));
34dc7c2f 1455
b128c09f 1456 zfs_range_unlock(rl);
34dc7c2f 1457
b128c09f 1458 dmu_tx_commit(tx);
34dc7c2f 1459
b128c09f
BB
1460 return (0);
1461}
1462
223df016
TC
1463/*
1464 * zfs_zero_partial_page - Modeled after update_pages() but
1465 * with different arguments and semantics for use by zfs_freesp().
1466 *
1467 * Zeroes a piece of a single page cache entry for zp at offset
1468 * start and length len.
1469 *
1470 * Caller must acquire a range lock on the file for the region
1471 * being zeroed in order that the ARC and page cache stay in sync.
1472 */
1473static void
1474zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
1475{
1476 struct address_space *mp = ZTOI(zp)->i_mapping;
1477 struct page *pp;
1478 int64_t off;
1479 void *pb;
1480
8b1899d3 1481 ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK));
223df016 1482
8b1899d3
BB
1483 off = start & (PAGE_SIZE - 1);
1484 start &= PAGE_MASK;
223df016 1485
8b1899d3 1486 pp = find_lock_page(mp, start >> PAGE_SHIFT);
223df016
TC
1487 if (pp) {
1488 if (mapping_writably_mapped(mp))
1489 flush_dcache_page(pp);
1490
1491 pb = kmap(pp);
1492 bzero(pb + off, len);
1493 kunmap(pp);
1494
1495 if (mapping_writably_mapped(mp))
1496 flush_dcache_page(pp);
1497
1498 mark_page_accessed(pp);
1499 SetPageUptodate(pp);
1500 ClearPageError(pp);
1501 unlock_page(pp);
8b1899d3 1502 put_page(pp);
223df016
TC
1503 }
1504}
1505
b128c09f
BB
1506/*
1507 * Free space in a file.
1508 *
1509 * IN: zp - znode of file to free data in.
1510 * off - start of section to free.
1511 * len - length of section to free.
1512 *
19d55079 1513 * RETURN: 0 on success, error code on failure
b128c09f
BB
1514 */
1515static int
1516zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1517{
3558fd73 1518 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f
BB
1519 rl_t *rl;
1520 int error;
1521
1522 /*
1523 * Lock the range being freed.
1524 */
d88895a0 1525 rl = zfs_range_lock(&zp->z_range_lock, off, len, RL_WRITER);
b128c09f
BB
1526
1527 /*
1528 * Nothing to do if file already at desired length.
1529 */
428870ff 1530 if (off >= zp->z_size) {
b128c09f
BB
1531 zfs_range_unlock(rl);
1532 return (0);
34dc7c2f
BB
1533 }
1534
428870ff
BB
1535 if (off + len > zp->z_size)
1536 len = zp->z_size - off;
b128c09f 1537
3558fd73 1538 error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);
b128c09f 1539
223df016
TC
1540 /*
1541 * Zero partial page cache entries. This must be done under a
1542 * range lock in order to keep the ARC and page cache in sync.
1543 */
1544 if (zp->z_is_mapped) {
1545 loff_t first_page, last_page, page_len;
1546 loff_t first_page_offset, last_page_offset;
1547
1548 /* first possible full page in hole */
8b1899d3 1549 first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT;
223df016 1550 /* last page of hole */
8b1899d3 1551 last_page = (off + len) >> PAGE_SHIFT;
223df016
TC
1552
1553 /* offset of first_page */
8b1899d3 1554 first_page_offset = first_page << PAGE_SHIFT;
223df016 1555 /* offset of last_page */
8b1899d3 1556 last_page_offset = last_page << PAGE_SHIFT;
223df016 1557
cb08f063
TC
1558 /* truncate whole pages */
1559 if (last_page_offset > first_page_offset) {
1560 truncate_inode_pages_range(ZTOI(zp)->i_mapping,
1561 first_page_offset, last_page_offset - 1);
1562 }
1563
1564 /* truncate sub-page ranges */
223df016
TC
1565 if (first_page > last_page) {
1566 /* entire punched area within a single page */
1567 zfs_zero_partial_page(zp, off, len);
1568 } else {
1569 /* beginning of punched area at the end of a page */
1570 page_len = first_page_offset - off;
1571 if (page_len > 0)
1572 zfs_zero_partial_page(zp, off, page_len);
1573
1574 /* end of punched area at the beginning of a page */
1575 page_len = off + len - last_page_offset;
1576 if (page_len > 0)
1577 zfs_zero_partial_page(zp, last_page_offset,
1578 page_len);
1579 }
1580 }
34dc7c2f
BB
1581 zfs_range_unlock(rl);
1582
b128c09f
BB
1583 return (error);
1584}
1585
1586/*
1587 * Truncate a file
1588 *
1589 * IN: zp - znode of file to truncate.
1590 * end - new end-of-file.
1591 *
19d55079 1592 * RETURN: 0 on success, error code on failure
b128c09f
BB
1593 */
1594static int
1595zfs_trunc(znode_t *zp, uint64_t end)
1596{
3558fd73 1597 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f
BB
1598 dmu_tx_t *tx;
1599 rl_t *rl;
1600 int error;
572e2857
BB
1601 sa_bulk_attr_t bulk[2];
1602 int count = 0;
b128c09f
BB
1603
1604 /*
1605 * We will change zp_size, lock the whole file.
1606 */
d88895a0 1607 rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
b128c09f
BB
1608
1609 /*
1610 * Nothing to do if file already at desired length.
1611 */
428870ff 1612 if (end >= zp->z_size) {
b128c09f
BB
1613 zfs_range_unlock(rl);
1614 return (0);
1615 }
1616
3558fd73 1617 error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1);
b128c09f
BB
1618 if (error) {
1619 zfs_range_unlock(rl);
1620 return (error);
1621 }
3558fd73 1622 tx = dmu_tx_create(zsb->z_os);
428870ff
BB
1623 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1624 zfs_sa_upgrade_txholds(tx, zp);
19d55079 1625 dmu_tx_mark_netfree(tx);
7a8f0e80 1626 error = dmu_tx_assign(tx, TXG_WAIT);
b128c09f 1627 if (error) {
b128c09f
BB
1628 dmu_tx_abort(tx);
1629 zfs_range_unlock(rl);
1630 return (error);
1631 }
b128c09f 1632
428870ff 1633 zp->z_size = end;
3558fd73 1634 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
572e2857 1635 NULL, &zp->z_size, sizeof (zp->z_size));
428870ff 1636
572e2857
BB
1637 if (end == 0) {
1638 zp->z_pflags &= ~ZFS_SPARSE;
3558fd73 1639 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
572e2857
BB
1640 NULL, &zp->z_pflags, 8);
1641 }
1642 VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
b128c09f 1643
34dc7c2f
BB
1644 dmu_tx_commit(tx);
1645
d164b209 1646 zfs_range_unlock(rl);
34dc7c2f
BB
1647
1648 return (0);
1649}
1650
b128c09f
BB
1651/*
1652 * Free space in a file
1653 *
1654 * IN: zp - znode of file to free data in.
1655 * off - start of range
1656 * len - end of range (0 => EOF)
1657 * flag - current file open mode flags.
1658 * log - TRUE if this action should be logged
1659 *
19d55079 1660 * RETURN: 0 on success, error code on failure
b128c09f
BB
1661 */
1662int
1663zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
1664{
b128c09f 1665 dmu_tx_t *tx;
3558fd73
BB
1666 zfs_sb_t *zsb = ZTOZSB(zp);
1667 zilog_t *zilog = zsb->z_log;
428870ff
BB
1668 uint64_t mode;
1669 uint64_t mtime[2], ctime[2];
1670 sa_bulk_attr_t bulk[3];
1671 int count = 0;
b128c09f
BB
1672 int error;
1673
3558fd73 1674 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zsb), &mode,
428870ff
BB
1675 sizeof (mode))) != 0)
1676 return (error);
1677
1678 if (off > zp->z_size) {
b128c09f
BB
1679 error = zfs_extend(zp, off+len);
1680 if (error == 0 && log)
1681 goto log;
223df016 1682 goto out;
b128c09f
BB
1683 }
1684
b128c09f
BB
1685 if (len == 0) {
1686 error = zfs_trunc(zp, off);
1687 } else {
1688 if ((error = zfs_free_range(zp, off, len)) == 0 &&
428870ff 1689 off + len > zp->z_size)
b128c09f
BB
1690 error = zfs_extend(zp, off+len);
1691 }
1692 if (error || !log)
223df016 1693 goto out;
b128c09f 1694log:
3558fd73 1695 tx = dmu_tx_create(zsb->z_os);
428870ff
BB
1696 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1697 zfs_sa_upgrade_txholds(tx, zp);
384f8a09 1698 error = dmu_tx_assign(tx, TXG_WAIT);
b128c09f 1699 if (error) {
b128c09f 1700 dmu_tx_abort(tx);
223df016 1701 goto out;
b128c09f
BB
1702 }
1703
3558fd73
BB
1704 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16);
1705 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16);
1706 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
428870ff 1707 NULL, &zp->z_pflags, 8);
0df9673f 1708 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
428870ff
BB
1709 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1710 ASSERT(error == 0);
1711
b128c09f
BB
1712 zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
1713
1714 dmu_tx_commit(tx);
223df016 1715
960e08fe 1716 zfs_inode_update(zp);
223df016
TC
1717 error = 0;
1718
1719out:
1720 /*
1721 * Truncate the page cache - for file truncate operations, use
1722 * the purpose-built API for truncations. For punching operations,
cb08f063 1723 * the truncation is handled under a range lock in zfs_free_range.
223df016
TC
1724 */
1725 if (len == 0)
1726 truncate_setsize(ZTOI(zp), off);
223df016 1727 return (error);
b128c09f
BB
1728}
1729
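/*
 * Create the initial ZPL layout in a new objset: the master node, the SA
 * attribute registration (for versions >= ZPL_VERSION_SA), the delete
 * (unlinked) queue, and the root directory znode.
 */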
34dc7c2f
BB
1730void
1731zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
1732{
22872ff5
BB
1733 struct super_block *sb;
1734 zfs_sb_t *zsb;
428870ff 1735 uint64_t moid, obj, sa_obj, version;
22872ff5 1736 uint64_t sense = ZFS_CASE_SENSITIVE;
34dc7c2f
BB
1737 uint64_t norm = 0;
1738 nvpair_t *elem;
c96c36fa 1739 int size;
34dc7c2f 1740 int error;
22872ff5
BB
1741 int i;
1742 znode_t *rootzp = NULL;
1743 vattr_t vattr;
1744 znode_t *zp;
1745 zfs_acl_ids_t acl_ids;
34dc7c2f
BB
1746
1747 /*
1748 * First attempt to create master node.
1749 */
1750 /*
1751 * In an empty objset, there are no blocks to read and thus
1752 * there can be no i/o errors (which we assert below).
1753 */
1754 moid = MASTER_NODE_OBJ;
1755 error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
1756 DMU_OT_NONE, 0, tx);
1757 ASSERT(error == 0);
1758
50c957f7
NB
1759 /*
1760 * Give dmu_object_alloc() a hint about where to start
1761 * allocating new objects. Otherwise, since the metadnode's
1762 * dnode_phys_t structure isn't initialized yet, dmu_object_next()
1763 * would fail and we'd have to skip to the next dnode block.
1764 */
1765 os->os_obj_next = moid + 1;
1766
34dc7c2f
BB
1767 /*
1768 * Set starting attributes.
1769 */
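/*
 * The zplprops nvlist supplied by the caller holds uint64_t ZPL properties
 * (version, normalization, case sensitivity, and so on). A minimal sketch of
 * how a hypothetical caller might populate it:
 *
 *	nvlist_add_uint64(zplprops,
 *	    zfs_prop_to_name(ZFS_PROP_VERSION), ZPL_VERSION);
 *	nvlist_add_uint64(zplprops,
 *	    zfs_prop_to_name(ZFS_PROP_CASE), ZFS_CASE_SENSITIVE);
 */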
428870ff 1770 version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
34dc7c2f
BB
1771 elem = NULL;
1772 while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
1773 /* For the moment we expect all zpl props to be uint64_ts */
1774 uint64_t val;
1775 char *name;
1776
1777 ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
1778 VERIFY(nvpair_value_uint64(elem, &val) == 0);
1779 name = nvpair_name(elem);
1780 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
9babb374
BB
1781 if (val < version)
1782 version = val;
34dc7c2f
BB
1783 } else {
1784 error = zap_update(os, moid, name, 8, 1, &val, tx);
1785 }
1786 ASSERT(error == 0);
1787 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
1788 norm = val;
22872ff5
BB
1789 else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
1790 sense = val;
34dc7c2f
BB
1791 }
1792 ASSERT(version != 0);
9babb374 1793 error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
34dc7c2f 1794
428870ff
BB
1795 /*
1796 * Create zap object used for SA attribute registration
1797 */
1798
1799 if (version >= ZPL_VERSION_SA) {
1800 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
1801 DMU_OT_NONE, 0, tx);
1802 error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
1803 ASSERT(error == 0);
1804 } else {
1805 sa_obj = 0;
1806 }
34dc7c2f
BB
1807 /*
1808 * Create a delete queue.
1809 */
9babb374 1810 obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
34dc7c2f 1811
9babb374 1812 error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
34dc7c2f
BB
1813 ASSERT(error == 0);
1814
9babb374 1815 /*
22872ff5
BB
1816 * Create the root znode. Build a minimal znode/inode/zsb/sb
1817 * so that zfs_mknode() can work.
9babb374 1818 */
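 /*
  * Nothing is mounted at this point, so the zfs_sb_t and super_block
  * allocated below are throwaway scaffolding, built only so that
  * zfs_acl_ids_create() and zfs_mknode() can run; they are torn down
  * again at the end of this function.
  */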
22872ff5
BB
1819 vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
1820 vattr.va_mode = S_IFDIR|0755;
1821 vattr.va_uid = crgetuid(cr);
1822 vattr.va_gid = crgetgid(cr);
1823
79c76d5b 1824 rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
22872ff5
BB
1825 rootzp->z_moved = 0;
1826 rootzp->z_unlinked = 0;
1827 rootzp->z_atime_dirty = 0;
1828 rootzp->z_is_sa = USE_SA(version, os);
1829
79c76d5b 1830 zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
22872ff5
BB
1831 zsb->z_os = os;
1832 zsb->z_parent = zsb;
1833 zsb->z_version = version;
1834 zsb->z_use_fuids = USE_FUIDS(version, os);
1835 zsb->z_use_sa = USE_SA(version, os);
1836 zsb->z_norm = norm;
1837
79c76d5b 1838 sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
22872ff5
BB
1839 sb->s_fs_info = zsb;
1840
1841 ZTOI(rootzp)->i_sb = sb;
1842
1843 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
1844 &zsb->z_attr_table);
9babb374 1845
22872ff5 1846 ASSERT(error == 0);
9babb374 1847
60101509 1848 /*
22872ff5
BB
1849 * Fold case on file systems that are always or sometimes case
1850 * insensitive.
60101509 1851 */
22872ff5
BB
1852 if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
1853 zsb->z_norm |= U8_TEXTPREP_TOUPPER;
60101509 1854
22872ff5
BB
1855 mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1856 list_create(&zsb->z_all_znodes, sizeof (znode_t),
1857 offsetof(znode_t, z_link_node));
60101509 1858
c96c36fa
BB
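 /*
  * Size the per-filesystem object hold arrays: round the
  * zfs_object_mutex_size tunable down to a power of two and cap it at
  * ZFS_OBJ_MTX_MAX (e.g. a setting of 300 yields 256 AVL trees and locks).
  */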
1859 size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
1860 zsb->z_hold_size = size;
1861 zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP);
1862 zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
1863 for (i = 0; i != size; i++) {
1864 avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
1865 sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
1866 mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
1867 }
60101509 1868
22872ff5
BB
1869 VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
1870 cr, NULL, &acl_ids));
1871 zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
1872 ASSERT3P(zp, ==, rootzp);
1873 error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
1874 ASSERT(error == 0);
1875 zfs_acl_ids_free(&acl_ids);
60101509 1876
22872ff5
BB
1877 atomic_set(&ZTOI(rootzp)->i_count, 0);
1878 sa_handle_destroy(rootzp->z_sa_hdl);
22872ff5
BB
1879 kmem_cache_free(znode_cache, rootzp);
1880
1881 /*
1882 * Create shares directory
1883 */
22872ff5 1884 error = zfs_create_share_dir(zsb, tx);
9babb374 1885 ASSERT(error == 0);
428870ff 1886
c96c36fa
BB
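 /*
  * The bootstrap scaffolding is no longer needed: destroy the hold trees
  * and locks, then free the temporary super_block and zfs_sb_t allocated
  * above.
  */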
1887 for (i = 0; i != size; i++) {
1888 avl_destroy(&zsb->z_hold_trees[i]);
1889 mutex_destroy(&zsb->z_hold_locks[i]);
1890 }
2708f716 1891
c96c36fa
BB
1892 vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size);
1893 vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size);
2708f716
BB
1894 kmem_free(sb, sizeof (struct super_block));
1895 kmem_free(zsb, sizeof (zfs_sb_t));
34dc7c2f 1896}
34dc7c2f 1897#endif /* _KERNEL */
428870ff 1898
34dc7c2f 1899static int
572e2857
BB
1900zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
1901{
1902 uint64_t sa_obj = 0;
1903 int error;
1904
1905 error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
1906 if (error != 0 && error != ENOENT)
1907 return (error);
1908
1909 error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
1910 return (error);
1911}
1912
1913static int
1914zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
7b8518cb 1915 dmu_buf_t **db, void *tag)
34dc7c2f 1916{
34dc7c2f 1917 dmu_object_info_t doi;
34dc7c2f 1918 int error;
428870ff 1919
7b8518cb 1920 if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
34dc7c2f
BB
1921 return (error);
1922
572e2857 1923 dmu_object_info_from_db(*db, &doi);
428870ff
BB
1924 if ((doi.doi_bonus_type != DMU_OT_SA &&
1925 doi.doi_bonus_type != DMU_OT_ZNODE) ||
d6320ddb
BB
1926 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1927 doi.doi_bonus_size < sizeof (znode_phys_t))) {
7b8518cb 1928 sa_buf_rele(*db, tag);
2e528b49 1929 return (SET_ERROR(ENOTSUP));
34dc7c2f
BB
1930 }
1931
572e2857
BB
1932 error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
1933 if (error != 0) {
7b8518cb 1934 sa_buf_rele(*db, tag);
428870ff
BB
1935 return (error);
1936 }
1937
572e2857
BB
1938 return (0);
1939}
1940
1941void
7b8518cb 1942zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
572e2857
BB
1943{
1944 sa_handle_destroy(hdl);
7b8518cb 1945 sa_buf_rele(db, tag);
572e2857
BB
1946}
1947
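/*
 * A minimal sketch of how the two helpers above pair up (variable names are
 * illustrative; the pattern matches the callers later in this file):
 *
 *	sa_handle_t *hdl;
 *	dmu_buf_t *db;
 *
 *	if ((error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG)) != 0)
 *		return (error);
 *	... read attributes through hdl ...
 *	zfs_release_sa_handle(hdl, db, FTAG);
 */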
1948/*
1949 * Given an object number, return its parent object number and whether
1950 * or not the object is an extended attribute directory.
1951 */
1952static int
b23ad7f3
JJ
1953zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
1954 uint64_t *pobjp, int *is_xattrdir)
572e2857
BB
1955{
1956 uint64_t parent;
1957 uint64_t pflags;
1958 uint64_t mode;
b23ad7f3 1959 uint64_t parent_mode;
572e2857 1960 sa_bulk_attr_t bulk[3];
b23ad7f3
JJ
1961 sa_handle_t *sa_hdl;
1962 dmu_buf_t *sa_db;
572e2857
BB
1963 int count = 0;
1964 int error;
1965
1966 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
1967 &parent, sizeof (parent));
428870ff 1968 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
572e2857 1969 &pflags, sizeof (pflags));
428870ff 1970 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
572e2857 1971 &mode, sizeof (mode));
428870ff 1972
572e2857 1973 if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
428870ff 1974 return (error);
572e2857 1975
b23ad7f3
JJ
1976 /*
1977 * When a link is removed, its parent pointer is not changed and will
1978 * be invalid. There are two cases where a link is removed but the
1979 * file stays around: when it goes to the delete queue and when there
1980 * are additional links.
1981 */
1982 error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
1983 if (error != 0)
1984 return (error);
1985
1986 error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
1987 zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
1988 if (error != 0)
1989 return (error);
1990
428870ff 1991 *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
34dc7c2f 1992
b23ad7f3
JJ
1993 /*
1994 * Extended attributes can be attached to files, directories, etc., so an
1995 * xattrdir's parent may be any type; otherwise it must be a directory.
1996 */
1997 if (!*is_xattrdir && !S_ISDIR(parent_mode))
1998 return (SET_ERROR(EINVAL));
1999
2000 *pobjp = parent;
2001
34dc7c2f
BB
2002 return (0);
2003}
2004
572e2857
BB
2005/*
2006 * Given an object number, return some ZPL-level statistics.
2007 */
2008static int
2009zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
2010 zfs_stat_t *sb)
34dc7c2f 2011{
572e2857
BB
2012 sa_bulk_attr_t bulk[4];
2013 int count = 0;
2014
2015 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2016 &sb->zs_mode, sizeof (sb->zs_mode));
2017 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
2018 &sb->zs_gen, sizeof (sb->zs_gen));
2019 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
2020 &sb->zs_links, sizeof (sb->zs_links));
2021 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
2022 &sb->zs_ctime, sizeof (sb->zs_ctime));
2023
2024 return (sa_bulk_lookup(hdl, bulk, count));
2025}
2026
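/*
 * zfs_obj_to_path_impl() below resolves an object to a path by repeatedly
 * looking up the parent object and prepending "/<component>" (or
 * "/<xattrdir>") into the tail of the caller's buffer, then sliding the
 * finished string to the front. For a hypothetical file c in directory b in
 * directory a, the buffer fills as "/c", then "/b/c", then "/a/b/c".
 */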
2027static int
2028zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
2029 sa_attr_type_t *sa_table, char *buf, int len)
2030{
2031 sa_handle_t *sa_hdl;
2032 sa_handle_t *prevhdl = NULL;
2033 dmu_buf_t *prevdb = NULL;
2034 dmu_buf_t *sa_db = NULL;
34dc7c2f
BB
2035 char *path = buf + len - 1;
2036 int error;
2037
2038 *path = '\0';
572e2857 2039 sa_hdl = hdl;
428870ff 2040
34dc7c2f 2041 for (;;) {
17897ce2 2042 uint64_t pobj = 0;
34dc7c2f
BB
2043 char component[MAXNAMELEN + 2];
2044 size_t complen;
17897ce2 2045 int is_xattrdir = 0;
34dc7c2f 2046
572e2857 2047 if (prevdb)
7b8518cb 2048 zfs_release_sa_handle(prevhdl, prevdb, FTAG);
572e2857 2049
b23ad7f3 2050 if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
572e2857 2051 &is_xattrdir)) != 0)
34dc7c2f
BB
2052 break;
2053
2054 if (pobj == obj) {
2055 if (path[0] != '/')
2056 *--path = '/';
2057 break;
2058 }
2059
2060 component[0] = '/';
2061 if (is_xattrdir) {
2062 (void) sprintf(component + 1, "<xattrdir>");
2063 } else {
2064 error = zap_value_search(osp, pobj, obj,
2065 ZFS_DIRENT_OBJ(-1ULL), component + 1);
2066 if (error != 0)
2067 break;
2068 }
2069
2070 complen = strlen(component);
2071 path -= complen;
2072 ASSERT(path >= buf);
2073 bcopy(component, path, complen);
2074 obj = pobj;
572e2857
BB
2075
2076 if (sa_hdl != hdl) {
2077 prevhdl = sa_hdl;
2078 prevdb = sa_db;
2079 }
7b8518cb 2080 error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
572e2857
BB
2081 if (error != 0) {
2082 sa_hdl = prevhdl;
2083 sa_db = prevdb;
2084 break;
2085 }
2086 }
2087
2088 if (sa_hdl != NULL && sa_hdl != hdl) {
2089 ASSERT(sa_db != NULL);
7b8518cb 2090 zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
34dc7c2f
BB
2091 }
2092
2093 if (error == 0)
2094 (void) memmove(buf, path, buf + len - path);
428870ff 2095
34dc7c2f
BB
2096 return (error);
2097}
572e2857
BB
2098
2099int
2100zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
2101{
2102 sa_attr_type_t *sa_table;
2103 sa_handle_t *hdl;
2104 dmu_buf_t *db;
2105 int error;
2106
2107 error = zfs_sa_setup(osp, &sa_table);
2108 if (error != 0)
2109 return (error);
2110
7b8518cb 2111 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
572e2857
BB
2112 if (error != 0)
2113 return (error);
2114
2115 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2116
7b8518cb 2117 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2118 return (error);
2119}
2120
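/*
 * Minimal usage sketch for zfs_obj_to_path(); the buffer size is the
 * caller's choice and MAXPATHLEN here is only an assumption:
 *
 *	char *buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 *	int error = zfs_obj_to_path(osp, obj, buf, MAXPATHLEN);
 *	if (error == 0)
 *		... buf now holds the dataset-relative path ...
 *	kmem_free(buf, MAXPATHLEN);
 */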
2121int
2122zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
2123 char *buf, int len)
2124{
2125 char *path = buf + len - 1;
2126 sa_attr_type_t *sa_table;
2127 sa_handle_t *hdl;
2128 dmu_buf_t *db;
2129 int error;
2130
2131 *path = '\0';
2132
2133 error = zfs_sa_setup(osp, &sa_table);
2134 if (error != 0)
2135 return (error);
2136
7b8518cb 2137 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
572e2857
BB
2138 if (error != 0)
2139 return (error);
2140
2141 error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
2142 if (error != 0) {
7b8518cb 2143 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2144 return (error);
2145 }
2146
2147 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2148
7b8518cb 2149 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2150 return (error);
2151}
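/*
 * zfs_obj_to_path() and zfs_obj_to_stats() exist for consumers that only
 * know an object number, e.g. the ZFS_IOC_OBJ_TO_PATH and
 * ZFS_IOC_OBJ_TO_STATS ioctl handlers (presumed here; see zfs_ioctl.c),
 * which resolve objects back to names and basic ZPL attributes.
 */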
c28b2279
BB
2152
2153#if defined(_KERNEL) && defined(HAVE_SPL)
2154EXPORT_SYMBOL(zfs_create_fs);
2155EXPORT_SYMBOL(zfs_obj_to_path);
0720116d
BB
2156
2157module_param(zfs_object_mutex_size, uint, 0644);
2158MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
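/*
 * The tunable above can be set when the module is loaded, for example via
 * the standard modprobe options mechanism (an illustration, not from this
 * file):
 *
 *	options zfs zfs_object_mutex_size=128
 */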
c28b2279 2159#endif