1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 */
25
26/* Portions Copyright 2007 Jeremy Teo */
27
34dc7c2f
BB
28#ifdef _KERNEL
29#include <sys/types.h>
30#include <sys/param.h>
31#include <sys/time.h>
32#include <sys/systm.h>
33#include <sys/sysmacros.h>
34#include <sys/resource.h>
35#include <sys/mntent.h>
36#include <sys/mkdev.h>
37#include <sys/u8_textprep.h>
38#include <sys/dsl_dataset.h>
39#include <sys/vfs.h>
40#include <sys/vfs_opreg.h>
41#include <sys/vnode.h>
42#include <sys/file.h>
43#include <sys/kmem.h>
44#include <sys/errno.h>
45#include <sys/unistd.h>
46#include <sys/mode.h>
47#include <sys/atomic.h>
48#include <vm/pvn.h>
49#include "fs/fs_subr.h"
50#include <sys/zfs_dir.h>
51#include <sys/zfs_acl.h>
52#include <sys/zfs_ioctl.h>
53#include <sys/zfs_rlock.h>
54#include <sys/zfs_fuid.h>
3558fd73 55#include <sys/zfs_vnops.h>
ebe7e575 56#include <sys/zfs_ctldir.h>
428870ff 57#include <sys/dnode.h>
34dc7c2f
BB
58#include <sys/fs/zfs.h>
59#include <sys/kidmap.h>
3558fd73 60#include <sys/zpl.h>
34dc7c2f
BB
61#endif /* _KERNEL */
62
63#include <sys/dmu.h>
f1512ee6 64#include <sys/dmu_objset.h>
50c957f7 65#include <sys/dmu_tx.h>
34dc7c2f
BB
66#include <sys/refcount.h>
67#include <sys/stat.h>
68#include <sys/zap.h>
69#include <sys/zfs_znode.h>
428870ff
BB
70#include <sys/sa.h>
71#include <sys/zfs_sa.h>
572e2857 72#include <sys/zfs_stat.h>
34dc7c2f
BB
73
74#include "zfs_prop.h"
428870ff 75#include "zfs_comutil.h"
34dc7c2f 76
b128c09f
BB
77/*
78 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
79 * turned on when DEBUG is also defined.
80 */
81#ifdef DEBUG
82#define ZNODE_STATS
83#endif /* DEBUG */
84
85#ifdef ZNODE_STATS
86#define ZNODE_STAT_ADD(stat) ((stat)++)
87#else
88#define ZNODE_STAT_ADD(stat) /* nothing */
89#endif /* ZNODE_STATS */
90
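/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * counter bumped with ZNODE_STAT_ADD(). When ZNODE_STATS is undefined the
 * macro expands to nothing, so such call sites cost nothing in non-DEBUG
 * builds.
 */
#if 0
static uint64_t znode_stat_example_hits;	/* hypothetical statistic */
	/* ... in some code path ... */
	ZNODE_STAT_ADD(znode_stat_example_hits);
#endif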
/*
 * Functions needed for userland (i.e. libzpool) are not put under
 * #ifdef _KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */
96#ifdef _KERNEL
9babb374 97
b128c09f 98static kmem_cache_t *znode_cache = NULL;
c96c36fa 99static kmem_cache_t *znode_hold_cache = NULL;
0720116d 100unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
34dc7c2f 101
34dc7c2f
BB
102/*ARGSUSED*/
103static int
b128c09f 104zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
34dc7c2f
BB
105{
106 znode_t *zp = buf;
107
3558fd73 108 inode_init_once(ZTOI(zp));
b128c09f
BB
109 list_link_init(&zp->z_link_node);
110
34dc7c2f 111 mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
34dc7c2f 112 rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
448d7aaa 113 rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
34dc7c2f 114 mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
82a37189 115 rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
34dc7c2f 116
d88895a0 117 zfs_rlock_init(&zp->z_range_lock);
34dc7c2f 118
b128c09f 119 zp->z_dirlocks = NULL;
45d1cae3 120 zp->z_acl_cached = NULL;
82a37189 121 zp->z_xattr_cached = NULL;
98701490 122 zp->z_xattr_parent = 0;
572e2857 123 zp->z_moved = 0;
34dc7c2f
BB
124 return (0);
125}
126
127/*ARGSUSED*/
128static void
b128c09f 129zfs_znode_cache_destructor(void *buf, void *arg)
34dc7c2f
BB
130{
131 znode_t *zp = buf;
132
b128c09f 133 ASSERT(!list_link_active(&zp->z_link_node));
34dc7c2f 134 mutex_destroy(&zp->z_lock);
34dc7c2f
BB
135 rw_destroy(&zp->z_parent_lock);
136 rw_destroy(&zp->z_name_lock);
137 mutex_destroy(&zp->z_acl_lock);
82a37189 138 rw_destroy(&zp->z_xattr_lock);
d88895a0 139 zfs_rlock_destroy(&zp->z_range_lock);
34dc7c2f 140
b128c09f 141 ASSERT(zp->z_dirlocks == NULL);
45d1cae3 142 ASSERT(zp->z_acl_cached == NULL);
82a37189 143 ASSERT(zp->z_xattr_cached == NULL);
b128c09f
BB
144}
145
c96c36fa
BB
146static int
147zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
148{
149 znode_hold_t *zh = buf;
150
151 mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
152 refcount_create(&zh->zh_refcount);
153 zh->zh_obj = ZFS_NO_OBJECT;
154
155 return (0);
156}
157
158static void
159zfs_znode_hold_cache_destructor(void *buf, void *arg)
160{
161 znode_hold_t *zh = buf;
162
163 mutex_destroy(&zh->zh_lock);
164 refcount_destroy(&zh->zh_refcount);
165}
166
34dc7c2f
BB
167void
168zfs_znode_init(void)
169{
	/*
	 * Initialize zcache. The KMC_SLAB hint is used so the cache is
	 * backed by kmalloc() on the Linux slab, which is required for
	 * wait_on_bit() operations on the embedded inode to work properly.
	 */
175 ASSERT(znode_cache == NULL);
176 znode_cache = kmem_cache_create("zfs_znode_cache",
177 sizeof (znode_t), 0, zfs_znode_cache_constructor,
5074bfe8 178 zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);
c96c36fa
BB
179
180 ASSERT(znode_hold_cache == NULL);
181 znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
182 sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
183 zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
34dc7c2f
BB
184}
185
186void
187zfs_znode_fini(void)
188{
34dc7c2f
BB
189 /*
190 * Cleanup zcache
191 */
192 if (znode_cache)
193 kmem_cache_destroy(znode_cache);
194 znode_cache = NULL;
c96c36fa
BB
195
196 if (znode_hold_cache)
197 kmem_cache_destroy(znode_hold_cache);
198 znode_hold_cache = NULL;
199}
200
/*
 * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
 * serialize access to a znode and its SA buffer while the object is being
 * created or destroyed. This kind of locking would normally reside in the
 * znode itself but in this case that's impossible because the znode and SA
 * buffer may not yet exist. Therefore the locking is handled externally
 * with an array of mutexes and AVL trees which contain per-object locks.
 *
 * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
 * into the correct AVL tree and finally the per-object lock is held. In
 * zfs_znode_hold_exit() the process is reversed. The per-object lock is
 * released, removed from the AVL tree and destroyed if there are no waiters.
 *
 * This scheme has two important properties:
 *
 * 1) No memory allocations are performed while holding one of the
 *    z_hold_locks. This ensures evict(), which can be called from direct
 *    memory reclaim, will never block waiting on a z_hold_locks which just
 *    happens to have hashed to the same index.
 *
 * 2) All locks used to serialize access to an object are per-object and
 *    never shared. This minimizes lock contention without creating a large
 *    number of dedicated locks.
 *
 * On the downside it does require znode_hold_t structures to be frequently
 * allocated and freed. However, because these are backed by a kmem cache
 * and very short lived this cost is minimal.
 */
229int
230zfs_znode_hold_compare(const void *a, const void *b)
231{
ee36c709
GN
232 const znode_hold_t *zh_a = (const znode_hold_t *)a;
233 const znode_hold_t *zh_b = (const znode_hold_t *)b;
234
235 return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj));
c96c36fa
BB
236}
237
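/*
 * Illustrative sketch (assumption, not part of the original file): the
 * per-object AVL trees that use this comparator are created during
 * filesystem setup, roughly as below. The zh_node field name and the loop
 * bound are assumptions made for illustration.
 */
#if 0
	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) {
		mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
		avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
	}
#endif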
238boolean_t
239zfs_znode_held(zfs_sb_t *zsb, uint64_t obj)
240{
241 znode_hold_t *zh, search;
242 int i = ZFS_OBJ_HASH(zsb, obj);
37c56346 243 boolean_t held;
c96c36fa
BB
244
245 search.zh_obj = obj;
246
247 mutex_enter(&zsb->z_hold_locks[i]);
248 zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
37c56346 249 held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE;
c96c36fa
BB
250 mutex_exit(&zsb->z_hold_locks[i]);
251
37c56346 252 return (held);
c96c36fa
BB
253}
254
255static znode_hold_t *
256zfs_znode_hold_enter(zfs_sb_t *zsb, uint64_t obj)
257{
258 znode_hold_t *zh, *zh_new, search;
259 int i = ZFS_OBJ_HASH(zsb, obj);
260 boolean_t found = B_FALSE;
261
262 zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
263 zh_new->zh_obj = obj;
264 search.zh_obj = obj;
265
266 mutex_enter(&zsb->z_hold_locks[i]);
267 zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
268 if (likely(zh == NULL)) {
269 zh = zh_new;
270 avl_add(&zsb->z_hold_trees[i], zh);
271 } else {
272 ASSERT3U(zh->zh_obj, ==, obj);
273 found = B_TRUE;
274 }
275 refcount_add(&zh->zh_refcount, NULL);
276 mutex_exit(&zsb->z_hold_locks[i]);
277
278 if (found == B_TRUE)
279 kmem_cache_free(znode_hold_cache, zh_new);
280
281 ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
282 ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
283 mutex_enter(&zh->zh_lock);
284
285 return (zh);
286}
287
288static void
289zfs_znode_hold_exit(zfs_sb_t *zsb, znode_hold_t *zh)
290{
291 int i = ZFS_OBJ_HASH(zsb, zh->zh_obj);
292 boolean_t remove = B_FALSE;
293
294 ASSERT(zfs_znode_held(zsb, zh->zh_obj));
295 ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
296 mutex_exit(&zh->zh_lock);
297
298 mutex_enter(&zsb->z_hold_locks[i]);
299 if (refcount_remove(&zh->zh_refcount, NULL) == 0) {
300 avl_remove(&zsb->z_hold_trees[i], zh);
301 remove = B_TRUE;
302 }
303 mutex_exit(&zsb->z_hold_locks[i]);
304
305 if (remove == B_TRUE)
306 kmem_cache_free(znode_hold_cache, zh);
34dc7c2f
BB
307}
308
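/*
 * Illustrative usage sketch (not part of the original file): a caller
 * serializing object creation/destruction brackets the work with the hold
 * functions above, as zfs_mknode() and zfs_zget() below do. The do_work()
 * callback is hypothetical.
 */
#if 0
static int
zfs_znode_hold_example(zfs_sb_t *zsb, uint64_t obj, int (*do_work)(uint64_t))
{
	znode_hold_t *zh;
	int error;

	zh = zfs_znode_hold_enter(zsb, obj);	/* find/create and lock the hold */
	error = do_work(obj);			/* znode/SA buffer may appear or vanish here */
	zfs_znode_hold_exit(zsb, zh);		/* unlock; free the hold if unreferenced */

	return (error);
}
#endif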
34dc7c2f 309int
3558fd73 310zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
34dc7c2f 311{
3c9609b3 312#ifdef HAVE_SMB_SHARE
9babb374
BB
313 zfs_acl_ids_t acl_ids;
314 vattr_t vattr;
315 znode_t *sharezp;
316 vnode_t *vp;
317 znode_t *zp;
318 int error;
34dc7c2f 319
9babb374 320 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
3558fd73 321 vattr.va_mode = S_IFDIR | 0555;
9babb374
BB
322 vattr.va_uid = crgetuid(kcred);
323 vattr.va_gid = crgetgid(kcred);
34dc7c2f 324
79c76d5b 325 sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
572e2857 326 sharezp->z_moved = 0;
9babb374
BB
327 sharezp->z_unlinked = 0;
328 sharezp->z_atime_dirty = 0;
329 sharezp->z_zfsvfs = zfsvfs;
428870ff 330 sharezp->z_is_sa = zfsvfs->z_use_sa;
34dc7c2f 331
9babb374
BB
332 vp = ZTOV(sharezp);
333 vn_reinit(vp);
334 vp->v_type = VDIR;
34dc7c2f 335
9babb374
BB
336 VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
337 kcred, NULL, &acl_ids));
428870ff 338 zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
9babb374
BB
339 ASSERT3P(zp, ==, sharezp);
340 ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
341 POINTER_INVALIDATE(&sharezp->z_zfsvfs);
342 error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
343 ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
344 zfsvfs->z_shares_dir = sharezp->z_id;
345
346 zfs_acl_ids_free(&acl_ids);
3558fd73 347 // ZTOV(sharezp)->v_count = 0;
428870ff 348 sa_handle_destroy(sharezp->z_sa_hdl);
9babb374 349 kmem_cache_free(znode_cache, sharezp);
34dc7c2f 350
9babb374 351 return (error);
9ee7fac5
BB
352#else
353 return (0);
3c9609b3 354#endif /* HAVE_SMB_SHARE */
34dc7c2f
BB
355}
356
34dc7c2f 357static void
3558fd73 358zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp,
428870ff 359 dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
34dc7c2f 360{
c96c36fa 361 ASSERT(zfs_znode_held(zsb, zp->z_id));
34dc7c2f
BB
362
363 mutex_enter(&zp->z_lock);
364
428870ff
BB
365 ASSERT(zp->z_sa_hdl == NULL);
366 ASSERT(zp->z_acl_cached == NULL);
367 if (sa_hdl == NULL) {
3558fd73 368 VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, zp,
428870ff
BB
369 SA_HDL_SHARED, &zp->z_sa_hdl));
370 } else {
371 zp->z_sa_hdl = sa_hdl;
372 sa_set_userp(sa_hdl, zp);
373 }
34dc7c2f 374
428870ff 375 zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
34dc7c2f 376
34dc7c2f 377 mutex_exit(&zp->z_lock);
34dc7c2f
BB
378}
379
380void
381zfs_znode_dmu_fini(znode_t *zp)
382{
c96c36fa 383 ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
3558fd73 384 RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
428870ff
BB
385
386 sa_handle_destroy(zp->z_sa_hdl);
387 zp->z_sa_hdl = NULL;
34dc7c2f
BB
388}
389
390/*
3558fd73
BB
391 * Called by new_inode() to allocate a new inode.
392 */
393int
394zfs_inode_alloc(struct super_block *sb, struct inode **ip)
395{
396 znode_t *zp;
397
79c76d5b 398 zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
3558fd73
BB
399 *ip = ZTOI(zp);
400
401 return (0);
402}
403
404/*
405 * Called in multiple places when an inode should be destroyed.
406 */
407void
408zfs_inode_destroy(struct inode *ip)
409{
410 znode_t *zp = ITOZ(ip);
411 zfs_sb_t *zsb = ZTOZSB(zp);
412
413 mutex_enter(&zsb->z_znodes_lock);
7b3e34ba
BB
414 if (list_link_active(&zp->z_link_node)) {
415 list_remove(&zsb->z_all_znodes, zp);
416 zsb->z_nr_znodes--;
417 }
3558fd73
BB
418 mutex_exit(&zsb->z_znodes_lock);
419
420 if (zp->z_acl_cached) {
421 zfs_acl_free(zp->z_acl_cached);
422 zp->z_acl_cached = NULL;
423 }
424
82a37189
BB
425 if (zp->z_xattr_cached) {
426 nvlist_free(zp->z_xattr_cached);
427 zp->z_xattr_cached = NULL;
428 }
429
3558fd73
BB
430 kmem_cache_free(znode_cache, zp);
431}
432
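/*
 * Illustrative sketch (assumption, not part of the original file): the two
 * helpers above are reached through the Linux VFS super_operations table,
 * which is registered by the zpl layer. The wrapper and table names below
 * are hypothetical placeholders.
 */
#if 0
static struct inode *
example_alloc_inode(struct super_block *sb)
{
	struct inode *ip = NULL;

	VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
	return (ip);
}

const struct super_operations example_super_operations = {
	.alloc_inode	= example_alloc_inode,
	.destroy_inode	= zfs_inode_destroy,	/* matches the helper above */
};
#endif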
433static void
434zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
435{
aa6d8c10 436 uint64_t rdev = 0;
3558fd73
BB
437
438 switch (ip->i_mode & S_IFMT) {
439 case S_IFREG:
440 ip->i_op = &zpl_inode_operations;
441 ip->i_fop = &zpl_file_operations;
442 ip->i_mapping->a_ops = &zpl_address_space_operations;
443 break;
444
445 case S_IFDIR:
446 ip->i_op = &zpl_dir_inode_operations;
447 ip->i_fop = &zpl_dir_file_operations;
448 ITOZ(ip)->z_zn_prefetch = B_TRUE;
449 break;
450
451 case S_IFLNK:
452 ip->i_op = &zpl_symlink_inode_operations;
453 break;
454
	/*
	 * rdev is stored in the SA only for device files.
	 */
3558fd73
BB
458 case S_IFCHR:
459 case S_IFBLK:
aecdc706 460 (void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb), &rdev,
53b1d979 461 sizeof (rdev));
aa6d8c10
NB
462 /*FALLTHROUGH*/
463 case S_IFIFO:
464 case S_IFSOCK:
3558fd73
BB
465 init_special_inode(ip, ip->i_mode, rdev);
466 ip->i_op = &zpl_special_inode_operations;
467 break;
468
469 default:
53b1d979
BB
470 zfs_panic_recover("inode %llu has invalid mode: 0x%x\n",
471 (u_longlong_t)ip->i_ino, ip->i_mode);
472
473 /* Assume the inode is a file and attempt to continue */
474 ip->i_mode = S_IFREG | 0644;
475 ip->i_op = &zpl_inode_operations;
476 ip->i_fop = &zpl_file_operations;
477 ip->i_mapping->a_ops = &zpl_address_space_operations;
478 break;
3558fd73
BB
479 }
480}
481
704cd075
CC
482/*
483 * Update the embedded inode given the znode. We should work toward
484 * eliminating this function as soon as possible by removing values
485 * which are duplicated between the znode and inode. If the generic
486 * inode has the correct field it should be used, and the ZFS code
487 * updated to access the inode. This can be done incrementally.
488 */
9f5f0019
NB
489void
490zfs_inode_update(znode_t *zp)
704cd075
CC
491{
492 zfs_sb_t *zsb;
493 struct inode *ip;
494 uint32_t blksize;
495 u_longlong_t i_blocks;
704cd075
CC
496
497 ASSERT(zp != NULL);
498 zsb = ZTOZSB(zp);
499 ip = ZTOI(zp);
500
501 /* Skip .zfs control nodes which do not exist on disk. */
502 if (zfsctl_is_node(ip))
503 return;
504
704cd075
CC
505 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
506
507 spin_lock(&ip->i_lock);
704cd075 508 ip->i_blocks = i_blocks;
704cd075
CC
509 i_size_write(ip, zp->z_size);
510 spin_unlock(&ip->i_lock);
511}
512
704cd075 513
/*
 * Construct a znode+inode and initialize.
 *
 * This does not do a call to dmu_set_user(); that is
 * up to the caller to do, in case you don't want to
 * return the znode.
 */
521static znode_t *
3558fd73 522zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
31b6111f 523 dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
34dc7c2f
BB
524{
525 znode_t *zp;
3558fd73 526 struct inode *ip;
7f89ae6b 527 uint64_t mode;
428870ff 528 uint64_t parent;
278f2236 529 uint64_t tmp_gen;
dfbc8630 530 uint64_t links;
2c6abf15 531 uint64_t z_uid, z_gid;
9f5f0019
NB
532 uint64_t atime[2], mtime[2], ctime[2];
533 sa_bulk_attr_t bulk[11];
428870ff 534 int count = 0;
34dc7c2f 535
3558fd73 536 ASSERT(zsb != NULL);
34dc7c2f 537
3558fd73
BB
538 ip = new_inode(zsb->z_sb);
539 if (ip == NULL)
540 return (NULL);
7304b6e5 541
3558fd73 542 zp = ITOZ(ip);
34dc7c2f 543 ASSERT(zp->z_dirlocks == NULL);
ebe7e575
BB
544 ASSERT3P(zp->z_acl_cached, ==, NULL);
545 ASSERT3P(zp->z_xattr_cached, ==, NULL);
572e2857 546 zp->z_moved = 0;
428870ff 547 zp->z_sa_hdl = NULL;
34dc7c2f
BB
548 zp->z_unlinked = 0;
549 zp->z_atime_dirty = 0;
550 zp->z_mapcnt = 0;
34dc7c2f
BB
551 zp->z_id = db->db_object;
552 zp->z_blksz = blksz;
553 zp->z_seq = 0x7A4653;
554 zp->z_sync_cnt = 0;
ebe7e575
BB
555 zp->z_is_mapped = B_FALSE;
556 zp->z_is_ctldir = B_FALSE;
7b3e34ba 557 zp->z_is_stale = B_FALSE;
d88895a0
CC
558 zp->z_range_lock.zr_size = &zp->z_size;
559 zp->z_range_lock.zr_blksz = &zp->z_blksz;
560 zp->z_range_lock.zr_max_blksz = &ZTOZSB(zp)->z_max_blksz;
34dc7c2f 561
3558fd73
BB
562 zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
563
7f89ae6b 564 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
278f2236 565 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &tmp_gen, 8);
3558fd73 566 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8);
dfbc8630 567 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &links, 8);
3558fd73 568 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
428870ff 569 &zp->z_pflags, 8);
3558fd73 570 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
7304b6e5 571 &parent, 8);
2c6abf15
NB
572 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &z_uid, 8);
573 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &z_gid, 8);
9f5f0019
NB
574 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &atime, 16);
575 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
576 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
428870ff 577
278f2236
NB
578 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 ||
579 tmp_gen == 0) {
580
428870ff
BB
581 if (hdl == NULL)
582 sa_handle_destroy(zp->z_sa_hdl);
07d63f0c 583 zp->z_sa_hdl = NULL;
3558fd73 584 goto error;
34dc7c2f 585 }
7304b6e5 586
12fa7f34 587 zp->z_mode = ip->i_mode = mode;
278f2236 588 ip->i_generation = (uint32_t)tmp_gen;
ba2fe6af 589 ip->i_blkbits = SPA_MINBLOCKSHIFT;
dfbc8630 590 set_nlink(ip, (uint32_t)links);
2c6abf15
NB
591 zfs_uid_write(ip, z_uid);
592 zfs_gid_write(ip, z_gid);
7f89ae6b 593
98701490
CC
594 /* Cache the xattr parent id */
595 if (zp->z_pflags & ZFS_XATTR)
596 zp->z_xattr_parent = parent;
597
9f5f0019
NB
598 ZFS_TIME_DECODE(&ip->i_atime, atime);
599 ZFS_TIME_DECODE(&ip->i_mtime, mtime);
600 ZFS_TIME_DECODE(&ip->i_ctime, ctime);
601
3558fd73 602 ip->i_ino = obj;
9f5f0019 603 zfs_inode_update(zp);
3558fd73
BB
604 zfs_inode_set_ops(zsb, ip);
605
	/*
	 * The only way insert_inode_locked() can fail is if the ip->i_ino
	 * number is already hashed for this super block. This can never
	 * happen because the inode numbers map 1:1 with the object numbers.
	 *
	 * The one exception is rolling back a mounted file system, but in
	 * this case all the active inodes are unhashed during the rollback.
	 */
614 VERIFY3S(insert_inode_locked(ip), ==, 0);
c85b224f 615
3558fd73
BB
616 mutex_enter(&zsb->z_znodes_lock);
617 list_insert_tail(&zsb->z_all_znodes, zp);
ab26409d 618 zsb->z_nr_znodes++;
b128c09f 619 membar_producer();
3558fd73 620 mutex_exit(&zsb->z_znodes_lock);
b128c09f 621
3558fd73 622 unlock_new_inode(ip);
34dc7c2f 623 return (zp);
3558fd73
BB
624
625error:
3558fd73 626 iput(ip);
d1d7e268 627 return (NULL);
34dc7c2f
BB
628}
629
1e8db771
BB
630/*
631 * Safely mark an inode dirty. Inodes which are part of a read-only
632 * file system or snapshot may not be dirtied.
633 */
634void
635zfs_mark_inode_dirty(struct inode *ip)
636{
637 zfs_sb_t *zsb = ITOZSB(ip);
638
639 if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os))
640 return;
641
642 mark_inode_dirty(ip);
643}
644
428870ff
BB
645static uint64_t empty_xattr;
646static uint64_t pad[4];
647static zfs_acl_phys_t acl_phys;
34dc7c2f
BB
648/*
649 * Create a new DMU object to hold a zfs znode.
650 *
651 * IN: dzp - parent directory for new znode
652 * vap - file attributes for new znode
653 * tx - dmu transaction id for zap operations
654 * cr - credentials of caller
655 * flag - flags:
656 * IS_ROOT_NODE - new object will be root
657 * IS_XATTR - new object is an attribute
34dc7c2f
BB
658 * bonuslen - length of bonus buffer
659 * setaclp - File/Dir initial ACL
660 * fuidp - Tracks fuid allocation.
661 *
662 * OUT: zpp - allocated znode
663 *
664 */
665void
666zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
428870ff 667 uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
34dc7c2f 668{
428870ff
BB
669 uint64_t crtime[2], atime[2], mtime[2], ctime[2];
670 uint64_t mode, size, links, parent, pflags;
572e2857 671 uint64_t dzp_pflags = 0;
428870ff 672 uint64_t rdev = 0;
3558fd73 673 zfs_sb_t *zsb = ZTOZSB(dzp);
428870ff 674 dmu_buf_t *db;
34dc7c2f
BB
675 timestruc_t now;
676 uint64_t gen, obj;
428870ff 677 int bonuslen;
50c957f7 678 int dnodesize;
428870ff
BB
679 sa_handle_t *sa_hdl;
680 dmu_object_type_t obj_type;
f30484af 681 sa_bulk_attr_t *sa_attrs;
428870ff
BB
682 int cnt = 0;
683 zfs_acl_locator_cb_t locate = { 0 };
c96c36fa 684 znode_hold_t *zh;
34dc7c2f 685
3558fd73 686 if (zsb->z_replay) {
34dc7c2f 687 obj = vap->va_nodeid;
34dc7c2f
BB
688 now = vap->va_ctime; /* see zfs_replay_create() */
689 gen = vap->va_nblocks; /* ditto */
50c957f7 690 dnodesize = vap->va_fsid; /* ditto */
34dc7c2f
BB
691 } else {
692 obj = 0;
693 gethrestime(&now);
694 gen = dmu_tx_get_txg(tx);
50c957f7 695 dnodesize = dmu_objset_dnodesize(zsb->z_os);
34dc7c2f
BB
696 }
697
50c957f7
NB
698 if (dnodesize == 0)
699 dnodesize = DNODE_MIN_SIZE;
700
3558fd73 701 obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
50c957f7 702
428870ff 703 bonuslen = (obj_type == DMU_OT_SA) ?
50c957f7 704 DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
428870ff 705
34dc7c2f
BB
706 /*
707 * Create a new DMU object.
708 */
709 /*
710 * There's currently no mechanism for pre-reading the blocks that will
572e2857 711 * be needed to allocate a new object, so we accept the small chance
34dc7c2f
BB
712 * that there will be an i/o error and we will fail one of the
713 * assertions below.
714 */
3558fd73
BB
715 if (S_ISDIR(vap->va_mode)) {
716 if (zsb->z_replay) {
50c957f7 717 VERIFY0(zap_create_claim_norm_dnsize(zsb->z_os, obj,
3558fd73 718 zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
50c957f7 719 obj_type, bonuslen, dnodesize, tx));
34dc7c2f 720 } else {
50c957f7 721 obj = zap_create_norm_dnsize(zsb->z_os,
3558fd73 722 zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
50c957f7 723 obj_type, bonuslen, dnodesize, tx);
34dc7c2f
BB
724 }
725 } else {
3558fd73 726 if (zsb->z_replay) {
50c957f7 727 VERIFY0(dmu_object_claim_dnsize(zsb->z_os, obj,
34dc7c2f 728 DMU_OT_PLAIN_FILE_CONTENTS, 0,
50c957f7 729 obj_type, bonuslen, dnodesize, tx));
34dc7c2f 730 } else {
50c957f7 731 obj = dmu_object_alloc_dnsize(zsb->z_os,
34dc7c2f 732 DMU_OT_PLAIN_FILE_CONTENTS, 0,
50c957f7 733 obj_type, bonuslen, dnodesize, tx);
34dc7c2f
BB
734 }
735 }
34dc7c2f 736
c96c36fa 737 zh = zfs_znode_hold_enter(zsb, obj);
3558fd73 738 VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));
34dc7c2f
BB
739
740 /*
741 * If this is the root, fix up the half-initialized parent pointer
742 * to reference the just-allocated physical data area.
743 */
744 if (flag & IS_ROOT_NODE) {
34dc7c2f 745 dzp->z_id = obj;
428870ff
BB
746 } else {
747 dzp_pflags = dzp->z_pflags;
34dc7c2f
BB
748 }
749
750 /*
751 * If parent is an xattr, so am I.
752 */
428870ff 753 if (dzp_pflags & ZFS_XATTR) {
34dc7c2f 754 flag |= IS_XATTR;
34dc7c2f
BB
755 }
756
3558fd73 757 if (zsb->z_use_fuids)
428870ff
BB
758 pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
759 else
760 pflags = 0;
34dc7c2f 761
3558fd73 762 if (S_ISDIR(vap->va_mode)) {
428870ff 763 size = 2; /* contents ("." and "..") */
dfbc8630 764 links = 2;
428870ff 765 } else {
dfbc8630 766 size = 0;
ace1eae8 767 links = (flag & IS_TMPFILE) ? 0 : 1;
34dc7c2f
BB
768 }
769
aa6d8c10 770 if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
dc1d7665 771 rdev = vap->va_rdev;
428870ff
BB
772
773 parent = dzp->z_id;
774 mode = acl_ids->z_mode;
34dc7c2f 775 if (flag & IS_XATTR)
428870ff 776 pflags |= ZFS_XATTR;
34dc7c2f 777
	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
	 */
781 pflags |= acl_ids->z_aclp->z_hints &
782 (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
783 ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
34dc7c2f 784
428870ff
BB
785 ZFS_TIME_ENCODE(&now, crtime);
786 ZFS_TIME_ENCODE(&now, ctime);
34dc7c2f 787
3558fd73 788 if (vap->va_mask & ATTR_ATIME) {
428870ff 789 ZFS_TIME_ENCODE(&vap->va_atime, atime);
34dc7c2f 790 } else {
428870ff 791 ZFS_TIME_ENCODE(&now, atime);
34dc7c2f
BB
792 }
793
3558fd73 794 if (vap->va_mask & ATTR_MTIME) {
428870ff
BB
795 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
796 } else {
797 ZFS_TIME_ENCODE(&now, mtime);
798 }
799
800 /* Now add in all of the "SA" attributes */
3558fd73 801 VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED,
428870ff
BB
802 &sa_hdl));
803
804 /*
805 * Setup the array of attributes to be replaced/set on the new file
806 *
807 * order for DMU_OT_ZNODE is critical since it needs to be constructed
808 * in the old znode_phys_t format. Don't change this ordering
809 */
79c76d5b 810 sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
428870ff
BB
811
812 if (obj_type == DMU_OT_ZNODE) {
3558fd73 813 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
428870ff 814 NULL, &atime, 16);
3558fd73 815 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
428870ff 816 NULL, &mtime, 16);
3558fd73 817 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
428870ff 818 NULL, &ctime, 16);
3558fd73 819 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
428870ff 820 NULL, &crtime, 16);
3558fd73 821 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
428870ff 822 NULL, &gen, 8);
3558fd73 823 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
428870ff 824 NULL, &mode, 8);
3558fd73 825 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
428870ff 826 NULL, &size, 8);
3558fd73 827 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
428870ff 828 NULL, &parent, 8);
34dc7c2f 829 } else {
3558fd73 830 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
428870ff 831 NULL, &mode, 8);
3558fd73 832 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
428870ff 833 NULL, &size, 8);
3558fd73 834 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
428870ff 835 NULL, &gen, 8);
3558fd73
BB
836 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb),
837 NULL, &acl_ids->z_fuid, 8);
838 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb),
839 NULL, &acl_ids->z_fgid, 8);
840 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
428870ff 841 NULL, &parent, 8);
3558fd73 842 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
428870ff 843 NULL, &pflags, 8);
3558fd73 844 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
428870ff 845 NULL, &atime, 16);
3558fd73 846 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
428870ff 847 NULL, &mtime, 16);
3558fd73 848 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
428870ff 849 NULL, &ctime, 16);
3558fd73 850 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
428870ff
BB
851 NULL, &crtime, 16);
852 }
853
3558fd73 854 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8);
428870ff
BB
855
856 if (obj_type == DMU_OT_ZNODE) {
3558fd73 857 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL,
428870ff 858 &empty_xattr, 8);
34dc7c2f 859 }
428870ff 860 if (obj_type == DMU_OT_ZNODE ||
aa6d8c10 861 (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
3558fd73 862 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
428870ff 863 NULL, &rdev, 8);
428870ff
BB
864 }
865 if (obj_type == DMU_OT_ZNODE) {
3558fd73 866 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
428870ff 867 NULL, &pflags, 8);
3558fd73 868 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL,
428870ff 869 &acl_ids->z_fuid, 8);
3558fd73 870 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL,
428870ff 871 &acl_ids->z_fgid, 8);
3558fd73 872 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad,
428870ff 873 sizeof (uint64_t) * 4);
3558fd73 874 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL,
428870ff
BB
875 &acl_phys, sizeof (zfs_acl_phys_t));
876 } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
3558fd73 877 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL,
428870ff
BB
878 &acl_ids->z_aclp->z_acl_count, 8);
879 locate.cb_aclp = acl_ids->z_aclp;
3558fd73 880 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb),
428870ff
BB
881 zfs_acl_data_locator, &locate,
882 acl_ids->z_aclp->z_acl_bytes);
883 mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
884 acl_ids->z_fuid, acl_ids->z_fgid);
885 }
886
887 VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
34dc7c2f 888
34dc7c2f 889 if (!(flag & IS_ROOT_NODE)) {
31b6111f 890 *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl);
7b3e34ba
BB
891 VERIFY(*zpp != NULL);
892 VERIFY(dzp != NULL);
34dc7c2f
BB
893 } else {
894 /*
895 * If we are creating the root node, the "parent" we
896 * passed in is the znode for the root.
897 */
898 *zpp = dzp;
428870ff
BB
899
900 (*zpp)->z_sa_hdl = sa_hdl;
34dc7c2f 901 }
428870ff
BB
902
903 (*zpp)->z_pflags = pflags;
12fa7f34 904 (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
50c957f7 905 (*zpp)->z_dnodesize = dnodesize;
428870ff 906
428870ff
BB
907 if (obj_type == DMU_OT_ZNODE ||
908 acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
b0bc7a84 909 VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
428870ff 910 }
d1d7e268 911 kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
c96c36fa 912 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
913}
914
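/*
 * Illustrative call pattern (sketch, not part of the original file),
 * heavily simplified from a typical create path: compute the ACL ids,
 * assign a transaction carrying the required holds, then call zfs_mknode().
 * Error handling, directory entry creation and ZIL logging are omitted;
 * dzp, vap, cr, name, zp, acl_ids and tx are assumed to be declared by the
 * caller.
 */
#if 0
	VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids));

	tx = dmu_tx_create(ZTOZSB(dzp)->z_os);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));

	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);
#endif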
5484965a 915/*
d3cc8b15
WA
916 * Update in-core attributes. It is assumed the caller will be doing an
917 * sa_bulk_update to push the changes out.
5484965a
BB
918 */
919void
920zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
921{
922 xoptattr_t *xoap;
923
924 xoap = xva_getxoptattr(xvap);
925 ASSERT(xoap);
926
927 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
928 uint64_t times[2];
929 ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
930 (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
931 &times, sizeof (times), tx);
932 XVA_SET_RTN(xvap, XAT_CREATETIME);
933 }
64c688d7 934
5484965a
BB
935 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
936 ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
937 zp->z_pflags, tx);
938 XVA_SET_RTN(xvap, XAT_READONLY);
939 }
940 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
941 ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
942 zp->z_pflags, tx);
943 XVA_SET_RTN(xvap, XAT_HIDDEN);
944 }
945 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
946 ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
947 zp->z_pflags, tx);
948 XVA_SET_RTN(xvap, XAT_SYSTEM);
949 }
950 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
951 ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
952 zp->z_pflags, tx);
953 XVA_SET_RTN(xvap, XAT_ARCHIVE);
954 }
955 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
956 ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
957 zp->z_pflags, tx);
958 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
64c688d7 959
960 ZTOI(zp)->i_flags |= S_IMMUTABLE;
961 } else {
962 ZTOI(zp)->i_flags &= ~S_IMMUTABLE;
5484965a 963 }
64c688d7 964
5484965a
BB
965 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
966 ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
967 zp->z_pflags, tx);
968 XVA_SET_RTN(xvap, XAT_NOUNLINK);
969 }
970 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
971 ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
972 zp->z_pflags, tx);
973 XVA_SET_RTN(xvap, XAT_APPENDONLY);
64c688d7 974
975 ZTOI(zp)->i_flags |= S_APPEND;
976 } else {
977
978 ZTOI(zp)->i_flags &= ~S_APPEND;
5484965a 979 }
64c688d7 980
5484965a
BB
981 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
982 ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
983 zp->z_pflags, tx);
984 XVA_SET_RTN(xvap, XAT_NODUMP);
985 }
986 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
987 ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
988 zp->z_pflags, tx);
989 XVA_SET_RTN(xvap, XAT_OPAQUE);
990 }
991 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
992 ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
993 xoap->xoa_av_quarantined, zp->z_pflags, tx);
994 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
995 }
996 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
997 ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
998 zp->z_pflags, tx);
999 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1000 }
1001 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
1002 zfs_sa_set_scanstamp(zp, xvap, tx);
1003 XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
1004 }
1005 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1006 ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
1007 zp->z_pflags, tx);
1008 XVA_SET_RTN(xvap, XAT_REPARSE);
1009 }
1010 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1011 ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
1012 zp->z_pflags, tx);
1013 XVA_SET_RTN(xvap, XAT_OFFLINE);
1014 }
1015 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1016 ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
1017 zp->z_pflags, tx);
1018 XVA_SET_RTN(xvap, XAT_SPARSE);
1019 }
1020}
1021
34dc7c2f 1022int
3558fd73 1023zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
34dc7c2f
BB
1024{
1025 dmu_object_info_t doi;
1026 dmu_buf_t *db;
1027 znode_t *zp;
c96c36fa 1028 znode_hold_t *zh;
34dc7c2f 1029 int err;
428870ff 1030 sa_handle_t *hdl;
34dc7c2f
BB
1031
1032 *zpp = NULL;
1033
6f9548c4 1034again:
c96c36fa 1035 zh = zfs_znode_hold_enter(zsb, obj_num);
34dc7c2f 1036
3558fd73 1037 err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
34dc7c2f 1038 if (err) {
c96c36fa 1039 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1040 return (err);
1041 }
1042
1043 dmu_object_info_from_db(db, &doi);
428870ff
BB
1044 if (doi.doi_bonus_type != DMU_OT_SA &&
1045 (doi.doi_bonus_type != DMU_OT_ZNODE ||
1046 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1047 doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1048 sa_buf_rele(db, NULL);
c96c36fa 1049 zfs_znode_hold_exit(zsb, zh);
2e528b49 1050 return (SET_ERROR(EINVAL));
34dc7c2f
BB
1051 }
1052
428870ff
BB
1053 hdl = dmu_buf_get_user(db);
1054 if (hdl != NULL) {
36df2843 1055 zp = sa_get_userdata(hdl);
34dc7c2f 1056
8ac67298 1057
34dc7c2f 1058 /*
428870ff
BB
1059 * Since "SA" does immediate eviction we
1060 * should never find a sa handle that doesn't
1061 * know about the znode.
34dc7c2f 1062 */
428870ff
BB
1063
1064 ASSERT3P(zp, !=, NULL);
1065
1066 mutex_enter(&zp->z_lock);
34dc7c2f 1067 ASSERT3U(zp->z_id, ==, obj_num);
98701490
CC
1068 /*
1069 * If igrab() returns NULL the VFS has independently
1070 * determined the inode should be evicted and has
1071 * called iput_final() to start the eviction process.
1072 * The SA handle is still valid but because the VFS
1073 * requires that the eviction succeed we must drop
1074 * our locks and references to allow the eviction to
1075 * complete. The zfs_zget() may then be retried.
1076 *
1077 * This unlikely case could be optimized by registering
1078 * a sops->drop_inode() callback. The callback would
1079 * need to detect the active SA hold thereby informing
1080 * the VFS that this inode should not be evicted.
1081 */
1082 if (igrab(ZTOI(zp)) == NULL) {
1083 mutex_exit(&zp->z_lock);
1084 sa_buf_rele(db, NULL);
1085 zfs_znode_hold_exit(zsb, zh);
1086 /* inode might need this to finish evict */
1087 cond_resched();
1088 goto again;
34dc7c2f 1089 }
98701490
CC
1090 *zpp = zp;
1091 err = 0;
34dc7c2f 1092 mutex_exit(&zp->z_lock);
f3ad9cd6 1093 sa_buf_rele(db, NULL);
c96c36fa 1094 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1095 return (err);
1096 }
1097
1098 /*
3558fd73 1099 * Not found create new znode/vnode but only if file exists.
428870ff
BB
1100 *
1101 * There is a small window where zfs_vget() could
1102 * find this object while a file create is still in
1103 * progress. This is checked for in zfs_znode_alloc()
1104 *
1105 * if zfs_znode_alloc() fails it will drop the hold on the
1106 * bonus buffer.
34dc7c2f 1107 */
3558fd73 1108 zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
31b6111f 1109 doi.doi_bonus_type, obj_num, NULL);
428870ff 1110 if (zp == NULL) {
2e528b49 1111 err = SET_ERROR(ENOENT);
428870ff
BB
1112 } else {
1113 *zpp = zp;
1114 }
c96c36fa 1115 zfs_znode_hold_exit(zsb, zh);
428870ff 1116 return (err);
34dc7c2f
BB
1117}
1118
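/*
 * Illustrative caller sketch (not part of the original file): a successful
 * zfs_zget() returns a referenced inode (via igrab() or a newly allocated
 * inode), so the caller is expected to drop that reference with iput()
 * when done.
 */
#if 0
	znode_t *zp;
	int error;

	error = zfs_zget(zsb, obj_num, &zp);
	if (error == 0) {
		/* ... operate on zp ... */
		iput(ZTOI(zp));		/* release the reference from zfs_zget() */
	}
#endif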
1119int
1120zfs_rezget(znode_t *zp)
1121{
3558fd73 1122 zfs_sb_t *zsb = ZTOZSB(zp);
34dc7c2f
BB
1123 dmu_object_info_t doi;
1124 dmu_buf_t *db;
1125 uint64_t obj_num = zp->z_id;
428870ff 1126 uint64_t mode;
dfbc8630 1127 uint64_t links;
9f5f0019 1128 sa_bulk_attr_t bulk[10];
34dc7c2f 1129 int err;
428870ff
BB
1130 int count = 0;
1131 uint64_t gen;
2c6abf15 1132 uint64_t z_uid, z_gid;
9f5f0019 1133 uint64_t atime[2], mtime[2], ctime[2];
c96c36fa 1134 znode_hold_t *zh;
34dc7c2f 1135
	/*
	 * Skip ctldir nodes, otherwise they will always get invalidated.
	 * This causes problems for mounted snapdirs: on Linux >= 3.18 in
	 * particular, d_invalidate() will detach the mountpoint and prevent
	 * anyone from automounting it again as long as someone is still
	 * using the detached mount.
	 */
1143 if (zp->z_is_ctldir)
1144 return (0);
1145
c96c36fa 1146 zh = zfs_znode_hold_enter(zsb, obj_num);
34dc7c2f 1147
428870ff
BB
1148 mutex_enter(&zp->z_acl_lock);
1149 if (zp->z_acl_cached) {
1150 zfs_acl_free(zp->z_acl_cached);
1151 zp->z_acl_cached = NULL;
1152 }
428870ff 1153 mutex_exit(&zp->z_acl_lock);
7b3e34ba 1154
228b461b 1155 rw_enter(&zp->z_xattr_lock, RW_WRITER);
7b3e34ba
BB
1156 if (zp->z_xattr_cached) {
1157 nvlist_free(zp->z_xattr_cached);
1158 zp->z_xattr_cached = NULL;
1159 }
7b3e34ba
BB
1160 rw_exit(&zp->z_xattr_lock);
1161
428870ff 1162 ASSERT(zp->z_sa_hdl == NULL);
3558fd73 1163 err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
34dc7c2f 1164 if (err) {
c96c36fa 1165 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1166 return (err);
1167 }
1168
1169 dmu_object_info_from_db(db, &doi);
428870ff
BB
1170 if (doi.doi_bonus_type != DMU_OT_SA &&
1171 (doi.doi_bonus_type != DMU_OT_ZNODE ||
1172 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1173 doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1174 sa_buf_rele(db, NULL);
c96c36fa 1175 zfs_znode_hold_exit(zsb, zh);
2e528b49 1176 return (SET_ERROR(EINVAL));
34dc7c2f
BB
1177 }
1178
3558fd73 1179 zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL);
428870ff
BB
1180
1181 /* reload cached values */
3558fd73 1182 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL,
428870ff 1183 &gen, sizeof (gen));
3558fd73 1184 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
428870ff 1185 &zp->z_size, sizeof (zp->z_size));
3558fd73 1186 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
dfbc8630 1187 &links, sizeof (links));
3558fd73 1188 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
428870ff 1189 &zp->z_pflags, sizeof (zp->z_pflags));
3558fd73 1190 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL,
2c6abf15 1191 &z_uid, sizeof (z_uid));
3558fd73 1192 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL,
2c6abf15 1193 &z_gid, sizeof (z_gid));
3558fd73 1194 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL,
428870ff 1195 &mode, sizeof (mode));
9f5f0019
NB
1196 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL,
1197 &atime, 16);
1198 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
1199 &mtime, 16);
1200 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
1201 &ctime, 16);
428870ff 1202
428870ff
BB
1203 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
1204 zfs_znode_dmu_fini(zp);
c96c36fa 1205 zfs_znode_hold_exit(zsb, zh);
2e528b49 1206 return (SET_ERROR(EIO));
428870ff
BB
1207 }
1208
12fa7f34 1209 zp->z_mode = ZTOI(zp)->i_mode = mode;
2c6abf15
NB
1210 zfs_uid_write(ZTOI(zp), z_uid);
1211 zfs_gid_write(ZTOI(zp), z_gid);
572e2857 1212
9f5f0019
NB
1213 ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
1214 ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
1215 ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
1216
278f2236 1217 if (gen != ZTOI(zp)->i_generation) {
428870ff 1218 zfs_znode_dmu_fini(zp);
c96c36fa 1219 zfs_znode_hold_exit(zsb, zh);
2e528b49 1220 return (SET_ERROR(EIO));
34dc7c2f
BB
1221 }
1222
dfbc8630
CD
1223 zp->z_unlinked = (ZTOI(zp)->i_nlink == 0);
1224 set_nlink(ZTOI(zp), (uint32_t)links);
1225
34dc7c2f 1226 zp->z_blksz = doi.doi_data_block_size;
704cd075 1227 zp->z_atime_dirty = 0;
9f5f0019 1228 zfs_inode_update(zp);
34dc7c2f 1229
64c688d7 1230
c96c36fa 1231 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1232
1233 return (0);
1234}
1235
1236void
1237zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1238{
3558fd73
BB
1239 zfs_sb_t *zsb = ZTOZSB(zp);
1240 objset_t *os = zsb->z_os;
34dc7c2f 1241 uint64_t obj = zp->z_id;
572e2857 1242 uint64_t acl_obj = zfs_external_acl(zp);
c96c36fa 1243 znode_hold_t *zh;
34dc7c2f 1244
c96c36fa 1245 zh = zfs_znode_hold_enter(zsb, obj);
572e2857
BB
1246 if (acl_obj) {
1247 VERIFY(!zp->z_is_sa);
b128c09f 1248 VERIFY(0 == dmu_object_free(os, acl_obj, tx));
572e2857 1249 }
b128c09f 1250 VERIFY(0 == dmu_object_free(os, obj, tx));
34dc7c2f 1251 zfs_znode_dmu_fini(zp);
c96c36fa 1252 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1253}
1254
1255void
1256zfs_zinactive(znode_t *zp)
1257{
3558fd73 1258 zfs_sb_t *zsb = ZTOZSB(zp);
34dc7c2f 1259 uint64_t z_id = zp->z_id;
c96c36fa 1260 znode_hold_t *zh;
34dc7c2f 1261
428870ff 1262 ASSERT(zp->z_sa_hdl);
34dc7c2f
BB
1263
	/*
	 * Don't allow a zfs_zget() while we're trying to release this znode.
	 */
c96c36fa 1267 zh = zfs_znode_hold_enter(zsb, z_id);
d6bd8eaa 1268
34dc7c2f 1269 mutex_enter(&zp->z_lock);
34dc7c2f
BB
1270
1271 /*
1272 * If this was the last reference to a file with no links,
1273 * remove the file from the file system.
1274 */
1275 if (zp->z_unlinked) {
1276 mutex_exit(&zp->z_lock);
c96c36fa 1277 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1278 zfs_rmnode(zp);
1279 return;
1280 }
428870ff 1281
34dc7c2f
BB
1282 mutex_exit(&zp->z_lock);
1283 zfs_znode_dmu_fini(zp);
d6bd8eaa 1284
c96c36fa 1285 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1286}
1287
6d111134
TC
1288static inline int
1289zfs_compare_timespec(struct timespec *t1, struct timespec *t2)
1290{
1291 if (t1->tv_sec < t2->tv_sec)
1292 return (-1);
1293
1294 if (t1->tv_sec > t2->tv_sec)
1295 return (1);
1296
1297 return (t1->tv_nsec - t2->tv_nsec);
1298}
1299
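/*
 * Example (illustrative): the helper above follows the usual comparator
 * convention, so "t1 is older than t2" can be written as
 * zfs_compare_timespec(&t1, &t2) < 0.
 */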
6d111134
TC
1300/*
1301 * Prepare to update znode time stamps.
1302 *
1303 * IN: zp - znode requiring timestamp update
0df9673f 1304 * flag - ATTR_MTIME, ATTR_CTIME flags
6d111134 1305 *
0df9673f 1306 * OUT: zp - z_seq
6d111134
TC
1307 * mtime - new mtime
1308 * ctime - new ctime
1309 *
0df9673f
CC
1310 * Note: We don't update atime here, because we rely on Linux VFS to do
1311 * atime updating.
6d111134 1312 */
34dc7c2f 1313void
428870ff 1314zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
0df9673f 1315 uint64_t ctime[2])
34dc7c2f
BB
1316{
1317 timestruc_t now;
1318
34dc7c2f
BB
1319 gethrestime(&now);
1320
0df9673f 1321 zp->z_seq++;
34dc7c2f 1322
3558fd73 1323 if (flag & ATTR_MTIME) {
428870ff 1324 ZFS_TIME_ENCODE(&now, mtime);
9f5f0019 1325 ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
3558fd73 1326 if (ZTOZSB(zp)->z_use_fuids) {
428870ff
BB
1327 zp->z_pflags |= (ZFS_ARCHIVE |
1328 ZFS_AV_MODIFIED);
1329 }
34dc7c2f
BB
1330 }
1331
3558fd73 1332 if (flag & ATTR_CTIME) {
428870ff 1333 ZFS_TIME_ENCODE(&now, ctime);
9f5f0019 1334 ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime);
3558fd73 1335 if (ZTOZSB(zp)->z_use_fuids)
428870ff 1336 zp->z_pflags |= ZFS_ARCHIVE;
34dc7c2f
BB
1337 }
1338}
1339
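/*
 * Illustrative usage (sketch, not part of the original file): callers pass
 * the stamps filled in by zfs_tstamp_update_setup() straight into an SA
 * bulk update, as zfs_freesp() below does. An open transaction "tx" is
 * assumed.
 */
#if 0
	sa_bulk_attr_t bulk[2];
	uint64_t mtime[2], ctime[2];
	int count = 0;

	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(ZTOZSB(zp)), NULL, mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(ZTOZSB(zp)), NULL, ctime, 16);
	(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
#endif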
34dc7c2f
BB
1340/*
1341 * Grow the block size for a file.
1342 *
1343 * IN: zp - znode of file to free data in.
1344 * size - requested block size
1345 * tx - open transaction.
1346 *
1347 * NOTE: this function assumes that the znode is write locked.
1348 */
1349void
1350zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1351{
1352 int error;
1353 u_longlong_t dummy;
1354
1355 if (size <= zp->z_blksz)
1356 return;
1357 /*
1358 * If the file size is already greater than the current blocksize,
1359 * we will not grow. If there is more than one block in a file,
1360 * the blocksize cannot change.
1361 */
428870ff 1362 if (zp->z_blksz && zp->z_size > zp->z_blksz)
34dc7c2f
BB
1363 return;
1364
3558fd73 1365 error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id,
34dc7c2f 1366 size, 0, tx);
428870ff 1367
34dc7c2f
BB
1368 if (error == ENOTSUP)
1369 return;
c99c9001 1370 ASSERT0(error);
34dc7c2f
BB
1371
1372 /* What blocksize did we actually get? */
428870ff 1373 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
34dc7c2f
BB
1374}
1375
34dc7c2f 1376/*
b128c09f 1377 * Increase the file length
34dc7c2f
BB
1378 *
1379 * IN: zp - znode of file to free data in.
b128c09f 1380 * end - new end-of-file
34dc7c2f 1381 *
19d55079 1382 * RETURN: 0 on success, error code on failure
34dc7c2f 1383 */
b128c09f
BB
1384static int
1385zfs_extend(znode_t *zp, uint64_t end)
34dc7c2f 1386{
3558fd73 1387 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f 1388 dmu_tx_t *tx;
34dc7c2f 1389 rl_t *rl;
b128c09f 1390 uint64_t newblksz;
34dc7c2f
BB
1391 int error;
1392
34dc7c2f 1393 /*
b128c09f 1394 * We will change zp_size, lock the whole file.
34dc7c2f 1395 */
d88895a0 1396 rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
34dc7c2f
BB
1397
1398 /*
1399 * Nothing to do if file already at desired length.
1400 */
428870ff 1401 if (end <= zp->z_size) {
34dc7c2f
BB
1402 zfs_range_unlock(rl);
1403 return (0);
1404 }
3558fd73 1405 tx = dmu_tx_create(zsb->z_os);
428870ff
BB
1406 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1407 zfs_sa_upgrade_txholds(tx, zp);
b128c09f 1408 if (end > zp->z_blksz &&
3558fd73 1409 (!ISP2(zp->z_blksz) || zp->z_blksz < zsb->z_max_blksz)) {
34dc7c2f
BB
1410 /*
1411 * We are growing the file past the current block size.
1412 */
3558fd73 1413 if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) {
f1512ee6
MA
1414 /*
1415 * File's blocksize is already larger than the
1416 * "recordsize" property. Only let it grow to
1417 * the next power of 2.
1418 */
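			/*
			 * Worked example (illustrative): a file may carry a
			 * 96K block (98304 bytes, not a power of two) if the
			 * "recordsize" property was lowered after that block
			 * was written. Extending such a file computes
			 *   1 << highbit64(98304) = 1 << 17 = 131072
			 * so newblksz = MIN(end, 128K) and the block size
			 * only steps up to the next power of two.
			 */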
34dc7c2f 1419 ASSERT(!ISP2(zp->z_blksz));
f1512ee6 1420 newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
34dc7c2f 1421 } else {
3558fd73 1422 newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz);
34dc7c2f 1423 }
b128c09f
BB
1424 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
1425 } else {
1426 newblksz = 0;
34dc7c2f
BB
1427 }
1428
384f8a09 1429 error = dmu_tx_assign(tx, TXG_WAIT);
34dc7c2f 1430 if (error) {
34dc7c2f
BB
1431 dmu_tx_abort(tx);
1432 zfs_range_unlock(rl);
1433 return (error);
1434 }
1435
b128c09f
BB
1436 if (newblksz)
1437 zfs_grow_blocksize(zp, newblksz, tx);
34dc7c2f 1438
428870ff
BB
1439 zp->z_size = end;
1440
3558fd73 1441 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
428870ff 1442 &zp->z_size, sizeof (zp->z_size), tx));
34dc7c2f 1443
b128c09f 1444 zfs_range_unlock(rl);
34dc7c2f 1445
b128c09f 1446 dmu_tx_commit(tx);
34dc7c2f 1447
b128c09f
BB
1448 return (0);
1449}
1450
223df016
TC
1451/*
1452 * zfs_zero_partial_page - Modeled after update_pages() but
1453 * with different arguments and semantics for use by zfs_freesp().
1454 *
1455 * Zeroes a piece of a single page cache entry for zp at offset
1456 * start and length len.
1457 *
1458 * Caller must acquire a range lock on the file for the region
1459 * being zeroed in order that the ARC and page cache stay in sync.
1460 */
1461static void
1462zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
1463{
1464 struct address_space *mp = ZTOI(zp)->i_mapping;
1465 struct page *pp;
1466 int64_t off;
1467 void *pb;
1468
8b1899d3 1469 ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK));
223df016 1470
8b1899d3
BB
1471 off = start & (PAGE_SIZE - 1);
1472 start &= PAGE_MASK;
223df016 1473
8b1899d3 1474 pp = find_lock_page(mp, start >> PAGE_SHIFT);
223df016
TC
1475 if (pp) {
1476 if (mapping_writably_mapped(mp))
1477 flush_dcache_page(pp);
1478
1479 pb = kmap(pp);
1480 bzero(pb + off, len);
1481 kunmap(pp);
1482
1483 if (mapping_writably_mapped(mp))
1484 flush_dcache_page(pp);
1485
1486 mark_page_accessed(pp);
1487 SetPageUptodate(pp);
1488 ClearPageError(pp);
1489 unlock_page(pp);
8b1899d3 1490 put_page(pp);
223df016
TC
1491 }
1492}
1493
b128c09f
BB
1494/*
1495 * Free space in a file.
1496 *
1497 * IN: zp - znode of file to free data in.
1498 * off - start of section to free.
1499 * len - length of section to free.
1500 *
19d55079 1501 * RETURN: 0 on success, error code on failure
b128c09f
BB
1502 */
1503static int
1504zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1505{
3558fd73 1506 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f
BB
1507 rl_t *rl;
1508 int error;
1509
1510 /*
1511 * Lock the range being freed.
1512 */
d88895a0 1513 rl = zfs_range_lock(&zp->z_range_lock, off, len, RL_WRITER);
b128c09f
BB
1514
1515 /*
1516 * Nothing to do if file already at desired length.
1517 */
428870ff 1518 if (off >= zp->z_size) {
b128c09f
BB
1519 zfs_range_unlock(rl);
1520 return (0);
34dc7c2f
BB
1521 }
1522
428870ff
BB
1523 if (off + len > zp->z_size)
1524 len = zp->z_size - off;
b128c09f 1525
3558fd73 1526 error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);
b128c09f 1527
223df016
TC
1528 /*
1529 * Zero partial page cache entries. This must be done under a
1530 * range lock in order to keep the ARC and page cache in sync.
1531 */
1532 if (zp->z_is_mapped) {
1533 loff_t first_page, last_page, page_len;
1534 loff_t first_page_offset, last_page_offset;
1535
1536 /* first possible full page in hole */
8b1899d3 1537 first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT;
223df016 1538 /* last page of hole */
8b1899d3 1539 last_page = (off + len) >> PAGE_SHIFT;
223df016
TC
1540
1541 /* offset of first_page */
8b1899d3 1542 first_page_offset = first_page << PAGE_SHIFT;
223df016 1543 /* offset of last_page */
8b1899d3 1544 last_page_offset = last_page << PAGE_SHIFT;
223df016 1545
cb08f063
TC
1546 /* truncate whole pages */
1547 if (last_page_offset > first_page_offset) {
1548 truncate_inode_pages_range(ZTOI(zp)->i_mapping,
1549 first_page_offset, last_page_offset - 1);
1550 }
1551
1552 /* truncate sub-page ranges */
223df016
TC
1553 if (first_page > last_page) {
1554 /* entire punched area within a single page */
1555 zfs_zero_partial_page(zp, off, len);
1556 } else {
1557 /* beginning of punched area at the end of a page */
1558 page_len = first_page_offset - off;
1559 if (page_len > 0)
1560 zfs_zero_partial_page(zp, off, page_len);
1561
1562 /* end of punched area at the beginning of a page */
1563 page_len = off + len - last_page_offset;
1564 if (page_len > 0)
1565 zfs_zero_partial_page(zp, last_page_offset,
1566 page_len);
1567 }
1568 }
34dc7c2f
BB
1569 zfs_range_unlock(rl);
1570
b128c09f
BB
1571 return (error);
1572}
1573
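/*
 * Worked example (illustrative, assuming 4K pages) for the page-cache
 * zeroing logic in zfs_free_range() above: punching a hole at off = 1000,
 * len = 10000 (bytes [1000, 11000)) gives
 *   first_page        = (1000 + 4095) >> 12 = 1
 *   last_page         = 11000 >> 12         = 2
 *   first_page_offset = 4096, last_page_offset = 8192
 * so page 1 ([4096, 8191]) is truncated whole, while the tail of page 0
 * ([1000, 4095]) and the head of page 2 ([8192, 10999]) are zeroed by
 * zfs_zero_partial_page().
 */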
1574/*
1575 * Truncate a file
1576 *
1577 * IN: zp - znode of file to free data in.
1578 * end - new end-of-file.
1579 *
19d55079 1580 * RETURN: 0 on success, error code on failure
b128c09f
BB
1581 */
1582static int
1583zfs_trunc(znode_t *zp, uint64_t end)
1584{
3558fd73 1585 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f
BB
1586 dmu_tx_t *tx;
1587 rl_t *rl;
1588 int error;
572e2857
BB
1589 sa_bulk_attr_t bulk[2];
1590 int count = 0;
b128c09f
BB
1591
1592 /*
1593 * We will change zp_size, lock the whole file.
1594 */
d88895a0 1595 rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
b128c09f
BB
1596
1597 /*
1598 * Nothing to do if file already at desired length.
1599 */
428870ff 1600 if (end >= zp->z_size) {
b128c09f
BB
1601 zfs_range_unlock(rl);
1602 return (0);
1603 }
1604
3558fd73 1605 error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1);
b128c09f
BB
1606 if (error) {
1607 zfs_range_unlock(rl);
1608 return (error);
1609 }
3558fd73 1610 tx = dmu_tx_create(zsb->z_os);
428870ff
BB
1611 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1612 zfs_sa_upgrade_txholds(tx, zp);
19d55079 1613 dmu_tx_mark_netfree(tx);
7a8f0e80 1614 error = dmu_tx_assign(tx, TXG_WAIT);
b128c09f 1615 if (error) {
b128c09f
BB
1616 dmu_tx_abort(tx);
1617 zfs_range_unlock(rl);
1618 return (error);
1619 }
b128c09f 1620
428870ff 1621 zp->z_size = end;
3558fd73 1622 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
572e2857 1623 NULL, &zp->z_size, sizeof (zp->z_size));
428870ff 1624
572e2857
BB
1625 if (end == 0) {
1626 zp->z_pflags &= ~ZFS_SPARSE;
3558fd73 1627 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
572e2857
BB
1628 NULL, &zp->z_pflags, 8);
1629 }
1630 VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
b128c09f 1631
34dc7c2f
BB
1632 dmu_tx_commit(tx);
1633
d164b209 1634 zfs_range_unlock(rl);
34dc7c2f
BB
1635
1636 return (0);
1637}
1638
b128c09f
BB
1639/*
1640 * Free space in a file
1641 *
1642 * IN: zp - znode of file to free data in.
1643 * off - start of range
1644 * len - end of range (0 => EOF)
1645 * flag - current file open mode flags.
1646 * log - TRUE if this action should be logged
1647 *
19d55079 1648 * RETURN: 0 on success, error code on failure
b128c09f
BB
1649 */
1650int
1651zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
1652{
b128c09f 1653 dmu_tx_t *tx;
3558fd73
BB
1654 zfs_sb_t *zsb = ZTOZSB(zp);
1655 zilog_t *zilog = zsb->z_log;
428870ff
BB
1656 uint64_t mode;
1657 uint64_t mtime[2], ctime[2];
1658 sa_bulk_attr_t bulk[3];
1659 int count = 0;
b128c09f
BB
1660 int error;
1661
3558fd73 1662 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zsb), &mode,
428870ff
BB
1663 sizeof (mode))) != 0)
1664 return (error);
1665
1666 if (off > zp->z_size) {
b128c09f
BB
1667 error = zfs_extend(zp, off+len);
1668 if (error == 0 && log)
1669 goto log;
223df016 1670 goto out;
b128c09f
BB
1671 }
1672
b128c09f
BB
1673 if (len == 0) {
1674 error = zfs_trunc(zp, off);
1675 } else {
1676 if ((error = zfs_free_range(zp, off, len)) == 0 &&
428870ff 1677 off + len > zp->z_size)
b128c09f
BB
1678 error = zfs_extend(zp, off+len);
1679 }
1680 if (error || !log)
223df016 1681 goto out;
b128c09f 1682log:
3558fd73 1683 tx = dmu_tx_create(zsb->z_os);
428870ff
BB
1684 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1685 zfs_sa_upgrade_txholds(tx, zp);
384f8a09 1686 error = dmu_tx_assign(tx, TXG_WAIT);
b128c09f 1687 if (error) {
b128c09f 1688 dmu_tx_abort(tx);
223df016 1689 goto out;
b128c09f
BB
1690 }
1691
3558fd73
BB
1692 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16);
1693 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16);
1694 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
428870ff 1695 NULL, &zp->z_pflags, 8);
0df9673f 1696 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
428870ff
BB
1697 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1698 ASSERT(error == 0);
1699
b128c09f
BB
1700 zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
1701
1702 dmu_tx_commit(tx);
223df016 1703
960e08fe 1704 zfs_inode_update(zp);
223df016
TC
1705 error = 0;
1706
1707out:
1708 /*
1709 * Truncate the page cache - for file truncate operations, use
1710 * the purpose-built API for truncations. For punching operations,
cb08f063 1711 * the truncation is handled under a range lock in zfs_free_range.
223df016
TC
1712 */
1713 if (len == 0)
1714 truncate_setsize(ZTOI(zp), off);
223df016 1715 return (error);
b128c09f
BB
1716}
1717
34dc7c2f
BB
1718void
1719zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
1720{
22872ff5
BB
1721 struct super_block *sb;
1722 zfs_sb_t *zsb;
428870ff 1723 uint64_t moid, obj, sa_obj, version;
22872ff5 1724 uint64_t sense = ZFS_CASE_SENSITIVE;
34dc7c2f
BB
1725 uint64_t norm = 0;
1726 nvpair_t *elem;
c96c36fa 1727 int size;
34dc7c2f 1728 int error;
22872ff5
BB
1729 int i;
1730 znode_t *rootzp = NULL;
1731 vattr_t vattr;
1732 znode_t *zp;
1733 zfs_acl_ids_t acl_ids;
34dc7c2f
BB
1734
1735 /*
1736 * First attempt to create master node.
1737 */
1738 /*
1739 * In an empty objset, there are no blocks to read and thus
1740 * there can be no i/o errors (which we assert below).
1741 */
1742 moid = MASTER_NODE_OBJ;
1743 error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
1744 DMU_OT_NONE, 0, tx);
1745 ASSERT(error == 0);
1746
50c957f7
NB
1747 /*
1748 * Give dmu_object_alloc() a hint about where to start
1749 * allocating new objects. Otherwise, since the metadnode's
1750 * dnode_phys_t structure isn't initialized yet, dmu_object_next()
1751 * would fail and we'd have to skip to the next dnode block.
1752 */
1753 os->os_obj_next = moid + 1;
1754
34dc7c2f
BB
1755 /*
1756 * Set starting attributes.
1757 */
428870ff 1758 version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
34dc7c2f
BB
1759 elem = NULL;
1760 while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
1761 /* For the moment we expect all zpl props to be uint64_ts */
1762 uint64_t val;
1763 char *name;
1764
1765 ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
1766 VERIFY(nvpair_value_uint64(elem, &val) == 0);
1767 name = nvpair_name(elem);
1768 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
9babb374
BB
1769 if (val < version)
1770 version = val;
34dc7c2f
BB
1771 } else {
1772 error = zap_update(os, moid, name, 8, 1, &val, tx);
1773 }
1774 ASSERT(error == 0);
1775 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
1776 norm = val;
22872ff5
BB
1777 else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
1778 sense = val;
34dc7c2f
BB
1779 }
1780 ASSERT(version != 0);
9babb374 1781 error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
34dc7c2f 1782
428870ff
BB
1783 /*
1784 * Create zap object used for SA attribute registration
1785 */
1786
1787 if (version >= ZPL_VERSION_SA) {
1788 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
1789 DMU_OT_NONE, 0, tx);
1790 error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
1791 ASSERT(error == 0);
1792 } else {
1793 sa_obj = 0;
1794 }
34dc7c2f
BB
1795 /*
1796 * Create a delete queue.
1797 */
9babb374 1798 obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
34dc7c2f 1799
9babb374 1800 error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
34dc7c2f
BB
1801 ASSERT(error == 0);
1802
9babb374 1803 /*
22872ff5
BB
1804 * Create root znode. Create minimal znode/inode/zsb/sb
1805 * to allow zfs_mknode to work.
9babb374 1806 */
22872ff5
BB
1807 vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
1808 vattr.va_mode = S_IFDIR|0755;
1809 vattr.va_uid = crgetuid(cr);
1810 vattr.va_gid = crgetgid(cr);
1811
79c76d5b 1812 rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
22872ff5
BB
1813 rootzp->z_moved = 0;
1814 rootzp->z_unlinked = 0;
1815 rootzp->z_atime_dirty = 0;
1816 rootzp->z_is_sa = USE_SA(version, os);
1817
79c76d5b 1818 zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
22872ff5
BB
1819 zsb->z_os = os;
1820 zsb->z_parent = zsb;
1821 zsb->z_version = version;
1822 zsb->z_use_fuids = USE_FUIDS(version, os);
1823 zsb->z_use_sa = USE_SA(version, os);
1824 zsb->z_norm = norm;
1825
79c76d5b 1826 sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
22872ff5
BB
1827 sb->s_fs_info = zsb;
1828
1829 ZTOI(rootzp)->i_sb = sb;
1830
1831 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
1832 &zsb->z_attr_table);
9babb374 1833
22872ff5 1834 ASSERT(error == 0);
9babb374 1835
60101509 1836 /*
22872ff5
BB
1837 * Fold case on file systems that are always or sometimes case
1838 * insensitive.
60101509 1839 */
22872ff5
BB
1840 if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
1841 zsb->z_norm |= U8_TEXTPREP_TOUPPER;
60101509 1842
22872ff5
BB
1843 mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1844 list_create(&zsb->z_all_znodes, sizeof (znode_t),
1845 offsetof(znode_t, z_link_node));
60101509 1846
c96c36fa
BB
1847 size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
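	/*
	 * The tunable is rounded down to a power of two and capped at
	 * ZFS_OBJ_MTX_MAX: highbit64() returns the 1-based index of the
	 * highest set bit, so a value of 64 yields 1 << 6 == 64 hold
	 * buckets, and a non-power-of-two such as 100 also yields 64.
	 */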
1848 zsb->z_hold_size = size;
1849 zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP);
1850 zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
1851 for (i = 0; i != size; i++) {
1852 avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
1853 sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
1854 mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
1855 }
60101509 1856
22872ff5
BB
1857 VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
1858 cr, NULL, &acl_ids));
1859 zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
1860 ASSERT3P(zp, ==, rootzp);
1861 error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
1862 ASSERT(error == 0);
1863 zfs_acl_ids_free(&acl_ids);
60101509 1864
22872ff5
BB
1865 atomic_set(&ZTOI(rootzp)->i_count, 0);
1866 sa_handle_destroy(rootzp->z_sa_hdl);
22872ff5
BB
1867 kmem_cache_free(znode_cache, rootzp);
1868
1869 /*
1870 * Create shares directory
1871 */
22872ff5 1872 error = zfs_create_share_dir(zsb, tx);
9babb374 1873 ASSERT(error == 0);
428870ff 1874
c96c36fa
BB
1875 for (i = 0; i != size; i++) {
1876 avl_destroy(&zsb->z_hold_trees[i]);
1877 mutex_destroy(&zsb->z_hold_locks[i]);
1878 }
2708f716 1879
c96c36fa
BB
1880 vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size);
1881 vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size);
2708f716
BB
1882 kmem_free(sb, sizeof (struct super_block));
1883 kmem_free(zsb, sizeof (zfs_sb_t));
34dc7c2f 1884}
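
/*
 * Illustrative sketch, not part of zfs_znode.c: zfs_create_fs() runs inside
 * the transaction that creates the objset.  In practice the ioctl path hands
 * a callback of this shape to dmu_objset_create(); the struct and callback
 * names below are hypothetical stand-ins for that wiring.
 */
typedef struct fs_create_arg {
	nvlist_t *fca_zplprops;		/* ZPL properties for the new fs */
} fs_create_arg_t;

static void
example_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
{
	fs_create_arg_t *fca = arg;

	/* Lay down master node, SA registry, delete queue and root znode. */
	zfs_create_fs(os, cr, fca->fca_zplprops, tx);
}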
34dc7c2f 1885#endif /* _KERNEL */
428870ff 1886
34dc7c2f 1887static int
572e2857
BB
1888zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
1889{
1890 uint64_t sa_obj = 0;
1891 int error;
1892
1893 error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
1894 if (error != 0 && error != ENOENT)
1895 return (error);
1896
1897 error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
1898 return (error);
1899}
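/*
 * Note, added for clarity: on pools created before ZPL_VERSION_SA the
 * ZFS_SA_ATTRS entry does not exist, zap_lookup() returns ENOENT, and
 * sa_setup() is called with sa_obj == 0 so lookups fall back to the
 * legacy znode_phys_t bonus layout.
 */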
1900
1901static int
1902zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
7b8518cb 1903 dmu_buf_t **db, void *tag)
34dc7c2f 1904{
34dc7c2f 1905 dmu_object_info_t doi;
34dc7c2f 1906 int error;
428870ff 1907
7b8518cb 1908 if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
34dc7c2f
BB
1909 return (error);
1910
572e2857 1911 dmu_object_info_from_db(*db, &doi);
428870ff
BB
1912 if ((doi.doi_bonus_type != DMU_OT_SA &&
1913 doi.doi_bonus_type != DMU_OT_ZNODE) ||
d6320ddb
BB
1914 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1915 doi.doi_bonus_size < sizeof (znode_phys_t))) {
7b8518cb 1916 sa_buf_rele(*db, tag);
2e528b49 1917 return (SET_ERROR(ENOTSUP));
34dc7c2f
BB
1918 }
1919
572e2857
BB
1920 error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
1921 if (error != 0) {
7b8518cb 1922 sa_buf_rele(*db, tag);
428870ff
BB
1923 return (error);
1924 }
1925
572e2857
BB
1926 return (0);
1927}
1928
1929void
7b8518cb 1930zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
572e2857
BB
1931{
1932 sa_handle_destroy(hdl);
7b8518cb 1933 sa_buf_rele(db, tag);
572e2857
BB
1934}
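
/*
 * Illustrative sketch, not part of zfs_znode.c: the hold/release pair above
 * is always used bracket-style with a tag, mirroring the loop in
 * zfs_obj_to_path_impl() below.  read_obj_gen() is a hypothetical example
 * that reads a single SA attribute from an arbitrary object.
 */
static int
read_obj_gen(objset_t *osp, uint64_t obj, sa_attr_type_t *sa_table,
    uint64_t *genp)
{
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	if ((error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG)) != 0)
		return (error);

	/*
	 * Every successful grab must be paired with a release of both the
	 * private SA handle and the underlying bonus buffer.
	 */
	error = sa_lookup(hdl, sa_table[ZPL_GEN], genp, sizeof (*genp));
	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}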
1935
1936/*
1937 * Given an object number, return its parent object number and whether
1938 * or not the object is an extended attribute directory.
1939 */
1940static int
b23ad7f3
JJ
1941zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
1942 uint64_t *pobjp, int *is_xattrdir)
572e2857
BB
1943{
1944 uint64_t parent;
1945 uint64_t pflags;
1946 uint64_t mode;
b23ad7f3 1947 uint64_t parent_mode;
572e2857 1948 sa_bulk_attr_t bulk[3];
b23ad7f3
JJ
1949 sa_handle_t *sa_hdl;
1950 dmu_buf_t *sa_db;
572e2857
BB
1951 int count = 0;
1952 int error;
1953
1954 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
1955 &parent, sizeof (parent));
428870ff 1956 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
572e2857 1957 &pflags, sizeof (pflags));
428870ff 1958 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
572e2857 1959 &mode, sizeof (mode));
428870ff 1960
572e2857 1961 if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
428870ff 1962 return (error);
572e2857 1963
b23ad7f3
JJ
1964 /*
1965 * When a link is removed its parent pointer is not changed and will
1966 * be invalid. There are two cases where a link is removed but the
1967 * file stays around: when it goes to the delete queue and when there
1968 * are additional links.
1969 */
1970 error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
1971 if (error != 0)
1972 return (error);
1973
1974 error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
1975 zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
1976 if (error != 0)
1977 return (error);
1978
428870ff 1979 *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
34dc7c2f 1980
b23ad7f3
JJ
1981 /*
1982 * Extended attributes can be applied to files, directories, etc.
1983 * Otherwise the parent must be a directory.
1984 */
1985 if (!*is_xattrdir && !S_ISDIR(parent_mode))
1986 return (SET_ERROR(EINVAL));
1987
1988 *pobjp = parent;
1989
34dc7c2f
BB
1990 return (0);
1991}
1992
572e2857
BB
1993/*
1994 * Given an object number, return some zpl level statistics
1995 */
1996static int
1997zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
1998 zfs_stat_t *sb)
34dc7c2f 1999{
572e2857
BB
2000 sa_bulk_attr_t bulk[4];
2001 int count = 0;
2002
2003 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2004 &sb->zs_mode, sizeof (sb->zs_mode));
2005 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
2006 &sb->zs_gen, sizeof (sb->zs_gen));
2007 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
2008 &sb->zs_links, sizeof (sb->zs_links));
2009 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
2010 &sb->zs_ctime, sizeof (sb->zs_ctime));
2011
2012 return (sa_bulk_lookup(hdl, bulk, count));
2013}
2014
2015static int
2016zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
2017 sa_attr_type_t *sa_table, char *buf, int len)
2018{
2019 sa_handle_t *sa_hdl;
2020 sa_handle_t *prevhdl = NULL;
2021 dmu_buf_t *prevdb = NULL;
2022 dmu_buf_t *sa_db = NULL;
34dc7c2f
BB
2023 char *path = buf + len - 1;
2024 int error;
2025
2026 *path = '\0';
572e2857 2027 sa_hdl = hdl;
428870ff 2028
34dc7c2f 2029 for (;;) {
17897ce2 2030 uint64_t pobj = 0;
34dc7c2f
BB
2031 char component[MAXNAMELEN + 2];
2032 size_t complen;
17897ce2 2033 int is_xattrdir = 0;
34dc7c2f 2034
572e2857 2035 if (prevdb)
7b8518cb 2036 zfs_release_sa_handle(prevhdl, prevdb, FTAG);
572e2857 2037
b23ad7f3 2038 if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
572e2857 2039 &is_xattrdir)) != 0)
34dc7c2f
BB
2040 break;
2041
2042 if (pobj == obj) {
2043 if (path[0] != '/')
2044 *--path = '/';
2045 break;
2046 }
2047
2048 component[0] = '/';
2049 if (is_xattrdir) {
2050 (void) sprintf(component + 1, "<xattrdir>");
2051 } else {
2052 error = zap_value_search(osp, pobj, obj,
2053 ZFS_DIRENT_OBJ(-1ULL), component + 1);
2054 if (error != 0)
2055 break;
2056 }
2057
2058 complen = strlen(component);
2059 path -= complen;
2060 ASSERT(path >= buf);
2061 bcopy(component, path, complen);
2062 obj = pobj;
572e2857
BB
2063
2064 if (sa_hdl != hdl) {
2065 prevhdl = sa_hdl;
2066 prevdb = sa_db;
2067 }
7b8518cb 2068 error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
572e2857
BB
2069 if (error != 0) {
2070 sa_hdl = prevhdl;
2071 sa_db = prevdb;
2072 break;
2073 }
2074 }
2075
2076 if (sa_hdl != NULL && sa_hdl != hdl) {
2077 ASSERT(sa_db != NULL);
7b8518cb 2078 zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
34dc7c2f
BB
2079 }
2080
2081 if (error == 0)
2082 (void) memmove(buf, path, buf + len - path);
428870ff 2083
34dc7c2f
BB
2084 return (error);
2085}
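
/*
 * Illustrative sketch, not part of zfs_znode.c: the prepend-into-the-buffer-
 * tail technique used by zfs_obj_to_path_impl(), demonstrated as plain
 * userland C with a hypothetical fixed component chain instead of real
 * parent-pointer walks.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *chain[] = { "file.txt", "docs", "home" };	/* leaf first */
	char buf[64];
	char *path = buf + sizeof (buf) - 1;
	size_t i;

	*path = '\0';
	for (i = 0; i < sizeof (chain) / sizeof (chain[0]); i++) {
		char component[32];
		size_t complen;

		/* Build "/<name>" and prepend it, as the loop above does. */
		(void) snprintf(component, sizeof (component), "/%s", chain[i]);
		complen = strlen(component);
		path -= complen;
		memcpy(path, component, complen);
	}

	/* Slide the finished string to the front of the caller's buffer. */
	(void) memmove(buf, path, buf + sizeof (buf) - path);
	(void) printf("%s\n", buf);	/* prints /home/docs/file.txt */
	return (0);
}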
572e2857
BB
2086
2087int
2088zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
2089{
2090 sa_attr_type_t *sa_table;
2091 sa_handle_t *hdl;
2092 dmu_buf_t *db;
2093 int error;
2094
2095 error = zfs_sa_setup(osp, &sa_table);
2096 if (error != 0)
2097 return (error);
2098
7b8518cb 2099 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
572e2857
BB
2100 if (error != 0)
2101 return (error);
2102
2103 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2104
7b8518cb 2105 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2106 return (error);
2107}
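
/*
 * Illustrative sketch, not part of zfs_znode.c: a consumer such as zdb
 * resolves an object number to its dataset-relative path with a single
 * call.  show_path() and the MAXPATHLEN-sized buffer are assumptions about
 * the caller, not something defined in this file.
 */
static void
show_path(objset_t *osp, uint64_t obj)
{
	char path[MAXPATHLEN];
	int error;

	error = zfs_obj_to_path(osp, obj, path, sizeof (path));
	if (error == 0)
		(void) printf("obj %llu is %s\n", (u_longlong_t)obj, path);
	else
		(void) printf("obj %llu: error %d\n", (u_longlong_t)obj, error);
}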
2108
2109int
2110zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
2111 char *buf, int len)
2112{
2113 char *path = buf + len - 1;
2114 sa_attr_type_t *sa_table;
2115 sa_handle_t *hdl;
2116 dmu_buf_t *db;
2117 int error;
2118
2119 *path = '\0';
2120
2121 error = zfs_sa_setup(osp, &sa_table);
2122 if (error != 0)
2123 return (error);
2124
7b8518cb 2125 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
572e2857
BB
2126 if (error != 0)
2127 return (error);
2128
2129 error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
2130 if (error != 0) {
7b8518cb 2131 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2132 return (error);
2133 }
2134
2135 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2136
7b8518cb 2137 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2138 return (error);
2139}
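
/*
 * Illustrative sketch, not part of zfs_znode.c: zfs_obj_to_stats() is the
 * "stats plus path" variant (used in practice by the diff ioctl path); the
 * helper name and the fields printed are illustrative assumptions.
 */
static void
show_stats(objset_t *osp, uint64_t obj)
{
	zfs_stat_t zs;
	char path[MAXPATHLEN];

	if (zfs_obj_to_stats(osp, obj, &zs, path, sizeof (path)) == 0)
		(void) printf("%s: mode %llo links %llu\n", path,
		    (u_longlong_t)zs.zs_mode, (u_longlong_t)zs.zs_links);
}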
c28b2279
BB
2140
2141#if defined(_KERNEL) && defined(HAVE_SPL)
2142EXPORT_SYMBOL(zfs_create_fs);
2143EXPORT_SYMBOL(zfs_obj_to_path);
0720116d
BB
2144
2145module_param(zfs_object_mutex_size, uint, 0644);
2146MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
c28b2279 2147#endif