module/zfs/zfs_znode.c
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
19d55079 23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
34dc7c2f
BB
24 */
25
26/* Portions Copyright 2007 Jeremy Teo */
27
34dc7c2f
BB
28#ifdef _KERNEL
29#include <sys/types.h>
30#include <sys/param.h>
31#include <sys/time.h>
32#include <sys/systm.h>
33#include <sys/sysmacros.h>
34#include <sys/resource.h>
35#include <sys/mntent.h>
36#include <sys/mkdev.h>
37#include <sys/u8_textprep.h>
38#include <sys/dsl_dataset.h>
39#include <sys/vfs.h>
40#include <sys/vfs_opreg.h>
41#include <sys/vnode.h>
42#include <sys/file.h>
43#include <sys/kmem.h>
44#include <sys/errno.h>
45#include <sys/unistd.h>
46#include <sys/mode.h>
47#include <sys/atomic.h>
48#include <vm/pvn.h>
49#include "fs/fs_subr.h"
50#include <sys/zfs_dir.h>
51#include <sys/zfs_acl.h>
52#include <sys/zfs_ioctl.h>
53#include <sys/zfs_rlock.h>
54#include <sys/zfs_fuid.h>
3558fd73 55#include <sys/zfs_vnops.h>
ebe7e575 56#include <sys/zfs_ctldir.h>
428870ff 57#include <sys/dnode.h>
34dc7c2f
BB
58#include <sys/fs/zfs.h>
59#include <sys/kidmap.h>
3558fd73 60#include <sys/zpl.h>
34dc7c2f
BB
61#endif /* _KERNEL */
62
63#include <sys/dmu.h>
f1512ee6 64#include <sys/dmu_objset.h>
50c957f7 65#include <sys/dmu_tx.h>
34dc7c2f
BB
66#include <sys/refcount.h>
67#include <sys/stat.h>
68#include <sys/zap.h>
69#include <sys/zfs_znode.h>
428870ff
BB
70#include <sys/sa.h>
71#include <sys/zfs_sa.h>
572e2857 72#include <sys/zfs_stat.h>
34dc7c2f
BB
73
74#include "zfs_prop.h"
428870ff 75#include "zfs_comutil.h"
34dc7c2f 76
b128c09f
BB
77/*
78 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
79 * turned on when DEBUG is also defined.
80 */
81#ifdef DEBUG
82#define ZNODE_STATS
83#endif /* DEBUG */
84
85#ifdef ZNODE_STATS
86#define ZNODE_STAT_ADD(stat) ((stat)++)
87#else
88#define ZNODE_STAT_ADD(stat) /* nothing */
89#endif /* ZNODE_STATS */
90
34dc7c2f
BB
91/*
92 * Functions needed for userland (i.e. libzpool) are not put under
93 * #ifdef _KERNEL; the rest of the functions have dependencies
94 * (such as VFS logic) that will not compile easily in userland.
95 */
96#ifdef _KERNEL
9babb374 97
b128c09f 98static kmem_cache_t *znode_cache = NULL;
c96c36fa 99static kmem_cache_t *znode_hold_cache = NULL;
0720116d 100unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
34dc7c2f 101
34dc7c2f
BB
102/*ARGSUSED*/
103static int
b128c09f 104zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
34dc7c2f
BB
105{
106 znode_t *zp = buf;
107
3558fd73 108 inode_init_once(ZTOI(zp));
b128c09f
BB
109 list_link_init(&zp->z_link_node);
110
34dc7c2f 111 mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
34dc7c2f 112 rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
448d7aaa 113 rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
34dc7c2f 114 mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
82a37189 115 rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
34dc7c2f 116
d88895a0 117 zfs_rlock_init(&zp->z_range_lock);
34dc7c2f 118
b128c09f 119 zp->z_dirlocks = NULL;
45d1cae3 120 zp->z_acl_cached = NULL;
82a37189 121 zp->z_xattr_cached = NULL;
98701490 122 zp->z_xattr_parent = 0;
572e2857 123 zp->z_moved = 0;
34dc7c2f
BB
124 return (0);
125}
126
127/*ARGSUSED*/
128static void
b128c09f 129zfs_znode_cache_destructor(void *buf, void *arg)
34dc7c2f
BB
130{
131 znode_t *zp = buf;
132
b128c09f 133 ASSERT(!list_link_active(&zp->z_link_node));
34dc7c2f 134 mutex_destroy(&zp->z_lock);
34dc7c2f
BB
135 rw_destroy(&zp->z_parent_lock);
136 rw_destroy(&zp->z_name_lock);
137 mutex_destroy(&zp->z_acl_lock);
82a37189 138 rw_destroy(&zp->z_xattr_lock);
d88895a0 139 zfs_rlock_destroy(&zp->z_range_lock);
34dc7c2f 140
b128c09f 141 ASSERT(zp->z_dirlocks == NULL);
45d1cae3 142 ASSERT(zp->z_acl_cached == NULL);
82a37189 143 ASSERT(zp->z_xattr_cached == NULL);
b128c09f
BB
144}
145
c96c36fa
BB
146static int
147zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
148{
149 znode_hold_t *zh = buf;
150
151 mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
152 refcount_create(&zh->zh_refcount);
153 zh->zh_obj = ZFS_NO_OBJECT;
154
155 return (0);
156}
157
158static void
159zfs_znode_hold_cache_destructor(void *buf, void *arg)
160{
161 znode_hold_t *zh = buf;
162
163 mutex_destroy(&zh->zh_lock);
164 refcount_destroy(&zh->zh_refcount);
165}
166
34dc7c2f
BB
167void
168zfs_znode_init(void)
169{
170 /*
5074bfe8
TC
171 * Initialize zcache. The KMC_SLAB hint is used so that the cache is
172 * backed by kmalloc() on the Linux slab, ensuring that any
173 * wait_on_bit() operations on the related inode operate properly.
34dc7c2f
BB
174 */
175 ASSERT(znode_cache == NULL);
176 znode_cache = kmem_cache_create("zfs_znode_cache",
177 sizeof (znode_t), 0, zfs_znode_cache_constructor,
5074bfe8 178 zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);
c96c36fa
BB
179
180 ASSERT(znode_hold_cache == NULL);
181 znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
182 sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
183 zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
34dc7c2f
BB
184}
185
186void
187zfs_znode_fini(void)
188{
34dc7c2f
BB
189 /*
190 * Cleanup zcache
191 */
192 if (znode_cache)
193 kmem_cache_destroy(znode_cache);
194 znode_cache = NULL;
c96c36fa
BB
195
196 if (znode_hold_cache)
197 kmem_cache_destroy(znode_hold_cache);
198 znode_hold_cache = NULL;
199}
200
201/*
202 * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
203 * serialize access to a znode and its SA buffer while the object is being
204 * created or destroyed. This kind of locking would normally reside in the
205 * znode itself but in this case that's impossible because the znode and SA
206 * buffer may not yet exist. Therefore the locking is handled externally
207 * with an array of mutexes and AVL trees which contain per-object locks.
208 *
209 * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
210 * into the correct AVL tree and finally the per-object lock is held. In
211 * zfs_znode_hold_exit() the process is reversed. The per-object lock is
212 * released, removed from the AVL tree and destroyed if there are no waiters.
213 *
214 * This scheme has two important properties:
215 *
216 * 1) No memory allocations are performed while holding one of the z_hold_locks.
217 * This ensures evict(), which can be called from direct memory reclaim, will
218 * never block waiting on a z_hold_locks which just happens to have hashed
219 * to the same index.
220 *
221 * 2) All locks used to serialize access to an object are per-object and never
222 * shared. This minimizes lock contention without creating a large number
223 * of dedicated locks.
224 *
225 * On the downside it does require znode_hold_t structures to be frequently
226 * allocated and freed. However, because these are backed by a kmem cache
227 * and very short lived this cost is minimal.
228 */
229int
230zfs_znode_hold_compare(const void *a, const void *b)
231{
ee36c709
GN
232 const znode_hold_t *zh_a = (const znode_hold_t *)a;
233 const znode_hold_t *zh_b = (const znode_hold_t *)b;
234
235 return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj));
c96c36fa
BB
236}
237
238boolean_t
239zfs_znode_held(zfs_sb_t *zsb, uint64_t obj)
240{
241 znode_hold_t *zh, search;
242 int i = ZFS_OBJ_HASH(zsb, obj);
37c56346 243 boolean_t held;
c96c36fa
BB
244
245 search.zh_obj = obj;
246
247 mutex_enter(&zsb->z_hold_locks[i]);
248 zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
37c56346 249 held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE;
c96c36fa
BB
250 mutex_exit(&zsb->z_hold_locks[i]);
251
37c56346 252 return (held);
c96c36fa
BB
253}
254
255static znode_hold_t *
256zfs_znode_hold_enter(zfs_sb_t *zsb, uint64_t obj)
257{
258 znode_hold_t *zh, *zh_new, search;
259 int i = ZFS_OBJ_HASH(zsb, obj);
260 boolean_t found = B_FALSE;
261
262 zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
263 zh_new->zh_obj = obj;
264 search.zh_obj = obj;
265
266 mutex_enter(&zsb->z_hold_locks[i]);
267 zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
268 if (likely(zh == NULL)) {
269 zh = zh_new;
270 avl_add(&zsb->z_hold_trees[i], zh);
271 } else {
272 ASSERT3U(zh->zh_obj, ==, obj);
273 found = B_TRUE;
274 }
275 refcount_add(&zh->zh_refcount, NULL);
276 mutex_exit(&zsb->z_hold_locks[i]);
277
278 if (found == B_TRUE)
279 kmem_cache_free(znode_hold_cache, zh_new);
280
281 ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
282 ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
283 mutex_enter(&zh->zh_lock);
284
285 return (zh);
286}
287
288static void
289zfs_znode_hold_exit(zfs_sb_t *zsb, znode_hold_t *zh)
290{
291 int i = ZFS_OBJ_HASH(zsb, zh->zh_obj);
292 boolean_t remove = B_FALSE;
293
294 ASSERT(zfs_znode_held(zsb, zh->zh_obj));
295 ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
296 mutex_exit(&zh->zh_lock);
297
298 mutex_enter(&zsb->z_hold_locks[i]);
299 if (refcount_remove(&zh->zh_refcount, NULL) == 0) {
300 avl_remove(&zsb->z_hold_trees[i], zh);
301 remove = B_TRUE;
302 }
303 mutex_exit(&zsb->z_hold_locks[i]);
304
305 if (remove == B_TRUE)
306 kmem_cache_free(znode_hold_cache, zh);
34dc7c2f
BB
307}
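/*
 * Typical usage of the hold functions above (an illustrative sketch;
 * zfs_mknode() and zfs_zget() below follow this same pattern):
 *
 *	znode_hold_t *zh;
 *
 *	zh = zfs_znode_hold_enter(zsb, obj);
 *	... create, load, or tear down the znode and its SA handle ...
 *	zfs_znode_hold_exit(zsb, zh);
 */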
308
34dc7c2f 309int
3558fd73 310zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
34dc7c2f 311{
3c9609b3 312#ifdef HAVE_SMB_SHARE
9babb374
BB
313 zfs_acl_ids_t acl_ids;
314 vattr_t vattr;
315 znode_t *sharezp;
316 vnode_t *vp;
317 znode_t *zp;
318 int error;
34dc7c2f 319
9babb374 320 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
3558fd73 321 vattr.va_mode = S_IFDIR | 0555;
9babb374
BB
322 vattr.va_uid = crgetuid(kcred);
323 vattr.va_gid = crgetgid(kcred);
34dc7c2f 324
79c76d5b 325 sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
572e2857 326 sharezp->z_moved = 0;
9babb374
BB
327 sharezp->z_unlinked = 0;
328 sharezp->z_atime_dirty = 0;
329 sharezp->z_zfsvfs = zfsvfs;
428870ff 330 sharezp->z_is_sa = zfsvfs->z_use_sa;
34dc7c2f 331
9babb374
BB
332 vp = ZTOV(sharezp);
333 vn_reinit(vp);
334 vp->v_type = VDIR;
34dc7c2f 335
9babb374
BB
336 VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
337 kcred, NULL, &acl_ids));
428870ff 338 zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
9babb374
BB
339 ASSERT3P(zp, ==, sharezp);
340 ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
341 POINTER_INVALIDATE(&sharezp->z_zfsvfs);
342 error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
343 ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
344 zfsvfs->z_shares_dir = sharezp->z_id;
345
346 zfs_acl_ids_free(&acl_ids);
3558fd73 347 // ZTOV(sharezp)->v_count = 0;
428870ff 348 sa_handle_destroy(sharezp->z_sa_hdl);
9babb374 349 kmem_cache_free(znode_cache, sharezp);
34dc7c2f 350
9babb374 351 return (error);
9ee7fac5
BB
352#else
353 return (0);
3c9609b3 354#endif /* HAVE_SMB_SHARE */
34dc7c2f
BB
355}
356
34dc7c2f 357static void
3558fd73 358zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp,
428870ff 359 dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
34dc7c2f 360{
c96c36fa 361 ASSERT(zfs_znode_held(zsb, zp->z_id));
34dc7c2f
BB
362
363 mutex_enter(&zp->z_lock);
364
428870ff
BB
365 ASSERT(zp->z_sa_hdl == NULL);
366 ASSERT(zp->z_acl_cached == NULL);
367 if (sa_hdl == NULL) {
3558fd73 368 VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, zp,
428870ff
BB
369 SA_HDL_SHARED, &zp->z_sa_hdl));
370 } else {
371 zp->z_sa_hdl = sa_hdl;
372 sa_set_userp(sa_hdl, zp);
373 }
34dc7c2f 374
428870ff 375 zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
34dc7c2f 376
34dc7c2f 377 mutex_exit(&zp->z_lock);
34dc7c2f
BB
378}
379
380void
381zfs_znode_dmu_fini(znode_t *zp)
382{
c96c36fa 383 ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
3558fd73 384 RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
428870ff
BB
385
386 sa_handle_destroy(zp->z_sa_hdl);
387 zp->z_sa_hdl = NULL;
34dc7c2f
BB
388}
389
390/*
3558fd73
BB
391 * Called by new_inode() to allocate a new inode.
392 */
393int
394zfs_inode_alloc(struct super_block *sb, struct inode **ip)
395{
396 znode_t *zp;
397
79c76d5b 398 zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
3558fd73
BB
399 *ip = ZTOI(zp);
400
401 return (0);
402}
403
404/*
405 * Called in multiple places when an inode should be destroyed.
406 */
407void
408zfs_inode_destroy(struct inode *ip)
409{
410 znode_t *zp = ITOZ(ip);
411 zfs_sb_t *zsb = ZTOZSB(zp);
412
413 mutex_enter(&zsb->z_znodes_lock);
7b3e34ba
BB
414 if (list_link_active(&zp->z_link_node)) {
415 list_remove(&zsb->z_all_znodes, zp);
416 zsb->z_nr_znodes--;
417 }
3558fd73
BB
418 mutex_exit(&zsb->z_znodes_lock);
419
420 if (zp->z_acl_cached) {
421 zfs_acl_free(zp->z_acl_cached);
422 zp->z_acl_cached = NULL;
423 }
424
82a37189
BB
425 if (zp->z_xattr_cached) {
426 nvlist_free(zp->z_xattr_cached);
427 zp->z_xattr_cached = NULL;
428 }
429
3558fd73
BB
430 kmem_cache_free(znode_cache, zp);
431}
432
433static void
434zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
435{
aa6d8c10 436 uint64_t rdev = 0;
3558fd73
BB
437
438 switch (ip->i_mode & S_IFMT) {
439 case S_IFREG:
440 ip->i_op = &zpl_inode_operations;
441 ip->i_fop = &zpl_file_operations;
442 ip->i_mapping->a_ops = &zpl_address_space_operations;
443 break;
444
445 case S_IFDIR:
446 ip->i_op = &zpl_dir_inode_operations;
447 ip->i_fop = &zpl_dir_file_operations;
448 ITOZ(ip)->z_zn_prefetch = B_TRUE;
449 break;
450
451 case S_IFLNK:
452 ip->i_op = &zpl_symlink_inode_operations;
453 break;
454
aa6d8c10
NB
455 /*
456 * rdev is only stored in the SA for device files.
457 */
3558fd73
BB
458 case S_IFCHR:
459 case S_IFBLK:
aecdc706 460 (void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb), &rdev,
53b1d979 461 sizeof (rdev));
aa6d8c10
NB
462 /*FALLTHROUGH*/
463 case S_IFIFO:
464 case S_IFSOCK:
3558fd73
BB
465 init_special_inode(ip, ip->i_mode, rdev);
466 ip->i_op = &zpl_special_inode_operations;
467 break;
468
469 default:
53b1d979
BB
470 zfs_panic_recover("inode %llu has invalid mode: 0x%x\n",
471 (u_longlong_t)ip->i_ino, ip->i_mode);
472
473 /* Assume the inode is a file and attempt to continue */
474 ip->i_mode = S_IFREG | 0644;
475 ip->i_op = &zpl_inode_operations;
476 ip->i_fop = &zpl_file_operations;
477 ip->i_mapping->a_ops = &zpl_address_space_operations;
478 break;
3558fd73
BB
479 }
480}
481
7bb1325f
CC
482void
483zfs_set_inode_flags(znode_t *zp, struct inode *ip)
484{
485 /*
486 * Linux and Solaris have different sets of file attributes, so we
487 * restrict this conversion to the intersection of the two.
488 */
a5248129
CC
489#ifdef HAVE_INODE_SET_FLAGS
490 unsigned int flags = 0;
491 if (zp->z_pflags & ZFS_IMMUTABLE)
492 flags |= S_IMMUTABLE;
493 if (zp->z_pflags & ZFS_APPENDONLY)
494 flags |= S_APPEND;
7bb1325f 495
a5248129
CC
496 inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND);
497#else
7bb1325f
CC
498 if (zp->z_pflags & ZFS_IMMUTABLE)
499 ip->i_flags |= S_IMMUTABLE;
500 else
501 ip->i_flags &= ~S_IMMUTABLE;
502
503 if (zp->z_pflags & ZFS_APPENDONLY)
504 ip->i_flags |= S_APPEND;
505 else
506 ip->i_flags &= ~S_APPEND;
a5248129 507#endif
7bb1325f
CC
508}
509
704cd075
CC
510/*
511 * Update the embedded inode given the znode. We should work toward
512 * eliminating this function as soon as possible by removing values
513 * which are duplicated between the znode and inode. If the generic
514 * inode has the correct field it should be used, and the ZFS code
515 * updated to access the inode. This can be done incrementally.
516 */
9f5f0019
NB
517void
518zfs_inode_update(znode_t *zp)
704cd075
CC
519{
520 zfs_sb_t *zsb;
521 struct inode *ip;
522 uint32_t blksize;
523 u_longlong_t i_blocks;
704cd075
CC
524
525 ASSERT(zp != NULL);
526 zsb = ZTOZSB(zp);
527 ip = ZTOI(zp);
528
529 /* Skip .zfs control nodes which do not exist on disk. */
530 if (zfsctl_is_node(ip))
531 return;
532
704cd075
CC
533 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
534
535 spin_lock(&ip->i_lock);
704cd075 536 ip->i_blocks = i_blocks;
704cd075
CC
537 i_size_write(ip, zp->z_size);
538 spin_unlock(&ip->i_lock);
539}
540
704cd075 541
3558fd73
BB
542/*
543 * Construct a znode+inode and initialize.
34dc7c2f
BB
544 *
545 * This does not do a call to dmu_set_user(); that is
546 * up to the caller to do, in case you don't want to
547 * return the znode.
548 */
549static znode_t *
3558fd73 550zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
31b6111f 551 dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
34dc7c2f
BB
552{
553 znode_t *zp;
3558fd73 554 struct inode *ip;
7f89ae6b 555 uint64_t mode;
428870ff 556 uint64_t parent;
278f2236 557 uint64_t tmp_gen;
dfbc8630 558 uint64_t links;
2c6abf15 559 uint64_t z_uid, z_gid;
9f5f0019
NB
560 uint64_t atime[2], mtime[2], ctime[2];
561 sa_bulk_attr_t bulk[11];
428870ff 562 int count = 0;
34dc7c2f 563
3558fd73 564 ASSERT(zsb != NULL);
34dc7c2f 565
3558fd73
BB
566 ip = new_inode(zsb->z_sb);
567 if (ip == NULL)
568 return (NULL);
7304b6e5 569
3558fd73 570 zp = ITOZ(ip);
34dc7c2f 571 ASSERT(zp->z_dirlocks == NULL);
ebe7e575
BB
572 ASSERT3P(zp->z_acl_cached, ==, NULL);
573 ASSERT3P(zp->z_xattr_cached, ==, NULL);
572e2857 574 zp->z_moved = 0;
428870ff 575 zp->z_sa_hdl = NULL;
34dc7c2f
BB
576 zp->z_unlinked = 0;
577 zp->z_atime_dirty = 0;
578 zp->z_mapcnt = 0;
34dc7c2f
BB
579 zp->z_id = db->db_object;
580 zp->z_blksz = blksz;
581 zp->z_seq = 0x7A4653;
582 zp->z_sync_cnt = 0;
ebe7e575
BB
583 zp->z_is_mapped = B_FALSE;
584 zp->z_is_ctldir = B_FALSE;
7b3e34ba 585 zp->z_is_stale = B_FALSE;
d88895a0
CC
586 zp->z_range_lock.zr_size = &zp->z_size;
587 zp->z_range_lock.zr_blksz = &zp->z_blksz;
588 zp->z_range_lock.zr_max_blksz = &ZTOZSB(zp)->z_max_blksz;
34dc7c2f 589
3558fd73
BB
590 zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
591
7f89ae6b 592 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
278f2236 593 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &tmp_gen, 8);
3558fd73 594 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8);
dfbc8630 595 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &links, 8);
3558fd73 596 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
428870ff 597 &zp->z_pflags, 8);
3558fd73 598 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
7304b6e5 599 &parent, 8);
2c6abf15
NB
600 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &z_uid, 8);
601 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &z_gid, 8);
9f5f0019
NB
602 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &atime, 16);
603 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
604 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
428870ff 605
02730c33 606 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0) {
428870ff
BB
607 if (hdl == NULL)
608 sa_handle_destroy(zp->z_sa_hdl);
07d63f0c 609 zp->z_sa_hdl = NULL;
3558fd73 610 goto error;
34dc7c2f 611 }
7304b6e5 612
12fa7f34 613 zp->z_mode = ip->i_mode = mode;
278f2236 614 ip->i_generation = (uint32_t)tmp_gen;
ba2fe6af 615 ip->i_blkbits = SPA_MINBLOCKSHIFT;
dfbc8630 616 set_nlink(ip, (uint32_t)links);
2c6abf15
NB
617 zfs_uid_write(ip, z_uid);
618 zfs_gid_write(ip, z_gid);
7bb1325f 619 zfs_set_inode_flags(zp, ip);
7f89ae6b 620
98701490
CC
621 /* Cache the xattr parent id */
622 if (zp->z_pflags & ZFS_XATTR)
623 zp->z_xattr_parent = parent;
624
9f5f0019
NB
625 ZFS_TIME_DECODE(&ip->i_atime, atime);
626 ZFS_TIME_DECODE(&ip->i_mtime, mtime);
627 ZFS_TIME_DECODE(&ip->i_ctime, ctime);
628
3558fd73 629 ip->i_ino = obj;
9f5f0019 630 zfs_inode_update(zp);
3558fd73
BB
631 zfs_inode_set_ops(zsb, ip);
632
7b3e34ba
BB
633 /*
634 * The only way insert_inode_locked() can fail is if the ip->i_ino
635 * number is already hashed for this super block. This can never
636 * happen because the inode numbers map 1:1 with the object numbers.
637 *
638 * The one exception is rolling back a mounted file system, but in
639 * this case all the active inodes are unhashed during the rollback.
640 */
641 VERIFY3S(insert_inode_locked(ip), ==, 0);
c85b224f 642
3558fd73
BB
643 mutex_enter(&zsb->z_znodes_lock);
644 list_insert_tail(&zsb->z_all_znodes, zp);
ab26409d 645 zsb->z_nr_znodes++;
b128c09f 646 membar_producer();
3558fd73 647 mutex_exit(&zsb->z_znodes_lock);
b128c09f 648
3558fd73 649 unlock_new_inode(ip);
34dc7c2f 650 return (zp);
3558fd73
BB
651
652error:
3558fd73 653 iput(ip);
d1d7e268 654 return (NULL);
34dc7c2f
BB
655}
656
1e8db771
BB
657/*
658 * Safely mark an inode dirty. Inodes which are part of a read-only
659 * file system or snapshot may not be dirtied.
660 */
661void
662zfs_mark_inode_dirty(struct inode *ip)
663{
664 zfs_sb_t *zsb = ITOZSB(ip);
665
666 if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os))
667 return;
668
669 mark_inode_dirty(ip);
670}
671
428870ff
BB
672static uint64_t empty_xattr;
673static uint64_t pad[4];
674static zfs_acl_phys_t acl_phys;
34dc7c2f
BB
675/*
676 * Create a new DMU object to hold a zfs znode.
677 *
678 * IN: dzp - parent directory for new znode
679 * vap - file attributes for new znode
680 * tx - dmu transaction id for zap operations
681 * cr - credentials of caller
682 * flag - flags:
683 * IS_ROOT_NODE - new object will be root
684 * IS_XATTR - new object is an attribute
34dc7c2f
BB
685 * bonuslen - length of bonus buffer
686 * setaclp - File/Dir initial ACL
687 * fuidp - Tracks fuid allocation.
688 *
689 * OUT: zpp - allocated znode
690 *
691 */
692void
693zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
428870ff 694 uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
34dc7c2f 695{
428870ff
BB
696 uint64_t crtime[2], atime[2], mtime[2], ctime[2];
697 uint64_t mode, size, links, parent, pflags;
572e2857 698 uint64_t dzp_pflags = 0;
428870ff 699 uint64_t rdev = 0;
3558fd73 700 zfs_sb_t *zsb = ZTOZSB(dzp);
428870ff 701 dmu_buf_t *db;
34dc7c2f
BB
702 timestruc_t now;
703 uint64_t gen, obj;
428870ff 704 int bonuslen;
50c957f7 705 int dnodesize;
428870ff
BB
706 sa_handle_t *sa_hdl;
707 dmu_object_type_t obj_type;
f30484af 708 sa_bulk_attr_t *sa_attrs;
428870ff
BB
709 int cnt = 0;
710 zfs_acl_locator_cb_t locate = { 0 };
c96c36fa 711 znode_hold_t *zh;
34dc7c2f 712
3558fd73 713 if (zsb->z_replay) {
34dc7c2f 714 obj = vap->va_nodeid;
34dc7c2f
BB
715 now = vap->va_ctime; /* see zfs_replay_create() */
716 gen = vap->va_nblocks; /* ditto */
50c957f7 717 dnodesize = vap->va_fsid; /* ditto */
34dc7c2f
BB
718 } else {
719 obj = 0;
720 gethrestime(&now);
721 gen = dmu_tx_get_txg(tx);
50c957f7 722 dnodesize = dmu_objset_dnodesize(zsb->z_os);
34dc7c2f
BB
723 }
724
50c957f7
NB
725 if (dnodesize == 0)
726 dnodesize = DNODE_MIN_SIZE;
727
3558fd73 728 obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
50c957f7 729
428870ff 730 bonuslen = (obj_type == DMU_OT_SA) ?
50c957f7 731 DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
428870ff 732
34dc7c2f
BB
733 /*
734 * Create a new DMU object.
735 */
736 /*
737 * There's currently no mechanism for pre-reading the blocks that will
572e2857 738 * be needed to allocate a new object, so we accept the small chance
34dc7c2f
BB
739 * that there will be an i/o error and we will fail one of the
740 * assertions below.
741 */
3558fd73
BB
742 if (S_ISDIR(vap->va_mode)) {
743 if (zsb->z_replay) {
50c957f7 744 VERIFY0(zap_create_claim_norm_dnsize(zsb->z_os, obj,
3558fd73 745 zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
50c957f7 746 obj_type, bonuslen, dnodesize, tx));
34dc7c2f 747 } else {
50c957f7 748 obj = zap_create_norm_dnsize(zsb->z_os,
3558fd73 749 zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
50c957f7 750 obj_type, bonuslen, dnodesize, tx);
34dc7c2f
BB
751 }
752 } else {
3558fd73 753 if (zsb->z_replay) {
50c957f7 754 VERIFY0(dmu_object_claim_dnsize(zsb->z_os, obj,
34dc7c2f 755 DMU_OT_PLAIN_FILE_CONTENTS, 0,
50c957f7 756 obj_type, bonuslen, dnodesize, tx));
34dc7c2f 757 } else {
50c957f7 758 obj = dmu_object_alloc_dnsize(zsb->z_os,
34dc7c2f 759 DMU_OT_PLAIN_FILE_CONTENTS, 0,
50c957f7 760 obj_type, bonuslen, dnodesize, tx);
34dc7c2f
BB
761 }
762 }
34dc7c2f 763
c96c36fa 764 zh = zfs_znode_hold_enter(zsb, obj);
3558fd73 765 VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));
34dc7c2f
BB
766
767 /*
768 * If this is the root, fix up the half-initialized parent pointer
769 * to reference the just-allocated physical data area.
770 */
771 if (flag & IS_ROOT_NODE) {
34dc7c2f 772 dzp->z_id = obj;
428870ff
BB
773 } else {
774 dzp_pflags = dzp->z_pflags;
34dc7c2f
BB
775 }
776
777 /*
778 * If parent is an xattr, so am I.
779 */
428870ff 780 if (dzp_pflags & ZFS_XATTR) {
34dc7c2f 781 flag |= IS_XATTR;
34dc7c2f
BB
782 }
783
3558fd73 784 if (zsb->z_use_fuids)
428870ff
BB
785 pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
786 else
787 pflags = 0;
34dc7c2f 788
3558fd73 789 if (S_ISDIR(vap->va_mode)) {
428870ff 790 size = 2; /* contents ("." and "..") */
dfbc8630 791 links = 2;
428870ff 792 } else {
dfbc8630 793 size = 0;
ace1eae8 794 links = (flag & IS_TMPFILE) ? 0 : 1;
34dc7c2f
BB
795 }
796
aa6d8c10 797 if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
dc1d7665 798 rdev = vap->va_rdev;
428870ff
BB
799
800 parent = dzp->z_id;
801 mode = acl_ids->z_mode;
34dc7c2f 802 if (flag & IS_XATTR)
428870ff 803 pflags |= ZFS_XATTR;
34dc7c2f 804
428870ff
BB
805 /*
806 * No execs denied will be determined when zfs_mode_compute() is called.
807 */
808 pflags |= acl_ids->z_aclp->z_hints &
809 (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
810 ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
34dc7c2f 811
428870ff
BB
812 ZFS_TIME_ENCODE(&now, crtime);
813 ZFS_TIME_ENCODE(&now, ctime);
34dc7c2f 814
3558fd73 815 if (vap->va_mask & ATTR_ATIME) {
428870ff 816 ZFS_TIME_ENCODE(&vap->va_atime, atime);
34dc7c2f 817 } else {
428870ff 818 ZFS_TIME_ENCODE(&now, atime);
34dc7c2f
BB
819 }
820
3558fd73 821 if (vap->va_mask & ATTR_MTIME) {
428870ff
BB
822 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
823 } else {
824 ZFS_TIME_ENCODE(&now, mtime);
825 }
826
827 /* Now add in all of the "SA" attributes */
3558fd73 828 VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED,
428870ff
BB
829 &sa_hdl));
830
831 /*
832 * Setup the array of attributes to be replaced/set on the new file
833 *
834 * order for DMU_OT_ZNODE is critical since it needs to be constructed
835 * in the old znode_phys_t format. Don't change this ordering
836 */
79c76d5b 837 sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
428870ff
BB
838
839 if (obj_type == DMU_OT_ZNODE) {
3558fd73 840 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
428870ff 841 NULL, &atime, 16);
3558fd73 842 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
428870ff 843 NULL, &mtime, 16);
3558fd73 844 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
428870ff 845 NULL, &ctime, 16);
3558fd73 846 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
428870ff 847 NULL, &crtime, 16);
3558fd73 848 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
428870ff 849 NULL, &gen, 8);
3558fd73 850 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
428870ff 851 NULL, &mode, 8);
3558fd73 852 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
428870ff 853 NULL, &size, 8);
3558fd73 854 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
428870ff 855 NULL, &parent, 8);
34dc7c2f 856 } else {
3558fd73 857 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
428870ff 858 NULL, &mode, 8);
3558fd73 859 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
428870ff 860 NULL, &size, 8);
3558fd73 861 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
428870ff 862 NULL, &gen, 8);
3558fd73
BB
863 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb),
864 NULL, &acl_ids->z_fuid, 8);
865 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb),
866 NULL, &acl_ids->z_fgid, 8);
867 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
428870ff 868 NULL, &parent, 8);
3558fd73 869 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
428870ff 870 NULL, &pflags, 8);
3558fd73 871 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
428870ff 872 NULL, &atime, 16);
3558fd73 873 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
428870ff 874 NULL, &mtime, 16);
3558fd73 875 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
428870ff 876 NULL, &ctime, 16);
3558fd73 877 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
428870ff
BB
878 NULL, &crtime, 16);
879 }
880
3558fd73 881 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8);
428870ff
BB
882
883 if (obj_type == DMU_OT_ZNODE) {
3558fd73 884 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL,
428870ff 885 &empty_xattr, 8);
34dc7c2f 886 }
428870ff 887 if (obj_type == DMU_OT_ZNODE ||
aa6d8c10 888 (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
3558fd73 889 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
428870ff 890 NULL, &rdev, 8);
428870ff
BB
891 }
892 if (obj_type == DMU_OT_ZNODE) {
3558fd73 893 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
428870ff 894 NULL, &pflags, 8);
3558fd73 895 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL,
428870ff 896 &acl_ids->z_fuid, 8);
3558fd73 897 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL,
428870ff 898 &acl_ids->z_fgid, 8);
3558fd73 899 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad,
428870ff 900 sizeof (uint64_t) * 4);
3558fd73 901 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL,
428870ff
BB
902 &acl_phys, sizeof (zfs_acl_phys_t));
903 } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
3558fd73 904 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL,
428870ff
BB
905 &acl_ids->z_aclp->z_acl_count, 8);
906 locate.cb_aclp = acl_ids->z_aclp;
3558fd73 907 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb),
428870ff
BB
908 zfs_acl_data_locator, &locate,
909 acl_ids->z_aclp->z_acl_bytes);
910 mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
911 acl_ids->z_fuid, acl_ids->z_fgid);
912 }
913
914 VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
34dc7c2f 915
34dc7c2f 916 if (!(flag & IS_ROOT_NODE)) {
31b6111f 917 *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl);
7b3e34ba
BB
918 VERIFY(*zpp != NULL);
919 VERIFY(dzp != NULL);
34dc7c2f
BB
920 } else {
921 /*
922 * If we are creating the root node, the "parent" we
923 * passed in is the znode for the root.
924 */
925 *zpp = dzp;
428870ff
BB
926
927 (*zpp)->z_sa_hdl = sa_hdl;
34dc7c2f 928 }
428870ff
BB
929
930 (*zpp)->z_pflags = pflags;
12fa7f34 931 (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
50c957f7 932 (*zpp)->z_dnodesize = dnodesize;
428870ff 933
428870ff
BB
934 if (obj_type == DMU_OT_ZNODE ||
935 acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
b0bc7a84 936 VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
428870ff 937 }
d1d7e268 938 kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
c96c36fa 939 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
940}
941
5484965a 942/*
d3cc8b15
WA
943 * Update in-core attributes. It is assumed the caller will be doing an
944 * sa_bulk_update to push the changes out.
5484965a
BB
945 */
946void
947zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
948{
949 xoptattr_t *xoap;
7bb1325f 950 boolean_t update_inode = B_FALSE;
5484965a
BB
951
952 xoap = xva_getxoptattr(xvap);
953 ASSERT(xoap);
954
955 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
956 uint64_t times[2];
957 ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
958 (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
959 &times, sizeof (times), tx);
960 XVA_SET_RTN(xvap, XAT_CREATETIME);
961 }
962 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
963 ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
964 zp->z_pflags, tx);
965 XVA_SET_RTN(xvap, XAT_READONLY);
966 }
967 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
968 ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
969 zp->z_pflags, tx);
970 XVA_SET_RTN(xvap, XAT_HIDDEN);
971 }
972 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
973 ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
974 zp->z_pflags, tx);
975 XVA_SET_RTN(xvap, XAT_SYSTEM);
976 }
977 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
978 ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
979 zp->z_pflags, tx);
980 XVA_SET_RTN(xvap, XAT_ARCHIVE);
981 }
982 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
983 ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
984 zp->z_pflags, tx);
985 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
64c688d7 986
7bb1325f 987 update_inode = B_TRUE;
5484965a
BB
988 }
989 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
990 ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
991 zp->z_pflags, tx);
992 XVA_SET_RTN(xvap, XAT_NOUNLINK);
993 }
994 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
995 ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
996 zp->z_pflags, tx);
997 XVA_SET_RTN(xvap, XAT_APPENDONLY);
64c688d7 998
7bb1325f 999 update_inode = B_TRUE;
5484965a
BB
1000 }
1001 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1002 ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
1003 zp->z_pflags, tx);
1004 XVA_SET_RTN(xvap, XAT_NODUMP);
1005 }
1006 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1007 ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
1008 zp->z_pflags, tx);
1009 XVA_SET_RTN(xvap, XAT_OPAQUE);
1010 }
1011 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1012 ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
1013 xoap->xoa_av_quarantined, zp->z_pflags, tx);
1014 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1015 }
1016 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1017 ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
1018 zp->z_pflags, tx);
1019 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1020 }
1021 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
1022 zfs_sa_set_scanstamp(zp, xvap, tx);
1023 XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
1024 }
1025 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1026 ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
1027 zp->z_pflags, tx);
1028 XVA_SET_RTN(xvap, XAT_REPARSE);
1029 }
1030 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1031 ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
1032 zp->z_pflags, tx);
1033 XVA_SET_RTN(xvap, XAT_OFFLINE);
1034 }
1035 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1036 ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
1037 zp->z_pflags, tx);
1038 XVA_SET_RTN(xvap, XAT_SPARSE);
1039 }
7bb1325f
CC
1040
1041 if (update_inode)
1042 zfs_set_inode_flags(zp, ZTOI(zp));
5484965a
BB
1043}
1044
34dc7c2f 1045int
3558fd73 1046zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
34dc7c2f
BB
1047{
1048 dmu_object_info_t doi;
1049 dmu_buf_t *db;
1050 znode_t *zp;
c96c36fa 1051 znode_hold_t *zh;
34dc7c2f 1052 int err;
428870ff 1053 sa_handle_t *hdl;
34dc7c2f
BB
1054
1055 *zpp = NULL;
1056
6f9548c4 1057again:
c96c36fa 1058 zh = zfs_znode_hold_enter(zsb, obj_num);
34dc7c2f 1059
3558fd73 1060 err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
34dc7c2f 1061 if (err) {
c96c36fa 1062 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1063 return (err);
1064 }
1065
1066 dmu_object_info_from_db(db, &doi);
428870ff
BB
1067 if (doi.doi_bonus_type != DMU_OT_SA &&
1068 (doi.doi_bonus_type != DMU_OT_ZNODE ||
1069 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1070 doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1071 sa_buf_rele(db, NULL);
c96c36fa 1072 zfs_znode_hold_exit(zsb, zh);
2e528b49 1073 return (SET_ERROR(EINVAL));
34dc7c2f
BB
1074 }
1075
428870ff
BB
1076 hdl = dmu_buf_get_user(db);
1077 if (hdl != NULL) {
36df2843 1078 zp = sa_get_userdata(hdl);
34dc7c2f 1079
8ac67298 1080
34dc7c2f 1081 /*
428870ff
BB
1082 * Since "SA" does immediate eviction we
1083 * should never find a sa handle that doesn't
1084 * know about the znode.
34dc7c2f 1085 */
428870ff
BB
1086
1087 ASSERT3P(zp, !=, NULL);
1088
1089 mutex_enter(&zp->z_lock);
34dc7c2f 1090 ASSERT3U(zp->z_id, ==, obj_num);
98701490
CC
1091 /*
1092 * If igrab() returns NULL the VFS has independently
1093 * determined the inode should be evicted and has
1094 * called iput_final() to start the eviction process.
1095 * The SA handle is still valid but because the VFS
1096 * requires that the eviction succeed we must drop
1097 * our locks and references to allow the eviction to
1098 * complete. The zfs_zget() may then be retried.
1099 *
1100 * This unlikely case could be optimized by registering
1101 * a sops->drop_inode() callback. The callback would
1102 * need to detect the active SA hold thereby informing
1103 * the VFS that this inode should not be evicted.
1104 */
1105 if (igrab(ZTOI(zp)) == NULL) {
1106 mutex_exit(&zp->z_lock);
1107 sa_buf_rele(db, NULL);
1108 zfs_znode_hold_exit(zsb, zh);
1109 /* inode might need this to finish evict */
1110 cond_resched();
1111 goto again;
34dc7c2f 1112 }
98701490
CC
1113 *zpp = zp;
1114 err = 0;
34dc7c2f 1115 mutex_exit(&zp->z_lock);
f3ad9cd6 1116 sa_buf_rele(db, NULL);
c96c36fa 1117 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1118 return (err);
1119 }
1120
1121 /*
3558fd73 1122 * Not found; create a new znode/vnode, but only if the file exists.
428870ff
BB
1123 *
1124 * There is a small window where zfs_vget() could
1125 * find this object while a file create is still in
1126 * progress. This is checked for in zfs_znode_alloc()
1127 *
1128 * if zfs_znode_alloc() fails it will drop the hold on the
1129 * bonus buffer.
34dc7c2f 1130 */
3558fd73 1131 zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
31b6111f 1132 doi.doi_bonus_type, obj_num, NULL);
428870ff 1133 if (zp == NULL) {
2e528b49 1134 err = SET_ERROR(ENOENT);
428870ff
BB
1135 } else {
1136 *zpp = zp;
1137 }
c96c36fa 1138 zfs_znode_hold_exit(zsb, zh);
428870ff 1139 return (err);
34dc7c2f
BB
1140}
1141
1142int
1143zfs_rezget(znode_t *zp)
1144{
3558fd73 1145 zfs_sb_t *zsb = ZTOZSB(zp);
34dc7c2f
BB
1146 dmu_object_info_t doi;
1147 dmu_buf_t *db;
1148 uint64_t obj_num = zp->z_id;
428870ff 1149 uint64_t mode;
dfbc8630 1150 uint64_t links;
9f5f0019 1151 sa_bulk_attr_t bulk[10];
34dc7c2f 1152 int err;
428870ff
BB
1153 int count = 0;
1154 uint64_t gen;
2c6abf15 1155 uint64_t z_uid, z_gid;
9f5f0019 1156 uint64_t atime[2], mtime[2], ctime[2];
c96c36fa 1157 znode_hold_t *zh;
34dc7c2f 1158
cbecb4fb
CC
1159 /*
1160 * Skip ctldir znodes; otherwise they will always get invalidated. This
1161 * causes odd behaviour for mounted snapdirs. In particular, on
1162 * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent
1163 * anyone from automounting it again as long as someone is still
1164 * using the detached mount.
1165 */
1166 if (zp->z_is_ctldir)
1167 return (0);
1168
c96c36fa 1169 zh = zfs_znode_hold_enter(zsb, obj_num);
34dc7c2f 1170
428870ff
BB
1171 mutex_enter(&zp->z_acl_lock);
1172 if (zp->z_acl_cached) {
1173 zfs_acl_free(zp->z_acl_cached);
1174 zp->z_acl_cached = NULL;
1175 }
428870ff 1176 mutex_exit(&zp->z_acl_lock);
7b3e34ba 1177
228b461b 1178 rw_enter(&zp->z_xattr_lock, RW_WRITER);
7b3e34ba
BB
1179 if (zp->z_xattr_cached) {
1180 nvlist_free(zp->z_xattr_cached);
1181 zp->z_xattr_cached = NULL;
1182 }
7b3e34ba
BB
1183 rw_exit(&zp->z_xattr_lock);
1184
428870ff 1185 ASSERT(zp->z_sa_hdl == NULL);
3558fd73 1186 err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
34dc7c2f 1187 if (err) {
c96c36fa 1188 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1189 return (err);
1190 }
1191
1192 dmu_object_info_from_db(db, &doi);
428870ff
BB
1193 if (doi.doi_bonus_type != DMU_OT_SA &&
1194 (doi.doi_bonus_type != DMU_OT_ZNODE ||
1195 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1196 doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1197 sa_buf_rele(db, NULL);
c96c36fa 1198 zfs_znode_hold_exit(zsb, zh);
2e528b49 1199 return (SET_ERROR(EINVAL));
34dc7c2f
BB
1200 }
1201
3558fd73 1202 zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL);
428870ff
BB
1203
1204 /* reload cached values */
3558fd73 1205 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL,
428870ff 1206 &gen, sizeof (gen));
3558fd73 1207 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
428870ff 1208 &zp->z_size, sizeof (zp->z_size));
3558fd73 1209 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
dfbc8630 1210 &links, sizeof (links));
3558fd73 1211 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
428870ff 1212 &zp->z_pflags, sizeof (zp->z_pflags));
3558fd73 1213 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL,
2c6abf15 1214 &z_uid, sizeof (z_uid));
3558fd73 1215 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL,
2c6abf15 1216 &z_gid, sizeof (z_gid));
3558fd73 1217 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL,
428870ff 1218 &mode, sizeof (mode));
9f5f0019
NB
1219 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL,
1220 &atime, 16);
1221 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
1222 &mtime, 16);
1223 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
1224 &ctime, 16);
428870ff 1225
428870ff
BB
1226 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
1227 zfs_znode_dmu_fini(zp);
c96c36fa 1228 zfs_znode_hold_exit(zsb, zh);
2e528b49 1229 return (SET_ERROR(EIO));
428870ff
BB
1230 }
1231
12fa7f34 1232 zp->z_mode = ZTOI(zp)->i_mode = mode;
2c6abf15
NB
1233 zfs_uid_write(ZTOI(zp), z_uid);
1234 zfs_gid_write(ZTOI(zp), z_gid);
572e2857 1235
9f5f0019
NB
1236 ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
1237 ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
1238 ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
1239
278f2236 1240 if (gen != ZTOI(zp)->i_generation) {
428870ff 1241 zfs_znode_dmu_fini(zp);
c96c36fa 1242 zfs_znode_hold_exit(zsb, zh);
2e528b49 1243 return (SET_ERROR(EIO));
34dc7c2f
BB
1244 }
1245
dfbc8630
CD
1246 zp->z_unlinked = (ZTOI(zp)->i_nlink == 0);
1247 set_nlink(ZTOI(zp), (uint32_t)links);
7bb1325f 1248 zfs_set_inode_flags(zp, ZTOI(zp));
dfbc8630 1249
34dc7c2f 1250 zp->z_blksz = doi.doi_data_block_size;
704cd075 1251 zp->z_atime_dirty = 0;
9f5f0019 1252 zfs_inode_update(zp);
34dc7c2f 1253
c96c36fa 1254 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1255
1256 return (0);
1257}
1258
1259void
1260zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1261{
3558fd73
BB
1262 zfs_sb_t *zsb = ZTOZSB(zp);
1263 objset_t *os = zsb->z_os;
34dc7c2f 1264 uint64_t obj = zp->z_id;
572e2857 1265 uint64_t acl_obj = zfs_external_acl(zp);
c96c36fa 1266 znode_hold_t *zh;
34dc7c2f 1267
c96c36fa 1268 zh = zfs_znode_hold_enter(zsb, obj);
572e2857
BB
1269 if (acl_obj) {
1270 VERIFY(!zp->z_is_sa);
b128c09f 1271 VERIFY(0 == dmu_object_free(os, acl_obj, tx));
572e2857 1272 }
b128c09f 1273 VERIFY(0 == dmu_object_free(os, obj, tx));
34dc7c2f 1274 zfs_znode_dmu_fini(zp);
c96c36fa 1275 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1276}
1277
1278void
1279zfs_zinactive(znode_t *zp)
1280{
3558fd73 1281 zfs_sb_t *zsb = ZTOZSB(zp);
34dc7c2f 1282 uint64_t z_id = zp->z_id;
c96c36fa 1283 znode_hold_t *zh;
34dc7c2f 1284
428870ff 1285 ASSERT(zp->z_sa_hdl);
34dc7c2f
BB
1286
1287 /*
d6bd8eaa 1288 * Don't allow a zfs_zget() while we're trying to release this znode.
34dc7c2f 1289 */
c96c36fa 1290 zh = zfs_znode_hold_enter(zsb, z_id);
d6bd8eaa 1291
34dc7c2f 1292 mutex_enter(&zp->z_lock);
34dc7c2f
BB
1293
1294 /*
1295 * If this was the last reference to a file with no links,
1296 * remove the file from the file system.
1297 */
1298 if (zp->z_unlinked) {
1299 mutex_exit(&zp->z_lock);
c96c36fa 1300 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1301 zfs_rmnode(zp);
1302 return;
1303 }
428870ff 1304
34dc7c2f
BB
1305 mutex_exit(&zp->z_lock);
1306 zfs_znode_dmu_fini(zp);
d6bd8eaa 1307
c96c36fa 1308 zfs_znode_hold_exit(zsb, zh);
34dc7c2f
BB
1309}
1310
6d111134
TC
1311static inline int
1312zfs_compare_timespec(struct timespec *t1, struct timespec *t2)
1313{
1314 if (t1->tv_sec < t2->tv_sec)
1315 return (-1);
1316
1317 if (t1->tv_sec > t2->tv_sec)
1318 return (1);
1319
1320 return (t1->tv_nsec - t2->tv_nsec);
1321}
1322
6d111134
TC
1323/*
1324 * Prepare to update znode time stamps.
1325 *
1326 * IN: zp - znode requiring timestamp update
0df9673f 1327 * flag - ATTR_MTIME, ATTR_CTIME flags
6d111134 1328 *
0df9673f 1329 * OUT: zp - z_seq
6d111134
TC
1330 * mtime - new mtime
1331 * ctime - new ctime
1332 *
0df9673f
CC
1333 * Note: We don't update atime here, because we rely on the Linux VFS to do
1334 * atime updating.
6d111134 1335 */
34dc7c2f 1336void
428870ff 1337zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
0df9673f 1338 uint64_t ctime[2])
34dc7c2f
BB
1339{
1340 timestruc_t now;
1341
34dc7c2f
BB
1342 gethrestime(&now);
1343
0df9673f 1344 zp->z_seq++;
34dc7c2f 1345
3558fd73 1346 if (flag & ATTR_MTIME) {
428870ff 1347 ZFS_TIME_ENCODE(&now, mtime);
9f5f0019 1348 ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
3558fd73 1349 if (ZTOZSB(zp)->z_use_fuids) {
428870ff
BB
1350 zp->z_pflags |= (ZFS_ARCHIVE |
1351 ZFS_AV_MODIFIED);
1352 }
34dc7c2f
BB
1353 }
1354
3558fd73 1355 if (flag & ATTR_CTIME) {
428870ff 1356 ZFS_TIME_ENCODE(&now, ctime);
9f5f0019 1357 ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime);
3558fd73 1358 if (ZTOZSB(zp)->z_use_fuids)
428870ff 1359 zp->z_pflags |= ZFS_ARCHIVE;
34dc7c2f
BB
1360 }
1361}
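/*
 * Illustrative caller pattern (zfs_freesp() below does exactly this):
 * the mtime/ctime values filled in by zfs_tstamp_update_setup() are
 * added to an sa_bulk_attr_t array and pushed out with sa_bulk_update():
 *
 *	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16);
 *	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16);
 *	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 *	(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 */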
1362
34dc7c2f
BB
1363/*
1364 * Grow the block size for a file.
1365 *
1366 * IN: zp - znode of file to free data in.
1367 * size - requested block size
1368 * tx - open transaction.
1369 *
1370 * NOTE: this function assumes that the znode is write locked.
1371 */
1372void
1373zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1374{
1375 int error;
1376 u_longlong_t dummy;
1377
1378 if (size <= zp->z_blksz)
1379 return;
1380 /*
1381 * If the file size is already greater than the current blocksize,
1382 * we will not grow. If there is more than one block in a file,
1383 * the blocksize cannot change.
1384 */
428870ff 1385 if (zp->z_blksz && zp->z_size > zp->z_blksz)
34dc7c2f
BB
1386 return;
1387
3558fd73 1388 error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id,
34dc7c2f 1389 size, 0, tx);
428870ff 1390
34dc7c2f
BB
1391 if (error == ENOTSUP)
1392 return;
c99c9001 1393 ASSERT0(error);
34dc7c2f
BB
1394
1395 /* What blocksize did we actually get? */
428870ff 1396 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
34dc7c2f
BB
1397}
1398
34dc7c2f 1399/*
b128c09f 1400 * Increase the file length
34dc7c2f
BB
1401 *
1402 * IN: zp - znode of file to free data in.
b128c09f 1403 * end - new end-of-file
34dc7c2f 1404 *
19d55079 1405 * RETURN: 0 on success, error code on failure
34dc7c2f 1406 */
b128c09f
BB
1407static int
1408zfs_extend(znode_t *zp, uint64_t end)
34dc7c2f 1409{
3558fd73 1410 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f 1411 dmu_tx_t *tx;
34dc7c2f 1412 rl_t *rl;
b128c09f 1413 uint64_t newblksz;
34dc7c2f
BB
1414 int error;
1415
34dc7c2f 1416 /*
b128c09f 1417 * We will change zp_size, lock the whole file.
34dc7c2f 1418 */
d88895a0 1419 rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
34dc7c2f
BB
1420
1421 /*
1422 * Nothing to do if file already at desired length.
1423 */
428870ff 1424 if (end <= zp->z_size) {
34dc7c2f
BB
1425 zfs_range_unlock(rl);
1426 return (0);
1427 }
3558fd73 1428 tx = dmu_tx_create(zsb->z_os);
428870ff
BB
1429 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1430 zfs_sa_upgrade_txholds(tx, zp);
b128c09f 1431 if (end > zp->z_blksz &&
3558fd73 1432 (!ISP2(zp->z_blksz) || zp->z_blksz < zsb->z_max_blksz)) {
34dc7c2f
BB
1433 /*
1434 * We are growing the file past the current block size.
1435 */
3558fd73 1436 if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) {
f1512ee6
MA
1437 /*
1438 * File's blocksize is already larger than the
1439 * "recordsize" property. Only let it grow to
1440 * the next power of 2.
1441 */
34dc7c2f 1442 ASSERT(!ISP2(zp->z_blksz));
f1512ee6 1443 newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
34dc7c2f 1444 } else {
3558fd73 1445 newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz);
34dc7c2f 1446 }
b128c09f
BB
1447 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
1448 } else {
1449 newblksz = 0;
34dc7c2f
BB
1450 }
1451
384f8a09 1452 error = dmu_tx_assign(tx, TXG_WAIT);
34dc7c2f 1453 if (error) {
34dc7c2f
BB
1454 dmu_tx_abort(tx);
1455 zfs_range_unlock(rl);
1456 return (error);
1457 }
1458
b128c09f
BB
1459 if (newblksz)
1460 zfs_grow_blocksize(zp, newblksz, tx);
34dc7c2f 1461
428870ff
BB
1462 zp->z_size = end;
1463
3558fd73 1464 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
428870ff 1465 &zp->z_size, sizeof (zp->z_size), tx));
34dc7c2f 1466
b128c09f 1467 zfs_range_unlock(rl);
34dc7c2f 1468
b128c09f 1469 dmu_tx_commit(tx);
34dc7c2f 1470
b128c09f
BB
1471 return (0);
1472}
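/*
 * Worked example (illustrative only): extending a one-block file with a
 * 4K block size to end = 100K on a dataset whose recordsize is 128K
 * yields newblksz = MIN(100K, 128K) = 100K, so the single block simply
 * grows. Had the block size already exceeded the recordsize, it would
 * instead be capped at MIN(end, the next power of two above the current
 * block size).
 */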
1473
223df016
TC
1474/*
1475 * zfs_zero_partial_page - Modeled after update_pages() but
1476 * with different arguments and semantics for use by zfs_freesp().
1477 *
1478 * Zeroes a piece of a single page cache entry for zp at offset
1479 * start and length len.
1480 *
1481 * Caller must acquire a range lock on the file for the region
1482 * being zeroed so that the ARC and page cache stay in sync.
1483 */
1484static void
1485zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
1486{
1487 struct address_space *mp = ZTOI(zp)->i_mapping;
1488 struct page *pp;
1489 int64_t off;
1490 void *pb;
1491
8b1899d3 1492 ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK));
223df016 1493
8b1899d3
BB
1494 off = start & (PAGE_SIZE - 1);
1495 start &= PAGE_MASK;
223df016 1496
8b1899d3 1497 pp = find_lock_page(mp, start >> PAGE_SHIFT);
223df016
TC
1498 if (pp) {
1499 if (mapping_writably_mapped(mp))
1500 flush_dcache_page(pp);
1501
1502 pb = kmap(pp);
1503 bzero(pb + off, len);
1504 kunmap(pp);
1505
1506 if (mapping_writably_mapped(mp))
1507 flush_dcache_page(pp);
1508
1509 mark_page_accessed(pp);
1510 SetPageUptodate(pp);
1511 ClearPageError(pp);
1512 unlock_page(pp);
8b1899d3 1513 put_page(pp);
223df016
TC
1514 }
1515}
1516
b128c09f
BB
1517/*
1518 * Free space in a file.
1519 *
1520 * IN: zp - znode of file to free data in.
1521 * off - start of section to free.
1522 * len - length of section to free.
1523 *
19d55079 1524 * RETURN: 0 on success, error code on failure
b128c09f
BB
1525 */
1526static int
1527zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1528{
3558fd73 1529 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f
BB
1530 rl_t *rl;
1531 int error;
1532
1533 /*
1534 * Lock the range being freed.
1535 */
d88895a0 1536 rl = zfs_range_lock(&zp->z_range_lock, off, len, RL_WRITER);
b128c09f
BB
1537
1538 /*
1539 * Nothing to do if file already at desired length.
1540 */
428870ff 1541 if (off >= zp->z_size) {
b128c09f
BB
1542 zfs_range_unlock(rl);
1543 return (0);
34dc7c2f
BB
1544 }
1545
428870ff
BB
1546 if (off + len > zp->z_size)
1547 len = zp->z_size - off;
b128c09f 1548
3558fd73 1549 error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);
b128c09f 1550
223df016
TC
1551 /*
1552 * Zero partial page cache entries. This must be done under a
1553 * range lock in order to keep the ARC and page cache in sync.
1554 */
1555 if (zp->z_is_mapped) {
1556 loff_t first_page, last_page, page_len;
1557 loff_t first_page_offset, last_page_offset;
1558
1559 /* first possible full page in hole */
8b1899d3 1560 first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT;
223df016 1561 /* last page of hole */
8b1899d3 1562 last_page = (off + len) >> PAGE_SHIFT;
223df016
TC
1563
1564 /* offset of first_page */
8b1899d3 1565 first_page_offset = first_page << PAGE_SHIFT;
223df016 1566 /* offset of last_page */
8b1899d3 1567 last_page_offset = last_page << PAGE_SHIFT;
223df016 1568
cb08f063
TC
1569 /* truncate whole pages */
1570 if (last_page_offset > first_page_offset) {
1571 truncate_inode_pages_range(ZTOI(zp)->i_mapping,
1572 first_page_offset, last_page_offset - 1);
1573 }
1574
1575 /* truncate sub-page ranges */
223df016
TC
1576 if (first_page > last_page) {
1577 /* entire punched area within a single page */
1578 zfs_zero_partial_page(zp, off, len);
1579 } else {
1580 /* beginning of punched area at the end of a page */
1581 page_len = first_page_offset - off;
1582 if (page_len > 0)
1583 zfs_zero_partial_page(zp, off, page_len);
1584
1585 /* end of punched area at the beginning of a page */
1586 page_len = off + len - last_page_offset;
1587 if (page_len > 0)
1588 zfs_zero_partial_page(zp, last_page_offset,
1589 page_len);
1590 }
1591 }
34dc7c2f
BB
1592 zfs_range_unlock(rl);
1593
b128c09f
BB
1594 return (error);
1595}
1596
1597/*
1598 * Truncate a file
1599 *
1600 * IN: zp - znode of file to free data in.
1601 * end - new end-of-file.
1602 *
19d55079 1603 * RETURN: 0 on success, error code on failure
b128c09f
BB
1604 */
1605static int
1606zfs_trunc(znode_t *zp, uint64_t end)
1607{
3558fd73 1608 zfs_sb_t *zsb = ZTOZSB(zp);
b128c09f
BB
1609 dmu_tx_t *tx;
1610 rl_t *rl;
1611 int error;
572e2857
BB
1612 sa_bulk_attr_t bulk[2];
1613 int count = 0;
b128c09f
BB
1614
1615 /*
1616 * We will change zp_size, lock the whole file.
1617 */
d88895a0 1618 rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
b128c09f
BB
1619
1620 /*
1621 * Nothing to do if file already at desired length.
1622 */
428870ff 1623 if (end >= zp->z_size) {
b128c09f
BB
1624 zfs_range_unlock(rl);
1625 return (0);
1626 }
1627
3558fd73 1628 error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1);
b128c09f
BB
1629 if (error) {
1630 zfs_range_unlock(rl);
1631 return (error);
1632 }
3558fd73 1633 tx = dmu_tx_create(zsb->z_os);
428870ff
BB
1634 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1635 zfs_sa_upgrade_txholds(tx, zp);
19d55079 1636 dmu_tx_mark_netfree(tx);
7a8f0e80 1637 error = dmu_tx_assign(tx, TXG_WAIT);
b128c09f 1638 if (error) {
b128c09f
BB
1639 dmu_tx_abort(tx);
1640 zfs_range_unlock(rl);
1641 return (error);
1642 }
b128c09f 1643
428870ff 1644 zp->z_size = end;
3558fd73 1645 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
572e2857 1646 NULL, &zp->z_size, sizeof (zp->z_size));
428870ff 1647
572e2857
BB
1648 if (end == 0) {
1649 zp->z_pflags &= ~ZFS_SPARSE;
3558fd73 1650 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
572e2857
BB
1651 NULL, &zp->z_pflags, 8);
1652 }
1653 VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
b128c09f 1654
34dc7c2f
BB
1655 dmu_tx_commit(tx);
1656
d164b209 1657 zfs_range_unlock(rl);
34dc7c2f
BB
1658
1659 return (0);
1660}
1661
b128c09f
BB
1662/*
1663 * Free space in a file
1664 *
1665 * IN: zp - znode of file to free data in.
1666 * off - start of range
1667 * len - length of range (0 => truncate the file to off)
1668 * flag - current file open mode flags.
1669 * log - TRUE if this action should be logged
1670 *
19d55079 1671 * RETURN: 0 on success, error code on failure
b128c09f
BB
1672 */
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
	dmu_tx_t *tx;
	zfs_sb_t *zsb = ZTOZSB(zp);
	zilog_t *zilog = zsb->z_log;
	uint64_t mode;
	uint64_t mtime[2], ctime[2];
	sa_bulk_attr_t bulk[3];
	int count = 0;
	int error;

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zsb), &mode,
	    sizeof (mode))) != 0)
		return (error);

	if (off > zp->z_size) {
		error = zfs_extend(zp, off+len);
		if (error == 0 && log)
			goto log;
		goto out;
	}

	if (len == 0) {
		error = zfs_trunc(zp, off);
	} else {
		if ((error = zfs_free_range(zp, off, len)) == 0 &&
		    off + len > zp->z_size)
			error = zfs_extend(zp, off+len);
	}
	if (error || !log)
		goto out;
log:
	tx = dmu_tx_create(zsb->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out;
	}

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
	    NULL, &zp->z_pflags, 8);
	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);

	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);

	dmu_tx_commit(tx);

	zfs_inode_update(zp);
	error = 0;

out:
	/*
	 * Truncate the page cache - for file truncate operations, use
	 * the purpose-built API for truncations.  For punching operations,
	 * the truncation is handled under a range lock in zfs_free_range.
	 */
	if (len == 0)
		truncate_setsize(ZTOI(zp), off);
	return (error);
}

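/*
 * Create the on-disk ZPL objects for a brand new file system: the master
 * node, the SA attribute registration (for versions that support it), the
 * delete (unlinked) queue, the root directory, and the shares directory.
 * A minimal throw-away zfs_sb_t/super_block pair is constructed so that
 * zfs_mknode() can be reused to build the root znode; all of the temporary
 * state is torn down again before returning.
 */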
void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{
	struct super_block *sb;
	zfs_sb_t *zsb;
	uint64_t moid, obj, sa_obj, version;
	uint64_t sense = ZFS_CASE_SENSITIVE;
	uint64_t norm = 0;
	nvpair_t *elem;
	int size;
	int error;
	int i;
	znode_t *rootzp = NULL;
	vattr_t vattr;
	znode_t *zp;
	zfs_acl_ids_t acl_ids;

	/*
	 * First attempt to create master node.
	 */
	/*
	 * In an empty objset, there are no blocks to read and thus
	 * there can be no i/o errors (which we assert below).
	 */
	moid = MASTER_NODE_OBJ;
	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
	    DMU_OT_NONE, 0, tx);
	ASSERT(error == 0);

	/*
	 * Give dmu_object_alloc() a hint about where to start
	 * allocating new objects. Otherwise, since the metadnode's
	 * dnode_phys_t structure isn't initialized yet, dmu_object_next()
	 * would fail and we'd have to skip to the next dnode block.
	 */
	os->os_obj_next = moid + 1;

	/*
	 * Set starting attributes.
	 */
	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
	elem = NULL;
	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
		/* For the moment we expect all zpl props to be uint64_ts */
		uint64_t val;
		char *name;

		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
		VERIFY(nvpair_value_uint64(elem, &val) == 0);
		name = nvpair_name(elem);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
			if (val < version)
				version = val;
		} else {
			error = zap_update(os, moid, name, 8, 1, &val, tx);
		}
		ASSERT(error == 0);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
			norm = val;
		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
			sense = val;
	}
	ASSERT(version != 0);
	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);

	/*
	 * Create zap object used for SA attribute registration
	 */

	if (version >= ZPL_VERSION_SA) {
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);
		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT(error == 0);
	} else {
		sa_obj = 0;
	}
	/*
	 * Create a delete queue.
	 */
	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);

	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
	ASSERT(error == 0);

	/*
	 * Create root znode.  Create minimal znode/inode/zsb/sb
	 * to allow zfs_mknode to work.
	 */
	vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
	vattr.va_mode = S_IFDIR|0755;
	vattr.va_uid = crgetuid(cr);
	vattr.va_gid = crgetgid(cr);

	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	rootzp->z_moved = 0;
	rootzp->z_unlinked = 0;
	rootzp->z_atime_dirty = 0;
	rootzp->z_is_sa = USE_SA(version, os);

	zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
	zsb->z_os = os;
	zsb->z_parent = zsb;
	zsb->z_version = version;
	zsb->z_use_fuids = USE_FUIDS(version, os);
	zsb->z_use_sa = USE_SA(version, os);
	zsb->z_norm = norm;

	sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
	sb->s_fs_info = zsb;

	ZTOI(rootzp)->i_sb = sb;

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zsb->z_attr_table);

	ASSERT(error == 0);

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
		zsb->z_norm |= U8_TEXTPREP_TOUPPER;

	mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zsb->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));

	size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
	zsb->z_hold_size = size;
	zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP);
	zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
	for (i = 0; i != size; i++) {
		avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
		mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
	}

	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
	    cr, NULL, &acl_ids));
	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, rootzp);
	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
	ASSERT(error == 0);
	zfs_acl_ids_free(&acl_ids);

	atomic_set(&ZTOI(rootzp)->i_count, 0);
	sa_handle_destroy(rootzp->z_sa_hdl);
	kmem_cache_free(znode_cache, rootzp);

	/*
	 * Create shares directory
	 */
	error = zfs_create_share_dir(zsb, tx);
	ASSERT(error == 0);

	for (i = 0; i != size; i++) {
		avl_destroy(&zsb->z_hold_trees[i]);
		mutex_destroy(&zsb->z_hold_locks[i]);
	}

	vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size);
	vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size);
	kmem_free(sb, sizeof (struct super_block));
	kmem_free(zsb, sizeof (zfs_sb_t));
}
#endif /* _KERNEL */

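/*
 * The helpers below operate directly on an objset_t and do not require a
 * mounted file system; they back zfs_obj_to_path() and zfs_obj_to_stats()
 * further down (used, for example, by the object-to-path ioctls and by zdb).
 *
 * zfs_sa_setup() locates the SA attribute registration object in the master
 * node (absent on pre-SA file systems, in which case sa_obj stays 0) and
 * builds the attribute table those helpers use.
 */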
static int
zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
{
	uint64_t sa_obj = 0;
	int error;

	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
	if (error != 0 && error != ENOENT)
		return (error);

	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
	return (error);
}

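/*
 * Hold the bonus buffer for an object and wrap it in a private SA handle.
 * The bonus type is validated (DMU_OT_SA, or a full-size DMU_OT_ZNODE for
 * old-style znodes); anything else is rejected with ENOTSUP.  On success the
 * caller owns both the handle and the buffer and must drop them with
 * zfs_release_sa_handle().
 */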
static int
zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
    dmu_buf_t **db, void *tag)
{
	dmu_object_info_t doi;
	int error;

	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
		return (error);

	dmu_object_info_from_db(*db, &doi);
	if ((doi.doi_bonus_type != DMU_OT_SA &&
	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t))) {
		sa_buf_rele(*db, tag);
		return (SET_ERROR(ENOTSUP));
	}

	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
	if (error != 0) {
		sa_buf_rele(*db, tag);
		return (error);
	}

	return (0);
}

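/*
 * Release a handle/buffer pair obtained from zfs_grab_sa_handle().
 */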
void
zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
{
	sa_handle_destroy(hdl);
	sa_buf_rele(db, tag);
}

/*
 * Given an object number, return its parent object number and whether
 * or not the object is an extended attribute directory.
 */
static int
zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
    uint64_t *pobjp, int *is_xattrdir)
{
	uint64_t parent;
	uint64_t pflags;
	uint64_t mode;
	uint64_t parent_mode;
	sa_bulk_attr_t bulk[3];
	sa_handle_t *sa_hdl;
	dmu_buf_t *sa_db;
	int count = 0;
	int error;

	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
	    &parent, sizeof (parent));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
	    &pflags, sizeof (pflags));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
	    &mode, sizeof (mode));

	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
		return (error);

	/*
	 * When a link is removed its parent pointer is not changed and will
	 * be invalid.  There are two cases where a link is removed but the
	 * file stays around, when it goes to the delete queue and when there
	 * are additional links.
	 */
	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
	if (error != 0)
		return (error);

	error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
	if (error != 0)
		return (error);

	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);

	/*
	 * Extended attributes can be applied to files, directories, etc.
	 * Otherwise the parent must be a directory.
	 */
	if (!*is_xattrdir && !S_ISDIR(parent_mode))
		return (EINVAL);

	*pobjp = parent;

	return (0);
}

/*
 * Given an object number, return some zpl level statistics
 */
static int
zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
    zfs_stat_t *sb)
{
	sa_bulk_attr_t bulk[4];
	int count = 0;

	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
	    &sb->zs_mode, sizeof (sb->zs_mode));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
	    &sb->zs_gen, sizeof (sb->zs_gen));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
	    &sb->zs_links, sizeof (sb->zs_links));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
	    &sb->zs_ctime, sizeof (sb->zs_ctime));

	return (sa_bulk_lookup(hdl, bulk, count));
}

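/*
 * Construct the path name for an object by walking its parent pointers back
 * up to the root.  Components are written from the tail of the caller's
 * buffer toward the front, and the finished string is then moved to the
 * start of the buffer.  Extended attribute directories have no directory
 * entry of their own and are rendered as the literal "<xattrdir>" component.
 */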
static int
zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
    sa_attr_type_t *sa_table, char *buf, int len)
{
	sa_handle_t *sa_hdl;
	sa_handle_t *prevhdl = NULL;
	dmu_buf_t *prevdb = NULL;
	dmu_buf_t *sa_db = NULL;
	char *path = buf + len - 1;
	int error;

	*path = '\0';
	sa_hdl = hdl;

	for (;;) {
		uint64_t pobj = 0;
		char component[MAXNAMELEN + 2];
		size_t complen;
		int is_xattrdir = 0;

		if (prevdb)
			zfs_release_sa_handle(prevhdl, prevdb, FTAG);

		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
		    &is_xattrdir)) != 0)
			break;

		if (pobj == obj) {
			if (path[0] != '/')
				*--path = '/';
			break;
		}

		component[0] = '/';
		if (is_xattrdir) {
			(void) sprintf(component + 1, "<xattrdir>");
		} else {
			error = zap_value_search(osp, pobj, obj,
			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
			if (error != 0)
				break;
		}

		complen = strlen(component);
		path -= complen;
		ASSERT(path >= buf);
		bcopy(component, path, complen);
		obj = pobj;

		if (sa_hdl != hdl) {
			prevhdl = sa_hdl;
			prevdb = sa_db;
		}
		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
		if (error != 0) {
			sa_hdl = prevhdl;
			sa_db = prevdb;
			break;
		}
	}

	if (sa_hdl != NULL && sa_hdl != hdl) {
		ASSERT(sa_db != NULL);
		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
	}

	if (error == 0)
		(void) memmove(buf, path, buf + len - path);

	return (error);
}

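/*
 * Translate an object number in the given objset into a full path name.
 * buf is len bytes; on success it holds a NUL-terminated path starting at
 * the root of the objset.
 */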
int
zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
{
	sa_attr_type_t *sa_table;
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	error = zfs_sa_setup(osp, &sa_table);
	if (error != 0)
		return (error);

	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
	if (error != 0)
		return (error);

	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);

	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}

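/*
 * Like zfs_obj_to_path(), but also fill in a zfs_stat_t (mode, generation,
 * link count, ctime) for the object before resolving its path.
 */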
int
zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
    char *buf, int len)
{
	char *path = buf + len - 1;
	sa_attr_type_t *sa_table;
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	*path = '\0';

	error = zfs_sa_setup(osp, &sa_table);
	if (error != 0)
		return (error);

	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
	if (error != 0)
		return (error);

	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
	if (error != 0) {
		zfs_release_sa_handle(hdl, db, FTAG);
		return (error);
	}

	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);

	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}

#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(zfs_create_fs);
EXPORT_SYMBOL(zfs_obj_to_path);

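/*
 * zfs_object_mutex_size sizes the per-file-system array of znode hold locks
 * and AVL trees; the value is rounded down to a power of two and capped at
 * ZFS_OBJ_MTX_MAX (see the sizing logic in zfs_create_fs() above).
 */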
/* CSTYLED */
module_param(zfs_object_mutex_size, uint, 0644);
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
#endif