]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/zfs_znode.c
Fix flake 8 style warnings
[mirror_zfs.git] / module / zfs / zfs_znode.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
19d55079 23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
34dc7c2f
BB
24 */
25
26/* Portions Copyright 2007 Jeremy Teo */
27
34dc7c2f
BB
28#ifdef _KERNEL
29#include <sys/types.h>
30#include <sys/param.h>
31#include <sys/time.h>
34dc7c2f 32#include <sys/sysmacros.h>
34dc7c2f 33#include <sys/mntent.h>
34dc7c2f
BB
34#include <sys/u8_textprep.h>
35#include <sys/dsl_dataset.h>
36#include <sys/vfs.h>
34dc7c2f
BB
37#include <sys/vnode.h>
38#include <sys/file.h>
39#include <sys/kmem.h>
40#include <sys/errno.h>
34dc7c2f
BB
41#include <sys/mode.h>
42#include <sys/atomic.h>
34dc7c2f
BB
43#include <sys/zfs_dir.h>
44#include <sys/zfs_acl.h>
45#include <sys/zfs_ioctl.h>
46#include <sys/zfs_rlock.h>
47#include <sys/zfs_fuid.h>
3558fd73 48#include <sys/zfs_vnops.h>
ebe7e575 49#include <sys/zfs_ctldir.h>
428870ff 50#include <sys/dnode.h>
34dc7c2f 51#include <sys/fs/zfs.h>
3558fd73 52#include <sys/zpl.h>
34dc7c2f
BB
53#endif /* _KERNEL */
54
55#include <sys/dmu.h>
f1512ee6 56#include <sys/dmu_objset.h>
50c957f7 57#include <sys/dmu_tx.h>
34dc7c2f
BB
58#include <sys/refcount.h>
59#include <sys/stat.h>
60#include <sys/zap.h>
61#include <sys/zfs_znode.h>
428870ff
BB
62#include <sys/sa.h>
63#include <sys/zfs_sa.h>
572e2857 64#include <sys/zfs_stat.h>
34dc7c2f
BB
65
66#include "zfs_prop.h"
428870ff 67#include "zfs_comutil.h"
34dc7c2f 68
b128c09f
BB
/*
 * Statistic gathering is compiled in when ZNODE_STATS is defined.  It is
 * switched on automatically whenever DEBUG is defined; otherwise
 * ZNODE_STAT_ADD() expands to nothing.
 */
#if defined(DEBUG)
#define	ZNODE_STATS
#endif	/* DEBUG */

#if defined(ZNODE_STATS)
#define	ZNODE_STAT_ADD(stat)	((stat)++)
#else
#define	ZNODE_STAT_ADD(stat)	/* nothing */
#endif	/* ZNODE_STATS */
82
34dc7c2f
BB
83/*
84 * Functions needed for userland (ie: libzpool) are not put under
85 * #ifdef _KERNEL; the rest of the functions have dependencies
86 * (such as VFS logic) that will not compile easily in userland.
87 */
88#ifdef _KERNEL
9babb374 89
b128c09f 90static kmem_cache_t *znode_cache = NULL;
c96c36fa 91static kmem_cache_t *znode_hold_cache = NULL;
0720116d 92unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
34dc7c2f 93
34dc7c2f
BB
94/*ARGSUSED*/
95static int
b128c09f 96zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
34dc7c2f
BB
97{
98 znode_t *zp = buf;
99
3558fd73 100 inode_init_once(ZTOI(zp));
b128c09f
BB
101 list_link_init(&zp->z_link_node);
102
34dc7c2f 103 mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
34dc7c2f 104 rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
448d7aaa 105 rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
34dc7c2f 106 mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
82a37189 107 rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
34dc7c2f 108
d88895a0 109 zfs_rlock_init(&zp->z_range_lock);
34dc7c2f 110
b128c09f 111 zp->z_dirlocks = NULL;
45d1cae3 112 zp->z_acl_cached = NULL;
82a37189 113 zp->z_xattr_cached = NULL;
98701490 114 zp->z_xattr_parent = 0;
572e2857 115 zp->z_moved = 0;
34dc7c2f
BB
116 return (0);
117}
118
119/*ARGSUSED*/
120static void
b128c09f 121zfs_znode_cache_destructor(void *buf, void *arg)
34dc7c2f
BB
122{
123 znode_t *zp = buf;
124
b128c09f 125 ASSERT(!list_link_active(&zp->z_link_node));
34dc7c2f 126 mutex_destroy(&zp->z_lock);
34dc7c2f
BB
127 rw_destroy(&zp->z_parent_lock);
128 rw_destroy(&zp->z_name_lock);
129 mutex_destroy(&zp->z_acl_lock);
82a37189 130 rw_destroy(&zp->z_xattr_lock);
d88895a0 131 zfs_rlock_destroy(&zp->z_range_lock);
34dc7c2f 132
b128c09f 133 ASSERT(zp->z_dirlocks == NULL);
45d1cae3 134 ASSERT(zp->z_acl_cached == NULL);
82a37189 135 ASSERT(zp->z_xattr_cached == NULL);
b128c09f
BB
136}
137
c96c36fa
BB
138static int
139zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
140{
141 znode_hold_t *zh = buf;
142
143 mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
144 refcount_create(&zh->zh_refcount);
145 zh->zh_obj = ZFS_NO_OBJECT;
146
147 return (0);
148}
149
150static void
151zfs_znode_hold_cache_destructor(void *buf, void *arg)
152{
153 znode_hold_t *zh = buf;
154
155 mutex_destroy(&zh->zh_lock);
156 refcount_destroy(&zh->zh_refcount);
157}
158
34dc7c2f
BB
159void
160zfs_znode_init(void)
161{
162 /*
5074bfe8
TC
163 * Initialize zcache. The KMC_SLAB hint is used in order that it be
164 * backed by kmalloc() when on the Linux slab in order that any
165 * wait_on_bit() operations on the related inode operate properly.
34dc7c2f
BB
166 */
167 ASSERT(znode_cache == NULL);
168 znode_cache = kmem_cache_create("zfs_znode_cache",
169 sizeof (znode_t), 0, zfs_znode_cache_constructor,
5074bfe8 170 zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);
c96c36fa
BB
171
172 ASSERT(znode_hold_cache == NULL);
173 znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
174 sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
175 zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
34dc7c2f
BB
176}
177
178void
179zfs_znode_fini(void)
180{
34dc7c2f
BB
181 /*
182 * Cleanup zcache
183 */
184 if (znode_cache)
185 kmem_cache_destroy(znode_cache);
186 znode_cache = NULL;
c96c36fa
BB
187
188 if (znode_hold_cache)
189 kmem_cache_destroy(znode_hold_cache);
190 znode_hold_cache = NULL;
191}
192
193/*
194 * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
195 * serialize access to a znode and its SA buffer while the object is being
196 * created or destroyed. This kind of locking would normally reside in the
197 * znode itself but in this case that's impossible because the znode and SA
198 * buffer may not yet exist. Therefore the locking is handled externally
199 * with an array of mutexs and AVLs trees which contain per-object locks.
200 *
201 * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
202 * in to the correct AVL tree and finally the per-object lock is held. In
203 * zfs_znode_hold_exit() the process is reversed. The per-object lock is
204 * released, removed from the AVL tree and destroyed if there are no waiters.
205 *
206 * This scheme has two important properties:
207 *
208 * 1) No memory allocations are performed while holding one of the z_hold_locks.
209 * This ensures evict(), which can be called from direct memory reclaim, will
210 * never block waiting on a z_hold_locks which just happens to have hashed
211 * to the same index.
212 *
213 * 2) All locks used to serialize access to an object are per-object and never
214 * shared. This minimizes lock contention without creating a large number
215 * of dedicated locks.
216 *
217 * On the downside it does require znode_lock_t structures to be frequently
218 * allocated and freed. However, because these are backed by a kmem cache
219 * and very short lived this cost is minimal.
220 */
221int
222zfs_znode_hold_compare(const void *a, const void *b)
223{
ee36c709
GN
224 const znode_hold_t *zh_a = (const znode_hold_t *)a;
225 const znode_hold_t *zh_b = (const znode_hold_t *)b;
226
227 return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj));
c96c36fa
BB
228}
229
230boolean_t
0037b49e 231zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj)
c96c36fa
BB
232{
233 znode_hold_t *zh, search;
0037b49e 234 int i = ZFS_OBJ_HASH(zfsvfs, obj);
37c56346 235 boolean_t held;
c96c36fa
BB
236
237 search.zh_obj = obj;
238
0037b49e
BB
239 mutex_enter(&zfsvfs->z_hold_locks[i]);
240 zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
37c56346 241 held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE;
0037b49e 242 mutex_exit(&zfsvfs->z_hold_locks[i]);
c96c36fa 243
37c56346 244 return (held);
c96c36fa
BB
245}
246
247static znode_hold_t *
0037b49e 248zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
c96c36fa
BB
249{
250 znode_hold_t *zh, *zh_new, search;
0037b49e 251 int i = ZFS_OBJ_HASH(zfsvfs, obj);
c96c36fa
BB
252 boolean_t found = B_FALSE;
253
254 zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
255 zh_new->zh_obj = obj;
256 search.zh_obj = obj;
257
0037b49e
BB
258 mutex_enter(&zfsvfs->z_hold_locks[i]);
259 zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
c96c36fa
BB
260 if (likely(zh == NULL)) {
261 zh = zh_new;
0037b49e 262 avl_add(&zfsvfs->z_hold_trees[i], zh);
c96c36fa
BB
263 } else {
264 ASSERT3U(zh->zh_obj, ==, obj);
265 found = B_TRUE;
266 }
c13060e4 267 zfs_refcount_add(&zh->zh_refcount, NULL);
0037b49e 268 mutex_exit(&zfsvfs->z_hold_locks[i]);
c96c36fa
BB
269
270 if (found == B_TRUE)
271 kmem_cache_free(znode_hold_cache, zh_new);
272
273 ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
274 ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
275 mutex_enter(&zh->zh_lock);
276
277 return (zh);
278}
279
280static void
0037b49e 281zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh)
c96c36fa 282{
0037b49e 283 int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj);
c96c36fa
BB
284 boolean_t remove = B_FALSE;
285
0037b49e 286 ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj));
c96c36fa
BB
287 ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
288 mutex_exit(&zh->zh_lock);
289
0037b49e 290 mutex_enter(&zfsvfs->z_hold_locks[i]);
c96c36fa 291 if (refcount_remove(&zh->zh_refcount, NULL) == 0) {
0037b49e 292 avl_remove(&zfsvfs->z_hold_trees[i], zh);
c96c36fa
BB
293 remove = B_TRUE;
294 }
0037b49e 295 mutex_exit(&zfsvfs->z_hold_locks[i]);
c96c36fa
BB
296
297 if (remove == B_TRUE)
298 kmem_cache_free(znode_hold_cache, zh);
34dc7c2f
BB
299}
300
34dc7c2f 301int
0037b49e 302zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
34dc7c2f 303{
3c9609b3 304#ifdef HAVE_SMB_SHARE
9babb374
BB
305 zfs_acl_ids_t acl_ids;
306 vattr_t vattr;
307 znode_t *sharezp;
308 vnode_t *vp;
309 znode_t *zp;
310 int error;
34dc7c2f 311
9babb374 312 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
3558fd73 313 vattr.va_mode = S_IFDIR | 0555;
9babb374
BB
314 vattr.va_uid = crgetuid(kcred);
315 vattr.va_gid = crgetgid(kcred);
34dc7c2f 316
79c76d5b 317 sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
572e2857 318 sharezp->z_moved = 0;
9babb374
BB
319 sharezp->z_unlinked = 0;
320 sharezp->z_atime_dirty = 0;
321 sharezp->z_zfsvfs = zfsvfs;
428870ff 322 sharezp->z_is_sa = zfsvfs->z_use_sa;
9c5167d1 323 sharezp->z_pflags = 0;
34dc7c2f 324
9babb374
BB
325 vp = ZTOV(sharezp);
326 vn_reinit(vp);
327 vp->v_type = VDIR;
34dc7c2f 328
9babb374
BB
329 VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
330 kcred, NULL, &acl_ids));
428870ff 331 zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
9babb374
BB
332 ASSERT3P(zp, ==, sharezp);
333 ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
334 POINTER_INVALIDATE(&sharezp->z_zfsvfs);
335 error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
336 ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
337 zfsvfs->z_shares_dir = sharezp->z_id;
338
339 zfs_acl_ids_free(&acl_ids);
3558fd73 340 // ZTOV(sharezp)->v_count = 0;
428870ff 341 sa_handle_destroy(sharezp->z_sa_hdl);
9babb374 342 kmem_cache_free(znode_cache, sharezp);
34dc7c2f 343
9babb374 344 return (error);
9ee7fac5
BB
345#else
346 return (0);
3c9609b3 347#endif /* HAVE_SMB_SHARE */
34dc7c2f
BB
348}
349
34dc7c2f 350static void
0037b49e 351zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
428870ff 352 dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
34dc7c2f 353{
0037b49e 354 ASSERT(zfs_znode_held(zfsvfs, zp->z_id));
34dc7c2f
BB
355
356 mutex_enter(&zp->z_lock);
357
428870ff
BB
358 ASSERT(zp->z_sa_hdl == NULL);
359 ASSERT(zp->z_acl_cached == NULL);
360 if (sa_hdl == NULL) {
0037b49e 361 VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
428870ff
BB
362 SA_HDL_SHARED, &zp->z_sa_hdl));
363 } else {
364 zp->z_sa_hdl = sa_hdl;
365 sa_set_userp(sa_hdl, zp);
366 }
34dc7c2f 367
428870ff 368 zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
34dc7c2f 369
34dc7c2f 370 mutex_exit(&zp->z_lock);
34dc7c2f
BB
371}
372
373void
374zfs_znode_dmu_fini(znode_t *zp)
375{
c96c36fa 376 ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
3558fd73 377 RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
428870ff
BB
378
379 sa_handle_destroy(zp->z_sa_hdl);
380 zp->z_sa_hdl = NULL;
34dc7c2f
BB
381}
382
383/*
3558fd73
BB
384 * Called by new_inode() to allocate a new inode.
385 */
386int
387zfs_inode_alloc(struct super_block *sb, struct inode **ip)
388{
389 znode_t *zp;
390
79c76d5b 391 zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
3558fd73
BB
392 *ip = ZTOI(zp);
393
394 return (0);
395}
396
397/*
398 * Called in multiple places when an inode should be destroyed.
399 */
400void
401zfs_inode_destroy(struct inode *ip)
402{
403 znode_t *zp = ITOZ(ip);
0037b49e 404 zfsvfs_t *zfsvfs = ZTOZSB(zp);
3558fd73 405
0037b49e 406 mutex_enter(&zfsvfs->z_znodes_lock);
7b3e34ba 407 if (list_link_active(&zp->z_link_node)) {
0037b49e
BB
408 list_remove(&zfsvfs->z_all_znodes, zp);
409 zfsvfs->z_nr_znodes--;
7b3e34ba 410 }
0037b49e 411 mutex_exit(&zfsvfs->z_znodes_lock);
3558fd73
BB
412
413 if (zp->z_acl_cached) {
414 zfs_acl_free(zp->z_acl_cached);
415 zp->z_acl_cached = NULL;
416 }
417
82a37189
BB
418 if (zp->z_xattr_cached) {
419 nvlist_free(zp->z_xattr_cached);
420 zp->z_xattr_cached = NULL;
421 }
422
3558fd73
BB
423 kmem_cache_free(znode_cache, zp);
424}
425
426static void
0037b49e 427zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
3558fd73 428{
aa6d8c10 429 uint64_t rdev = 0;
3558fd73
BB
430
431 switch (ip->i_mode & S_IFMT) {
432 case S_IFREG:
433 ip->i_op = &zpl_inode_operations;
434 ip->i_fop = &zpl_file_operations;
435 ip->i_mapping->a_ops = &zpl_address_space_operations;
436 break;
437
438 case S_IFDIR:
439 ip->i_op = &zpl_dir_inode_operations;
440 ip->i_fop = &zpl_dir_file_operations;
441 ITOZ(ip)->z_zn_prefetch = B_TRUE;
442 break;
443
444 case S_IFLNK:
445 ip->i_op = &zpl_symlink_inode_operations;
446 break;
447
aa6d8c10
NB
448 /*
449 * rdev is only stored in a SA only for device files.
450 */
3558fd73
BB
451 case S_IFCHR:
452 case S_IFBLK:
0037b49e 453 (void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev,
53b1d979 454 sizeof (rdev));
aa6d8c10
NB
455 /*FALLTHROUGH*/
456 case S_IFIFO:
457 case S_IFSOCK:
3558fd73
BB
458 init_special_inode(ip, ip->i_mode, rdev);
459 ip->i_op = &zpl_special_inode_operations;
460 break;
461
462 default:
53b1d979
BB
463 zfs_panic_recover("inode %llu has invalid mode: 0x%x\n",
464 (u_longlong_t)ip->i_ino, ip->i_mode);
465
466 /* Assume the inode is a file and attempt to continue */
467 ip->i_mode = S_IFREG | 0644;
468 ip->i_op = &zpl_inode_operations;
469 ip->i_fop = &zpl_file_operations;
470 ip->i_mapping->a_ops = &zpl_address_space_operations;
471 break;
3558fd73
BB
472 }
473}
474
7bb1325f
CC
475void
476zfs_set_inode_flags(znode_t *zp, struct inode *ip)
477{
478 /*
479 * Linux and Solaris have different sets of file attributes, so we
480 * restrict this conversion to the intersection of the two.
481 */
a5248129
CC
482#ifdef HAVE_INODE_SET_FLAGS
483 unsigned int flags = 0;
484 if (zp->z_pflags & ZFS_IMMUTABLE)
485 flags |= S_IMMUTABLE;
486 if (zp->z_pflags & ZFS_APPENDONLY)
487 flags |= S_APPEND;
7bb1325f 488
a5248129
CC
489 inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND);
490#else
7bb1325f
CC
491 if (zp->z_pflags & ZFS_IMMUTABLE)
492 ip->i_flags |= S_IMMUTABLE;
493 else
494 ip->i_flags &= ~S_IMMUTABLE;
495
496 if (zp->z_pflags & ZFS_APPENDONLY)
497 ip->i_flags |= S_APPEND;
498 else
499 ip->i_flags &= ~S_APPEND;
a5248129 500#endif
7bb1325f
CC
501}
502
704cd075
CC
503/*
504 * Update the embedded inode given the znode. We should work toward
505 * eliminating this function as soon as possible by removing values
506 * which are duplicated between the znode and inode. If the generic
507 * inode has the correct field it should be used, and the ZFS code
508 * updated to access the inode. This can be done incrementally.
509 */
9f5f0019
NB
510void
511zfs_inode_update(znode_t *zp)
704cd075 512{
0037b49e 513 zfsvfs_t *zfsvfs;
704cd075
CC
514 struct inode *ip;
515 uint32_t blksize;
516 u_longlong_t i_blocks;
704cd075
CC
517
518 ASSERT(zp != NULL);
0037b49e 519 zfsvfs = ZTOZSB(zp);
704cd075
CC
520 ip = ZTOI(zp);
521
522 /* Skip .zfs control nodes which do not exist on disk. */
523 if (zfsctl_is_node(ip))
524 return;
525
704cd075
CC
526 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
527
528 spin_lock(&ip->i_lock);
704cd075 529 ip->i_blocks = i_blocks;
704cd075
CC
530 i_size_write(ip, zp->z_size);
531 spin_unlock(&ip->i_lock);
532}
533
704cd075 534
3558fd73
BB
535/*
536 * Construct a znode+inode and initialize.
34dc7c2f
BB
537 *
538 * This does not do a call to dmu_set_user() that is
539 * up to the caller to do, in case you don't want to
540 * return the znode
541 */
542static znode_t *
0037b49e 543zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
31b6111f 544 dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
34dc7c2f
BB
545{
546 znode_t *zp;
3558fd73 547 struct inode *ip;
7f89ae6b 548 uint64_t mode;
428870ff 549 uint64_t parent;
278f2236 550 uint64_t tmp_gen;
dfbc8630 551 uint64_t links;
2c6abf15 552 uint64_t z_uid, z_gid;
9f5f0019 553 uint64_t atime[2], mtime[2], ctime[2];
9c5167d1 554 uint64_t projid = ZFS_DEFAULT_PROJID;
9f5f0019 555 sa_bulk_attr_t bulk[11];
428870ff 556 int count = 0;
34dc7c2f 557
0037b49e 558 ASSERT(zfsvfs != NULL);
34dc7c2f 559
0037b49e 560 ip = new_inode(zfsvfs->z_sb);
3558fd73
BB
561 if (ip == NULL)
562 return (NULL);
7304b6e5 563
3558fd73 564 zp = ITOZ(ip);
34dc7c2f 565 ASSERT(zp->z_dirlocks == NULL);
ebe7e575
BB
566 ASSERT3P(zp->z_acl_cached, ==, NULL);
567 ASSERT3P(zp->z_xattr_cached, ==, NULL);
572e2857 568 zp->z_moved = 0;
428870ff 569 zp->z_sa_hdl = NULL;
34dc7c2f
BB
570 zp->z_unlinked = 0;
571 zp->z_atime_dirty = 0;
572 zp->z_mapcnt = 0;
34dc7c2f
BB
573 zp->z_id = db->db_object;
574 zp->z_blksz = blksz;
575 zp->z_seq = 0x7A4653;
576 zp->z_sync_cnt = 0;
ebe7e575
BB
577 zp->z_is_mapped = B_FALSE;
578 zp->z_is_ctldir = B_FALSE;
7b3e34ba 579 zp->z_is_stale = B_FALSE;
d88895a0
CC
580 zp->z_range_lock.zr_size = &zp->z_size;
581 zp->z_range_lock.zr_blksz = &zp->z_blksz;
582 zp->z_range_lock.zr_max_blksz = &ZTOZSB(zp)->z_max_blksz;
34dc7c2f 583
0037b49e 584 zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
3558fd73 585
0037b49e
BB
586 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
587 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8);
588 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
589 &zp->z_size, 8);
590 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
591 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
428870ff 592 &zp->z_pflags, 8);
0037b49e 593 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
7304b6e5 594 &parent, 8);
0037b49e
BB
595 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8);
596 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8);
597 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
598 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
599 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
428870ff 600
9c5167d1
NF
601 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 ||
602 (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
603 (zp->z_pflags & ZFS_PROJID) &&
604 sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
428870ff
BB
605 if (hdl == NULL)
606 sa_handle_destroy(zp->z_sa_hdl);
07d63f0c 607 zp->z_sa_hdl = NULL;
3558fd73 608 goto error;
34dc7c2f 609 }
7304b6e5 610
9c5167d1 611 zp->z_projid = projid;
12fa7f34 612 zp->z_mode = ip->i_mode = mode;
278f2236 613 ip->i_generation = (uint32_t)tmp_gen;
ba2fe6af 614 ip->i_blkbits = SPA_MINBLOCKSHIFT;
dfbc8630 615 set_nlink(ip, (uint32_t)links);
2c6abf15
NB
616 zfs_uid_write(ip, z_uid);
617 zfs_gid_write(ip, z_gid);
7bb1325f 618 zfs_set_inode_flags(zp, ip);
7f89ae6b 619
98701490
CC
620 /* Cache the xattr parent id */
621 if (zp->z_pflags & ZFS_XATTR)
622 zp->z_xattr_parent = parent;
623
9f5f0019
NB
624 ZFS_TIME_DECODE(&ip->i_atime, atime);
625 ZFS_TIME_DECODE(&ip->i_mtime, mtime);
626 ZFS_TIME_DECODE(&ip->i_ctime, ctime);
627
3558fd73 628 ip->i_ino = obj;
9f5f0019 629 zfs_inode_update(zp);
0037b49e 630 zfs_inode_set_ops(zfsvfs, ip);
3558fd73 631
7b3e34ba
BB
632 /*
633 * The only way insert_inode_locked() can fail is if the ip->i_ino
634 * number is already hashed for this super block. This can never
635 * happen because the inode numbers map 1:1 with the object numbers.
636 *
637 * The one exception is rolling back a mounted file system, but in
638 * this case all the active inode are unhashed during the rollback.
639 */
640 VERIFY3S(insert_inode_locked(ip), ==, 0);
c85b224f 641
0037b49e
BB
642 mutex_enter(&zfsvfs->z_znodes_lock);
643 list_insert_tail(&zfsvfs->z_all_znodes, zp);
644 zfsvfs->z_nr_znodes++;
b128c09f 645 membar_producer();
0037b49e 646 mutex_exit(&zfsvfs->z_znodes_lock);
b128c09f 647
3558fd73 648 unlock_new_inode(ip);
34dc7c2f 649 return (zp);
3558fd73
BB
650
651error:
3558fd73 652 iput(ip);
d1d7e268 653 return (NULL);
34dc7c2f
BB
654}
655
1e8db771
BB
656/*
657 * Safely mark an inode dirty. Inodes which are part of a read-only
658 * file system or snapshot may not be dirtied.
659 */
660void
661zfs_mark_inode_dirty(struct inode *ip)
662{
0037b49e 663 zfsvfs_t *zfsvfs = ITOZSB(ip);
1e8db771 664
0037b49e 665 if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
1e8db771
BB
666 return;
667
668 mark_inode_dirty(ip);
669}
670
428870ff
BB
671static uint64_t empty_xattr;
672static uint64_t pad[4];
673static zfs_acl_phys_t acl_phys;
34dc7c2f
BB
674/*
675 * Create a new DMU object to hold a zfs znode.
676 *
677 * IN: dzp - parent directory for new znode
678 * vap - file attributes for new znode
679 * tx - dmu transaction id for zap operations
680 * cr - credentials of caller
681 * flag - flags:
682 * IS_ROOT_NODE - new object will be root
683 * IS_XATTR - new object is an attribute
34dc7c2f
BB
684 * bonuslen - length of bonus buffer
685 * setaclp - File/Dir initial ACL
686 * fuidp - Tracks fuid allocation.
687 *
688 * OUT: zpp - allocated znode
689 *
690 */
691void
692zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
428870ff 693 uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
34dc7c2f 694{
428870ff
BB
695 uint64_t crtime[2], atime[2], mtime[2], ctime[2];
696 uint64_t mode, size, links, parent, pflags;
9c5167d1 697 uint64_t projid = ZFS_DEFAULT_PROJID;
428870ff 698 uint64_t rdev = 0;
0037b49e 699 zfsvfs_t *zfsvfs = ZTOZSB(dzp);
428870ff 700 dmu_buf_t *db;
6413c95f 701 inode_timespec_t now;
34dc7c2f 702 uint64_t gen, obj;
428870ff 703 int bonuslen;
50c957f7 704 int dnodesize;
428870ff
BB
705 sa_handle_t *sa_hdl;
706 dmu_object_type_t obj_type;
f30484af 707 sa_bulk_attr_t *sa_attrs;
428870ff
BB
708 int cnt = 0;
709 zfs_acl_locator_cb_t locate = { 0 };
c96c36fa 710 znode_hold_t *zh;
34dc7c2f 711
0037b49e 712 if (zfsvfs->z_replay) {
34dc7c2f 713 obj = vap->va_nodeid;
34dc7c2f
BB
714 now = vap->va_ctime; /* see zfs_replay_create() */
715 gen = vap->va_nblocks; /* ditto */
50c957f7 716 dnodesize = vap->va_fsid; /* ditto */
34dc7c2f
BB
717 } else {
718 obj = 0;
719 gethrestime(&now);
720 gen = dmu_tx_get_txg(tx);
0037b49e 721 dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
34dc7c2f
BB
722 }
723
50c957f7
NB
724 if (dnodesize == 0)
725 dnodesize = DNODE_MIN_SIZE;
726
0037b49e 727 obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
50c957f7 728
428870ff 729 bonuslen = (obj_type == DMU_OT_SA) ?
50c957f7 730 DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
428870ff 731
34dc7c2f
BB
732 /*
733 * Create a new DMU object.
734 */
735 /*
736 * There's currently no mechanism for pre-reading the blocks that will
572e2857 737 * be needed to allocate a new object, so we accept the small chance
34dc7c2f
BB
738 * that there will be an i/o error and we will fail one of the
739 * assertions below.
740 */
3558fd73 741 if (S_ISDIR(vap->va_mode)) {
0037b49e
BB
742 if (zfsvfs->z_replay) {
743 VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
744 zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
50c957f7 745 obj_type, bonuslen, dnodesize, tx));
34dc7c2f 746 } else {
0037b49e
BB
747 obj = zap_create_norm_dnsize(zfsvfs->z_os,
748 zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
50c957f7 749 obj_type, bonuslen, dnodesize, tx);
34dc7c2f
BB
750 }
751 } else {
0037b49e
BB
752 if (zfsvfs->z_replay) {
753 VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
34dc7c2f 754 DMU_OT_PLAIN_FILE_CONTENTS, 0,
50c957f7 755 obj_type, bonuslen, dnodesize, tx));
34dc7c2f 756 } else {
0037b49e 757 obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
34dc7c2f 758 DMU_OT_PLAIN_FILE_CONTENTS, 0,
50c957f7 759 obj_type, bonuslen, dnodesize, tx);
34dc7c2f
BB
760 }
761 }
34dc7c2f 762
0037b49e 763 zh = zfs_znode_hold_enter(zfsvfs, obj);
9631681b 764 VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
34dc7c2f
BB
765
766 /*
767 * If this is the root, fix up the half-initialized parent pointer
768 * to reference the just-allocated physical data area.
769 */
770 if (flag & IS_ROOT_NODE) {
34dc7c2f
BB
771 dzp->z_id = obj;
772 }
773
774 /*
775 * If parent is an xattr, so am I.
776 */
9c5167d1 777 if (dzp->z_pflags & ZFS_XATTR) {
34dc7c2f 778 flag |= IS_XATTR;
34dc7c2f
BB
779 }
780
0037b49e 781 if (zfsvfs->z_use_fuids)
428870ff
BB
782 pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
783 else
784 pflags = 0;
34dc7c2f 785
3558fd73 786 if (S_ISDIR(vap->va_mode)) {
428870ff 787 size = 2; /* contents ("." and "..") */
dfbc8630 788 links = 2;
428870ff 789 } else {
dfbc8630 790 size = 0;
ace1eae8 791 links = (flag & IS_TMPFILE) ? 0 : 1;
34dc7c2f
BB
792 }
793
aa6d8c10 794 if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
dc1d7665 795 rdev = vap->va_rdev;
428870ff
BB
796
797 parent = dzp->z_id;
798 mode = acl_ids->z_mode;
34dc7c2f 799 if (flag & IS_XATTR)
428870ff 800 pflags |= ZFS_XATTR;
34dc7c2f 801
9c5167d1
NF
802 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) {
803 /*
804 * With ZFS_PROJID flag, we can easily know whether there is
805 * project ID stored on disk or not. See zfs_space_delta_cb().
806 */
807 if (obj_type != DMU_OT_ZNODE &&
808 dmu_objset_projectquota_enabled(zfsvfs->z_os))
809 pflags |= ZFS_PROJID;
810
811 /*
812 * Inherit project ID from parent if required.
813 */
814 projid = zfs_inherit_projid(dzp);
815 if (dzp->z_pflags & ZFS_PROJINHERIT)
816 pflags |= ZFS_PROJINHERIT;
817 }
818
428870ff
BB
819 /*
820 * No execs denied will be deterimed when zfs_mode_compute() is called.
821 */
822 pflags |= acl_ids->z_aclp->z_hints &
823 (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
824 ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
34dc7c2f 825
428870ff
BB
826 ZFS_TIME_ENCODE(&now, crtime);
827 ZFS_TIME_ENCODE(&now, ctime);
34dc7c2f 828
3558fd73 829 if (vap->va_mask & ATTR_ATIME) {
428870ff 830 ZFS_TIME_ENCODE(&vap->va_atime, atime);
34dc7c2f 831 } else {
428870ff 832 ZFS_TIME_ENCODE(&now, atime);
34dc7c2f
BB
833 }
834
3558fd73 835 if (vap->va_mask & ATTR_MTIME) {
428870ff
BB
836 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
837 } else {
838 ZFS_TIME_ENCODE(&now, mtime);
839 }
840
841 /* Now add in all of the "SA" attributes */
0037b49e 842 VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
428870ff
BB
843 &sa_hdl));
844
845 /*
846 * Setup the array of attributes to be replaced/set on the new file
847 *
848 * order for DMU_OT_ZNODE is critical since it needs to be constructed
849 * in the old znode_phys_t format. Don't change this ordering
850 */
79c76d5b 851 sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
428870ff
BB
852
853 if (obj_type == DMU_OT_ZNODE) {
0037b49e 854 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
428870ff 855 NULL, &atime, 16);
0037b49e 856 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
428870ff 857 NULL, &mtime, 16);
0037b49e 858 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
428870ff 859 NULL, &ctime, 16);
0037b49e 860 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
428870ff 861 NULL, &crtime, 16);
0037b49e 862 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
428870ff 863 NULL, &gen, 8);
0037b49e 864 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
428870ff 865 NULL, &mode, 8);
0037b49e 866 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
428870ff 867 NULL, &size, 8);
0037b49e 868 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
428870ff 869 NULL, &parent, 8);
34dc7c2f 870 } else {
0037b49e 871 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
428870ff 872 NULL, &mode, 8);
0037b49e 873 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
428870ff 874 NULL, &size, 8);
0037b49e 875 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
428870ff 876 NULL, &gen, 8);
0037b49e 877 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
3558fd73 878 NULL, &acl_ids->z_fuid, 8);
0037b49e 879 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
3558fd73 880 NULL, &acl_ids->z_fgid, 8);
0037b49e 881 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
428870ff 882 NULL, &parent, 8);
0037b49e 883 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
428870ff 884 NULL, &pflags, 8);
0037b49e 885 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
428870ff 886 NULL, &atime, 16);
0037b49e 887 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
428870ff 888 NULL, &mtime, 16);
0037b49e 889 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
428870ff 890 NULL, &ctime, 16);
0037b49e 891 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
428870ff
BB
892 NULL, &crtime, 16);
893 }
894
0037b49e 895 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
428870ff
BB
896
897 if (obj_type == DMU_OT_ZNODE) {
0037b49e 898 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
428870ff 899 &empty_xattr, 8);
9c5167d1
NF
900 } else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
901 pflags & ZFS_PROJID) {
902 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs),
903 NULL, &projid, 8);
34dc7c2f 904 }
428870ff 905 if (obj_type == DMU_OT_ZNODE ||
aa6d8c10 906 (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
0037b49e 907 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
428870ff 908 NULL, &rdev, 8);
428870ff
BB
909 }
910 if (obj_type == DMU_OT_ZNODE) {
0037b49e 911 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
428870ff 912 NULL, &pflags, 8);
0037b49e 913 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
428870ff 914 &acl_ids->z_fuid, 8);
0037b49e 915 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
428870ff 916 &acl_ids->z_fgid, 8);
0037b49e 917 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
428870ff 918 sizeof (uint64_t) * 4);
0037b49e 919 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
428870ff
BB
920 &acl_phys, sizeof (zfs_acl_phys_t));
921 } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
0037b49e 922 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
428870ff
BB
923 &acl_ids->z_aclp->z_acl_count, 8);
924 locate.cb_aclp = acl_ids->z_aclp;
0037b49e 925 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
428870ff
BB
926 zfs_acl_data_locator, &locate,
927 acl_ids->z_aclp->z_acl_bytes);
928 mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
929 acl_ids->z_fuid, acl_ids->z_fgid);
930 }
931
932 VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
34dc7c2f 933
34dc7c2f 934 if (!(flag & IS_ROOT_NODE)) {
8d703987
BB
935 /*
936 * The call to zfs_znode_alloc() may fail if memory is low
937 * via the call path: alloc_inode() -> inode_init_always() ->
938 * security_inode_alloc() -> inode_alloc_security(). Since
939 * the existing code is written such that zfs_mknode() can
940 * not fail retry until sufficient memory has been reclaimed.
941 */
942 do {
943 *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, obj,
944 sa_hdl);
945 } while (*zpp == NULL);
946
7b3e34ba
BB
947 VERIFY(*zpp != NULL);
948 VERIFY(dzp != NULL);
34dc7c2f
BB
949 } else {
950 /*
951 * If we are creating the root node, the "parent" we
952 * passed in is the znode for the root.
953 */
954 *zpp = dzp;
428870ff
BB
955
956 (*zpp)->z_sa_hdl = sa_hdl;
34dc7c2f 957 }
428870ff
BB
958
959 (*zpp)->z_pflags = pflags;
12fa7f34 960 (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
50c957f7 961 (*zpp)->z_dnodesize = dnodesize;
9c5167d1 962 (*zpp)->z_projid = projid;
428870ff 963
428870ff
BB
964 if (obj_type == DMU_OT_ZNODE ||
965 acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
b0bc7a84 966 VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
428870ff 967 }
d1d7e268 968 kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
0037b49e 969 zfs_znode_hold_exit(zfsvfs, zh);
34dc7c2f
BB
970}
971
5484965a 972/*
d3cc8b15
WA
973 * Update in-core attributes. It is assumed the caller will be doing an
974 * sa_bulk_update to push the changes out.
5484965a
BB
975 */
976void
977zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
978{
979 xoptattr_t *xoap;
7bb1325f 980 boolean_t update_inode = B_FALSE;
5484965a
BB
981
982 xoap = xva_getxoptattr(xvap);
983 ASSERT(xoap);
984
985 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
986 uint64_t times[2];
987 ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
988 (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
989 &times, sizeof (times), tx);
990 XVA_SET_RTN(xvap, XAT_CREATETIME);
991 }
992 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
993 ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
994 zp->z_pflags, tx);
995 XVA_SET_RTN(xvap, XAT_READONLY);
996 }
997 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
998 ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
999 zp->z_pflags, tx);
1000 XVA_SET_RTN(xvap, XAT_HIDDEN);
1001 }
1002 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1003 ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
1004 zp->z_pflags, tx);
1005 XVA_SET_RTN(xvap, XAT_SYSTEM);
1006 }
1007 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1008 ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
1009 zp->z_pflags, tx);
1010 XVA_SET_RTN(xvap, XAT_ARCHIVE);
1011 }
1012 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
1013 ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
1014 zp->z_pflags, tx);
1015 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
64c688d7 1016
7bb1325f 1017 update_inode = B_TRUE;
5484965a
BB
1018 }
1019 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
1020 ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
1021 zp->z_pflags, tx);
1022 XVA_SET_RTN(xvap, XAT_NOUNLINK);
1023 }
1024 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
1025 ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
1026 zp->z_pflags, tx);
1027 XVA_SET_RTN(xvap, XAT_APPENDONLY);
64c688d7 1028
7bb1325f 1029 update_inode = B_TRUE;
5484965a
BB
1030 }
1031 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1032 ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
1033 zp->z_pflags, tx);
1034 XVA_SET_RTN(xvap, XAT_NODUMP);
1035 }
1036 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1037 ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
1038 zp->z_pflags, tx);
1039 XVA_SET_RTN(xvap, XAT_OPAQUE);
1040 }
1041 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1042 ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
1043 xoap->xoa_av_quarantined, zp->z_pflags, tx);
1044 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1045 }
1046 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1047 ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
1048 zp->z_pflags, tx);
1049 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1050 }
1051 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
1052 zfs_sa_set_scanstamp(zp, xvap, tx);
1053 XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
1054 }
1055 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1056 ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
1057 zp->z_pflags, tx);
1058 XVA_SET_RTN(xvap, XAT_REPARSE);
1059 }
1060 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1061 ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
1062 zp->z_pflags, tx);
1063 XVA_SET_RTN(xvap, XAT_OFFLINE);
1064 }
1065 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1066 ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
1067 zp->z_pflags, tx);
1068 XVA_SET_RTN(xvap, XAT_SPARSE);
1069 }
9c5167d1
NF
1070 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
1071 ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit,
1072 zp->z_pflags, tx);
1073 XVA_SET_RTN(xvap, XAT_PROJINHERIT);
1074 }
7bb1325f
CC
1075
1076 if (update_inode)
1077 zfs_set_inode_flags(zp, ZTOI(zp));
5484965a
BB
1078}
1079
34dc7c2f 1080int
0037b49e 1081zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
34dc7c2f
BB
1082{
1083 dmu_object_info_t doi;
1084 dmu_buf_t *db;
1085 znode_t *zp;
c96c36fa 1086 znode_hold_t *zh;
34dc7c2f 1087 int err;
428870ff 1088 sa_handle_t *hdl;
34dc7c2f
BB
1089
1090 *zpp = NULL;
1091
6f9548c4 1092again:
0037b49e 1093 zh = zfs_znode_hold_enter(zfsvfs, obj_num);
34dc7c2f 1094
0037b49e 1095 err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
34dc7c2f 1096 if (err) {
0037b49e 1097 zfs_znode_hold_exit(zfsvfs, zh);
34dc7c2f
BB
1098 return (err);
1099 }
1100
1101 dmu_object_info_from_db(db, &doi);
428870ff
BB
1102 if (doi.doi_bonus_type != DMU_OT_SA &&
1103 (doi.doi_bonus_type != DMU_OT_ZNODE ||
1104 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1105 doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1106 sa_buf_rele(db, NULL);
0037b49e 1107 zfs_znode_hold_exit(zfsvfs, zh);
2e528b49 1108 return (SET_ERROR(EINVAL));
34dc7c2f
BB
1109 }
1110
428870ff
BB
1111 hdl = dmu_buf_get_user(db);
1112 if (hdl != NULL) {
36df2843 1113 zp = sa_get_userdata(hdl);
34dc7c2f 1114
8ac67298 1115
34dc7c2f 1116 /*
428870ff
BB
1117 * Since "SA" does immediate eviction we
1118 * should never find a sa handle that doesn't
1119 * know about the znode.
34dc7c2f 1120 */
428870ff
BB
1121
1122 ASSERT3P(zp, !=, NULL);
1123
1124 mutex_enter(&zp->z_lock);
34dc7c2f 1125 ASSERT3U(zp->z_id, ==, obj_num);
98701490
CC
1126 /*
1127 * If igrab() returns NULL the VFS has independently
1128 * determined the inode should be evicted and has
1129 * called iput_final() to start the eviction process.
1130 * The SA handle is still valid but because the VFS
1131 * requires that the eviction succeed we must drop
1132 * our locks and references to allow the eviction to
1133 * complete. The zfs_zget() may then be retried.
1134 *
1135 * This unlikely case could be optimized by registering
1136 * a sops->drop_inode() callback. The callback would
1137 * need to detect the active SA hold thereby informing
1138 * the VFS that this inode should not be evicted.
1139 */
1140 if (igrab(ZTOI(zp)) == NULL) {
1141 mutex_exit(&zp->z_lock);
1142 sa_buf_rele(db, NULL);
0037b49e 1143 zfs_znode_hold_exit(zfsvfs, zh);
98701490
CC
1144 /* inode might need this to finish evict */
1145 cond_resched();
1146 goto again;
34dc7c2f 1147 }
98701490
CC
1148 *zpp = zp;
1149 err = 0;
34dc7c2f 1150 mutex_exit(&zp->z_lock);
f3ad9cd6 1151 sa_buf_rele(db, NULL);
0037b49e 1152 zfs_znode_hold_exit(zfsvfs, zh);
34dc7c2f
BB
1153 return (err);
1154 }
1155
1156 /*
3558fd73 1157 * Not found create new znode/vnode but only if file exists.
428870ff
BB
1158 *
1159 * There is a small window where zfs_vget() could
1160 * find this object while a file create is still in
1161 * progress. This is checked for in zfs_znode_alloc()
1162 *
1163 * if zfs_znode_alloc() fails it will drop the hold on the
1164 * bonus buffer.
34dc7c2f 1165 */
0037b49e 1166 zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
31b6111f 1167 doi.doi_bonus_type, obj_num, NULL);
428870ff 1168 if (zp == NULL) {
2e528b49 1169 err = SET_ERROR(ENOENT);
428870ff
BB
1170 } else {
1171 *zpp = zp;
1172 }
0037b49e 1173 zfs_znode_hold_exit(zfsvfs, zh);
428870ff 1174 return (err);
34dc7c2f
BB
1175}
1176
1177int
1178zfs_rezget(znode_t *zp)
1179{
0037b49e 1180 zfsvfs_t *zfsvfs = ZTOZSB(zp);
34dc7c2f
BB
1181 dmu_object_info_t doi;
1182 dmu_buf_t *db;
1183 uint64_t obj_num = zp->z_id;
428870ff 1184 uint64_t mode;
dfbc8630 1185 uint64_t links;
9f5f0019 1186 sa_bulk_attr_t bulk[10];
34dc7c2f 1187 int err;
428870ff
BB
1188 int count = 0;
1189 uint64_t gen;
2c6abf15 1190 uint64_t z_uid, z_gid;
9f5f0019 1191 uint64_t atime[2], mtime[2], ctime[2];
9c5167d1 1192 uint64_t projid = ZFS_DEFAULT_PROJID;
c96c36fa 1193 znode_hold_t *zh;
34dc7c2f 1194
cbecb4fb
CC
1195 /*
1196 * skip ctldir, otherwise they will always get invalidated. This will
1197 * cause funny behaviour for the mounted snapdirs. Especially for
1198 * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent
1199 * anyone automount it again as long as someone is still using the
1200 * detached mount.
1201 */
1202 if (zp->z_is_ctldir)
1203 return (0);
1204
0037b49e 1205 zh = zfs_znode_hold_enter(zfsvfs, obj_num);
34dc7c2f 1206
428870ff
BB
1207 mutex_enter(&zp->z_acl_lock);
1208 if (zp->z_acl_cached) {
1209 zfs_acl_free(zp->z_acl_cached);
1210 zp->z_acl_cached = NULL;
1211 }
428870ff 1212 mutex_exit(&zp->z_acl_lock);
7b3e34ba 1213
228b461b 1214 rw_enter(&zp->z_xattr_lock, RW_WRITER);
7b3e34ba
BB
1215 if (zp->z_xattr_cached) {
1216 nvlist_free(zp->z_xattr_cached);
1217 zp->z_xattr_cached = NULL;
1218 }
7b3e34ba
BB
1219 rw_exit(&zp->z_xattr_lock);
1220
428870ff 1221 ASSERT(zp->z_sa_hdl == NULL);
0037b49e 1222 err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
34dc7c2f 1223 if (err) {
0037b49e 1224 zfs_znode_hold_exit(zfsvfs, zh);
34dc7c2f
BB
1225 return (err);
1226 }
1227
1228 dmu_object_info_from_db(db, &doi);
428870ff
BB
1229 if (doi.doi_bonus_type != DMU_OT_SA &&
1230 (doi.doi_bonus_type != DMU_OT_ZNODE ||
1231 (doi.doi_bonus_type == DMU_OT_ZNODE &&
1232 doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1233 sa_buf_rele(db, NULL);
0037b49e 1234 zfs_znode_hold_exit(zfsvfs, zh);
2e528b49 1235 return (SET_ERROR(EINVAL));
34dc7c2f
BB
1236 }
1237
0037b49e 1238 zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
428870ff
BB
1239
1240 /* reload cached values */
0037b49e 1241 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
428870ff 1242 &gen, sizeof (gen));
0037b49e 1243 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
428870ff 1244 &zp->z_size, sizeof (zp->z_size));
0037b49e 1245 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
dfbc8630 1246 &links, sizeof (links));
0037b49e 1247 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
428870ff 1248 &zp->z_pflags, sizeof (zp->z_pflags));
0037b49e 1249 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2c6abf15 1250 &z_uid, sizeof (z_uid));
0037b49e 1251 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
2c6abf15 1252 &z_gid, sizeof (z_gid));
0037b49e 1253 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
428870ff 1254 &mode, sizeof (mode));
0037b49e 1255 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
9f5f0019 1256 &atime, 16);
0037b49e 1257 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
9f5f0019 1258 &mtime, 16);
0037b49e 1259 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
9f5f0019 1260 &ctime, 16);
428870ff 1261
428870ff
BB
1262 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
1263 zfs_znode_dmu_fini(zp);
0037b49e 1264 zfs_znode_hold_exit(zfsvfs, zh);
2e528b49 1265 return (SET_ERROR(EIO));
428870ff
BB
1266 }
1267
9c5167d1
NF
1268 if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) {
1269 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs),
1270 &projid, 8);
1271 if (err != 0 && err != ENOENT) {
1272 zfs_znode_dmu_fini(zp);
1273 zfs_znode_hold_exit(zfsvfs, zh);
1274 return (SET_ERROR(err));
1275 }
1276 }
1277
1278 zp->z_projid = projid;
12fa7f34 1279 zp->z_mode = ZTOI(zp)->i_mode = mode;
2c6abf15
NB
1280 zfs_uid_write(ZTOI(zp), z_uid);
1281 zfs_gid_write(ZTOI(zp), z_gid);
572e2857 1282
9f5f0019
NB
1283 ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
1284 ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
1285 ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
1286
278f2236 1287 if (gen != ZTOI(zp)->i_generation) {
428870ff 1288 zfs_znode_dmu_fini(zp);
0037b49e 1289 zfs_znode_hold_exit(zfsvfs, zh);
2e528b49 1290 return (SET_ERROR(EIO));
34dc7c2f
BB
1291 }
1292
dfbc8630 1293 set_nlink(ZTOI(zp), (uint32_t)links);
7bb1325f 1294 zfs_set_inode_flags(zp, ZTOI(zp));
dfbc8630 1295
34dc7c2f 1296 zp->z_blksz = doi.doi_data_block_size;
704cd075 1297 zp->z_atime_dirty = 0;
9f5f0019 1298 zfs_inode_update(zp);
34dc7c2f 1299
6a218566
AG
1300 /*
1301 * If the file has zero links, then it has been unlinked on the send
1302 * side and it must be in the received unlinked set.
1303 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
1304 * stale data and to prevent automatical removal of the file in
1305 * zfs_zinactive(). The file will be removed either when it is removed
1306 * on the send side and the next incremental stream is received or
1307 * when the unlinked set gets processed.
1308 */
1309 zp->z_unlinked = (ZTOI(zp)->i_nlink == 0);
1310 if (zp->z_unlinked)
1311 zfs_znode_dmu_fini(zp);
1312
0037b49e 1313 zfs_znode_hold_exit(zfsvfs, zh);
34dc7c2f
BB
1314
1315 return (0);
1316}
1317
1318void
1319zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1320{
0037b49e
BB
1321 zfsvfs_t *zfsvfs = ZTOZSB(zp);
1322 objset_t *os = zfsvfs->z_os;
34dc7c2f 1323 uint64_t obj = zp->z_id;
572e2857 1324 uint64_t acl_obj = zfs_external_acl(zp);
c96c36fa 1325 znode_hold_t *zh;
34dc7c2f 1326
0037b49e 1327 zh = zfs_znode_hold_enter(zfsvfs, obj);
572e2857
BB
1328 if (acl_obj) {
1329 VERIFY(!zp->z_is_sa);
b128c09f 1330 VERIFY(0 == dmu_object_free(os, acl_obj, tx));
572e2857 1331 }
b128c09f 1332 VERIFY(0 == dmu_object_free(os, obj, tx));
34dc7c2f 1333 zfs_znode_dmu_fini(zp);
0037b49e 1334 zfs_znode_hold_exit(zfsvfs, zh);
34dc7c2f
BB
1335}
1336
1337void
1338zfs_zinactive(znode_t *zp)
1339{
0037b49e 1340 zfsvfs_t *zfsvfs = ZTOZSB(zp);
34dc7c2f 1341 uint64_t z_id = zp->z_id;
c96c36fa 1342 znode_hold_t *zh;
34dc7c2f 1343
428870ff 1344 ASSERT(zp->z_sa_hdl);
34dc7c2f
BB
1345
1346 /*
d6bd8eaa 1347 * Don't allow a zfs_zget() while were trying to release this znode.
34dc7c2f 1348 */
0037b49e 1349 zh = zfs_znode_hold_enter(zfsvfs, z_id);
d6bd8eaa 1350
34dc7c2f 1351 mutex_enter(&zp->z_lock);
34dc7c2f
BB
1352
1353 /*
6a218566
AG
1354 * If this was the last reference to a file with no links, remove
1355 * the file from the file system unless the file system is mounted
1356 * read-only. That can happen, for example, if the file system was
1357 * originally read-write, the file was opened, then unlinked and
1358 * the file system was made read-only before the file was finally
1359 * closed. The file will remain in the unlinked set.
34dc7c2f
BB
1360 */
1361 if (zp->z_unlinked) {
6a218566
AG
1362 ASSERT(!zfsvfs->z_issnap);
1363 if (!zfs_is_readonly(zfsvfs)) {
1364 mutex_exit(&zp->z_lock);
1365 zfs_znode_hold_exit(zfsvfs, zh);
1366 zfs_rmnode(zp);
1367 return;
1368 }
34dc7c2f 1369 }
428870ff 1370
34dc7c2f
BB
1371 mutex_exit(&zp->z_lock);
1372 zfs_znode_dmu_fini(zp);
d6bd8eaa 1373
0037b49e 1374 zfs_znode_hold_exit(zfsvfs, zh);
34dc7c2f
BB
1375}
1376
6d111134
TC
/*
 * Three-way comparison of two timestamps.
 *
 * Returns -1 or 1 when the seconds differ; otherwise returns the
 * difference of the nanosecond fields (negative, zero or positive).
 */
static inline int
zfs_compare_timespec(struct timespec *t1, struct timespec *t2)
{
	if (t1->tv_sec != t2->tv_sec)
		return ((t1->tv_sec < t2->tv_sec) ? -1 : 1);

	return (t1->tv_nsec - t2->tv_nsec);
}
1388
6d111134
TC
1389/*
1390 * Prepare to update znode time stamps.
1391 *
1392 * IN: zp - znode requiring timestamp update
0df9673f 1393 * flag - ATTR_MTIME, ATTR_CTIME flags
6d111134 1394 *
0df9673f 1395 * OUT: zp - z_seq
6d111134
TC
1396 * mtime - new mtime
1397 * ctime - new ctime
1398 *
0df9673f
CC
1399 * Note: We don't update atime here, because we rely on Linux VFS to do
1400 * atime updating.
6d111134 1401 */
34dc7c2f 1402void
428870ff 1403zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
0df9673f 1404 uint64_t ctime[2])
34dc7c2f 1405{
6413c95f 1406 inode_timespec_t now;
34dc7c2f 1407
34dc7c2f
BB
1408 gethrestime(&now);
1409
0df9673f 1410 zp->z_seq++;
34dc7c2f 1411
3558fd73 1412 if (flag & ATTR_MTIME) {
428870ff 1413 ZFS_TIME_ENCODE(&now, mtime);
9f5f0019 1414 ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
3558fd73 1415 if (ZTOZSB(zp)->z_use_fuids) {
428870ff
BB
1416 zp->z_pflags |= (ZFS_ARCHIVE |
1417 ZFS_AV_MODIFIED);
1418 }
34dc7c2f
BB
1419 }
1420
3558fd73 1421 if (flag & ATTR_CTIME) {
428870ff 1422 ZFS_TIME_ENCODE(&now, ctime);
9f5f0019 1423 ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime);
3558fd73 1424 if (ZTOZSB(zp)->z_use_fuids)
428870ff 1425 zp->z_pflags |= ZFS_ARCHIVE;
34dc7c2f
BB
1426 }
1427}
1428
34dc7c2f
BB
1429/*
1430 * Grow the block size for a file.
1431 *
1432 * IN: zp - znode of file to free data in.
1433 * size - requested block size
1434 * tx - open transaction.
1435 *
1436 * NOTE: this function assumes that the znode is write locked.
1437 */
1438void
1439zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1440{
1441 int error;
1442 u_longlong_t dummy;
1443
1444 if (size <= zp->z_blksz)
1445 return;
1446 /*
1447 * If the file size is already greater than the current blocksize,
1448 * we will not grow. If there is more than one block in a file,
1449 * the blocksize cannot change.
1450 */
428870ff 1451 if (zp->z_blksz && zp->z_size > zp->z_blksz)
34dc7c2f
BB
1452 return;
1453
3558fd73 1454 error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id,
34dc7c2f 1455 size, 0, tx);
428870ff 1456
34dc7c2f
BB
1457 if (error == ENOTSUP)
1458 return;
c99c9001 1459 ASSERT0(error);
34dc7c2f
BB
1460
1461 /* What blocksize did we actually get? */
428870ff 1462 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
34dc7c2f
BB
1463}
1464
34dc7c2f 1465/*
b128c09f 1466 * Increase the file length
34dc7c2f
BB
1467 *
1468 * IN: zp - znode of file to free data in.
b128c09f 1469 * end - new end-of-file
34dc7c2f 1470 *
19d55079 1471 * RETURN: 0 on success, error code on failure
34dc7c2f 1472 */
b128c09f
BB
1473static int
1474zfs_extend(znode_t *zp, uint64_t end)
34dc7c2f 1475{
0037b49e 1476 zfsvfs_t *zfsvfs = ZTOZSB(zp);
b128c09f 1477 dmu_tx_t *tx;
34dc7c2f 1478 rl_t *rl;
b128c09f 1479 uint64_t newblksz;
34dc7c2f
BB
1480 int error;
1481
34dc7c2f 1482 /*
b128c09f 1483 * We will change zp_size, lock the whole file.
34dc7c2f 1484 */
d88895a0 1485 rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
34dc7c2f
BB
1486
1487 /*
1488 * Nothing to do if file already at desired length.
1489 */
428870ff 1490 if (end <= zp->z_size) {
34dc7c2f
BB
1491 zfs_range_unlock(rl);
1492 return (0);
1493 }
0037b49e 1494 tx = dmu_tx_create(zfsvfs->z_os);
428870ff
BB
1495 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1496 zfs_sa_upgrade_txholds(tx, zp);
b128c09f 1497 if (end > zp->z_blksz &&
0037b49e 1498 (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
34dc7c2f
BB
1499 /*
1500 * We are growing the file past the current block size.
1501 */
3558fd73 1502 if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) {
f1512ee6
MA
1503 /*
1504 * File's blocksize is already larger than the
1505 * "recordsize" property. Only let it grow to
1506 * the next power of 2.
1507 */
34dc7c2f 1508 ASSERT(!ISP2(zp->z_blksz));
f1512ee6 1509 newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
34dc7c2f 1510 } else {
3558fd73 1511 newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz);
34dc7c2f 1512 }
b128c09f
BB
1513 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
1514 } else {
1515 newblksz = 0;
34dc7c2f
BB
1516 }
1517
384f8a09 1518 error = dmu_tx_assign(tx, TXG_WAIT);
34dc7c2f 1519 if (error) {
34dc7c2f
BB
1520 dmu_tx_abort(tx);
1521 zfs_range_unlock(rl);
1522 return (error);
1523 }
1524
b128c09f
BB
1525 if (newblksz)
1526 zfs_grow_blocksize(zp, newblksz, tx);
34dc7c2f 1527
428870ff
BB
1528 zp->z_size = end;
1529
3558fd73 1530 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
428870ff 1531 &zp->z_size, sizeof (zp->z_size), tx));
34dc7c2f 1532
b128c09f 1533 zfs_range_unlock(rl);
34dc7c2f 1534
b128c09f 1535 dmu_tx_commit(tx);
34dc7c2f 1536
b128c09f
BB
1537 return (0);
1538}
1539
223df016
TC
1540/*
1541 * zfs_zero_partial_page - Modeled after update_pages() but
1542 * with different arguments and semantics for use by zfs_freesp().
1543 *
1544 * Zeroes a piece of a single page cache entry for zp at offset
1545 * start and length len.
1546 *
1547 * Caller must acquire a range lock on the file for the region
1548 * being zeroed in order that the ARC and page cache stay in sync.
1549 */
1550static void
1551zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
1552{
1553 struct address_space *mp = ZTOI(zp)->i_mapping;
1554 struct page *pp;
1555 int64_t off;
1556 void *pb;
1557
8b1899d3 1558 ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK));
223df016 1559
8b1899d3
BB
1560 off = start & (PAGE_SIZE - 1);
1561 start &= PAGE_MASK;
223df016 1562
8b1899d3 1563 pp = find_lock_page(mp, start >> PAGE_SHIFT);
223df016
TC
1564 if (pp) {
1565 if (mapping_writably_mapped(mp))
1566 flush_dcache_page(pp);
1567
1568 pb = kmap(pp);
1569 bzero(pb + off, len);
1570 kunmap(pp);
1571
1572 if (mapping_writably_mapped(mp))
1573 flush_dcache_page(pp);
1574
1575 mark_page_accessed(pp);
1576 SetPageUptodate(pp);
1577 ClearPageError(pp);
1578 unlock_page(pp);
8b1899d3 1579 put_page(pp);
223df016
TC
1580 }
1581}
1582
b128c09f
BB
1583/*
1584 * Free space in a file.
1585 *
1586 * IN: zp - znode of file to free data in.
1587 * off - start of section to free.
1588 * len - length of section to free.
1589 *
19d55079 1590 * RETURN: 0 on success, error code on failure
b128c09f
BB
1591 */
1592static int
1593zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1594{
0037b49e 1595 zfsvfs_t *zfsvfs = ZTOZSB(zp);
b128c09f
BB
1596 rl_t *rl;
1597 int error;
1598
1599 /*
1600 * Lock the range being freed.
1601 */
d88895a0 1602 rl = zfs_range_lock(&zp->z_range_lock, off, len, RL_WRITER);
b128c09f
BB
1603
1604 /*
1605 * Nothing to do if file already at desired length.
1606 */
428870ff 1607 if (off >= zp->z_size) {
b128c09f
BB
1608 zfs_range_unlock(rl);
1609 return (0);
34dc7c2f
BB
1610 }
1611
428870ff
BB
1612 if (off + len > zp->z_size)
1613 len = zp->z_size - off;
b128c09f 1614
0037b49e 1615 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
b128c09f 1616
223df016
TC
1617 /*
1618 * Zero partial page cache entries. This must be done under a
1619 * range lock in order to keep the ARC and page cache in sync.
1620 */
1621 if (zp->z_is_mapped) {
1622 loff_t first_page, last_page, page_len;
1623 loff_t first_page_offset, last_page_offset;
1624
1625 /* first possible full page in hole */
8b1899d3 1626 first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT;
223df016 1627 /* last page of hole */
8b1899d3 1628 last_page = (off + len) >> PAGE_SHIFT;
223df016
TC
1629
1630 /* offset of first_page */
8b1899d3 1631 first_page_offset = first_page << PAGE_SHIFT;
223df016 1632 /* offset of last_page */
8b1899d3 1633 last_page_offset = last_page << PAGE_SHIFT;
223df016 1634
cb08f063
TC
1635 /* truncate whole pages */
1636 if (last_page_offset > first_page_offset) {
1637 truncate_inode_pages_range(ZTOI(zp)->i_mapping,
1638 first_page_offset, last_page_offset - 1);
1639 }
1640
1641 /* truncate sub-page ranges */
223df016
TC
1642 if (first_page > last_page) {
1643 /* entire punched area within a single page */
1644 zfs_zero_partial_page(zp, off, len);
1645 } else {
1646 /* beginning of punched area at the end of a page */
1647 page_len = first_page_offset - off;
1648 if (page_len > 0)
1649 zfs_zero_partial_page(zp, off, page_len);
1650
1651 /* end of punched area at the beginning of a page */
1652 page_len = off + len - last_page_offset;
1653 if (page_len > 0)
1654 zfs_zero_partial_page(zp, last_page_offset,
1655 page_len);
1656 }
1657 }
34dc7c2f
BB
1658 zfs_range_unlock(rl);
1659
b128c09f
BB
1660 return (error);
1661}
1662
1663/*
1664 * Truncate a file
1665 *
1666 * IN: zp - znode of file to free data in.
1667 * end - new end-of-file.
1668 *
19d55079 1669 * RETURN: 0 on success, error code on failure
b128c09f
BB
1670 */
1671static int
1672zfs_trunc(znode_t *zp, uint64_t end)
1673{
0037b49e 1674 zfsvfs_t *zfsvfs = ZTOZSB(zp);
b128c09f
BB
1675 dmu_tx_t *tx;
1676 rl_t *rl;
1677 int error;
572e2857
BB
1678 sa_bulk_attr_t bulk[2];
1679 int count = 0;
b128c09f
BB
1680
1681 /*
1682 * We will change zp_size, lock the whole file.
1683 */
d88895a0 1684 rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER);
b128c09f
BB
1685
1686 /*
1687 * Nothing to do if file already at desired length.
1688 */
428870ff 1689 if (end >= zp->z_size) {
b128c09f
BB
1690 zfs_range_unlock(rl);
1691 return (0);
1692 }
1693
18a2485f
FS
1694 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
1695 DMU_OBJECT_END);
b128c09f
BB
1696 if (error) {
1697 zfs_range_unlock(rl);
1698 return (error);
1699 }
0037b49e 1700 tx = dmu_tx_create(zfsvfs->z_os);
428870ff
BB
1701 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1702 zfs_sa_upgrade_txholds(tx, zp);
19d55079 1703 dmu_tx_mark_netfree(tx);
7a8f0e80 1704 error = dmu_tx_assign(tx, TXG_WAIT);
b128c09f 1705 if (error) {
b128c09f
BB
1706 dmu_tx_abort(tx);
1707 zfs_range_unlock(rl);
1708 return (error);
1709 }
b128c09f 1710
428870ff 1711 zp->z_size = end;
0037b49e 1712 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
572e2857 1713 NULL, &zp->z_size, sizeof (zp->z_size));
428870ff 1714
572e2857
BB
1715 if (end == 0) {
1716 zp->z_pflags &= ~ZFS_SPARSE;
0037b49e 1717 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
572e2857
BB
1718 NULL, &zp->z_pflags, 8);
1719 }
1720 VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
b128c09f 1721
34dc7c2f
BB
1722 dmu_tx_commit(tx);
1723
d164b209 1724 zfs_range_unlock(rl);
34dc7c2f
BB
1725
1726 return (0);
1727}
1728
b128c09f
BB
1729/*
1730 * Free space in a file
1731 *
1732 * IN: zp - znode of file to free data in.
1733 * off - start of range
1734 * len - end of range (0 => EOF)
1735 * flag - current file open mode flags.
1736 * log - TRUE if this action should be logged
1737 *
19d55079 1738 * RETURN: 0 on success, error code on failure
b128c09f
BB
1739 */
1740int
1741zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
1742{
b128c09f 1743 dmu_tx_t *tx;
0037b49e
BB
1744 zfsvfs_t *zfsvfs = ZTOZSB(zp);
1745 zilog_t *zilog = zfsvfs->z_log;
428870ff
BB
1746 uint64_t mode;
1747 uint64_t mtime[2], ctime[2];
1748 sa_bulk_attr_t bulk[3];
1749 int count = 0;
b128c09f
BB
1750 int error;
1751
0037b49e 1752 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
428870ff
BB
1753 sizeof (mode))) != 0)
1754 return (error);
1755
1756 if (off > zp->z_size) {
b128c09f
BB
1757 error = zfs_extend(zp, off+len);
1758 if (error == 0 && log)
1759 goto log;
223df016 1760 goto out;
b128c09f
BB
1761 }
1762
b128c09f
BB
1763 if (len == 0) {
1764 error = zfs_trunc(zp, off);
1765 } else {
1766 if ((error = zfs_free_range(zp, off, len)) == 0 &&
428870ff 1767 off + len > zp->z_size)
b128c09f
BB
1768 error = zfs_extend(zp, off+len);
1769 }
1770 if (error || !log)
223df016 1771 goto out;
b128c09f 1772log:
0037b49e 1773 tx = dmu_tx_create(zfsvfs->z_os);
428870ff
BB
1774 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1775 zfs_sa_upgrade_txholds(tx, zp);
384f8a09 1776 error = dmu_tx_assign(tx, TXG_WAIT);
b128c09f 1777 if (error) {
b128c09f 1778 dmu_tx_abort(tx);
223df016 1779 goto out;
b128c09f
BB
1780 }
1781
0037b49e
BB
1782 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
1783 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
1784 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
428870ff 1785 NULL, &zp->z_pflags, 8);
0df9673f 1786 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
428870ff
BB
1787 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1788 ASSERT(error == 0);
1789
b128c09f
BB
1790 zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
1791
1792 dmu_tx_commit(tx);
223df016 1793
960e08fe 1794 zfs_inode_update(zp);
223df016
TC
1795 error = 0;
1796
1797out:
1798 /*
1799 * Truncate the page cache - for file truncate operations, use
1800 * the purpose-built API for truncations. For punching operations,
cb08f063 1801 * the truncation is handled under a range lock in zfs_free_range.
223df016
TC
1802 */
1803 if (len == 0)
1804 truncate_setsize(ZTOI(zp), off);
223df016 1805 return (error);
b128c09f
BB
1806}
1807
34dc7c2f
BB
1808void
1809zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
1810{
22872ff5 1811 struct super_block *sb;
0037b49e 1812 zfsvfs_t *zfsvfs;
428870ff 1813 uint64_t moid, obj, sa_obj, version;
22872ff5 1814 uint64_t sense = ZFS_CASE_SENSITIVE;
34dc7c2f
BB
1815 uint64_t norm = 0;
1816 nvpair_t *elem;
c96c36fa 1817 int size;
34dc7c2f 1818 int error;
22872ff5
BB
1819 int i;
1820 znode_t *rootzp = NULL;
1821 vattr_t vattr;
1822 znode_t *zp;
1823 zfs_acl_ids_t acl_ids;
34dc7c2f
BB
1824
1825 /*
1826 * First attempt to create master node.
1827 */
1828 /*
1829 * In an empty objset, there are no blocks to read and thus
1830 * there can be no i/o errors (which we assert below).
1831 */
1832 moid = MASTER_NODE_OBJ;
1833 error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
1834 DMU_OT_NONE, 0, tx);
1835 ASSERT(error == 0);
1836
1837 /*
1838 * Set starting attributes.
1839 */
428870ff 1840 version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
34dc7c2f
BB
1841 elem = NULL;
1842 while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
1843 /* For the moment we expect all zpl props to be uint64_ts */
1844 uint64_t val;
1845 char *name;
1846
1847 ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
1848 VERIFY(nvpair_value_uint64(elem, &val) == 0);
1849 name = nvpair_name(elem);
1850 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
9babb374
BB
1851 if (val < version)
1852 version = val;
34dc7c2f
BB
1853 } else {
1854 error = zap_update(os, moid, name, 8, 1, &val, tx);
1855 }
1856 ASSERT(error == 0);
1857 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
1858 norm = val;
22872ff5
BB
1859 else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
1860 sense = val;
34dc7c2f
BB
1861 }
1862 ASSERT(version != 0);
9babb374 1863 error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
34dc7c2f 1864
428870ff
BB
1865 /*
1866 * Create zap object used for SA attribute registration
1867 */
1868
1869 if (version >= ZPL_VERSION_SA) {
1870 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
1871 DMU_OT_NONE, 0, tx);
1872 error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
1873 ASSERT(error == 0);
1874 } else {
1875 sa_obj = 0;
1876 }
34dc7c2f
BB
1877 /*
1878 * Create a delete queue.
1879 */
9babb374 1880 obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
34dc7c2f 1881
9babb374 1882 error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
34dc7c2f
BB
1883 ASSERT(error == 0);
1884
9babb374 1885 /*
0037b49e 1886 * Create root znode. Create minimal znode/inode/zfsvfs/sb
22872ff5 1887 * to allow zfs_mknode to work.
9babb374 1888 */
22872ff5
BB
1889 vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
1890 vattr.va_mode = S_IFDIR|0755;
1891 vattr.va_uid = crgetuid(cr);
1892 vattr.va_gid = crgetgid(cr);
1893
79c76d5b 1894 rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
22872ff5
BB
1895 rootzp->z_moved = 0;
1896 rootzp->z_unlinked = 0;
1897 rootzp->z_atime_dirty = 0;
1898 rootzp->z_is_sa = USE_SA(version, os);
9c5167d1 1899 rootzp->z_pflags = 0;
22872ff5 1900
0037b49e
BB
1901 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
1902 zfsvfs->z_os = os;
1903 zfsvfs->z_parent = zfsvfs;
1904 zfsvfs->z_version = version;
1905 zfsvfs->z_use_fuids = USE_FUIDS(version, os);
1906 zfsvfs->z_use_sa = USE_SA(version, os);
1907 zfsvfs->z_norm = norm;
22872ff5 1908
79c76d5b 1909 sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
0037b49e 1910 sb->s_fs_info = zfsvfs;
22872ff5
BB
1911
1912 ZTOI(rootzp)->i_sb = sb;
1913
1914 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
0037b49e 1915 &zfsvfs->z_attr_table);
9babb374 1916
22872ff5 1917 ASSERT(error == 0);
9babb374 1918
60101509 1919 /*
22872ff5
BB
1920 * Fold case on file systems that are always or sometimes case
1921 * insensitive.
60101509 1922 */
22872ff5 1923 if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
0037b49e 1924 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
60101509 1925
0037b49e
BB
1926 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1927 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
22872ff5 1928 offsetof(znode_t, z_link_node));
60101509 1929
c96c36fa 1930 size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
0037b49e
BB
1931 zfsvfs->z_hold_size = size;
1932 zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
1933 KM_SLEEP);
1934 zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
c96c36fa 1935 for (i = 0; i != size; i++) {
0037b49e 1936 avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
c96c36fa 1937 sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
0037b49e 1938 mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
c96c36fa 1939 }
60101509 1940
22872ff5
BB
1941 VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
1942 cr, NULL, &acl_ids));
1943 zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
1944 ASSERT3P(zp, ==, rootzp);
1945 error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
1946 ASSERT(error == 0);
1947 zfs_acl_ids_free(&acl_ids);
60101509 1948
22872ff5
BB
1949 atomic_set(&ZTOI(rootzp)->i_count, 0);
1950 sa_handle_destroy(rootzp->z_sa_hdl);
22872ff5
BB
1951 kmem_cache_free(znode_cache, rootzp);
1952
1953 /*
1954 * Create shares directory
1955 */
0037b49e 1956 error = zfs_create_share_dir(zfsvfs, tx);
9babb374 1957 ASSERT(error == 0);
428870ff 1958
c96c36fa 1959 for (i = 0; i != size; i++) {
0037b49e
BB
1960 avl_destroy(&zfsvfs->z_hold_trees[i]);
1961 mutex_destroy(&zfsvfs->z_hold_locks[i]);
c96c36fa 1962 }
2708f716 1963
c17486b2
GN
1964 mutex_destroy(&zfsvfs->z_znodes_lock);
1965
0037b49e
BB
1966 vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
1967 vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
2708f716 1968 kmem_free(sb, sizeof (struct super_block));
0037b49e 1969 kmem_free(zfsvfs, sizeof (zfsvfs_t));
34dc7c2f 1970}
34dc7c2f 1971#endif /* _KERNEL */
428870ff 1972
34dc7c2f 1973static int
572e2857
BB
1974zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
1975{
1976 uint64_t sa_obj = 0;
1977 int error;
1978
1979 error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
1980 if (error != 0 && error != ENOENT)
1981 return (error);
1982
1983 error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
1984 return (error);
1985}
1986
1987static int
1988zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
7b8518cb 1989 dmu_buf_t **db, void *tag)
34dc7c2f 1990{
34dc7c2f 1991 dmu_object_info_t doi;
34dc7c2f 1992 int error;
428870ff 1993
7b8518cb 1994 if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
34dc7c2f
BB
1995 return (error);
1996
572e2857 1997 dmu_object_info_from_db(*db, &doi);
428870ff
BB
1998 if ((doi.doi_bonus_type != DMU_OT_SA &&
1999 doi.doi_bonus_type != DMU_OT_ZNODE) ||
d6320ddb
BB
2000 (doi.doi_bonus_type == DMU_OT_ZNODE &&
2001 doi.doi_bonus_size < sizeof (znode_phys_t))) {
7b8518cb 2002 sa_buf_rele(*db, tag);
2e528b49 2003 return (SET_ERROR(ENOTSUP));
34dc7c2f
BB
2004 }
2005
572e2857
BB
2006 error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
2007 if (error != 0) {
7b8518cb 2008 sa_buf_rele(*db, tag);
428870ff
BB
2009 return (error);
2010 }
2011
572e2857
BB
2012 return (0);
2013}
2014
2015void
7b8518cb 2016zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
572e2857
BB
2017{
2018 sa_handle_destroy(hdl);
7b8518cb 2019 sa_buf_rele(db, tag);
572e2857
BB
2020}
2021
2022/*
2023 * Given an object number, return its parent object number and whether
2024 * or not the object is an extended attribute directory.
2025 */
2026static int
b23ad7f3
JJ
2027zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
2028 uint64_t *pobjp, int *is_xattrdir)
572e2857
BB
2029{
2030 uint64_t parent;
2031 uint64_t pflags;
2032 uint64_t mode;
b23ad7f3 2033 uint64_t parent_mode;
572e2857 2034 sa_bulk_attr_t bulk[3];
b23ad7f3
JJ
2035 sa_handle_t *sa_hdl;
2036 dmu_buf_t *sa_db;
572e2857
BB
2037 int count = 0;
2038 int error;
2039
2040 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
2041 &parent, sizeof (parent));
428870ff 2042 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
572e2857 2043 &pflags, sizeof (pflags));
428870ff 2044 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
572e2857 2045 &mode, sizeof (mode));
428870ff 2046
572e2857 2047 if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
428870ff 2048 return (error);
572e2857 2049
b23ad7f3
JJ
2050 /*
2051 * When a link is removed its parent pointer is not changed and will
2052 * be invalid. There are two cases where a link is removed but the
2053 * file stays around, when it goes to the delete queue and when there
2054 * are additional links.
2055 */
2056 error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
2057 if (error != 0)
2058 return (error);
2059
2060 error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
2061 zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
2062 if (error != 0)
2063 return (error);
2064
428870ff 2065 *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
34dc7c2f 2066
b23ad7f3
JJ
2067 /*
2068 * Extended attributes can be applied to files, directories, etc.
2069 * Otherwise the parent must be a directory.
2070 */
2071 if (!*is_xattrdir && !S_ISDIR(parent_mode))
ecb2b7dc 2072 return (SET_ERROR(EINVAL));
b23ad7f3
JJ
2073
2074 *pobjp = parent;
2075
34dc7c2f
BB
2076 return (0);
2077}
2078
572e2857
BB
2079/*
2080 * Given an object number, return some zpl level statistics
2081 */
2082static int
2083zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
2084 zfs_stat_t *sb)
34dc7c2f 2085{
572e2857
BB
2086 sa_bulk_attr_t bulk[4];
2087 int count = 0;
2088
2089 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2090 &sb->zs_mode, sizeof (sb->zs_mode));
2091 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
2092 &sb->zs_gen, sizeof (sb->zs_gen));
2093 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
2094 &sb->zs_links, sizeof (sb->zs_links));
2095 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
2096 &sb->zs_ctime, sizeof (sb->zs_ctime));
2097
2098 return (sa_bulk_lookup(hdl, bulk, count));
2099}
2100
2101static int
2102zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
2103 sa_attr_type_t *sa_table, char *buf, int len)
2104{
2105 sa_handle_t *sa_hdl;
2106 sa_handle_t *prevhdl = NULL;
2107 dmu_buf_t *prevdb = NULL;
2108 dmu_buf_t *sa_db = NULL;
34dc7c2f
BB
2109 char *path = buf + len - 1;
2110 int error;
2111
2112 *path = '\0';
572e2857 2113 sa_hdl = hdl;
428870ff 2114
64c1dcef
PD
2115 uint64_t deleteq_obj;
2116 VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
2117 ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
2118 error = zap_lookup_int(osp, deleteq_obj, obj);
2119 if (error == 0) {
2120 return (ESTALE);
2121 } else if (error != ENOENT) {
2122 return (error);
2123 }
2124 error = 0;
2125
34dc7c2f 2126 for (;;) {
17897ce2 2127 uint64_t pobj = 0;
34dc7c2f
BB
2128 char component[MAXNAMELEN + 2];
2129 size_t complen;
17897ce2 2130 int is_xattrdir = 0;
34dc7c2f 2131
572e2857 2132 if (prevdb)
7b8518cb 2133 zfs_release_sa_handle(prevhdl, prevdb, FTAG);
572e2857 2134
b23ad7f3 2135 if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
572e2857 2136 &is_xattrdir)) != 0)
34dc7c2f
BB
2137 break;
2138
2139 if (pobj == obj) {
2140 if (path[0] != '/')
2141 *--path = '/';
2142 break;
2143 }
2144
2145 component[0] = '/';
2146 if (is_xattrdir) {
2147 (void) sprintf(component + 1, "<xattrdir>");
2148 } else {
2149 error = zap_value_search(osp, pobj, obj,
2150 ZFS_DIRENT_OBJ(-1ULL), component + 1);
2151 if (error != 0)
2152 break;
2153 }
2154
2155 complen = strlen(component);
2156 path -= complen;
2157 ASSERT(path >= buf);
2158 bcopy(component, path, complen);
2159 obj = pobj;
572e2857
BB
2160
2161 if (sa_hdl != hdl) {
2162 prevhdl = sa_hdl;
2163 prevdb = sa_db;
2164 }
7b8518cb 2165 error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
572e2857
BB
2166 if (error != 0) {
2167 sa_hdl = prevhdl;
2168 sa_db = prevdb;
2169 break;
2170 }
2171 }
2172
2173 if (sa_hdl != NULL && sa_hdl != hdl) {
2174 ASSERT(sa_db != NULL);
7b8518cb 2175 zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
34dc7c2f
BB
2176 }
2177
2178 if (error == 0)
2179 (void) memmove(buf, path, buf + len - path);
428870ff 2180
34dc7c2f
BB
2181 return (error);
2182}
572e2857
BB
2183
2184int
2185zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
2186{
2187 sa_attr_type_t *sa_table;
2188 sa_handle_t *hdl;
2189 dmu_buf_t *db;
2190 int error;
2191
2192 error = zfs_sa_setup(osp, &sa_table);
2193 if (error != 0)
2194 return (error);
2195
7b8518cb 2196 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
572e2857
BB
2197 if (error != 0)
2198 return (error);
2199
2200 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2201
7b8518cb 2202 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2203 return (error);
2204}
2205
2206int
2207zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
2208 char *buf, int len)
2209{
2210 char *path = buf + len - 1;
2211 sa_attr_type_t *sa_table;
2212 sa_handle_t *hdl;
2213 dmu_buf_t *db;
2214 int error;
2215
2216 *path = '\0';
2217
2218 error = zfs_sa_setup(osp, &sa_table);
2219 if (error != 0)
2220 return (error);
2221
7b8518cb 2222 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
572e2857
BB
2223 if (error != 0)
2224 return (error);
2225
2226 error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
2227 if (error != 0) {
7b8518cb 2228 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2229 return (error);
2230 }
2231
2232 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2233
7b8518cb 2234 zfs_release_sa_handle(hdl, db, FTAG);
572e2857
BB
2235 return (error);
2236}
c28b2279 2237
#ifdef _KERNEL
/* Entry points exported from this file for kernel builds. */
EXPORT_SYMBOL(zfs_create_fs);
EXPORT_SYMBOL(zfs_obj_to_path);

/* zfs_object_mutex_size is exposed as a module parameter (mode 0644). */
/* CSTYLED */
module_param(zfs_object_mutex_size, uint, 0644);
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
#endif /* _KERNEL */