]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/zfs_vnops.c
Fix link count of root inode when snapdir is visible
[mirror_zfs.git] / module / zfs / zfs_vnops.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
9b7b9cd3 21
34dc7c2f 22/*
428870ff 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
a448a255 24 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
5475aada 25 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
9b7b9cd3 26 * Copyright 2017 Nexenta Systems, Inc.
34dc7c2f
BB
27 */
28
29/* Portions Copyright 2007 Jeremy Teo */
428870ff 30/* Portions Copyright 2010 Robert Milkowski */
34dc7c2f 31
60101509 32
34dc7c2f
BB
33#include <sys/types.h>
34#include <sys/param.h>
35#include <sys/time.h>
34dc7c2f 36#include <sys/sysmacros.h>
34dc7c2f 37#include <sys/vfs.h>
34dc7c2f
BB
38#include <sys/file.h>
39#include <sys/stat.h>
40#include <sys/kmem.h>
41#include <sys/taskq.h>
42#include <sys/uio.h>
43#include <sys/vmsystm.h>
44#include <sys/atomic.h>
34dc7c2f
BB
45#include <sys/pathname.h>
46#include <sys/cmn_err.h>
47#include <sys/errno.h>
34dc7c2f
BB
48#include <sys/zfs_dir.h>
49#include <sys/zfs_acl.h>
50#include <sys/zfs_ioctl.h>
51#include <sys/fs/zfs.h>
52#include <sys/dmu.h>
428870ff 53#include <sys/dmu_objset.h>
34dc7c2f
BB
54#include <sys/spa.h>
55#include <sys/txg.h>
56#include <sys/dbuf.h>
57#include <sys/zap.h>
428870ff 58#include <sys/sa.h>
34dc7c2f
BB
59#include <sys/policy.h>
60#include <sys/sunddi.h>
b128c09f 61#include <sys/sid.h>
bcf30822 62#include <sys/mode.h>
ebe7e575 63#include <sys/zfs_ctldir.h>
34dc7c2f 64#include <sys/zfs_fuid.h>
428870ff 65#include <sys/zfs_sa.h>
e5c39b95 66#include <sys/zfs_vnops.h>
34dc7c2f 67#include <sys/zfs_rlock.h>
428870ff 68#include <sys/cred.h>
218b8eaf 69#include <sys/zpl.h>
1ce23dca 70#include <sys/zil.h>
9c5167d1 71#include <sys/sa_impl.h>
34dc7c2f
BB
72
73/*
74 * Programming rules.
75 *
76 * Each vnode op performs some logical unit of work. To do this, the ZPL must
77 * properly lock its in-core state, create a DMU transaction, do the work,
78 * record this work in the intent log (ZIL), commit the DMU transaction,
79 * and wait for the intent log to commit if it is a synchronous operation.
80 * Moreover, the vnode ops must work in both normal and log replay context.
81 * The ordering of events is important to avoid deadlocks and references
82 * to freed memory. The example below illustrates the following Big Rules:
83 *
84 * (1) A check must be made in each zfs thread for a mounted file system.
0037b49e
BB
85 * This is done avoiding races using ZFS_ENTER(zfsvfs).
86 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
34dc7c2f
BB
87 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
88 * can return EIO from the calling function.
89 *
3558fd73 90 * (2) iput() should always be the last thing except for zil_commit()
34dc7c2f
BB
91 * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
92 * First, if it's the last reference, the vnode/znode
93 * can be freed, so the zp may point to freed memory. Second, the last
94 * reference will call zfs_zinactive(), which may induce a lot of work --
95 * pushing cached pages (which acquires range locks) and syncing out
96 * cached atime changes. Third, zfs_zinactive() may require a new tx,
97 * which could deadlock the system if you were already holding one.
0a50679c 98 * If you must call iput() within a tx then use zfs_iput_async().
34dc7c2f
BB
99 *
100 * (3) All range locks must be grabbed before calling dmu_tx_assign(),
101 * as they can span dmu_tx_assign() calls.
102 *
384f8a09
MA
103 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
104 * dmu_tx_assign(). This is critical because we don't want to block
105 * while holding locks.
106 *
107 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This
108 * reduces lock contention and CPU usage when we must wait (note that if
109 * throughput is constrained by the storage, nearly every transaction
110 * must wait).
111 *
112 * Note, in particular, that if a lock is sometimes acquired before
113 * the tx assigns, and sometimes after (e.g. z_lock), then failing
114 * to use a non-blocking assign can deadlock the system. The scenario:
34dc7c2f
BB
115 *
116 * Thread A has grabbed a lock before calling dmu_tx_assign().
117 * Thread B is in an already-assigned tx, and blocks for this lock.
118 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
119 * forever, because the previous txg can't quiesce until B's tx commits.
120 *
0037b49e 121 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
e8b96c60 122 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
0735ecb3 123 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
e8b96c60
MA
124 * to indicate that this operation has already called dmu_tx_wait().
125 * This will ensure that we don't retry forever, waiting a short bit
126 * each time.
34dc7c2f
BB
127 *
128 * (5) If the operation succeeded, generate the intent log entry for it
129 * before dropping locks. This ensures that the ordering of events
130 * in the intent log matches the order in which they actually occurred.
d3cc8b15 131 * During ZIL replay the zfs_log_* functions will update the sequence
fb5f0bc8 132 * number to indicate the zil transaction has replayed.
34dc7c2f
BB
133 *
134 * (6) At the end of each vnode op, the DMU tx must always commit,
135 * regardless of whether there were any errors.
136 *
572e2857 137 * (7) After dropping all locks, invoke zil_commit(zilog, foid)
34dc7c2f
BB
138 * to ensure that synchronous semantics are provided when necessary.
139 *
140 * In general, this is how things should be ordered in each vnode op:
141 *
0037b49e 142 * ZFS_ENTER(zfsvfs); // exit if unmounted
34dc7c2f 143 * top:
3558fd73 144 * zfs_dirent_lock(&dl, ...) // lock directory entry (may igrab())
34dc7c2f
BB
145 * rw_enter(...); // grab any other locks you need
146 * tx = dmu_tx_create(...); // get DMU tx
147 * dmu_tx_hold_*(); // hold each object you might modify
0735ecb3 148 * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
34dc7c2f
BB
149 * if (error) {
150 * rw_exit(...); // drop locks
151 * zfs_dirent_unlock(dl); // unlock directory entry
3558fd73 152 * iput(...); // release held vnodes
fb5f0bc8 153 * if (error == ERESTART) {
e8b96c60 154 * waited = B_TRUE;
34dc7c2f
BB
155 * dmu_tx_wait(tx);
156 * dmu_tx_abort(tx);
157 * goto top;
158 * }
159 * dmu_tx_abort(tx); // abort DMU tx
0037b49e 160 * ZFS_EXIT(zfsvfs); // finished in zfs
34dc7c2f
BB
161 * return (error); // really out of space
162 * }
163 * error = do_real_work(); // do whatever this VOP does
164 * if (error == 0)
165 * zfs_log_*(...); // on success, make ZIL entry
166 * dmu_tx_commit(tx); // commit DMU tx -- error or not
167 * rw_exit(...); // drop locks
168 * zfs_dirent_unlock(dl); // unlock directory entry
3558fd73 169 * iput(...); // release held vnodes
572e2857 170 * zil_commit(zilog, foid); // synchronous when necessary
0037b49e 171 * ZFS_EXIT(zfsvfs); // finished in zfs
34dc7c2f
BB
172 * return (error); // done, report error
173 */
174
126400a1
BB
/*
 * Virus scanning is unsupported.  It would be possible to add a hook
 * here to perform the required virus scan.  This could be done
 * entirely in the kernel or potentially as an update to invoke a
 * scanning utility.
 */
/*
 * Stub anti-virus hook invoked from zfs_open()/zfs_close().
 * Always reports the file as clean (returns 0); 'async' selects
 * asynchronous scanning in a real implementation and is ignored here.
 */
static int
zfs_vscan(struct inode *ip, cred_t *cr, int async)
{
	return (0);
}
186
187/* ARGSUSED */
188int
189zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
190{
191 znode_t *zp = ITOZ(ip);
0037b49e 192 zfsvfs_t *zfsvfs = ITOZSB(ip);
126400a1 193
0037b49e 194 ZFS_ENTER(zfsvfs);
126400a1
BB
195 ZFS_VERIFY_ZP(zp);
196
197 /* Honor ZFS_APPENDONLY file attribute */
198 if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
199 ((flag & O_APPEND) == 0)) {
0037b49e 200 ZFS_EXIT(zfsvfs);
2e528b49 201 return (SET_ERROR(EPERM));
126400a1
BB
202 }
203
204 /* Virus scan eligible files on open */
0037b49e 205 if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
126400a1
BB
206 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
207 if (zfs_vscan(ip, cr, 0) != 0) {
0037b49e 208 ZFS_EXIT(zfsvfs);
2e528b49 209 return (SET_ERROR(EACCES));
126400a1
BB
210 }
211 }
212
213 /* Keep a count of the synchronous opens in the znode */
214 if (flag & O_SYNC)
215 atomic_inc_32(&zp->z_sync_cnt);
216
0037b49e 217 ZFS_EXIT(zfsvfs);
126400a1
BB
218 return (0);
219}
126400a1
BB
220
221/* ARGSUSED */
222int
223zfs_close(struct inode *ip, int flag, cred_t *cr)
224{
225 znode_t *zp = ITOZ(ip);
0037b49e 226 zfsvfs_t *zfsvfs = ITOZSB(ip);
126400a1 227
0037b49e 228 ZFS_ENTER(zfsvfs);
126400a1
BB
229 ZFS_VERIFY_ZP(zp);
230
7dc71949 231 /* Decrement the synchronous opens in the znode */
126400a1 232 if (flag & O_SYNC)
7dc71949 233 atomic_dec_32(&zp->z_sync_cnt);
126400a1 234
0037b49e 235 if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
126400a1
BB
236 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
237 VERIFY(zfs_vscan(ip, cr, 1) == 0);
238
0037b49e 239 ZFS_EXIT(zfsvfs);
8780c539 240 return (0);
126400a1 241}
126400a1 242
802e7b5f 243#if defined(SEEK_HOLE) && defined(SEEK_DATA)
cf91b2b6 244/*
802e7b5f
LD
245 * Lseek support for finding holes (cmd == SEEK_HOLE) and
246 * data (cmd == SEEK_DATA). "off" is an in/out parameter.
cf91b2b6
MA
247 */
248static int
802e7b5f 249zfs_holey_common(struct inode *ip, int cmd, loff_t *off)
cf91b2b6 250{
802e7b5f 251 znode_t *zp = ITOZ(ip);
cf91b2b6
MA
252 uint64_t noff = (uint64_t)*off; /* new offset */
253 uint64_t file_sz;
254 int error;
255 boolean_t hole;
256
257 file_sz = zp->z_size;
258 if (noff >= file_sz) {
2e528b49 259 return (SET_ERROR(ENXIO));
cf91b2b6
MA
260 }
261
802e7b5f 262 if (cmd == SEEK_HOLE)
cf91b2b6
MA
263 hole = B_TRUE;
264 else
265 hole = B_FALSE;
266
802e7b5f 267 error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
cf91b2b6 268
d97aa48f 269 if (error == ESRCH)
2e528b49 270 return (SET_ERROR(ENXIO));
d97aa48f 271
6e03ec4f
DB
272 /* file was dirty, so fall back to using generic logic */
273 if (error == EBUSY) {
274 if (hole)
275 *off = file_sz;
276
277 return (0);
278 }
66aca247 279
d97aa48f
MA
280 /*
281 * We could find a hole that begins after the logical end-of-file,
282 * because dmu_offset_next() only works on whole blocks. If the
283 * EOF falls mid-block, then indicate that the "virtual hole"
284 * at the end of the file begins at the logical EOF, rather than
285 * at the end of the last block.
286 */
287 if (noff > file_sz) {
288 ASSERT(hole);
289 noff = file_sz;
cf91b2b6
MA
290 }
291
292 if (noff < *off)
293 return (error);
294 *off = noff;
295 return (error);
296}
802e7b5f
LD
297
298int
299zfs_holey(struct inode *ip, int cmd, loff_t *off)
300{
301 znode_t *zp = ITOZ(ip);
0037b49e 302 zfsvfs_t *zfsvfs = ITOZSB(ip);
802e7b5f
LD
303 int error;
304
0037b49e 305 ZFS_ENTER(zfsvfs);
802e7b5f
LD
306 ZFS_VERIFY_ZP(zp);
307
308 error = zfs_holey_common(ip, cmd, off);
309
0037b49e 310 ZFS_EXIT(zfsvfs);
802e7b5f
LD
311 return (error);
312}
802e7b5f 313#endif /* SEEK_HOLE && SEEK_DATA */
cf91b2b6 314
c0d35759 315#if defined(_KERNEL)
34dc7c2f
BB
/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Write:	If we find a memory mapped page, we write to *both*
 *		the page and the dmu buffer.
 *
 * Called after a successful dmu write: re-reads the just-written range
 * from the DMU (object 'oid' in objset 'os') into any resident page
 * cache pages covering [start, start+len), so mmap'd readers see the
 * new data.  Pages not in the page cache are skipped.
 */
static void
update_pages(struct inode *ip, int64_t start, int len,
    objset_t *os, uint64_t oid)
{
	struct address_space *mp = ip->i_mapping;
	struct page *pp;
	uint64_t nbytes;
	int64_t off;	/* offset into the first (possibly partial) page */
	void *pb;

	off = start & (PAGE_SIZE-1);
	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
		nbytes = MIN(PAGE_SIZE - off, len);

		/* Only update pages already resident in the page cache. */
		pp = find_lock_page(mp, start >> PAGE_SHIFT);
		if (pp) {
			/* Flush before and after copying for writable maps. */
			if (mapping_writably_mapped(mp))
				flush_dcache_page(pp);

			pb = kmap(pp);
			(void) dmu_read(os, oid, start+off, nbytes, pb+off,
			    DMU_READ_PREFETCH);
			kunmap(pp);

			if (mapping_writably_mapped(mp))
				flush_dcache_page(pp);

			mark_page_accessed(pp);
			SetPageUptodate(pp);
			ClearPageError(pp);
			unlock_page(pp);
			put_page(pp);
		}

		len -= nbytes;
		off = 0;	/* subsequent pages start page-aligned */
	}
}
361
/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Read:	We "read" preferentially from memory mapped pages,
 *		else we default from the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	the file is memory mapped.
 *
 * Returns 0 on success or the first uiomove/dmu_read_uio_dbuf error.
 */
static int
mappedread(struct inode *ip, int nbytes, uio_t *uio)
{
	struct address_space *mp = ip->i_mapping;
	struct page *pp;
	znode_t *zp = ITOZ(ip);
	int64_t start, off;
	uint64_t bytes;
	int len = nbytes;
	int error = 0;
	void *pb;

	start = uio->uio_loffset;
	off = start & (PAGE_SIZE-1);
	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
		bytes = MIN(PAGE_SIZE - off, len);

		pp = find_lock_page(mp, start >> PAGE_SHIFT);
		if (pp) {
			/*
			 * Page is resident: copy straight out of the page
			 * cache.  It is unlocked before the copy; the page
			 * reference keeps it alive.
			 */
			ASSERT(PageUptodate(pp));
			unlock_page(pp);

			pb = kmap(pp);
			error = uiomove(pb + off, bytes, UIO_READ, uio);
			kunmap(pp);

			if (mapping_writably_mapped(mp))
				flush_dcache_page(pp);

			mark_page_accessed(pp);
			put_page(pp);
		} else {
			/* Page not cached: read through the DMU instead. */
			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, bytes);
		}

		len -= bytes;
		off = 0;	/* subsequent pages start page-aligned */
		if (error)
			break;
	}
	return (error);
}
c0d35759 415#endif /* _KERNEL */
34dc7c2f 416
/* Largest chunk (bytes) handled per iteration of the zfs_read() loop. */
unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */
/* Maximum number of blocks freed per transaction when deleting a file. */
unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
34dc7c2f
BB
419
/*
 * Read bytes from specified file into supplied buffer.
 *
 * IN:	ip	- inode of file to be read from.
 *	uio	- structure supplying read location, range info,
 *		  and return buffer.
 *	ioflag	- FSYNC flags; used to provide FRSYNC semantics.
 *		  O_DIRECT flag; used to bypass page cache.
 *	cr	- credentials of caller.
 *
 * OUT:	uio	- updated offset and range, buffer filled.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Side Effects:
 *	inode - atime updated if byte count > 0
 */
/* ARGSUSED */
int
zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
{
	int error = 0;
	boolean_t frsync = B_FALSE;

	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ITOZSB(ip);
	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Quarantined files may not be read. */
	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EACCES));
	}

	/*
	 * Validate file offset
	 */
	if (uio->uio_loffset < (offset_t)0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Fasttrack empty reads
	 */
	if (uio->uio_resid == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

#ifdef FRSYNC
	/*
	 * If we're in FRSYNC mode, sync out this znode before reading it.
	 * Only do this for non-snapshots.
	 *
	 * Some platforms do not support FRSYNC and instead map it
	 * to FSYNC, which results in unnecessary calls to zil_commit. We
	 * only honor FRSYNC requests on platforms which support it.
	 */
	frsync = !!(ioflag & FRSYNC);
#endif
	if (zfsvfs->z_log &&
	    (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
		zil_commit(zfsvfs->z_log, zp->z_id);

	/*
	 * Lock the range against changes.
	 */
	locked_range_t *lr = rangelock_enter(&zp->z_rangelock,
	    uio->uio_loffset, uio->uio_resid, RL_READER);

	/*
	 * If we are reading past end-of-file we can skip
	 * to the end; but we might still need to set atime.
	 */
	if (uio->uio_loffset >= zp->z_size) {
		error = 0;
		goto out;
	}

	ASSERT(uio->uio_loffset < zp->z_size);
	/* Clamp the request to what actually exists in the file. */
	ssize_t n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
	ssize_t start_resid = n;

#ifdef HAVE_UIO_ZEROCOPY
	xuio_t *xuio = NULL;
	if ((uio->uio_extflg == UIO_XUIO) &&
	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
		int nblk;
		int blksz = zp->z_blksz;
		uint64_t offset = uio->uio_loffset;

		xuio = (xuio_t *)uio;
		if ((ISP2(blksz))) {
			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
			    blksz)) / blksz;
		} else {
			/* Non-power-of-2 blocksize implies a single block. */
			ASSERT(offset + n <= blksz);
			nblk = 1;
		}
		(void) dmu_xuio_init(xuio, nblk);

		if (vn_has_cached_data(ip)) {
			/*
			 * For simplicity, we always allocate a full buffer
			 * even if we only expect to read a portion of a block.
			 */
			while (--nblk >= 0) {
				(void) dmu_xuio_add(xuio,
				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
				    blksz), 0, blksz);
			}
		}
	}
#endif /* HAVE_UIO_ZEROCOPY */

	/* Copy out the data in chunks of at most zfs_read_chunk_size. */
	while (n > 0) {
		ssize_t nbytes = MIN(n, zfs_read_chunk_size -
		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));

		if (zp->z_is_mapped && !(ioflag & O_DIRECT)) {
			/* mmap'd file: prefer the page cache copy. */
			error = mappedread(ip, nbytes, uio);
		} else {
			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, nbytes);
		}

		if (error) {
			/* convert checksum errors into IO errors */
			if (error == ECKSUM)
				error = SET_ERROR(EIO);
			break;
		}

		n -= nbytes;
	}

	/* Account for however much was actually transferred. */
	int64_t nread = start_resid - n;
	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread);
	task_io_account_read(nread);
out:
	rangelock_exit(lr);

	ZFS_EXIT(zfsvfs);
	return (error);
}
566
34dc7c2f
BB
/*
 * Write the bytes to a file.
 *
 * IN:	ip	- inode of file to be written to.
 *	uio	- structure supplying write location, range info,
 *		  and data buffer.
 *	ioflag	- FAPPEND flag set if in append mode.
 *		  O_DIRECT flag; used to bypass page cache.
 *	cr	- credentials of caller.
 *
 * OUT:	uio	- updated offset and range.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	ip - ctime|mtime updated if byte count > 0
 */

/* ARGSUSED */
int
zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
{
	int error = 0;
	ssize_t start_resid = uio->uio_resid;

	/*
	 * Fasttrack empty write
	 */
	ssize_t n = start_resid;
	if (n == 0)
		return (0);

	rlim64_t limit = uio->uio_limit;
	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;

	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * Bulk SA update set: mtime, ctime, size and flags are pushed
	 * out together at the end of each chunk's transaction.
	 */
	sa_bulk_attr_t bulk[4];
	int count = 0;
	uint64_t mtime[2], ctime[2];
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
	 */
	if (zfs_is_readonly(zfsvfs)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EROFS));
	}

	/*
	 * If immutable or not appending then return EPERM
	 */
	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
	    (uio->uio_loffset < zp->z_size))) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/*
	 * Validate file offset
	 */
	offset_t woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
	if (woff < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	int max_blksz = zfsvfs->z_max_blksz;
	xuio_t *xuio = NULL;

	/*
	 * Pre-fault the pages to ensure slow (eg NFS) pages
	 * don't hold up txg.
	 * Skip this if uio contains loaned arc_buf.
	 */
#ifdef HAVE_UIO_ZEROCOPY
	if ((uio->uio_extflg == UIO_XUIO) &&
	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
		xuio = (xuio_t *)uio;
	else
#endif
	if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EFAULT));
	}

	/*
	 * If in append mode, set the io offset pointer to eof.
	 */
	locked_range_t *lr;
	if (ioflag & FAPPEND) {
		/*
		 * Obtain an appending range lock to guarantee file append
		 * semantics.  We reset the write offset once we have the lock.
		 */
		lr = rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND);
		woff = lr->lr_offset;
		if (lr->lr_length == UINT64_MAX) {
			/*
			 * We overlocked the file because this write will cause
			 * the file block size to increase.
			 * Note that zp_size cannot change with this lock held.
			 */
			woff = zp->z_size;
		}
		uio->uio_loffset = woff;
	} else {
		/*
		 * Note that if the file block size will change as a result of
		 * this write, then this range lock will lock the entire file
		 * so that we can re-write the block safely.
		 */
		lr = rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
	}

	if (woff >= limit) {
		rangelock_exit(lr);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EFBIG));
	}

	/* Trim the request so it does not extend past the resource limit. */
	if ((woff + n) > limit || woff > (limit - n))
		n = limit - woff;

	/* Will this write extend the file length? */
	int write_eof = (woff + n > zp->z_size);

	uint64_t end_size = MAX(zp->z_size, woff + n);
	zilog_t *zilog = zfsvfs->z_log;
#ifdef HAVE_UIO_ZEROCOPY
	int i_iov = 0;
	const iovec_t *iovp = uio->uio_iov;
	ASSERTV(int iovcnt = uio->uio_iovcnt);
#endif

	/*
	 * Write the file in reasonable size chunks.  Each chunk is written
	 * in a separate transaction; this keeps the intent log records small
	 * and allows us to do more fine-grained space accounting.
	 */
	while (n > 0) {
		woff = uio->uio_loffset;

		/* Enforce user, group and project block quotas. */
		if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT,
		    KUID_TO_SUID(ip->i_uid)) ||
		    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT,
		    KGID_TO_SGID(ip->i_gid)) ||
		    (zp->z_projid != ZFS_DEFAULT_PROJID &&
		    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
		    zp->z_projid))) {
			error = SET_ERROR(EDQUOT);
			break;
		}

		arc_buf_t *abuf = NULL;
		const iovec_t *aiov = NULL;
		if (xuio) {
#ifdef HAVE_UIO_ZEROCOPY
			/* Zero-copy path: take the loaned buffer directly. */
			ASSERT(i_iov < iovcnt);
			ASSERT3U(uio->uio_segflg, !=, UIO_BVEC);
			aiov = &iovp[i_iov];
			abuf = dmu_xuio_arcbuf(xuio, i_iov);
			dmu_xuio_clear(xuio, i_iov);
			ASSERT((aiov->iov_base == abuf->b_data) ||
			    ((char *)aiov->iov_base - (char *)abuf->b_data +
			    aiov->iov_len == arc_buf_size(abuf)));
			i_iov++;
#endif
		} else if (n >= max_blksz && woff >= zp->z_size &&
		    P2PHASE(woff, max_blksz) == 0 &&
		    zp->z_blksz == max_blksz) {
			/*
			 * This write covers a full block.  "Borrow" a buffer
			 * from the dmu so that we can fill it before we enter
			 * a transaction.  This avoids the possibility of
			 * holding up the transaction if the data copy hangs
			 * up on a pagefault (e.g., from an NFS server mapping).
			 */
			size_t cbytes;

			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
			    max_blksz);
			ASSERT(abuf != NULL);
			ASSERT(arc_buf_size(abuf) == max_blksz);
			if ((error = uiocopy(abuf->b_data, max_blksz,
			    UIO_WRITE, uio, &cbytes))) {
				dmu_return_arcbuf(abuf);
				break;
			}
			ASSERT(cbytes == max_blksz);
		}

		/*
		 * Start a transaction.
		 */
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			if (abuf != NULL)
				dmu_return_arcbuf(abuf);
			break;
		}

		/*
		 * If rangelock_enter() over-locked we grow the blocksize
		 * and then reduce the lock range.  This will only happen
		 * on the first iteration since rangelock_reduce() will
		 * shrink down lr_length to the appropriate size.
		 */
		if (lr->lr_length == UINT64_MAX) {
			uint64_t new_blksz;

			if (zp->z_blksz > max_blksz) {
				/*
				 * File's blocksize is already larger than the
				 * "recordsize" property.  Only let it grow to
				 * the next power of 2.
				 */
				ASSERT(!ISP2(zp->z_blksz));
				new_blksz = MIN(end_size,
				    1 << highbit64(zp->z_blksz));
			} else {
				new_blksz = MIN(end_size, max_blksz);
			}
			zfs_grow_blocksize(zp, new_blksz, tx);
			rangelock_reduce(lr, woff, n);
		}

		/*
		 * XXX - should we really limit each write to z_max_blksz?
		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
		 */
		ssize_t nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));

		ssize_t tx_bytes;
		if (abuf == NULL) {
			/*
			 * Copy directly from the uio.  Page faults are
			 * disabled while in the tx; on EFAULT we commit,
			 * pre-fault the pages outside the tx and retry.
			 */
			tx_bytes = uio->uio_resid;
			uio->uio_fault_disable = B_TRUE;
			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, nbytes, tx);
			uio->uio_fault_disable = B_FALSE;
			if (error == EFAULT) {
				dmu_tx_commit(tx);
				if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
					break;
				}
				continue;
			} else if (error != 0) {
				dmu_tx_commit(tx);
				break;
			}
			tx_bytes -= uio->uio_resid;
		} else {
			tx_bytes = nbytes;
			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
			/*
			 * If this is not a full block write, but we are
			 * extending the file past EOF and this data starts
			 * block-aligned, use assign_arcbuf().  Otherwise,
			 * write via dmu_write().
			 */
			if (tx_bytes < max_blksz && (!write_eof ||
			    aiov->iov_base != abuf->b_data)) {
				ASSERT(xuio);
				dmu_write(zfsvfs->z_os, zp->z_id, woff,
				    /* cppcheck-suppress nullPointer */
				    aiov->iov_len, aiov->iov_base, tx);
				dmu_return_arcbuf(abuf);
				xuio_stat_wbuf_copied();
			} else {
				ASSERT(xuio || tx_bytes == max_blksz);
				error = dmu_assign_arcbuf_by_dbuf(
				    sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
				if (error != 0) {
					dmu_return_arcbuf(abuf);
					dmu_tx_commit(tx);
					break;
				}
			}
			ASSERT(tx_bytes <= uio->uio_resid);
			uioskip(uio, tx_bytes);
		}
		/* Keep any mmap'd pages coherent with what we just wrote. */
		if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) {
			update_pages(ip, woff,
			    tx_bytes, zfsvfs->z_os, zp->z_id);
		}

		/*
		 * If we made no progress, we're done.  If we made even
		 * partial progress, update the znode and ZIL accordingly.
		 */
		if (tx_bytes == 0) {
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
			    (void *)&zp->z_size, sizeof (uint64_t), tx);
			dmu_tx_commit(tx);
			ASSERT(error != 0);
			break;
		}

		/*
		 * Clear Set-UID/Set-GID bits on successful write if not
		 * privileged and at least one of the execute bits is set.
		 *
		 * It would be nice to do this after all writes have
		 * been done, but that would still expose the ISUID/ISGID
		 * to another app after the partial write is committed.
		 *
		 * Note: we don't call zfs_fuid_map_id() here because
		 * user 0 is not an ephemeral uid.
		 */
		mutex_enter(&zp->z_acl_lock);
		uint32_t uid = KUID_TO_SUID(ip->i_uid);
		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
		    (S_IXUSR >> 6))) != 0 &&
		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
		    secpolicy_vnode_setid_retain(cr,
		    ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
			uint64_t newmode;
			zp->z_mode &= ~(S_ISUID | S_ISGID);
			ip->i_mode = newmode = zp->z_mode;
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
			    (void *)&newmode, sizeof (uint64_t), tx);
		}
		mutex_exit(&zp->z_acl_lock);

		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);

		/*
		 * Update the file size (zp_size) if it has changed;
		 * account for possible concurrent updates.
		 */
		while ((end_size = zp->z_size) < uio->uio_loffset) {
			(void) atomic_cas_64(&zp->z_size, end_size,
			    uio->uio_loffset);
			ASSERT(error == 0);
		}
		/*
		 * If we are replaying and eof is non zero then force
		 * the file size to the specified eof.  Note, there's no
		 * concurrency during replay.
		 */
		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
			zp->z_size = zfsvfs->z_replay_eof;

		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

		/* Log the write before committing (Big Rule 5). */
		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
		    NULL, NULL);
		dmu_tx_commit(tx);

		if (error != 0)
			break;
		ASSERT(tx_bytes == nbytes);
		n -= nbytes;

		/* Pre-fault the next chunk's pages outside the tx. */
		if (!xuio && n > 0) {
			if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
				error = EFAULT;
				break;
			}
		}
	}

	zfs_inode_update(zp);
	rangelock_exit(lr);

	/*
	 * If we're in replay mode, or we made no progress, return error.
	 * Otherwise, it's at least a partial write, so it's successful.
	 */
	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (ioflag & (FSYNC | FDSYNC) ||
	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, zp->z_id);

	/* Account for the bytes actually written. */
	int64_t nwritten = start_resid - uio->uio_resid;
	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
	task_io_account_write(nwritten);

	ZFS_EXIT(zfsvfs);
	return (0);
}
971
/*
 * Drop a reference on the passed inode asynchronously. This ensures
 * that the caller will never drop the last reference on an inode in
 * the current context. Doing so while holding open a tx could result
 * in a deadlock if iput_final() re-enters the filesystem code.
 */
void
zfs_iput_async(struct inode *ip)
{
	objset_t *os = ITOZSB(ip)->z_os;

	ASSERT(atomic_read(&ip->i_count) > 0);
	ASSERT(os != NULL);

	/*
	 * If we hold the only remaining reference, hand the final iput()
	 * off to the pool's dedicated iput taskq so that iput_final()
	 * runs in a context where it may safely re-enter the filesystem.
	 * Otherwise the count cannot drop to zero here and a direct
	 * iput() is safe.
	 */
	if (atomic_read(&ip->i_count) == 1)
		VERIFY(taskq_dispatch(dsl_pool_iput_taskq(dmu_objset_pool(os)),
		    (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID);
	else
		iput(ip);
}
992
/*
 * Completion callback for dmu_sync() issued from zfs_get_data().
 * Releases every resource attached to the zgd: the held dbuf (if any),
 * the range lock, the znode hold, and finally the zgd itself.
 * 'error' is unused here (ARGSUSED) but is part of the callback contract.
 */
/* ARGSUSED */
void
zfs_get_done(zgd_t *zgd, int error)
{
	znode_t *zp = zgd->zgd_private;

	if (zgd->zgd_db)
		dmu_buf_rele(zgd->zgd_db, zgd);

	rangelock_exit(zgd->zgd_lr);

	/*
	 * Release the vnode asynchronously as we currently have the
	 * txg stopped from syncing.
	 */
	zfs_iput_async(ZTOI(zp));

	kmem_free(zgd, sizeof (zgd_t));
}
1012
#ifdef DEBUG
/* When set, force the next indirect-write lookup to fail with EIO (test hook). */
static int zil_fault_io = 0;
#endif

/*
 * Get data to generate a TX_WRITE intent log record.
 *
 * IN:	arg	- zfsvfs_t for the filesystem being logged.
 *	lr	- partially-filled TX_WRITE log record (foid/offset/length).
 *	buf	- if non-NULL, copy the data here (immediate write);
 *		  if NULL, sync the block and record its pointer (indirect).
 *	lwb	- log write block the record belongs to.
 *	zio	- parent zio for the dmu_sync() I/O.
 *
 * RETURN:	0 on success (for indirect writes, completion is finished
 *		asynchronously in zfs_get_done()); error code on failure.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t object = lr->lr_foid;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error = 0;

	ASSERT3P(lwb, !=, NULL);
	ASSERT3P(zio, !=, NULL);
	ASSERT3U(size, !=, 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, object, &zp) != 0)
		return (SET_ERROR(ENOENT));
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		zfs_iput_async(ZTOI(zp));
		return (SET_ERROR(ENOENT));
	}

	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_lwb = lwb;
	zgd->zgd_private = zp;

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		zgd->zgd_lr = rangelock_enter(&zp->z_rangelock,
		    offset, size, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (offset >= zp->z_size) {
			error = SET_ERROR(ENOENT);
		} else {
			error = dmu_read(os, object, offset, size, buf,
			    DMU_READ_NO_PREFETCH);
		}
		ASSERT(error == 0 || error == ENOENT);
	} else { /* indirect write */
		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and its checksum is being calculated
		 * that no one can change the data. We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			uint64_t blkoff;
			size = zp->z_blksz;
			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
			offset -= blkoff;
			zgd->zgd_lr = rangelock_enter(&zp->z_rangelock,
			    offset, size, RL_READER);
			if (zp->z_blksz == size)
				break;
			/* Block size changed under us; drop lock and retry. */
			offset += blkoff;
			rangelock_exit(zgd->zgd_lr);
		}
		/* test for truncation needs to be done while range locked */
		if (lr->lr_offset >= zp->z_size)
			error = SET_ERROR(ENOENT);
#ifdef DEBUG
		if (zil_fault_io) {
			error = SET_ERROR(EIO);
			zil_fault_io = 0;
		}
#endif
		if (error == 0)
			error = dmu_buf_hold(os, object, offset, zgd, &db,
			    DMU_READ_NO_PREFETCH);

		if (error == 0) {
			blkptr_t *bp = &lr->lr_blkptr;

			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zfs_get_done, zgd);
			ASSERT(error || lr->lr_length <= size);

			/*
			 * On success, we need to wait for the write I/O
			 * initiated by dmu_sync() to complete before we can
			 * release this dbuf.  We will finish everything up
			 * in the zfs_get_done() callback.
			 */
			if (error == 0)
				return (0);

			if (error == EALREADY) {
				lr->lr_common.lrc_txtype = TX_WRITE2;
				/*
				 * TX_WRITE2 relies on the data previously
				 * written by the TX_WRITE that caused
				 * EALREADY.  We zero out the BP because
				 * it is the old, currently-on-disk BP.
				 */
				zgd->zgd_bp = NULL;
				BP_ZERO(bp);
				error = 0;
			}
		}
	}

	/* Error or immediate-write path: tear down the zgd synchronously. */
	zfs_get_done(zgd, error);

	return (error);
}
1146
/*
 * Check access permissions on an inode.
 *
 * IN:	ip	- inode to check access against.
 *	mode	- requested access mode bits.
 *	flag	- V_ACE_MASK selects native ACE-mask semantics;
 *		  otherwise mode is treated as rwx-style bits.
 *	cr	- credentials of caller.
 *
 * RETURN:	0 if access is allowed, error code otherwise.
 */
/*ARGSUSED*/
int
zfs_access(struct inode *ip, int mode, int flag, cred_t *cr)
{
	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ITOZSB(ip);
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}
45d1cae3 1166
/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held inode reference for it.
 *
 * IN:	dip	- inode of directory to search.
 *	nm	- name of entry to lookup.
 *	flags	- LOOKUP_XATTR set if looking for an attribute.
 *	cr	- credentials of caller.
 *	direntflags - directory lookup flags
 *	realpnp - returned pathname.
 *
 * OUT:	ipp	- inode of located entry, NULL if not found.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	NA
 */
/* ARGSUSED */
int
zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
    cred_t *cr, int *direntflags, pathname_t *realpnp)
{
	znode_t *zdp = ITOZ(dip);
	zfsvfs_t *zfsvfs = ITOZSB(dip);
	int error = 0;

	/*
	 * Fast path lookup, however we must skip DNLC lookup
	 * for case folding or normalizing lookups because the
	 * DNLC code only stores the passed in name.  This means
	 * creating 'a' and removing 'A' on a case insensitive
	 * file system would work, but DNLC still thinks 'a'
	 * exists and won't let you create it again on the next
	 * pass through fast path.
	 */
	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {

		if (!S_ISDIR(dip->i_mode)) {
			return (SET_ERROR(ENOTDIR));
		} else if (zdp->z_sa_hdl == NULL) {
			return (SET_ERROR(EIO));
		}

		/* "" and "." both name the directory itself. */
		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
			error = zfs_fastaccesschk_execute(zdp, cr);
			if (!error) {
				*ipp = dip;
				igrab(*ipp);
				return (0);
			}
			return (error);
		}
	}

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

	*ipp = NULL;

	if (flags & LOOKUP_XATTR) {
		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EINVAL));
		}

		if ((error = zfs_get_xattrdir(zdp, ipp, cr, flags))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

		/*
		 * Do we have permission to get into attribute directory?
		 */
		if ((error = zfs_zaccess(ITOZ(*ipp), ACE_EXECUTE, 0,
		    B_FALSE, cr))) {
			iput(*ipp);
			*ipp = NULL;
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (!S_ISDIR(dip->i_mode)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOTDIR));
	}

	/*
	 * Check accessibility of directory.
	 */
	if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	error = zfs_dirlook(zdp, nm, ipp, flags, direntflags, realpnp);
	if ((error == 0) && (*ipp))
		zfs_inode_update(ITOZ(*ipp));

	ZFS_EXIT(zfsvfs);
	return (error);
}
1283
/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the ip of the created or trunc'd file.
 *
 * IN:	dip	- inode of directory to put new file entry in.
 *	name	- name of new file entry.
 *	vap	- attributes of new file.
 *	excl	- flag indicating exclusive or non-exclusive mode.
 *	mode	- mode to open file with.
 *	cr	- credentials of caller.
 *	flag	- file flag.
 *	vsecp	- ACL to be set
 *
 * OUT:	ipp	- inode of created or trunc'd entry.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dip - ctime|mtime updated if new entry created
 *	 ip - ctime|mtime always, atime if new
 */

/* ARGSUSED */
int
zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
	znode_t *zp, *dzp = ITOZ(dip);
	zfsvfs_t *zfsvfs = ITOZSB(dip);
	zilog_t *zilog;
	objset_t *os;
	zfs_dirlock_t *dl;
	dmu_tx_t *tx;
	int error;
	uid_t uid;
	gid_t gid;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	boolean_t have_acl = B_FALSE;
	boolean_t waited = B_FALSE;

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */
	gid = crgetgid(cr);
	uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	if (name == NULL)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & ATTR_XVATTR) {
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_mode)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

top:
	*ipp = NULL;
	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		igrab(dip);
		zp = dzp;
		dl = NULL;
		error = 0;
	} else {
		/* possible igrab(zp) */
		int zflg = 0;

		if (flag & FIGNORECASE)
			zflg |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
		    NULL, NULL);
		if (error) {
			if (have_acl)
				zfs_acl_ids_free(&acl_ids);
			if (strcmp(name, "..") == 0)
				error = SET_ERROR(EISDIR);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	if (zp == NULL) {
		uint64_t txtype;
		uint64_t projid = ZFS_DEFAULT_PROJID;

		/*
		 * Create a new file object and update the directory
		 * to reference it.
		 */
		if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
			if (have_acl)
				zfs_acl_ids_free(&acl_ids);
			goto out;
		}

		/*
		 * We only support the creation of regular files in
		 * extended attribute directories.
		 */
		if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) {
			if (have_acl)
				zfs_acl_ids_free(&acl_ids);
			error = SET_ERROR(EINVAL);
			goto out;
		}

		if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
		    cr, vsecp, &acl_ids)) != 0)
			goto out;
		have_acl = B_TRUE;

		if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
			projid = zfs_inherit_projid(dzp);
		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
			zfs_acl_ids_free(&acl_ids);
			error = SET_ERROR(EDQUOT);
			goto out;
		}

		tx = dmu_tx_create(os);

		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
		    ZFS_SA_BASE_ATTR_SIZE);

		fuid_dirtied = zfsvfs->z_fuid_dirty;
		if (fuid_dirtied)
			zfs_fuid_txhold(zfsvfs, tx);
		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		if (!zfsvfs->z_use_sa &&
		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, acl_ids.z_aclp->z_acl_bytes);
		}

		error = dmu_tx_assign(tx,
		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
		if (error) {
			zfs_dirent_unlock(dl);
			if (error == ERESTART) {
				/* Txg full: wait once, then retry without throttling. */
				waited = B_TRUE;
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			zfs_acl_ids_free(&acl_ids);
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

		error = zfs_link_create(dl, zp, tx, ZNEW);
		if (error != 0) {
			/*
			 * Since, we failed to add the directory entry for it,
			 * delete the newly created dnode.
			 */
			zfs_znode_delete(zp, tx);
			remove_inode_hash(ZTOI(zp));
			zfs_acl_ids_free(&acl_ids);
			dmu_tx_commit(tx);
			goto out;
		}

		if (fuid_dirtied)
			zfs_fuid_sync(zfsvfs, tx);

		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
		if (flag & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
		    vsecp, acl_ids.z_fuidp, vap);
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_commit(tx);
	} else {
		int aflags = (flag & FAPPEND) ? V_APPEND : 0;

		if (have_acl)
			zfs_acl_ids_free(&acl_ids);
		have_acl = B_FALSE;

		/*
		 * A directory entry already exists for this name.
		 */
		/*
		 * Can't truncate an existing file if in exclusive mode.
		 */
		if (excl) {
			error = SET_ERROR(EEXIST);
			goto out;
		}
		/*
		 * Can't open a directory for writing.
		 */
		if (S_ISDIR(ZTOI(zp)->i_mode)) {
			error = SET_ERROR(EISDIR);
			goto out;
		}
		/*
		 * Verify requested access to file.
		 */
		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
			goto out;
		}

		mutex_enter(&dzp->z_lock);
		dzp->z_seq++;
		mutex_exit(&dzp->z_lock);

		/*
		 * Truncate regular files if requested.
		 */
		if (S_ISREG(ZTOI(zp)->i_mode) &&
		    (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) {
			/* we can't hold any locks when calling zfs_freesp() */
			if (dl) {
				zfs_dirent_unlock(dl);
				dl = NULL;
			}
			error = zfs_freesp(zp, 0, 0, mode, TRUE);
		}
	}
out:

	if (dl)
		zfs_dirent_unlock(dl);

	if (error) {
		if (zp)
			iput(ZTOI(zp));
	} else {
		zfs_inode_update(dzp);
		zfs_inode_update(zp);
		*ipp = ZTOI(zp);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}
1552
/*
 * Create an anonymous (unlinked) file object, as used for O_TMPFILE
 * support: the znode is created with IS_TMPFILE, never linked into the
 * directory namespace, and is immediately placed on the unlinked set so
 * it is reclaimed when the last hold is dropped.
 *
 * IN:	dip	- inode of directory the tmpfile is nominally created in
 *		  (used for ACL/project inheritance and permission checks).
 *	vap	- attributes of new file.
 *	excl	- unused here; kept for signature parity with zfs_create().
 *	mode	- unused here; kept for signature parity with zfs_create().
 *	cr	- credentials of caller.
 *	flag	- file flag (unused in this path).
 *	vsecp	- ACL to be set.
 *
 * OUT:	ipp	- inode of the created unlinked file.
 *
 * RETURN:	0 on success, error code on failure.
 */
/* ARGSUSED */
int
zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
	znode_t *zp = NULL, *dzp = ITOZ(dip);
	zfsvfs_t *zfsvfs = ITOZSB(dip);
	objset_t *os;
	dmu_tx_t *tx;
	int error;
	uid_t uid;
	gid_t gid;
	zfs_acl_ids_t acl_ids;
	uint64_t projid = ZFS_DEFAULT_PROJID;
	boolean_t fuid_dirtied;
	boolean_t have_acl = B_FALSE;
	boolean_t waited = B_FALSE;

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */
	gid = crgetgid(cr);
	uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;

	if (vap->va_mask & ATTR_XVATTR) {
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_mode)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

top:
	*ipp = NULL;

	/*
	 * Create a new file object and update the directory
	 * to reference it.
	 */
	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		if (have_acl)
			zfs_acl_ids_free(&acl_ids);
		goto out;
	}

	if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
	    cr, vsecp, &acl_ids)) != 0)
		goto out;
	have_acl = B_TRUE;

	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
		projid = zfs_inherit_projid(dzp);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
		zfs_acl_ids_free(&acl_ids);
		error = SET_ERROR(EDQUOT);
		goto out;
	}

	tx = dmu_tx_create(os);

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);
	/* The new object goes straight onto the unlinked-set ZAP. */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	if (!zfsvfs->z_use_sa &&
	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, acl_ids.z_aclp->z_acl_bytes);
	}
	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error) {
		if (error == ERESTART) {
			/* Txg full: wait once, then retry without throttling. */
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/* Add to unlinked set */
	zp->z_unlinked = 1;
	zfs_unlinked_add(zp, tx);
	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);
out:

	if (error) {
		if (zp)
			iput(ZTOI(zp));
	} else {
		zfs_inode_update(dzp);
		zfs_inode_update(zp);
		*ipp = ZTOI(zp);
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}
1672
/*
 * Remove an entry from a directory.
 *
 * IN:	dip	- inode of directory to remove entry from.
 *	name	- name of entry to remove.
 *	cr	- credentials of caller.
 *	flags	- FIGNORECASE for case-insensitive lookup.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dip - ctime|mtime
 *	 ip - ctime (if nlink > 0)
 */

/* Zero value written over SA_ZPL_XATTR when the xattr dir is destroyed. */
uint64_t null_xattr = 0;

/*ARGSUSED*/
int
zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags)
{
	znode_t *zp, *dzp = ITOZ(dip);
	znode_t *xzp;
	struct inode *ip;
	zfsvfs_t *zfsvfs = ITOZSB(dip);
	zilog_t *zilog;
	uint64_t acl_obj, xattr_obj;
	uint64_t xattr_obj_unlinked = 0;
	uint64_t obj = 0;
	uint64_t links;
	zfs_dirlock_t *dl;
	dmu_tx_t *tx;
	boolean_t may_delete_now, delete_now = FALSE;
	boolean_t unlinked, toobig = FALSE;
	uint64_t txtype;
	pathname_t *realnmp = NULL;
	pathname_t realnm;
	int error;
	int zflg = ZEXISTS;
	boolean_t waited = B_FALSE;

	if (name == NULL)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE) {
		zflg |= ZCILOOK;
		pn_alloc(&realnm);
		realnmp = &realnm;
	}

top:
	xattr_obj = 0;
	xzp = NULL;
	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, realnmp))) {
		if (realnmp)
			pn_free(realnmp);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	ip = ZTOI(zp);

	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (S_ISDIR(ip->i_mode)) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	/* Last hold and not mmap'd: the object itself may be freed here. */
	mutex_enter(&zp->z_lock);
	may_delete_now = atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped);
	mutex_exit(&zp->z_lock);

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the inode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	obj = zp->z_id;
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	if (may_delete_now) {
		toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks;
		/* if the file is too big, only hold_free a token amount */
		dmu_tx_hold_free(tx, zp->z_id, 0,
		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
	}

	/* are there any extended attributes? */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT0(error);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}

	mutex_enter(&zp->z_lock);
	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	mutex_exit(&zp->z_lock);

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	/*
	 * Mark this transaction as typically resulting in a net free of space
	 */
	dmu_tx_mark_netfree(tx);

	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			/* Txg full: drop holds, wait, and restart from top. */
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			iput(ip);
			if (xzp)
				iput(ZTOI(xzp));
			goto top;
		}
		if (realnmp)
			pn_free(realnmp);
		dmu_tx_abort(tx);
		iput(ip);
		if (xzp)
			iput(ZTOI(xzp));
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	if (unlinked) {
		/*
		 * Hold z_lock so that we can make sure that the ACL obj
		 * hasn't changed.  Could have been deleted due to
		 * zfs_sa_upgrade().
		 */
		mutex_enter(&zp->z_lock);
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
		/* Re-verify that nothing changed since the pre-tx check. */
		delete_now = may_delete_now && !toobig &&
		    atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped) &&
		    xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
		    acl_obj;
	}

	if (delete_now) {
		if (xattr_obj_unlinked) {
			ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2);
			mutex_enter(&xzp->z_lock);
			xzp->z_unlinked = 1;
			clear_nlink(ZTOI(xzp));
			links = 0;
			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
			    &links, sizeof (links), tx);
			ASSERT3U(error, ==, 0);
			mutex_exit(&xzp->z_lock);
			zfs_unlinked_add(xzp, tx);

			if (zp->z_is_sa)
				error = sa_remove(zp->z_sa_hdl,
				    SA_ZPL_XATTR(zfsvfs), tx);
			else
				error = sa_update(zp->z_sa_hdl,
				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
				    sizeof (uint64_t), tx);
			ASSERT0(error);
		}
		/*
		 * Add to the unlinked set because a new reference could be
		 * taken concurrently resulting in a deferred destruction.
		 */
		zfs_unlinked_add(zp, tx);
		mutex_exit(&zp->z_lock);
	} else if (unlinked) {
		mutex_exit(&zp->z_lock);
		zfs_unlinked_add(zp, tx);
	}

	txtype = TX_REMOVE;
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_remove(zilog, tx, txtype, dzp, name, obj);

	dmu_tx_commit(tx);
out:
	if (realnmp)
		pn_free(realnmp);

	zfs_dirent_unlock(dl);
	zfs_inode_update(dzp);
	zfs_inode_update(zp);

	if (delete_now)
		iput(ip);
	else
		zfs_iput_async(ip);

	if (xzp) {
		zfs_inode_update(xzp);
		zfs_iput_async(ZTOI(xzp));
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}
1911
1912/*
3558fd73 1913 * Create a new directory and insert it into dip using the name
34dc7c2f
BB
1914 * provided. Return a pointer to the inserted directory.
1915 *
/*
 * Create a new directory and insert it into dip using the name
 * provided.  Return a pointer to the inserted directory.
 *
 * IN:	dip	- inode of directory to add subdir to.
 *	dirname	- name of new directory.
 *	vap	- attributes of new directory.
 *	cr	- credentials of caller.
 *	vsecp	- ACL to be set
 *
 * OUT:	ipp	- inode of created directory.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dip - ctime|mtime updated
 *	ipp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
int
zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
    cred_t *cr, int flags, vsecattr_t *vsecp)
{
	znode_t		*zp, *dzp = ITOZ(dip);
	zfsvfs_t	*zfsvfs = ITOZSB(dip);
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	int		zf = ZNEW;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	boolean_t	waited = B_FALSE;

	ASSERT(S_ISDIR(vap->va_mode));

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	uid = crgetuid(cr);
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	if (dirname == NULL)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Directories may not be created inside an extended-attribute dir. */
	if (dzp->z_pflags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	if (vap->va_mask & ATTR_XVATTR) {
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_mode)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * acl_ids is owned by this function from here on; every exit path
	 * below must release it via zfs_acl_ids_free().
	 */
	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
	    vsecp, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	/*
	 * First make sure the new directory doesn't exist.
	 *
	 * Existence is checked first to make sure we don't return
	 * EACCES instead of EEXIST which can cause some applications
	 * to fail.
	 */
top:
	*ipp = NULL;

	if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
	    NULL, NULL))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * Add a new entry to the directory.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	/*
	 * ERESTART means the txg is throttled/full: wait for it and retry
	 * the whole lookup from "top" (TXG_NOTHROTTLE on the retry so we
	 * cannot be throttled a second time).
	 */
	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	/*
	 * Now put new name in parent dir.
	 */
	error = zfs_link_create(dl, zp, tx, ZNEW);
	if (error != 0) {
		/*
		 * Undo the node creation inside the same tx; the inode is
		 * released (iput) after the tx commits, below.
		 */
		zfs_znode_delete(zp, tx);
		remove_inode_hash(ZTOI(zp));
		goto out;
	}

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	*ipp = ZTOI(zp);

	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
	    acl_ids.z_fuidp, vap);

out:
	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	if (error != 0) {
		iput(ZTOI(zp));
	} else {
		zfs_inode_update(dzp);
		zfs_inode_update(zp);
	}
	ZFS_EXIT(zfsvfs);
	return (error);
}
2103
/*
 * Remove a directory subdir entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 * IN:	dip	- inode of directory to remove from.
 *	name	- name of directory to be removed.
 *	cwd	- inode of current working directory.
 *	cr	- credentials of caller.
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dip - ctime|mtime updated
 */
/*ARGSUSED*/
int
zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
    int flags)
{
	znode_t		*dzp = ITOZ(dip);
	znode_t		*zp;
	struct inode	*ip;
	zfsvfs_t	*zfsvfs = ITOZSB(dip);
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	int		zflg = ZEXISTS;
	boolean_t	waited = B_FALSE;

	if (name == NULL)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;
top:
	zp = NULL;

	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 * On success zp holds a reference that is dropped via iput(ip)
	 * on every exit path.
	 */
	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, NULL))) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	ip = ZTOI(zp);

	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	if (!S_ISDIR(ip->i_mode)) {
		error = SET_ERROR(ENOTDIR);
		goto out;
	}

	/* POSIX: refuse to remove the caller's current working directory. */
	if (ip == cwd) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	/*
	 * Grab a lock on the directory to make sure that no one is
	 * trying to add (or lookup) entries while we are removing it.
	 */
	rw_enter(&zp->z_name_lock, RW_WRITER);

	/*
	 * Grab a lock on the parent pointer to make sure we play well
	 * with the treewalk and directory rename code.
	 */
	rw_enter(&zp->z_parent_lock, RW_WRITER);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	dmu_tx_mark_netfree(tx);
	/*
	 * ERESTART: drop every lock and reference, wait for the txg, and
	 * retry the lookup from "top" with TXG_NOTHROTTLE set.
	 */
	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error) {
		rw_exit(&zp->z_parent_lock);
		rw_exit(&zp->z_name_lock);
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			iput(ip);
			goto top;
		}
		dmu_tx_abort(tx);
		iput(ip);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
	}

	dmu_tx_commit(tx);

	rw_exit(&zp->z_parent_lock);
	rw_exit(&zp->z_name_lock);
out:
	zfs_dirent_unlock(dl);

	zfs_inode_update(dzp);
	zfs_inode_update(zp);
	iput(ip);

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}
2236
/*
 * Read as many directory entries as will fit into the provided
 * dirent buffer from the given directory cursor position.
 *
 * IN:	ip	- inode of directory to read.
 *	dirent	- buffer for directory entries.
 *
 * OUT:	dirent	- filler buffer of directory entries.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	ip - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap is always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
 */
/* ARGSUSED */
int
zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
{
	znode_t		*zp = ITOZ(ip);
	zfsvfs_t	*zfsvfs = ITOZSB(ip);
	objset_t	*os;
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	int		error;
	uint8_t		prefetch;
	uint8_t		type;
	int		done = 0;
	uint64_t	parent;
	uint64_t	offset; /* must be unsigned; checks for < 1 */

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Parent object number backs the ".." entry emitted below. */
	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (parent))) != 0)
		goto out;

	/*
	 * Quit if directory has been removed (posix)
	 */
	if (zp->z_unlinked)
		goto out;

	error = 0;
	os = zfsvfs->z_os;
	offset = ctx->pos;
	prefetch = zp->z_zn_prefetch;

	/*
	 * Initialize the iterator cursor.
	 */
	if (offset <= 3) {
		/*
		 * Start iteration from the beginning of the directory.
		 */
		zap_cursor_init(&zc, os, zp->z_id);
	} else {
		/*
		 * The offset is a serialized cursor.
		 */
		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
	}

	/*
	 * Transform to file-system independent format
	 */
	while (!done) {
		uint64_t objnum;
		/*
		 * Special case `.', `..', and `.zfs'.
		 */
		if (offset == 0) {
			(void) strcpy(zap.za_name, ".");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_id;
			type = DT_DIR;
		} else if (offset == 1) {
			(void) strcpy(zap.za_name, "..");
			zap.za_normalization_conflict = 0;
			objnum = parent;
			type = DT_DIR;
		} else if (offset == 2 && zfs_show_ctldir(zp)) {
			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
			zap.za_normalization_conflict = 0;
			objnum = ZFSCTL_INO_ROOT;
			type = DT_DIR;
		} else {
			/*
			 * Grab next entry.
			 */
			if ((error = zap_cursor_retrieve(&zc, &zap))) {
				if (error == ENOENT)
					break;
				else
					goto update;
			}

			/*
			 * Allow multiple entries provided the first entry is
			 * the object id.  Non-zpl consumers may safely make
			 * use of the additional space.
			 *
			 * XXX: This should be a feature flag for compatibility
			 */
			if (zap.za_integer_length != 8 ||
			    zap.za_num_integers == 0) {
				cmn_err(CE_WARN, "zap_readdir: bad directory "
				    "entry, obj = %lld, offset = %lld, "
				    "length = %d, num = %lld\n",
				    (u_longlong_t)zp->z_id,
				    (u_longlong_t)offset,
				    zap.za_integer_length,
				    (u_longlong_t)zap.za_num_integers);
				error = SET_ERROR(ENXIO);
				goto update;
			}

			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
		}

		/* Emit to the VFS; a full buffer ends this iteration. */
		done = !zpl_dir_emit(ctx, zap.za_name, strlen(zap.za_name),
		    objnum, type);
		if (done)
			break;

		/* Prefetch znode */
		if (prefetch) {
			dmu_prefetch(os, objnum, 0, 0, 0,
			    ZIO_PRIORITY_SYNC_READ);
		}

		/*
		 * Move to the next entry, fill in the previous offset.
		 */
		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
			zap_cursor_advance(&zc);
			offset = zap_cursor_serialize(&zc);
		} else {
			offset += 1;
		}
		ctx->pos = offset;
	}
	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */

update:
	zap_cursor_fini(&zc);
	if (error == ENOENT)
		error = 0;
out:
	ZFS_EXIT(zfsvfs);

	return (error);
}
2397
/*
 * Per-thread fsync nesting hint stored in zfs_fsyncer_key; consumed
 * elsewhere (presumably by the ZIL write path) — TODO confirm reader.
 */
ulong_t zfs_fsync_sync_cnt = 4;

/*
 * Flush any cached data for the file to stable storage by committing
 * the znode's outstanding ZIL records.  Always returns 0; when
 * sync=disabled the commit is skipped entirely.
 */
int
zfs_fsync(struct inode *ip, int syncflag, cred_t *cr)
{
	znode_t	*zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ITOZSB(ip);

	/* Must be set before zil_commit() so the ZIL can observe it. */
	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);

	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		zil_commit(zfsvfs->z_log, zp->z_id);
		ZFS_EXIT(zfsvfs);
	}
	tsd_set(zfs_fsyncer_key, NULL);

	return (0);
}
2418
2419
/*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
 *
 * IN:	ip	- inode of file.
 *	vap	- va_mask identifies requested attributes.
 *		  If ATTR_XVATTR set, then optional attrs are requested
 *	flags	- ATTR_NOACLCHECK (CIFS server context)
 *	cr	- credentials of caller.
 *
 * OUT:	vap	- attribute values.
 *
 * RETURN:	0 (always succeeds)
 */
/* ARGSUSED */
int
zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
{
	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ITOZSB(ip);
	int	error = 0;
	uint64_t links;
	uint64_t atime[2], mtime[2], ctime[2];
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	sa_bulk_attr_t bulk[3];
	int count = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);

	/* Fetch all three timestamps from the SA layer in one lookup. */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);

	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
	 * Also, if we are the owner don't bother, since owner should
	 * always be allowed to read basic attributes of file.
	 */
	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
	    (vap->va_uid != crgetuid(cr))) {
		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
		    skipaclchk, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * Return all attributes.  It's cheaper to provide the answer
	 * than to determine whether we were asked the question.
	 */

	mutex_enter(&zp->z_lock);
	vap->va_type = vn_mode_to_vtype(zp->z_mode);
	vap->va_mode = zp->z_mode;
	vap->va_fsid = ZTOI(zp)->i_sb->s_dev;
	vap->va_nodeid = zp->z_id;
	/* Root with a visible '.zfs' gets one extra link for that entry. */
	if ((zp->z_id == zfsvfs->z_root) && zfs_show_ctldir(zp))
		links = ZTOI(zp)->i_nlink + 1;
	else
		links = ZTOI(zp)->i_nlink;
	vap->va_nlink = MIN(links, ZFS_LINK_MAX);
	vap->va_size = i_size_read(ip);
	vap->va_rdev = ip->i_rdev;
	vap->va_seq = ip->i_generation;

	/*
	 * Add in any requested optional attributes and the create time.
	 * Also set the corresponding bits in the returned attribute bitmap.
	 */
	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
			xoap->xoa_archive =
			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
			XVA_SET_RTN(xvap, XAT_ARCHIVE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
			xoap->xoa_readonly =
			    ((zp->z_pflags & ZFS_READONLY) != 0);
			XVA_SET_RTN(xvap, XAT_READONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
			xoap->xoa_system =
			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
			XVA_SET_RTN(xvap, XAT_SYSTEM);
		}

		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
			xoap->xoa_hidden =
			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
			XVA_SET_RTN(xvap, XAT_HIDDEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			xoap->xoa_nounlink =
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
			XVA_SET_RTN(xvap, XAT_NOUNLINK);
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			xoap->xoa_immutable =
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			xoap->xoa_appendonly =
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
			XVA_SET_RTN(xvap, XAT_APPENDONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			xoap->xoa_nodump =
			    ((zp->z_pflags & ZFS_NODUMP) != 0);
			XVA_SET_RTN(xvap, XAT_NODUMP);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
			xoap->xoa_opaque =
			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
			XVA_SET_RTN(xvap, XAT_OPAQUE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			xoap->xoa_av_quarantined =
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			xoap->xoa_av_modified =
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
		}

		/* Scanstamps only exist on regular files. */
		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
		    S_ISREG(ip->i_mode)) {
			zfs_sa_get_scanstamp(zp, xvap);
		}

		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
			uint64_t times[2];

			(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
			    times, sizeof (times));
			ZFS_TIME_DECODE(&xoap->xoa_createtime, times);
			XVA_SET_RTN(xvap, XAT_CREATETIME);
		}

		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_REPARSE);
		}
		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
			xoap->xoa_generation = ip->i_generation;
			XVA_SET_RTN(xvap, XAT_GEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
			xoap->xoa_offline =
			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
			XVA_SET_RTN(xvap, XAT_OFFLINE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
			xoap->xoa_sparse =
			    ((zp->z_pflags & ZFS_SPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_SPARSE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
			xoap->xoa_projinherit =
			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
			xoap->xoa_projid = zp->z_projid;
			XVA_SET_RTN(xvap, XAT_PROJID);
		}
	}

	ZFS_TIME_DECODE(&vap->va_atime, atime);
	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
	ZFS_TIME_DECODE(&vap->va_ctime, ctime);

	mutex_exit(&zp->z_lock);

	sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks);

	if (zp->z_blksz == 0) {
		/*
		 * Block size hasn't been set; suggest maximal I/O transfers.
		 */
		vap->va_blksize = zfsvfs->z_max_blksz;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}
2632
/*
 * Get the basic file attributes and place them in the provided kstat
 * structure.  The inode is assumed to be the authoritative source
 * for most of the attributes.  However, the znode currently has the
 * authoritative atime, blksize, and block count.
 *
 * IN:	ip	- inode of file.
 *
 * OUT:	sp	- kstat values.
 *
 * RETURN:	0 (always succeeds)
 */
/* ARGSUSED */
int
zfs_getattr_fast(struct inode *ip, struct kstat *sp)
{
	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ITOZSB(ip);
	uint32_t blksize;
	u_longlong_t nblocks;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	mutex_enter(&zp->z_lock);

	generic_fillattr(ip, sp);
	/*
	 * +1 link count for root inode with visible '.zfs' directory.
	 */
	if ((zp->z_id == zfsvfs->z_root) && zfs_show_ctldir(zp))
		if (sp->nlink < ZFS_LINK_MAX)
			sp->nlink++;

	/* Override generic_fillattr()'s values with the SA-accurate ones. */
	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
	sp->blksize = blksize;
	sp->blocks = nblocks;

	if (unlikely(zp->z_blksz == 0)) {
		/*
		 * Block size hasn't been set; suggest maximal I/O transfers.
		 */
		sp->blksize = zfsvfs->z_max_blksz;
	}

	mutex_exit(&zp->z_lock);

	/*
	 * Required to prevent NFS client from detecting different inode
	 * numbers of snapshot root dentry before and after snapshot mount.
	 */
	if (zfsvfs->z_issnap) {
		if (ip->i_sb->s_root->d_inode == ip)
			sp->ino = ZFSCTL_INO_SNAPDIRS -
			    dmu_objset_id(zfsvfs->z_os);
	}

	ZFS_EXIT(zfsvfs);

	return (0);
}
057e8eee 2694
/*
 * For the operation of changing file's user/group/project, we need to
 * handle not only the main object that is assigned to the file directly,
 * but also the ones that are used by the file via hidden xattr directory.
 *
 * Because the xattr directory may contains many EA entries, as to it may
 * be impossible to change all of them via the transaction of changing the
 * main object's user/group/project attributes.  Then we have to change them
 * via other multiple independent transactions one by one.  It may be not good
 * solution, but we have no better idea yet.
 *
 * dzp is the xattr directory znode; each child entry is brought in line
 * with dzp's uid/gid/projid.  Returns 0 on success (ENOENT from the last
 * lookup is treated as success), or the first hard error encountered.
 */
static int
zfs_setattr_dir(znode_t *dzp)
{
	struct inode	*dxip = ZTOI(dzp);
	struct inode	*xip = NULL;
	zfsvfs_t	*zfsvfs = ITOZSB(dxip);
	objset_t	*os = zfsvfs->z_os;
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	zfs_dirlock_t	*dl;
	znode_t		*zp;
	dmu_tx_t	*tx = NULL;
	uint64_t	uid, gid;
	sa_bulk_attr_t	bulk[4];
	int		count;
	int		err;

	zap_cursor_init(&zc, os, dzp->z_id);
	while ((err = zap_cursor_retrieve(&zc, &zap)) == 0) {
		count = 0;
		/* Directory ZAP entries are single 8-byte integers. */
		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
			err = ENXIO;
			break;
		}

		err = zfs_dirent_lock(&dl, dzp, (char *)zap.za_name, &zp,
		    ZEXISTS, NULL, NULL);
		if (err == ENOENT)
			goto next;
		if (err)
			break;

		xip = ZTOI(zp);
		/* Skip entries that already match the directory's ids. */
		if (KUID_TO_SUID(xip->i_uid) == KUID_TO_SUID(dxip->i_uid) &&
		    KGID_TO_SGID(xip->i_gid) == KGID_TO_SGID(dxip->i_gid) &&
		    zp->z_projid == dzp->z_projid)
			goto next;

		/* One independent transaction per modified child. */
		tx = dmu_tx_create(os);
		if (!(zp->z_pflags & ZFS_PROJID))
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		else
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);

		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err)
			break;

		mutex_enter(&dzp->z_lock);

		if (KUID_TO_SUID(xip->i_uid) != KUID_TO_SUID(dxip->i_uid)) {
			xip->i_uid = dxip->i_uid;
			uid = zfs_uid_read(dxip);
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
			    &uid, sizeof (uid));
		}

		if (KGID_TO_SGID(xip->i_gid) != KGID_TO_SGID(dxip->i_gid)) {
			xip->i_gid = dxip->i_gid;
			gid = zfs_gid_read(dxip);
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
			    &gid, sizeof (gid));
		}

		if (zp->z_projid != dzp->z_projid) {
			/* First project-id assignment also sets the flag. */
			if (!(zp->z_pflags & ZFS_PROJID)) {
				zp->z_pflags |= ZFS_PROJID;
				SA_ADD_BULK_ATTR(bulk, count,
				    SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags,
				    sizeof (zp->z_pflags));
			}

			zp->z_projid = dzp->z_projid;
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PROJID(zfsvfs),
			    NULL, &zp->z_projid, sizeof (zp->z_projid));
		}

		mutex_exit(&dzp->z_lock);

		if (likely(count > 0)) {
			err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
			dmu_tx_commit(tx);
		} else {
			dmu_tx_abort(tx);
		}
		tx = NULL;
		if (err != 0 && err != ENOENT)
			break;

next:
		if (xip) {
			iput(xip);
			xip = NULL;
			zfs_dirent_unlock(dl);
		}
		zap_cursor_advance(&zc);
	}

	/* Error paths may leave an assigned-but-uncommitted tx or held ref. */
	if (tx)
		dmu_tx_abort(tx);
	if (xip) {
		iput(xip);
		zfs_dirent_unlock(dl);
	}
	zap_cursor_fini(&zc);

	return (err == ENOENT ? 0 : err);
}
2814
34dc7c2f
BB
2815/*
2816 * Set the file attributes to the values contained in the
2817 * vattr structure.
2818 *
3558fd73 2819 * IN: ip - inode of file to be modified.
34dc7c2f 2820 * vap - new attribute values.
5484965a 2821 * If ATTR_XVATTR set, then optional attrs are being set
34dc7c2f
BB
2822 * flags - ATTR_UTIME set if non-default time values provided.
2823 * - ATTR_NOACLCHECK (CIFS context only).
2824 * cr - credentials of caller.
34dc7c2f
BB
2825 *
2826 * RETURN: 0 if success
2827 * error code if failure
2828 *
2829 * Timestamps:
3558fd73 2830 * ip - ctime updated, mtime updated if size changed.
34dc7c2f
BB
2831 */
2832/* ARGSUSED */
e5c39b95 2833int
5484965a 2834zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
34dc7c2f 2835{
3558fd73 2836 znode_t *zp = ITOZ(ip);
0037b49e 2837 zfsvfs_t *zfsvfs = ITOZSB(ip);
9c5167d1 2838 objset_t *os = zfsvfs->z_os;
34dc7c2f
BB
2839 zilog_t *zilog;
2840 dmu_tx_t *tx;
2841 vattr_t oldva;
f4ea75d4 2842 xvattr_t *tmpxvattr;
5484965a 2843 uint_t mask = vap->va_mask;
a117a6d6 2844 uint_t saved_mask = 0;
34dc7c2f
BB
2845 int trim_mask = 0;
2846 uint64_t new_mode;
64aefee1 2847 uint64_t new_kuid = 0, new_kgid = 0, new_uid, new_gid;
572e2857 2848 uint64_t xattr_obj;
0df9673f 2849 uint64_t mtime[2], ctime[2], atime[2];
9c5167d1 2850 uint64_t projid = ZFS_INVALID_PROJID;
34dc7c2f
BB
2851 znode_t *attrzp;
2852 int need_policy = FALSE;
9c5167d1 2853 int err, err2 = 0;
34dc7c2f 2854 zfs_fuid_info_t *fuidp = NULL;
5484965a
BB
2855 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2856 xoptattr_t *xoap;
2857 zfs_acl_t *aclp;
34dc7c2f 2858 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
428870ff 2859 boolean_t fuid_dirtied = B_FALSE;
9c5167d1 2860 boolean_t handle_eadir = B_FALSE;
17c37660 2861 sa_bulk_attr_t *bulk, *xattr_bulk;
9c5167d1 2862 int count = 0, xattr_count = 0, bulks = 8;
34dc7c2f
BB
2863
2864 if (mask == 0)
2865 return (0);
2866
0037b49e 2867 ZFS_ENTER(zfsvfs);
34dc7c2f
BB
2868 ZFS_VERIFY_ZP(zp);
2869
9c5167d1
NF
2870 /*
2871 * If this is a xvattr_t, then get a pointer to the structure of
2872 * optional attributes. If this is NULL, then we have a vattr_t.
2873 */
2874 xoap = xva_getxoptattr(xvap);
2875 if (xoap != NULL && (mask & ATTR_XVATTR)) {
2876 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2877 if (!dmu_objset_projectquota_enabled(os) ||
2878 (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
2879 ZFS_EXIT(zfsvfs);
2880 return (SET_ERROR(ENOTSUP));
2881 }
2882
2883 projid = xoap->xoa_projid;
2884 if (unlikely(projid == ZFS_INVALID_PROJID)) {
2885 ZFS_EXIT(zfsvfs);
2886 return (SET_ERROR(EINVAL));
2887 }
2888
2889 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2890 projid = ZFS_INVALID_PROJID;
2891 else
2892 need_policy = TRUE;
2893 }
2894
2895 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2705ebf0
NF
2896 (xoap->xoa_projinherit !=
2897 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
9c5167d1
NF
2898 (!dmu_objset_projectquota_enabled(os) ||
2899 (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
2705ebf0
NF
2900 ZFS_EXIT(zfsvfs);
2901 return (SET_ERROR(ENOTSUP));
9c5167d1
NF
2902 }
2903 }
2904
0037b49e 2905 zilog = zfsvfs->z_log;
34dc7c2f
BB
2906
2907 /*
2908 * Make sure that if we have ephemeral uid/gid or xvattr specified
2909 * that file system is at proper version level
2910 */
5484965a 2911
0037b49e 2912 if (zfsvfs->z_use_fuids == B_FALSE &&
5484965a
BB
2913 (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2914 ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2915 (mask & ATTR_XVATTR))) {
0037b49e 2916 ZFS_EXIT(zfsvfs);
2e528b49 2917 return (SET_ERROR(EINVAL));
34dc7c2f
BB
2918 }
2919
3558fd73 2920 if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
0037b49e 2921 ZFS_EXIT(zfsvfs);
2e528b49 2922 return (SET_ERROR(EISDIR));
34dc7c2f
BB
2923 }
2924
3558fd73 2925 if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
0037b49e 2926 ZFS_EXIT(zfsvfs);
2e528b49 2927 return (SET_ERROR(EINVAL));
34dc7c2f
BB
2928 }
2929
d1d7e268 2930 tmpxvattr = kmem_alloc(sizeof (xvattr_t), KM_SLEEP);
f4ea75d4 2931 xva_init(tmpxvattr);
5484965a 2932
9c5167d1
NF
2933 bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
2934 xattr_bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
17c37660 2935
5484965a
BB
2936 /*
2937 * Immutable files can only alter immutable bit and atime
2938 */
2939 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2940 ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) ||
2941 ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
ecb2b7dc 2942 err = SET_ERROR(EPERM);
f4ea75d4 2943 goto out3;
5484965a
BB
2944 }
2945
3558fd73 2946 if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
ecb2b7dc 2947 err = SET_ERROR(EPERM);
f4ea75d4 2948 goto out3;
34dc7c2f
BB
2949 }
2950
5484965a
BB
2951 /*
2952 * Verify timestamps doesn't overflow 32 bits.
2953 * ZFS can handle large timestamps, but 32bit syscalls can't
2954 * handle times greater than 2039. This check should be removed
2955 * once large timestamps are fully supported.
2956 */
2957 if (mask & (ATTR_ATIME | ATTR_MTIME)) {
d1d7e268
MK
2958 if (((mask & ATTR_ATIME) &&
2959 TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2960 ((mask & ATTR_MTIME) &&
2961 TIMESPEC_OVERFLOW(&vap->va_mtime))) {
ecb2b7dc 2962 err = SET_ERROR(EOVERFLOW);
f4ea75d4 2963 goto out3;
5484965a
BB
2964 }
2965 }
2966
34dc7c2f
BB
2967top:
2968 attrzp = NULL;
572e2857 2969 aclp = NULL;
34dc7c2f 2970
45d1cae3 2971 /* Can this be moved to before the top label? */
0037b49e 2972 if (zfs_is_readonly(zfsvfs)) {
ecb2b7dc 2973 err = SET_ERROR(EROFS);
f4ea75d4 2974 goto out3;
34dc7c2f
BB
2975 }
2976
2977 /*
2978 * First validate permissions
2979 */
2980
3558fd73 2981 if (mask & ATTR_SIZE) {
34dc7c2f 2982 err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
f4ea75d4
BB
2983 if (err)
2984 goto out3;
2985
34dc7c2f
BB
2986 /*
2987 * XXX - Note, we are not providing any open
2988 * mode flags here (like FNDELAY), so we may
2989 * block if there are locks present... this
2990 * should be addressed in openat().
2991 */
b128c09f 2992 /* XXX - would it be OK to generate a log record here? */
5484965a 2993 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
f4ea75d4
BB
2994 if (err)
2995 goto out3;
428870ff 2996 }
34dc7c2f 2997
5484965a
BB
2998 if (mask & (ATTR_ATIME|ATTR_MTIME) ||
2999 ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
3000 XVA_ISSET_REQ(xvap, XAT_READONLY) ||
3001 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
3002 XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
3003 XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
3004 XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
3005 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
3006 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
3007 skipaclchk, cr);
3008 }
3009
3558fd73
BB
3010 if (mask & (ATTR_UID|ATTR_GID)) {
3011 int idmask = (mask & (ATTR_UID|ATTR_GID));
34dc7c2f
BB
3012 int take_owner;
3013 int take_group;
3014
3015 /*
3016 * NOTE: even if a new mode is being set,
3017 * we may clear S_ISUID/S_ISGID bits.
3018 */
3019
3558fd73 3020 if (!(mask & ATTR_MODE))
5484965a 3021 vap->va_mode = zp->z_mode;
34dc7c2f
BB
3022
3023 /*
3024 * Take ownership or chgrp to group we are a member of
3025 */
3026
5484965a 3027 take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
3558fd73 3028 take_group = (mask & ATTR_GID) &&
0037b49e 3029 zfs_groupmember(zfsvfs, vap->va_gid, cr);
34dc7c2f
BB
3030
3031 /*
5484965a 3032 * If both ATTR_UID and ATTR_GID are set then take_owner and
34dc7c2f
BB
3033 * take_group must both be set in order to allow taking
3034 * ownership.
3035 *
3036 * Otherwise, send the check through secpolicy_vnode_setattr()
3037 *
3038 */
3039
3558fd73
BB
3040 if (((idmask == (ATTR_UID|ATTR_GID)) &&
3041 take_owner && take_group) ||
3042 ((idmask == ATTR_UID) && take_owner) ||
3043 ((idmask == ATTR_GID) && take_group)) {
34dc7c2f
BB
3044 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
3045 skipaclchk, cr) == 0) {
3046 /*
3047 * Remove setuid/setgid for non-privileged users
3048 */
5484965a 3049 (void) secpolicy_setid_clear(vap, cr);
3558fd73 3050 trim_mask = (mask & (ATTR_UID|ATTR_GID));
34dc7c2f
BB
3051 } else {
3052 need_policy = TRUE;
3053 }
3054 } else {
3055 need_policy = TRUE;
3056 }
3057 }
3058
3059 mutex_enter(&zp->z_lock);
428870ff 3060 oldva.va_mode = zp->z_mode;
572e2857 3061 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
5484965a
BB
3062 if (mask & ATTR_XVATTR) {
3063 /*
3064 * Update xvattr mask to include only those attributes
3065 * that are actually changing.
3066 *
3067 * the bits will be restored prior to actually setting
3068 * the attributes so the caller thinks they were set.
3069 */
3070 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
3071 if (xoap->xoa_appendonly !=
3072 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
3073 need_policy = TRUE;
3074 } else {
3075 XVA_CLR_REQ(xvap, XAT_APPENDONLY);
f4ea75d4 3076 XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY);
5484965a
BB
3077 }
3078 }
3079
9c5167d1
NF
3080 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
3081 if (xoap->xoa_projinherit !=
3082 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
3083 need_policy = TRUE;
3084 } else {
3085 XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
3086 XVA_SET_REQ(tmpxvattr, XAT_PROJINHERIT);
3087 }
3088 }
3089
5484965a
BB
3090 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
3091 if (xoap->xoa_nounlink !=
3092 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
3093 need_policy = TRUE;
3094 } else {
3095 XVA_CLR_REQ(xvap, XAT_NOUNLINK);
f4ea75d4 3096 XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK);
5484965a
BB
3097 }
3098 }
3099
3100 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
3101 if (xoap->xoa_immutable !=
3102 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
3103 need_policy = TRUE;
3104 } else {
3105 XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
f4ea75d4 3106 XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE);
5484965a
BB
3107 }
3108 }
3109
3110 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
3111 if (xoap->xoa_nodump !=
3112 ((zp->z_pflags & ZFS_NODUMP) != 0)) {
3113 need_policy = TRUE;
3114 } else {
3115 XVA_CLR_REQ(xvap, XAT_NODUMP);
f4ea75d4 3116 XVA_SET_REQ(tmpxvattr, XAT_NODUMP);
5484965a
BB
3117 }
3118 }
3119
3120 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
3121 if (xoap->xoa_av_modified !=
3122 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
3123 need_policy = TRUE;
3124 } else {
3125 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
f4ea75d4 3126 XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED);
5484965a
BB
3127 }
3128 }
3129
3130 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
3131 if ((!S_ISREG(ip->i_mode) &&
3132 xoap->xoa_av_quarantined) ||
3133 xoap->xoa_av_quarantined !=
3134 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
3135 need_policy = TRUE;
3136 } else {
3137 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
f4ea75d4 3138 XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED);
5484965a
BB
3139 }
3140 }
3141
3142 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
3143 mutex_exit(&zp->z_lock);
ecb2b7dc 3144 err = SET_ERROR(EPERM);
f4ea75d4 3145 goto out3;
5484965a
BB
3146 }
3147
3148 if (need_policy == FALSE &&
3149 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
3150 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
3151 need_policy = TRUE;
3152 }
3153 }
34dc7c2f
BB
3154
3155 mutex_exit(&zp->z_lock);
3156
3558fd73 3157 if (mask & ATTR_MODE) {
34dc7c2f 3158 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
5484965a 3159 err = secpolicy_setid_setsticky_clear(ip, vap,
34dc7c2f 3160 &oldva, cr);
f4ea75d4
BB
3161 if (err)
3162 goto out3;
3163
3558fd73 3164 trim_mask |= ATTR_MODE;
34dc7c2f
BB
3165 } else {
3166 need_policy = TRUE;
3167 }
3168 }
3169
3170 if (need_policy) {
3171 /*
3172 * If trim_mask is set then take ownership
3173 * has been granted or write_acl is present and user
3174 * has the ability to modify mode. In that case remove
3175 * UID|GID and or MODE from mask so that
3176 * secpolicy_vnode_setattr() doesn't revoke it.
3177 */
3178
3179 if (trim_mask) {
5484965a
BB
3180 saved_mask = vap->va_mask;
3181 vap->va_mask &= ~trim_mask;
34dc7c2f 3182 }
5484965a 3183 err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags,
34dc7c2f 3184 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
f4ea75d4
BB
3185 if (err)
3186 goto out3;
34dc7c2f
BB
3187
3188 if (trim_mask)
5484965a 3189 vap->va_mask |= saved_mask;
34dc7c2f
BB
3190 }
3191
3192 /*
3193 * secpolicy_vnode_setattr, or take ownership may have
3194 * changed va_mask
3195 */
5484965a 3196 mask = vap->va_mask;
34dc7c2f 3197
9c5167d1
NF
3198 if ((mask & (ATTR_UID | ATTR_GID)) || projid != ZFS_INVALID_PROJID) {
3199 handle_eadir = B_TRUE;
0037b49e 3200 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
572e2857 3201 &xattr_obj, sizeof (xattr_obj));
428870ff 3202
572e2857 3203 if (err == 0 && xattr_obj) {
3558fd73 3204 err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp);
428870ff
BB
3205 if (err)
3206 goto out2;
3207 }
3558fd73 3208 if (mask & ATTR_UID) {
0037b49e 3209 new_kuid = zfs_fuid_create(zfsvfs,
5484965a 3210 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
64aefee1 3211 if (new_kuid != KUID_TO_SUID(ZTOI(zp)->i_uid) &&
9c5167d1
NF
3212 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
3213 new_kuid)) {
572e2857 3214 if (attrzp)
3558fd73 3215 iput(ZTOI(attrzp));
ecb2b7dc 3216 err = SET_ERROR(EDQUOT);
428870ff
BB
3217 goto out2;
3218 }
3219 }
3220
3558fd73 3221 if (mask & ATTR_GID) {
0037b49e
BB
3222 new_kgid = zfs_fuid_create(zfsvfs,
3223 (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp);
64aefee1 3224 if (new_kgid != KGID_TO_SGID(ZTOI(zp)->i_gid) &&
9c5167d1
NF
3225 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
3226 new_kgid)) {
572e2857 3227 if (attrzp)
3558fd73 3228 iput(ZTOI(attrzp));
ecb2b7dc 3229 err = SET_ERROR(EDQUOT);
428870ff
BB
3230 goto out2;
3231 }
3232 }
9c5167d1
NF
3233
3234 if (projid != ZFS_INVALID_PROJID &&
3235 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
3236 if (attrzp)
3237 iput(ZTOI(attrzp));
3238 err = EDQUOT;
3239 goto out2;
3240 }
428870ff 3241 }
9c5167d1 3242 tx = dmu_tx_create(os);
34dc7c2f 3243
3558fd73 3244 if (mask & ATTR_MODE) {
428870ff 3245 uint64_t pmode = zp->z_mode;
572e2857 3246 uint64_t acl_obj;
5484965a 3247 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
34dc7c2f 3248
572e2857 3249 zfs_acl_chmod_setattr(zp, &aclp, new_mode);
428870ff 3250
572e2857
BB
3251 mutex_enter(&zp->z_lock);
3252 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
428870ff
BB
3253 /*
3254 * Are we upgrading ACL from old V0 format
3255 * to V1 format?
3256 */
0037b49e 3257 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
572e2857 3258 zfs_znode_acl_version(zp) ==
34dc7c2f 3259 ZFS_ACL_VERSION_INITIAL) {
572e2857 3260 dmu_tx_hold_free(tx, acl_obj, 0,
34dc7c2f
BB
3261 DMU_OBJECT_END);
3262 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
3263 0, aclp->z_acl_bytes);
3264 } else {
572e2857 3265 dmu_tx_hold_write(tx, acl_obj, 0,
34dc7c2f
BB
3266 aclp->z_acl_bytes);
3267 }
428870ff 3268 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
34dc7c2f
BB
3269 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
3270 0, aclp->z_acl_bytes);
3271 }
572e2857 3272 mutex_exit(&zp->z_lock);
428870ff
BB
3273 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
3274 } else {
9c5167d1
NF
3275 if (((mask & ATTR_XVATTR) &&
3276 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
3277 (projid != ZFS_INVALID_PROJID &&
3278 !(zp->z_pflags & ZFS_PROJID)))
5484965a
BB
3279 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
3280 else
3281 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
34dc7c2f
BB
3282 }
3283
428870ff
BB
3284 if (attrzp) {
3285 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
34dc7c2f
BB
3286 }
3287
0037b49e 3288 fuid_dirtied = zfsvfs->z_fuid_dirty;
428870ff 3289 if (fuid_dirtied)
0037b49e 3290 zfs_fuid_txhold(zfsvfs, tx);
428870ff
BB
3291
3292 zfs_sa_upgrade_txholds(tx, zp);
3293
384f8a09
MA
3294 err = dmu_tx_assign(tx, TXG_WAIT);
3295 if (err)
9babb374 3296 goto out;
34dc7c2f 3297
428870ff 3298 count = 0;
34dc7c2f
BB
3299 /*
3300 * Set each attribute requested.
3301 * We group settings according to the locks they need to acquire.
3302 *
3303 * Note: you cannot set ctime directly, although it will be
3304 * updated as a side-effect of calling this function.
3305 */
3306
9c5167d1
NF
3307 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
3308 /*
3309 * For the existed object that is upgraded from old system,
3310 * its on-disk layout has no slot for the project ID attribute.
3311 * But quota accounting logic needs to access related slots by
3312 * offset directly. So we need to adjust old objects' layout
3313 * to make the project ID to some unified and fixed offset.
3314 */
3315 if (attrzp)
3316 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
3317 if (err == 0)
3318 err = sa_add_projid(zp->z_sa_hdl, tx, projid);
3319
3320 if (unlikely(err == EEXIST))
3321 err = 0;
3322 else if (err != 0)
3323 goto out;
3324 else
3325 projid = ZFS_INVALID_PROJID;
3326 }
572e2857 3327
3558fd73 3328 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
572e2857 3329 mutex_enter(&zp->z_acl_lock);
34dc7c2f
BB
3330 mutex_enter(&zp->z_lock);
3331
0037b49e 3332 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
428870ff
BB
3333 &zp->z_pflags, sizeof (zp->z_pflags));
3334
3335 if (attrzp) {
3558fd73 3336 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
572e2857 3337 mutex_enter(&attrzp->z_acl_lock);
428870ff
BB
3338 mutex_enter(&attrzp->z_lock);
3339 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
0037b49e 3340 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
428870ff 3341 sizeof (attrzp->z_pflags));
9c5167d1
NF
3342 if (projid != ZFS_INVALID_PROJID) {
3343 attrzp->z_projid = projid;
3344 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3345 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
3346 sizeof (attrzp->z_projid));
3347 }
428870ff
BB
3348 }
3349
3558fd73 3350 if (mask & (ATTR_UID|ATTR_GID)) {
428870ff 3351
3558fd73 3352 if (mask & ATTR_UID) {
64aefee1
NB
3353 ZTOI(zp)->i_uid = SUID_TO_KUID(new_kuid);
3354 new_uid = zfs_uid_read(ZTOI(zp));
0037b49e 3355 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
428870ff 3356 &new_uid, sizeof (new_uid));
428870ff
BB
3357 if (attrzp) {
3358 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
0037b49e 3359 SA_ZPL_UID(zfsvfs), NULL, &new_uid,
428870ff 3360 sizeof (new_uid));
2c6abf15 3361 ZTOI(attrzp)->i_uid = SUID_TO_KUID(new_uid);
428870ff
BB
3362 }
3363 }
3364
3558fd73 3365 if (mask & ATTR_GID) {
64aefee1
NB
3366 ZTOI(zp)->i_gid = SGID_TO_KGID(new_kgid);
3367 new_gid = zfs_gid_read(ZTOI(zp));
0037b49e 3368 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
428870ff 3369 NULL, &new_gid, sizeof (new_gid));
428870ff
BB
3370 if (attrzp) {
3371 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
0037b49e 3372 SA_ZPL_GID(zfsvfs), NULL, &new_gid,
428870ff 3373 sizeof (new_gid));
64aefee1 3374 ZTOI(attrzp)->i_gid = SGID_TO_KGID(new_kgid);
428870ff
BB
3375 }
3376 }
3558fd73 3377 if (!(mask & ATTR_MODE)) {
0037b49e 3378 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
428870ff
BB
3379 NULL, &new_mode, sizeof (new_mode));
3380 new_mode = zp->z_mode;
3381 }
3382 err = zfs_acl_chown_setattr(zp);
3383 ASSERT(err == 0);
3384 if (attrzp) {
3385 err = zfs_acl_chown_setattr(attrzp);
3386 ASSERT(err == 0);
3387 }
3388 }
3389
3558fd73 3390 if (mask & ATTR_MODE) {
0037b49e 3391 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
428870ff 3392 &new_mode, sizeof (new_mode));
12fa7f34 3393 zp->z_mode = ZTOI(zp)->i_mode = new_mode;
99c564bc 3394 ASSERT3P(aclp, !=, NULL);
9babb374 3395 err = zfs_aclset_common(zp, aclp, cr, tx);
c99c9001 3396 ASSERT0(err);
572e2857
BB
3397 if (zp->z_acl_cached)
3398 zfs_acl_free(zp->z_acl_cached);
45d1cae3
BB
3399 zp->z_acl_cached = aclp;
3400 aclp = NULL;
34dc7c2f
BB
3401 }
3402
704cd075
CC
3403 if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
3404 zp->z_atime_dirty = 0;
3405 ZFS_TIME_ENCODE(&ip->i_atime, atime);
0037b49e 3406 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
0df9673f 3407 &atime, sizeof (atime));
34dc7c2f
BB
3408 }
3409
99834d19 3410 if (mask & (ATTR_MTIME | ATTR_SIZE)) {
5484965a 3411 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
6413c95f 3412 ZTOI(zp)->i_mtime = zpl_inode_timespec_trunc(vap->va_mtime,
87f9371a
NB
3413 ZTOI(zp)->i_sb->s_time_gran);
3414
0037b49e 3415 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
428870ff 3416 mtime, sizeof (mtime));
34dc7c2f
BB
3417 }
3418
99834d19 3419 if (mask & (ATTR_CTIME | ATTR_SIZE)) {
87f9371a 3420 ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
6413c95f 3421 ZTOI(zp)->i_ctime = zpl_inode_timespec_trunc(vap->va_ctime,
87f9371a 3422 ZTOI(zp)->i_sb->s_time_gran);
0037b49e 3423 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
87f9371a 3424 ctime, sizeof (ctime));
428870ff 3425 }
87f9371a 3426
9c5167d1
NF
3427 if (projid != ZFS_INVALID_PROJID) {
3428 zp->z_projid = projid;
3429 SA_ADD_BULK_ATTR(bulk, count,
3430 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
3431 sizeof (zp->z_projid));
3432 }
3433
87f9371a
NB
3434 if (attrzp && mask) {
3435 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
0037b49e 3436 SA_ZPL_CTIME(zfsvfs), NULL, &ctime,
87f9371a
NB
3437 sizeof (ctime));
3438 }
3439
34dc7c2f
BB
3440 /*
3441 * Do this after setting timestamps to prevent timestamp
3442 * update from toggling bit
3443 */
3444
5484965a
BB
3445 if (xoap && (mask & ATTR_XVATTR)) {
3446
3447 /*
3448 * restore trimmed off masks
3449 * so that return masks can be set for caller.
3450 */
3451
f4ea75d4 3452 if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) {
5484965a
BB
3453 XVA_SET_REQ(xvap, XAT_APPENDONLY);
3454 }
f4ea75d4 3455 if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) {
5484965a
BB
3456 XVA_SET_REQ(xvap, XAT_NOUNLINK);
3457 }
f4ea75d4 3458 if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) {
5484965a
BB
3459 XVA_SET_REQ(xvap, XAT_IMMUTABLE);
3460 }
f4ea75d4 3461 if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) {
5484965a
BB
3462 XVA_SET_REQ(xvap, XAT_NODUMP);
3463 }
f4ea75d4 3464 if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) {
5484965a
BB
3465 XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
3466 }
f4ea75d4 3467 if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) {
5484965a
BB
3468 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
3469 }
9c5167d1
NF
3470 if (XVA_ISSET_REQ(tmpxvattr, XAT_PROJINHERIT)) {
3471 XVA_SET_REQ(xvap, XAT_PROJINHERIT);
3472 }
5484965a
BB
3473
3474 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
3475 ASSERT(S_ISREG(ip->i_mode));
3476
3477 zfs_xvattr_set(zp, xvap, tx);
3478 }
3479
9babb374 3480 if (fuid_dirtied)
0037b49e 3481 zfs_fuid_sync(zfsvfs, tx);
9babb374 3482
34dc7c2f 3483 if (mask != 0)
5484965a 3484 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
34dc7c2f 3485
34dc7c2f 3486 mutex_exit(&zp->z_lock);
3558fd73 3487 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
572e2857 3488 mutex_exit(&zp->z_acl_lock);
34dc7c2f 3489
572e2857 3490 if (attrzp) {
3558fd73 3491 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
572e2857
BB
3492 mutex_exit(&attrzp->z_acl_lock);
3493 mutex_exit(&attrzp->z_lock);
3494 }
9babb374 3495out:
9c5167d1 3496 if (err == 0 && xattr_count > 0) {
428870ff
BB
3497 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
3498 xattr_count, tx);
3499 ASSERT(err2 == 0);
3500 }
3501
45d1cae3 3502 if (aclp)
9babb374 3503 zfs_acl_free(aclp);
9babb374
BB
3504
3505 if (fuidp) {
3506 zfs_fuid_info_free(fuidp);
3507 fuidp = NULL;
3508 }
3509
428870ff 3510 if (err) {
9babb374 3511 dmu_tx_abort(tx);
ea7e86d8
BB
3512 if (attrzp)
3513 iput(ZTOI(attrzp));
428870ff
BB
3514 if (err == ERESTART)
3515 goto top;
3516 } else {
9c5167d1
NF
3517 if (count > 0)
3518 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
9babb374 3519 dmu_tx_commit(tx);
9c5167d1
NF
3520 if (attrzp) {
3521 if (err2 == 0 && handle_eadir)
3522 err2 = zfs_setattr_dir(attrzp);
ea7e86d8 3523 iput(ZTOI(attrzp));
9c5167d1 3524 }
037849f8 3525 zfs_inode_update(zp);
428870ff
BB
3526 }
3527
428870ff 3528out2:
9c5167d1 3529 if (os->os_sync == ZFS_SYNC_ALWAYS)
572e2857 3530 zil_commit(zilog, 0);
34dc7c2f 3531
f4ea75d4 3532out3:
9c5167d1
NF
3533 kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
3534 kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
d1d7e268 3535 kmem_free(tmpxvattr, sizeof (xvattr_t));
0037b49e 3536 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
3537 return (err);
3538}
3539
/*
 * One node in the chain of directory locks taken by zfs_rename_lock()
 * while walking ".." entries toward the root; unwound (in LIFO order)
 * by zfs_rename_unlock().
 */
typedef struct zfs_zlock {
	krwlock_t	*zl_rwlock;	/* lock we acquired */
	znode_t		*zl_znode;	/* znode we held (NULL on first pass) */
	struct zfs_zlock *zl_next;	/* next in list */
} zfs_zlock_t;
3545
3546/*
3547 * Drop locks and release vnodes that were held by zfs_rename_lock().
3548 */
3549static void
3550zfs_rename_unlock(zfs_zlock_t **zlpp)
3551{
3552 zfs_zlock_t *zl;
3553
3554 while ((zl = *zlpp) != NULL) {
3555 if (zl->zl_znode != NULL)
ea7e86d8 3556 zfs_iput_async(ZTOI(zl->zl_znode));
34dc7c2f
BB
3557 rw_exit(zl->zl_rwlock);
3558 *zlpp = zl->zl_next;
3559 kmem_free(zl, sizeof (*zl));
3560 }
3561}
3562
/*
 * Search back through the directory tree, using the ".." entries.
 * Lock each directory in the chain to prevent concurrent renames.
 * Fail any attempt to move a directory into one of its own descendants.
 * XXX - z_parent_lock can overlap with map or grow locks
 *
 * Every lock acquired (and every znode held after the first pass) is
 * recorded on *zlpp so the caller can unwind with zfs_rename_unlock()
 * whether we succeed or fail.
 */
static int
zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
{
	zfs_zlock_t	*zl;
	znode_t		*zp = tdzp;
	uint64_t	rootid = ZTOZSB(zp)->z_root;
	uint64_t	oidp = zp->z_id;
	krwlock_t	*rwlp = &szp->z_parent_lock;
	krw_t		rw = RW_WRITER;

	/*
	 * First pass write-locks szp and compares to zp->z_id.
	 * Later passes read-lock zp and compare to zp->z_parent.
	 */
	do {
		if (!rw_tryenter(rwlp, rw)) {
			/*
			 * Another thread is renaming in this path.
			 * Note that if we are a WRITER, we don't have any
			 * parent_locks held yet.
			 */
			if (rw == RW_READER && zp->z_id > szp->z_id) {
				/*
				 * Drop our locks and restart; the id
				 * comparison gives a global ordering that
				 * ensures one of the two racing renames
				 * backs off, avoiding deadlock.
				 */
				zfs_rename_unlock(&zl);
				*zlpp = NULL;
				zp = tdzp;
				oidp = zp->z_id;
				rwlp = &szp->z_parent_lock;
				rw = RW_WRITER;
				continue;
			} else {
				/*
				 * Wait for other thread to drop its locks
				 */
				rw_enter(rwlp, rw);
			}
		}

		/* Record the lock we now hold so it can be unwound later. */
		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
		zl->zl_rwlock = rwlp;
		zl->zl_znode = NULL;
		zl->zl_next = *zlpp;
		*zlpp = zl;

		if (oidp == szp->z_id)		/* We're a descendant of szp */
			return (SET_ERROR(EINVAL));

		if (oidp == rootid)		/* We've hit the top */
			return (0);

		if (rw == RW_READER) {		/* i.e. not the first pass */
			int error = zfs_zget(ZTOZSB(zp), oidp, &zp);
			if (error)
				return (error);
			zl->zl_znode = zp;
		}
		/* Step up to the parent directory via the SA ".." link. */
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)),
		    &oidp, sizeof (oidp));
		rwlp = &zp->z_parent_lock;
		rw = RW_READER;

	} while (zp->z_id != sdzp->z_id);

	return (0);
}
3636
/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 *	IN:	sdip	- Source directory containing the "old entry".
 *		snm	- Old entry name.
 *		tdip	- Target directory to contain the "new entry".
 *		tnm	- New entry name.
 *		cr	- credentials of caller.
 *		flags	- case flags
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	sdip,tdip - ctime|mtime updated
 */
/*ARGSUSED*/
int
zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
    cred_t *cr, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = ITOZ(sdip);
	zfsvfs_t	*zfsvfs = ITOZSB(sdip);
	zilog_t		*zilog;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;
	boolean_t	waited = B_FALSE;

	if (snm == NULL || tnm == NULL)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	tdzp = ITOZ(tdip);
	ZFS_VERIFY_ZP(tdzp);

	/*
	 * We check i_sb because snapshots and the ctldir must have different
	 * super blocks.
	 */
	if (tdip->i_sb != sdip->i_sb || zfsctl_is_node(tdip)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

top:
	szp = NULL;
	tzp = NULL;
	zl = NULL;

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Lock source and target directory entries.  To prevent deadlock,
	 * a lock ordering must be defined.  We lock the directory with
	 * the smallest object id first, or if it's a tie, the one with
	 * the lexically first name.
	 */
	if (sdzp->z_id < tdzp->z_id) {
		cmp = -1;
	} else if (sdzp->z_id > tdzp->z_id) {
		cmp = 1;
	} else {
		/*
		 * First compare the two name arguments without
		 * considering any case folding.
		 */
		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);

		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
		ASSERT(error == 0 || !zfsvfs->z_utf8);
		if (cmp == 0) {
			/*
			 * POSIX: "If the old argument and the new argument
			 * both refer to links to the same existing file,
			 * the rename() function shall return successfully
			 * and perform no other action."
			 */
			ZFS_EXIT(zfsvfs);
			return (0);
		}
		/*
		 * If the file system is case-folding, then we may
		 * have some more checking to do.  A case-folding file
		 * system is either supporting mixed case sensitivity
		 * access or is completely case-insensitive.  Note
		 * that the file system is always case preserving.
		 *
		 * In mixed sensitivity mode case sensitive behavior
		 * is the default.  FIGNORECASE must be used to
		 * explicitly request case insensitive behavior.
		 *
		 * If the source and target names provided differ only
		 * by case (e.g., a request to rename 'tim' to 'Tim'),
		 * we will treat this as a special case in the
		 * case-insensitive mode: as long as the source name
		 * is an exact match, we will allow this to proceed as
		 * a name-change request.
		 */
		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
		    flags & FIGNORECASE)) &&
		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
		    &error) == 0) {
			/*
			 * case preserving rename request, require exact
			 * name matches
			 */
			zflg |= ZCIEXACT;
			zflg &= ~ZCILOOK;
		}
	}

	/*
	 * If the source and destination directories are the same, we should
	 * grab the z_name_lock of that directory only once.
	 */
	if (sdzp == tdzp) {
		zflg |= ZHAVELOCK;
		rw_enter(&sdzp->z_name_lock, RW_READER);
	}

	/* Acquire the two dirent locks in the order chosen above. */
	if (cmp < 0) {
		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
		    ZEXISTS | zflg, NULL, NULL);
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
	} else {
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, zflg, NULL, NULL);
		serr = zfs_dirent_lock(&sdl,
		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
		    NULL, NULL);
	}

	if (serr) {
		/*
		 * Source entry invalid or not there.
		 */
		if (!terr) {
			zfs_dirent_unlock(tdl);
			if (tzp)
				iput(ZTOI(tzp));
		}

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		if (strcmp(snm, "..") == 0)
			serr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (serr);
	}
	if (terr) {
		zfs_dirent_unlock(sdl);
		iput(ZTOI(szp));

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		if (strcmp(tnm, "..") == 0)
			terr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (terr);
	}

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow renames into our tree when the project
	 * IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */

	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
		goto out;

	if (S_ISDIR(ZTOI(szp)->i_mode)) {
		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl)))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if (S_ISDIR(ZTOI(szp)->i_mode)) {
			if (!S_ISDIR(ZTOI(tzp)->i_mode)) {
				error = SET_ERROR(ENOTDIR);
				goto out;
			}
		} else {
			if (S_ISDIR(ZTOI(tzp)->i_mode)) {
				error = SET_ERROR(EISDIR);
				goto out;
			}
		}
		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (szp->z_id == tzp->z_id) {
			error = 0;
			goto out;
		}
	}

	/*
	 * Set up the DMU transaction covering both directories, the
	 * renamed object, and (if present) the displaced target.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error) {
		/* Drop everything; on ERESTART wait and retry from "top". */
		if (zl != NULL)
			zfs_rename_unlock(&zl);
		zfs_dirent_unlock(sdl);
		zfs_dirent_unlock(tdl);

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			iput(ZTOI(szp));
			if (tzp)
				iput(ZTOI(tzp));
			goto top;
		}
		dmu_tx_abort(tx);
		iput(ZTOI(szp));
		if (tzp)
			iput(ZTOI(tzp));
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);

	if (error == 0) {
		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;
			if (tdzp->z_pflags & ZFS_PROJINHERIT)
				szp->z_pflags |= ZFS_PROJINHERIT;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME |
				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
				    sdl->dl_name, tdzp, tdl->dl_name, szp);
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
				    ZRENAMING, NULL), ==, 0);
			}
		} else {
			/*
			 * If we had removed the existing target, subsequent
			 * call to zfs_link_create() to add back the same entry
			 * but, the new dnode (szp) should not fail.
			 */
			ASSERT(tzp == NULL);
		}
	}

	dmu_tx_commit(tx);
out:
	/* Common unwind path: locks, dirent locks, inode updates, holds. */
	if (zl != NULL)
		zfs_rename_unlock(&zl);

	zfs_dirent_unlock(sdl);
	zfs_dirent_unlock(tdl);

	zfs_inode_update(sdzp);
	if (sdzp == tdzp)
		rw_exit(&sdzp->z_name_lock);

	if (sdzp != tdzp)
		zfs_inode_update(tdzp);

	zfs_inode_update(szp);
	iput(ZTOI(szp));
	if (tzp) {
		zfs_inode_update(tzp);
		iput(ZTOI(tzp));
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}
4004
/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 * IN:	dip	- Directory to contain new symbolic link.
 *	name	- Name for new symlink entry.
 *	vap	- Attributes of new entry.
 *	link	- Target path of new symlink.
 *	ipp	- Set to the inode of the created symlink on success.
 *	cr	- credentials of caller.
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dip - ctime|mtime updated
 */
/*ARGSUSED*/
int
zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
    struct inode **ipp, cred_t *cr, int flags)
{
	znode_t		*zp, *dzp = ITOZ(dip);
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = ITOZSB(dip);
	zilog_t		*zilog;
	uint64_t	len = strlen(link);
	int		error;
	int		zflg = ZNEW;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype = TX_SYMLINK;
	boolean_t	waited = B_FALSE;

	ASSERT(S_ISLNK(vap->va_mode));

	if (name == NULL)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Reject names that are not valid UTF-8 when the fs requires it. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}
	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

	/* The symlink target, not the name, is bounded by MAXPATHLEN. */
	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENAMETOOLONG));
	}

	/*
	 * Compute the ACL/FUID ids once, before the retry loop below; they
	 * are freed on every error path and after the entry is logged.
	 */
	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
top:
	*ipp = NULL;

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	/* Hold at least one byte even for an empty-target symlink. */
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			/*
			 * Transaction could not be assigned; wait for the
			 * next txg and retry, unthrottled this time.
			 */
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/* Store the target path either as an SA or via the legacy path. */
	mutex_enter(&zp->z_lock);
	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    link, len, tx);
	else
		zfs_sa_symlink(zp, link, len, tx);
	mutex_exit(&zp->z_lock);

	/* A symlink's size is the length of its target path. */
	zp->z_size = len;
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 */
	error = zfs_link_create(dl, zp, tx, ZNEW);
	if (error != 0) {
		/* Undo the node creation; drop it from the inode hash. */
		zfs_znode_delete(zp, tx);
		remove_inode_hash(ZTOI(zp));
	} else {
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);

		zfs_inode_update(dzp);
		zfs_inode_update(zp);
	}

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	if (error == 0) {
		*ipp = ZTOI(zp);

		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
			zil_commit(zilog, 0);
	} else {
		/* Drop the reference taken by zfs_mknode() on failure. */
		iput(ZTOI(zp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}
4174
4175/*
4176 * Return, in the buffer contained in the provided uio structure,
3558fd73 4177 * the symbolic path referred to by ip.
34dc7c2f 4178 *
8b4f9a2d
BB
4179 * IN: ip - inode of symbolic link
4180 * uio - structure to contain the link path.
4181 * cr - credentials of caller.
34dc7c2f
BB
4182 *
4183 * RETURN: 0 if success
4184 * error code if failure
4185 *
4186 * Timestamps:
3558fd73 4187 * ip - atime updated
34dc7c2f
BB
4188 */
4189/* ARGSUSED */
e5c39b95 4190int
8b4f9a2d 4191zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr)
34dc7c2f 4192{
3558fd73 4193 znode_t *zp = ITOZ(ip);
0037b49e 4194 zfsvfs_t *zfsvfs = ITOZSB(ip);
34dc7c2f
BB
4195 int error;
4196
0037b49e 4197 ZFS_ENTER(zfsvfs);
34dc7c2f
BB
4198 ZFS_VERIFY_ZP(zp);
4199
572e2857 4200 mutex_enter(&zp->z_lock);
428870ff 4201 if (zp->z_is_sa)
8b4f9a2d 4202 error = sa_lookup_uio(zp->z_sa_hdl,
0037b49e 4203 SA_ZPL_SYMLINK(zfsvfs), uio);
428870ff 4204 else
8b4f9a2d 4205 error = zfs_sa_readlink(zp, uio);
572e2857 4206 mutex_exit(&zp->z_lock);
34dc7c2f 4207
0037b49e 4208 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
4209 return (error);
4210}
4211
/*
 * Insert a new entry into directory tdip referencing sip.
 *
 * IN:	tdip	- Directory to contain new entry.
 *	sip	- inode of new entry.
 *	name	- name of new entry.
 *	cr	- credentials of caller.
 *	flags	- case flags.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	tdip - ctime|mtime updated
 *	 sip - ctime updated
 */
/* ARGSUSED */
int
zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr,
    int flags)
{
	znode_t		*dzp = ITOZ(tdip);
	znode_t		*tzp, *szp;
	zfsvfs_t	*zfsvfs = ITOZSB(tdip);
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	int		zf = ZNEW;
	uint64_t	parent;
	uid_t		owner;
	boolean_t	waited = B_FALSE;
	boolean_t	is_tmpfile = 0;
	uint64_t	txg;
#ifdef HAVE_TMPFILE
	/* An O_TMPFILE inode: unlinked but still open, flagged linkable. */
	is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
#endif
	ASSERT(S_ISDIR(tdip->i_mode));

	if (name == NULL)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (S_ISDIR(sip->i_mode)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	szp = ITOZ(sip);
	ZFS_VERIFY_ZP(szp);

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow hard link creation in our tree when the
	 * project IDs are the same.
	 */
	if (dzp->z_pflags & ZFS_PROJINHERIT && dzp->z_projid != szp->z_projid) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	/*
	 * We check i_sb because snapshots and the ctldir must have different
	 * super blocks.
	 */
	if (sip->i_sb != tdip->i_sb || zfsctl_is_node(sip)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/* Only the owner (or privileged callers) may hard-link a file. */
	owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
	    cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

top:
	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	if (is_tmpfile)
		/* We will also remove the entry from the unlinked set. */
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, dzp);
	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			/* Wait for the next txg and retry, unthrottled. */
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	/* unmark z_unlinked so zfs_link_create will not reject */
	if (is_tmpfile)
		szp->z_unlinked = 0;
	error = zfs_link_create(dl, szp, tx, 0);

	if (error == 0) {
		uint64_t txtype = TX_LINK;
		/*
		 * tmpfile is created to be in z_unlinkedobj, so remove it.
		 * Also, we don't log in ZIL, be cause all previous file
		 * operation on the tmpfile are ignored by ZIL. Instead we
		 * always wait for txg to sync to make sure all previous
		 * operation are sync safe.
		 */
		if (is_tmpfile) {
			VERIFY(zap_remove_int(zfsvfs->z_os,
			    zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
		} else {
			if (flags & FIGNORECASE)
				txtype |= TX_CI;
			zfs_log_link(zilog, tx, txtype, dzp, szp, name);
		}
	} else if (is_tmpfile) {
		/* restore z_unlinked since when linking failed */
		szp->z_unlinked = 1;
	}
	txg = dmu_tx_get_txg(tx);
	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	/* See comment above: tmpfile links rely on txg sync, not the ZIL. */
	if (is_tmpfile)
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);

	zfs_inode_update(dzp);
	zfs_inode_update(szp);
	ZFS_EXIT(zfsvfs);
	return (error);
}
4406
/*
 * ZIL commit callback for zfs_putpage(): invoked once the page's data
 * is on stable storage; clears any stale error and ends writeback.
 */
static void
zfs_putpage_commit_cb(void *arg)
{
	struct page *pp = (struct page *)arg;

	ClearPageError(pp);
	end_page_writeback(pp);
}
4415
/*
 * Push a page out to disk, once the page is on stable storage the
 * registered commit callback will be run as notification of completion.
 *
 * IN:	ip	- page mapped for inode.
 *	pp	- page to push (page is locked)
 *	wbc	- writeback control data
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	ip - ctime|mtime updated
 */
/* ARGSUSED */
int
zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
{
	znode_t		*zp = ITOZ(ip);
	zfsvfs_t	*zfsvfs = ITOZSB(ip);
	loff_t		offset;
	loff_t		pgoff;
	unsigned int	pglen;
	dmu_tx_t	*tx;
	caddr_t		va;
	int		err = 0;
	uint64_t	mtime[2], ctime[2];
	sa_bulk_attr_t	bulk[3];
	int		cnt = 0;
	struct address_space *mapping;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	ASSERT(PageLocked(pp));

	pgoff = page_offset(pp);	/* Page byte-offset in file */
	offset = i_size_read(ip);	/* File length in bytes */
	pglen = MIN(PAGE_SIZE,		/* Page length in bytes */
	    P2ROUNDUP(offset, PAGE_SIZE)-pgoff);

	/* Page is beyond end of file */
	if (pgoff >= offset) {
		unlock_page(pp);
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/* Truncate page length to end of file */
	if (pgoff + pglen > offset)
		pglen = offset - pgoff;

#if 0
	/*
	 * FIXME: Allow mmap writes past its quota.  The correct fix
	 * is to register a page_mkwrite() handler to count the page
	 * against its quota when it is about to be dirtied.
	 */
	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT,
	    KUID_TO_SUID(ip->i_uid)) ||
	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT,
	    KGID_TO_SGID(ip->i_gid)) ||
	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
	    zp->z_projid))) {
		err = EDQUOT;
	}
#endif

	/*
	 * The ordering here is critical and must adhere to the following
	 * rules in order to avoid deadlocking in either zfs_read() or
	 * zfs_free_range() due to a lock inversion.
	 *
	 * 1) The page must be unlocked prior to acquiring the range lock.
	 *    This is critical because zfs_read() calls find_lock_page()
	 *    which may block on the page lock while holding the range lock.
	 *
	 * 2) Before setting or clearing write back on a page the range lock
	 *    must be held in order to prevent a lock inversion with the
	 *    zfs_free_range() function.
	 *
	 * This presents a problem because upon entering this function the
	 * page lock is already held.  To safely acquire the range lock the
	 * page lock must be dropped.  This creates a window where another
	 * process could truncate, invalidate, dirty, or write out the page.
	 *
	 * Therefore, after successfully reacquiring the range and page locks
	 * the current page state is checked.  In the common case everything
	 * will be as is expected and it can be written out.  However, if
	 * the page state has changed it must be handled accordingly.
	 */
	mapping = pp->mapping;
	redirty_page_for_writepage(wbc, pp);
	unlock_page(pp);

	locked_range_t *lr = rangelock_enter(&zp->z_rangelock,
	    pgoff, pglen, RL_WRITER);
	lock_page(pp);

	/* Page mapping changed or it was no longer dirty, we're done */
	if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
		unlock_page(pp);
		rangelock_exit(lr);
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/* Another process started write block if required */
	if (PageWriteback(pp)) {
		unlock_page(pp);
		rangelock_exit(lr);

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(pp);

		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/* Clear the dirty flag the required locks are held */
	if (!clear_page_dirty_for_io(pp)) {
		unlock_page(pp);
		rangelock_exit(lr);
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/*
	 * Counterpart for redirty_page_for_writepage() above.  This page
	 * was in fact not skipped and should not be counted as if it were.
	 */
	wbc->pages_skipped--;
	set_page_writeback(pp);
	unlock_page(pp);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);

	err = dmu_tx_assign(tx, TXG_NOWAIT);
	if (err != 0) {
		if (err == ERESTART)
			dmu_tx_wait(tx);

		/* Re-dirty the page; writeback is abandoned for now. */
		dmu_tx_abort(tx);
		__set_page_dirty_nobuffers(pp);
		ClearPageError(pp);
		end_page_writeback(pp);
		rangelock_exit(lr);
		ZFS_EXIT(zfsvfs);
		return (err);
	}

	/* Copy the page contents into the DMU within the transaction. */
	va = kmap(pp);
	ASSERT3U(pglen, <=, PAGE_SIZE);
	dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx);
	kunmap(pp);

	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);

	/* Preserve the mtime and ctime provided by the inode */
	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
	ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
	zp->z_atime_dirty = 0;
	zp->z_seq++;

	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);

	/* The commit callback ends page writeback once data is stable. */
	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
	    zfs_putpage_commit_cb, pp);
	dmu_tx_commit(tx);

	rangelock_exit(lr);

	if (wbc->sync_mode != WB_SYNC_NONE) {
		/*
		 * Note that this is rarely called under writepages(), because
		 * writepages() normally handles the entire commit for
		 * performance reasons.
		 */
		zil_commit(zfsvfs->z_log, zp->z_id);
	}

	ZFS_EXIT(zfsvfs);
	return (err);
}
4607
/*
 * Update the system attributes when the inode has been dirtied.  For the
 * moment we only update the mode, atime, mtime, and ctime.
 */
int
zfs_dirty_inode(struct inode *ip, int flags)
{
	znode_t		*zp = ITOZ(ip);
	zfsvfs_t	*zfsvfs = ITOZSB(ip);
	dmu_tx_t	*tx;
	uint64_t	mode, atime[2], mtime[2], ctime[2];
	sa_bulk_attr_t	bulk[4];
	int		error = 0;
	int		cnt = 0;

	/* Nothing to persist on read-only filesystems or snapshots. */
	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
		return (0);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

#ifdef I_DIRTY_TIME
	/*
	 * This is the lazytime semantic introduced in Linux 4.0
	 * This flag will only be called from update_time when lazytime is set.
	 * (Note, I_DIRTY_SYNC will also set if not lazytime)
	 * Fortunately mtime and ctime are managed within ZFS itself, so we
	 * only need to dirty atime.
	 */
	if (flags == I_DIRTY_TIME) {
		zp->z_atime_dirty = 1;
		goto out;
	}
#endif

	tx = dmu_tx_create(zfsvfs->z_os);

	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out;
	}

	mutex_enter(&zp->z_lock);
	/* The atime is being written out now; it is no longer dirty. */
	zp->z_atime_dirty = 0;

	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);

	/* Preserve the mode, mtime and ctime provided by the inode */
	ZFS_TIME_ENCODE(&ip->i_atime, atime);
	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
	ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
	mode = ip->i_mode;

	zp->z_mode = mode;

	error = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
	mutex_exit(&zp->z_lock);

	dmu_tx_commit(tx);
out:
	ZFS_EXIT(zfsvfs);
	return (error);
}
8780c539 4678
/*
 * Called when the last reference to an in-memory inode is dropped.
 * Flushes a dirty atime to the SA layer (best effort) and tears down
 * the znode.  No return value; failures to persist atime are ignored.
 */
/*ARGSUSED*/
void
zfs_inactive(struct inode *ip)
{
	znode_t	*zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ITOZSB(ip);
	uint64_t atime[2];
	int error;
	int need_unlock = 0;

	/* Only read lock if we haven't already write locked, e.g. rollback */
	if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
		need_unlock = 1;
		rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
	}
	/* No SA handle means the znode was already torn down. */
	if (zp->z_sa_hdl == NULL) {
		if (need_unlock)
			rw_exit(&zfsvfs->z_teardown_inactive_lock);
		return;
	}

	/* Persist a deferred atime update, unless the file is unlinked. */
	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			/* Best effort: drop the atime update on tx failure. */
			dmu_tx_abort(tx);
		} else {
			ZFS_TIME_ENCODE(&ip->i_atime, atime);
			mutex_enter(&zp->z_lock);
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
			    (void *)&atime, sizeof (atime), tx);
			zp->z_atime_dirty = 0;
			mutex_exit(&zp->z_lock);
			dmu_tx_commit(tx);
		}
	}

	zfs_zinactive(zp);
	if (need_unlock)
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
}
4723
4724/*
4725 * Bounds-check the seek operation.
4726 *
3558fd73 4727 * IN: ip - inode seeking within
34dc7c2f
BB
4728 * ooff - old file offset
4729 * noffp - pointer to new file offset
4730 * ct - caller context
4731 *
4732 * RETURN: 0 if success
4733 * EINVAL if new offset invalid
4734 */
4735/* ARGSUSED */
3558fd73 4736int
9623f736 4737zfs_seek(struct inode *ip, offset_t ooff, offset_t *noffp)
34dc7c2f 4738{
3558fd73 4739 if (S_ISDIR(ip->i_mode))
34dc7c2f
BB
4740 return (0);
4741 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
4742}
4743
34dc7c2f 4744/*
dde471ef 4745 * Fill pages with data from the disk.
34dc7c2f
BB
4746 */
4747static int
dde471ef 4748zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
34dc7c2f 4749{
d1d7e268 4750 znode_t *zp = ITOZ(ip);
0037b49e 4751 zfsvfs_t *zfsvfs = ITOZSB(ip);
d1d7e268 4752 objset_t *os;
dde471ef 4753 struct page *cur_pp;
d1d7e268
MK
4754 u_offset_t io_off, total;
4755 size_t io_len;
4756 loff_t i_size;
4757 unsigned page_idx;
4758 int err;
34dc7c2f 4759
0037b49e 4760 os = zfsvfs->z_os;
8b1899d3 4761 io_len = nr_pages << PAGE_SHIFT;
dde471ef
PJ
4762 i_size = i_size_read(ip);
4763 io_off = page_offset(pl[0]);
4764
4765 if (io_off + io_len > i_size)
4766 io_len = i_size - io_off;
34dc7c2f
BB
4767
4768 /*
dde471ef 4769 * Iterate over list of pages and read each page individually.
34dc7c2f 4770 */
dde471ef 4771 page_idx = 0;
34dc7c2f 4772 for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
d164b209
BB
4773 caddr_t va;
4774
540c3927 4775 cur_pp = pl[page_idx++];
dde471ef 4776 va = kmap(cur_pp);
9babb374
BB
4777 err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
4778 DMU_READ_PREFETCH);
dde471ef 4779 kunmap(cur_pp);
34dc7c2f 4780 if (err) {
b128c09f
BB
4781 /* convert checksum errors into IO errors */
4782 if (err == ECKSUM)
2e528b49 4783 err = SET_ERROR(EIO);
34dc7c2f
BB
4784 return (err);
4785 }
34dc7c2f 4786 }
d164b209 4787
34dc7c2f
BB
4788 return (0);
4789}
4790
4791/*
dde471ef 4792 * Uses zfs_fillpage to read data from the file and fill the pages.
34dc7c2f 4793 *
dde471ef
PJ
4794 * IN: ip - inode of file to get data from.
4795 * pl - list of pages to read
4796 * nr_pages - number of pages to read
34dc7c2f 4797 *
d3cc8b15 4798 * RETURN: 0 on success, error code on failure.
34dc7c2f
BB
4799 *
4800 * Timestamps:
4801 * vp - atime updated
4802 */
4803/* ARGSUSED */
dde471ef
PJ
4804int
4805zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
34dc7c2f 4806{
dde471ef 4807 znode_t *zp = ITOZ(ip);
0037b49e 4808 zfsvfs_t *zfsvfs = ITOZSB(ip);
dde471ef 4809 int err;
d164b209 4810
d164b209
BB
4811 if (pl == NULL)
4812 return (0);
34dc7c2f 4813
0037b49e 4814 ZFS_ENTER(zfsvfs);
34dc7c2f
BB
4815 ZFS_VERIFY_ZP(zp);
4816
dde471ef 4817 err = zfs_fillpage(ip, pl, nr_pages);
34dc7c2f 4818
0037b49e 4819 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
4820 return (err);
4821}
4822
4823/*
e2e7aa2d 4824 * Check ZFS specific permissions to memory map a section of a file.
34dc7c2f 4825 *
e2e7aa2d
BB
4826 * IN: ip - inode of the file to mmap
4827 * off - file offset
4828 * addrp - start address in memory region
4829 * len - length of memory region
4830 * vm_flags- address flags
34dc7c2f 4831 *
e2e7aa2d
BB
4832 * RETURN: 0 if success
4833 * error code if failure
34dc7c2f
BB
4834 */
4835/*ARGSUSED*/
e2e7aa2d
BB
4836int
4837zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
4838 unsigned long vm_flags)
34dc7c2f 4839{
e2e7aa2d 4840 znode_t *zp = ITOZ(ip);
0037b49e 4841 zfsvfs_t *zfsvfs = ITOZSB(ip);
34dc7c2f 4842
0037b49e 4843 ZFS_ENTER(zfsvfs);
34dc7c2f
BB
4844 ZFS_VERIFY_ZP(zp);
4845
e2e7aa2d 4846 if ((vm_flags & VM_WRITE) && (zp->z_pflags &
428870ff 4847 (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
0037b49e 4848 ZFS_EXIT(zfsvfs);
2e528b49 4849 return (SET_ERROR(EPERM));
34dc7c2f
BB
4850 }
4851
e2e7aa2d 4852 if ((vm_flags & (VM_READ | VM_EXEC)) &&
428870ff 4853 (zp->z_pflags & ZFS_AV_QUARANTINED)) {
0037b49e 4854 ZFS_EXIT(zfsvfs);
2e528b49 4855 return (SET_ERROR(EACCES));
34dc7c2f
BB
4856 }
4857
34dc7c2f 4858 if (off < 0 || len > MAXOFFSET_T - off) {
0037b49e 4859 ZFS_EXIT(zfsvfs);
2e528b49 4860 return (SET_ERROR(ENXIO));
34dc7c2f
BB
4861 }
4862
0037b49e 4863 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
4864 return (0);
4865}
4866
3558fd73
BB
4867/*
4868 * convoff - converts the given data (start, whence) to the
4869 * given whence.
4870 */
4871int
4872convoff(struct inode *ip, flock64_t *lckdat, int whence, offset_t offset)
4873{
5484965a 4874 vattr_t vap;
3558fd73
BB
4875 int error;
4876
126d0fa7 4877 if ((lckdat->l_whence == SEEK_END) || (whence == SEEK_END)) {
d95a5980 4878 if ((error = zfs_getattr(ip, &vap, 0, CRED())))
3558fd73
BB
4879 return (error);
4880 }
4881
4882 switch (lckdat->l_whence) {
126d0fa7 4883 case SEEK_CUR:
3558fd73
BB
4884 lckdat->l_start += offset;
4885 break;
126d0fa7 4886 case SEEK_END:
5484965a 4887 lckdat->l_start += vap.va_size;
3558fd73 4888 /* FALLTHRU */
126d0fa7 4889 case SEEK_SET:
3558fd73
BB
4890 break;
4891 default:
2e528b49 4892 return (SET_ERROR(EINVAL));
3558fd73
BB
4893 }
4894
4895 if (lckdat->l_start < 0)
2e528b49 4896 return (SET_ERROR(EINVAL));
3558fd73
BB
4897
4898 switch (whence) {
126d0fa7 4899 case SEEK_CUR:
3558fd73
BB
4900 lckdat->l_start -= offset;
4901 break;
126d0fa7 4902 case SEEK_END:
5484965a 4903 lckdat->l_start -= vap.va_size;
3558fd73 4904 /* FALLTHRU */
126d0fa7 4905 case SEEK_SET:
3558fd73
BB
4906 break;
4907 default:
2e528b49 4908 return (SET_ERROR(EINVAL));
3558fd73
BB
4909 }
4910
4911 lckdat->l_whence = (short)whence;
4912 return (0);
4913}
4914
34dc7c2f
BB
4915/*
4916 * Free or allocate space in a file. Currently, this function only
4917 * supports the `F_FREESP' command. However, this command is somewhat
4918 * misnamed, as its functionality includes the ability to allocate as
4919 * well as free space.
4920 *
3558fd73 4921 * IN: ip - inode of file to free data in.
34dc7c2f
BB
4922 * cmd - action to take (only F_FREESP supported).
4923 * bfp - section of file to free/alloc.
4924 * flag - current file open mode flags.
4925 * offset - current file offset.
a35c1207 4926 * cr - credentials of caller.
34dc7c2f 4927 *
d3cc8b15 4928 * RETURN: 0 on success, error code on failure.
34dc7c2f
BB
4929 *
4930 * Timestamps:
3558fd73 4931 * ip - ctime|mtime updated
34dc7c2f
BB
4932 */
4933/* ARGSUSED */
e5c39b95 4934int
3558fd73
BB
4935zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag,
4936 offset_t offset, cred_t *cr)
34dc7c2f 4937{
3558fd73 4938 znode_t *zp = ITOZ(ip);
0037b49e 4939 zfsvfs_t *zfsvfs = ITOZSB(ip);
34dc7c2f
BB
4940 uint64_t off, len;
4941 int error;
4942
0037b49e 4943 ZFS_ENTER(zfsvfs);
34dc7c2f
BB
4944 ZFS_VERIFY_ZP(zp);
4945
34dc7c2f 4946 if (cmd != F_FREESP) {
0037b49e 4947 ZFS_EXIT(zfsvfs);
2e528b49 4948 return (SET_ERROR(EINVAL));
34dc7c2f
BB
4949 }
4950
f3c9dca0
MT
4951 /*
4952 * Callers might not be able to detect properly that we are read-only,
4953 * so check it explicitly here.
4954 */
0037b49e
BB
4955 if (zfs_is_readonly(zfsvfs)) {
4956 ZFS_EXIT(zfsvfs);
f3c9dca0
MT
4957 return (SET_ERROR(EROFS));
4958 }
4959
126d0fa7 4960 if ((error = convoff(ip, bfp, SEEK_SET, offset))) {
0037b49e 4961 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
4962 return (error);
4963 }
4964
4965 if (bfp->l_len < 0) {
0037b49e 4966 ZFS_EXIT(zfsvfs);
2e528b49 4967 return (SET_ERROR(EINVAL));
34dc7c2f
BB
4968 }
4969
aec69371
ED
4970 /*
4971 * Permissions aren't checked on Solaris because on this OS
4972 * zfs_space() can only be called with an opened file handle.
4973 * On Linux we can get here through truncate_range() which
4974 * operates directly on inodes, so we need to check access rights.
4975 */
4976 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
0037b49e 4977 ZFS_EXIT(zfsvfs);
aec69371
ED
4978 return (error);
4979 }
4980
34dc7c2f
BB
4981 off = bfp->l_start;
4982 len = bfp->l_len; /* 0 means from off to end of file */
4983
b128c09f 4984 error = zfs_freesp(zp, off, len, flag, TRUE);
34dc7c2f 4985
0037b49e 4986 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
4987 return (error);
4988}
4989
4990/*ARGSUSED*/
e5c39b95 4991int
3558fd73 4992zfs_fid(struct inode *ip, fid_t *fidp)
34dc7c2f 4993{
3558fd73 4994 znode_t *zp = ITOZ(ip);
0037b49e 4995 zfsvfs_t *zfsvfs = ITOZSB(ip);
34dc7c2f 4996 uint32_t gen;
428870ff 4997 uint64_t gen64;
34dc7c2f
BB
4998 uint64_t object = zp->z_id;
4999 zfid_short_t *zfid;
428870ff 5000 int size, i, error;
34dc7c2f 5001
0037b49e 5002 ZFS_ENTER(zfsvfs);
34dc7c2f 5003 ZFS_VERIFY_ZP(zp);
428870ff 5004
0037b49e 5005 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
428870ff 5006 &gen64, sizeof (uint64_t))) != 0) {
0037b49e 5007 ZFS_EXIT(zfsvfs);
428870ff
BB
5008 return (error);
5009 }
5010
5011 gen = (uint32_t)gen64;
34dc7c2f 5012
9b77d1c9 5013 size = SHORT_FID_LEN;
34dc7c2f
BB
5014
5015 zfid = (zfid_short_t *)fidp;
5016
5017 zfid->zf_len = size;
5018
5019 for (i = 0; i < sizeof (zfid->zf_object); i++)
5020 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
5021
5022 /* Must have a non-zero generation number to distinguish from .zfs */
5023 if (gen == 0)
5024 gen = 1;
5025 for (i = 0; i < sizeof (zfid->zf_gen); i++)
5026 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
5027
0037b49e 5028 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
5029 return (0);
5030}
5031
34dc7c2f 5032/*ARGSUSED*/
e5c39b95 5033int
3558fd73 5034zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr)
34dc7c2f 5035{
3558fd73 5036 znode_t *zp = ITOZ(ip);
0037b49e 5037 zfsvfs_t *zfsvfs = ITOZSB(ip);
34dc7c2f
BB
5038 int error;
5039 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
5040
0037b49e 5041 ZFS_ENTER(zfsvfs);
34dc7c2f
BB
5042 ZFS_VERIFY_ZP(zp);
5043 error = zfs_getacl(zp, vsecp, skipaclchk, cr);
0037b49e 5044 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
5045
5046 return (error);
5047}
5048
5049/*ARGSUSED*/
e5c39b95 5050int
3558fd73 5051zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr)
34dc7c2f 5052{
3558fd73 5053 znode_t *zp = ITOZ(ip);
0037b49e 5054 zfsvfs_t *zfsvfs = ITOZSB(ip);
34dc7c2f
BB
5055 int error;
5056 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
0037b49e 5057 zilog_t *zilog = zfsvfs->z_log;
34dc7c2f 5058
0037b49e 5059 ZFS_ENTER(zfsvfs);
34dc7c2f 5060 ZFS_VERIFY_ZP(zp);
428870ff 5061
34dc7c2f 5062 error = zfs_setacl(zp, vsecp, skipaclchk, cr);
428870ff 5063
0037b49e 5064 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
572e2857 5065 zil_commit(zilog, 0);
428870ff 5066
0037b49e 5067 ZFS_EXIT(zfsvfs);
34dc7c2f
BB
5068 return (error);
5069}
5070
3558fd73 5071#ifdef HAVE_UIO_ZEROCOPY
428870ff
BB
5072/*
5073 * Tunable, both must be a power of 2.
5074 *
5075 * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf
5076 * zcr_blksz_max: if set to less than the file block size, allow loaning out of
3558fd73 5077 * an arcbuf for a partial block read
428870ff
BB
5078 */
5079int zcr_blksz_min = (1 << 10); /* 1K */
5080int zcr_blksz_max = (1 << 17); /* 128K */
5081
5082/*ARGSUSED*/
5083static int
3558fd73 5084zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
428870ff 5085{
3558fd73 5086 znode_t *zp = ITOZ(ip);
0037b49e
BB
5087 zfsvfs_t *zfsvfs = ITOZSB(ip);
5088 int max_blksz = zfsvfs->z_max_blksz;
428870ff
BB
5089 uio_t *uio = &xuio->xu_uio;
5090 ssize_t size = uio->uio_resid;
5091 offset_t offset = uio->uio_loffset;
5092 int blksz;
5093 int fullblk, i;
5094 arc_buf_t *abuf;
5095 ssize_t maxsize;
5096 int preamble, postamble;
5097
5098 if (xuio->xu_type != UIOTYPE_ZEROCOPY)
2e528b49 5099 return (SET_ERROR(EINVAL));
428870ff 5100
0037b49e 5101 ZFS_ENTER(zfsvfs);
428870ff
BB
5102 ZFS_VERIFY_ZP(zp);
5103 switch (ioflag) {
5104 case UIO_WRITE:
5105 /*
5106 * Loan out an arc_buf for write if write size is bigger than
5107 * max_blksz, and the file's block size is also max_blksz.
5108 */
5109 blksz = max_blksz;
5110 if (size < blksz || zp->z_blksz != blksz) {
0037b49e 5111 ZFS_EXIT(zfsvfs);
2e528b49 5112 return (SET_ERROR(EINVAL));
428870ff
BB
5113 }
5114 /*
5115 * Caller requests buffers for write before knowing where the
5116 * write offset might be (e.g. NFS TCP write).
5117 */
5118 if (offset == -1) {
5119 preamble = 0;
5120 } else {
5121 preamble = P2PHASE(offset, blksz);
5122 if (preamble) {
5123 preamble = blksz - preamble;
5124 size -= preamble;
5125 }
5126 }
5127
5128 postamble = P2PHASE(size, blksz);
5129 size -= postamble;
5130
5131 fullblk = size / blksz;
5132 (void) dmu_xuio_init(xuio,
5133 (preamble != 0) + fullblk + (postamble != 0));
428870ff
BB
5134
5135 /*
5136 * Have to fix iov base/len for partial buffers. They
5137 * currently represent full arc_buf's.
5138 */
5139 if (preamble) {
5140 /* data begins in the middle of the arc_buf */
5141 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
5142 blksz);
5143 ASSERT(abuf);
5144 (void) dmu_xuio_add(xuio, abuf,
5145 blksz - preamble, preamble);
5146 }
5147
5148 for (i = 0; i < fullblk; i++) {
5149 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
5150 blksz);
5151 ASSERT(abuf);
5152 (void) dmu_xuio_add(xuio, abuf, 0, blksz);
5153 }
5154
5155 if (postamble) {
5156 /* data ends in the middle of the arc_buf */
5157 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
5158 blksz);
5159 ASSERT(abuf);
5160 (void) dmu_xuio_add(xuio, abuf, 0, postamble);
5161 }
5162 break;
5163 case UIO_READ:
5164 /*
5165 * Loan out an arc_buf for read if the read size is larger than
5166 * the current file block size. Block alignment is not
5167 * considered. Partial arc_buf will be loaned out for read.
5168 */
5169 blksz = zp->z_blksz;
5170 if (blksz < zcr_blksz_min)
5171 blksz = zcr_blksz_min;
5172 if (blksz > zcr_blksz_max)
5173 blksz = zcr_blksz_max;
5174 /* avoid potential complexity of dealing with it */
5175 if (blksz > max_blksz) {
0037b49e 5176 ZFS_EXIT(zfsvfs);
2e528b49 5177 return (SET_ERROR(EINVAL));
428870ff
BB
5178 }
5179
5180 maxsize = zp->z_size - uio->uio_loffset;
5181 if (size > maxsize)
5182 size = maxsize;
5183
3558fd73 5184 if (size < blksz) {
0037b49e 5185 ZFS_EXIT(zfsvfs);
2e528b49 5186 return (SET_ERROR(EINVAL));
428870ff
BB
5187 }
5188 break;
5189 default:
0037b49e 5190 ZFS_EXIT(zfsvfs);
2e528b49 5191 return (SET_ERROR(EINVAL));
428870ff
BB
5192 }
5193
5194 uio->uio_extflg = UIO_XUIO;
5195 XUIO_XUZC_RW(xuio) = ioflag;
0037b49e 5196 ZFS_EXIT(zfsvfs);
428870ff
BB
5197 return (0);
5198}
5199
5200/*ARGSUSED*/
5201static int
3558fd73 5202zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr)
428870ff
BB
5203{
5204 int i;
5205 arc_buf_t *abuf;
5206 int ioflag = XUIO_XUZC_RW(xuio);
5207
5208 ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY);
5209
5210 i = dmu_xuio_cnt(xuio);
5211 while (i-- > 0) {
5212 abuf = dmu_xuio_arcbuf(xuio, i);
5213 /*
5214 * if abuf == NULL, it must be a write buffer
5215 * that has been returned in zfs_write().
5216 */
5217 if (abuf)
5218 dmu_return_arcbuf(abuf);
5219 ASSERT(abuf || ioflag == UIO_WRITE);
5220 }
5221
5222 dmu_xuio_fini(xuio);
5223 return (0);
5224}
3558fd73 5225#endif /* HAVE_UIO_ZEROCOPY */
c409e464 5226
#if defined(_KERNEL)
/* Symbols exported for use by the ZPL and other kernel consumers. */
EXPORT_SYMBOL(zfs_open);
EXPORT_SYMBOL(zfs_close);
EXPORT_SYMBOL(zfs_read);
EXPORT_SYMBOL(zfs_write);
EXPORT_SYMBOL(zfs_access);
EXPORT_SYMBOL(zfs_lookup);
EXPORT_SYMBOL(zfs_create);
EXPORT_SYMBOL(zfs_tmpfile);
EXPORT_SYMBOL(zfs_remove);
EXPORT_SYMBOL(zfs_mkdir);
EXPORT_SYMBOL(zfs_rmdir);
EXPORT_SYMBOL(zfs_readdir);
EXPORT_SYMBOL(zfs_fsync);
EXPORT_SYMBOL(zfs_getattr);
EXPORT_SYMBOL(zfs_getattr_fast);
EXPORT_SYMBOL(zfs_setattr);
EXPORT_SYMBOL(zfs_rename);
EXPORT_SYMBOL(zfs_symlink);
EXPORT_SYMBOL(zfs_readlink);
EXPORT_SYMBOL(zfs_link);
EXPORT_SYMBOL(zfs_inactive);
EXPORT_SYMBOL(zfs_space);
EXPORT_SYMBOL(zfs_fid);
EXPORT_SYMBOL(zfs_getsecattr);
EXPORT_SYMBOL(zfs_setsecattr);
EXPORT_SYMBOL(zfs_getpage);
EXPORT_SYMBOL(zfs_putpage);
EXPORT_SYMBOL(zfs_dirty_inode);
EXPORT_SYMBOL(zfs_map);

/* CSTYLED */
module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");

module_param(zfs_read_chunk_size, long, 0644);
MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk");
#endif