4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 * Copyright 2017 Nexenta Systems, Inc.
29 /* Portions Copyright 2007 Jeremy Teo */
30 /* Portions Copyright 2010 Robert Milkowski */
32 #include <sys/types.h>
33 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/resource.h>
39 #include <sys/endian.h>
41 #include <sys/vnode.h>
42 #include <sys/dirent.h>
46 #include <sys/taskq.h>
48 #include <sys/atomic.h>
49 #include <sys/namei.h>
51 #include <sys/cmn_err.h>
53 #include <sys/sysproto.h>
54 #include <sys/errno.h>
55 #include <sys/unistd.h>
56 #include <sys/zfs_dir.h>
57 #include <sys/zfs_ioctl.h>
58 #include <sys/fs/zfs.h>
60 #include <sys/dmu_objset.h>
66 #include <sys/policy.h>
67 #include <sys/sunddi.h>
68 #include <sys/filio.h>
70 #include <sys/zfs_ctldir.h>
71 #include <sys/zfs_fuid.h>
72 #include <sys/zfs_quota.h>
73 #include <sys/zfs_sa.h>
74 #include <sys/zfs_rlock.h>
75 #include <sys/extdirent.h>
78 #include <sys/sched.h>
80 #include <sys/vmmeter.h>
81 #include <vm/vm_param.h>
83 #include <sys/zfs_vnops.h>
85 #include <vm/vm_object.h>
87 #include <sys/extattr.h>
91 #define VN_OPEN_INVFS 0x0
94 #if __FreeBSD_version >= 1300047
95 #define vm_page_wire_lock(pp)
96 #define vm_page_wire_unlock(pp)
98 #define vm_page_wire_lock(pp) vm_page_lock(pp)
99 #define vm_page_wire_unlock(pp) vm_page_unlock(pp)
103 zfs_u8_validate(const char *u8str
, size_t n
, char **list
, int flag
, int *errnum
)
106 return (u8_validate(__DECONST(char *, u8str
), n
, list
, flag
, errnum
));
108 #define u8_validate zfs_u8_validate
110 #ifdef DEBUG_VFS_LOCKS
111 #define VNCHECKREF(vp) \
112 VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp, \
113 ("%s: wrong ref counts", __func__));
115 #define VNCHECKREF(vp)
121 * Each vnode op performs some logical unit of work. To do this, the ZPL must
122 * properly lock its in-core state, create a DMU transaction, do the work,
123 * record this work in the intent log (ZIL), commit the DMU transaction,
124 * and wait for the intent log to commit if it is a synchronous operation.
125 * Moreover, the vnode ops must work in both normal and log replay context.
126 * The ordering of events is important to avoid deadlocks and references
127 * to freed memory. The example below illustrates the following Big Rules:
129 * (1) A check must be made in each zfs thread for a mounted file system.
130 * This is done avoiding races using ZFS_ENTER(zfsvfs).
131 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
132 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
133 * can return EIO from the calling function.
135 * (2) VN_RELE() should always be the last thing except for zil_commit()
136 * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
137 * First, if it's the last reference, the vnode/znode
138 * can be freed, so the zp may point to freed memory. Second, the last
139 * reference will call zfs_zinactive(), which may induce a lot of work --
140 * pushing cached pages (which acquires range locks) and syncing out
141 * cached atime changes. Third, zfs_zinactive() may require a new tx,
142 * which could deadlock the system if you were already holding one.
143 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
145 * (3) All range locks must be grabbed before calling dmu_tx_assign(),
146 * as they can span dmu_tx_assign() calls.
148 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
149 * dmu_tx_assign(). This is critical because we don't want to block
150 * while holding locks.
152 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This
153 * reduces lock contention and CPU usage when we must wait (note that if
154 * throughput is constrained by the storage, nearly every transaction
157 * Note, in particular, that if a lock is sometimes acquired before
158 * the tx assigns, and sometimes after (e.g. z_lock), then failing
159 * to use a non-blocking assign can deadlock the system. The scenario:
161 * Thread A has grabbed a lock before calling dmu_tx_assign().
162 * Thread B is in an already-assigned tx, and blocks for this lock.
163 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
164 * forever, because the previous txg can't quiesce until B's tx commits.
166 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
167 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
168 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
169 * to indicate that this operation has already called dmu_tx_wait().
170 * This will ensure that we don't retry forever, waiting a short bit
173 * (5) If the operation succeeded, generate the intent log entry for it
174 * before dropping locks. This ensures that the ordering of events
175 * in the intent log matches the order in which they actually occurred.
176 * During ZIL replay the zfs_log_* functions will update the sequence
177 * number to indicate the zil transaction has replayed.
179 * (6) At the end of each vnode op, the DMU tx must always commit,
180 * regardless of whether there were any errors.
182 * (7) After dropping all locks, invoke zil_commit(zilog, foid)
183 * to ensure that synchronous semantics are provided when necessary.
185 * In general, this is how things should be ordered in each vnode op:
187 * ZFS_ENTER(zfsvfs); // exit if unmounted
189 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD())
190 * rw_enter(...); // grab any other locks you need
191 * tx = dmu_tx_create(...); // get DMU tx
192 * dmu_tx_hold_*(); // hold each object you might modify
193 * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
195 * rw_exit(...); // drop locks
196 * zfs_dirent_unlock(dl); // unlock directory entry
197 * VN_RELE(...); // release held vnodes
198 * if (error == ERESTART) {
204 * dmu_tx_abort(tx); // abort DMU tx
205 * ZFS_EXIT(zfsvfs); // finished in zfs
206 * return (error); // really out of space
208 * error = do_real_work(); // do whatever this VOP does
210 * zfs_log_*(...); // on success, make ZIL entry
211 * dmu_tx_commit(tx); // commit DMU tx -- error or not
212 * rw_exit(...); // drop locks
213 * zfs_dirent_unlock(dl); // unlock directory entry
214 * VN_RELE(...); // release held vnodes
215 * zil_commit(zilog, foid); // synchronous when necessary
216 * ZFS_EXIT(zfsvfs); // finished in zfs
217 * return (error); // done, report error
222 zfs_open(vnode_t
**vpp
, int flag
, cred_t
*cr
)
224 znode_t
*zp
= VTOZ(*vpp
);
225 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
230 if ((flag
& FWRITE
) && (zp
->z_pflags
& ZFS_APPENDONLY
) &&
231 ((flag
& FAPPEND
) == 0)) {
233 return (SET_ERROR(EPERM
));
236 if (!zfs_has_ctldir(zp
) && zp
->z_zfsvfs
->z_vscan
&&
237 ZTOV(zp
)->v_type
== VREG
&&
238 !(zp
->z_pflags
& ZFS_AV_QUARANTINED
) && zp
->z_size
> 0) {
239 if (fs_vscan(*vpp
, cr
, 0) != 0) {
241 return (SET_ERROR(EACCES
));
245 /* Keep a count of the synchronous opens in the znode */
246 if (flag
& (FSYNC
| FDSYNC
))
247 atomic_inc_32(&zp
->z_sync_cnt
);
255 zfs_close(vnode_t
*vp
, int flag
, int count
, offset_t offset
, cred_t
*cr
)
257 znode_t
*zp
= VTOZ(vp
);
258 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
263 /* Decrement the synchronous opens in the znode */
264 if ((flag
& (FSYNC
| FDSYNC
)) && (count
== 1))
265 atomic_dec_32(&zp
->z_sync_cnt
);
267 if (!zfs_has_ctldir(zp
) && zp
->z_zfsvfs
->z_vscan
&&
268 ZTOV(zp
)->v_type
== VREG
&&
269 !(zp
->z_pflags
& ZFS_AV_QUARANTINED
) && zp
->z_size
> 0)
270 VERIFY(fs_vscan(vp
, cr
, 1) == 0);
277 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
278 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
281 zfs_holey(vnode_t
*vp
, ulong_t cmd
, offset_t
*off
)
283 znode_t
*zp
= VTOZ(vp
);
284 uint64_t noff
= (uint64_t)*off
; /* new offset */
289 file_sz
= zp
->z_size
;
290 if (noff
>= file_sz
) {
291 return (SET_ERROR(ENXIO
));
294 if (cmd
== _FIO_SEEK_HOLE
)
299 error
= dmu_offset_next(zp
->z_zfsvfs
->z_os
, zp
->z_id
, hole
, &noff
);
302 return (SET_ERROR(ENXIO
));
304 /* file was dirty, so fall back to using generic logic */
305 if (error
== EBUSY
) {
313 * We could find a hole that begins after the logical end-of-file,
314 * because dmu_offset_next() only works on whole blocks. If the
315 * EOF falls mid-block, then indicate that the "virtual hole"
316 * at the end of the file begins at the logical EOF, rather than
317 * at the end of the last block.
319 if (noff
> file_sz
) {
332 zfs_ioctl(vnode_t
*vp
, ulong_t com
, intptr_t data
, int flag
, cred_t
*cred
,
346 * The following two ioctls are used by bfu. Faking out,
347 * necessary to avoid bfu errors.
359 off
= *(offset_t
*)data
;
361 zfsvfs
= zp
->z_zfsvfs
;
365 /* offset parameter is in/out */
366 error
= zfs_holey(vp
, com
, &off
);
370 *(offset_t
*)data
= off
;
374 return (SET_ERROR(ENOTTY
));
378 page_busy(vnode_t
*vp
, int64_t start
, int64_t off
, int64_t nbytes
)
385 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
386 * aligned boundaries, if the range is not aligned. As a result a
387 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
388 * It may happen that all DEV_BSIZE subranges are marked clean and thus
389 * the whole page would be considered clean despite having some
391 * For this reason we should shrink the range to DEV_BSIZE aligned
392 * boundaries before calling vm_page_clear_dirty.
394 end
= rounddown2(off
+ nbytes
, DEV_BSIZE
);
395 off
= roundup2(off
, DEV_BSIZE
);
399 zfs_vmobject_assert_wlocked_12(obj
);
400 #if __FreeBSD_version < 1300050
402 if ((pp
= vm_page_lookup(obj
, OFF_TO_IDX(start
))) != NULL
&&
404 if (vm_page_xbusied(pp
)) {
406 * Reference the page before unlocking and
407 * sleeping so that the page daemon is less
408 * likely to reclaim it.
410 vm_page_reference(pp
);
412 zfs_vmobject_wunlock(obj
);
413 vm_page_busy_sleep(pp
, "zfsmwb", true);
414 zfs_vmobject_wlock(obj
);
418 } else if (pp
!= NULL
) {
423 ASSERT3U(pp
->valid
, ==, VM_PAGE_BITS_ALL
);
424 vm_object_pip_add(obj
, 1);
425 pmap_remove_write(pp
);
427 vm_page_clear_dirty(pp
, off
, nbytes
);
432 vm_page_grab_valid_unlocked(&pp
, obj
, OFF_TO_IDX(start
),
433 VM_ALLOC_NOCREAT
| VM_ALLOC_SBUSY
| VM_ALLOC_NORMAL
|
436 ASSERT3U(pp
->valid
, ==, VM_PAGE_BITS_ALL
);
437 vm_object_pip_add(obj
, 1);
438 pmap_remove_write(pp
);
440 vm_page_clear_dirty(pp
, off
, nbytes
);
447 page_unbusy(vm_page_t pp
)
451 #if __FreeBSD_version >= 1300041
452 vm_object_pip_wakeup(pp
->object
);
454 vm_object_pip_subtract(pp
->object
, 1);
458 #if __FreeBSD_version > 1300051
460 page_hold(vnode_t
*vp
, int64_t start
)
466 vm_page_grab_valid_unlocked(&m
, obj
, OFF_TO_IDX(start
),
467 VM_ALLOC_NOCREAT
| VM_ALLOC_WIRED
| VM_ALLOC_IGN_SBUSY
|
473 page_hold(vnode_t
*vp
, int64_t start
)
479 zfs_vmobject_assert_wlocked(obj
);
482 if ((pp
= vm_page_lookup(obj
, OFF_TO_IDX(start
))) != NULL
&&
484 if (vm_page_xbusied(pp
)) {
486 * Reference the page before unlocking and
487 * sleeping so that the page daemon is less
488 * likely to reclaim it.
490 vm_page_reference(pp
);
492 zfs_vmobject_wunlock(obj
);
493 vm_page_busy_sleep(pp
, "zfsmwb", true);
494 zfs_vmobject_wlock(obj
);
498 ASSERT3U(pp
->valid
, ==, VM_PAGE_BITS_ALL
);
499 vm_page_wire_lock(pp
);
501 vm_page_wire_unlock(pp
);
512 page_unhold(vm_page_t pp
)
515 vm_page_wire_lock(pp
);
516 #if __FreeBSD_version >= 1300035
517 vm_page_unwire(pp
, PQ_ACTIVE
);
521 vm_page_wire_unlock(pp
);
525 * When a file is memory mapped, we must keep the IO data synchronized
526 * between the DMU cache and the memory mapped pages. What this means:
528 * On Write: If we find a memory mapped page, we write to *both*
529 * the page and the dmu buffer.
532 update_pages(vnode_t
*vp
, int64_t start
, int len
, objset_t
*os
, uint64_t oid
,
533 int segflg
, dmu_tx_t
*tx
)
540 ASSERT(segflg
!= UIO_NOCOPY
);
541 ASSERT(vp
->v_mount
!= NULL
);
545 off
= start
& PAGEOFFSET
;
546 zfs_vmobject_wlock_12(obj
);
547 #if __FreeBSD_version >= 1300041
548 vm_object_pip_add(obj
, 1);
550 for (start
&= PAGEMASK
; len
> 0; start
+= PAGESIZE
) {
552 int nbytes
= imin(PAGESIZE
- off
, len
);
554 if ((pp
= page_busy(vp
, start
, off
, nbytes
)) != NULL
) {
555 zfs_vmobject_wunlock_12(obj
);
557 va
= zfs_map_page(pp
, &sf
);
558 (void) dmu_read(os
, oid
, start
+off
, nbytes
,
559 va
+off
, DMU_READ_PREFETCH
);
562 zfs_vmobject_wlock_12(obj
);
568 #if __FreeBSD_version >= 1300041
569 vm_object_pip_wakeup(obj
);
571 vm_object_pip_wakeupn(obj
, 0);
573 zfs_vmobject_wunlock_12(obj
);
577 * Read with UIO_NOCOPY flag means that sendfile(2) requests
578 * ZFS to populate a range of page cache pages with data.
580 * NOTE: this function could be optimized to pre-allocate
581 * all pages in advance, drain exclusive busy on all of them,
582 * map them into contiguous KVA region and populate them
583 * in one single dmu_read() call.
586 mappedread_sf(vnode_t
*vp
, int nbytes
, uio_t
*uio
)
588 znode_t
*zp
= VTOZ(vp
);
589 objset_t
*os
= zp
->z_zfsvfs
->z_os
;
598 ASSERT(uio
->uio_segflg
== UIO_NOCOPY
);
599 ASSERT(vp
->v_mount
!= NULL
);
602 ASSERT((uio
->uio_loffset
& PAGEOFFSET
) == 0);
604 zfs_vmobject_wlock_12(obj
);
605 for (start
= uio
->uio_loffset
; len
> 0; start
+= PAGESIZE
) {
606 int bytes
= MIN(PAGESIZE
, len
);
608 pp
= vm_page_grab_unlocked(obj
, OFF_TO_IDX(start
),
609 VM_ALLOC_SBUSY
| VM_ALLOC_NORMAL
| VM_ALLOC_IGN_SBUSY
);
610 if (vm_page_none_valid(pp
)) {
611 zfs_vmobject_wunlock_12(obj
);
612 va
= zfs_map_page(pp
, &sf
);
613 error
= dmu_read(os
, zp
->z_id
, start
, bytes
, va
,
615 if (bytes
!= PAGESIZE
&& error
== 0)
616 bzero(va
+ bytes
, PAGESIZE
- bytes
);
618 zfs_vmobject_wlock_12(obj
);
619 #if __FreeBSD_version >= 1300081
622 vm_page_activate(pp
);
623 vm_page_do_sunbusy(pp
);
625 zfs_vmobject_wlock(obj
);
626 if (!vm_page_wired(pp
) && pp
->valid
== 0 &&
627 vm_page_busy_tryupgrade(pp
))
631 zfs_vmobject_wunlock(obj
);
634 vm_page_do_sunbusy(pp
);
637 if (pp
->wire_count
== 0 && pp
->valid
== 0 &&
641 pp
->valid
= VM_PAGE_BITS_ALL
;
642 vm_page_activate(pp
);
647 ASSERT3U(pp
->valid
, ==, VM_PAGE_BITS_ALL
);
648 vm_page_do_sunbusy(pp
);
652 uio
->uio_resid
-= bytes
;
653 uio
->uio_offset
+= bytes
;
656 zfs_vmobject_wunlock_12(obj
);
661 * When a file is memory mapped, we must keep the IO data synchronized
662 * between the DMU cache and the memory mapped pages. What this means:
664 * On Read: We "read" preferentially from memory mapped pages,
665 * else we default from the dmu buffer.
667 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
668 * the file is memory mapped.
671 mappedread(vnode_t
*vp
, int nbytes
, uio_t
*uio
)
673 znode_t
*zp
= VTOZ(vp
);
680 ASSERT(vp
->v_mount
!= NULL
);
684 start
= uio
->uio_loffset
;
685 off
= start
& PAGEOFFSET
;
686 zfs_vmobject_wlock_12(obj
);
687 for (start
&= PAGEMASK
; len
> 0; start
+= PAGESIZE
) {
689 uint64_t bytes
= MIN(PAGESIZE
- off
, len
);
691 if ((pp
= page_hold(vp
, start
))) {
695 zfs_vmobject_wunlock_12(obj
);
696 va
= zfs_map_page(pp
, &sf
);
697 error
= vn_io_fault_uiomove(va
+ off
, bytes
, uio
);
699 zfs_vmobject_wlock_12(obj
);
702 zfs_vmobject_wunlock_12(obj
);
703 error
= dmu_read_uio_dbuf(sa_get_db(zp
->z_sa_hdl
),
705 zfs_vmobject_wlock_12(obj
);
712 zfs_vmobject_wunlock_12(obj
);
716 offset_t zfs_read_chunk_size
= 1024 * 1024; /* Tunable */
719 * Read bytes from specified file into supplied buffer.
721 * IN: vp - vnode of file to be read from.
722 * uio - structure supplying read location, range info,
724 * ioflag - SYNC flags; used to provide FRSYNC semantics.
725 * cr - credentials of caller.
726 * ct - caller context
728 * OUT: uio - updated offset and range, buffer filled.
730 * RETURN: 0 on success, error code on failure.
733 * vp - atime updated if byte count > 0
737 zfs_read(vnode_t
*vp
, uio_t
*uio
, int ioflag
, cred_t
*cr
)
739 znode_t
*zp
= VTOZ(vp
);
740 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
741 ssize_t n
, nbytes
, start_resid
;
744 zfs_locked_range_t
*lr
;
749 /* We don't copy out anything useful for directories. */
750 if (vp
->v_type
== VDIR
) {
752 return (SET_ERROR(EISDIR
));
755 if (zp
->z_pflags
& ZFS_AV_QUARANTINED
) {
757 return (SET_ERROR(EACCES
));
761 * Validate file offset
763 if (uio
->uio_loffset
< (offset_t
)0) {
765 return (SET_ERROR(EINVAL
));
769 * Fasttrack empty reads
771 if (uio
->uio_resid
== 0) {
777 * If we're in FRSYNC mode, sync out this znode before reading it.
780 (ioflag
& FRSYNC
|| zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
))
781 zil_commit(zfsvfs
->z_log
, zp
->z_id
);
784 * Lock the range against changes.
786 lr
= zfs_rangelock_enter(&zp
->z_rangelock
, uio
->uio_loffset
,
787 uio
->uio_resid
, RL_READER
);
790 * If we are reading past end-of-file we can skip
791 * to the end; but we might still need to set atime.
793 if (uio
->uio_loffset
>= zp
->z_size
) {
798 ASSERT(uio
->uio_loffset
< zp
->z_size
);
799 n
= MIN(uio
->uio_resid
, zp
->z_size
- uio
->uio_loffset
);
803 nbytes
= MIN(n
, zfs_read_chunk_size
-
804 P2PHASE(uio
->uio_loffset
, zfs_read_chunk_size
));
806 if (uio
->uio_segflg
== UIO_NOCOPY
)
807 error
= mappedread_sf(vp
, nbytes
, uio
);
808 else if (vn_has_cached_data(vp
)) {
809 error
= mappedread(vp
, nbytes
, uio
);
811 error
= dmu_read_uio_dbuf(sa_get_db(zp
->z_sa_hdl
),
815 /* convert checksum errors into IO errors */
817 error
= SET_ERROR(EIO
);
824 nread
= start_resid
- n
;
825 dataset_kstats_update_read_kstats(&zfsvfs
->z_kstat
, nread
);
828 zfs_rangelock_exit(lr
);
830 ZFS_ACCESSTIME_STAMP(zfsvfs
, zp
);
836 * Write the bytes to a file.
838 * IN: vp - vnode of file to be written to.
839 * uio - structure supplying write location, range info,
841 * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is
842 * set if in append mode.
843 * cr - credentials of caller.
844 * ct - caller context (NFS/CIFS fem monitor only)
846 * OUT: uio - updated offset and range.
848 * RETURN: 0 on success, error code on failure.
851 * vp - ctime|mtime updated if byte count > 0
856 zfs_write(vnode_t
*vp
, uio_t
*uio
, int ioflag
, cred_t
*cr
)
858 znode_t
*zp
= VTOZ(vp
);
859 rlim64_t limit
= MAXOFFSET_T
;
860 ssize_t start_resid
= uio
->uio_resid
;
865 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
869 zfs_locked_range_t
*lr
;
870 int max_blksz
= zfsvfs
->z_max_blksz
;
873 iovec_t
*aiov
= NULL
;
876 int iovcnt __unused
= uio
->uio_iovcnt
;
877 iovec_t
*iovp
= uio
->uio_iov
;
880 sa_bulk_attr_t bulk
[4];
881 uint64_t mtime
[2], ctime
[2];
882 uint64_t uid
, gid
, projid
;
886 * Fasttrack empty write
892 if (limit
== RLIM64_INFINITY
|| limit
> MAXOFFSET_T
)
898 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MTIME(zfsvfs
), NULL
, &mtime
, 16);
899 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
), NULL
, &ctime
, 16);
900 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_SIZE(zfsvfs
), NULL
,
902 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_FLAGS(zfsvfs
), NULL
,
906 * Callers might not be able to detect properly that we are read-only,
907 * so check it explicitly here.
909 if (zfs_is_readonly(zfsvfs
)) {
911 return (SET_ERROR(EROFS
));
915 * If immutable or not appending then return EPERM.
916 * Intentionally allow ZFS_READONLY through here.
917 * See zfs_zaccess_common()
919 if ((zp
->z_pflags
& ZFS_IMMUTABLE
) ||
920 ((zp
->z_pflags
& ZFS_APPENDONLY
) && !(ioflag
& FAPPEND
) &&
921 (uio
->uio_loffset
< zp
->z_size
))) {
923 return (SET_ERROR(EPERM
));
926 zilog
= zfsvfs
->z_log
;
929 * Validate file offset
931 woff
= ioflag
& FAPPEND
? zp
->z_size
: uio
->uio_loffset
;
934 return (SET_ERROR(EINVAL
));
938 * If in append mode, set the io offset pointer to eof.
940 if (ioflag
& FAPPEND
) {
942 * Obtain an appending range lock to guarantee file append
943 * semantics. We reset the write offset once we have the lock.
945 lr
= zfs_rangelock_enter(&zp
->z_rangelock
, 0, n
, RL_APPEND
);
946 woff
= lr
->lr_offset
;
947 if (lr
->lr_length
== UINT64_MAX
) {
949 * We overlocked the file because this write will cause
950 * the file block size to increase.
951 * Note that zp_size cannot change with this lock held.
955 uio
->uio_loffset
= woff
;
958 * Note that if the file block size will change as a result of
959 * this write, then this range lock will lock the entire file
960 * so that we can re-write the block safely.
962 lr
= zfs_rangelock_enter(&zp
->z_rangelock
, woff
, n
, RL_WRITER
);
965 if (vn_rlimit_fsize(vp
, uio
, uio
->uio_td
)) {
966 zfs_rangelock_exit(lr
);
972 zfs_rangelock_exit(lr
);
974 return (SET_ERROR(EFBIG
));
977 if ((woff
+ n
) > limit
|| woff
> (limit
- n
))
980 /* Will this write extend the file length? */
981 write_eof
= (woff
+ n
> zp
->z_size
);
983 end_size
= MAX(zp
->z_size
, woff
+ n
);
987 projid
= zp
->z_projid
;
990 * Write the file in reasonable size chunks. Each chunk is written
991 * in a separate transaction; this keeps the intent log records small
992 * and allows us to do more fine-grained space accounting.
995 woff
= uio
->uio_loffset
;
997 if (zfs_id_overblockquota(zfsvfs
, DMU_USERUSED_OBJECT
, uid
) ||
998 zfs_id_overblockquota(zfsvfs
, DMU_GROUPUSED_OBJECT
, gid
) ||
999 (projid
!= ZFS_DEFAULT_PROJID
&&
1000 zfs_id_overblockquota(zfsvfs
, DMU_PROJECTUSED_OBJECT
,
1002 error
= SET_ERROR(EDQUOT
);
1008 ASSERT(i_iov
< iovcnt
);
1009 aiov
= &iovp
[i_iov
];
1010 abuf
= dmu_xuio_arcbuf(xuio
, i_iov
);
1011 dmu_xuio_clear(xuio
, i_iov
);
1012 DTRACE_PROBE3(zfs_cp_write
, int, i_iov
,
1013 iovec_t
*, aiov
, arc_buf_t
*, abuf
);
1014 ASSERT((aiov
->iov_base
== abuf
->b_data
) ||
1015 ((char *)aiov
->iov_base
- (char *)abuf
->b_data
+
1016 aiov
->iov_len
== arc_buf_size(abuf
)));
1018 } else if (n
>= max_blksz
&&
1019 woff
>= zp
->z_size
&&
1020 P2PHASE(woff
, max_blksz
) == 0 &&
1021 zp
->z_blksz
== max_blksz
) {
1023 * This write covers a full block. "Borrow" a buffer
1024 * from the dmu so that we can fill it before we enter
1025 * a transaction. This avoids the possibility of
1026 * holding up the transaction if the data copy hangs
1027 * up on a pagefault (e.g., from an NFS server mapping).
1031 abuf
= dmu_request_arcbuf(sa_get_db(zp
->z_sa_hdl
),
1033 ASSERT(abuf
!= NULL
);
1034 ASSERT(arc_buf_size(abuf
) == max_blksz
);
1035 if ((error
= uiocopy(abuf
->b_data
, max_blksz
,
1036 UIO_WRITE
, uio
, &cbytes
))) {
1037 dmu_return_arcbuf(abuf
);
1040 ASSERT(cbytes
== max_blksz
);
1044 * Start a transaction.
1046 tx
= dmu_tx_create(zfsvfs
->z_os
);
1047 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_FALSE
);
1048 db
= (dmu_buf_impl_t
*)sa_get_db(zp
->z_sa_hdl
);
1050 dmu_tx_hold_write_by_dnode(tx
, DB_DNODE(db
), woff
,
1053 zfs_sa_upgrade_txholds(tx
, zp
);
1054 error
= dmu_tx_assign(tx
, TXG_WAIT
);
1058 dmu_return_arcbuf(abuf
);
1063 * If zfs_range_lock() over-locked we grow the blocksize
1064 * and then reduce the lock range. This will only happen
1065 * on the first iteration since zfs_range_reduce() will
1066 * shrink down r_len to the appropriate size.
1068 if (lr
->lr_length
== UINT64_MAX
) {
1071 if (zp
->z_blksz
> max_blksz
) {
1073 * File's blocksize is already larger than the
1074 * "recordsize" property. Only let it grow to
1075 * the next power of 2.
1077 ASSERT(!ISP2(zp
->z_blksz
));
1078 new_blksz
= MIN(end_size
,
1079 1 << highbit64(zp
->z_blksz
));
1081 new_blksz
= MIN(end_size
, max_blksz
);
1083 zfs_grow_blocksize(zp
, new_blksz
, tx
);
1084 zfs_rangelock_reduce(lr
, woff
, n
);
1088 * XXX - should we really limit each write to z_max_blksz?
1089 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
1091 nbytes
= MIN(n
, max_blksz
- P2PHASE(woff
, max_blksz
));
1093 if (woff
+ nbytes
> zp
->z_size
)
1094 vnode_pager_setsize(vp
, woff
+ nbytes
);
1097 tx_bytes
= uio
->uio_resid
;
1098 error
= dmu_write_uio_dbuf(sa_get_db(zp
->z_sa_hdl
),
1100 tx_bytes
-= uio
->uio_resid
;
1103 ASSERT(xuio
== NULL
|| tx_bytes
== aiov
->iov_len
);
1105 * If this is not a full block write, but we are
1106 * extending the file past EOF and this data starts
1107 * block-aligned, use assign_arcbuf(). Otherwise,
1108 * write via dmu_write().
1110 if (tx_bytes
< max_blksz
&& (!write_eof
||
1111 aiov
->iov_base
!= abuf
->b_data
)) {
1113 dmu_write(zfsvfs
->z_os
, zp
->z_id
, woff
,
1114 aiov
->iov_len
, aiov
->iov_base
, tx
);
1115 dmu_return_arcbuf(abuf
);
1116 xuio_stat_wbuf_copied();
1118 ASSERT(xuio
|| tx_bytes
== max_blksz
);
1119 dmu_assign_arcbuf(sa_get_db(zp
->z_sa_hdl
), woff
,
1122 ASSERT(tx_bytes
<= uio
->uio_resid
);
1123 uioskip(uio
, tx_bytes
);
1125 if (tx_bytes
&& vn_has_cached_data(vp
)) {
1126 update_pages(vp
, woff
, tx_bytes
, zfsvfs
->z_os
,
1127 zp
->z_id
, uio
->uio_segflg
, tx
);
1131 * If we made no progress, we're done. If we made even
1132 * partial progress, update the znode and ZIL accordingly.
1134 if (tx_bytes
== 0) {
1135 (void) sa_update(zp
->z_sa_hdl
, SA_ZPL_SIZE(zfsvfs
),
1136 (void *)&zp
->z_size
, sizeof (uint64_t), tx
);
1143 * Clear Set-UID/Set-GID bits on successful write if not
1144 * privileged and at least one of the execute bits is set.
1146 * It would be nice to do this after all writes have
1147 * been done, but that would still expose the ISUID/ISGID
1148 * to another app after the partial write is committed.
1150 * Note: we don't call zfs_fuid_map_id() here because
1151 * user 0 is not an ephemeral uid.
1153 mutex_enter(&zp
->z_acl_lock
);
1154 if ((zp
->z_mode
& (S_IXUSR
| (S_IXUSR
>> 3) |
1155 (S_IXUSR
>> 6))) != 0 &&
1156 (zp
->z_mode
& (S_ISUID
| S_ISGID
)) != 0 &&
1157 secpolicy_vnode_setid_retain(vp
, cr
,
1158 (zp
->z_mode
& S_ISUID
) != 0 && zp
->z_uid
== 0) != 0) {
1160 zp
->z_mode
&= ~(S_ISUID
| S_ISGID
);
1161 newmode
= zp
->z_mode
;
1162 (void) sa_update(zp
->z_sa_hdl
, SA_ZPL_MODE(zfsvfs
),
1163 (void *)&newmode
, sizeof (uint64_t), tx
);
1165 mutex_exit(&zp
->z_acl_lock
);
1167 zfs_tstamp_update_setup(zp
, CONTENT_MODIFIED
, mtime
, ctime
);
1170 * Update the file size (zp_size) if it has changed;
1171 * account for possible concurrent updates.
1173 while ((end_size
= zp
->z_size
) < uio
->uio_loffset
) {
1174 (void) atomic_cas_64(&zp
->z_size
, end_size
,
1176 ASSERT(error
== 0 || error
== EFAULT
);
1179 * If we are replaying and eof is non zero then force
1180 * the file size to the specified eof. Note, there's no
1181 * concurrency during replay.
1183 if (zfsvfs
->z_replay
&& zfsvfs
->z_replay_eof
!= 0)
1184 zp
->z_size
= zfsvfs
->z_replay_eof
;
1187 error
= sa_bulk_update(zp
->z_sa_hdl
, bulk
, count
, tx
);
1189 (void) sa_bulk_update(zp
->z_sa_hdl
, bulk
, count
, tx
);
1191 zfs_log_write(zilog
, tx
, TX_WRITE
, zp
, woff
, tx_bytes
,
1192 ioflag
, NULL
, NULL
);
1197 ASSERT(tx_bytes
== nbytes
);
1202 zfs_rangelock_exit(lr
);
1205 * If we're in replay mode, or we made no progress, return error.
1206 * Otherwise, it's at least a partial write, so it's successful.
1208 if (zfsvfs
->z_replay
|| uio
->uio_resid
== start_resid
) {
1214 * EFAULT means that at least one page of the source buffer was not
1215 * available. VFS will re-try remaining I/O upon this error.
1217 if (error
== EFAULT
) {
1222 if (ioflag
& (FSYNC
| FDSYNC
) ||
1223 zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
1224 zil_commit(zilog
, zp
->z_id
);
1226 nwritten
= start_resid
- uio
->uio_resid
;
1227 dataset_kstats_update_write_kstats(&zfsvfs
->z_kstat
, nwritten
);
1234 zfs_write_simple(znode_t
*zp
, const void *data
, size_t len
,
1235 loff_t pos
, size_t *presid
)
1240 error
= vn_rdwr(UIO_WRITE
, ZTOV(zp
), __DECONST(void *, data
), len
, pos
,
1241 UIO_SYSSPACE
, IO_SYNC
, kcred
, NOCRED
, &resid
, curthread
);
1244 return (SET_ERROR(error
));
1245 } else if (presid
== NULL
) {
1247 error
= SET_ERROR(EIO
);
1256 zfs_get_done(zgd_t
*zgd
, int error
)
1258 znode_t
*zp
= zgd
->zgd_private
;
1259 objset_t
*os
= zp
->z_zfsvfs
->z_os
;
1262 dmu_buf_rele(zgd
->zgd_db
, zgd
);
1264 zfs_rangelock_exit(zgd
->zgd_lr
);
1267 * Release the vnode asynchronously as we currently have the
1268 * txg stopped from syncing.
1270 VN_RELE_ASYNC(ZTOV(zp
), dsl_pool_zrele_taskq(dmu_objset_pool(os
)));
1272 kmem_free(zgd
, sizeof (zgd_t
));
1276 static int zil_fault_io
= 0;
1280 * Get data to generate a TX_WRITE intent log record.
1283 zfs_get_data(void *arg
, lr_write_t
*lr
, char *buf
, struct lwb
*lwb
, zio_t
*zio
)
1285 zfsvfs_t
*zfsvfs
= arg
;
1286 objset_t
*os
= zfsvfs
->z_os
;
1288 uint64_t object
= lr
->lr_foid
;
1289 uint64_t offset
= lr
->lr_offset
;
1290 uint64_t size
= lr
->lr_length
;
1295 ASSERT3P(lwb
, !=, NULL
);
1296 ASSERT3P(zio
, !=, NULL
);
1297 ASSERT3U(size
, !=, 0);
1300 * Nothing to do if the file has been removed
1302 if (zfs_zget(zfsvfs
, object
, &zp
) != 0)
1303 return (SET_ERROR(ENOENT
));
1304 if (zp
->z_unlinked
) {
1306 * Release the vnode asynchronously as we currently have the
1307 * txg stopped from syncing.
1309 VN_RELE_ASYNC(ZTOV(zp
),
1310 dsl_pool_zrele_taskq(dmu_objset_pool(os
)));
1311 return (SET_ERROR(ENOENT
));
1314 zgd
= (zgd_t
*)kmem_zalloc(sizeof (zgd_t
), KM_SLEEP
);
1316 zgd
->zgd_private
= zp
;
1319 * Write records come in two flavors: immediate and indirect.
1320 * For small writes it's cheaper to store the data with the
1321 * log record (immediate); for large writes it's cheaper to
1322 * sync the data and get a pointer to it (indirect) so that
1323 * we don't have to write the data twice.
1325 if (buf
!= NULL
) { /* immediate write */
1326 zgd
->zgd_lr
= zfs_rangelock_enter(&zp
->z_rangelock
, offset
,
1328 /* test for truncation needs to be done while range locked */
1329 if (offset
>= zp
->z_size
) {
1330 error
= SET_ERROR(ENOENT
);
1332 error
= dmu_read(os
, object
, offset
, size
, buf
,
1333 DMU_READ_NO_PREFETCH
);
1335 ASSERT(error
== 0 || error
== ENOENT
);
1336 } else { /* indirect write */
1338 * Have to lock the whole block to ensure when it's
1339 * written out and its checksum is being calculated
1340 * that no one can change the data. We need to re-check
1341 * blocksize after we get the lock in case it's changed!
1346 blkoff
= ISP2(size
) ? P2PHASE(offset
, size
) : offset
;
1348 zgd
->zgd_lr
= zfs_rangelock_enter(&zp
->z_rangelock
,
1349 offset
, size
, RL_READER
);
1350 if (zp
->z_blksz
== size
)
1353 zfs_rangelock_exit(zgd
->zgd_lr
);
1355 /* test for truncation needs to be done while range locked */
1356 if (lr
->lr_offset
>= zp
->z_size
)
1357 error
= SET_ERROR(ENOENT
);
1360 error
= SET_ERROR(EIO
);
1365 error
= dmu_buf_hold(os
, object
, offset
, zgd
, &db
,
1366 DMU_READ_NO_PREFETCH
);
1369 blkptr_t
*bp
= &lr
->lr_blkptr
;
1374 ASSERT(db
->db_offset
== offset
);
1375 ASSERT(db
->db_size
== size
);
1377 error
= dmu_sync(zio
, lr
->lr_common
.lrc_txg
,
1379 ASSERT(error
|| lr
->lr_length
<= size
);
1382 * On success, we need to wait for the write I/O
1383 * initiated by dmu_sync() to complete before we can
1384 * release this dbuf. We will finish everything up
1385 * in the zfs_get_done() callback.
1390 if (error
== EALREADY
) {
1391 lr
->lr_common
.lrc_txtype
= TX_WRITE2
;
1393 * TX_WRITE2 relies on the data previously
1394 * written by the TX_WRITE that caused
1395 * EALREADY. We zero out the BP because
1396 * it is the old, currently-on-disk BP,
1397 * so there's no need to zio_flush() its
1398 * vdevs (flushing would needlessly hurt
1399 * performance, and doesn't work on
1409 zfs_get_done(zgd
, error
);
1416 zfs_access(vnode_t
*vp
, int mode
, int flag
, cred_t
*cr
,
1417 caller_context_t
*ct
)
1419 znode_t
*zp
= VTOZ(vp
);
1420 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
1426 if (flag
& V_ACE_MASK
)
1427 error
= zfs_zaccess(zp
, mode
, flag
, B_FALSE
, cr
);
1429 error
= zfs_zaccess_rwx(zp
, mode
, flag
, cr
);
1436 zfs_dd_callback(struct mount
*mp
, void *arg
, int lkflags
, struct vnode
**vpp
)
1441 error
= vn_lock(*vpp
, lkflags
);
1448 zfs_lookup_lock(vnode_t
*dvp
, vnode_t
*vp
, const char *name
, int lkflags
)
1450 znode_t
*zdp
= VTOZ(dvp
);
1451 zfsvfs_t
*zfsvfs __unused
= zdp
->z_zfsvfs
;
1455 if (zfsvfs
->z_replay
== B_FALSE
)
1456 ASSERT_VOP_LOCKED(dvp
, __func__
);
1458 if ((zdp
->z_pflags
& ZFS_XATTR
) == 0)
1459 VERIFY(!RRM_LOCK_HELD(&zfsvfs
->z_teardown_lock
));
1462 if (name
[0] == 0 || (name
[0] == '.' && name
[1] == 0)) {
1463 ASSERT3P(dvp
, ==, vp
);
1465 ltype
= lkflags
& LK_TYPE_MASK
;
1466 if (ltype
!= VOP_ISLOCKED(dvp
)) {
1467 if (ltype
== LK_EXCLUSIVE
)
1468 vn_lock(dvp
, LK_UPGRADE
| LK_RETRY
);
1469 else /* if (ltype == LK_SHARED) */
1470 vn_lock(dvp
, LK_DOWNGRADE
| LK_RETRY
);
1473 * Relock for the "." case could leave us with
1476 if (VN_IS_DOOMED(dvp
)) {
1478 return (SET_ERROR(ENOENT
));
1482 } else if (name
[0] == '.' && name
[1] == '.' && name
[2] == 0) {
1484 * Note that in this case, dvp is the child vnode, and we
1485 * are looking up the parent vnode - exactly reverse from
1486 * normal operation. Unlocking dvp requires some rather
1487 * tricky unlock/relock dance to prevent mp from being freed;
1488 * use vn_vget_ino_gen() which takes care of all that.
1490 * XXX Note that there is a time window when both vnodes are
1491 * unlocked. It is possible, although highly unlikely, that
1492 * during that window the parent-child relationship between
1493 * the vnodes may change, for example, get reversed.
1494 * In that case we would have a wrong lock order for the vnodes.
1495 * All other filesystems seem to ignore this problem, so we
1497 * A potential solution could be implemented as follows:
1498 * - using LK_NOWAIT when locking the second vnode and retrying
1500 * - checking that the parent-child relationship still holds
1501 * after locking both vnodes and retrying if it doesn't
1503 error
= vn_vget_ino_gen(dvp
, zfs_dd_callback
, vp
, lkflags
, &vp
);
1506 error
= vn_lock(vp
, lkflags
);
1514 * Lookup an entry in a directory, or an extended attribute directory.
1515 * If it exists, return a held vnode reference for it.
1517 * IN: dvp - vnode of directory to search.
1518 * nm - name of entry to lookup.
1519 * pnp - full pathname to lookup [UNUSED].
1520 * flags - LOOKUP_XATTR set if looking for an attribute.
1521 * rdir - root directory vnode [UNUSED].
1522 * cr - credentials of caller.
1523 * ct - caller context
1525 * OUT: vpp - vnode of located entry, NULL if not found.
1527 * RETURN: 0 on success, error code on failure.
1534 zfs_lookup(vnode_t
*dvp
, char *nm
, vnode_t
**vpp
, struct componentname
*cnp
,
1535 int nameiop
, cred_t
*cr
, kthread_t
*td
, int flags
, boolean_t cached
)
1537 znode_t
*zdp
= VTOZ(dvp
);
1539 zfsvfs_t
*zfsvfs
= zdp
->z_zfsvfs
;
1543 * Fast path lookup, however we must skip DNLC lookup
1544 * for case folding or normalizing lookups because the
1545 * DNLC code only stores the passed in name. This means
1546 * creating 'a' and removing 'A' on a case insensitive
1547 * file system would work, but DNLC still thinks 'a'
1548 * exists and won't let you create it again on the next
1549 * pass through fast path.
1551 if (!(flags
& LOOKUP_XATTR
)) {
1552 if (dvp
->v_type
!= VDIR
) {
1553 return (SET_ERROR(ENOTDIR
));
1554 } else if (zdp
->z_sa_hdl
== NULL
) {
1555 return (SET_ERROR(EIO
));
1559 DTRACE_PROBE2(zfs__fastpath__lookup__miss
, vnode_t
*, dvp
, char *, nm
);
1566 if (flags
& LOOKUP_XATTR
) {
1568 * If the xattr property is off, refuse the lookup request.
1570 if (!(zfsvfs
->z_flags
& ZSB_XATTR
)) {
1572 return (SET_ERROR(EOPNOTSUPP
));
1576 * We don't allow recursive attributes..
1577 * Maybe someday we will.
1579 if (zdp
->z_pflags
& ZFS_XATTR
) {
1581 return (SET_ERROR(EINVAL
));
1584 if ((error
= zfs_get_xattrdir(VTOZ(dvp
), &zp
, cr
, flags
))) {
1591 * Do we have permission to get into attribute directory?
1593 error
= zfs_zaccess(zp
, ACE_EXECUTE
, 0, B_FALSE
, cr
);
1603 * Check accessibility of directory if we're not coming in via
1608 if ((cnp
->cn_flags
& NOEXECCHECK
) != 0) {
1609 cnp
->cn_flags
&= ~NOEXECCHECK
;
1612 if ((error
= zfs_zaccess(zdp
, ACE_EXECUTE
, 0, B_FALSE
, cr
))) {
1618 if (zfsvfs
->z_utf8
&& u8_validate(nm
, strlen(nm
),
1619 NULL
, U8_VALIDATE_ENTIRE
, &error
) < 0) {
1621 return (SET_ERROR(EILSEQ
));
1626 * First handle the special cases.
1628 if ((cnp
->cn_flags
& ISDOTDOT
) != 0) {
1630 * If we are a snapshot mounted under .zfs, return
1631 * the vp for the snapshot directory.
1633 if (zdp
->z_id
== zfsvfs
->z_root
&& zfsvfs
->z_parent
!= zfsvfs
) {
1634 struct componentname cn
;
1639 ltype
= VOP_ISLOCKED(dvp
);
1641 error
= zfsctl_root(zfsvfs
->z_parent
, LK_SHARED
,
1644 cn
.cn_nameptr
= "snapshot";
1645 cn
.cn_namelen
= strlen(cn
.cn_nameptr
);
1646 cn
.cn_nameiop
= cnp
->cn_nameiop
;
1647 cn
.cn_flags
= cnp
->cn_flags
& ~ISDOTDOT
;
1648 cn
.cn_lkflags
= cnp
->cn_lkflags
;
1649 error
= VOP_LOOKUP(zfsctl_vp
, vpp
, &cn
);
1652 vn_lock(dvp
, ltype
| LK_RETRY
);
1656 if (zfs_has_ctldir(zdp
) && strcmp(nm
, ZFS_CTLDIR_NAME
) == 0) {
1658 if ((cnp
->cn_flags
& ISLASTCN
) != 0 && nameiop
!= LOOKUP
)
1659 return (SET_ERROR(ENOTSUP
));
1660 error
= zfsctl_root(zfsvfs
, cnp
->cn_lkflags
, vpp
);
1665 * The loop retries the lookup if the parent-child relationship
1666 * changes during the dot-dot locking complexities.
1671 error
= zfs_dirlook(zdp
, nm
, &zp
);
1679 error
= zfs_lookup_lock(dvp
, *vpp
, nm
, cnp
->cn_lkflags
);
1682 * If we've got a locking error, then the vnode
1683 * got reclaimed because of a force unmount.
1684 * We never enter doomed vnodes into the name cache.
1690 if ((cnp
->cn_flags
& ISDOTDOT
) == 0)
1694 if (zdp
->z_sa_hdl
== NULL
) {
1695 error
= SET_ERROR(EIO
);
1697 error
= sa_lookup(zdp
->z_sa_hdl
, SA_ZPL_PARENT(zfsvfs
),
1698 &parent
, sizeof (parent
));
1705 if (zp
->z_id
== parent
) {
1715 /* Translate errors and add SAVENAME when needed. */
1716 if (cnp
->cn_flags
& ISLASTCN
) {
1720 if (error
== ENOENT
) {
1721 error
= EJUSTRETURN
;
1722 cnp
->cn_flags
|= SAVENAME
;
1728 cnp
->cn_flags
|= SAVENAME
;
1733 /* Insert name into cache (as non-existent) if appropriate. */
1734 if (zfsvfs
->z_use_namecache
&& !zfsvfs
->z_replay
&&
1735 error
== ENOENT
&& (cnp
->cn_flags
& MAKEENTRY
) != 0)
1736 cache_enter(dvp
, NULL
, cnp
);
1738 /* Insert name into cache if appropriate. */
1739 if (zfsvfs
->z_use_namecache
&& !zfsvfs
->z_replay
&&
1740 error
== 0 && (cnp
->cn_flags
& MAKEENTRY
)) {
1741 if (!(cnp
->cn_flags
& ISLASTCN
) ||
1742 (nameiop
!= DELETE
&& nameiop
!= RENAME
)) {
1743 cache_enter(dvp
, *vpp
, cnp
);
1751 * Attempt to create a new entry in a directory. If the entry
1752 * already exists, truncate the file if permissible, else return
1753 * an error. Return the vp of the created or trunc'd file.
1755 * IN: dvp - vnode of directory to put new file entry in.
1756 * name - name of new file entry.
1757 * vap - attributes of new file.
1758 * excl - flag indicating exclusive or non-exclusive mode.
1759 * mode - mode to open file with.
1760 * cr - credentials of caller.
1761 * flag - large file flag [UNUSED].
1762 * ct - caller context
1763 * vsecp - ACL to be set
1765 * OUT: vpp - vnode of created or trunc'd entry.
1767 * RETURN: 0 on success, error code on failure.
1770 * dvp - ctime|mtime updated if new entry created
1771 * vp - ctime|mtime always, atime if new
1776 zfs_create(znode_t
*dzp
, char *name
, vattr_t
*vap
, int excl
, int mode
,
1777 znode_t
**zpp
, cred_t
*cr
, int flag
, vsecattr_t
*vsecp
)
1780 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
1787 gid_t gid
= crgetgid(cr
);
1788 uint64_t projid
= ZFS_DEFAULT_PROJID
;
1789 zfs_acl_ids_t acl_ids
;
1790 boolean_t fuid_dirtied
;
1792 #ifdef DEBUG_VFS_LOCKS
1793 vnode_t
*dvp
= ZTOV(dzp
);
1797 * If we have an ephemeral id, ACL, or XVATTR then
1798 * make sure file system is at proper version
1801 ksid
= crgetsid(cr
, KSID_OWNER
);
1803 uid
= ksid_getid(ksid
);
1807 if (zfsvfs
->z_use_fuids
== B_FALSE
&&
1808 (vsecp
|| (vap
->va_mask
& AT_XVATTR
) ||
1809 IS_EPHEMERAL(uid
) || IS_EPHEMERAL(gid
)))
1810 return (SET_ERROR(EINVAL
));
1815 zilog
= zfsvfs
->z_log
;
1817 if (zfsvfs
->z_utf8
&& u8_validate(name
, strlen(name
),
1818 NULL
, U8_VALIDATE_ENTIRE
, &error
) < 0) {
1820 return (SET_ERROR(EILSEQ
));
1823 if (vap
->va_mask
& AT_XVATTR
) {
1824 if ((error
= secpolicy_xvattr(ZTOV(dzp
), (xvattr_t
*)vap
,
1825 crgetuid(cr
), cr
, vap
->va_type
)) != 0) {
1833 if ((vap
->va_mode
& S_ISVTX
) && secpolicy_vnode_stky_modify(cr
))
1834 vap
->va_mode
&= ~S_ISVTX
;
1836 error
= zfs_dirent_lookup(dzp
, name
, &zp
, ZNEW
);
1841 ASSERT3P(zp
, ==, NULL
);
1844 * Create a new file object and update the directory
1847 if ((error
= zfs_zaccess(dzp
, ACE_ADD_FILE
, 0, B_FALSE
, cr
))) {
1852 * We only support the creation of regular files in
1853 * extended attribute directories.
1856 if ((dzp
->z_pflags
& ZFS_XATTR
) &&
1857 (vap
->va_type
!= VREG
)) {
1858 error
= SET_ERROR(EINVAL
);
1862 if ((error
= zfs_acl_ids_create(dzp
, 0, vap
,
1863 cr
, vsecp
, &acl_ids
)) != 0)
1866 if (S_ISREG(vap
->va_mode
) || S_ISDIR(vap
->va_mode
))
1867 projid
= zfs_inherit_projid(dzp
);
1868 if (zfs_acl_ids_overquota(zfsvfs
, &acl_ids
, projid
)) {
1869 zfs_acl_ids_free(&acl_ids
);
1870 error
= SET_ERROR(EDQUOT
);
1874 getnewvnode_reserve_();
1876 tx
= dmu_tx_create(os
);
1878 dmu_tx_hold_sa_create(tx
, acl_ids
.z_aclp
->z_acl_bytes
+
1879 ZFS_SA_BASE_ATTR_SIZE
);
1881 fuid_dirtied
= zfsvfs
->z_fuid_dirty
;
1883 zfs_fuid_txhold(zfsvfs
, tx
);
1884 dmu_tx_hold_zap(tx
, dzp
->z_id
, TRUE
, name
);
1885 dmu_tx_hold_sa(tx
, dzp
->z_sa_hdl
, B_FALSE
);
1886 if (!zfsvfs
->z_use_sa
&&
1887 acl_ids
.z_aclp
->z_acl_bytes
> ZFS_ACE_SPACE
) {
1888 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
,
1889 0, acl_ids
.z_aclp
->z_acl_bytes
);
1891 error
= dmu_tx_assign(tx
, TXG_WAIT
);
1893 zfs_acl_ids_free(&acl_ids
);
1895 getnewvnode_drop_reserve();
1899 zfs_mknode(dzp
, vap
, tx
, cr
, 0, &zp
, &acl_ids
);
1901 zfs_fuid_sync(zfsvfs
, tx
);
1903 (void) zfs_link_create(dzp
, name
, zp
, tx
, ZNEW
);
1904 txtype
= zfs_log_create_txtype(Z_FILE
, vsecp
, vap
);
1905 zfs_log_create(zilog
, tx
, txtype
, dzp
, zp
, name
,
1906 vsecp
, acl_ids
.z_fuidp
, vap
);
1907 zfs_acl_ids_free(&acl_ids
);
1910 getnewvnode_drop_reserve();
1918 if (zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
1919 zil_commit(zilog
, 0);
1926 * Remove an entry from a directory.
1928 * IN: dvp - vnode of directory to remove entry from.
1929 * name - name of entry to remove.
1930 * cr - credentials of caller.
1931 * ct - caller context
1932 * flags - case flags
1934 * RETURN: 0 on success, error code on failure.
1938 * vp - ctime (if nlink > 0)
1943 zfs_remove_(vnode_t
*dvp
, vnode_t
*vp
, char *name
, cred_t
*cr
)
1945 znode_t
*dzp
= VTOZ(dvp
);
1948 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
1962 zilog
= zfsvfs
->z_log
;
1967 if ((error
= zfs_zaccess_delete(dzp
, zp
, cr
))) {
1972 * Need to use rmdir for removing directories.
1974 if (vp
->v_type
== VDIR
) {
1975 error
= SET_ERROR(EPERM
);
1979 vnevent_remove(vp
, dvp
, name
, ct
);
1983 /* are there any extended attributes? */
1984 error
= sa_lookup(zp
->z_sa_hdl
, SA_ZPL_XATTR(zfsvfs
),
1985 &xattr_obj
, sizeof (xattr_obj
));
1986 if (error
== 0 && xattr_obj
) {
1987 error
= zfs_zget(zfsvfs
, xattr_obj
, &xzp
);
1992 * We may delete the znode now, or we may put it in the unlinked set;
1993 * it depends on whether we're the last link, and on whether there are
1994 * other holds on the vnode. So we dmu_tx_hold() the right things to
1995 * allow for either case.
1997 tx
= dmu_tx_create(zfsvfs
->z_os
);
1998 dmu_tx_hold_zap(tx
, dzp
->z_id
, FALSE
, name
);
1999 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_FALSE
);
2000 zfs_sa_upgrade_txholds(tx
, zp
);
2001 zfs_sa_upgrade_txholds(tx
, dzp
);
2004 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_TRUE
);
2005 dmu_tx_hold_sa(tx
, xzp
->z_sa_hdl
, B_FALSE
);
2008 /* charge as an update -- would be nice not to charge at all */
2009 dmu_tx_hold_zap(tx
, zfsvfs
->z_unlinkedobj
, FALSE
, NULL
);
2012 * Mark this transaction as typically resulting in a net free of space
2014 dmu_tx_mark_netfree(tx
);
2016 error
= dmu_tx_assign(tx
, TXG_WAIT
);
2024 * Remove the directory entry.
2026 error
= zfs_link_destroy(dzp
, name
, zp
, tx
, ZEXISTS
, &unlinked
);
2034 zfs_unlinked_add(zp
, tx
);
2035 vp
->v_vflag
|= VV_NOSYNC
;
2037 /* XXX check changes to linux vnops */
2039 zfs_log_remove(zilog
, tx
, txtype
, dzp
, name
, obj
, unlinked
);
2047 if (zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
2048 zil_commit(zilog
, 0);
2057 zfs_lookup_internal(znode_t
*dzp
, char *name
, vnode_t
**vpp
,
2058 struct componentname
*cnp
, int nameiop
)
2060 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
2063 cnp
->cn_nameptr
= name
;
2064 cnp
->cn_namelen
= strlen(name
);
2065 cnp
->cn_nameiop
= nameiop
;
2066 cnp
->cn_flags
= ISLASTCN
| SAVENAME
;
2067 cnp
->cn_lkflags
= LK_EXCLUSIVE
| LK_RETRY
;
2068 cnp
->cn_cred
= kcred
;
2069 cnp
->cn_thread
= curthread
;
2071 if (zfsvfs
->z_use_namecache
&& !zfsvfs
->z_replay
) {
2072 struct vop_lookup_args a
;
2074 a
.a_gen
.a_desc
= &vop_lookup_desc
;
2075 a
.a_dvp
= ZTOV(dzp
);
2078 error
= vfs_cache_lookup(&a
);
2080 error
= zfs_lookup(ZTOV(dzp
), name
, vpp
, cnp
, nameiop
, kcred
,
2081 curthread
, 0, B_FALSE
);
2085 printf("got error %d on name %s on op %d\n", error
, name
,
2094 zfs_remove(znode_t
*dzp
, char *name
, cred_t
*cr
, int flags
)
2098 struct componentname cn
;
2100 if ((error
= zfs_lookup_internal(dzp
, name
, &vp
, &cn
, DELETE
)))
2103 error
= zfs_remove_(ZTOV(dzp
), vp
, name
, cr
);
2108 * Create a new directory and insert it into dvp using the name
2109 * provided. Return a pointer to the inserted directory.
2111 * IN: dvp - vnode of directory to add subdir to.
2112 * dirname - name of new directory.
2113 * vap - attributes of new directory.
2114 * cr - credentials of caller.
2115 * ct - caller context
2116 * flags - case flags
2117 * vsecp - ACL to be set
2119 * OUT: vpp - vnode of created directory.
2121 * RETURN: 0 on success, error code on failure.
2124 * dvp - ctime|mtime updated
2125 * vp - ctime|mtime|atime updated
2129 zfs_mkdir(znode_t
*dzp
, char *dirname
, vattr_t
*vap
, znode_t
**zpp
, cred_t
*cr
,
2130 int flags
, vsecattr_t
*vsecp
)
2133 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
2140 gid_t gid
= crgetgid(cr
);
2141 zfs_acl_ids_t acl_ids
;
2142 boolean_t fuid_dirtied
;
2144 ASSERT(vap
->va_type
== VDIR
);
2147 * If we have an ephemeral id, ACL, or XVATTR then
2148 * make sure file system is at proper version
2151 ksid
= crgetsid(cr
, KSID_OWNER
);
2153 uid
= ksid_getid(ksid
);
2156 if (zfsvfs
->z_use_fuids
== B_FALSE
&&
2157 ((vap
->va_mask
& AT_XVATTR
) ||
2158 IS_EPHEMERAL(uid
) || IS_EPHEMERAL(gid
)))
2159 return (SET_ERROR(EINVAL
));
2163 zilog
= zfsvfs
->z_log
;
2165 if (dzp
->z_pflags
& ZFS_XATTR
) {
2167 return (SET_ERROR(EINVAL
));
2170 if (zfsvfs
->z_utf8
&& u8_validate(dirname
,
2171 strlen(dirname
), NULL
, U8_VALIDATE_ENTIRE
, &error
) < 0) {
2173 return (SET_ERROR(EILSEQ
));
2176 if (vap
->va_mask
& AT_XVATTR
) {
2177 if ((error
= secpolicy_xvattr(ZTOV(dzp
), (xvattr_t
*)vap
,
2178 crgetuid(cr
), cr
, vap
->va_type
)) != 0) {
2184 if ((error
= zfs_acl_ids_create(dzp
, 0, vap
, cr
,
2185 NULL
, &acl_ids
)) != 0) {
2191 * First make sure the new directory doesn't exist.
2193 * Existence is checked first to make sure we don't return
2194 * EACCES instead of EEXIST which can cause some applications
2199 if ((error
= zfs_dirent_lookup(dzp
, dirname
, &zp
, ZNEW
))) {
2200 zfs_acl_ids_free(&acl_ids
);
2204 ASSERT3P(zp
, ==, NULL
);
2206 if ((error
= zfs_zaccess(dzp
, ACE_ADD_SUBDIRECTORY
, 0, B_FALSE
, cr
))) {
2207 zfs_acl_ids_free(&acl_ids
);
2212 if (zfs_acl_ids_overquota(zfsvfs
, &acl_ids
, zfs_inherit_projid(dzp
))) {
2213 zfs_acl_ids_free(&acl_ids
);
2215 return (SET_ERROR(EDQUOT
));
2219 * Add a new entry to the directory.
2221 getnewvnode_reserve_();
2222 tx
= dmu_tx_create(zfsvfs
->z_os
);
2223 dmu_tx_hold_zap(tx
, dzp
->z_id
, TRUE
, dirname
);
2224 dmu_tx_hold_zap(tx
, DMU_NEW_OBJECT
, FALSE
, NULL
);
2225 fuid_dirtied
= zfsvfs
->z_fuid_dirty
;
2227 zfs_fuid_txhold(zfsvfs
, tx
);
2228 if (!zfsvfs
->z_use_sa
&& acl_ids
.z_aclp
->z_acl_bytes
> ZFS_ACE_SPACE
) {
2229 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
, 0,
2230 acl_ids
.z_aclp
->z_acl_bytes
);
2233 dmu_tx_hold_sa_create(tx
, acl_ids
.z_aclp
->z_acl_bytes
+
2234 ZFS_SA_BASE_ATTR_SIZE
);
2236 error
= dmu_tx_assign(tx
, TXG_WAIT
);
2238 zfs_acl_ids_free(&acl_ids
);
2240 getnewvnode_drop_reserve();
2248 zfs_mknode(dzp
, vap
, tx
, cr
, 0, &zp
, &acl_ids
);
2251 zfs_fuid_sync(zfsvfs
, tx
);
2254 * Now put new name in parent dir.
2256 (void) zfs_link_create(dzp
, dirname
, zp
, tx
, ZNEW
);
2260 txtype
= zfs_log_create_txtype(Z_DIR
, NULL
, vap
);
2261 zfs_log_create(zilog
, tx
, txtype
, dzp
, zp
, dirname
, NULL
,
2262 acl_ids
.z_fuidp
, vap
);
2264 zfs_acl_ids_free(&acl_ids
);
2268 getnewvnode_drop_reserve();
2270 if (zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
2271 zil_commit(zilog
, 0);
2278 * Remove a directory subdir entry. If the current working
2279 * directory is the same as the subdir to be removed, the
2282 * IN: dvp - vnode of directory to remove from.
2283 * name - name of directory to be removed.
2284 * cwd - vnode of current working directory.
2285 * cr - credentials of caller.
2286 * ct - caller context
2287 * flags - case flags
2289 * RETURN: 0 on success, error code on failure.
2292 * dvp - ctime|mtime updated
2296 zfs_rmdir_(vnode_t
*dvp
, vnode_t
*vp
, char *name
, cred_t
*cr
)
2298 znode_t
*dzp
= VTOZ(dvp
);
2299 znode_t
*zp
= VTOZ(vp
);
2300 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
2308 zilog
= zfsvfs
->z_log
;
2311 if ((error
= zfs_zaccess_delete(dzp
, zp
, cr
))) {
2315 if (vp
->v_type
!= VDIR
) {
2316 error
= SET_ERROR(ENOTDIR
);
2320 vnevent_rmdir(vp
, dvp
, name
, ct
);
2322 tx
= dmu_tx_create(zfsvfs
->z_os
);
2323 dmu_tx_hold_zap(tx
, dzp
->z_id
, FALSE
, name
);
2324 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_FALSE
);
2325 dmu_tx_hold_zap(tx
, zfsvfs
->z_unlinkedobj
, FALSE
, NULL
);
2326 zfs_sa_upgrade_txholds(tx
, zp
);
2327 zfs_sa_upgrade_txholds(tx
, dzp
);
2328 dmu_tx_mark_netfree(tx
);
2329 error
= dmu_tx_assign(tx
, TXG_WAIT
);
2338 error
= zfs_link_destroy(dzp
, name
, zp
, tx
, ZEXISTS
, NULL
);
2341 uint64_t txtype
= TX_RMDIR
;
2342 zfs_log_remove(zilog
, tx
, txtype
, dzp
, name
,
2343 ZFS_NO_OBJECT
, B_FALSE
);
2350 if (zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
2351 zil_commit(zilog
, 0);
2358 zfs_rmdir(znode_t
*dzp
, char *name
, znode_t
*cwd
, cred_t
*cr
, int flags
)
2360 struct componentname cn
;
2364 if ((error
= zfs_lookup_internal(dzp
, name
, &vp
, &cn
, DELETE
)))
2367 error
= zfs_rmdir_(ZTOV(dzp
), vp
, name
, cr
);
2373 * Read as many directory entries as will fit into the provided
2374 * buffer from the given directory cursor position (specified in
2375 * the uio structure).
2377 * IN: vp - vnode of directory to read.
2378 * uio - structure supplying read location, range info,
2379 * and return buffer.
2380 * cr - credentials of caller.
2381 * ct - caller context
2382 * flags - case flags
2384 * OUT: uio - updated offset and range, buffer filled.
2385 * eofp - set to true if end-of-file detected.
2387 * RETURN: 0 on success, error code on failure.
2390 * vp - atime updated
2392 * Note that the low 4 bits of the cookie returned by zap is always zero.
2393 * This allows us to use the low range for "special" directory entries:
2394 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
2395 * we use the offset 2 for the '.zfs' directory.
2399 zfs_readdir(vnode_t
*vp
, uio_t
*uio
, cred_t
*cr
, int *eofp
,
2400 int *ncookies
, ulong_t
**cookies
)
2402 znode_t
*zp
= VTOZ(vp
);
2406 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
2411 zap_attribute_t zap
;
2412 uint_t bytes_wanted
;
2413 uint64_t offset
; /* must be unsigned; checks for < 1 */
2419 boolean_t check_sysattrs
;
2422 ulong_t
*cooks
= NULL
;
2428 if ((error
= sa_lookup(zp
->z_sa_hdl
, SA_ZPL_PARENT(zfsvfs
),
2429 &parent
, sizeof (parent
))) != 0) {
2435 * If we are not given an eof variable,
2442 * Check for valid iov_len.
2444 if (uio
->uio_iov
->iov_len
<= 0) {
2446 return (SET_ERROR(EINVAL
));
2450 * Quit if directory has been removed (posix)
2452 if ((*eofp
= zp
->z_unlinked
) != 0) {
2459 offset
= uio
->uio_loffset
;
2460 prefetch
= zp
->z_zn_prefetch
;
2463 * Initialize the iterator cursor.
2467 * Start iteration from the beginning of the directory.
2469 zap_cursor_init(&zc
, os
, zp
->z_id
);
2472 * The offset is a serialized cursor.
2474 zap_cursor_init_serialized(&zc
, os
, zp
->z_id
, offset
);
2478 * Get space to change directory entries into fs independent format.
2480 iovp
= uio
->uio_iov
;
2481 bytes_wanted
= iovp
->iov_len
;
2482 if (uio
->uio_segflg
!= UIO_SYSSPACE
|| uio
->uio_iovcnt
!= 1) {
2483 bufsize
= bytes_wanted
;
2484 outbuf
= kmem_alloc(bufsize
, KM_SLEEP
);
2485 odp
= (struct dirent64
*)outbuf
;
2487 bufsize
= bytes_wanted
;
2489 odp
= (struct dirent64
*)iovp
->iov_base
;
2491 eodp
= (struct edirent
*)odp
;
2493 if (ncookies
!= NULL
) {
2495 * Minimum entry size is dirent size and 1 byte for a file name.
2497 ncooks
= uio
->uio_resid
/ (sizeof (struct dirent
) -
2498 sizeof (((struct dirent
*)NULL
)->d_name
) + 1);
2499 cooks
= malloc(ncooks
* sizeof (ulong_t
), M_TEMP
, M_WAITOK
);
2504 * If this VFS supports the system attribute view interface; and
2505 * we're looking at an extended attribute directory; and we care
2506 * about normalization conflicts on this vfs; then we must check
2507 * for normalization conflicts with the sysattr name space.
2510 check_sysattrs
= vfs_has_feature(vp
->v_vfsp
, VFSFT_SYSATTR_VIEWS
) &&
2511 (vp
->v_flag
& V_XATTRDIR
) && zfsvfs
->z_norm
&&
2512 (flags
& V_RDDIR_ENTFLAGS
);
2518 * Transform to file-system independent format
2521 while (outcount
< bytes_wanted
) {
2524 off64_t
*next
= NULL
;
2527 * Special case `.', `..', and `.zfs'.
2530 (void) strcpy(zap
.za_name
, ".");
2531 zap
.za_normalization_conflict
= 0;
2534 } else if (offset
== 1) {
2535 (void) strcpy(zap
.za_name
, "..");
2536 zap
.za_normalization_conflict
= 0;
2539 } else if (offset
== 2 && zfs_show_ctldir(zp
)) {
2540 (void) strcpy(zap
.za_name
, ZFS_CTLDIR_NAME
);
2541 zap
.za_normalization_conflict
= 0;
2542 objnum
= ZFSCTL_INO_ROOT
;
2548 if ((error
= zap_cursor_retrieve(&zc
, &zap
))) {
2549 if ((*eofp
= (error
== ENOENT
)) != 0)
2555 if (zap
.za_integer_length
!= 8 ||
2556 zap
.za_num_integers
!= 1) {
2557 cmn_err(CE_WARN
, "zap_readdir: bad directory "
2558 "entry, obj = %lld, offset = %lld\n",
2559 (u_longlong_t
)zp
->z_id
,
2560 (u_longlong_t
)offset
);
2561 error
= SET_ERROR(ENXIO
);
2565 objnum
= ZFS_DIRENT_OBJ(zap
.za_first_integer
);
2567 * MacOS X can extract the object type here such as:
2568 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
2570 type
= ZFS_DIRENT_TYPE(zap
.za_first_integer
);
2572 if (check_sysattrs
&& !zap
.za_normalization_conflict
) {
2574 zap
.za_normalization_conflict
=
2575 xattr_sysattr_casechk(zap
.za_name
);
2577 panic("%s:%u: TODO", __func__
, __LINE__
);
2582 if (flags
& V_RDDIR_ACCFILTER
) {
2584 * If we have no access at all, don't include
2585 * this entry in the returned information
2588 if (zfs_zget(zp
->z_zfsvfs
, objnum
, &ezp
) != 0)
2590 if (!zfs_has_access(ezp
, cr
)) {
2597 if (flags
& V_RDDIR_ENTFLAGS
)
2598 reclen
= EDIRENT_RECLEN(strlen(zap
.za_name
));
2600 reclen
= DIRENT64_RECLEN(strlen(zap
.za_name
));
2603 * Will this entry fit in the buffer?
2605 if (outcount
+ reclen
> bufsize
) {
2607 * Did we manage to fit anything in the buffer?
2610 error
= SET_ERROR(EINVAL
);
2615 if (flags
& V_RDDIR_ENTFLAGS
) {
2617 * Add extended flag entry:
2619 eodp
->ed_ino
= objnum
;
2620 eodp
->ed_reclen
= reclen
;
2621 /* NOTE: ed_off is the offset for the *next* entry */
2622 next
= &(eodp
->ed_off
);
2623 eodp
->ed_eflags
= zap
.za_normalization_conflict
?
2624 ED_CASE_CONFLICT
: 0;
2625 (void) strncpy(eodp
->ed_name
, zap
.za_name
,
2626 EDIRENT_NAMELEN(reclen
));
2627 eodp
= (edirent_t
*)((intptr_t)eodp
+ reclen
);
2632 odp
->d_ino
= objnum
;
2633 odp
->d_reclen
= reclen
;
2634 odp
->d_namlen
= strlen(zap
.za_name
);
2635 /* NOTE: d_off is the offset for the *next* entry. */
2637 strlcpy(odp
->d_name
, zap
.za_name
, odp
->d_namlen
+ 1);
2639 dirent_terminate(odp
);
2640 odp
= (dirent64_t
*)((intptr_t)odp
+ reclen
);
2644 ASSERT(outcount
<= bufsize
);
2646 /* Prefetch znode */
2648 dmu_prefetch(os
, objnum
, 0, 0, 0,
2649 ZIO_PRIORITY_SYNC_READ
);
2653 * Move to the next entry, fill in the previous offset.
2655 if (offset
> 2 || (offset
== 2 && !zfs_show_ctldir(zp
))) {
2656 zap_cursor_advance(&zc
);
2657 offset
= zap_cursor_serialize(&zc
);
2662 /* Fill the offset right after advancing the cursor. */
2665 if (cooks
!= NULL
) {
2668 KASSERT(ncooks
>= 0, ("ncookies=%d", ncooks
));
2671 zp
->z_zn_prefetch
= B_FALSE
; /* a lookup will re-enable pre-fetching */
2673 /* Subtract unused cookies */
2674 if (ncookies
!= NULL
)
2675 *ncookies
-= ncooks
;
2677 if (uio
->uio_segflg
== UIO_SYSSPACE
&& uio
->uio_iovcnt
== 1) {
2678 iovp
->iov_base
+= outcount
;
2679 iovp
->iov_len
-= outcount
;
2680 uio
->uio_resid
-= outcount
;
2681 } else if ((error
= uiomove(outbuf
, (long)outcount
, UIO_READ
, uio
))) {
2683 * Reset the pointer.
2685 offset
= uio
->uio_loffset
;
2689 zap_cursor_fini(&zc
);
2690 if (uio
->uio_segflg
!= UIO_SYSSPACE
|| uio
->uio_iovcnt
!= 1)
2691 kmem_free(outbuf
, bufsize
);
2693 if (error
== ENOENT
)
2696 ZFS_ACCESSTIME_STAMP(zfsvfs
, zp
);
2698 uio
->uio_loffset
= offset
;
2700 if (error
!= 0 && cookies
!= NULL
) {
2701 free(*cookies
, M_TEMP
);
2708 ulong_t zfs_fsync_sync_cnt
= 4;
2711 zfs_fsync(vnode_t
*vp
, int syncflag
, cred_t
*cr
, caller_context_t
*ct
)
2713 znode_t
*zp
= VTOZ(vp
);
2714 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
2716 (void) tsd_set(zfs_fsyncer_key
, (void *)zfs_fsync_sync_cnt
);
2718 if (zfsvfs
->z_os
->os_sync
!= ZFS_SYNC_DISABLED
) {
2721 zil_commit(zfsvfs
->z_log
, zp
->z_id
);
2724 tsd_set(zfs_fsyncer_key
, NULL
);
2730 * Get the requested file attributes and place them in the provided
2733 * IN: vp - vnode of file.
2734 * vap - va_mask identifies requested attributes.
2735 * If AT_XVATTR set, then optional attrs are requested
2736 * flags - ATTR_NOACLCHECK (CIFS server context)
2737 * cr - credentials of caller.
2739 * OUT: vap - attribute values.
2741 * RETURN: 0 (always succeeds).
2745 zfs_getattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, cred_t
*cr
)
2747 znode_t
*zp
= VTOZ(vp
);
2748 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
2751 u_longlong_t nblocks
;
2752 uint64_t mtime
[2], ctime
[2], crtime
[2], rdev
;
2753 xvattr_t
*xvap
= (xvattr_t
*)vap
; /* vap may be an xvattr_t * */
2754 xoptattr_t
*xoap
= NULL
;
2755 boolean_t skipaclchk
= (flags
& ATTR_NOACLCHECK
) ? B_TRUE
: B_FALSE
;
2756 sa_bulk_attr_t bulk
[4];
2762 zfs_fuid_map_ids(zp
, cr
, &vap
->va_uid
, &vap
->va_gid
);
2764 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MTIME(zfsvfs
), NULL
, &mtime
, 16);
2765 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
), NULL
, &ctime
, 16);
2766 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CRTIME(zfsvfs
), NULL
, &crtime
, 16);
2767 if (vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
)
2768 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_RDEV(zfsvfs
), NULL
,
2771 if ((error
= sa_bulk_lookup(zp
->z_sa_hdl
, bulk
, count
)) != 0) {
2777 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
2778 * Also, if we are the owner don't bother, since owner should
2779 * always be allowed to read basic attributes of file.
2781 if (!(zp
->z_pflags
& ZFS_ACL_TRIVIAL
) &&
2782 (vap
->va_uid
!= crgetuid(cr
))) {
2783 if ((error
= zfs_zaccess(zp
, ACE_READ_ATTRIBUTES
, 0,
2791 * Return all attributes. It's cheaper to provide the answer
2792 * than to determine whether we were asked the question.
2795 vap
->va_type
= IFTOVT(zp
->z_mode
);
2796 vap
->va_mode
= zp
->z_mode
& ~S_IFMT
;
2798 vap
->va_nodeid
= zp
->z_id
;
2799 vap
->va_nlink
= zp
->z_links
;
2800 if ((vp
->v_flag
& VROOT
) && zfs_show_ctldir(zp
) &&
2801 zp
->z_links
< ZFS_LINK_MAX
)
2803 vap
->va_size
= zp
->z_size
;
2804 if (vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
)
2805 vap
->va_rdev
= zfs_cmpldev(rdev
);
2806 vap
->va_seq
= zp
->z_seq
;
2807 vap
->va_flags
= 0; /* FreeBSD: Reset chflags(2) flags. */
2808 vap
->va_filerev
= zp
->z_seq
;
2811 * Add in any requested optional attributes and the create time.
2812 * Also set the corresponding bits in the returned attribute bitmap.
2814 if ((xoap
= xva_getxoptattr(xvap
)) != NULL
&& zfsvfs
->z_use_fuids
) {
2815 if (XVA_ISSET_REQ(xvap
, XAT_ARCHIVE
)) {
2817 ((zp
->z_pflags
& ZFS_ARCHIVE
) != 0);
2818 XVA_SET_RTN(xvap
, XAT_ARCHIVE
);
2821 if (XVA_ISSET_REQ(xvap
, XAT_READONLY
)) {
2822 xoap
->xoa_readonly
=
2823 ((zp
->z_pflags
& ZFS_READONLY
) != 0);
2824 XVA_SET_RTN(xvap
, XAT_READONLY
);
2827 if (XVA_ISSET_REQ(xvap
, XAT_SYSTEM
)) {
2829 ((zp
->z_pflags
& ZFS_SYSTEM
) != 0);
2830 XVA_SET_RTN(xvap
, XAT_SYSTEM
);
2833 if (XVA_ISSET_REQ(xvap
, XAT_HIDDEN
)) {
2835 ((zp
->z_pflags
& ZFS_HIDDEN
) != 0);
2836 XVA_SET_RTN(xvap
, XAT_HIDDEN
);
2839 if (XVA_ISSET_REQ(xvap
, XAT_NOUNLINK
)) {
2840 xoap
->xoa_nounlink
=
2841 ((zp
->z_pflags
& ZFS_NOUNLINK
) != 0);
2842 XVA_SET_RTN(xvap
, XAT_NOUNLINK
);
2845 if (XVA_ISSET_REQ(xvap
, XAT_IMMUTABLE
)) {
2846 xoap
->xoa_immutable
=
2847 ((zp
->z_pflags
& ZFS_IMMUTABLE
) != 0);
2848 XVA_SET_RTN(xvap
, XAT_IMMUTABLE
);
2851 if (XVA_ISSET_REQ(xvap
, XAT_APPENDONLY
)) {
2852 xoap
->xoa_appendonly
=
2853 ((zp
->z_pflags
& ZFS_APPENDONLY
) != 0);
2854 XVA_SET_RTN(xvap
, XAT_APPENDONLY
);
2857 if (XVA_ISSET_REQ(xvap
, XAT_NODUMP
)) {
2859 ((zp
->z_pflags
& ZFS_NODUMP
) != 0);
2860 XVA_SET_RTN(xvap
, XAT_NODUMP
);
2863 if (XVA_ISSET_REQ(xvap
, XAT_OPAQUE
)) {
2865 ((zp
->z_pflags
& ZFS_OPAQUE
) != 0);
2866 XVA_SET_RTN(xvap
, XAT_OPAQUE
);
2869 if (XVA_ISSET_REQ(xvap
, XAT_AV_QUARANTINED
)) {
2870 xoap
->xoa_av_quarantined
=
2871 ((zp
->z_pflags
& ZFS_AV_QUARANTINED
) != 0);
2872 XVA_SET_RTN(xvap
, XAT_AV_QUARANTINED
);
2875 if (XVA_ISSET_REQ(xvap
, XAT_AV_MODIFIED
)) {
2876 xoap
->xoa_av_modified
=
2877 ((zp
->z_pflags
& ZFS_AV_MODIFIED
) != 0);
2878 XVA_SET_RTN(xvap
, XAT_AV_MODIFIED
);
2881 if (XVA_ISSET_REQ(xvap
, XAT_AV_SCANSTAMP
) &&
2882 vp
->v_type
== VREG
) {
2883 zfs_sa_get_scanstamp(zp
, xvap
);
2886 if (XVA_ISSET_REQ(xvap
, XAT_REPARSE
)) {
2887 xoap
->xoa_reparse
= ((zp
->z_pflags
& ZFS_REPARSE
) != 0);
2888 XVA_SET_RTN(xvap
, XAT_REPARSE
);
2890 if (XVA_ISSET_REQ(xvap
, XAT_GEN
)) {
2891 xoap
->xoa_generation
= zp
->z_gen
;
2892 XVA_SET_RTN(xvap
, XAT_GEN
);
2895 if (XVA_ISSET_REQ(xvap
, XAT_OFFLINE
)) {
2897 ((zp
->z_pflags
& ZFS_OFFLINE
) != 0);
2898 XVA_SET_RTN(xvap
, XAT_OFFLINE
);
2901 if (XVA_ISSET_REQ(xvap
, XAT_SPARSE
)) {
2903 ((zp
->z_pflags
& ZFS_SPARSE
) != 0);
2904 XVA_SET_RTN(xvap
, XAT_SPARSE
);
2907 if (XVA_ISSET_REQ(xvap
, XAT_PROJINHERIT
)) {
2908 xoap
->xoa_projinherit
=
2909 ((zp
->z_pflags
& ZFS_PROJINHERIT
) != 0);
2910 XVA_SET_RTN(xvap
, XAT_PROJINHERIT
);
2913 if (XVA_ISSET_REQ(xvap
, XAT_PROJID
)) {
2914 xoap
->xoa_projid
= zp
->z_projid
;
2915 XVA_SET_RTN(xvap
, XAT_PROJID
);
2919 ZFS_TIME_DECODE(&vap
->va_atime
, zp
->z_atime
);
2920 ZFS_TIME_DECODE(&vap
->va_mtime
, mtime
);
2921 ZFS_TIME_DECODE(&vap
->va_ctime
, ctime
);
2922 ZFS_TIME_DECODE(&vap
->va_birthtime
, crtime
);
2925 sa_object_size(zp
->z_sa_hdl
, &blksize
, &nblocks
);
2926 vap
->va_blksize
= blksize
;
2927 vap
->va_bytes
= nblocks
<< 9; /* nblocks * 512 */
2929 if (zp
->z_blksz
== 0) {
2931 * Block size hasn't been set; suggest maximal I/O transfers.
2933 vap
->va_blksize
= zfsvfs
->z_max_blksz
;
2941 * Set the file attributes to the values contained in the
2944 * IN: zp - znode of file to be modified.
2945 * vap - new attribute values.
2946 * If AT_XVATTR set, then optional attrs are being set
2947 * flags - ATTR_UTIME set if non-default time values provided.
2948 * - ATTR_NOACLCHECK (CIFS context only).
2949 * cr - credentials of caller.
2950 * ct - caller context
2952 * RETURN: 0 on success, error code on failure.
2955 * vp - ctime updated, mtime updated if size changed.
2959 zfs_setattr(znode_t
*zp
, vattr_t
*vap
, int flags
, cred_t
*cr
)
2961 vnode_t
*vp
= ZTOV(zp
);
2962 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
2963 objset_t
*os
= zfsvfs
->z_os
;
2968 uint_t mask
= vap
->va_mask
;
2969 uint_t saved_mask
= 0;
2970 uint64_t saved_mode
;
2973 uint64_t new_uid
, new_gid
;
2975 uint64_t mtime
[2], ctime
[2];
2976 uint64_t projid
= ZFS_INVALID_PROJID
;
2978 int need_policy
= FALSE
;
2980 zfs_fuid_info_t
*fuidp
= NULL
;
2981 xvattr_t
*xvap
= (xvattr_t
*)vap
; /* vap may be an xvattr_t * */
2984 boolean_t skipaclchk
= (flags
& ATTR_NOACLCHECK
) ? B_TRUE
: B_FALSE
;
2985 boolean_t fuid_dirtied
= B_FALSE
;
2986 sa_bulk_attr_t bulk
[7], xattr_bulk
[7];
2987 int count
= 0, xattr_count
= 0;
2992 if (mask
& AT_NOSET
)
2993 return (SET_ERROR(EINVAL
));
2998 zilog
= zfsvfs
->z_log
;
3001 * Make sure that if we have ephemeral uid/gid or xvattr specified
3002 * that file system is at proper version level
3005 if (zfsvfs
->z_use_fuids
== B_FALSE
&&
3006 (((mask
& AT_UID
) && IS_EPHEMERAL(vap
->va_uid
)) ||
3007 ((mask
& AT_GID
) && IS_EPHEMERAL(vap
->va_gid
)) ||
3008 (mask
& AT_XVATTR
))) {
3010 return (SET_ERROR(EINVAL
));
3013 if (mask
& AT_SIZE
&& vp
->v_type
== VDIR
) {
3015 return (SET_ERROR(EISDIR
));
3018 if (mask
& AT_SIZE
&& vp
->v_type
!= VREG
&& vp
->v_type
!= VFIFO
) {
3020 return (SET_ERROR(EINVAL
));
3024 * If this is an xvattr_t, then get a pointer to the structure of
3025 * optional attributes. If this is NULL, then we have a vattr_t.
3027 xoap
= xva_getxoptattr(xvap
);
3029 xva_init(&tmpxvattr
);
3032 * Immutable files can only alter immutable bit and atime
3034 if ((zp
->z_pflags
& ZFS_IMMUTABLE
) &&
3035 ((mask
& (AT_SIZE
|AT_UID
|AT_GID
|AT_MTIME
|AT_MODE
)) ||
3036 ((mask
& AT_XVATTR
) && XVA_ISSET_REQ(xvap
, XAT_CREATETIME
)))) {
3038 return (SET_ERROR(EPERM
));
3042 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
3046 * Verify timestamps doesn't overflow 32 bits.
3047 * ZFS can handle large timestamps, but 32bit syscalls can't
3048 * handle times greater than 2039. This check should be removed
3049 * once large timestamps are fully supported.
3051 if (mask
& (AT_ATIME
| AT_MTIME
)) {
3052 if (((mask
& AT_ATIME
) && TIMESPEC_OVERFLOW(&vap
->va_atime
)) ||
3053 ((mask
& AT_MTIME
) && TIMESPEC_OVERFLOW(&vap
->va_mtime
))) {
3055 return (SET_ERROR(EOVERFLOW
));
3058 if (xoap
!= NULL
&& (mask
& AT_XVATTR
)) {
3059 if (XVA_ISSET_REQ(xvap
, XAT_CREATETIME
) &&
3060 TIMESPEC_OVERFLOW(&vap
->va_birthtime
)) {
3062 return (SET_ERROR(EOVERFLOW
));
3065 if (XVA_ISSET_REQ(xvap
, XAT_PROJID
)) {
3066 if (!dmu_objset_projectquota_enabled(os
) ||
3067 (!S_ISREG(zp
->z_mode
) && !S_ISDIR(zp
->z_mode
))) {
3069 return (SET_ERROR(EOPNOTSUPP
));
3072 projid
= xoap
->xoa_projid
;
3073 if (unlikely(projid
== ZFS_INVALID_PROJID
)) {
3075 return (SET_ERROR(EINVAL
));
3078 if (projid
== zp
->z_projid
&& zp
->z_pflags
& ZFS_PROJID
)
3079 projid
= ZFS_INVALID_PROJID
;
3084 if (XVA_ISSET_REQ(xvap
, XAT_PROJINHERIT
) &&
3085 (xoap
->xoa_projinherit
!=
3086 ((zp
->z_pflags
& ZFS_PROJINHERIT
) != 0)) &&
3087 (!dmu_objset_projectquota_enabled(os
) ||
3088 (!S_ISREG(zp
->z_mode
) && !S_ISDIR(zp
->z_mode
)))) {
3090 return (SET_ERROR(EOPNOTSUPP
));
3097 if (zfsvfs
->z_vfs
->vfs_flag
& VFS_RDONLY
) {
3099 return (SET_ERROR(EROFS
));
3103 * First validate permissions
3106 if (mask
& AT_SIZE
) {
3108 * XXX - Note, we are not providing any open
3109 * mode flags here (like FNDELAY), so we may
3110 * block if there are locks present... this
3111 * should be addressed in openat().
3113 /* XXX - would it be OK to generate a log record here? */
3114 err
= zfs_freesp(zp
, vap
->va_size
, 0, 0, FALSE
);
3121 if (mask
& (AT_ATIME
|AT_MTIME
) ||
3122 ((mask
& AT_XVATTR
) && (XVA_ISSET_REQ(xvap
, XAT_HIDDEN
) ||
3123 XVA_ISSET_REQ(xvap
, XAT_READONLY
) ||
3124 XVA_ISSET_REQ(xvap
, XAT_ARCHIVE
) ||
3125 XVA_ISSET_REQ(xvap
, XAT_OFFLINE
) ||
3126 XVA_ISSET_REQ(xvap
, XAT_SPARSE
) ||
3127 XVA_ISSET_REQ(xvap
, XAT_CREATETIME
) ||
3128 XVA_ISSET_REQ(xvap
, XAT_SYSTEM
)))) {
3129 need_policy
= zfs_zaccess(zp
, ACE_WRITE_ATTRIBUTES
, 0,
3133 if (mask
& (AT_UID
|AT_GID
)) {
3134 int idmask
= (mask
& (AT_UID
|AT_GID
));
3139 * NOTE: even if a new mode is being set,
3140 * we may clear S_ISUID/S_ISGID bits.
3143 if (!(mask
& AT_MODE
))
3144 vap
->va_mode
= zp
->z_mode
;
3147 * Take ownership or chgrp to group we are a member of
3150 take_owner
= (mask
& AT_UID
) && (vap
->va_uid
== crgetuid(cr
));
3151 take_group
= (mask
& AT_GID
) &&
3152 zfs_groupmember(zfsvfs
, vap
->va_gid
, cr
);
3155 * If both AT_UID and AT_GID are set then take_owner and
3156 * take_group must both be set in order to allow taking
3159 * Otherwise, send the check through secpolicy_vnode_setattr()
3163 if (((idmask
== (AT_UID
|AT_GID
)) && take_owner
&& take_group
) ||
3164 ((idmask
== AT_UID
) && take_owner
) ||
3165 ((idmask
== AT_GID
) && take_group
)) {
3166 if (zfs_zaccess(zp
, ACE_WRITE_OWNER
, 0,
3167 skipaclchk
, cr
) == 0) {
3169 * Remove setuid/setgid for non-privileged users
3171 secpolicy_setid_clear(vap
, vp
, cr
);
3172 trim_mask
= (mask
& (AT_UID
|AT_GID
));
3181 oldva
.va_mode
= zp
->z_mode
;
3182 zfs_fuid_map_ids(zp
, cr
, &oldva
.va_uid
, &oldva
.va_gid
);
3183 if (mask
& AT_XVATTR
) {
3185 * Update xvattr mask to include only those attributes
3186 * that are actually changing.
3188 * the bits will be restored prior to actually setting
3189 * the attributes so the caller thinks they were set.
3191 if (XVA_ISSET_REQ(xvap
, XAT_APPENDONLY
)) {
3192 if (xoap
->xoa_appendonly
!=
3193 ((zp
->z_pflags
& ZFS_APPENDONLY
) != 0)) {
3196 XVA_CLR_REQ(xvap
, XAT_APPENDONLY
);
3197 XVA_SET_REQ(&tmpxvattr
, XAT_APPENDONLY
);
3201 if (XVA_ISSET_REQ(xvap
, XAT_PROJINHERIT
)) {
3202 if (xoap
->xoa_projinherit
!=
3203 ((zp
->z_pflags
& ZFS_PROJINHERIT
) != 0)) {
3206 XVA_CLR_REQ(xvap
, XAT_PROJINHERIT
);
3207 XVA_SET_REQ(&tmpxvattr
, XAT_PROJINHERIT
);
3211 if (XVA_ISSET_REQ(xvap
, XAT_NOUNLINK
)) {
3212 if (xoap
->xoa_nounlink
!=
3213 ((zp
->z_pflags
& ZFS_NOUNLINK
) != 0)) {
3216 XVA_CLR_REQ(xvap
, XAT_NOUNLINK
);
3217 XVA_SET_REQ(&tmpxvattr
, XAT_NOUNLINK
);
3221 if (XVA_ISSET_REQ(xvap
, XAT_IMMUTABLE
)) {
3222 if (xoap
->xoa_immutable
!=
3223 ((zp
->z_pflags
& ZFS_IMMUTABLE
) != 0)) {
3226 XVA_CLR_REQ(xvap
, XAT_IMMUTABLE
);
3227 XVA_SET_REQ(&tmpxvattr
, XAT_IMMUTABLE
);
3231 if (XVA_ISSET_REQ(xvap
, XAT_NODUMP
)) {
3232 if (xoap
->xoa_nodump
!=
3233 ((zp
->z_pflags
& ZFS_NODUMP
) != 0)) {
3236 XVA_CLR_REQ(xvap
, XAT_NODUMP
);
3237 XVA_SET_REQ(&tmpxvattr
, XAT_NODUMP
);
3241 if (XVA_ISSET_REQ(xvap
, XAT_AV_MODIFIED
)) {
3242 if (xoap
->xoa_av_modified
!=
3243 ((zp
->z_pflags
& ZFS_AV_MODIFIED
) != 0)) {
3246 XVA_CLR_REQ(xvap
, XAT_AV_MODIFIED
);
3247 XVA_SET_REQ(&tmpxvattr
, XAT_AV_MODIFIED
);
3251 if (XVA_ISSET_REQ(xvap
, XAT_AV_QUARANTINED
)) {
3252 if ((vp
->v_type
!= VREG
&&
3253 xoap
->xoa_av_quarantined
) ||
3254 xoap
->xoa_av_quarantined
!=
3255 ((zp
->z_pflags
& ZFS_AV_QUARANTINED
) != 0)) {
3258 XVA_CLR_REQ(xvap
, XAT_AV_QUARANTINED
);
3259 XVA_SET_REQ(&tmpxvattr
, XAT_AV_QUARANTINED
);
3263 if (XVA_ISSET_REQ(xvap
, XAT_REPARSE
)) {
3265 return (SET_ERROR(EPERM
));
3268 if (need_policy
== FALSE
&&
3269 (XVA_ISSET_REQ(xvap
, XAT_AV_SCANSTAMP
) ||
3270 XVA_ISSET_REQ(xvap
, XAT_OPAQUE
))) {
3275 if (mask
& AT_MODE
) {
3276 if (zfs_zaccess(zp
, ACE_WRITE_ACL
, 0, skipaclchk
, cr
) == 0) {
3277 err
= secpolicy_setid_setsticky_clear(vp
, vap
,
3283 trim_mask
|= AT_MODE
;
3291 * If trim_mask is set then take ownership
3292 * has been granted or write_acl is present and user
3293 * has the ability to modify mode. In that case remove
3294 * UID|GID and or MODE from mask so that
3295 * secpolicy_vnode_setattr() doesn't revoke it.
3299 saved_mask
= vap
->va_mask
;
3300 vap
->va_mask
&= ~trim_mask
;
3301 if (trim_mask
& AT_MODE
) {
3303 * Save the mode, as secpolicy_vnode_setattr()
3304 * will overwrite it with ova.va_mode.
3306 saved_mode
= vap
->va_mode
;
3309 err
= secpolicy_vnode_setattr(cr
, vp
, vap
, &oldva
, flags
,
3310 (int (*)(void *, int, cred_t
*))zfs_zaccess_unix
, zp
);
3317 vap
->va_mask
|= saved_mask
;
3318 if (trim_mask
& AT_MODE
) {
3320 * Recover the mode after
3321 * secpolicy_vnode_setattr().
3323 vap
->va_mode
= saved_mode
;
3329 * secpolicy_vnode_setattr, or take ownership may have
3332 mask
= vap
->va_mask
;
3334 if ((mask
& (AT_UID
| AT_GID
)) || projid
!= ZFS_INVALID_PROJID
) {
3335 err
= sa_lookup(zp
->z_sa_hdl
, SA_ZPL_XATTR(zfsvfs
),
3336 &xattr_obj
, sizeof (xattr_obj
));
3338 if (err
== 0 && xattr_obj
) {
3339 err
= zfs_zget(zp
->z_zfsvfs
, xattr_obj
, &attrzp
);
3341 err
= vn_lock(ZTOV(attrzp
), LK_EXCLUSIVE
);
3343 vrele(ZTOV(attrzp
));
3348 if (mask
& AT_UID
) {
3349 new_uid
= zfs_fuid_create(zfsvfs
,
3350 (uint64_t)vap
->va_uid
, cr
, ZFS_OWNER
, &fuidp
);
3351 if (new_uid
!= zp
->z_uid
&&
3352 zfs_id_overquota(zfsvfs
, DMU_USERUSED_OBJECT
,
3356 err
= SET_ERROR(EDQUOT
);
3361 if (mask
& AT_GID
) {
3362 new_gid
= zfs_fuid_create(zfsvfs
, (uint64_t)vap
->va_gid
,
3363 cr
, ZFS_GROUP
, &fuidp
);
3364 if (new_gid
!= zp
->z_gid
&&
3365 zfs_id_overquota(zfsvfs
, DMU_GROUPUSED_OBJECT
,
3369 err
= SET_ERROR(EDQUOT
);
3374 if (projid
!= ZFS_INVALID_PROJID
&&
3375 zfs_id_overquota(zfsvfs
, DMU_PROJECTUSED_OBJECT
, projid
)) {
3378 err
= SET_ERROR(EDQUOT
);
3382 tx
= dmu_tx_create(os
);
3384 if (mask
& AT_MODE
) {
3385 uint64_t pmode
= zp
->z_mode
;
3387 new_mode
= (pmode
& S_IFMT
) | (vap
->va_mode
& ~S_IFMT
);
3389 if (zp
->z_zfsvfs
->z_acl_mode
== ZFS_ACL_RESTRICTED
&&
3390 !(zp
->z_pflags
& ZFS_ACL_TRIVIAL
)) {
3391 err
= SET_ERROR(EPERM
);
3395 if ((err
= zfs_acl_chmod_setattr(zp
, &aclp
, new_mode
)))
3398 if (!zp
->z_is_sa
&& ((acl_obj
= zfs_external_acl(zp
)) != 0)) {
3400 * Are we upgrading ACL from old V0 format
3403 if (zfsvfs
->z_version
>= ZPL_VERSION_FUID
&&
3404 zfs_znode_acl_version(zp
) ==
3405 ZFS_ACL_VERSION_INITIAL
) {
3406 dmu_tx_hold_free(tx
, acl_obj
, 0,
3408 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
,
3409 0, aclp
->z_acl_bytes
);
3411 dmu_tx_hold_write(tx
, acl_obj
, 0,
3414 } else if (!zp
->z_is_sa
&& aclp
->z_acl_bytes
> ZFS_ACE_SPACE
) {
3415 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
,
3416 0, aclp
->z_acl_bytes
);
3418 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_TRUE
);
3420 if (((mask
& AT_XVATTR
) &&
3421 XVA_ISSET_REQ(xvap
, XAT_AV_SCANSTAMP
)) ||
3422 (projid
!= ZFS_INVALID_PROJID
&&
3423 !(zp
->z_pflags
& ZFS_PROJID
)))
3424 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_TRUE
);
3426 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_FALSE
);
3430 dmu_tx_hold_sa(tx
, attrzp
->z_sa_hdl
, B_FALSE
);
3433 fuid_dirtied
= zfsvfs
->z_fuid_dirty
;
3435 zfs_fuid_txhold(zfsvfs
, tx
);
3437 zfs_sa_upgrade_txholds(tx
, zp
);
3439 err
= dmu_tx_assign(tx
, TXG_WAIT
);
3445 * Set each attribute requested.
3446 * We group settings according to the locks they need to acquire.
3448 * Note: you cannot set ctime directly, although it will be
3449 * updated as a side-effect of calling this function.
3452 if (projid
!= ZFS_INVALID_PROJID
&& !(zp
->z_pflags
& ZFS_PROJID
)) {
3454 * For the existed object that is upgraded from old system,
3455 * its on-disk layout has no slot for the project ID attribute.
3456 * But quota accounting logic needs to access related slots by
3457 * offset directly. So we need to adjust old objects' layout
3458 * to make the project ID to some unified and fixed offset.
3461 err
= sa_add_projid(attrzp
->z_sa_hdl
, tx
, projid
);
3463 err
= sa_add_projid(zp
->z_sa_hdl
, tx
, projid
);
3465 if (unlikely(err
== EEXIST
))
3470 projid
= ZFS_INVALID_PROJID
;
3473 if (mask
& (AT_UID
|AT_GID
|AT_MODE
))
3474 mutex_enter(&zp
->z_acl_lock
);
3476 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_FLAGS(zfsvfs
), NULL
,
3477 &zp
->z_pflags
, sizeof (zp
->z_pflags
));
3480 if (mask
& (AT_UID
|AT_GID
|AT_MODE
))
3481 mutex_enter(&attrzp
->z_acl_lock
);
3482 SA_ADD_BULK_ATTR(xattr_bulk
, xattr_count
,
3483 SA_ZPL_FLAGS(zfsvfs
), NULL
, &attrzp
->z_pflags
,
3484 sizeof (attrzp
->z_pflags
));
3485 if (projid
!= ZFS_INVALID_PROJID
) {
3486 attrzp
->z_projid
= projid
;
3487 SA_ADD_BULK_ATTR(xattr_bulk
, xattr_count
,
3488 SA_ZPL_PROJID(zfsvfs
), NULL
, &attrzp
->z_projid
,
3489 sizeof (attrzp
->z_projid
));
3493 if (mask
& (AT_UID
|AT_GID
)) {
3495 if (mask
& AT_UID
) {
3496 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_UID(zfsvfs
), NULL
,
3497 &new_uid
, sizeof (new_uid
));
3498 zp
->z_uid
= new_uid
;
3500 SA_ADD_BULK_ATTR(xattr_bulk
, xattr_count
,
3501 SA_ZPL_UID(zfsvfs
), NULL
, &new_uid
,
3503 attrzp
->z_uid
= new_uid
;
3507 if (mask
& AT_GID
) {
3508 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_GID(zfsvfs
),
3509 NULL
, &new_gid
, sizeof (new_gid
));
3510 zp
->z_gid
= new_gid
;
3512 SA_ADD_BULK_ATTR(xattr_bulk
, xattr_count
,
3513 SA_ZPL_GID(zfsvfs
), NULL
, &new_gid
,
3515 attrzp
->z_gid
= new_gid
;
3518 if (!(mask
& AT_MODE
)) {
3519 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MODE(zfsvfs
),
3520 NULL
, &new_mode
, sizeof (new_mode
));
3521 new_mode
= zp
->z_mode
;
3523 err
= zfs_acl_chown_setattr(zp
);
3526 err
= zfs_acl_chown_setattr(attrzp
);
3531 if (mask
& AT_MODE
) {
3532 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MODE(zfsvfs
), NULL
,
3533 &new_mode
, sizeof (new_mode
));
3534 zp
->z_mode
= new_mode
;
3535 ASSERT3U((uintptr_t)aclp
, !=, 0);
3536 err
= zfs_aclset_common(zp
, aclp
, cr
, tx
);
3538 if (zp
->z_acl_cached
)
3539 zfs_acl_free(zp
->z_acl_cached
);
3540 zp
->z_acl_cached
= aclp
;
3545 if (mask
& AT_ATIME
) {
3546 ZFS_TIME_ENCODE(&vap
->va_atime
, zp
->z_atime
);
3547 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_ATIME(zfsvfs
), NULL
,
3548 &zp
->z_atime
, sizeof (zp
->z_atime
));
3551 if (mask
& AT_MTIME
) {
3552 ZFS_TIME_ENCODE(&vap
->va_mtime
, mtime
);
3553 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MTIME(zfsvfs
), NULL
,
3554 mtime
, sizeof (mtime
));
3557 if (projid
!= ZFS_INVALID_PROJID
) {
3558 zp
->z_projid
= projid
;
3559 SA_ADD_BULK_ATTR(bulk
, count
,
3560 SA_ZPL_PROJID(zfsvfs
), NULL
, &zp
->z_projid
,
3561 sizeof (zp
->z_projid
));
3564 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
3565 if (mask
& AT_SIZE
&& !(mask
& AT_MTIME
)) {
3566 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MTIME(zfsvfs
),
3567 NULL
, mtime
, sizeof (mtime
));
3568 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
), NULL
,
3569 &ctime
, sizeof (ctime
));
3570 zfs_tstamp_update_setup(zp
, CONTENT_MODIFIED
, mtime
, ctime
);
3571 } else if (mask
!= 0) {
3572 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
), NULL
,
3573 &ctime
, sizeof (ctime
));
3574 zfs_tstamp_update_setup(zp
, STATE_CHANGED
, mtime
, ctime
);
3576 SA_ADD_BULK_ATTR(xattr_bulk
, xattr_count
,
3577 SA_ZPL_CTIME(zfsvfs
), NULL
,
3578 &ctime
, sizeof (ctime
));
3579 zfs_tstamp_update_setup(attrzp
, STATE_CHANGED
,
3585 * Do this after setting timestamps to prevent timestamp
3586 * update from toggling bit
3589 if (xoap
&& (mask
& AT_XVATTR
)) {
3591 if (XVA_ISSET_REQ(xvap
, XAT_CREATETIME
))
3592 xoap
->xoa_createtime
= vap
->va_birthtime
;
3594 * restore trimmed off masks
3595 * so that return masks can be set for caller.
3598 if (XVA_ISSET_REQ(&tmpxvattr
, XAT_APPENDONLY
)) {
3599 XVA_SET_REQ(xvap
, XAT_APPENDONLY
);
3601 if (XVA_ISSET_REQ(&tmpxvattr
, XAT_NOUNLINK
)) {
3602 XVA_SET_REQ(xvap
, XAT_NOUNLINK
);
3604 if (XVA_ISSET_REQ(&tmpxvattr
, XAT_IMMUTABLE
)) {
3605 XVA_SET_REQ(xvap
, XAT_IMMUTABLE
);
3607 if (XVA_ISSET_REQ(&tmpxvattr
, XAT_NODUMP
)) {
3608 XVA_SET_REQ(xvap
, XAT_NODUMP
);
3610 if (XVA_ISSET_REQ(&tmpxvattr
, XAT_AV_MODIFIED
)) {
3611 XVA_SET_REQ(xvap
, XAT_AV_MODIFIED
);
3613 if (XVA_ISSET_REQ(&tmpxvattr
, XAT_AV_QUARANTINED
)) {
3614 XVA_SET_REQ(xvap
, XAT_AV_QUARANTINED
);
3616 if (XVA_ISSET_REQ(&tmpxvattr
, XAT_PROJINHERIT
)) {
3617 XVA_SET_REQ(xvap
, XAT_PROJINHERIT
);
3620 if (XVA_ISSET_REQ(xvap
, XAT_AV_SCANSTAMP
))
3621 ASSERT(vp
->v_type
== VREG
);
3623 zfs_xvattr_set(zp
, xvap
, tx
);
3627 zfs_fuid_sync(zfsvfs
, tx
);
3630 zfs_log_setattr(zilog
, tx
, TX_SETATTR
, zp
, vap
, mask
, fuidp
);
3632 if (mask
& (AT_UID
|AT_GID
|AT_MODE
))
3633 mutex_exit(&zp
->z_acl_lock
);
3636 if (mask
& (AT_UID
|AT_GID
|AT_MODE
))
3637 mutex_exit(&attrzp
->z_acl_lock
);
3640 if (err
== 0 && attrzp
) {
3641 err2
= sa_bulk_update(attrzp
->z_sa_hdl
, xattr_bulk
,
3653 zfs_fuid_info_free(fuidp
);
3660 err2
= sa_bulk_update(zp
->z_sa_hdl
, bulk
, count
, tx
);
3665 if (os
->os_sync
== ZFS_SYNC_ALWAYS
)
3666 zil_commit(zilog
, 0);
3673 * We acquire all but fdvp locks using non-blocking acquisitions. If we
3674 * fail to acquire any lock in the path we will drop all held locks,
3675 * acquire the new lock in a blocking fashion, and then release it and
3676 * restart the rename. This acquire/release step ensures that we do not
3677 * spin on a lock waiting for release. On error release all vnode locks
3678 * and decrement references the way tmpfs_rename() would do.
3681 zfs_rename_relock(struct vnode
*sdvp
, struct vnode
**svpp
,
3682 struct vnode
*tdvp
, struct vnode
**tvpp
,
3683 const struct componentname
*scnp
, const struct componentname
*tcnp
)
3686 struct vnode
*nvp
, *svp
, *tvp
;
3687 znode_t
*sdzp
, *tdzp
, *szp
, *tzp
;
3688 const char *snm
= scnp
->cn_nameptr
;
3689 const char *tnm
= tcnp
->cn_nameptr
;
3693 if (*tvpp
!= NULL
&& *tvpp
!= tdvp
)
3697 error
= vn_lock(sdvp
, LK_EXCLUSIVE
);
3702 error
= vn_lock(tdvp
, LK_EXCLUSIVE
| LK_NOWAIT
);
3707 error
= vn_lock(tdvp
, LK_EXCLUSIVE
);
3716 * Before using sdzp and tdzp we must ensure that they are live.
3717 * As a porting legacy from illumos we have two things to worry
3718 * about. One is typical for FreeBSD and it is that the vnode is
3719 * not reclaimed (doomed). The other is that the znode is live.
3720 * The current code can invalidate the znode without acquiring the
3721 * corresponding vnode lock if the object represented by the znode
3722 * and vnode is no longer valid after a rollback or receive operation.
3723 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
3724 * that protects the znodes from the invalidation.
3726 zfsvfs
= sdzp
->z_zfsvfs
;
3727 ASSERT3P(zfsvfs
, ==, tdzp
->z_zfsvfs
);
3731 * We can not use ZFS_VERIFY_ZP() here because it could directly return
3732 * bypassing the cleanup code in the case of an error.
3734 if (tdzp
->z_sa_hdl
== NULL
|| sdzp
->z_sa_hdl
== NULL
) {
3738 error
= SET_ERROR(EIO
);
3743 * Re-resolve svp to be certain it still exists and fetch the
3746 error
= zfs_dirent_lookup(sdzp
, snm
, &szp
, ZEXISTS
);
3748 /* Source entry invalid or not there. */
3752 if ((scnp
->cn_flags
& ISDOTDOT
) != 0 ||
3753 (scnp
->cn_namelen
== 1 && scnp
->cn_nameptr
[0] == '.'))
3754 error
= SET_ERROR(EINVAL
);
3760 * Re-resolve tvp, if it disappeared we just carry on.
3762 error
= zfs_dirent_lookup(tdzp
, tnm
, &tzp
, 0);
3768 if ((tcnp
->cn_flags
& ISDOTDOT
) != 0)
3769 error
= SET_ERROR(EINVAL
);
3778 * At present the vnode locks must be acquired before z_teardown_lock,
3779 * although it would be more logical to use the opposite order.
3784 * Now try acquire locks on svp and tvp.
3787 error
= vn_lock(nvp
, LK_EXCLUSIVE
| LK_NOWAIT
);
3793 if (error
!= EBUSY
) {
3797 error
= vn_lock(nvp
, LK_EXCLUSIVE
);
3804 * Concurrent rename race.
3809 error
= SET_ERROR(EINVAL
);
3824 error
= vn_lock(nvp
, LK_EXCLUSIVE
| LK_NOWAIT
);
3829 if (error
!= EBUSY
) {
3833 error
= vn_lock(nvp
, LK_EXCLUSIVE
);
3851 * Note that we must use VRELE_ASYNC in this function as it walks
3852 * up the directory tree and vrele may need to acquire an exclusive
3853 * lock if a last reference to a vnode is dropped.
3856 zfs_rename_check(znode_t
*szp
, znode_t
*sdzp
, znode_t
*tdzp
)
3863 zfsvfs
= tdzp
->z_zfsvfs
;
3865 return (SET_ERROR(EINVAL
));
3868 if (tdzp
->z_id
== zfsvfs
->z_root
)
3872 ASSERT(!zp
->z_unlinked
);
3873 if ((error
= sa_lookup(zp
->z_sa_hdl
,
3874 SA_ZPL_PARENT(zfsvfs
), &parent
, sizeof (parent
))) != 0)
3877 if (parent
== szp
->z_id
) {
3878 error
= SET_ERROR(EINVAL
);
3881 if (parent
== zfsvfs
->z_root
)
3883 if (parent
== sdzp
->z_id
)
3886 error
= zfs_zget(zfsvfs
, parent
, &zp1
);
3891 VN_RELE_ASYNC(ZTOV(zp
),
3892 dsl_pool_zrele_taskq(
3893 dmu_objset_pool(zfsvfs
->z_os
)));
3897 if (error
== ENOTDIR
)
3898 panic("checkpath: .. not a directory\n");
3900 VN_RELE_ASYNC(ZTOV(zp
),
3901 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs
->z_os
)));
3906 * Move an entry from the provided source directory to the target
3907 * directory. Change the entry name as indicated.
3909 * IN: sdvp - Source directory containing the "old entry".
3910 * snm - Old entry name.
3911 * tdvp - Target directory to contain the "new entry".
3912 * tnm - New entry name.
3913 * cr - credentials of caller.
3914 * ct - caller context
3915 * flags - case flags
3917 * RETURN: 0 on success, error code on failure.
3920 * sdvp,tdvp - ctime|mtime updated
3924 zfs_rename_(vnode_t
*sdvp
, vnode_t
**svpp
, struct componentname
*scnp
,
3925 vnode_t
*tdvp
, vnode_t
**tvpp
, struct componentname
*tcnp
,
3926 cred_t
*cr
, int log
)
3929 znode_t
*sdzp
, *tdzp
, *szp
, *tzp
;
3930 zilog_t
*zilog
= NULL
;
3932 char *snm
= scnp
->cn_nameptr
;
3933 char *tnm
= tcnp
->cn_nameptr
;
3936 /* Reject renames across filesystems. */
3937 if ((*svpp
)->v_mount
!= tdvp
->v_mount
||
3938 ((*tvpp
) != NULL
&& (*svpp
)->v_mount
!= (*tvpp
)->v_mount
)) {
3939 error
= SET_ERROR(EXDEV
);
3943 if (zfsctl_is_node(tdvp
)) {
3944 error
= SET_ERROR(EXDEV
);
3949 * Lock all four vnodes to ensure safety and semantics of renaming.
3951 error
= zfs_rename_relock(sdvp
, svpp
, tdvp
, tvpp
, scnp
, tcnp
);
3953 /* no vnodes are locked in the case of error here */
3959 zfsvfs
= tdzp
->z_zfsvfs
;
3960 zilog
= zfsvfs
->z_log
;
3963 * After we re-enter ZFS_ENTER() we will have to revalidate all
3968 if (zfsvfs
->z_utf8
&& u8_validate(tnm
,
3969 strlen(tnm
), NULL
, U8_VALIDATE_ENTIRE
, &error
) < 0) {
3970 error
= SET_ERROR(EILSEQ
);
3974 /* If source and target are the same file, there is nothing to do. */
3975 if ((*svpp
) == (*tvpp
)) {
3980 if (((*svpp
)->v_type
== VDIR
&& (*svpp
)->v_mountedhere
!= NULL
) ||
3981 ((*tvpp
) != NULL
&& (*tvpp
)->v_type
== VDIR
&&
3982 (*tvpp
)->v_mountedhere
!= NULL
)) {
3983 error
= SET_ERROR(EXDEV
);
3988 * We can not use ZFS_VERIFY_ZP() here because it could directly return
3989 * bypassing the cleanup code in the case of an error.
3991 if (tdzp
->z_sa_hdl
== NULL
|| sdzp
->z_sa_hdl
== NULL
) {
3992 error
= SET_ERROR(EIO
);
3997 tzp
= *tvpp
== NULL
? NULL
: VTOZ(*tvpp
);
3998 if (szp
->z_sa_hdl
== NULL
|| (tzp
!= NULL
&& tzp
->z_sa_hdl
== NULL
)) {
3999 error
= SET_ERROR(EIO
);
4004 * This is to prevent the creation of links into attribute space
4005 * by renaming a linked file into/outof an attribute directory.
4006 * See the comment in zfs_link() for why this is considered bad.
4008 if ((tdzp
->z_pflags
& ZFS_XATTR
) != (sdzp
->z_pflags
& ZFS_XATTR
)) {
4009 error
= SET_ERROR(EINVAL
);
4014 * If we are using project inheritance, means if the directory has
4015 * ZFS_PROJINHERIT set, then its descendant directories will inherit
4016 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
4017 * such case, we only allow renames into our tree when the project
4020 if (tdzp
->z_pflags
& ZFS_PROJINHERIT
&&
4021 tdzp
->z_projid
!= szp
->z_projid
) {
4022 error
= SET_ERROR(EXDEV
);
4027 * Must have write access at the source to remove the old entry
4028 * and write access at the target to create the new entry.
4029 * Note that if target and source are the same, this can be
4030 * done in a single check.
4032 if ((error
= zfs_zaccess_rename(sdzp
, szp
, tdzp
, tzp
, cr
)))
4035 if ((*svpp
)->v_type
== VDIR
) {
4037 * Avoid ".", "..", and aliases of "." for obvious reasons.
4039 if ((scnp
->cn_namelen
== 1 && scnp
->cn_nameptr
[0] == '.') ||
4041 (scnp
->cn_flags
| tcnp
->cn_flags
) & ISDOTDOT
) {
4047 * Check to make sure rename is valid.
4048 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
4050 if ((error
= zfs_rename_check(szp
, sdzp
, tdzp
)))
4055 * Does target exist?
4059 * Source and target must be the same type.
4061 if ((*svpp
)->v_type
== VDIR
) {
4062 if ((*tvpp
)->v_type
!= VDIR
) {
4063 error
= SET_ERROR(ENOTDIR
);
4071 if ((*tvpp
)->v_type
== VDIR
) {
4072 error
= SET_ERROR(EISDIR
);
4078 vnevent_rename_src(*svpp
, sdvp
, scnp
->cn_nameptr
, ct
);
4080 vnevent_rename_dest(*tvpp
, tdvp
, tnm
, ct
);
4083 * notify the target directory if it is not the same
4084 * as source directory.
4087 vnevent_rename_dest_dir(tdvp
, ct
);
4090 tx
= dmu_tx_create(zfsvfs
->z_os
);
4091 dmu_tx_hold_sa(tx
, szp
->z_sa_hdl
, B_FALSE
);
4092 dmu_tx_hold_sa(tx
, sdzp
->z_sa_hdl
, B_FALSE
);
4093 dmu_tx_hold_zap(tx
, sdzp
->z_id
, FALSE
, snm
);
4094 dmu_tx_hold_zap(tx
, tdzp
->z_id
, TRUE
, tnm
);
4096 dmu_tx_hold_sa(tx
, tdzp
->z_sa_hdl
, B_FALSE
);
4097 zfs_sa_upgrade_txholds(tx
, tdzp
);
4100 dmu_tx_hold_sa(tx
, tzp
->z_sa_hdl
, B_FALSE
);
4101 zfs_sa_upgrade_txholds(tx
, tzp
);
4104 zfs_sa_upgrade_txholds(tx
, szp
);
4105 dmu_tx_hold_zap(tx
, zfsvfs
->z_unlinkedobj
, FALSE
, NULL
);
4106 error
= dmu_tx_assign(tx
, TXG_WAIT
);
4113 if (tzp
) /* Attempt to remove the existing target */
4114 error
= zfs_link_destroy(tdzp
, tnm
, tzp
, tx
, 0, NULL
);
4117 error
= zfs_link_create(tdzp
, tnm
, szp
, tx
, ZRENAMING
);
4119 szp
->z_pflags
|= ZFS_AV_MODIFIED
;
4121 error
= sa_update(szp
->z_sa_hdl
, SA_ZPL_FLAGS(zfsvfs
),
4122 (void *)&szp
->z_pflags
, sizeof (uint64_t), tx
);
4125 error
= zfs_link_destroy(sdzp
, snm
, szp
, tx
, ZRENAMING
,
4128 zfs_log_rename(zilog
, tx
, TX_RENAME
, sdzp
,
4129 snm
, tdzp
, tnm
, szp
);
4132 * Update path information for the target vnode
4134 vn_renamepath(tdvp
, *svpp
, tnm
, strlen(tnm
));
4137 * At this point, we have successfully created
4138 * the target name, but have failed to remove
4139 * the source name. Since the create was done
4140 * with the ZRENAMING flag, there are
4141 * complications; for one, the link count is
4142 * wrong. The easiest way to deal with this
4143 * is to remove the newly created target, and
4144 * return the original error. This must
4145 * succeed; fortunately, it is very unlikely to
4146 * fail, since we just created it.
4148 VERIFY3U(zfs_link_destroy(tdzp
, tnm
, szp
, tx
,
4149 ZRENAMING
, NULL
), ==, 0);
4156 cache_purge_negative(tdvp
);
4162 unlockout
: /* all 4 vnodes are locked, ZFS_ENTER called */
4167 out
: /* original two vnodes are locked */
4168 if (error
== 0 && zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
4169 zil_commit(zilog
, 0);
4179 zfs_rename(znode_t
*sdzp
, char *sname
, znode_t
*tdzp
, char *tname
,
4180 cred_t
*cr
, int flags
)
4182 struct componentname scn
, tcn
;
4183 vnode_t
*sdvp
, *tdvp
;
4190 error
= zfs_lookup_internal(sdzp
, sname
, &svp
, &scn
, DELETE
);
4191 if (sdzp
->z_zfsvfs
->z_replay
== B_FALSE
)
4197 vn_lock(tdvp
, LK_EXCLUSIVE
| LK_RETRY
);
4198 error
= zfs_lookup_internal(tdzp
, tname
, &tvp
, &tcn
, RENAME
);
4199 if (error
== EJUSTRETURN
)
4201 else if (error
!= 0) {
4206 error
= zfs_rename_(sdvp
, &svp
, &scn
, tdvp
, &tvp
, &tcn
, cr
, 0);
4217 * Insert the indicated symbolic reference entry into the directory.
4219 * IN: dvp - Directory to contain new symbolic link.
4220 * link - Name for new symlink entry.
4221 * vap - Attributes of new entry.
4222 * cr - credentials of caller.
4223 * ct - caller context
4224 * flags - case flags
4226 * RETURN: 0 on success, error code on failure.
4229 * dvp - ctime|mtime updated
4233 zfs_symlink(znode_t
*dzp
, const char *name
, vattr_t
*vap
,
4234 const char *link
, znode_t
**zpp
, cred_t
*cr
, int flags
)
4238 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
4240 uint64_t len
= strlen(link
);
4242 zfs_acl_ids_t acl_ids
;
4243 boolean_t fuid_dirtied
;
4244 uint64_t txtype
= TX_SYMLINK
;
4246 ASSERT(vap
->va_type
== VLNK
);
4250 zilog
= zfsvfs
->z_log
;
4252 if (zfsvfs
->z_utf8
&& u8_validate(name
, strlen(name
),
4253 NULL
, U8_VALIDATE_ENTIRE
, &error
) < 0) {
4255 return (SET_ERROR(EILSEQ
));
4258 if (len
> MAXPATHLEN
) {
4260 return (SET_ERROR(ENAMETOOLONG
));
4263 if ((error
= zfs_acl_ids_create(dzp
, 0,
4264 vap
, cr
, NULL
, &acl_ids
)) != 0) {
4270 * Attempt to lock directory; fail if entry already exists.
4272 error
= zfs_dirent_lookup(dzp
, name
, &zp
, ZNEW
);
4274 zfs_acl_ids_free(&acl_ids
);
4279 if ((error
= zfs_zaccess(dzp
, ACE_ADD_FILE
, 0, B_FALSE
, cr
))) {
4280 zfs_acl_ids_free(&acl_ids
);
4285 if (zfs_acl_ids_overquota(zfsvfs
, &acl_ids
,
4287 zfs_acl_ids_free(&acl_ids
);
4289 return (SET_ERROR(EDQUOT
));
4292 getnewvnode_reserve_();
4293 tx
= dmu_tx_create(zfsvfs
->z_os
);
4294 fuid_dirtied
= zfsvfs
->z_fuid_dirty
;
4295 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
, 0, MAX(1, len
));
4296 dmu_tx_hold_zap(tx
, dzp
->z_id
, TRUE
, name
);
4297 dmu_tx_hold_sa_create(tx
, acl_ids
.z_aclp
->z_acl_bytes
+
4298 ZFS_SA_BASE_ATTR_SIZE
+ len
);
4299 dmu_tx_hold_sa(tx
, dzp
->z_sa_hdl
, B_FALSE
);
4300 if (!zfsvfs
->z_use_sa
&& acl_ids
.z_aclp
->z_acl_bytes
> ZFS_ACE_SPACE
) {
4301 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
, 0,
4302 acl_ids
.z_aclp
->z_acl_bytes
);
4305 zfs_fuid_txhold(zfsvfs
, tx
);
4306 error
= dmu_tx_assign(tx
, TXG_WAIT
);
4308 zfs_acl_ids_free(&acl_ids
);
4310 getnewvnode_drop_reserve();
4316 * Create a new object for the symlink.
4317 * for version 4 ZPL datsets the symlink will be an SA attribute
4319 zfs_mknode(dzp
, vap
, tx
, cr
, 0, &zp
, &acl_ids
);
4322 zfs_fuid_sync(zfsvfs
, tx
);
4325 error
= sa_update(zp
->z_sa_hdl
, SA_ZPL_SYMLINK(zfsvfs
),
4326 __DECONST(void *, link
), len
, tx
);
4328 zfs_sa_symlink(zp
, __DECONST(char *, link
), len
, tx
);
4331 (void) sa_update(zp
->z_sa_hdl
, SA_ZPL_SIZE(zfsvfs
),
4332 &zp
->z_size
, sizeof (zp
->z_size
), tx
);
4334 * Insert the new object into the directory.
4336 (void) zfs_link_create(dzp
, name
, zp
, tx
, ZNEW
);
4338 zfs_log_symlink(zilog
, tx
, txtype
, dzp
, zp
,
4339 __DECONST(char *, name
), __DECONST(char *, link
));
4342 zfs_acl_ids_free(&acl_ids
);
4346 getnewvnode_drop_reserve();
4348 if (zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
4349 zil_commit(zilog
, 0);
4356 * Return, in the buffer contained in the provided uio structure,
4357 * the symbolic path referred to by vp.
4359 * IN: vp - vnode of symbolic link.
4360 * uio - structure to contain the link path.
4361 * cr - credentials of caller.
4362 * ct - caller context
4364 * OUT: uio - structure containing the link path.
4366 * RETURN: 0 on success, error code on failure.
4369 * vp - atime updated
4373 zfs_readlink(vnode_t
*vp
, uio_t
*uio
, cred_t
*cr
, caller_context_t
*ct
)
4375 znode_t
*zp
= VTOZ(vp
);
4376 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
4383 error
= sa_lookup_uio(zp
->z_sa_hdl
,
4384 SA_ZPL_SYMLINK(zfsvfs
), uio
);
4386 error
= zfs_sa_readlink(zp
, uio
);
4388 ZFS_ACCESSTIME_STAMP(zfsvfs
, zp
);
4395 * Insert a new entry into directory tdvp referencing svp.
4397 * IN: tdvp - Directory to contain new entry.
4398 * svp - vnode of new entry.
4399 * name - name of new entry.
4400 * cr - credentials of caller.
4402 * RETURN: 0 on success, error code on failure.
4405 * tdvp - ctime|mtime updated
4406 * svp - ctime updated
4410 zfs_link(znode_t
*tdzp
, znode_t
*szp
, char *name
, cred_t
*cr
,
4414 zfsvfs_t
*zfsvfs
= tdzp
->z_zfsvfs
;
4421 ASSERT(ZTOV(tdzp
)->v_type
== VDIR
);
4424 ZFS_VERIFY_ZP(tdzp
);
4425 zilog
= zfsvfs
->z_log
;
4428 * POSIX dictates that we return EPERM here.
4429 * Better choices include ENOTSUP or EISDIR.
4431 if (ZTOV(szp
)->v_type
== VDIR
) {
4433 return (SET_ERROR(EPERM
));
4439 * If we are using project inheritance, means if the directory has
4440 * ZFS_PROJINHERIT set, then its descendant directories will inherit
4441 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
4442 * such case, we only allow hard link creation in our tree when the
4443 * project IDs are the same.
4445 if (tdzp
->z_pflags
& ZFS_PROJINHERIT
&&
4446 tdzp
->z_projid
!= szp
->z_projid
) {
4448 return (SET_ERROR(EXDEV
));
4451 if (szp
->z_pflags
& (ZFS_APPENDONLY
|
4452 ZFS_IMMUTABLE
| ZFS_READONLY
)) {
4454 return (SET_ERROR(EPERM
));
4457 /* Prevent links to .zfs/shares files */
4459 if ((error
= sa_lookup(szp
->z_sa_hdl
, SA_ZPL_PARENT(zfsvfs
),
4460 &parent
, sizeof (uint64_t))) != 0) {
4464 if (parent
== zfsvfs
->z_shares_dir
) {
4466 return (SET_ERROR(EPERM
));
4469 if (zfsvfs
->z_utf8
&& u8_validate(name
,
4470 strlen(name
), NULL
, U8_VALIDATE_ENTIRE
, &error
) < 0) {
4472 return (SET_ERROR(EILSEQ
));
4476 * We do not support links between attributes and non-attributes
4477 * because of the potential security risk of creating links
4478 * into "normal" file space in order to circumvent restrictions
4479 * imposed in attribute space.
4481 if ((szp
->z_pflags
& ZFS_XATTR
) != (tdzp
->z_pflags
& ZFS_XATTR
)) {
4483 return (SET_ERROR(EINVAL
));
4487 owner
= zfs_fuid_map_id(zfsvfs
, szp
->z_uid
, cr
, ZFS_OWNER
);
4488 if (owner
!= crgetuid(cr
) && secpolicy_basic_link(ZTOV(szp
), cr
) != 0) {
4490 return (SET_ERROR(EPERM
));
4493 if ((error
= zfs_zaccess(tdzp
, ACE_ADD_FILE
, 0, B_FALSE
, cr
))) {
4499 * Attempt to lock directory; fail if entry already exists.
4501 error
= zfs_dirent_lookup(tdzp
, name
, &tzp
, ZNEW
);
4507 tx
= dmu_tx_create(zfsvfs
->z_os
);
4508 dmu_tx_hold_sa(tx
, szp
->z_sa_hdl
, B_FALSE
);
4509 dmu_tx_hold_zap(tx
, tdzp
->z_id
, TRUE
, name
);
4510 zfs_sa_upgrade_txholds(tx
, szp
);
4511 zfs_sa_upgrade_txholds(tx
, tdzp
);
4512 error
= dmu_tx_assign(tx
, TXG_WAIT
);
4519 error
= zfs_link_create(tdzp
, name
, szp
, tx
, 0);
4522 uint64_t txtype
= TX_LINK
;
4523 zfs_log_link(zilog
, tx
, txtype
, tdzp
, szp
, name
);
4529 vnevent_link(ZTOV(szp
), ct
);
4532 if (zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
4533 zil_commit(zilog
, 0);
4540 * Free or allocate space in a file. Currently, this function only
4541 * supports the `F_FREESP' command. However, this command is somewhat
4542 * misnamed, as its functionality includes the ability to allocate as
4543 * well as free space.
4545 * IN: ip - inode of file to free data in.
4546 * cmd - action to take (only F_FREESP supported).
4547 * bfp - section of file to free/alloc.
4548 * flag - current file open mode flags.
4549 * offset - current file offset.
4550 * cr - credentials of caller.
4552 * RETURN: 0 on success, error code on failure.
4555 * ip - ctime|mtime updated
4559 zfs_space(znode_t
*zp
, int cmd
, flock64_t
*bfp
, int flag
,
4560 offset_t offset
, cred_t
*cr
)
4562 zfsvfs_t
*zfsvfs
= ZTOZSB(zp
);
4569 if (cmd
!= F_FREESP
) {
4571 return (SET_ERROR(EINVAL
));
4575 * Callers might not be able to detect properly that we are read-only,
4576 * so check it explicitly here.
4578 if (zfs_is_readonly(zfsvfs
)) {
4580 return (SET_ERROR(EROFS
));
4583 if (bfp
->l_len
< 0) {
4585 return (SET_ERROR(EINVAL
));
4589 * Permissions aren't checked on Solaris because on this OS
4590 * zfs_space() can only be called with an opened file handle.
4591 * On Linux we can get here through truncate_range() which
4592 * operates directly on inodes, so we need to check access rights.
4594 if ((error
= zfs_zaccess(zp
, ACE_WRITE_DATA
, 0, B_FALSE
, cr
))) {
4600 len
= bfp
->l_len
; /* 0 means from off to end of file */
4602 error
= zfs_freesp(zp
, off
, len
, flag
, TRUE
);
4610 zfs_inactive(vnode_t
*vp
, cred_t
*cr
, caller_context_t
*ct
)
4612 znode_t
*zp
= VTOZ(vp
);
4613 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
4616 rw_enter(&zfsvfs
->z_teardown_inactive_lock
, RW_READER
);
4617 if (zp
->z_sa_hdl
== NULL
) {
4619 * The fs has been unmounted, or we did a
4620 * suspend/resume and this file no longer exists.
4622 rw_exit(&zfsvfs
->z_teardown_inactive_lock
);
4627 if (zp
->z_unlinked
) {
4629 * Fast path to recycle a vnode of a removed file.
4631 rw_exit(&zfsvfs
->z_teardown_inactive_lock
);
4636 if (zp
->z_atime_dirty
&& zp
->z_unlinked
== 0) {
4637 dmu_tx_t
*tx
= dmu_tx_create(zfsvfs
->z_os
);
4639 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_FALSE
);
4640 zfs_sa_upgrade_txholds(tx
, zp
);
4641 error
= dmu_tx_assign(tx
, TXG_WAIT
);
4645 (void) sa_update(zp
->z_sa_hdl
, SA_ZPL_ATIME(zfsvfs
),
4646 (void *)&zp
->z_atime
, sizeof (zp
->z_atime
), tx
);
4647 zp
->z_atime_dirty
= 0;
4651 rw_exit(&zfsvfs
->z_teardown_inactive_lock
);
4655 CTASSERT(sizeof (struct zfid_short
) <= sizeof (struct fid
));
4656 CTASSERT(sizeof (struct zfid_long
) <= sizeof (struct fid
));
4660 zfs_fid(vnode_t
*vp
, fid_t
*fidp
, caller_context_t
*ct
)
4662 znode_t
*zp
= VTOZ(vp
);
4663 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
4666 uint64_t object
= zp
->z_id
;
4673 if ((error
= sa_lookup(zp
->z_sa_hdl
, SA_ZPL_GEN(zfsvfs
),
4674 &gen64
, sizeof (uint64_t))) != 0) {
4679 gen
= (uint32_t)gen64
;
4681 size
= (zfsvfs
->z_parent
!= zfsvfs
) ? LONG_FID_LEN
: SHORT_FID_LEN
;
4682 fidp
->fid_len
= size
;
4684 zfid
= (zfid_short_t
*)fidp
;
4686 zfid
->zf_len
= size
;
4688 for (i
= 0; i
< sizeof (zfid
->zf_object
); i
++)
4689 zfid
->zf_object
[i
] = (uint8_t)(object
>> (8 * i
));
4691 /* Must have a non-zero generation number to distinguish from .zfs */
4694 for (i
= 0; i
< sizeof (zfid
->zf_gen
); i
++)
4695 zfid
->zf_gen
[i
] = (uint8_t)(gen
>> (8 * i
));
4697 if (size
== LONG_FID_LEN
) {
4698 uint64_t objsetid
= dmu_objset_id(zfsvfs
->z_os
);
4701 zlfid
= (zfid_long_t
*)fidp
;
4703 for (i
= 0; i
< sizeof (zlfid
->zf_setid
); i
++)
4704 zlfid
->zf_setid
[i
] = (uint8_t)(objsetid
>> (8 * i
));
4706 /* XXX - this should be the generation number for the objset */
4707 for (i
= 0; i
< sizeof (zlfid
->zf_setgen
); i
++)
4708 zlfid
->zf_setgen
[i
] = 0;
4716 zfs_pathconf(vnode_t
*vp
, int cmd
, ulong_t
*valp
, cred_t
*cr
,
4717 caller_context_t
*ct
)
4722 *valp
= MIN(LONG_MAX
, ZFS_LINK_MAX
);
4725 case _PC_FILESIZEBITS
:
4728 case _PC_MIN_HOLE_SIZE
:
4729 *valp
= (int)SPA_MINBLOCKSIZE
;
4731 case _PC_ACL_EXTENDED
:
4739 case _PC_ACL_PATH_MAX
:
4740 *valp
= ACL_MAX_ENTRIES
;
4744 return (EOPNOTSUPP
);
4750 zfs_getsecattr(vnode_t
*vp
, vsecattr_t
*vsecp
, int flag
, cred_t
*cr
,
4751 caller_context_t
*ct
)
4753 znode_t
*zp
= VTOZ(vp
);
4754 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
4756 boolean_t skipaclchk
= (flag
& ATTR_NOACLCHECK
) ? B_TRUE
: B_FALSE
;
4760 error
= zfs_getacl(zp
, vsecp
, skipaclchk
, cr
);
4768 zfs_setsecattr(znode_t
*zp
, vsecattr_t
*vsecp
, int flag
, cred_t
*cr
)
4770 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
4772 boolean_t skipaclchk
= (flag
& ATTR_NOACLCHECK
) ? B_TRUE
: B_FALSE
;
4773 zilog_t
*zilog
= zfsvfs
->z_log
;
4778 error
= zfs_setacl(zp
, vsecp
, skipaclchk
, cr
);
4780 if (zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
4781 zil_commit(zilog
, 0);
4788 zfs_getpages(struct vnode
*vp
, vm_page_t
*ma
, int count
, int *rbehind
,
4791 znode_t
*zp
= VTOZ(vp
);
4792 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
4793 objset_t
*os
= zp
->z_zfsvfs
->z_os
;
4794 zfs_locked_range_t
*lr
;
4796 off_t start
, end
, obj_size
;
4798 int pgsin_b
, pgsin_a
;
4804 start
= IDX_TO_OFF(ma
[0]->pindex
);
4805 end
= IDX_TO_OFF(ma
[count
- 1]->pindex
+ 1);
4808 * Lock a range covering all required and optional pages.
4809 * Note that we need to handle the case of the block size growing.
4812 blksz
= zp
->z_blksz
;
4813 lr
= zfs_rangelock_tryenter(&zp
->z_rangelock
,
4814 rounddown(start
, blksz
),
4815 roundup(end
, blksz
) - rounddown(start
, blksz
), RL_READER
);
4817 if (rahead
!= NULL
) {
4821 if (rbehind
!= NULL
) {
4827 if (blksz
== zp
->z_blksz
)
4829 zfs_rangelock_exit(lr
);
4832 object
= ma
[0]->object
;
4833 zfs_vmobject_wlock(object
);
4834 obj_size
= object
->un_pager
.vnp
.vnp_size
;
4835 zfs_vmobject_wunlock(object
);
4836 if (IDX_TO_OFF(ma
[count
- 1]->pindex
) >= obj_size
) {
4837 zfs_rangelock_exit(lr
);
4839 return (zfs_vm_pagerret_bad
);
4843 if (rbehind
!= NULL
) {
4844 pgsin_b
= OFF_TO_IDX(start
- rounddown(start
, blksz
));
4845 pgsin_b
= MIN(*rbehind
, pgsin_b
);
4849 if (rahead
!= NULL
) {
4850 pgsin_a
= OFF_TO_IDX(roundup(end
, blksz
) - end
);
4851 if (end
+ IDX_TO_OFF(pgsin_a
) >= obj_size
)
4852 pgsin_a
= OFF_TO_IDX(round_page(obj_size
) - end
);
4853 pgsin_a
= MIN(*rahead
, pgsin_a
);
4857 * NB: we need to pass the exact byte size of the data that we expect
4858 * to read after accounting for the file size. This is required because
4859 * ZFS will panic if we request DMU to read beyond the end of the last
4862 error
= dmu_read_pages(os
, zp
->z_id
, ma
, count
, &pgsin_b
, &pgsin_a
,
4863 MIN(end
, obj_size
) - (end
- PAGE_SIZE
));
4865 zfs_rangelock_exit(lr
);
4866 ZFS_ACCESSTIME_STAMP(zfsvfs
, zp
);
4870 return (zfs_vm_pagerret_error
);
4872 VM_CNT_INC(v_vnodein
);
4873 VM_CNT_ADD(v_vnodepgsin
, count
+ pgsin_b
+ pgsin_a
);
4874 if (rbehind
!= NULL
)
4878 return (zfs_vm_pagerret_ok
);
4881 #ifndef _SYS_SYSPROTO_H_
4882 struct vop_getpages_args
{
4892 zfs_freebsd_getpages(struct vop_getpages_args
*ap
)
4895 return (zfs_getpages(ap
->a_vp
, ap
->a_m
, ap
->a_count
, ap
->a_rbehind
,
4900 zfs_putpages(struct vnode
*vp
, vm_page_t
*ma
, size_t len
, int flags
,
4903 znode_t
*zp
= VTOZ(vp
);
4904 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
4905 zfs_locked_range_t
*lr
;
4913 vm_ooffset_t lo_off
;
4924 object
= vp
->v_object
;
4928 KASSERT(ma
[0]->object
== object
, ("mismatching object"));
4929 KASSERT(len
> 0 && (len
& PAGE_MASK
) == 0, ("unexpected length"));
4931 for (i
= 0; i
< pcount
; i
++)
4932 rtvals
[i
] = zfs_vm_pagerret_error
;
4934 off
= IDX_TO_OFF(ma
[0]->pindex
);
4935 blksz
= zp
->z_blksz
;
4936 lo_off
= rounddown(off
, blksz
);
4937 lo_len
= roundup(len
+ (off
- lo_off
), blksz
);
4938 lr
= zfs_rangelock_enter(&zp
->z_rangelock
, lo_off
, lo_len
, RL_WRITER
);
4940 zfs_vmobject_wlock(object
);
4941 if (len
+ off
> object
->un_pager
.vnp
.vnp_size
) {
4942 if (object
->un_pager
.vnp
.vnp_size
> off
) {
4945 len
= object
->un_pager
.vnp
.vnp_size
- off
;
4947 if ((pgoff
= (int)len
& PAGE_MASK
) != 0) {
4949 * If the object is locked and the following
4950 * conditions hold, then the page's dirty
4951 * field cannot be concurrently changed by a
4955 vm_page_assert_sbusied(m
);
4956 KASSERT(!pmap_page_is_write_mapped(m
),
4957 ("zfs_putpages: page %p is not read-only",
4959 vm_page_clear_dirty(m
, pgoff
, PAGE_SIZE
-
4966 if (ncount
< pcount
) {
4967 for (i
= ncount
; i
< pcount
; i
++) {
4968 rtvals
[i
] = zfs_vm_pagerret_bad
;
4972 zfs_vmobject_wunlock(object
);
4977 if (zfs_id_overblockquota(zfsvfs
, DMU_USERUSED_OBJECT
, zp
->z_uid
) ||
4978 zfs_id_overblockquota(zfsvfs
, DMU_GROUPUSED_OBJECT
, zp
->z_gid
) ||
4979 (zp
->z_projid
!= ZFS_DEFAULT_PROJID
&&
4980 zfs_id_overblockquota(zfsvfs
, DMU_PROJECTUSED_OBJECT
,
4985 tx
= dmu_tx_create(zfsvfs
->z_os
);
4986 dmu_tx_hold_write(tx
, zp
->z_id
, off
, len
);
4988 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_FALSE
);
4989 zfs_sa_upgrade_txholds(tx
, zp
);
4990 err
= dmu_tx_assign(tx
, TXG_WAIT
);
4996 if (zp
->z_blksz
< PAGE_SIZE
) {
4997 for (i
= 0; len
> 0; off
+= tocopy
, len
-= tocopy
, i
++) {
4998 tocopy
= len
> PAGE_SIZE
? PAGE_SIZE
: len
;
4999 va
= zfs_map_page(ma
[i
], &sf
);
5000 dmu_write(zfsvfs
->z_os
, zp
->z_id
, off
, tocopy
, va
, tx
);
5004 err
= dmu_write_pages(zfsvfs
->z_os
, zp
->z_id
, off
, len
, ma
, tx
);
5008 uint64_t mtime
[2], ctime
[2];
5009 sa_bulk_attr_t bulk
[3];
5012 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MTIME(zfsvfs
), NULL
,
5014 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
), NULL
,
5016 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_FLAGS(zfsvfs
), NULL
,
5018 zfs_tstamp_update_setup(zp
, CONTENT_MODIFIED
, mtime
, ctime
);
5019 err
= sa_bulk_update(zp
->z_sa_hdl
, bulk
, count
, tx
);
5022 * XXX we should be passing a callback to undirty
5023 * but that would make the locking messier
5025 zfs_log_write(zfsvfs
->z_log
, tx
, TX_WRITE
, zp
, off
,
5026 len
, 0, NULL
, NULL
);
5028 zfs_vmobject_wlock(object
);
5029 for (i
= 0; i
< ncount
; i
++) {
5030 rtvals
[i
] = zfs_vm_pagerret_ok
;
5031 vm_page_undirty(ma
[i
]);
5033 zfs_vmobject_wunlock(object
);
5034 VM_CNT_INC(v_vnodeout
);
5035 VM_CNT_ADD(v_vnodepgsout
, ncount
);
5040 zfs_rangelock_exit(lr
);
5041 if ((flags
& (zfs_vm_pagerput_sync
| zfs_vm_pagerput_inval
)) != 0 ||
5042 zfsvfs
->z_os
->os_sync
== ZFS_SYNC_ALWAYS
)
5043 zil_commit(zfsvfs
->z_log
, zp
->z_id
);
5048 #ifndef _SYS_SYSPROTO_H_
5049 struct vop_putpages_args
{
5059 zfs_freebsd_putpages(struct vop_putpages_args
*ap
)
5062 return (zfs_putpages(ap
->a_vp
, ap
->a_m
, ap
->a_count
, ap
->a_sync
,
5066 #ifndef _SYS_SYSPROTO_H_
5067 struct vop_bmap_args
{
5070 struct bufobj
**a_bop
;
5078 zfs_freebsd_bmap(struct vop_bmap_args
*ap
)
5081 if (ap
->a_bop
!= NULL
)
5082 *ap
->a_bop
= &ap
->a_vp
->v_bufobj
;
5083 if (ap
->a_bnp
!= NULL
)
5084 *ap
->a_bnp
= ap
->a_bn
;
5085 if (ap
->a_runp
!= NULL
)
5087 if (ap
->a_runb
!= NULL
)
5093 #ifndef _SYS_SYSPROTO_H_
5094 struct vop_open_args
{
5097 struct ucred
*a_cred
;
5098 struct thread
*a_td
;
/*
 * VOP_OPEN glue: forward to zfs_open() and, on success, ensure the vnode
 * has a VM object sized to the file (vnode_create_vobject).
 * NOTE(review): extraction is corrupted -- braces, the error declaration
 * and the return statement (original lines 5104, 5107-5108, 5110, 5112-
 * 5113) are missing; statements are split across physical lines.
 */
5103 zfs_freebsd_open(struct vop_open_args
*ap
)
5105 vnode_t
*vp
= ap
->a_vp
;
5106 znode_t
*zp
= VTOZ(vp
);
5109 error
= zfs_open(&vp
, ap
->a_mode
, ap
->a_cred
);
/* Create the backing VM object so mmap/paging work on this vnode. */
5111 vnode_create_vobject(vp
, zp
->z_size
, ap
->a_td
);
5115 #ifndef _SYS_SYSPROTO_H_
5116 struct vop_close_args
{
5119 struct ucred
*a_cred
;
5120 struct thread
*a_td
;
/*
 * VOP_CLOSE glue: forward to zfs_close() with fixed count/offset args
 * (1, 0).  NOTE(review): corrupted extraction -- function braces and the
 * return type line are missing; statements split across physical lines.
 */
5125 zfs_freebsd_close(struct vop_close_args
*ap
)
5128 return (zfs_close(ap
->a_vp
, ap
->a_fflag
, 1, 0, ap
->a_cred
));
5131 #ifndef _SYS_SYSPROTO_H_
5132 struct vop_ioctl_args
{
5143 zfs_freebsd_ioctl(struct vop_ioctl_args
*ap
)
5146 return (zfs_ioctl(ap
->a_vp
, ap
->a_command
, (intptr_t)ap
->a_data
,
5147 ap
->a_fflag
, ap
->a_cred
, NULL
));
5151 ioflags(int ioflags
)
5155 if (ioflags
& IO_APPEND
)
5157 if (ioflags
& IO_NDELAY
)
5159 if (ioflags
& IO_SYNC
)
5160 flags
|= (FSYNC
| FDSYNC
| FRSYNC
);
5165 #ifndef _SYS_SYSPROTO_H_
5166 struct vop_read_args
{
5170 struct ucred
*a_cred
;
5175 zfs_freebsd_read(struct vop_read_args
*ap
)
5178 return (zfs_read(ap
->a_vp
, ap
->a_uio
, ioflags(ap
->a_ioflag
),
5182 #ifndef _SYS_SYSPROTO_H_
5183 struct vop_write_args
{
5187 struct ucred
*a_cred
;
5192 zfs_freebsd_write(struct vop_write_args
*ap
)
5195 return (zfs_write(ap
->a_vp
, ap
->a_uio
, ioflags(ap
->a_ioflag
),
5199 #ifndef _SYS_SYSPROTO_H_
5200 struct vop_access_args
{
5202 accmode_t a_accmode
;
5203 struct ucred
*a_cred
;
5204 struct thread
*a_td
;
5209 zfs_freebsd_access(struct vop_access_args
*ap
)
5211 vnode_t
*vp
= ap
->a_vp
;
5212 znode_t
*zp
= VTOZ(vp
);
5217 if (ap
->a_accmode
== VEXEC
) {
5218 if (zfs_fastaccesschk_execute(zp
, ap
->a_cred
) == 0)
5223 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
5225 accmode
= ap
->a_accmode
& (VREAD
|VWRITE
|VEXEC
|VAPPEND
);
5227 error
= zfs_access(ap
->a_vp
, accmode
, 0, ap
->a_cred
, NULL
);
5230 * VADMIN has to be handled by vaccess().
5233 accmode
= ap
->a_accmode
& ~(VREAD
|VWRITE
|VEXEC
|VAPPEND
);
5235 error
= vaccess(vp
->v_type
, zp
->z_mode
, zp
->z_uid
,
5236 zp
->z_gid
, accmode
, ap
->a_cred
, NULL
);
5241 * For VEXEC, ensure that at least one execute bit is set for
5244 if (error
== 0 && (ap
->a_accmode
& VEXEC
) != 0 && vp
->v_type
!= VDIR
&&
5245 (zp
->z_mode
& (S_IXUSR
| S_IXGRP
| S_IXOTH
)) == 0) {
5252 #ifndef _SYS_SYSPROTO_H_
5253 struct vop_lookup_args
{
5254 struct vnode
*a_dvp
;
5255 struct vnode
**a_vpp
;
5256 struct componentname
*a_cnp
;
5261 zfs_freebsd_lookup(struct vop_lookup_args
*ap
, boolean_t cached
)
5263 struct componentname
*cnp
= ap
->a_cnp
;
5264 char nm
[NAME_MAX
+ 1];
5266 ASSERT(cnp
->cn_namelen
< sizeof (nm
));
5267 strlcpy(nm
, cnp
->cn_nameptr
, MIN(cnp
->cn_namelen
+ 1, sizeof (nm
)));
5269 return (zfs_lookup(ap
->a_dvp
, nm
, ap
->a_vpp
, cnp
, cnp
->cn_nameiop
,
5270 cnp
->cn_cred
, cnp
->cn_thread
, 0, cached
));
5274 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args
*ap
)
5277 return (zfs_freebsd_lookup((struct vop_lookup_args
*)ap
, B_TRUE
));
5280 #ifndef _SYS_SYSPROTO_H_
5281 struct vop_lookup_args
{
5282 struct vnode
*a_dvp
;
5283 struct vnode
**a_vpp
;
5284 struct componentname
*a_cnp
;
/*
 * VOP_LOOKUP entry: use the VFS name cache (vfs_cache_lookup) when this
 * zfsvfs has the namecache enabled, otherwise do an uncached
 * zfs_freebsd_lookup().  NOTE(review): corrupted extraction -- braces
 * and the zfsvfs_t declaration line are missing; statements are split
 * across physical lines.
 */
5289 zfs_cache_lookup(struct vop_lookup_args
*ap
)
5293 zfsvfs
= ap
->a_dvp
->v_mount
->mnt_data
;
5294 if (zfsvfs
->z_use_namecache
)
5295 return (vfs_cache_lookup(ap
));
5297 return (zfs_freebsd_lookup(ap
, B_FALSE
));
5300 #ifndef _SYS_SYSPROTO_H_
5301 struct vop_create_args
{
5302 struct vnode
*a_dvp
;
5303 struct vnode
**a_vpp
;
5304 struct componentname
*a_cnp
;
5305 struct vattr
*a_vap
;
5310 zfs_freebsd_create(struct vop_create_args
*ap
)
5313 struct componentname
*cnp
= ap
->a_cnp
;
5314 vattr_t
*vap
= ap
->a_vap
;
5318 ASSERT(cnp
->cn_flags
& SAVENAME
);
5320 vattr_init_mask(vap
);
5321 mode
= vap
->va_mode
& ALLPERMS
;
5322 zfsvfs
= ap
->a_dvp
->v_mount
->mnt_data
;
5325 rc
= zfs_create(VTOZ(ap
->a_dvp
), cnp
->cn_nameptr
, vap
, !EXCL
, mode
,
5326 &zp
, cnp
->cn_cred
, 0 /* flag */, NULL
/* vsecattr */);
5328 *ap
->a_vpp
= ZTOV(zp
);
5329 if (zfsvfs
->z_use_namecache
&&
5330 rc
== 0 && (cnp
->cn_flags
& MAKEENTRY
) != 0)
5331 cache_enter(ap
->a_dvp
, *ap
->a_vpp
, cnp
);
5336 #ifndef _SYS_SYSPROTO_H_
5337 struct vop_remove_args
{
5338 struct vnode
*a_dvp
;
5340 struct componentname
*a_cnp
;
5345 zfs_freebsd_remove(struct vop_remove_args
*ap
)
5348 ASSERT(ap
->a_cnp
->cn_flags
& SAVENAME
);
5350 return (zfs_remove_(ap
->a_dvp
, ap
->a_vp
, ap
->a_cnp
->cn_nameptr
,
5351 ap
->a_cnp
->cn_cred
));
5354 #ifndef _SYS_SYSPROTO_H_
5355 struct vop_mkdir_args
{
5356 struct vnode
*a_dvp
;
5357 struct vnode
**a_vpp
;
5358 struct componentname
*a_cnp
;
5359 struct vattr
*a_vap
;
5364 zfs_freebsd_mkdir(struct vop_mkdir_args
*ap
)
5366 vattr_t
*vap
= ap
->a_vap
;
5370 ASSERT(ap
->a_cnp
->cn_flags
& SAVENAME
);
5372 vattr_init_mask(vap
);
5375 rc
= zfs_mkdir(VTOZ(ap
->a_dvp
), ap
->a_cnp
->cn_nameptr
, vap
, &zp
,
5376 ap
->a_cnp
->cn_cred
, 0, NULL
);
5379 *ap
->a_vpp
= ZTOV(zp
);
5383 #ifndef _SYS_SYSPROTO_H_
5384 struct vop_rmdir_args
{
5385 struct vnode
*a_dvp
;
5387 struct componentname
*a_cnp
;
/*
 * VOP_RMDIR glue: assert the component name was saved (SAVENAME) and
 * forward to zfs_rmdir_() with the directory vnode, target vnode, name
 * and credentials.  NOTE(review): corrupted extraction -- braces and the
 * return type line are missing; statements are split across physical
 * lines.
 */
5392 zfs_freebsd_rmdir(struct vop_rmdir_args
*ap
)
5394 struct componentname
*cnp
= ap
->a_cnp
;
5396 ASSERT(cnp
->cn_flags
& SAVENAME
);
5398 return (zfs_rmdir_(ap
->a_dvp
, ap
->a_vp
, cnp
->cn_nameptr
, cnp
->cn_cred
));
5401 #ifndef _SYS_SYSPROTO_H_
5402 struct vop_readdir_args
{
5405 struct ucred
*a_cred
;
5408 ulong_t
**a_cookies
;
5413 zfs_freebsd_readdir(struct vop_readdir_args
*ap
)
5416 return (zfs_readdir(ap
->a_vp
, ap
->a_uio
, ap
->a_cred
, ap
->a_eofflag
,
5417 ap
->a_ncookies
, ap
->a_cookies
));
5420 #ifndef _SYS_SYSPROTO_H_
5421 struct vop_fsync_args
{
5424 struct thread
*a_td
;
/*
 * VOP_FSYNC glue: forward to zfs_fsync() with syncflag 0, using the
 * calling thread's credential.  NOTE(review): corrupted extraction --
 * braces and the return type line are missing; statements are split
 * across physical lines.
 */
5429 zfs_freebsd_fsync(struct vop_fsync_args
*ap
)
5433 return (zfs_fsync(ap
->a_vp
, 0, ap
->a_td
->td_ucred
, NULL
));
5436 #ifndef _SYS_SYSPROTO_H_
5437 struct vop_getattr_args
{
5439 struct vattr
*a_vap
;
5440 struct ucred
*a_cred
;
5445 zfs_freebsd_getattr(struct vop_getattr_args
*ap
)
5447 vattr_t
*vap
= ap
->a_vap
;
5453 xvap
.xva_vattr
= *vap
;
5454 xvap
.xva_vattr
.va_mask
|= AT_XVATTR
;
5456 /* Convert chflags into ZFS-type flags. */
5457 /* XXX: what about SF_SETTABLE?. */
5458 XVA_SET_REQ(&xvap
, XAT_IMMUTABLE
);
5459 XVA_SET_REQ(&xvap
, XAT_APPENDONLY
);
5460 XVA_SET_REQ(&xvap
, XAT_NOUNLINK
);
5461 XVA_SET_REQ(&xvap
, XAT_NODUMP
);
5462 XVA_SET_REQ(&xvap
, XAT_READONLY
);
5463 XVA_SET_REQ(&xvap
, XAT_ARCHIVE
);
5464 XVA_SET_REQ(&xvap
, XAT_SYSTEM
);
5465 XVA_SET_REQ(&xvap
, XAT_HIDDEN
);
5466 XVA_SET_REQ(&xvap
, XAT_REPARSE
);
5467 XVA_SET_REQ(&xvap
, XAT_OFFLINE
);
5468 XVA_SET_REQ(&xvap
, XAT_SPARSE
);
5470 error
= zfs_getattr(ap
->a_vp
, (vattr_t
*)&xvap
, 0, ap
->a_cred
);
5474 /* Convert ZFS xattr into chflags. */
5475 #define FLAG_CHECK(fflag, xflag, xfield) do { \
5476 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \
5477 fflags |= (fflag); \
5479 FLAG_CHECK(SF_IMMUTABLE
, XAT_IMMUTABLE
,
5480 xvap
.xva_xoptattrs
.xoa_immutable
);
5481 FLAG_CHECK(SF_APPEND
, XAT_APPENDONLY
,
5482 xvap
.xva_xoptattrs
.xoa_appendonly
);
5483 FLAG_CHECK(SF_NOUNLINK
, XAT_NOUNLINK
,
5484 xvap
.xva_xoptattrs
.xoa_nounlink
);
5485 FLAG_CHECK(UF_ARCHIVE
, XAT_ARCHIVE
,
5486 xvap
.xva_xoptattrs
.xoa_archive
);
5487 FLAG_CHECK(UF_NODUMP
, XAT_NODUMP
,
5488 xvap
.xva_xoptattrs
.xoa_nodump
);
5489 FLAG_CHECK(UF_READONLY
, XAT_READONLY
,
5490 xvap
.xva_xoptattrs
.xoa_readonly
);
5491 FLAG_CHECK(UF_SYSTEM
, XAT_SYSTEM
,
5492 xvap
.xva_xoptattrs
.xoa_system
);
5493 FLAG_CHECK(UF_HIDDEN
, XAT_HIDDEN
,
5494 xvap
.xva_xoptattrs
.xoa_hidden
);
5495 FLAG_CHECK(UF_REPARSE
, XAT_REPARSE
,
5496 xvap
.xva_xoptattrs
.xoa_reparse
);
5497 FLAG_CHECK(UF_OFFLINE
, XAT_OFFLINE
,
5498 xvap
.xva_xoptattrs
.xoa_offline
);
5499 FLAG_CHECK(UF_SPARSE
, XAT_SPARSE
,
5500 xvap
.xva_xoptattrs
.xoa_sparse
);
5503 *vap
= xvap
.xva_vattr
;
5504 vap
->va_flags
= fflags
;
5508 #ifndef _SYS_SYSPROTO_H_
5509 struct vop_setattr_args
{
5511 struct vattr
*a_vap
;
5512 struct ucred
*a_cred
;
5517 zfs_freebsd_setattr(struct vop_setattr_args
*ap
)
5519 vnode_t
*vp
= ap
->a_vp
;
5520 vattr_t
*vap
= ap
->a_vap
;
5521 cred_t
*cred
= ap
->a_cred
;
5526 vattr_init_mask(vap
);
5527 vap
->va_mask
&= ~AT_NOSET
;
5530 xvap
.xva_vattr
= *vap
;
5532 zflags
= VTOZ(vp
)->z_pflags
;
5534 if (vap
->va_flags
!= VNOVAL
) {
5535 zfsvfs_t
*zfsvfs
= VTOZ(vp
)->z_zfsvfs
;
5538 if (zfsvfs
->z_use_fuids
== B_FALSE
)
5539 return (EOPNOTSUPP
);
5541 fflags
= vap
->va_flags
;
5544 * We need to figure out whether it makes sense to allow
5545 * UF_REPARSE through, since we don't really have other
5546 * facilities to handle reparse points and zfs_setattr()
5547 * doesn't currently allow setting that attribute anyway.
5549 if ((fflags
& ~(SF_IMMUTABLE
|SF_APPEND
|SF_NOUNLINK
|UF_ARCHIVE
|
5550 UF_NODUMP
|UF_SYSTEM
|UF_HIDDEN
|UF_READONLY
|UF_REPARSE
|
5551 UF_OFFLINE
|UF_SPARSE
)) != 0)
5552 return (EOPNOTSUPP
);
5554 * Unprivileged processes are not permitted to unset system
5555 * flags, or modify flags if any system flags are set.
5556 * Privileged non-jail processes may not modify system flags
5557 * if securelevel > 0 and any existing system flags are set.
5558 * Privileged jail processes behave like privileged non-jail
5559 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
5560 * otherwise, they behave like unprivileged processes.
5562 if (secpolicy_fs_owner(vp
->v_mount
, cred
) == 0 ||
5563 spl_priv_check_cred(cred
, PRIV_VFS_SYSFLAGS
) == 0) {
5565 (ZFS_IMMUTABLE
| ZFS_APPENDONLY
| ZFS_NOUNLINK
)) {
5566 error
= securelevel_gt(cred
, 0);
5572 * Callers may only modify the file flags on
5573 * objects they have VADMIN rights for.
5575 if ((error
= VOP_ACCESS(vp
, VADMIN
, cred
,
5579 (ZFS_IMMUTABLE
| ZFS_APPENDONLY
|
5584 (SF_IMMUTABLE
| SF_APPEND
| SF_NOUNLINK
)) {
5589 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \
5590 if (((fflags & (fflag)) && !(zflags & (zflag))) || \
5591 ((zflags & (zflag)) && !(fflags & (fflag)))) { \
5592 XVA_SET_REQ(&xvap, (xflag)); \
5593 (xfield) = ((fflags & (fflag)) != 0); \
5596 /* Convert chflags into ZFS-type flags. */
5597 /* XXX: what about SF_SETTABLE?. */
5598 FLAG_CHANGE(SF_IMMUTABLE
, ZFS_IMMUTABLE
, XAT_IMMUTABLE
,
5599 xvap
.xva_xoptattrs
.xoa_immutable
);
5600 FLAG_CHANGE(SF_APPEND
, ZFS_APPENDONLY
, XAT_APPENDONLY
,
5601 xvap
.xva_xoptattrs
.xoa_appendonly
);
5602 FLAG_CHANGE(SF_NOUNLINK
, ZFS_NOUNLINK
, XAT_NOUNLINK
,
5603 xvap
.xva_xoptattrs
.xoa_nounlink
);
5604 FLAG_CHANGE(UF_ARCHIVE
, ZFS_ARCHIVE
, XAT_ARCHIVE
,
5605 xvap
.xva_xoptattrs
.xoa_archive
);
5606 FLAG_CHANGE(UF_NODUMP
, ZFS_NODUMP
, XAT_NODUMP
,
5607 xvap
.xva_xoptattrs
.xoa_nodump
);
5608 FLAG_CHANGE(UF_READONLY
, ZFS_READONLY
, XAT_READONLY
,
5609 xvap
.xva_xoptattrs
.xoa_readonly
);
5610 FLAG_CHANGE(UF_SYSTEM
, ZFS_SYSTEM
, XAT_SYSTEM
,
5611 xvap
.xva_xoptattrs
.xoa_system
);
5612 FLAG_CHANGE(UF_HIDDEN
, ZFS_HIDDEN
, XAT_HIDDEN
,
5613 xvap
.xva_xoptattrs
.xoa_hidden
);
5614 FLAG_CHANGE(UF_REPARSE
, ZFS_REPARSE
, XAT_REPARSE
,
5615 xvap
.xva_xoptattrs
.xoa_reparse
);
5616 FLAG_CHANGE(UF_OFFLINE
, ZFS_OFFLINE
, XAT_OFFLINE
,
5617 xvap
.xva_xoptattrs
.xoa_offline
);
5618 FLAG_CHANGE(UF_SPARSE
, ZFS_SPARSE
, XAT_SPARSE
,
5619 xvap
.xva_xoptattrs
.xoa_sparse
);
5622 if (vap
->va_birthtime
.tv_sec
!= VNOVAL
) {
5623 xvap
.xva_vattr
.va_mask
|= AT_XVATTR
;
5624 XVA_SET_REQ(&xvap
, XAT_CREATETIME
);
5626 return (zfs_setattr(VTOZ(vp
), (vattr_t
*)&xvap
, 0, cred
));
5629 #ifndef _SYS_SYSPROTO_H_
5630 struct vop_rename_args
{
5631 struct vnode
*a_fdvp
;
5632 struct vnode
*a_fvp
;
5633 struct componentname
*a_fcnp
;
5634 struct vnode
*a_tdvp
;
5635 struct vnode
*a_tvp
;
5636 struct componentname
*a_tcnp
;
5641 zfs_freebsd_rename(struct vop_rename_args
*ap
)
5643 vnode_t
*fdvp
= ap
->a_fdvp
;
5644 vnode_t
*fvp
= ap
->a_fvp
;
5645 vnode_t
*tdvp
= ap
->a_tdvp
;
5646 vnode_t
*tvp
= ap
->a_tvp
;
5649 ASSERT(ap
->a_fcnp
->cn_flags
& (SAVENAME
|SAVESTART
));
5650 ASSERT(ap
->a_tcnp
->cn_flags
& (SAVENAME
|SAVESTART
));
5652 error
= zfs_rename_(fdvp
, &fvp
, ap
->a_fcnp
, tdvp
, &tvp
,
5653 ap
->a_tcnp
, ap
->a_fcnp
->cn_cred
, 1);
5664 #ifndef _SYS_SYSPROTO_H_
5665 struct vop_symlink_args
{
5666 struct vnode
*a_dvp
;
5667 struct vnode
**a_vpp
;
5668 struct componentname
*a_cnp
;
5669 struct vattr
*a_vap
;
5675 zfs_freebsd_symlink(struct vop_symlink_args
*ap
)
5677 struct componentname
*cnp
= ap
->a_cnp
;
5678 vattr_t
*vap
= ap
->a_vap
;
5682 ASSERT(cnp
->cn_flags
& SAVENAME
);
5684 vap
->va_type
= VLNK
; /* FreeBSD: Syscall only sets va_mode. */
5685 vattr_init_mask(vap
);
5688 rc
= zfs_symlink(VTOZ(ap
->a_dvp
), cnp
->cn_nameptr
, vap
,
5689 ap
->a_target
, &zp
, cnp
->cn_cred
, 0 /* flags */);
5691 *ap
->a_vpp
= ZTOV(zp
);
5695 #ifndef _SYS_SYSPROTO_H_
5696 struct vop_readlink_args
{
5699 struct ucred
*a_cred
;
/*
 * VOP_READLINK glue: forward to zfs_readlink() with a NULL caller
 * context.  NOTE(review): corrupted extraction -- braces and the return
 * type line are missing; statements are split across physical lines.
 */
5704 zfs_freebsd_readlink(struct vop_readlink_args
*ap
)
5707 return (zfs_readlink(ap
->a_vp
, ap
->a_uio
, ap
->a_cred
, NULL
));
5710 #ifndef _SYS_SYSPROTO_H_
5711 struct vop_link_args
{
5712 struct vnode
*a_tdvp
;
5714 struct componentname
*a_cnp
;
5719 zfs_freebsd_link(struct vop_link_args
*ap
)
5721 struct componentname
*cnp
= ap
->a_cnp
;
5722 vnode_t
*vp
= ap
->a_vp
;
5723 vnode_t
*tdvp
= ap
->a_tdvp
;
5725 if (tdvp
->v_mount
!= vp
->v_mount
)
5728 ASSERT(cnp
->cn_flags
& SAVENAME
);
5730 return (zfs_link(VTOZ(tdvp
), VTOZ(vp
),
5731 cnp
->cn_nameptr
, cnp
->cn_cred
, 0));
5734 #ifndef _SYS_SYSPROTO_H_
5735 struct vop_inactive_args
{
5737 struct thread
*a_td
;
5742 zfs_freebsd_inactive(struct vop_inactive_args
*ap
)
5744 vnode_t
*vp
= ap
->a_vp
;
5746 zfs_inactive(vp
, ap
->a_td
->td_ucred
, NULL
);
5750 #if __FreeBSD_version >= 1300042
5751 #ifndef _SYS_SYSPROTO_H_
5752 struct vop_need_inactive_args
{
5754 struct thread
*a_td
;
5759 zfs_freebsd_need_inactive(struct vop_need_inactive_args
*ap
)
5761 vnode_t
*vp
= ap
->a_vp
;
5762 znode_t
*zp
= VTOZ(vp
);
5763 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
5766 if (vn_need_pageq_flush(vp
))
5769 if (!rw_tryenter(&zfsvfs
->z_teardown_inactive_lock
, RW_READER
))
5771 need
= (zp
->z_sa_hdl
== NULL
|| zp
->z_unlinked
|| zp
->z_atime_dirty
);
5772 rw_exit(&zfsvfs
->z_teardown_inactive_lock
);
5778 #ifndef _SYS_SYSPROTO_H_
5779 struct vop_reclaim_args
{
5781 struct thread
*a_td
;
5786 zfs_freebsd_reclaim(struct vop_reclaim_args
*ap
)
5788 vnode_t
*vp
= ap
->a_vp
;
5789 znode_t
*zp
= VTOZ(vp
);
5790 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
5794 #if __FreeBSD_version < 1300042
5795 /* Destroy the vm object and flush associated pages. */
5796 vnode_destroy_vobject(vp
);
5799 * z_teardown_inactive_lock protects from a race with
5800 * zfs_znode_dmu_fini in zfsvfs_teardown during
5803 rw_enter(&zfsvfs
->z_teardown_inactive_lock
, RW_READER
);
5804 if (zp
->z_sa_hdl
== NULL
)
5808 rw_exit(&zfsvfs
->z_teardown_inactive_lock
);
5814 #ifndef _SYS_SYSPROTO_H_
5815 struct vop_fid_args
{
/*
 * VOP_FID glue: forward to zfs_fid(), casting the FreeBSD fid pointer to
 * the Solaris-style fid_t expected by zfs_fid().  NOTE(review):
 * corrupted extraction -- braces and the return type line are missing;
 * statements are split across physical lines.
 */
5822 zfs_freebsd_fid(struct vop_fid_args
*ap
)
5825 return (zfs_fid(ap
->a_vp
, (void *)ap
->a_fid
, NULL
));
5829 #ifndef _SYS_SYSPROTO_H_
5830 struct vop_pathconf_args
{
5833 register_t
*a_retval
;
5838 zfs_freebsd_pathconf(struct vop_pathconf_args
*ap
)
5843 error
= zfs_pathconf(ap
->a_vp
, ap
->a_name
, &val
,
5844 curthread
->td_ucred
, NULL
);
5846 *ap
->a_retval
= val
;
5849 if (error
!= EOPNOTSUPP
)
5852 switch (ap
->a_name
) {
5854 *ap
->a_retval
= NAME_MAX
;
5857 if (ap
->a_vp
->v_type
== VDIR
|| ap
->a_vp
->v_type
== VFIFO
) {
5858 *ap
->a_retval
= PIPE_BUF
;
5863 return (vop_stdpathconf(ap
));
5868 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5869 * extended attribute name:
5872 * system freebsd:system:
5873 * user (none, can be used to access ZFS fsattr(5) attributes
5874 * created on Solaris)
5877 zfs_create_attrname(int attrnamespace
, const char *name
, char *attrname
,
5880 const char *namespace, *prefix
, *suffix
;
5882 /* We don't allow '/' character in attribute name. */
5883 if (strchr(name
, '/') != NULL
)
5885 /* We don't allow attribute names that start with "freebsd:" string. */
5886 if (strncmp(name
, "freebsd:", 8) == 0)
5889 bzero(attrname
, size
);
5891 switch (attrnamespace
) {
5892 case EXTATTR_NAMESPACE_USER
:
5894 prefix
= "freebsd:";
5895 namespace = EXTATTR_NAMESPACE_USER_STRING
;
5899 * This is the default namespace by which we can access all
5900 * attributes created on Solaris.
5902 prefix
= namespace = suffix
= "";
5905 case EXTATTR_NAMESPACE_SYSTEM
:
5906 prefix
= "freebsd:";
5907 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING
;
5910 case EXTATTR_NAMESPACE_EMPTY
:
5914 if (snprintf(attrname
, size
, "%s%s%s%s", prefix
, namespace, suffix
,
5916 return (ENAMETOOLONG
);
5921 #ifndef _SYS_SYSPROTO_H_
5922 struct vop_getextattr
{
5923 IN
struct vnode
*a_vp
;
5924 IN
int a_attrnamespace
;
5925 IN
const char *a_name
;
5926 INOUT
struct uio
*a_uio
;
5928 IN
struct ucred
*a_cred
;
5929 IN
struct thread
*a_td
;
5934 * Vnode operating to retrieve a named extended attribute.
5937 zfs_getextattr(struct vop_getextattr_args
*ap
)
5939 zfsvfs_t
*zfsvfs
= VTOZ(ap
->a_vp
)->z_zfsvfs
;
5940 struct thread
*td
= ap
->a_td
;
5941 struct nameidata nd
;
5944 vnode_t
*xvp
= NULL
, *vp
;
5948 * If the xattr property is off, refuse the request.
5950 if (!(zfsvfs
->z_flags
& ZSB_XATTR
)) {
5951 return (SET_ERROR(EOPNOTSUPP
));
5954 error
= extattr_check_cred(ap
->a_vp
, ap
->a_attrnamespace
,
5955 ap
->a_cred
, ap
->a_td
, VREAD
);
5959 error
= zfs_create_attrname(ap
->a_attrnamespace
, ap
->a_name
, attrname
,
5966 error
= zfs_lookup(ap
->a_vp
, NULL
, &xvp
, NULL
, 0, ap
->a_cred
, td
,
5967 LOOKUP_XATTR
, B_FALSE
);
5974 NDINIT_ATVP(&nd
, LOOKUP
, NOFOLLOW
, UIO_SYSSPACE
, attrname
,
5976 error
= vn_open_cred(&nd
, &flags
, 0, VN_OPEN_INVFS
, ap
->a_cred
, NULL
);
5978 NDFREE(&nd
, NDF_ONLY_PNBUF
);
5981 if (error
== ENOENT
)
5986 if (ap
->a_size
!= NULL
) {
5987 error
= VOP_GETATTR(vp
, &va
, ap
->a_cred
);
5989 *ap
->a_size
= (size_t)va
.va_size
;
5990 } else if (ap
->a_uio
!= NULL
)
5991 error
= VOP_READ(vp
, ap
->a_uio
, IO_UNIT
, ap
->a_cred
);
5994 vn_close(vp
, flags
, ap
->a_cred
, td
);
5999 #ifndef _SYS_SYSPROTO_H_
6000 struct vop_deleteextattr
{
6001 IN
struct vnode
*a_vp
;
6002 IN
int a_attrnamespace
;
6003 IN
const char *a_name
;
6004 IN
struct ucred
*a_cred
;
6005 IN
struct thread
*a_td
;
6010 * Vnode operation to remove a named attribute.
6013 zfs_deleteextattr(struct vop_deleteextattr_args
*ap
)
6015 zfsvfs_t
*zfsvfs
= VTOZ(ap
->a_vp
)->z_zfsvfs
;
6016 struct thread
*td
= ap
->a_td
;
6017 struct nameidata nd
;
6019 vnode_t
*xvp
= NULL
, *vp
;
6023 * If the xattr property is off, refuse the request.
6025 if (!(zfsvfs
->z_flags
& ZSB_XATTR
)) {
6026 return (SET_ERROR(EOPNOTSUPP
));
6029 error
= extattr_check_cred(ap
->a_vp
, ap
->a_attrnamespace
,
6030 ap
->a_cred
, ap
->a_td
, VWRITE
);
6034 error
= zfs_create_attrname(ap
->a_attrnamespace
, ap
->a_name
, attrname
,
6041 error
= zfs_lookup(ap
->a_vp
, NULL
, &xvp
, NULL
, 0, ap
->a_cred
, td
,
6042 LOOKUP_XATTR
, B_FALSE
);
6048 NDINIT_ATVP(&nd
, DELETE
, NOFOLLOW
| LOCKPARENT
| LOCKLEAF
,
6049 UIO_SYSSPACE
, attrname
, xvp
, td
);
6054 NDFREE(&nd
, NDF_ONLY_PNBUF
);
6055 if (error
== ENOENT
)
6060 error
= VOP_REMOVE(nd
.ni_dvp
, vp
, &nd
.ni_cnd
);
6061 NDFREE(&nd
, NDF_ONLY_PNBUF
);
6064 if (vp
== nd
.ni_dvp
)
6073 #ifndef _SYS_SYSPROTO_H_
6074 struct vop_setextattr
{
6075 IN
struct vnode
*a_vp
;
6076 IN
int a_attrnamespace
;
6077 IN
const char *a_name
;
6078 INOUT
struct uio
*a_uio
;
6079 IN
struct ucred
*a_cred
;
6080 IN
struct thread
*a_td
;
6085 * Vnode operation to set a named attribute.
6088 zfs_setextattr(struct vop_setextattr_args
*ap
)
6090 zfsvfs_t
*zfsvfs
= VTOZ(ap
->a_vp
)->z_zfsvfs
;
6091 struct thread
*td
= ap
->a_td
;
6092 struct nameidata nd
;
6095 vnode_t
*xvp
= NULL
, *vp
;
6099 * If the xattr property is off, refuse the request.
6101 if (!(zfsvfs
->z_flags
& ZSB_XATTR
)) {
6102 return (SET_ERROR(EOPNOTSUPP
));
6105 error
= extattr_check_cred(ap
->a_vp
, ap
->a_attrnamespace
,
6106 ap
->a_cred
, ap
->a_td
, VWRITE
);
6109 error
= zfs_create_attrname(ap
->a_attrnamespace
, ap
->a_name
, attrname
,
6116 error
= zfs_lookup(ap
->a_vp
, NULL
, &xvp
, NULL
, 0, ap
->a_cred
, td
,
6117 LOOKUP_XATTR
| CREATE_XATTR_DIR
, B_FALSE
);
6123 flags
= FFLAGS(O_WRONLY
| O_CREAT
);
6124 NDINIT_ATVP(&nd
, LOOKUP
, NOFOLLOW
, UIO_SYSSPACE
, attrname
,
6126 error
= vn_open_cred(&nd
, &flags
, 0600, VN_OPEN_INVFS
, ap
->a_cred
,
6129 NDFREE(&nd
, NDF_ONLY_PNBUF
);
6137 error
= VOP_SETATTR(vp
, &va
, ap
->a_cred
);
6139 VOP_WRITE(vp
, ap
->a_uio
, IO_UNIT
, ap
->a_cred
);
6142 vn_close(vp
, flags
, ap
->a_cred
, td
);
6147 #ifndef _SYS_SYSPROTO_H_
6148 struct vop_listextattr
{
6149 IN
struct vnode
*a_vp
;
6150 IN
int a_attrnamespace
;
6151 INOUT
struct uio
*a_uio
;
6153 IN
struct ucred
*a_cred
;
6154 IN
struct thread
*a_td
;
6159 * Vnode operation to retrieve extended attributes on a vnode.
6162 zfs_listextattr(struct vop_listextattr_args
*ap
)
6164 zfsvfs_t
*zfsvfs
= VTOZ(ap
->a_vp
)->z_zfsvfs
;
6165 struct thread
*td
= ap
->a_td
;
6166 struct nameidata nd
;
6167 char attrprefix
[16];
6168 uint8_t dirbuf
[sizeof (struct dirent
)];
6171 struct uio auio
, *uio
= ap
->a_uio
;
6172 size_t *sizep
= ap
->a_size
;
6174 vnode_t
*xvp
= NULL
, *vp
;
6175 int done
, error
, eof
, pos
;
6178 * If the xattr property is off, refuse the request.
6180 if (!(zfsvfs
->z_flags
& ZSB_XATTR
)) {
6181 return (SET_ERROR(EOPNOTSUPP
));
6184 error
= extattr_check_cred(ap
->a_vp
, ap
->a_attrnamespace
,
6185 ap
->a_cred
, ap
->a_td
, VREAD
);
6189 error
= zfs_create_attrname(ap
->a_attrnamespace
, "", attrprefix
,
6190 sizeof (attrprefix
));
6193 plen
= strlen(attrprefix
);
6200 error
= zfs_lookup(ap
->a_vp
, NULL
, &xvp
, NULL
, 0, ap
->a_cred
, td
,
6201 LOOKUP_XATTR
, B_FALSE
);
6205 * ENOATTR means that the EA directory does not yet exist,
6206 * i.e. there are no extended attributes there.
6208 if (error
== ENOATTR
)
6213 NDINIT_ATVP(&nd
, LOOKUP
, NOFOLLOW
| LOCKLEAF
| LOCKSHARED
,
6214 UIO_SYSSPACE
, ".", xvp
, td
);
6217 NDFREE(&nd
, NDF_ONLY_PNBUF
);
6223 auio
.uio_iov
= &aiov
;
6224 auio
.uio_iovcnt
= 1;
6225 auio
.uio_segflg
= UIO_SYSSPACE
;
6227 auio
.uio_rw
= UIO_READ
;
6228 auio
.uio_offset
= 0;
6233 aiov
.iov_base
= (void *)dirbuf
;
6234 aiov
.iov_len
= sizeof (dirbuf
);
6235 auio
.uio_resid
= sizeof (dirbuf
);
6236 error
= VOP_READDIR(vp
, &auio
, ap
->a_cred
, &eof
, NULL
, NULL
);
6237 done
= sizeof (dirbuf
) - auio
.uio_resid
;
6240 for (pos
= 0; pos
< done
; ) {
6241 dp
= (struct dirent
*)(dirbuf
+ pos
);
6242 pos
+= dp
->d_reclen
;
6244 * XXX: Temporarily we also accept DT_UNKNOWN, as this
6245 * is what we get when attribute was created on Solaris.
6247 if (dp
->d_type
!= DT_REG
&& dp
->d_type
!= DT_UNKNOWN
)
6250 strncmp(dp
->d_name
, "freebsd:", 8) == 0)
6252 else if (strncmp(dp
->d_name
, attrprefix
, plen
) != 0)
6254 nlen
= dp
->d_namlen
- plen
;
6257 else if (uio
!= NULL
) {
6259 * Format of extattr name entry is one byte for
6260 * length and the rest for name.
6262 error
= uiomove(&nlen
, 1, uio
->uio_rw
, uio
);
6264 error
= uiomove(dp
->d_name
+ plen
, nlen
,
6271 } while (!eof
&& error
== 0);
6279 #ifndef _SYS_SYSPROTO_H_
6280 struct vop_getacl_args
{
6290 zfs_freebsd_getacl(struct vop_getacl_args
*ap
)
6293 vsecattr_t vsecattr
;
6295 if (ap
->a_type
!= ACL_TYPE_NFS4
)
6298 vsecattr
.vsa_mask
= VSA_ACE
| VSA_ACECNT
;
6299 if ((error
= zfs_getsecattr(ap
->a_vp
, &vsecattr
, 0, ap
->a_cred
, NULL
)))
6302 error
= acl_from_aces(ap
->a_aclp
, vsecattr
.vsa_aclentp
,
6303 vsecattr
.vsa_aclcnt
);
6304 if (vsecattr
.vsa_aclentp
!= NULL
)
6305 kmem_free(vsecattr
.vsa_aclentp
, vsecattr
.vsa_aclentsz
);
6310 #ifndef _SYS_SYSPROTO_H_
6311 struct vop_setacl_args
{
6321 zfs_freebsd_setacl(struct vop_setacl_args
*ap
)
6324 vsecattr_t vsecattr
;
6325 int aclbsize
; /* size of acl list in bytes */
6328 if (ap
->a_type
!= ACL_TYPE_NFS4
)
6331 if (ap
->a_aclp
== NULL
)
6334 if (ap
->a_aclp
->acl_cnt
< 1 || ap
->a_aclp
->acl_cnt
> MAX_ACL_ENTRIES
)
6338 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
6339 * splitting every entry into two and appending "canonical six"
6340 * entries at the end. Don't allow for setting an ACL that would
6341 * cause chmod(2) to run out of ACL entries.
6343 if (ap
->a_aclp
->acl_cnt
* 2 + 6 > ACL_MAX_ENTRIES
)
6346 error
= acl_nfs4_check(ap
->a_aclp
, ap
->a_vp
->v_type
== VDIR
);
6350 vsecattr
.vsa_mask
= VSA_ACE
;
6351 aclbsize
= ap
->a_aclp
->acl_cnt
* sizeof (ace_t
);
6352 vsecattr
.vsa_aclentp
= kmem_alloc(aclbsize
, KM_SLEEP
);
6353 aaclp
= vsecattr
.vsa_aclentp
;
6354 vsecattr
.vsa_aclentsz
= aclbsize
;
6356 aces_from_acl(vsecattr
.vsa_aclentp
, &vsecattr
.vsa_aclcnt
, ap
->a_aclp
);
6357 error
= zfs_setsecattr(VTOZ(ap
->a_vp
), &vsecattr
, 0, ap
->a_cred
);
6358 kmem_free(aaclp
, aclbsize
);
6363 #ifndef _SYS_SYSPROTO_H_
6364 struct vop_aclcheck_args
{
6374 zfs_freebsd_aclcheck(struct vop_aclcheck_args
*ap
)
6377 return (EOPNOTSUPP
);
6381 zfs_vptocnp(struct vop_vptocnp_args
*ap
)
6383 vnode_t
*covered_vp
;
6384 vnode_t
*vp
= ap
->a_vp
;
6385 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
6386 znode_t
*zp
= VTOZ(vp
);
6394 * If we are a snapshot mounted under .zfs, run the operation
6395 * on the covered vnode.
6397 if (zp
->z_id
!= zfsvfs
->z_root
|| zfsvfs
->z_parent
== zfsvfs
) {
6398 char name
[MAXNAMLEN
+ 1];
6402 error
= zfs_znode_parent_and_name(zp
, &dzp
, name
);
6405 if (*ap
->a_buflen
< len
)
6406 error
= SET_ERROR(ENOMEM
);
6409 *ap
->a_buflen
-= len
;
6410 bcopy(name
, ap
->a_buf
+ *ap
->a_buflen
, len
);
6411 *ap
->a_vpp
= ZTOV(dzp
);
6418 covered_vp
= vp
->v_mount
->mnt_vnodecovered
;
6419 #if __FreeBSD_version >= 1300045
6420 enum vgetstate vs
= vget_prep(covered_vp
);
6424 ltype
= VOP_ISLOCKED(vp
);
6426 #if __FreeBSD_version >= 1300045
6427 error
= vget_finish(covered_vp
, LK_SHARED
, vs
);
6429 error
= vget(covered_vp
, LK_SHARED
| LK_VNHELD
, curthread
);
6432 error
= VOP_VPTOCNP(covered_vp
, ap
->a_vpp
, ap
->a_cred
,
6433 ap
->a_buf
, ap
->a_buflen
);
6436 vn_lock(vp
, ltype
| LK_RETRY
);
6437 if (VN_IS_DOOMED(vp
))
6438 error
= SET_ERROR(ENOENT
);
#ifndef _SYS_SYSPROTO_H_
struct vop_lock1_args {
	struct vnode *a_vp;
	int a_flags;
	char *file;
	int line;
};
#endif

#ifdef DIAGNOSTIC
/*
 * Diagnostic vop_lock1 wrapper: after taking the vnode lock, assert
 * that the caller does not hold the teardown lock of the dataset, so
 * that lock-order problems against forced unmount are caught early.
 */
static int
zfs_lock(struct vop_lock1_args *ap)
{
	vnode_t *vp;
	znode_t *zp;
	int err;

#if __FreeBSD_version >= 1300064
	err = vop_lock(ap);
#else
	err = vop_stdlock(ap);
#endif
	if (err == 0 && (ap->a_flags & LK_NOWAIT) == 0) {
		vp = ap->a_vp;
		zp = vp->v_data;
		if (vp->v_mount != NULL && !VN_IS_DOOMED(vp) &&
		    zp != NULL && (zp->z_pflags & ZFS_XATTR) == 0)
			VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock));
	}
	return (err);
}
#endif
6475 struct vop_vector zfs_vnodeops
;
6476 struct vop_vector zfs_fifoops
;
6477 struct vop_vector zfs_shareops
;
6479 struct vop_vector zfs_vnodeops
= {
6480 .vop_default
= &default_vnodeops
,
6481 .vop_inactive
= zfs_freebsd_inactive
,
6482 #if __FreeBSD_version >= 1300042
6483 .vop_need_inactive
= zfs_freebsd_need_inactive
,
6485 .vop_reclaim
= zfs_freebsd_reclaim
,
6486 .vop_access
= zfs_freebsd_access
,
6487 .vop_allocate
= VOP_EINVAL
,
6488 .vop_lookup
= zfs_cache_lookup
,
6489 .vop_cachedlookup
= zfs_freebsd_cachedlookup
,
6490 .vop_getattr
= zfs_freebsd_getattr
,
6491 .vop_setattr
= zfs_freebsd_setattr
,
6492 .vop_create
= zfs_freebsd_create
,
6493 .vop_mknod
= (vop_mknod_t
*)zfs_freebsd_create
,
6494 .vop_mkdir
= zfs_freebsd_mkdir
,
6495 .vop_readdir
= zfs_freebsd_readdir
,
6496 .vop_fsync
= zfs_freebsd_fsync
,
6497 .vop_open
= zfs_freebsd_open
,
6498 .vop_close
= zfs_freebsd_close
,
6499 .vop_rmdir
= zfs_freebsd_rmdir
,
6500 .vop_ioctl
= zfs_freebsd_ioctl
,
6501 .vop_link
= zfs_freebsd_link
,
6502 .vop_symlink
= zfs_freebsd_symlink
,
6503 .vop_readlink
= zfs_freebsd_readlink
,
6504 .vop_read
= zfs_freebsd_read
,
6505 .vop_write
= zfs_freebsd_write
,
6506 .vop_remove
= zfs_freebsd_remove
,
6507 .vop_rename
= zfs_freebsd_rename
,
6508 .vop_pathconf
= zfs_freebsd_pathconf
,
6509 .vop_bmap
= zfs_freebsd_bmap
,
6510 .vop_fid
= zfs_freebsd_fid
,
6511 .vop_getextattr
= zfs_getextattr
,
6512 .vop_deleteextattr
= zfs_deleteextattr
,
6513 .vop_setextattr
= zfs_setextattr
,
6514 .vop_listextattr
= zfs_listextattr
,
6515 .vop_getacl
= zfs_freebsd_getacl
,
6516 .vop_setacl
= zfs_freebsd_setacl
,
6517 .vop_aclcheck
= zfs_freebsd_aclcheck
,
6518 .vop_getpages
= zfs_freebsd_getpages
,
6519 .vop_putpages
= zfs_freebsd_putpages
,
6520 .vop_vptocnp
= zfs_vptocnp
,
6521 #if __FreeBSD_version >= 1300064
6523 .vop_lock1
= zfs_lock
,
6525 .vop_lock1
= vop_lock
,
6527 .vop_unlock
= vop_unlock
,
6528 .vop_islocked
= vop_islocked
,
6531 .vop_lock1
= zfs_lock
,
6535 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops
);
6537 struct vop_vector zfs_fifoops
= {
6538 .vop_default
= &fifo_specops
,
6539 .vop_fsync
= zfs_freebsd_fsync
,
6540 .vop_access
= zfs_freebsd_access
,
6541 .vop_getattr
= zfs_freebsd_getattr
,
6542 .vop_inactive
= zfs_freebsd_inactive
,
6543 .vop_read
= VOP_PANIC
,
6544 .vop_reclaim
= zfs_freebsd_reclaim
,
6545 .vop_setattr
= zfs_freebsd_setattr
,
6546 .vop_write
= VOP_PANIC
,
6547 .vop_pathconf
= zfs_freebsd_pathconf
,
6548 .vop_fid
= zfs_freebsd_fid
,
6549 .vop_getacl
= zfs_freebsd_getacl
,
6550 .vop_setacl
= zfs_freebsd_setacl
,
6551 .vop_aclcheck
= zfs_freebsd_aclcheck
,
6553 VFS_VOP_VECTOR_REGISTER(zfs_fifoops
);
6556 * special share hidden files vnode operations template
6558 struct vop_vector zfs_shareops
= {
6559 .vop_default
= &default_vnodeops
,
6560 .vop_access
= zfs_freebsd_access
,
6561 .vop_inactive
= zfs_freebsd_inactive
,
6562 .vop_reclaim
= zfs_freebsd_reclaim
,
6563 .vop_fid
= zfs_freebsd_fid
,
6564 .vop_pathconf
= zfs_freebsd_pathconf
,
6566 VFS_VOP_VECTOR_REGISTER(zfs_shareops
);