]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
d164b209 | 22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
34dc7c2f BB |
23 | * Use is subject to license terms. |
24 | */ | |
25 | ||
26 | /* Portions Copyright 2007 Jeremy Teo */ | |
27 | ||
34dc7c2f BB |
28 | #include <sys/types.h> |
29 | #include <sys/param.h> | |
30 | #include <sys/time.h> | |
31 | #include <sys/systm.h> | |
32 | #include <sys/sysmacros.h> | |
33 | #include <sys/resource.h> | |
34 | #include <sys/vfs.h> | |
35 | #include <sys/vfs_opreg.h> | |
36 | #include <sys/vnode.h> | |
37 | #include <sys/file.h> | |
38 | #include <sys/stat.h> | |
39 | #include <sys/kmem.h> | |
40 | #include <sys/taskq.h> | |
41 | #include <sys/uio.h> | |
42 | #include <sys/vmsystm.h> | |
43 | #include <sys/atomic.h> | |
44 | #include <sys/vm.h> | |
45 | #include <vm/seg_vn.h> | |
46 | #include <vm/pvn.h> | |
47 | #include <vm/as.h> | |
b128c09f BB |
48 | #include <vm/kpm.h> |
49 | #include <vm/seg_kpm.h> | |
34dc7c2f BB |
50 | #include <sys/mman.h> |
51 | #include <sys/pathname.h> | |
52 | #include <sys/cmn_err.h> | |
53 | #include <sys/errno.h> | |
54 | #include <sys/unistd.h> | |
55 | #include <sys/zfs_dir.h> | |
56 | #include <sys/zfs_acl.h> | |
57 | #include <sys/zfs_ioctl.h> | |
58 | #include <sys/fs/zfs.h> | |
59 | #include <sys/dmu.h> | |
60 | #include <sys/spa.h> | |
61 | #include <sys/txg.h> | |
62 | #include <sys/dbuf.h> | |
63 | #include <sys/zap.h> | |
64 | #include <sys/dirent.h> | |
65 | #include <sys/policy.h> | |
66 | #include <sys/sunddi.h> | |
67 | #include <sys/filio.h> | |
b128c09f | 68 | #include <sys/sid.h> |
34dc7c2f BB |
69 | #include "fs/fs_subr.h" |
70 | #include <sys/zfs_ctldir.h> | |
71 | #include <sys/zfs_fuid.h> | |
72 | #include <sys/dnlc.h> | |
73 | #include <sys/zfs_rlock.h> | |
74 | #include <sys/extdirent.h> | |
75 | #include <sys/kidmap.h> | |
76 | #include <sys/cred_impl.h> | |
77 | #include <sys/attr.h> | |
78 | ||
79 | /* | |
80 | * Programming rules. | |
81 | * | |
82 | * Each vnode op performs some logical unit of work. To do this, the ZPL must | |
83 | * properly lock its in-core state, create a DMU transaction, do the work, | |
84 | * record this work in the intent log (ZIL), commit the DMU transaction, | |
85 | * and wait for the intent log to commit if it is a synchronous operation. | |
86 | * Moreover, the vnode ops must work in both normal and log replay context. | |
87 | * The ordering of events is important to avoid deadlocks and references | |
88 | * to freed memory. The example below illustrates the following Big Rules: | |
89 | * | |
90 | * (1) A check must be made in each zfs thread for a mounted file system. | |
91 | * This is done avoiding races using ZFS_ENTER(zfsvfs). | |
92 | * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes | |
93 | * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros | |
94 | * can return EIO from the calling function. | |
95 | * | |
96 | * (2) VN_RELE() should always be the last thing except for zil_commit() | |
97 | * (if necessary) and ZFS_EXIT(). This is for 3 reasons: | |
98 | * First, if it's the last reference, the vnode/znode | |
99 | * can be freed, so the zp may point to freed memory. Second, the last | |
100 | * reference will call zfs_zinactive(), which may induce a lot of work -- | |
101 | * pushing cached pages (which acquires range locks) and syncing out | |
102 | * cached atime changes. Third, zfs_zinactive() may require a new tx, | |
103 | * which could deadlock the system if you were already holding one. | |
9babb374 | 104 | * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). |
34dc7c2f BB |
105 | * |
106 | * (3) All range locks must be grabbed before calling dmu_tx_assign(), | |
107 | * as they can span dmu_tx_assign() calls. | |
108 | * | |
fb5f0bc8 | 109 | * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). |
34dc7c2f BB |
110 | * This is critical because we don't want to block while holding locks. |
111 | * Note, in particular, that if a lock is sometimes acquired before | |
112 | * the tx assigns, and sometimes after (e.g. z_lock), then failing to | |
113 | * use a non-blocking assign can deadlock the system. The scenario: | |
114 | * | |
115 | * Thread A has grabbed a lock before calling dmu_tx_assign(). | |
116 | * Thread B is in an already-assigned tx, and blocks for this lock. | |
117 | * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() | |
118 | * forever, because the previous txg can't quiesce until B's tx commits. | |
119 | * | |
120 | * If dmu_tx_assign() returns ERESTART, then drop all locks, | |
121 | * call dmu_tx_wait(), and try again. | |
122 | * | |
123 | * (5) If the operation succeeded, generate the intent log entry for it | |
124 | * before dropping locks. This ensures that the ordering of events | |
125 | * in the intent log matches the order in which they actually occurred. | |
fb5f0bc8 BB |
126 | * During ZIL replay the zfs_log_* functions will update the sequence |
127 | * number to indicate the zil transaction has replayed. | |
34dc7c2f BB |
128 | * |
129 | * (6) At the end of each vnode op, the DMU tx must always commit, | |
130 | * regardless of whether there were any errors. | |
131 | * | |
132 | * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) | |
133 | * to ensure that synchronous semantics are provided when necessary. | |
134 | * | |
135 | * In general, this is how things should be ordered in each vnode op: | |
136 | * | |
137 | * ZFS_ENTER(zfsvfs); // exit if unmounted | |
138 | * top: | |
139 | * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) | |
140 | * rw_enter(...); // grab any other locks you need | |
141 | * tx = dmu_tx_create(...); // get DMU tx | |
142 | * dmu_tx_hold_*(); // hold each object you might modify | |
fb5f0bc8 | 143 | * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign |
34dc7c2f BB |
144 | * if (error) { |
145 | * rw_exit(...); // drop locks | |
146 | * zfs_dirent_unlock(dl); // unlock directory entry | |
147 | * VN_RELE(...); // release held vnodes | |
fb5f0bc8 | 148 | * if (error == ERESTART) { |
34dc7c2f BB |
149 | * dmu_tx_wait(tx); |
150 | * dmu_tx_abort(tx); | |
151 | * goto top; | |
152 | * } | |
153 | * dmu_tx_abort(tx); // abort DMU tx | |
154 | * ZFS_EXIT(zfsvfs); // finished in zfs | |
155 | * return (error); // really out of space | |
156 | * } | |
157 | * error = do_real_work(); // do whatever this VOP does | |
158 | * if (error == 0) | |
159 | * zfs_log_*(...); // on success, make ZIL entry | |
160 | * dmu_tx_commit(tx); // commit DMU tx -- error or not | |
161 | * rw_exit(...); // drop locks | |
162 | * zfs_dirent_unlock(dl); // unlock directory entry | |
163 | * VN_RELE(...); // release held vnodes | |
164 | * zil_commit(zilog, seq, foid); // synchronous when necessary | |
165 | * ZFS_EXIT(zfsvfs); // finished in zfs | |
166 | * return (error); // done, report error | |
167 | */ | |
168 | ||
169 | /* ARGSUSED */ | |
170 | static int | |
171 | zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) | |
172 | { | |
173 | znode_t *zp = VTOZ(*vpp); | |
b128c09f BB |
174 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
175 | ||
176 | ZFS_ENTER(zfsvfs); | |
177 | ZFS_VERIFY_ZP(zp); | |
34dc7c2f BB |
178 | |
179 | if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) && | |
180 | ((flag & FAPPEND) == 0)) { | |
b128c09f | 181 | ZFS_EXIT(zfsvfs); |
34dc7c2f BB |
182 | return (EPERM); |
183 | } | |
184 | ||
185 | if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && | |
186 | ZTOV(zp)->v_type == VREG && | |
187 | !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && | |
b128c09f BB |
188 | zp->z_phys->zp_size > 0) { |
189 | if (fs_vscan(*vpp, cr, 0) != 0) { | |
190 | ZFS_EXIT(zfsvfs); | |
34dc7c2f | 191 | return (EACCES); |
b128c09f BB |
192 | } |
193 | } | |
34dc7c2f BB |
194 | |
195 | /* Keep a count of the synchronous opens in the znode */ | |
196 | if (flag & (FSYNC | FDSYNC)) | |
197 | atomic_inc_32(&zp->z_sync_cnt); | |
198 | ||
b128c09f | 199 | ZFS_EXIT(zfsvfs); |
34dc7c2f BB |
200 | return (0); |
201 | } | |
202 | ||
203 | /* ARGSUSED */ | |
204 | static int | |
205 | zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, | |
206 | caller_context_t *ct) | |
207 | { | |
208 | znode_t *zp = VTOZ(vp); | |
b128c09f BB |
209 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
210 | ||
45d1cae3 BB |
211 | /* |
212 | * Clean up any locks held by this process on the vp. | |
213 | */ | |
214 | cleanlocks(vp, ddi_get_pid(), 0); | |
215 | cleanshares(vp, ddi_get_pid()); | |
216 | ||
b128c09f BB |
217 | ZFS_ENTER(zfsvfs); |
218 | ZFS_VERIFY_ZP(zp); | |
34dc7c2f BB |
219 | |
220 | /* Decrement the synchronous opens in the znode */ | |
221 | if ((flag & (FSYNC | FDSYNC)) && (count == 1)) | |
222 | atomic_dec_32(&zp->z_sync_cnt); | |
223 | ||
34dc7c2f BB |
224 | if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && |
225 | ZTOV(zp)->v_type == VREG && | |
226 | !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && | |
227 | zp->z_phys->zp_size > 0) | |
228 | VERIFY(fs_vscan(vp, cr, 1) == 0); | |
229 | ||
b128c09f | 230 | ZFS_EXIT(zfsvfs); |
34dc7c2f BB |
231 | return (0); |
232 | } | |
233 | ||
234 | /* | |
235 | * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and | |
236 | * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. | |
237 | */ | |
238 | static int | |
239 | zfs_holey(vnode_t *vp, int cmd, offset_t *off) | |
240 | { | |
241 | znode_t *zp = VTOZ(vp); | |
242 | uint64_t noff = (uint64_t)*off; /* new offset */ | |
243 | uint64_t file_sz; | |
244 | int error; | |
245 | boolean_t hole; | |
246 | ||
247 | file_sz = zp->z_phys->zp_size; | |
248 | if (noff >= file_sz) { | |
249 | return (ENXIO); | |
250 | } | |
251 | ||
252 | if (cmd == _FIO_SEEK_HOLE) | |
253 | hole = B_TRUE; | |
254 | else | |
255 | hole = B_FALSE; | |
256 | ||
257 | error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); | |
258 | ||
259 | /* end of file? */ | |
260 | if ((error == ESRCH) || (noff > file_sz)) { | |
261 | /* | |
262 | * Handle the virtual hole at the end of file. | |
263 | */ | |
264 | if (hole) { | |
265 | *off = file_sz; | |
266 | return (0); | |
267 | } | |
268 | return (ENXIO); | |
269 | } | |
270 | ||
271 | if (noff < *off) | |
272 | return (error); | |
273 | *off = noff; | |
274 | return (error); | |
275 | } | |
276 | ||
277 | /* ARGSUSED */ | |
278 | static int | |
279 | zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, | |
280 | int *rvalp, caller_context_t *ct) | |
281 | { | |
282 | offset_t off; | |
283 | int error; | |
284 | zfsvfs_t *zfsvfs; | |
285 | znode_t *zp; | |
286 | ||
287 | switch (com) { | |
288 | case _FIOFFS: | |
289 | return (zfs_sync(vp->v_vfsp, 0, cred)); | |
290 | ||
291 | /* | |
292 | * The following two ioctls are used by bfu. Faking out, | |
293 | * necessary to avoid bfu errors. | |
294 | */ | |
295 | case _FIOGDIO: | |
296 | case _FIOSDIO: | |
297 | return (0); | |
298 | ||
299 | case _FIO_SEEK_DATA: | |
300 | case _FIO_SEEK_HOLE: | |
301 | if (ddi_copyin((void *)data, &off, sizeof (off), flag)) | |
302 | return (EFAULT); | |
303 | ||
304 | zp = VTOZ(vp); | |
305 | zfsvfs = zp->z_zfsvfs; | |
306 | ZFS_ENTER(zfsvfs); | |
307 | ZFS_VERIFY_ZP(zp); | |
308 | ||
309 | /* offset parameter is in/out */ | |
310 | error = zfs_holey(vp, com, &off); | |
311 | ZFS_EXIT(zfsvfs); | |
312 | if (error) | |
313 | return (error); | |
314 | if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) | |
315 | return (EFAULT); | |
316 | return (0); | |
317 | } | |
318 | return (ENOTTY); | |
319 | } | |
320 | ||
b128c09f BB |
321 | /* |
322 | * Utility functions to map and unmap a single physical page. These | |
323 | * are used to manage the mappable copies of ZFS file data, and therefore | |
324 | * do not update ref/mod bits. | |
325 | */ | |
326 | caddr_t | |
327 | zfs_map_page(page_t *pp, enum seg_rw rw) | |
328 | { | |
329 | if (kpm_enable) | |
330 | return (hat_kpm_mapin(pp, 0)); | |
331 | ASSERT(rw == S_READ || rw == S_WRITE); | |
332 | return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? PROT_WRITE : 0), | |
333 | (caddr_t)-1)); | |
334 | } | |
335 | ||
336 | void | |
337 | zfs_unmap_page(page_t *pp, caddr_t addr) | |
338 | { | |
339 | if (kpm_enable) { | |
340 | hat_kpm_mapout(pp, 0, addr); | |
341 | } else { | |
342 | ppmapout(addr); | |
343 | } | |
344 | } | |
345 | ||
34dc7c2f BB |
346 | /* |
347 | * When a file is memory mapped, we must keep the IO data synchronized | |
348 | * between the DMU cache and the memory mapped pages. What this means: | |
349 | * | |
350 | * On Write: If we find a memory mapped page, we write to *both* | |
351 | * the page and the dmu buffer. | |
34dc7c2f | 352 | */ |
d164b209 BB |
353 | static void |
354 | update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) | |
34dc7c2f | 355 | { |
d164b209 | 356 | int64_t off; |
34dc7c2f | 357 | |
34dc7c2f BB |
358 | off = start & PAGEOFFSET; |
359 | for (start &= PAGEMASK; len > 0; start += PAGESIZE) { | |
360 | page_t *pp; | |
d164b209 | 361 | uint64_t nbytes = MIN(PAGESIZE - off, len); |
34dc7c2f | 362 | |
34dc7c2f BB |
363 | if (pp = page_lookup(vp, start, SE_SHARED)) { |
364 | caddr_t va; | |
365 | ||
b128c09f | 366 | va = zfs_map_page(pp, S_WRITE); |
9babb374 BB |
367 | (void) dmu_read(os, oid, start+off, nbytes, va+off, |
368 | DMU_READ_PREFETCH); | |
b128c09f | 369 | zfs_unmap_page(pp, va); |
34dc7c2f | 370 | page_unlock(pp); |
34dc7c2f | 371 | } |
d164b209 | 372 | len -= nbytes; |
34dc7c2f | 373 | off = 0; |
34dc7c2f | 374 | } |
34dc7c2f BB |
375 | } |
376 | ||
377 | /* | |
378 | * When a file is memory mapped, we must keep the IO data synchronized | |
379 | * between the DMU cache and the memory mapped pages. What this means: | |
380 | * | |
381 | * On Read: We "read" preferentially from memory mapped pages, | |
382 | * else we default from the dmu buffer. | |
383 | * | |
384 | * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when | |
385 | * the file is memory mapped. | |
386 | */ | |
387 | static int | |
388 | mappedread(vnode_t *vp, int nbytes, uio_t *uio) | |
389 | { | |
390 | znode_t *zp = VTOZ(vp); | |
391 | objset_t *os = zp->z_zfsvfs->z_os; | |
392 | int64_t start, off; | |
393 | int len = nbytes; | |
394 | int error = 0; | |
395 | ||
396 | start = uio->uio_loffset; | |
397 | off = start & PAGEOFFSET; | |
398 | for (start &= PAGEMASK; len > 0; start += PAGESIZE) { | |
399 | page_t *pp; | |
400 | uint64_t bytes = MIN(PAGESIZE - off, len); | |
401 | ||
402 | if (pp = page_lookup(vp, start, SE_SHARED)) { | |
403 | caddr_t va; | |
404 | ||
b128c09f | 405 | va = zfs_map_page(pp, S_READ); |
34dc7c2f | 406 | error = uiomove(va + off, bytes, UIO_READ, uio); |
b128c09f | 407 | zfs_unmap_page(pp, va); |
34dc7c2f BB |
408 | page_unlock(pp); |
409 | } else { | |
410 | error = dmu_read_uio(os, zp->z_id, uio, bytes); | |
411 | } | |
412 | len -= bytes; | |
413 | off = 0; | |
414 | if (error) | |
415 | break; | |
416 | } | |
417 | return (error); | |
418 | } | |
419 | ||
420 | offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ | |
421 | ||
422 | /* | |
423 | * Read bytes from specified file into supplied buffer. | |
424 | * | |
425 | * IN: vp - vnode of file to be read from. | |
426 | * uio - structure supplying read location, range info, | |
427 | * and return buffer. | |
428 | * ioflag - SYNC flags; used to provide FRSYNC semantics. | |
429 | * cr - credentials of caller. | |
430 | * ct - caller context | |
431 | * | |
432 | * OUT: uio - updated offset and range, buffer filled. | |
433 | * | |
434 | * RETURN: 0 if success | |
435 | * error code if failure | |
436 | * | |
437 | * Side Effects: | |
438 | * vp - atime updated if byte count > 0 | |
439 | */ | |
440 | /* ARGSUSED */ | |
441 | static int | |
442 | zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) | |
443 | { | |
444 | znode_t *zp = VTOZ(vp); | |
445 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
446 | objset_t *os; | |
447 | ssize_t n, nbytes; | |
448 | int error; | |
449 | rl_t *rl; | |
450 | ||
451 | ZFS_ENTER(zfsvfs); | |
452 | ZFS_VERIFY_ZP(zp); | |
453 | os = zfsvfs->z_os; | |
454 | ||
455 | if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) { | |
456 | ZFS_EXIT(zfsvfs); | |
457 | return (EACCES); | |
458 | } | |
459 | ||
460 | /* | |
461 | * Validate file offset | |
462 | */ | |
463 | if (uio->uio_loffset < (offset_t)0) { | |
464 | ZFS_EXIT(zfsvfs); | |
465 | return (EINVAL); | |
466 | } | |
467 | ||
468 | /* | |
469 | * Fasttrack empty reads | |
470 | */ | |
471 | if (uio->uio_resid == 0) { | |
472 | ZFS_EXIT(zfsvfs); | |
473 | return (0); | |
474 | } | |
475 | ||
476 | /* | |
477 | * Check for mandatory locks | |
478 | */ | |
479 | if (MANDMODE((mode_t)zp->z_phys->zp_mode)) { | |
480 | if (error = chklock(vp, FREAD, | |
481 | uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { | |
482 | ZFS_EXIT(zfsvfs); | |
483 | return (error); | |
484 | } | |
485 | } | |
486 | ||
487 | /* | |
488 | * If we're in FRSYNC mode, sync out this znode before reading it. | |
489 | */ | |
490 | if (ioflag & FRSYNC) | |
491 | zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); | |
492 | ||
493 | /* | |
494 | * Lock the range against changes. | |
495 | */ | |
496 | rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); | |
497 | ||
498 | /* | |
499 | * If we are reading past end-of-file we can skip | |
500 | * to the end; but we might still need to set atime. | |
501 | */ | |
502 | if (uio->uio_loffset >= zp->z_phys->zp_size) { | |
503 | error = 0; | |
504 | goto out; | |
505 | } | |
506 | ||
507 | ASSERT(uio->uio_loffset < zp->z_phys->zp_size); | |
508 | n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset); | |
509 | ||
510 | while (n > 0) { | |
511 | nbytes = MIN(n, zfs_read_chunk_size - | |
512 | P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); | |
513 | ||
514 | if (vn_has_cached_data(vp)) | |
515 | error = mappedread(vp, nbytes, uio); | |
516 | else | |
517 | error = dmu_read_uio(os, zp->z_id, uio, nbytes); | |
b128c09f BB |
518 | if (error) { |
519 | /* convert checksum errors into IO errors */ | |
520 | if (error == ECKSUM) | |
521 | error = EIO; | |
34dc7c2f | 522 | break; |
b128c09f | 523 | } |
34dc7c2f BB |
524 | |
525 | n -= nbytes; | |
526 | } | |
527 | ||
528 | out: | |
529 | zfs_range_unlock(rl); | |
530 | ||
531 | ZFS_ACCESSTIME_STAMP(zfsvfs, zp); | |
532 | ZFS_EXIT(zfsvfs); | |
533 | return (error); | |
534 | } | |
535 | ||
34dc7c2f BB |
536 | /* |
537 | * Write the bytes to a file. | |
538 | * | |
539 | * IN: vp - vnode of file to be written to. | |
540 | * uio - structure supplying write location, range info, | |
541 | * and data buffer. | |
542 | * ioflag - FAPPEND flag set if in append mode. | |
543 | * cr - credentials of caller. | |
544 | * ct - caller context (NFS/CIFS fem monitor only) | |
545 | * | |
546 | * OUT: uio - updated offset and range. | |
547 | * | |
548 | * RETURN: 0 if success | |
549 | * error code if failure | |
550 | * | |
551 | * Timestamps: | |
552 | * vp - ctime|mtime updated if byte count > 0 | |
553 | */ | |
554 | /* ARGSUSED */ | |
555 | static int | |
556 | zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) | |
557 | { | |
558 | znode_t *zp = VTOZ(vp); | |
559 | rlim64_t limit = uio->uio_llimit; | |
560 | ssize_t start_resid = uio->uio_resid; | |
561 | ssize_t tx_bytes; | |
562 | uint64_t end_size; | |
563 | dmu_tx_t *tx; | |
564 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
565 | zilog_t *zilog; | |
566 | offset_t woff; | |
567 | ssize_t n, nbytes; | |
568 | rl_t *rl; | |
569 | int max_blksz = zfsvfs->z_max_blksz; | |
b128c09f | 570 | uint64_t pflags; |
34dc7c2f | 571 | int error; |
9babb374 | 572 | arc_buf_t *abuf; |
34dc7c2f | 573 | |
34dc7c2f BB |
574 | /* |
575 | * Fasttrack empty write | |
576 | */ | |
577 | n = start_resid; | |
578 | if (n == 0) | |
579 | return (0); | |
580 | ||
581 | if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) | |
582 | limit = MAXOFFSET_T; | |
583 | ||
584 | ZFS_ENTER(zfsvfs); | |
585 | ZFS_VERIFY_ZP(zp); | |
b128c09f BB |
586 | |
587 | /* | |
588 | * If immutable or not appending then return EPERM | |
589 | */ | |
590 | pflags = zp->z_phys->zp_flags; | |
591 | if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || | |
592 | ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && | |
593 | (uio->uio_loffset < zp->z_phys->zp_size))) { | |
594 | ZFS_EXIT(zfsvfs); | |
595 | return (EPERM); | |
596 | } | |
597 | ||
34dc7c2f BB |
598 | zilog = zfsvfs->z_log; |
599 | ||
600 | /* | |
601 | * Pre-fault the pages to ensure slow (eg NFS) pages | |
602 | * don't hold up txg. | |
603 | */ | |
b128c09f | 604 | uio_prefaultpages(n, uio); |
34dc7c2f BB |
605 | |
606 | /* | |
607 | * If in append mode, set the io offset pointer to eof. | |
608 | */ | |
609 | if (ioflag & FAPPEND) { | |
610 | /* | |
611 | * Range lock for a file append: | |
612 | * The value for the start of range will be determined by | |
613 | * zfs_range_lock() (to guarantee append semantics). | |
614 | * If this write will cause the block size to increase, | |
615 | * zfs_range_lock() will lock the entire file, so we must | |
616 | * later reduce the range after we grow the block size. | |
617 | */ | |
618 | rl = zfs_range_lock(zp, 0, n, RL_APPEND); | |
619 | if (rl->r_len == UINT64_MAX) { | |
620 | /* overlocked, zp_size can't change */ | |
621 | woff = uio->uio_loffset = zp->z_phys->zp_size; | |
622 | } else { | |
623 | woff = uio->uio_loffset = rl->r_off; | |
624 | } | |
625 | } else { | |
626 | woff = uio->uio_loffset; | |
627 | /* | |
628 | * Validate file offset | |
629 | */ | |
630 | if (woff < 0) { | |
631 | ZFS_EXIT(zfsvfs); | |
632 | return (EINVAL); | |
633 | } | |
634 | ||
635 | /* | |
636 | * If we need to grow the block size then zfs_range_lock() | |
637 | * will lock a wider range than we request here. | |
638 | * Later after growing the block size we reduce the range. | |
639 | */ | |
640 | rl = zfs_range_lock(zp, woff, n, RL_WRITER); | |
641 | } | |
642 | ||
643 | if (woff >= limit) { | |
644 | zfs_range_unlock(rl); | |
645 | ZFS_EXIT(zfsvfs); | |
646 | return (EFBIG); | |
647 | } | |
648 | ||
649 | if ((woff + n) > limit || woff > (limit - n)) | |
650 | n = limit - woff; | |
651 | ||
652 | /* | |
653 | * Check for mandatory locks | |
654 | */ | |
655 | if (MANDMODE((mode_t)zp->z_phys->zp_mode) && | |
656 | (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { | |
657 | zfs_range_unlock(rl); | |
658 | ZFS_EXIT(zfsvfs); | |
659 | return (error); | |
660 | } | |
661 | end_size = MAX(zp->z_phys->zp_size, woff + n); | |
662 | ||
663 | /* | |
664 | * Write the file in reasonable size chunks. Each chunk is written | |
665 | * in a separate transaction; this keeps the intent log records small | |
666 | * and allows us to do more fine-grained space accounting. | |
667 | */ | |
668 | while (n > 0) { | |
9babb374 BB |
669 | abuf = NULL; |
670 | woff = uio->uio_loffset; | |
671 | ||
672 | again: | |
673 | if (zfs_usergroup_overquota(zfsvfs, | |
674 | B_FALSE, zp->z_phys->zp_uid) || | |
675 | zfs_usergroup_overquota(zfsvfs, | |
676 | B_TRUE, zp->z_phys->zp_gid)) { | |
677 | if (abuf != NULL) | |
678 | dmu_return_arcbuf(abuf); | |
679 | error = EDQUOT; | |
680 | break; | |
681 | } | |
682 | ||
683 | /* | |
684 | * If dmu_assign_arcbuf() is expected to execute with minimum | |
685 | * overhead loan an arc buffer and copy user data to it before | |
686 | * we enter a txg. This avoids holding a txg forever while we | |
687 | * pagefault on a hanging NFS server mapping. | |
688 | */ | |
689 | if (abuf == NULL && n >= max_blksz && | |
690 | woff >= zp->z_phys->zp_size && | |
691 | P2PHASE(woff, max_blksz) == 0 && | |
692 | zp->z_blksz == max_blksz) { | |
693 | size_t cbytes; | |
694 | ||
695 | abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz); | |
696 | ASSERT(abuf != NULL); | |
697 | ASSERT(arc_buf_size(abuf) == max_blksz); | |
698 | if (error = uiocopy(abuf->b_data, max_blksz, | |
699 | UIO_WRITE, uio, &cbytes)) { | |
700 | dmu_return_arcbuf(abuf); | |
701 | break; | |
702 | } | |
703 | ASSERT(cbytes == max_blksz); | |
704 | } | |
705 | ||
34dc7c2f BB |
706 | /* |
707 | * Start a transaction. | |
708 | */ | |
34dc7c2f BB |
709 | tx = dmu_tx_create(zfsvfs->z_os); |
710 | dmu_tx_hold_bonus(tx, zp->z_id); | |
711 | dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); | |
fb5f0bc8 | 712 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
34dc7c2f | 713 | if (error) { |
fb5f0bc8 | 714 | if (error == ERESTART) { |
34dc7c2f BB |
715 | dmu_tx_wait(tx); |
716 | dmu_tx_abort(tx); | |
9babb374 | 717 | goto again; |
34dc7c2f BB |
718 | } |
719 | dmu_tx_abort(tx); | |
9babb374 BB |
720 | if (abuf != NULL) |
721 | dmu_return_arcbuf(abuf); | |
34dc7c2f BB |
722 | break; |
723 | } | |
724 | ||
725 | /* | |
726 | * If zfs_range_lock() over-locked we grow the blocksize | |
727 | * and then reduce the lock range. This will only happen | |
728 | * on the first iteration since zfs_range_reduce() will | |
729 | * shrink down r_len to the appropriate size. | |
730 | */ | |
731 | if (rl->r_len == UINT64_MAX) { | |
732 | uint64_t new_blksz; | |
733 | ||
734 | if (zp->z_blksz > max_blksz) { | |
735 | ASSERT(!ISP2(zp->z_blksz)); | |
736 | new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); | |
737 | } else { | |
738 | new_blksz = MIN(end_size, max_blksz); | |
739 | } | |
740 | zfs_grow_blocksize(zp, new_blksz, tx); | |
741 | zfs_range_reduce(rl, woff, n); | |
742 | } | |
743 | ||
744 | /* | |
745 | * XXX - should we really limit each write to z_max_blksz? | |
746 | * Perhaps we should use SPA_MAXBLOCKSIZE chunks? | |
747 | */ | |
748 | nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); | |
34dc7c2f | 749 | |
9babb374 BB |
750 | if (abuf == NULL) { |
751 | tx_bytes = uio->uio_resid; | |
752 | error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio, | |
753 | nbytes, tx); | |
754 | tx_bytes -= uio->uio_resid; | |
755 | } else { | |
756 | tx_bytes = nbytes; | |
757 | ASSERT(tx_bytes == max_blksz); | |
758 | dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx); | |
759 | ASSERT(tx_bytes <= uio->uio_resid); | |
760 | uioskip(uio, tx_bytes); | |
761 | } | |
762 | if (tx_bytes && vn_has_cached_data(vp)) { | |
d164b209 BB |
763 | update_pages(vp, woff, |
764 | tx_bytes, zfsvfs->z_os, zp->z_id); | |
9babb374 | 765 | } |
34dc7c2f BB |
766 | |
767 | /* | |
768 | * If we made no progress, we're done. If we made even | |
769 | * partial progress, update the znode and ZIL accordingly. | |
770 | */ | |
771 | if (tx_bytes == 0) { | |
772 | dmu_tx_commit(tx); | |
773 | ASSERT(error != 0); | |
774 | break; | |
775 | } | |
776 | ||
777 | /* | |
778 | * Clear Set-UID/Set-GID bits on successful write if not | |
779 | * privileged and at least one of the excute bits is set. | |
780 | * | |
781 | * It would be nice to to this after all writes have | |
782 | * been done, but that would still expose the ISUID/ISGID | |
783 | * to another app after the partial write is committed. | |
784 | * | |
785 | * Note: we don't call zfs_fuid_map_id() here because | |
786 | * user 0 is not an ephemeral uid. | |
787 | */ | |
788 | mutex_enter(&zp->z_acl_lock); | |
789 | if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) | | |
790 | (S_IXUSR >> 6))) != 0 && | |
791 | (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 && | |
792 | secpolicy_vnode_setid_retain(cr, | |
793 | (zp->z_phys->zp_mode & S_ISUID) != 0 && | |
794 | zp->z_phys->zp_uid == 0) != 0) { | |
795 | zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID); | |
796 | } | |
797 | mutex_exit(&zp->z_acl_lock); | |
798 | ||
799 | /* | |
800 | * Update time stamp. NOTE: This marks the bonus buffer as | |
801 | * dirty, so we don't have to do it again for zp_size. | |
802 | */ | |
803 | zfs_time_stamper(zp, CONTENT_MODIFIED, tx); | |
804 | ||
805 | /* | |
806 | * Update the file size (zp_size) if it has changed; | |
807 | * account for possible concurrent updates. | |
808 | */ | |
809 | while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset) | |
810 | (void) atomic_cas_64(&zp->z_phys->zp_size, end_size, | |
811 | uio->uio_loffset); | |
812 | zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); | |
813 | dmu_tx_commit(tx); | |
814 | ||
815 | if (error != 0) | |
816 | break; | |
817 | ASSERT(tx_bytes == nbytes); | |
818 | n -= nbytes; | |
819 | } | |
820 | ||
821 | zfs_range_unlock(rl); | |
822 | ||
823 | /* | |
824 | * If we're in replay mode, or we made no progress, return error. | |
825 | * Otherwise, it's at least a partial write, so it's successful. | |
826 | */ | |
fb5f0bc8 | 827 | if (zfsvfs->z_replay || uio->uio_resid == start_resid) { |
34dc7c2f BB |
828 | ZFS_EXIT(zfsvfs); |
829 | return (error); | |
830 | } | |
831 | ||
832 | if (ioflag & (FSYNC | FDSYNC)) | |
833 | zil_commit(zilog, zp->z_last_itx, zp->z_id); | |
834 | ||
835 | ZFS_EXIT(zfsvfs); | |
836 | return (0); | |
837 | } | |
838 | ||
839 | void | |
840 | zfs_get_done(dmu_buf_t *db, void *vzgd) | |
841 | { | |
842 | zgd_t *zgd = (zgd_t *)vzgd; | |
843 | rl_t *rl = zgd->zgd_rl; | |
844 | vnode_t *vp = ZTOV(rl->r_zp); | |
9babb374 | 845 | objset_t *os = rl->r_zp->z_zfsvfs->z_os; |
34dc7c2f BB |
846 | |
847 | dmu_buf_rele(db, vzgd); | |
848 | zfs_range_unlock(rl); | |
9babb374 BB |
849 | /* |
850 | * Release the vnode asynchronously as we currently have the | |
851 | * txg stopped from syncing. | |
852 | */ | |
853 | VN_RELE_ASYNC(vp, dsl_pool_vnrele_taskq(dmu_objset_pool(os))); | |
34dc7c2f BB |
854 | zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); |
855 | kmem_free(zgd, sizeof (zgd_t)); | |
856 | } | |
857 | ||
45d1cae3 BB |
#ifdef DEBUG
/*
 * Fault-injection knob: when set non-zero, the next indirect write in
 * zfs_get_data() fails with EIO and the knob is cleared again.
 */
static int zil_fault_io;
#endif
861 | ||
34dc7c2f BB |
862 | /* |
863 | * Get data to generate a TX_WRITE intent log record. | |
864 | */ | |
865 | int | |
866 | zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) | |
867 | { | |
868 | zfsvfs_t *zfsvfs = arg; | |
869 | objset_t *os = zfsvfs->z_os; | |
870 | znode_t *zp; | |
871 | uint64_t off = lr->lr_offset; | |
872 | dmu_buf_t *db; | |
873 | rl_t *rl; | |
874 | zgd_t *zgd; | |
875 | int dlen = lr->lr_length; /* length of user data */ | |
876 | int error = 0; | |
877 | ||
878 | ASSERT(zio); | |
879 | ASSERT(dlen != 0); | |
880 | ||
881 | /* | |
882 | * Nothing to do if the file has been removed | |
883 | */ | |
884 | if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0) | |
885 | return (ENOENT); | |
886 | if (zp->z_unlinked) { | |
9babb374 BB |
887 | /* |
888 | * Release the vnode asynchronously as we currently have the | |
889 | * txg stopped from syncing. | |
890 | */ | |
891 | VN_RELE_ASYNC(ZTOV(zp), | |
892 | dsl_pool_vnrele_taskq(dmu_objset_pool(os))); | |
34dc7c2f BB |
893 | return (ENOENT); |
894 | } | |
895 | ||
896 | /* | |
897 | * Write records come in two flavors: immediate and indirect. | |
898 | * For small writes it's cheaper to store the data with the | |
899 | * log record (immediate); for large writes it's cheaper to | |
900 | * sync the data and get a pointer to it (indirect) so that | |
901 | * we don't have to write the data twice. | |
902 | */ | |
903 | if (buf != NULL) { /* immediate write */ | |
904 | rl = zfs_range_lock(zp, off, dlen, RL_READER); | |
905 | /* test for truncation needs to be done while range locked */ | |
906 | if (off >= zp->z_phys->zp_size) { | |
907 | error = ENOENT; | |
908 | goto out; | |
909 | } | |
9babb374 BB |
910 | VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf, |
911 | DMU_READ_NO_PREFETCH)); | |
34dc7c2f BB |
912 | } else { /* indirect write */ |
913 | uint64_t boff; /* block starting offset */ | |
914 | ||
915 | /* | |
916 | * Have to lock the whole block to ensure when it's | |
917 | * written out and it's checksum is being calculated | |
918 | * that no one can change the data. We need to re-check | |
919 | * blocksize after we get the lock in case it's changed! | |
920 | */ | |
921 | for (;;) { | |
922 | if (ISP2(zp->z_blksz)) { | |
923 | boff = P2ALIGN_TYPED(off, zp->z_blksz, | |
924 | uint64_t); | |
925 | } else { | |
926 | boff = 0; | |
927 | } | |
928 | dlen = zp->z_blksz; | |
929 | rl = zfs_range_lock(zp, boff, dlen, RL_READER); | |
930 | if (zp->z_blksz == dlen) | |
931 | break; | |
932 | zfs_range_unlock(rl); | |
933 | } | |
934 | /* test for truncation needs to be done while range locked */ | |
935 | if (off >= zp->z_phys->zp_size) { | |
936 | error = ENOENT; | |
937 | goto out; | |
938 | } | |
939 | zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); | |
940 | zgd->zgd_rl = rl; | |
941 | zgd->zgd_zilog = zfsvfs->z_log; | |
942 | zgd->zgd_bp = &lr->lr_blkptr; | |
45d1cae3 BB |
943 | #ifdef DEBUG |
944 | if (zil_fault_io) { | |
945 | error = EIO; | |
946 | zil_fault_io = 0; | |
947 | } else { | |
948 | error = dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db); | |
949 | } | |
950 | #else | |
951 | error = dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db); | |
952 | #endif | |
953 | if (error != 0) { | |
954 | kmem_free(zgd, sizeof (zgd_t)); | |
955 | goto out; | |
956 | } | |
957 | ||
34dc7c2f BB |
958 | ASSERT(boff == db->db_offset); |
959 | lr->lr_blkoff = off - boff; | |
960 | error = dmu_sync(zio, db, &lr->lr_blkptr, | |
961 | lr->lr_common.lrc_txg, zfs_get_done, zgd); | |
962 | ASSERT((error && error != EINPROGRESS) || | |
963 | lr->lr_length <= zp->z_blksz); | |
964 | if (error == 0) | |
965 | zil_add_block(zfsvfs->z_log, &lr->lr_blkptr); | |
966 | /* | |
967 | * If we get EINPROGRESS, then we need to wait for a | |
968 | * write IO initiated by dmu_sync() to complete before | |
969 | * we can release this dbuf. We will finish everything | |
970 | * up in the zfs_get_done() callback. | |
971 | */ | |
972 | if (error == EINPROGRESS) | |
973 | return (0); | |
974 | dmu_buf_rele(db, zgd); | |
975 | kmem_free(zgd, sizeof (zgd_t)); | |
976 | } | |
977 | out: | |
978 | zfs_range_unlock(rl); | |
9babb374 BB |
979 | /* |
980 | * Release the vnode asynchronously as we currently have the | |
981 | * txg stopped from syncing. | |
982 | */ | |
983 | VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); | |
34dc7c2f BB |
984 | return (error); |
985 | } | |
986 | ||
987 | /*ARGSUSED*/ | |
988 | static int | |
989 | zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, | |
990 | caller_context_t *ct) | |
991 | { | |
992 | znode_t *zp = VTOZ(vp); | |
993 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
994 | int error; | |
995 | ||
996 | ZFS_ENTER(zfsvfs); | |
997 | ZFS_VERIFY_ZP(zp); | |
998 | ||
999 | if (flag & V_ACE_MASK) | |
1000 | error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); | |
1001 | else | |
1002 | error = zfs_zaccess_rwx(zp, mode, flag, cr); | |
1003 | ||
1004 | ZFS_EXIT(zfsvfs); | |
1005 | return (error); | |
1006 | } | |
1007 | ||
45d1cae3 BB |
1008 | /* |
1009 | * If vnode is for a device return a specfs vnode instead. | |
1010 | */ | |
1011 | static int | |
1012 | specvp_check(vnode_t **vpp, cred_t *cr) | |
1013 | { | |
1014 | int error = 0; | |
1015 | ||
1016 | if (IS_DEVVP(*vpp)) { | |
1017 | struct vnode *svp; | |
1018 | ||
1019 | svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); | |
1020 | VN_RELE(*vpp); | |
1021 | if (svp == NULL) | |
1022 | error = ENOSYS; | |
1023 | *vpp = svp; | |
1024 | } | |
1025 | return (error); | |
1026 | } | |
1027 | ||
1028 | ||
34dc7c2f BB |
1029 | /* |
1030 | * Lookup an entry in a directory, or an extended attribute directory. | |
1031 | * If it exists, return a held vnode reference for it. | |
1032 | * | |
1033 | * IN: dvp - vnode of directory to search. | |
1034 | * nm - name of entry to lookup. | |
1035 | * pnp - full pathname to lookup [UNUSED]. | |
1036 | * flags - LOOKUP_XATTR set if looking for an attribute. | |
1037 | * rdir - root directory vnode [UNUSED]. | |
1038 | * cr - credentials of caller. | |
1039 | * ct - caller context | |
1040 | * direntflags - directory lookup flags | |
1041 | * realpnp - returned pathname. | |
1042 | * | |
1043 | * OUT: vpp - vnode of located entry, NULL if not found. | |
1044 | * | |
1045 | * RETURN: 0 if success | |
1046 | * error code if failure | |
1047 | * | |
1048 | * Timestamps: | |
1049 | * NA | |
1050 | */ | |
1051 | /* ARGSUSED */ | |
1052 | static int | |
1053 | zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, | |
1054 | int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, | |
1055 | int *direntflags, pathname_t *realpnp) | |
1056 | { | |
1057 | znode_t *zdp = VTOZ(dvp); | |
1058 | zfsvfs_t *zfsvfs = zdp->z_zfsvfs; | |
45d1cae3 BB |
1059 | int error = 0; |
1060 | ||
1061 | /* fast path */ | |
1062 | if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { | |
1063 | ||
1064 | if (dvp->v_type != VDIR) { | |
1065 | return (ENOTDIR); | |
1066 | } else if (zdp->z_dbuf == NULL) { | |
1067 | return (EIO); | |
1068 | } | |
1069 | ||
1070 | if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { | |
1071 | error = zfs_fastaccesschk_execute(zdp, cr); | |
1072 | if (!error) { | |
1073 | *vpp = dvp; | |
1074 | VN_HOLD(*vpp); | |
1075 | return (0); | |
1076 | } | |
1077 | return (error); | |
1078 | } else { | |
1079 | vnode_t *tvp = dnlc_lookup(dvp, nm); | |
1080 | ||
1081 | if (tvp) { | |
1082 | error = zfs_fastaccesschk_execute(zdp, cr); | |
1083 | if (error) { | |
1084 | VN_RELE(tvp); | |
1085 | return (error); | |
1086 | } | |
1087 | if (tvp == DNLC_NO_VNODE) { | |
1088 | VN_RELE(tvp); | |
1089 | return (ENOENT); | |
1090 | } else { | |
1091 | *vpp = tvp; | |
1092 | return (specvp_check(vpp, cr)); | |
1093 | } | |
1094 | } | |
1095 | } | |
1096 | } | |
1097 | ||
1098 | DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); | |
34dc7c2f BB |
1099 | |
1100 | ZFS_ENTER(zfsvfs); | |
1101 | ZFS_VERIFY_ZP(zdp); | |
1102 | ||
1103 | *vpp = NULL; | |
1104 | ||
1105 | if (flags & LOOKUP_XATTR) { | |
1106 | /* | |
1107 | * If the xattr property is off, refuse the lookup request. | |
1108 | */ | |
1109 | if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { | |
1110 | ZFS_EXIT(zfsvfs); | |
1111 | return (EINVAL); | |
1112 | } | |
1113 | ||
1114 | /* | |
1115 | * We don't allow recursive attributes.. | |
1116 | * Maybe someday we will. | |
1117 | */ | |
1118 | if (zdp->z_phys->zp_flags & ZFS_XATTR) { | |
1119 | ZFS_EXIT(zfsvfs); | |
1120 | return (EINVAL); | |
1121 | } | |
1122 | ||
1123 | if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { | |
1124 | ZFS_EXIT(zfsvfs); | |
1125 | return (error); | |
1126 | } | |
1127 | ||
1128 | /* | |
1129 | * Do we have permission to get into attribute directory? | |
1130 | */ | |
1131 | ||
1132 | if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, | |
1133 | B_FALSE, cr)) { | |
1134 | VN_RELE(*vpp); | |
1135 | *vpp = NULL; | |
1136 | } | |
1137 | ||
1138 | ZFS_EXIT(zfsvfs); | |
1139 | return (error); | |
1140 | } | |
1141 | ||
1142 | if (dvp->v_type != VDIR) { | |
1143 | ZFS_EXIT(zfsvfs); | |
1144 | return (ENOTDIR); | |
1145 | } | |
1146 | ||
1147 | /* | |
1148 | * Check accessibility of directory. | |
1149 | */ | |
1150 | ||
1151 | if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { | |
1152 | ZFS_EXIT(zfsvfs); | |
1153 | return (error); | |
1154 | } | |
1155 | ||
1156 | if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), | |
1157 | NULL, U8_VALIDATE_ENTIRE, &error) < 0) { | |
1158 | ZFS_EXIT(zfsvfs); | |
1159 | return (EILSEQ); | |
1160 | } | |
1161 | ||
1162 | error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); | |
45d1cae3 BB |
1163 | if (error == 0) |
1164 | error = specvp_check(vpp, cr); | |
34dc7c2f BB |
1165 | |
1166 | ZFS_EXIT(zfsvfs); | |
1167 | return (error); | |
1168 | } | |
1169 | ||
/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the vp of the created or trunc'd file.
 *
 *	IN:	dvp	- vnode of directory to put new file entry in.
 *		name	- name of new file entry.
 *		vap	- attributes of new file.
 *		excl	- flag indicating exclusive or non-exclusive mode.
 *		mode	- mode to open file with.
 *		cr	- credentials of caller.
 *		flag	- open flags; FIGNORECASE and FAPPEND are examined.
 *		ct	- caller context
 *		vsecp	- ACL to be set
 *
 *	OUT:	vpp	- vnode of created or trunc'd entry.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */

/* ARGSUSED */
static int
zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
    int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
    vsecattr_t *vsecp)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	objset_t	*os;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	/* On a utf8-only file system, reject names that fail validation. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
	/*
	 * Retry point: control returns here after dmu_tx_assign() fails
	 * with ERESTART, so everything below is redone from scratch.
	 */
top:
	*vpp = NULL;

	/* Clear VSVTX when secpolicy_vnode_stky_modify() returns non-zero. */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~VSVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		zp = dzp;
		dl = NULL;	/* no dirent lock is taken on this path */
		error = 0;
	} else {
		/* possible VN_HOLD(zp) */
		int zflg = 0;

		if (flag & FIGNORECASE)
			zflg |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
		    NULL, NULL);
		if (error) {
			/* any failure to lock ".." is reported as EISDIR */
			if (strcmp(name, "..") == 0)
				error = EISDIR;
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
	if (zp == NULL) {
		uint64_t txtype;

		/*
		 * Create a new file object and update the directory
		 * to reference it.
		 */
		if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
			goto out;
		}

		/*
		 * We only support the creation of regular files in
		 * extended attribute directories.
		 */
		if ((dzp->z_phys->zp_flags & ZFS_XATTR) &&
		    (vap->va_type != VREG)) {
			error = EINVAL;
			goto out;
		}

		/*
		 * acl_ids is set up here and must be released with
		 * zfs_acl_ids_free() on every path from this point on.
		 */
		if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
		    &acl_ids)) != 0)
			goto out;
		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
			zfs_acl_ids_free(&acl_ids);
			error = EDQUOT;
			goto out;
		}

		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
		/* sampled once so the txhold and the sync below agree */
		fuid_dirtied = zfsvfs->z_fuid_dirty;
		if (fuid_dirtied)
			zfs_fuid_txhold(zfsvfs, tx);
		dmu_tx_hold_bonus(tx, dzp->z_id);
		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
		/* extra write hold when the ACL exceeds ZFS_ACE_SPACE bytes */
		if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, SPA_MAXBLOCKSIZE);
		}
		error = dmu_tx_assign(tx, TXG_NOWAIT);
		if (error) {
			/*
			 * Give back what this pass acquired.  On ERESTART,
			 * wait on the tx and start over from "top", which
			 * re-takes the dirent lock and rebuilds acl_ids.
			 */
			zfs_acl_ids_free(&acl_ids);
			zfs_dirent_unlock(dl);
			if (error == ERESTART) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);

		if (fuid_dirtied)
			zfs_fuid_sync(zfsvfs, tx);

		(void) zfs_link_create(dl, zp, tx, ZNEW);

		/* log the create; TX_CI marks a case-insensitive request */
		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
		if (flag & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
		    vsecp, acl_ids.z_fuidp, vap);
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_commit(tx);
	} else {
		int aflags = (flag & FAPPEND) ? V_APPEND : 0;

		/*
		 * A directory entry already exists for this name.
		 */
		/*
		 * Can't truncate an existing file if in exclusive mode.
		 */
		if (excl == EXCL) {
			error = EEXIST;
			goto out;
		}
		/*
		 * Can't open a directory for writing.
		 */
		if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
			error = EISDIR;
			goto out;
		}
		/*
		 * Verify requested access to file.
		 */
		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
			goto out;
		}

		/* bump the directory's z_seq under its z_lock */
		mutex_enter(&dzp->z_lock);
		dzp->z_seq++;
		mutex_exit(&dzp->z_lock);

		/*
		 * Truncate regular files if requested.
		 */
		if ((ZTOV(zp)->v_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
			/* we can't hold any locks when calling zfs_freesp() */
			zfs_dirent_unlock(dl);
			dl = NULL;	/* so "out" does not unlock it again */
			error = zfs_freesp(zp, 0, 0, mode, TRUE);
			if (error == 0) {
				vnevent_create(ZTOV(zp), ct);
			}
		}
	}
out:

	if (dl)
		zfs_dirent_unlock(dl);

	if (error) {
		/* drop the hold on zp, if one was obtained */
		if (zp)
			VN_RELE(ZTOV(zp));
	} else {
		/* hand the held vnode to the caller */
		*vpp = ZTOV(zp);
		error = specvp_check(vpp, cr);
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}
1407 | ||
/*
 * Remove an entry from a directory.
 *
 *	IN:	dvp	- vnode of directory to remove entry from.
 *		name	- name of entry to remove.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */
/*ARGSUSED*/
static int
zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
    int flags)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	znode_t		*xzp = NULL;
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	acl_obj, xattr_obj;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	boolean_t	may_delete_now, delete_now = FALSE;
	boolean_t	unlinked, toobig = FALSE;
	uint64_t	txtype;
	pathname_t	*realnmp = NULL;
	pathname_t	realnm;
	int		error;
	int		zflg = ZEXISTS;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/*
	 * For a case-insensitive request, realnm is handed to
	 * zfs_dirent_lock() and its pn_buf is the name later removed from
	 * the DNLC.  It is freed with pn_free() on every exit path.
	 */
	if (flags & FIGNORECASE) {
		zflg |= ZCILOOK;
		pn_alloc(&realnm);
		realnmp = &realnm;
	}

top:
	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, realnmp)) {
		if (realnmp)
			pn_free(realnmp);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}

	vnevent_remove(vp, dvp, name, ct);

	if (realnmp)
		dnlc_remove(dvp, realnmp->pn_buf);
	else
		dnlc_remove(dvp, name);

	/*
	 * First sample, taken under v_lock: immediate deletion is only a
	 * candidate while v_count is 1 and the vnode has no cached data.
	 * The answer is re-checked after the link has been destroyed.
	 */
	mutex_enter(&vp->v_lock);
	may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp);
	mutex_exit(&vp->v_lock);

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_bonus(tx, zp->z_id);
	if (may_delete_now) {
		toobig =
		    zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
		/* if the file is too big, only hold_free a token amount */
		dmu_tx_hold_free(tx, zp->z_id, 0,
		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
	}

	/* are there any extended attributes? */
	if ((xattr_obj = zp->z_phys->zp_xattr) != 0) {
		/* XXX - do we need this if we are deleting? */
		dmu_tx_hold_bonus(tx, xattr_obj);
	}

	/* are there any additional acls */
	if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 &&
	    may_delete_now)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/*
		 * Drop the dirent lock and the vnode hold.  On ERESTART,
		 * wait on the tx and redo the operation from "top";
		 * realnmp stays allocated for the retry.
		 */
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		if (realnmp)
			pn_free(realnmp);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	if (unlinked) {
		/*
		 * Second check, again under v_lock: delete immediately only
		 * if the earlier answer still holds, the file is not too
		 * big, and the xattr and external ACL object ids match the
		 * values the tx holds above were taken for.
		 */
		mutex_enter(&vp->v_lock);
		delete_now = may_delete_now && !toobig &&
		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
		    zp->z_phys->zp_xattr == xattr_obj &&
		    zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj;
		mutex_exit(&vp->v_lock);
	}

	if (delete_now) {
		if (zp->z_phys->zp_xattr) {
			/*
			 * Mark the xattr directory unlinked, zero its link
			 * count and add it to the unlinked set in this tx.
			 */
			error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
			ASSERT3U(error, ==, 0);
			ASSERT3U(xzp->z_phys->zp_links, ==, 2);
			dmu_buf_will_dirty(xzp->z_dbuf, tx);
			mutex_enter(&xzp->z_lock);
			xzp->z_unlinked = 1;
			xzp->z_phys->zp_links = 0;
			mutex_exit(&xzp->z_lock);
			zfs_unlinked_add(xzp, tx);
			zp->z_phys->zp_xattr = 0; /* probably unnecessary */
		}
		/*
		 * Drop the vnode's count to zero by hand before deleting
		 * the znode; this is why "out" skips VN_RELE(vp) when
		 * delete_now is set.
		 */
		mutex_enter(&zp->z_lock);
		mutex_enter(&vp->v_lock);
		vp->v_count--;
		ASSERT3U(vp->v_count, ==, 0);
		mutex_exit(&vp->v_lock);
		mutex_exit(&zp->z_lock);
		zfs_znode_delete(zp, tx);
	} else if (unlinked) {
		zfs_unlinked_add(zp, tx);
	}

	/* log the removal; TX_CI marks a case-insensitive request */
	txtype = TX_REMOVE;
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_remove(zilog, tx, txtype, dzp, name);

	dmu_tx_commit(tx);
out:
	if (realnmp)
		pn_free(realnmp);

	zfs_dirent_unlock(dl);

	if (!delete_now) {
		VN_RELE(vp);
	} else if (xzp) {
		/* this rele is delayed to prevent nesting transactions */
		VN_RELE(ZTOV(xzp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}
1604 | ||
1605 | /* | |
1606 | * Create a new directory and insert it into dvp using the name | |
1607 | * provided. Return a pointer to the inserted directory. | |
1608 | * | |
1609 | * IN: dvp - vnode of directory to add subdir to. | |
1610 | * dirname - name of new directory. | |
1611 | * vap - attributes of new directory. | |
1612 | * cr - credentials of caller. | |
1613 | * ct - caller context | |
1614 | * vsecp - ACL to be set | |
1615 | * | |
1616 | * OUT: vpp - vnode of created directory. | |
1617 | * | |
1618 | * RETURN: 0 if success | |
1619 | * error code if failure | |
1620 | * | |
1621 | * Timestamps: | |
1622 | * dvp - ctime|mtime updated | |
1623 | * vp - ctime|mtime|atime updated | |
1624 | */ | |
1625 | /*ARGSUSED*/ | |
1626 | static int | |
1627 | zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, | |
1628 | caller_context_t *ct, int flags, vsecattr_t *vsecp) | |
1629 | { | |
1630 | znode_t *zp, *dzp = VTOZ(dvp); | |
1631 | zfsvfs_t *zfsvfs = dzp->z_zfsvfs; | |
1632 | zilog_t *zilog; | |
1633 | zfs_dirlock_t *dl; | |
1634 | uint64_t txtype; | |
1635 | dmu_tx_t *tx; | |
1636 | int error; | |
34dc7c2f | 1637 | int zf = ZNEW; |
b128c09f BB |
1638 | ksid_t *ksid; |
1639 | uid_t uid; | |
1640 | gid_t gid = crgetgid(cr); | |
9babb374 BB |
1641 | zfs_acl_ids_t acl_ids; |
1642 | boolean_t fuid_dirtied; | |
34dc7c2f BB |
1643 | |
1644 | ASSERT(vap->va_type == VDIR); | |
1645 | ||
1646 | /* | |
1647 | * If we have an ephemeral id, ACL, or XVATTR then | |
1648 | * make sure file system is at proper version | |
1649 | */ | |
1650 | ||
b128c09f BB |
1651 | ksid = crgetsid(cr, KSID_OWNER); |
1652 | if (ksid) | |
1653 | uid = ksid_getid(ksid); | |
1654 | else | |
1655 | uid = crgetuid(cr); | |
34dc7c2f | 1656 | if (zfsvfs->z_use_fuids == B_FALSE && |
b128c09f BB |
1657 | (vsecp || (vap->va_mask & AT_XVATTR) || |
1658 | IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) | |
34dc7c2f BB |
1659 | return (EINVAL); |
1660 | ||
1661 | ZFS_ENTER(zfsvfs); | |
1662 | ZFS_VERIFY_ZP(dzp); | |
1663 | zilog = zfsvfs->z_log; | |
1664 | ||
1665 | if (dzp->z_phys->zp_flags & ZFS_XATTR) { | |
1666 | ZFS_EXIT(zfsvfs); | |
1667 | return (EINVAL); | |
1668 | } | |
1669 | ||
1670 | if (zfsvfs->z_utf8 && u8_validate(dirname, | |
1671 | strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { | |
1672 | ZFS_EXIT(zfsvfs); | |
1673 | return (EILSEQ); | |
1674 | } | |
1675 | if (flags & FIGNORECASE) | |
1676 | zf |= ZCILOOK; | |
1677 | ||
1678 | if (vap->va_mask & AT_XVATTR) | |
1679 | if ((error = secpolicy_xvattr((xvattr_t *)vap, | |
1680 | crgetuid(cr), cr, vap->va_type)) != 0) { | |
1681 | ZFS_EXIT(zfsvfs); | |
1682 | return (error); | |
1683 | } | |
1684 | ||
1685 | /* | |
1686 | * First make sure the new directory doesn't exist. | |
1687 | */ | |
1688 | top: | |
1689 | *vpp = NULL; | |
1690 | ||
1691 | if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, | |
1692 | NULL, NULL)) { | |
1693 | ZFS_EXIT(zfsvfs); | |
1694 | return (error); | |
1695 | } | |
1696 | ||
1697 | if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { | |
1698 | zfs_dirent_unlock(dl); | |
1699 | ZFS_EXIT(zfsvfs); | |
1700 | return (error); | |
1701 | } | |
1702 | ||
9babb374 BB |
1703 | if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, |
1704 | &acl_ids)) != 0) { | |
1705 | zfs_dirent_unlock(dl); | |
1706 | ZFS_EXIT(zfsvfs); | |
1707 | return (error); | |
34dc7c2f | 1708 | } |
9babb374 | 1709 | if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { |
45d1cae3 | 1710 | zfs_acl_ids_free(&acl_ids); |
9babb374 BB |
1711 | zfs_dirent_unlock(dl); |
1712 | ZFS_EXIT(zfsvfs); | |
1713 | return (EDQUOT); | |
1714 | } | |
1715 | ||
34dc7c2f BB |
1716 | /* |
1717 | * Add a new entry to the directory. | |
1718 | */ | |
1719 | tx = dmu_tx_create(zfsvfs->z_os); | |
1720 | dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); | |
1721 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); | |
9babb374 BB |
1722 | fuid_dirtied = zfsvfs->z_fuid_dirty; |
1723 | if (fuid_dirtied) | |
1724 | zfs_fuid_txhold(zfsvfs, tx); | |
1725 | if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) | |
34dc7c2f BB |
1726 | dmu_tx_hold_write(tx, DMU_NEW_OBJECT, |
1727 | 0, SPA_MAXBLOCKSIZE); | |
fb5f0bc8 | 1728 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
34dc7c2f | 1729 | if (error) { |
9babb374 | 1730 | zfs_acl_ids_free(&acl_ids); |
34dc7c2f | 1731 | zfs_dirent_unlock(dl); |
fb5f0bc8 | 1732 | if (error == ERESTART) { |
34dc7c2f BB |
1733 | dmu_tx_wait(tx); |
1734 | dmu_tx_abort(tx); | |
1735 | goto top; | |
1736 | } | |
1737 | dmu_tx_abort(tx); | |
1738 | ZFS_EXIT(zfsvfs); | |
34dc7c2f BB |
1739 | return (error); |
1740 | } | |
1741 | ||
1742 | /* | |
1743 | * Create new node. | |
1744 | */ | |
9babb374 | 1745 | zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); |
34dc7c2f | 1746 | |
9babb374 BB |
1747 | if (fuid_dirtied) |
1748 | zfs_fuid_sync(zfsvfs, tx); | |
34dc7c2f BB |
1749 | /* |
1750 | * Now put new name in parent dir. | |
1751 | */ | |
1752 | (void) zfs_link_create(dl, zp, tx, ZNEW); | |
1753 | ||
1754 | *vpp = ZTOV(zp); | |
1755 | ||
1756 | txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); | |
1757 | if (flags & FIGNORECASE) | |
1758 | txtype |= TX_CI; | |
9babb374 BB |
1759 | zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, |
1760 | acl_ids.z_fuidp, vap); | |
34dc7c2f | 1761 | |
9babb374 | 1762 | zfs_acl_ids_free(&acl_ids); |
34dc7c2f BB |
1763 | dmu_tx_commit(tx); |
1764 | ||
1765 | zfs_dirent_unlock(dl); | |
1766 | ||
1767 | ZFS_EXIT(zfsvfs); | |
1768 | return (0); | |
1769 | } | |
1770 | ||
1771 | /* | |
1772 | * Remove a directory subdir entry. If the current working | |
1773 | * directory is the same as the subdir to be removed, the | |
1774 | * remove will fail. | |
1775 | * | |
1776 | * IN: dvp - vnode of directory to remove from. | |
1777 | * name - name of directory to be removed. | |
1778 | * cwd - vnode of current working directory. | |
1779 | * cr - credentials of caller. | |
1780 | * ct - caller context | |
1781 | * flags - case flags | |
1782 | * | |
1783 | * RETURN: 0 if success | |
1784 | * error code if failure | |
1785 | * | |
1786 | * Timestamps: | |
1787 | * dvp - ctime|mtime updated | |
1788 | */ | |
1789 | /*ARGSUSED*/ | |
1790 | static int | |
1791 | zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, | |
1792 | caller_context_t *ct, int flags) | |
1793 | { | |
1794 | znode_t *dzp = VTOZ(dvp); | |
1795 | znode_t *zp; | |
1796 | vnode_t *vp; | |
1797 | zfsvfs_t *zfsvfs = dzp->z_zfsvfs; | |
1798 | zilog_t *zilog; | |
1799 | zfs_dirlock_t *dl; | |
1800 | dmu_tx_t *tx; | |
1801 | int error; | |
1802 | int zflg = ZEXISTS; | |
1803 | ||
1804 | ZFS_ENTER(zfsvfs); | |
1805 | ZFS_VERIFY_ZP(dzp); | |
1806 | zilog = zfsvfs->z_log; | |
1807 | ||
1808 | if (flags & FIGNORECASE) | |
1809 | zflg |= ZCILOOK; | |
1810 | top: | |
1811 | zp = NULL; | |
1812 | ||
1813 | /* | |
1814 | * Attempt to lock directory; fail if entry doesn't exist. | |
1815 | */ | |
1816 | if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, | |
1817 | NULL, NULL)) { | |
1818 | ZFS_EXIT(zfsvfs); | |
1819 | return (error); | |
1820 | } | |
1821 | ||
1822 | vp = ZTOV(zp); | |
1823 | ||
1824 | if (error = zfs_zaccess_delete(dzp, zp, cr)) { | |
1825 | goto out; | |
1826 | } | |
1827 | ||
1828 | if (vp->v_type != VDIR) { | |
1829 | error = ENOTDIR; | |
1830 | goto out; | |
1831 | } | |
1832 | ||
1833 | if (vp == cwd) { | |
1834 | error = EINVAL; | |
1835 | goto out; | |
1836 | } | |
1837 | ||
1838 | vnevent_rmdir(vp, dvp, name, ct); | |
1839 | ||
1840 | /* | |
1841 | * Grab a lock on the directory to make sure that noone is | |
1842 | * trying to add (or lookup) entries while we are removing it. | |
1843 | */ | |
1844 | rw_enter(&zp->z_name_lock, RW_WRITER); | |
1845 | ||
1846 | /* | |
1847 | * Grab a lock on the parent pointer to make sure we play well | |
1848 | * with the treewalk and directory rename code. | |
1849 | */ | |
1850 | rw_enter(&zp->z_parent_lock, RW_WRITER); | |
1851 | ||
1852 | tx = dmu_tx_create(zfsvfs->z_os); | |
1853 | dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); | |
1854 | dmu_tx_hold_bonus(tx, zp->z_id); | |
1855 | dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); | |
fb5f0bc8 | 1856 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
34dc7c2f BB |
1857 | if (error) { |
1858 | rw_exit(&zp->z_parent_lock); | |
1859 | rw_exit(&zp->z_name_lock); | |
1860 | zfs_dirent_unlock(dl); | |
1861 | VN_RELE(vp); | |
fb5f0bc8 | 1862 | if (error == ERESTART) { |
34dc7c2f BB |
1863 | dmu_tx_wait(tx); |
1864 | dmu_tx_abort(tx); | |
1865 | goto top; | |
1866 | } | |
1867 | dmu_tx_abort(tx); | |
1868 | ZFS_EXIT(zfsvfs); | |
1869 | return (error); | |
1870 | } | |
1871 | ||
1872 | error = zfs_link_destroy(dl, zp, tx, zflg, NULL); | |
1873 | ||
1874 | if (error == 0) { | |
1875 | uint64_t txtype = TX_RMDIR; | |
1876 | if (flags & FIGNORECASE) | |
1877 | txtype |= TX_CI; | |
1878 | zfs_log_remove(zilog, tx, txtype, dzp, name); | |
1879 | } | |
1880 | ||
1881 | dmu_tx_commit(tx); | |
1882 | ||
1883 | rw_exit(&zp->z_parent_lock); | |
1884 | rw_exit(&zp->z_name_lock); | |
1885 | out: | |
1886 | zfs_dirent_unlock(dl); | |
1887 | ||
1888 | VN_RELE(vp); | |
1889 | ||
1890 | ZFS_EXIT(zfsvfs); | |
1891 | return (error); | |
1892 | } | |
1893 | ||
1894 | /* | |
1895 | * Read as many directory entries as will fit into the provided | |
1896 | * buffer from the given directory cursor position (specified in | |
1897 | * the uio structure. | |
1898 | * | |
1899 | * IN: vp - vnode of directory to read. | |
1900 | * uio - structure supplying read location, range info, | |
1901 | * and return buffer. | |
1902 | * cr - credentials of caller. | |
1903 | * ct - caller context | |
1904 | * flags - case flags | |
1905 | * | |
1906 | * OUT: uio - updated offset and range, buffer filled. | |
1907 | * eofp - set to true if end-of-file detected. | |
1908 | * | |
1909 | * RETURN: 0 if success | |
1910 | * error code if failure | |
1911 | * | |
1912 | * Timestamps: | |
1913 | * vp - atime updated | |
1914 | * | |
1915 | * Note that the low 4 bits of the cookie returned by zap is always zero. | |
1916 | * This allows us to use the low range for "special" directory entries: | |
1917 | * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, | |
1918 | * we use the offset 2 for the '.zfs' directory. | |
1919 | */ | |
1920 | /* ARGSUSED */ | |
1921 | static int | |
1922 | zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, | |
1923 | caller_context_t *ct, int flags) | |
1924 | { | |
1925 | znode_t *zp = VTOZ(vp); | |
1926 | iovec_t *iovp; | |
1927 | edirent_t *eodp; | |
1928 | dirent64_t *odp; | |
1929 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
1930 | objset_t *os; | |
1931 | caddr_t outbuf; | |
1932 | size_t bufsize; | |
1933 | zap_cursor_t zc; | |
1934 | zap_attribute_t zap; | |
1935 | uint_t bytes_wanted; | |
1936 | uint64_t offset; /* must be unsigned; checks for < 1 */ | |
1937 | int local_eof; | |
1938 | int outcount; | |
1939 | int error; | |
1940 | uint8_t prefetch; | |
1941 | boolean_t check_sysattrs; | |
1942 | ||
1943 | ZFS_ENTER(zfsvfs); | |
1944 | ZFS_VERIFY_ZP(zp); | |
1945 | ||
1946 | /* | |
1947 | * If we are not given an eof variable, | |
1948 | * use a local one. | |
1949 | */ | |
1950 | if (eofp == NULL) | |
1951 | eofp = &local_eof; | |
1952 | ||
1953 | /* | |
1954 | * Check for valid iov_len. | |
1955 | */ | |
1956 | if (uio->uio_iov->iov_len <= 0) { | |
1957 | ZFS_EXIT(zfsvfs); | |
1958 | return (EINVAL); | |
1959 | } | |
1960 | ||
1961 | /* | |
1962 | * Quit if directory has been removed (posix) | |
1963 | */ | |
1964 | if ((*eofp = zp->z_unlinked) != 0) { | |
1965 | ZFS_EXIT(zfsvfs); | |
1966 | return (0); | |
1967 | } | |
1968 | ||
1969 | error = 0; | |
1970 | os = zfsvfs->z_os; | |
1971 | offset = uio->uio_loffset; | |
1972 | prefetch = zp->z_zn_prefetch; | |
1973 | ||
1974 | /* | |
1975 | * Initialize the iterator cursor. | |
1976 | */ | |
1977 | if (offset <= 3) { | |
1978 | /* | |
1979 | * Start iteration from the beginning of the directory. | |
1980 | */ | |
1981 | zap_cursor_init(&zc, os, zp->z_id); | |
1982 | } else { | |
1983 | /* | |
1984 | * The offset is a serialized cursor. | |
1985 | */ | |
1986 | zap_cursor_init_serialized(&zc, os, zp->z_id, offset); | |
1987 | } | |
1988 | ||
1989 | /* | |
1990 | * Get space to change directory entries into fs independent format. | |
1991 | */ | |
1992 | iovp = uio->uio_iov; | |
1993 | bytes_wanted = iovp->iov_len; | |
1994 | if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { | |
1995 | bufsize = bytes_wanted; | |
1996 | outbuf = kmem_alloc(bufsize, KM_SLEEP); | |
1997 | odp = (struct dirent64 *)outbuf; | |
1998 | } else { | |
1999 | bufsize = bytes_wanted; | |
2000 | odp = (struct dirent64 *)iovp->iov_base; | |
2001 | } | |
2002 | eodp = (struct edirent *)odp; | |
2003 | ||
2004 | /* | |
b128c09f BB |
2005 | * If this VFS supports the system attribute view interface; and |
2006 | * we're looking at an extended attribute directory; and we care | |
2007 | * about normalization conflicts on this vfs; then we must check | |
2008 | * for normalization conflicts with the sysattr name space. | |
34dc7c2f | 2009 | */ |
b128c09f | 2010 | check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && |
34dc7c2f BB |
2011 | (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && |
2012 | (flags & V_RDDIR_ENTFLAGS); | |
2013 | ||
2014 | /* | |
2015 | * Transform to file-system independent format | |
2016 | */ | |
2017 | outcount = 0; | |
2018 | while (outcount < bytes_wanted) { | |
2019 | ino64_t objnum; | |
2020 | ushort_t reclen; | |
2021 | off64_t *next; | |
2022 | ||
2023 | /* | |
2024 | * Special case `.', `..', and `.zfs'. | |
2025 | */ | |
2026 | if (offset == 0) { | |
2027 | (void) strcpy(zap.za_name, "."); | |
2028 | zap.za_normalization_conflict = 0; | |
2029 | objnum = zp->z_id; | |
2030 | } else if (offset == 1) { | |
2031 | (void) strcpy(zap.za_name, ".."); | |
2032 | zap.za_normalization_conflict = 0; | |
2033 | objnum = zp->z_phys->zp_parent; | |
2034 | } else if (offset == 2 && zfs_show_ctldir(zp)) { | |
2035 | (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); | |
2036 | zap.za_normalization_conflict = 0; | |
2037 | objnum = ZFSCTL_INO_ROOT; | |
2038 | } else { | |
2039 | /* | |
2040 | * Grab next entry. | |
2041 | */ | |
2042 | if (error = zap_cursor_retrieve(&zc, &zap)) { | |
2043 | if ((*eofp = (error == ENOENT)) != 0) | |
2044 | break; | |
2045 | else | |
2046 | goto update; | |
2047 | } | |
2048 | ||
2049 | if (zap.za_integer_length != 8 || | |
2050 | zap.za_num_integers != 1) { | |
2051 | cmn_err(CE_WARN, "zap_readdir: bad directory " | |
2052 | "entry, obj = %lld, offset = %lld\n", | |
2053 | (u_longlong_t)zp->z_id, | |
2054 | (u_longlong_t)offset); | |
2055 | error = ENXIO; | |
2056 | goto update; | |
2057 | } | |
2058 | ||
2059 | objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); | |
2060 | /* | |
2061 | * MacOS X can extract the object type here such as: | |
2062 | * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); | |
2063 | */ | |
2064 | ||
2065 | if (check_sysattrs && !zap.za_normalization_conflict) { | |
2066 | zap.za_normalization_conflict = | |
2067 | xattr_sysattr_casechk(zap.za_name); | |
2068 | } | |
2069 | } | |
2070 | ||
9babb374 BB |
2071 | if (flags & V_RDDIR_ACCFILTER) { |
2072 | /* | |
2073 | * If we have no access at all, don't include | |
2074 | * this entry in the returned information | |
2075 | */ | |
2076 | znode_t *ezp; | |
2077 | if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) | |
2078 | goto skip_entry; | |
2079 | if (!zfs_has_access(ezp, cr)) { | |
2080 | VN_RELE(ZTOV(ezp)); | |
2081 | goto skip_entry; | |
2082 | } | |
2083 | VN_RELE(ZTOV(ezp)); | |
2084 | } | |
2085 | ||
34dc7c2f BB |
2086 | if (flags & V_RDDIR_ENTFLAGS) |
2087 | reclen = EDIRENT_RECLEN(strlen(zap.za_name)); | |
2088 | else | |
2089 | reclen = DIRENT64_RECLEN(strlen(zap.za_name)); | |
2090 | ||
2091 | /* | |
2092 | * Will this entry fit in the buffer? | |
2093 | */ | |
2094 | if (outcount + reclen > bufsize) { | |
2095 | /* | |
2096 | * Did we manage to fit anything in the buffer? | |
2097 | */ | |
2098 | if (!outcount) { | |
2099 | error = EINVAL; | |
2100 | goto update; | |
2101 | } | |
2102 | break; | |
2103 | } | |
2104 | if (flags & V_RDDIR_ENTFLAGS) { | |
2105 | /* | |
2106 | * Add extended flag entry: | |
2107 | */ | |
2108 | eodp->ed_ino = objnum; | |
2109 | eodp->ed_reclen = reclen; | |
2110 | /* NOTE: ed_off is the offset for the *next* entry */ | |
2111 | next = &(eodp->ed_off); | |
2112 | eodp->ed_eflags = zap.za_normalization_conflict ? | |
2113 | ED_CASE_CONFLICT : 0; | |
2114 | (void) strncpy(eodp->ed_name, zap.za_name, | |
2115 | EDIRENT_NAMELEN(reclen)); | |
2116 | eodp = (edirent_t *)((intptr_t)eodp + reclen); | |
2117 | } else { | |
2118 | /* | |
2119 | * Add normal entry: | |
2120 | */ | |
2121 | odp->d_ino = objnum; | |
2122 | odp->d_reclen = reclen; | |
2123 | /* NOTE: d_off is the offset for the *next* entry */ | |
2124 | next = &(odp->d_off); | |
2125 | (void) strncpy(odp->d_name, zap.za_name, | |
2126 | DIRENT64_NAMELEN(reclen)); | |
2127 | odp = (dirent64_t *)((intptr_t)odp + reclen); | |
2128 | } | |
2129 | outcount += reclen; | |
2130 | ||
2131 | ASSERT(outcount <= bufsize); | |
2132 | ||
2133 | /* Prefetch znode */ | |
2134 | if (prefetch) | |
2135 | dmu_prefetch(os, objnum, 0, 0); | |
2136 | ||
9babb374 | 2137 | skip_entry: |
34dc7c2f BB |
2138 | /* |
2139 | * Move to the next entry, fill in the previous offset. | |
2140 | */ | |
2141 | if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { | |
2142 | zap_cursor_advance(&zc); | |
2143 | offset = zap_cursor_serialize(&zc); | |
2144 | } else { | |
2145 | offset += 1; | |
2146 | } | |
2147 | *next = offset; | |
2148 | } | |
2149 | zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ | |
2150 | ||
2151 | if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { | |
2152 | iovp->iov_base += outcount; | |
2153 | iovp->iov_len -= outcount; | |
2154 | uio->uio_resid -= outcount; | |
2155 | } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { | |
2156 | /* | |
2157 | * Reset the pointer. | |
2158 | */ | |
2159 | offset = uio->uio_loffset; | |
2160 | } | |
2161 | ||
2162 | update: | |
2163 | zap_cursor_fini(&zc); | |
2164 | if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) | |
2165 | kmem_free(outbuf, bufsize); | |
2166 | ||
2167 | if (error == ENOENT) | |
2168 | error = 0; | |
2169 | ||
2170 | ZFS_ACCESSTIME_STAMP(zfsvfs, zp); | |
2171 | ||
2172 | uio->uio_loffset = offset; | |
2173 | ZFS_EXIT(zfsvfs); | |
2174 | return (error); | |
2175 | } | |
2176 | ||
2177 | ulong_t zfs_fsync_sync_cnt = 4; | |
2178 | ||
2179 | static int | |
2180 | zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) | |
2181 | { | |
2182 | znode_t *zp = VTOZ(vp); | |
2183 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
2184 | ||
2185 | /* | |
2186 | * Regardless of whether this is required for standards conformance, | |
2187 | * this is the logical behavior when fsync() is called on a file with | |
2188 | * dirty pages. We use B_ASYNC since the ZIL transactions are already | |
2189 | * going to be pushed out as part of the zil_commit(). | |
2190 | */ | |
2191 | if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && | |
2192 | (vp->v_type == VREG) && !(IS_SWAPVP(vp))) | |
2193 | (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); | |
2194 | ||
2195 | (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); | |
2196 | ||
2197 | ZFS_ENTER(zfsvfs); | |
2198 | ZFS_VERIFY_ZP(zp); | |
2199 | zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); | |
2200 | ZFS_EXIT(zfsvfs); | |
2201 | return (0); | |
2202 | } | |
2203 | ||
2204 | ||
2205 | /* | |
2206 | * Get the requested file attributes and place them in the provided | |
2207 | * vattr structure. | |
2208 | * | |
2209 | * IN: vp - vnode of file. | |
2210 | * vap - va_mask identifies requested attributes. | |
2211 | * If AT_XVATTR set, then optional attrs are requested | |
2212 | * flags - ATTR_NOACLCHECK (CIFS server context) | |
2213 | * cr - credentials of caller. | |
2214 | * ct - caller context | |
2215 | * | |
2216 | * OUT: vap - attribute values. | |
2217 | * | |
2218 | * RETURN: 0 (always succeeds) | |
2219 | */ | |
2220 | /* ARGSUSED */ | |
2221 | static int | |
2222 | zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, | |
2223 | caller_context_t *ct) | |
2224 | { | |
2225 | znode_t *zp = VTOZ(vp); | |
2226 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
2227 | znode_phys_t *pzp; | |
2228 | int error = 0; | |
2229 | uint64_t links; | |
2230 | xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ | |
2231 | xoptattr_t *xoap = NULL; | |
2232 | boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; | |
2233 | ||
2234 | ZFS_ENTER(zfsvfs); | |
2235 | ZFS_VERIFY_ZP(zp); | |
2236 | pzp = zp->z_phys; | |
2237 | ||
34dc7c2f BB |
2238 | /* |
2239 | * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. | |
2240 | * Also, if we are the owner don't bother, since owner should | |
2241 | * always be allowed to read basic attributes of file. | |
2242 | */ | |
2243 | if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) && | |
2244 | (pzp->zp_uid != crgetuid(cr))) { | |
2245 | if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, | |
2246 | skipaclchk, cr)) { | |
34dc7c2f BB |
2247 | ZFS_EXIT(zfsvfs); |
2248 | return (error); | |
2249 | } | |
2250 | } | |
2251 | ||
2252 | /* | |
2253 | * Return all attributes. It's cheaper to provide the answer | |
2254 | * than to determine whether we were asked the question. | |
2255 | */ | |
2256 | ||
9babb374 | 2257 | mutex_enter(&zp->z_lock); |
34dc7c2f BB |
2258 | vap->va_type = vp->v_type; |
2259 | vap->va_mode = pzp->zp_mode & MODEMASK; | |
2260 | zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); | |
2261 | vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; | |
2262 | vap->va_nodeid = zp->z_id; | |
2263 | if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) | |
2264 | links = pzp->zp_links + 1; | |
2265 | else | |
2266 | links = pzp->zp_links; | |
2267 | vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ | |
2268 | vap->va_size = pzp->zp_size; | |
2269 | vap->va_rdev = vp->v_rdev; | |
2270 | vap->va_seq = zp->z_seq; | |
2271 | ||
2272 | /* | |
2273 | * Add in any requested optional attributes and the create time. | |
2274 | * Also set the corresponding bits in the returned attribute bitmap. | |
2275 | */ | |
2276 | if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { | |
2277 | if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { | |
2278 | xoap->xoa_archive = | |
2279 | ((pzp->zp_flags & ZFS_ARCHIVE) != 0); | |
2280 | XVA_SET_RTN(xvap, XAT_ARCHIVE); | |
2281 | } | |
2282 | ||
2283 | if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { | |
2284 | xoap->xoa_readonly = | |
2285 | ((pzp->zp_flags & ZFS_READONLY) != 0); | |
2286 | XVA_SET_RTN(xvap, XAT_READONLY); | |
2287 | } | |
2288 | ||
2289 | if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { | |
2290 | xoap->xoa_system = | |
2291 | ((pzp->zp_flags & ZFS_SYSTEM) != 0); | |
2292 | XVA_SET_RTN(xvap, XAT_SYSTEM); | |
2293 | } | |
2294 | ||
2295 | if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { | |
2296 | xoap->xoa_hidden = | |
2297 | ((pzp->zp_flags & ZFS_HIDDEN) != 0); | |
2298 | XVA_SET_RTN(xvap, XAT_HIDDEN); | |
2299 | } | |
2300 | ||
2301 | if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { | |
2302 | xoap->xoa_nounlink = | |
2303 | ((pzp->zp_flags & ZFS_NOUNLINK) != 0); | |
2304 | XVA_SET_RTN(xvap, XAT_NOUNLINK); | |
2305 | } | |
2306 | ||
2307 | if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { | |
2308 | xoap->xoa_immutable = | |
2309 | ((pzp->zp_flags & ZFS_IMMUTABLE) != 0); | |
2310 | XVA_SET_RTN(xvap, XAT_IMMUTABLE); | |
2311 | } | |
2312 | ||
2313 | if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { | |
2314 | xoap->xoa_appendonly = | |
2315 | ((pzp->zp_flags & ZFS_APPENDONLY) != 0); | |
2316 | XVA_SET_RTN(xvap, XAT_APPENDONLY); | |
2317 | } | |
2318 | ||
2319 | if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { | |
2320 | xoap->xoa_nodump = | |
2321 | ((pzp->zp_flags & ZFS_NODUMP) != 0); | |
2322 | XVA_SET_RTN(xvap, XAT_NODUMP); | |
2323 | } | |
2324 | ||
2325 | if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { | |
2326 | xoap->xoa_opaque = | |
2327 | ((pzp->zp_flags & ZFS_OPAQUE) != 0); | |
2328 | XVA_SET_RTN(xvap, XAT_OPAQUE); | |
2329 | } | |
2330 | ||
2331 | if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { | |
2332 | xoap->xoa_av_quarantined = | |
2333 | ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0); | |
2334 | XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); | |
2335 | } | |
2336 | ||
2337 | if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { | |
2338 | xoap->xoa_av_modified = | |
2339 | ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0); | |
2340 | XVA_SET_RTN(xvap, XAT_AV_MODIFIED); | |
2341 | } | |
2342 | ||
2343 | if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && | |
2344 | vp->v_type == VREG && | |
2345 | (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) { | |
2346 | size_t len; | |
2347 | dmu_object_info_t doi; | |
2348 | ||
2349 | /* | |
2350 | * Only VREG files have anti-virus scanstamps, so we | |
2351 | * won't conflict with symlinks in the bonus buffer. | |
2352 | */ | |
2353 | dmu_object_info_from_db(zp->z_dbuf, &doi); | |
2354 | len = sizeof (xoap->xoa_av_scanstamp) + | |
2355 | sizeof (znode_phys_t); | |
2356 | if (len <= doi.doi_bonus_size) { | |
2357 | /* | |
2358 | * pzp points to the start of the | |
2359 | * znode_phys_t. pzp + 1 points to the | |
2360 | * first byte after the znode_phys_t. | |
2361 | */ | |
2362 | (void) memcpy(xoap->xoa_av_scanstamp, | |
2363 | pzp + 1, | |
2364 | sizeof (xoap->xoa_av_scanstamp)); | |
2365 | XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); | |
2366 | } | |
2367 | } | |
2368 | ||
2369 | if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { | |
2370 | ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime); | |
2371 | XVA_SET_RTN(xvap, XAT_CREATETIME); | |
2372 | } | |
2373 | } | |
2374 | ||
2375 | ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime); | |
2376 | ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime); | |
2377 | ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime); | |
2378 | ||
2379 | mutex_exit(&zp->z_lock); | |
2380 | ||
2381 | dmu_object_size_from_db(zp->z_dbuf, &vap->va_blksize, &vap->va_nblocks); | |
2382 | ||
2383 | if (zp->z_blksz == 0) { | |
2384 | /* | |
2385 | * Block size hasn't been set; suggest maximal I/O transfers. | |
2386 | */ | |
2387 | vap->va_blksize = zfsvfs->z_max_blksz; | |
2388 | } | |
2389 | ||
2390 | ZFS_EXIT(zfsvfs); | |
2391 | return (0); | |
2392 | } | |
2393 | ||
2394 | /* | |
2395 | * Set the file attributes to the values contained in the | |
2396 | * vattr structure. | |
2397 | * | |
2398 | * IN: vp - vnode of file to be modified. | |
2399 | * vap - new attribute values. | |
2400 | * If AT_XVATTR set, then optional attrs are being set | |
2401 | * flags - ATTR_UTIME set if non-default time values provided. | |
2402 | * - ATTR_NOACLCHECK (CIFS context only). | |
2403 | * cr - credentials of caller. | |
2404 | * ct - caller context | |
2405 | * | |
2406 | * RETURN: 0 if success | |
2407 | * error code if failure | |
2408 | * | |
2409 | * Timestamps: | |
2410 | * vp - ctime updated, mtime updated if size changed. | |
2411 | */ | |
2412 | /* ARGSUSED */ | |
2413 | static int | |
2414 | zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, | |
2415 | caller_context_t *ct) | |
2416 | { | |
2417 | znode_t *zp = VTOZ(vp); | |
2418 | znode_phys_t *pzp; | |
2419 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
2420 | zilog_t *zilog; | |
2421 | dmu_tx_t *tx; | |
2422 | vattr_t oldva; | |
fb5f0bc8 | 2423 | xvattr_t tmpxvattr; |
34dc7c2f BB |
2424 | uint_t mask = vap->va_mask; |
2425 | uint_t saved_mask; | |
2426 | int trim_mask = 0; | |
2427 | uint64_t new_mode; | |
9babb374 | 2428 | uint64_t new_uid, new_gid; |
34dc7c2f BB |
2429 | znode_t *attrzp; |
2430 | int need_policy = FALSE; | |
2431 | int err; | |
2432 | zfs_fuid_info_t *fuidp = NULL; | |
2433 | xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ | |
2434 | xoptattr_t *xoap; | |
2435 | zfs_acl_t *aclp = NULL; | |
2436 | boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; | |
9babb374 | 2437 | boolean_t fuid_dirtied = B_FALSE; |
34dc7c2f BB |
2438 | |
2439 | if (mask == 0) | |
2440 | return (0); | |
2441 | ||
2442 | if (mask & AT_NOSET) | |
2443 | return (EINVAL); | |
2444 | ||
2445 | ZFS_ENTER(zfsvfs); | |
2446 | ZFS_VERIFY_ZP(zp); | |
2447 | ||
2448 | pzp = zp->z_phys; | |
2449 | zilog = zfsvfs->z_log; | |
2450 | ||
2451 | /* | |
2452 | * Make sure that if we have ephemeral uid/gid or xvattr specified | |
2453 | * that file system is at proper version level | |
2454 | */ | |
2455 | ||
2456 | if (zfsvfs->z_use_fuids == B_FALSE && | |
2457 | (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || | |
2458 | ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || | |
2459 | (mask & AT_XVATTR))) { | |
2460 | ZFS_EXIT(zfsvfs); | |
2461 | return (EINVAL); | |
2462 | } | |
2463 | ||
2464 | if (mask & AT_SIZE && vp->v_type == VDIR) { | |
2465 | ZFS_EXIT(zfsvfs); | |
2466 | return (EISDIR); | |
2467 | } | |
2468 | ||
2469 | if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { | |
2470 | ZFS_EXIT(zfsvfs); | |
2471 | return (EINVAL); | |
2472 | } | |
2473 | ||
2474 | /* | |
2475 | * If this is an xvattr_t, then get a pointer to the structure of | |
2476 | * optional attributes. If this is NULL, then we have a vattr_t. | |
2477 | */ | |
2478 | xoap = xva_getxoptattr(xvap); | |
2479 | ||
fb5f0bc8 BB |
2480 | xva_init(&tmpxvattr); |
2481 | ||
34dc7c2f BB |
2482 | /* |
2483 | * Immutable files can only alter immutable bit and atime | |
2484 | */ | |
2485 | if ((pzp->zp_flags & ZFS_IMMUTABLE) && | |
2486 | ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || | |
2487 | ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { | |
2488 | ZFS_EXIT(zfsvfs); | |
2489 | return (EPERM); | |
2490 | } | |
2491 | ||
2492 | if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) { | |
2493 | ZFS_EXIT(zfsvfs); | |
2494 | return (EPERM); | |
2495 | } | |
2496 | ||
2497 | /* | |
2498 | * Verify that timestamps don't overflow 32 bits. | |
2499 | * ZFS can handle large timestamps, but 32bit syscalls can't | |
2500 | * handle times greater than 2039. This check should be removed | |
2501 | * once large timestamps are fully supported. | |
2502 | */ | |
2503 | if (mask & (AT_ATIME | AT_MTIME)) { | |
2504 | if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || | |
2505 | ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { | |
2506 | ZFS_EXIT(zfsvfs); | |
2507 | return (EOVERFLOW); | |
2508 | } | |
2509 | } | |
2510 | ||
2511 | top: | |
2512 | attrzp = NULL; | |
2513 | ||
45d1cae3 | 2514 | /* Can this be moved to before the top label? */ |
34dc7c2f BB |
2515 | if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { |
2516 | ZFS_EXIT(zfsvfs); | |
2517 | return (EROFS); | |
2518 | } | |
2519 | ||
2520 | /* | |
2521 | * First validate permissions | |
2522 | */ | |
2523 | ||
2524 | if (mask & AT_SIZE) { | |
2525 | err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); | |
2526 | if (err) { | |
2527 | ZFS_EXIT(zfsvfs); | |
2528 | return (err); | |
2529 | } | |
2530 | /* | |
2531 | * XXX - Note, we are not providing any open | |
2532 | * mode flags here (like FNDELAY), so we may | |
2533 | * block if there are locks present... this | |
2534 | * should be addressed in openat(). | |
2535 | */ | |
b128c09f BB |
2536 | /* XXX - would it be OK to generate a log record here? */ |
2537 | err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); | |
34dc7c2f BB |
2538 | if (err) { |
2539 | ZFS_EXIT(zfsvfs); | |
2540 | return (err); | |
2541 | } | |
2542 | } | |
2543 | ||
2544 | if (mask & (AT_ATIME|AT_MTIME) || | |
2545 | ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || | |
2546 | XVA_ISSET_REQ(xvap, XAT_READONLY) || | |
2547 | XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || | |
2548 | XVA_ISSET_REQ(xvap, XAT_CREATETIME) || | |
2549 | XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) | |
2550 | need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, | |
2551 | skipaclchk, cr); | |
2552 | ||
2553 | if (mask & (AT_UID|AT_GID)) { | |
2554 | int idmask = (mask & (AT_UID|AT_GID)); | |
2555 | int take_owner; | |
2556 | int take_group; | |
2557 | ||
2558 | /* | |
2559 | * NOTE: even if a new mode is being set, | |
2560 | * we may clear S_ISUID/S_ISGID bits. | |
2561 | */ | |
2562 | ||
2563 | if (!(mask & AT_MODE)) | |
2564 | vap->va_mode = pzp->zp_mode; | |
2565 | ||
2566 | /* | |
2567 | * Take ownership or chgrp to group we are a member of | |
2568 | */ | |
2569 | ||
2570 | take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); | |
2571 | take_group = (mask & AT_GID) && | |
2572 | zfs_groupmember(zfsvfs, vap->va_gid, cr); | |
2573 | ||
2574 | /* | |
2575 | * If both AT_UID and AT_GID are set then take_owner and | |
2576 | * take_group must both be set in order to allow taking | |
2577 | * ownership. | |
2578 | * | |
2579 | * Otherwise, send the check through secpolicy_vnode_setattr() | |
2580 | * | |
2581 | */ | |
2582 | ||
2583 | if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || | |
2584 | ((idmask == AT_UID) && take_owner) || | |
2585 | ((idmask == AT_GID) && take_group)) { | |
2586 | if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, | |
2587 | skipaclchk, cr) == 0) { | |
2588 | /* | |
2589 | * Remove setuid/setgid for non-privileged users | |
2590 | */ | |
2591 | secpolicy_setid_clear(vap, cr); | |
2592 | trim_mask = (mask & (AT_UID|AT_GID)); | |
2593 | } else { | |
2594 | need_policy = TRUE; | |
2595 | } | |
2596 | } else { | |
2597 | need_policy = TRUE; | |
2598 | } | |
2599 | } | |
2600 | ||
2601 | mutex_enter(&zp->z_lock); | |
2602 | oldva.va_mode = pzp->zp_mode; | |
2603 | zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); | |
2604 | if (mask & AT_XVATTR) { | |
fb5f0bc8 BB |
2605 | /* |
2606 | * Update xvattr mask to include only those attributes | |
2607 | * that are actually changing. | |
2608 | * | |
2609 | * the bits will be restored prior to actually setting | |
2610 | * the attributes so the caller thinks they were set. | |
2611 | */ | |
2612 | if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { | |
2613 | if (xoap->xoa_appendonly != | |
2614 | ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) { | |
2615 | need_policy = TRUE; | |
2616 | } else { | |
2617 | XVA_CLR_REQ(xvap, XAT_APPENDONLY); | |
2618 | XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); | |
2619 | } | |
2620 | } | |
2621 | ||
2622 | if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { | |
2623 | if (xoap->xoa_nounlink != | |
2624 | ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) { | |
2625 | need_policy = TRUE; | |
2626 | } else { | |
2627 | XVA_CLR_REQ(xvap, XAT_NOUNLINK); | |
2628 | XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); | |
2629 | } | |
2630 | } | |
2631 | ||
2632 | if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { | |
2633 | if (xoap->xoa_immutable != | |
2634 | ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) { | |
2635 | need_policy = TRUE; | |
2636 | } else { | |
2637 | XVA_CLR_REQ(xvap, XAT_IMMUTABLE); | |
2638 | XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); | |
2639 | } | |
2640 | } | |
2641 | ||
2642 | if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { | |
2643 | if (xoap->xoa_nodump != | |
2644 | ((pzp->zp_flags & ZFS_NODUMP) != 0)) { | |
2645 | need_policy = TRUE; | |
2646 | } else { | |
2647 | XVA_CLR_REQ(xvap, XAT_NODUMP); | |
2648 | XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); | |
2649 | } | |
2650 | } | |
2651 | ||
2652 | if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { | |
2653 | if (xoap->xoa_av_modified != | |
2654 | ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) { | |
2655 | need_policy = TRUE; | |
2656 | } else { | |
2657 | XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); | |
2658 | XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); | |
2659 | } | |
2660 | } | |
2661 | ||
2662 | if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { | |
2663 | if ((vp->v_type != VREG && | |
2664 | xoap->xoa_av_quarantined) || | |
2665 | xoap->xoa_av_quarantined != | |
2666 | ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) { | |
2667 | need_policy = TRUE; | |
2668 | } else { | |
2669 | XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); | |
2670 | XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); | |
2671 | } | |
2672 | } | |
2673 | ||
2674 | if (need_policy == FALSE && | |
2675 | (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || | |
2676 | XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { | |
34dc7c2f BB |
2677 | need_policy = TRUE; |
2678 | } | |
2679 | } | |
2680 | ||
2681 | mutex_exit(&zp->z_lock); | |
2682 | ||
2683 | if (mask & AT_MODE) { | |
2684 | if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { | |
2685 | err = secpolicy_setid_setsticky_clear(vp, vap, | |
2686 | &oldva, cr); | |
2687 | if (err) { | |
2688 | ZFS_EXIT(zfsvfs); | |
2689 | return (err); | |
2690 | } | |
2691 | trim_mask |= AT_MODE; | |
2692 | } else { | |
2693 | need_policy = TRUE; | |
2694 | } | |
2695 | } | |
2696 | ||
2697 | if (need_policy) { | |
2698 | /* | |
2699 | * If trim_mask is set then take ownership | |
2700 | * has been granted or write_acl is present and user | |
2701 | * has the ability to modify mode. In that case remove | |
2702 | * UID|GID and or MODE from mask so that | |
2703 | * secpolicy_vnode_setattr() doesn't revoke it. | |
2704 | */ | |
2705 | ||
2706 | if (trim_mask) { | |
2707 | saved_mask = vap->va_mask; | |
2708 | vap->va_mask &= ~trim_mask; | |
2709 | } | |
2710 | err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, | |
2711 | (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); | |
2712 | if (err) { | |
2713 | ZFS_EXIT(zfsvfs); | |
2714 | return (err); | |
2715 | } | |
2716 | ||
2717 | if (trim_mask) | |
2718 | vap->va_mask |= saved_mask; | |
2719 | } | |
2720 | ||
2721 | /* | |
2722 | * secpolicy_vnode_setattr, or take ownership may have | |
2723 | * changed va_mask | |
2724 | */ | |
2725 | mask = vap->va_mask; | |
2726 | ||
2727 | tx = dmu_tx_create(zfsvfs->z_os); | |
2728 | dmu_tx_hold_bonus(tx, zp->z_id); | |
34dc7c2f BB |
2729 | |
2730 | if (mask & AT_MODE) { | |
2731 | uint64_t pmode = pzp->zp_mode; | |
2732 | ||
2733 | new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); | |
2734 | ||
9babb374 BB |
2735 | if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) |
2736 | goto out; | |
34dc7c2f BB |
2737 | if (pzp->zp_acl.z_acl_extern_obj) { |
2738 | /* Are we upgrading ACL from old V0 format to new V1 */ | |
2739 | if (zfsvfs->z_version <= ZPL_VERSION_FUID && | |
2740 | pzp->zp_acl.z_acl_version == | |
2741 | ZFS_ACL_VERSION_INITIAL) { | |
2742 | dmu_tx_hold_free(tx, | |
2743 | pzp->zp_acl.z_acl_extern_obj, 0, | |
2744 | DMU_OBJECT_END); | |
2745 | dmu_tx_hold_write(tx, DMU_NEW_OBJECT, | |
2746 | 0, aclp->z_acl_bytes); | |
2747 | } else { | |
2748 | dmu_tx_hold_write(tx, | |
2749 | pzp->zp_acl.z_acl_extern_obj, 0, | |
2750 | aclp->z_acl_bytes); | |
2751 | } | |
2752 | } else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { | |
2753 | dmu_tx_hold_write(tx, DMU_NEW_OBJECT, | |
2754 | 0, aclp->z_acl_bytes); | |
2755 | } | |
2756 | } | |
2757 | ||
9babb374 BB |
2758 | if (mask & (AT_UID | AT_GID)) { |
2759 | if (pzp->zp_xattr) { | |
2760 | err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp); | |
2761 | if (err) | |
2762 | goto out; | |
2763 | dmu_tx_hold_bonus(tx, attrzp->z_id); | |
2764 | } | |
2765 | if (mask & AT_UID) { | |
2766 | new_uid = zfs_fuid_create(zfsvfs, | |
2767 | (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); | |
2768 | if (new_uid != pzp->zp_uid && | |
2769 | zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) { | |
2770 | err = EDQUOT; | |
2771 | goto out; | |
2772 | } | |
2773 | } | |
2774 | ||
2775 | if (mask & AT_GID) { | |
2776 | new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, | |
2777 | cr, ZFS_GROUP, &fuidp); | |
2778 | if (new_gid != pzp->zp_gid && | |
2779 | zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) { | |
2780 | err = EDQUOT; | |
2781 | goto out; | |
2782 | } | |
2783 | } | |
2784 | fuid_dirtied = zfsvfs->z_fuid_dirty; | |
2785 | if (fuid_dirtied) { | |
2786 | if (zfsvfs->z_fuid_obj == 0) { | |
2787 | dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); | |
2788 | dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, | |
2789 | FUID_SIZE_ESTIMATE(zfsvfs)); | |
2790 | dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, | |
2791 | FALSE, NULL); | |
2792 | } else { | |
2793 | dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); | |
2794 | dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, | |
2795 | FUID_SIZE_ESTIMATE(zfsvfs)); | |
2796 | } | |
34dc7c2f | 2797 | } |
34dc7c2f BB |
2798 | } |
2799 | ||
fb5f0bc8 | 2800 | err = dmu_tx_assign(tx, TXG_NOWAIT); |
34dc7c2f | 2801 | if (err) { |
9babb374 | 2802 | if (err == ERESTART) |
34dc7c2f | 2803 | dmu_tx_wait(tx); |
9babb374 | 2804 | goto out; |
34dc7c2f BB |
2805 | } |
2806 | ||
2807 | dmu_buf_will_dirty(zp->z_dbuf, tx); | |
2808 | ||
2809 | /* | |
2810 | * Set each attribute requested. | |
2811 | * We group settings according to the locks they need to acquire. | |
2812 | * | |
2813 | * Note: you cannot set ctime directly, although it will be | |
2814 | * updated as a side-effect of calling this function. | |
2815 | */ | |
2816 | ||
2817 | mutex_enter(&zp->z_lock); | |
2818 | ||
2819 | if (mask & AT_MODE) { | |
2820 | mutex_enter(&zp->z_acl_lock); | |
2821 | zp->z_phys->zp_mode = new_mode; | |
9babb374 | 2822 | err = zfs_aclset_common(zp, aclp, cr, tx); |
34dc7c2f | 2823 | ASSERT3U(err, ==, 0); |
45d1cae3 BB |
2824 | zp->z_acl_cached = aclp; |
2825 | aclp = NULL; | |
34dc7c2f BB |
2826 | mutex_exit(&zp->z_acl_lock); |
2827 | } | |
2828 | ||
2829 | if (attrzp) | |
2830 | mutex_enter(&attrzp->z_lock); | |
2831 | ||
2832 | if (mask & AT_UID) { | |
9babb374 BB |
2833 | pzp->zp_uid = new_uid; |
2834 | if (attrzp) | |
2835 | attrzp->z_phys->zp_uid = new_uid; | |
34dc7c2f BB |
2836 | } |
2837 | ||
2838 | if (mask & AT_GID) { | |
9babb374 | 2839 | pzp->zp_gid = new_gid; |
34dc7c2f | 2840 | if (attrzp) |
9babb374 | 2841 | attrzp->z_phys->zp_gid = new_gid; |
34dc7c2f BB |
2842 | } |
2843 | ||
34dc7c2f BB |
2844 | if (attrzp) |
2845 | mutex_exit(&attrzp->z_lock); | |
2846 | ||
2847 | if (mask & AT_ATIME) | |
2848 | ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); | |
2849 | ||
2850 | if (mask & AT_MTIME) | |
2851 | ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); | |
2852 | ||
b128c09f | 2853 | /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ |
34dc7c2f BB |
2854 | if (mask & AT_SIZE) |
2855 | zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx); | |
2856 | else if (mask != 0) | |
2857 | zfs_time_stamper_locked(zp, STATE_CHANGED, tx); | |
2858 | /* | |
2859 | * Do this after setting timestamps to prevent timestamp | |
2860 | * update from toggling bit | |
2861 | */ | |
2862 | ||
2863 | if (xoap && (mask & AT_XVATTR)) { | |
fb5f0bc8 BB |
2864 | |
2865 | /* | |
2866 | * restore trimmed off masks | |
2867 | * so that return masks can be set for caller. | |
2868 | */ | |
2869 | ||
2870 | if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { | |
2871 | XVA_SET_REQ(xvap, XAT_APPENDONLY); | |
2872 | } | |
2873 | if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { | |
2874 | XVA_SET_REQ(xvap, XAT_NOUNLINK); | |
2875 | } | |
2876 | if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { | |
2877 | XVA_SET_REQ(xvap, XAT_IMMUTABLE); | |
2878 | } | |
2879 | if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { | |
2880 | XVA_SET_REQ(xvap, XAT_NODUMP); | |
2881 | } | |
2882 | if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { | |
2883 | XVA_SET_REQ(xvap, XAT_AV_MODIFIED); | |
2884 | } | |
2885 | if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { | |
2886 | XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); | |
2887 | } | |
2888 | ||
34dc7c2f BB |
2889 | if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { |
2890 | size_t len; | |
2891 | dmu_object_info_t doi; | |
2892 | ||
2893 | ASSERT(vp->v_type == VREG); | |
2894 | ||
2895 | /* Grow the bonus buffer if necessary. */ | |
2896 | dmu_object_info_from_db(zp->z_dbuf, &doi); | |
2897 | len = sizeof (xoap->xoa_av_scanstamp) + | |
2898 | sizeof (znode_phys_t); | |
2899 | if (len > doi.doi_bonus_size) | |
2900 | VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0); | |
2901 | } | |
2902 | zfs_xvattr_set(zp, xvap); | |
2903 | } | |
2904 | ||
9babb374 BB |
2905 | if (fuid_dirtied) |
2906 | zfs_fuid_sync(zfsvfs, tx); | |
2907 | ||
34dc7c2f BB |
2908 | if (mask != 0) |
2909 | zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); | |
2910 | ||
34dc7c2f BB |
2911 | mutex_exit(&zp->z_lock); |
2912 | ||
9babb374 | 2913 | out: |
34dc7c2f BB |
2914 | if (attrzp) |
2915 | VN_RELE(ZTOV(attrzp)); | |
2916 | ||
45d1cae3 | 2917 | if (aclp) |
9babb374 | 2918 | zfs_acl_free(aclp); |
9babb374 BB |
2919 | |
2920 | if (fuidp) { | |
2921 | zfs_fuid_info_free(fuidp); | |
2922 | fuidp = NULL; | |
2923 | } | |
2924 | ||
2925 | if (err) | |
2926 | dmu_tx_abort(tx); | |
2927 | else | |
2928 | dmu_tx_commit(tx); | |
2929 | ||
2930 | if (err == ERESTART) | |
2931 | goto top; | |
34dc7c2f BB |
2932 | |
2933 | ZFS_EXIT(zfsvfs); | |
2934 | return (err); | |
2935 | } | |
2936 | ||
2937 | typedef struct zfs_zlock { | |
2938 | krwlock_t *zl_rwlock; /* lock we acquired */ | |
2939 | znode_t *zl_znode; /* znode we held */ | |
2940 | struct zfs_zlock *zl_next; /* next in list */ | |
2941 | } zfs_zlock_t; | |
2942 | ||
2943 | /* | |
2944 | * Drop locks and release vnodes that were held by zfs_rename_lock(). | |
2945 | */ | |
2946 | static void | |
2947 | zfs_rename_unlock(zfs_zlock_t **zlpp) | |
2948 | { | |
2949 | zfs_zlock_t *zl; | |
2950 | ||
2951 | while ((zl = *zlpp) != NULL) { | |
2952 | if (zl->zl_znode != NULL) | |
2953 | VN_RELE(ZTOV(zl->zl_znode)); | |
2954 | rw_exit(zl->zl_rwlock); | |
2955 | *zlpp = zl->zl_next; | |
2956 | kmem_free(zl, sizeof (*zl)); | |
2957 | } | |
2958 | } | |
2959 | ||
2960 | /* | |
2961 | * Search back through the directory tree, using the ".." entries. | |
2962 | * Lock each directory in the chain to prevent concurrent renames. | |
2963 | * Fail any attempt to move a directory into one of its own descendants. | |
2964 | * XXX - z_parent_lock can overlap with map or grow locks | |
2965 | */ | |
2966 | static int | |
2967 | zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) | |
2968 | { | |
2969 | zfs_zlock_t *zl; | |
2970 | znode_t *zp = tdzp; | |
2971 | uint64_t rootid = zp->z_zfsvfs->z_root; | |
2972 | uint64_t *oidp = &zp->z_id; | |
2973 | krwlock_t *rwlp = &szp->z_parent_lock; | |
2974 | krw_t rw = RW_WRITER; | |
2975 | ||
2976 | /* | |
2977 | * First pass write-locks szp and compares to zp->z_id. | |
2978 | * Later passes read-lock zp and compare to zp->z_parent. | |
2979 | */ | |
2980 | do { | |
2981 | if (!rw_tryenter(rwlp, rw)) { | |
2982 | /* | |
2983 | * Another thread is renaming in this path. | |
2984 | * Note that if we are a WRITER, we don't have any | |
2985 | * parent_locks held yet. | |
2986 | */ | |
2987 | if (rw == RW_READER && zp->z_id > szp->z_id) { | |
2988 | /* | |
2989 | * Drop our locks and restart | |
2990 | */ | |
2991 | zfs_rename_unlock(&zl); | |
2992 | *zlpp = NULL; | |
2993 | zp = tdzp; | |
2994 | oidp = &zp->z_id; | |
2995 | rwlp = &szp->z_parent_lock; | |
2996 | rw = RW_WRITER; | |
2997 | continue; | |
2998 | } else { | |
2999 | /* | |
3000 | * Wait for other thread to drop its locks | |
3001 | */ | |
3002 | rw_enter(rwlp, rw); | |
3003 | } | |
3004 | } | |
3005 | ||
3006 | zl = kmem_alloc(sizeof (*zl), KM_SLEEP); | |
3007 | zl->zl_rwlock = rwlp; | |
3008 | zl->zl_znode = NULL; | |
3009 | zl->zl_next = *zlpp; | |
3010 | *zlpp = zl; | |
3011 | ||
3012 | if (*oidp == szp->z_id) /* We're a descendant of szp */ | |
3013 | return (EINVAL); | |
3014 | ||
3015 | if (*oidp == rootid) /* We've hit the top */ | |
3016 | return (0); | |
3017 | ||
3018 | if (rw == RW_READER) { /* i.e. not the first pass */ | |
3019 | int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp); | |
3020 | if (error) | |
3021 | return (error); | |
3022 | zl->zl_znode = zp; | |
3023 | } | |
3024 | oidp = &zp->z_phys->zp_parent; | |
3025 | rwlp = &zp->z_parent_lock; | |
3026 | rw = RW_READER; | |
3027 | ||
3028 | } while (zp->z_id != sdzp->z_id); | |
3029 | ||
3030 | return (0); | |
3031 | } | |
3032 | ||
3033 | /* | |
3034 | * Move an entry from the provided source directory to the target | |
3035 | * directory. Change the entry name as indicated. | |
3036 | * | |
3037 | * IN: sdvp - Source directory containing the "old entry". | |
3038 | * snm - Old entry name. | |
3039 | * tdvp - Target directory to contain the "new entry". | |
3040 | * tnm - New entry name. | |
3041 | * cr - credentials of caller. | |
3042 | * ct - caller context | |
3043 | * flags - case flags | |
3044 | * | |
3045 | * RETURN: 0 if success | |
3046 | * error code if failure | |
3047 | * | |
3048 | * Timestamps: | |
3049 | * sdvp,tdvp - ctime|mtime updated | |
3050 | */ | |
3051 | /*ARGSUSED*/ | |
3052 | static int | |
3053 | zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, | |
3054 | caller_context_t *ct, int flags) | |
3055 | { | |
3056 | znode_t *tdzp, *szp, *tzp; | |
3057 | znode_t *sdzp = VTOZ(sdvp); | |
3058 | zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; | |
3059 | zilog_t *zilog; | |
3060 | vnode_t *realvp; | |
3061 | zfs_dirlock_t *sdl, *tdl; | |
3062 | dmu_tx_t *tx; | |
3063 | zfs_zlock_t *zl; | |
3064 | int cmp, serr, terr; | |
3065 | int error = 0; | |
3066 | int zflg = 0; | |
3067 | ||
3068 | ZFS_ENTER(zfsvfs); | |
3069 | ZFS_VERIFY_ZP(sdzp); | |
3070 | zilog = zfsvfs->z_log; | |
3071 | ||
3072 | /* | |
3073 | * Make sure we have the real vp for the target directory. | |
3074 | */ | |
3075 | if (VOP_REALVP(tdvp, &realvp, ct) == 0) | |
3076 | tdvp = realvp; | |
3077 | ||
3078 | if (tdvp->v_vfsp != sdvp->v_vfsp) { | |
3079 | ZFS_EXIT(zfsvfs); | |
3080 | return (EXDEV); | |
3081 | } | |
3082 | ||
3083 | tdzp = VTOZ(tdvp); | |
3084 | ZFS_VERIFY_ZP(tdzp); | |
3085 | if (zfsvfs->z_utf8 && u8_validate(tnm, | |
3086 | strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { | |
3087 | ZFS_EXIT(zfsvfs); | |
3088 | return (EILSEQ); | |
3089 | } | |
3090 | ||
3091 | if (flags & FIGNORECASE) | |
3092 | zflg |= ZCILOOK; | |
3093 | ||
3094 | top: | |
3095 | szp = NULL; | |
3096 | tzp = NULL; | |
3097 | zl = NULL; | |
3098 | ||
3099 | /* | |
3100 | * This is to prevent the creation of links into attribute space | |
3101 | * by renaming a linked file into/outof an attribute directory. | |
3102 | * See the comment in zfs_link() for why this is considered bad. | |
3103 | */ | |
3104 | if ((tdzp->z_phys->zp_flags & ZFS_XATTR) != | |
3105 | (sdzp->z_phys->zp_flags & ZFS_XATTR)) { | |
3106 | ZFS_EXIT(zfsvfs); | |
3107 | return (EINVAL); | |
3108 | } | |
3109 | ||
3110 | /* | |
3111 | * Lock source and target directory entries. To prevent deadlock, | |
3112 | * a lock ordering must be defined. We lock the directory with | |
3113 | * the smallest object id first, or if it's a tie, the one with | |
3114 | * the lexically first name. | |
3115 | */ | |
3116 | if (sdzp->z_id < tdzp->z_id) { | |
3117 | cmp = -1; | |
3118 | } else if (sdzp->z_id > tdzp->z_id) { | |
3119 | cmp = 1; | |
3120 | } else { | |
3121 | /* | |
3122 | * First compare the two name arguments without | |
3123 | * considering any case folding. | |
3124 | */ | |
3125 | int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); | |
3126 | ||
3127 | cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); | |
3128 | ASSERT(error == 0 || !zfsvfs->z_utf8); | |
3129 | if (cmp == 0) { | |
3130 | /* | |
3131 | * POSIX: "If the old argument and the new argument | |
3132 | * both refer to links to the same existing file, | |
3133 | * the rename() function shall return successfully | |
3134 | * and perform no other action." | |
3135 | */ | |
3136 | ZFS_EXIT(zfsvfs); | |
3137 | return (0); | |
3138 | } | |
3139 | /* | |
3140 | * If the file system is case-folding, then we may | |
3141 | * have some more checking to do. A case-folding file | |
3142 | * system is either supporting mixed case sensitivity | |
3143 | * access or is completely case-insensitive. Note | |
3144 | * that the file system is always case preserving. | |
3145 | * | |
3146 | * In mixed sensitivity mode case sensitive behavior | |
3147 | * is the default. FIGNORECASE must be used to | |
3148 | * explicitly request case insensitive behavior. | |
3149 | * | |
3150 | * If the source and target names provided differ only | |
3151 | * by case (e.g., a request to rename 'tim' to 'Tim'), | |
3152 | * we will treat this as a special case in the | |
3153 | * case-insensitive mode: as long as the source name | |
3154 | * is an exact match, we will allow this to proceed as | |
3155 | * a name-change request. | |
3156 | */ | |
3157 | if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || | |
3158 | (zfsvfs->z_case == ZFS_CASE_MIXED && | |
3159 | flags & FIGNORECASE)) && | |
3160 | u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, | |
3161 | &error) == 0) { | |
3162 | /* | |
3163 | * case preserving rename request, require exact | |
3164 | * name matches | |
3165 | */ | |
3166 | zflg |= ZCIEXACT; | |
3167 | zflg &= ~ZCILOOK; | |
3168 | } | |
3169 | } | |
3170 | ||
3171 | if (cmp < 0) { | |
3172 | serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, | |
3173 | ZEXISTS | zflg, NULL, NULL); | |
3174 | terr = zfs_dirent_lock(&tdl, | |
3175 | tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); | |
3176 | } else { | |
3177 | terr = zfs_dirent_lock(&tdl, | |
3178 | tdzp, tnm, &tzp, zflg, NULL, NULL); | |
3179 | serr = zfs_dirent_lock(&sdl, | |
3180 | sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, | |
3181 | NULL, NULL); | |
3182 | } | |
3183 | ||
3184 | if (serr) { | |
3185 | /* | |
3186 | * Source entry invalid or not there. | |
3187 | */ | |
3188 | if (!terr) { | |
3189 | zfs_dirent_unlock(tdl); | |
3190 | if (tzp) | |
3191 | VN_RELE(ZTOV(tzp)); | |
3192 | } | |
3193 | if (strcmp(snm, "..") == 0) | |
3194 | serr = EINVAL; | |
3195 | ZFS_EXIT(zfsvfs); | |
3196 | return (serr); | |
3197 | } | |
3198 | if (terr) { | |
3199 | zfs_dirent_unlock(sdl); | |
3200 | VN_RELE(ZTOV(szp)); | |
3201 | if (strcmp(tnm, "..") == 0) | |
3202 | terr = EINVAL; | |
3203 | ZFS_EXIT(zfsvfs); | |
3204 | return (terr); | |
3205 | } | |
3206 | ||
3207 | /* | |
3208 | * Must have write access at the source to remove the old entry | |
3209 | * and write access at the target to create the new entry. | |
3210 | * Note that if target and source are the same, this can be | |
3211 | * done in a single check. | |
3212 | */ | |
3213 | ||
3214 | if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) | |
3215 | goto out; | |
3216 | ||
3217 | if (ZTOV(szp)->v_type == VDIR) { | |
3218 | /* | |
3219 | * Check to make sure rename is valid. | |
3220 | * Can't do a move like this: /usr/a/b to /usr/a/b/c/d | |
3221 | */ | |
3222 | if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) | |
3223 | goto out; | |
3224 | } | |
3225 | ||
3226 | /* | |
3227 | * Does target exist? | |
3228 | */ | |
3229 | if (tzp) { | |
3230 | /* | |
3231 | * Source and target must be the same type. | |
3232 | */ | |
3233 | if (ZTOV(szp)->v_type == VDIR) { | |
3234 | if (ZTOV(tzp)->v_type != VDIR) { | |
3235 | error = ENOTDIR; | |
3236 | goto out; | |
3237 | } | |
3238 | } else { | |
3239 | if (ZTOV(tzp)->v_type == VDIR) { | |
3240 | error = EISDIR; | |
3241 | goto out; | |
3242 | } | |
3243 | } | |
3244 | /* | |
3245 | * POSIX dictates that when the source and target | |
3246 | * entries refer to the same file object, rename | |
3247 | * must do nothing and exit without error. | |
3248 | */ | |
3249 | if (szp->z_id == tzp->z_id) { | |
3250 | error = 0; | |
3251 | goto out; | |
3252 | } | |
3253 | } | |
3254 | ||
3255 | vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); | |
3256 | if (tzp) | |
3257 | vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); | |
3258 | ||
3259 | /* | |
3260 | * notify the target directory if it is not the same | |
3261 | * as source directory. | |
3262 | */ | |
3263 | if (tdvp != sdvp) { | |
3264 | vnevent_rename_dest_dir(tdvp, ct); | |
3265 | } | |
3266 | ||
3267 | tx = dmu_tx_create(zfsvfs->z_os); | |
3268 | dmu_tx_hold_bonus(tx, szp->z_id); /* nlink changes */ | |
3269 | dmu_tx_hold_bonus(tx, sdzp->z_id); /* nlink changes */ | |
3270 | dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); | |
3271 | dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); | |
3272 | if (sdzp != tdzp) | |
3273 | dmu_tx_hold_bonus(tx, tdzp->z_id); /* nlink changes */ | |
3274 | if (tzp) | |
3275 | dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */ | |
3276 | dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); | |
fb5f0bc8 | 3277 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
34dc7c2f BB |
3278 | if (error) { |
3279 | if (zl != NULL) | |
3280 | zfs_rename_unlock(&zl); | |
3281 | zfs_dirent_unlock(sdl); | |
3282 | zfs_dirent_unlock(tdl); | |
3283 | VN_RELE(ZTOV(szp)); | |
3284 | if (tzp) | |
3285 | VN_RELE(ZTOV(tzp)); | |
fb5f0bc8 | 3286 | if (error == ERESTART) { |
34dc7c2f BB |
3287 | dmu_tx_wait(tx); |
3288 | dmu_tx_abort(tx); | |
3289 | goto top; | |
3290 | } | |
3291 | dmu_tx_abort(tx); | |
3292 | ZFS_EXIT(zfsvfs); | |
3293 | return (error); | |
3294 | } | |
3295 | ||
3296 | if (tzp) /* Attempt to remove the existing target */ | |
3297 | error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); | |
3298 | ||
3299 | if (error == 0) { | |
3300 | error = zfs_link_create(tdl, szp, tx, ZRENAMING); | |
3301 | if (error == 0) { | |
3302 | szp->z_phys->zp_flags |= ZFS_AV_MODIFIED; | |
3303 | ||
3304 | error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); | |
3305 | ASSERT(error == 0); | |
3306 | ||
3307 | zfs_log_rename(zilog, tx, | |
3308 | TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0), | |
3309 | sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); | |
b128c09f BB |
3310 | |
3311 | /* Update path information for the target vnode */ | |
3312 | vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm)); | |
34dc7c2f BB |
3313 | } |
3314 | } | |
3315 | ||
3316 | dmu_tx_commit(tx); | |
3317 | out: | |
3318 | if (zl != NULL) | |
3319 | zfs_rename_unlock(&zl); | |
3320 | ||
3321 | zfs_dirent_unlock(sdl); | |
3322 | zfs_dirent_unlock(tdl); | |
3323 | ||
3324 | VN_RELE(ZTOV(szp)); | |
3325 | if (tzp) | |
3326 | VN_RELE(ZTOV(tzp)); | |
3327 | ||
3328 | ZFS_EXIT(zfsvfs); | |
3329 | return (error); | |
3330 | } | |
3331 | ||
3332 | /* | |
3333 | * Insert the indicated symbolic reference entry into the directory. | |
3334 | * | |
3335 | * IN: dvp - Directory to contain new symbolic link. | |
3336 | * link - Name for new symlink entry. | |
3337 | * vap - Attributes of new entry. | |
3338 | * target - Target path of new symlink. | |
3339 | * cr - credentials of caller. | |
3340 | * ct - caller context | |
3341 | * flags - case flags | |
3342 | * | |
3343 | * RETURN: 0 if success | |
3344 | * error code if failure | |
3345 | * | |
3346 | * Timestamps: | |
3347 | * dvp - ctime|mtime updated | |
3348 | */ | |
3349 | /*ARGSUSED*/ | |
3350 | static int | |
3351 | zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, | |
3352 | caller_context_t *ct, int flags) | |
3353 | { | |
3354 | znode_t *zp, *dzp = VTOZ(dvp); | |
3355 | zfs_dirlock_t *dl; | |
3356 | dmu_tx_t *tx; | |
3357 | zfsvfs_t *zfsvfs = dzp->z_zfsvfs; | |
3358 | zilog_t *zilog; | |
3359 | int len = strlen(link); | |
3360 | int error; | |
3361 | int zflg = ZNEW; | |
9babb374 BB |
3362 | zfs_acl_ids_t acl_ids; |
3363 | boolean_t fuid_dirtied; | |
34dc7c2f BB |
3364 | |
3365 | ASSERT(vap->va_type == VLNK); | |
3366 | ||
3367 | ZFS_ENTER(zfsvfs); | |
3368 | ZFS_VERIFY_ZP(dzp); | |
3369 | zilog = zfsvfs->z_log; | |
3370 | ||
3371 | if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), | |
3372 | NULL, U8_VALIDATE_ENTIRE, &error) < 0) { | |
3373 | ZFS_EXIT(zfsvfs); | |
3374 | return (EILSEQ); | |
3375 | } | |
3376 | if (flags & FIGNORECASE) | |
3377 | zflg |= ZCILOOK; | |
3378 | top: | |
3379 | if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { | |
3380 | ZFS_EXIT(zfsvfs); | |
3381 | return (error); | |
3382 | } | |
3383 | ||
3384 | if (len > MAXPATHLEN) { | |
3385 | ZFS_EXIT(zfsvfs); | |
3386 | return (ENAMETOOLONG); | |
3387 | } | |
3388 | ||
3389 | /* | |
3390 | * Attempt to lock directory; fail if entry already exists. | |
3391 | */ | |
3392 | error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); | |
3393 | if (error) { | |
3394 | ZFS_EXIT(zfsvfs); | |
3395 | return (error); | |
3396 | } | |
3397 | ||
9babb374 BB |
3398 | VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)); |
3399 | if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { | |
3400 | zfs_acl_ids_free(&acl_ids); | |
3401 | zfs_dirent_unlock(dl); | |
3402 | ZFS_EXIT(zfsvfs); | |
3403 | return (EDQUOT); | |
3404 | } | |
34dc7c2f | 3405 | tx = dmu_tx_create(zfsvfs->z_os); |
9babb374 | 3406 | fuid_dirtied = zfsvfs->z_fuid_dirty; |
34dc7c2f BB |
3407 | dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); |
3408 | dmu_tx_hold_bonus(tx, dzp->z_id); | |
3409 | dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); | |
9babb374 | 3410 | if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) |
34dc7c2f | 3411 | dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); |
9babb374 BB |
3412 | if (fuid_dirtied) |
3413 | zfs_fuid_txhold(zfsvfs, tx); | |
fb5f0bc8 | 3414 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
34dc7c2f | 3415 | if (error) { |
9babb374 | 3416 | zfs_acl_ids_free(&acl_ids); |
34dc7c2f | 3417 | zfs_dirent_unlock(dl); |
fb5f0bc8 | 3418 | if (error == ERESTART) { |
34dc7c2f BB |
3419 | dmu_tx_wait(tx); |
3420 | dmu_tx_abort(tx); | |
3421 | goto top; | |
3422 | } | |
3423 | dmu_tx_abort(tx); | |
3424 | ZFS_EXIT(zfsvfs); | |
3425 | return (error); | |
3426 | } | |
3427 | ||
3428 | dmu_buf_will_dirty(dzp->z_dbuf, tx); | |
3429 | ||
3430 | /* | |
3431 | * Create a new object for the symlink. | |
3432 | * Put the link content into bonus buffer if it will fit; | |
3433 | * otherwise, store it just like any other file data. | |
3434 | */ | |
3435 | if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) { | |
9babb374 | 3436 | zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids); |
34dc7c2f BB |
3437 | if (len != 0) |
3438 | bcopy(link, zp->z_phys + 1, len); | |
3439 | } else { | |
3440 | dmu_buf_t *dbp; | |
3441 | ||
9babb374 BB |
3442 | zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); |
3443 | ||
3444 | if (fuid_dirtied) | |
3445 | zfs_fuid_sync(zfsvfs, tx); | |
34dc7c2f BB |
3446 | /* |
3447 | * Nothing can access the znode yet so no locking needed | |
3448 | * for growing the znode's blocksize. | |
3449 | */ | |
3450 | zfs_grow_blocksize(zp, len, tx); | |
3451 | ||
3452 | VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, | |
3453 | zp->z_id, 0, FTAG, &dbp)); | |
3454 | dmu_buf_will_dirty(dbp, tx); | |
3455 | ||
3456 | ASSERT3U(len, <=, dbp->db_size); | |
3457 | bcopy(link, dbp->db_data, len); | |
3458 | dmu_buf_rele(dbp, FTAG); | |
3459 | } | |
3460 | zp->z_phys->zp_size = len; | |
3461 | ||
3462 | /* | |
3463 | * Insert the new object into the directory. | |
3464 | */ | |
3465 | (void) zfs_link_create(dl, zp, tx, ZNEW); | |
34dc7c2f BB |
3466 | if (error == 0) { |
3467 | uint64_t txtype = TX_SYMLINK; | |
3468 | if (flags & FIGNORECASE) | |
3469 | txtype |= TX_CI; | |
3470 | zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); | |
3471 | } | |
9babb374 BB |
3472 | |
3473 | zfs_acl_ids_free(&acl_ids); | |
34dc7c2f BB |
3474 | |
3475 | dmu_tx_commit(tx); | |
3476 | ||
3477 | zfs_dirent_unlock(dl); | |
3478 | ||
3479 | VN_RELE(ZTOV(zp)); | |
3480 | ||
3481 | ZFS_EXIT(zfsvfs); | |
3482 | return (error); | |
3483 | } | |
3484 | ||
3485 | /* | |
3486 | * Return, in the buffer contained in the provided uio structure, | |
3487 | * the symbolic path referred to by vp. | |
3488 | * | |
3489 | * IN: vp - vnode of symbolic link. | |
3490 | * uoip - structure to contain the link path. | |
3491 | * cr - credentials of caller. | |
3492 | * ct - caller context | |
3493 | * | |
3494 | * OUT: uio - structure to contain the link path. | |
3495 | * | |
3496 | * RETURN: 0 if success | |
3497 | * error code if failure | |
3498 | * | |
3499 | * Timestamps: | |
3500 | * vp - atime updated | |
3501 | */ | |
3502 | /* ARGSUSED */ | |
3503 | static int | |
3504 | zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) | |
3505 | { | |
3506 | znode_t *zp = VTOZ(vp); | |
3507 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
3508 | size_t bufsz; | |
3509 | int error; | |
3510 | ||
3511 | ZFS_ENTER(zfsvfs); | |
3512 | ZFS_VERIFY_ZP(zp); | |
3513 | ||
3514 | bufsz = (size_t)zp->z_phys->zp_size; | |
3515 | if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { | |
3516 | error = uiomove(zp->z_phys + 1, | |
3517 | MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); | |
3518 | } else { | |
3519 | dmu_buf_t *dbp; | |
3520 | error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp); | |
3521 | if (error) { | |
3522 | ZFS_EXIT(zfsvfs); | |
3523 | return (error); | |
3524 | } | |
3525 | error = uiomove(dbp->db_data, | |
3526 | MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); | |
3527 | dmu_buf_rele(dbp, FTAG); | |
3528 | } | |
3529 | ||
3530 | ZFS_ACCESSTIME_STAMP(zfsvfs, zp); | |
3531 | ZFS_EXIT(zfsvfs); | |
3532 | return (error); | |
3533 | } | |
3534 | ||
3535 | /* | |
3536 | * Insert a new entry into directory tdvp referencing svp. | |
3537 | * | |
3538 | * IN: tdvp - Directory to contain new entry. | |
3539 | * svp - vnode of new entry. | |
3540 | * name - name of new entry. | |
3541 | * cr - credentials of caller. | |
3542 | * ct - caller context | |
3543 | * | |
3544 | * RETURN: 0 if success | |
3545 | * error code if failure | |
3546 | * | |
3547 | * Timestamps: | |
3548 | * tdvp - ctime|mtime updated | |
3549 | * svp - ctime updated | |
3550 | */ | |
3551 | /* ARGSUSED */ | |
3552 | static int | |
3553 | zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, | |
3554 | caller_context_t *ct, int flags) | |
3555 | { | |
3556 | znode_t *dzp = VTOZ(tdvp); | |
3557 | znode_t *tzp, *szp; | |
3558 | zfsvfs_t *zfsvfs = dzp->z_zfsvfs; | |
3559 | zilog_t *zilog; | |
3560 | zfs_dirlock_t *dl; | |
3561 | dmu_tx_t *tx; | |
3562 | vnode_t *realvp; | |
3563 | int error; | |
3564 | int zf = ZNEW; | |
3565 | uid_t owner; | |
3566 | ||
3567 | ASSERT(tdvp->v_type == VDIR); | |
3568 | ||
3569 | ZFS_ENTER(zfsvfs); | |
3570 | ZFS_VERIFY_ZP(dzp); | |
3571 | zilog = zfsvfs->z_log; | |
3572 | ||
3573 | if (VOP_REALVP(svp, &realvp, ct) == 0) | |
3574 | svp = realvp; | |
3575 | ||
3576 | if (svp->v_vfsp != tdvp->v_vfsp) { | |
3577 | ZFS_EXIT(zfsvfs); | |
3578 | return (EXDEV); | |
3579 | } | |
3580 | szp = VTOZ(svp); | |
3581 | ZFS_VERIFY_ZP(szp); | |
3582 | ||
3583 | if (zfsvfs->z_utf8 && u8_validate(name, | |
3584 | strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { | |
3585 | ZFS_EXIT(zfsvfs); | |
3586 | return (EILSEQ); | |
3587 | } | |
3588 | if (flags & FIGNORECASE) | |
3589 | zf |= ZCILOOK; | |
3590 | ||
3591 | top: | |
3592 | /* | |
3593 | * We do not support links between attributes and non-attributes | |
3594 | * because of the potential security risk of creating links | |
3595 | * into "normal" file space in order to circumvent restrictions | |
3596 | * imposed in attribute space. | |
3597 | */ | |
3598 | if ((szp->z_phys->zp_flags & ZFS_XATTR) != | |
3599 | (dzp->z_phys->zp_flags & ZFS_XATTR)) { | |
3600 | ZFS_EXIT(zfsvfs); | |
3601 | return (EINVAL); | |
3602 | } | |
3603 | ||
3604 | /* | |
3605 | * POSIX dictates that we return EPERM here. | |
3606 | * Better choices include ENOTSUP or EISDIR. | |
3607 | */ | |
3608 | if (svp->v_type == VDIR) { | |
3609 | ZFS_EXIT(zfsvfs); | |
3610 | return (EPERM); | |
3611 | } | |
3612 | ||
3613 | owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER); | |
3614 | if (owner != crgetuid(cr) && | |
3615 | secpolicy_basic_link(cr) != 0) { | |
3616 | ZFS_EXIT(zfsvfs); | |
3617 | return (EPERM); | |
3618 | } | |
3619 | ||
3620 | if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { | |
3621 | ZFS_EXIT(zfsvfs); | |
3622 | return (error); | |
3623 | } | |
3624 | ||
3625 | /* | |
3626 | * Attempt to lock directory; fail if entry already exists. | |
3627 | */ | |
3628 | error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); | |
3629 | if (error) { | |
3630 | ZFS_EXIT(zfsvfs); | |
3631 | return (error); | |
3632 | } | |
3633 | ||
3634 | tx = dmu_tx_create(zfsvfs->z_os); | |
3635 | dmu_tx_hold_bonus(tx, szp->z_id); | |
3636 | dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); | |
fb5f0bc8 | 3637 | error = dmu_tx_assign(tx, TXG_NOWAIT); |
34dc7c2f BB |
3638 | if (error) { |
3639 | zfs_dirent_unlock(dl); | |
fb5f0bc8 | 3640 | if (error == ERESTART) { |
34dc7c2f BB |
3641 | dmu_tx_wait(tx); |
3642 | dmu_tx_abort(tx); | |
3643 | goto top; | |
3644 | } | |
3645 | dmu_tx_abort(tx); | |
3646 | ZFS_EXIT(zfsvfs); | |
3647 | return (error); | |
3648 | } | |
3649 | ||
3650 | error = zfs_link_create(dl, szp, tx, 0); | |
3651 | ||
3652 | if (error == 0) { | |
3653 | uint64_t txtype = TX_LINK; | |
3654 | if (flags & FIGNORECASE) | |
3655 | txtype |= TX_CI; | |
3656 | zfs_log_link(zilog, tx, txtype, dzp, szp, name); | |
3657 | } | |
3658 | ||
3659 | dmu_tx_commit(tx); | |
3660 | ||
3661 | zfs_dirent_unlock(dl); | |
3662 | ||
3663 | if (error == 0) { | |
3664 | vnevent_link(svp, ct); | |
3665 | } | |
3666 | ||
3667 | ZFS_EXIT(zfsvfs); | |
3668 | return (error); | |
3669 | } | |
3670 | ||
3671 | /* | |
3672 | * zfs_null_putapage() is used when the file system has been force | |
3673 | * unmounted. It just drops the pages. | |
3674 | */ | |
3675 | /* ARGSUSED */ | |
3676 | static int | |
3677 | zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, | |
3678 | size_t *lenp, int flags, cred_t *cr) | |
3679 | { | |
3680 | pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); | |
3681 | return (0); | |
3682 | } | |
3683 | ||
3684 | /* | |
3685 | * Push a page out to disk, klustering if possible. | |
3686 | * | |
3687 | * IN: vp - file to push page to. | |
3688 | * pp - page to push. | |
3689 | * flags - additional flags. | |
3690 | * cr - credentials of caller. | |
3691 | * | |
3692 | * OUT: offp - start of range pushed. | |
3693 | * lenp - len of range pushed. | |
3694 | * | |
3695 | * RETURN: 0 if success | |
3696 | * error code if failure | |
3697 | * | |
3698 | * NOTE: callers must have locked the page to be pushed. On | |
3699 | * exit, the page (and all other pages in the kluster) must be | |
3700 | * unlocked. | |
3701 | */ | |
3702 | /* ARGSUSED */ | |
3703 | static int | |
3704 | zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, | |
3705 | size_t *lenp, int flags, cred_t *cr) | |
3706 | { | |
3707 | znode_t *zp = VTOZ(vp); | |
3708 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
34dc7c2f | 3709 | dmu_tx_t *tx; |
34dc7c2f BB |
3710 | u_offset_t off, koff; |
3711 | size_t len, klen; | |
3712 | uint64_t filesz; | |
3713 | int err; | |
3714 | ||
3715 | filesz = zp->z_phys->zp_size; | |
3716 | off = pp->p_offset; | |
3717 | len = PAGESIZE; | |
3718 | /* | |
3719 | * If our blocksize is bigger than the page size, try to kluster | |
fb5f0bc8 | 3720 | * multiple pages so that we write a full block (thus avoiding |
34dc7c2f BB |
3721 | * a read-modify-write). |
3722 | */ | |
3723 | if (off < filesz && zp->z_blksz > PAGESIZE) { | |
d164b209 BB |
3724 | klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); |
3725 | koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; | |
34dc7c2f BB |
3726 | ASSERT(koff <= filesz); |
3727 | if (koff + klen > filesz) | |
3728 | klen = P2ROUNDUP(filesz - koff, (uint64_t)PAGESIZE); | |
3729 | pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); | |
3730 | } | |
3731 | ASSERT3U(btop(len), ==, btopr(len)); | |
d164b209 | 3732 | |
34dc7c2f BB |
3733 | /* |
3734 | * Can't push pages past end-of-file. | |
3735 | */ | |
34dc7c2f BB |
3736 | if (off >= filesz) { |
3737 | /* ignore all pages */ | |
3738 | err = 0; | |
3739 | goto out; | |
3740 | } else if (off + len > filesz) { | |
3741 | int npages = btopr(filesz - off); | |
3742 | page_t *trunc; | |
3743 | ||
3744 | page_list_break(&pp, &trunc, npages); | |
3745 | /* ignore pages past end of file */ | |
3746 | if (trunc) | |
3747 | pvn_write_done(trunc, flags); | |
3748 | len = filesz - off; | |
3749 | } | |
9babb374 BB |
3750 | |
3751 | if (zfs_usergroup_overquota(zfsvfs, B_FALSE, zp->z_phys->zp_uid) || | |
3752 | zfs_usergroup_overquota(zfsvfs, B_TRUE, zp->z_phys->zp_gid)) { | |
3753 | err = EDQUOT; | |
3754 | goto out; | |
3755 | } | |
d164b209 | 3756 | top: |
34dc7c2f BB |
3757 | tx = dmu_tx_create(zfsvfs->z_os); |
3758 | dmu_tx_hold_write(tx, zp->z_id, off, len); | |
3759 | dmu_tx_hold_bonus(tx, zp->z_id); | |
fb5f0bc8 | 3760 | err = dmu_tx_assign(tx, TXG_NOWAIT); |
34dc7c2f | 3761 | if (err != 0) { |
fb5f0bc8 | 3762 | if (err == ERESTART) { |
34dc7c2f BB |
3763 | dmu_tx_wait(tx); |
3764 | dmu_tx_abort(tx); | |
34dc7c2f BB |
3765 | goto top; |
3766 | } | |
3767 | dmu_tx_abort(tx); | |
3768 | goto out; | |
3769 | } | |
3770 | ||
3771 | if (zp->z_blksz <= PAGESIZE) { | |
b128c09f | 3772 | caddr_t va = zfs_map_page(pp, S_READ); |
34dc7c2f BB |
3773 | ASSERT3U(len, <=, PAGESIZE); |
3774 | dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); | |
b128c09f | 3775 | zfs_unmap_page(pp, va); |
34dc7c2f BB |
3776 | } else { |
3777 | err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); | |
3778 | } | |
3779 | ||
3780 | if (err == 0) { | |
3781 | zfs_time_stamper(zp, CONTENT_MODIFIED, tx); | |
d164b209 | 3782 | zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); |
34dc7c2f | 3783 | } |
45d1cae3 | 3784 | dmu_tx_commit(tx); |
34dc7c2f BB |
3785 | |
3786 | out: | |
34dc7c2f BB |
3787 | pvn_write_done(pp, (err ? B_ERROR : 0) | flags); |
3788 | if (offp) | |
3789 | *offp = off; | |
3790 | if (lenp) | |
3791 | *lenp = len; | |
3792 | ||
3793 | return (err); | |
3794 | } | |
3795 | ||
/*
 * Copy the portion of the file indicated from pages into the file.
 * The pages are stored in a page list attached to the file's vnode.
 *
 *	IN:	vp	- vnode of file to push page data to.
 *		off	- position in file to put data.
 *		len	- amount of data to write.  A length of 0 pushes
 *			  every dirty page at or beyond the aligned offset.
 *		flags	- flags to control the operation.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	page_t		*pp;
	size_t		io_len;
	u_offset_t	io_off;
	uint_t		blksz;
	rl_t		*rl;
	int		error = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * Align this request to the file block size in case we kluster.
	 * XXX - this can result in pretty aggressive locking, which can
	 * impact simultaneous read/write access.  One option might be
	 * to break up long requests (len == 0) into block-by-block
	 * operations to get narrower locking.
	 */
	blksz = zp->z_blksz;
	/*
	 * The P2 alignment macros are only applied to power-of-two block
	 * sizes; for any other block size the request starts at offset 0.
	 */
	if (ISP2(blksz))
		io_off = P2ALIGN_TYPED(off, blksz, u_offset_t);
	else
		io_off = 0;
	/*
	 * io_len == 0 selects the "whole list" path below.  That happens
	 * both when the caller passed len == 0 and when the block size is
	 * not a power of two.
	 */
	if (len > 0 && ISP2(blksz))
		io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
	else
		io_len = 0;

	if (io_len == 0) {
		/*
		 * Search the entire vp list for pages >= io_off.
		 */
		rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER);
		error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr);
		goto out;
	}
	rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);

	if (off > zp->z_phys->zp_size) {
		/* past end of file */
		/*
		 * This exit returns directly: it drops the range lock and
		 * leaves without going through the zil_commit() at "out".
		 */
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/* Clamp the walk to the page-rounded end of file. */
	len = MIN(io_len, P2ROUNDUP(zp->z_phys->zp_size, PAGESIZE) - io_off);

	/*
	 * 'off' is reused as the fixed start of the aligned range while
	 * io_off is the moving cursor.  io_len is the stride: it is passed
	 * by address to zfs_putapage(), which stores the length it handled
	 * back into it, and is reset to PAGESIZE when there was nothing to
	 * push at this offset.
	 */
	for (off = io_off; io_off < off + len; io_off += io_len) {
		/*
		 * Invalidating or synchronous requests use page_lookup();
		 * plain asynchronous requests use page_lookup_nowait().
		 */
		if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
			pp = page_lookup(vp, io_off,
			    (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
		} else {
			pp = page_lookup_nowait(vp, io_off,
			    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
		}

		if (pp != NULL && pvn_getdirty(pp, flags)) {
			int err;

			/*
			 * Found a dirty page to push
			 */
			err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr);
			/* Keep going on failure; the last error is returned. */
			if (err)
				error = err;
		} else {
			io_len = PAGESIZE;
		}
	}
out:
	zfs_range_unlock(rl);
	/* Requests without B_ASYNC also commit the intent log. */
	if ((flags & B_ASYNC) == 0)
		zil_commit(zfsvfs->z_log, UINT64_MAX, zp->z_id);
	ZFS_EXIT(zfsvfs);
	return (error);
}
3895 | ||
3896 | /*ARGSUSED*/ | |
3897 | void | |
3898 | zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) | |
3899 | { | |
3900 | znode_t *zp = VTOZ(vp); | |
3901 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
3902 | int error; | |
3903 | ||
3904 | rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); | |
3905 | if (zp->z_dbuf == NULL) { | |
3906 | /* | |
3907 | * The fs has been unmounted, or we did a | |
3908 | * suspend/resume and this file no longer exists. | |
3909 | */ | |
3910 | if (vn_has_cached_data(vp)) { | |
3911 | (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, | |
3912 | B_INVAL, cr); | |
3913 | } | |
3914 | ||
3915 | mutex_enter(&zp->z_lock); | |
3916 | vp->v_count = 0; /* count arrives as 1 */ | |
3917 | mutex_exit(&zp->z_lock); | |
3918 | rw_exit(&zfsvfs->z_teardown_inactive_lock); | |
3919 | zfs_znode_free(zp); | |
3920 | return; | |
3921 | } | |
3922 | ||
3923 | /* | |
3924 | * Attempt to push any data in the page cache. If this fails | |
3925 | * we will get kicked out later in zfs_zinactive(). | |
3926 | */ | |
3927 | if (vn_has_cached_data(vp)) { | |
3928 | (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, | |
3929 | cr); | |
3930 | } | |
3931 | ||
3932 | if (zp->z_atime_dirty && zp->z_unlinked == 0) { | |
3933 | dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); | |
3934 | ||
3935 | dmu_tx_hold_bonus(tx, zp->z_id); | |
3936 | error = dmu_tx_assign(tx, TXG_WAIT); | |
3937 | if (error) { | |
3938 | dmu_tx_abort(tx); | |
3939 | } else { | |
3940 | dmu_buf_will_dirty(zp->z_dbuf, tx); | |
3941 | mutex_enter(&zp->z_lock); | |
3942 | zp->z_atime_dirty = 0; | |
3943 | mutex_exit(&zp->z_lock); | |
3944 | dmu_tx_commit(tx); | |
3945 | } | |
3946 | } | |
3947 | ||
3948 | zfs_zinactive(zp); | |
3949 | rw_exit(&zfsvfs->z_teardown_inactive_lock); | |
3950 | } | |
3951 | ||
3952 | /* | |
3953 | * Bounds-check the seek operation. | |
3954 | * | |
3955 | * IN: vp - vnode seeking within | |
3956 | * ooff - old file offset | |
3957 | * noffp - pointer to new file offset | |
3958 | * ct - caller context | |
3959 | * | |
3960 | * RETURN: 0 if success | |
3961 | * EINVAL if new offset invalid | |
3962 | */ | |
3963 | /* ARGSUSED */ | |
3964 | static int | |
3965 | zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, | |
3966 | caller_context_t *ct) | |
3967 | { | |
3968 | if (vp->v_type == VDIR) | |
3969 | return (0); | |
3970 | return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); | |
3971 | } | |
3972 | ||
3973 | /* | |
3974 | * Pre-filter the generic locking function to trap attempts to place | |
3975 | * a mandatory lock on a memory mapped file. | |
3976 | */ | |
3977 | static int | |
3978 | zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, | |
3979 | flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) | |
3980 | { | |
3981 | znode_t *zp = VTOZ(vp); | |
3982 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
3983 | int error; | |
3984 | ||
3985 | ZFS_ENTER(zfsvfs); | |
3986 | ZFS_VERIFY_ZP(zp); | |
3987 | ||
3988 | /* | |
3989 | * We are following the UFS semantics with respect to mapcnt | |
3990 | * here: If we see that the file is mapped already, then we will | |
3991 | * return an error, but we don't worry about races between this | |
3992 | * function and zfs_map(). | |
3993 | */ | |
3994 | if (zp->z_mapcnt > 0 && MANDMODE((mode_t)zp->z_phys->zp_mode)) { | |
3995 | ZFS_EXIT(zfsvfs); | |
3996 | return (EAGAIN); | |
3997 | } | |
3998 | error = fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct); | |
3999 | ZFS_EXIT(zfsvfs); | |
4000 | return (error); | |
4001 | } | |
4002 | ||
/*
 * If we can't find a page in the cache, we will create a new page
 * and fill it with file data.  For efficiency, we may try to fill
 * multiple pages at once (klustering) to fill up the supplied page
 * list.  Note that the pages to be filled are held with an exclusive
 * lock to prevent access by other threads while they are being filled.
 *
 *	IN:	vp	- vnode of the file being read.
 *		off	- file offset of the first page wanted.
 *		seg	- segment passed through to page creation.
 *		addr	- virtual address passed through to page creation.
 *		plsz	- size in bytes of the supplied page list.
 *		rw	- access mode handed to pvn_plist_init().
 *
 *	OUT:	pl	- page list; *pl is set to NULL when no page was
 *			  created.
 *
 *	RETURN:	0 on success (including the "nothing created" case)
 *		error code from dmu_read(), with ECKSUM reported as EIO
 */
static int
zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
    caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw)
{
	znode_t *zp = VTOZ(vp);
	page_t *pp, *cur_pp;
	objset_t *os = zp->z_zfsvfs->z_os;
	u_offset_t io_off, total;
	size_t io_len;
	int err;

	if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) {
		/*
		 * We only have a single page, don't bother klustering
		 */
		io_off = off;
		io_len = PAGESIZE;
		pp = page_create_va(vp, io_off, io_len,
		    PG_EXCL | PG_WAIT, seg, addr);
	} else {
		/*
		 * Try to find enough pages to fill the page list
		 */
		/* io_off and io_len are filled in by pvn_read_kluster(). */
		pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
		    &io_len, off, plsz, 0);
	}
	if (pp == NULL) {
		/*
		 * The page already exists, nothing to do here.
		 */
		*pl = NULL;
		return (0);
	}

	/*
	 * Fill the pages in the kluster.
	 */
	cur_pp = pp;
	/* One PAGESIZE read per page, following the p_next links from pp. */
	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
		caddr_t va;

		ASSERT3U(io_off, ==, cur_pp->p_offset);
		/* Map the page, read into it, then unmap it. */
		va = zfs_map_page(cur_pp, S_WRITE);
		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
		    DMU_READ_PREFETCH);
		zfs_unmap_page(cur_pp, va);
		if (err) {
			/* On error, toss the entire kluster */
			pvn_read_done(pp, B_ERROR);
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = EIO;
			/* NOTE(review): *pl is not written on this path. */
			return (err);
		}
		cur_pp = cur_pp->p_next;
	}

	/*
	 * Fill in the page list array from the kluster starting
	 * from the desired offset `off'.
	 * NOTE: the page list will always be null terminated.
	 */
	pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	ASSERT(pl == NULL || (*pl)->p_offset == off);

	return (0);
}
4077 | ||
/*
 * Return pointers to the pages for the file region [off, off + len]
 * in the pl array.  If plsz is greater than len, this function may
 * also return page pointers from after the specified region
 * (i.e. the region [off, off + plsz]).  These additional pages are
 * only returned if they are already in the cache, or were created as
 * part of a klustered read.
 *
 *	IN:	vp	- vnode of file to get data from.
 *		off	- position in file to get data from.
 *		len	- amount of data to retrieve.
 *		plsz	- length of provided page list.
 *		seg	- segment to obtain pages for.
 *		addr	- virtual address of fault.
 *		rw	- mode of created pages.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	OUT:	protp	- protection mode of created pages.
 *		pl	- list of pages created.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - atime updated
 */
/* ARGSUSED */
static int
zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	/* Start of the caller's array, used to unwind on error. */
	page_t		**pl0 = pl;
	int		err = 0;

	/* we do our own caching, faultahead is unnecessary */
	if (pl == NULL)
		return (0);
	else if (len > plsz)
		len = plsz;
	else
		len = P2ROUNDUP(len, PAGESIZE);
	ASSERT(plsz >= len);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (protp)
		*protp = PROT_ALL;

	/*
	 * Loop through the requested range [off, off + len) looking
	 * for pages.  If we don't find a page, we will need to create
	 * a new page and fill it with data from the file.
	 */
	while (len > 0) {
		/*
		 * A cache hit yields a one-entry, NULL-terminated list at
		 * pl.  Otherwise zfs_fillpage() populates pl itself; it may
		 * leave *pl NULL, in which case the inner loop is skipped
		 * and the lookup is retried.
		 */
		if (*pl = page_lookup(vp, off, SE_SHARED))
			*(pl+1) = NULL;
		else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw))
			goto out;
		/*
		 * Step over every page just placed in the list.  A klustered
		 * fill can return more pages than were still needed, hence
		 * the len > 0 guard before len is decremented.
		 */
		while (*pl) {
			ASSERT3U((*pl)->p_offset, ==, off);
			off += PAGESIZE;
			addr += PAGESIZE;
			if (len > 0) {
				ASSERT3U(len, >=, PAGESIZE);
				len -= PAGESIZE;
			}
			ASSERT3U(plsz, >=, PAGESIZE);
			plsz -= PAGESIZE;
			pl++;
		}
	}

	/*
	 * Fill out the page array with any pages already in the cache.
	 */
	/*
	 * pl is post-incremented even when the lookup returns NULL, so on
	 * a miss it ends up one slot past the NULL that was just stored.
	 */
	while (plsz > 0 &&
	    (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) {
			off += PAGESIZE;
			plsz -= PAGESIZE;
	}
out:
	if (err) {
		/*
		 * Release any pages we have previously locked.
		 */
		while (pl > pl0)
			page_unlock(*--pl);
	} else {
		ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	}

	/* Terminate the list (at pl0 after an error unwind). */
	*pl = NULL;

	ZFS_EXIT(zfsvfs);
	return (err);
}
4179 | ||
4180 | /* | |
4181 | * Request a memory map for a section of a file. This code interacts | |
4182 | * with common code and the VM system as follows: | |
4183 | * | |
4184 | * common code calls mmap(), which ends up in smmap_common() | |
4185 | * | |
4186 | * this calls VOP_MAP(), which takes you into (say) zfs | |
4187 | * | |
4188 | * zfs_map() calls as_map(), passing segvn_create() as the callback | |
4189 | * | |
4190 | * segvn_create() creates the new segment and calls VOP_ADDMAP() | |
4191 | * | |
4192 | * zfs_addmap() updates z_mapcnt | |
4193 | */ | |
4194 | /*ARGSUSED*/ | |
4195 | static int | |
4196 | zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, | |
4197 | size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, | |
4198 | caller_context_t *ct) | |
4199 | { | |
4200 | znode_t *zp = VTOZ(vp); | |
4201 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
4202 | segvn_crargs_t vn_a; | |
4203 | int error; | |
4204 | ||
4205 | ZFS_ENTER(zfsvfs); | |
4206 | ZFS_VERIFY_ZP(zp); | |
4207 | ||
4208 | if ((prot & PROT_WRITE) && | |
4209 | (zp->z_phys->zp_flags & (ZFS_IMMUTABLE | ZFS_READONLY | | |
4210 | ZFS_APPENDONLY))) { | |
4211 | ZFS_EXIT(zfsvfs); | |
4212 | return (EPERM); | |
4213 | } | |
4214 | ||
4215 | if ((prot & (PROT_READ | PROT_EXEC)) && | |
4216 | (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED)) { | |
4217 | ZFS_EXIT(zfsvfs); | |
4218 | return (EACCES); | |
4219 | } | |
4220 | ||
4221 | if (vp->v_flag & VNOMAP) { | |
4222 | ZFS_EXIT(zfsvfs); | |
4223 | return (ENOSYS); | |
4224 | } | |
4225 | ||
4226 | if (off < 0 || len > MAXOFFSET_T - off) { | |
4227 | ZFS_EXIT(zfsvfs); | |
4228 | return (ENXIO); | |
4229 | } | |
4230 | ||
4231 | if (vp->v_type != VREG) { | |
4232 | ZFS_EXIT(zfsvfs); | |
4233 | return (ENODEV); | |
4234 | } | |
4235 | ||
4236 | /* | |
4237 | * If file is locked, disallow mapping. | |
4238 | */ | |
4239 | if (MANDMODE((mode_t)zp->z_phys->zp_mode) && vn_has_flocks(vp)) { | |
4240 | ZFS_EXIT(zfsvfs); | |
4241 | return (EAGAIN); | |
4242 | } | |
4243 | ||
4244 | as_rangelock(as); | |
4245 | error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); | |
4246 | if (error != 0) { | |
4247 | as_rangeunlock(as); | |
4248 | ZFS_EXIT(zfsvfs); | |
4249 | return (error); | |
4250 | } | |
4251 | ||
4252 | vn_a.vp = vp; | |
4253 | vn_a.offset = (u_offset_t)off; | |
4254 | vn_a.type = flags & MAP_TYPE; | |
4255 | vn_a.prot = prot; | |
4256 | vn_a.maxprot = maxprot; | |
4257 | vn_a.cred = cr; | |
4258 | vn_a.amp = NULL; | |
4259 | vn_a.flags = flags & ~MAP_TYPE; | |
4260 | vn_a.szc = 0; | |
4261 | vn_a.lgrp_mem_policy_flags = 0; | |
4262 | ||
4263 | error = as_map(as, *addrp, len, segvn_create, &vn_a); | |
4264 | ||
4265 | as_rangeunlock(as); | |
4266 | ZFS_EXIT(zfsvfs); | |
4267 | return (error); | |
4268 | } | |
4269 | ||
4270 | /* ARGSUSED */ | |
4271 | static int | |
4272 | zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, | |
4273 | size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, | |
4274 | caller_context_t *ct) | |
4275 | { | |
4276 | uint64_t pages = btopr(len); | |
4277 | ||
4278 | atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); | |
4279 | return (0); | |
4280 | } | |
4281 | ||
4282 | /* | |
4283 | * The reason we push dirty pages as part of zfs_delmap() is so that we get a | |
4284 | * more accurate mtime for the associated file. Since we don't have a way of | |
4285 | * detecting when the data was actually modified, we have to resort to | |
4286 | * heuristics. If an explicit msync() is done, then we mark the mtime when the | |
4287 | * last page is pushed. The problem occurs when the msync() call is omitted, | |
4288 | * which by far the most common case: | |
4289 | * | |
4290 | * open() | |
4291 | * mmap() | |
4292 | * <modify memory> | |
4293 | * munmap() | |
4294 | * close() | |
4295 | * <time lapse> | |
4296 | * putpage() via fsflush | |
4297 | * | |
4298 | * If we wait until fsflush to come along, we can have a modification time that | |
4299 | * is some arbitrary point in the future. In order to prevent this in the | |
4300 | * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is | |
4301 | * torn down. | |
4302 | */ | |
4303 | /* ARGSUSED */ | |
4304 | static int | |
4305 | zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, | |
4306 | size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, | |
4307 | caller_context_t *ct) | |
4308 | { | |
4309 | uint64_t pages = btopr(len); | |
4310 | ||
4311 | ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); | |
4312 | atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); | |
4313 | ||
4314 | if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && | |
4315 | vn_has_cached_data(vp)) | |
4316 | (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); | |
4317 | ||
4318 | return (0); | |
4319 | } | |
4320 | ||
4321 | /* | |
4322 | * Free or allocate space in a file. Currently, this function only | |
4323 | * supports the `F_FREESP' command. However, this command is somewhat | |
4324 | * misnamed, as its functionality includes the ability to allocate as | |
4325 | * well as free space. | |
4326 | * | |
4327 | * IN: vp - vnode of file to free data in. | |
4328 | * cmd - action to take (only F_FREESP supported). | |
4329 | * bfp - section of file to free/alloc. | |
4330 | * flag - current file open mode flags. | |
4331 | * offset - current file offset. | |
4332 | * cr - credentials of caller [UNUSED]. | |
4333 | * ct - caller context. | |
4334 | * | |
4335 | * RETURN: 0 if success | |
4336 | * error code if failure | |
4337 | * | |
4338 | * Timestamps: | |
4339 | * vp - ctime|mtime updated | |
4340 | */ | |
4341 | /* ARGSUSED */ | |
4342 | static int | |
4343 | zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, | |
4344 | offset_t offset, cred_t *cr, caller_context_t *ct) | |
4345 | { | |
4346 | znode_t *zp = VTOZ(vp); | |
4347 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
4348 | uint64_t off, len; | |
4349 | int error; | |
4350 | ||
4351 | ZFS_ENTER(zfsvfs); | |
4352 | ZFS_VERIFY_ZP(zp); | |
4353 | ||
34dc7c2f BB |
4354 | if (cmd != F_FREESP) { |
4355 | ZFS_EXIT(zfsvfs); | |
4356 | return (EINVAL); | |
4357 | } | |
4358 | ||
4359 | if (error = convoff(vp, bfp, 0, offset)) { | |
4360 | ZFS_EXIT(zfsvfs); | |
4361 | return (error); | |
4362 | } | |
4363 | ||
4364 | if (bfp->l_len < 0) { | |
4365 | ZFS_EXIT(zfsvfs); | |
4366 | return (EINVAL); | |
4367 | } | |
4368 | ||
4369 | off = bfp->l_start; | |
4370 | len = bfp->l_len; /* 0 means from off to end of file */ | |
4371 | ||
b128c09f | 4372 | error = zfs_freesp(zp, off, len, flag, TRUE); |
34dc7c2f BB |
4373 | |
4374 | ZFS_EXIT(zfsvfs); | |
4375 | return (error); | |
4376 | } | |
4377 | ||
4378 | /*ARGSUSED*/ | |
4379 | static int | |
4380 | zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) | |
4381 | { | |
4382 | znode_t *zp = VTOZ(vp); | |
4383 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
4384 | uint32_t gen; | |
4385 | uint64_t object = zp->z_id; | |
4386 | zfid_short_t *zfid; | |
4387 | int size, i; | |
4388 | ||
4389 | ZFS_ENTER(zfsvfs); | |
4390 | ZFS_VERIFY_ZP(zp); | |
4391 | gen = (uint32_t)zp->z_gen; | |
4392 | ||
4393 | size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; | |
4394 | if (fidp->fid_len < size) { | |
4395 | fidp->fid_len = size; | |
4396 | ZFS_EXIT(zfsvfs); | |
4397 | return (ENOSPC); | |
4398 | } | |
4399 | ||
4400 | zfid = (zfid_short_t *)fidp; | |
4401 | ||
4402 | zfid->zf_len = size; | |
4403 | ||
4404 | for (i = 0; i < sizeof (zfid->zf_object); i++) | |
4405 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
4406 | ||
4407 | /* Must have a non-zero generation number to distinguish from .zfs */ | |
4408 | if (gen == 0) | |
4409 | gen = 1; | |
4410 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
4411 | zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); | |
4412 | ||
4413 | if (size == LONG_FID_LEN) { | |
4414 | uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); | |
4415 | zfid_long_t *zlfid; | |
4416 | ||
4417 | zlfid = (zfid_long_t *)fidp; | |
4418 | ||
4419 | for (i = 0; i < sizeof (zlfid->zf_setid); i++) | |
4420 | zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); | |
4421 | ||
4422 | /* XXX - this should be the generation number for the objset */ | |
4423 | for (i = 0; i < sizeof (zlfid->zf_setgen); i++) | |
4424 | zlfid->zf_setgen[i] = 0; | |
4425 | } | |
4426 | ||
4427 | ZFS_EXIT(zfsvfs); | |
4428 | return (0); | |
4429 | } | |
4430 | ||
4431 | static int | |
4432 | zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, | |
4433 | caller_context_t *ct) | |
4434 | { | |
4435 | znode_t *zp, *xzp; | |
4436 | zfsvfs_t *zfsvfs; | |
4437 | zfs_dirlock_t *dl; | |
4438 | int error; | |
4439 | ||
4440 | switch (cmd) { | |
4441 | case _PC_LINK_MAX: | |
4442 | *valp = ULONG_MAX; | |
4443 | return (0); | |
4444 | ||
4445 | case _PC_FILESIZEBITS: | |
4446 | *valp = 64; | |
4447 | return (0); | |
4448 | ||
4449 | case _PC_XATTR_EXISTS: | |
4450 | zp = VTOZ(vp); | |
4451 | zfsvfs = zp->z_zfsvfs; | |
4452 | ZFS_ENTER(zfsvfs); | |
4453 | ZFS_VERIFY_ZP(zp); | |
4454 | *valp = 0; | |
4455 | error = zfs_dirent_lock(&dl, zp, "", &xzp, | |
4456 | ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); | |
4457 | if (error == 0) { | |
4458 | zfs_dirent_unlock(dl); | |
4459 | if (!zfs_dirempty(xzp)) | |
4460 | *valp = 1; | |
4461 | VN_RELE(ZTOV(xzp)); | |
4462 | } else if (error == ENOENT) { | |
4463 | /* | |
4464 | * If there aren't extended attributes, it's the | |
4465 | * same as having zero of them. | |
4466 | */ | |
4467 | error = 0; | |
4468 | } | |
4469 | ZFS_EXIT(zfsvfs); | |
4470 | return (error); | |
4471 | ||
4472 | case _PC_SATTR_ENABLED: | |
4473 | case _PC_SATTR_EXISTS: | |
b128c09f | 4474 | *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && |
34dc7c2f BB |
4475 | (vp->v_type == VREG || vp->v_type == VDIR); |
4476 | return (0); | |
4477 | ||
9babb374 BB |
4478 | case _PC_ACCESS_FILTERING: |
4479 | *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && | |
4480 | vp->v_type == VDIR; | |
4481 | return (0); | |
4482 | ||
34dc7c2f BB |
4483 | case _PC_ACL_ENABLED: |
4484 | *valp = _ACL_ACE_ENABLED; | |
4485 | return (0); | |
4486 | ||
4487 | case _PC_MIN_HOLE_SIZE: | |
4488 | *valp = (ulong_t)SPA_MINBLOCKSIZE; | |
4489 | return (0); | |
4490 | ||
4491 | default: | |
4492 | return (fs_pathconf(vp, cmd, valp, cr, ct)); | |
4493 | } | |
4494 | } | |
4495 | ||
4496 | /*ARGSUSED*/ | |
4497 | static int | |
4498 | zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, | |
4499 | caller_context_t *ct) | |
4500 | { | |
4501 | znode_t *zp = VTOZ(vp); | |
4502 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
4503 | int error; | |
4504 | boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; | |
4505 | ||
4506 | ZFS_ENTER(zfsvfs); | |
4507 | ZFS_VERIFY_ZP(zp); | |
4508 | error = zfs_getacl(zp, vsecp, skipaclchk, cr); | |
4509 | ZFS_EXIT(zfsvfs); | |
4510 | ||
4511 | return (error); | |
4512 | } | |
4513 | ||
4514 | /*ARGSUSED*/ | |
4515 | static int | |
4516 | zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, | |
4517 | caller_context_t *ct) | |
4518 | { | |
4519 | znode_t *zp = VTOZ(vp); | |
4520 | zfsvfs_t *zfsvfs = zp->z_zfsvfs; | |
4521 | int error; | |
4522 | boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; | |
4523 | ||
4524 | ZFS_ENTER(zfsvfs); | |
4525 | ZFS_VERIFY_ZP(zp); | |
4526 | error = zfs_setacl(zp, vsecp, skipaclchk, cr); | |
4527 | ZFS_EXIT(zfsvfs); | |
4528 | return (error); | |
4529 | } | |
4530 | ||
/*
 * Error stubs used in the vnode operation templates.
 *
 * Both are deliberately defined with an empty, "old style" parameter
 * list, so the compiler treats them as taking unspecified arguments and
 * does not report type mismatches in the initializations that follow.
 * Do not change the parameter lists to (void).
 */

/* Always fails with EINVAL. */
static int
zfs_inval()
{
	return (EINVAL);
}

/* Always fails with EISDIR. */
static int
zfs_isdir()
{
	return (EISDIR);
}
4551 | /* | |
4552 | * Directory vnode operations template | |
4553 | */ | |
4554 | vnodeops_t *zfs_dvnodeops; | |
4555 | const fs_operation_def_t zfs_dvnodeops_template[] = { | |
4556 | VOPNAME_OPEN, { .vop_open = zfs_open }, | |
4557 | VOPNAME_CLOSE, { .vop_close = zfs_close }, | |
4558 | VOPNAME_READ, { .error = zfs_isdir }, | |
4559 | VOPNAME_WRITE, { .error = zfs_isdir }, | |
4560 | VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, | |
4561 | VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, | |
4562 | VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, | |
4563 | VOPNAME_ACCESS, { .vop_access = zfs_access }, | |
4564 | VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, | |
4565 | VOPNAME_CREATE, { .vop_create = zfs_create }, | |
4566 | VOPNAME_REMOVE, { .vop_remove = zfs_remove }, | |
4567 | VOPNAME_LINK, { .vop_link = zfs_link }, | |
4568 | VOPNAME_RENAME, { .vop_rename = zfs_rename }, | |
4569 | VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, | |
4570 | VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, | |
4571 | VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, | |
4572 | VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, | |
4573 | VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, | |
4574 | VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, | |
4575 | VOPNAME_FID, { .vop_fid = zfs_fid }, | |
4576 | VOPNAME_SEEK, { .vop_seek = zfs_seek }, | |
4577 | VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, | |
4578 | VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, | |
4579 | VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, | |
4580 | VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, | |
4581 | NULL, NULL | |
4582 | }; | |
4583 | ||
4584 | /* | |
4585 | * Regular file vnode operations template | |
4586 | */ | |
4587 | vnodeops_t *zfs_fvnodeops; | |
4588 | const fs_operation_def_t zfs_fvnodeops_template[] = { | |
4589 | VOPNAME_OPEN, { .vop_open = zfs_open }, | |
4590 | VOPNAME_CLOSE, { .vop_close = zfs_close }, | |
4591 | VOPNAME_READ, { .vop_read = zfs_read }, | |
4592 | VOPNAME_WRITE, { .vop_write = zfs_write }, | |
4593 | VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, | |
4594 | VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, | |
4595 | VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, | |
4596 | VOPNAME_ACCESS, { .vop_access = zfs_access }, | |
4597 | VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, | |
4598 | VOPNAME_RENAME, { .vop_rename = zfs_rename }, | |
4599 | VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, | |
4600 | VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, | |
4601 | VOPNAME_FID, { .vop_fid = zfs_fid }, | |
4602 | VOPNAME_SEEK, { .vop_seek = zfs_seek }, | |
4603 | VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, | |
4604 | VOPNAME_SPACE, { .vop_space = zfs_space }, | |
4605 | VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, | |
4606 | VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, | |
4607 | VOPNAME_MAP, { .vop_map = zfs_map }, | |
4608 | VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, | |
4609 | VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, | |
4610 | VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, | |
4611 | VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, | |
4612 | VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, | |
4613 | VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, | |
4614 | NULL, NULL | |
4615 | }; | |
4616 | ||
4617 | /* | |
4618 | * Symbolic link vnode operations template | |
4619 | */ | |
4620 | vnodeops_t *zfs_symvnodeops; | |
4621 | const fs_operation_def_t zfs_symvnodeops_template[] = { | |
4622 | VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, | |
4623 | VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, | |
4624 | VOPNAME_ACCESS, { .vop_access = zfs_access }, | |
4625 | VOPNAME_RENAME, { .vop_rename = zfs_rename }, | |
4626 | VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, | |
4627 | VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, | |
4628 | VOPNAME_FID, { .vop_fid = zfs_fid }, | |
4629 | VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, | |
4630 | VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, | |
4631 | NULL, NULL | |
4632 | }; | |
4633 | ||
9babb374 BB |
4634 | /* |
4635 | * special share hidden files vnode operations template | |
4636 | */ | |
4637 | vnodeops_t *zfs_sharevnodeops; | |
4638 | const fs_operation_def_t zfs_sharevnodeops_template[] = { | |
4639 | VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, | |
4640 | VOPNAME_ACCESS, { .vop_access = zfs_access }, | |
4641 | VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, | |
4642 | VOPNAME_FID, { .vop_fid = zfs_fid }, | |
4643 | VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, | |
4644 | VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, | |
4645 | VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, | |
4646 | VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, | |
4647 | NULL, NULL | |
4648 | }; | |
4649 | ||
34dc7c2f BB |
4650 | /* |
4651 | * Extended attribute directory vnode operations template | |
4652 | * This template is identical to the directory vnodes | |
4653 | * operation template except for restricted operations: | |
4654 | * VOP_MKDIR() | |
4655 | * VOP_SYMLINK() | |
4656 | * Note that there are other restrictions embedded in: | |
4657 | * zfs_create() - restrict type to VREG | |
4658 | * zfs_link() - no links into/out of attribute space | |
4659 | * zfs_rename() - no moves into/out of attribute space | |
4660 | */ | |
4661 | vnodeops_t *zfs_xdvnodeops; | |
4662 | const fs_operation_def_t zfs_xdvnodeops_template[] = { | |
4663 | VOPNAME_OPEN, { .vop_open = zfs_open }, | |
4664 | VOPNAME_CLOSE, { .vop_close = zfs_close }, | |
4665 | VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, | |
4666 | VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, | |
4667 | VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, | |
4668 | VOPNAME_ACCESS, { .vop_access = zfs_access }, | |
4669 | VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, | |
4670 | VOPNAME_CREATE, { .vop_create = zfs_create }, | |
4671 | VOPNAME_REMOVE, { .vop_remove = zfs_remove }, | |
4672 | VOPNAME_LINK, { .vop_link = zfs_link }, | |
4673 | VOPNAME_RENAME, { .vop_rename = zfs_rename }, | |
4674 | VOPNAME_MKDIR, { .error = zfs_inval }, | |
4675 | VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, | |
4676 | VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, | |
4677 | VOPNAME_SYMLINK, { .error = zfs_inval }, | |
4678 | VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, | |
4679 | VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, | |
4680 | VOPNAME_FID, { .vop_fid = zfs_fid }, | |
4681 | VOPNAME_SEEK, { .vop_seek = zfs_seek }, | |
4682 | VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, | |
4683 | VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, | |
4684 | VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, | |
4685 | VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, | |
4686 | NULL, NULL | |
4687 | }; | |
4688 | ||
4689 | /* | |
4690 | * Error vnode operations template | |
4691 | */ | |
4692 | vnodeops_t *zfs_evnodeops; | |
4693 | const fs_operation_def_t zfs_evnodeops_template[] = { | |
4694 | VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, | |
4695 | VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, | |
4696 | NULL, NULL | |
4697 | }; |