]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
34dc7c2f BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
572e2857 | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
49d39798 | 23 | * Copyright (c) 2012 Cyril Plisko. All rights reserved. |
1e0457e7 | 24 | * Copyright (c) 2013, 2017 by Delphix. All rights reserved. |
67a1b037 | 25 | * Copyright (c) 2021, 2022 by Pawel Jakub Dawidek |
34dc7c2f BB |
26 | */ |
27 | ||
34dc7c2f BB |
28 | #include <sys/types.h> |
29 | #include <sys/param.h> | |
34dc7c2f BB |
30 | #include <sys/sysmacros.h> |
31 | #include <sys/cmn_err.h> | |
32 | #include <sys/kmem.h> | |
33 | #include <sys/thread.h> | |
34 | #include <sys/file.h> | |
35 | #include <sys/fcntl.h> | |
36 | #include <sys/vfs.h> | |
37 | #include <sys/fs/zfs.h> | |
38 | #include <sys/zfs_znode.h> | |
39 | #include <sys/zfs_dir.h> | |
40 | #include <sys/zfs_acl.h> | |
41 | #include <sys/zfs_fuid.h> | |
5484965a | 42 | #include <sys/zfs_vnops.h> |
34dc7c2f BB |
43 | #include <sys/spa.h> |
44 | #include <sys/zil.h> | |
45 | #include <sys/byteorder.h> | |
46 | #include <sys/stat.h> | |
34dc7c2f BB |
47 | #include <sys/acl.h> |
48 | #include <sys/atomic.h> | |
49 | #include <sys/cred.h> | |
5484965a | 50 | #include <sys/zpl.h> |
361a7e82 JP |
51 | #include <sys/dmu_objset.h> |
52 | #include <sys/zfeature.h> | |
34dc7c2f | 53 | |
ba434b18 MM |
54 | /* |
55 | * NB: FreeBSD expects to be able to do vnode locking in lookup and | |
56 | * hold the locks across all subsequent VOPs until vput is called. | |
57 | * This means that its zfs vnops routines can't do any internal locking. | |
58 | * In order to have the same contract as the Linux vnops there would | |
59 | * need to be duplicate locked vnops. If the vnops were used more widely | |
60 | * in common code this would likely be preferable. However, currently | |
61 | * this is the only file where this is the case. | |
62 | */ | |
63 | ||
34dc7c2f BB |
64 | /* |
65 | * Functions to replay ZFS intent log (ZIL) records | |
66 | * The functions are called through a function vector (zfs_replay_vector) | |
67 | * which is indexed by the transaction type. | |
68 | */ | |
69 | ||
70 | static void | |
5484965a | 71 | zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, |
e9aa730c | 72 | uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) |
34dc7c2f | 73 | { |
861166b0 | 74 | memset(vap, 0, sizeof (*vap)); |
5484965a | 75 | vap->va_mask = (uint_t)mask; |
5484965a | 76 | vap->va_mode = mode; |
273730d5 | 77 | #if defined(__FreeBSD__) || defined(__APPLE__) |
3d91490f MM |
78 | vap->va_type = IFTOVT(mode); |
79 | #endif | |
5484965a BB |
80 | vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; |
81 | vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid; | |
13a9a6f5 | 82 | vap->va_rdev = zfs_cmpldev(rdev); |
5484965a | 83 | vap->va_nodeid = nodeid; |
34dc7c2f BB |
84 | } |
85 | ||
34dc7c2f | 86 | static int |
867959b5 | 87 | zfs_replay_error(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 88 | { |
ef70eff1 | 89 | (void) arg1, (void) arg2, (void) byteswap; |
2e528b49 | 90 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
91 | } |
92 | ||
93 | static void | |
94 | zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) | |
95 | { | |
96 | xoptattr_t *xoap = NULL; | |
97 | uint64_t *attrs; | |
98 | uint64_t *crtime; | |
99 | uint32_t *bitmap; | |
100 | void *scanstamp; | |
101 | int i; | |
102 | ||
5484965a | 103 | xvap->xva_vattr.va_mask |= ATTR_XVATTR; |
34dc7c2f | 104 | if ((xoap = xva_getxoptattr(xvap)) == NULL) { |
5484965a | 105 | xvap->xva_vattr.va_mask &= ~ATTR_XVATTR; /* shouldn't happen */ |
34dc7c2f BB |
106 | return; |
107 | } | |
108 | ||
109 | ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize); | |
110 | ||
111 | bitmap = &lrattr->lr_attr_bitmap; | |
112 | for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++) | |
113 | xvap->xva_reqattrmap[i] = *bitmap; | |
114 | ||
115 | attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1); | |
116 | crtime = attrs + 1; | |
117 | scanstamp = (caddr_t)(crtime + 2); | |
118 | ||
119 | if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) | |
120 | xoap->xoa_hidden = ((*attrs & XAT0_HIDDEN) != 0); | |
121 | if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) | |
122 | xoap->xoa_system = ((*attrs & XAT0_SYSTEM) != 0); | |
123 | if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) | |
124 | xoap->xoa_archive = ((*attrs & XAT0_ARCHIVE) != 0); | |
125 | if (XVA_ISSET_REQ(xvap, XAT_READONLY)) | |
126 | xoap->xoa_readonly = ((*attrs & XAT0_READONLY) != 0); | |
127 | if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) | |
128 | xoap->xoa_immutable = ((*attrs & XAT0_IMMUTABLE) != 0); | |
129 | if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) | |
130 | xoap->xoa_nounlink = ((*attrs & XAT0_NOUNLINK) != 0); | |
131 | if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) | |
132 | xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0); | |
133 | if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) | |
134 | xoap->xoa_nodump = ((*attrs & XAT0_NODUMP) != 0); | |
135 | if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) | |
136 | xoap->xoa_opaque = ((*attrs & XAT0_OPAQUE) != 0); | |
137 | if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) | |
138 | xoap->xoa_av_modified = ((*attrs & XAT0_AV_MODIFIED) != 0); | |
139 | if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) | |
140 | xoap->xoa_av_quarantined = | |
141 | ((*attrs & XAT0_AV_QUARANTINED) != 0); | |
142 | if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) | |
143 | ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime); | |
9c5167d1 NF |
144 | if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { |
145 | ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID)); | |
146 | ||
861166b0 | 147 | memcpy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ); |
9c5167d1 NF |
148 | } else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { |
149 | /* | |
150 | * XAT_PROJID and XAT_AV_SCANSTAMP will never be valid | |
151 | * at the same time, so we can share the same space. | |
152 | */ | |
861166b0 | 153 | memcpy(&xoap->xoa_projid, scanstamp, sizeof (uint64_t)); |
9c5167d1 | 154 | } |
428870ff BB |
155 | if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) |
156 | xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0); | |
572e2857 BB |
157 | if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) |
158 | xoap->xoa_offline = ((*attrs & XAT0_OFFLINE) != 0); | |
159 | if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) | |
160 | xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0); | |
9c5167d1 NF |
161 | if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) |
162 | xoap->xoa_projinherit = ((*attrs & XAT0_PROJINHERIT) != 0); | |
34dc7c2f BB |
163 | } |
164 | ||
/*
 * Count the distinct FUID domains referenced by an owner/group pair.
 *
 * A non-zero domain index contributes one domain; the group only adds a
 * second one when its index is non-zero and differs from the owner's.
 * The result is therefore 0, 1 or 2.
 */
static int
zfs_replay_domain_cnt(uint64_t uid, uint64_t gid)
{
	const uint64_t owner_idx = FUID_INDEX(uid);
	const uint64_t group_idx = FUID_INDEX(gid);
	int count = (owner_idx != 0) ? 1 : 0;

	if (group_idx != 0 && group_idx != owner_idx)
		count++;

	return (count);
}
181 | ||
182 | static void * | |
183 | zfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start, | |
184 | int domcnt) | |
185 | { | |
186 | int i; | |
187 | ||
188 | for (i = 0; i != domcnt; i++) { | |
189 | fuid_infop->z_domain_table[i] = start; | |
190 | start = (caddr_t)start + strlen(start) + 1; | |
191 | } | |
192 | ||
193 | return (start); | |
194 | } | |
195 | ||
196 | /* | |
197 | * Set the uid/gid in the fuid_info structure. | |
198 | */ | |
199 | static void | |
200 | zfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid) | |
201 | { | |
202 | /* | |
203 | * If owner or group are log specific FUIDs then slurp up | |
204 | * domain information and build zfs_fuid_info_t | |
205 | */ | |
206 | if (IS_EPHEMERAL(uid)) | |
207 | fuid_infop->z_fuid_owner = uid; | |
208 | ||
209 | if (IS_EPHEMERAL(gid)) | |
210 | fuid_infop->z_fuid_group = gid; | |
211 | } | |
212 | ||
213 | /* | |
214 | * Load fuid domains into fuid_info_t | |
215 | */ | |
216 | static zfs_fuid_info_t * | |
217 | zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid) | |
218 | { | |
219 | int domcnt; | |
220 | ||
221 | zfs_fuid_info_t *fuid_infop; | |
222 | ||
223 | fuid_infop = zfs_fuid_info_alloc(); | |
224 | ||
225 | domcnt = zfs_replay_domain_cnt(uid, gid); | |
226 | ||
227 | if (domcnt == 0) | |
228 | return (fuid_infop); | |
229 | ||
230 | fuid_infop->z_domain_table = | |
160987b5 | 231 | kmem_zalloc(domcnt * sizeof (char *), KM_SLEEP); |
34dc7c2f BB |
232 | |
233 | zfs_replay_fuid_ugid(fuid_infop, uid, gid); | |
234 | ||
235 | fuid_infop->z_domain_cnt = domcnt; | |
236 | *end = zfs_replay_fuid_domain_common(fuid_infop, buf, domcnt); | |
237 | return (fuid_infop); | |
238 | } | |
239 | ||
240 | /* | |
241 | * load zfs_fuid_t's and fuid_domains into fuid_info_t | |
242 | */ | |
243 | static zfs_fuid_info_t * | |
244 | zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid, | |
245 | uint64_t gid) | |
246 | { | |
247 | uint64_t *log_fuid = (uint64_t *)start; | |
248 | zfs_fuid_info_t *fuid_infop; | |
249 | int i; | |
250 | ||
251 | fuid_infop = zfs_fuid_info_alloc(); | |
252 | fuid_infop->z_domain_cnt = domcnt; | |
253 | ||
254 | fuid_infop->z_domain_table = | |
160987b5 | 255 | kmem_zalloc(domcnt * sizeof (char *), KM_SLEEP); |
34dc7c2f BB |
256 | |
257 | for (i = 0; i != idcnt; i++) { | |
258 | zfs_fuid_t *zfuid; | |
259 | ||
260 | zfuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP); | |
261 | zfuid->z_logfuid = *log_fuid; | |
262 | zfuid->z_id = -1; | |
263 | zfuid->z_domidx = 0; | |
264 | list_insert_tail(&fuid_infop->z_fuids, zfuid); | |
265 | log_fuid++; | |
266 | } | |
267 | ||
268 | zfs_replay_fuid_ugid(fuid_infop, uid, gid); | |
269 | ||
270 | *end = zfs_replay_fuid_domain_common(fuid_infop, log_fuid, domcnt); | |
271 | return (fuid_infop); | |
272 | } | |
273 | ||
274 | static void | |
275 | zfs_replay_swap_attrs(lr_attr_t *lrattr) | |
276 | { | |
277 | /* swap the lr_attr structure */ | |
278 | byteswap_uint32_array(lrattr, sizeof (*lrattr)); | |
279 | /* swap the bitmap */ | |
280 | byteswap_uint32_array(lrattr + 1, (lrattr->lr_attr_masksize - 1) * | |
281 | sizeof (uint32_t)); | |
282 | /* swap the attributes, create time + 64 bit word for attributes */ | |
283 | byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) * | |
284 | (lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t)); | |
285 | } | |
286 | ||
287 | /* | |
288 | * Replay file create with optional ACL, xvattr information as well | |
289 | * as option FUID information. | |
290 | */ | |
291 | static int | |
867959b5 | 292 | zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 293 | { |
867959b5 BB |
294 | zfsvfs_t *zfsvfs = arg1; |
295 | lr_acl_create_t *lracl = arg2; | |
34dc7c2f BB |
296 | char *name = NULL; /* location determined later */ |
297 | lr_create_t *lr = (lr_create_t *)lracl; | |
298 | znode_t *dzp; | |
657ce253 | 299 | znode_t *zp; |
34dc7c2f | 300 | xvattr_t xva; |
633e8030 | 301 | int vflg = 0; |
5484965a | 302 | vsecattr_t vsec = { 0 }; |
34dc7c2f BB |
303 | lr_attr_t *lrattr; |
304 | void *aclstart; | |
305 | void *fuidstart; | |
306 | size_t xvatlen = 0; | |
307 | uint64_t txtype; | |
50c957f7 NB |
308 | uint64_t objid; |
309 | uint64_t dnodesize; | |
34dc7c2f BB |
310 | int error; |
311 | ||
2a27fd41 AM |
312 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lracl)); |
313 | ||
428870ff | 314 | txtype = (lr->lr_common.lrc_txtype & ~TX_CI); |
34dc7c2f BB |
315 | if (byteswap) { |
316 | byteswap_uint64_array(lracl, sizeof (*lracl)); | |
34dc7c2f BB |
317 | if (txtype == TX_CREATE_ACL_ATTR || |
318 | txtype == TX_MKDIR_ACL_ATTR) { | |
319 | lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); | |
320 | zfs_replay_swap_attrs(lrattr); | |
321 | xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); | |
322 | } | |
323 | ||
324 | aclstart = (caddr_t)(lracl + 1) + xvatlen; | |
325 | zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE); | |
326 | /* swap fuids */ | |
327 | if (lracl->lr_fuidcnt) { | |
328 | byteswap_uint64_array((caddr_t)aclstart + | |
329 | ZIL_ACE_LENGTH(lracl->lr_acl_bytes), | |
330 | lracl->lr_fuidcnt * sizeof (uint64_t)); | |
331 | } | |
332 | } | |
333 | ||
0037b49e | 334 | if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) |
34dc7c2f BB |
335 | return (error); |
336 | ||
50c957f7 NB |
337 | objid = LR_FOID_GET_OBJ(lr->lr_foid); |
338 | dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT; | |
339 | ||
34dc7c2f | 340 | xva_init(&xva); |
6742abf9 | 341 | zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, |
50c957f7 | 342 | lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid); |
34dc7c2f BB |
343 | |
344 | /* | |
345 | * All forms of zfs create (create, mkdir, mkxattrdir, symlink) | |
346 | * eventually end up in zfs_mknode(), which assigns the object's | |
50c957f7 NB |
347 | * creation time, generation number, and dnode size. The generic |
348 | * zfs_create() has no concept of these attributes, so we smuggle | |
349 | * the values inside the vattr's otherwise unused va_ctime, | |
350 | * va_nblocks, and va_fsid fields. | |
34dc7c2f BB |
351 | */ |
352 | ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); | |
353 | xva.xva_vattr.va_nblocks = lr->lr_gen; | |
50c957f7 | 354 | xva.xva_vattr.va_fsid = dnodesize; |
34dc7c2f | 355 | |
035e9611 CC |
356 | error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); |
357 | if (error) | |
34dc7c2f BB |
358 | goto bail; |
359 | ||
360 | if (lr->lr_common.lrc_txtype & TX_CI) | |
361 | vflg |= FIGNORECASE; | |
428870ff | 362 | switch (txtype) { |
34dc7c2f BB |
363 | case TX_CREATE_ACL: |
364 | aclstart = (caddr_t)(lracl + 1); | |
365 | fuidstart = (caddr_t)aclstart + | |
366 | ZIL_ACE_LENGTH(lracl->lr_acl_bytes); | |
0037b49e | 367 | zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, |
34dc7c2f BB |
368 | (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, |
369 | lr->lr_uid, lr->lr_gid); | |
9a70e97f | 370 | zfs_fallthrough; |
34dc7c2f BB |
371 | case TX_CREATE_ACL_ATTR: |
372 | if (name == NULL) { | |
373 | lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); | |
374 | xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); | |
5484965a | 375 | xva.xva_vattr.va_mask |= ATTR_XVATTR; |
34dc7c2f BB |
376 | zfs_replay_xvattr(lrattr, &xva); |
377 | } | |
378 | vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; | |
379 | vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; | |
380 | vsec.vsa_aclcnt = lracl->lr_aclcnt; | |
381 | vsec.vsa_aclentsz = lracl->lr_acl_bytes; | |
382 | vsec.vsa_aclflags = lracl->lr_acl_flags; | |
0037b49e | 383 | if (zfsvfs->z_fuid_replay == NULL) { |
34dc7c2f BB |
384 | fuidstart = (caddr_t)(lracl + 1) + xvatlen + |
385 | ZIL_ACE_LENGTH(lracl->lr_acl_bytes); | |
0037b49e | 386 | zfsvfs->z_fuid_replay = |
34dc7c2f BB |
387 | zfs_replay_fuids(fuidstart, |
388 | (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, | |
389 | lr->lr_uid, lr->lr_gid); | |
390 | } | |
391 | ||
f224eddf YY |
392 | #if defined(__linux__) |
393 | error = zfs_create(dzp, name, &xva.xva_vattr, | |
d4dc53da | 394 | 0, 0, &zp, kcred, vflg, &vsec, zfs_init_idmap); |
f224eddf | 395 | #else |
657ce253 | 396 | error = zfs_create(dzp, name, &xva.xva_vattr, |
2a068a13 | 397 | 0, 0, &zp, kcred, vflg, &vsec, NULL); |
f224eddf | 398 | #endif |
34dc7c2f BB |
399 | break; |
400 | case TX_MKDIR_ACL: | |
401 | aclstart = (caddr_t)(lracl + 1); | |
402 | fuidstart = (caddr_t)aclstart + | |
403 | ZIL_ACE_LENGTH(lracl->lr_acl_bytes); | |
0037b49e | 404 | zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, |
34dc7c2f BB |
405 | (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, |
406 | lr->lr_uid, lr->lr_gid); | |
9a70e97f | 407 | zfs_fallthrough; |
34dc7c2f BB |
408 | case TX_MKDIR_ACL_ATTR: |
409 | if (name == NULL) { | |
410 | lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); | |
411 | xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); | |
412 | zfs_replay_xvattr(lrattr, &xva); | |
413 | } | |
414 | vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; | |
415 | vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; | |
416 | vsec.vsa_aclcnt = lracl->lr_aclcnt; | |
417 | vsec.vsa_aclentsz = lracl->lr_acl_bytes; | |
418 | vsec.vsa_aclflags = lracl->lr_acl_flags; | |
0037b49e | 419 | if (zfsvfs->z_fuid_replay == NULL) { |
34dc7c2f BB |
420 | fuidstart = (caddr_t)(lracl + 1) + xvatlen + |
421 | ZIL_ACE_LENGTH(lracl->lr_acl_bytes); | |
0037b49e | 422 | zfsvfs->z_fuid_replay = |
34dc7c2f BB |
423 | zfs_replay_fuids(fuidstart, |
424 | (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, | |
425 | lr->lr_uid, lr->lr_gid); | |
426 | } | |
f224eddf YY |
427 | #if defined(__linux__) |
428 | error = zfs_mkdir(dzp, name, &xva.xva_vattr, | |
d4dc53da | 429 | &zp, kcred, vflg, &vsec, zfs_init_idmap); |
f224eddf | 430 | #else |
657ce253 | 431 | error = zfs_mkdir(dzp, name, &xva.xva_vattr, |
2a068a13 | 432 | &zp, kcred, vflg, &vsec, NULL); |
f224eddf | 433 | #endif |
34dc7c2f BB |
434 | break; |
435 | default: | |
2e528b49 | 436 | error = SET_ERROR(ENOTSUP); |
34dc7c2f BB |
437 | } |
438 | ||
439 | bail: | |
3d91490f MM |
440 | if (error == 0 && zp != NULL) { |
441 | #ifdef __FreeBSD__ | |
442 | VOP_UNLOCK1(ZTOV(zp)); | |
443 | #endif | |
657ce253 | 444 | zrele(zp); |
3d91490f | 445 | } |
657ce253 | 446 | zrele(dzp); |
34dc7c2f | 447 | |
0037b49e BB |
448 | if (zfsvfs->z_fuid_replay) |
449 | zfs_fuid_info_free(zfsvfs->z_fuid_replay); | |
450 | zfsvfs->z_fuid_replay = NULL; | |
34dc7c2f BB |
451 | |
452 | return (error); | |
453 | } | |
454 | ||
455 | static int | |
867959b5 | 456 | zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 457 | { |
867959b5 BB |
458 | zfsvfs_t *zfsvfs = arg1; |
459 | lr_create_t *lr = arg2; | |
34dc7c2f BB |
460 | char *name = NULL; /* location determined later */ |
461 | char *link; /* symlink content follows name */ | |
462 | znode_t *dzp; | |
657ce253 | 463 | znode_t *zp = NULL; |
34dc7c2f BB |
464 | xvattr_t xva; |
465 | int vflg = 0; | |
466 | size_t lrsize = sizeof (lr_create_t); | |
467 | lr_attr_t *lrattr; | |
633e8030 | 468 | void *start; |
5484965a | 469 | size_t xvatlen; |
34dc7c2f | 470 | uint64_t txtype; |
50c957f7 NB |
471 | uint64_t objid; |
472 | uint64_t dnodesize; | |
34dc7c2f BB |
473 | int error; |
474 | ||
2a27fd41 AM |
475 | ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); |
476 | ||
428870ff | 477 | txtype = (lr->lr_common.lrc_txtype & ~TX_CI); |
34dc7c2f BB |
478 | if (byteswap) { |
479 | byteswap_uint64_array(lr, sizeof (*lr)); | |
34dc7c2f BB |
480 | if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR) |
481 | zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); | |
482 | } | |
483 | ||
484 | ||
0037b49e | 485 | if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) |
34dc7c2f BB |
486 | return (error); |
487 | ||
50c957f7 NB |
488 | objid = LR_FOID_GET_OBJ(lr->lr_foid); |
489 | dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT; | |
490 | ||
34dc7c2f | 491 | xva_init(&xva); |
6742abf9 | 492 | zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, |
50c957f7 | 493 | lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid); |
34dc7c2f BB |
494 | |
495 | /* | |
496 | * All forms of zfs create (create, mkdir, mkxattrdir, symlink) | |
497 | * eventually end up in zfs_mknode(), which assigns the object's | |
50c957f7 NB |
498 | * creation time, generation number, and dnode slot count. The |
499 | * generic zfs_create() has no concept of these attributes, so | |
1e0457e7 MA |
500 | * we smuggle the values inside the vattr's otherwise unused |
501 | * va_ctime, va_nblocks, and va_fsid fields. | |
34dc7c2f BB |
502 | */ |
503 | ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); | |
504 | xva.xva_vattr.va_nblocks = lr->lr_gen; | |
50c957f7 | 505 | xva.xva_vattr.va_fsid = dnodesize; |
34dc7c2f | 506 | |
035e9611 CC |
507 | error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); |
508 | if (error) | |
34dc7c2f BB |
509 | goto out; |
510 | ||
511 | if (lr->lr_common.lrc_txtype & TX_CI) | |
512 | vflg |= FIGNORECASE; | |
513 | ||
514 | /* | |
515 | * Symlinks don't have fuid info, and CIFS never creates | |
516 | * symlinks. | |
517 | * | |
518 | * The _ATTR versions will grab the fuid info in their subcases. | |
519 | */ | |
f04cb31e RY |
520 | if (txtype != TX_SYMLINK && |
521 | txtype != TX_MKDIR_ATTR && | |
522 | txtype != TX_CREATE_ATTR) { | |
34dc7c2f | 523 | start = (lr + 1); |
0037b49e | 524 | zfsvfs->z_fuid_replay = |
34dc7c2f BB |
525 | zfs_replay_fuid_domain(start, &start, |
526 | lr->lr_uid, lr->lr_gid); | |
527 | } | |
528 | ||
428870ff | 529 | switch (txtype) { |
34dc7c2f BB |
530 | case TX_CREATE_ATTR: |
531 | lrattr = (lr_attr_t *)(caddr_t)(lr + 1); | |
532 | xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); | |
533 | zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); | |
534 | start = (caddr_t)(lr + 1) + xvatlen; | |
0037b49e | 535 | zfsvfs->z_fuid_replay = |
34dc7c2f BB |
536 | zfs_replay_fuid_domain(start, &start, |
537 | lr->lr_uid, lr->lr_gid); | |
538 | name = (char *)start; | |
9a70e97f | 539 | zfs_fallthrough; |
34dc7c2f | 540 | |
34dc7c2f BB |
541 | case TX_CREATE: |
542 | if (name == NULL) | |
543 | name = (char *)start; | |
544 | ||
f224eddf YY |
545 | #if defined(__linux__) |
546 | error = zfs_create(dzp, name, &xva.xva_vattr, | |
d4dc53da | 547 | 0, 0, &zp, kcred, vflg, NULL, zfs_init_idmap); |
f224eddf | 548 | #else |
657ce253 | 549 | error = zfs_create(dzp, name, &xva.xva_vattr, |
2a068a13 | 550 | 0, 0, &zp, kcred, vflg, NULL, NULL); |
f224eddf | 551 | #endif |
34dc7c2f BB |
552 | break; |
553 | case TX_MKDIR_ATTR: | |
554 | lrattr = (lr_attr_t *)(caddr_t)(lr + 1); | |
555 | xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); | |
556 | zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); | |
557 | start = (caddr_t)(lr + 1) + xvatlen; | |
0037b49e | 558 | zfsvfs->z_fuid_replay = |
34dc7c2f BB |
559 | zfs_replay_fuid_domain(start, &start, |
560 | lr->lr_uid, lr->lr_gid); | |
561 | name = (char *)start; | |
9a70e97f | 562 | zfs_fallthrough; |
34dc7c2f | 563 | |
34dc7c2f BB |
564 | case TX_MKDIR: |
565 | if (name == NULL) | |
566 | name = (char *)(lr + 1); | |
567 | ||
f224eddf YY |
568 | #if defined(__linux__) |
569 | error = zfs_mkdir(dzp, name, &xva.xva_vattr, | |
d4dc53da | 570 | &zp, kcred, vflg, NULL, zfs_init_idmap); |
f224eddf | 571 | #else |
657ce253 | 572 | error = zfs_mkdir(dzp, name, &xva.xva_vattr, |
2a068a13 | 573 | &zp, kcred, vflg, NULL, NULL); |
f224eddf YY |
574 | #endif |
575 | ||
34dc7c2f BB |
576 | break; |
577 | case TX_MKXATTR: | |
657ce253 | 578 | error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &zp, kcred); |
34dc7c2f BB |
579 | break; |
580 | case TX_SYMLINK: | |
581 | name = (char *)(lr + 1); | |
582 | link = name + strlen(name) + 1; | |
f224eddf YY |
583 | #if defined(__linux__) |
584 | error = zfs_symlink(dzp, name, &xva.xva_vattr, | |
d4dc53da | 585 | link, &zp, kcred, vflg, zfs_init_idmap); |
f224eddf | 586 | #else |
657ce253 | 587 | error = zfs_symlink(dzp, name, &xva.xva_vattr, |
2a068a13 | 588 | link, &zp, kcred, vflg, NULL); |
f224eddf | 589 | #endif |
34dc7c2f BB |
590 | break; |
591 | default: | |
2e528b49 | 592 | error = SET_ERROR(ENOTSUP); |
34dc7c2f BB |
593 | } |
594 | ||
3d91490f MM |
595 | out: |
596 | if (error == 0 && zp != NULL) { | |
13a9a6f5 | 597 | #ifdef __FreeBSD__ |
3d91490f | 598 | VOP_UNLOCK1(ZTOV(zp)); |
13a9a6f5 | 599 | #endif |
657ce253 | 600 | zrele(zp); |
3d91490f | 601 | } |
657ce253 | 602 | zrele(dzp); |
34dc7c2f | 603 | |
0037b49e BB |
604 | if (zfsvfs->z_fuid_replay) |
605 | zfs_fuid_info_free(zfsvfs->z_fuid_replay); | |
606 | zfsvfs->z_fuid_replay = NULL; | |
34dc7c2f BB |
607 | return (error); |
608 | } | |
609 | ||
610 | static int | |
867959b5 | 611 | zfs_replay_remove(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 612 | { |
867959b5 BB |
613 | zfsvfs_t *zfsvfs = arg1; |
614 | lr_remove_t *lr = arg2; | |
34dc7c2f BB |
615 | char *name = (char *)(lr + 1); /* name follows lr_remove_t */ |
616 | znode_t *dzp; | |
617 | int error; | |
618 | int vflg = 0; | |
619 | ||
2a27fd41 AM |
620 | ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); |
621 | ||
34dc7c2f BB |
622 | if (byteswap) |
623 | byteswap_uint64_array(lr, sizeof (*lr)); | |
624 | ||
0037b49e | 625 | if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) |
34dc7c2f BB |
626 | return (error); |
627 | ||
628 | if (lr->lr_common.lrc_txtype & TX_CI) | |
629 | vflg |= FIGNORECASE; | |
630 | ||
631 | switch ((int)lr->lr_common.lrc_txtype) { | |
632 | case TX_REMOVE: | |
657ce253 | 633 | error = zfs_remove(dzp, name, kcred, vflg); |
34dc7c2f BB |
634 | break; |
635 | case TX_RMDIR: | |
657ce253 | 636 | error = zfs_rmdir(dzp, name, NULL, kcred, vflg); |
34dc7c2f BB |
637 | break; |
638 | default: | |
2e528b49 | 639 | error = SET_ERROR(ENOTSUP); |
34dc7c2f BB |
640 | } |
641 | ||
657ce253 | 642 | zrele(dzp); |
34dc7c2f BB |
643 | |
644 | return (error); | |
645 | } | |
646 | ||
647 | static int | |
867959b5 | 648 | zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 649 | { |
867959b5 BB |
650 | zfsvfs_t *zfsvfs = arg1; |
651 | lr_link_t *lr = arg2; | |
34dc7c2f BB |
652 | char *name = (char *)(lr + 1); /* name follows lr_link_t */ |
653 | znode_t *dzp, *zp; | |
654 | int error; | |
5484965a | 655 | int vflg = 0; |
34dc7c2f | 656 | |
2a27fd41 AM |
657 | ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); |
658 | ||
34dc7c2f BB |
659 | if (byteswap) |
660 | byteswap_uint64_array(lr, sizeof (*lr)); | |
661 | ||
0037b49e | 662 | if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) |
34dc7c2f BB |
663 | return (error); |
664 | ||
0037b49e | 665 | if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) { |
657ce253 | 666 | zrele(dzp); |
34dc7c2f BB |
667 | return (error); |
668 | } | |
669 | ||
670 | if (lr->lr_common.lrc_txtype & TX_CI) | |
671 | vflg |= FIGNORECASE; | |
672 | ||
657ce253 | 673 | error = zfs_link(dzp, zp, name, kcred, vflg); |
657ce253 MM |
674 | zrele(zp); |
675 | zrele(dzp); | |
34dc7c2f BB |
676 | |
677 | return (error); | |
678 | } | |
679 | ||
680 | static int | |
dbf6108b AS |
681 | do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname, |
682 | char *tname, uint64_t rflags, vattr_t *wo_vap) | |
34dc7c2f | 683 | { |
34dc7c2f | 684 | znode_t *sdzp, *tdzp; |
dbf6108b | 685 | int error, vflg = 0; |
34dc7c2f | 686 | |
dbf6108b AS |
687 | /* Only Linux currently supports RENAME_* flags. */ |
688 | #ifdef __linux__ | |
689 | VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT)); | |
690 | ||
691 | /* wo_vap must be non-NULL iff. we're doing RENAME_WHITEOUT */ | |
692 | VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL); | |
693 | #else | |
694 | VERIFY0(rflags); | |
695 | #endif | |
34dc7c2f | 696 | |
0037b49e | 697 | if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0) |
34dc7c2f BB |
698 | return (error); |
699 | ||
0037b49e | 700 | if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) { |
657ce253 | 701 | zrele(sdzp); |
34dc7c2f BB |
702 | return (error); |
703 | } | |
704 | ||
705 | if (lr->lr_common.lrc_txtype & TX_CI) | |
706 | vflg |= FIGNORECASE; | |
707 | ||
f224eddf YY |
708 | #if defined(__linux__) |
709 | error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags, | |
d4dc53da | 710 | wo_vap, zfs_init_idmap); |
f224eddf | 711 | #else |
dbf6108b AS |
712 | error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags, |
713 | wo_vap, NULL); | |
f224eddf | 714 | #endif |
34dc7c2f | 715 | |
657ce253 MM |
716 | zrele(tdzp); |
717 | zrele(sdzp); | |
34dc7c2f BB |
718 | return (error); |
719 | } | |
720 | ||
dbf6108b AS |
721 | static int |
722 | zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) | |
723 | { | |
724 | zfsvfs_t *zfsvfs = arg1; | |
725 | lr_rename_t *lr = arg2; | |
2a27fd41 AM |
726 | |
727 | ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); | |
dbf6108b AS |
728 | |
729 | if (byteswap) | |
730 | byteswap_uint64_array(lr, sizeof (*lr)); | |
731 | ||
2a27fd41 AM |
732 | char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ |
733 | char *tname = sname + strlen(sname) + 1; | |
dbf6108b AS |
734 | return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL)); |
735 | } | |
736 | ||
737 | static int | |
738 | zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap) | |
739 | { | |
740 | #ifdef __linux__ | |
741 | zfsvfs_t *zfsvfs = arg1; | |
742 | lr_rename_t *lr = arg2; | |
2a27fd41 AM |
743 | |
744 | ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); | |
dbf6108b AS |
745 | |
746 | if (byteswap) | |
747 | byteswap_uint64_array(lr, sizeof (*lr)); | |
748 | ||
2a27fd41 AM |
749 | char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ |
750 | char *tname = sname + strlen(sname) + 1; | |
dbf6108b AS |
751 | return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE, |
752 | NULL)); | |
753 | #else | |
754 | return (SET_ERROR(ENOTSUP)); | |
755 | #endif | |
756 | } | |
757 | ||
758 | static int | |
759 | zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) | |
760 | { | |
761 | #ifdef __linux__ | |
762 | zfsvfs_t *zfsvfs = arg1; | |
763 | lr_rename_whiteout_t *lr = arg2; | |
764 | int error; | |
dbf6108b AS |
765 | /* For the whiteout file. */ |
766 | xvattr_t xva; | |
767 | uint64_t objid; | |
768 | uint64_t dnodesize; | |
769 | ||
2a27fd41 AM |
770 | ASSERT3U(lr->lr_rename.lr_common.lrc_reclen, >, sizeof (*lr)); |
771 | ||
dbf6108b AS |
772 | if (byteswap) |
773 | byteswap_uint64_array(lr, sizeof (*lr)); | |
774 | ||
775 | objid = LR_FOID_GET_OBJ(lr->lr_wfoid); | |
776 | dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT; | |
777 | ||
778 | xva_init(&xva); | |
779 | zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, | |
780 | lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid); | |
781 | ||
782 | /* | |
783 | * As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which | |
784 | * assigns the object's creation time, generation number, and dnode | |
785 | * slot count. The generic zfs_rename() has no concept of these | |
786 | * attributes, so we smuggle the values inside the vattr's otherwise | |
787 | * unused va_ctime, va_nblocks, and va_fsid fields. | |
788 | */ | |
789 | ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime); | |
790 | xva.xva_vattr.va_nblocks = lr->lr_wgen; | |
791 | xva.xva_vattr.va_fsid = dnodesize; | |
792 | ||
793 | error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); | |
794 | if (error) | |
795 | return (error); | |
796 | ||
2a27fd41 AM |
797 | /* sname and tname follow lr_rename_whiteout_t */ |
798 | char *sname = (char *)(lr + 1); | |
799 | char *tname = sname + strlen(sname) + 1; | |
dbf6108b AS |
800 | return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname, |
801 | RENAME_WHITEOUT, &xva.xva_vattr)); | |
802 | #else | |
803 | return (SET_ERROR(ENOTSUP)); | |
804 | #endif | |
805 | } | |
806 | ||
34dc7c2f | 807 | static int |
867959b5 | 808 | zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 809 | { |
867959b5 BB |
810 | zfsvfs_t *zfsvfs = arg1; |
811 | lr_write_t *lr = arg2; | |
34dc7c2f BB |
812 | char *data = (char *)(lr + 1); /* data follows lr_write_t */ |
813 | znode_t *zp; | |
13a9a6f5 | 814 | int error; |
572e2857 | 815 | uint64_t eod, offset, length; |
34dc7c2f | 816 | |
2a27fd41 AM |
817 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); |
818 | ||
34dc7c2f BB |
819 | if (byteswap) |
820 | byteswap_uint64_array(lr, sizeof (*lr)); | |
821 | ||
0037b49e | 822 | if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { |
34dc7c2f BB |
823 | /* |
824 | * As we can log writes out of order, it's possible the | |
825 | * file has been removed. In this case just drop the write | |
826 | * and return success. | |
827 | */ | |
828 | if (error == ENOENT) | |
829 | error = 0; | |
830 | return (error); | |
831 | } | |
832 | ||
428870ff BB |
833 | offset = lr->lr_offset; |
834 | length = lr->lr_length; | |
572e2857 | 835 | eod = offset + length; /* end of data for this write */ |
428870ff | 836 | |
572e2857 BB |
837 | /* |
838 | * This may be a write from a dmu_sync() for a whole block, | |
839 | * and may extend beyond the current end of the file. | |
840 | * We can't just replay what was written for this TX_WRITE as | |
841 | * a future TX_WRITE2 may extend the eof and the data for that | |
842 | * write needs to be there. So we write the whole block and | |
843 | * reduce the eof. This needs to be done within the single dmu | |
844 | * transaction created within vn_rdwr -> zfs_write. So a possible | |
0037b49e | 845 | * new end of file is passed through in zfsvfs->z_replay_eof |
572e2857 BB |
846 | */ |
847 | ||
0037b49e | 848 | zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */ |
428870ff BB |
849 | |
850 | /* If it's a dmu_sync() block, write the whole block */ | |
851 | if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { | |
852 | uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); | |
853 | if (length < blocksize) { | |
854 | offset -= offset % blocksize; | |
855 | length = blocksize; | |
856 | } | |
572e2857 | 857 | if (zp->z_size < eod) |
0037b49e | 858 | zfsvfs->z_replay_eof = eod; |
428870ff | 859 | } |
13a9a6f5 | 860 | error = zfs_write_simple(zp, data, length, offset, NULL); |
657ce253 | 861 | zrele(zp); |
0037b49e | 862 | zfsvfs->z_replay_eof = 0; /* safety */ |
428870ff BB |
863 | |
864 | return (error); | |
865 | } | |
866 | ||
867 | /* | |
868 | * TX_WRITE2 are only generated when dmu_sync() returns EALREADY | |
869 | * meaning the pool block is already being synced. So now that we always write | |
870 | * out full blocks, all we have to do is expand the eof if | |
871 | * the file is grown. | |
872 | */ | |
873 | static int | |
867959b5 | 874 | zfs_replay_write2(void *arg1, void *arg2, boolean_t byteswap) |
428870ff | 875 | { |
867959b5 BB |
876 | zfsvfs_t *zfsvfs = arg1; |
877 | lr_write_t *lr = arg2; | |
428870ff BB |
878 | znode_t *zp; |
879 | int error; | |
880 | uint64_t end; | |
881 | ||
2a27fd41 AM |
882 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); |
883 | ||
428870ff BB |
884 | if (byteswap) |
885 | byteswap_uint64_array(lr, sizeof (*lr)); | |
886 | ||
0037b49e | 887 | if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
428870ff BB |
888 | return (error); |
889 | ||
572e2857 | 890 | top: |
428870ff BB |
891 | end = lr->lr_offset + lr->lr_length; |
892 | if (end > zp->z_size) { | |
0037b49e | 893 | dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); |
572e2857 | 894 | |
428870ff | 895 | zp->z_size = end; |
572e2857 BB |
896 | dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); |
897 | error = dmu_tx_assign(tx, TXG_WAIT); | |
898 | if (error) { | |
657ce253 | 899 | zrele(zp); |
572e2857 BB |
900 | if (error == ERESTART) { |
901 | dmu_tx_wait(tx); | |
902 | dmu_tx_abort(tx); | |
903 | goto top; | |
904 | } | |
905 | dmu_tx_abort(tx); | |
906 | return (error); | |
907 | } | |
0037b49e | 908 | (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), |
572e2857 BB |
909 | (void *)&zp->z_size, sizeof (uint64_t), tx); |
910 | ||
911 | /* Ensure the replayed seq is updated */ | |
0037b49e | 912 | (void) zil_replaying(zfsvfs->z_log, tx); |
572e2857 BB |
913 | |
914 | dmu_tx_commit(tx); | |
428870ff | 915 | } |
34dc7c2f | 916 | |
657ce253 | 917 | zrele(zp); |
34dc7c2f BB |
918 | |
919 | return (error); | |
920 | } | |
921 | ||
922 | static int | |
867959b5 | 923 | zfs_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 924 | { |
867959b5 BB |
925 | zfsvfs_t *zfsvfs = arg1; |
926 | lr_truncate_t *lr = arg2; | |
34dc7c2f | 927 | znode_t *zp; |
861166b0 | 928 | flock64_t fl = {0}; |
34dc7c2f BB |
929 | int error; |
930 | ||
2a27fd41 AM |
931 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); |
932 | ||
34dc7c2f BB |
933 | if (byteswap) |
934 | byteswap_uint64_array(lr, sizeof (*lr)); | |
935 | ||
0037b49e | 936 | if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
34dc7c2f | 937 | return (error); |
34dc7c2f | 938 | |
34dc7c2f | 939 | fl.l_type = F_WRLCK; |
126d0fa7 | 940 | fl.l_whence = SEEK_SET; |
34dc7c2f BB |
941 | fl.l_start = lr->lr_offset; |
942 | fl.l_len = lr->lr_length; | |
943 | ||
657ce253 | 944 | error = zfs_space(zp, F_FREESP, &fl, O_RDWR | O_LARGEFILE, |
633e8030 | 945 | lr->lr_offset, kcred); |
34dc7c2f | 946 | |
657ce253 | 947 | zrele(zp); |
34dc7c2f BB |
948 | |
949 | return (error); | |
950 | } | |
951 | ||
952 | static int | |
867959b5 | 953 | zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 954 | { |
867959b5 BB |
955 | zfsvfs_t *zfsvfs = arg1; |
956 | lr_setattr_t *lr = arg2; | |
34dc7c2f BB |
957 | znode_t *zp; |
958 | xvattr_t xva; | |
5484965a | 959 | vattr_t *vap = &xva.xva_vattr; |
34dc7c2f BB |
960 | int error; |
961 | void *start; | |
962 | ||
2a27fd41 AM |
963 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); |
964 | ||
34dc7c2f BB |
965 | xva_init(&xva); |
966 | if (byteswap) { | |
967 | byteswap_uint64_array(lr, sizeof (*lr)); | |
968 | ||
5484965a | 969 | if ((lr->lr_mask & ATTR_XVATTR) && |
0037b49e | 970 | zfsvfs->z_version >= ZPL_VERSION_INITIAL) |
34dc7c2f BB |
971 | zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); |
972 | } | |
973 | ||
0037b49e | 974 | if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
34dc7c2f | 975 | return (error); |
34dc7c2f | 976 | |
5484965a BB |
977 | zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode, |
978 | lr->lr_uid, lr->lr_gid, 0, lr->lr_foid); | |
34dc7c2f | 979 | |
5484965a BB |
980 | vap->va_size = lr->lr_size; |
981 | ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime); | |
982 | ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime); | |
87f9371a NB |
983 | gethrestime(&vap->va_ctime); |
984 | vap->va_mask |= ATTR_CTIME; | |
34dc7c2f BB |
985 | |
986 | /* | |
987 | * Fill in xvattr_t portions if necessary. | |
988 | */ | |
989 | ||
990 | start = (lr_setattr_t *)(lr + 1); | |
5484965a | 991 | if (vap->va_mask & ATTR_XVATTR) { |
34dc7c2f BB |
992 | zfs_replay_xvattr((lr_attr_t *)start, &xva); |
993 | start = (caddr_t)start + | |
994 | ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize); | |
995 | } else | |
5484965a | 996 | xva.xva_vattr.va_mask &= ~ATTR_XVATTR; |
34dc7c2f | 997 | |
0037b49e | 998 | zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, |
34dc7c2f BB |
999 | lr->lr_uid, lr->lr_gid); |
1000 | ||
f224eddf | 1001 | #if defined(__linux__) |
d4dc53da | 1002 | error = zfs_setattr(zp, vap, 0, kcred, zfs_init_idmap); |
f224eddf | 1003 | #else |
2a068a13 | 1004 | error = zfs_setattr(zp, vap, 0, kcred, NULL); |
f224eddf | 1005 | #endif |
34dc7c2f | 1006 | |
0037b49e BB |
1007 | zfs_fuid_info_free(zfsvfs->z_fuid_replay); |
1008 | zfsvfs->z_fuid_replay = NULL; | |
657ce253 | 1009 | zrele(zp); |
34dc7c2f BB |
1010 | |
1011 | return (error); | |
1012 | } | |
1013 | ||
361a7e82 JP |
1014 | static int |
1015 | zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap) | |
1016 | { | |
1017 | zfsvfs_t *zfsvfs = arg1; | |
1018 | lr_setsaxattr_t *lr = arg2; | |
1019 | znode_t *zp; | |
1020 | nvlist_t *nvl; | |
1021 | size_t sa_size; | |
1022 | char *name; | |
1023 | char *value; | |
1024 | size_t size; | |
1025 | int error = 0; | |
1026 | ||
2a27fd41 AM |
1027 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); |
1028 | ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr) + lr->lr_size); | |
1029 | ||
361a7e82 JP |
1030 | ASSERT(spa_feature_is_active(zfsvfs->z_os->os_spa, |
1031 | SPA_FEATURE_ZILSAXATTR)); | |
1032 | if (byteswap) | |
1033 | byteswap_uint64_array(lr, sizeof (*lr)); | |
1034 | ||
1035 | if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) | |
1036 | return (error); | |
1037 | ||
1038 | rw_enter(&zp->z_xattr_lock, RW_WRITER); | |
1039 | mutex_enter(&zp->z_lock); | |
1040 | if (zp->z_xattr_cached == NULL) | |
1041 | error = zfs_sa_get_xattr(zp); | |
1042 | mutex_exit(&zp->z_lock); | |
1043 | ||
1044 | if (error) | |
1045 | goto out; | |
1046 | ||
1047 | ASSERT(zp->z_xattr_cached); | |
1048 | nvl = zp->z_xattr_cached; | |
1049 | ||
1050 | /* Get xattr name, value and size from log record */ | |
1051 | size = lr->lr_size; | |
1052 | name = (char *)(lr + 1); | |
1053 | if (size == 0) { | |
1054 | value = NULL; | |
1055 | error = nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); | |
1056 | } else { | |
1057 | value = name + strlen(name) + 1; | |
1058 | /* Limited to 32k to keep nvpair memory allocations small */ | |
1059 | if (size > DXATTR_MAX_ENTRY_SIZE) { | |
1060 | error = SET_ERROR(EFBIG); | |
1061 | goto out; | |
1062 | } | |
1063 | ||
1064 | /* Prevent the DXATTR SA from consuming the entire SA region */ | |
1065 | error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); | |
1066 | if (error) | |
1067 | goto out; | |
1068 | ||
1069 | if (sa_size > DXATTR_MAX_SA_SIZE) { | |
1070 | error = SET_ERROR(EFBIG); | |
1071 | goto out; | |
1072 | } | |
1073 | ||
1074 | error = nvlist_add_byte_array(nvl, name, (uchar_t *)value, | |
1075 | size); | |
1076 | } | |
1077 | ||
1078 | /* | |
1079 | * Update the SA for additions, modifications, and removals. On | |
1080 | * error drop the inconsistent cached version of the nvlist, it | |
1081 | * will be reconstructed from the ARC when next accessed. | |
1082 | */ | |
1083 | if (error == 0) | |
1084 | error = zfs_sa_set_xattr(zp, name, value, size); | |
1085 | ||
1086 | if (error) { | |
1087 | nvlist_free(nvl); | |
1088 | zp->z_xattr_cached = NULL; | |
1089 | } | |
1090 | ||
1091 | out: | |
1092 | rw_exit(&zp->z_xattr_lock); | |
1093 | zrele(zp); | |
1094 | return (error); | |
1095 | } | |
1096 | ||
34dc7c2f | 1097 | static int |
867959b5 | 1098 | zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 1099 | { |
867959b5 BB |
1100 | zfsvfs_t *zfsvfs = arg1; |
1101 | lr_acl_v0_t *lr = arg2; | |
34dc7c2f | 1102 | ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */ |
861166b0 | 1103 | vsecattr_t vsa = {0}; |
34dc7c2f BB |
1104 | znode_t *zp; |
1105 | int error; | |
1106 | ||
2a27fd41 AM |
1107 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); |
1108 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr) + | |
1109 | sizeof (ace_t) * lr->lr_aclcnt); | |
1110 | ||
34dc7c2f BB |
1111 | if (byteswap) { |
1112 | byteswap_uint64_array(lr, sizeof (*lr)); | |
1113 | zfs_oldace_byteswap(ace, lr->lr_aclcnt); | |
1114 | } | |
1115 | ||
0037b49e | 1116 | if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
34dc7c2f | 1117 | return (error); |
34dc7c2f | 1118 | |
34dc7c2f BB |
1119 | vsa.vsa_mask = VSA_ACE | VSA_ACECNT; |
1120 | vsa.vsa_aclcnt = lr->lr_aclcnt; | |
b128c09f BB |
1121 | vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt; |
1122 | vsa.vsa_aclflags = 0; | |
34dc7c2f BB |
1123 | vsa.vsa_aclentp = ace; |
1124 | ||
657ce253 | 1125 | error = zfs_setsecattr(zp, &vsa, 0, kcred); |
34dc7c2f | 1126 | |
657ce253 | 1127 | zrele(zp); |
34dc7c2f BB |
1128 | |
1129 | return (error); | |
1130 | } | |
1131 | ||
1132 | /* | |
1133 | * Replaying ACLs is complicated by FUID support. | |
1134 | * The log record may contain some optional data | |
1135 | * to be used for replaying FUID's. These pieces | |
1136 | * are the actual FUIDs that were created initially. | |
1137 | * The FUID table index may no longer be valid and | |
1138 | * during zfs_create() a new index may be assigned. | |
1139 | * Because of this the log will contain the original | |
4e33ba4c | 1140 | * domain+rid in order to create a new FUID. |
34dc7c2f BB |
1141 | * |
1142 | * The individual ACEs may contain an ephemeral uid/gid which is no | |
1143 | * longer valid and will need to be replaced with an actual FUID. | |
1144 | * | |
1145 | */ | |
1146 | static int | |
867959b5 | 1147 | zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap) |
34dc7c2f | 1148 | { |
867959b5 BB |
1149 | zfsvfs_t *zfsvfs = arg1; |
1150 | lr_acl_t *lr = arg2; | |
34dc7c2f | 1151 | ace_t *ace = (ace_t *)(lr + 1); |
861166b0 | 1152 | vsecattr_t vsa = {0}; |
34dc7c2f BB |
1153 | znode_t *zp; |
1154 | int error; | |
1155 | ||
2a27fd41 AM |
1156 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); |
1157 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr) + lr->lr_acl_bytes); | |
1158 | ||
34dc7c2f BB |
1159 | if (byteswap) { |
1160 | byteswap_uint64_array(lr, sizeof (*lr)); | |
1161 | zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE); | |
1162 | if (lr->lr_fuidcnt) { | |
1163 | byteswap_uint64_array((caddr_t)ace + | |
1164 | ZIL_ACE_LENGTH(lr->lr_acl_bytes), | |
1165 | lr->lr_fuidcnt * sizeof (uint64_t)); | |
1166 | } | |
1167 | } | |
1168 | ||
0037b49e | 1169 | if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
34dc7c2f | 1170 | return (error); |
34dc7c2f | 1171 | |
34dc7c2f BB |
1172 | vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS; |
1173 | vsa.vsa_aclcnt = lr->lr_aclcnt; | |
1174 | vsa.vsa_aclentp = ace; | |
1175 | vsa.vsa_aclentsz = lr->lr_acl_bytes; | |
1176 | vsa.vsa_aclflags = lr->lr_acl_flags; | |
1177 | ||
1178 | if (lr->lr_fuidcnt) { | |
1179 | void *fuidstart = (caddr_t)ace + | |
1180 | ZIL_ACE_LENGTH(lr->lr_acl_bytes); | |
1181 | ||
0037b49e | 1182 | zfsvfs->z_fuid_replay = |
34dc7c2f BB |
1183 | zfs_replay_fuids(fuidstart, &fuidstart, |
1184 | lr->lr_fuidcnt, lr->lr_domcnt, 0, 0); | |
1185 | } | |
1186 | ||
657ce253 | 1187 | error = zfs_setsecattr(zp, &vsa, 0, kcred); |
34dc7c2f | 1188 | |
0037b49e BB |
1189 | if (zfsvfs->z_fuid_replay) |
1190 | zfs_fuid_info_free(zfsvfs->z_fuid_replay); | |
34dc7c2f | 1191 | |
0037b49e | 1192 | zfsvfs->z_fuid_replay = NULL; |
657ce253 | 1193 | zrele(zp); |
34dc7c2f BB |
1194 | |
1195 | return (error); | |
1196 | } | |
1197 | ||
67a1b037 PJD |
1198 | static int |
1199 | zfs_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap) | |
1200 | { | |
1201 | zfsvfs_t *zfsvfs = arg1; | |
1202 | lr_clone_range_t *lr = arg2; | |
1203 | znode_t *zp; | |
1204 | int error; | |
1205 | ||
2a27fd41 AM |
1206 | ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); |
1207 | ASSERT3U(lr->lr_common.lrc_reclen, >=, offsetof(lr_clone_range_t, | |
1208 | lr_bps[lr->lr_nbps])); | |
1209 | ||
67a1b037 PJD |
1210 | if (byteswap) |
1211 | byteswap_uint64_array(lr, sizeof (*lr)); | |
1212 | ||
1213 | if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { | |
1214 | /* | |
1215 | * Clones can be logged out of order, so don't be surprised if | |
1216 | * the file is gone - just return success. | |
1217 | */ | |
1218 | if (error == ENOENT) | |
1219 | error = 0; | |
1220 | return (error); | |
1221 | } | |
1222 | ||
1223 | error = zfs_clone_range_replay(zp, lr->lr_offset, lr->lr_length, | |
1224 | lr->lr_blksz, lr->lr_bps, lr->lr_nbps); | |
1225 | ||
1226 | zrele(zp); | |
1227 | return (error); | |
1228 | } | |
1229 | ||
34dc7c2f BB |
1230 | /* |
1231 | * Callback vectors for replaying records | |
1232 | */ | |
18168da7 | 1233 | zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = { |
867959b5 BB |
1234 | zfs_replay_error, /* no such type */ |
1235 | zfs_replay_create, /* TX_CREATE */ | |
1236 | zfs_replay_create, /* TX_MKDIR */ | |
1237 | zfs_replay_create, /* TX_MKXATTR */ | |
1238 | zfs_replay_create, /* TX_SYMLINK */ | |
1239 | zfs_replay_remove, /* TX_REMOVE */ | |
1240 | zfs_replay_remove, /* TX_RMDIR */ | |
1241 | zfs_replay_link, /* TX_LINK */ | |
1242 | zfs_replay_rename, /* TX_RENAME */ | |
1243 | zfs_replay_write, /* TX_WRITE */ | |
1244 | zfs_replay_truncate, /* TX_TRUNCATE */ | |
1245 | zfs_replay_setattr, /* TX_SETATTR */ | |
1246 | zfs_replay_acl_v0, /* TX_ACL_V0 */ | |
1247 | zfs_replay_acl, /* TX_ACL */ | |
1248 | zfs_replay_create_acl, /* TX_CREATE_ACL */ | |
1249 | zfs_replay_create, /* TX_CREATE_ATTR */ | |
1250 | zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */ | |
1251 | zfs_replay_create_acl, /* TX_MKDIR_ACL */ | |
1252 | zfs_replay_create, /* TX_MKDIR_ATTR */ | |
1253 | zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ | |
1254 | zfs_replay_write2, /* TX_WRITE2 */ | |
361a7e82 | 1255 | zfs_replay_setsaxattr, /* TX_SETSAXATTR */ |
dbf6108b AS |
1256 | zfs_replay_rename_exchange, /* TX_RENAME_EXCHANGE */ |
1257 | zfs_replay_rename_whiteout, /* TX_RENAME_WHITEOUT */ | |
67a1b037 | 1258 | zfs_replay_clone_range, /* TX_CLONE_RANGE */ |
34dc7c2f | 1259 | }; |