]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/zfs_replay.c
Use SEEK_{SET,CUR,END} for file seek "whence"
[mirror_zfs.git] / module / zfs / zfs_replay.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
572e2857 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
49d39798 23 * Copyright (c) 2012 Cyril Plisko. All rights reserved.
1e0457e7 24 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
34dc7c2f
BB
25 */
26
34dc7c2f
BB
27#include <sys/types.h>
28#include <sys/param.h>
34dc7c2f
BB
29#include <sys/sysmacros.h>
30#include <sys/cmn_err.h>
31#include <sys/kmem.h>
32#include <sys/thread.h>
33#include <sys/file.h>
34#include <sys/fcntl.h>
35#include <sys/vfs.h>
36#include <sys/fs/zfs.h>
37#include <sys/zfs_znode.h>
38#include <sys/zfs_dir.h>
39#include <sys/zfs_acl.h>
40#include <sys/zfs_fuid.h>
5484965a 41#include <sys/zfs_vnops.h>
34dc7c2f
BB
42#include <sys/spa.h>
43#include <sys/zil.h>
44#include <sys/byteorder.h>
45#include <sys/stat.h>
46#include <sys/mode.h>
47#include <sys/acl.h>
48#include <sys/atomic.h>
49#include <sys/cred.h>
5484965a 50#include <sys/zpl.h>
34dc7c2f
BB
51
52/*
53 * Functions to replay ZFS intent log (ZIL) records
54 * The functions are called through a function vector (zfs_replay_vector)
55 * which is indexed by the transaction type.
56 */
57
58static void
5484965a 59zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode,
e9aa730c 60 uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid)
34dc7c2f 61{
5484965a
BB
62 bzero(vap, sizeof (*vap));
63 vap->va_mask = (uint_t)mask;
64 vap->va_type = IFTOVT(mode);
65 vap->va_mode = mode;
66 vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid;
67 vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid;
68 vap->va_rdev = rdev;
69 vap->va_nodeid = nodeid;
34dc7c2f
BB
70}
71
72/* ARGSUSED */
73static int
867959b5 74zfs_replay_error(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 75{
2e528b49 76 return (SET_ERROR(ENOTSUP));
34dc7c2f
BB
77}
78
79static void
80zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
81{
82 xoptattr_t *xoap = NULL;
83 uint64_t *attrs;
84 uint64_t *crtime;
85 uint32_t *bitmap;
86 void *scanstamp;
87 int i;
88
5484965a 89 xvap->xva_vattr.va_mask |= ATTR_XVATTR;
34dc7c2f 90 if ((xoap = xva_getxoptattr(xvap)) == NULL) {
5484965a 91 xvap->xva_vattr.va_mask &= ~ATTR_XVATTR; /* shouldn't happen */
34dc7c2f
BB
92 return;
93 }
94
95 ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize);
96
97 bitmap = &lrattr->lr_attr_bitmap;
98 for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++)
99 xvap->xva_reqattrmap[i] = *bitmap;
100
101 attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1);
102 crtime = attrs + 1;
103 scanstamp = (caddr_t)(crtime + 2);
104
105 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
106 xoap->xoa_hidden = ((*attrs & XAT0_HIDDEN) != 0);
107 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
108 xoap->xoa_system = ((*attrs & XAT0_SYSTEM) != 0);
109 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
110 xoap->xoa_archive = ((*attrs & XAT0_ARCHIVE) != 0);
111 if (XVA_ISSET_REQ(xvap, XAT_READONLY))
112 xoap->xoa_readonly = ((*attrs & XAT0_READONLY) != 0);
113 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
114 xoap->xoa_immutable = ((*attrs & XAT0_IMMUTABLE) != 0);
115 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
116 xoap->xoa_nounlink = ((*attrs & XAT0_NOUNLINK) != 0);
117 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
118 xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0);
119 if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
120 xoap->xoa_nodump = ((*attrs & XAT0_NODUMP) != 0);
121 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
122 xoap->xoa_opaque = ((*attrs & XAT0_OPAQUE) != 0);
123 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
124 xoap->xoa_av_modified = ((*attrs & XAT0_AV_MODIFIED) != 0);
125 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
126 xoap->xoa_av_quarantined =
127 ((*attrs & XAT0_AV_QUARANTINED) != 0);
128 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
129 ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime);
9c5167d1
NF
130 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
131 ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID));
132
34dc7c2f 133 bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ);
9c5167d1
NF
134 } else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
135 /*
136 * XAT_PROJID and XAT_AV_SCANSTAMP will never be valid
137 * at the same time, so we can share the same space.
138 */
139 bcopy(scanstamp, &xoap->xoa_projid, sizeof (uint64_t));
140 }
428870ff
BB
141 if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
142 xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0);
572e2857
BB
143 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
144 xoap->xoa_offline = ((*attrs & XAT0_OFFLINE) != 0);
145 if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
146 xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0);
9c5167d1
NF
147 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT))
148 xoap->xoa_projinherit = ((*attrs & XAT0_PROJINHERIT) != 0);
34dc7c2f
BB
149}
150
/*
 * Count the distinct non-zero FUID domain indexes referenced by uid and
 * gid.  The result is 0, 1 or 2; a gid that shares the uid's index is not
 * counted twice.
 */
static int
zfs_replay_domain_cnt(uint64_t uid, uint64_t gid)
{
	uint64_t owner_idx = FUID_INDEX(uid);
	uint64_t group_idx = FUID_INDEX(gid);
	int count = (owner_idx != 0) ? 1 : 0;

	if (group_idx != 0 && group_idx != owner_idx)
		count++;

	return (count);
}
167
168static void *
169zfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start,
170 int domcnt)
171{
172 int i;
173
174 for (i = 0; i != domcnt; i++) {
175 fuid_infop->z_domain_table[i] = start;
176 start = (caddr_t)start + strlen(start) + 1;
177 }
178
179 return (start);
180}
181
182/*
183 * Set the uid/gid in the fuid_info structure.
184 */
185static void
186zfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid)
187{
188 /*
189 * If owner or group are log specific FUIDs then slurp up
190 * domain information and build zfs_fuid_info_t
191 */
192 if (IS_EPHEMERAL(uid))
193 fuid_infop->z_fuid_owner = uid;
194
195 if (IS_EPHEMERAL(gid))
196 fuid_infop->z_fuid_group = gid;
197}
198
199/*
200 * Load fuid domains into fuid_info_t
201 */
202static zfs_fuid_info_t *
203zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid)
204{
205 int domcnt;
206
207 zfs_fuid_info_t *fuid_infop;
208
209 fuid_infop = zfs_fuid_info_alloc();
210
211 domcnt = zfs_replay_domain_cnt(uid, gid);
212
213 if (domcnt == 0)
214 return (fuid_infop);
215
216 fuid_infop->z_domain_table =
160987b5 217 kmem_zalloc(domcnt * sizeof (char *), KM_SLEEP);
34dc7c2f
BB
218
219 zfs_replay_fuid_ugid(fuid_infop, uid, gid);
220
221 fuid_infop->z_domain_cnt = domcnt;
222 *end = zfs_replay_fuid_domain_common(fuid_infop, buf, domcnt);
223 return (fuid_infop);
224}
225
226/*
227 * load zfs_fuid_t's and fuid_domains into fuid_info_t
228 */
229static zfs_fuid_info_t *
230zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid,
231 uint64_t gid)
232{
233 uint64_t *log_fuid = (uint64_t *)start;
234 zfs_fuid_info_t *fuid_infop;
235 int i;
236
237 fuid_infop = zfs_fuid_info_alloc();
238 fuid_infop->z_domain_cnt = domcnt;
239
240 fuid_infop->z_domain_table =
160987b5 241 kmem_zalloc(domcnt * sizeof (char *), KM_SLEEP);
34dc7c2f
BB
242
243 for (i = 0; i != idcnt; i++) {
244 zfs_fuid_t *zfuid;
245
246 zfuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP);
247 zfuid->z_logfuid = *log_fuid;
248 zfuid->z_id = -1;
249 zfuid->z_domidx = 0;
250 list_insert_tail(&fuid_infop->z_fuids, zfuid);
251 log_fuid++;
252 }
253
254 zfs_replay_fuid_ugid(fuid_infop, uid, gid);
255
256 *end = zfs_replay_fuid_domain_common(fuid_infop, log_fuid, domcnt);
257 return (fuid_infop);
258}
259
260static void
261zfs_replay_swap_attrs(lr_attr_t *lrattr)
262{
263 /* swap the lr_attr structure */
264 byteswap_uint32_array(lrattr, sizeof (*lrattr));
265 /* swap the bitmap */
266 byteswap_uint32_array(lrattr + 1, (lrattr->lr_attr_masksize - 1) *
267 sizeof (uint32_t));
268 /* swap the attributes, create time + 64 bit word for attributes */
269 byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) *
270 (lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t));
271}
272
273/*
274 * Replay file create with optional ACL, xvattr information as well
275 * as option FUID information.
276 */
277static int
867959b5 278zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 279{
867959b5
BB
280 zfsvfs_t *zfsvfs = arg1;
281 lr_acl_create_t *lracl = arg2;
34dc7c2f
BB
282 char *name = NULL; /* location determined later */
283 lr_create_t *lr = (lr_create_t *)lracl;
284 znode_t *dzp;
633e8030 285 struct inode *ip = NULL;
34dc7c2f 286 xvattr_t xva;
633e8030 287 int vflg = 0;
5484965a 288 vsecattr_t vsec = { 0 };
34dc7c2f
BB
289 lr_attr_t *lrattr;
290 void *aclstart;
291 void *fuidstart;
292 size_t xvatlen = 0;
293 uint64_t txtype;
50c957f7
NB
294 uint64_t objid;
295 uint64_t dnodesize;
34dc7c2f
BB
296 int error;
297
428870ff 298 txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
34dc7c2f
BB
299 if (byteswap) {
300 byteswap_uint64_array(lracl, sizeof (*lracl));
34dc7c2f
BB
301 if (txtype == TX_CREATE_ACL_ATTR ||
302 txtype == TX_MKDIR_ACL_ATTR) {
303 lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
304 zfs_replay_swap_attrs(lrattr);
305 xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
306 }
307
308 aclstart = (caddr_t)(lracl + 1) + xvatlen;
309 zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE);
310 /* swap fuids */
311 if (lracl->lr_fuidcnt) {
312 byteswap_uint64_array((caddr_t)aclstart +
313 ZIL_ACE_LENGTH(lracl->lr_acl_bytes),
314 lracl->lr_fuidcnt * sizeof (uint64_t));
315 }
316 }
317
0037b49e 318 if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
34dc7c2f
BB
319 return (error);
320
50c957f7
NB
321 objid = LR_FOID_GET_OBJ(lr->lr_foid);
322 dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT;
323
34dc7c2f 324 xva_init(&xva);
6742abf9 325 zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
50c957f7 326 lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid);
34dc7c2f
BB
327
328 /*
329 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
330 * eventually end up in zfs_mknode(), which assigns the object's
50c957f7
NB
331 * creation time, generation number, and dnode size. The generic
332 * zfs_create() has no concept of these attributes, so we smuggle
333 * the values inside the vattr's otherwise unused va_ctime,
334 * va_nblocks, and va_fsid fields.
34dc7c2f
BB
335 */
336 ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
337 xva.xva_vattr.va_nblocks = lr->lr_gen;
50c957f7 338 xva.xva_vattr.va_fsid = dnodesize;
34dc7c2f 339
0037b49e 340 error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
34dc7c2f
BB
341 if (error != ENOENT)
342 goto bail;
343
344 if (lr->lr_common.lrc_txtype & TX_CI)
345 vflg |= FIGNORECASE;
428870ff 346 switch (txtype) {
34dc7c2f
BB
347 case TX_CREATE_ACL:
348 aclstart = (caddr_t)(lracl + 1);
349 fuidstart = (caddr_t)aclstart +
350 ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
0037b49e 351 zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
34dc7c2f
BB
352 (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
353 lr->lr_uid, lr->lr_gid);
354 /*FALLTHROUGH*/
355 case TX_CREATE_ACL_ATTR:
356 if (name == NULL) {
357 lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
358 xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
5484965a 359 xva.xva_vattr.va_mask |= ATTR_XVATTR;
34dc7c2f
BB
360 zfs_replay_xvattr(lrattr, &xva);
361 }
362 vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
363 vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
364 vsec.vsa_aclcnt = lracl->lr_aclcnt;
365 vsec.vsa_aclentsz = lracl->lr_acl_bytes;
366 vsec.vsa_aclflags = lracl->lr_acl_flags;
0037b49e 367 if (zfsvfs->z_fuid_replay == NULL) {
34dc7c2f
BB
368 fuidstart = (caddr_t)(lracl + 1) + xvatlen +
369 ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
0037b49e 370 zfsvfs->z_fuid_replay =
34dc7c2f
BB
371 zfs_replay_fuids(fuidstart,
372 (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
373 lr->lr_uid, lr->lr_gid);
374 }
375
633e8030 376 error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
5484965a 377 0, 0, &ip, kcred, vflg, &vsec);
34dc7c2f
BB
378 break;
379 case TX_MKDIR_ACL:
380 aclstart = (caddr_t)(lracl + 1);
381 fuidstart = (caddr_t)aclstart +
382 ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
0037b49e 383 zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
34dc7c2f
BB
384 (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
385 lr->lr_uid, lr->lr_gid);
386 /*FALLTHROUGH*/
387 case TX_MKDIR_ACL_ATTR:
388 if (name == NULL) {
389 lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
390 xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
391 zfs_replay_xvattr(lrattr, &xva);
392 }
393 vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
394 vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
395 vsec.vsa_aclcnt = lracl->lr_aclcnt;
396 vsec.vsa_aclentsz = lracl->lr_acl_bytes;
397 vsec.vsa_aclflags = lracl->lr_acl_flags;
0037b49e 398 if (zfsvfs->z_fuid_replay == NULL) {
34dc7c2f
BB
399 fuidstart = (caddr_t)(lracl + 1) + xvatlen +
400 ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
0037b49e 401 zfsvfs->z_fuid_replay =
34dc7c2f
BB
402 zfs_replay_fuids(fuidstart,
403 (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
404 lr->lr_uid, lr->lr_gid);
405 }
633e8030 406 error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
5484965a 407 &ip, kcred, vflg, &vsec);
34dc7c2f
BB
408 break;
409 default:
2e528b49 410 error = SET_ERROR(ENOTSUP);
34dc7c2f
BB
411 }
412
413bail:
633e8030
BB
414 if (error == 0 && ip != NULL)
415 iput(ip);
34dc7c2f 416
633e8030 417 iput(ZTOI(dzp));
34dc7c2f 418
0037b49e
BB
419 if (zfsvfs->z_fuid_replay)
420 zfs_fuid_info_free(zfsvfs->z_fuid_replay);
421 zfsvfs->z_fuid_replay = NULL;
34dc7c2f
BB
422
423 return (error);
424}
425
426static int
867959b5 427zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 428{
867959b5
BB
429 zfsvfs_t *zfsvfs = arg1;
430 lr_create_t *lr = arg2;
34dc7c2f
BB
431 char *name = NULL; /* location determined later */
432 char *link; /* symlink content follows name */
433 znode_t *dzp;
633e8030 434 struct inode *ip = NULL;
34dc7c2f
BB
435 xvattr_t xva;
436 int vflg = 0;
437 size_t lrsize = sizeof (lr_create_t);
438 lr_attr_t *lrattr;
633e8030 439 void *start;
5484965a 440 size_t xvatlen;
34dc7c2f 441 uint64_t txtype;
50c957f7
NB
442 uint64_t objid;
443 uint64_t dnodesize;
34dc7c2f
BB
444 int error;
445
428870ff 446 txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
34dc7c2f
BB
447 if (byteswap) {
448 byteswap_uint64_array(lr, sizeof (*lr));
34dc7c2f
BB
449 if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR)
450 zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
451 }
452
453
0037b49e 454 if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
34dc7c2f
BB
455 return (error);
456
50c957f7
NB
457 objid = LR_FOID_GET_OBJ(lr->lr_foid);
458 dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT;
459
34dc7c2f 460 xva_init(&xva);
6742abf9 461 zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
50c957f7 462 lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid);
34dc7c2f
BB
463
464 /*
465 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
466 * eventually end up in zfs_mknode(), which assigns the object's
50c957f7
NB
467 * creation time, generation number, and dnode slot count. The
468 * generic zfs_create() has no concept of these attributes, so
1e0457e7
MA
469 * we smuggle the values inside the vattr's otherwise unused
470 * va_ctime, va_nblocks, and va_fsid fields.
34dc7c2f
BB
471 */
472 ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
473 xva.xva_vattr.va_nblocks = lr->lr_gen;
50c957f7 474 xva.xva_vattr.va_fsid = dnodesize;
34dc7c2f 475
0037b49e 476 error = dmu_object_info(zfsvfs->z_os, objid, NULL);
34dc7c2f
BB
477 if (error != ENOENT)
478 goto out;
479
480 if (lr->lr_common.lrc_txtype & TX_CI)
481 vflg |= FIGNORECASE;
482
483 /*
484 * Symlinks don't have fuid info, and CIFS never creates
485 * symlinks.
486 *
487 * The _ATTR versions will grab the fuid info in their subcases.
488 */
489 if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK &&
490 (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
491 (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
492 start = (lr + 1);
0037b49e 493 zfsvfs->z_fuid_replay =
34dc7c2f
BB
494 zfs_replay_fuid_domain(start, &start,
495 lr->lr_uid, lr->lr_gid);
496 }
497
428870ff 498 switch (txtype) {
34dc7c2f
BB
499 case TX_CREATE_ATTR:
500 lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
501 xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
502 zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
503 start = (caddr_t)(lr + 1) + xvatlen;
0037b49e 504 zfsvfs->z_fuid_replay =
34dc7c2f
BB
505 zfs_replay_fuid_domain(start, &start,
506 lr->lr_uid, lr->lr_gid);
507 name = (char *)start;
508
509 /*FALLTHROUGH*/
510 case TX_CREATE:
511 if (name == NULL)
512 name = (char *)start;
513
633e8030
BB
514 error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
515 0, 0, &ip, kcred, vflg, NULL);
34dc7c2f
BB
516 break;
517 case TX_MKDIR_ATTR:
518 lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
519 xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
520 zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
521 start = (caddr_t)(lr + 1) + xvatlen;
0037b49e 522 zfsvfs->z_fuid_replay =
34dc7c2f
BB
523 zfs_replay_fuid_domain(start, &start,
524 lr->lr_uid, lr->lr_gid);
525 name = (char *)start;
526
527 /*FALLTHROUGH*/
528 case TX_MKDIR:
529 if (name == NULL)
530 name = (char *)(lr + 1);
531
633e8030
BB
532 error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
533 &ip, kcred, vflg, NULL);
34dc7c2f
BB
534 break;
535 case TX_MKXATTR:
633e8030 536 error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &ip, kcred);
34dc7c2f
BB
537 break;
538 case TX_SYMLINK:
539 name = (char *)(lr + 1);
540 link = name + strlen(name) + 1;
633e8030
BB
541 error = zfs_symlink(ZTOI(dzp), name, &xva.xva_vattr,
542 link, &ip, kcred, vflg);
34dc7c2f
BB
543 break;
544 default:
2e528b49 545 error = SET_ERROR(ENOTSUP);
34dc7c2f
BB
546 }
547
548out:
633e8030
BB
549 if (error == 0 && ip != NULL)
550 iput(ip);
34dc7c2f 551
633e8030 552 iput(ZTOI(dzp));
34dc7c2f 553
0037b49e
BB
554 if (zfsvfs->z_fuid_replay)
555 zfs_fuid_info_free(zfsvfs->z_fuid_replay);
556 zfsvfs->z_fuid_replay = NULL;
34dc7c2f
BB
557 return (error);
558}
559
560static int
867959b5 561zfs_replay_remove(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 562{
867959b5
BB
563 zfsvfs_t *zfsvfs = arg1;
564 lr_remove_t *lr = arg2;
34dc7c2f
BB
565 char *name = (char *)(lr + 1); /* name follows lr_remove_t */
566 znode_t *dzp;
567 int error;
568 int vflg = 0;
569
570 if (byteswap)
571 byteswap_uint64_array(lr, sizeof (*lr));
572
0037b49e 573 if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
34dc7c2f
BB
574 return (error);
575
576 if (lr->lr_common.lrc_txtype & TX_CI)
577 vflg |= FIGNORECASE;
578
579 switch ((int)lr->lr_common.lrc_txtype) {
580 case TX_REMOVE:
da5e151f 581 error = zfs_remove(ZTOI(dzp), name, kcred, vflg);
34dc7c2f
BB
582 break;
583 case TX_RMDIR:
633e8030 584 error = zfs_rmdir(ZTOI(dzp), name, NULL, kcred, vflg);
34dc7c2f
BB
585 break;
586 default:
2e528b49 587 error = SET_ERROR(ENOTSUP);
34dc7c2f
BB
588 }
589
633e8030 590 iput(ZTOI(dzp));
34dc7c2f
BB
591
592 return (error);
593}
594
595static int
867959b5 596zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 597{
867959b5
BB
598 zfsvfs_t *zfsvfs = arg1;
599 lr_link_t *lr = arg2;
34dc7c2f
BB
600 char *name = (char *)(lr + 1); /* name follows lr_link_t */
601 znode_t *dzp, *zp;
602 int error;
5484965a 603 int vflg = 0;
34dc7c2f
BB
604
605 if (byteswap)
606 byteswap_uint64_array(lr, sizeof (*lr));
607
0037b49e 608 if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
34dc7c2f
BB
609 return (error);
610
0037b49e 611 if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) {
633e8030 612 iput(ZTOI(dzp));
34dc7c2f
BB
613 return (error);
614 }
615
616 if (lr->lr_common.lrc_txtype & TX_CI)
617 vflg |= FIGNORECASE;
618
da5e151f 619 error = zfs_link(ZTOI(dzp), ZTOI(zp), name, kcred, vflg);
34dc7c2f 620
633e8030
BB
621 iput(ZTOI(zp));
622 iput(ZTOI(dzp));
34dc7c2f
BB
623
624 return (error);
625}
626
627static int
867959b5 628zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 629{
867959b5
BB
630 zfsvfs_t *zfsvfs = arg1;
631 lr_rename_t *lr = arg2;
34dc7c2f
BB
632 char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
633 char *tname = sname + strlen(sname) + 1;
634 znode_t *sdzp, *tdzp;
635 int error;
636 int vflg = 0;
637
638 if (byteswap)
639 byteswap_uint64_array(lr, sizeof (*lr));
640
0037b49e 641 if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
34dc7c2f
BB
642 return (error);
643
0037b49e 644 if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) {
633e8030 645 iput(ZTOI(sdzp));
34dc7c2f
BB
646 return (error);
647 }
648
649 if (lr->lr_common.lrc_txtype & TX_CI)
650 vflg |= FIGNORECASE;
651
633e8030 652 error = zfs_rename(ZTOI(sdzp), sname, ZTOI(tdzp), tname, kcred, vflg);
34dc7c2f 653
633e8030
BB
654 iput(ZTOI(tdzp));
655 iput(ZTOI(sdzp));
34dc7c2f
BB
656
657 return (error);
658}
659
660static int
867959b5 661zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 662{
867959b5
BB
663 zfsvfs_t *zfsvfs = arg1;
664 lr_write_t *lr = arg2;
34dc7c2f
BB
665 char *data = (char *)(lr + 1); /* data follows lr_write_t */
666 znode_t *zp;
49d39798 667 int error, written;
572e2857 668 uint64_t eod, offset, length;
34dc7c2f
BB
669
670 if (byteswap)
671 byteswap_uint64_array(lr, sizeof (*lr));
672
0037b49e 673 if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
34dc7c2f
BB
674 /*
675 * As we can log writes out of order, it's possible the
676 * file has been removed. In this case just drop the write
677 * and return success.
678 */
679 if (error == ENOENT)
680 error = 0;
681 return (error);
682 }
683
428870ff
BB
684 offset = lr->lr_offset;
685 length = lr->lr_length;
572e2857 686 eod = offset + length; /* end of data for this write */
428870ff 687
572e2857
BB
688 /*
689 * This may be a write from a dmu_sync() for a whole block,
690 * and may extend beyond the current end of the file.
691 * We can't just replay what was written for this TX_WRITE as
692 * a future TX_WRITE2 may extend the eof and the data for that
693 * write needs to be there. So we write the whole block and
694 * reduce the eof. This needs to be done within the single dmu
695 * transaction created within vn_rdwr -> zfs_write. So a possible
0037b49e 696 * new end of file is passed through in zfsvfs->z_replay_eof
572e2857
BB
697 */
698
0037b49e 699 zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */
428870ff
BB
700
701 /* If it's a dmu_sync() block, write the whole block */
702 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
703 uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
704 if (length < blocksize) {
705 offset -= offset % blocksize;
706 length = blocksize;
707 }
572e2857 708 if (zp->z_size < eod)
0037b49e 709 zfsvfs->z_replay_eof = eod;
428870ff
BB
710 }
711
cd3939c5 712 written = zpl_write_common(ZTOI(zp), data, length, &offset,
5484965a 713 UIO_SYSSPACE, 0, kcred);
49d39798
CP
714 if (written < 0)
715 error = -written;
716 else if (written < length)
2e528b49 717 error = SET_ERROR(EIO); /* short write */
428870ff 718
633e8030 719 iput(ZTOI(zp));
0037b49e 720 zfsvfs->z_replay_eof = 0; /* safety */
428870ff
BB
721
722 return (error);
723}
724
725/*
726 * TX_WRITE2 are only generated when dmu_sync() returns EALREADY
727 * meaning the pool block is already being synced. So now that we always write
728 * out full blocks, all we have to do is expand the eof if
729 * the file is grown.
730 */
731static int
867959b5 732zfs_replay_write2(void *arg1, void *arg2, boolean_t byteswap)
428870ff 733{
867959b5
BB
734 zfsvfs_t *zfsvfs = arg1;
735 lr_write_t *lr = arg2;
428870ff
BB
736 znode_t *zp;
737 int error;
738 uint64_t end;
739
740 if (byteswap)
741 byteswap_uint64_array(lr, sizeof (*lr));
742
0037b49e 743 if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
428870ff
BB
744 return (error);
745
572e2857 746top:
428870ff
BB
747 end = lr->lr_offset + lr->lr_length;
748 if (end > zp->z_size) {
0037b49e 749 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
572e2857 750
428870ff 751 zp->z_size = end;
572e2857
BB
752 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
753 error = dmu_tx_assign(tx, TXG_WAIT);
754 if (error) {
633e8030 755 iput(ZTOI(zp));
572e2857
BB
756 if (error == ERESTART) {
757 dmu_tx_wait(tx);
758 dmu_tx_abort(tx);
759 goto top;
760 }
761 dmu_tx_abort(tx);
762 return (error);
763 }
0037b49e 764 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
572e2857
BB
765 (void *)&zp->z_size, sizeof (uint64_t), tx);
766
767 /* Ensure the replayed seq is updated */
0037b49e 768 (void) zil_replaying(zfsvfs->z_log, tx);
572e2857
BB
769
770 dmu_tx_commit(tx);
428870ff 771 }
34dc7c2f 772
633e8030 773 iput(ZTOI(zp));
34dc7c2f
BB
774
775 return (error);
776}
777
778static int
867959b5 779zfs_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 780{
867959b5
BB
781 zfsvfs_t *zfsvfs = arg1;
782 lr_truncate_t *lr = arg2;
34dc7c2f
BB
783 znode_t *zp;
784 flock64_t fl;
785 int error;
786
787 if (byteswap)
788 byteswap_uint64_array(lr, sizeof (*lr));
789
0037b49e 790 if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
34dc7c2f 791 return (error);
34dc7c2f
BB
792
793 bzero(&fl, sizeof (fl));
794 fl.l_type = F_WRLCK;
126d0fa7 795 fl.l_whence = SEEK_SET;
34dc7c2f
BB
796 fl.l_start = lr->lr_offset;
797 fl.l_len = lr->lr_length;
798
633e8030
BB
799 error = zfs_space(ZTOI(zp), F_FREESP, &fl, FWRITE | FOFFMAX,
800 lr->lr_offset, kcred);
34dc7c2f 801
633e8030 802 iput(ZTOI(zp));
34dc7c2f
BB
803
804 return (error);
805}
806
807static int
867959b5 808zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 809{
867959b5
BB
810 zfsvfs_t *zfsvfs = arg1;
811 lr_setattr_t *lr = arg2;
34dc7c2f
BB
812 znode_t *zp;
813 xvattr_t xva;
5484965a 814 vattr_t *vap = &xva.xva_vattr;
34dc7c2f
BB
815 int error;
816 void *start;
817
818 xva_init(&xva);
819 if (byteswap) {
820 byteswap_uint64_array(lr, sizeof (*lr));
821
5484965a 822 if ((lr->lr_mask & ATTR_XVATTR) &&
0037b49e 823 zfsvfs->z_version >= ZPL_VERSION_INITIAL)
34dc7c2f
BB
824 zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
825 }
826
0037b49e 827 if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
34dc7c2f 828 return (error);
34dc7c2f 829
5484965a
BB
830 zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode,
831 lr->lr_uid, lr->lr_gid, 0, lr->lr_foid);
34dc7c2f 832
5484965a
BB
833 vap->va_size = lr->lr_size;
834 ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime);
835 ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime);
87f9371a
NB
836 gethrestime(&vap->va_ctime);
837 vap->va_mask |= ATTR_CTIME;
34dc7c2f
BB
838
839 /*
840 * Fill in xvattr_t portions if necessary.
841 */
842
843 start = (lr_setattr_t *)(lr + 1);
5484965a 844 if (vap->va_mask & ATTR_XVATTR) {
34dc7c2f
BB
845 zfs_replay_xvattr((lr_attr_t *)start, &xva);
846 start = (caddr_t)start +
847 ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize);
848 } else
5484965a 849 xva.xva_vattr.va_mask &= ~ATTR_XVATTR;
34dc7c2f 850
0037b49e 851 zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
34dc7c2f
BB
852 lr->lr_uid, lr->lr_gid);
853
5484965a 854 error = zfs_setattr(ZTOI(zp), vap, 0, kcred);
34dc7c2f 855
0037b49e
BB
856 zfs_fuid_info_free(zfsvfs->z_fuid_replay);
857 zfsvfs->z_fuid_replay = NULL;
633e8030 858 iput(ZTOI(zp));
34dc7c2f
BB
859
860 return (error);
861}
862
863static int
867959b5 864zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 865{
867959b5
BB
866 zfsvfs_t *zfsvfs = arg1;
867 lr_acl_v0_t *lr = arg2;
34dc7c2f
BB
868 ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */
869 vsecattr_t vsa;
870 znode_t *zp;
871 int error;
872
873 if (byteswap) {
874 byteswap_uint64_array(lr, sizeof (*lr));
875 zfs_oldace_byteswap(ace, lr->lr_aclcnt);
876 }
877
0037b49e 878 if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
34dc7c2f 879 return (error);
34dc7c2f
BB
880
881 bzero(&vsa, sizeof (vsa));
882 vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
883 vsa.vsa_aclcnt = lr->lr_aclcnt;
b128c09f
BB
884 vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt;
885 vsa.vsa_aclflags = 0;
34dc7c2f
BB
886 vsa.vsa_aclentp = ace;
887
633e8030 888 error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred);
34dc7c2f 889
633e8030 890 iput(ZTOI(zp));
34dc7c2f
BB
891
892 return (error);
893}
894
895/*
896 * Replaying ACLs is complicated by FUID support.
897 * The log record may contain some optional data
898 * to be used for replaying FUID's. These pieces
899 * are the actual FUIDs that were created initially.
900 * The FUID table index may no longer be valid and
901 * during zfs_create() a new index may be assigned.
902 * Because of this the log will contain the original
4e33ba4c 903 * domain+rid in order to create a new FUID.
34dc7c2f
BB
904 *
905 * The individual ACEs may contain an ephemeral uid/gid which is no
906 * longer valid and will need to be replaced with an actual FUID.
907 *
908 */
909static int
867959b5 910zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap)
34dc7c2f 911{
867959b5
BB
912 zfsvfs_t *zfsvfs = arg1;
913 lr_acl_t *lr = arg2;
34dc7c2f
BB
914 ace_t *ace = (ace_t *)(lr + 1);
915 vsecattr_t vsa;
916 znode_t *zp;
917 int error;
918
919 if (byteswap) {
920 byteswap_uint64_array(lr, sizeof (*lr));
921 zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE);
922 if (lr->lr_fuidcnt) {
923 byteswap_uint64_array((caddr_t)ace +
924 ZIL_ACE_LENGTH(lr->lr_acl_bytes),
925 lr->lr_fuidcnt * sizeof (uint64_t));
926 }
927 }
928
0037b49e 929 if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
34dc7c2f 930 return (error);
34dc7c2f
BB
931
932 bzero(&vsa, sizeof (vsa));
933 vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS;
934 vsa.vsa_aclcnt = lr->lr_aclcnt;
935 vsa.vsa_aclentp = ace;
936 vsa.vsa_aclentsz = lr->lr_acl_bytes;
937 vsa.vsa_aclflags = lr->lr_acl_flags;
938
939 if (lr->lr_fuidcnt) {
940 void *fuidstart = (caddr_t)ace +
941 ZIL_ACE_LENGTH(lr->lr_acl_bytes);
942
0037b49e 943 zfsvfs->z_fuid_replay =
34dc7c2f
BB
944 zfs_replay_fuids(fuidstart, &fuidstart,
945 lr->lr_fuidcnt, lr->lr_domcnt, 0, 0);
946 }
947
633e8030 948 error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred);
34dc7c2f 949
0037b49e
BB
950 if (zfsvfs->z_fuid_replay)
951 zfs_fuid_info_free(zfsvfs->z_fuid_replay);
34dc7c2f 952
0037b49e 953 zfsvfs->z_fuid_replay = NULL;
633e8030 954 iput(ZTOI(zp));
34dc7c2f
BB
955
956 return (error);
957}
958
959/*
960 * Callback vectors for replaying records
961 */
867959b5
BB
962zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = {
963 zfs_replay_error, /* no such type */
964 zfs_replay_create, /* TX_CREATE */
965 zfs_replay_create, /* TX_MKDIR */
966 zfs_replay_create, /* TX_MKXATTR */
967 zfs_replay_create, /* TX_SYMLINK */
968 zfs_replay_remove, /* TX_REMOVE */
969 zfs_replay_remove, /* TX_RMDIR */
970 zfs_replay_link, /* TX_LINK */
971 zfs_replay_rename, /* TX_RENAME */
972 zfs_replay_write, /* TX_WRITE */
973 zfs_replay_truncate, /* TX_TRUNCATE */
974 zfs_replay_setattr, /* TX_SETATTR */
975 zfs_replay_acl_v0, /* TX_ACL_V0 */
976 zfs_replay_acl, /* TX_ACL */
977 zfs_replay_create_acl, /* TX_CREATE_ACL */
978 zfs_replay_create, /* TX_CREATE_ATTR */
979 zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */
980 zfs_replay_create_acl, /* TX_MKDIR_ACL */
981 zfs_replay_create, /* TX_MKDIR_ATTR */
982 zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
983 zfs_replay_write2, /* TX_WRITE2 */
34dc7c2f 984};