]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/zfs_log.c
Project Quota on ZFS
[mirror_zfs.git] / module / zfs / zfs_log.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
572e2857 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
e9aa730c 23 * Copyright (c) 2015 by Delphix. All rights reserved.
34dc7c2f
BB
24 */
25
60101509 26
34dc7c2f
BB
27#include <sys/types.h>
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/sysmacros.h>
31#include <sys/cmn_err.h>
32#include <sys/kmem.h>
33#include <sys/thread.h>
34#include <sys/file.h>
35#include <sys/vfs.h>
36#include <sys/zfs_znode.h>
37#include <sys/zfs_dir.h>
38#include <sys/zil.h>
39#include <sys/zil_impl.h>
40#include <sys/byteorder.h>
41#include <sys/policy.h>
42#include <sys/stat.h>
43#include <sys/mode.h>
44#include <sys/acl.h>
45#include <sys/dmu.h>
46#include <sys/spa.h>
47#include <sys/zfs_fuid.h>
48#include <sys/ddi.h>
fb5f0bc8
BB
49#include <sys/dsl_dataset.h>
50
34dc7c2f 51/*
fb5f0bc8
BB
52 * These zfs_log_* functions must be called within a dmu tx, in one
53 * of 2 contexts depending on zilog->z_replay:
54 *
55 * Non replay mode
56 * ---------------
57 * We need to record the transaction so that if it is committed to
58 * the Intent Log then it can be replayed. An intent log transaction
59 * structure (itx_t) is allocated and all the information necessary to
60 * possibly replay the transaction is saved in it. The itx is then assigned
61 * a sequence number and inserted in the in-memory list anchored in the zilog.
62 *
63 * Replay mode
64 * -----------
65 * We need to mark the intent log record as replayed in the log header.
66 * This is done in the same transaction as the replay so that they
67 * commit atomically.
34dc7c2f
BB
68 */
69
70int
71zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
72{
5484965a 73 int isxvattr = (vap->va_mask & ATTR_XVATTR);
34dc7c2f
BB
74 switch (type) {
75 case Z_FILE:
76 if (vsecp == NULL && !isxvattr)
77 return (TX_CREATE);
78 if (vsecp && isxvattr)
79 return (TX_CREATE_ACL_ATTR);
80 if (vsecp)
81 return (TX_CREATE_ACL);
82 else
83 return (TX_CREATE_ATTR);
84 /*NOTREACHED*/
85 case Z_DIR:
86 if (vsecp == NULL && !isxvattr)
87 return (TX_MKDIR);
88 if (vsecp && isxvattr)
89 return (TX_MKDIR_ACL_ATTR);
90 if (vsecp)
91 return (TX_MKDIR_ACL);
92 else
93 return (TX_MKDIR_ATTR);
94 case Z_XATTRDIR:
95 return (TX_MKXATTR);
96 }
97 ASSERT(0);
98 return (TX_MAX_TYPE);
99}
100
101/*
102 * build up the log data necessary for logging xvattr_t
103 * First lr_attr_t is initialized. following the lr_attr_t
104 * is the mapsize and attribute bitmap copied from the xvattr_t.
105 * Following the bitmap and bitmapsize two 64 bit words are reserved
106 * for the create time which may be set. Following the create time
107 * records a single 64 bit integer which has the bits to set on
108 * replay for the xvattr.
109 */
110static void
111zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
112{
113 uint32_t *bitmap;
114 uint64_t *attrs;
115 uint64_t *crtime;
116 xoptattr_t *xoap;
117 void *scanstamp;
118 int i;
119
120 xoap = xva_getxoptattr(xvap);
121 ASSERT(xoap);
122
123 lrattr->lr_attr_masksize = xvap->xva_mapsize;
124 bitmap = &lrattr->lr_attr_bitmap;
125 for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) {
126 *bitmap = xvap->xva_reqattrmap[i];
127 }
128
129 /* Now pack the attributes up in a single uint64_t */
130 attrs = (uint64_t *)bitmap;
131 crtime = attrs + 1;
132 scanstamp = (caddr_t)(crtime + 2);
133 *attrs = 0;
134 if (XVA_ISSET_REQ(xvap, XAT_READONLY))
135 *attrs |= (xoap->xoa_readonly == 0) ? 0 :
136 XAT0_READONLY;
137 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
138 *attrs |= (xoap->xoa_hidden == 0) ? 0 :
139 XAT0_HIDDEN;
140 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
141 *attrs |= (xoap->xoa_system == 0) ? 0 :
142 XAT0_SYSTEM;
143 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
144 *attrs |= (xoap->xoa_archive == 0) ? 0 :
145 XAT0_ARCHIVE;
146 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
147 *attrs |= (xoap->xoa_immutable == 0) ? 0 :
148 XAT0_IMMUTABLE;
149 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
150 *attrs |= (xoap->xoa_nounlink == 0) ? 0 :
151 XAT0_NOUNLINK;
152 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
153 *attrs |= (xoap->xoa_appendonly == 0) ? 0 :
154 XAT0_APPENDONLY;
155 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
156 *attrs |= (xoap->xoa_opaque == 0) ? 0 :
157 XAT0_APPENDONLY;
158 if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
159 *attrs |= (xoap->xoa_nodump == 0) ? 0 :
160 XAT0_NODUMP;
161 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
162 *attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
163 XAT0_AV_QUARANTINED;
164 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
165 *attrs |= (xoap->xoa_av_modified == 0) ? 0 :
166 XAT0_AV_MODIFIED;
167 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
168 ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
9c5167d1
NF
169 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
170 ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID));
171
34dc7c2f 172 bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
9c5167d1
NF
173 } else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
174 /*
175 * XAT_PROJID and XAT_AV_SCANSTAMP will never be valid
176 * at the same time, so we can share the same space.
177 */
178 bcopy(&xoap->xoa_projid, scanstamp, sizeof (uint64_t));
179 }
428870ff
BB
180 if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
181 *attrs |= (xoap->xoa_reparse == 0) ? 0 :
182 XAT0_REPARSE;
572e2857
BB
183 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
184 *attrs |= (xoap->xoa_offline == 0) ? 0 :
185 XAT0_OFFLINE;
186 if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
187 *attrs |= (xoap->xoa_sparse == 0) ? 0 :
188 XAT0_SPARSE;
9c5167d1
NF
189 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT))
190 *attrs |= (xoap->xoa_projinherit == 0) ? 0 :
191 XAT0_PROJINHERIT;
34dc7c2f
BB
192}
193
194static void *
195zfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start)
196{
197 zfs_fuid_t *zfuid;
198 uint64_t *fuidloc = start;
199
200 /* First copy in the ACE FUIDs */
201 for (zfuid = list_head(&fuidp->z_fuids); zfuid;
202 zfuid = list_next(&fuidp->z_fuids, zfuid)) {
203 *fuidloc++ = zfuid->z_logfuid;
204 }
205 return (fuidloc);
206}
207
208
209static void *
210zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
211{
212 zfs_fuid_domain_t *zdomain;
213
214 /* now copy in the domain info, if any */
215 if (fuidp->z_domain_str_sz != 0) {
216 for (zdomain = list_head(&fuidp->z_domains); zdomain;
217 zdomain = list_next(&fuidp->z_domains, zdomain)) {
218 bcopy((void *)zdomain->z_domain, start,
219 strlen(zdomain->z_domain) + 1);
220 start = (caddr_t)start +
221 strlen(zdomain->z_domain) + 1;
222 }
223 }
224 return (start);
225}
226
98701490
CC
227/*
228 * If zp is an xattr node, check whether the xattr owner is unlinked.
229 * We don't want to log anything if the owner is unlinked.
230 */
231static int
232zfs_xattr_owner_unlinked(znode_t *zp)
233{
234 int unlinked = 0;
235 znode_t *dzp;
236 igrab(ZTOI(zp));
237 /*
238 * if zp is XATTR node, keep walking up via z_xattr_parent until we
239 * get the owner
240 */
241 while (zp->z_pflags & ZFS_XATTR) {
242 ASSERT3U(zp->z_xattr_parent, !=, 0);
243 if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, &dzp) != 0) {
244 unlinked = 1;
245 break;
246 }
247 iput(ZTOI(zp));
248 zp = dzp;
249 unlinked = zp->z_unlinked;
250 }
251 iput(ZTOI(zp));
252 return (unlinked);
253}
254
34dc7c2f 255/*
d3cc8b15
WA
256 * Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
257 * TK_MKXATTR transactions.
34dc7c2f
BB
258 *
259 * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID
260 * domain information appended prior to the name. In this case the
261 * uid/gid in the log record will be a log centric FUID.
262 *
263 * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that
264 * may contain attributes, ACL and optional fuid information.
265 *
266 * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify
267 * and ACL and normal users/groups in the ACEs.
268 *
269 * There may be an optional xvattr attribute information similar
270 * to zfs_log_setattr.
271 *
272 * Also, after the file name "domain" strings may be appended.
273 */
274void
275zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
276 znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
277 zfs_fuid_info_t *fuidp, vattr_t *vap)
278{
279 itx_t *itx;
34dc7c2f
BB
280 lr_create_t *lr;
281 lr_acl_create_t *lracl;
5484965a 282 size_t aclsize = 0;
34dc7c2f
BB
283 size_t xvatsize = 0;
284 size_t txsize;
a117a6d6 285 xvattr_t *xvap = (xvattr_t *)vap;
34dc7c2f
BB
286 void *end;
287 size_t lrsize;
34dc7c2f
BB
288 size_t namesize = strlen(name) + 1;
289 size_t fuidsz = 0;
290
98701490 291 if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
34dc7c2f
BB
292 return;
293
294 /*
295 * If we have FUIDs present then add in space for
296 * domains and ACE fuid's if any.
297 */
298 if (fuidp) {
299 fuidsz += fuidp->z_domain_str_sz;
300 fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
301 }
302
5484965a 303 if (vap->va_mask & ATTR_XVATTR)
34dc7c2f
BB
304 xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);
305
306 if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
307 (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR ||
308 (int)txtype == TX_MKXATTR) {
309 txsize = sizeof (*lr) + namesize + fuidsz + xvatsize;
310 lrsize = sizeof (*lr);
311 } else {
34dc7c2f
BB
312 txsize =
313 sizeof (lr_acl_create_t) + namesize + fuidsz +
314 ZIL_ACE_LENGTH(aclsize) + xvatsize;
315 lrsize = sizeof (lr_acl_create_t);
316 }
317
318 itx = zil_itx_create(txtype, txsize);
319
320 lr = (lr_create_t *)&itx->itx_lr;
321 lr->lr_doid = dzp->z_id;
322 lr->lr_foid = zp->z_id;
50c957f7
NB
323 /* Store dnode slot count in 8 bits above object id. */
324 LR_FOID_SET_SLOTS(lr->lr_foid, zp->z_dnodesize >> DNODE_SHIFT);
428870ff 325 lr->lr_mode = zp->z_mode;
2c6abf15
NB
326 if (!IS_EPHEMERAL(KUID_TO_SUID(ZTOI(zp)->i_uid))) {
327 lr->lr_uid = (uint64_t)KUID_TO_SUID(ZTOI(zp)->i_uid);
34dc7c2f
BB
328 } else {
329 lr->lr_uid = fuidp->z_fuid_owner;
330 }
2c6abf15
NB
331 if (!IS_EPHEMERAL(KGID_TO_SGID(ZTOI(zp)->i_gid))) {
332 lr->lr_gid = (uint64_t)KGID_TO_SGID(ZTOI(zp)->i_gid);
34dc7c2f
BB
333 } else {
334 lr->lr_gid = fuidp->z_fuid_group;
335 }
633e8030 336 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
428870ff 337 sizeof (uint64_t));
633e8030 338 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
428870ff
BB
339 lr->lr_crtime, sizeof (uint64_t) * 2);
340
633e8030 341 if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev,
428870ff
BB
342 sizeof (lr->lr_rdev)) != 0)
343 lr->lr_rdev = 0;
34dc7c2f
BB
344
345 /*
346 * Fill in xvattr info if any
347 */
5484965a 348 if (vap->va_mask & ATTR_XVATTR) {
34dc7c2f
BB
349 zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
350 end = (caddr_t)lr + lrsize + xvatsize;
351 } else {
352 end = (caddr_t)lr + lrsize;
353 }
354
355 /* Now fill in any ACL info */
356
357 if (vsecp) {
358 lracl = (lr_acl_create_t *)&itx->itx_lr;
359 lracl->lr_aclcnt = vsecp->vsa_aclcnt;
360 lracl->lr_acl_bytes = aclsize;
361 lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
362 lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
363 if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS)
364 lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
365 else
366 lracl->lr_acl_flags = 0;
367
368 bcopy(vsecp->vsa_aclentp, end, aclsize);
369 end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize);
370 }
371
372 /* drop in FUID info */
373 if (fuidp) {
374 end = zfs_log_fuid_ids(fuidp, end);
375 end = zfs_log_fuid_domains(fuidp, end);
376 }
377 /*
378 * Now place file name in log record
379 */
380 bcopy(name, end, namesize);
381
572e2857 382 zil_itx_assign(zilog, itx, tx);
34dc7c2f
BB
383}
384
385/*
d3cc8b15 386 * Handles both TX_REMOVE and TX_RMDIR transactions.
34dc7c2f
BB
387 */
388void
389zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
e9aa730c 390 znode_t *dzp, char *name, uint64_t foid)
34dc7c2f
BB
391{
392 itx_t *itx;
34dc7c2f
BB
393 lr_remove_t *lr;
394 size_t namesize = strlen(name) + 1;
395
98701490 396 if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
34dc7c2f
BB
397 return;
398
399 itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
400 lr = (lr_remove_t *)&itx->itx_lr;
401 lr->lr_doid = dzp->z_id;
402 bcopy(name, (char *)(lr + 1), namesize);
403
572e2857
BB
404 itx->itx_oid = foid;
405
406 zil_itx_assign(zilog, itx, tx);
34dc7c2f
BB
407}
408
409/*
d3cc8b15 410 * Handles TX_LINK transactions.
34dc7c2f
BB
411 */
412void
413zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
e9aa730c 414 znode_t *dzp, znode_t *zp, char *name)
34dc7c2f
BB
415{
416 itx_t *itx;
34dc7c2f
BB
417 lr_link_t *lr;
418 size_t namesize = strlen(name) + 1;
419
428870ff 420 if (zil_replaying(zilog, tx))
34dc7c2f
BB
421 return;
422
423 itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
424 lr = (lr_link_t *)&itx->itx_lr;
425 lr->lr_doid = dzp->z_id;
426 lr->lr_link_obj = zp->z_id;
427 bcopy(name, (char *)(lr + 1), namesize);
428
572e2857 429 zil_itx_assign(zilog, itx, tx);
34dc7c2f
BB
430}
431
432/*
d3cc8b15 433 * Handles TX_SYMLINK transactions.
34dc7c2f
BB
434 */
435void
436zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
437 znode_t *dzp, znode_t *zp, char *name, char *link)
438{
439 itx_t *itx;
34dc7c2f
BB
440 lr_create_t *lr;
441 size_t namesize = strlen(name) + 1;
442 size_t linksize = strlen(link) + 1;
443
428870ff 444 if (zil_replaying(zilog, tx))
34dc7c2f
BB
445 return;
446
447 itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
448 lr = (lr_create_t *)&itx->itx_lr;
449 lr->lr_doid = dzp->z_id;
450 lr->lr_foid = zp->z_id;
2c6abf15
NB
451 lr->lr_uid = KUID_TO_SUID(ZTOI(zp)->i_uid);
452 lr->lr_gid = KGID_TO_SGID(ZTOI(zp)->i_gid);
428870ff 453 lr->lr_mode = zp->z_mode;
3558fd73 454 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
428870ff 455 sizeof (uint64_t));
3558fd73 456 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
428870ff 457 lr->lr_crtime, sizeof (uint64_t) * 2);
34dc7c2f
BB
458 bcopy(name, (char *)(lr + 1), namesize);
459 bcopy(link, (char *)(lr + 1) + namesize, linksize);
460
572e2857 461 zil_itx_assign(zilog, itx, tx);
34dc7c2f
BB
462}
463
464/*
d3cc8b15 465 * Handles TX_RENAME transactions.
34dc7c2f
BB
466 */
467void
468zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
e9aa730c 469 znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
34dc7c2f
BB
470{
471 itx_t *itx;
34dc7c2f
BB
472 lr_rename_t *lr;
473 size_t snamesize = strlen(sname) + 1;
474 size_t dnamesize = strlen(dname) + 1;
475
428870ff 476 if (zil_replaying(zilog, tx))
34dc7c2f
BB
477 return;
478
479 itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
480 lr = (lr_rename_t *)&itx->itx_lr;
481 lr->lr_sdoid = sdzp->z_id;
482 lr->lr_tdoid = tdzp->z_id;
483 bcopy(sname, (char *)(lr + 1), snamesize);
484 bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
572e2857 485 itx->itx_oid = szp->z_id;
34dc7c2f 486
572e2857 487 zil_itx_assign(zilog, itx, tx);
34dc7c2f
BB
488}
489
490/*
119a394a
ED
491 * zfs_log_write() handles TX_WRITE transactions. The specified callback is
492 * called as soon as the write is on stable storage (be it via a DMU sync or a
493 * ZIL commit).
34dc7c2f 494 */
15fd2749 495long zfs_immediate_write_sz = 32768;
34dc7c2f 496
34dc7c2f
BB
497void
498zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
e9aa730c
GM
499 znode_t *zp, offset_t off, ssize_t resid, int ioflag,
500 zil_callback_t callback, void *callback_data)
34dc7c2f 501{
1b7c1e5c 502 uint32_t blocksize = zp->z_blksz;
34dc7c2f 503 itx_wr_state_t write_state;
d5446cfc 504 uintptr_t fsync_cnt;
34dc7c2f 505
98701490
CC
506 if (zil_replaying(zilog, tx) || zp->z_unlinked ||
507 zfs_xattr_owner_unlinked(zp)) {
119a394a
ED
508 if (callback != NULL)
509 callback(callback_data);
34dc7c2f 510 return;
119a394a 511 }
34dc7c2f 512
1b7c1e5c
GDN
513 if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
514 write_state = WR_INDIRECT;
515 else if (!spa_has_slogs(zilog->zl_spa) &&
516 resid >= zfs_immediate_write_sz)
34dc7c2f
BB
517 write_state = WR_INDIRECT;
518 else if (ioflag & (FSYNC | FDSYNC))
519 write_state = WR_COPIED;
520 else
521 write_state = WR_NEED_COPY;
522
d5446cfc
BB
523 if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
524 (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
525 }
526
34dc7c2f
BB
527 while (resid) {
528 itx_t *itx;
529 lr_write_t *lr;
1b7c1e5c
GDN
530 itx_wr_state_t wr_state = write_state;
531 ssize_t len = resid;
34dc7c2f 532
1b7c1e5c
GDN
533 if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
534 wr_state = WR_NEED_COPY;
535 else if (wr_state == WR_INDIRECT)
536 len = MIN(blocksize - P2PHASE(off, blocksize), resid);
34dc7c2f
BB
537
538 itx = zil_itx_create(txtype, sizeof (*lr) +
1b7c1e5c 539 (wr_state == WR_COPIED ? len : 0));
34dc7c2f 540 lr = (lr_write_t *)&itx->itx_lr;
1b7c1e5c 541 if (wr_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
9babb374 542 zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
428870ff 543 zil_itx_destroy(itx);
34dc7c2f
BB
544 itx = zil_itx_create(txtype, sizeof (*lr));
545 lr = (lr_write_t *)&itx->itx_lr;
1b7c1e5c 546 wr_state = WR_NEED_COPY;
34dc7c2f
BB
547 }
548
1b7c1e5c 549 itx->itx_wr_state = wr_state;
34dc7c2f
BB
550 lr->lr_foid = zp->z_id;
551 lr->lr_offset = off;
552 lr->lr_length = len;
553 lr->lr_blkoff = 0;
554 BP_ZERO(&lr->lr_blkptr);
555
3558fd73 556 itx->itx_private = ZTOZSB(zp);
34dc7c2f 557
d5446cfc
BB
558 if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
559 (fsync_cnt == 0))
34dc7c2f
BB
560 itx->itx_sync = B_FALSE;
561
119a394a
ED
562 itx->itx_callback = callback;
563 itx->itx_callback_data = callback_data;
572e2857 564 zil_itx_assign(zilog, itx, tx);
34dc7c2f
BB
565
566 off += len;
567 resid -= len;
568 }
569}
570
571/*
d3cc8b15 572 * Handles TX_TRUNCATE transactions.
34dc7c2f
BB
573 */
574void
575zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
e9aa730c 576 znode_t *zp, uint64_t off, uint64_t len)
34dc7c2f
BB
577{
578 itx_t *itx;
34dc7c2f
BB
579 lr_truncate_t *lr;
580
98701490
CC
581 if (zil_replaying(zilog, tx) || zp->z_unlinked ||
582 zfs_xattr_owner_unlinked(zp))
34dc7c2f
BB
583 return;
584
585 itx = zil_itx_create(txtype, sizeof (*lr));
586 lr = (lr_truncate_t *)&itx->itx_lr;
587 lr->lr_foid = zp->z_id;
588 lr->lr_offset = off;
589 lr->lr_length = len;
590
591 itx->itx_sync = (zp->z_sync_cnt != 0);
572e2857 592 zil_itx_assign(zilog, itx, tx);
34dc7c2f
BB
593}
594
595/*
d3cc8b15 596 * Handles TX_SETATTR transactions.
34dc7c2f
BB
597 */
598void
5484965a
BB
599zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
600 znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
34dc7c2f
BB
601{
602 itx_t *itx;
34dc7c2f
BB
603 lr_setattr_t *lr;
604 xvattr_t *xvap = (xvattr_t *)vap;
605 size_t recsize = sizeof (lr_setattr_t);
606 void *start;
607
428870ff 608 if (zil_replaying(zilog, tx) || zp->z_unlinked)
34dc7c2f
BB
609 return;
610
611 /*
612 * If XVATTR set, then log record size needs to allow
613 * for lr_attr_t + xvattr mask, mapsize and create time
614 * plus actual attribute values
615 */
5484965a 616 if (vap->va_mask & ATTR_XVATTR)
34dc7c2f
BB
617 recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize);
618
619 if (fuidp)
620 recsize += fuidp->z_domain_str_sz;
621
622 itx = zil_itx_create(txtype, recsize);
623 lr = (lr_setattr_t *)&itx->itx_lr;
624 lr->lr_foid = zp->z_id;
625 lr->lr_mask = (uint64_t)mask_applied;
5484965a
BB
626 lr->lr_mode = (uint64_t)vap->va_mode;
627 if ((mask_applied & ATTR_UID) && IS_EPHEMERAL(vap->va_uid))
34dc7c2f
BB
628 lr->lr_uid = fuidp->z_fuid_owner;
629 else
5484965a 630 lr->lr_uid = (uint64_t)vap->va_uid;
34dc7c2f 631
5484965a 632 if ((mask_applied & ATTR_GID) && IS_EPHEMERAL(vap->va_gid))
34dc7c2f
BB
633 lr->lr_gid = fuidp->z_fuid_group;
634 else
5484965a 635 lr->lr_gid = (uint64_t)vap->va_gid;
34dc7c2f 636
5484965a
BB
637 lr->lr_size = (uint64_t)vap->va_size;
638 ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
639 ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
34dc7c2f 640 start = (lr_setattr_t *)(lr + 1);
5484965a 641 if (vap->va_mask & ATTR_XVATTR) {
34dc7c2f
BB
642 zfs_log_xvattr((lr_attr_t *)start, xvap);
643 start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize);
644 }
645
646 /*
647 * Now stick on domain information if any on end
648 */
649
650 if (fuidp)
651 (void) zfs_log_fuid_domains(fuidp, start);
652
653 itx->itx_sync = (zp->z_sync_cnt != 0);
572e2857 654 zil_itx_assign(zilog, itx, tx);
34dc7c2f
BB
655}
656
657/*
d3cc8b15 658 * Handles TX_ACL transactions.
34dc7c2f
BB
659 */
660void
661zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
662 vsecattr_t *vsecp, zfs_fuid_info_t *fuidp)
663{
664 itx_t *itx;
34dc7c2f
BB
665 lr_acl_v0_t *lrv0;
666 lr_acl_t *lr;
667 int txtype;
668 int lrsize;
669 size_t txsize;
670 size_t aclbytes = vsecp->vsa_aclentsz;
671
428870ff 672 if (zil_replaying(zilog, tx) || zp->z_unlinked)
b128c09f
BB
673 return;
674
3558fd73 675 txtype = (ZTOZSB(zp)->z_version < ZPL_VERSION_FUID) ?
34dc7c2f
BB
676 TX_ACL_V0 : TX_ACL;
677
678 if (txtype == TX_ACL)
679 lrsize = sizeof (*lr);
680 else
681 lrsize = sizeof (*lrv0);
682
34dc7c2f
BB
683 txsize = lrsize +
684 ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) +
685 (fuidp ? fuidp->z_domain_str_sz : 0) +
b128c09f 686 sizeof (uint64_t) * (fuidp ? fuidp->z_fuid_cnt : 0);
34dc7c2f
BB
687
688 itx = zil_itx_create(txtype, txsize);
689
690 lr = (lr_acl_t *)&itx->itx_lr;
691 lr->lr_foid = zp->z_id;
692 if (txtype == TX_ACL) {
693 lr->lr_acl_bytes = aclbytes;
694 lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
695 lr->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
696 if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS)
697 lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
698 else
699 lr->lr_acl_flags = 0;
700 }
701 lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt;
702
703 if (txtype == TX_ACL_V0) {
704 lrv0 = (lr_acl_v0_t *)lr;
705 bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes);
706 } else {
707 void *start = (ace_t *)(lr + 1);
708
709 bcopy(vsecp->vsa_aclentp, start, aclbytes);
710
711 start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);
712
713 if (fuidp) {
714 start = zfs_log_fuid_ids(fuidp, start);
715 (void) zfs_log_fuid_domains(fuidp, start);
716 }
717 }
718
719 itx->itx_sync = (zp->z_sync_cnt != 0);
572e2857 720 zil_itx_assign(zilog, itx, tx);
34dc7c2f 721}
15fd2749
CP
722
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Expose zfs_immediate_write_sz as a module parameter (mode 0644). */
module_param(zfs_immediate_write_sz, long, 0644);
MODULE_PARM_DESC(zfs_immediate_write_sz,
	"Largest data block to write to zil");
#endif