* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2022 by Pawel Jakub Dawidek
*/
#include <sys/types.h>
#include <sys/param.h>
-#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/byteorder.h>
#include <sys/policy.h>
#include <sys/stat.h>
-#include <sys/mode.h>
#include <sys/acl.h>
#include <sys/dmu.h>
+#include <sys/dbuf.h>
#include <sys/spa.h>
#include <sys/zfs_fuid.h>
-#include <sys/ddi.h>
#include <sys/dsl_dataset.h>
/*
return (TX_CREATE_ACL);
else
return (TX_CREATE_ATTR);
- /*NOTREACHED*/
case Z_DIR:
if (vsecp == NULL && !isxvattr)
return (TX_MKDIR);
static void
zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
{
- uint32_t *bitmap;
- uint64_t *attrs;
- uint64_t *crtime;
- xoptattr_t *xoap;
- void *scanstamp;
- int i;
+ xoptattr_t *xoap;
xoap = xva_getxoptattr(xvap);
ASSERT(xoap);
lrattr->lr_attr_masksize = xvap->xva_mapsize;
- bitmap = &lrattr->lr_attr_bitmap;
- for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) {
+ /* Copy every word of the requested-attribute bitmap into the record. */
+ uint32_t *bitmap = &lrattr->lr_attr_bitmap;
+ for (int i = 0; i != xvap->xva_mapsize; i++, bitmap++)
*bitmap = xvap->xva_reqattrmap[i];
- }
- /* Now pack the attributes up in a single uint64_t */
- attrs = (uint64_t *)bitmap;
- crtime = attrs + 1;
- scanstamp = (caddr_t)(crtime + 2);
- *attrs = 0;
+ /*
+ * The fixed-size tail starts right after the last bitmap word. Zero
+ * all of it first so fields that are not requested below are logged
+ * as zeroes.
+ */
+ lr_attr_end_t *end = (lr_attr_end_t *)bitmap;
+ end->lr_attr_attrs = 0;
+ end->lr_attr_crtime[0] = 0;
+ end->lr_attr_crtime[1] = 0;
+ memset(end->lr_attr_scanstamp, 0, AV_SCANSTAMP_SZ);
+
if (XVA_ISSET_REQ(xvap, XAT_READONLY))
- *attrs |= (xoap->xoa_readonly == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_readonly == 0) ? 0 :
XAT0_READONLY;
if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
- *attrs |= (xoap->xoa_hidden == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_hidden == 0) ? 0 :
XAT0_HIDDEN;
if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
- *attrs |= (xoap->xoa_system == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_system == 0) ? 0 :
XAT0_SYSTEM;
if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
- *attrs |= (xoap->xoa_archive == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_archive == 0) ? 0 :
XAT0_ARCHIVE;
if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
- *attrs |= (xoap->xoa_immutable == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_immutable == 0) ? 0 :
XAT0_IMMUTABLE;
if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
- *attrs |= (xoap->xoa_nounlink == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_nounlink == 0) ? 0 :
XAT0_NOUNLINK;
if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
- *attrs |= (xoap->xoa_appendonly == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_appendonly == 0) ? 0 :
XAT0_APPENDONLY;
+ /* Opaque has its own flag bit; it must not alias XAT0_APPENDONLY. */
if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
- *attrs |= (xoap->xoa_opaque == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_opaque == 0) ? 0 :
- XAT0_APPENDONLY;
+ XAT0_OPAQUE;
if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
- *attrs |= (xoap->xoa_nodump == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_nodump == 0) ? 0 :
XAT0_NODUMP;
if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
- *attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
XAT0_AV_QUARANTINED;
if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
- *attrs |= (xoap->xoa_av_modified == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_av_modified == 0) ? 0 :
XAT0_AV_MODIFIED;
if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
- ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
- if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
- bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
+ ZFS_TIME_ENCODE(&xoap->xoa_createtime, end->lr_attr_crtime);
+ if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
+ ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID));
+
+ memcpy(end->lr_attr_scanstamp, xoap->xoa_av_scanstamp,
+ AV_SCANSTAMP_SZ);
+ } else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
+ /*
+ * XAT_PROJID and XAT_AV_SCANSTAMP will never be valid
+ * at the same time, so we can share the same space.
+ */
+ memcpy(end->lr_attr_scanstamp, &xoap->xoa_projid,
+ sizeof (uint64_t));
+ }
if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
- *attrs |= (xoap->xoa_reparse == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_reparse == 0) ? 0 :
XAT0_REPARSE;
if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
- *attrs |= (xoap->xoa_offline == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_offline == 0) ? 0 :
XAT0_OFFLINE;
if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
- *attrs |= (xoap->xoa_sparse == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_sparse == 0) ? 0 :
XAT0_SPARSE;
+ if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT))
+ end->lr_attr_attrs |= (xoap->xoa_projinherit == 0) ? 0 :
+ XAT0_PROJINHERIT;
}
static void *
if (fuidp->z_domain_str_sz != 0) {
for (zdomain = list_head(&fuidp->z_domains); zdomain;
zdomain = list_next(&fuidp->z_domains, zdomain)) {
- bcopy((void *)zdomain->z_domain, start,
+ memcpy(start, zdomain->z_domain,
strlen(zdomain->z_domain) + 1);
start = (caddr_t)start +
strlen(zdomain->z_domain) + 1;
return (start);
}
+/*
+ * Report whether the file that ultimately owns an xattr node is unlinked.
+ *
+ * While the current node has ZFS_XATTR set in z_pflags, its z_xattr_parent
+ * is looked up with zfs_zget() and the walk continues from there.  The
+ * result is the z_unlinked value of the last parent reached, 1 if a parent
+ * lookup fails, and 0 if zp is not an xattr node at all.  Callers skip
+ * logging when the result is nonzero.
+ *
+ * Every znode obtained through zfs_zget() is released before returning.
+ */
+static int
+zfs_xattr_owner_unlinked(znode_t *zp)
+{
+	int unlinked = 0;
+	znode_t *dzp;
+#ifdef __FreeBSD__
+	znode_t *tzp = zp;
+
+	/*
+	 * zrele drops the vnode lock which violates the VOP locking contract
+	 * on FreeBSD. See comment at the top of zfs_replay.c for more detail.
+	 * For that reason zp itself is never held or released here; only the
+	 * parents returned by zfs_zget() are.
+	 */
+	while (tzp->z_pflags & ZFS_XATTR) {
+		/* Check the node being walked, not just the starting node. */
+		ASSERT3U(tzp->z_xattr_parent, !=, 0);
+		if (zfs_zget(ZTOZSB(tzp), tzp->z_xattr_parent, &dzp) != 0) {
+			unlinked = 1;
+			break;
+		}
+
+		if (tzp != zp)
+			zrele(tzp);
+		tzp = dzp;
+		unlinked = tzp->z_unlinked;
+	}
+	if (tzp != zp)
+		zrele(tzp);
+#else
+	/*
+	 * Take an extra hold on zp so that the loop and the final zrele()
+	 * can treat the starting node and looked-up parents uniformly.
+	 */
+	zhold(zp);
+	while (zp->z_pflags & ZFS_XATTR) {
+		ASSERT3U(zp->z_xattr_parent, !=, 0);
+		if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, &dzp) != 0) {
+			unlinked = 1;
+			break;
+		}
+
+		zrele(zp);
+		zp = dzp;
+		unlinked = zp->z_unlinked;
+	}
+	zrele(zp);
+#endif
+	return (unlinked);
+}
+
/*
* Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
- * TK_MKXATTR transactions.
+ * TX_MKXATTR transactions.
*/
void
zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
+ znode_t *dzp, znode_t *zp, const char *name, vsecattr_t *vsecp,
zfs_fuid_info_t *fuidp, vattr_t *vap)
{
itx_t *itx;
size_t namesize = strlen(name) + 1;
size_t fuidsz = 0;
- if (zil_replaying(zilog, tx))
+ if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
return;
/*
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
+ /* Store dnode slot count in 8 bits above object id. */
+ LR_FOID_SET_SLOTS(lr->lr_foid, zp->z_dnodesize >> DNODE_SHIFT);
lr->lr_mode = zp->z_mode;
- if (!IS_EPHEMERAL(zp->z_uid)) {
- lr->lr_uid = (uint64_t)zp->z_uid;
+ if (!IS_EPHEMERAL(KUID_TO_SUID(ZTOUID(zp)))) {
+ lr->lr_uid = (uint64_t)KUID_TO_SUID(ZTOUID(zp));
} else {
lr->lr_uid = fuidp->z_fuid_owner;
}
- if (!IS_EPHEMERAL(zp->z_gid)) {
- lr->lr_gid = (uint64_t)zp->z_gid;
+ if (!IS_EPHEMERAL(KGID_TO_SGID(ZTOGID(zp)))) {
+ lr->lr_gid = (uint64_t)KGID_TO_SGID(ZTOGID(zp));
} else {
lr->lr_gid = fuidp->z_fuid_group;
}
else
lracl->lr_acl_flags = 0;
- bcopy(vsecp->vsa_aclentp, end, aclsize);
+ memcpy(end, vsecp->vsa_aclentp, aclsize);
end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize);
}
/*
* Now place file name in log record
*/
- bcopy(name, end, namesize);
+ memcpy(end, name, namesize);
zil_itx_assign(zilog, itx, tx);
}
*/
void
zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *dzp, char *name, uint64_t foid)
+ znode_t *dzp, const char *name, uint64_t foid, boolean_t unlinked)
{
itx_t *itx;
lr_remove_t *lr;
size_t namesize = strlen(name) + 1;
- if (zil_replaying(zilog, tx))
+ if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
return;
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
lr = (lr_remove_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
- bcopy(name, (char *)(lr + 1), namesize);
+ memcpy(lr + 1, name, namesize);
itx->itx_oid = foid;
+ /*
+ * Object ids can be re-instantiated in the next txg so
+ * remove any async transactions to avoid future leaks.
+ * This can happen if an fsync occurs on the re-instantiated
+ * object for a WR_INDIRECT or WR_NEED_COPY write, which gets
+ * the new file data and flushes a write record for the old object.
+ *
+ * The assertion below requires the record type to be TX_REMOVE,
+ * with or without the TX_CI bit, whenever unlinked is set.
+ */
+ if (unlinked) {
+ ASSERT((txtype & ~TX_CI) == TX_REMOVE);
+ zil_remove_async(zilog, foid);
+ }
zil_itx_assign(zilog, itx, tx);
}
*/
void
zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *dzp, znode_t *zp, char *name)
+ znode_t *dzp, znode_t *zp, const char *name)
{
itx_t *itx;
lr_link_t *lr;
lr = (lr_link_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_link_obj = zp->z_id;
- bcopy(name, (char *)(lr + 1), namesize);
+ memcpy(lr + 1, name, namesize);
zil_itx_assign(zilog, itx, tx);
}
*/
void
zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *dzp, znode_t *zp, char *name, char *link)
+ znode_t *dzp, znode_t *zp, const char *name, const char *link)
{
itx_t *itx;
lr_create_t *lr;
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
- lr->lr_uid = zp->z_uid;
- lr->lr_gid = zp->z_gid;
+ lr->lr_uid = KUID_TO_SUID(ZTOUID(zp));
+ lr->lr_gid = KGID_TO_SGID(ZTOGID(zp));
lr->lr_mode = zp->z_mode;
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
sizeof (uint64_t));
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
lr->lr_crtime, sizeof (uint64_t) * 2);
- bcopy(name, (char *)(lr + 1), namesize);
- bcopy(link, (char *)(lr + 1) + namesize, linksize);
+ memcpy((char *)(lr + 1), name, namesize);
+ memcpy((char *)(lr + 1) + namesize, link, linksize);
zil_itx_assign(zilog, itx, tx);
}
-/*
- * Handles TX_RENAME transactions.
- */
-void
-zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
+static void
+do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
+ const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
{
itx_t *itx;
lr_rename_t *lr;
lr = (lr_rename_t *)&itx->itx_lr;
lr->lr_sdoid = sdzp->z_id;
lr->lr_tdoid = tdzp->z_id;
- bcopy(sname, (char *)(lr + 1), snamesize);
- bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
+ memcpy((char *)(lr + 1), sname, snamesize);
+ memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
itx->itx_oid = szp->z_id;
zil_itx_assign(zilog, itx, tx);
}
/*
- * Handles TX_WRITE transactions.
+ * Handles TX_RENAME transactions.
+ */
+void
+zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
+    const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
+{
+	/* OR in TX_RENAME; bits already set by the caller are preserved. */
+	do_zfs_log_rename(zilog, tx, txtype | TX_RENAME, sdzp, sname, tdzp,
+	    dname, szp);
+}
+
+/*
+ * Handles TX_RENAME_EXCHANGE transactions.
*/
-long zfs_immediate_write_sz = 32768;
+void
+zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+    znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
+    znode_t *szp)
+{
+	/* OR in TX_RENAME_EXCHANGE; caller-supplied bits are preserved. */
+	do_zfs_log_rename(zilog, tx, txtype | TX_RENAME_EXCHANGE, sdzp, sname,
+	    tdzp, dname, szp);
+}
+
+/*
+ * Handles TX_RENAME_WHITEOUT transactions.
+ *
+ * Unfortunately we cannot reuse do_zfs_log_rename because we need to call
+ * zfs_mknode() on replay which requires stashing bits as with TX_CREATE.
+ */
+void
+zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
+ znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
+ znode_t *szp, znode_t *wzp)
+{
+ itx_t *itx;
+ lr_rename_whiteout_t *lr;
+ size_t snamesize = strlen(sname) + 1;
+ size_t dnamesize = strlen(dname) + 1;
+
+ if (zil_replaying(zilog, tx))
+ return;
+
+ txtype |= TX_RENAME_WHITEOUT;
+ itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
+ lr = (lr_rename_whiteout_t *)&itx->itx_lr;
+ lr->lr_rename.lr_sdoid = sdzp->z_id;
+ lr->lr_rename.lr_tdoid = tdzp->z_id;
+
+ /*
+ * RENAME_WHITEOUT will create an entry at the source znode, so we need
+ * to store the same data that the equivalent call to zfs_log_create()
+ * would.
+ */
+ lr->lr_wfoid = wzp->z_id;
+ LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT);
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen,
+ sizeof (uint64_t));
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)),
+ lr->lr_wcrtime, sizeof (uint64_t) * 2);
+ lr->lr_wmode = wzp->z_mode;
+ lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp));
+ lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp));
+
+ /*
+ * This rdev will always be makedevice(0, 0) but because the ZIL log and
+ * replay code needs to be platform independent (and there is no
+ * platform independent makedev()) we need to copy the one created
+ * during the rename operation.
+ */
+ (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev,
+ sizeof (lr->lr_wrdev));
+
+ memcpy((char *)(lr + 1), sname, snamesize);
+ memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
+ itx->itx_oid = szp->z_id;
+
+ zil_itx_assign(zilog, itx, tx);
+}
+
+/*
+ * zfs_log_write() handles TX_WRITE transactions. The specified callback is
+ * called as soon as the write is on stable storage (be it via a DMU sync or a
+ * ZIL commit).
+ *
+ * If nothing is logged (replay in progress, or the file or its xattr owner
+ * is unlinked), a non-NULL callback is invoked immediately instead.
+ */
+static int64_t zfs_immediate_write_sz = 32768;
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, offset_t off, ssize_t resid, int ioflag)
+ znode_t *zp, offset_t off, ssize_t resid, boolean_t commit,
+ zil_callback_t callback, void *callback_data)
{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
+ uint32_t blocksize = zp->z_blksz;
itx_wr_state_t write_state;
- boolean_t slogging;
- uintptr_t fsync_cnt;
- ssize_t immediate_write_sz;
+ uint64_t gen = 0;
+ ssize_t size = resid;
- if (zil_replaying(zilog, tx) || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked ||
+ zfs_xattr_owner_unlinked(zp)) {
+ if (callback != NULL)
+ callback(callback_data);
return;
+ }
- immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
- ? 0 : (ssize_t)zfs_immediate_write_sz;
-
- slogging = spa_has_slogs(zilog->zl_spa) &&
- (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
- if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
+ if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ write_state = WR_INDIRECT;
+ else if (!spa_has_slogs(zilog->zl_spa) &&
+ resid >= zfs_immediate_write_sz)
write_state = WR_INDIRECT;
- else if (ioflag & (FSYNC | FDSYNC))
+ else if (commit)
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
- if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
- (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
- }
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
+ sizeof (gen));
while (resid) {
itx_t *itx;
lr_write_t *lr;
- ssize_t len;
+ itx_wr_state_t wr_state = write_state;
+ ssize_t len = resid;
/*
- * If the write would overflow the largest block then split it.
+ * A WR_COPIED record must fit entirely in one log block.
+ * Large writes can use WR_NEED_COPY, which the ZIL will
+ * split into multiple records across several log blocks
+ * if necessary.
*/
- if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
- len = SPA_MAXBLOCKSIZE >> 1;
- else
- len = resid;
+ if (wr_state == WR_COPIED &&
+ resid > zil_max_copied_data(zilog))
+ wr_state = WR_NEED_COPY;
+ else if (wr_state == WR_INDIRECT)
+ len = MIN(blocksize - P2PHASE(off, blocksize), resid);
itx = zil_itx_create(txtype, sizeof (*lr) +
- (write_state == WR_COPIED ? len : 0));
+ (wr_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
- if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
- zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
- zil_itx_destroy(itx);
- itx = zil_itx_create(txtype, sizeof (*lr));
- lr = (lr_write_t *)&itx->itx_lr;
- write_state = WR_NEED_COPY;
+
+ /*
+ * For WR_COPIED records, copy the data into the lr_write_t.
+ * If the read fails, the itx is recreated without a payload
+ * and this chunk is logged as WR_NEED_COPY instead.
+ */
+ if (wr_state == WR_COPIED) {
+ int err;
+ DB_DNODE_ENTER(db);
+ err = dmu_read_by_dnode(DB_DNODE(db), off, len, lr + 1,
+ DMU_READ_NO_PREFETCH);
+ if (err != 0) {
+ zil_itx_destroy(itx);
+ itx = zil_itx_create(txtype, sizeof (*lr));
+ lr = (lr_write_t *)&itx->itx_lr;
+ wr_state = WR_NEED_COPY;
+ }
+ DB_DNODE_EXIT(db);
}
- itx->itx_wr_state = write_state;
- if (write_state == WR_NEED_COPY)
- itx->itx_sod += len;
+ itx->itx_wr_state = wr_state;
lr->lr_foid = zp->z_id;
lr->lr_offset = off;
lr->lr_length = len;
BP_ZERO(&lr->lr_blkptr);
itx->itx_private = ZTOZSB(zp);
+ itx->itx_sync = (zp->z_sync_cnt != 0);
+ itx->itx_gen = gen;
- if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
- (fsync_cnt == 0))
- itx->itx_sync = B_FALSE;
-
+ itx->itx_callback = callback;
+ itx->itx_callback_data = callback_data;
zil_itx_assign(zilog, itx, tx);
off += len;
resid -= len;
}
+
+ /* resid is 0 by now, so report the size of the original request. */
+ if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+ dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
+ }
}
/*
*/
void
zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, uint64_t off, uint64_t len)
+ znode_t *zp, uint64_t off, uint64_t len)
{
itx_t *itx;
lr_truncate_t *lr;
- if (zil_replaying(zilog, tx) || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked ||
+ zfs_xattr_owner_unlinked(zp))
return;
itx = zil_itx_create(txtype, sizeof (*lr));
zil_itx_assign(zilog, itx, tx);
}
+/*
+ * Handles TX_SETSAXATTR transactions.
+ *
+ * The record is an lr_setsaxattr_t followed by the NUL-terminated name and
+ * then 'size' bytes reserved for the value.  When value is NULL nothing is
+ * copied and lr_size is logged as 0 (NOTE(review): presumably this encodes
+ * a removal -- confirm against the replay code).
+ *
+ * Nothing is logged while the ZIL is replaying or when zp is unlinked.
+ */
+void
+zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+    znode_t *zp, const char *name, const void *value, size_t size)
+{
+	itx_t *itx;
+	lr_setsaxattr_t *lr;
+	char *xattrstart;
+	size_t namesize;
+
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
+		return;
+
+	/*
+	 * Keep the length in a size_t, as the other loggers in this file
+	 * do, instead of narrowing the strlen() result to an int.
+	 */
+	namesize = strlen(name) + 1;
+	itx = zil_itx_create(txtype, sizeof (*lr) + namesize + size);
+	lr = (lr_setsaxattr_t *)&itx->itx_lr;
+	lr->lr_foid = zp->z_id;
+
+	/* Name first, value (if any) immediately after its terminator. */
+	xattrstart = (char *)(lr + 1);
+	memcpy(xattrstart, name, namesize);
+	if (value != NULL) {
+		memcpy(xattrstart + namesize, value, size);
+		lr->lr_size = size;
+	} else {
+		lr->lr_size = 0;
+	}
+
+	itx->itx_sync = (zp->z_sync_cnt != 0);
+	zil_itx_assign(zilog, itx, tx);
+}
+
/*
* Handles TX_ACL transactions.
*/
if (txtype == TX_ACL_V0) {
lrv0 = (lr_acl_v0_t *)lr;
- bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes);
+ memcpy(lrv0 + 1, vsecp->vsa_aclentp, aclbytes);
} else {
void *start = (ace_t *)(lr + 1);
- bcopy(vsecp->vsa_aclentp, start, aclbytes);
+ memcpy(start, vsecp->vsa_aclentp, aclbytes);
start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);
zil_itx_assign(zilog, itx, tx);
}
-#if defined(_KERNEL) && defined(HAVE_SPL)
-module_param(zfs_immediate_write_sz, long, 0644);
-MODULE_PARM_DESC(zfs_immediate_write_sz, "Largest data block to write to zil");
-#endif
+/*
+ * Handles TX_CLONE_RANGE transactions.
+ *
+ * The block pointer array is logged in as many records as needed: each
+ * record carries at most max_log_data / sizeof (bps[0]) block pointers and
+ * covers the sum of their logical sizes, capped by the length that is
+ * still left to log.  Nothing is logged during replay or for an unlinked
+ * znode.
+ */
+void
+zfs_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp,
+    uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps,
+    size_t nbps)
+{
+	uint64_t max_log_data;
+
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
+		return;
+
+	max_log_data = zil_max_log_data(zilog, sizeof (lr_clone_range_t));
+
+	while (nbps > 0) {
+		/* How many block pointers go into this record. */
+		size_t chunk_nbps = MIN(nbps, max_log_data / sizeof (bps[0]));
+		size_t chunk_bytes = sizeof (bps[0]) * chunk_nbps;
+
+		/* Logical length covered by those block pointers. */
+		uint64_t chunk_len = 0;
+		for (size_t b = 0; b < chunk_nbps; b++)
+			chunk_len += BP_GET_LSIZE(&bps[b]);
+		if (chunk_len > len)
+			chunk_len = len;
+
+		itx_t *itx = zil_itx_create(txtype,
+		    sizeof (lr_clone_range_t) + chunk_bytes);
+		lr_clone_range_t *lr = (lr_clone_range_t *)&itx->itx_lr;
+
+		lr->lr_foid = zp->z_id;
+		lr->lr_offset = off;
+		lr->lr_length = chunk_len;
+		lr->lr_blksz = blksz;
+		lr->lr_nbps = chunk_nbps;
+		memcpy(lr->lr_bps, bps, chunk_bytes);
+
+		itx->itx_sync = (zp->z_sync_cnt != 0);
+
+		zil_itx_assign(zilog, itx, tx);
+
+		/* Advance past what was just logged. */
+		ASSERT3U(nbps, >=, chunk_nbps);
+		ASSERT3U(len, >=, chunk_len);
+		bps += chunk_nbps;
+		nbps -= chunk_nbps;
+		off += chunk_len;
+		len -= chunk_len;
+	}
+}
+
+ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, S64, ZMOD_RW,
+ "Largest data block to write to zil");