#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
+#include <sys/dsl_crypt.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zfs_znode.h>
-static ulong_t zfs_fsync_sync_cnt = 4;
-
/*
 * Commit the ZIL for a single znode.
 *
 * Unless the objset's os_sync setting is ZFS_SYNC_DISABLED, this enters the
 * filesystem with zfs_enter_verify_zp(), calls zil_commit() on the
 * filesystem's z_log for this znode's z_id, and exits again with zfs_exit().
 *
 * Returns 0, or the error reported by zfs_enter_verify_zp(). When sync is
 * disabled nothing is done and 0 is returned. The syncflag and cr arguments
 * are not referenced in the body shown here.
 */
int
zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
{
int error = 0;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
- (void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt);
-
/* Skip the commit entirely when os_sync is ZFS_SYNC_DISABLED. */
if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
- goto out;
+ return (error);
/*
 * z_sync_writes_cnt is raised only for the duration of the
 * zil_commit() call.
 * NOTE(review): presumably read elsewhere to detect an in-flight
 * sync on this znode -- confirm against the counter's other users.
 */
atomic_inc_32(&zp->z_sync_writes_cnt);
zil_commit(zfsvfs->z_log, zp->z_id);
atomic_dec_32(&zp->z_sync_writes_cnt);
zfs_exit(zfsvfs, FTAG);
}
-out:
- tsd_set(zfs_fsyncer_key, NULL);
-
return (error);
}
uint64_t end_size = MAX(zp->z_size, woff + n);
zilog_t *zilog = zfsvfs->z_log;
+ boolean_t commit = (ioflag & (O_SYNC | O_DSYNC)) ||
+ (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);
const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
const uint64_t gid = KGID_TO_SGID(ZTOGID(zp));
* zfs_clear_setid_bits_if_necessary must precede any of
* the TX_WRITE records logged here.
*/
- zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
+ zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit,
NULL, NULL);
dmu_tx_commit(tx);
return (error);
}
- if (ioflag & (O_SYNC | O_DSYNC) ||
- zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (commit)
zil_commit(zilog, zp->z_id);
const int64_t nwritten = start_resid - zfs_uio_resid(uio);
zfsvfs_t *zfsvfs = ZTOZSB(zp);
int error;
boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
- zilog_t *zilog = zfsvfs->z_log;
+ zilog_t *zilog;
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error);
-
+ zilog = zfsvfs->z_log;
error = zfs_setacl(zp, vsecp, skipaclchk, cr);
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
uint64_t zp_gen;
ASSERT3P(lwb, !=, NULL);
- ASSERT3P(zio, !=, NULL);
ASSERT3U(size, !=, 0);
/*
}
ASSERT(error == 0 || error == ENOENT);
} else { /* indirect write */
+ ASSERT3P(zio, !=, NULL);
/*
* Have to lock the whole block to ensure when it's
* written out and its checksum is being calculated
}
#endif
if (error == 0)
- error = dmu_buf_hold(os, object, offset, zgd, &db,
- DMU_READ_NO_PREFETCH);
+ error = dmu_buf_hold_noread(os, object, offset, zgd,
+ &db);
if (error == 0) {
blkptr_t *bp = &lr->lr_blkptr;
*
* On success, the function return the number of bytes copied in *lenp.
* Note, it doesn't return how much bytes are left to be copied.
+ * On errors caused by file system or brt limitations, `EINVAL` is
+ * returned. In most cases this means the caller requested bad parameters:
+ * cloning the file may be possible, but some of the parameters do not
+ * meet the requirements.
*/
int
zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
ASSERT(!outzfsvfs->z_replay);
+ /*
+ * Block cloning from an unencrypted dataset into an encrypted
+ * dataset and vice versa is not supported.
+ */
+ if (inos->os_encrypted != outos->os_encrypted) {
+ zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
+ return (SET_ERROR(EXDEV));
+ }
+
+ /*
+ * Cloning across encrypted datasets is possible only if they
+ * share the same master key.
+ */
+ if (inos != outos && inos->os_encrypted &&
+ !dmu_objset_crypto_key_equal(inos, outos)) {
+ zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
+ return (SET_ERROR(EXDEV));
+ }
+
error = zfs_verify_zp(inzp);
if (error == 0)
error = zfs_verify_zp(outzp);
inblksz = inzp->z_blksz;
/*
- * We cannot clone into files with different block size.
+ * We cannot clone into a file with a different block size if we can't
+ * grow it (its block size is already bigger, it has more than one
+ * block, or it is not locked for growth). There are other possible
+ * reasons for the grow to fail, but we cover what we can before opening
+ * a transaction and detect the rest after we try to do it.
*/
- if (inblksz != outzp->z_blksz && outzp->z_size > inblksz) {
- error = SET_ERROR(EXDEV);
+ if (inblksz < outzp->z_blksz) {
+ error = SET_ERROR(EINVAL);
+ goto unlock;
+ }
+ if (inblksz != outzp->z_blksz && (outzp->z_size > outzp->z_blksz ||
+ outlr->lr_length != UINT64_MAX)) {
+ error = SET_ERROR(EINVAL);
+ goto unlock;
+ }
+
+ /*
+ * Block size must be a power of 2 if destination offset != 0,
+ * since a file cannot have multiple blocks of non-power-of-2 size.
+ */
+ if (outoff != 0 && !ISP2(inblksz)) {
+ error = SET_ERROR(EINVAL);
goto unlock;
}
* Offsets and len must be at block boundries.
*/
if ((inoff % inblksz) != 0 || (outoff % inblksz) != 0) {
- error = SET_ERROR(EXDEV);
+ error = SET_ERROR(EINVAL);
goto unlock;
}
/*
*/
if ((len % inblksz) != 0 &&
(len < inzp->z_size - inoff || len < outzp->z_size - outoff)) {
- error = SET_ERROR(EXDEV);
+ error = SET_ERROR(EINVAL);
+ goto unlock;
+ }
+
+ /*
+ * If we are copying only one block and it is smaller than the
+ * recordsize property, do not allow the destination to grow beyond one
+ * block if it is not there yet. Otherwise the destination will get
+ * stuck with that block size forever, which can be as small as 512
+ * bytes, no matter how big the destination grows later.
+ */
+ if (len <= inblksz && inblksz < outzfsvfs->z_max_blksz &&
+ outzp->z_size <= inblksz && outoff + len > inblksz) {
+ error = SET_ERROR(EINVAL);
goto unlock;
}
&nbps);
if (error != 0) {
/*
- * If we are tyring to clone a block that was created
- * in the current transaction group. Return an error,
- * so the caller can fallback to just copying the data.
+ * If we are trying to clone a block that was created
+ * in the current transaction group, error will be
+ * EAGAIN here, which we can just return to the caller
+ * so it can fall back if it likes.
*/
- if (error == EAGAIN) {
- error = SET_ERROR(EXDEV);
- }
break;
}
- /*
- * Encrypted data is fine as long as it comes from the same
- * dataset.
- * TODO: We want to extend it in the future to allow cloning to
- * datasets with the same keys, like clones or to be able to
- * clone a file from a snapshot of an encrypted dataset into the
- * dataset itself.
- */
- if (BP_IS_PROTECTED(&bps[0])) {
- if (inzfsvfs != outzfsvfs) {
- error = SET_ERROR(EXDEV);
- break;
- }
- }
/*
* Start a transaction.
}
/*
- * Copy source znode's block size. This only happens on the
- * first iteration since zfs_rangelock_reduce() will shrink down
- * lr_len to the appropriate size.
+ * Copy source znode's block size. This is done only if the
+ * whole znode is locked (see zfs_rangelock_cb()) and only
+ * on the first iteration since zfs_rangelock_reduce() will
+ * shrink down lr_length to the appropriate size.
*/
if (outlr->lr_length == UINT64_MAX) {
zfs_grow_blocksize(outzp, inblksz, tx);
+
+ /*
+ * Block growth may fail for many reasons we cannot
+ * predict here. If it happens, the cloning is doomed.
+ */
+ if (inblksz != outzp->z_blksz) {
+ error = SET_ERROR(EINVAL);
+ dmu_tx_abort(tx);
+ break;
+ }
+
/*
* Round range lock up to the block boundary, so we
* prevent appends until we are done.
}
error = dmu_brt_clone(outos, outzp->z_id, outoff, size, tx,
- bps, nbps, B_FALSE);
+ bps, nbps);
if (error != 0) {
dmu_tx_commit(tx);
break;
*inoffp += done;
*outoffp += done;
*lenp = done;
+ } else {
+ /*
+ * If we made no progress, there must be a good reason.
+ * EOF is handled explicitly above, before the loop.
+ */
+ ASSERT3S(error, !=, 0);
}
zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
if (zp->z_blksz < blksz)
zfs_grow_blocksize(zp, blksz, tx);
- dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps, B_TRUE);
+ dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps);
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);