* Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
* Copyright (c) 2020, George Amanakis. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
* Copyright (c) 2020, The FreeBSD Foundation [1]
*
* [1] Portions of this software were developed by Allan Jude
*     under sponsorship from the FreeBSD Foundation.
{ "dbuf_size", KSTAT_DATA_UINT64 },
{ "dnode_size", KSTAT_DATA_UINT64 },
{ "bonus_size", KSTAT_DATA_UINT64 },
+#if defined(COMPAT_FREEBSD11)
+ { "other_size", KSTAT_DATA_UINT64 },
+#endif
{ "anon_size", KSTAT_DATA_UINT64 },
{ "anon_evictable_data", KSTAT_DATA_UINT64 },
{ "anon_evictable_metadata", KSTAT_DATA_UINT64 },
int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
int l2arc_feed_again = B_TRUE; /* turbo warmup */
int l2arc_norw = B_FALSE; /* no reads during writes */
+int l2arc_meta_percent = 33;	/* limit on size of L2ARC headers */
/*
* L2ARC Internals
HDR_GET_COMPRESS(hdr) : ZIO_COMPRESS_OFF);
}
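+/*
+ * Accessor for the compression level recorded in the buffer's header,
+ * for callers outside the ARC that must reproduce the original
+ * compression parameters.
+ */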
+uint8_t
+arc_get_complevel(arc_buf_t *buf)
+{
+ return (buf->b_hdr->b_complevel);
+}
+
static inline boolean_t
arc_buf_is_shared(arc_buf_t *buf)
{
static arc_buf_hdr_t *
arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
dva_t dva, uint64_t daddr, int32_t psize, uint64_t birth,
- enum zio_compress compress, boolean_t protected, boolean_t prefetch)
+ enum zio_compress compress, uint8_t complevel, boolean_t protected,
+ boolean_t prefetch)
{
arc_buf_hdr_t *hdr;
HDR_SET_LSIZE(hdr, size);
HDR_SET_PSIZE(hdr, psize);
arc_hdr_set_compress(hdr, compress);
+ hdr->b_complevel = complevel;
if (protected)
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
if (prefetch)
tmpbuf = zio_buf_alloc(lsize);
abd = abd_get_from_buf(tmpbuf, lsize);
abd_take_ownership_of_buf(abd, B_TRUE);
-
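+/*
+ * Recompress with the level recorded in the header so the result is
+ * byte-identical to the original physical data.
+ */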
csize = zio_compress_data(HDR_GET_COMPRESS(hdr),
- hdr->b_l1hdr.b_pabd, tmpbuf, lsize);
+ hdr->b_l1hdr.b_pabd, tmpbuf, lsize, hdr->b_complevel);
ASSERT3U(csize, <=, psize);
abd_zero_off(abd, csize, psize - csize);
}
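+/*
+ * zio_decompress_data() now takes a level out-parameter: zstd records the
+ * level in its frame header, so decompression can report it back and the
+ * ARC can repopulate b_complevel.
+ */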
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
- HDR_GET_LSIZE(hdr));
+ HDR_GET_LSIZE(hdr), &hdr->b_complevel);
if (ret != 0) {
abd_return_buf(cabd, tmp, arc_hdr_size(hdr));
goto error;
} else {
error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
hdr->b_l1hdr.b_pabd, buf->b_data,
- HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
+ HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr),
+ &hdr->b_complevel);
/*
* Absent hardware errors or software bugs, this should
* be impossible, but log the error anyway.
*/
ret = SET_ERROR(EIO);
spa_log_error(spa, zb);
- zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, zb, NULL, 0, 0);
}
arc_buf_t *
arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type)
+ enum zio_compress compression_type, uint8_t complevel)
{
arc_buf_t *buf = arc_alloc_compressed_buf(spa, arc_onloan_tag,
- psize, lsize, compression_type);
+ psize, lsize, compression_type, complevel);
arc_loaned_bytes_update(arc_buf_size(buf));
arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type)
+ enum zio_compress compression_type, uint8_t complevel)
{
arc_buf_t *buf = arc_alloc_raw_buf(spa, arc_onloan_tag, dsobj,
- byteorder, salt, iv, mac, ot, psize, lsize, compression_type);
+ byteorder, salt, iv, mac, ot, psize, lsize, compression_type,
+ complevel);
atomic_add_64(&arc_loaned_bytes, psize);
return (buf);
static arc_buf_hdr_t *
arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
- boolean_t protected, enum zio_compress compression_type,
+ boolean_t protected, enum zio_compress compression_type, uint8_t complevel,
arc_buf_contents_t type, boolean_t alloc_rdata)
{
arc_buf_hdr_t *hdr;
hdr->b_flags = 0;
arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR);
arc_hdr_set_compress(hdr, compression_type);
+ hdr->b_complevel = complevel;
if (protected)
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
{
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
- B_FALSE, ZIO_COMPRESS_OFF, type, B_FALSE);
+ B_FALSE, ZIO_COMPRESS_OFF, 0, type, B_FALSE);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE,
*/
arc_buf_t *
arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type)
+ enum zio_compress compression_type, uint8_t complevel)
{
ASSERT3U(lsize, >, 0);
ASSERT3U(lsize, >=, psize);
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- B_FALSE, compression_type, ARC_BUFC_DATA, B_FALSE);
+ B_FALSE, compression_type, complevel, ARC_BUFC_DATA, B_FALSE);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE,
arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type)
+ enum zio_compress compression_type, uint8_t complevel)
{
arc_buf_hdr_t *hdr;
arc_buf_t *buf;
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
- compression_type, type, B_TRUE);
+ compression_type, complevel, type, B_TRUE);
hdr->b_crypt_hdr.b_dsobj = dsobj;
hdr->b_crypt_hdr.b_ot = ot;
int64_t mrug_size = zfs_refcount_count(&arc_mru_ghost->arcs_size);
int64_t mfug_size = zfs_refcount_count(&arc_mfu_ghost->arcs_size);
- if (state == arc_l2c_only)
- return;
-
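+/*
+ * The arc_l2c_only early return is dropped so l2arc_log_blk_restore()
+ * can call arc_adapt() for L2-only header allocations (see below).
+ */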
ASSERT(bytes > 0);
/*
* Adapt the target size of the MRU list:
} else {
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
}
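+/*
+ * A read serviced from the L2ARC already carries the level restored from
+ * the device, so only adopt the zio's level for pool reads.
+ */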
+ if (!HDR_L2_READING(hdr)) {
+ hdr->b_complevel = zio->io_prop.zp_complevel;
+ }
}
arc_hdr_clear_flags(hdr, ARC_FLAG_L2_EVICTED);
error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(zio->io_spa, &acb->acb_zb);
- zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+ (void) zfs_ereport_post(
+ FM_EREPORT_ZFS_AUTHENTICATION,
zio->io_spa, NULL, &acb->acb_zb, zio, 0, 0);
}
}
rc = SET_ERROR(EIO);
if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, zb);
- zfs_ereport_post(
+ (void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, zb, NULL, 0, 0);
}
arc_buf_hdr_t *exists = NULL;
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), type,
+ BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type,
encrypted_read);
if (!embedded_bp) {
* buffer which will be freed in arc_write().
*/
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
- compress, type, HDR_HAS_RABD(hdr));
+ compress, hdr->b_complevel, type, HDR_HAS_RABD(hdr));
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(nhdr->b_l1hdr.b_bufcnt);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
}
HDR_SET_PSIZE(hdr, psize);
arc_hdr_set_compress(hdr, compress);
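+/* Record the level this zio compressed with alongside the algorithm. */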
+ hdr->b_complevel = zio->io_prop.zp_complevel;
if (zio->io_error != 0 || psize == 0)
goto out;
ASSERT(ARC_BUF_COMPRESSED(buf));
localprop.zp_encrypt = B_TRUE;
localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+ localprop.zp_complevel = hdr->b_complevel;
localprop.zp_byteorder =
(hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
} else if (ARC_BUF_COMPRESSED(buf)) {
ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf));
localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+ localprop.zp_complevel = hdr->b_complevel;
zio_flags |= ZIO_FLAG_RAW_COMPRESS;
}
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
ARCSTAT(arcstat_hdr_size) = aggsum_value(&astat_hdr_size);
ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size);
ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size);
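+/* Reconstitute the legacy aggregate stat from its split-out components. */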
+#if defined(COMPAT_FREEBSD11)
+ ARCSTAT(arcstat_other_size) = aggsum_value(&astat_bonus_size) +
+ aggsum_value(&astat_dnode_size) +
+ aggsum_value(&astat_dbuf_size);
+#endif
ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size);
ARCSTAT(arcstat_abd_chunk_waste_size) =
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
- HDR_GET_LSIZE(hdr));
+ HDR_GET_LSIZE(hdr), &hdr->b_complevel);
if (ret != 0) {
abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr);
(HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd));
zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
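+/* Carry the stored level in the zio properties for the rest of the pipeline. */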
+ zio->io_prop.zp_complevel = hdr->b_complevel;
valid_cksum = arc_cksum_is_equal(hdr, zio);
cabd = abd_alloc_for_io(asize, ismd);
tmp = abd_borrow_buf(cabd, asize);
- psize = zio_compress_data(compress, to_write, tmp, size);
+ psize = zio_compress_data(compress, to_write, tmp, size,
+ hdr->b_complevel);
+
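+/*
+ * If compression did not save space, send the block to the L2ARC
+ * uncompressed: copy the raw data, pad to asize, and skip to encryption.
+ */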
+ if (psize >= size) {
+ abd_return_buf(cabd, tmp, asize);
+ HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
+ to_write = cabd;
+ abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
+ if (size != asize)
+ abd_zero_off(to_write, size, asize - size);
+ goto encrypt;
+ }
ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
if (psize < asize)
bzero((char *)tmp + psize, asize - psize);
to_write = cabd;
}
+encrypt:
if (HDR_ENCRYPTED(hdr)) {
eabd = abd_alloc_for_io(asize, ismd);
return (write_asize);
}
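+/*
+ * Returns B_TRUE when the memory used by in-core L2ARC-only headers should
+ * stop growing: under general memory pressure, past 3/4 of arc_meta_limit,
+ * or past l2arc_meta_percent of arc_c (arc_c_max until the ARC has warmed).
+ */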
+static boolean_t
+l2arc_hdr_limit_reached(void)
+{
+ int64_t s = aggsum_upper_bound(&astat_l2_hdr_size);
+
+ return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) ||
+ (s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100));
+}
+
/*
* This thread feeds the L2ARC at regular intervals. This is the beating
* heart of the L2ARC.
/*
* Avoid contributing to memory pressure.
*/
- if (arc_reclaim_needed()) {
+ if (l2arc_hdr_limit_reached()) {
ARCSTAT_BUMP(arcstat_l2_abort_lowmem);
spa_config_exit(spa, SCL_L2ARC, dev);
continue;
ASSERT(!l2arc_vdev_present(vd));
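+/*
+ * Presumably raises the vdev's reported ashift to the configured optimum
+ * before the device entry is created (FreeBSD-specific; assumption).
+ */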
+ vdev_ashift_optimize(vd);
+
/*
* Create a new l2arc device entry.
*/
* online the L2ARC dev at a later time (or re-import the pool)
* to reconstruct it (when there's less memory pressure).
*/
- if (arc_reclaim_needed()) {
+ if (l2arc_hdr_limit_reached()) {
ARCSTAT_BUMP(arcstat_l2_rebuild_abort_lowmem);
cmn_err(CE_NOTE, "System running low on memory, "
"aborting L2ARC rebuild.");
abd_copy_from_buf_off(abd, this_lb, 0, asize);
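+/*
+ * Log blocks are always LZ4-compressed (see the LZ4 call below), so
+ * there is no stored level to recover; pass NULL for the out-parameter.
+ */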
if ((err = zio_decompress_data(
L2BLK_GET_COMPRESS((this_lbp)->lbp_prop),
- abd, this_lb, asize, sizeof (*this_lb))) != 0) {
+ abd, this_lb, asize, sizeof (*this_lb), NULL)) != 0) {
err = SET_ERROR(EINVAL);
goto cleanup;
}
uint64_t size = 0, asize = 0;
uint64_t log_entries = dev->l2ad_log_entries;
+ /*
+ * Usually arc_adapt() is called only for data, not headers, but
+ * since we may allocate a significant amount of memory here, let ARC
+ * grow its arc_c.
+ */
+ arc_adapt(log_entries * HDR_L2ONLY_SIZE, arc_l2c_only);
+
for (int i = log_entries - 1; i >= 0; i--) {
/*
* Restore goes in the reverse temporal direction to preserve
hdr = arc_buf_alloc_l2only(L2BLK_GET_LSIZE((le)->le_prop), type,
dev, le->le_dva, le->le_daddr,
L2BLK_GET_PSIZE((le)->le_prop), le->le_birth,
- L2BLK_GET_COMPRESS((le)->le_prop),
+ L2BLK_GET_COMPRESS((le)->le_prop), le->le_complevel,
L2BLK_GET_PROTECTED((le)->le_prop),
L2BLK_GET_PREFETCH((le)->le_prop));
asize = vdev_psize_to_asize(dev->l2ad_vdev,
/* try to compress the buffer */
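+/* (level 0: LZ4 takes no tuning level, so the new argument is unused) */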
psize = zio_compress_data(ZIO_COMPRESS_LZ4,
- abd_buf->abd, tmpbuf, sizeof (*lb));
+ abd_buf->abd, tmpbuf, sizeof (*lb), 0);
/* a log block is never entirely zero */
ASSERT(psize != 0);
L2BLK_SET_LSIZE((le)->le_prop, HDR_GET_LSIZE(hdr));
L2BLK_SET_PSIZE((le)->le_prop, HDR_GET_PSIZE(hdr));
L2BLK_SET_COMPRESS((le)->le_prop, HDR_GET_COMPRESS(hdr));
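+/* Persist the level so a rebuild can restore b_complevel (see above). */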
+ le->le_complevel = hdr->b_complevel;
L2BLK_SET_TYPE((le)->le_prop, hdr->b_type);
L2BLK_SET_PROTECTED((le)->le_prop, !!(HDR_PROTECTED(hdr)));
L2BLK_SET_PREFETCH((le)->le_prop, !!(HDR_PREFETCH(hdr)));
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, norw, INT, ZMOD_RW,
"No reads during writes");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, INT, ZMOD_RW,
+ "Percent of ARC size allowed for L2ARC-only headers");
+
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_enabled, INT, ZMOD_RW,
"Rebuild the L2ARC when importing a pool");