* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ */
#include <sys/zfs_context.h>
#include <sys/spa.h>
* 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
* track buffer). At most zfs_vdev_cache_size bytes will be kept in each
* vdev's vdev_cache.
+ *
+ * TODO: With the current ZFS code the vdev cache turns out not to be
+ * helpful, and in some cases it is actually harmful, so it is better
+ * to disable it. Once some time has passed, we should remove the vdev
+ * cache entirely to simplify the code. For now we just disable it by
+ * setting zfs_vdev_cache_size to zero. Note that Solaris 11 has made
+ * these same changes.
*/
int zfs_vdev_cache_max = 1<<14; /* 16KB */
-int zfs_vdev_cache_size = 10ULL << 20; /* 10MB */
+int zfs_vdev_cache_size = 0; /* zero disables the vdev cache */
int zfs_vdev_cache_bshift = 16;
#define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB with the default shift of 16 */
vdev_cache_evict(vc, ve);
}
- ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
+ ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_PUSHPAGE);
ve->ve_offset = offset;
- ve->ve_lastused = lbolt;
+ ve->ve_lastused = ddi_get_lbolt();
ve->ve_data = zio_buf_alloc(VCBS);
avl_add(&vc->vc_offset_tree, ve);
ASSERT(MUTEX_HELD(&vc->vc_lock));
ASSERT(ve->ve_fill_io == NULL);
- if (ve->ve_lastused != lbolt) {
+ if (ve->ve_lastused != ddi_get_lbolt()) {
avl_remove(&vc->vc_lastused_tree, ve);
- ve->ve_lastused = lbolt;
+ ve->ve_lastused = ddi_get_lbolt();
avl_add(&vc->vc_lastused_tree, ve);
}
vdev_cache_read(zio_t *zio)
{
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
- vdev_cache_entry_t *ve, ve_search;
+ vdev_cache_entry_t *ve, *ve_search;
uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
- uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
zio_t *fio;
+ ASSERTV(uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS));
ASSERT(zio->io_type == ZIO_TYPE_READ);
if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
- return (EINVAL);
+ return (SET_ERROR(EINVAL));
if (zio->io_size > zfs_vdev_cache_max)
- return (EOVERFLOW);
+ return (SET_ERROR(EOVERFLOW));
/*
* If the I/O straddles two or more cache blocks, don't cache it.
*/
if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
- return (EXDEV);
+ return (SET_ERROR(EXDEV));
ASSERT(cache_phase + zio->io_size <= VCBS);
mutex_enter(&vc->vc_lock);
- ve_search.ve_offset = cache_offset;
- ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL);
+ ve_search = kmem_alloc(sizeof (vdev_cache_entry_t), KM_PUSHPAGE);
+ ve_search->ve_offset = cache_offset;
+ ve = avl_find(&vc->vc_offset_tree, ve_search, NULL);
+ kmem_free(ve_search, sizeof (vdev_cache_entry_t));
if (ve != NULL) {
if (ve->ve_missed_update) {
mutex_exit(&vc->vc_lock);
- return (ESTALE);
+ return (SET_ERROR(ESTALE));
}
if ((fio = ve->ve_fill_io) != NULL) {
if (ve == NULL) {
mutex_exit(&vc->vc_lock);
- return (ENOMEM);
+ return (SET_ERROR(ENOMEM));
}
fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,
- ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_CACHE_FILL,
+ ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW,
ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve);
ve->ve_fill_io = fio;
vdc_ksp = NULL;
}
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/*
+ * Register the vdev cache tunables as module parameters.
+ * zfs_vdev_cache_size is registered with mode 0444 (no write bits);
+ * the other two use mode 0644.
+ */
+module_param(zfs_vdev_cache_max, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_cache_max, "Inflate reads smaller than max");
+
+module_param(zfs_vdev_cache_size, int, 0444);
+MODULE_PARM_DESC(zfs_vdev_cache_size, "Total size of the per-disk cache");
+
+module_param(zfs_vdev_cache_bshift, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_cache_bshift, "Shift size to inflate reads to");
+#endif