vdev_queue_init(vdev_t *vd)
{
vdev_queue_t *vq = &vd->vdev_queue;
+ int i;
mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&vq->vq_pending_tree, vdev_queue_offset_compare,
sizeof (zio_t), offsetof(struct zio, io_offset_node));
+
+ /*
+ * A list of buffers which can be used for aggregate I/O; this
+ * avoids the need to allocate them on demand when memory is low.
+ */
+ list_create(&vq->vq_io_list, sizeof (vdev_io_t),
+ offsetof(vdev_io_t, vi_node));
+
+ for (i = 0; i < zfs_vdev_max_pending; i++)
+ list_insert_tail(&vq->vq_io_list, zio_vdev_alloc());
}
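The vdev_io_t definition itself is not part of this diff. A minimal sketch of a layout consistent with how it is used below, where the struct pointer is passed directly to zio_vdev_delegated_io() as the data buffer, so the buffer must be the first member (the SPA_MAXBLOCKSIZE sizing is an assumption):

    typedef struct vdev_io {
            char            vi_buffer[SPA_MAXBLOCKSIZE];    /* must be first */
            list_node_t     vi_node;                        /* free-list linkage */
    } vdev_io_t;

With the buffer at offset zero, (void *)vi and vi->vi_buffer are interchangeable, which is what lets the aggregation code hand the structure straight to the aggregate zio.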
void
vdev_queue_fini(vdev_t *vd)
{
vdev_queue_t *vq = &vd->vdev_queue;
+ vdev_io_t *vi;
avl_destroy(&vq->vq_deadline_tree);
avl_destroy(&vq->vq_read_tree);
avl_destroy(&vq->vq_write_tree);
avl_destroy(&vq->vq_pending_tree);
+ while ((vi = list_head(&vq->vq_io_list)) != NULL) {
+ list_remove(&vq->vq_io_list, vi);
+ zio_vdev_free(vi);
+ }
+
+ list_destroy(&vq->vq_io_list);
+
mutex_destroy(&vq->vq_lock);
}
static void
vdev_queue_agg_io_done(zio_t *aio)
{
+ vdev_queue_t *vq = &aio->io_vd->vdev_queue;
+ vdev_io_t *vi = aio->io_data;
zio_t *pio;
while ((pio = zio_walk_parents(aio)) != NULL)
bcopy((char *)aio->io_data + (pio->io_offset -
aio->io_offset), pio->io_data, pio->io_size);
- zio_buf_free(aio->io_data, aio->io_size);
+ mutex_enter(&vq->vq_lock);
+ list_insert_tail(&vq->vq_io_list, vi);
+ mutex_exit(&vq->vq_lock);
}
static zio_t *
vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
{
zio_t *fio, *lio, *aio, *dio, *nio, *mio;
avl_tree_t *t;
+ vdev_io_t *vi;
int flags;
uint64_t maxspan = zfs_vdev_aggregation_limit;
uint64_t maxgap;
flags = fio->io_flags & ZIO_FLAG_AGG_INHERIT;
maxgap = (t == &vq->vq_read_tree) ? zfs_vdev_read_gap_limit : 0;
+ vi = list_head(&vq->vq_io_list);
+ if (vi == NULL) {
+ vi = zio_vdev_alloc();
+ list_insert_head(&vq->vq_io_list, vi);
+ }
+
if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) {
/*
* We can aggregate I/Os that are sufficiently adjacent and of
if (fio != lio) {
uint64_t size = IO_SPAN(fio, lio);
ASSERT(size <= zfs_vdev_aggregation_limit);
+ ASSERT(vi != NULL);
aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset,
- zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_AGG,
+ vi, size, fio->io_type, ZIO_PRIORITY_AGG,
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
vdev_queue_agg_io_done, NULL);
} while (dio != lio);
avl_add(&vq->vq_pending_tree, aio);
+ list_remove(&vq->vq_io_list, vi);
return (aio);
}
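Note the ordering in the hunk above: the head of vq_io_list is replenished before the aggregation check, but the buffer is only unlinked with list_remove() once the aggregate zio has been created, so any path that returns without aggregating leaves it on the list for the next call. The same take-or-allocate idea, distilled into a single hypothetical helper (vio_take() is not part of the diff; the caller is assumed to hold vq_lock):

    static vdev_io_t *
    vio_take(vdev_queue_t *vq)
    {
            vdev_io_t *vi = list_head(&vq->vq_io_list);

            if (vi == NULL)
                    vi = zio_vdev_alloc();  /* KM_PUSHPAGE sleeps rather than fail */
            else
                    list_remove(&vq->vq_io_list, vi);

            return (vi);
    }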
/*
 * ==========================================================================
 * I/O kmem caches
 * ==========================================================================
 */
kmem_cache_t *zio_cache;
kmem_cache_t *zio_link_cache;
+kmem_cache_t *zio_vdev_cache;
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
int zio_bulk_flags = 0;
zio_cons, zio_dest, NULL, NULL, NULL, KMC_KMEM);
zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM);
+ zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof (vdev_io_t),
+ PAGESIZE, NULL, NULL, NULL, NULL, NULL, KMC_VMEM);
/*
* For small buffers, we want a cache for each multiple of
zio_data_buf_cache[c] = NULL;
}
+ kmem_cache_destroy(zio_vdev_cache);
kmem_cache_destroy(zio_link_cache);
kmem_cache_destroy(zio_cache);
kmem_cache_free(zio_data_buf_cache[c], buf);
}
+/*
+ * Dedicated I/O buffers to ensure that memory fragmentation never prevents
+ * or significantly delays the issuing of a zio. These buffers are used
+ * to aggregate I/O and could be used for raidz stripes.
+ */
+void *
+zio_vdev_alloc(void)
+{
+ return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE));
+}
+
+void
+zio_vdev_free(void *buf)
+{
+ kmem_cache_free(zio_vdev_cache, buf);
+}
+
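For reference, a minimal usage sketch of the two new helpers, mirroring the vdev queue pattern above (the standalone list here is illustrative, not part of the diff):

    list_t io_list;
    vdev_io_t *vi;

    list_create(&io_list, sizeof (vdev_io_t), offsetof(vdev_io_t, vi_node));
    list_insert_tail(&io_list, zio_vdev_alloc());   /* pre-allocate one buffer */

    vi = list_head(&io_list);                       /* take it for an I/O */
    list_remove(&io_list, vi);
    /* ... use vi as a page-aligned data buffer ... */
    list_insert_tail(&io_list, vi);                 /* recycle it for the next I/O */

    while ((vi = list_head(&io_list)) != NULL) {    /* teardown */
            list_remove(&io_list, vi);
            zio_vdev_free(vi);
    }
    list_destroy(&io_list);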
/*
* ==========================================================================
* Push and pop I/O transform buffers