struct dm_thin_new_mapping;
/*
- * The pool runs in 4 modes. Ordered in degraded order for comparisons.
+ * The pool runs in various modes. Ordered in degraded order for comparisons.
*/
enum pool_mode {
PM_WRITE, /* metadata may be changed */
PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */
+
+ /*
+ * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY.
+ */
+ PM_OUT_OF_METADATA_SPACE,
PM_READ_ONLY, /* metadata may not be changed */
+
PM_FAIL, /* all I/O fails */
};
spinlock_t lock;
struct bio_list deferred_flush_bios;
+ struct bio_list deferred_flush_completions;
struct list_head prepared_mappings;
struct list_head prepared_discards;
struct list_head prepared_discards_pt2;
struct dm_bio_prison_cell **cell_sort_array;
};
-static enum pool_mode get_pool_mode(struct pool *pool);
static void metadata_operation_failed(struct pool *pool, const char *op, int r);
+static enum pool_mode get_pool_mode(struct pool *pool)
+{
+ return pool->pf.mode;
+}
+
+static void notify_of_pool_mode_change(struct pool *pool)
+{
+ const char *descs[] = {
+ "write",
+ "out-of-data-space",
+ "read-only",
+ "read-only",
+ "fail"
+ };
+ const char *extra_desc = NULL;
+ enum pool_mode mode = get_pool_mode(pool);
+
+ if (mode == PM_OUT_OF_DATA_SPACE) {
+ if (!pool->pf.error_if_no_space)
+ extra_desc = " (queue IO)";
+ else
+ extra_desc = " (error IO)";
+ }
+
+ dm_table_event(pool->ti->table);
+ DMINFO("%s: switching pool to %s%s mode",
+ dm_device_name(pool->pool_md),
+ descs[(int)mode], extra_desc ? : "");
+}
+
/*
* Target context for a pool.
*/
mempool_free(m, m->tc->pool->mapping_pool);
}
+static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
+{
+ struct pool *pool = tc->pool;
+ unsigned long flags;
+
+ /*
+ * If the bio has the REQ_FUA flag set we must commit the metadata
+ * before signaling its completion.
+ */
+ if (!bio_triggers_commit(tc, bio)) {
+ bio_endio(bio);
+ return;
+ }
+
+ /*
+ * Complete bio with an error if earlier I/O caused changes to the
+ * metadata that can't be committed, e.g, due to I/O errors on the
+ * metadata device.
+ */
+ if (dm_thin_aborted_changes(tc->td)) {
+ bio_io_error(bio);
+ return;
+ }
+
+ /*
+ * Batch together any bios that trigger commits and then issue a
+ * single commit for them in process_deferred_bios().
+ */
+ spin_lock_irqsave(&pool->lock, flags);
+ bio_list_add(&pool->deferred_flush_completions, bio);
+ spin_unlock_irqrestore(&pool->lock, flags);
+}
+
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
struct thin_c *tc = m->tc;
*/
if (bio) {
inc_remap_and_issue_cell(tc, m->cell, m->data_block);
- bio_endio(bio);
+ complete_overwrite_bio(tc, bio);
} else {
inc_all_io_entry(tc->pool, m->cell->holder);
remap_and_issue(tc, m->cell->holder, m->data_block);
* passdown we have to check that these blocks are now unused.
*/
int r = 0;
- bool used = true;
+ bool shared = true;
struct thin_c *tc = m->tc;
struct pool *pool = tc->pool;
dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
while (b != end) {
/* find start of unmapped run */
for (; b < end; b++) {
- r = dm_pool_block_is_used(pool->pmd, b, &used);
+ r = dm_pool_block_is_shared(pool->pmd, b, &shared);
if (r)
goto out;
- if (!used)
+ if (!shared)
break;
}
/* find end of run */
for (e = b + 1; e != end; e++) {
- r = dm_pool_block_is_used(pool->pmd, e, &used);
+ r = dm_pool_block_is_shared(pool->pmd, e, &shared);
if (r)
goto out;
- if (used)
+ if (shared)
break;
}
static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
-static void check_for_space(struct pool *pool)
+static void requeue_bios(struct pool *pool);
+
+static bool is_read_only_pool_mode(enum pool_mode mode)
+{
+ return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY);
+}
+
+static bool is_read_only(struct pool *pool)
+{
+ return is_read_only_pool_mode(get_pool_mode(pool));
+}
+
+static void check_for_metadata_space(struct pool *pool)
+{
+ int r;
+ const char *ooms_reason = NULL;
+ dm_block_t nr_free;
+
+ r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
+ if (r)
+ ooms_reason = "Could not get free metadata blocks";
+ else if (!nr_free)
+ ooms_reason = "No free metadata blocks";
+
+ if (ooms_reason && !is_read_only(pool)) {
+ DMERR("%s", ooms_reason);
+ set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
+ }
+}
+
+static void check_for_data_space(struct pool *pool)
{
int r;
dm_block_t nr_free;
if (r)
return;
- if (nr_free)
+ if (nr_free) {
set_pool_mode(pool, PM_WRITE);
+ requeue_bios(pool);
+ }
}
/*
{
int r;
- if (get_pool_mode(pool) >= PM_READ_ONLY)
+ if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
return -EINVAL;
r = dm_pool_commit_metadata(pool->pmd);
if (r)
metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
- else
- check_for_space(pool);
+ else {
+ check_for_metadata_space(pool);
+ check_for_data_space(pool);
+ }
return r;
}
r = dm_pool_alloc_data_block(pool->pmd, result);
if (r) {
- metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
+ if (r == -ENOSPC)
+ set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
+ else
+ metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
+ return r;
+ }
+
+ r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
+ if (r) {
+ metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
return r;
}
+ if (!free_blocks) {
+ /* Let's commit before we use up the metadata reserve. */
+ r = commit(pool);
+ if (r)
+ return r;
+ }
+
return 0;
}
case PM_OUT_OF_DATA_SPACE:
return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
+ case PM_OUT_OF_METADATA_SPACE:
case PM_READ_ONLY:
case PM_FAIL:
return BLK_STS_IOERR;
{
unsigned long flags;
struct bio *bio;
- struct bio_list bios;
+ struct bio_list bios, bio_completions;
struct thin_c *tc;
tc = get_first_thin(pool);
}
/*
- * If there are any deferred flush bios, we must commit
- * the metadata before issuing them.
+ * If there are any deferred flush bios, we must commit the metadata
+ * before issuing them or signaling their completion.
*/
bio_list_init(&bios);
+ bio_list_init(&bio_completions);
+
spin_lock_irqsave(&pool->lock, flags);
bio_list_merge(&bios, &pool->deferred_flush_bios);
bio_list_init(&pool->deferred_flush_bios);
+
+ bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
+ bio_list_init(&pool->deferred_flush_completions);
spin_unlock_irqrestore(&pool->lock, flags);
- if (bio_list_empty(&bios) &&
+ if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
!(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
return;
if (commit(pool)) {
+ bio_list_merge(&bios, &bio_completions);
+
while ((bio = bio_list_pop(&bios)))
bio_io_error(bio);
return;
}
pool->last_commit_jiffies = jiffies;
+ while ((bio = bio_list_pop(&bio_completions)))
+ bio_endio(bio);
+
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
}
-static void notify_of_pool_mode_change_to_oods(struct pool *pool);
-
/*
* We're holding onto IO to allow userland time to react. After the
* timeout either the pool will have been resized (and thus back in
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
pool->pf.error_if_no_space = true;
- notify_of_pool_mode_change_to_oods(pool);
+ notify_of_pool_mode_change(pool);
error_retry_list_with_code(pool, BLK_STS_NOSPC);
}
}
/*----------------------------------------------------------------*/
-static enum pool_mode get_pool_mode(struct pool *pool)
-{
- return pool->pf.mode;
-}
-
-static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
-{
- dm_table_event(pool->ti->table);
- DMINFO("%s: switching pool to %s mode",
- dm_device_name(pool->pool_md), new_mode);
-}
-
-static void notify_of_pool_mode_change_to_oods(struct pool *pool)
-{
- if (!pool->pf.error_if_no_space)
- notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
- else
- notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
-}
-
static bool passdown_enabled(struct pool_c *pt)
{
return pt->adjusted_pf.discard_passdown;
switch (new_mode) {
case PM_FAIL:
- if (old_mode != new_mode)
- notify_of_pool_mode_change(pool, "failure");
dm_pool_metadata_read_only(pool->pmd);
pool->process_bio = process_bio_fail;
pool->process_discard = process_bio_fail;
error_retry_list(pool);
break;
+ case PM_OUT_OF_METADATA_SPACE:
case PM_READ_ONLY:
- if (old_mode != new_mode)
- notify_of_pool_mode_change(pool, "read-only");
dm_pool_metadata_read_only(pool->pmd);
pool->process_bio = process_bio_read_only;
pool->process_discard = process_bio_success;
* alarming rate. Adjust your low water mark if you're
* frequently seeing this mode.
*/
- if (old_mode != new_mode)
- notify_of_pool_mode_change_to_oods(pool);
pool->out_of_data_space = true;
pool->process_bio = process_bio_read_only;
pool->process_discard = process_discard_bio;
break;
case PM_WRITE:
- if (old_mode != new_mode)
- notify_of_pool_mode_change(pool, "write");
+ if (old_mode == PM_OUT_OF_DATA_SPACE)
+ cancel_delayed_work_sync(&pool->no_space_timeout);
pool->out_of_data_space = false;
pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
dm_pool_metadata_read_write(pool->pmd);
* doesn't cause an unexpected mode transition on resume.
*/
pt->adjusted_pf.mode = new_mode;
+
+ if (old_mode != new_mode)
+ notify_of_pool_mode_change(pool);
}
static void abort_transaction(struct pool *pool)
INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
spin_lock_init(&pool->lock);
bio_list_init(&pool->deferred_flush_bios);
+ bio_list_init(&pool->deferred_flush_completions);
INIT_LIST_HEAD(&pool->prepared_mappings);
INIT_LIST_HEAD(&pool->prepared_discards);
INIT_LIST_HEAD(&pool->prepared_discards_pt2);
as.argc = argc;
as.argv = argv;
+ /* make sure metadata and data are different devices */
+ if (!strcmp(argv[0], argv[1])) {
+ ti->error = "Error setting metadata or data device";
+ r = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* Set default pool features.
*/
DMINFO("%s: growing the metadata device from %llu to %llu blocks",
dm_device_name(pool->pool_md),
sb_metadata_dev_size, metadata_dev_size);
+
+ if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
+ set_pool_mode(pool, PM_WRITE);
+
r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
if (r) {
metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- if (get_pool_mode(pool) >= PM_READ_ONLY) {
+ if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
dm_device_name(pool->pool_md));
return -EOPNOTSUPP;
dm_block_t nr_blocks_data;
dm_block_t nr_blocks_metadata;
dm_block_t held_root;
+ enum pool_mode mode;
char buf[BDEVNAME_SIZE];
char buf2[BDEVNAME_SIZE];
struct pool_c *pt = ti->private;
else
DMEMIT("- ");
- if (pool->pf.mode == PM_OUT_OF_DATA_SPACE)
+ mode = get_pool_mode(pool);
+ if (mode == PM_OUT_OF_DATA_SPACE)
DMEMIT("out_of_data_space ");
- else if (pool->pf.mode == PM_READ_ONLY)
+ else if (is_read_only_pool_mode(mode))
DMEMIT("ro ");
else
DMEMIT("rw ");
tc->sort_bio_list = RB_ROOT;
if (argc == 3) {
+ if (!strcmp(argv[0], argv[2])) {
+ ti->error = "Error setting origin device";
+ r = -EINVAL;
+ goto bad_origin_dev;
+ }
+
r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
if (r) {
ti->error = "Error opening origin device";