0 is the original format used in the Chromium OS.
The salt is appended when hashing, digests are stored continuously and
- the rest of the block is padded with zeros.
+ the rest of the block is padded with zeroes.
1 is the current format that should be used for new devices.
The salt is prepended when hashing and each digest is
- padded with zeros to the power of two.
+ padded with zeroes to the power of two.
<dev>
This is the device containing data, the integrity of which needs to be
not compatible with ignore_corruption and requires user space support to
avoid restart loops.
+ignore_zero_blocks
+ Do not verify blocks that are expected to contain zeroes and always return
+ zeroes instead. This may be useful if the partition contains unused blocks
+ that are not guaranteed to contain zeroes.
+
+use_fec_from_device <fec_dev>
+ Use forward error correction (FEC) to recover from corruption if hash
+ verification fails. Use encoding data from the specified device. This
+ may be the same device where data and hash blocks reside, in which case
+ fec_start must be outside data and hash areas.
+
+ If the encoding data covers additional metadata, it must be accessible
+ on the hash device after the hash blocks.
+
+ Note: block sizes for data and hash devices must match. Also, if the
+ verity <dev> is encrypted the <fec_dev> should be too.
+
+fec_roots <num>
+ Number of generator roots. This equals to the number of parity bytes in
+ the encoding data. For example, in RS(M, N) encoding, the number of roots
+ is M-N.
+
+fec_blocks <num>
+ The number of encoding data blocks on the FEC device. The block size for
+ the FEC device is <data_block_size>.
+
+fec_start <offset>
+ This is the offset, in <data_block_size> blocks, from the start of the
+ FEC device to the beginning of the encoding data.
+
+
Theory of operation
===================
into the page cache. Block hashes are stored linearly, aligned to the nearest
block size.
+If forward error correction (FEC) support is enabled any recovery of
+corrupted data will be verified using the cryptographic hash of the
+corresponding data. This is why combining error correction with
+integrity checking is essential.
+
Hash Tree
---------
as a cache, holding recently-read blocks in memory and performing
delayed writes.
+config DM_DEBUG_BLOCK_STACK_TRACING
+ bool "Keep stack trace of persistent data block lock holders"
+ depends on STACKTRACE_SUPPORT && DM_BUFIO
+ select STACKTRACE
+ ---help---
+ Enable this for messages that may help debug problems with the
+ block manager locking used by thin provisioning and caching.
+
+ If unsure, say N.
config DM_BIO_PRISON
tristate
depends on BLK_DEV_DM
If unsure, say N.
+config DM_VERITY_FEC
+ bool "Verity forward error correction support"
+ depends on DM_VERITY
+ select REED_SOLOMON
+ select REED_SOLOMON_DEC8
+ ---help---
+ Add forward error correction support to dm-verity. This option
+ makes it possible to use pre-generated error correction data to
+ recover from corrupted blocks.
+
+ If unsure, say N.
+
config DM_SWITCH
tristate "Switch target support (EXPERIMENTAL)"
depends on BLK_DEV_DM
dm-cache-smq-y += dm-cache-policy-smq.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-era-y += dm-era-target.o
+dm-verity-y += dm-verity-target.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o raid5-cache.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
endif
+
+ifeq ($(CONFIG_DM_VERITY_FEC),y)
+dm-verity-objs += dm-verity-fec.o
+endif
#include <linux/shrinker.h>
#include <linux/module.h>
#include <linux/rbtree.h>
+#include <linux/stacktrace.h>
#define DM_MSG_PREFIX "bufio"
struct list_head write_list;
struct bio bio;
struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+#define MAX_STACK 10
+ struct stack_trace stack_trace;
+ unsigned long stack_entries[MAX_STACK];
+#endif
};
/*----------------------------------------------------------------*/
*/
static DEFINE_MUTEX(dm_bufio_clients_lock);
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+static void buffer_record_stack(struct dm_buffer *b)
+{
+ b->stack_trace.nr_entries = 0;
+ b->stack_trace.max_entries = MAX_STACK;
+ b->stack_trace.entries = b->stack_entries;
+ b->stack_trace.skip = 2;
+ save_stack_trace(&b->stack_trace);
+}
+#endif
+
/*----------------------------------------------------------------
* A red/black tree acts as an index for all the buffers.
*--------------------------------------------------------------*/
adjust_total_allocated(b->data_mode, (long)c->block_size);
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+ memset(&b->stack_trace, 0, sizeof(b->stack_trace));
+#endif
return b;
}
dm_bufio_lock(c);
b = __bufio_new(c, block, nf, &need_submit, &write_list);
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+ if (b && b->hold_count == 1)
+ buffer_record_stack(b);
+#endif
dm_bufio_unlock(c);
__flush_write_list(&write_list);
if (!b)
- return b;
+ return NULL;
if (need_submit)
submit_io(b, READ, b->block, read_endio);
{
struct dm_buffer *b;
int i;
+ bool warned = false;
BUG_ON(dm_bufio_in_request());
__free_buffer_wake(b);
for (i = 0; i < LIST_SIZE; i++)
- list_for_each_entry(b, &c->lru[i], lru_list)
+ list_for_each_entry(b, &c->lru[i], lru_list) {
+ WARN_ON(!warned);
+ warned = true;
DMERR("leaked buffer %llx, hold count %u, list %d",
(unsigned long long)b->block, b->hold_count, i);
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+ print_stack_trace(&b->stack_trace, 1);
+ b->hold_count = 0; /* mark unclaimed to avoid BUG_ON below */
+#endif
+ }
+
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+ while ((b = __get_unclaimed_buffer(c)))
+ __free_buffer_wake(b);
+#endif
for (i = 0; i < LIST_SIZE; i++)
BUG_ON(!list_empty(&c->lru[i]));
bug = 1;
}
- if (bug)
- BUG();
+ BUG_ON(bug);
}
module_init(dm_bufio_init)
*/
struct dm_hook_info {
bio_end_io_t *bi_end_io;
- void *bi_private;
};
static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
bio_end_io_t *bi_end_io, void *bi_private)
{
h->bi_end_io = bio->bi_end_io;
- h->bi_private = bio->bi_private;
bio->bi_end_io = bi_end_io;
bio->bi_private = bi_private;
static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
bio->bi_end_io = h->bi_end_io;
- bio->bi_private = h->bi_private;
}
/*----------------------------------------------------------------*/
* Update the metadata with this exception.
*/
void (*commit_exception) (struct dm_exception_store *store,
- struct dm_exception *e,
+ struct dm_exception *e, int valid,
void (*callback) (void *, int success),
void *callback_context);
}
static void persistent_commit_exception(struct dm_exception_store *store,
- struct dm_exception *e,
+ struct dm_exception *e, int valid,
void (*callback) (void *, int success),
void *callback_context)
{
struct core_exception ce;
struct commit_callback *cb;
+ if (!valid)
+ ps->valid = 0;
+
ce.old_chunk = e->old_chunk;
ce.new_chunk = e->new_chunk;
write_exception(ps, ps->current_committed++, &ce);
}
static void transient_commit_exception(struct dm_exception_store *store,
- struct dm_exception *e,
+ struct dm_exception *e, int valid,
void (*callback) (void *, int success),
void *callback_context)
{
/* Just succeed */
- callback(callback_context, 1);
+ callback(callback_context, valid);
}
static void transient_usage(struct dm_exception_store *store,
*/
struct bio *full_bio;
bio_end_io_t *full_bio_end_io;
- void *full_bio_private;
};
/*
dm_table_event(s->ti->table);
}
-static void pending_complete(struct dm_snap_pending_exception *pe, int success)
+static void pending_complete(void *context, int success)
{
+ struct dm_snap_pending_exception *pe = context;
struct dm_exception *e;
struct dm_snapshot *s = pe->snap;
struct bio *origin_bios = NULL;
snapshot_bios = bio_list_get(&pe->snapshot_bios);
origin_bios = bio_list_get(&pe->origin_bios);
full_bio = pe->full_bio;
- if (full_bio) {
+ if (full_bio)
full_bio->bi_end_io = pe->full_bio_end_io;
- full_bio->bi_private = pe->full_bio_private;
- }
increment_pending_exceptions_done_count();
up_write(&s->lock);
free_pending_exception(pe);
}
-static void commit_callback(void *context, int success)
-{
- struct dm_snap_pending_exception *pe = context;
-
- pending_complete(pe, success);
-}
-
static void complete_exception(struct dm_snap_pending_exception *pe)
{
struct dm_snapshot *s = pe->snap;
- if (unlikely(pe->copy_error))
- pending_complete(pe, 0);
-
- else
- /* Update the metadata if we are persistent */
- s->store->type->commit_exception(s->store, &pe->e,
- commit_callback, pe);
+ /* Update the metadata if we are persistent */
+ s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
+ pending_complete, pe);
}
/*
pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io;
- pe->full_bio_private = bio->bi_private;
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe);
return td->snapshotted_time > time;
}
-int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
- int can_issue_io, struct dm_thin_lookup_result *result)
+static void unpack_lookup_result(struct dm_thin_device *td, __le64 value,
+ struct dm_thin_lookup_result *result)
+{
+ uint64_t block_time = 0;
+ dm_block_t exception_block;
+ uint32_t exception_time;
+
+ block_time = le64_to_cpu(value);
+ unpack_block_time(block_time, &exception_block, &exception_time);
+ result->block = exception_block;
+ result->shared = __snapshotted_since(td, exception_time);
+}
+
+static int __find_block(struct dm_thin_device *td, dm_block_t block,
+ int can_issue_io, struct dm_thin_lookup_result *result)
{
int r;
__le64 value;
dm_block_t keys[2] = { td->id, block };
struct dm_btree_info *info;
- down_read(&pmd->root_lock);
- if (pmd->fail_io) {
- up_read(&pmd->root_lock);
- return -EINVAL;
- }
-
if (can_issue_io) {
info = &pmd->info;
} else
info = &pmd->nb_info;
r = dm_btree_lookup(info, pmd->root, keys, &value);
- if (!r) {
- uint64_t block_time = 0;
- dm_block_t exception_block;
- uint32_t exception_time;
-
- block_time = le64_to_cpu(value);
- unpack_block_time(block_time, &exception_block,
- &exception_time);
- result->block = exception_block;
- result->shared = __snapshotted_since(td, exception_time);
+ if (!r)
+ unpack_lookup_result(td, value, result);
+
+ return r;
+}
+
+int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
+ int can_issue_io, struct dm_thin_lookup_result *result)
+{
+ int r;
+ struct dm_pool_metadata *pmd = td->pmd;
+
+ down_read(&pmd->root_lock);
+ if (pmd->fail_io) {
+ up_read(&pmd->root_lock);
+ return -EINVAL;
}
+ r = __find_block(td, block, can_issue_io, result);
+
up_read(&pmd->root_lock);
return r;
}
-/* FIXME: write a more efficient one in btree */
-int dm_thin_find_mapped_range(struct dm_thin_device *td,
- dm_block_t begin, dm_block_t end,
- dm_block_t *thin_begin, dm_block_t *thin_end,
- dm_block_t *pool_begin, bool *maybe_shared)
+static int __find_next_mapped_block(struct dm_thin_device *td, dm_block_t block,
+ dm_block_t *vblock,
+ struct dm_thin_lookup_result *result)
+{
+ int r;
+ __le64 value;
+ struct dm_pool_metadata *pmd = td->pmd;
+ dm_block_t keys[2] = { td->id, block };
+
+ r = dm_btree_lookup_next(&pmd->info, pmd->root, keys, vblock, &value);
+ if (!r)
+ unpack_lookup_result(td, value, result);
+
+ return r;
+}
+
+static int __find_mapped_range(struct dm_thin_device *td,
+ dm_block_t begin, dm_block_t end,
+ dm_block_t *thin_begin, dm_block_t *thin_end,
+ dm_block_t *pool_begin, bool *maybe_shared)
{
int r;
dm_block_t pool_end;
if (end < begin)
return -ENODATA;
- /*
- * Find first mapped block.
- */
- while (begin < end) {
- r = dm_thin_find_block(td, begin, true, &lookup);
- if (r) {
- if (r != -ENODATA)
- return r;
- } else
- break;
-
- begin++;
- }
+ r = __find_next_mapped_block(td, begin, &begin, &lookup);
+ if (r)
+ return r;
- if (begin == end)
+ if (begin >= end)
return -ENODATA;
*thin_begin = begin;
begin++;
pool_end = *pool_begin + 1;
while (begin != end) {
- r = dm_thin_find_block(td, begin, true, &lookup);
+ r = __find_block(td, begin, true, &lookup);
if (r) {
if (r == -ENODATA)
break;
return 0;
}
+int dm_thin_find_mapped_range(struct dm_thin_device *td,
+ dm_block_t begin, dm_block_t end,
+ dm_block_t *thin_begin, dm_block_t *thin_end,
+ dm_block_t *pool_begin, bool *maybe_shared)
+{
+ int r = -EINVAL;
+ struct dm_pool_metadata *pmd = td->pmd;
+
+ down_read(&pmd->root_lock);
+ if (!pmd->fail_io) {
+ r = __find_mapped_range(td, begin, end, thin_begin, thin_end,
+ pool_begin, maybe_shared);
+ }
+ up_read(&pmd->root_lock);
+
+ return r;
+}
+
static int __insert(struct dm_thin_device *td, dm_block_t block,
dm_block_t data_block)
{
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- cancel_delayed_work(&pool->waker);
- cancel_delayed_work(&pool->no_space_timeout);
+ cancel_delayed_work_sync(&pool->waker);
+ cancel_delayed_work_sync(&pool->no_space_timeout);
flush_workqueue(pool->wq);
(void) commit(pool);
}
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
- .version = {1, 16, 0},
+ .version = {1, 17, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 16, 0},
+ .version = {1, 17, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
--- /dev/null
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include "dm-verity-fec.h"
+#include <linux/math64.h>
+
+#define DM_MSG_PREFIX "verity-fec"
+
+/*
+ * If error correction has been configured, returns true.
+ */
+bool verity_fec_is_enabled(struct dm_verity *v)
+{
+ return v->fec && v->fec->dev;
+}
+
+/*
+ * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
+ * length fields.
+ */
+static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io)
+{
+ return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io);
+}
+
+/*
+ * Return an interleaved offset for a byte in RS block.
+ */
+static inline u64 fec_interleave(struct dm_verity *v, u64 offset)
+{
+ u32 mod;
+
+ mod = do_div(offset, v->fec->rsn);
+ return offset + mod * (v->fec->rounds << v->data_dev_block_bits);
+}
+
+/*
+ * Decode an RS block using Reed-Solomon.
+ */
+static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
+ u8 *data, u8 *fec, int neras)
+{
+ int i;
+ uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
+
+ for (i = 0; i < v->fec->roots; i++)
+ par[i] = fec[i];
+
+ return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras,
+ fio->erasures, 0, NULL);
+}
+
+/*
+ * Read error-correcting codes for the requested RS block. Returns a pointer
+ * to the data block. Caller is responsible for releasing buf.
+ */
+static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
+ unsigned *offset, struct dm_buffer **buf)
+{
+ u64 position, block;
+ u8 *res;
+
+ position = (index + rsb) * v->fec->roots;
+ block = position >> v->data_dev_block_bits;
+ *offset = (unsigned)(position - (block << v->data_dev_block_bits));
+
+ res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
+ if (unlikely(IS_ERR(res))) {
+ DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
+ v->data_dev->name, (unsigned long long)rsb,
+ (unsigned long long)(v->fec->start + block),
+ PTR_ERR(res));
+ *buf = NULL;
+ }
+
+ return res;
+}
+
+/* Loop over each preallocated buffer slot. */
+#define fec_for_each_prealloc_buffer(__i) \
+ for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++)
+
+/* Loop over each extra buffer slot. */
+#define fec_for_each_extra_buffer(io, __i) \
+ for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++)
+
+/* Loop over each allocated buffer. */
+#define fec_for_each_buffer(io, __i) \
+ for (__i = 0; __i < (io)->nbufs; __i++)
+
+/* Loop over each RS block in each allocated buffer. */
+#define fec_for_each_buffer_rs_block(io, __i, __j) \
+ fec_for_each_buffer(io, __i) \
+ for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++)
+
+/*
+ * Return a pointer to the current RS block when called inside
+ * fec_for_each_buffer_rs_block.
+ */
+static inline u8 *fec_buffer_rs_block(struct dm_verity *v,
+ struct dm_verity_fec_io *fio,
+ unsigned i, unsigned j)
+{
+ return &fio->bufs[i][j * v->fec->rsn];
+}
+
+/*
+ * Return an index to the current RS block when called inside
+ * fec_for_each_buffer_rs_block.
+ */
+static inline unsigned fec_buffer_rs_index(unsigned i, unsigned j)
+{
+ return (i << DM_VERITY_FEC_BUF_RS_BITS) + j;
+}
+
+/*
+ * Decode all RS blocks from buffers and copy corrected bytes into fio->output
+ * starting from block_offset.
+ */
+static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
+ u64 rsb, int byte_index, unsigned block_offset,
+ int neras)
+{
+ int r, corrected = 0, res;
+ struct dm_buffer *buf;
+ unsigned n, i, offset;
+ u8 *par, *block;
+
+ par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+ if (IS_ERR(par))
+ return PTR_ERR(par);
+
+ /*
+ * Decode the RS blocks we have in bufs. Each RS block results in
+ * one corrected target byte and consumes fec->roots parity bytes.
+ */
+ fec_for_each_buffer_rs_block(fio, n, i) {
+ block = fec_buffer_rs_block(v, fio, n, i);
+ res = fec_decode_rs8(v, fio, block, &par[offset], neras);
+ if (res < 0) {
+ dm_bufio_release(buf);
+
+ r = res;
+ goto error;
+ }
+
+ corrected += res;
+ fio->output[block_offset] = block[byte_index];
+
+ block_offset++;
+ if (block_offset >= 1 << v->data_dev_block_bits)
+ goto done;
+
+ /* read the next block when we run out of parity bytes */
+ offset += v->fec->roots;
+ if (offset >= 1 << v->data_dev_block_bits) {
+ dm_bufio_release(buf);
+
+ par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+ if (unlikely(IS_ERR(par)))
+ return PTR_ERR(par);
+ }
+ }
+done:
+ r = corrected;
+error:
+ if (r < 0 && neras)
+ DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
+ v->data_dev->name, (unsigned long long)rsb, r);
+ else if (r > 0)
+ DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
+ v->data_dev->name, (unsigned long long)rsb, r);
+
+ return r;
+}
+
+/*
+ * Locate data block erasures using verity hashes.
+ */
+static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *want_digest, u8 *data)
+{
+ if (unlikely(verity_hash(v, verity_io_hash_desc(v, io),
+ data, 1 << v->data_dev_block_bits,
+ verity_io_real_digest(v, io))))
+ return 0;
+
+ return memcmp(verity_io_real_digest(v, io), want_digest,
+ v->digest_size) != 0;
+}
+
+/*
+ * Read data blocks that are part of the RS block and deinterleave as much as
+ * fits into buffers. Check for erasure locations if @neras is non-NULL.
+ */
+static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
+ u64 rsb, u64 target, unsigned block_offset,
+ int *neras)
+{
+ bool is_zero;
+ int i, j, target_index = -1;
+ struct dm_buffer *buf;
+ struct dm_bufio_client *bufio;
+ struct dm_verity_fec_io *fio = fec_io(io);
+ u64 block, ileaved;
+ u8 *bbuf, *rs_block;
+ u8 want_digest[v->digest_size];
+ unsigned n, k;
+
+ if (neras)
+ *neras = 0;
+
+ /*
+ * read each of the rsn data blocks that are part of the RS block, and
+ * interleave contents to available bufs
+ */
+ for (i = 0; i < v->fec->rsn; i++) {
+ ileaved = fec_interleave(v, rsb * v->fec->rsn + i);
+
+ /*
+ * target is the data block we want to correct, target_index is
+ * the index of this block within the rsn RS blocks
+ */
+ if (ileaved == target)
+ target_index = i;
+
+ block = ileaved >> v->data_dev_block_bits;
+ bufio = v->fec->data_bufio;
+
+ if (block >= v->data_blocks) {
+ block -= v->data_blocks;
+
+ /*
+ * blocks outside the area were assumed to contain
+ * zeros when encoding data was generated
+ */
+ if (unlikely(block >= v->fec->hash_blocks))
+ continue;
+
+ block += v->hash_start;
+ bufio = v->bufio;
+ }
+
+ bbuf = dm_bufio_read(bufio, block, &buf);
+ if (unlikely(IS_ERR(bbuf))) {
+ DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
+ v->data_dev->name,
+ (unsigned long long)rsb,
+ (unsigned long long)block, PTR_ERR(bbuf));
+
+ /* assume the block is corrupted */
+ if (neras && *neras <= v->fec->roots)
+ fio->erasures[(*neras)++] = i;
+
+ continue;
+ }
+
+ /* locate erasures if the block is on the data device */
+ if (bufio == v->fec->data_bufio &&
+ verity_hash_for_block(v, io, block, want_digest,
+ &is_zero) == 0) {
+ /* skip known zero blocks entirely */
+ if (is_zero)
+ continue;
+
+ /*
+ * skip if we have already found the theoretical
+ * maximum number (i.e. fec->roots) of erasures
+ */
+ if (neras && *neras <= v->fec->roots &&
+ fec_is_erasure(v, io, want_digest, bbuf))
+ fio->erasures[(*neras)++] = i;
+ }
+
+ /*
+ * deinterleave and copy the bytes that fit into bufs,
+ * starting from block_offset
+ */
+ fec_for_each_buffer_rs_block(fio, n, j) {
+ k = fec_buffer_rs_index(n, j) + block_offset;
+
+ if (k >= 1 << v->data_dev_block_bits)
+ goto done;
+
+ rs_block = fec_buffer_rs_block(v, fio, n, j);
+ rs_block[i] = bbuf[k];
+ }
+done:
+ dm_bufio_release(buf);
+ }
+
+ return target_index;
+}
+
+/*
+ * Allocate RS control structure and FEC buffers from preallocated mempools,
+ * and attempt to allocate as many extra buffers as available.
+ */
+static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
+{
+ unsigned n;
+
+ if (!fio->rs) {
+ fio->rs = mempool_alloc(v->fec->rs_pool, 0);
+ if (unlikely(!fio->rs)) {
+ DMERR("failed to allocate RS");
+ return -ENOMEM;
+ }
+ }
+
+ fec_for_each_prealloc_buffer(n) {
+ if (fio->bufs[n])
+ continue;
+
+ fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO);
+ if (unlikely(!fio->bufs[n])) {
+ DMERR("failed to allocate FEC buffer");
+ return -ENOMEM;
+ }
+ }
+
+ /* try to allocate the maximum number of buffers */
+ fec_for_each_extra_buffer(fio, n) {
+ if (fio->bufs[n])
+ continue;
+
+ fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO);
+ /* we can manage with even one buffer if necessary */
+ if (unlikely(!fio->bufs[n]))
+ break;
+ }
+ fio->nbufs = n;
+
+ if (!fio->output) {
+ fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO);
+
+ if (!fio->output) {
+ DMERR("failed to allocate FEC page");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are
+ * zeroed before deinterleaving.
+ */
+static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
+{
+ unsigned n;
+
+ fec_for_each_buffer(fio, n)
+ memset(fio->bufs[n], 0, v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS);
+
+ memset(fio->erasures, 0, sizeof(fio->erasures));
+}
+
+/*
+ * Decode all RS blocks in a single data block and return the target block
+ * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses
+ * hashes to locate erasures.
+ */
+static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
+ struct dm_verity_fec_io *fio, u64 rsb, u64 offset,
+ bool use_erasures)
+{
+ int r, neras = 0;
+ unsigned pos;
+
+ r = fec_alloc_bufs(v, fio);
+ if (unlikely(r < 0))
+ return r;
+
+ for (pos = 0; pos < 1 << v->data_dev_block_bits; ) {
+ fec_init_bufs(v, fio);
+
+ r = fec_read_bufs(v, io, rsb, offset, pos,
+ use_erasures ? &neras : NULL);
+ if (unlikely(r < 0))
+ return r;
+
+ r = fec_decode_bufs(v, fio, rsb, r, pos, neras);
+ if (r < 0)
+ return r;
+
+ pos += fio->nbufs << DM_VERITY_FEC_BUF_RS_BITS;
+ }
+
+ /* Always re-validate the corrected block against the expected hash */
+ r = verity_hash(v, verity_io_hash_desc(v, io), fio->output,
+ 1 << v->data_dev_block_bits,
+ verity_io_real_digest(v, io));
+ if (unlikely(r < 0))
+ return r;
+
+ if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io),
+ v->digest_size)) {
+ DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
+ v->data_dev->name, (unsigned long long)rsb, neras);
+ return -EILSEQ;
+ }
+
+ return 0;
+}
+
+static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data,
+ size_t len)
+{
+ struct dm_verity_fec_io *fio = fec_io(io);
+
+ memcpy(data, &fio->output[fio->output_pos], len);
+ fio->output_pos += len;
+
+ return 0;
+}
+
+/*
+ * Correct errors in a block. Copies corrected block to dest if non-NULL,
+ * otherwise to a bio_vec starting from iter.
+ */
+int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+ enum verity_block_type type, sector_t block, u8 *dest,
+ struct bvec_iter *iter)
+{
+ int r;
+ struct dm_verity_fec_io *fio = fec_io(io);
+ u64 offset, res, rsb;
+
+ if (!verity_fec_is_enabled(v))
+ return -EOPNOTSUPP;
+
+ if (type == DM_VERITY_BLOCK_TYPE_METADATA)
+ block += v->data_blocks;
+
+ /*
+ * For RS(M, N), the continuous FEC data is divided into blocks of N
+ * bytes. Since block size may not be divisible by N, the last block
+ * is zero padded when decoding.
+ *
+ * Each byte of the block is covered by a different RS(M, N) code,
+ * and each code is interleaved over N blocks to make it less likely
+ * that bursty corruption will leave us in unrecoverable state.
+ */
+
+ offset = block << v->data_dev_block_bits;
+
+ res = offset;
+ div64_u64(res, v->fec->rounds << v->data_dev_block_bits);
+
+ /*
+ * The base RS block we can feed to the interleaver to find out all
+ * blocks required for decoding.
+ */
+ rsb = offset - res * (v->fec->rounds << v->data_dev_block_bits);
+
+ /*
+ * Locating erasures is slow, so attempt to recover the block without
+ * them first. Do a second attempt with erasures if the corruption is
+ * bad enough.
+ */
+ r = fec_decode_rsb(v, io, fio, rsb, offset, false);
+ if (r < 0) {
+ r = fec_decode_rsb(v, io, fio, rsb, offset, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (dest)
+ memcpy(dest, fio->output, 1 << v->data_dev_block_bits);
+ else if (iter) {
+ fio->output_pos = 0;
+ r = verity_for_bv_block(v, io, iter, fec_bv_copy);
+ }
+
+ return r;
+}
+
+/*
+ * Clean up per-bio data.
+ */
+void verity_fec_finish_io(struct dm_verity_io *io)
+{
+ unsigned n;
+ struct dm_verity_fec *f = io->v->fec;
+ struct dm_verity_fec_io *fio = fec_io(io);
+
+ if (!verity_fec_is_enabled(io->v))
+ return;
+
+ mempool_free(fio->rs, f->rs_pool);
+
+ fec_for_each_prealloc_buffer(n)
+ mempool_free(fio->bufs[n], f->prealloc_pool);
+
+ fec_for_each_extra_buffer(fio, n)
+ mempool_free(fio->bufs[n], f->extra_pool);
+
+ mempool_free(fio->output, f->output_pool);
+}
+
+/*
+ * Initialize per-bio data.
+ */
+void verity_fec_init_io(struct dm_verity_io *io)
+{
+ struct dm_verity_fec_io *fio = fec_io(io);
+
+ if (!verity_fec_is_enabled(io->v))
+ return;
+
+ fio->rs = NULL;
+ memset(fio->bufs, 0, sizeof(fio->bufs));
+ fio->nbufs = 0;
+ fio->output = NULL;
+}
+
+/*
+ * Append feature arguments and values to the status table.
+ */
+unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
+ char *result, unsigned maxlen)
+{
+ if (!verity_fec_is_enabled(v))
+ return sz;
+
+ DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s "
+ DM_VERITY_OPT_FEC_BLOCKS " %llu "
+ DM_VERITY_OPT_FEC_START " %llu "
+ DM_VERITY_OPT_FEC_ROOTS " %d",
+ v->fec->dev->name,
+ (unsigned long long)v->fec->blocks,
+ (unsigned long long)v->fec->start,
+ v->fec->roots);
+
+ return sz;
+}
+
+void verity_fec_dtr(struct dm_verity *v)
+{
+ struct dm_verity_fec *f = v->fec;
+
+ if (!verity_fec_is_enabled(v))
+ goto out;
+
+ mempool_destroy(f->rs_pool);
+ mempool_destroy(f->prealloc_pool);
+ mempool_destroy(f->extra_pool);
+ kmem_cache_destroy(f->cache);
+
+ if (f->data_bufio)
+ dm_bufio_client_destroy(f->data_bufio);
+ if (f->bufio)
+ dm_bufio_client_destroy(f->bufio);
+
+ if (f->dev)
+ dm_put_device(v->ti, f->dev);
+out:
+ kfree(f);
+ v->fec = NULL;
+}
+
+static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ struct dm_verity *v = (struct dm_verity *)pool_data;
+
+ return init_rs(8, 0x11d, 0, 1, v->fec->roots);
+}
+
+static void fec_rs_free(void *element, void *pool_data)
+{
+ struct rs_control *rs = (struct rs_control *)element;
+
+ if (rs)
+ free_rs(rs);
+}
+
+bool verity_is_fec_opt_arg(const char *arg_name)
+{
+ return (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV) ||
+ !strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS) ||
+ !strcasecmp(arg_name, DM_VERITY_OPT_FEC_START) ||
+ !strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS));
+}
+
+int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
+ unsigned *argc, const char *arg_name)
+{
+ int r;
+ struct dm_target *ti = v->ti;
+ const char *arg_value;
+ unsigned long long num_ll;
+ unsigned char num_c;
+ char dummy;
+
+ if (!*argc) {
+ ti->error = "FEC feature arguments require a value";
+ return -EINVAL;
+ }
+
+ arg_value = dm_shift_arg(as);
+ (*argc)--;
+
+ if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) {
+ r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev);
+ if (r) {
+ ti->error = "FEC device lookup failed";
+ return r;
+ }
+
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS)) {
+ if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
+ ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) {
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+ return -EINVAL;
+ }
+ v->fec->blocks = num_ll;
+
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START)) {
+ if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
+ ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) >>
+ (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) {
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_START;
+ return -EINVAL;
+ }
+ v->fec->start = num_ll;
+
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)) {
+ if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 || !num_c ||
+ num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) ||
+ num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) {
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS;
+ return -EINVAL;
+ }
+ v->fec->roots = num_c;
+
+ } else {
+ ti->error = "Unrecognized verity FEC feature request";
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
+ */
+int verity_fec_ctr_alloc(struct dm_verity *v)
+{
+ struct dm_verity_fec *f;
+
+ f = kzalloc(sizeof(struct dm_verity_fec), GFP_KERNEL);
+ if (!f) {
+ v->ti->error = "Cannot allocate FEC structure";
+ return -ENOMEM;
+ }
+ v->fec = f;
+
+ return 0;
+}
+
+/*
+ * Validate arguments and preallocate memory. Must be called after arguments
+ * have been parsed using verity_fec_parse_opt_args.
+ */
+int verity_fec_ctr(struct dm_verity *v)
+{
+ struct dm_verity_fec *f = v->fec;
+ struct dm_target *ti = v->ti;
+ u64 hash_blocks;
+
+ if (!verity_fec_is_enabled(v)) {
+ verity_fec_dtr(v);
+ return 0;
+ }
+
+ /*
+ * FEC is computed over data blocks, possible metadata, and
+ * hash blocks. In other words, FEC covers total of fec_blocks
+ * blocks consisting of the following:
+ *
+ * data blocks | hash blocks | metadata (optional)
+ *
+ * We allow metadata after hash blocks to support a use case
+ * where all data is stored on the same device and FEC covers
+ * the entire area.
+ *
+ * If metadata is included, we require it to be available on the
+ * hash device after the hash blocks.
+ */
+
+ hash_blocks = v->hash_blocks - v->hash_start;
+
+ /*
+ * Require matching block sizes for data and hash devices for
+ * simplicity.
+ */
+ if (v->data_dev_block_bits != v->hash_dev_block_bits) {
+ ti->error = "Block sizes must match to use FEC";
+ return -EINVAL;
+ }
+
+ if (!f->roots) {
+ ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS;
+ return -EINVAL;
+ }
+ f->rsn = DM_VERITY_FEC_RSM - f->roots;
+
+ if (!f->blocks) {
+ ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS;
+ return -EINVAL;
+ }
+
+ f->rounds = f->blocks;
+ if (sector_div(f->rounds, f->rsn))
+ f->rounds++;
+
+ /*
+ * Due to optional metadata, f->blocks can be larger than
+ * data_blocks and hash_blocks combined.
+ */
+ if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) {
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+ return -EINVAL;
+ }
+
+ /*
+ * Metadata is accessed through the hash device, so we require
+ * it to be large enough.
+ */
+ f->hash_blocks = f->blocks - v->data_blocks;
+ if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) {
+ ti->error = "Hash device is too small for "
+ DM_VERITY_OPT_FEC_BLOCKS;
+ return -E2BIG;
+ }
+
+ f->bufio = dm_bufio_client_create(f->dev->bdev,
+ 1 << v->data_dev_block_bits,
+ 1, 0, NULL, NULL);
+ if (IS_ERR(f->bufio)) {
+ ti->error = "Cannot initialize FEC bufio client";
+ return PTR_ERR(f->bufio);
+ }
+
+ if (dm_bufio_get_device_size(f->bufio) <
+ ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) {
+ ti->error = "FEC device is too small";
+ return -E2BIG;
+ }
+
+ f->data_bufio = dm_bufio_client_create(v->data_dev->bdev,
+ 1 << v->data_dev_block_bits,
+ 1, 0, NULL, NULL);
+ if (IS_ERR(f->data_bufio)) {
+ ti->error = "Cannot initialize FEC data bufio client";
+ return PTR_ERR(f->data_bufio);
+ }
+
+ if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) {
+ ti->error = "Data device is too small";
+ return -E2BIG;
+ }
+
+ /* Preallocate an rs_control structure for each worker thread */
+ f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc,
+ fec_rs_free, (void *) v);
+ if (!f->rs_pool) {
+ ti->error = "Cannot allocate RS pool";
+ return -ENOMEM;
+ }
+
+ f->cache = kmem_cache_create("dm_verity_fec_buffers",
+ f->rsn << DM_VERITY_FEC_BUF_RS_BITS,
+ 0, 0, NULL);
+ if (!f->cache) {
+ ti->error = "Cannot create FEC buffer cache";
+ return -ENOMEM;
+ }
+
+ /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
+ f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() *
+ DM_VERITY_FEC_BUF_PREALLOC,
+ f->cache);
+ if (!f->prealloc_pool) {
+ ti->error = "Cannot allocate FEC buffer prealloc pool";
+ return -ENOMEM;
+ }
+
+ f->extra_pool = mempool_create_slab_pool(0, f->cache);
+ if (!f->extra_pool) {
+ ti->error = "Cannot allocate FEC buffer extra pool";
+ return -ENOMEM;
+ }
+
+ /* Preallocate an output buffer for each thread */
+ f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(),
+ 1 << v->data_dev_block_bits);
+ if (!f->output_pool) {
+ ti->error = "Cannot allocate FEC output pool";
+ return -ENOMEM;
+ }
+
+ /* Reserve space for our per-bio data */
+ ti->per_bio_data_size += sizeof(struct dm_verity_fec_io);
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef DM_VERITY_FEC_H
+#define DM_VERITY_FEC_H
+
+#include "dm-verity.h"
+#include <linux/rslib.h>
+
+/* Reed-Solomon(M, N) parameters */
+#define DM_VERITY_FEC_RSM 255
+#define DM_VERITY_FEC_MAX_RSN 253
+#define DM_VERITY_FEC_MIN_RSN 231 /* ~10% space overhead */
+
+/* buffers for deinterleaving and decoding */
+#define DM_VERITY_FEC_BUF_PREALLOC 1 /* buffers to preallocate */
+#define DM_VERITY_FEC_BUF_RS_BITS 4 /* 1 << RS blocks per buffer */
+/* we need buffers for at most 1 << block size RS blocks */
+#define DM_VERITY_FEC_BUF_MAX \
+ (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
+
+#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device"
+#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks"
+#define DM_VERITY_OPT_FEC_START "fec_start"
+#define DM_VERITY_OPT_FEC_ROOTS "fec_roots"
+
+/* configuration */
+struct dm_verity_fec {
+ struct dm_dev *dev; /* parity data device */
+ struct dm_bufio_client *data_bufio; /* for data dev access */
+ struct dm_bufio_client *bufio; /* for parity data access */
+ sector_t start; /* parity data start in blocks */
+ sector_t blocks; /* number of blocks covered */
+ sector_t rounds; /* number of interleaving rounds */
+ sector_t hash_blocks; /* blocks covered after v->hash_start */
+ unsigned char roots; /* number of parity bytes, M-N of RS(M, N) */
+ unsigned char rsn; /* N of RS(M, N) */
+ mempool_t *rs_pool; /* mempool for fio->rs */
+ mempool_t *prealloc_pool; /* mempool for preallocated buffers */
+ mempool_t *extra_pool; /* mempool for extra buffers */
+ mempool_t *output_pool; /* mempool for output */
+ struct kmem_cache *cache; /* cache for buffers */
+};
+
+/* per-bio data */
+struct dm_verity_fec_io {
+ struct rs_control *rs; /* Reed-Solomon state */
+ int erasures[DM_VERITY_FEC_MAX_RSN]; /* erasures for decode_rs8 */
+ u8 *bufs[DM_VERITY_FEC_BUF_MAX]; /* bufs for deinterleaving */
+ unsigned nbufs; /* number of buffers allocated */
+ u8 *output; /* buffer for corrected output */
+ size_t output_pos;
+};
+
+#ifdef CONFIG_DM_VERITY_FEC
+
+/* each feature parameter requires a value */
+#define DM_VERITY_OPTS_FEC 8
+
+extern bool verity_fec_is_enabled(struct dm_verity *v);
+
+extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+ enum verity_block_type type, sector_t block,
+ u8 *dest, struct bvec_iter *iter);
+
+extern unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
+ char *result, unsigned maxlen);
+
+extern void verity_fec_finish_io(struct dm_verity_io *io);
+extern void verity_fec_init_io(struct dm_verity_io *io);
+
+extern bool verity_is_fec_opt_arg(const char *arg_name);
+extern int verity_fec_parse_opt_args(struct dm_arg_set *as,
+ struct dm_verity *v, unsigned *argc,
+ const char *arg_name);
+
+extern void verity_fec_dtr(struct dm_verity *v);
+
+extern int verity_fec_ctr_alloc(struct dm_verity *v);
+extern int verity_fec_ctr(struct dm_verity *v);
+
+#else /* !CONFIG_DM_VERITY_FEC */
+
+#define DM_VERITY_OPTS_FEC 0
+
+static inline bool verity_fec_is_enabled(struct dm_verity *v)
+{
+ return false;
+}
+
+static inline int verity_fec_decode(struct dm_verity *v,
+ struct dm_verity_io *io,
+ enum verity_block_type type,
+ sector_t block, u8 *dest,
+ struct bvec_iter *iter)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline unsigned verity_fec_status_table(struct dm_verity *v,
+ unsigned sz, char *result,
+ unsigned maxlen)
+{
+ return sz;
+}
+
+static inline void verity_fec_finish_io(struct dm_verity_io *io)
+{
+}
+
+static inline void verity_fec_init_io(struct dm_verity_io *io)
+{
+}
+
+static inline bool verity_is_fec_opt_arg(const char *arg_name)
+{
+ return false;
+}
+
+static inline int verity_fec_parse_opt_args(struct dm_arg_set *as,
+ struct dm_verity *v,
+ unsigned *argc,
+ const char *arg_name)
+{
+ return -EINVAL;
+}
+
+static inline void verity_fec_dtr(struct dm_verity *v)
+{
+}
+
+static inline int verity_fec_ctr_alloc(struct dm_verity *v)
+{
+ return 0;
+}
+
+static inline int verity_fec_ctr(struct dm_verity *v)
+{
+ return 0;
+}
+
+#endif /* CONFIG_DM_VERITY_FEC */
+
+#endif /* DM_VERITY_FEC_H */
--- /dev/null
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
+ *
+ * This file is released under the GPLv2.
+ *
+ * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set
+ * default prefetch value. Data are read in "prefetch_cluster" chunks from the
+ * hash device. Setting this greatly improves performance when data and hash
+ * are on the same disk on different partitions on devices with poor random
+ * access behavior.
+ */
+
+#include "dm-verity.h"
+#include "dm-verity-fec.h"
+
+#include <linux/module.h>
+#include <linux/reboot.h>
+
+#define DM_MSG_PREFIX "verity"
+
+#define DM_VERITY_ENV_LENGTH 42
+#define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR"
+
+#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
+
+#define DM_VERITY_MAX_CORRUPTED_ERRS 100
+
+#define DM_VERITY_OPT_LOGGING "ignore_corruption"
+#define DM_VERITY_OPT_RESTART "restart_on_corruption"
+#define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks"
+
+#define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC)
+
+static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
+
+module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
+
+struct dm_verity_prefetch_work {
+ struct work_struct work;
+ struct dm_verity *v;
+ sector_t block;
+ unsigned n_blocks;
+};
+
+/*
+ * Auxiliary structure appended to each dm-bufio buffer. If the value
+ * hash_verified is nonzero, hash of the block has been verified.
+ *
+ * The variable hash_verified is set to 0 when allocating the buffer, then
+ * it can be changed to 1 and it is never reset to 0 again.
+ *
+ * There is no lock around this value, a race condition can at worst cause
+ * that multiple processes verify the hash of the same buffer simultaneously
+ * and write 1 to hash_verified simultaneously.
+ * This condition is harmless, so we don't need locking.
+ */
+struct buffer_aux {
+ int hash_verified;
+};
+
+/*
+ * Initialize struct buffer_aux for a freshly created buffer.
+ */
+static void dm_bufio_alloc_callback(struct dm_buffer *buf)
+{
+ struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
+
+ aux->hash_verified = 0;
+}
+
+/*
+ * Translate input sector number to the sector number on the target device.
+ */
+static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector)
+{
+ return v->data_start + dm_target_offset(v->ti, bi_sector);
+}
+
+/*
+ * Return hash position of a specified block at a specified tree level
+ * (0 is the lowest level).
+ * The lowest "hash_per_block_bits"-bits of the result denote hash position
+ * inside a hash block. The remaining bits denote location of the hash block.
+ */
+static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
+ int level)
+{
+ return block >> (level * v->hash_per_block_bits);
+}
+
+/*
+ * Wrapper for crypto_shash_init, which handles verity salting.
+ */
+static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc)
+{
+ int r;
+
+ desc->tfm = v->tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ r = crypto_shash_init(desc);
+
+ if (unlikely(r < 0)) {
+ DMERR("crypto_shash_init failed: %d", r);
+ return r;
+ }
+
+ if (likely(v->version >= 1)) {
+ r = crypto_shash_update(desc, v->salt, v->salt_size);
+
+ if (unlikely(r < 0)) {
+ DMERR("crypto_shash_update failed: %d", r);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc,
+ const u8 *data, size_t len)
+{
+ int r = crypto_shash_update(desc, data, len);
+
+ if (unlikely(r < 0))
+ DMERR("crypto_shash_update failed: %d", r);
+
+ return r;
+}
+
+static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
+ u8 *digest)
+{
+ int r;
+
+ if (unlikely(!v->version)) {
+ r = crypto_shash_update(desc, v->salt, v->salt_size);
+
+ if (r < 0) {
+ DMERR("crypto_shash_update failed: %d", r);
+ return r;
+ }
+ }
+
+ r = crypto_shash_final(desc, digest);
+
+ if (unlikely(r < 0))
+ DMERR("crypto_shash_final failed: %d", r);
+
+ return r;
+}
+
+int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+ const u8 *data, size_t len, u8 *digest)
+{
+ int r;
+
+ r = verity_hash_init(v, desc);
+ if (unlikely(r < 0))
+ return r;
+
+ r = verity_hash_update(v, desc, data, len);
+ if (unlikely(r < 0))
+ return r;
+
+ return verity_hash_final(v, desc, digest);
+}
+
+static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
+ sector_t *hash_block, unsigned *offset)
+{
+ sector_t position = verity_position_at_level(v, block, level);
+ unsigned idx;
+
+ *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits);
+
+ if (!offset)
+ return;
+
+ idx = position & ((1 << v->hash_per_block_bits) - 1);
+ if (!v->version)
+ *offset = idx * v->digest_size;
+ else
+ *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits);
+}
+
+/*
+ * Handle verification errors.
+ */
+static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
+ unsigned long long block)
+{
+ char verity_env[DM_VERITY_ENV_LENGTH];
+ char *envp[] = { verity_env, NULL };
+ const char *type_str = "";
+ struct mapped_device *md = dm_table_get_md(v->ti->table);
+
+ /* Corruption should be visible in device status in all modes */
+ v->hash_failed = 1;
+
+ if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS)
+ goto out;
+
+ v->corrupted_errs++;
+
+ switch (type) {
+ case DM_VERITY_BLOCK_TYPE_DATA:
+ type_str = "data";
+ break;
+ case DM_VERITY_BLOCK_TYPE_METADATA:
+ type_str = "metadata";
+ break;
+ default:
+ BUG();
+ }
+
+ DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str,
+ block);
+
+ if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
+ DMERR("%s: reached maximum errors", v->data_dev->name);
+
+ snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu",
+ DM_VERITY_ENV_VAR_NAME, type, block);
+
+ kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp);
+
+out:
+ if (v->mode == DM_VERITY_MODE_LOGGING)
+ return 0;
+
+ if (v->mode == DM_VERITY_MODE_RESTART)
+ kernel_restart("dm-verity device corrupted");
+
+ return 1;
+}
+
+/*
+ * Verify hash of a metadata block pertaining to the specified data block
+ * ("block" argument) at a specified level ("level" argument).
+ *
+ * On successful return, verity_io_want_digest(v, io) contains the hash value
+ * for a lower tree level or for the data block (if we're at the lowest level).
+ *
+ * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
+ * If "skip_unverified" is false, unverified buffer is hashed and verified
+ * against current value of verity_io_want_digest(v, io).
+ */
+static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
+ sector_t block, int level, bool skip_unverified,
+ u8 *want_digest)
+{
+ struct dm_buffer *buf;
+ struct buffer_aux *aux;
+ u8 *data;
+ int r;
+ sector_t hash_block;
+ unsigned offset;
+
+ verity_hash_at_level(v, block, level, &hash_block, &offset);
+
+ data = dm_bufio_read(v->bufio, hash_block, &buf);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ aux = dm_bufio_get_aux_data(buf);
+
+ if (!aux->hash_verified) {
+ if (skip_unverified) {
+ r = 1;
+ goto release_ret_r;
+ }
+
+ r = verity_hash(v, verity_io_hash_desc(v, io),
+ data, 1 << v->hash_dev_block_bits,
+ verity_io_real_digest(v, io));
+ if (unlikely(r < 0))
+ goto release_ret_r;
+
+ if (likely(memcmp(verity_io_real_digest(v, io), want_digest,
+ v->digest_size) == 0))
+ aux->hash_verified = 1;
+ else if (verity_fec_decode(v, io,
+ DM_VERITY_BLOCK_TYPE_METADATA,
+ hash_block, data, NULL) == 0)
+ aux->hash_verified = 1;
+ else if (verity_handle_err(v,
+ DM_VERITY_BLOCK_TYPE_METADATA,
+ hash_block)) {
+ r = -EIO;
+ goto release_ret_r;
+ }
+ }
+
+ data += offset;
+ memcpy(want_digest, data, v->digest_size);
+ r = 0;
+
+release_ret_r:
+ dm_bufio_release(buf);
+ return r;
+}
+
+/*
+ * Find a hash for a given block, write it to digest and verify the integrity
+ * of the hash tree if necessary.
+ */
+int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+ sector_t block, u8 *digest, bool *is_zero)
+{
+ int r = 0, i;
+
+ if (likely(v->levels)) {
+ /*
+ * First, we try to get the requested hash for
+ * the current block. If the hash block itself is
+ * verified, zero is returned. If it isn't, this
+ * function returns 1 and we fall back to whole
+ * chain verification.
+ */
+ r = verity_verify_level(v, io, block, 0, true, digest);
+ if (likely(r <= 0))
+ goto out;
+ }
+
+ memcpy(digest, v->root_digest, v->digest_size);
+
+ for (i = v->levels - 1; i >= 0; i--) {
+ r = verity_verify_level(v, io, block, i, false, digest);
+ if (unlikely(r))
+ goto out;
+ }
+out:
+ if (!r && v->zero_digest)
+ *is_zero = !memcmp(v->zero_digest, digest, v->digest_size);
+ else
+ *is_zero = false;
+
+ return r;
+}
+
+/*
+ * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec
+ * starting from iter.
+ */
+int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+ struct bvec_iter *iter,
+ int (*process)(struct dm_verity *v,
+ struct dm_verity_io *io, u8 *data,
+ size_t len))
+{
+ unsigned todo = 1 << v->data_dev_block_bits;
+ struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
+
+ do {
+ int r;
+ u8 *page;
+ unsigned len;
+ struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+ page = kmap_atomic(bv.bv_page);
+ len = bv.bv_len;
+
+ if (likely(len >= todo))
+ len = todo;
+
+ r = process(v, io, page + bv.bv_offset, len);
+ kunmap_atomic(page);
+
+ if (r < 0)
+ return r;
+
+ bio_advance_iter(bio, iter, len);
+ todo -= len;
+ } while (todo);
+
+ return 0;
+}
+
+static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+{
+ return verity_hash_update(v, verity_io_hash_desc(v, io), data, len);
+}
+
+static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+{
+ memset(data, 0, len);
+ return 0;
+}
+
+/*
+ * Verify one "dm_verity_io" structure.
+ */
+static int verity_verify_io(struct dm_verity_io *io)
+{
+ bool is_zero;
+ struct dm_verity *v = io->v;
+ struct bvec_iter start;
+ unsigned b;
+
+ for (b = 0; b < io->n_blocks; b++) {
+ int r;
+ struct shash_desc *desc = verity_io_hash_desc(v, io);
+
+ r = verity_hash_for_block(v, io, io->block + b,
+ verity_io_want_digest(v, io),
+ &is_zero);
+ if (unlikely(r < 0))
+ return r;
+
+ if (is_zero) {
+ /*
+ * If we expect a zero block, don't validate, just
+ * return zeros.
+ */
+ r = verity_for_bv_block(v, io, &io->iter,
+ verity_bv_zero);
+ if (unlikely(r < 0))
+ return r;
+
+ continue;
+ }
+
+ r = verity_hash_init(v, desc);
+ if (unlikely(r < 0))
+ return r;
+
+ start = io->iter;
+ r = verity_for_bv_block(v, io, &io->iter, verity_bv_hash_update);
+ if (unlikely(r < 0))
+ return r;
+
+ r = verity_hash_final(v, desc, verity_io_real_digest(v, io));
+ if (unlikely(r < 0))
+ return r;
+
+ if (likely(memcmp(verity_io_real_digest(v, io),
+ verity_io_want_digest(v, io), v->digest_size) == 0))
+ continue;
+ else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
+ io->block + b, NULL, &start) == 0)
+ continue;
+ else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
+ io->block + b))
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * End one "io" structure with a given error.
+ */
+static void verity_finish_io(struct dm_verity_io *io, int error)
+{
+ struct dm_verity *v = io->v;
+ struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
+
+ bio->bi_end_io = io->orig_bi_end_io;
+ bio->bi_error = error;
+
+ verity_fec_finish_io(io);
+
+ bio_endio(bio);
+}
+
+static void verity_work(struct work_struct *w)
+{
+ struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
+
+ verity_finish_io(io, verity_verify_io(io));
+}
+
+static void verity_end_io(struct bio *bio)
+{
+ struct dm_verity_io *io = bio->bi_private;
+
+ if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
+ verity_finish_io(io, bio->bi_error);
+ return;
+ }
+
+ INIT_WORK(&io->work, verity_work);
+ queue_work(io->v->verify_wq, &io->work);
+}
+
+/*
+ * Prefetch buffers for the specified io.
+ * The root buffer is not prefetched, it is assumed that it will be cached
+ * all the time.
+ */
+static void verity_prefetch_io(struct work_struct *work)
+{
+ struct dm_verity_prefetch_work *pw =
+ container_of(work, struct dm_verity_prefetch_work, work);
+ struct dm_verity *v = pw->v;
+ int i;
+
+ for (i = v->levels - 2; i >= 0; i--) {
+ sector_t hash_block_start;
+ sector_t hash_block_end;
+ verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
+ verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
+ if (!i) {
+ unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
+
+ cluster >>= v->data_dev_block_bits;
+ if (unlikely(!cluster))
+ goto no_prefetch_cluster;
+
+ if (unlikely(cluster & (cluster - 1)))
+ cluster = 1 << __fls(cluster);
+
+ hash_block_start &= ~(sector_t)(cluster - 1);
+ hash_block_end |= cluster - 1;
+ if (unlikely(hash_block_end >= v->hash_blocks))
+ hash_block_end = v->hash_blocks - 1;
+ }
+no_prefetch_cluster:
+ dm_bufio_prefetch(v->bufio, hash_block_start,
+ hash_block_end - hash_block_start + 1);
+ }
+
+ kfree(pw);
+}
+
+static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
+{
+ struct dm_verity_prefetch_work *pw;
+
+ pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
+ GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+ if (!pw)
+ return;
+
+ INIT_WORK(&pw->work, verity_prefetch_io);
+ pw->v = v;
+ pw->block = io->block;
+ pw->n_blocks = io->n_blocks;
+ queue_work(v->verify_wq, &pw->work);
+}
+
+/*
+ * Bio map function. It allocates dm_verity_io structure and bio vector and
+ * fills them. Then it issues prefetches and the I/O.
+ */
+static int verity_map(struct dm_target *ti, struct bio *bio)
+{
+ struct dm_verity *v = ti->private;
+ struct dm_verity_io *io;
+
+ bio->bi_bdev = v->data_dev->bdev;
+ bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector);
+
+ if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
+ ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
+ DMERR_LIMIT("unaligned io");
+ return -EIO;
+ }
+
+ if (bio_end_sector(bio) >>
+ (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
+ DMERR_LIMIT("io out of range");
+ return -EIO;
+ }
+
+ if (bio_data_dir(bio) == WRITE)
+ return -EIO;
+
+ io = dm_per_bio_data(bio, ti->per_bio_data_size);
+ io->v = v;
+ io->orig_bi_end_io = bio->bi_end_io;
+ io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
+ io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
+
+ bio->bi_end_io = verity_end_io;
+ bio->bi_private = io;
+ io->iter = bio->bi_iter;
+
+ verity_fec_init_io(io);
+
+ verity_submit_prefetch(v, io);
+
+ generic_make_request(bio);
+
+ return DM_MAPIO_SUBMITTED;
+}
+
+/*
+ * Status: V (valid) or C (corruption found)
+ */
+static void verity_status(struct dm_target *ti, status_type_t type,
+ unsigned status_flags, char *result, unsigned maxlen)
+{
+ struct dm_verity *v = ti->private;
+ unsigned args = 0;
+ unsigned sz = 0;
+ unsigned x;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("%c", v->hash_failed ? 'C' : 'V');
+ break;
+ case STATUSTYPE_TABLE:
+ DMEMIT("%u %s %s %u %u %llu %llu %s ",
+ v->version,
+ v->data_dev->name,
+ v->hash_dev->name,
+ 1 << v->data_dev_block_bits,
+ 1 << v->hash_dev_block_bits,
+ (unsigned long long)v->data_blocks,
+ (unsigned long long)v->hash_start,
+ v->alg_name
+ );
+ for (x = 0; x < v->digest_size; x++)
+ DMEMIT("%02x", v->root_digest[x]);
+ DMEMIT(" ");
+ if (!v->salt_size)
+ DMEMIT("-");
+ else
+ for (x = 0; x < v->salt_size; x++)
+ DMEMIT("%02x", v->salt[x]);
+ if (v->mode != DM_VERITY_MODE_EIO)
+ args++;
+ if (verity_fec_is_enabled(v))
+ args += DM_VERITY_OPTS_FEC;
+ if (v->zero_digest)
+ args++;
+ if (!args)
+ return;
+ DMEMIT(" %u", args);
+ if (v->mode != DM_VERITY_MODE_EIO) {
+ DMEMIT(" ");
+ switch (v->mode) {
+ case DM_VERITY_MODE_LOGGING:
+ DMEMIT(DM_VERITY_OPT_LOGGING);
+ break;
+ case DM_VERITY_MODE_RESTART:
+ DMEMIT(DM_VERITY_OPT_RESTART);
+ break;
+ default:
+ BUG();
+ }
+ }
+ if (v->zero_digest)
+ DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES);
+ sz = verity_fec_status_table(v, sz, result, maxlen);
+ break;
+ }
+}
+
+static int verity_prepare_ioctl(struct dm_target *ti,
+ struct block_device **bdev, fmode_t *mode)
+{
+ struct dm_verity *v = ti->private;
+
+ *bdev = v->data_dev->bdev;
+
+ if (v->data_start ||
+ ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT)
+ return 1;
+ return 0;
+}
+
+static int verity_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct dm_verity *v = ti->private;
+
+ return fn(ti, v->data_dev, v->data_start, ti->len, data);
+}
+
+static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+ struct dm_verity *v = ti->private;
+
+ if (limits->logical_block_size < 1 << v->data_dev_block_bits)
+ limits->logical_block_size = 1 << v->data_dev_block_bits;
+
+ if (limits->physical_block_size < 1 << v->data_dev_block_bits)
+ limits->physical_block_size = 1 << v->data_dev_block_bits;
+
+ blk_limits_io_min(limits, limits->logical_block_size);
+}
+
+static void verity_dtr(struct dm_target *ti)
+{
+ struct dm_verity *v = ti->private;
+
+ if (v->verify_wq)
+ destroy_workqueue(v->verify_wq);
+
+ if (v->bufio)
+ dm_bufio_client_destroy(v->bufio);
+
+ kfree(v->salt);
+ kfree(v->root_digest);
+ kfree(v->zero_digest);
+
+ if (v->tfm)
+ crypto_free_shash(v->tfm);
+
+ kfree(v->alg_name);
+
+ if (v->hash_dev)
+ dm_put_device(ti, v->hash_dev);
+
+ if (v->data_dev)
+ dm_put_device(ti, v->data_dev);
+
+ verity_fec_dtr(v);
+
+ kfree(v);
+}
+
+static int verity_alloc_zero_digest(struct dm_verity *v)
+{
+ int r = -ENOMEM;
+ struct shash_desc *desc;
+ u8 *zero_data;
+
+ v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
+
+ if (!v->zero_digest)
+ return r;
+
+ desc = kmalloc(v->shash_descsize, GFP_KERNEL);
+
+ if (!desc)
+ return r; /* verity_dtr will free zero_digest */
+
+ zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
+
+ if (!zero_data)
+ goto out;
+
+ r = verity_hash(v, desc, zero_data, 1 << v->data_dev_block_bits,
+ v->zero_digest);
+
+out:
+ kfree(desc);
+ kfree(zero_data);
+
+ return r;
+}
+
+static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
+{
+ int r;
+ unsigned argc;
+ struct dm_target *ti = v->ti;
+ const char *arg_name;
+
+ static struct dm_arg _args[] = {
+ {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"},
+ };
+
+ r = dm_read_arg_group(_args, as, &argc, &ti->error);
+ if (r)
+ return -EINVAL;
+
+ if (!argc)
+ return 0;
+
+ do {
+ arg_name = dm_shift_arg(as);
+ argc--;
+
+ if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) {
+ v->mode = DM_VERITY_MODE_LOGGING;
+ continue;
+
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) {
+ v->mode = DM_VERITY_MODE_RESTART;
+ continue;
+
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) {
+ r = verity_alloc_zero_digest(v);
+ if (r) {
+ ti->error = "Cannot allocate zero digest";
+ return r;
+ }
+ continue;
+
+ } else if (verity_is_fec_opt_arg(arg_name)) {
+ r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
+ if (r)
+ return r;
+ continue;
+ }
+
+ ti->error = "Unrecognized verity feature request";
+ return -EINVAL;
+ } while (argc && !r);
+
+ return r;
+}
+
+/*
+ * Target parameters:
+ * <version> The current format is version 1.
+ * Vsn 0 is compatible with original Chromium OS releases.
+ * <data device>
+ * <hash device>
+ * <data block size>
+ * <hash block size>
+ * <the number of data blocks>
+ * <hash start block>
+ * <algorithm>
+ * <digest>
+ * <salt> Hex string or "-" if no salt.
+ */
+static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+ struct dm_verity *v;
+ struct dm_arg_set as;
+ unsigned int num;
+ unsigned long long num_ll;
+ int r;
+ int i;
+ sector_t hash_position;
+ char dummy;
+
+ v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
+ if (!v) {
+ ti->error = "Cannot allocate verity structure";
+ return -ENOMEM;
+ }
+ ti->private = v;
+ v->ti = ti;
+
+ r = verity_fec_ctr_alloc(v);
+ if (r)
+ goto bad;
+
+ if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
+ ti->error = "Device must be readonly";
+ r = -EINVAL;
+ goto bad;
+ }
+
+ if (argc < 10) {
+ ti->error = "Not enough arguments";
+ r = -EINVAL;
+ goto bad;
+ }
+
+ if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 ||
+ num > 1) {
+ ti->error = "Invalid version";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->version = num;
+
+ r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
+ if (r) {
+ ti->error = "Data device lookup failed";
+ goto bad;
+ }
+
+ r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
+ if (r) {
+ ti->error = "Data device lookup failed";
+ goto bad;
+ }
+
+ if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 ||
+ !num || (num & (num - 1)) ||
+ num < bdev_logical_block_size(v->data_dev->bdev) ||
+ num > PAGE_SIZE) {
+ ti->error = "Invalid data device block size";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->data_dev_block_bits = __ffs(num);
+
+ if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 ||
+ !num || (num & (num - 1)) ||
+ num < bdev_logical_block_size(v->hash_dev->bdev) ||
+ num > INT_MAX) {
+ ti->error = "Invalid hash device block size";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->hash_dev_block_bits = __ffs(num);
+
+ if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
+ (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+ ti->error = "Invalid data blocks";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->data_blocks = num_ll;
+
+ if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) {
+ ti->error = "Data device is too small";
+ r = -EINVAL;
+ goto bad;
+ }
+
+ if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
+ (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
+ >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+ ti->error = "Invalid hash start";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->hash_start = num_ll;
+
+ v->alg_name = kstrdup(argv[7], GFP_KERNEL);
+ if (!v->alg_name) {
+ ti->error = "Cannot allocate algorithm name";
+ r = -ENOMEM;
+ goto bad;
+ }
+
+ v->tfm = crypto_alloc_shash(v->alg_name, 0, 0);
+ if (IS_ERR(v->tfm)) {
+ ti->error = "Cannot initialize hash function";
+ r = PTR_ERR(v->tfm);
+ v->tfm = NULL;
+ goto bad;
+ }
+ v->digest_size = crypto_shash_digestsize(v->tfm);
+ if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
+ ti->error = "Digest size too big";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->shash_descsize =
+ sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm);
+
+ v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
+ if (!v->root_digest) {
+ ti->error = "Cannot allocate root digest";
+ r = -ENOMEM;
+ goto bad;
+ }
+ if (strlen(argv[8]) != v->digest_size * 2 ||
+ hex2bin(v->root_digest, argv[8], v->digest_size)) {
+ ti->error = "Invalid root digest";
+ r = -EINVAL;
+ goto bad;
+ }
+
+ if (strcmp(argv[9], "-")) {
+ v->salt_size = strlen(argv[9]) / 2;
+ v->salt = kmalloc(v->salt_size, GFP_KERNEL);
+ if (!v->salt) {
+ ti->error = "Cannot allocate salt";
+ r = -ENOMEM;
+ goto bad;
+ }
+ if (strlen(argv[9]) != v->salt_size * 2 ||
+ hex2bin(v->salt, argv[9], v->salt_size)) {
+ ti->error = "Invalid salt";
+ r = -EINVAL;
+ goto bad;
+ }
+ }
+
+ argv += 10;
+ argc -= 10;
+
+ /* Optional parameters */
+ if (argc) {
+ as.argc = argc;
+ as.argv = argv;
+
+ r = verity_parse_opt_args(&as, v);
+ if (r < 0)
+ goto bad;
+ }
+
+ v->hash_per_block_bits =
+ __fls((1 << v->hash_dev_block_bits) / v->digest_size);
+
+ v->levels = 0;
+ if (v->data_blocks)
+ while (v->hash_per_block_bits * v->levels < 64 &&
+ (unsigned long long)(v->data_blocks - 1) >>
+ (v->hash_per_block_bits * v->levels))
+ v->levels++;
+
+ if (v->levels > DM_VERITY_MAX_LEVELS) {
+ ti->error = "Too many tree levels";
+ r = -E2BIG;
+ goto bad;
+ }
+
+ hash_position = v->hash_start;
+ for (i = v->levels - 1; i >= 0; i--) {
+ sector_t s;
+ v->hash_level_block[i] = hash_position;
+ s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1)
+ >> ((i + 1) * v->hash_per_block_bits);
+ if (hash_position + s < hash_position) {
+ ti->error = "Hash device offset overflow";
+ r = -E2BIG;
+ goto bad;
+ }
+ hash_position += s;
+ }
+ v->hash_blocks = hash_position;
+
+ v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
+ 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
+ dm_bufio_alloc_callback, NULL);
+ if (IS_ERR(v->bufio)) {
+ ti->error = "Cannot initialize dm-bufio";
+ r = PTR_ERR(v->bufio);
+ v->bufio = NULL;
+ goto bad;
+ }
+
+ if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) {
+ ti->error = "Hash device is too small";
+ r = -E2BIG;
+ goto bad;
+ }
+
+ /* WQ_UNBOUND greatly improves performance when running on ramdisk */
+ v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
+ if (!v->verify_wq) {
+ ti->error = "Cannot allocate workqueue";
+ r = -ENOMEM;
+ goto bad;
+ }
+
+ ti->per_bio_data_size = sizeof(struct dm_verity_io) +
+ v->shash_descsize + v->digest_size * 2;
+
+ r = verity_fec_ctr(v);
+ if (r)
+ goto bad;
+
+ ti->per_bio_data_size = roundup(ti->per_bio_data_size,
+ __alignof__(struct dm_verity_io));
+
+ return 0;
+
+bad:
+ verity_dtr(ti);
+
+ return r;
+}
+
+static struct target_type verity_target = {
+ .name = "verity",
+ .version = {1, 3, 0},
+ .module = THIS_MODULE,
+ .ctr = verity_ctr,
+ .dtr = verity_dtr,
+ .map = verity_map,
+ .status = verity_status,
+ .prepare_ioctl = verity_prepare_ioctl,
+ .iterate_devices = verity_iterate_devices,
+ .io_hints = verity_io_hints,
+};
+
+static int __init dm_verity_init(void)
+{
+ int r;
+
+ r = dm_register_target(&verity_target);
+ if (r < 0)
+ DMERR("register failed %d", r);
+
+ return r;
+}
+
+static void __exit dm_verity_exit(void)
+{
+ dm_unregister_target(&verity_target);
+}
+
+module_init(dm_verity_init);
+module_exit(dm_verity_exit);
+
+MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
+MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>");
+MODULE_AUTHOR("Will Drewry <wad@chromium.org>");
+MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking");
+MODULE_LICENSE("GPL");
+++ /dev/null
-/*
- * Copyright (C) 2012 Red Hat, Inc.
- *
- * Author: Mikulas Patocka <mpatocka@redhat.com>
- *
- * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
- *
- * This file is released under the GPLv2.
- *
- * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set
- * default prefetch value. Data are read in "prefetch_cluster" chunks from the
- * hash device. Setting this greatly improves performance when data and hash
- * are on the same disk on different partitions on devices with poor random
- * access behavior.
- */
-
-#include "dm-bufio.h"
-
-#include <linux/module.h>
-#include <linux/device-mapper.h>
-#include <linux/reboot.h>
-#include <crypto/hash.h>
-
-#define DM_MSG_PREFIX "verity"
-
-#define DM_VERITY_ENV_LENGTH 42
-#define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR"
-
-#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
-
-#define DM_VERITY_MAX_LEVELS 63
-#define DM_VERITY_MAX_CORRUPTED_ERRS 100
-
-#define DM_VERITY_OPT_LOGGING "ignore_corruption"
-#define DM_VERITY_OPT_RESTART "restart_on_corruption"
-
-static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
-
-module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
-
-enum verity_mode {
- DM_VERITY_MODE_EIO,
- DM_VERITY_MODE_LOGGING,
- DM_VERITY_MODE_RESTART
-};
-
-enum verity_block_type {
- DM_VERITY_BLOCK_TYPE_DATA,
- DM_VERITY_BLOCK_TYPE_METADATA
-};
-
-struct dm_verity {
- struct dm_dev *data_dev;
- struct dm_dev *hash_dev;
- struct dm_target *ti;
- struct dm_bufio_client *bufio;
- char *alg_name;
- struct crypto_shash *tfm;
- u8 *root_digest; /* digest of the root block */
- u8 *salt; /* salt: its size is salt_size */
- unsigned salt_size;
- sector_t data_start; /* data offset in 512-byte sectors */
- sector_t hash_start; /* hash start in blocks */
- sector_t data_blocks; /* the number of data blocks */
- sector_t hash_blocks; /* the number of hash blocks */
- unsigned char data_dev_block_bits; /* log2(data blocksize) */
- unsigned char hash_dev_block_bits; /* log2(hash blocksize) */
- unsigned char hash_per_block_bits; /* log2(hashes in hash block) */
- unsigned char levels; /* the number of tree levels */
- unsigned char version;
- unsigned digest_size; /* digest size for the current hash algorithm */
- unsigned shash_descsize;/* the size of temporary space for crypto */
- int hash_failed; /* set to 1 if hash of any block failed */
- enum verity_mode mode; /* mode for handling verification errors */
- unsigned corrupted_errs;/* Number of errors for corrupted blocks */
-
- struct workqueue_struct *verify_wq;
-
- /* starting blocks for each tree level. 0 is the lowest level. */
- sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
-};
-
-struct dm_verity_io {
- struct dm_verity *v;
-
- /* original values of bio->bi_end_io and bio->bi_private */
- bio_end_io_t *orig_bi_end_io;
- void *orig_bi_private;
-
- sector_t block;
- unsigned n_blocks;
-
- struct bvec_iter iter;
-
- struct work_struct work;
-
- /*
- * Three variably-size fields follow this struct:
- *
- * u8 hash_desc[v->shash_descsize];
- * u8 real_digest[v->digest_size];
- * u8 want_digest[v->digest_size];
- *
- * To access them use: io_hash_desc(), io_real_digest() and io_want_digest().
- */
-};
-
-struct dm_verity_prefetch_work {
- struct work_struct work;
- struct dm_verity *v;
- sector_t block;
- unsigned n_blocks;
-};
-
-static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
-{
- return (struct shash_desc *)(io + 1);
-}
-
-static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io)
-{
- return (u8 *)(io + 1) + v->shash_descsize;
-}
-
-static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io)
-{
- return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
-}
-
-/*
- * Auxiliary structure appended to each dm-bufio buffer. If the value
- * hash_verified is nonzero, hash of the block has been verified.
- *
- * The variable hash_verified is set to 0 when allocating the buffer, then
- * it can be changed to 1 and it is never reset to 0 again.
- *
- * There is no lock around this value, a race condition can at worst cause
- * that multiple processes verify the hash of the same buffer simultaneously
- * and write 1 to hash_verified simultaneously.
- * This condition is harmless, so we don't need locking.
- */
-struct buffer_aux {
- int hash_verified;
-};
-
-/*
- * Initialize struct buffer_aux for a freshly created buffer.
- */
-static void dm_bufio_alloc_callback(struct dm_buffer *buf)
-{
- struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
-
- aux->hash_verified = 0;
-}
-
-/*
- * Translate input sector number to the sector number on the target device.
- */
-static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector)
-{
- return v->data_start + dm_target_offset(v->ti, bi_sector);
-}
-
-/*
- * Return hash position of a specified block at a specified tree level
- * (0 is the lowest level).
- * The lowest "hash_per_block_bits"-bits of the result denote hash position
- * inside a hash block. The remaining bits denote location of the hash block.
- */
-static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
- int level)
-{
- return block >> (level * v->hash_per_block_bits);
-}
-
-static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
- sector_t *hash_block, unsigned *offset)
-{
- sector_t position = verity_position_at_level(v, block, level);
- unsigned idx;
-
- *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits);
-
- if (!offset)
- return;
-
- idx = position & ((1 << v->hash_per_block_bits) - 1);
- if (!v->version)
- *offset = idx * v->digest_size;
- else
- *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits);
-}
-
-/*
- * Handle verification errors.
- */
-static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
- unsigned long long block)
-{
- char verity_env[DM_VERITY_ENV_LENGTH];
- char *envp[] = { verity_env, NULL };
- const char *type_str = "";
- struct mapped_device *md = dm_table_get_md(v->ti->table);
-
- /* Corruption should be visible in device status in all modes */
- v->hash_failed = 1;
-
- if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS)
- goto out;
-
- v->corrupted_errs++;
-
- switch (type) {
- case DM_VERITY_BLOCK_TYPE_DATA:
- type_str = "data";
- break;
- case DM_VERITY_BLOCK_TYPE_METADATA:
- type_str = "metadata";
- break;
- default:
- BUG();
- }
-
- DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str,
- block);
-
- if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
- DMERR("%s: reached maximum errors", v->data_dev->name);
-
- snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu",
- DM_VERITY_ENV_VAR_NAME, type, block);
-
- kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp);
-
-out:
- if (v->mode == DM_VERITY_MODE_LOGGING)
- return 0;
-
- if (v->mode == DM_VERITY_MODE_RESTART)
- kernel_restart("dm-verity device corrupted");
-
- return 1;
-}
-
-/*
- * Verify hash of a metadata block pertaining to the specified data block
- * ("block" argument) at a specified level ("level" argument).
- *
- * On successful return, io_want_digest(v, io) contains the hash value for
- * a lower tree level or for the data block (if we're at the lowest leve).
- *
- * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
- * If "skip_unverified" is false, unverified buffer is hashed and verified
- * against current value of io_want_digest(v, io).
- */
-static int verity_verify_level(struct dm_verity_io *io, sector_t block,
- int level, bool skip_unverified)
-{
- struct dm_verity *v = io->v;
- struct dm_buffer *buf;
- struct buffer_aux *aux;
- u8 *data;
- int r;
- sector_t hash_block;
- unsigned offset;
-
- verity_hash_at_level(v, block, level, &hash_block, &offset);
-
- data = dm_bufio_read(v->bufio, hash_block, &buf);
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- aux = dm_bufio_get_aux_data(buf);
-
- if (!aux->hash_verified) {
- struct shash_desc *desc;
- u8 *result;
-
- if (skip_unverified) {
- r = 1;
- goto release_ret_r;
- }
-
- desc = io_hash_desc(v, io);
- desc->tfm = v->tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- r = crypto_shash_init(desc);
- if (r < 0) {
- DMERR("crypto_shash_init failed: %d", r);
- goto release_ret_r;
- }
-
- if (likely(v->version >= 1)) {
- r = crypto_shash_update(desc, v->salt, v->salt_size);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- goto release_ret_r;
- }
- }
-
- r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- goto release_ret_r;
- }
-
- if (!v->version) {
- r = crypto_shash_update(desc, v->salt, v->salt_size);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- goto release_ret_r;
- }
- }
-
- result = io_real_digest(v, io);
- r = crypto_shash_final(desc, result);
- if (r < 0) {
- DMERR("crypto_shash_final failed: %d", r);
- goto release_ret_r;
- }
- if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
- if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA,
- hash_block)) {
- r = -EIO;
- goto release_ret_r;
- }
- } else
- aux->hash_verified = 1;
- }
-
- data += offset;
-
- memcpy(io_want_digest(v, io), data, v->digest_size);
-
- dm_bufio_release(buf);
- return 0;
-
-release_ret_r:
- dm_bufio_release(buf);
-
- return r;
-}
-
-/*
- * Verify one "dm_verity_io" structure.
- */
-static int verity_verify_io(struct dm_verity_io *io)
-{
- struct dm_verity *v = io->v;
- struct bio *bio = dm_bio_from_per_bio_data(io,
- v->ti->per_bio_data_size);
- unsigned b;
- int i;
-
- for (b = 0; b < io->n_blocks; b++) {
- struct shash_desc *desc;
- u8 *result;
- int r;
- unsigned todo;
-
- if (likely(v->levels)) {
- /*
- * First, we try to get the requested hash for
- * the current block. If the hash block itself is
- * verified, zero is returned. If it isn't, this
- * function returns 0 and we fall back to whole
- * chain verification.
- */
- int r = verity_verify_level(io, io->block + b, 0, true);
- if (likely(!r))
- goto test_block_hash;
- if (r < 0)
- return r;
- }
-
- memcpy(io_want_digest(v, io), v->root_digest, v->digest_size);
-
- for (i = v->levels - 1; i >= 0; i--) {
- int r = verity_verify_level(io, io->block + b, i, false);
- if (unlikely(r))
- return r;
- }
-
-test_block_hash:
- desc = io_hash_desc(v, io);
- desc->tfm = v->tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- r = crypto_shash_init(desc);
- if (r < 0) {
- DMERR("crypto_shash_init failed: %d", r);
- return r;
- }
-
- if (likely(v->version >= 1)) {
- r = crypto_shash_update(desc, v->salt, v->salt_size);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- return r;
- }
- }
- todo = 1 << v->data_dev_block_bits;
- do {
- u8 *page;
- unsigned len;
- struct bio_vec bv = bio_iter_iovec(bio, io->iter);
-
- page = kmap_atomic(bv.bv_page);
- len = bv.bv_len;
- if (likely(len >= todo))
- len = todo;
- r = crypto_shash_update(desc, page + bv.bv_offset, len);
- kunmap_atomic(page);
-
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- return r;
- }
-
- bio_advance_iter(bio, &io->iter, len);
- todo -= len;
- } while (todo);
-
- if (!v->version) {
- r = crypto_shash_update(desc, v->salt, v->salt_size);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- return r;
- }
- }
-
- result = io_real_digest(v, io);
- r = crypto_shash_final(desc, result);
- if (r < 0) {
- DMERR("crypto_shash_final failed: %d", r);
- return r;
- }
- if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
- if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
- io->block + b))
- return -EIO;
- }
- }
-
- return 0;
-}
-
-/*
- * End one "io" structure with a given error.
- */
-static void verity_finish_io(struct dm_verity_io *io, int error)
-{
- struct dm_verity *v = io->v;
- struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
-
- bio->bi_end_io = io->orig_bi_end_io;
- bio->bi_private = io->orig_bi_private;
- bio->bi_error = error;
-
- bio_endio(bio);
-}
-
-static void verity_work(struct work_struct *w)
-{
- struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
-
- verity_finish_io(io, verity_verify_io(io));
-}
-
-static void verity_end_io(struct bio *bio)
-{
- struct dm_verity_io *io = bio->bi_private;
-
- if (bio->bi_error) {
- verity_finish_io(io, bio->bi_error);
- return;
- }
-
- INIT_WORK(&io->work, verity_work);
- queue_work(io->v->verify_wq, &io->work);
-}
-
-/*
- * Prefetch buffers for the specified io.
- * The root buffer is not prefetched, it is assumed that it will be cached
- * all the time.
- */
-static void verity_prefetch_io(struct work_struct *work)
-{
- struct dm_verity_prefetch_work *pw =
- container_of(work, struct dm_verity_prefetch_work, work);
- struct dm_verity *v = pw->v;
- int i;
-
- for (i = v->levels - 2; i >= 0; i--) {
- sector_t hash_block_start;
- sector_t hash_block_end;
- verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
- verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
- if (!i) {
- unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
-
- cluster >>= v->data_dev_block_bits;
- if (unlikely(!cluster))
- goto no_prefetch_cluster;
-
- if (unlikely(cluster & (cluster - 1)))
- cluster = 1 << __fls(cluster);
-
- hash_block_start &= ~(sector_t)(cluster - 1);
- hash_block_end |= cluster - 1;
- if (unlikely(hash_block_end >= v->hash_blocks))
- hash_block_end = v->hash_blocks - 1;
- }
-no_prefetch_cluster:
- dm_bufio_prefetch(v->bufio, hash_block_start,
- hash_block_end - hash_block_start + 1);
- }
-
- kfree(pw);
-}
-
-static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
-{
- struct dm_verity_prefetch_work *pw;
-
- pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
- GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
-
- if (!pw)
- return;
-
- INIT_WORK(&pw->work, verity_prefetch_io);
- pw->v = v;
- pw->block = io->block;
- pw->n_blocks = io->n_blocks;
- queue_work(v->verify_wq, &pw->work);
-}
-
-/*
- * Bio map function. It allocates dm_verity_io structure and bio vector and
- * fills them. Then it issues prefetches and the I/O.
- */
-static int verity_map(struct dm_target *ti, struct bio *bio)
-{
- struct dm_verity *v = ti->private;
- struct dm_verity_io *io;
-
- bio->bi_bdev = v->data_dev->bdev;
- bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector);
-
- if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
- ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
- DMERR_LIMIT("unaligned io");
- return -EIO;
- }
-
- if (bio_end_sector(bio) >>
- (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
- DMERR_LIMIT("io out of range");
- return -EIO;
- }
-
- if (bio_data_dir(bio) == WRITE)
- return -EIO;
-
- io = dm_per_bio_data(bio, ti->per_bio_data_size);
- io->v = v;
- io->orig_bi_end_io = bio->bi_end_io;
- io->orig_bi_private = bio->bi_private;
- io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
- io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
-
- bio->bi_end_io = verity_end_io;
- bio->bi_private = io;
- io->iter = bio->bi_iter;
-
- verity_submit_prefetch(v, io);
-
- generic_make_request(bio);
-
- return DM_MAPIO_SUBMITTED;
-}
-
-/*
- * Status: V (valid) or C (corruption found)
- */
-static void verity_status(struct dm_target *ti, status_type_t type,
- unsigned status_flags, char *result, unsigned maxlen)
-{
- struct dm_verity *v = ti->private;
- unsigned sz = 0;
- unsigned x;
-
- switch (type) {
- case STATUSTYPE_INFO:
- DMEMIT("%c", v->hash_failed ? 'C' : 'V');
- break;
- case STATUSTYPE_TABLE:
- DMEMIT("%u %s %s %u %u %llu %llu %s ",
- v->version,
- v->data_dev->name,
- v->hash_dev->name,
- 1 << v->data_dev_block_bits,
- 1 << v->hash_dev_block_bits,
- (unsigned long long)v->data_blocks,
- (unsigned long long)v->hash_start,
- v->alg_name
- );
- for (x = 0; x < v->digest_size; x++)
- DMEMIT("%02x", v->root_digest[x]);
- DMEMIT(" ");
- if (!v->salt_size)
- DMEMIT("-");
- else
- for (x = 0; x < v->salt_size; x++)
- DMEMIT("%02x", v->salt[x]);
- if (v->mode != DM_VERITY_MODE_EIO) {
- DMEMIT(" 1 ");
- switch (v->mode) {
- case DM_VERITY_MODE_LOGGING:
- DMEMIT(DM_VERITY_OPT_LOGGING);
- break;
- case DM_VERITY_MODE_RESTART:
- DMEMIT(DM_VERITY_OPT_RESTART);
- break;
- default:
- BUG();
- }
- }
- break;
- }
-}
-
-static int verity_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
-{
- struct dm_verity *v = ti->private;
-
- *bdev = v->data_dev->bdev;
-
- if (v->data_start ||
- ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT)
- return 1;
- return 0;
-}
-
-static int verity_iterate_devices(struct dm_target *ti,
- iterate_devices_callout_fn fn, void *data)
-{
- struct dm_verity *v = ti->private;
-
- return fn(ti, v->data_dev, v->data_start, ti->len, data);
-}
-
-static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
-{
- struct dm_verity *v = ti->private;
-
- if (limits->logical_block_size < 1 << v->data_dev_block_bits)
- limits->logical_block_size = 1 << v->data_dev_block_bits;
-
- if (limits->physical_block_size < 1 << v->data_dev_block_bits)
- limits->physical_block_size = 1 << v->data_dev_block_bits;
-
- blk_limits_io_min(limits, limits->logical_block_size);
-}
-
-static void verity_dtr(struct dm_target *ti)
-{
- struct dm_verity *v = ti->private;
-
- if (v->verify_wq)
- destroy_workqueue(v->verify_wq);
-
- if (v->bufio)
- dm_bufio_client_destroy(v->bufio);
-
- kfree(v->salt);
- kfree(v->root_digest);
-
- if (v->tfm)
- crypto_free_shash(v->tfm);
-
- kfree(v->alg_name);
-
- if (v->hash_dev)
- dm_put_device(ti, v->hash_dev);
-
- if (v->data_dev)
- dm_put_device(ti, v->data_dev);
-
- kfree(v);
-}
-
-/*
- * Target parameters:
- * <version> The current format is version 1.
- * Vsn 0 is compatible with original Chromium OS releases.
- * <data device>
- * <hash device>
- * <data block size>
- * <hash block size>
- * <the number of data blocks>
- * <hash start block>
- * <algorithm>
- * <digest>
- * <salt> Hex string or "-" if no salt.
- */
-static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
-{
- struct dm_verity *v;
- struct dm_arg_set as;
- const char *opt_string;
- unsigned int num, opt_params;
- unsigned long long num_ll;
- int r;
- int i;
- sector_t hash_position;
- char dummy;
-
- static struct dm_arg _args[] = {
- {0, 1, "Invalid number of feature args"},
- };
-
- v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
- if (!v) {
- ti->error = "Cannot allocate verity structure";
- return -ENOMEM;
- }
- ti->private = v;
- v->ti = ti;
-
- if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
- ti->error = "Device must be readonly";
- r = -EINVAL;
- goto bad;
- }
-
- if (argc < 10) {
- ti->error = "Not enough arguments";
- r = -EINVAL;
- goto bad;
- }
-
- if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 ||
- num > 1) {
- ti->error = "Invalid version";
- r = -EINVAL;
- goto bad;
- }
- v->version = num;
-
- r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
- if (r) {
- ti->error = "Data device lookup failed";
- goto bad;
- }
-
- r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
- if (r) {
- ti->error = "Data device lookup failed";
- goto bad;
- }
-
- if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 ||
- !num || (num & (num - 1)) ||
- num < bdev_logical_block_size(v->data_dev->bdev) ||
- num > PAGE_SIZE) {
- ti->error = "Invalid data device block size";
- r = -EINVAL;
- goto bad;
- }
- v->data_dev_block_bits = __ffs(num);
-
- if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 ||
- !num || (num & (num - 1)) ||
- num < bdev_logical_block_size(v->hash_dev->bdev) ||
- num > INT_MAX) {
- ti->error = "Invalid hash device block size";
- r = -EINVAL;
- goto bad;
- }
- v->hash_dev_block_bits = __ffs(num);
-
- if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
- (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
- >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
- ti->error = "Invalid data blocks";
- r = -EINVAL;
- goto bad;
- }
- v->data_blocks = num_ll;
-
- if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) {
- ti->error = "Data device is too small";
- r = -EINVAL;
- goto bad;
- }
-
- if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
- (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
- >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
- ti->error = "Invalid hash start";
- r = -EINVAL;
- goto bad;
- }
- v->hash_start = num_ll;
-
- v->alg_name = kstrdup(argv[7], GFP_KERNEL);
- if (!v->alg_name) {
- ti->error = "Cannot allocate algorithm name";
- r = -ENOMEM;
- goto bad;
- }
-
- v->tfm = crypto_alloc_shash(v->alg_name, 0, 0);
- if (IS_ERR(v->tfm)) {
- ti->error = "Cannot initialize hash function";
- r = PTR_ERR(v->tfm);
- v->tfm = NULL;
- goto bad;
- }
- v->digest_size = crypto_shash_digestsize(v->tfm);
- if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
- ti->error = "Digest size too big";
- r = -EINVAL;
- goto bad;
- }
- v->shash_descsize =
- sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm);
-
- v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
- if (!v->root_digest) {
- ti->error = "Cannot allocate root digest";
- r = -ENOMEM;
- goto bad;
- }
- if (strlen(argv[8]) != v->digest_size * 2 ||
- hex2bin(v->root_digest, argv[8], v->digest_size)) {
- ti->error = "Invalid root digest";
- r = -EINVAL;
- goto bad;
- }
-
- if (strcmp(argv[9], "-")) {
- v->salt_size = strlen(argv[9]) / 2;
- v->salt = kmalloc(v->salt_size, GFP_KERNEL);
- if (!v->salt) {
- ti->error = "Cannot allocate salt";
- r = -ENOMEM;
- goto bad;
- }
- if (strlen(argv[9]) != v->salt_size * 2 ||
- hex2bin(v->salt, argv[9], v->salt_size)) {
- ti->error = "Invalid salt";
- r = -EINVAL;
- goto bad;
- }
- }
-
- argv += 10;
- argc -= 10;
-
- /* Optional parameters */
- if (argc) {
- as.argc = argc;
- as.argv = argv;
-
- r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
- if (r)
- goto bad;
-
- while (opt_params) {
- opt_params--;
- opt_string = dm_shift_arg(&as);
- if (!opt_string) {
- ti->error = "Not enough feature arguments";
- r = -EINVAL;
- goto bad;
- }
-
- if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING))
- v->mode = DM_VERITY_MODE_LOGGING;
- else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART))
- v->mode = DM_VERITY_MODE_RESTART;
- else {
- ti->error = "Invalid feature arguments";
- r = -EINVAL;
- goto bad;
- }
- }
- }
-
- v->hash_per_block_bits =
- __fls((1 << v->hash_dev_block_bits) / v->digest_size);
-
- v->levels = 0;
- if (v->data_blocks)
- while (v->hash_per_block_bits * v->levels < 64 &&
- (unsigned long long)(v->data_blocks - 1) >>
- (v->hash_per_block_bits * v->levels))
- v->levels++;
-
- if (v->levels > DM_VERITY_MAX_LEVELS) {
- ti->error = "Too many tree levels";
- r = -E2BIG;
- goto bad;
- }
-
- hash_position = v->hash_start;
- for (i = v->levels - 1; i >= 0; i--) {
- sector_t s;
- v->hash_level_block[i] = hash_position;
- s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1)
- >> ((i + 1) * v->hash_per_block_bits);
- if (hash_position + s < hash_position) {
- ti->error = "Hash device offset overflow";
- r = -E2BIG;
- goto bad;
- }
- hash_position += s;
- }
- v->hash_blocks = hash_position;
-
- v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
- 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
- dm_bufio_alloc_callback, NULL);
- if (IS_ERR(v->bufio)) {
- ti->error = "Cannot initialize dm-bufio";
- r = PTR_ERR(v->bufio);
- v->bufio = NULL;
- goto bad;
- }
-
- if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) {
- ti->error = "Hash device is too small";
- r = -E2BIG;
- goto bad;
- }
-
- ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io));
-
- /* WQ_UNBOUND greatly improves performance when running on ramdisk */
- v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
- if (!v->verify_wq) {
- ti->error = "Cannot allocate workqueue";
- r = -ENOMEM;
- goto bad;
- }
-
- return 0;
-
-bad:
- verity_dtr(ti);
-
- return r;
-}
-
-static struct target_type verity_target = {
- .name = "verity",
- .version = {1, 2, 0},
- .module = THIS_MODULE,
- .ctr = verity_ctr,
- .dtr = verity_dtr,
- .map = verity_map,
- .status = verity_status,
- .prepare_ioctl = verity_prepare_ioctl,
- .iterate_devices = verity_iterate_devices,
- .io_hints = verity_io_hints,
-};
-
-static int __init dm_verity_init(void)
-{
- int r;
-
- r = dm_register_target(&verity_target);
- if (r < 0)
- DMERR("register failed %d", r);
-
- return r;
-}
-
-static void __exit dm_verity_exit(void)
-{
- dm_unregister_target(&verity_target);
-}
-
-module_init(dm_verity_init);
-module_exit(dm_verity_exit);
-
-MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
-MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>");
-MODULE_AUTHOR("Will Drewry <wad@chromium.org>");
-MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking");
-MODULE_LICENSE("GPL");
--- /dev/null
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef DM_VERITY_H
+#define DM_VERITY_H
+
+#include "dm-bufio.h"
+#include <linux/device-mapper.h>
+#include <crypto/hash.h>
+
+#define DM_VERITY_MAX_LEVELS 63
+
+enum verity_mode {
+ DM_VERITY_MODE_EIO,
+ DM_VERITY_MODE_LOGGING,
+ DM_VERITY_MODE_RESTART
+};
+
+enum verity_block_type {
+ DM_VERITY_BLOCK_TYPE_DATA,
+ DM_VERITY_BLOCK_TYPE_METADATA
+};
+
+struct dm_verity_fec;
+
+struct dm_verity {
+ struct dm_dev *data_dev;
+ struct dm_dev *hash_dev;
+ struct dm_target *ti;
+ struct dm_bufio_client *bufio;
+ char *alg_name;
+ struct crypto_shash *tfm;
+ u8 *root_digest; /* digest of the root block */
+ u8 *salt; /* salt: its size is salt_size */
+ u8 *zero_digest; /* digest for a zero block */
+ unsigned salt_size;
+ sector_t data_start; /* data offset in 512-byte sectors */
+ sector_t hash_start; /* hash start in blocks */
+ sector_t data_blocks; /* the number of data blocks */
+ sector_t hash_blocks; /* the number of hash blocks */
+ unsigned char data_dev_block_bits; /* log2(data blocksize) */
+ unsigned char hash_dev_block_bits; /* log2(hash blocksize) */
+ unsigned char hash_per_block_bits; /* log2(hashes in hash block) */
+ unsigned char levels; /* the number of tree levels */
+ unsigned char version;
+ unsigned digest_size; /* digest size for the current hash algorithm */
+ unsigned shash_descsize;/* the size of temporary space for crypto */
+ int hash_failed; /* set to 1 if hash of any block failed */
+ enum verity_mode mode; /* mode for handling verification errors */
+ unsigned corrupted_errs;/* Number of errors for corrupted blocks */
+
+ struct workqueue_struct *verify_wq;
+
+ /* starting blocks for each tree level. 0 is the lowest level. */
+ sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
+
+ struct dm_verity_fec *fec; /* forward error correction */
+};
+
+struct dm_verity_io {
+ struct dm_verity *v;
+
+ /* original value of bio->bi_end_io */
+ bio_end_io_t *orig_bi_end_io;
+
+ sector_t block;
+ unsigned n_blocks;
+
+ struct bvec_iter iter;
+
+ struct work_struct work;
+
+ /*
+ * Three variably-size fields follow this struct:
+ *
+ * u8 hash_desc[v->shash_descsize];
+ * u8 real_digest[v->digest_size];
+ * u8 want_digest[v->digest_size];
+ *
+ * To access them use: verity_io_hash_desc(), verity_io_real_digest()
+ * and verity_io_want_digest().
+ */
+};
+
+static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ return (struct shash_desc *)(io + 1);
+}
+
+static inline u8 *verity_io_real_digest(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ return (u8 *)(io + 1) + v->shash_descsize;
+}
+
+static inline u8 *verity_io_want_digest(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
+}
+
+static inline u8 *verity_io_digest_end(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ return verity_io_want_digest(v, io) + v->digest_size;
+}
+
+extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+ struct bvec_iter *iter,
+ int (*process)(struct dm_verity *v,
+ struct dm_verity_io *io,
+ u8 *data, size_t len));
+
+extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+ const u8 *data, size_t len, u8 *digest);
+
+extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+ sector_t block, u8 *digest, bool *is_zero);
+
+#endif /* DM_VERITY_H */
Library providing immutable on-disk data structure support for
device-mapper targets such as the thin provisioning target.
-config DM_DEBUG_BLOCK_STACK_TRACING
- bool "Keep stack trace of persistent data block lock holders"
- depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA
- select STACKTRACE
- ---help---
- Enable this for messages that may help debug problems with the
- block manager locking used by thin provisioning and caching.
-
- If unsure, say N.
static int __check_holder(struct block_lock *lock)
{
unsigned i;
-#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
- static struct stack_trace t;
- static stack_entries entries;
-#endif
for (i = 0; i < MAX_HOLDERS; i++) {
if (lock->holders[i] == current) {
print_stack_trace(lock->traces + i, 4);
DMERR("subsequent acquisition attempted here:");
- t.nr_entries = 0;
- t.max_entries = MAX_STACK;
- t.entries = entries;
- t.skip = 3;
- save_stack_trace(&t);
- print_stack_trace(&t, 4);
+ dump_stack();
#endif
return -EINVAL;
}
return 0;
}
+static bool need_insert(struct btree_node *node, uint64_t *keys,
+ unsigned level, unsigned index)
+{
+ return ((index >= le32_to_cpu(node->header.nr_entries)) ||
+ (le64_to_cpu(node->keys[index]) != keys[level]));
+}
+
static int insert(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, void *value, dm_block_t *new_root,
int *inserted)
__dm_written_to_disk(value)
{
- int r, need_insert;
+ int r;
unsigned level, index = -1, last_level = info->levels - 1;
dm_block_t block = root;
struct shadow_spine spine;
goto bad;
n = dm_block_data(shadow_current(&spine));
- need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
- (le64_to_cpu(n->keys[index]) != keys[level]));
- if (need_insert) {
+ if (need_insert(n, keys, level, index)) {
dm_block_t new_tree;
__le64 new_le;
goto bad;
n = dm_block_data(shadow_current(&spine));
- need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
- (le64_to_cpu(n->keys[index]) != keys[level]));
- if (need_insert) {
+ if (need_insert(n, keys, level, index)) {
if (inserted)
*inserted = 1;
static int brb_pop(struct bop_ring_buffer *brb)
{
- struct block_op *bop;
-
if (brb_empty(brb))
return -ENODATA;
- bop = brb->bops + brb->begin;
brb->begin = brb_next(brb, brb->begin);
return 0;