git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
Merge tag 'dm-4.5-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Jan 2016 06:25:00 +0000 (22:25 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Jan 2016 06:25:00 +0000 (22:25 -0800)
Pull device mapper updates from Mike Snitzer:

 - The most significant set of changes this cycle is the Forward Error
   Correction (FEC) support that has been added to the DM verity target.

   Google uses DM verity on all Android devices and it is believed that
   this FEC support will enable DM verity to recover from storage
   failures seen since DM verity was first deployed as part of Android.

 - A stable fix for a race in the destruction of DM thin pool's
   workqueue

 - A stable fix for hung IO if a DM snapshot copy hit an error

 - A few small cleanups in DM core and DM persistent data.

 - A couple of DM thinp range discard improvements (address atomicity of
   finding a range and the efficiency of discarding a partially mapped
   thin device)

 - Add the ability to debug DM bufio leaks by recording a stack trace when
   a buffer is allocated.  When a leak is detected, the recorded stack is
   dumped.

* tag 'dm-4.5-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm snapshot: fix hung bios when copy error occurs
  dm thin: bump thin and thin-pool target versions
  dm thin: fix race condition when destroying thin pool workqueue
  dm space map metadata: remove unused variable in brb_pop()
  dm verity: add ignore_zero_blocks feature
  dm verity: add support for forward error correction
  dm verity: factor out verity_for_bv_block()
  dm verity: factor out structures and functions useful to separate object
  dm verity: move dm-verity.c to dm-verity-target.c
  dm verity: separate function for parsing opt args
  dm verity: clean up duplicate hashing code
  dm btree: factor out need_insert() helper
  dm bufio: use BUG_ON instead of conditional call to BUG
  dm bufio: store stacktrace in buffers to help find buffer leaks
  dm bufio: return NULL to improve code clarity
  dm block manager: cleanup code that prints stacktrace
  dm: don't save and restore bi_private
  dm thin metadata: make dm_thin_find_mapped_range() atomic
  dm thin metadata: speed up discard of partially mapped volumes

20 files changed:
Documentation/device-mapper/verity.txt
drivers/md/Kconfig
drivers/md/Makefile
drivers/md/dm-bufio.c
drivers/md/dm-cache-target.c
drivers/md/dm-exception-store.h
drivers/md/dm-snap-persistent.c
drivers/md/dm-snap-transient.c
drivers/md/dm-snap.c
drivers/md/dm-thin-metadata.c
drivers/md/dm-thin.c
drivers/md/dm-verity-fec.c [new file with mode: 0644]
drivers/md/dm-verity-fec.h [new file with mode: 0644]
drivers/md/dm-verity-target.c [new file with mode: 0644]
drivers/md/dm-verity.c [deleted file]
drivers/md/dm-verity.h [new file with mode: 0644]
drivers/md/persistent-data/Kconfig
drivers/md/persistent-data/dm-block-manager.c
drivers/md/persistent-data/dm-btree.c
drivers/md/persistent-data/dm-space-map-metadata.c

index e15bc1a0fb98ab23563681210cc6ed1865234816..89fd8f9a259f69b9c9423da9bb16771ed0596cad 100644 (file)
@@ -18,11 +18,11 @@ Construction Parameters
 
     0 is the original format used in the Chromium OS.
       The salt is appended when hashing, digests are stored continuously and
-      the rest of the block is padded with zeros.
+      the rest of the block is padded with zeroes.
 
     1 is the current format that should be used for new devices.
       The salt is prepended when hashing and each digest is
-      padded with zeros to the power of two.
+      padded with zeroes to the power of two.
 
 <dev>
     This is the device containing data, the integrity of which needs to be
@@ -79,6 +79,37 @@ restart_on_corruption
     not compatible with ignore_corruption and requires user space support to
     avoid restart loops.
 
+ignore_zero_blocks
+    Do not verify blocks that are expected to contain zeroes and always return
+    zeroes instead. This may be useful if the partition contains unused blocks
+    that are not guaranteed to contain zeroes.
+
+use_fec_from_device <fec_dev>
+    Use forward error correction (FEC) to recover from corruption if hash
+    verification fails. Use encoding data from the specified device. This
+    may be the same device where data and hash blocks reside, in which case
+    fec_start must be outside data and hash areas.
+
+    If the encoding data covers additional metadata, it must be accessible
+    on the hash device after the hash blocks.
+
+    Note: block sizes for data and hash devices must match. Also, if the
+    verity <dev> is encrypted, the <fec_dev> should be too.
+
+fec_roots <num>
+    Number of generator roots. This equals the number of parity bytes in
+    the encoding data. For example, in RS(M, N) encoding, the number of roots
+    is M-N.
+
+fec_blocks <num>
+    The number of encoding data blocks on the FEC device. The block size for
+    the FEC device is <data_block_size>.
+
+fec_start <offset>
+    This is the offset, in <data_block_size> blocks, from the start of the
+    FEC device to the beginning of the encoding data.
+
+
 Theory of operation
 ===================
 
@@ -98,6 +129,11 @@ per-block basis. This allows for a lightweight hash computation on first read
 into the page cache. Block hashes are stored linearly, aligned to the nearest
 block size.
 
+If forward error correction (FEC) support is enabled, any recovery of
+corrupted data will be verified using the cryptographic hash of the
+corresponding data. This is why combining error correction with
+integrity checking is essential.
+
 Hash Tree
 ---------
 
index 7913fdcfc8496bdd7c719a8c3334f6136c74eced..0a2e7273db9e996ea67d9d461172342aaa65d4f1 100644 (file)
@@ -240,6 +240,15 @@ config DM_BUFIO
         as a cache, holding recently-read blocks in memory and performing
         delayed writes.
 
+config DM_DEBUG_BLOCK_STACK_TRACING
+       bool "Keep stack trace of persistent data block lock holders"
+       depends on STACKTRACE_SUPPORT && DM_BUFIO
+       select STACKTRACE
+       ---help---
+        Enable this for messages that may help debug problems with the
+        block manager locking used by thin provisioning and caching.
+
+        If unsure, say N.
 config DM_BIO_PRISON
        tristate
        depends on BLK_DEV_DM
@@ -458,6 +467,18 @@ config DM_VERITY
 
          If unsure, say N.
 
+config DM_VERITY_FEC
+       bool "Verity forward error correction support"
+       depends on DM_VERITY
+       select REED_SOLOMON
+       select REED_SOLOMON_DEC8
+       ---help---
+         Add forward error correction support to dm-verity. This option
+         makes it possible to use pre-generated error correction data to
+         recover from corrupted blocks.
+
+         If unsure, say N.
+
 config DM_SWITCH
        tristate "Switch target support (EXPERIMENTAL)"
        depends on BLK_DEV_DM
index f34979cd141aed02d867f11f3f03c9403d327005..62a65764e8e0f093cede342f0eb4b7d0aec2eb4e 100644 (file)
@@ -16,6 +16,7 @@ dm-cache-mq-y   += dm-cache-policy-mq.o
 dm-cache-smq-y   += dm-cache-policy-smq.o
 dm-cache-cleaner-y += dm-cache-policy-cleaner.o
 dm-era-y       += dm-era-target.o
+dm-verity-y    += dm-verity-target.o
 md-mod-y       += md.o bitmap.o
 raid456-y      += raid5.o raid5-cache.o
 
@@ -63,3 +64,7 @@ obj-$(CONFIG_DM_LOG_WRITES)   += dm-log-writes.o
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs                    += dm-uevent.o
 endif
+
+ifeq ($(CONFIG_DM_VERITY_FEC),y)
+dm-verity-objs                 += dm-verity-fec.o
+endif
index 2dd33085b331da5bee79392dcf30f7e2c734f70b..6b832e06580dd6527bbf6e1a92e36c044db62f4a 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/shrinker.h>
 #include <linux/module.h>
 #include <linux/rbtree.h>
+#include <linux/stacktrace.h>
 
 #define DM_MSG_PREFIX "bufio"
 
@@ -149,6 +150,11 @@ struct dm_buffer {
        struct list_head write_list;
        struct bio bio;
        struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+#define MAX_STACK 10
+       struct stack_trace stack_trace;
+       unsigned long stack_entries[MAX_STACK];
+#endif
 };
 
 /*----------------------------------------------------------------*/
@@ -253,6 +259,17 @@ static LIST_HEAD(dm_bufio_all_clients);
  */
 static DEFINE_MUTEX(dm_bufio_clients_lock);
 
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+static void buffer_record_stack(struct dm_buffer *b)
+{
+       b->stack_trace.nr_entries = 0;
+       b->stack_trace.max_entries = MAX_STACK;
+       b->stack_trace.entries = b->stack_entries;
+       b->stack_trace.skip = 2;
+       save_stack_trace(&b->stack_trace);
+}
+#endif
+
 /*----------------------------------------------------------------
  * A red/black tree acts as an index for all the buffers.
  *--------------------------------------------------------------*/
@@ -454,6 +471,9 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
 
        adjust_total_allocated(b->data_mode, (long)c->block_size);
 
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+       memset(&b->stack_trace, 0, sizeof(b->stack_trace));
+#endif
        return b;
 }
 
@@ -1063,12 +1083,16 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
 
        dm_bufio_lock(c);
        b = __bufio_new(c, block, nf, &need_submit, &write_list);
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+       if (b && b->hold_count == 1)
+               buffer_record_stack(b);
+#endif
        dm_bufio_unlock(c);
 
        __flush_write_list(&write_list);
 
        if (!b)
-               return b;
+               return NULL;
 
        if (need_submit)
                submit_io(b, READ, b->block, read_endio);
@@ -1462,6 +1486,7 @@ static void drop_buffers(struct dm_bufio_client *c)
 {
        struct dm_buffer *b;
        int i;
+       bool warned = false;
 
        BUG_ON(dm_bufio_in_request());
 
@@ -1476,9 +1501,21 @@ static void drop_buffers(struct dm_bufio_client *c)
                __free_buffer_wake(b);
 
        for (i = 0; i < LIST_SIZE; i++)
-               list_for_each_entry(b, &c->lru[i], lru_list)
+               list_for_each_entry(b, &c->lru[i], lru_list) {
+                       WARN_ON(!warned);
+                       warned = true;
                        DMERR("leaked buffer %llx, hold count %u, list %d",
                              (unsigned long long)b->block, b->hold_count, i);
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+                       print_stack_trace(&b->stack_trace, 1);
+                       b->hold_count = 0; /* mark unclaimed to avoid BUG_ON below */
+#endif
+               }
+
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+       while ((b = __get_unclaimed_buffer(c)))
+               __free_buffer_wake(b);
+#endif
 
        for (i = 0; i < LIST_SIZE; i++)
                BUG_ON(!list_empty(&c->lru[i]));
@@ -1891,8 +1928,7 @@ static void __exit dm_bufio_exit(void)
                bug = 1;
        }
 
-       if (bug)
-               BUG();
+       BUG_ON(bug);
 }
 
 module_init(dm_bufio_init)
index 2fd4c82961441e08b2b20d8db98bfb2bad86e05e..5780accffa3059dfa979ef8ff81752bfabd46452 100644 (file)
@@ -118,14 +118,12 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
  */
 struct dm_hook_info {
        bio_end_io_t *bi_end_io;
-       void *bi_private;
 };
 
 static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
                        bio_end_io_t *bi_end_io, void *bi_private)
 {
        h->bi_end_io = bio->bi_end_io;
-       h->bi_private = bio->bi_private;
 
        bio->bi_end_io = bi_end_io;
        bio->bi_private = bi_private;
@@ -134,7 +132,6 @@ static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
 static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
 {
        bio->bi_end_io = h->bi_end_io;
-       bio->bi_private = h->bi_private;
 }
 
 /*----------------------------------------------------------------*/
index fae34e7a0b1e4e4d60b5867eff9422e432fba83b..12b5216c2cfed2758d19f55ba9d2c2d4f58d4deb 100644 (file)
@@ -69,7 +69,7 @@ struct dm_exception_store_type {
         * Update the metadata with this exception.
         */
        void (*commit_exception) (struct dm_exception_store *store,
-                                 struct dm_exception *e,
+                                 struct dm_exception *e, int valid,
                                  void (*callback) (void *, int success),
                                  void *callback_context);
 
index 3164b8bce2948591999f429e4633235c1f7b1062..4d3909393f2cce5488ced8843ccab1375d40d2d9 100644 (file)
@@ -695,7 +695,7 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
 }
 
 static void persistent_commit_exception(struct dm_exception_store *store,
-                                       struct dm_exception *e,
+                                       struct dm_exception *e, int valid,
                                        void (*callback) (void *, int success),
                                        void *callback_context)
 {
@@ -704,6 +704,9 @@ static void persistent_commit_exception(struct dm_exception_store *store,
        struct core_exception ce;
        struct commit_callback *cb;
 
+       if (!valid)
+               ps->valid = 0;
+
        ce.old_chunk = e->old_chunk;
        ce.new_chunk = e->new_chunk;
        write_exception(ps, ps->current_committed++, &ce);
index 9b7c8c8049d6186f54bdfec114c43cb3ce4d77fa..4d50a12cf00c699b85a0df1c0b1dd86f0137b691 100644 (file)
@@ -52,12 +52,12 @@ static int transient_prepare_exception(struct dm_exception_store *store,
 }
 
 static void transient_commit_exception(struct dm_exception_store *store,
-                                      struct dm_exception *e,
+                                      struct dm_exception *e, int valid,
                                       void (*callback) (void *, int success),
                                       void *callback_context)
 {
        /* Just succeed */
-       callback(callback_context, 1);
+       callback(callback_context, valid);
 }
 
 static void transient_usage(struct dm_exception_store *store,
index c06b74e91cd6aeef00ef4eefae9953d4d8c8f91b..3766386080a48fbfb06226ae80646b5e2f0e653a 100644 (file)
@@ -207,7 +207,6 @@ struct dm_snap_pending_exception {
         */
        struct bio *full_bio;
        bio_end_io_t *full_bio_end_io;
-       void *full_bio_private;
 };
 
 /*
@@ -1438,8 +1437,9 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
        dm_table_event(s->ti->table);
 }
 
-static void pending_complete(struct dm_snap_pending_exception *pe, int success)
+static void pending_complete(void *context, int success)
 {
+       struct dm_snap_pending_exception *pe = context;
        struct dm_exception *e;
        struct dm_snapshot *s = pe->snap;
        struct bio *origin_bios = NULL;
@@ -1485,10 +1485,8 @@ out:
        snapshot_bios = bio_list_get(&pe->snapshot_bios);
        origin_bios = bio_list_get(&pe->origin_bios);
        full_bio = pe->full_bio;
-       if (full_bio) {
+       if (full_bio)
                full_bio->bi_end_io = pe->full_bio_end_io;
-               full_bio->bi_private = pe->full_bio_private;
-       }
        increment_pending_exceptions_done_count();
 
        up_write(&s->lock);
@@ -1509,24 +1507,13 @@ out:
        free_pending_exception(pe);
 }
 
-static void commit_callback(void *context, int success)
-{
-       struct dm_snap_pending_exception *pe = context;
-
-       pending_complete(pe, success);
-}
-
 static void complete_exception(struct dm_snap_pending_exception *pe)
 {
        struct dm_snapshot *s = pe->snap;
 
-       if (unlikely(pe->copy_error))
-               pending_complete(pe, 0);
-
-       else
-               /* Update the metadata if we are persistent */
-               s->store->type->commit_exception(s->store, &pe->e,
-                                                commit_callback, pe);
+       /* Update the metadata if we are persistent */
+       s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
+                                        pending_complete, pe);
 }
 
 /*
@@ -1605,7 +1592,6 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
 
        pe->full_bio = bio;
        pe->full_bio_end_io = bio->bi_end_io;
-       pe->full_bio_private = bio->bi_private;
 
        callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
                                                   copy_callback, pe);
index c219a053c7f66d1ebae80a19b005af38914dbe8c..f962d6453afd64e33b739253ca4166a276307dab 100644 (file)
@@ -1395,8 +1395,21 @@ static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time)
        return td->snapshotted_time > time;
 }
 
-int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
-                      int can_issue_io, struct dm_thin_lookup_result *result)
+static void unpack_lookup_result(struct dm_thin_device *td, __le64 value,
+                                struct dm_thin_lookup_result *result)
+{
+       uint64_t block_time = 0;
+       dm_block_t exception_block;
+       uint32_t exception_time;
+
+       block_time = le64_to_cpu(value);
+       unpack_block_time(block_time, &exception_block, &exception_time);
+       result->block = exception_block;
+       result->shared = __snapshotted_since(td, exception_time);
+}
+
+static int __find_block(struct dm_thin_device *td, dm_block_t block,
+                       int can_issue_io, struct dm_thin_lookup_result *result)
 {
        int r;
        __le64 value;
@@ -1404,39 +1417,56 @@ int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
        dm_block_t keys[2] = { td->id, block };
        struct dm_btree_info *info;
 
-       down_read(&pmd->root_lock);
-       if (pmd->fail_io) {
-               up_read(&pmd->root_lock);
-               return -EINVAL;
-       }
-
        if (can_issue_io) {
                info = &pmd->info;
        } else
                info = &pmd->nb_info;
 
        r = dm_btree_lookup(info, pmd->root, keys, &value);
-       if (!r) {
-               uint64_t block_time = 0;
-               dm_block_t exception_block;
-               uint32_t exception_time;
-
-               block_time = le64_to_cpu(value);
-               unpack_block_time(block_time, &exception_block,
-                                 &exception_time);
-               result->block = exception_block;
-               result->shared = __snapshotted_since(td, exception_time);
+       if (!r)
+               unpack_lookup_result(td, value, result);
+
+       return r;
+}
+
+int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
+                      int can_issue_io, struct dm_thin_lookup_result *result)
+{
+       int r;
+       struct dm_pool_metadata *pmd = td->pmd;
+
+       down_read(&pmd->root_lock);
+       if (pmd->fail_io) {
+               up_read(&pmd->root_lock);
+               return -EINVAL;
        }
 
+       r = __find_block(td, block, can_issue_io, result);
+
        up_read(&pmd->root_lock);
        return r;
 }
 
-/* FIXME: write a more efficient one in btree */
-int dm_thin_find_mapped_range(struct dm_thin_device *td,
-                             dm_block_t begin, dm_block_t end,
-                             dm_block_t *thin_begin, dm_block_t *thin_end,
-                             dm_block_t *pool_begin, bool *maybe_shared)
+static int __find_next_mapped_block(struct dm_thin_device *td, dm_block_t block,
+                                         dm_block_t *vblock,
+                                         struct dm_thin_lookup_result *result)
+{
+       int r;
+       __le64 value;
+       struct dm_pool_metadata *pmd = td->pmd;
+       dm_block_t keys[2] = { td->id, block };
+
+       r = dm_btree_lookup_next(&pmd->info, pmd->root, keys, vblock, &value);
+       if (!r)
+               unpack_lookup_result(td, value, result);
+
+       return r;
+}
+
+static int __find_mapped_range(struct dm_thin_device *td,
+                              dm_block_t begin, dm_block_t end,
+                              dm_block_t *thin_begin, dm_block_t *thin_end,
+                              dm_block_t *pool_begin, bool *maybe_shared)
 {
        int r;
        dm_block_t pool_end;
@@ -1445,21 +1475,11 @@ int dm_thin_find_mapped_range(struct dm_thin_device *td,
        if (end < begin)
                return -ENODATA;
 
-       /*
-        * Find first mapped block.
-        */
-       while (begin < end) {
-               r = dm_thin_find_block(td, begin, true, &lookup);
-               if (r) {
-                       if (r != -ENODATA)
-                               return r;
-               } else
-                       break;
-
-               begin++;
-       }
+       r = __find_next_mapped_block(td, begin, &begin, &lookup);
+       if (r)
+               return r;
 
-       if (begin == end)
+       if (begin >= end)
                return -ENODATA;
 
        *thin_begin = begin;
@@ -1469,7 +1489,7 @@ int dm_thin_find_mapped_range(struct dm_thin_device *td,
        begin++;
        pool_end = *pool_begin + 1;
        while (begin != end) {
-               r = dm_thin_find_block(td, begin, true, &lookup);
+               r = __find_block(td, begin, true, &lookup);
                if (r) {
                        if (r == -ENODATA)
                                break;
@@ -1489,6 +1509,24 @@ int dm_thin_find_mapped_range(struct dm_thin_device *td,
        return 0;
 }
 
+int dm_thin_find_mapped_range(struct dm_thin_device *td,
+                             dm_block_t begin, dm_block_t end,
+                             dm_block_t *thin_begin, dm_block_t *thin_end,
+                             dm_block_t *pool_begin, bool *maybe_shared)
+{
+       int r = -EINVAL;
+       struct dm_pool_metadata *pmd = td->pmd;
+
+       down_read(&pmd->root_lock);
+       if (!pmd->fail_io) {
+               r = __find_mapped_range(td, begin, end, thin_begin, thin_end,
+                                       pool_begin, maybe_shared);
+       }
+       up_read(&pmd->root_lock);
+
+       return r;
+}
+
 static int __insert(struct dm_thin_device *td, dm_block_t block,
                    dm_block_t data_block)
 {
index 63903a5a5d9ee3b580d552673b42b9716b99322c..72d91f477683f2c1dfad5a3a9614aaf0b26992e1 100644 (file)
@@ -3453,8 +3453,8 @@ static void pool_postsuspend(struct dm_target *ti)
        struct pool_c *pt = ti->private;
        struct pool *pool = pt->pool;
 
-       cancel_delayed_work(&pool->waker);
-       cancel_delayed_work(&pool->no_space_timeout);
+       cancel_delayed_work_sync(&pool->waker);
+       cancel_delayed_work_sync(&pool->no_space_timeout);
        flush_workqueue(pool->wq);
        (void) commit(pool);
 }
@@ -3886,7 +3886,7 @@ static struct target_type pool_target = {
        .name = "thin-pool",
        .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
                    DM_TARGET_IMMUTABLE,
-       .version = {1, 16, 0},
+       .version = {1, 17, 0},
        .module = THIS_MODULE,
        .ctr = pool_ctr,
        .dtr = pool_dtr,
@@ -4260,7 +4260,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
        .name = "thin",
-       .version = {1, 16, 0},
+       .version = {1, 17, 0},
        .module = THIS_MODULE,
        .ctr = thin_ctr,
        .dtr = thin_dtr,
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
new file mode 100644 (file)
index 0000000..1cc10c4
--- /dev/null
@@ -0,0 +1,818 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include "dm-verity-fec.h"
+#include <linux/math64.h>
+
+#define DM_MSG_PREFIX  "verity-fec"
+
+/*
+ * If error correction has been configured, returns true.
+ */
+bool verity_fec_is_enabled(struct dm_verity *v)
+{
+       return v->fec && v->fec->dev;
+}
+
+/*
+ * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
+ * length fields.
+ */
+static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io)
+{
+       return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io);
+}
+
+/*
+ * Return an interleaved offset for a byte in RS block.
+ */
+static inline u64 fec_interleave(struct dm_verity *v, u64 offset)
+{
+       u32 mod;
+
+       mod = do_div(offset, v->fec->rsn);
+       return offset + mod * (v->fec->rounds << v->data_dev_block_bits);
+}
+
+/*
+ * Decode an RS block using Reed-Solomon.
+ */
+static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
+                         u8 *data, u8 *fec, int neras)
+{
+       int i;
+       uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
+
+       for (i = 0; i < v->fec->roots; i++)
+               par[i] = fec[i];
+
+       return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras,
+                         fio->erasures, 0, NULL);
+}
+
+/*
+ * Read error-correcting codes for the requested RS block. Returns a pointer
+ * to the data block. Caller is responsible for releasing buf.
+ */
+static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
+                          unsigned *offset, struct dm_buffer **buf)
+{
+       u64 position, block;
+       u8 *res;
+
+       position = (index + rsb) * v->fec->roots;
+       block = position >> v->data_dev_block_bits;
+       *offset = (unsigned)(position - (block << v->data_dev_block_bits));
+
+       res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
+       if (unlikely(IS_ERR(res))) {
+               DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
+                     v->data_dev->name, (unsigned long long)rsb,
+                     (unsigned long long)(v->fec->start + block),
+                     PTR_ERR(res));
+               *buf = NULL;
+       }
+
+       return res;
+}
+
+/* Loop over each preallocated buffer slot. */
+#define fec_for_each_prealloc_buffer(__i) \
+       for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++)
+
+/* Loop over each extra buffer slot. */
+#define fec_for_each_extra_buffer(io, __i) \
+       for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++)
+
+/* Loop over each allocated buffer. */
+#define fec_for_each_buffer(io, __i) \
+       for (__i = 0; __i < (io)->nbufs; __i++)
+
+/* Loop over each RS block in each allocated buffer. */
+#define fec_for_each_buffer_rs_block(io, __i, __j) \
+       fec_for_each_buffer(io, __i) \
+               for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++)
+
+/*
+ * Return a pointer to the current RS block when called inside
+ * fec_for_each_buffer_rs_block.
+ */
+static inline u8 *fec_buffer_rs_block(struct dm_verity *v,
+                                     struct dm_verity_fec_io *fio,
+                                     unsigned i, unsigned j)
+{
+       return &fio->bufs[i][j * v->fec->rsn];
+}
+
+/*
+ * Return an index to the current RS block when called inside
+ * fec_for_each_buffer_rs_block.
+ */
+static inline unsigned fec_buffer_rs_index(unsigned i, unsigned j)
+{
+       return (i << DM_VERITY_FEC_BUF_RS_BITS) + j;
+}
+
+/*
+ * Decode all RS blocks from buffers and copy corrected bytes into fio->output
+ * starting from block_offset.
+ */
+static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
+                          u64 rsb, int byte_index, unsigned block_offset,
+                          int neras)
+{
+       int r, corrected = 0, res;
+       struct dm_buffer *buf;
+       unsigned n, i, offset;
+       u8 *par, *block;
+
+       par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+       if (IS_ERR(par))
+               return PTR_ERR(par);
+
+       /*
+        * Decode the RS blocks we have in bufs. Each RS block results in
+        * one corrected target byte and consumes fec->roots parity bytes.
+        */
+       fec_for_each_buffer_rs_block(fio, n, i) {
+               block = fec_buffer_rs_block(v, fio, n, i);
+               res = fec_decode_rs8(v, fio, block, &par[offset], neras);
+               if (res < 0) {
+                       dm_bufio_release(buf);
+
+                       r = res;
+                       goto error;
+               }
+
+               corrected += res;
+               fio->output[block_offset] = block[byte_index];
+
+               block_offset++;
+               if (block_offset >= 1 << v->data_dev_block_bits)
+                       goto done;
+
+               /* read the next block when we run out of parity bytes */
+               offset += v->fec->roots;
+               if (offset >= 1 << v->data_dev_block_bits) {
+                       dm_bufio_release(buf);
+
+                       par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+                       if (unlikely(IS_ERR(par)))
+                               return PTR_ERR(par);
+               }
+       }
+done:
+       r = corrected;
+error:
+       if (r < 0 && neras)
+               DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
+                           v->data_dev->name, (unsigned long long)rsb, r);
+       else if (r > 0)
+               DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
+                            v->data_dev->name, (unsigned long long)rsb, r);
+
+       return r;
+}
+
+/*
+ * Locate data block erasures using verity hashes.
+ */
+static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
+                         u8 *want_digest, u8 *data)
+{
+       if (unlikely(verity_hash(v, verity_io_hash_desc(v, io),
+                                data, 1 << v->data_dev_block_bits,
+                                verity_io_real_digest(v, io))))
+               return 0;
+
+       return memcmp(verity_io_real_digest(v, io), want_digest,
+                     v->digest_size) != 0;
+}
+
+/*
+ * Read data blocks that are part of the RS block and deinterleave as much as
+ * fits into buffers. Check for erasure locations if @neras is non-NULL.
+ *
+ * @rsb:          RS block to decode; rsb * rsn + i is fed to fec_interleave()
+ * @target:       byte offset of the block being corrected
+ * @block_offset: first byte of each block to deinterleave in this pass
+ * @neras:        if non-NULL, reset to 0 and then incremented for every
+ *                erasure position stored in fio->erasures
+ *
+ * Returns the index of @target among the rsn blocks, or -1 if @target was
+ * not produced by the interleaver for this RS block.
+ */
+static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
+                        u64 rsb, u64 target, unsigned block_offset,
+                        int *neras)
+{
+       bool is_zero;
+       int i, j, target_index = -1;
+       struct dm_buffer *buf;
+       struct dm_bufio_client *bufio;
+       struct dm_verity_fec_io *fio = fec_io(io);
+       u64 block, ileaved;
+       u8 *bbuf, *rs_block;
+       u8 want_digest[v->digest_size];
+       unsigned n, k;
+
+       if (neras)
+               *neras = 0;
+
+       /*
+        * read each of the rsn data blocks that are part of the RS block, and
+        * interleave contents to available bufs
+        */
+       for (i = 0; i < v->fec->rsn; i++) {
+               ileaved = fec_interleave(v, rsb * v->fec->rsn + i);
+
+               /*
+                * target is the data block we want to correct, target_index is
+                * the index of this block within the rsn RS blocks
+                */
+               if (ileaved == target)
+                       target_index = i;
+
+               block = ileaved >> v->data_dev_block_bits;
+               bufio = v->fec->data_bufio;
+
+               if (block >= v->data_blocks) {
+                       block -= v->data_blocks;
+
+                       /*
+                        * blocks outside the area were assumed to contain
+                        * zeros when encoding data was generated
+                        */
+                       if (unlikely(block >= v->fec->hash_blocks))
+                               continue;
+
+                       /* the block lives on the hash device */
+                       block += v->hash_start;
+                       bufio = v->bufio;
+               }
+
+               bbuf = dm_bufio_read(bufio, block, &buf);
+               if (unlikely(IS_ERR(bbuf))) {
+                       DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
+                                    v->data_dev->name,
+                                    (unsigned long long)rsb,
+                                    (unsigned long long)block, PTR_ERR(bbuf));
+
+                       /* assume the block is corrupted */
+                       if (neras && *neras <= v->fec->roots)
+                               fio->erasures[(*neras)++] = i;
+
+                       /* nothing was read, so there is no buffer to release */
+                       continue;
+               }
+
+               /* locate erasures if the block is on the data device */
+               if (bufio == v->fec->data_bufio &&
+                   verity_hash_for_block(v, io, block, want_digest,
+                                         &is_zero) == 0) {
+                       /*
+                        * skip known zero blocks entirely, but still release
+                        * the buffer obtained from dm_bufio_read() above
+                        */
+                       if (is_zero)
+                               goto done;
+
+                       /*
+                        * skip if we have already found the theoretical
+                        * maximum number (i.e. fec->roots) of erasures
+                        */
+                       if (neras && *neras <= v->fec->roots &&
+                           fec_is_erasure(v, io, want_digest, bbuf))
+                               fio->erasures[(*neras)++] = i;
+               }
+
+               /*
+                * deinterleave and copy the bytes that fit into bufs,
+                * starting from block_offset
+                */
+               fec_for_each_buffer_rs_block(fio, n, j) {
+                       k = fec_buffer_rs_index(n, j) + block_offset;
+
+                       if (k >= 1 << v->data_dev_block_bits)
+                               goto done;
+
+                       rs_block = fec_buffer_rs_block(v, fio, n, j);
+                       rs_block[i] = bbuf[k];
+               }
+done:
+               dm_bufio_release(buf);
+       }
+
+       return target_index;
+}
+
+/*
+ * Allocate RS control structure and FEC buffers from preallocated mempools,
+ * and attempt to allocate as many extra buffers as available.
+ *
+ * Anything already present in @fio is kept, so the function may be called
+ * repeatedly for the same I/O. On return fio->nbufs holds the number of
+ * usable deinterleaving buffers.
+ *
+ * Returns 0 on success or -ENOMEM if a mandatory allocation failed.
+ */
+static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
+{
+       unsigned n;
+
+       if (!fio->rs) {
+               /*
+                * A gfp mask of 0 forbids sleeping, so the allocation could
+                * fail whenever the pool was momentarily empty. GFP_NOIO lets
+                * mempool_alloc() wait for an element instead.
+                */
+               fio->rs = mempool_alloc(v->fec->rs_pool, GFP_NOIO);
+               if (unlikely(!fio->rs)) {
+                       DMERR("failed to allocate RS");
+                       return -ENOMEM;
+               }
+       }
+
+       fec_for_each_prealloc_buffer(n) {
+               if (fio->bufs[n])
+                       continue;
+
+               fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO);
+               if (unlikely(!fio->bufs[n])) {
+                       DMERR("failed to allocate FEC buffer");
+                       return -ENOMEM;
+               }
+       }
+
+       /* try to allocate the maximum number of buffers */
+       fec_for_each_extra_buffer(fio, n) {
+               if (fio->bufs[n])
+                       continue;
+
+               /*
+                * Extra buffers are optional, so the allocation must not
+                * sleep: with a sleeping mask mempool_alloc() never returns
+                * NULL and the fallback below would be unreachable.
+                */
+               fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOWAIT);
+               /* we can manage with even one buffer if necessary */
+               if (unlikely(!fio->bufs[n]))
+                       break;
+       }
+       fio->nbufs = n;
+
+       if (!fio->output) {
+               fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO);
+
+               if (!fio->output) {
+                       DMERR("failed to allocate FEC page");
+                       return -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Reset the erasure list and zero every allocated deinterleaving buffer.
+ * fec_read_bufs() relies on the buffers being zeroed before it fills them.
+ */
+static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
+{
+       const size_t buf_len = v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS;
+       unsigned idx;
+
+       memset(fio->erasures, 0, sizeof(fio->erasures));
+
+       fec_for_each_buffer(fio, idx) {
+               memset(fio->bufs[idx], 0, buf_len);
+       }
+}
+
+/*
+ * Decode every RS block that contributes to one data block and leave the
+ * corrected block (selected by @offset) in fio->output. Erasure locations
+ * are derived from hashes only when @use_erasures is set.
+ *
+ * Returns 0 if the decoded block matches verity_io_want_digest(), -EILSEQ if
+ * it does not, or the negative error of the step that failed.
+ */
+static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
+                         struct dm_verity_fec_io *fio, u64 rsb, u64 offset,
+                         bool use_erasures)
+{
+       unsigned pos = 0;
+       int neras = 0;
+       int target_index;
+       int r;
+
+       r = fec_alloc_bufs(v, fio);
+       if (unlikely(r < 0))
+               return r;
+
+       /* each pass handles as many bytes of the block as the buffers hold */
+       while (pos < 1 << v->data_dev_block_bits) {
+               fec_init_bufs(v, fio);
+
+               target_index = fec_read_bufs(v, io, rsb, offset, pos,
+                                            use_erasures ? &neras : NULL);
+               if (unlikely(target_index < 0))
+                       return target_index;
+
+               r = fec_decode_bufs(v, fio, rsb, target_index, pos, neras);
+               if (r < 0)
+                       return r;
+
+               pos += fio->nbufs << DM_VERITY_FEC_BUF_RS_BITS;
+       }
+
+       /* Always re-validate the corrected block against the expected hash */
+       r = verity_hash(v, verity_io_hash_desc(v, io), fio->output,
+                       1 << v->data_dev_block_bits,
+                       verity_io_real_digest(v, io));
+       if (unlikely(r < 0))
+               return r;
+
+       if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io),
+                  v->digest_size) == 0)
+               return 0;
+
+       DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
+                   v->data_dev->name, (unsigned long long)rsb, neras);
+       return -EILSEQ;
+}
+
+/*
+ * Copy the next @len bytes of the corrected block in fio->output to @data
+ * and advance fio->output_pos past them. Always returns 0.
+ */
+static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data,
+                      size_t len)
+{
+       struct dm_verity_fec_io *fio = fec_io(io);
+       const u8 *src = &fio->output[fio->output_pos];
+
+       memcpy(data, src, len);
+       fio->output_pos += len;
+
+       return 0;
+}
+
+/*
+ * Correct errors in a block. Copies corrected block to dest if non-NULL,
+ * otherwise to a bio_vec starting from iter.
+ *
+ * @type:  whether @block is a data block or a metadata (hash device) block
+ * @block: block number on the device selected by @type
+ *
+ * Returns 0 on success, -EOPNOTSUPP if FEC is not enabled, or the negative
+ * error from decoding or from copying the result.
+ */
+int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+                     enum verity_block_type type, sector_t block, u8 *dest,
+                     struct bvec_iter *iter)
+{
+       int r;
+       struct dm_verity_fec_io *fio = fec_io(io);
+       u64 offset, res, rsb;
+
+       if (!verity_fec_is_enabled(v))
+               return -EOPNOTSUPP;
+
+       /*
+        * FEC treats hash blocks as if they directly followed the data
+        * blocks. Metadata block numbers are relative to the hash device, so
+        * drop hash_start first; this is the inverse of the mapping applied
+        * in fec_read_bufs().
+        */
+       if (type == DM_VERITY_BLOCK_TYPE_METADATA)
+               block = block - v->hash_start + v->data_blocks;
+
+       /*
+        * For RS(M, N), the continuous FEC data is divided into blocks of N
+        * bytes. Since block size may not be divisible by N, the last block
+        * is zero padded when decoding.
+        *
+        * Each byte of the block is covered by a different RS(M, N) code,
+        * and each code is interleaved over N blocks to make it less likely
+        * that bursty corruption will leave us in unrecoverable state.
+        */
+
+       offset = block << v->data_dev_block_bits;
+
+       /* div64_u64() returns the quotient; it does not modify its argument */
+       res = div64_u64(offset, v->fec->rounds << v->data_dev_block_bits);
+
+       /*
+        * The base RS block we can feed to the interleaver to find out all
+        * blocks required for decoding.
+        */
+       rsb = offset - res * (v->fec->rounds << v->data_dev_block_bits);
+
+       /*
+        * Locating erasures is slow, so attempt to recover the block without
+        * them first. Do a second attempt with erasures if the corruption is
+        * bad enough.
+        */
+       r = fec_decode_rsb(v, io, fio, rsb, offset, false);
+       if (r < 0) {
+               r = fec_decode_rsb(v, io, fio, rsb, offset, true);
+               if (r < 0)
+                       return r;
+       }
+
+       if (dest)
+               memcpy(dest, fio->output, 1 << v->data_dev_block_bits);
+       else if (iter) {
+               /* fec_bv_copy() consumes fio->output from output_pos onwards */
+               fio->output_pos = 0;
+               r = verity_for_bv_block(v, io, iter, fec_bv_copy);
+       }
+
+       return r;
+}
+
+/*
+ * Return everything held in the per-bio FEC state to its mempool. Does
+ * nothing when FEC is not enabled.
+ */
+void verity_fec_finish_io(struct dm_verity_io *io)
+{
+       struct dm_verity_fec_io *fio;
+       struct dm_verity_fec *fec;
+       unsigned idx;
+
+       if (!verity_fec_is_enabled(io->v))
+               return;
+
+       fec = io->v->fec;
+       fio = fec_io(io);
+
+       mempool_free(fio->rs, fec->rs_pool);
+
+       fec_for_each_prealloc_buffer(idx) {
+               mempool_free(fio->bufs[idx], fec->prealloc_pool);
+       }
+
+       fec_for_each_extra_buffer(fio, idx) {
+               mempool_free(fio->bufs[idx], fec->extra_pool);
+       }
+
+       mempool_free(fio->output, fec->output_pool);
+}
+
+/*
+ * Put the per-bio FEC state into its "nothing allocated" form: no RS
+ * control structure, no buffers and no output page. Does nothing when FEC
+ * is not enabled.
+ */
+void verity_fec_init_io(struct dm_verity_io *io)
+{
+       struct dm_verity_fec_io *fio;
+
+       if (!verity_fec_is_enabled(io->v))
+               return;
+
+       fio = fec_io(io);
+
+       fio->nbufs = 0;
+       memset(fio->bufs, 0, sizeof(fio->bufs));
+       fio->output = NULL;
+       fio->rs = NULL;
+}
+
+/*
+ * Emit the FEC feature arguments and their values into the status table
+ * text at position @sz and return the updated position. Returns @sz
+ * unchanged when FEC is not enabled.
+ */
+unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
+                                char *result, unsigned maxlen)
+{
+       const struct dm_verity_fec *fec;
+
+       if (!verity_fec_is_enabled(v))
+               return sz;
+
+       fec = v->fec;
+
+       /* DMEMIT uses sz, result and maxlen implicitly */
+       DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s "
+              DM_VERITY_OPT_FEC_BLOCKS " %llu "
+              DM_VERITY_OPT_FEC_START " %llu "
+              DM_VERITY_OPT_FEC_ROOTS " %d",
+              fec->dev->name,
+              (unsigned long long)fec->blocks,
+              (unsigned long long)fec->start,
+              fec->roots);
+
+       return sz;
+}
+
+/*
+ * Release everything set up by verity_fec_ctr_alloc(), the option parser
+ * and verity_fec_ctr(), then clear v->fec. Safe to call on a partially
+ * constructed configuration.
+ */
+void verity_fec_dtr(struct dm_verity *v)
+{
+       struct dm_verity_fec *f = v->fec;
+
+       if (!verity_fec_is_enabled(v))
+               goto out;
+
+       mempool_destroy(f->rs_pool);
+       mempool_destroy(f->prealloc_pool);
+       mempool_destroy(f->extra_pool);
+       /* output_pool is created in verity_fec_ctr() and must be freed too */
+       mempool_destroy(f->output_pool);
+       kmem_cache_destroy(f->cache);
+
+       /*
+        * verity_fec_ctr() stores the return value of
+        * dm_bufio_client_create() directly, so after a failed constructor
+        * these may hold an ERR_PTR rather than NULL.
+        */
+       if (!IS_ERR_OR_NULL(f->data_bufio))
+               dm_bufio_client_destroy(f->data_bufio);
+       if (!IS_ERR_OR_NULL(f->bufio))
+               dm_bufio_client_destroy(f->bufio);
+
+       if (f->dev)
+               dm_put_device(v->ti, f->dev);
+out:
+       kfree(f);
+       v->fec = NULL;
+}
+
+/*
+ * mempool allocation callback for the RS pool: @pool_data is the
+ * struct dm_verity the pool was created for. @gfp_mask is not used.
+ */
+static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data)
+{
+       struct dm_verity *v = pool_data;
+       const int nroots = v->fec->roots;
+
+       return init_rs(8, 0x11d, 0, 1, nroots);
+}
+
+/*
+ * mempool free callback for the RS pool; a NULL @element is ignored.
+ */
+static void fec_rs_free(void *element, void *pool_data)
+{
+       struct rs_control *rs = element;
+
+       if (!rs)
+               return;
+
+       free_rs(rs);
+}
+
+/*
+ * Return true if @arg_name is one of the FEC feature argument names,
+ * compared case-insensitively.
+ */
+bool verity_is_fec_opt_arg(const char *arg_name)
+{
+       if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV))
+               return true;
+       if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS))
+               return true;
+       if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START))
+               return true;
+
+       return !strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS);
+}
+
+/*
+ * Parse @arg_value as a block count. Succeeds only if the whole string is a
+ * single unsigned number that survives conversion to sectors in a sector_t
+ * without losing bits.
+ */
+static bool fec_parse_block_count(struct dm_verity *v, const char *arg_value,
+                                 unsigned long long *num)
+{
+       char dummy;
+
+       if (sscanf(arg_value, "%llu%c", num, &dummy) != 1)
+               return false;
+
+       return ((sector_t)(*num << (v->data_dev_block_bits - SECTOR_SHIFT)) >>
+               (v->data_dev_block_bits - SECTOR_SHIFT)) == *num;
+}
+
+/*
+ * Consume the value of the FEC feature argument @arg_name from @as,
+ * decrement *@argc and store the parsed value in v->fec.
+ *
+ * Returns 0 on success. On failure ti->error is set and a negative errno is
+ * returned (-EINVAL for a missing, malformed or unknown argument, or the
+ * error from dm_get_device()).
+ */
+int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
+                             unsigned *argc, const char *arg_name)
+{
+       struct dm_target *ti = v->ti;
+       const char *arg_value;
+       unsigned long long num_ll;
+       unsigned char num_c;
+       char dummy;
+       int r;
+
+       if (!*argc) {
+               ti->error = "FEC feature arguments require a value";
+               return -EINVAL;
+       }
+
+       arg_value = dm_shift_arg(as);
+       (*argc)--;
+
+       if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) {
+               r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev);
+               if (r) {
+                       ti->error = "FEC device lookup failed";
+                       return r;
+               }
+               return 0;
+       }
+
+       if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS)) {
+               if (!fec_parse_block_count(v, arg_value, &num_ll)) {
+                       ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+                       return -EINVAL;
+               }
+               v->fec->blocks = num_ll;
+               return 0;
+       }
+
+       if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START)) {
+               if (!fec_parse_block_count(v, arg_value, &num_ll)) {
+                       ti->error = "Invalid " DM_VERITY_OPT_FEC_START;
+                       return -EINVAL;
+               }
+               v->fec->start = num_ll;
+               return 0;
+       }
+
+       if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)) {
+               /* roots must be non-zero and within the supported RS range */
+               if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 || !num_c ||
+                   num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) ||
+                   num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) {
+                       ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS;
+                       return -EINVAL;
+               }
+               v->fec->roots = num_c;
+               return 0;
+       }
+
+       ti->error = "Unrecognized verity FEC feature request";
+       return -EINVAL;
+}
+
+/*
+ * Allocate a zeroed dm_verity_fec and attach it as v->fec. Must be called
+ * before verity_fec_ctr. Returns 0, or -ENOMEM with ti->error set.
+ */
+int verity_fec_ctr_alloc(struct dm_verity *v)
+{
+       struct dm_verity_fec *fec = kzalloc(sizeof(struct dm_verity_fec),
+                                           GFP_KERNEL);
+
+       if (fec) {
+               v->fec = fec;
+               return 0;
+       }
+
+       v->ti->error = "Cannot allocate FEC structure";
+       return -ENOMEM;
+}
+
+/*
+ * Validate arguments and preallocate memory. Must be called after arguments
+ * have been parsed using verity_fec_parse_opt_args.
+ *
+ * Returns 0 on success (including the case where FEC was not requested) or
+ * a negative errno with ti->error set. On failure the function returns
+ * immediately and leaves whatever it already created attached to v->fec.
+ * NOTE(review): presumably the caller runs verity_fec_dtr() on failure --
+ * confirm against the target constructor.
+ */
+int verity_fec_ctr(struct dm_verity *v)
+{
+       struct dm_verity_fec *f = v->fec;
+       struct dm_target *ti = v->ti;
+       u64 hash_blocks;
+
+       /* FEC not requested: drop the structure from verity_fec_ctr_alloc() */
+       if (!verity_fec_is_enabled(v)) {
+               verity_fec_dtr(v);
+               return 0;
+       }
+
+       /*
+        * FEC is computed over data blocks, possible metadata, and
+        * hash blocks. In other words, FEC covers total of fec_blocks
+        * blocks consisting of the following:
+        *
+        *  data blocks | hash blocks | metadata (optional)
+        *
+        * We allow metadata after hash blocks to support a use case
+        * where all data is stored on the same device and FEC covers
+        * the entire area.
+        *
+        * If metadata is included, we require it to be available on the
+        * hash device after the hash blocks.
+        */
+
+       hash_blocks = v->hash_blocks - v->hash_start;
+
+       /*
+        * Require matching block sizes for data and hash devices for
+        * simplicity.
+        */
+       if (v->data_dev_block_bits != v->hash_dev_block_bits) {
+               ti->error = "Block sizes must match to use FEC";
+               return -EINVAL;
+       }
+
+       if (!f->roots) {
+               ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS;
+               return -EINVAL;
+       }
+       /* N of RS(M, N): data bytes per code word */
+       f->rsn = DM_VERITY_FEC_RSM - f->roots;
+
+       if (!f->blocks) {
+               ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS;
+               return -EINVAL;
+       }
+
+       /* rounds = blocks / rsn, rounded up when there is a remainder */
+       f->rounds = f->blocks;
+       if (sector_div(f->rounds, f->rsn))
+               f->rounds++;
+
+       /*
+        * Due to optional metadata, f->blocks can be larger than
+        * data_blocks and hash_blocks combined.
+        */
+       if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) {
+               ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+               return -EINVAL;
+       }
+
+       /*
+        * Metadata is accessed through the hash device, so we require
+        * it to be large enough.
+        */
+       f->hash_blocks = f->blocks - v->data_blocks;
+       if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) {
+               ti->error = "Hash device is too small for "
+                       DM_VERITY_OPT_FEC_BLOCKS;
+               return -E2BIG;
+       }
+
+       /* bufio client for reading parity data from the FEC device */
+       f->bufio = dm_bufio_client_create(f->dev->bdev,
+                                         1 << v->data_dev_block_bits,
+                                         1, 0, NULL, NULL);
+       if (IS_ERR(f->bufio)) {
+               ti->error = "Cannot initialize FEC bufio client";
+               return PTR_ERR(f->bufio);
+       }
+
+       if (dm_bufio_get_device_size(f->bufio) <
+           ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) {
+               ti->error = "FEC device is too small";
+               return -E2BIG;
+       }
+
+       /* separate bufio client for reading the data device during decode */
+       f->data_bufio = dm_bufio_client_create(v->data_dev->bdev,
+                                              1 << v->data_dev_block_bits,
+                                              1, 0, NULL, NULL);
+       if (IS_ERR(f->data_bufio)) {
+               ti->error = "Cannot initialize FEC data bufio client";
+               return PTR_ERR(f->data_bufio);
+       }
+
+       if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) {
+               ti->error = "Data device is too small";
+               return -E2BIG;
+       }
+
+       /* Preallocate an rs_control structure for each worker thread */
+       f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc,
+                                   fec_rs_free, (void *) v);
+       if (!f->rs_pool) {
+               ti->error = "Cannot allocate RS pool";
+               return -ENOMEM;
+       }
+
+       /* slab cache backing both the prealloc and the extra buffer pools */
+       f->cache = kmem_cache_create("dm_verity_fec_buffers",
+                                    f->rsn << DM_VERITY_FEC_BUF_RS_BITS,
+                                    0, 0, NULL);
+       if (!f->cache) {
+               ti->error = "Cannot create FEC buffer cache";
+               return -ENOMEM;
+       }
+
+       /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
+       f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() *
+                                                   DM_VERITY_FEC_BUF_PREALLOC,
+                                                   f->cache);
+       if (!f->prealloc_pool) {
+               ti->error = "Cannot allocate FEC buffer prealloc pool";
+               return -ENOMEM;
+       }
+
+       /* the extra pool reserves no elements (minimum of 0) */
+       f->extra_pool = mempool_create_slab_pool(0, f->cache);
+       if (!f->extra_pool) {
+               ti->error = "Cannot allocate FEC buffer extra pool";
+               return -ENOMEM;
+       }
+
+       /* Preallocate an output buffer for each thread */
+       f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(),
+                                                    1 << v->data_dev_block_bits);
+       if (!f->output_pool) {
+               ti->error = "Cannot allocate FEC output pool";
+               return -ENOMEM;
+       }
+
+       /* Reserve space for our per-bio data */
+       ti->per_bio_data_size += sizeof(struct dm_verity_fec_io);
+
+       return 0;
+}
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
new file mode 100644 (file)
index 0000000..7fa0298
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef DM_VERITY_FEC_H
+#define DM_VERITY_FEC_H
+
+#include "dm-verity.h"
+#include <linux/rslib.h>
+
+/* Reed-Solomon(M, N) parameters */
+#define DM_VERITY_FEC_RSM              255
+#define DM_VERITY_FEC_MAX_RSN          253
+#define DM_VERITY_FEC_MIN_RSN          231     /* ~10% space overhead */
+
+/* buffers for deinterleaving and decoding */
+#define DM_VERITY_FEC_BUF_PREALLOC     1       /* buffers to preallocate */
+#define DM_VERITY_FEC_BUF_RS_BITS      4       /* 1 << RS blocks per buffer */
+/* we need buffers for at most 1 << block size RS blocks */
+#define DM_VERITY_FEC_BUF_MAX \
+       (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
+
+#define DM_VERITY_OPT_FEC_DEV          "use_fec_from_device"
+#define DM_VERITY_OPT_FEC_BLOCKS       "fec_blocks"
+#define DM_VERITY_OPT_FEC_START                "fec_start"
+#define DM_VERITY_OPT_FEC_ROOTS                "fec_roots"
+
+/*
+ * configuration
+ *
+ * One instance per verity target, reachable as v->fec. The structure is
+ * allocated zeroed, so unset pointers are NULL and unset counts are 0.
+ */
+struct dm_verity_fec {
+       struct dm_dev *dev;     /* parity data device */
+       struct dm_bufio_client *data_bufio;     /* for data dev access */
+       struct dm_bufio_client *bufio;          /* for parity data access */
+       sector_t start;         /* parity data start in blocks */
+       sector_t blocks;        /* number of blocks covered */
+       sector_t rounds;        /* number of interleaving rounds */
+       sector_t hash_blocks;   /* blocks covered after v->hash_start */
+       unsigned char roots;    /* number of parity bytes, M-N of RS(M, N) */
+       unsigned char rsn;      /* N of RS(M, N) */
+       mempool_t *rs_pool;     /* mempool for fio->rs */
+       mempool_t *prealloc_pool;       /* mempool for preallocated buffers */
+       mempool_t *extra_pool;  /* mempool for extra buffers */
+       mempool_t *output_pool; /* mempool for output */
+       struct kmem_cache *cache;       /* cache for buffers */
+};
+
+/*
+ * per-bio data
+ *
+ * Space for it is reserved through ti->per_bio_data_size; members are
+ * allocated on demand from the mempools in struct dm_verity_fec.
+ */
+struct dm_verity_fec_io {
+       struct rs_control *rs;  /* Reed-Solomon state */
+       int erasures[DM_VERITY_FEC_MAX_RSN];    /* erasures for decode_rs8 */
+       u8 *bufs[DM_VERITY_FEC_BUF_MAX];        /* bufs for deinterleaving */
+       unsigned nbufs;         /* number of buffers allocated */
+       u8 *output;             /* buffer for corrected output */
+       size_t output_pos;      /* next byte of output to copy to a bio_vec */
+};
+
+#ifdef CONFIG_DM_VERITY_FEC
+
+/*
+ * FEC support is compiled in: the functions below are only declared here
+ * and are defined outside this header.
+ */
+
+/* each feature parameter requires a value */
+#define DM_VERITY_OPTS_FEC     8
+
+extern bool verity_fec_is_enabled(struct dm_verity *v);
+
+extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+                            enum verity_block_type type, sector_t block,
+                            u8 *dest, struct bvec_iter *iter);
+
+extern unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
+                                       char *result, unsigned maxlen);
+
+extern void verity_fec_finish_io(struct dm_verity_io *io);
+extern void verity_fec_init_io(struct dm_verity_io *io);
+
+extern bool verity_is_fec_opt_arg(const char *arg_name);
+extern int verity_fec_parse_opt_args(struct dm_arg_set *as,
+                                    struct dm_verity *v, unsigned *argc,
+                                    const char *arg_name);
+
+extern void verity_fec_dtr(struct dm_verity *v);
+
+extern int verity_fec_ctr_alloc(struct dm_verity *v);
+extern int verity_fec_ctr(struct dm_verity *v);
+
+#else /* !CONFIG_DM_VERITY_FEC */
+
+/*
+ * FEC support is compiled out: inline stubs with the same signatures, so
+ * callers do not need their own #ifdefs.
+ */
+
+/* no FEC feature arguments are accepted */
+#define DM_VERITY_OPTS_FEC     0
+
+/* FEC is never enabled in this configuration */
+static inline bool verity_fec_is_enabled(struct dm_verity *v)
+{
+       return false;
+}
+
+/* error correction is unavailable: always -EOPNOTSUPP */
+static inline int verity_fec_decode(struct dm_verity *v,
+                                   struct dm_verity_io *io,
+                                   enum verity_block_type type,
+                                   sector_t block, u8 *dest,
+                                   struct bvec_iter *iter)
+{
+       return -EOPNOTSUPP;
+}
+
+/* nothing to append: the status position is returned unchanged */
+static inline unsigned verity_fec_status_table(struct dm_verity *v,
+                                              unsigned sz, char *result,
+                                              unsigned maxlen)
+{
+       return sz;
+}
+
+/* no per-bio FEC state to release */
+static inline void verity_fec_finish_io(struct dm_verity_io *io)
+{
+}
+
+/* no per-bio FEC state to initialize */
+static inline void verity_fec_init_io(struct dm_verity_io *io)
+{
+}
+
+/* no argument name is recognized as a FEC option */
+static inline bool verity_is_fec_opt_arg(const char *arg_name)
+{
+       return false;
+}
+
+/* every FEC argument is rejected with -EINVAL */
+static inline int verity_fec_parse_opt_args(struct dm_arg_set *as,
+                                           struct dm_verity *v,
+                                           unsigned *argc,
+                                           const char *arg_name)
+{
+       return -EINVAL;
+}
+
+/* nothing to tear down */
+static inline void verity_fec_dtr(struct dm_verity *v)
+{
+}
+
+/* nothing to allocate: always succeeds */
+static inline int verity_fec_ctr_alloc(struct dm_verity *v)
+{
+       return 0;
+}
+
+/* nothing to validate or preallocate: always succeeds */
+static inline int verity_fec_ctr(struct dm_verity *v)
+{
+       return 0;
+}
+
+#endif /* CONFIG_DM_VERITY_FEC */
+
+#endif /* DM_VERITY_FEC_H */
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
new file mode 100644 (file)
index 0000000..5c5d30c
--- /dev/null
@@ -0,0 +1,1093 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
+ *
+ * This file is released under the GPLv2.
+ *
+ * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set
+ * default prefetch value. Data are read in "prefetch_cluster" chunks from the
+ * hash device. Setting this greatly improves performance when data and hash
+ * are on the same disk on different partitions on devices with poor random
+ * access behavior.
+ */
+
+#include "dm-verity.h"
+#include "dm-verity-fec.h"
+
+#include <linux/module.h>
+#include <linux/reboot.h>
+
+#define DM_MSG_PREFIX                  "verity"
+
+#define DM_VERITY_ENV_LENGTH           42
+#define DM_VERITY_ENV_VAR_NAME         "DM_VERITY_ERR_BLOCK_NR"
+
+#define DM_VERITY_DEFAULT_PREFETCH_SIZE        262144
+
+#define DM_VERITY_MAX_CORRUPTED_ERRS   100
+
+#define DM_VERITY_OPT_LOGGING          "ignore_corruption"
+#define DM_VERITY_OPT_RESTART          "restart_on_corruption"
+#define DM_VERITY_OPT_IGN_ZEROES       "ignore_zero_blocks"
+
+#define DM_VERITY_OPTS_MAX             (2 + DM_VERITY_OPTS_FEC)
+
+static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
+
+module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
+
+/*
+ * A work item bundled with the verity target and block range it refers to.
+ * NOTE(review): presumably queued to prefetch hash blocks for the range --
+ * confirm against the code that fills and runs it.
+ */
+struct dm_verity_prefetch_work {
+       struct work_struct work;        /* embedded work item */
+       struct dm_verity *v;            /* target the work belongs to */
+       sector_t block;                 /* first block of the range */
+       unsigned n_blocks;              /* number of blocks in the range */
+};
+
+/*
+ * Auxiliary structure appended to each dm-bufio buffer. If the value
+ * hash_verified is nonzero, hash of the block has been verified.
+ *
+ * The variable hash_verified is set to 0 when allocating the buffer, then
+ * it can be changed to 1 and it is never reset to 0 again.
+ *
+ * There is no lock around this value, a race condition can at worst cause
+ * that multiple processes verify the hash of the same buffer simultaneously
+ * and write 1 to hash_verified simultaneously.
+ * This condition is harmless, so we don't need locking.
+ */
+struct buffer_aux {
+       int hash_verified;      /* 0 = not yet verified, 1 = verified */
+};
+
+/*
+ * Called for a freshly created buffer: mark its struct buffer_aux as not
+ * yet verified.
+ */
+static void dm_bufio_alloc_callback(struct dm_buffer *buf)
+{
+       struct buffer_aux *fresh_aux;
+
+       fresh_aux = dm_bufio_get_aux_data(buf);
+       fresh_aux->hash_verified = 0;
+}
+
+/*
+ * Convert a sector number of the incoming bio into the matching sector on
+ * the data device: the offset within the target plus v->data_start.
+ */
+static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector)
+{
+       sector_t target_relative = dm_target_offset(v->ti, bi_sector);
+
+       return v->data_start + target_relative;
+}
+
+/*
+ * Return hash position of a specified block at a specified tree level
+ * (0 is the lowest level).
+ * The lowest "hash_per_block_bits"-bits of the result denote hash position
+ * inside a hash block. The remaining bits denote location of the hash block.
+ */
+static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
+                                        int level)
+{
+       /* each level up drops another hash_per_block_bits bits */
+       const int level_shift = level * v->hash_per_block_bits;
+
+       return block >> level_shift;
+}
+
+/*
+ * Start a hash computation on @desc. For format version 1 and later the
+ * salt is hashed first; version 0 adds it in verity_hash_final() instead.
+ * Returns 0 or the negative error from the crypto layer.
+ */
+static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc)
+{
+       int err;
+
+       desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+       desc->tfm = v->tfm;
+
+       err = crypto_shash_init(desc);
+       if (unlikely(err < 0)) {
+               DMERR("crypto_shash_init failed: %d", err);
+               return err;
+       }
+
+       /* nothing more to do for the version 0 format */
+       if (unlikely(v->version < 1))
+               return 0;
+
+       err = crypto_shash_update(desc, v->salt, v->salt_size);
+       if (unlikely(err < 0)) {
+               DMERR("crypto_shash_update failed: %d", err);
+               return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Feed @len bytes of @data into the hash in progress on @desc, logging any
+ * failure. Returns the result of crypto_shash_update().
+ */
+static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc,
+                             const u8 *data, size_t len)
+{
+       int err;
+
+       err = crypto_shash_update(desc, data, len);
+       if (likely(err >= 0))
+               return err;
+
+       DMERR("crypto_shash_update failed: %d", err);
+       return err;
+}
+
+/*
+ * Finish the hash in progress on @desc and store the result in @digest.
+ * For format version 0 the salt is hashed last, just before finalizing.
+ * Returns the crypto layer's result; failures are logged.
+ */
+static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
+                            u8 *digest)
+{
+       int err;
+
+       if (unlikely(v->version == 0)) {
+               err = crypto_shash_update(desc, v->salt, v->salt_size);
+               if (err < 0) {
+                       DMERR("crypto_shash_update failed: %d", err);
+                       return err;
+               }
+       }
+
+       err = crypto_shash_final(desc, digest);
+       if (likely(err >= 0))
+               return err;
+
+       DMERR("crypto_shash_final failed: %d", err);
+       return err;
+}
+
+/*
+ * Hash @len bytes of @data into @digest using @desc: init, one update,
+ * final. Stops at the first step that fails and returns its error;
+ * otherwise returns the result of the final step.
+ */
+int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+               const u8 *data, size_t len, u8 *digest)
+{
+       int err = verity_hash_init(v, desc);
+
+       if (likely(err >= 0))
+               err = verity_hash_update(v, desc, data, len);
+
+       if (likely(err >= 0))
+               err = verity_hash_final(v, desc, digest);
+
+       return err;
+}
+
+/*
+ * Locate the hash of @block at tree level @level: *@hash_block receives the
+ * hash block number and, if @offset is non-NULL, *@offset the byte offset
+ * of the hash inside that block.
+ */
+static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
+                                sector_t *hash_block, unsigned *offset)
+{
+       const sector_t pos = verity_position_at_level(v, block, level);
+       unsigned slot;
+
+       *hash_block = v->hash_level_block[level] + (pos >> v->hash_per_block_bits);
+
+       if (!offset)
+               return;
+
+       /* index of the hash within its hash block */
+       slot = pos & ((1 << v->hash_per_block_bits) - 1);
+
+       /* version 0 packs digests back to back; later versions pad each slot */
+       if (v->version)
+               *offset = slot << (v->hash_dev_block_bits - v->hash_per_block_bits);
+       else
+               *offset = slot * v->digest_size;
+}
+
+/*
+ * Handle verification errors.
+ *
+ * Records the failure, and until DM_VERITY_MAX_CORRUPTED_ERRS reports have
+ * been made also logs it and sends a KOBJ_CHANGE uevent naming the block.
+ * Returns 0 in logging mode and 1 otherwise; in restart mode
+ * kernel_restart() is called first.
+ */
+static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
+                            unsigned long long block)
+{
+       char env_buf[DM_VERITY_ENV_LENGTH];
+       char *envp[] = { env_buf, NULL };
+       const char *type_str = "";
+       struct mapped_device *md = dm_table_get_md(v->ti->table);
+
+       /* Corruption should be visible in device status in all modes */
+       v->hash_failed = 1;
+
+       /* report only while below the error limit */
+       if (v->corrupted_errs < DM_VERITY_MAX_CORRUPTED_ERRS) {
+               v->corrupted_errs++;
+
+               if (type == DM_VERITY_BLOCK_TYPE_DATA)
+                       type_str = "data";
+               else if (type == DM_VERITY_BLOCK_TYPE_METADATA)
+                       type_str = "metadata";
+               else
+                       BUG();
+
+               DMERR("%s: %s block %llu is corrupted", v->data_dev->name,
+                     type_str, block);
+
+               if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
+                       DMERR("%s: reached maximum errors", v->data_dev->name);
+
+               snprintf(env_buf, DM_VERITY_ENV_LENGTH, "%s=%d,%llu",
+                        DM_VERITY_ENV_VAR_NAME, type, block);
+
+               kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj,
+                                  KOBJ_CHANGE, envp);
+       }
+
+       if (v->mode == DM_VERITY_MODE_LOGGING)
+               return 0;
+
+       if (v->mode == DM_VERITY_MODE_RESTART)
+               kernel_restart("dm-verity device corrupted");
+
+       return 1;
+}
+
+/*
+ * Verify hash of a metadata block pertaining to the specified data block
+ * ("block" argument) at a specified level ("level" argument).
+ *
+ * On successful return, "want_digest" contains the hash value for a lower
+ * tree level or for the data block (if we're at the lowest level).
+ *
+ * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
+ * If "skip_unverified" is false, unverified buffer is hashed and verified
+ * against the current contents of "want_digest".
+ *
+ * Returns 0 on success, 1 if an unverified buffer was skipped, the error
+ * from dm_bufio_read() or verity_hash(), or -EIO when the hash mismatches,
+ * verity_fec_decode() does not return 0 and verity_handle_err() returns
+ * nonzero.
+ */
+static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
+                              sector_t block, int level, bool skip_unverified,
+                              u8 *want_digest)
+{
+       struct dm_buffer *buf;
+       struct buffer_aux *aux;
+       u8 *data;
+       int r;
+       sector_t hash_block;
+       unsigned offset;
+
+       verity_hash_at_level(v, block, level, &hash_block, &offset);
+
+       data = dm_bufio_read(v->bufio, hash_block, &buf);
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       aux = dm_bufio_get_aux_data(buf);
+
+       if (!aux->hash_verified) {
+               if (skip_unverified) {
+                       r = 1;
+                       goto release_ret_r;
+               }
+
+               r = verity_hash(v, verity_io_hash_desc(v, io),
+                               data, 1 << v->hash_dev_block_bits,
+                               verity_io_real_digest(v, io));
+               if (unlikely(r < 0))
+                       goto release_ret_r;
+
+               /*
+                * Three outcomes: the hash matches, FEC decoding returns 0,
+                * or the error handler decides. If verity_handle_err()
+                * returns 0 the buffer stays unverified but its digest is
+                * still handed to the caller below.
+                */
+               if (likely(memcmp(verity_io_real_digest(v, io), want_digest,
+                                 v->digest_size) == 0))
+                       aux->hash_verified = 1;
+               else if (verity_fec_decode(v, io,
+                                          DM_VERITY_BLOCK_TYPE_METADATA,
+                                          hash_block, data, NULL) == 0)
+                       aux->hash_verified = 1;
+               else if (verity_handle_err(v,
+                                          DM_VERITY_BLOCK_TYPE_METADATA,
+                                          hash_block)) {
+                       r = -EIO;
+                       goto release_ret_r;
+               }
+       }
+
+       /* Hand back the digest stored at "offset" within this hash block. */
+       data += offset;
+       memcpy(want_digest, data, v->digest_size);
+       r = 0;
+
+release_ret_r:
+       dm_bufio_release(buf);
+       return r;
+}
+
+/*
+ * Find a hash for a given block, write it to digest and verify the integrity
+ * of the hash tree if necessary.
+ *
+ * "*is_zero" is always written: it is true only when the lookup succeeded
+ * (r == 0), v->zero_digest is set, and "digest" equals v->zero_digest.
+ *
+ * Returns 0 on success, otherwise the nonzero result of
+ * verity_verify_level().
+ */
+int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+                         sector_t block, u8 *digest, bool *is_zero)
+{
+       int r = 0, i;
+
+       if (likely(v->levels)) {
+               /*
+                * First, we try to get the requested hash for
+                * the current block. If the hash block itself is
+                * verified, zero is returned. If it isn't, this
+                * function returns 1 and we fall back to whole
+                * chain verification.
+                */
+               r = verity_verify_level(v, io, block, 0, true, digest);
+               if (likely(r <= 0))
+                       goto out;
+       }
+
+       /* Walk from the top level down, starting from the root digest. */
+       memcpy(digest, v->root_digest, v->digest_size);
+
+       for (i = v->levels - 1; i >= 0; i--) {
+               r = verity_verify_level(v, io, block, i, false, digest);
+               if (unlikely(r))
+                       goto out;
+       }
+out:
+       if (!r && v->zero_digest)
+               *is_zero = !memcmp(v->zero_digest, digest, v->digest_size);
+       else
+               *is_zero = false;
+
+       return r;
+}
+
+/*
+ * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec
+ * starting from iter.
+ *
+ * "process" may be invoked several times, once per bio_vec segment, until a
+ * whole data block has been consumed; "*iter" is advanced past the bytes
+ * that were processed.
+ *
+ * Returns 0 on success or the first negative value returned by "process".
+ */
+int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+                       struct bvec_iter *iter,
+                       int (*process)(struct dm_verity *v,
+                                      struct dm_verity_io *io, u8 *data,
+                                      size_t len))
+{
+       unsigned todo = 1 << v->data_dev_block_bits;
+       struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
+
+       do {
+               int r;
+               u8 *page;
+               unsigned len;
+               struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+               page = kmap_atomic(bv.bv_page);
+               len = bv.bv_len;
+
+               /* Never process past the end of the current data block. */
+               if (likely(len >= todo))
+                       len = todo;
+
+               r = process(v, io, page + bv.bv_offset, len);
+               kunmap_atomic(page);
+
+               /* On failure "*iter" is left at the start of this segment. */
+               if (r < 0)
+                       return r;
+
+               bio_advance_iter(bio, iter, len);
+               todo -= len;
+       } while (todo);
+
+       return 0;
+}
+
+/*
+ * verity_for_bv_block() callback: feed "len" bytes at "data" into this io's
+ * hash descriptor.
+ */
+static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
+                                u8 *data, size_t len)
+{
+       struct shash_desc *desc = verity_io_hash_desc(v, io);
+
+       return verity_hash_update(v, desc, data, len);
+}
+
+/*
+ * verity_for_bv_block() callback: overwrite "len" bytes at "data" with
+ * zeros. "v" and "io" are unused; always returns 0.
+ */
+static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
+                         u8 *data, size_t len)
+{
+       memset(data, 0, len);
+       return 0;
+}
+
+/*
+ * Verify one "dm_verity_io" structure.
+ *
+ * For each of the io->n_blocks data blocks, looks up the expected digest,
+ * hashes the block's data from the bio and compares the two. io->iter is
+ * advanced one data block per iteration.
+ *
+ * Returns 0 on success, a negative error from a helper, or -EIO when a block
+ * mismatches, verity_fec_decode() does not return 0 and verity_handle_err()
+ * returns nonzero.
+ */
+static int verity_verify_io(struct dm_verity_io *io)
+{
+       bool is_zero;
+       struct dm_verity *v = io->v;
+       struct bvec_iter start;
+       unsigned b;
+
+       for (b = 0; b < io->n_blocks; b++) {
+               int r;
+               struct shash_desc *desc = verity_io_hash_desc(v, io);
+
+               r = verity_hash_for_block(v, io, io->block + b,
+                                         verity_io_want_digest(v, io),
+                                         &is_zero);
+               if (unlikely(r < 0))
+                       return r;
+
+               if (is_zero) {
+                       /*
+                        * If we expect a zero block, don't validate, just
+                        * return zeros.
+                        */
+                       r = verity_for_bv_block(v, io, &io->iter,
+                                               verity_bv_zero);
+                       if (unlikely(r < 0))
+                               return r;
+
+                       continue;
+               }
+
+               r = verity_hash_init(v, desc);
+               if (unlikely(r < 0))
+                       return r;
+
+               /*
+                * Remember where this block starts in the bio; the saved
+                * iterator is passed to verity_fec_decode() on a mismatch.
+                */
+               start = io->iter;
+               r = verity_for_bv_block(v, io, &io->iter, verity_bv_hash_update);
+               if (unlikely(r < 0))
+                       return r;
+
+               r = verity_hash_final(v, desc, verity_io_real_digest(v, io));
+               if (unlikely(r < 0))
+                       return r;
+
+               /*
+                * Match, FEC decode returning 0, or an error handler that
+                * returns 0 all let the loop move on to the next block.
+                */
+               if (likely(memcmp(verity_io_real_digest(v, io),
+                                 verity_io_want_digest(v, io), v->digest_size) == 0))
+                       continue;
+               else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
+                                          io->block + b, NULL, &start) == 0)
+                       continue;
+               else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
+                                          io->block + b))
+                       return -EIO;
+       }
+
+       return 0;
+}
+
+/*
+ * End one "io" structure with a given error.
+ *
+ * Restores the bio's original completion callback (saved in verity_map()),
+ * stores "error" in the bio, calls verity_fec_finish_io() and then
+ * completes the bio.
+ */
+static void verity_finish_io(struct dm_verity_io *io, int error)
+{
+       struct dm_verity *v = io->v;
+       struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
+
+       bio->bi_end_io = io->orig_bi_end_io;
+       bio->bi_error = error;
+
+       verity_fec_finish_io(io);
+
+       bio_endio(bio);
+}
+
+/*
+ * Workqueue handler: verify the io that embeds this work item, then complete
+ * its bio with the verification result.
+ */
+static void verity_work(struct work_struct *w)
+{
+       struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
+       int error = verity_verify_io(io);
+
+       verity_finish_io(io, error);
+}
+
+/*
+ * Completion callback installed on the bio by verity_map().
+ *
+ * A failed read is completed immediately with its error unless FEC is
+ * enabled; in every other case verification is deferred to the verify
+ * workqueue.
+ */
+static void verity_end_io(struct bio *bio)
+{
+       struct dm_verity_io *io = bio->bi_private;
+
+       if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
+               verity_finish_io(io, bio->bi_error);
+               return;
+       }
+
+       INIT_WORK(&io->work, verity_work);
+       queue_work(io->v->verify_wq, &io->work);
+}
+
+/*
+ * Prefetch buffers for the specified io.
+ * The root buffer is not prefetched, it is assumed that it will be cached
+ * all the time.
+ *
+ * Runs as a work item; frees the dm_verity_prefetch_work that was allocated
+ * by verity_submit_prefetch().
+ */
+static void verity_prefetch_io(struct work_struct *work)
+{
+       struct dm_verity_prefetch_work *pw =
+               container_of(work, struct dm_verity_prefetch_work, work);
+       struct dm_verity *v = pw->v;
+       int i;
+
+       /* Start below the top level (levels - 1), which is not prefetched. */
+       for (i = v->levels - 2; i >= 0; i--) {
+               sector_t hash_block_start;
+               sector_t hash_block_end;
+               /* Hash blocks covering the first and last data block. */
+               verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
+               verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
+               if (!i) {
+                       /* Snapshot the module parameter; it may change. */
+                       unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
+
+                       /* NOTE(review): presumably bytes -> blocks; confirm. */
+                       cluster >>= v->data_dev_block_bits;
+                       if (unlikely(!cluster))
+                               goto no_prefetch_cluster;
+
+                       /* Not a power of two: keep only the highest set bit. */
+                       if (unlikely(cluster & (cluster - 1)))
+                               cluster = 1 << __fls(cluster);
+
+                       /* Widen the range to cluster-aligned boundaries. */
+                       hash_block_start &= ~(sector_t)(cluster - 1);
+                       hash_block_end |= cluster - 1;
+                       if (unlikely(hash_block_end >= v->hash_blocks))
+                               hash_block_end = v->hash_blocks - 1;
+               }
+no_prefetch_cluster:
+               dm_bufio_prefetch(v->bufio, hash_block_start,
+                                 hash_block_end - hash_block_start + 1);
+       }
+
+       kfree(pw);
+}
+
+/*
+ * Queue a prefetch work item for the block range covered by "io".
+ *
+ * Best effort: if the work structure cannot be allocated the function
+ * returns without queueing anything. The allocation is freed by
+ * verity_prefetch_io().
+ */
+static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
+{
+       struct dm_verity_prefetch_work *pw;
+
+       pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
+               GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+       if (!pw)
+               return;
+
+       INIT_WORK(&pw->work, verity_prefetch_io);
+       pw->v = v;
+       pw->block = io->block;
+       pw->n_blocks = io->n_blocks;
+       queue_work(v->verify_wq, &pw->work);
+}
+
+/*
+ * Bio map function. It fills in the per-bio dm_verity_io structure, hooks the
+ * bio's completion callback, then issues prefetches and the I/O.
+ *
+ * Returns -EIO for I/O that is not aligned to the data block size, that
+ * extends past v->data_blocks, or that is a write; otherwise submits the bio
+ * and returns DM_MAPIO_SUBMITTED.
+ */
+static int verity_map(struct dm_target *ti, struct bio *bio)
+{
+       struct dm_verity *v = ti->private;
+       struct dm_verity_io *io;
+
+       bio->bi_bdev = v->data_dev->bdev;
+       bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector);
+
+       /* Both the start sector and the length must be block aligned. */
+       if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
+           ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
+               DMERR_LIMIT("unaligned io");
+               return -EIO;
+       }
+
+       if (bio_end_sector(bio) >>
+           (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
+               DMERR_LIMIT("io out of range");
+               return -EIO;
+       }
+
+       if (bio_data_dir(bio) == WRITE)
+               return -EIO;
+
+       io = dm_per_bio_data(bio, ti->per_bio_data_size);
+       io->v = v;
+       /* Saved so that verity_finish_io() can restore it. */
+       io->orig_bi_end_io = bio->bi_end_io;
+       io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
+       io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
+
+       bio->bi_end_io = verity_end_io;
+       bio->bi_private = io;
+       /* Private copy of the iterator, used later by verity_verify_io(). */
+       io->iter = bio->bi_iter;
+
+       verity_fec_init_io(io);
+
+       verity_submit_prefetch(v, io);
+
+       generic_make_request(bio);
+
+       return DM_MAPIO_SUBMITTED;
+}
+
+/*
+ * Status: V (valid) or C (corruption found)
+ *
+ * STATUSTYPE_TABLE emits the ten mandatory parameters in the order parsed by
+ * verity_ctr(), followed, when any optional feature is active, by the number
+ * of optional arguments and the arguments themselves.
+ */
+static void verity_status(struct dm_target *ti, status_type_t type,
+                         unsigned status_flags, char *result, unsigned maxlen)
+{
+       struct dm_verity *v = ti->private;
+       unsigned args = 0;
+       unsigned sz = 0;
+       unsigned x;
+
+       switch (type) {
+       case STATUSTYPE_INFO:
+               DMEMIT("%c", v->hash_failed ? 'C' : 'V');
+               break;
+       case STATUSTYPE_TABLE:
+               DMEMIT("%u %s %s %u %u %llu %llu %s ",
+                       v->version,
+                       v->data_dev->name,
+                       v->hash_dev->name,
+                       1 << v->data_dev_block_bits,
+                       1 << v->hash_dev_block_bits,
+                       (unsigned long long)v->data_blocks,
+                       (unsigned long long)v->hash_start,
+                       v->alg_name
+                       );
+               for (x = 0; x < v->digest_size; x++)
+                       DMEMIT("%02x", v->root_digest[x]);
+               DMEMIT(" ");
+               if (!v->salt_size)
+                       DMEMIT("-");
+               else
+                       for (x = 0; x < v->salt_size; x++)
+                               DMEMIT("%02x", v->salt[x]);
+               /* Count the optional arguments before emitting any of them. */
+               if (v->mode != DM_VERITY_MODE_EIO)
+                       args++;
+               if (verity_fec_is_enabled(v))
+                       args += DM_VERITY_OPTS_FEC;
+               if (v->zero_digest)
+                       args++;
+               /* No optional features active: the table line ends here. */
+               if (!args)
+                       return;
+               DMEMIT(" %u", args);
+               if (v->mode != DM_VERITY_MODE_EIO) {
+                       DMEMIT(" ");
+                       switch (v->mode) {
+                       case DM_VERITY_MODE_LOGGING:
+                               DMEMIT(DM_VERITY_OPT_LOGGING);
+                               break;
+                       case DM_VERITY_MODE_RESTART:
+                               DMEMIT(DM_VERITY_OPT_RESTART);
+                               break;
+                       default:
+                               BUG();
+                       }
+               }
+               if (v->zero_digest)
+                       DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES);
+               sz = verity_fec_status_table(v, sz, result, maxlen);
+               break;
+       }
+}
+
+/*
+ * Report the data device as the ioctl target through "*bdev".
+ *
+ * Returns 1 when the target has a nonzero data_start or its length differs
+ * from the size of the data device (in sectors), 0 otherwise. "mode" is
+ * not touched.
+ */
+static int verity_prepare_ioctl(struct dm_target *ti,
+               struct block_device **bdev, fmode_t *mode)
+{
+       struct dm_verity *v = ti->private;
+       struct block_device *data_bdev = v->data_dev->bdev;
+
+       *bdev = data_bdev;
+
+       return v->data_start ||
+              ti->len != i_size_read(data_bdev->bd_inode) >> SECTOR_SHIFT;
+}
+
+/*
+ * Invoke "fn" on the data device, covering ti->len sectors starting at
+ * v->data_start, and return its result. The hash device is not passed to
+ * "fn".
+ */
+static int verity_iterate_devices(struct dm_target *ti,
+                                 iterate_devices_callout_fn fn, void *data)
+{
+       struct dm_verity *v = ti->private;
+
+       return fn(ti, v->data_dev, v->data_start, ti->len, data);
+}
+
+/*
+ * Raise the logical and physical block sizes in "limits" to at least the
+ * verity data block size, then pass the resulting logical block size to
+ * blk_limits_io_min().
+ */
+static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+       struct dm_verity *v = ti->private;
+       const int block_size = 1 << v->data_dev_block_bits;
+
+       if (limits->logical_block_size < block_size)
+               limits->logical_block_size = block_size;
+
+       if (limits->physical_block_size < block_size)
+               limits->physical_block_size = block_size;
+
+       blk_limits_io_min(limits, limits->logical_block_size);
+}
+
+/*
+ * Target destructor: release everything hanging off ti->private.
+ *
+ * Also used as the error path of verity_ctr(), so any member may still be
+ * unset (NULL) when this runs; hence the checks before each release.
+ */
+static void verity_dtr(struct dm_target *ti)
+{
+       struct dm_verity *v = ti->private;
+
+       if (v->verify_wq)
+               destroy_workqueue(v->verify_wq);
+
+       if (v->bufio)
+               dm_bufio_client_destroy(v->bufio);
+
+       kfree(v->salt);
+       kfree(v->root_digest);
+       kfree(v->zero_digest);
+
+       if (v->tfm)
+               crypto_free_shash(v->tfm);
+
+       kfree(v->alg_name);
+
+       if (v->hash_dev)
+               dm_put_device(ti, v->hash_dev);
+
+       if (v->data_dev)
+               dm_put_device(ti, v->data_dev);
+
+       verity_fec_dtr(v);
+
+       kfree(v);
+}
+
+/*
+ * Allocate v->zero_digest and fill it with the hash of one zero-filled data
+ * block (1 << v->data_dev_block_bits bytes).
+ *
+ * Returns -ENOMEM if an allocation fails, otherwise the result of
+ * verity_hash(). v->zero_digest is not freed here on failure.
+ */
+static int verity_alloc_zero_digest(struct dm_verity *v)
+{
+       int r = -ENOMEM;
+       struct shash_desc *desc;
+       u8 *zero_data;
+
+       v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
+
+       if (!v->zero_digest)
+               return r;
+
+       desc = kmalloc(v->shash_descsize, GFP_KERNEL);
+
+       if (!desc)
+               return r; /* verity_dtr will free zero_digest */
+
+       zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
+
+       /* r is still -ENOMEM here; "out" frees desc. */
+       if (!zero_data)
+               goto out;
+
+       r = verity_hash(v, desc, zero_data, 1 << v->data_dev_block_bits,
+                       v->zero_digest);
+
+out:
+       kfree(desc);
+       kfree(zero_data);
+
+       return r;
+}
+
+/*
+ * Parse the optional feature arguments that follow the mandatory target
+ * parameters.
+ *
+ * Recognized names are matched case-insensitively: DM_VERITY_OPT_LOGGING and
+ * DM_VERITY_OPT_RESTART set v->mode, DM_VERITY_OPT_IGN_ZEROES allocates
+ * v->zero_digest, and anything accepted by verity_is_fec_opt_arg() is
+ * forwarded to verity_fec_parse_opt_args().
+ *
+ * Returns 0 on success, -EINVAL for a bad argument count or an unrecognized
+ * feature, or the error returned by the helper that failed.
+ */
+static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
+{
+       int r;
+       unsigned argc;
+       struct dm_target *ti = v->ti;
+       const char *arg_name;
+
+       static struct dm_arg _args[] = {
+               {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"},
+       };
+
+       r = dm_read_arg_group(_args, as, &argc, &ti->error);
+       if (r)
+               return -EINVAL;
+
+       if (!argc)
+               return 0;
+
+       do {
+               arg_name = dm_shift_arg(as);
+               argc--;
+
+               if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) {
+                       v->mode = DM_VERITY_MODE_LOGGING;
+                       continue;
+
+               } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) {
+                       v->mode = DM_VERITY_MODE_RESTART;
+                       continue;
+
+               } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) {
+                       r = verity_alloc_zero_digest(v);
+                       if (r) {
+                               ti->error = "Cannot allocate zero digest";
+                               return r;
+                       }
+                       continue;
+
+               } else if (verity_is_fec_opt_arg(arg_name)) {
+                       /* The helper receives &argc and may consume more args. */
+                       r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
+                       if (r)
+                               return r;
+                       continue;
+               }
+
+               /* Reached only when no branch above matched. */
+               ti->error = "Unrecognized verity feature request";
+               return -EINVAL;
+       } while (argc && !r);
+
+       return r;
+}
+
+/*
+ * Target parameters:
+ *     <version>       The current format is version 1.
+ *                     Vsn 0 is compatible with original Chromium OS releases.
+ *     <data device>
+ *     <hash device>
+ *     <data block size>
+ *     <hash block size>
+ *     <the number of data blocks>
+ *     <hash start block>
+ *     <algorithm>
+ *     <digest>
+ *     <salt>          Hex string or "-" if no salt.
+ *
+ * Any further arguments are optional and are handed to
+ * verity_parse_opt_args().
+ *
+ * Returns 0 on success. On failure everything allocated so far is released
+ * through verity_dtr(), ti->error usually describes the problem, and a
+ * negative error code is returned.
+ */
+static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+       struct dm_verity *v;
+       struct dm_arg_set as;
+       unsigned int num;
+       unsigned long long num_ll;
+       int r;
+       int i;
+       sector_t hash_position;
+       char dummy;
+
+       /* Zeroed so that verity_dtr() can tell which members were set up. */
+       v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
+       if (!v) {
+               ti->error = "Cannot allocate verity structure";
+               return -ENOMEM;
+       }
+       ti->private = v;
+       v->ti = ti;
+
+       r = verity_fec_ctr_alloc(v);
+       if (r)
+               goto bad;
+
+       if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
+               ti->error = "Device must be readonly";
+               r = -EINVAL;
+               goto bad;
+       }
+
+       if (argc < 10) {
+               ti->error = "Not enough arguments";
+               r = -EINVAL;
+               goto bad;
+       }
+
+       /* "%u%c" must match exactly once: trailing characters are rejected. */
+       if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 ||
+           num > 1) {
+               ti->error = "Invalid version";
+               r = -EINVAL;
+               goto bad;
+       }
+       v->version = num;
+
+       r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
+       if (r) {
+               ti->error = "Data device lookup failed";
+               goto bad;
+       }
+
+       r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
+       if (r) {
+               ti->error = "Hash device lookup failed";
+               goto bad;
+       }
+
+       /* Block sizes must be nonzero powers of two within the limits below. */
+       if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 ||
+           !num || (num & (num - 1)) ||
+           num < bdev_logical_block_size(v->data_dev->bdev) ||
+           num > PAGE_SIZE) {
+               ti->error = "Invalid data device block size";
+               r = -EINVAL;
+               goto bad;
+       }
+       v->data_dev_block_bits = __ffs(num);
+
+       if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 ||
+           !num || (num & (num - 1)) ||
+           num < bdev_logical_block_size(v->hash_dev->bdev) ||
+           num > INT_MAX) {
+               ti->error = "Invalid hash device block size";
+               r = -EINVAL;
+               goto bad;
+       }
+       v->hash_dev_block_bits = __ffs(num);
+
+       /* Shift there and back to detect overflow of the sector count. */
+       if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
+           (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
+           >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+               ti->error = "Invalid data blocks";
+               r = -EINVAL;
+               goto bad;
+       }
+       v->data_blocks = num_ll;
+
+       if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) {
+               ti->error = "Data device is too small";
+               r = -EINVAL;
+               goto bad;
+       }
+
+       if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
+           (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
+           >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+               ti->error = "Invalid hash start";
+               r = -EINVAL;
+               goto bad;
+       }
+       v->hash_start = num_ll;
+
+       v->alg_name = kstrdup(argv[7], GFP_KERNEL);
+       if (!v->alg_name) {
+               ti->error = "Cannot allocate algorithm name";
+               r = -ENOMEM;
+               goto bad;
+       }
+
+       v->tfm = crypto_alloc_shash(v->alg_name, 0, 0);
+       if (IS_ERR(v->tfm)) {
+               ti->error = "Cannot initialize hash function";
+               r = PTR_ERR(v->tfm);
+               /* Cleared so that verity_dtr() does not free an ERR_PTR. */
+               v->tfm = NULL;
+               goto bad;
+       }
+       v->digest_size = crypto_shash_digestsize(v->tfm);
+       /* A hash block must hold at least two digests. */
+       if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
+               ti->error = "Digest size too big";
+               r = -EINVAL;
+               goto bad;
+       }
+       v->shash_descsize =
+               sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm);
+
+       v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
+       if (!v->root_digest) {
+               ti->error = "Cannot allocate root digest";
+               r = -ENOMEM;
+               goto bad;
+       }
+       if (strlen(argv[8]) != v->digest_size * 2 ||
+           hex2bin(v->root_digest, argv[8], v->digest_size)) {
+               ti->error = "Invalid root digest";
+               r = -EINVAL;
+               goto bad;
+       }
+
+       if (strcmp(argv[9], "-")) {
+               v->salt_size = strlen(argv[9]) / 2;
+               v->salt = kmalloc(v->salt_size, GFP_KERNEL);
+               if (!v->salt) {
+                       ti->error = "Cannot allocate salt";
+                       r = -ENOMEM;
+                       goto bad;
+               }
+               /* The length check rejects odd-length hex strings. */
+               if (strlen(argv[9]) != v->salt_size * 2 ||
+                   hex2bin(v->salt, argv[9], v->salt_size)) {
+                       ti->error = "Invalid salt";
+                       r = -EINVAL;
+                       goto bad;
+               }
+       }
+
+       argv += 10;
+       argc -= 10;
+
+       /* Optional parameters */
+       if (argc) {
+               as.argc = argc;
+               as.argv = argv;
+
+               r = verity_parse_opt_args(&as, v);
+               if (r < 0)
+                       goto bad;
+       }
+
+       v->hash_per_block_bits =
+               __fls((1 << v->hash_dev_block_bits) / v->digest_size);
+
+       /* Count the tree levels needed to cover data_blocks. */
+       v->levels = 0;
+       if (v->data_blocks)
+               while (v->hash_per_block_bits * v->levels < 64 &&
+                      (unsigned long long)(v->data_blocks - 1) >>
+                      (v->hash_per_block_bits * v->levels))
+                       v->levels++;
+
+       if (v->levels > DM_VERITY_MAX_LEVELS) {
+               ti->error = "Too many tree levels";
+               r = -E2BIG;
+               goto bad;
+       }
+
+       /* Lay the levels out on the hash device, top level first. */
+       hash_position = v->hash_start;
+       for (i = v->levels - 1; i >= 0; i--) {
+               sector_t s;
+               v->hash_level_block[i] = hash_position;
+               s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1)
+                                       >> ((i + 1) * v->hash_per_block_bits);
+               if (hash_position + s < hash_position) {
+                       ti->error = "Hash device offset overflow";
+                       r = -E2BIG;
+                       goto bad;
+               }
+               hash_position += s;
+       }
+       v->hash_blocks = hash_position;
+
+       v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
+               1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
+               dm_bufio_alloc_callback, NULL);
+       if (IS_ERR(v->bufio)) {
+               ti->error = "Cannot initialize dm-bufio";
+               r = PTR_ERR(v->bufio);
+               /* Cleared so that verity_dtr() does not destroy an ERR_PTR. */
+               v->bufio = NULL;
+               goto bad;
+       }
+
+       if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) {
+               ti->error = "Hash device is too small";
+               r = -E2BIG;
+               goto bad;
+       }
+
+       /* WQ_UNBOUND greatly improves performance when running on ramdisk */
+       v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
+       if (!v->verify_wq) {
+               ti->error = "Cannot allocate workqueue";
+               r = -ENOMEM;
+               goto bad;
+       }
+
+       /* Per-bio area: the io struct, a hash descriptor and two digests. */
+       ti->per_bio_data_size = sizeof(struct dm_verity_io) +
+                               v->shash_descsize + v->digest_size * 2;
+
+       r = verity_fec_ctr(v);
+       if (r)
+               goto bad;
+
+       ti->per_bio_data_size = roundup(ti->per_bio_data_size,
+                                       __alignof__(struct dm_verity_io));
+
+       return 0;
+
+bad:
+       verity_dtr(ti);
+
+       return r;
+}
+
+/*
+ * Device-mapper target description: registers the "verity" target name and
+ * wires up the callbacks defined above.
+ */
+static struct target_type verity_target = {
+       .name           = "verity",
+       .version        = {1, 3, 0},
+       .module         = THIS_MODULE,
+       .ctr            = verity_ctr,
+       .dtr            = verity_dtr,
+       .map            = verity_map,
+       .status         = verity_status,
+       .prepare_ioctl  = verity_prepare_ioctl,
+       .iterate_devices = verity_iterate_devices,
+       .io_hints       = verity_io_hints,
+};
+
+/*
+ * Module init: register the verity target. Logs and returns the error if
+ * registration fails.
+ */
+static int __init dm_verity_init(void)
+{
+       int r;
+
+       r = dm_register_target(&verity_target);
+       if (r < 0)
+               DMERR("register failed %d", r);
+
+       return r;
+}
+
+/* Module exit: unregister the verity target. */
+static void __exit dm_verity_exit(void)
+{
+       dm_unregister_target(&verity_target);
+}
+
+module_init(dm_verity_init);
+module_exit(dm_verity_exit);
+
+MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
+MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>");
+MODULE_AUTHOR("Will Drewry <wad@chromium.org>");
+MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
deleted file mode 100644 (file)
index ccf4188..0000000
+++ /dev/null
@@ -1,995 +0,0 @@
-/*
- * Copyright (C) 2012 Red Hat, Inc.
- *
- * Author: Mikulas Patocka <mpatocka@redhat.com>
- *
- * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
- *
- * This file is released under the GPLv2.
- *
- * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set
- * default prefetch value. Data are read in "prefetch_cluster" chunks from the
- * hash device. Setting this greatly improves performance when data and hash
- * are on the same disk on different partitions on devices with poor random
- * access behavior.
- */
-
-#include "dm-bufio.h"
-
-#include <linux/module.h>
-#include <linux/device-mapper.h>
-#include <linux/reboot.h>
-#include <crypto/hash.h>
-
-#define DM_MSG_PREFIX                  "verity"
-
-#define DM_VERITY_ENV_LENGTH           42
-#define DM_VERITY_ENV_VAR_NAME         "DM_VERITY_ERR_BLOCK_NR"
-
-#define DM_VERITY_DEFAULT_PREFETCH_SIZE        262144
-
-#define DM_VERITY_MAX_LEVELS           63
-#define DM_VERITY_MAX_CORRUPTED_ERRS   100
-
-#define DM_VERITY_OPT_LOGGING          "ignore_corruption"
-#define DM_VERITY_OPT_RESTART          "restart_on_corruption"
-
-static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
-
-module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
-
-enum verity_mode {
-       DM_VERITY_MODE_EIO,
-       DM_VERITY_MODE_LOGGING,
-       DM_VERITY_MODE_RESTART
-};
-
-enum verity_block_type {
-       DM_VERITY_BLOCK_TYPE_DATA,
-       DM_VERITY_BLOCK_TYPE_METADATA
-};
-
-struct dm_verity {
-       struct dm_dev *data_dev;
-       struct dm_dev *hash_dev;
-       struct dm_target *ti;
-       struct dm_bufio_client *bufio;
-       char *alg_name;
-       struct crypto_shash *tfm;
-       u8 *root_digest;        /* digest of the root block */
-       u8 *salt;               /* salt: its size is salt_size */
-       unsigned salt_size;
-       sector_t data_start;    /* data offset in 512-byte sectors */
-       sector_t hash_start;    /* hash start in blocks */
-       sector_t data_blocks;   /* the number of data blocks */
-       sector_t hash_blocks;   /* the number of hash blocks */
-       unsigned char data_dev_block_bits;      /* log2(data blocksize) */
-       unsigned char hash_dev_block_bits;      /* log2(hash blocksize) */
-       unsigned char hash_per_block_bits;      /* log2(hashes in hash block) */
-       unsigned char levels;   /* the number of tree levels */
-       unsigned char version;
-       unsigned digest_size;   /* digest size for the current hash algorithm */
-       unsigned shash_descsize;/* the size of temporary space for crypto */
-       int hash_failed;        /* set to 1 if hash of any block failed */
-       enum verity_mode mode;  /* mode for handling verification errors */
-       unsigned corrupted_errs;/* Number of errors for corrupted blocks */
-
-       struct workqueue_struct *verify_wq;
-
-       /* starting blocks for each tree level. 0 is the lowest level. */
-       sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
-};
-
-struct dm_verity_io {
-       struct dm_verity *v;
-
-       /* original values of bio->bi_end_io and bio->bi_private */
-       bio_end_io_t *orig_bi_end_io;
-       void *orig_bi_private;
-
-       sector_t block;
-       unsigned n_blocks;
-
-       struct bvec_iter iter;
-
-       struct work_struct work;
-
-       /*
-        * Three variably-size fields follow this struct:
-        *
-        * u8 hash_desc[v->shash_descsize];
-        * u8 real_digest[v->digest_size];
-        * u8 want_digest[v->digest_size];
-        *
-        * To access them use: io_hash_desc(), io_real_digest() and io_want_digest().
-        */
-};
-
-struct dm_verity_prefetch_work {
-       struct work_struct work;
-       struct dm_verity *v;
-       sector_t block;
-       unsigned n_blocks;
-};
-
-static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
-{
-       return (struct shash_desc *)(io + 1);
-}
-
-static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io)
-{
-       return (u8 *)(io + 1) + v->shash_descsize;
-}
-
-static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io)
-{
-       return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
-}
-
-/*
- * Auxiliary structure appended to each dm-bufio buffer. If the value
- * hash_verified is nonzero, hash of the block has been verified.
- *
- * The variable hash_verified is set to 0 when allocating the buffer, then
- * it can be changed to 1 and it is never reset to 0 again.
- *
- * There is no lock around this value; at worst, a race condition can cause
- * multiple processes to verify the hash of the same buffer simultaneously
- * and write 1 to hash_verified simultaneously.
- * This condition is harmless, so we don't need locking.
- */
-struct buffer_aux {
-       int hash_verified;
-};
-
-/*
- * Initialize struct buffer_aux for a freshly created buffer.
- */
-static void dm_bufio_alloc_callback(struct dm_buffer *buf)
-{
-       struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
-
-       aux->hash_verified = 0;
-}
-
-/*
- * Translate input sector number to the sector number on the target device.
- */
-static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector)
-{
-       return v->data_start + dm_target_offset(v->ti, bi_sector);
-}
-
-/*
- * Return hash position of a specified block at a specified tree level
- * (0 is the lowest level).
- * The lowest "hash_per_block_bits"-bits of the result denote hash position
- * inside a hash block. The remaining bits denote location of the hash block.
- */
-static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
-                                        int level)
-{
-       return block >> (level * v->hash_per_block_bits);
-}
-
-static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
-                                sector_t *hash_block, unsigned *offset)
-{
-       sector_t position = verity_position_at_level(v, block, level);
-       unsigned idx;
-
-       *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits);
-
-       if (!offset)
-               return;
-
-       idx = position & ((1 << v->hash_per_block_bits) - 1);
-       if (!v->version)
-               *offset = idx * v->digest_size;
-       else
-               *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits);
-}
-
-/*
- * Handle verification errors.
- */
-static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
-                            unsigned long long block)
-{
-       char verity_env[DM_VERITY_ENV_LENGTH];
-       char *envp[] = { verity_env, NULL };
-       const char *type_str = "";
-       struct mapped_device *md = dm_table_get_md(v->ti->table);
-
-       /* Corruption should be visible in device status in all modes */
-       v->hash_failed = 1;
-
-       if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS)
-               goto out;
-
-       v->corrupted_errs++;
-
-       switch (type) {
-       case DM_VERITY_BLOCK_TYPE_DATA:
-               type_str = "data";
-               break;
-       case DM_VERITY_BLOCK_TYPE_METADATA:
-               type_str = "metadata";
-               break;
-       default:
-               BUG();
-       }
-
-       DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str,
-               block);
-
-       if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
-               DMERR("%s: reached maximum errors", v->data_dev->name);
-
-       snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu",
-               DM_VERITY_ENV_VAR_NAME, type, block);
-
-       kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp);
-
-out:
-       if (v->mode == DM_VERITY_MODE_LOGGING)
-               return 0;
-
-       if (v->mode == DM_VERITY_MODE_RESTART)
-               kernel_restart("dm-verity device corrupted");
-
-       return 1;
-}
-
-/*
- * Verify hash of a metadata block pertaining to the specified data block
- * ("block" argument) at a specified level ("level" argument).
- *
- * On successful return, io_want_digest(v, io) contains the hash value for
- * a lower tree level or for the data block (if we're at the lowest level).
- *
- * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
- * If "skip_unverified" is false, unverified buffer is hashed and verified
- * against current value of io_want_digest(v, io).
- */
-static int verity_verify_level(struct dm_verity_io *io, sector_t block,
-                              int level, bool skip_unverified)
-{
-       struct dm_verity *v = io->v;
-       struct dm_buffer *buf;
-       struct buffer_aux *aux;
-       u8 *data;
-       int r;
-       sector_t hash_block;
-       unsigned offset;
-
-       verity_hash_at_level(v, block, level, &hash_block, &offset);
-
-       data = dm_bufio_read(v->bufio, hash_block, &buf);
-       if (IS_ERR(data))
-               return PTR_ERR(data);
-
-       aux = dm_bufio_get_aux_data(buf);
-
-       if (!aux->hash_verified) {
-               struct shash_desc *desc;
-               u8 *result;
-
-               if (skip_unverified) {
-                       r = 1;
-                       goto release_ret_r;
-               }
-
-               desc = io_hash_desc(v, io);
-               desc->tfm = v->tfm;
-               desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-               r = crypto_shash_init(desc);
-               if (r < 0) {
-                       DMERR("crypto_shash_init failed: %d", r);
-                       goto release_ret_r;
-               }
-
-               if (likely(v->version >= 1)) {
-                       r = crypto_shash_update(desc, v->salt, v->salt_size);
-                       if (r < 0) {
-                               DMERR("crypto_shash_update failed: %d", r);
-                               goto release_ret_r;
-                       }
-               }
-
-               r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits);
-               if (r < 0) {
-                       DMERR("crypto_shash_update failed: %d", r);
-                       goto release_ret_r;
-               }
-
-               if (!v->version) {
-                       r = crypto_shash_update(desc, v->salt, v->salt_size);
-                       if (r < 0) {
-                               DMERR("crypto_shash_update failed: %d", r);
-                               goto release_ret_r;
-                       }
-               }
-
-               result = io_real_digest(v, io);
-               r = crypto_shash_final(desc, result);
-               if (r < 0) {
-                       DMERR("crypto_shash_final failed: %d", r);
-                       goto release_ret_r;
-               }
-               if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
-                       if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA,
-                                             hash_block)) {
-                               r = -EIO;
-                               goto release_ret_r;
-                       }
-               } else
-                       aux->hash_verified = 1;
-       }
-
-       data += offset;
-
-       memcpy(io_want_digest(v, io), data, v->digest_size);
-
-       dm_bufio_release(buf);
-       return 0;
-
-release_ret_r:
-       dm_bufio_release(buf);
-
-       return r;
-}
-
-/*
- * Verify one "dm_verity_io" structure.
- */
-static int verity_verify_io(struct dm_verity_io *io)
-{
-       struct dm_verity *v = io->v;
-       struct bio *bio = dm_bio_from_per_bio_data(io,
-                                                  v->ti->per_bio_data_size);
-       unsigned b;
-       int i;
-
-       for (b = 0; b < io->n_blocks; b++) {
-               struct shash_desc *desc;
-               u8 *result;
-               int r;
-               unsigned todo;
-
-               if (likely(v->levels)) {
-                       /*
-                        * First, we try to get the requested hash for
-                        * the current block. If the hash block itself is
-                        * verified, zero is returned. If it isn't, this
-                        * function returns 1 and we fall back to whole
-                        * chain verification.
-                        */
-                       int r = verity_verify_level(io, io->block + b, 0, true);
-                       if (likely(!r))
-                               goto test_block_hash;
-                       if (r < 0)
-                               return r;
-               }
-
-               memcpy(io_want_digest(v, io), v->root_digest, v->digest_size);
-
-               for (i = v->levels - 1; i >= 0; i--) {
-                       int r = verity_verify_level(io, io->block + b, i, false);
-                       if (unlikely(r))
-                               return r;
-               }
-
-test_block_hash:
-               desc = io_hash_desc(v, io);
-               desc->tfm = v->tfm;
-               desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-               r = crypto_shash_init(desc);
-               if (r < 0) {
-                       DMERR("crypto_shash_init failed: %d", r);
-                       return r;
-               }
-
-               if (likely(v->version >= 1)) {
-                       r = crypto_shash_update(desc, v->salt, v->salt_size);
-                       if (r < 0) {
-                               DMERR("crypto_shash_update failed: %d", r);
-                               return r;
-                       }
-               }
-               todo = 1 << v->data_dev_block_bits;
-               do {
-                       u8 *page;
-                       unsigned len;
-                       struct bio_vec bv = bio_iter_iovec(bio, io->iter);
-
-                       page = kmap_atomic(bv.bv_page);
-                       len = bv.bv_len;
-                       if (likely(len >= todo))
-                               len = todo;
-                       r = crypto_shash_update(desc, page + bv.bv_offset, len);
-                       kunmap_atomic(page);
-
-                       if (r < 0) {
-                               DMERR("crypto_shash_update failed: %d", r);
-                               return r;
-                       }
-
-                       bio_advance_iter(bio, &io->iter, len);
-                       todo -= len;
-               } while (todo);
-
-               if (!v->version) {
-                       r = crypto_shash_update(desc, v->salt, v->salt_size);
-                       if (r < 0) {
-                               DMERR("crypto_shash_update failed: %d", r);
-                               return r;
-                       }
-               }
-
-               result = io_real_digest(v, io);
-               r = crypto_shash_final(desc, result);
-               if (r < 0) {
-                       DMERR("crypto_shash_final failed: %d", r);
-                       return r;
-               }
-               if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
-                       if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
-                                             io->block + b))
-                               return -EIO;
-               }
-       }
-
-       return 0;
-}
-
-/*
- * End one "io" structure with a given error.
- */
-static void verity_finish_io(struct dm_verity_io *io, int error)
-{
-       struct dm_verity *v = io->v;
-       struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
-
-       bio->bi_end_io = io->orig_bi_end_io;
-       bio->bi_private = io->orig_bi_private;
-       bio->bi_error = error;
-
-       bio_endio(bio);
-}
-
-static void verity_work(struct work_struct *w)
-{
-       struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
-
-       verity_finish_io(io, verity_verify_io(io));
-}
-
-static void verity_end_io(struct bio *bio)
-{
-       struct dm_verity_io *io = bio->bi_private;
-
-       if (bio->bi_error) {
-               verity_finish_io(io, bio->bi_error);
-               return;
-       }
-
-       INIT_WORK(&io->work, verity_work);
-       queue_work(io->v->verify_wq, &io->work);
-}
-
-/*
- * Prefetch buffers for the specified io.
- * The root buffer is not prefetched, it is assumed that it will be cached
- * all the time.
- */
-static void verity_prefetch_io(struct work_struct *work)
-{
-       struct dm_verity_prefetch_work *pw =
-               container_of(work, struct dm_verity_prefetch_work, work);
-       struct dm_verity *v = pw->v;
-       int i;
-
-       for (i = v->levels - 2; i >= 0; i--) {
-               sector_t hash_block_start;
-               sector_t hash_block_end;
-               verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
-               verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
-               if (!i) {
-                       unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
-
-                       cluster >>= v->data_dev_block_bits;
-                       if (unlikely(!cluster))
-                               goto no_prefetch_cluster;
-
-                       if (unlikely(cluster & (cluster - 1)))
-                               cluster = 1 << __fls(cluster);
-
-                       hash_block_start &= ~(sector_t)(cluster - 1);
-                       hash_block_end |= cluster - 1;
-                       if (unlikely(hash_block_end >= v->hash_blocks))
-                               hash_block_end = v->hash_blocks - 1;
-               }
-no_prefetch_cluster:
-               dm_bufio_prefetch(v->bufio, hash_block_start,
-                                 hash_block_end - hash_block_start + 1);
-       }
-
-       kfree(pw);
-}
-
-static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
-{
-       struct dm_verity_prefetch_work *pw;
-
-       pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
-               GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
-
-       if (!pw)
-               return;
-
-       INIT_WORK(&pw->work, verity_prefetch_io);
-       pw->v = v;
-       pw->block = io->block;
-       pw->n_blocks = io->n_blocks;
-       queue_work(v->verify_wq, &pw->work);
-}
-
-/*
- * Bio map function. It fills in the per-bio dm_verity_io structure, then
- * issues prefetches and submits the I/O.
- */
-static int verity_map(struct dm_target *ti, struct bio *bio)
-{
-       struct dm_verity *v = ti->private;
-       struct dm_verity_io *io;
-
-       bio->bi_bdev = v->data_dev->bdev;
-       bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector);
-
-       if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
-           ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
-               DMERR_LIMIT("unaligned io");
-               return -EIO;
-       }
-
-       if (bio_end_sector(bio) >>
-           (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
-               DMERR_LIMIT("io out of range");
-               return -EIO;
-       }
-
-       if (bio_data_dir(bio) == WRITE)
-               return -EIO;
-
-       io = dm_per_bio_data(bio, ti->per_bio_data_size);
-       io->v = v;
-       io->orig_bi_end_io = bio->bi_end_io;
-       io->orig_bi_private = bio->bi_private;
-       io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
-       io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
-
-       bio->bi_end_io = verity_end_io;
-       bio->bi_private = io;
-       io->iter = bio->bi_iter;
-
-       verity_submit_prefetch(v, io);
-
-       generic_make_request(bio);
-
-       return DM_MAPIO_SUBMITTED;
-}
-
-/*
- * Status: V (valid) or C (corruption found)
- */
-static void verity_status(struct dm_target *ti, status_type_t type,
-                         unsigned status_flags, char *result, unsigned maxlen)
-{
-       struct dm_verity *v = ti->private;
-       unsigned sz = 0;
-       unsigned x;
-
-       switch (type) {
-       case STATUSTYPE_INFO:
-               DMEMIT("%c", v->hash_failed ? 'C' : 'V');
-               break;
-       case STATUSTYPE_TABLE:
-               DMEMIT("%u %s %s %u %u %llu %llu %s ",
-                       v->version,
-                       v->data_dev->name,
-                       v->hash_dev->name,
-                       1 << v->data_dev_block_bits,
-                       1 << v->hash_dev_block_bits,
-                       (unsigned long long)v->data_blocks,
-                       (unsigned long long)v->hash_start,
-                       v->alg_name
-                       );
-               for (x = 0; x < v->digest_size; x++)
-                       DMEMIT("%02x", v->root_digest[x]);
-               DMEMIT(" ");
-               if (!v->salt_size)
-                       DMEMIT("-");
-               else
-                       for (x = 0; x < v->salt_size; x++)
-                               DMEMIT("%02x", v->salt[x]);
-               if (v->mode != DM_VERITY_MODE_EIO) {
-                       DMEMIT(" 1 ");
-                       switch (v->mode) {
-                       case DM_VERITY_MODE_LOGGING:
-                               DMEMIT(DM_VERITY_OPT_LOGGING);
-                               break;
-                       case DM_VERITY_MODE_RESTART:
-                               DMEMIT(DM_VERITY_OPT_RESTART);
-                               break;
-                       default:
-                               BUG();
-                       }
-               }
-               break;
-       }
-}
-
-static int verity_prepare_ioctl(struct dm_target *ti,
-               struct block_device **bdev, fmode_t *mode)
-{
-       struct dm_verity *v = ti->private;
-
-       *bdev = v->data_dev->bdev;
-
-       if (v->data_start ||
-           ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT)
-               return 1;
-       return 0;
-}
-
-static int verity_iterate_devices(struct dm_target *ti,
-                                 iterate_devices_callout_fn fn, void *data)
-{
-       struct dm_verity *v = ti->private;
-
-       return fn(ti, v->data_dev, v->data_start, ti->len, data);
-}
-
-static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
-{
-       struct dm_verity *v = ti->private;
-
-       if (limits->logical_block_size < 1 << v->data_dev_block_bits)
-               limits->logical_block_size = 1 << v->data_dev_block_bits;
-
-       if (limits->physical_block_size < 1 << v->data_dev_block_bits)
-               limits->physical_block_size = 1 << v->data_dev_block_bits;
-
-       blk_limits_io_min(limits, limits->logical_block_size);
-}
-
-static void verity_dtr(struct dm_target *ti)
-{
-       struct dm_verity *v = ti->private;
-
-       if (v->verify_wq)
-               destroy_workqueue(v->verify_wq);
-
-       if (v->bufio)
-               dm_bufio_client_destroy(v->bufio);
-
-       kfree(v->salt);
-       kfree(v->root_digest);
-
-       if (v->tfm)
-               crypto_free_shash(v->tfm);
-
-       kfree(v->alg_name);
-
-       if (v->hash_dev)
-               dm_put_device(ti, v->hash_dev);
-
-       if (v->data_dev)
-               dm_put_device(ti, v->data_dev);
-
-       kfree(v);
-}
-
-/*
- * Target parameters:
- *     <version>       The current format is version 1.
- *                     Vsn 0 is compatible with original Chromium OS releases.
- *     <data device>
- *     <hash device>
- *     <data block size>
- *     <hash block size>
- *     <the number of data blocks>
- *     <hash start block>
- *     <algorithm>
- *     <digest>
- *     <salt>          Hex string or "-" if no salt.
- */
-static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
-{
-       struct dm_verity *v;
-       struct dm_arg_set as;
-       const char *opt_string;
-       unsigned int num, opt_params;
-       unsigned long long num_ll;
-       int r;
-       int i;
-       sector_t hash_position;
-       char dummy;
-
-       static struct dm_arg _args[] = {
-               {0, 1, "Invalid number of feature args"},
-       };
-
-       v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
-       if (!v) {
-               ti->error = "Cannot allocate verity structure";
-               return -ENOMEM;
-       }
-       ti->private = v;
-       v->ti = ti;
-
-       if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
-               ti->error = "Device must be readonly";
-               r = -EINVAL;
-               goto bad;
-       }
-
-       if (argc < 10) {
-               ti->error = "Not enough arguments";
-               r = -EINVAL;
-               goto bad;
-       }
-
-       if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 ||
-           num > 1) {
-               ti->error = "Invalid version";
-               r = -EINVAL;
-               goto bad;
-       }
-       v->version = num;
-
-       r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
-       if (r) {
-               ti->error = "Data device lookup failed";
-               goto bad;
-       }
-
-       r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
-       if (r) {
-               ti->error = "Data device lookup failed";
-               goto bad;
-       }
-
-       if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 ||
-           !num || (num & (num - 1)) ||
-           num < bdev_logical_block_size(v->data_dev->bdev) ||
-           num > PAGE_SIZE) {
-               ti->error = "Invalid data device block size";
-               r = -EINVAL;
-               goto bad;
-       }
-       v->data_dev_block_bits = __ffs(num);
-
-       if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 ||
-           !num || (num & (num - 1)) ||
-           num < bdev_logical_block_size(v->hash_dev->bdev) ||
-           num > INT_MAX) {
-               ti->error = "Invalid hash device block size";
-               r = -EINVAL;
-               goto bad;
-       }
-       v->hash_dev_block_bits = __ffs(num);
-
-       if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
-           (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
-           >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
-               ti->error = "Invalid data blocks";
-               r = -EINVAL;
-               goto bad;
-       }
-       v->data_blocks = num_ll;
-
-       if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) {
-               ti->error = "Data device is too small";
-               r = -EINVAL;
-               goto bad;
-       }
-
-       if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
-           (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
-           >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
-               ti->error = "Invalid hash start";
-               r = -EINVAL;
-               goto bad;
-       }
-       v->hash_start = num_ll;
-
-       v->alg_name = kstrdup(argv[7], GFP_KERNEL);
-       if (!v->alg_name) {
-               ti->error = "Cannot allocate algorithm name";
-               r = -ENOMEM;
-               goto bad;
-       }
-
-       v->tfm = crypto_alloc_shash(v->alg_name, 0, 0);
-       if (IS_ERR(v->tfm)) {
-               ti->error = "Cannot initialize hash function";
-               r = PTR_ERR(v->tfm);
-               v->tfm = NULL;
-               goto bad;
-       }
-       v->digest_size = crypto_shash_digestsize(v->tfm);
-       if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
-               ti->error = "Digest size too big";
-               r = -EINVAL;
-               goto bad;
-       }
-       v->shash_descsize =
-               sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm);
-
-       v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
-       if (!v->root_digest) {
-               ti->error = "Cannot allocate root digest";
-               r = -ENOMEM;
-               goto bad;
-       }
-       if (strlen(argv[8]) != v->digest_size * 2 ||
-           hex2bin(v->root_digest, argv[8], v->digest_size)) {
-               ti->error = "Invalid root digest";
-               r = -EINVAL;
-               goto bad;
-       }
-
-       if (strcmp(argv[9], "-")) {
-               v->salt_size = strlen(argv[9]) / 2;
-               v->salt = kmalloc(v->salt_size, GFP_KERNEL);
-               if (!v->salt) {
-                       ti->error = "Cannot allocate salt";
-                       r = -ENOMEM;
-                       goto bad;
-               }
-               if (strlen(argv[9]) != v->salt_size * 2 ||
-                   hex2bin(v->salt, argv[9], v->salt_size)) {
-                       ti->error = "Invalid salt";
-                       r = -EINVAL;
-                       goto bad;
-               }
-       }
-
-       argv += 10;
-       argc -= 10;
-
-       /* Optional parameters */
-       if (argc) {
-               as.argc = argc;
-               as.argv = argv;
-
-               r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
-               if (r)
-                       goto bad;
-
-               while (opt_params) {
-                       opt_params--;
-                       opt_string = dm_shift_arg(&as);
-                       if (!opt_string) {
-                               ti->error = "Not enough feature arguments";
-                               r = -EINVAL;
-                               goto bad;
-                       }
-
-                       if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING))
-                               v->mode = DM_VERITY_MODE_LOGGING;
-                       else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART))
-                               v->mode = DM_VERITY_MODE_RESTART;
-                       else {
-                               ti->error = "Invalid feature arguments";
-                               r = -EINVAL;
-                               goto bad;
-                       }
-               }
-       }
-
-       v->hash_per_block_bits =
-               __fls((1 << v->hash_dev_block_bits) / v->digest_size);
-
-       v->levels = 0;
-       if (v->data_blocks)
-               while (v->hash_per_block_bits * v->levels < 64 &&
-                      (unsigned long long)(v->data_blocks - 1) >>
-                      (v->hash_per_block_bits * v->levels))
-                       v->levels++;
-
-       if (v->levels > DM_VERITY_MAX_LEVELS) {
-               ti->error = "Too many tree levels";
-               r = -E2BIG;
-               goto bad;
-       }
-
-       hash_position = v->hash_start;
-       for (i = v->levels - 1; i >= 0; i--) {
-               sector_t s;
-               v->hash_level_block[i] = hash_position;
-               s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1)
-                                       >> ((i + 1) * v->hash_per_block_bits);
-               if (hash_position + s < hash_position) {
-                       ti->error = "Hash device offset overflow";
-                       r = -E2BIG;
-                       goto bad;
-               }
-               hash_position += s;
-       }
-       v->hash_blocks = hash_position;
-
-       v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
-               1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
-               dm_bufio_alloc_callback, NULL);
-       if (IS_ERR(v->bufio)) {
-               ti->error = "Cannot initialize dm-bufio";
-               r = PTR_ERR(v->bufio);
-               v->bufio = NULL;
-               goto bad;
-       }
-
-       if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) {
-               ti->error = "Hash device is too small";
-               r = -E2BIG;
-               goto bad;
-       }
-
-       ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io));
-
-       /* WQ_UNBOUND greatly improves performance when running on ramdisk */
-       v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
-       if (!v->verify_wq) {
-               ti->error = "Cannot allocate workqueue";
-               r = -ENOMEM;
-               goto bad;
-       }
-
-       return 0;
-
-bad:
-       verity_dtr(ti);
-
-       return r;
-}
-
-static struct target_type verity_target = {
-       .name           = "verity",
-       .version        = {1, 2, 0},
-       .module         = THIS_MODULE,
-       .ctr            = verity_ctr,
-       .dtr            = verity_dtr,
-       .map            = verity_map,
-       .status         = verity_status,
-       .prepare_ioctl  = verity_prepare_ioctl,
-       .iterate_devices = verity_iterate_devices,
-       .io_hints       = verity_io_hints,
-};
-
-static int __init dm_verity_init(void)
-{
-       int r;
-
-       r = dm_register_target(&verity_target);
-       if (r < 0)
-               DMERR("register failed %d", r);
-
-       return r;
-}
-
-static void __exit dm_verity_exit(void)
-{
-       dm_unregister_target(&verity_target);
-}
-
-module_init(dm_verity_init);
-module_exit(dm_verity_exit);
-
-MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
-MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>");
-MODULE_AUTHOR("Will Drewry <wad@chromium.org>");
-MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking");
-MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
new file mode 100644 (file)
index 0000000..fb419f4
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef DM_VERITY_H
+#define DM_VERITY_H
+
+#include "dm-bufio.h"
+#include <linux/device-mapper.h>
+#include <crypto/hash.h>
+
+#define DM_VERITY_MAX_LEVELS           63
+
+/*
+ * Selects how verification errors are handled; this is the type of the
+ * "mode" field in struct dm_verity.
+ * NOTE(review): the enumerator names suggest "fail the I/O with EIO",
+ * "log only" and "restart" -- confirm against the code that acts on them.
+ */
+enum verity_mode {
+       DM_VERITY_MODE_EIO,
+       DM_VERITY_MODE_LOGGING,
+       DM_VERITY_MODE_RESTART
+};
+
+/*
+ * Distinguishes data blocks from metadata blocks.
+ * NOTE(review): the users of this enum are outside this header; presumably
+ * it labels which kind of block failed verification -- confirm.
+ */
+enum verity_block_type {
+       DM_VERITY_BLOCK_TYPE_DATA,
+       DM_VERITY_BLOCK_TYPE_METADATA
+};
+
+struct dm_verity_fec;
+
+/*
+ * State of one dm-verity instance: the data and hash devices, the hash
+ * algorithm with its salt and digests, the geometry of the hash tree, the
+ * error-handling mode and the optional forward error correction state.
+ */
+struct dm_verity {
+       struct dm_dev *data_dev;
+       struct dm_dev *hash_dev;
+       struct dm_target *ti;
+       /* NOTE(review): presumably the bufio client for the hash device -- confirm */
+       struct dm_bufio_client *bufio;
+       char *alg_name;
+       struct crypto_shash *tfm;
+       u8 *root_digest;        /* digest of the root block */
+       u8 *salt;               /* salt: its size is salt_size */
+       u8 *zero_digest;        /* digest for a zero block */
+       unsigned salt_size;
+       sector_t data_start;    /* data offset in 512-byte sectors */
+       sector_t hash_start;    /* hash start in blocks */
+       sector_t data_blocks;   /* the number of data blocks */
+       sector_t hash_blocks;   /* the number of hash blocks */
+       unsigned char data_dev_block_bits;      /* log2(data blocksize) */
+       unsigned char hash_dev_block_bits;      /* log2(hash blocksize) */
+       unsigned char hash_per_block_bits;      /* log2(hashes in hash block) */
+       unsigned char levels;   /* the number of tree levels */
+       /* NOTE(review): presumably the hash format version -- confirm */
+       unsigned char version;
+       unsigned digest_size;   /* digest size for the current hash algorithm */
+       unsigned shash_descsize;/* the size of temporary space for crypto */
+       int hash_failed;        /* set to 1 if hash of any block failed */
+       enum verity_mode mode;  /* mode for handling verification errors */
+       unsigned corrupted_errs;/* Number of errors for corrupted blocks */
+
+       /* NOTE(review): presumably where struct dm_verity_io work items are queued -- confirm */
+       struct workqueue_struct *verify_wq;
+
+       /* starting blocks for each tree level. 0 is the lowest level. */
+       sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
+
+       struct dm_verity_fec *fec;      /* forward error correction */
+};
+
+/*
+ * Per-bio verification state.  The per-bio data area is sized to hold this
+ * struct plus the three trailing areas described at the end of it.
+ */
+struct dm_verity_io {
+       struct dm_verity *v;
+
+       /* original value of bio->bi_end_io */
+       bio_end_io_t *orig_bi_end_io;
+
+       /* NOTE(review): presumably the first block and block count of the bio -- confirm */
+       sector_t block;
+       unsigned n_blocks;
+
+       struct bvec_iter iter;
+
+       struct work_struct work;
+
+       /*
+        * Three variably-sized fields follow this struct:
+        *
+        * u8 hash_desc[v->shash_descsize];
+        * u8 real_digest[v->digest_size];
+        * u8 want_digest[v->digest_size];
+        *
+        * To access them use: verity_io_hash_desc(), verity_io_real_digest()
+        * and verity_io_want_digest().
+        */
+};
+
+/*
+ * Return the hash descriptor area, which begins immediately after the
+ * struct dm_verity_io itself.  The verity instance is not consulted.
+ */
+static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v,
+                                                    struct dm_verity_io *io)
+{
+       /* First byte past the fixed-size part of the per-bio state. */
+       void *trailer = io + 1;
+
+       return trailer;
+}
+
+/*
+ * Return the real_digest area: it follows the hash descriptor area, i.e. it
+ * sits v->shash_descsize bytes past the end of the struct dm_verity_io.
+ */
+static inline u8 *verity_io_real_digest(struct dm_verity *v,
+                                       struct dm_verity_io *io)
+{
+       u8 *trailer = (u8 *)(io + 1);
+
+       return &trailer[v->shash_descsize];
+}
+
+/*
+ * Return the want_digest area: it follows real_digest, i.e. it sits
+ * v->digest_size bytes past the start of real_digest.
+ */
+static inline u8 *verity_io_want_digest(struct dm_verity *v,
+                                       struct dm_verity_io *io)
+{
+       u8 *real = verity_io_real_digest(v, io);
+
+       return real + v->digest_size;
+}
+
+/*
+ * Return the address one byte past the want_digest area, i.e. the end of
+ * the three variable-size areas that trail struct dm_verity_io.
+ */
+static inline u8 *verity_io_digest_end(struct dm_verity *v,
+                                      struct dm_verity_io *io)
+{
+       u8 *want = verity_io_want_digest(v, io);
+
+       return want + v->digest_size;
+}
+
+/*
+ * Defined outside this header.  Takes a bvec iterator and a callback; the
+ * callback receives the verity instance, the per-bio state, a data pointer
+ * and a length.
+ * NOTE(review): presumably runs the callback over the bio segments that make
+ * up one data block and advances *iter -- confirm against the definition.
+ */
+extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+                              struct bvec_iter *iter,
+                              int (*process)(struct dm_verity *v,
+                                             struct dm_verity_io *io,
+                                             u8 *data, size_t len));
+
+/*
+ * Defined outside this header.
+ * NOTE(review): presumably hashes len bytes at data using desc and writes
+ * the result to digest -- confirm against the definition.
+ */
+extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+                      const u8 *data, size_t len, u8 *digest);
+
+/*
+ * Defined outside this header.
+ * NOTE(review): presumably stores in digest the expected hash of the given
+ * block and reports through *is_zero whether it is the zero-block digest --
+ * confirm against the definition.
+ */
+extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+                                sector_t block, u8 *digest, bool *is_zero);
+
+#endif /* DM_VERITY_H */
index 78c74bb71ba42f11ff5035a5a593ed5732df9791..a53cbc928af1927c1c10d8f79abce0c64a939ed7 100644 (file)
@@ -7,12 +7,3 @@ config DM_PERSISTENT_DATA
         Library providing immutable on-disk data structure support for
         device-mapper targets such as the thin provisioning target.
 
-config DM_DEBUG_BLOCK_STACK_TRACING
-       bool "Keep stack trace of persistent data block lock holders"
-       depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA
-       select STACKTRACE
-       ---help---
-        Enable this for messages that may help debug problems with the
-        block manager locking used by thin provisioning and caching.
-
-        If unsure, say N.
index f2393ba838eb57cfa977228da492814d7dda42af..1e33dd51c21ff766418a30c8f44cff6846a92f55 100644 (file)
@@ -97,10 +97,6 @@ static void __del_holder(struct block_lock *lock, struct task_struct *task)
 static int __check_holder(struct block_lock *lock)
 {
        unsigned i;
-#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
-       static struct stack_trace t;
-       static stack_entries entries;
-#endif
 
        for (i = 0; i < MAX_HOLDERS; i++) {
                if (lock->holders[i] == current) {
@@ -110,12 +106,7 @@ static int __check_holder(struct block_lock *lock)
                        print_stack_trace(lock->traces + i, 4);
 
                        DMERR("subsequent acquisition attempted here:");
-                       t.nr_entries = 0;
-                       t.max_entries = MAX_STACK;
-                       t.entries = entries;
-                       t.skip = 3;
-                       save_stack_trace(&t);
-                       print_stack_trace(&t, 4);
+                       dump_stack();
 #endif
                        return -EINVAL;
                }
index b1ced58eb5e1475b440a15de0e26811fb90b8159..ea3d3b656fd0a1f8f55a0bcb0285bf4a94f6c38d 100644 (file)
@@ -754,12 +754,19 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
        return 0;
 }
 
+/*
+ * Return true when index lies at or beyond the node's entry count, or when
+ * the key stored at index differs from keys[level].
+ */
+static bool need_insert(struct btree_node *node, uint64_t *keys,
+                       unsigned level, unsigned index)
+{
+       /* Checked first so node->keys[index] is never read past the entries. */
+       if (index >= le32_to_cpu(node->header.nr_entries))
+               return true;
+
+       return le64_to_cpu(node->keys[index]) != keys[level];
+}
+
 static int insert(struct dm_btree_info *info, dm_block_t root,
                  uint64_t *keys, void *value, dm_block_t *new_root,
                  int *inserted)
                  __dm_written_to_disk(value)
 {
-       int r, need_insert;
+       int r;
        unsigned level, index = -1, last_level = info->levels - 1;
        dm_block_t block = root;
        struct shadow_spine spine;
@@ -775,10 +782,8 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
                        goto bad;
 
                n = dm_block_data(shadow_current(&spine));
-               need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
-                              (le64_to_cpu(n->keys[index]) != keys[level]));
 
-               if (need_insert) {
+               if (need_insert(n, keys, level, index)) {
                        dm_block_t new_tree;
                        __le64 new_le;
 
@@ -805,10 +810,8 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
                goto bad;
 
        n = dm_block_data(shadow_current(&spine));
-       need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
-                      (le64_to_cpu(n->keys[index]) != keys[level]));
 
-       if (need_insert) {
+       if (need_insert(n, keys, level, index)) {
                if (inserted)
                        *inserted = 1;
 
index fca6dbcf9a4727f85d61f7d161094870f5db24d5..7e44005595c1e77d1982e7e89b1259ddea97bf9a 100644 (file)
@@ -152,12 +152,9 @@ static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
 
 static int brb_pop(struct bop_ring_buffer *brb)
 {
-       struct block_op *bop;
-
        if (brb_empty(brb))
                return -ENODATA;
 
-       bop = brb->bops + brb->begin;
        brb->begin = brb_next(brb, brb->begin);
 
        return 0;