]> git.proxmox.com Git - zfsonlinux.git/blame - zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch
bump version to 0.7.11-pve1~bpo1
[zfsonlinux.git] / zfs-patches / 0014-zpool-reopen-should-detect-expanded-devices.patch
CommitLineData
a010b409
SI
1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2From: Sara Hartse <sara.hartse@gmail.com>
3Date: Thu, 31 May 2018 10:36:37 -0700
4Subject: [PATCH] zpool reopen should detect expanded devices
5
6Update bdev_capacity to have wholedisk vdevs query the
7size of the underlying block device (correcting for the size
8of the efi partition and partition alignment) and therefore detect
9expanded space.
10
11Correct vdev_get_stats_ex so that the expandsize is aligned
12to metaslab size and new space is only reported if it is large
13enough for a new metaslab.
14
15Reviewed by: Don Brady <don.brady@delphix.com>
16Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
17Reviewed by: George Wilson <george.wilson@delphix.com>
18Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
19Reviewed by: John Wren Kennedy <jwk404@gmail.com>
20Signed-off-by: sara hartse <sara.hartse@delphix.com>
21External-issue: LX-165
22Closes #7546
23Issue #7582
24
25Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
26---
27 include/sys/vdev_disk.h | 12 +++++
28 lib/libefi/rdwr_efi.c | 20 +++++++-
29 lib/libzfs/libzfs_pool.c | 14 +-----
30 module/zfs/vdev.c | 3 +-
31 module/zfs/vdev_disk.c | 46 +++++++++++++-----
32 .../cli_root/zpool_expand/zpool_expand_002_pos.ksh | 54 +++++++++++++++-------
33 6 files changed, 107 insertions(+), 42 deletions(-)
34
35diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h
36index 15570b10..b8a32b31 100644
37--- a/include/sys/vdev_disk.h
38+++ b/include/sys/vdev_disk.h
39@@ -23,11 +23,23 @@
40 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
41 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
42 * LLNL-CODE-403049.
43+ * Copyright (c) 2018 by Delphix. All rights reserved.
44 */
45
46 #ifndef _SYS_VDEV_DISK_H
47 #define _SYS_VDEV_DISK_H
48
49+/*
50+ * Don't start the slice at the default block of 34; many storage
51+ * devices will use a stripe width of 128k, other vendors prefer a 1m
52+ * alignment. It is best to play it safe and ensure a 1m alignment
53+ * given 512B blocks. When the block size is larger by a power of 2
54+ * we will still be 1m aligned. Some devices are sensitive to the
55+ * partition ending alignment as well.
56+ */
57+#define NEW_START_BLOCK 2048
58+#define PARTITION_END_ALIGNMENT 2048
59+
60 #ifdef _KERNEL
61 #include <sys/vdev.h>
62
63diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c
64index 7935047e..19cb17e5 100644
65--- a/lib/libefi/rdwr_efi.c
66+++ b/lib/libefi/rdwr_efi.c
67@@ -22,6 +22,7 @@
68 /*
69 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
70 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
71+ * Copyright (c) 2018 by Delphix. All rights reserved.
72 */
73
74 #include <stdio.h>
75@@ -1153,7 +1154,7 @@ efi_use_whole_disk(int fd)
76
77 /*
78 * Find the last physically non-zero partition.
79- * This is the reserved partition.
80+ * This should be the reserved partition.
81 */
82 for (i = 0; i < efi_label->efi_nparts; i ++) {
83 if (resv_start < efi_label->efi_parts[i].p_start) {
84@@ -1163,6 +1164,23 @@ efi_use_whole_disk(int fd)
85 }
86
87 /*
88+ * Verify that we've found the reserved partition by checking
89+ * that it looks the way it did when we created it in zpool_label_disk.
90+ * If we've found the incorrect partition, then we know that this
91+ * device was reformatted and no longer is solely used by ZFS.
92+ */
93+ if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) ||
94+ (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) ||
95+ (resv_index != 8)) {
96+ if (efi_debug) {
97+ (void) fprintf(stderr,
98+ "efi_use_whole_disk: wholedisk not available\n");
99+ }
100+ efi_free(efi_label);
101+ return (VT_ENOSPC);
102+ }
103+
104+ /*
105 * Find the last physically non-zero partition before that.
106 * This is the data partition.
107 */
108diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
109index e00d5f51..53bc5034 100644
110--- a/lib/libzfs/libzfs_pool.c
111+++ b/lib/libzfs/libzfs_pool.c
112@@ -22,7 +22,7 @@
113 /*
114 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
115 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
116- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
117+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
118 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
119 * Copyright (c) 2017 Datto Inc.
120 */
121@@ -42,6 +42,7 @@
122 #include <sys/efi_partition.h>
123 #include <sys/vtoc.h>
124 #include <sys/zfs_ioctl.h>
125+#include <sys/vdev_disk.h>
126 #include <dlfcn.h>
127
128 #include "zfs_namecheck.h"
129@@ -913,17 +914,6 @@ zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
130 }
131
132 /*
133- * Don't start the slice at the default block of 34; many storage
134- * devices will use a stripe width of 128k, other vendors prefer a 1m
135- * alignment. It is best to play it safe and ensure a 1m alignment
136- * given 512B blocks. When the block size is larger by a power of 2
137- * we will still be 1m aligned. Some devices are sensitive to the
138- * partition ending alignment as well.
139- */
140-#define NEW_START_BLOCK 2048
141-#define PARTITION_END_ALIGNMENT 2048
142-
143-/*
144 * Validate the given pool name, optionally putting an extended error message in
145 * 'buf'.
146 */
147diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
148index acac2a97..b643bd35 100644
149--- a/module/zfs/vdev.c
150+++ b/module/zfs/vdev.c
151@@ -21,7 +21,7 @@
152
153 /*
154 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
155- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
156+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
157 * Copyright 2017 Nexenta Systems, Inc.
158 * Copyright (c) 2014 Integros [integros.com]
159 * Copyright 2016 Toomas Soome <tsoome@me.com>
160@@ -3039,7 +3039,6 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
161 vd->vdev_max_asize - vd->vdev_asize,
162 1ULL << tvd->vdev_ms_shift);
163 }
164- vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
165 if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
166 !vd->vdev_ishole) {
167 vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
168diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
169index 6761e755..6dc0544f 100644
170--- a/module/zfs/vdev_disk.c
171+++ b/module/zfs/vdev_disk.c
172@@ -23,7 +23,7 @@
173 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
174 * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
175 * LLNL-CODE-403049.
176- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
177+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
178 */
179
180 #include <sys/zfs_context.h>
181@@ -35,10 +35,14 @@
182 #include <sys/zio.h>
183 #include <sys/sunldi.h>
184 #include <linux/mod_compat.h>
185+#include <linux/msdos_fs.h>
186
187 char *zfs_vdev_scheduler = VDEV_SCHEDULER;
188 static void *zfs_vdev_holder = VDEV_HOLDER;
189
190+/* size of the "reserved" partition, in blocks */
191+#define EFI_MIN_RESV_SIZE (16 * 1024)
192+
193 /*
194 * Virtual device vector for disks.
195 */
196@@ -82,17 +86,39 @@ vdev_bdev_mode(int smode)
197 }
198 #endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
199
200+/* The capacity (in bytes) of a bdev that is available to be used by a vdev */
201 static uint64_t
202-bdev_capacity(struct block_device *bdev)
203+bdev_capacity(struct block_device *bdev, boolean_t wholedisk)
204 {
205 struct hd_struct *part = bdev->bd_part;
206+ uint64_t sectors = get_capacity(bdev->bd_disk);
207+ /* If there are no partitions, return the entire device capacity */
208+ if (part == NULL)
209+ return (sectors << SECTOR_BITS);
210
211- /* The partition capacity referenced by the block device */
212- if (part)
213- return (part->nr_sects << 9);
214-
215- /* Otherwise assume the full device capacity */
216- return (get_capacity(bdev->bd_disk) << 9);
217+ /*
218+ * If there are partitions, decide if we are using a `wholedisk`
219+ * layout (composed of part1 and part9) or just a single partition.
220+ */
221+ if (wholedisk) {
222+ /* Verify the expected device layout */
223+ ASSERT3P(bdev, !=, bdev->bd_contains);
224+ /*
225+ * Sectors used by the EFI partition (part9) as well as
226+ * partition alignment.
227+ */
228+ uint64_t used = EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
229+ PARTITION_END_ALIGNMENT;
230+
231+ /* Space available to the vdev, i.e. the size of part1 */
232+ if (sectors <= used)
233+ return (0);
234+ uint64_t available = sectors - used;
235+ return (available << SECTOR_BITS);
236+ } else {
237+ /* The partition capacity referenced by the block device */
238+ return (part->nr_sects << SECTOR_BITS);
239+ }
240 }
241
242 static void
243@@ -328,9 +354,7 @@ skip_open:
244 v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
245
246 /* Physical volume size in bytes */
247- *psize = bdev_capacity(vd->vd_bdev);
248-
249- /* TODO: report possible expansion size */
250+ *psize = bdev_capacity(vd->vd_bdev, v->vdev_wholedisk);
251 *max_psize = *psize;
252
253 /* Based on the minimum sector size set the block size */
254diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
255index d578ae60..66b6969d 100755
256--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
257+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
258@@ -26,7 +26,7 @@
259 #
260
261 #
262-# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
263+# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
264 # Copyright (c) 2017 Lawrence Livermore National Security, LLC.
265 #
266
267@@ -43,8 +43,9 @@
268 # 1) Create 3 files
269 # 2) Create a pool backed by the files
270 # 3) Expand the files' size with truncate
271-# 4) Use zpool online -e to online the vdevs
272-# 5) Check that the pool size was expanded
273+# 4) Use zpool reopen to check the expandsize
274+# 5) Use zpool online -e to online the vdevs
275+# 6) Check that the pool size was expanded
276 #
277
278 verify_runnable "global"
279@@ -64,8 +65,8 @@ log_onexit cleanup
280
281 log_assert "zpool can expand after zpool online -e zvol vdevs on LUN expansion"
282
283-
284 for type in " " mirror raidz raidz2; do
285+ # Initialize the file devices and the pool
286 for i in 1 2 3; do
287 log_must truncate -s $org_size ${TEMPFILE}.$i
288 done
289@@ -80,13 +81,35 @@ for type in " " mirror raidz raidz2; do
290 "$autoexp"
291 fi
292 typeset prev_size=$(get_pool_prop size $TESTPOOL1)
293- typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
294- awk '{print $3}')
295+ typeset zfs_prev_size=$(get_prop avail $TESTPOOL1)
296
297+ # Increase the size of the file devices
298 for i in 1 2 3; do
299 log_must truncate -s $exp_size ${TEMPFILE}.$i
300 done
301
302+ # Reopen the pool and check that the `expandsize` property is set
303+ log_must zpool reopen $TESTPOOL1
304+ typeset zpool_expandsize=$(get_pool_prop expandsize $TESTPOOL1)
305+
306+ if [[ $type == "mirror" ]]; then
307+ typeset expected_zpool_expandsize=$(($exp_size-$org_size))
308+ else
309+ typeset expected_zpool_expandsize=$((3*($exp_size-$org_size)))
310+ fi
311+
312+ if [[ "$zpool_expandsize" = "-" ]]; then
313+ log_fail "pool $TESTPOOL1 did not detect any " \
314+ "expandsize after reopen"
315+ fi
316+
317+ if [[ $zpool_expandsize -ne $expected_zpool_expandsize ]]; then
318+ log_fail "pool $TESTPOOL1 did not detect correct " \
319+ "expandsize after reopen: found $zpool_expandsize," \
320+ "expected $expected_zpool_expandsize"
321+ fi
322+
323+ # Online the devices to add the new space to the pool
324 for i in 1 2 3; do
325 log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
326 done
327@@ -96,8 +119,7 @@ for type in " " mirror raidz raidz2; do
328 sync
329
330 typeset expand_size=$(get_pool_prop size $TESTPOOL1)
331- typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
332- awk '{print $3}')
333+ typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
334 log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
335 "expanded size: $expand_size"
336
337@@ -112,8 +134,8 @@ for type in " " mirror raidz raidz2; do
338 grep "(+${expansion_size}" | wc -l)
339
340 if [[ $size_addition -ne $i ]]; then
341- log_fail "pool $TESTPOOL1 is not autoexpand " \
342- "after LUN expansion"
343+ log_fail "pool $TESTPOOL1 did not expand " \
344+ "after LUN expansion and zpool online -e"
345 fi
346 elif [[ $type == "mirror" ]]; then
347 typeset expansion_size=$(($exp_size-$org_size))
348@@ -123,8 +145,8 @@ for type in " " mirror raidz raidz2; do
349 grep "(+${expansion_size})" >/dev/null 2>&1
350
351 if [[ $? -ne 0 ]]; then
352- log_fail "pool $TESTPOOL1 is not autoexpand " \
353- "after LUN expansion"
354+ log_fail "pool $TESTPOOL1 did not expand " \
355+ "after LUN expansion and zpool online -e"
356 fi
357 else
358 typeset expansion_size=$((3*($exp_size-$org_size)))
359@@ -134,13 +156,13 @@ for type in " " mirror raidz raidz2; do
360 grep "(+${expansion_size})" >/dev/null 2>&1
361
362 if [[ $? -ne 0 ]] ; then
363- log_fail "pool $TESTPOOL1 is not autoexpand " \
364- "after LUN expansion"
365+ log_fail "pool $TESTPOOL1 did not expand " \
366+ "after LUN expansion and zpool online -e"
367 fi
368 fi
369 else
370- log_fail "pool $TESTPOOL1 is not autoexpanded after LUN " \
371- "expansion"
372+ log_fail "pool $TESTPOOL1 did not expand after LUN expansion " \
373+ "and zpool online -e"
374 fi
375 log_must zpool destroy $TESTPOOL1
376 done