git.proxmox.com Git - zfsonlinux.git/blob - zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch
bump version to 0.7.11-pve1~bpo1
[zfsonlinux.git] / zfs-patches / 0014-zpool-reopen-should-detect-expanded-devices.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Sara Hartse <sara.hartse@gmail.com>
3 Date: Thu, 31 May 2018 10:36:37 -0700
4 Subject: [PATCH] zpool reopen should detect expanded devices
5
6 Update bdev_capacity to have wholedisk vdevs query the
7 size of the underlying block device (correcting for the size
8 of the efi partition and partition alignment) and therefore detect
9 expanded space.
10
11 Correct vdev_get_stats_ex so that the expandsize is aligned
12 to metaslab size and new space is only reported if it is large
13 enough for a new metaslab.
14
15 Reviewed-by: Don Brady <don.brady@delphix.com>
16 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
17 Reviewed-by: George Wilson <george.wilson@delphix.com>
18 Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
19 Reviewed-by: John Wren Kennedy <jwk404@gmail.com>
20 Signed-off-by: sara hartse <sara.hartse@delphix.com>
21 External-issue: LX-165
22 Closes #7546
23 Issue #7582
24
25 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
26 ---
27 include/sys/vdev_disk.h | 12 +++++
28 lib/libefi/rdwr_efi.c | 20 +++++++-
29 lib/libzfs/libzfs_pool.c | 14 +-----
30 module/zfs/vdev.c | 3 +-
31 module/zfs/vdev_disk.c | 46 +++++++++++++-----
32 .../cli_root/zpool_expand/zpool_expand_002_pos.ksh | 54 +++++++++++++++-------
33 6 files changed, 107 insertions(+), 42 deletions(-)
34
35 diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h
36 index 15570b10..b8a32b31 100644
37 --- a/include/sys/vdev_disk.h
38 +++ b/include/sys/vdev_disk.h
39 @@ -23,11 +23,23 @@
40 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
41 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
42 * LLNL-CODE-403049.
43 + * Copyright (c) 2018 by Delphix. All rights reserved.
44 */
45
46 #ifndef _SYS_VDEV_DISK_H
47 #define _SYS_VDEV_DISK_H
48
49 +/*
50 + * Don't start the slice at the default block of 34; many storage
51 + * devices will use a stripe width of 128k, other vendors prefer a 1m
52 + * alignment. It is best to play it safe and ensure a 1m alignment
53 + * given 512B blocks. When the block size is larger by a power of 2
54 + * we will still be 1m aligned. Some devices are sensitive to the
55 + * partition ending alignment as well.
56 + */
57 +#define NEW_START_BLOCK 2048
58 +#define PARTITION_END_ALIGNMENT 2048
59 +
60 #ifdef _KERNEL
61 #include <sys/vdev.h>
62
63 diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c
64 index 7935047e..19cb17e5 100644
65 --- a/lib/libefi/rdwr_efi.c
66 +++ b/lib/libefi/rdwr_efi.c
67 @@ -22,6 +22,7 @@
68 /*
69 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
70 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
71 + * Copyright (c) 2018 by Delphix. All rights reserved.
72 */
73
74 #include <stdio.h>
75 @@ -1153,7 +1154,7 @@ efi_use_whole_disk(int fd)
76
77 /*
78 * Find the last physically non-zero partition.
79 - * This is the reserved partition.
80 + * This should be the reserved partition.
81 */
82 for (i = 0; i < efi_label->efi_nparts; i ++) {
83 if (resv_start < efi_label->efi_parts[i].p_start) {
84 @@ -1163,6 +1164,23 @@ efi_use_whole_disk(int fd)
85 }
86
87 /*
88 + * Verify that we've found the reserved partition by checking
89 + * that it looks the way it did when we created it in zpool_label_disk.
90 + * If we've found the incorrect partition, then we know that this
91 + * device was reformatted and no longer is solely used by ZFS.
92 + */
93 + if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) ||
94 + (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) ||
95 + (resv_index != 8)) {
96 + if (efi_debug) {
97 + (void) fprintf(stderr,
98 + "efi_use_whole_disk: wholedisk not available\n");
99 + }
100 + efi_free(efi_label);
101 + return (VT_ENOSPC);
102 + }
103 +
104 + /*
105 * Find the last physically non-zero partition before that.
106 * This is the data partition.
107 */
108 diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
109 index e00d5f51..53bc5034 100644
110 --- a/lib/libzfs/libzfs_pool.c
111 +++ b/lib/libzfs/libzfs_pool.c
112 @@ -22,7 +22,7 @@
113 /*
114 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
115 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
116 - * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
117 + * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
118 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
119 * Copyright (c) 2017 Datto Inc.
120 */
121 @@ -42,6 +42,7 @@
122 #include <sys/efi_partition.h>
123 #include <sys/vtoc.h>
124 #include <sys/zfs_ioctl.h>
125 +#include <sys/vdev_disk.h>
126 #include <dlfcn.h>
127
128 #include "zfs_namecheck.h"
129 @@ -913,17 +914,6 @@ zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
130 }
131
132 /*
133 - * Don't start the slice at the default block of 34; many storage
134 - * devices will use a stripe width of 128k, other vendors prefer a 1m
135 - * alignment. It is best to play it safe and ensure a 1m alignment
136 - * given 512B blocks. When the block size is larger by a power of 2
137 - * we will still be 1m aligned. Some devices are sensitive to the
138 - * partition ending alignment as well.
139 - */
140 -#define NEW_START_BLOCK 2048
141 -#define PARTITION_END_ALIGNMENT 2048
142 -
143 -/*
144 * Validate the given pool name, optionally putting an extended error message in
145 * 'buf'.
146 */
147 diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
148 index acac2a97..b643bd35 100644
149 --- a/module/zfs/vdev.c
150 +++ b/module/zfs/vdev.c
151 @@ -21,7 +21,7 @@
152
153 /*
154 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
155 - * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
156 + * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
157 * Copyright 2017 Nexenta Systems, Inc.
158 * Copyright (c) 2014 Integros [integros.com]
159 * Copyright 2016 Toomas Soome <tsoome@me.com>
160 @@ -3039,7 +3039,6 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
161 vd->vdev_max_asize - vd->vdev_asize,
162 1ULL << tvd->vdev_ms_shift);
163 }
164 - vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
165 if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
166 !vd->vdev_ishole) {
167 vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
168 diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
169 index 6761e755..6dc0544f 100644
170 --- a/module/zfs/vdev_disk.c
171 +++ b/module/zfs/vdev_disk.c
172 @@ -23,7 +23,7 @@
173 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
174 * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
175 * LLNL-CODE-403049.
176 - * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
177 + * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
178 */
179
180 #include <sys/zfs_context.h>
181 @@ -35,10 +35,14 @@
182 #include <sys/zio.h>
183 #include <sys/sunldi.h>
184 #include <linux/mod_compat.h>
185 +#include <linux/msdos_fs.h>
186
187 char *zfs_vdev_scheduler = VDEV_SCHEDULER;
188 static void *zfs_vdev_holder = VDEV_HOLDER;
189
190 +/* size of the "reserved" partition, in blocks */
191 +#define EFI_MIN_RESV_SIZE (16 * 1024)
192 +
193 /*
194 * Virtual device vector for disks.
195 */
196 @@ -82,17 +86,39 @@ vdev_bdev_mode(int smode)
197 }
198 #endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
199
200 +/* The capacity (in bytes) of a bdev that is available to be used by a vdev */
201 static uint64_t
202 -bdev_capacity(struct block_device *bdev)
203 +bdev_capacity(struct block_device *bdev, boolean_t wholedisk)
204 {
205 struct hd_struct *part = bdev->bd_part;
206 + uint64_t sectors = get_capacity(bdev->bd_disk);
207 + /* If there are no partitions, return the entire device capacity */
208 + if (part == NULL)
209 + return (sectors << SECTOR_BITS);
210
211 - /* The partition capacity referenced by the block device */
212 - if (part)
213 - return (part->nr_sects << 9);
214 -
215 - /* Otherwise assume the full device capacity */
216 - return (get_capacity(bdev->bd_disk) << 9);
217 + /*
218 + * If there are partitions, decide if we are using a `wholedisk`
219 + * layout (composed of part1 and part9) or just a single partition.
220 + */
221 + if (wholedisk) {
222 + /* Verify the expected device layout */
223 + ASSERT3P(bdev, !=, bdev->bd_contains);
224 + /*
225 + * Sectors used by the EFI partition (part9) as well as
226 + * partition alignment.
227 + */
228 + uint64_t used = EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
229 + PARTITION_END_ALIGNMENT;
230 +
231 + /* Space available to the vdev, i.e. the size of part1 */
232 + if (sectors <= used)
233 + return (0);
234 + uint64_t available = sectors - used;
235 + return (available << SECTOR_BITS);
236 + } else {
237 + /* The partition capacity referenced by the block device */
238 + return (part->nr_sects << SECTOR_BITS);
239 + }
240 }
241
242 static void
243 @@ -328,9 +354,7 @@ skip_open:
244 v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
245
246 /* Physical volume size in bytes */
247 - *psize = bdev_capacity(vd->vd_bdev);
248 -
249 - /* TODO: report possible expansion size */
250 + *psize = bdev_capacity(vd->vd_bdev, v->vdev_wholedisk);
251 *max_psize = *psize;
252
253 /* Based on the minimum sector size set the block size */
254 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
255 index d578ae60..66b6969d 100755
256 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
257 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
258 @@ -26,7 +26,7 @@
259 #
260
261 #
262 -# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
263 +# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
264 # Copyright (c) 2017 Lawrence Livermore National Security, LLC.
265 #
266
267 @@ -43,8 +43,9 @@
268 # 1) Create 3 files
269 # 2) Create a pool backed by the files
270 # 3) Expand the files' size with truncate
271 -# 4) Use zpool online -e to online the vdevs
272 -# 5) Check that the pool size was expanded
273 +# 4) Use zpool reopen to check the expandsize
274 +# 5) Use zpool online -e to online the vdevs
275 +# 6) Check that the pool size was expanded
276 #
277
278 verify_runnable "global"
279 @@ -64,8 +65,8 @@ log_onexit cleanup
280
281 log_assert "zpool can expand after zpool online -e zvol vdevs on LUN expansion"
282
283 -
284 for type in " " mirror raidz raidz2; do
285 + # Initialize the file devices and the pool
286 for i in 1 2 3; do
287 log_must truncate -s $org_size ${TEMPFILE}.$i
288 done
289 @@ -80,13 +81,35 @@ for type in " " mirror raidz raidz2; do
290 "$autoexp"
291 fi
292 typeset prev_size=$(get_pool_prop size $TESTPOOL1)
293 - typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
294 - awk '{print $3}')
295 + typeset zfs_prev_size=$(get_prop avail $TESTPOOL1)
296
297 + # Increase the size of the file devices
298 for i in 1 2 3; do
299 log_must truncate -s $exp_size ${TEMPFILE}.$i
300 done
301
302 + # Reopen the pool and check that the `expandsize` property is set
303 + log_must zpool reopen $TESTPOOL1
304 + typeset zpool_expandsize=$(get_pool_prop expandsize $TESTPOOL1)
305 +
306 + if [[ $type == "mirror" ]]; then
307 + typeset expected_zpool_expandsize=$(($exp_size-$org_size))
308 + else
309 + typeset expected_zpool_expandsize=$((3*($exp_size-$org_size)))
310 + fi
311 +
312 + if [[ "$zpool_expandsize" = "-" ]]; then
313 + log_fail "pool $TESTPOOL1 did not detect any " \
314 + "expandsize after reopen"
315 + fi
316 +
317 + if [[ $zpool_expandsize -ne $expected_zpool_expandsize ]]; then
318 + log_fail "pool $TESTPOOL1 did not detect correct " \
319 + "expandsize after reopen: found $zpool_expandsize," \
320 + "expected $expected_zpool_expandsize"
321 + fi
322 +
323 + # Online the devices to add the new space to the pool
324 for i in 1 2 3; do
325 log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
326 done
327 @@ -96,8 +119,7 @@ for type in " " mirror raidz raidz2; do
328 sync
329
330 typeset expand_size=$(get_pool_prop size $TESTPOOL1)
331 - typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
332 - awk '{print $3}')
333 + typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
334 log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
335 "expanded size: $expand_size"
336
337 @@ -112,8 +134,8 @@ for type in " " mirror raidz raidz2; do
338 grep "(+${expansion_size}" | wc -l)
339
340 if [[ $size_addition -ne $i ]]; then
341 - log_fail "pool $TESTPOOL1 is not autoexpand " \
342 - "after LUN expansion"
343 + log_fail "pool $TESTPOOL1 did not expand " \
344 + "after LUN expansion and zpool online -e"
345 fi
346 elif [[ $type == "mirror" ]]; then
347 typeset expansion_size=$(($exp_size-$org_size))
348 @@ -123,8 +145,8 @@ for type in " " mirror raidz raidz2; do
349 grep "(+${expansion_size})" >/dev/null 2>&1
350
351 if [[ $? -ne 0 ]]; then
352 - log_fail "pool $TESTPOOL1 is not autoexpand " \
353 - "after LUN expansion"
354 + log_fail "pool $TESTPOOL1 did not expand " \
355 + "after LUN expansion and zpool online -e"
356 fi
357 else
358 typeset expansion_size=$((3*($exp_size-$org_size)))
359 @@ -134,13 +156,13 @@ for type in " " mirror raidz raidz2; do
360 grep "(+${expansion_size})" >/dev/null 2>&1
361
362 if [[ $? -ne 0 ]] ; then
363 - log_fail "pool $TESTPOOL1 is not autoexpand " \
364 - "after LUN expansion"
365 + log_fail "pool $TESTPOOL1 did not expand " \
366 + "after LUN expansion and zpool online -e"
367 fi
368 fi
369 else
370 - log_fail "pool $TESTPOOL1 is not autoexpanded after LUN " \
371 - "expansion"
372 + log_fail "pool $TESTPOOL1 did not expand after LUN expansion " \
373 + "and zpool online -e"
374 fi
375 log_must zpool destroy $TESTPOOL1
376 done