]>
Commit | Line | Data |
---|---|---|
b2255edc BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (c) 2018 Intel Corporation. | |
23 | * Copyright (c) 2020 by Lawrence Livermore National Security, LLC. | |
24 | */ | |
25 | ||
26 | #include <stdio.h> | |
27 | #include <zlib.h> | |
28 | #include <zfs_fletcher.h> | |
29 | #include <sys/vdev_draid.h> | |
30 | #include <sys/nvpair.h> | |
31 | #include <sys/stat.h> | |
32 | ||
33 | /* | |
34 | * The number of rows to generate for new permutation maps. | |
35 | */ | |
36 | #define MAP_ROWS_DEFAULT 256 | |
37 | ||
38 | /* | |
39 | * Key values for dRAID maps when stored as nvlists. | |
40 | */ | |
41 | #define MAP_SEED "seed" | |
42 | #define MAP_CHECKSUM "checksum" | |
43 | #define MAP_WORST_RATIO "worst_ratio" | |
44 | #define MAP_AVG_RATIO "avg_ratio" | |
45 | #define MAP_CHILDREN "children" | |
46 | #define MAP_NPERMS "nperms" | |
47 | #define MAP_PERMS "perms" | |
48 | ||
/*
 * Print the command summary to stderr and terminate the process with an
 * exit status of 1.  This function does not return.
 */
static void
draid_usage(void)
{
	static const char usage_text[] =
	    "usage: draid command args ...\n"
	    "Available commands are:\n"
	    "\n"
	    "\tdraid generate [-cv] [-m min] [-n max] [-p passes] FILE\n"
	    "\tdraid verify [-rv] FILE\n"
	    "\tdraid dump [-v] [-m min] [-n max] FILE\n"
	    "\tdraid table FILE\n"
	    "\tdraid merge FILE SRC SRC...\n";

	(void) fputs(usage_text, stderr);
	exit(1);
}
63 | ||
64 | static int | |
65 | read_map(const char *filename, nvlist_t **allcfgs) | |
66 | { | |
67 | int block_size = 131072; | |
68 | int buf_size = 131072; | |
69 | int tmp_size, error; | |
70 | char *tmp_buf; | |
71 | ||
72 | struct stat64 stat; | |
73 | if (lstat64(filename, &stat) != 0) | |
74 | return (errno); | |
75 | ||
76 | if (stat.st_size == 0 || | |
77 | !(S_ISREG(stat.st_mode) || S_ISLNK(stat.st_mode))) { | |
78 | return (EINVAL); | |
79 | } | |
80 | ||
81 | gzFile fp = gzopen(filename, "rb"); | |
82 | if (fp == Z_NULL) | |
83 | return (errno); | |
84 | ||
85 | char *buf = malloc(buf_size); | |
86 | if (buf == NULL) { | |
87 | (void) gzclose(fp); | |
88 | return (ENOMEM); | |
89 | } | |
90 | ||
91 | ssize_t rc, bytes = 0; | |
92 | while (!gzeof(fp)) { | |
93 | rc = gzread(fp, buf + bytes, block_size); | |
94 | if ((rc < 0) || (rc == 0 && !gzeof(fp))) { | |
95 | free(buf); | |
96 | (void) gzclose(fp); | |
97 | (void) gzerror(fp, &error); | |
98 | return (error); | |
99 | } else { | |
100 | bytes += rc; | |
101 | ||
102 | if (bytes + block_size >= buf_size) { | |
103 | tmp_size = 2 * buf_size; | |
104 | tmp_buf = malloc(tmp_size); | |
105 | if (tmp_buf == NULL) { | |
106 | free(buf); | |
107 | (void) gzclose(fp); | |
108 | return (ENOMEM); | |
109 | } | |
110 | ||
111 | memcpy(tmp_buf, buf, bytes); | |
112 | free(buf); | |
113 | buf = tmp_buf; | |
114 | buf_size = tmp_size; | |
115 | } | |
116 | } | |
117 | } | |
118 | ||
119 | (void) gzclose(fp); | |
120 | ||
121 | error = nvlist_unpack(buf, bytes, allcfgs, 0); | |
122 | free(buf); | |
123 | ||
124 | return (error); | |
125 | } | |
126 | ||
127 | /* | |
128 | * Read a map from the specified filename. A file contains multiple maps | |
129 | * which are indexed by the number of children. The caller is responsible | |
130 | * for freeing the configuration returned. | |
131 | */ | |
132 | static int | |
133 | read_map_key(const char *filename, char *key, nvlist_t **cfg) | |
134 | { | |
135 | nvlist_t *allcfgs, *foundcfg = NULL; | |
136 | int error; | |
137 | ||
138 | error = read_map(filename, &allcfgs); | |
139 | if (error != 0) | |
140 | return (error); | |
141 | ||
142 | nvlist_lookup_nvlist(allcfgs, key, &foundcfg); | |
143 | if (foundcfg != NULL) { | |
144 | nvlist_dup(foundcfg, cfg, KM_SLEEP); | |
145 | error = 0; | |
146 | } else { | |
147 | error = ENOENT; | |
148 | } | |
149 | ||
150 | nvlist_free(allcfgs); | |
151 | ||
152 | return (error); | |
153 | } | |
154 | ||
155 | /* | |
156 | * Write all mappings to the map file. | |
157 | */ | |
158 | static int | |
159 | write_map(const char *filename, nvlist_t *allcfgs) | |
160 | { | |
161 | size_t buflen = 0; | |
162 | int error; | |
163 | ||
164 | error = nvlist_size(allcfgs, &buflen, NV_ENCODE_XDR); | |
165 | if (error) | |
166 | return (error); | |
167 | ||
168 | char *buf = malloc(buflen); | |
169 | if (buf == NULL) | |
170 | return (ENOMEM); | |
171 | ||
172 | error = nvlist_pack(allcfgs, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP); | |
173 | if (error) { | |
174 | free(buf); | |
175 | return (error); | |
176 | } | |
177 | ||
178 | /* | |
179 | * Atomically update the file using a temporary file and the | |
180 | * traditional unlink then rename steps. This code provides | |
181 | * no locking, it only guarantees the packed nvlist on disk | |
182 | * is updated atomically and is internally consistent. | |
183 | */ | |
955bf4dc | 184 | char *tmpname = calloc(1, MAXPATHLEN); |
b2255edc BB |
185 | if (tmpname == NULL) { |
186 | free(buf); | |
187 | return (ENOMEM); | |
188 | } | |
189 | ||
190 | snprintf(tmpname, MAXPATHLEN - 1, "%s.XXXXXX", filename); | |
191 | ||
192 | int fd = mkstemp(tmpname); | |
193 | if (fd < 0) { | |
194 | error = errno; | |
195 | free(buf); | |
196 | free(tmpname); | |
197 | return (error); | |
198 | } | |
199 | (void) close(fd); | |
200 | ||
201 | gzFile fp = gzopen(tmpname, "w9b"); | |
202 | if (fp == Z_NULL) { | |
203 | error = errno; | |
204 | free(buf); | |
205 | free(tmpname); | |
206 | return (errno); | |
207 | } | |
208 | ||
209 | ssize_t rc, bytes = 0; | |
210 | while (bytes < buflen) { | |
211 | size_t size = MIN(buflen - bytes, 131072); | |
212 | rc = gzwrite(fp, buf + bytes, size); | |
213 | if (rc < 0) { | |
214 | free(buf); | |
215 | (void) gzerror(fp, &error); | |
216 | (void) gzclose(fp); | |
217 | (void) unlink(tmpname); | |
218 | free(tmpname); | |
219 | return (error); | |
220 | } else if (rc == 0) { | |
221 | break; | |
222 | } else { | |
223 | bytes += rc; | |
224 | } | |
225 | } | |
226 | ||
227 | free(buf); | |
228 | (void) gzclose(fp); | |
229 | ||
230 | if (bytes != buflen) { | |
231 | (void) unlink(tmpname); | |
232 | free(tmpname); | |
233 | return (EIO); | |
234 | } | |
235 | ||
236 | /* | |
237 | * Unlink the previous config file and replace it with the updated | |
238 | * version. If we're able to unlink the file then directory is | |
239 | * writable by us and the subsequent rename should never fail. | |
240 | */ | |
241 | error = unlink(filename); | |
242 | if (error != 0 && errno != ENOENT) { | |
243 | error = errno; | |
244 | (void) unlink(tmpname); | |
245 | free(tmpname); | |
246 | return (error); | |
247 | } | |
248 | ||
249 | error = rename(tmpname, filename); | |
250 | if (error != 0) { | |
251 | error = errno; | |
252 | (void) unlink(tmpname); | |
253 | free(tmpname); | |
254 | return (error); | |
255 | } | |
256 | ||
257 | free(tmpname); | |
258 | ||
259 | return (0); | |
260 | } | |
261 | ||
262 | /* | |
263 | * Add the dRAID map to the file and write it out. | |
264 | */ | |
265 | static int | |
266 | write_map_key(const char *filename, char *key, draid_map_t *map, | |
267 | double worst_ratio, double avg_ratio) | |
268 | { | |
269 | nvlist_t *nv_cfg, *allcfgs; | |
270 | int error; | |
271 | ||
272 | /* | |
273 | * Add the configuration to an existing or new file. The new | |
274 | * configuration will replace an existing configuration with the | |
275 | * same key if it has a lower ratio and is therefore better. | |
276 | */ | |
277 | error = read_map(filename, &allcfgs); | |
278 | if (error == ENOENT) { | |
279 | allcfgs = fnvlist_alloc(); | |
280 | } else if (error != 0) { | |
281 | return (error); | |
282 | } | |
283 | ||
284 | error = nvlist_lookup_nvlist(allcfgs, key, &nv_cfg); | |
285 | if (error == 0) { | |
286 | uint64_t nv_cfg_worst_ratio = fnvlist_lookup_uint64(nv_cfg, | |
287 | MAP_WORST_RATIO); | |
288 | double nv_worst_ratio = (double)nv_cfg_worst_ratio / 1000.0; | |
289 | ||
290 | if (worst_ratio < nv_worst_ratio) { | |
291 | /* Replace old map with the more balanced new map. */ | |
292 | fnvlist_remove(allcfgs, key); | |
293 | } else { | |
294 | /* The old map is preferable, keep it. */ | |
295 | nvlist_free(allcfgs); | |
296 | return (EEXIST); | |
297 | } | |
298 | } | |
299 | ||
300 | nvlist_t *cfg = fnvlist_alloc(); | |
301 | fnvlist_add_uint64(cfg, MAP_SEED, map->dm_seed); | |
302 | fnvlist_add_uint64(cfg, MAP_CHECKSUM, map->dm_checksum); | |
303 | fnvlist_add_uint64(cfg, MAP_CHILDREN, map->dm_children); | |
304 | fnvlist_add_uint64(cfg, MAP_NPERMS, map->dm_nperms); | |
305 | fnvlist_add_uint8_array(cfg, MAP_PERMS, map->dm_perms, | |
306 | map->dm_children * map->dm_nperms * sizeof (uint8_t)); | |
307 | ||
308 | fnvlist_add_uint64(cfg, MAP_WORST_RATIO, | |
309 | (uint64_t)(worst_ratio * 1000.0)); | |
310 | fnvlist_add_uint64(cfg, MAP_AVG_RATIO, | |
311 | (uint64_t)(avg_ratio * 1000.0)); | |
312 | ||
313 | error = nvlist_add_nvlist(allcfgs, key, cfg); | |
314 | if (error == 0) | |
315 | error = write_map(filename, allcfgs); | |
316 | ||
317 | nvlist_free(cfg); | |
318 | nvlist_free(allcfgs); | |
319 | return (error); | |
320 | } | |
321 | ||
322 | static void | |
323 | dump_map(draid_map_t *map, char *key, double worst_ratio, double avg_ratio, | |
324 | int verbose) | |
325 | { | |
326 | if (verbose == 0) { | |
327 | return; | |
328 | } else if (verbose == 1) { | |
329 | printf(" \"%s\": seed: 0x%016llx worst_ratio: %2.03f " | |
330 | "avg_ratio: %2.03f\n", key, (u_longlong_t)map->dm_seed, | |
331 | worst_ratio, avg_ratio); | |
332 | return; | |
333 | } else { | |
334 | printf(" \"%s\":\n" | |
335 | " seed: 0x%016llx\n" | |
336 | " checksum: 0x%016llx\n" | |
337 | " worst_ratio: %2.03f\n" | |
338 | " avg_ratio: %2.03f\n" | |
339 | " children: %llu\n" | |
340 | " nperms: %llu\n", | |
341 | key, (u_longlong_t)map->dm_seed, | |
342 | (u_longlong_t)map->dm_checksum, worst_ratio, avg_ratio, | |
343 | (u_longlong_t)map->dm_children, | |
344 | (u_longlong_t)map->dm_nperms); | |
345 | ||
346 | if (verbose > 2) { | |
347 | printf(" perms = {\n"); | |
348 | for (int i = 0; i < map->dm_nperms; i++) { | |
349 | printf(" { "); | |
350 | for (int j = 0; j < map->dm_children; j++) { | |
351 | printf("%3d%s ", map->dm_perms[ | |
352 | i * map->dm_children + j], | |
353 | j < map->dm_children - 1 ? | |
354 | "," : ""); | |
355 | } | |
356 | printf(" },\n"); | |
357 | } | |
358 | printf(" }\n"); | |
359 | } else if (verbose == 2) { | |
360 | printf(" draid_perms = <omitted>\n"); | |
361 | } | |
362 | } | |
363 | } | |
364 | ||
365 | static void | |
366 | dump_map_nv(char *key, nvlist_t *cfg, int verbose) | |
367 | { | |
368 | draid_map_t map; | |
369 | uint_t c; | |
370 | ||
371 | uint64_t worst_ratio = fnvlist_lookup_uint64(cfg, MAP_WORST_RATIO); | |
372 | uint64_t avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO); | |
373 | ||
374 | map.dm_seed = fnvlist_lookup_uint64(cfg, MAP_SEED); | |
375 | map.dm_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); | |
376 | map.dm_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); | |
377 | map.dm_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); | |
378 | nvlist_lookup_uint8_array(cfg, MAP_PERMS, &map.dm_perms, &c); | |
379 | ||
380 | dump_map(&map, key, (double)worst_ratio / 1000.0, | |
381 | avg_ratio / 1000.0, verbose); | |
382 | } | |
383 | ||
384 | /* | |
385 | * Print a summary of the mapping. | |
386 | */ | |
387 | static int | |
388 | dump_map_key(const char *filename, char *key, int verbose) | |
389 | { | |
390 | nvlist_t *cfg; | |
391 | int error; | |
392 | ||
393 | error = read_map_key(filename, key, &cfg); | |
394 | if (error != 0) | |
395 | return (error); | |
396 | ||
397 | dump_map_nv(key, cfg, verbose); | |
398 | ||
399 | return (0); | |
400 | } | |
401 | ||
402 | /* | |
403 | * Allocate a new permutation map for evaluation. | |
404 | */ | |
405 | static int | |
406 | alloc_new_map(uint64_t children, uint64_t nperms, uint64_t seed, | |
407 | draid_map_t **mapp) | |
408 | { | |
409 | draid_map_t *map; | |
410 | int error; | |
411 | ||
412 | map = malloc(sizeof (draid_map_t)); | |
413 | if (map == NULL) | |
414 | return (ENOMEM); | |
415 | ||
416 | map->dm_children = children; | |
417 | map->dm_nperms = nperms; | |
418 | map->dm_seed = seed; | |
419 | map->dm_checksum = 0; | |
420 | ||
421 | error = vdev_draid_generate_perms(map, &map->dm_perms); | |
422 | if (error) { | |
423 | free(map); | |
424 | return (error); | |
425 | } | |
426 | ||
427 | *mapp = map; | |
428 | ||
429 | return (0); | |
430 | } | |
431 | ||
432 | /* | |
433 | * Allocate the fixed permutation map for N children. | |
434 | */ | |
435 | static int | |
436 | alloc_fixed_map(uint64_t children, draid_map_t **mapp) | |
437 | { | |
438 | const draid_map_t *fixed_map; | |
439 | draid_map_t *map; | |
440 | int error; | |
441 | ||
442 | error = vdev_draid_lookup_map(children, &fixed_map); | |
443 | if (error) | |
444 | return (error); | |
445 | ||
446 | map = malloc(sizeof (draid_map_t)); | |
447 | if (map == NULL) | |
448 | return (ENOMEM); | |
449 | ||
450 | memcpy(map, fixed_map, sizeof (draid_map_t)); | |
451 | VERIFY3U(map->dm_checksum, !=, 0); | |
452 | ||
453 | error = vdev_draid_generate_perms(map, &map->dm_perms); | |
454 | if (error) { | |
455 | free(map); | |
456 | return (error); | |
457 | } | |
458 | ||
459 | *mapp = map; | |
460 | ||
461 | return (0); | |
462 | } | |
463 | ||
464 | /* | |
465 | * Free a permutation map. | |
466 | */ | |
467 | static void | |
468 | free_map(draid_map_t *map) | |
469 | { | |
470 | free(map->dm_perms); | |
471 | free(map); | |
472 | } | |
473 | ||
474 | /* | |
475 | * Check if dev is in the provided list of faulted devices. | |
476 | */ | |
477 | static inline boolean_t | |
478 | is_faulted(int *faulted_devs, int nfaulted, int dev) | |
479 | { | |
480 | for (int i = 0; i < nfaulted; i++) | |
481 | if (faulted_devs[i] == dev) | |
482 | return (B_TRUE); | |
483 | ||
484 | return (B_FALSE); | |
485 | } | |
486 | ||
487 | /* | |
488 | * Evaluate how resilvering I/O will be distributed given a list of faulted | |
489 | * vdevs. As a simplification we assume one IO is sufficient to repair each | |
490 | * damaged device in a group. | |
491 | */ | |
492 | static double | |
493 | eval_resilver(draid_map_t *map, uint64_t groupwidth, uint64_t nspares, | |
494 | int *faulted_devs, int nfaulted, int *min_child_ios, int *max_child_ios) | |
495 | { | |
496 | uint64_t children = map->dm_children; | |
497 | uint64_t ngroups = 1; | |
498 | uint64_t ndisks = children - nspares; | |
499 | ||
500 | /* | |
501 | * Calculate the minimum number of groups required to fill a slice. | |
502 | */ | |
503 | while (ngroups * (groupwidth) % (children - nspares) != 0) | |
504 | ngroups++; | |
505 | ||
506 | int *ios = calloc(map->dm_children, sizeof (uint64_t)); | |
507 | ||
508 | /* Resilver all rows */ | |
509 | for (int i = 0; i < map->dm_nperms; i++) { | |
510 | uint8_t *row = &map->dm_perms[i * map->dm_children]; | |
511 | ||
512 | /* Resilver all groups with faulted drives */ | |
513 | for (int j = 0; j < ngroups; j++) { | |
514 | uint64_t spareidx = map->dm_children - nspares; | |
515 | boolean_t repair_needed = B_FALSE; | |
516 | ||
517 | /* See if any devices in this group are faulted */ | |
518 | uint64_t groupstart = (j * groupwidth) % ndisks; | |
519 | ||
520 | for (int k = 0; k < groupwidth; k++) { | |
521 | uint64_t groupidx = (groupstart + k) % ndisks; | |
522 | ||
523 | repair_needed = is_faulted(faulted_devs, | |
524 | nfaulted, row[groupidx]); | |
525 | if (repair_needed) | |
526 | break; | |
527 | } | |
528 | ||
529 | if (repair_needed == B_FALSE) | |
530 | continue; | |
531 | ||
532 | /* | |
533 | * This group is degraded. Calculate the number of | |
534 | * reads the non-faulted drives require and the number | |
535 | * of writes to the distributed hot spare for this row. | |
536 | */ | |
537 | for (int k = 0; k < groupwidth; k++) { | |
538 | uint64_t groupidx = (groupstart + k) % ndisks; | |
539 | ||
540 | if (!is_faulted(faulted_devs, nfaulted, | |
541 | row[groupidx])) { | |
542 | ios[row[groupidx]]++; | |
543 | } else if (nspares > 0) { | |
544 | while (is_faulted(faulted_devs, | |
545 | nfaulted, row[spareidx])) { | |
546 | spareidx++; | |
547 | } | |
548 | ||
549 | ASSERT3U(spareidx, <, map->dm_children); | |
550 | ios[row[spareidx]]++; | |
551 | spareidx++; | |
552 | } | |
553 | } | |
554 | } | |
555 | } | |
556 | ||
557 | *min_child_ios = INT_MAX; | |
558 | *max_child_ios = 0; | |
559 | ||
560 | /* | |
561 | * Find the drives with fewest and most required I/O. These values | |
562 | * are used to calculate the imbalance ratio. To avoid returning an | |
563 | * infinite value for permutations which have children that perform | |
564 | * no IO a floor of 1 IO per child is set. This ensures a meaningful | |
565 | * ratio is returned for comparison and it is not an uncommon when | |
566 | * there are a large number of children. | |
567 | */ | |
568 | for (int i = 0; i < map->dm_children; i++) { | |
569 | ||
570 | if (is_faulted(faulted_devs, nfaulted, i)) { | |
571 | ASSERT0(ios[i]); | |
572 | continue; | |
573 | } | |
574 | ||
575 | if (ios[i] == 0) | |
576 | ios[i] = 1; | |
577 | ||
578 | if (ios[i] < *min_child_ios) | |
579 | *min_child_ios = ios[i]; | |
580 | ||
581 | if (ios[i] > *max_child_ios) | |
582 | *max_child_ios = ios[i]; | |
583 | } | |
584 | ||
585 | ASSERT3S(*min_child_ios, !=, INT_MAX); | |
586 | ASSERT3S(*max_child_ios, !=, 0); | |
587 | ||
588 | double ratio = (double)(*max_child_ios) / (double)(*min_child_ios); | |
589 | ||
590 | free(ios); | |
591 | ||
592 | return (ratio); | |
593 | } | |
594 | ||
595 | /* | |
596 | * Evaluate the quality of the permutation mapping by considering possible | |
597 | * device failures. Returns the imbalance ratio for the worst mapping which | |
598 | * is defined to be the largest number of child IOs over the fewest number | |
599 | * child IOs. A value of 1.0 indicates the mapping is perfectly balance and | |
600 | * all children perform an equal amount of work during reconstruction. | |
601 | */ | |
602 | static void | |
603 | eval_decluster(draid_map_t *map, double *worst_ratiop, double *avg_ratiop) | |
604 | { | |
605 | uint64_t children = map->dm_children; | |
606 | double worst_ratio = 1.0; | |
607 | double sum = 0; | |
608 | int worst_min_ios = 0, worst_max_ios = 0; | |
609 | int n = 0; | |
610 | ||
611 | /* | |
612 | * When there are only 2 children there can be no distributed | |
613 | * spare and no resilver to evaluate. Default to a ratio of 1.0 | |
614 | * for this degenerate case. | |
615 | */ | |
616 | if (children == VDEV_DRAID_MIN_CHILDREN) { | |
617 | *worst_ratiop = 1.0; | |
618 | *avg_ratiop = 1.0; | |
619 | return; | |
620 | } | |
621 | ||
622 | /* | |
623 | * Score the mapping as if it had either 1 or 2 distributed spares. | |
624 | */ | |
625 | for (int nspares = 1; nspares <= 2; nspares++) { | |
626 | uint64_t faults = nspares; | |
627 | ||
628 | /* | |
bf169e9f | 629 | * Score groupwidths up to 19. This value was chosen as the |
b2255edc BB |
630 | * largest reasonable width (16d+3p). dRAID pools may be still |
631 | * be created with wider stripes but they are not considered in | |
632 | * this analysis in order to optimize for the most common cases. | |
633 | */ | |
634 | for (uint64_t groupwidth = 2; | |
635 | groupwidth <= MIN(children - nspares, 19); | |
636 | groupwidth++) { | |
637 | int faulted_devs[2]; | |
638 | int min_ios, max_ios; | |
639 | ||
640 | /* | |
641 | * Score possible devices faults. This is limited | |
642 | * to exactly one fault per distributed spare for | |
643 | * the purposes of this similation. | |
644 | */ | |
645 | for (int f1 = 0; f1 < children; f1++) { | |
646 | faulted_devs[0] = f1; | |
647 | double ratio; | |
648 | ||
649 | if (faults == 1) { | |
650 | ratio = eval_resilver(map, groupwidth, | |
651 | nspares, faulted_devs, faults, | |
652 | &min_ios, &max_ios); | |
653 | ||
654 | if (ratio > worst_ratio) { | |
655 | worst_ratio = ratio; | |
656 | worst_min_ios = min_ios; | |
657 | worst_max_ios = max_ios; | |
658 | } | |
659 | ||
660 | sum += ratio; | |
661 | n++; | |
662 | } else if (faults == 2) { | |
663 | for (int f2 = f1 + 1; f2 < children; | |
664 | f2++) { | |
665 | faulted_devs[1] = f2; | |
666 | ||
667 | ratio = eval_resilver(map, | |
668 | groupwidth, nspares, | |
669 | faulted_devs, faults, | |
670 | &min_ios, &max_ios); | |
671 | ||
672 | if (ratio > worst_ratio) { | |
673 | worst_ratio = ratio; | |
674 | worst_min_ios = min_ios; | |
675 | worst_max_ios = max_ios; | |
676 | } | |
677 | ||
678 | sum += ratio; | |
679 | n++; | |
680 | } | |
681 | } | |
682 | } | |
683 | } | |
684 | } | |
685 | ||
686 | *worst_ratiop = worst_ratio; | |
687 | *avg_ratiop = sum / n; | |
688 | ||
689 | /* | |
690 | * Log the min/max io values for particularly unbalanced maps. | |
691 | * Since the maps are generated entirely randomly these are possible | |
692 | * be exceedingly unlikely. We log it for possible investigation. | |
693 | */ | |
694 | if (worst_ratio > 100.0) { | |
695 | dump_map(map, "DEBUG", worst_ratio, *avg_ratiop, 2); | |
696 | printf("worst_min_ios=%d worst_max_ios=%d\n", | |
697 | worst_min_ios, worst_max_ios); | |
698 | } | |
699 | } | |
700 | ||
701 | static int | |
702 | eval_maps(uint64_t children, int passes, uint64_t *map_seed, | |
703 | draid_map_t **best_mapp, double *best_ratiop, double *avg_ratiop) | |
704 | { | |
705 | draid_map_t *best_map = NULL; | |
706 | double best_worst_ratio = 1000.0; | |
707 | double best_avg_ratio = 1000.0; | |
708 | ||
709 | /* | |
710 | * Perform the requested number of passes evaluating randomly | |
711 | * generated permutation maps. Only the best version is kept. | |
712 | */ | |
713 | for (int i = 0; i < passes; i++) { | |
714 | double worst_ratio, avg_ratio; | |
715 | draid_map_t *map; | |
716 | int error; | |
717 | ||
718 | /* | |
719 | * Calculate the next seed and generate a new candidate map. | |
720 | */ | |
721 | error = alloc_new_map(children, MAP_ROWS_DEFAULT, | |
722 | vdev_draid_rand(map_seed), &map); | |
723 | if (error) | |
724 | return (error); | |
725 | ||
726 | /* | |
727 | * Consider maps with a lower worst_ratio to be of higher | |
728 | * quality. Some maps may have a lower avg_ratio but they | |
729 | * are discarded since they might include some particularly | |
bf169e9f | 730 | * imbalanced permutations. The average is tracked to in |
b2255edc BB |
731 | * order to get a sense of the average permutation quality. |
732 | */ | |
733 | eval_decluster(map, &worst_ratio, &avg_ratio); | |
734 | ||
735 | if (best_map == NULL || worst_ratio < best_worst_ratio) { | |
736 | ||
737 | if (best_map != NULL) | |
738 | free_map(best_map); | |
739 | ||
740 | best_map = map; | |
741 | best_worst_ratio = worst_ratio; | |
742 | best_avg_ratio = avg_ratio; | |
743 | } else { | |
744 | free_map(map); | |
745 | } | |
746 | } | |
747 | ||
748 | /* | |
749 | * After determining the best map generate a checksum over the full | |
750 | * permutation array. This checksum is verified when opening a dRAID | |
751 | * pool to ensure the generated in memory permutations are correct. | |
752 | */ | |
753 | zio_cksum_t cksum; | |
754 | fletcher_4_native_varsize(best_map->dm_perms, | |
755 | sizeof (uint8_t) * best_map->dm_children * best_map->dm_nperms, | |
756 | &cksum); | |
757 | best_map->dm_checksum = cksum.zc_word[0]; | |
758 | ||
759 | *best_mapp = best_map; | |
760 | *best_ratiop = best_worst_ratio; | |
761 | *avg_ratiop = best_avg_ratio; | |
762 | ||
763 | return (0); | |
764 | } | |
765 | ||
766 | static int | |
767 | draid_generate(int argc, char *argv[]) | |
768 | { | |
861166b0 | 769 | char filename[MAXPATHLEN] = {0}; |
b2255edc BB |
770 | uint64_t map_seed; |
771 | int c, fd, error, verbose = 0, passes = 1, continuous = 0; | |
772 | int min_children = VDEV_DRAID_MIN_CHILDREN; | |
773 | int max_children = VDEV_DRAID_MAX_CHILDREN; | |
774 | int restarts = 0; | |
775 | ||
776 | while ((c = getopt(argc, argv, ":cm:n:p:v")) != -1) { | |
777 | switch (c) { | |
778 | case 'c': | |
779 | continuous++; | |
780 | break; | |
781 | case 'm': | |
782 | min_children = (int)strtol(optarg, NULL, 0); | |
783 | if (min_children < VDEV_DRAID_MIN_CHILDREN) { | |
784 | (void) fprintf(stderr, "A minimum of 2 " | |
785 | "children are required.\n"); | |
786 | return (1); | |
787 | } | |
788 | ||
789 | break; | |
790 | case 'n': | |
791 | max_children = (int)strtol(optarg, NULL, 0); | |
792 | if (max_children > VDEV_DRAID_MAX_CHILDREN) { | |
793 | (void) fprintf(stderr, "A maximum of %d " | |
794 | "children are allowed.\n", | |
795 | VDEV_DRAID_MAX_CHILDREN); | |
796 | return (1); | |
797 | } | |
798 | break; | |
799 | case 'p': | |
800 | passes = (int)strtol(optarg, NULL, 0); | |
801 | break; | |
802 | case 'v': | |
803 | /* | |
804 | * 0 - Only log when a better map is added to the file. | |
805 | * 1 - Log the current best map for each child count. | |
806 | * Minimal output on a single summary line. | |
807 | * 2 - Log the current best map for each child count. | |
808 | * More verbose includes most map fields. | |
809 | * 3 - Log the current best map for each child count. | |
810 | * Very verbose all fields including the full map. | |
811 | */ | |
812 | verbose++; | |
813 | break; | |
814 | case ':': | |
815 | (void) fprintf(stderr, | |
816 | "missing argument for '%c' option\n", optopt); | |
817 | draid_usage(); | |
818 | break; | |
819 | case '?': | |
820 | (void) fprintf(stderr, "invalid option '%c'\n", | |
821 | optopt); | |
822 | draid_usage(); | |
823 | break; | |
824 | } | |
825 | } | |
826 | ||
861166b0 | 827 | if (argc > optind) |
b2255edc | 828 | strncpy(filename, argv[optind], MAXPATHLEN - 1); |
861166b0 | 829 | else { |
b2255edc BB |
830 | (void) fprintf(stderr, "A FILE must be specified.\n"); |
831 | return (1); | |
832 | } | |
833 | ||
834 | restart: | |
835 | /* | |
836 | * Start with a fresh seed from /dev/urandom. | |
837 | */ | |
838 | fd = open("/dev/urandom", O_RDONLY); | |
839 | if (fd < 0) { | |
840 | printf("Unable to open /dev/urandom: %s\n:", strerror(errno)); | |
841 | return (1); | |
842 | } else { | |
843 | ssize_t bytes = sizeof (map_seed); | |
844 | ssize_t bytes_read = 0; | |
845 | ||
846 | while (bytes_read < bytes) { | |
847 | ssize_t rc = read(fd, ((char *)&map_seed) + bytes_read, | |
848 | bytes - bytes_read); | |
849 | if (rc < 0) { | |
850 | printf("Unable to read /dev/urandom: %s\n:", | |
851 | strerror(errno)); | |
852 | return (1); | |
853 | } | |
854 | bytes_read += rc; | |
855 | } | |
856 | ||
857 | (void) close(fd); | |
858 | } | |
859 | ||
860 | if (restarts == 0) | |
861 | printf("Writing generated mappings to '%s':\n", filename); | |
862 | ||
863 | /* | |
864 | * Generate maps for all requested child counts. The best map for | |
865 | * each child count is written out to the specified file. If the file | |
866 | * already contains a better mapping this map will not be added. | |
867 | */ | |
868 | for (uint64_t children = min_children; | |
869 | children <= max_children; children++) { | |
870 | char key[8] = { 0 }; | |
871 | draid_map_t *map; | |
872 | double worst_ratio = 1000.0; | |
873 | double avg_ratio = 1000.0; | |
874 | ||
875 | error = eval_maps(children, passes, &map_seed, &map, | |
876 | &worst_ratio, &avg_ratio); | |
877 | if (error) { | |
878 | printf("Error eval_maps(): %s\n", strerror(error)); | |
879 | return (1); | |
880 | } | |
881 | ||
882 | if (worst_ratio < 1.0 || avg_ratio < 1.0) { | |
883 | printf("Error ratio < 1.0: worst_ratio = %2.03f " | |
884 | "avg_ratio = %2.03f\n", worst_ratio, avg_ratio); | |
885 | return (1); | |
886 | } | |
887 | ||
888 | snprintf(key, 7, "%llu", (u_longlong_t)children); | |
889 | error = write_map_key(filename, key, map, worst_ratio, | |
890 | avg_ratio); | |
891 | if (error == 0) { | |
892 | /* The new map was added to the file. */ | |
893 | dump_map(map, key, worst_ratio, avg_ratio, | |
894 | MAX(verbose, 1)); | |
895 | } else if (error == EEXIST) { | |
896 | /* The existing map was preferable and kept. */ | |
897 | if (verbose > 0) | |
898 | dump_map_key(filename, key, verbose); | |
899 | } else { | |
900 | printf("Error write_map_key(): %s\n", strerror(error)); | |
901 | return (1); | |
902 | } | |
903 | ||
904 | free_map(map); | |
905 | } | |
906 | ||
907 | /* | |
908 | * When the continuous option is set restart at the minimum number of | |
909 | * children instead of exiting. This option is useful as a mechanism | |
910 | * to continuous try and refine the discovered permutations. | |
911 | */ | |
912 | if (continuous) { | |
913 | restarts++; | |
914 | printf("Restarting by request (-c): %d\n", restarts); | |
915 | goto restart; | |
916 | } | |
917 | ||
918 | return (0); | |
919 | } | |
920 | ||
921 | /* | |
922 | * Verify each map in the file by generating its in-memory permutation array | |
923 | * and confirming its checksum is correct. | |
924 | */ | |
925 | static int | |
926 | draid_verify(int argc, char *argv[]) | |
927 | { | |
861166b0 | 928 | char filename[MAXPATHLEN] = {0}; |
b2255edc BB |
929 | int n = 0, c, error, verbose = 1; |
930 | int check_ratios = 0; | |
931 | ||
932 | while ((c = getopt(argc, argv, ":rv")) != -1) { | |
933 | switch (c) { | |
934 | case 'r': | |
935 | check_ratios++; | |
936 | break; | |
937 | case 'v': | |
938 | verbose++; | |
939 | break; | |
940 | case ':': | |
941 | (void) fprintf(stderr, | |
942 | "missing argument for '%c' option\n", optopt); | |
943 | draid_usage(); | |
944 | break; | |
945 | case '?': | |
946 | (void) fprintf(stderr, "invalid option '%c'\n", | |
947 | optopt); | |
948 | draid_usage(); | |
949 | break; | |
950 | } | |
951 | } | |
952 | ||
953 | if (argc > optind) { | |
954 | char *abspath = malloc(MAXPATHLEN); | |
955 | if (abspath == NULL) | |
956 | return (ENOMEM); | |
957 | ||
b2255edc BB |
958 | if (realpath(argv[optind], abspath) != NULL) |
959 | strncpy(filename, abspath, MAXPATHLEN - 1); | |
960 | else | |
961 | strncpy(filename, argv[optind], MAXPATHLEN - 1); | |
962 | ||
963 | free(abspath); | |
964 | } else { | |
965 | (void) fprintf(stderr, "A FILE must be specified.\n"); | |
966 | return (1); | |
967 | } | |
968 | ||
969 | printf("Verifying permutation maps: '%s'\n", filename); | |
970 | ||
971 | /* | |
972 | * Lookup hardcoded permutation map for each valid number of children | |
973 | * and verify a generated map has the correct checksum. Then compare | |
974 | * the generated map values with the nvlist map values read from the | |
975 | * reference file to cross-check the permutation. | |
976 | */ | |
977 | for (uint64_t children = VDEV_DRAID_MIN_CHILDREN; | |
978 | children <= VDEV_DRAID_MAX_CHILDREN; | |
979 | children++) { | |
980 | draid_map_t *map; | |
861166b0 | 981 | char key[8] = {0}; |
b2255edc | 982 | |
b2255edc BB |
983 | snprintf(key, 8, "%llu", (u_longlong_t)children); |
984 | ||
985 | error = alloc_fixed_map(children, &map); | |
986 | if (error) { | |
987 | printf("Error alloc_fixed_map() failed: %s\n", | |
988 | error == ECKSUM ? "Invalid checksum" : | |
989 | strerror(error)); | |
990 | return (1); | |
991 | } | |
992 | ||
993 | uint64_t nv_seed, nv_checksum, nv_children, nv_nperms; | |
994 | uint8_t *nv_perms; | |
995 | nvlist_t *cfg; | |
996 | uint_t c; | |
997 | ||
998 | error = read_map_key(filename, key, &cfg); | |
999 | if (error != 0) { | |
1000 | printf("Error read_map_key() failed: %s\n", | |
1001 | strerror(error)); | |
1002 | free_map(map); | |
1003 | return (1); | |
1004 | } | |
1005 | ||
1006 | nv_seed = fnvlist_lookup_uint64(cfg, MAP_SEED); | |
1007 | nv_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); | |
1008 | nv_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); | |
1009 | nv_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); | |
1010 | nvlist_lookup_uint8_array(cfg, MAP_PERMS, &nv_perms, &c); | |
1011 | ||
1012 | /* | |
1013 | * Compare draid_map_t and nvlist reference values. | |
1014 | */ | |
1015 | if (map->dm_seed != nv_seed) { | |
1016 | printf("Error different seeds: 0x%016llx != " | |
1017 | "0x%016llx\n", (u_longlong_t)map->dm_seed, | |
1018 | (u_longlong_t)nv_seed); | |
1019 | error = EINVAL; | |
1020 | } | |
1021 | ||
1022 | if (map->dm_checksum != nv_checksum) { | |
1023 | printf("Error different checksums: 0x%016llx " | |
1024 | "!= 0x%016llx\n", | |
1025 | (u_longlong_t)map->dm_checksum, | |
1026 | (u_longlong_t)nv_checksum); | |
1027 | error = EINVAL; | |
1028 | } | |
1029 | ||
1030 | if (map->dm_children != nv_children) { | |
1031 | printf("Error different children: %llu " | |
1032 | "!= %llu\n", (u_longlong_t)map->dm_children, | |
1033 | (u_longlong_t)nv_children); | |
1034 | error = EINVAL; | |
1035 | } | |
1036 | ||
1037 | if (map->dm_nperms != nv_nperms) { | |
1038 | printf("Error different nperms: %llu " | |
1039 | "!= %llu\n", (u_longlong_t)map->dm_nperms, | |
1040 | (u_longlong_t)nv_nperms); | |
1041 | error = EINVAL; | |
1042 | } | |
1043 | ||
1044 | for (uint64_t i = 0; i < nv_children * nv_nperms; i++) { | |
1045 | if (map->dm_perms[i] != nv_perms[i]) { | |
1046 | printf("Error different perms[%llu]: " | |
1047 | "%d != %d\n", (u_longlong_t)i, | |
1048 | (int)map->dm_perms[i], | |
1049 | (int)nv_perms[i]); | |
1050 | error = EINVAL; | |
1051 | break; | |
1052 | } | |
1053 | } | |
1054 | ||
1055 | /* | |
1056 | * For good measure recalculate the worst and average | |
1057 | * ratios and confirm they match the nvlist values. | |
1058 | */ | |
1059 | if (check_ratios) { | |
1060 | uint64_t nv_worst_ratio, nv_avg_ratio; | |
1061 | double worst_ratio, avg_ratio; | |
1062 | ||
1063 | eval_decluster(map, &worst_ratio, &avg_ratio); | |
1064 | ||
1065 | nv_worst_ratio = fnvlist_lookup_uint64(cfg, | |
1066 | MAP_WORST_RATIO); | |
1067 | nv_avg_ratio = fnvlist_lookup_uint64(cfg, | |
1068 | MAP_AVG_RATIO); | |
1069 | ||
1070 | if (worst_ratio < 1.0 || avg_ratio < 1.0) { | |
1071 | printf("Error ratio out of range %2.03f, " | |
1072 | "%2.03f\n", worst_ratio, avg_ratio); | |
1073 | error = EINVAL; | |
1074 | } | |
1075 | ||
1076 | if ((uint64_t)(worst_ratio * 1000.0) != | |
1077 | nv_worst_ratio) { | |
1078 | printf("Error different worst_ratio %2.03f " | |
1079 | "!= %2.03f\n", (double)nv_worst_ratio / | |
1080 | 1000.0, worst_ratio); | |
1081 | error = EINVAL; | |
1082 | } | |
1083 | ||
1084 | if ((uint64_t)(avg_ratio * 1000.0) != nv_avg_ratio) { | |
1085 | printf("Error different average_ratio %2.03f " | |
1086 | "!= %2.03f\n", (double)nv_avg_ratio / | |
1087 | 1000.0, avg_ratio); | |
1088 | error = EINVAL; | |
1089 | } | |
1090 | } | |
1091 | ||
1092 | if (error) { | |
1093 | free_map(map); | |
1094 | nvlist_free(cfg); | |
1095 | return (1); | |
1096 | } | |
1097 | ||
1098 | if (verbose > 0) { | |
1099 | printf("- %llu children: good\n", | |
1100 | (u_longlong_t)children); | |
1101 | } | |
1102 | n++; | |
1103 | ||
1104 | free_map(map); | |
1105 | nvlist_free(cfg); | |
1106 | } | |
1107 | ||
1108 | if (n != (VDEV_DRAID_MAX_CHILDREN - 1)) { | |
1109 | printf("Error permutation maps missing: %d / %d checked\n", | |
1110 | n, VDEV_DRAID_MAX_CHILDREN - 1); | |
1111 | return (1); | |
1112 | } | |
1113 | ||
1114 | printf("Successfully verified %d / %d permutation maps\n", | |
1115 | n, VDEV_DRAID_MAX_CHILDREN - 1); | |
1116 | ||
1117 | return (0); | |
1118 | } | |
1119 | ||
1120 | /* | |
1121 | * Dump the contents of the specified mapping(s) for inspection. | |
1122 | */ | |
1123 | static int | |
1124 | draid_dump(int argc, char *argv[]) | |
1125 | { | |
861166b0 | 1126 | char filename[MAXPATHLEN] = {0}; |
b2255edc BB |
1127 | int c, error, verbose = 1; |
1128 | int min_children = VDEV_DRAID_MIN_CHILDREN; | |
1129 | int max_children = VDEV_DRAID_MAX_CHILDREN; | |
1130 | ||
1131 | while ((c = getopt(argc, argv, ":vm:n:")) != -1) { | |
1132 | switch (c) { | |
1133 | case 'm': | |
1134 | min_children = (int)strtol(optarg, NULL, 0); | |
1135 | if (min_children < 2) { | |
1136 | (void) fprintf(stderr, "A minimum of 2 " | |
1137 | "children are required.\n"); | |
1138 | return (1); | |
1139 | } | |
1140 | ||
1141 | break; | |
1142 | case 'n': | |
1143 | max_children = (int)strtol(optarg, NULL, 0); | |
1144 | if (max_children > VDEV_DRAID_MAX_CHILDREN) { | |
1145 | (void) fprintf(stderr, "A maximum of %d " | |
1146 | "children are allowed.\n", | |
1147 | VDEV_DRAID_MAX_CHILDREN); | |
1148 | return (1); | |
1149 | } | |
1150 | break; | |
1151 | case 'v': | |
1152 | verbose++; | |
1153 | break; | |
1154 | case ':': | |
1155 | (void) fprintf(stderr, | |
1156 | "missing argument for '%c' option\n", optopt); | |
1157 | draid_usage(); | |
1158 | break; | |
1159 | case '?': | |
1160 | (void) fprintf(stderr, "invalid option '%c'\n", | |
1161 | optopt); | |
1162 | draid_usage(); | |
1163 | break; | |
1164 | } | |
1165 | } | |
1166 | ||
861166b0 | 1167 | if (argc > optind) |
b2255edc | 1168 | strncpy(filename, argv[optind], MAXPATHLEN - 1); |
861166b0 | 1169 | else { |
b2255edc BB |
1170 | (void) fprintf(stderr, "A FILE must be specified.\n"); |
1171 | return (1); | |
1172 | } | |
1173 | ||
1174 | /* | |
1175 | * Dump maps for the requested child counts. | |
1176 | */ | |
1177 | for (uint64_t children = min_children; | |
1178 | children <= max_children; children++) { | |
1179 | char key[8] = { 0 }; | |
1180 | ||
1181 | snprintf(key, 7, "%llu", (u_longlong_t)children); | |
1182 | error = dump_map_key(filename, key, verbose); | |
1183 | if (error) { | |
1184 | printf("Error dump_map_key(): %s\n", strerror(error)); | |
1185 | return (1); | |
1186 | } | |
1187 | } | |
1188 | ||
1189 | return (0); | |
1190 | } | |
1191 | ||
1192 | /* | |
bf169e9f AG |
1193 | * Print all of the mappings as a C formatted draid_map_t array. This table |
1194 | * is found in the module/zcommon/zfs_draid.c file and is the definitive | |
b2255edc BB |
1195 | * source for all mapping used by dRAID. It cannot be updated without |
1196 | * changing the dRAID on disk format. | |
1197 | */ | |
1198 | static int | |
1199 | draid_table(int argc, char *argv[]) | |
1200 | { | |
861166b0 | 1201 | char filename[MAXPATHLEN] = {0}; |
b2255edc BB |
1202 | int error; |
1203 | ||
861166b0 | 1204 | if (argc > optind) |
b2255edc | 1205 | strncpy(filename, argv[optind], MAXPATHLEN - 1); |
861166b0 | 1206 | else { |
b2255edc BB |
1207 | (void) fprintf(stderr, "A FILE must be specified.\n"); |
1208 | return (1); | |
1209 | } | |
1210 | ||
1211 | printf("static const draid_map_t " | |
1212 | "draid_maps[VDEV_DRAID_MAX_MAPS] = {\n"); | |
1213 | ||
1214 | for (uint64_t children = VDEV_DRAID_MIN_CHILDREN; | |
1215 | children <= VDEV_DRAID_MAX_CHILDREN; | |
1216 | children++) { | |
1217 | uint64_t seed, checksum, nperms, avg_ratio; | |
1218 | nvlist_t *cfg; | |
861166b0 | 1219 | char key[8] = {0}; |
b2255edc | 1220 | |
b2255edc BB |
1221 | snprintf(key, 8, "%llu", (u_longlong_t)children); |
1222 | ||
1223 | error = read_map_key(filename, key, &cfg); | |
1224 | if (error != 0) { | |
1225 | printf("Error read_map_key() failed: %s\n", | |
1226 | strerror(error)); | |
1227 | return (1); | |
1228 | } | |
1229 | ||
1230 | seed = fnvlist_lookup_uint64(cfg, MAP_SEED); | |
1231 | checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); | |
1232 | children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); | |
1233 | nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); | |
1234 | avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO); | |
1235 | ||
1236 | printf("\t{ %3llu, %3llu, 0x%016llx, 0x%016llx },\t" | |
1237 | "/* %2.03f */\n", (u_longlong_t)children, | |
1238 | (u_longlong_t)nperms, (u_longlong_t)seed, | |
1239 | (u_longlong_t)checksum, (double)avg_ratio / 1000.0); | |
1240 | ||
1241 | nvlist_free(cfg); | |
1242 | } | |
1243 | ||
1244 | printf("};\n"); | |
1245 | ||
1246 | return (0); | |
1247 | } | |
1248 | ||
1249 | static int | |
1250 | draid_merge_impl(nvlist_t *allcfgs, const char *srcfilename, int *mergedp) | |
1251 | { | |
1252 | nvlist_t *srccfgs; | |
1253 | nvpair_t *elem = NULL; | |
1254 | int error, merged = 0; | |
1255 | ||
1256 | error = read_map(srcfilename, &srccfgs); | |
1257 | if (error != 0) | |
1258 | return (error); | |
1259 | ||
1260 | while ((elem = nvlist_next_nvpair(srccfgs, elem)) != NULL) { | |
1261 | uint64_t nv_worst_ratio; | |
1262 | uint64_t allcfg_worst_ratio; | |
1263 | nvlist_t *cfg, *allcfg; | |
1264 | char *key; | |
1265 | ||
1266 | switch (nvpair_type(elem)) { | |
1267 | case DATA_TYPE_NVLIST: | |
1268 | ||
1269 | (void) nvpair_value_nvlist(elem, &cfg); | |
1270 | key = nvpair_name(elem); | |
1271 | ||
1272 | nv_worst_ratio = fnvlist_lookup_uint64(cfg, | |
1273 | MAP_WORST_RATIO); | |
1274 | ||
1275 | error = nvlist_lookup_nvlist(allcfgs, key, &allcfg); | |
1276 | if (error == 0) { | |
1277 | allcfg_worst_ratio = fnvlist_lookup_uint64( | |
1278 | allcfg, MAP_WORST_RATIO); | |
1279 | ||
1280 | if (nv_worst_ratio < allcfg_worst_ratio) { | |
1281 | fnvlist_remove(allcfgs, key); | |
1282 | error = nvlist_add_nvlist(allcfgs, | |
1283 | key, cfg); | |
1284 | merged++; | |
1285 | } | |
1286 | } else if (error == ENOENT) { | |
1287 | error = nvlist_add_nvlist(allcfgs, key, cfg); | |
1288 | merged++; | |
1289 | } else { | |
1290 | return (error); | |
1291 | } | |
1292 | ||
1293 | break; | |
1294 | default: | |
1295 | continue; | |
1296 | } | |
1297 | } | |
1298 | ||
1299 | nvlist_free(srccfgs); | |
1300 | ||
1301 | *mergedp = merged; | |
1302 | ||
1303 | return (0); | |
1304 | } | |
1305 | ||
1306 | /* | |
1307 | * Merge the best map for each child count found in the listed files into | |
1308 | * a new file. This allows 'draid generate' to be run in parallel and for | |
1309 | * the results maps to be combined. | |
1310 | */ | |
1311 | static int | |
1312 | draid_merge(int argc, char *argv[]) | |
1313 | { | |
861166b0 | 1314 | char filename[MAXPATHLEN] = {0}; |
b2255edc BB |
1315 | int c, error, total_merged = 0, verbose = 0; |
1316 | nvlist_t *allcfgs; | |
1317 | ||
1318 | while ((c = getopt(argc, argv, ":v")) != -1) { | |
1319 | switch (c) { | |
1320 | case 'v': | |
1321 | verbose++; | |
1322 | break; | |
1323 | case ':': | |
1324 | (void) fprintf(stderr, | |
1325 | "missing argument for '%c' option\n", optopt); | |
1326 | draid_usage(); | |
1327 | break; | |
1328 | case '?': | |
1329 | (void) fprintf(stderr, "invalid option '%c'\n", | |
1330 | optopt); | |
1331 | draid_usage(); | |
1332 | break; | |
1333 | } | |
1334 | } | |
1335 | ||
1336 | if (argc < 4) { | |
1337 | (void) fprintf(stderr, | |
1338 | "A FILE and multiple SRCs must be specified.\n"); | |
1339 | return (1); | |
1340 | } | |
1341 | ||
b2255edc BB |
1342 | strncpy(filename, argv[optind], MAXPATHLEN - 1); |
1343 | optind++; | |
1344 | ||
1345 | error = read_map(filename, &allcfgs); | |
1346 | if (error == ENOENT) { | |
1347 | allcfgs = fnvlist_alloc(); | |
1348 | } else if (error != 0) { | |
1349 | printf("Error read_map(): %s\n", strerror(error)); | |
1350 | return (error); | |
1351 | } | |
1352 | ||
1353 | while (optind < argc) { | |
861166b0 | 1354 | char srcfilename[MAXPATHLEN] = {0}; |
b2255edc BB |
1355 | int merged = 0; |
1356 | ||
b2255edc BB |
1357 | strncpy(srcfilename, argv[optind], MAXPATHLEN - 1); |
1358 | ||
1359 | error = draid_merge_impl(allcfgs, srcfilename, &merged); | |
1360 | if (error) { | |
1361 | printf("Error draid_merge_impl(): %s\n", | |
1362 | strerror(error)); | |
1363 | nvlist_free(allcfgs); | |
1364 | return (1); | |
1365 | } | |
1366 | ||
1367 | total_merged += merged; | |
1368 | printf("Merged %d key(s) from '%s' into '%s'\n", merged, | |
1369 | srcfilename, filename); | |
1370 | ||
1371 | optind++; | |
1372 | } | |
1373 | ||
1374 | if (total_merged > 0) | |
1375 | write_map(filename, allcfgs); | |
1376 | ||
1377 | printf("Merged a total of %d key(s) into '%s'\n", total_merged, | |
1378 | filename); | |
1379 | ||
1380 | nvlist_free(allcfgs); | |
1381 | ||
1382 | return (0); | |
1383 | } | |
1384 | ||
/*
 * Entry point: dispatch on the subcommand named by argv[1].
 *
 * The chosen handler receives the argument vector shifted by one, so the
 * subcommand name sits in its argv[0], and its return value becomes the
 * return value of main().  A missing or unrecognized subcommand is routed
 * to draid_usage().
 */
int
main(int argc, char *argv[])
{
	if (argc < 2)
		draid_usage();

	const char *cmd = argv[1];
	int sub_argc = argc - 1;
	char **sub_argv = argv + 1;

	if (strcmp(cmd, "generate") == 0)
		return (draid_generate(sub_argc, sub_argv));

	if (strcmp(cmd, "verify") == 0)
		return (draid_verify(sub_argc, sub_argv));

	if (strcmp(cmd, "dump") == 0)
		return (draid_dump(sub_argc, sub_argv));

	if (strcmp(cmd, "table") == 0)
		return (draid_table(sub_argc, sub_argv));

	if (strcmp(cmd, "merge") == 0)
		return (draid_merge(sub_argc, sub_argv));

	/* No known subcommand matched. */
	draid_usage();
}