]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (C) 2003 Sistina Software Limited. | |
3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | |
4 | * | |
5 | * This file is released under the GPL. | |
6 | */ | |
7 | ||
8 | #include "dm.h" | |
9 | #include "dm-path-selector.h" | |
10 | #include "dm-hw-handler.h" | |
11 | #include "dm-bio-list.h" | |
12 | #include "dm-bio-record.h" | |
13 | ||
14 | #include <linux/ctype.h> | |
15 | #include <linux/init.h> | |
16 | #include <linux/mempool.h> | |
17 | #include <linux/module.h> | |
18 | #include <linux/pagemap.h> | |
19 | #include <linux/slab.h> | |
20 | #include <linux/time.h> | |
21 | #include <linux/workqueue.h> | |
22 | #include <asm/atomic.h> | |
23 | ||
24 | #define MESG_STR(x) x, sizeof(x) | |
25 | ||
26 | /* Path properties */ | |
27 | struct pgpath { | |
28 | struct list_head list; | |
29 | ||
30 | struct priority_group *pg; /* Owning PG */ | |
31 | unsigned fail_count; /* Cumulative failure count */ | |
32 | ||
33 | struct path path; | |
34 | }; | |
35 | ||
/* Map a pointer to an embedded 'struct path' back to its enclosing pgpath. */
#define path_to_pgpath(pp) container_of((pp), struct pgpath, path)
37 | ||
38 | /* | |
39 | * Paths are grouped into Priority Groups and numbered from 1 upwards. | |
40 | * Each has a path selector which controls which path gets used. | |
41 | */ | |
42 | struct priority_group { | |
43 | struct list_head list; | |
44 | ||
45 | struct multipath *m; /* Owning multipath instance */ | |
46 | struct path_selector ps; | |
47 | ||
48 | unsigned pg_num; /* Reference number */ | |
49 | unsigned bypassed; /* Temporarily bypass this PG? */ | |
50 | ||
51 | unsigned nr_pgpaths; /* Number of paths in PG */ | |
52 | struct list_head pgpaths; | |
53 | }; | |
54 | ||
55 | /* Multipath context */ | |
56 | struct multipath { | |
57 | struct list_head list; | |
58 | struct dm_target *ti; | |
59 | ||
60 | spinlock_t lock; | |
61 | ||
62 | struct hw_handler hw_handler; | |
63 | unsigned nr_priority_groups; | |
64 | struct list_head priority_groups; | |
65 | unsigned pg_init_required; /* pg_init needs calling? */ | |
66 | ||
67 | unsigned nr_valid_paths; /* Total number of usable paths */ | |
68 | struct pgpath *current_pgpath; | |
69 | struct priority_group *current_pg; | |
70 | struct priority_group *next_pg; /* Switch to this PG if set */ | |
71 | unsigned repeat_count; /* I/Os left before calling PS again */ | |
72 | ||
73 | unsigned queue_io; /* Must we queue all I/O? */ | |
74 | unsigned queue_if_no_path; /* Queue I/O if last path fails? */ | |
75 | unsigned suspended; /* Has dm core suspended our I/O? */ | |
76 | ||
77 | struct work_struct process_queued_ios; | |
78 | struct bio_list queued_ios; | |
79 | unsigned queue_size; | |
80 | ||
81 | struct work_struct trigger_event; | |
82 | ||
83 | /* | |
84 | * We must use a mempool of mpath_io structs so that we | |
85 | * can resubmit bios on error. | |
86 | */ | |
87 | mempool_t *mpio_pool; | |
88 | }; | |
89 | ||
90 | /* | |
91 | * Context information attached to each bio we process. | |
92 | */ | |
93 | struct mpath_io { | |
94 | struct pgpath *pgpath; | |
95 | struct dm_bio_details details; | |
96 | }; | |
97 | ||
98 | typedef int (*action_fn) (struct pgpath *pgpath); | |
99 | ||
100 | #define MIN_IOS 256 /* Mempool size */ | |
101 | ||
102 | static kmem_cache_t *_mpio_cache; | |
103 | ||
104 | static void process_queued_ios(void *data); | |
105 | static void trigger_event(void *data); | |
106 | ||
107 | ||
108 | /*----------------------------------------------- | |
109 | * Allocation routines | |
110 | *-----------------------------------------------*/ | |
111 | ||
112 | static struct pgpath *alloc_pgpath(void) | |
113 | { | |
114 | struct pgpath *pgpath = kmalloc(sizeof(*pgpath), GFP_KERNEL); | |
115 | ||
116 | if (pgpath) { | |
117 | memset(pgpath, 0, sizeof(*pgpath)); | |
118 | pgpath->path.is_active = 1; | |
119 | } | |
120 | ||
121 | return pgpath; | |
122 | } | |
123 | ||
/* Release a pgpath obtained from alloc_pgpath(). */
static inline void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}
128 | ||
129 | static struct priority_group *alloc_priority_group(void) | |
130 | { | |
131 | struct priority_group *pg; | |
132 | ||
133 | pg = kmalloc(sizeof(*pg), GFP_KERNEL); | |
134 | if (!pg) | |
135 | return NULL; | |
136 | ||
137 | memset(pg, 0, sizeof(*pg)); | |
138 | INIT_LIST_HEAD(&pg->pgpaths); | |
139 | ||
140 | return pg; | |
141 | } | |
142 | ||
143 | static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) | |
144 | { | |
145 | struct pgpath *pgpath, *tmp; | |
146 | ||
147 | list_for_each_entry_safe(pgpath, tmp, pgpaths, list) { | |
148 | list_del(&pgpath->list); | |
149 | dm_put_device(ti, pgpath->path.dev); | |
150 | free_pgpath(pgpath); | |
151 | } | |
152 | } | |
153 | ||
154 | static void free_priority_group(struct priority_group *pg, | |
155 | struct dm_target *ti) | |
156 | { | |
157 | struct path_selector *ps = &pg->ps; | |
158 | ||
159 | if (ps->type) { | |
160 | ps->type->destroy(ps); | |
161 | dm_put_path_selector(ps->type); | |
162 | } | |
163 | ||
164 | free_pgpaths(&pg->pgpaths, ti); | |
165 | kfree(pg); | |
166 | } | |
167 | ||
168 | static struct multipath *alloc_multipath(void) | |
169 | { | |
170 | struct multipath *m; | |
171 | ||
172 | m = kmalloc(sizeof(*m), GFP_KERNEL); | |
173 | if (m) { | |
174 | memset(m, 0, sizeof(*m)); | |
175 | INIT_LIST_HEAD(&m->priority_groups); | |
176 | spin_lock_init(&m->lock); | |
177 | m->queue_io = 1; | |
178 | INIT_WORK(&m->process_queued_ios, process_queued_ios, m); | |
179 | INIT_WORK(&m->trigger_event, trigger_event, m); | |
180 | m->mpio_pool = mempool_create(MIN_IOS, mempool_alloc_slab, | |
181 | mempool_free_slab, _mpio_cache); | |
182 | if (!m->mpio_pool) { | |
183 | kfree(m); | |
184 | return NULL; | |
185 | } | |
186 | } | |
187 | ||
188 | return m; | |
189 | } | |
190 | ||
191 | static void free_multipath(struct multipath *m) | |
192 | { | |
193 | struct priority_group *pg, *tmp; | |
194 | struct hw_handler *hwh = &m->hw_handler; | |
195 | ||
196 | list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) { | |
197 | list_del(&pg->list); | |
198 | free_priority_group(pg, m->ti); | |
199 | } | |
200 | ||
201 | if (hwh->type) { | |
202 | hwh->type->destroy(hwh); | |
203 | dm_put_hw_handler(hwh->type); | |
204 | } | |
205 | ||
206 | mempool_destroy(m->mpio_pool); | |
207 | kfree(m); | |
208 | } | |
209 | ||
210 | ||
211 | /*----------------------------------------------- | |
212 | * Path selection | |
213 | *-----------------------------------------------*/ | |
214 | ||
215 | static void __switch_pg(struct multipath *m, struct pgpath *pgpath) | |
216 | { | |
217 | struct hw_handler *hwh = &m->hw_handler; | |
218 | ||
219 | m->current_pg = pgpath->pg; | |
220 | ||
221 | /* Must we initialise the PG first, and queue I/O till it's ready? */ | |
222 | if (hwh->type && hwh->type->pg_init) { | |
223 | m->pg_init_required = 1; | |
224 | m->queue_io = 1; | |
225 | } else { | |
226 | m->pg_init_required = 0; | |
227 | m->queue_io = 0; | |
228 | } | |
229 | } | |
230 | ||
231 | static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg) | |
232 | { | |
233 | struct path *path; | |
234 | ||
235 | path = pg->ps.type->select_path(&pg->ps, &m->repeat_count); | |
236 | if (!path) | |
237 | return -ENXIO; | |
238 | ||
239 | m->current_pgpath = path_to_pgpath(path); | |
240 | ||
241 | if (m->current_pg != pg) | |
242 | __switch_pg(m, m->current_pgpath); | |
243 | ||
244 | return 0; | |
245 | } | |
246 | ||
247 | static void __choose_pgpath(struct multipath *m) | |
248 | { | |
249 | struct priority_group *pg; | |
250 | unsigned bypassed = 1; | |
251 | ||
252 | if (!m->nr_valid_paths) | |
253 | goto failed; | |
254 | ||
255 | /* Were we instructed to switch PG? */ | |
256 | if (m->next_pg) { | |
257 | pg = m->next_pg; | |
258 | m->next_pg = NULL; | |
259 | if (!__choose_path_in_pg(m, pg)) | |
260 | return; | |
261 | } | |
262 | ||
263 | /* Don't change PG until it has no remaining paths */ | |
264 | if (m->current_pg && !__choose_path_in_pg(m, m->current_pg)) | |
265 | return; | |
266 | ||
267 | /* | |
268 | * Loop through priority groups until we find a valid path. | |
269 | * First time we skip PGs marked 'bypassed'. | |
270 | * Second time we only try the ones we skipped. | |
271 | */ | |
272 | do { | |
273 | list_for_each_entry(pg, &m->priority_groups, list) { | |
274 | if (pg->bypassed == bypassed) | |
275 | continue; | |
276 | if (!__choose_path_in_pg(m, pg)) | |
277 | return; | |
278 | } | |
279 | } while (bypassed--); | |
280 | ||
281 | failed: | |
282 | m->current_pgpath = NULL; | |
283 | m->current_pg = NULL; | |
284 | } | |
285 | ||
286 | static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio, | |
287 | unsigned was_queued) | |
288 | { | |
289 | int r = 1; | |
290 | unsigned long flags; | |
291 | struct pgpath *pgpath; | |
292 | ||
293 | spin_lock_irqsave(&m->lock, flags); | |
294 | ||
295 | /* Do we need to select a new pgpath? */ | |
296 | if (!m->current_pgpath || | |
297 | (!m->queue_io && (m->repeat_count && --m->repeat_count == 0))) | |
298 | __choose_pgpath(m); | |
299 | ||
300 | pgpath = m->current_pgpath; | |
301 | ||
302 | if (was_queued) | |
303 | m->queue_size--; | |
304 | ||
305 | if ((pgpath && m->queue_io) || | |
306 | (!pgpath && m->queue_if_no_path && !m->suspended)) { | |
307 | /* Queue for the daemon to resubmit */ | |
308 | bio_list_add(&m->queued_ios, bio); | |
309 | m->queue_size++; | |
310 | if (m->pg_init_required || !m->queue_io) | |
311 | schedule_work(&m->process_queued_ios); | |
312 | pgpath = NULL; | |
313 | r = 0; | |
314 | } else if (!pgpath) | |
315 | r = -EIO; /* Failed */ | |
316 | else | |
317 | bio->bi_bdev = pgpath->path.dev->bdev; | |
318 | ||
319 | mpio->pgpath = pgpath; | |
320 | ||
321 | spin_unlock_irqrestore(&m->lock, flags); | |
322 | ||
323 | return r; | |
324 | } | |
325 | ||
326 | /* | |
327 | * If we run out of usable paths, should we queue I/O or error it? | |
328 | */ | |
329 | static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path) | |
330 | { | |
331 | unsigned long flags; | |
332 | ||
333 | spin_lock_irqsave(&m->lock, flags); | |
334 | ||
335 | m->queue_if_no_path = queue_if_no_path; | |
336 | if (!m->queue_if_no_path) | |
337 | schedule_work(&m->process_queued_ios); | |
338 | ||
339 | spin_unlock_irqrestore(&m->lock, flags); | |
340 | ||
341 | return 0; | |
342 | } | |
343 | ||
344 | /*----------------------------------------------------------------- | |
345 | * The multipath daemon is responsible for resubmitting queued ios. | |
346 | *---------------------------------------------------------------*/ | |
347 | ||
348 | static void dispatch_queued_ios(struct multipath *m) | |
349 | { | |
350 | int r; | |
351 | unsigned long flags; | |
352 | struct bio *bio = NULL, *next; | |
353 | struct mpath_io *mpio; | |
354 | union map_info *info; | |
355 | ||
356 | spin_lock_irqsave(&m->lock, flags); | |
357 | bio = bio_list_get(&m->queued_ios); | |
358 | spin_unlock_irqrestore(&m->lock, flags); | |
359 | ||
360 | while (bio) { | |
361 | next = bio->bi_next; | |
362 | bio->bi_next = NULL; | |
363 | ||
364 | info = dm_get_mapinfo(bio); | |
365 | mpio = info->ptr; | |
366 | ||
367 | r = map_io(m, bio, mpio, 1); | |
368 | if (r < 0) | |
369 | bio_endio(bio, bio->bi_size, r); | |
370 | else if (r == 1) | |
371 | generic_make_request(bio); | |
372 | ||
373 | bio = next; | |
374 | } | |
375 | } | |
376 | ||
377 | static void process_queued_ios(void *data) | |
378 | { | |
379 | struct multipath *m = (struct multipath *) data; | |
380 | struct hw_handler *hwh = &m->hw_handler; | |
381 | struct pgpath *pgpath; | |
382 | unsigned init_required, must_queue = 0; | |
383 | unsigned long flags; | |
384 | ||
385 | spin_lock_irqsave(&m->lock, flags); | |
386 | ||
387 | if (!m->current_pgpath) | |
388 | __choose_pgpath(m); | |
389 | ||
390 | pgpath = m->current_pgpath; | |
391 | ||
392 | if ((pgpath && m->queue_io) || | |
393 | (!pgpath && m->queue_if_no_path && !m->suspended)) | |
394 | must_queue = 1; | |
395 | ||
396 | init_required = m->pg_init_required; | |
397 | if (init_required) | |
398 | m->pg_init_required = 0; | |
399 | ||
400 | spin_unlock_irqrestore(&m->lock, flags); | |
401 | ||
402 | if (init_required) | |
403 | hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path); | |
404 | ||
405 | if (!must_queue) | |
406 | dispatch_queued_ios(m); | |
407 | } | |
408 | ||
409 | /* | |
410 | * An event is triggered whenever a path is taken out of use. | |
411 | * Includes path failure and PG bypass. | |
412 | */ | |
413 | static void trigger_event(void *data) | |
414 | { | |
415 | struct multipath *m = (struct multipath *) data; | |
416 | ||
417 | dm_table_event(m->ti->table); | |
418 | } | |
419 | ||
420 | /*----------------------------------------------------------------- | |
421 | * Constructor/argument parsing: | |
422 | * <#multipath feature args> [<arg>]* | |
423 | * <#hw_handler args> [hw_handler [<arg>]*] | |
424 | * <#priority groups> | |
425 | * <initial priority group> | |
426 | * [<selector> <#selector args> [<arg>]* | |
427 | * <#paths> <#per-path selector args> | |
428 | * [<path> [<arg>]* ]+ ]+ | |
429 | *---------------------------------------------------------------*/ | |
/* Describes one numeric constructor parameter for read_param(). */
struct param {
	unsigned int min;	/* Smallest accepted value */
	unsigned int max;	/* Largest accepted value */
	char *error;		/* Message reported when validation fails */
};

/* Prefix a constructor error string with the target name. */
#define ESTR(s) ("dm-multipath: " s)
437 | ||
438 | static int read_param(struct param *param, char *str, unsigned *v, char **error) | |
439 | { | |
440 | if (!str || | |
441 | (sscanf(str, "%u", v) != 1) || | |
442 | (*v < param->min) || | |
443 | (*v > param->max)) { | |
444 | *error = param->error; | |
445 | return -EINVAL; | |
446 | } | |
447 | ||
448 | return 0; | |
449 | } | |
450 | ||
/* Cursor over the not-yet-consumed constructor arguments. */
struct arg_set {
	unsigned int argc;	/* Arguments remaining */
	char **argv;		/* Next argument */
};
455 | ||
456 | static char *shift(struct arg_set *as) | |
457 | { | |
458 | char *r; | |
459 | ||
460 | if (as->argc) { | |
461 | as->argc--; | |
462 | r = *as->argv; | |
463 | as->argv++; | |
464 | return r; | |
465 | } | |
466 | ||
467 | return NULL; | |
468 | } | |
469 | ||
470 | static void consume(struct arg_set *as, unsigned n) | |
471 | { | |
472 | BUG_ON (as->argc < n); | |
473 | as->argc -= n; | |
474 | as->argv += n; | |
475 | } | |
476 | ||
477 | static int parse_path_selector(struct arg_set *as, struct priority_group *pg, | |
478 | struct dm_target *ti) | |
479 | { | |
480 | int r; | |
481 | struct path_selector_type *pst; | |
482 | unsigned ps_argc; | |
483 | ||
484 | static struct param _params[] = { | |
485 | {0, 1024, ESTR("invalid number of path selector args")}, | |
486 | }; | |
487 | ||
488 | pst = dm_get_path_selector(shift(as)); | |
489 | if (!pst) { | |
490 | ti->error = ESTR("unknown path selector type"); | |
491 | return -EINVAL; | |
492 | } | |
493 | ||
494 | r = read_param(_params, shift(as), &ps_argc, &ti->error); | |
495 | if (r) | |
496 | return -EINVAL; | |
497 | ||
498 | r = pst->create(&pg->ps, ps_argc, as->argv); | |
499 | if (r) { | |
500 | dm_put_path_selector(pst); | |
501 | ti->error = ESTR("path selector constructor failed"); | |
502 | return r; | |
503 | } | |
504 | ||
505 | pg->ps.type = pst; | |
506 | consume(as, ps_argc); | |
507 | ||
508 | return 0; | |
509 | } | |
510 | ||
511 | static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, | |
512 | struct dm_target *ti) | |
513 | { | |
514 | int r; | |
515 | struct pgpath *p; | |
516 | ||
517 | /* we need at least a path arg */ | |
518 | if (as->argc < 1) { | |
519 | ti->error = ESTR("no device given"); | |
520 | return NULL; | |
521 | } | |
522 | ||
523 | p = alloc_pgpath(); | |
524 | if (!p) | |
525 | return NULL; | |
526 | ||
527 | r = dm_get_device(ti, shift(as), ti->begin, ti->len, | |
528 | dm_table_get_mode(ti->table), &p->path.dev); | |
529 | if (r) { | |
530 | ti->error = ESTR("error getting device"); | |
531 | goto bad; | |
532 | } | |
533 | ||
534 | r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error); | |
535 | if (r) { | |
536 | dm_put_device(ti, p->path.dev); | |
537 | goto bad; | |
538 | } | |
539 | ||
540 | return p; | |
541 | ||
542 | bad: | |
543 | free_pgpath(p); | |
544 | return NULL; | |
545 | } | |
546 | ||
547 | static struct priority_group *parse_priority_group(struct arg_set *as, | |
548 | struct multipath *m, | |
549 | struct dm_target *ti) | |
550 | { | |
551 | static struct param _params[] = { | |
552 | {1, 1024, ESTR("invalid number of paths")}, | |
553 | {0, 1024, ESTR("invalid number of selector args")} | |
554 | }; | |
555 | ||
556 | int r; | |
557 | unsigned i, nr_selector_args, nr_params; | |
558 | struct priority_group *pg; | |
559 | ||
560 | if (as->argc < 2) { | |
561 | as->argc = 0; | |
562 | ti->error = ESTR("not enough priority group aruments"); | |
563 | return NULL; | |
564 | } | |
565 | ||
566 | pg = alloc_priority_group(); | |
567 | if (!pg) { | |
568 | ti->error = ESTR("couldn't allocate priority group"); | |
569 | return NULL; | |
570 | } | |
571 | pg->m = m; | |
572 | ||
573 | r = parse_path_selector(as, pg, ti); | |
574 | if (r) | |
575 | goto bad; | |
576 | ||
577 | /* | |
578 | * read the paths | |
579 | */ | |
580 | r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error); | |
581 | if (r) | |
582 | goto bad; | |
583 | ||
584 | r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error); | |
585 | if (r) | |
586 | goto bad; | |
587 | ||
588 | nr_params = 1 + nr_selector_args; | |
589 | for (i = 0; i < pg->nr_pgpaths; i++) { | |
590 | struct pgpath *pgpath; | |
591 | struct arg_set path_args; | |
592 | ||
593 | if (as->argc < nr_params) | |
594 | goto bad; | |
595 | ||
596 | path_args.argc = nr_params; | |
597 | path_args.argv = as->argv; | |
598 | ||
599 | pgpath = parse_path(&path_args, &pg->ps, ti); | |
600 | if (!pgpath) | |
601 | goto bad; | |
602 | ||
603 | pgpath->pg = pg; | |
604 | list_add_tail(&pgpath->list, &pg->pgpaths); | |
605 | consume(as, nr_params); | |
606 | } | |
607 | ||
608 | return pg; | |
609 | ||
610 | bad: | |
611 | free_priority_group(pg, ti); | |
612 | return NULL; | |
613 | } | |
614 | ||
615 | static int parse_hw_handler(struct arg_set *as, struct multipath *m, | |
616 | struct dm_target *ti) | |
617 | { | |
618 | int r; | |
619 | struct hw_handler_type *hwht; | |
620 | unsigned hw_argc; | |
621 | ||
622 | static struct param _params[] = { | |
623 | {0, 1024, ESTR("invalid number of hardware handler args")}, | |
624 | }; | |
625 | ||
626 | r = read_param(_params, shift(as), &hw_argc, &ti->error); | |
627 | if (r) | |
628 | return -EINVAL; | |
629 | ||
630 | if (!hw_argc) | |
631 | return 0; | |
632 | ||
633 | hwht = dm_get_hw_handler(shift(as)); | |
634 | if (!hwht) { | |
635 | ti->error = ESTR("unknown hardware handler type"); | |
636 | return -EINVAL; | |
637 | } | |
638 | ||
639 | r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv); | |
640 | if (r) { | |
641 | dm_put_hw_handler(hwht); | |
642 | ti->error = ESTR("hardware handler constructor failed"); | |
643 | return r; | |
644 | } | |
645 | ||
646 | m->hw_handler.type = hwht; | |
647 | consume(as, hw_argc - 1); | |
648 | ||
649 | return 0; | |
650 | } | |
651 | ||
652 | static int parse_features(struct arg_set *as, struct multipath *m, | |
653 | struct dm_target *ti) | |
654 | { | |
655 | int r; | |
656 | unsigned argc; | |
657 | ||
658 | static struct param _params[] = { | |
659 | {0, 1, ESTR("invalid number of feature args")}, | |
660 | }; | |
661 | ||
662 | r = read_param(_params, shift(as), &argc, &ti->error); | |
663 | if (r) | |
664 | return -EINVAL; | |
665 | ||
666 | if (!argc) | |
667 | return 0; | |
668 | ||
669 | if (!strnicmp(shift(as), MESG_STR("queue_if_no_path"))) | |
670 | return queue_if_no_path(m, 1); | |
671 | else { | |
672 | ti->error = "Unrecognised multipath feature request"; | |
673 | return -EINVAL; | |
674 | } | |
675 | } | |
676 | ||
677 | static int multipath_ctr(struct dm_target *ti, unsigned int argc, | |
678 | char **argv) | |
679 | { | |
680 | /* target parameters */ | |
681 | static struct param _params[] = { | |
682 | {1, 1024, ESTR("invalid number of priority groups")}, | |
683 | {1, 1024, ESTR("invalid initial priority group number")}, | |
684 | }; | |
685 | ||
686 | int r; | |
687 | struct multipath *m; | |
688 | struct arg_set as; | |
689 | unsigned pg_count = 0; | |
690 | unsigned next_pg_num; | |
691 | ||
692 | as.argc = argc; | |
693 | as.argv = argv; | |
694 | ||
695 | m = alloc_multipath(); | |
696 | if (!m) { | |
697 | ti->error = ESTR("can't allocate multipath"); | |
698 | return -EINVAL; | |
699 | } | |
700 | ||
701 | r = parse_features(&as, m, ti); | |
702 | if (r) | |
703 | goto bad; | |
704 | ||
705 | r = parse_hw_handler(&as, m, ti); | |
706 | if (r) | |
707 | goto bad; | |
708 | ||
709 | r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error); | |
710 | if (r) | |
711 | goto bad; | |
712 | ||
713 | r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error); | |
714 | if (r) | |
715 | goto bad; | |
716 | ||
717 | /* parse the priority groups */ | |
718 | while (as.argc) { | |
719 | struct priority_group *pg; | |
720 | ||
721 | pg = parse_priority_group(&as, m, ti); | |
722 | if (!pg) { | |
723 | r = -EINVAL; | |
724 | goto bad; | |
725 | } | |
726 | ||
727 | m->nr_valid_paths += pg->nr_pgpaths; | |
728 | list_add_tail(&pg->list, &m->priority_groups); | |
729 | pg_count++; | |
730 | pg->pg_num = pg_count; | |
731 | if (!--next_pg_num) | |
732 | m->next_pg = pg; | |
733 | } | |
734 | ||
735 | if (pg_count != m->nr_priority_groups) { | |
736 | ti->error = ESTR("priority group count mismatch"); | |
737 | r = -EINVAL; | |
738 | goto bad; | |
739 | } | |
740 | ||
741 | ti->private = m; | |
742 | m->ti = ti; | |
743 | ||
744 | return 0; | |
745 | ||
746 | bad: | |
747 | free_multipath(m); | |
748 | return r; | |
749 | } | |
750 | ||
751 | static void multipath_dtr(struct dm_target *ti) | |
752 | { | |
753 | struct multipath *m = (struct multipath *) ti->private; | |
754 | free_multipath(m); | |
755 | } | |
756 | ||
757 | /* | |
758 | * Map bios, recording original fields for later in case we have to resubmit | |
759 | */ | |
760 | static int multipath_map(struct dm_target *ti, struct bio *bio, | |
761 | union map_info *map_context) | |
762 | { | |
763 | int r; | |
764 | struct mpath_io *mpio; | |
765 | struct multipath *m = (struct multipath *) ti->private; | |
766 | ||
767 | mpio = mempool_alloc(m->mpio_pool, GFP_NOIO); | |
768 | dm_bio_record(&mpio->details, bio); | |
769 | ||
770 | map_context->ptr = mpio; | |
771 | bio->bi_rw |= (1 << BIO_RW_FAILFAST); | |
772 | r = map_io(m, bio, mpio, 0); | |
773 | if (r < 0) | |
774 | mempool_free(mpio, m->mpio_pool); | |
775 | ||
776 | return r; | |
777 | } | |
778 | ||
779 | /* | |
780 | * Take a path out of use. | |
781 | */ | |
782 | static int fail_path(struct pgpath *pgpath) | |
783 | { | |
784 | unsigned long flags; | |
785 | struct multipath *m = pgpath->pg->m; | |
786 | ||
787 | spin_lock_irqsave(&m->lock, flags); | |
788 | ||
789 | if (!pgpath->path.is_active) | |
790 | goto out; | |
791 | ||
792 | DMWARN("dm-multipath: Failing path %s.", pgpath->path.dev->name); | |
793 | ||
794 | pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path); | |
795 | pgpath->path.is_active = 0; | |
796 | pgpath->fail_count++; | |
797 | ||
798 | m->nr_valid_paths--; | |
799 | ||
800 | if (pgpath == m->current_pgpath) | |
801 | m->current_pgpath = NULL; | |
802 | ||
803 | schedule_work(&m->trigger_event); | |
804 | ||
805 | out: | |
806 | spin_unlock_irqrestore(&m->lock, flags); | |
807 | ||
808 | return 0; | |
809 | } | |
810 | ||
811 | /* | |
812 | * Reinstate a previously-failed path | |
813 | */ | |
814 | static int reinstate_path(struct pgpath *pgpath) | |
815 | { | |
816 | int r = 0; | |
817 | unsigned long flags; | |
818 | struct multipath *m = pgpath->pg->m; | |
819 | ||
820 | spin_lock_irqsave(&m->lock, flags); | |
821 | ||
822 | if (pgpath->path.is_active) | |
823 | goto out; | |
824 | ||
825 | if (!pgpath->pg->ps.type) { | |
826 | DMWARN("Reinstate path not supported by path selector %s", | |
827 | pgpath->pg->ps.type->name); | |
828 | r = -EINVAL; | |
829 | goto out; | |
830 | } | |
831 | ||
832 | r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path); | |
833 | if (r) | |
834 | goto out; | |
835 | ||
836 | pgpath->path.is_active = 1; | |
837 | ||
838 | m->current_pgpath = NULL; | |
839 | if (!m->nr_valid_paths++) | |
840 | schedule_work(&m->process_queued_ios); | |
841 | ||
842 | schedule_work(&m->trigger_event); | |
843 | ||
844 | out: | |
845 | spin_unlock_irqrestore(&m->lock, flags); | |
846 | ||
847 | return r; | |
848 | } | |
849 | ||
850 | /* | |
851 | * Fail or reinstate all paths that match the provided struct dm_dev. | |
852 | */ | |
853 | static int action_dev(struct multipath *m, struct dm_dev *dev, | |
854 | action_fn action) | |
855 | { | |
856 | int r = 0; | |
857 | struct pgpath *pgpath; | |
858 | struct priority_group *pg; | |
859 | ||
860 | list_for_each_entry(pg, &m->priority_groups, list) { | |
861 | list_for_each_entry(pgpath, &pg->pgpaths, list) { | |
862 | if (pgpath->path.dev == dev) | |
863 | r = action(pgpath); | |
864 | } | |
865 | } | |
866 | ||
867 | return r; | |
868 | } | |
869 | ||
870 | /* | |
871 | * Temporarily try to avoid having to use the specified PG | |
872 | */ | |
873 | static void bypass_pg(struct multipath *m, struct priority_group *pg, | |
874 | int bypassed) | |
875 | { | |
876 | unsigned long flags; | |
877 | ||
878 | spin_lock_irqsave(&m->lock, flags); | |
879 | ||
880 | pg->bypassed = bypassed; | |
881 | m->current_pgpath = NULL; | |
882 | m->current_pg = NULL; | |
883 | ||
884 | spin_unlock_irqrestore(&m->lock, flags); | |
885 | ||
886 | schedule_work(&m->trigger_event); | |
887 | } | |
888 | ||
889 | /* | |
890 | * Switch to using the specified PG from the next I/O that gets mapped | |
891 | */ | |
892 | static int switch_pg_num(struct multipath *m, const char *pgstr) | |
893 | { | |
894 | struct priority_group *pg; | |
895 | unsigned pgnum; | |
896 | unsigned long flags; | |
897 | ||
898 | if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum || | |
899 | (pgnum > m->nr_priority_groups)) { | |
900 | DMWARN("invalid PG number supplied to switch_pg_num"); | |
901 | return -EINVAL; | |
902 | } | |
903 | ||
904 | spin_lock_irqsave(&m->lock, flags); | |
905 | list_for_each_entry(pg, &m->priority_groups, list) { | |
906 | pg->bypassed = 0; | |
907 | if (--pgnum) | |
908 | continue; | |
909 | ||
910 | m->current_pgpath = NULL; | |
911 | m->current_pg = NULL; | |
912 | m->next_pg = pg; | |
913 | } | |
914 | spin_unlock_irqrestore(&m->lock, flags); | |
915 | ||
916 | schedule_work(&m->trigger_event); | |
917 | return 0; | |
918 | } | |
919 | ||
920 | /* | |
921 | * Set/clear bypassed status of a PG. | |
922 | * PGs are numbered upwards from 1 in the order they were declared. | |
923 | */ | |
924 | static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed) | |
925 | { | |
926 | struct priority_group *pg; | |
927 | unsigned pgnum; | |
928 | ||
929 | if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum || | |
930 | (pgnum > m->nr_priority_groups)) { | |
931 | DMWARN("invalid PG number supplied to bypass_pg"); | |
932 | return -EINVAL; | |
933 | } | |
934 | ||
935 | list_for_each_entry(pg, &m->priority_groups, list) { | |
936 | if (!--pgnum) | |
937 | break; | |
938 | } | |
939 | ||
940 | bypass_pg(m, pg, bypassed); | |
941 | return 0; | |
942 | } | |
943 | ||
944 | /* | |
945 | * pg_init must call this when it has completed its initialisation | |
946 | */ | |
947 | void dm_pg_init_complete(struct path *path, unsigned err_flags) | |
948 | { | |
949 | struct pgpath *pgpath = path_to_pgpath(path); | |
950 | struct priority_group *pg = pgpath->pg; | |
951 | struct multipath *m = pg->m; | |
952 | unsigned long flags; | |
953 | ||
954 | /* We insist on failing the path if the PG is already bypassed. */ | |
955 | if (err_flags && pg->bypassed) | |
956 | err_flags |= MP_FAIL_PATH; | |
957 | ||
958 | if (err_flags & MP_FAIL_PATH) | |
959 | fail_path(pgpath); | |
960 | ||
961 | if (err_flags & MP_BYPASS_PG) | |
962 | bypass_pg(m, pg, 1); | |
963 | ||
964 | spin_lock_irqsave(&m->lock, flags); | |
965 | if (!err_flags) | |
966 | m->queue_io = 0; | |
967 | else { | |
968 | m->current_pgpath = NULL; | |
969 | m->current_pg = NULL; | |
970 | } | |
971 | schedule_work(&m->process_queued_ios); | |
972 | spin_unlock_irqrestore(&m->lock, flags); | |
973 | } | |
974 | ||
975 | /* | |
976 | * end_io handling | |
977 | */ | |
978 | static int do_end_io(struct multipath *m, struct bio *bio, | |
979 | int error, struct mpath_io *mpio) | |
980 | { | |
981 | struct hw_handler *hwh = &m->hw_handler; | |
982 | unsigned err_flags = MP_FAIL_PATH; /* Default behavior */ | |
983 | ||
984 | if (!error) | |
985 | return 0; /* I/O complete */ | |
986 | ||
987 | spin_lock(&m->lock); | |
988 | if (!m->nr_valid_paths) { | |
989 | if (!m->queue_if_no_path || m->suspended) { | |
990 | spin_unlock(&m->lock); | |
991 | return -EIO; | |
992 | } else { | |
993 | spin_unlock(&m->lock); | |
994 | goto requeue; | |
995 | } | |
996 | } | |
997 | spin_unlock(&m->lock); | |
998 | ||
999 | if (hwh->type && hwh->type->error) | |
1000 | err_flags = hwh->type->error(hwh, bio); | |
1001 | ||
1002 | if (mpio->pgpath) { | |
1003 | if (err_flags & MP_FAIL_PATH) | |
1004 | fail_path(mpio->pgpath); | |
1005 | ||
1006 | if (err_flags & MP_BYPASS_PG) | |
1007 | bypass_pg(m, mpio->pgpath->pg, 1); | |
1008 | } | |
1009 | ||
1010 | if (err_flags & MP_ERROR_IO) | |
1011 | return -EIO; | |
1012 | ||
1013 | requeue: | |
1014 | dm_bio_restore(&mpio->details, bio); | |
1015 | ||
1016 | /* queue for the daemon to resubmit or fail */ | |
1017 | spin_lock(&m->lock); | |
1018 | bio_list_add(&m->queued_ios, bio); | |
1019 | m->queue_size++; | |
1020 | if (!m->queue_io) | |
1021 | schedule_work(&m->process_queued_ios); | |
1022 | spin_unlock(&m->lock); | |
1023 | ||
1024 | return 1; /* io not complete */ | |
1025 | } | |
1026 | ||
1027 | static int multipath_end_io(struct dm_target *ti, struct bio *bio, | |
1028 | int error, union map_info *map_context) | |
1029 | { | |
1030 | struct multipath *m = (struct multipath *) ti->private; | |
1031 | struct mpath_io *mpio = (struct mpath_io *) map_context->ptr; | |
1032 | struct pgpath *pgpath = mpio->pgpath; | |
1033 | struct path_selector *ps; | |
1034 | int r; | |
1035 | ||
1036 | r = do_end_io(m, bio, error, mpio); | |
1037 | if (pgpath) { | |
1038 | ps = &pgpath->pg->ps; | |
1039 | if (ps->type->end_io) | |
1040 | ps->type->end_io(ps, &pgpath->path); | |
1041 | } | |
1042 | if (r <= 0) | |
1043 | mempool_free(mpio, m->mpio_pool); | |
1044 | ||
1045 | return r; | |
1046 | } | |
1047 | ||
1048 | /* | |
1049 | * Suspend can't complete until all the I/O is processed so if | |
1050 | * the last path failed we will now error any queued I/O. | |
1051 | */ | |
1052 | static void multipath_presuspend(struct dm_target *ti) | |
1053 | { | |
1054 | struct multipath *m = (struct multipath *) ti->private; | |
1055 | unsigned long flags; | |
1056 | ||
1057 | spin_lock_irqsave(&m->lock, flags); | |
1058 | m->suspended = 1; | |
1059 | if (m->queue_if_no_path) | |
1060 | schedule_work(&m->process_queued_ios); | |
1061 | spin_unlock_irqrestore(&m->lock, flags); | |
1062 | } | |
1063 | ||
1064 | static void multipath_resume(struct dm_target *ti) | |
1065 | { | |
1066 | struct multipath *m = (struct multipath *) ti->private; | |
1067 | unsigned long flags; | |
1068 | ||
1069 | spin_lock_irqsave(&m->lock, flags); | |
1070 | m->suspended = 0; | |
1071 | spin_unlock_irqrestore(&m->lock, flags); | |
1072 | } | |
1073 | ||
1074 | /* | |
1075 | * Info output has the following format: | |
1076 | * num_multipath_feature_args [multipath_feature_args]* | |
1077 | * num_handler_status_args [handler_status_args]* | |
1078 | * num_groups init_group_number | |
1079 | * [A|D|E num_ps_status_args [ps_status_args]* | |
1080 | * num_paths num_selector_args | |
1081 | * [path_dev A|F fail_count [selector_args]* ]+ ]+ | |
1082 | * | |
1083 | * Table output has the following format (identical to the constructor string): | |
1084 | * num_feature_args [features_args]* | |
1085 | * num_handler_args hw_handler [hw_handler_args]* | |
1086 | * num_groups init_group_number | |
1087 | * [priority selector-name num_ps_args [ps_args]* | |
1088 | * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ | |
1089 | */ | |
1090 | static int multipath_status(struct dm_target *ti, status_type_t type, | |
1091 | char *result, unsigned int maxlen) | |
1092 | { | |
1093 | int sz = 0; | |
1094 | unsigned long flags; | |
1095 | struct multipath *m = (struct multipath *) ti->private; | |
1096 | struct hw_handler *hwh = &m->hw_handler; | |
1097 | struct priority_group *pg; | |
1098 | struct pgpath *p; | |
1099 | unsigned pg_num; | |
1100 | char state; | |
1101 | ||
1102 | spin_lock_irqsave(&m->lock, flags); | |
1103 | ||
1104 | /* Features */ | |
1105 | if (type == STATUSTYPE_INFO) | |
1106 | DMEMIT("1 %u ", m->queue_size); | |
1107 | else if (m->queue_if_no_path) | |
1108 | DMEMIT("1 queue_if_no_path "); | |
1109 | else | |
1110 | DMEMIT("0 "); | |
1111 | ||
1112 | if (hwh->type && hwh->type->status) | |
1113 | sz += hwh->type->status(hwh, type, result + sz, maxlen - sz); | |
1114 | else if (!hwh->type || type == STATUSTYPE_INFO) | |
1115 | DMEMIT("0 "); | |
1116 | else | |
1117 | DMEMIT("1 %s ", hwh->type->name); | |
1118 | ||
1119 | DMEMIT("%u ", m->nr_priority_groups); | |
1120 | ||
1121 | if (m->next_pg) | |
1122 | pg_num = m->next_pg->pg_num; | |
1123 | else if (m->current_pg) | |
1124 | pg_num = m->current_pg->pg_num; | |
1125 | else | |
1126 | pg_num = 1; | |
1127 | ||
1128 | DMEMIT("%u ", pg_num); | |
1129 | ||
1130 | switch (type) { | |
1131 | case STATUSTYPE_INFO: | |
1132 | list_for_each_entry(pg, &m->priority_groups, list) { | |
1133 | if (pg->bypassed) | |
1134 | state = 'D'; /* Disabled */ | |
1135 | else if (pg == m->current_pg) | |
1136 | state = 'A'; /* Currently Active */ | |
1137 | else | |
1138 | state = 'E'; /* Enabled */ | |
1139 | ||
1140 | DMEMIT("%c ", state); | |
1141 | ||
1142 | if (pg->ps.type->status) | |
1143 | sz += pg->ps.type->status(&pg->ps, NULL, type, | |
1144 | result + sz, | |
1145 | maxlen - sz); | |
1146 | else | |
1147 | DMEMIT("0 "); | |
1148 | ||
1149 | DMEMIT("%u %u ", pg->nr_pgpaths, | |
1150 | pg->ps.type->info_args); | |
1151 | ||
1152 | list_for_each_entry(p, &pg->pgpaths, list) { | |
1153 | DMEMIT("%s %s %u ", p->path.dev->name, | |
1154 | p->path.is_active ? "A" : "F", | |
1155 | p->fail_count); | |
1156 | if (pg->ps.type->status) | |
1157 | sz += pg->ps.type->status(&pg->ps, | |
1158 | &p->path, type, result + sz, | |
1159 | maxlen - sz); | |
1160 | } | |
1161 | } | |
1162 | break; | |
1163 | ||
1164 | case STATUSTYPE_TABLE: | |
1165 | list_for_each_entry(pg, &m->priority_groups, list) { | |
1166 | DMEMIT("%s ", pg->ps.type->name); | |
1167 | ||
1168 | if (pg->ps.type->status) | |
1169 | sz += pg->ps.type->status(&pg->ps, NULL, type, | |
1170 | result + sz, | |
1171 | maxlen - sz); | |
1172 | else | |
1173 | DMEMIT("0 "); | |
1174 | ||
1175 | DMEMIT("%u %u ", pg->nr_pgpaths, | |
1176 | pg->ps.type->table_args); | |
1177 | ||
1178 | list_for_each_entry(p, &pg->pgpaths, list) { | |
1179 | DMEMIT("%s ", p->path.dev->name); | |
1180 | if (pg->ps.type->status) | |
1181 | sz += pg->ps.type->status(&pg->ps, | |
1182 | &p->path, type, result + sz, | |
1183 | maxlen - sz); | |
1184 | } | |
1185 | } | |
1186 | break; | |
1187 | } | |
1188 | ||
1189 | spin_unlock_irqrestore(&m->lock, flags); | |
1190 | ||
1191 | return 0; | |
1192 | } | |
1193 | ||
1194 | static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) | |
1195 | { | |
1196 | int r; | |
1197 | struct dm_dev *dev; | |
1198 | struct multipath *m = (struct multipath *) ti->private; | |
1199 | action_fn action; | |
1200 | ||
1201 | if (argc == 1) { | |
1202 | if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) | |
1203 | return queue_if_no_path(m, 1); | |
1204 | else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) | |
1205 | return queue_if_no_path(m, 0); | |
1206 | } | |
1207 | ||
1208 | if (argc != 2) | |
1209 | goto error; | |
1210 | ||
1211 | if (!strnicmp(argv[0], MESG_STR("disable_group"))) | |
1212 | return bypass_pg_num(m, argv[1], 1); | |
1213 | else if (!strnicmp(argv[0], MESG_STR("enable_group"))) | |
1214 | return bypass_pg_num(m, argv[1], 0); | |
1215 | else if (!strnicmp(argv[0], MESG_STR("switch_group"))) | |
1216 | return switch_pg_num(m, argv[1]); | |
1217 | else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) | |
1218 | action = reinstate_path; | |
1219 | else if (!strnicmp(argv[0], MESG_STR("fail_path"))) | |
1220 | action = fail_path; | |
1221 | else | |
1222 | goto error; | |
1223 | ||
1224 | r = dm_get_device(ti, argv[1], ti->begin, ti->len, | |
1225 | dm_table_get_mode(ti->table), &dev); | |
1226 | if (r) { | |
1227 | DMWARN("dm-multipath message: error getting device %s", | |
1228 | argv[1]); | |
1229 | return -EINVAL; | |
1230 | } | |
1231 | ||
1232 | r = action_dev(m, dev, action); | |
1233 | ||
1234 | dm_put_device(ti, dev); | |
1235 | ||
1236 | return r; | |
1237 | ||
1238 | error: | |
1239 | DMWARN("Unrecognised multipath message received."); | |
1240 | return -EINVAL; | |
1241 | } | |
1242 | ||
1243 | /*----------------------------------------------------------------- | |
1244 | * Module setup | |
1245 | *---------------------------------------------------------------*/ | |
1246 | static struct target_type multipath_target = { | |
1247 | .name = "multipath", | |
1248 | .version = {1, 0, 4}, | |
1249 | .module = THIS_MODULE, | |
1250 | .ctr = multipath_ctr, | |
1251 | .dtr = multipath_dtr, | |
1252 | .map = multipath_map, | |
1253 | .end_io = multipath_end_io, | |
1254 | .presuspend = multipath_presuspend, | |
1255 | .resume = multipath_resume, | |
1256 | .status = multipath_status, | |
1257 | .message = multipath_message, | |
1258 | }; | |
1259 | ||
1260 | static int __init dm_multipath_init(void) | |
1261 | { | |
1262 | int r; | |
1263 | ||
1264 | /* allocate a slab for the dm_ios */ | |
1265 | _mpio_cache = kmem_cache_create("dm_mpath", sizeof(struct mpath_io), | |
1266 | 0, 0, NULL, NULL); | |
1267 | if (!_mpio_cache) | |
1268 | return -ENOMEM; | |
1269 | ||
1270 | r = dm_register_target(&multipath_target); | |
1271 | if (r < 0) { | |
1272 | DMERR("%s: register failed %d", multipath_target.name, r); | |
1273 | kmem_cache_destroy(_mpio_cache); | |
1274 | return -EINVAL; | |
1275 | } | |
1276 | ||
1277 | DMINFO("dm-multipath version %u.%u.%u loaded", | |
1278 | multipath_target.version[0], multipath_target.version[1], | |
1279 | multipath_target.version[2]); | |
1280 | ||
1281 | return r; | |
1282 | } | |
1283 | ||
1284 | static void __exit dm_multipath_exit(void) | |
1285 | { | |
1286 | int r; | |
1287 | ||
1288 | r = dm_unregister_target(&multipath_target); | |
1289 | if (r < 0) | |
1290 | DMERR("%s: target unregister failed %d", | |
1291 | multipath_target.name, r); | |
1292 | kmem_cache_destroy(_mpio_cache); | |
1293 | } | |
1294 | ||
1295 | EXPORT_SYMBOL_GPL(dm_pg_init_complete); | |
1296 | ||
1297 | module_init(dm_multipath_init); | |
1298 | module_exit(dm_multipath_exit); | |
1299 | ||
1300 | MODULE_DESCRIPTION(DM_NAME " multipath target"); | |
1301 | MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); | |
1302 | MODULE_LICENSE("GPL"); |