]>
Commit | Line | Data |
---|---|---|
ca47bbd9 | 1 | /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ |
1da177e4 LT |
2 | /* |
3 | * aoedev.c | |
4 | * AoE device utility functions; maintains device list. | |
5 | */ | |
6 | ||
7 | #include <linux/hdreg.h> | |
3582dd29 | 8 | #include <linux/blk-mq.h> |
1da177e4 | 9 | #include <linux/netdevice.h> |
9bb237b6 | 10 | #include <linux/delay.h> |
5a0e3ad6 | 11 | #include <linux/slab.h> |
0c966214 EC |
12 | #include <linux/bitmap.h> |
13 | #include <linux/kdev_t.h> | |
4bcce1a3 | 14 | #include <linux/moduleparam.h> |
a88c1f0c | 15 | #include <linux/string.h> |
1da177e4 LT |
16 | #include "aoe.h" |
17 | ||
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

/* Module parameter: nonzero (the default) selects dynamic minor-number
 * allocation (minor_get_dyn); zero selects the legacy static mapping
 * (minor_get_static).  Writable at runtime (mode 0644).
 */
static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

/* Singly linked list of all known aoe devices; guarded by devlist_lock. */
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated. So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

/* Bitmap of device slots in use; each set bit reserves a run of
 * AOE_PARTITIONS minor numbers.  Guarded by used_minors_lock.
 */
static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
40 | static int | |
4bcce1a3 | 41 | minor_get_dyn(ulong *sysminor) |
1da177e4 | 42 | { |
1da177e4 | 43 | ulong flags; |
0c966214 EC |
44 | ulong n; |
45 | int error = 0; | |
46 | ||
47 | spin_lock_irqsave(&used_minors_lock, flags); | |
48 | n = find_first_zero_bit(used_minors, N_DEVS); | |
49 | if (n < N_DEVS) | |
50 | set_bit(n, used_minors); | |
51 | else | |
52 | error = -1; | |
53 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
54 | ||
4bcce1a3 | 55 | *sysminor = n * AOE_PARTITIONS; |
0c966214 EC |
56 | return error; |
57 | } | |
1da177e4 | 58 | |
/* Reserve the fixed minor number implied by AoE address aoemaj.aoemin,
 * for backwards-compatible naming when aoe_dyndevs == 0.
 * On success, returns 0 and stores the device's first minor number in
 * *sysminor.  Returns -1 (with a pr_err explaining why) when the slot
 * exceeds NPERSHELF, the computed device number is out of range, or
 * that number is already in use.
 */
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		error = -1;
		goto out;
	}

	/* static mapping: shelf * slots-per-shelf + slot */
	n = aoemaj * NPERSHELF + aoemin;
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
		goto out;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
	*sysminor = n * AOE_PARTITIONS;
out:
	return error;
}
101 | ||
102 | static int | |
103 | minor_get(ulong *sysminor, ulong aoemaj, int aoemin) | |
104 | { | |
105 | if (aoe_dyndevs) | |
106 | return minor_get_dyn(sysminor); | |
107 | else | |
108 | return minor_get_static(sysminor, aoemaj, aoemin); | |
109 | } | |
110 | ||
0c966214 EC |
111 | static void |
112 | minor_free(ulong minor) | |
113 | { | |
114 | ulong flags; | |
1da177e4 | 115 | |
0c966214 EC |
116 | minor /= AOE_PARTITIONS; |
117 | BUG_ON(minor >= N_DEVS); | |
1da177e4 | 118 | |
0c966214 EC |
119 | spin_lock_irqsave(&used_minors_lock, flags); |
120 | BUG_ON(!test_bit(minor, used_minors)); | |
121 | clear_bit(minor, used_minors); | |
122 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
1da177e4 LT |
123 | } |
124 | ||
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing a aoedev_put. With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq. When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */

/* Drop one reference taken by aoedev_by_aoeaddr().  devlist_lock
 * serializes the count against lookups and against flush(), which
 * declines to take down a device whose ref is nonzero.
 */
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}
146 | ||
/* Per-device timer callback: re-arms itself once a second (HZ jiffies)
 * until the device is marked DEVFL_TKILL, after which it simply stops
 * rescheduling.  The containing aoedev is recovered via from_timer().
 */
static void
dummy_timer(struct timer_list *t)
{
	struct aoedev *d;

	d = from_timer(d, t, timer);
	if (d->flags & DEVFL_TKILL)
		return;		/* device is being torn down; let the timer die */
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}
158 | ||
/* Fail the in-process request on device d, if any: fail the buffer
 * currently being built (d->ip.buf), mark every bio still queued for
 * the request with an I/O error, and complete the request once no
 * bios remain accounted on it.
 */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct aoe_req *req;
	struct bio *bio;

	aoe_failbuf(d, d->ip.buf);
	rq = d->ip.rq;
	if (rq == NULL)		/* no request in process */
		return;

	req = blk_mq_rq_to_pdu(rq);
	/* error out the not-yet-issued bios of the in-process request */
	while ((bio = d->ip.nxbio)) {
		bio->bi_status = BLK_STS_IOERR;
		d->ip.nxbio = bio->bi_next;
		req->nr_bios--;
	}

	if (!req->nr_bios)
		aoe_end_request(d, rq, 0);
}
181 | ||
3fc9b032 EC |
182 | static void |
183 | downdev_frame(struct list_head *pos) | |
184 | { | |
185 | struct frame *f; | |
186 | ||
187 | f = list_entry(pos, struct frame, head); | |
188 | list_del(pos); | |
189 | if (f->buf) { | |
190 | f->buf->nframesout--; | |
191 | aoe_failbuf(f->t->d, f->buf); | |
192 | } | |
193 | aoe_freetframe(f); | |
194 | } | |
195 | ||
/* Take device d down: clear DEVFL_UP, fail every frame in flight or
 * queued for retransmit, reset each target's window state, fail the
 * in-process request, cycle the blk-mq queue so remaining queued
 * requests are errored, and zero the disk capacity.
 * NOTE(review): the blk-mq freeze/quiesce calls can sleep — confirm
 * all callers run in a context that permits sleeping.
 */
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		/* UP is cleared, freeze+quiesce to insure all are errored */
		blk_mq_freeze_queue(d->blkq);
		blk_mq_quiesce_queue(d->blkq);
		blk_mq_unquiesce_queue(d->blkq);
		blk_mq_unfreeze_queue(d->blkq);
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
238 | ||
4ba9aa7f EC |
239 | /* return whether the user asked for this particular |
240 | * device to be flushed | |
241 | */ | |
242 | static int | |
243 | user_req(char *s, size_t slen, struct aoedev *d) | |
244 | { | |
a88c1f0c | 245 | const char *p; |
4ba9aa7f EC |
246 | size_t lim; |
247 | ||
248 | if (!d->gd) | |
249 | return 0; | |
a88c1f0c | 250 | p = kbasename(d->gd->disk_name); |
4ba9aa7f EC |
251 | lim = sizeof(d->gd->disk_name); |
252 | lim -= p - d->gd->disk_name; | |
253 | if (slen < lim) | |
254 | lim = slen; | |
255 | ||
256 | return !strncmp(s, p, lim); | |
257 | } | |
258 | ||
/* Tear down a device that has been marked DEVFL_TKILL.  Only one
 * caller proceeds: the first to transition the device to DEVFL_FREEING
 * under d->lock; any later caller observes FREEING and returns.
 * Releases the timer, gendisk/queue, targets, pools, and minor number,
 * then sets DEVFL_FREED so flush() pass three can unlink and kfree the
 * structure.  May sleep (del_timer_sync, del_gendisk), so it must be
 * called without spinlocks held.
 */
static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	/* claim the exclusive right to free this device */
	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	del_timer_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_debugfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_mq_free_tag_set(&d->tag_set);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);

	mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	/* flush() pass three frees the struct once it sees this flag */
	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}
297 | ||
enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};

/* Flush (take down and free) aoe devices.
 *
 * When exiting, every device is taken down unconditionally.  Otherwise
 * the user-supplied string selects behavior: "all" flushes even devices
 * still marked UP; any other name flushes the matching device (see
 * user_req); with no selection, only devices that are down, unopened,
 * unreferenced, and not mid-allocation are flushed.
 *
 * Runs in three passes because aoedev_downdev() and freedev() may
 * sleep and therefore cannot be called under devlist_lock: each
 * sleeping call drops the locks and restarts its scan from the head
 * of devlist (restart1/restart2).  Pass three, entirely under
 * devlist_lock, unlinks and frees devices marked DEVFL_FREED.
 */
static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_scheduled_work();
	/* pass one: do aoedev_downdev, which might sleep */
restart1:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL)
			goto cont;	/* already handled on a prior scan */

		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		/* drop locks before the potentially sleeping downdev,
		 * then rescan from the head since the list may change
		 */
		spin_unlock(&d->lock);
		spin_unlock_irqrestore(&devlist_lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		goto restart1;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart2:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart2;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED
	 * (still under devlist_lock from the final restart2 scan)
	 */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;	/* unlink from devlist */
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);	/* kfree(NULL) is a no-op */
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}
392 | ||
/* Public entry point: flush devices according to the user-supplied
 * selection string; never behaves as module exit (NOT_EXITING).
 */
int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
398 | ||
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring. The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */

/* Free an skb once we are its only user.  Polls dataref every Sms
 * milliseconds for up to Tms total; if another holder never lets go,
 * the skb is deliberately leaked (with an error message) rather than
 * freed out from under that holder.  Before freeing, paged data is
 * dropped and the skb trimmed to zero length.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		/* someone else still references the data: leak on purpose */
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	/* discard any paged fragments and reset the skb to empty */
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
426 | ||
/* Free every skb in the device's skb pool and reinitialize the queue
 * head.  Called from freedev() during teardown; no locking is taken
 * here — NOTE(review): assumes no concurrent pool users at this point.
 */
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}
437 | ||
0c966214 | 438 | /* find it or allocate it */ |
1da177e4 | 439 | struct aoedev * |
0c966214 | 440 | aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) |
1da177e4 LT |
441 | { |
442 | struct aoedev *d; | |
64a80f5a | 443 | int i; |
1da177e4 | 444 | ulong flags; |
10935d05 | 445 | ulong sysminor = 0; |
1da177e4 LT |
446 | |
447 | spin_lock_irqsave(&devlist_lock, flags); | |
448 | ||
449 | for (d=devlist; d; d=d->next) | |
0c966214 | 450 | if (d->aoemajor == maj && d->aoeminor == min) { |
e52a2932 EC |
451 | spin_lock(&d->lock); |
452 | if (d->flags & DEVFL_TKILL) { | |
453 | spin_unlock(&d->lock); | |
454 | d = NULL; | |
455 | goto out; | |
456 | } | |
69cf2d85 | 457 | d->ref++; |
e52a2932 | 458 | spin_unlock(&d->lock); |
1da177e4 | 459 | break; |
69cf2d85 | 460 | } |
4bcce1a3 | 461 | if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) |
68e0d42f EC |
462 | goto out; |
463 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); | |
464 | if (!d) | |
465 | goto out; | |
71114ec4 EC |
466 | d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC); |
467 | if (!d->targets) { | |
468 | kfree(d); | |
31279b14 | 469 | d = NULL; |
71114ec4 EC |
470 | goto out; |
471 | } | |
472 | d->ntargets = NTARGETS; | |
68e0d42f EC |
473 | INIT_WORK(&d->work, aoecmd_sleepwork); |
474 | spin_lock_init(&d->lock); | |
3582dd29 | 475 | INIT_LIST_HEAD(&d->rq_list); |
e9bb8fb0 | 476 | skb_queue_head_init(&d->skbpool); |
0e0cc9df | 477 | timer_setup(&d->timer, dummy_timer, 0); |
68e0d42f EC |
478 | d->timer.expires = jiffies + HZ; |
479 | add_timer(&d->timer); | |
480 | d->bufpool = NULL; /* defer to aoeblk_gdalloc */ | |
481 | d->tgt = d->targets; | |
69cf2d85 | 482 | d->ref = 1; |
64a80f5a EC |
483 | for (i = 0; i < NFACTIVE; i++) |
484 | INIT_LIST_HEAD(&d->factive[i]); | |
3a0c40d2 | 485 | INIT_LIST_HEAD(&d->rexmitq); |
68e0d42f | 486 | d->sysminor = sysminor; |
0c966214 EC |
487 | d->aoemajor = maj; |
488 | d->aoeminor = min; | |
3a0c40d2 EC |
489 | d->rttavg = RTTAVG_INIT; |
490 | d->rttdev = RTTDEV_INIT; | |
68e0d42f EC |
491 | d->next = devlist; |
492 | devlist = d; | |
493 | out: | |
3ae1c24e | 494 | spin_unlock_irqrestore(&devlist_lock, flags); |
1da177e4 LT |
495 | return d; |
496 | } | |
497 | ||
/* Release a target: drop the netdev references held by each of its
 * interfaces (stopping at the first empty slot), free every frame —
 * and that frame's skb — on the target's free list, then free the
 * target structure itself.
 */
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)	/* interface slots are filled from the front */
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}
520 | ||
/* Module exit: drain deferred work, then take down and free every
 * device unconditionally via flush(..., EXITING).
 */
void
aoedev_exit(void)
{
	flush_scheduled_work();
	flush(NULL, 0, EXITING);
}
527 | ||
/* Module init hook for the device list; nothing to set up (the list
 * and locks are statically initialized above), so always succeeds.
 */
int __init
aoedev_init(void)
{
	return 0;
}