]>
Commit | Line | Data |
---|---|---|
fea05a26 | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
1da177e4 LT |
2 | /* |
3 | * aoedev.c | |
4 | * AoE device utility functions; maintains device list. | |
5 | */ | |
6 | ||
7 | #include <linux/hdreg.h> | |
8 | #include <linux/blkdev.h> | |
9 | #include <linux/netdevice.h> | |
9bb237b6 | 10 | #include <linux/delay.h> |
5a0e3ad6 | 11 | #include <linux/slab.h> |
0c966214 EC |
12 | #include <linux/bitmap.h> |
13 | #include <linux/kdev_t.h> | |
4bcce1a3 | 14 | #include <linux/moduleparam.h> |
1da177e4 LT |
15 | #include "aoe.h" |
16 | ||
/* Forward declarations for helpers defined later in this file. */
static void dummy_timer(ulong);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

/* Nonzero (the default): allocate device minor numbers dynamically.
 * Zero: use the legacy static shelf/slot -> minor mapping
 * (see minor_get_static).
 */
static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;		/* singly linked list of all aoedevs */
static DEFINE_SPINLOCK(devlist_lock);	/* protects devlist (and d->ref, see aoedev_put) */
1da177e4 | 27 | |
/* Because some systems will have one, many, or no
 * - partitions,
 * - slots per shelf,
 * - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated. So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)	/* max number of whole devices */

static DEFINE_SPINLOCK(used_minors_lock);	/* protects used_minors */
static DECLARE_BITMAP(used_minors, N_DEVS);	/* one bit per allocated device index */
39 | ||
40 | static int | |
4bcce1a3 | 41 | minor_get_dyn(ulong *sysminor) |
1da177e4 | 42 | { |
1da177e4 | 43 | ulong flags; |
0c966214 EC |
44 | ulong n; |
45 | int error = 0; | |
46 | ||
47 | spin_lock_irqsave(&used_minors_lock, flags); | |
48 | n = find_first_zero_bit(used_minors, N_DEVS); | |
49 | if (n < N_DEVS) | |
50 | set_bit(n, used_minors); | |
51 | else | |
52 | error = -1; | |
53 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
54 | ||
4bcce1a3 | 55 | *sysminor = n * AOE_PARTITIONS; |
0c966214 EC |
56 | return error; |
57 | } | |
1da177e4 | 58 | |
/* Compute a fixed system minor for AoE address aoemaj.aoemin (shelf.slot),
 * for backwards compatibility when the aoe_dyndevs parameter is zero.
 * Returns 0 on success with *sysminor set; -1 if the slot exceeds the
 * static per-shelf limit, the computed index exceeds N_DEVS, or that
 * minor is already reserved.
 */
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		error = -1;
		goto out;
	}

	n = aoemaj * NPERSHELF + aoemin;	/* fixed shelf/slot -> index mapping */
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
		goto out;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
	*sysminor = n * AOE_PARTITIONS;
out:
	return error;
}
101 | ||
102 | static int | |
103 | minor_get(ulong *sysminor, ulong aoemaj, int aoemin) | |
104 | { | |
105 | if (aoe_dyndevs) | |
106 | return minor_get_dyn(sysminor); | |
107 | else | |
108 | return minor_get_static(sysminor, aoemaj, aoemin); | |
109 | } | |
110 | ||
0c966214 EC |
/* Release a system minor previously reserved via minor_get, clearing
 * its bit in used_minors.  BUGs on an out-of-range or not-allocated
 * minor (i.e. a double free).
 */
static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;	/* convert system minor back to device index */
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));	/* catches double free */
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}
124 | ||
0c966214 EC |
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing a aoedev_put. With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq. When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */

/* Drop a reference taken by aoedev_by_aoeaddr.  Note the count is
 * protected by devlist_lock, not by d->lock.
 */
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}
146 | ||
3ae1c24e EC |
/* Per-device timer callback that simply re-arms itself once per second.
 * It stops re-arming once the device is flagged for teardown
 * (DEVFL_TKILL), allowing the del_timer_sync in freedev to complete.
 */
static void
dummy_timer(ulong vp)
{
	struct aoedev *d;

	d = (struct aoedev *)vp;
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}
158 | ||
69cf2d85 EC |
/* Fail the device's in-process request, if any: fail the current
 * buffer, then walk the chain of not-yet-issued bios, marking each
 * not-up-to-date and decrementing the outstanding-bio count that is
 * stashed in rq->special.  When that count reaches zero, the request
 * is completed via aoe_end_request.
 */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)		/* nothing in process */
		return;
	while ((bio = d->ip.nxbio)) {
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;	/* remaining-bio counter */
		rq->special = (void *) --n;
	}
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}
180 | ||
3fc9b032 EC |
/* Tear down one queued frame while taking a device down: unlink it,
 * fail its associated buffer (if any), and return the frame to its
 * target's free list.
 */
static void
downdev_frame(struct list_head *pos)
{
	struct frame *f;

	f = list_entry(pos, struct frame, head);
	list_del(pos);
	if (f->buf) {
		f->buf->nframesout--;
		aoe_failbuf(f->t->d, f->buf);
	}
	aoe_freetframe(f);
}
194 | ||
896831f5 EC |
/* Take device d down: clear DEVFL_UP, fail all active and queued
 * frames and the in-process request, fast-fail all pending block
 * requests, reset each target's window state, and zero the disk
 * capacity.  Callers in this file invoke it with d->lock held
 * (see flush()) -- NOTE(review): assumed to be the general contract.
 */
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
237 | ||
4ba9aa7f EC |
238 | /* return whether the user asked for this particular |
239 | * device to be flushed | |
240 | */ | |
241 | static int | |
242 | user_req(char *s, size_t slen, struct aoedev *d) | |
243 | { | |
244 | char *p; | |
245 | size_t lim; | |
246 | ||
247 | if (!d->gd) | |
248 | return 0; | |
249 | p = strrchr(d->gd->disk_name, '/'); | |
250 | if (!p) | |
251 | p = d->gd->disk_name; | |
252 | else | |
253 | p += 1; | |
254 | lim = sizeof(d->gd->disk_name); | |
255 | lim -= p - d->gd->disk_name; | |
256 | if (slen < lim) | |
257 | lim = slen; | |
258 | ||
259 | return !strncmp(s, p, lim); | |
260 | } | |
261 | ||
e52a2932 EC |
/* Release the resources of a device marked DEVFL_TKILL.  Only the
 * caller that wins the race to set DEVFL_FREEING proceeds; others
 * return immediately.  May sleep (del_timer_sync, del_gendisk), so
 * it must be called without spinlocks held.  On completion the
 * device is marked DEVFL_FREED; the aoedev structure itself is
 * unlinked and kfreed later (see flush() pass three).
 */
static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;	/* we won the race to free */
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	del_timer_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}
299 | ||
/* values for the "exiting" argument of flush() */
enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};
304 | ||
/* Flush (tear down) aoe devices.  With exiting set, every device is
 * taken down unconditionally (module unload path).  Otherwise str/cnt
 * hold a user command: a device name flushes exactly that device;
 * "all" also takes down devices still marked up; anything else (or a
 * short command) flushes only down, closed, unreferenced devices.
 * Three passes over devlist:
 *   1. under the locks (no sleeping), downdev and mark DEVFL_TKILL;
 *   2. freedev (may sleep) each marked device, restarting the scan
 *      after every drop of the locks;
 *   3. unlink and kfree devices whose resources freedev has already
 *      released (DEVFL_FREED).
 * Returns 0 on success or -EFAULT if the user buffer can't be read.
 */
static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;	/* user named a single device */
	}

	flush_scheduled_work();
	/* pass one: without sleeping, do aoedev_downdev */
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;	/* busy or already being handled */

		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart;	/* list may change while unlocked */
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;	/* unlink from devlist */
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);	/* kfree(NULL) is a no-op */
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}
387 | ||
e52a2932 EC |
/* Entry point for user-requested device flushes; see flush() for the
 * command syntax and semantics.
 */
int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
393 | ||
69cf2d85 EC |
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */

/* Free an skb, first polling (every Sms ms, for up to Tms ms total)
 * until no one else holds a data reference to it.  If the reference
 * is never released, the skb is deliberately leaked (with an error
 * message) rather than freed out from under the other holder.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {	/* timed out: someone still holds a reference */
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;	/* account for dropping paged data */
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
421 | ||
/* Free every skb in the device's skb pool, then reinitialize the
 * (now empty) queue head.
 */
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}
432 | ||
0c966214 | 433 | /* find it or allocate it */ |
1da177e4 | 434 | struct aoedev * |
0c966214 | 435 | aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) |
1da177e4 LT |
436 | { |
437 | struct aoedev *d; | |
64a80f5a | 438 | int i; |
1da177e4 | 439 | ulong flags; |
10935d05 | 440 | ulong sysminor = 0; |
1da177e4 LT |
441 | |
442 | spin_lock_irqsave(&devlist_lock, flags); | |
443 | ||
444 | for (d=devlist; d; d=d->next) | |
0c966214 | 445 | if (d->aoemajor == maj && d->aoeminor == min) { |
e52a2932 EC |
446 | spin_lock(&d->lock); |
447 | if (d->flags & DEVFL_TKILL) { | |
448 | spin_unlock(&d->lock); | |
449 | d = NULL; | |
450 | goto out; | |
451 | } | |
69cf2d85 | 452 | d->ref++; |
e52a2932 | 453 | spin_unlock(&d->lock); |
1da177e4 | 454 | break; |
69cf2d85 | 455 | } |
4bcce1a3 | 456 | if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) |
68e0d42f EC |
457 | goto out; |
458 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); | |
459 | if (!d) | |
460 | goto out; | |
71114ec4 EC |
461 | d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC); |
462 | if (!d->targets) { | |
463 | kfree(d); | |
464 | goto out; | |
465 | } | |
466 | d->ntargets = NTARGETS; | |
68e0d42f EC |
467 | INIT_WORK(&d->work, aoecmd_sleepwork); |
468 | spin_lock_init(&d->lock); | |
e9bb8fb0 | 469 | skb_queue_head_init(&d->skbpool); |
68e0d42f EC |
470 | init_timer(&d->timer); |
471 | d->timer.data = (ulong) d; | |
472 | d->timer.function = dummy_timer; | |
473 | d->timer.expires = jiffies + HZ; | |
474 | add_timer(&d->timer); | |
475 | d->bufpool = NULL; /* defer to aoeblk_gdalloc */ | |
476 | d->tgt = d->targets; | |
69cf2d85 | 477 | d->ref = 1; |
64a80f5a EC |
478 | for (i = 0; i < NFACTIVE; i++) |
479 | INIT_LIST_HEAD(&d->factive[i]); | |
3a0c40d2 | 480 | INIT_LIST_HEAD(&d->rexmitq); |
68e0d42f | 481 | d->sysminor = sysminor; |
0c966214 EC |
482 | d->aoemajor = maj; |
483 | d->aoeminor = min; | |
3a0c40d2 EC |
484 | d->rttavg = RTTAVG_INIT; |
485 | d->rttdev = RTTDEV_INIT; | |
68e0d42f EC |
486 | d->next = devlist; |
487 | devlist = d; | |
488 | out: | |
3ae1c24e | 489 | spin_unlock_irqrestore(&devlist_lock, flags); |
1da177e4 LT |
490 | return d; |
491 | } | |
492 | ||
/* Release a target: drop the references its interfaces hold on their
 * net devices, free every frame (and its skb) on the target's free
 * list, then free the target itself.
 */
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)	/* NULL nd ends the used slots -- assumed; confirm */
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}
515 | ||
1da177e4 LT |
/* Module-unload teardown: flush scheduled work, drain the I/O
 * completion queue, then unconditionally take down and free every
 * device via flush(..., EXITING).
 */
void
aoedev_exit(void)
{
	flush_scheduled_work();
	aoe_flush_iocq();
	flush(NULL, 0, EXITING);
}
523 | ||
/* Module-load initialization; nothing to set up currently. */
int __init
aoedev_init(void)
{
	return 0;
}