]>
Commit | Line | Data |
---|---|---|
ca47bbd9 | 1 | /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ |
1da177e4 LT |
2 | /* |
3 | * aoedev.c | |
4 | * AoE device utility functions; maintains device list. | |
5 | */ | |
6 | ||
7 | #include <linux/hdreg.h> | |
8 | #include <linux/blkdev.h> | |
9 | #include <linux/netdevice.h> | |
9bb237b6 | 10 | #include <linux/delay.h> |
5a0e3ad6 | 11 | #include <linux/slab.h> |
0c966214 EC |
12 | #include <linux/bitmap.h> |
13 | #include <linux/kdev_t.h> | |
4bcce1a3 | 14 | #include <linux/moduleparam.h> |
a88c1f0c | 15 | #include <linux/string.h> |
1da177e4 LT |
16 | #include "aoe.h" |
17 | ||
9bb237b6 EC |
18 | static void freetgt(struct aoedev *d, struct aoetgt *t); |
19 | static void skbpoolfree(struct aoedev *d); | |
262bf541 | 20 | |
08b60623 | 21 | static int aoe_dyndevs = 1; |
4bcce1a3 EC |
22 | module_param(aoe_dyndevs, int, 0644); |
23 | MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices."); | |
24 | ||
1da177e4 | 25 | static struct aoedev *devlist; |
476aed38 | 26 | static DEFINE_SPINLOCK(devlist_lock); |
1da177e4 | 27 | |
0c966214 EC |
28 | /* Because some systems will have one, many, or no |
29 | * - partitions, | |
30 | * - slots per shelf, | |
31 | * - or shelves, | |
32 | * we need some flexibility in the way the minor numbers | |
33 | * are allocated. So they are dynamic. | |
69cf2d85 | 34 | */ |
0c966214 EC |
35 | #define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS) |
36 | ||
37 | static DEFINE_SPINLOCK(used_minors_lock); | |
38 | static DECLARE_BITMAP(used_minors, N_DEVS); | |
39 | ||
40 | static int | |
4bcce1a3 | 41 | minor_get_dyn(ulong *sysminor) |
1da177e4 | 42 | { |
1da177e4 | 43 | ulong flags; |
0c966214 EC |
44 | ulong n; |
45 | int error = 0; | |
46 | ||
47 | spin_lock_irqsave(&used_minors_lock, flags); | |
48 | n = find_first_zero_bit(used_minors, N_DEVS); | |
49 | if (n < N_DEVS) | |
50 | set_bit(n, used_minors); | |
51 | else | |
52 | error = -1; | |
53 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
54 | ||
4bcce1a3 | 55 | *sysminor = n * AOE_PARTITIONS; |
0c966214 EC |
56 | return error; |
57 | } | |
1da177e4 | 58 | |
4bcce1a3 EC |
59 | static int |
60 | minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin) | |
61 | { | |
62 | ulong flags; | |
63 | ulong n; | |
64 | int error = 0; | |
65 | enum { | |
66 | /* for backwards compatibility when !aoe_dyndevs, | |
67 | * a static number of supported slots per shelf */ | |
68 | NPERSHELF = 16, | |
69 | }; | |
70 | ||
e0b2bbab EC |
71 | if (aoemin >= NPERSHELF) { |
72 | pr_err("aoe: %s %d slots per shelf\n", | |
73 | "static minor device numbers support only", | |
74 | NPERSHELF); | |
75 | error = -1; | |
76 | goto out; | |
77 | } | |
78 | ||
4bcce1a3 | 79 | n = aoemaj * NPERSHELF + aoemin; |
e0b2bbab | 80 | if (n >= N_DEVS) { |
4bcce1a3 EC |
81 | pr_err("aoe: %s with e%ld.%d\n", |
82 | "cannot use static minor device numbers", | |
83 | aoemaj, aoemin); | |
84 | error = -1; | |
e0b2bbab | 85 | goto out; |
4bcce1a3 EC |
86 | } |
87 | ||
e0b2bbab EC |
88 | spin_lock_irqsave(&used_minors_lock, flags); |
89 | if (test_bit(n, used_minors)) { | |
90 | pr_err("aoe: %s %lu\n", | |
91 | "existing device already has static minor number", | |
92 | n); | |
93 | error = -1; | |
94 | } else | |
95 | set_bit(n, used_minors); | |
96 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
b91316f2 | 97 | *sysminor = n * AOE_PARTITIONS; |
e0b2bbab | 98 | out: |
4bcce1a3 EC |
99 | return error; |
100 | } | |
101 | ||
102 | static int | |
103 | minor_get(ulong *sysminor, ulong aoemaj, int aoemin) | |
104 | { | |
105 | if (aoe_dyndevs) | |
106 | return minor_get_dyn(sysminor); | |
107 | else | |
108 | return minor_get_static(sysminor, aoemaj, aoemin); | |
109 | } | |
110 | ||
0c966214 EC |
111 | static void |
112 | minor_free(ulong minor) | |
113 | { | |
114 | ulong flags; | |
1da177e4 | 115 | |
0c966214 EC |
116 | minor /= AOE_PARTITIONS; |
117 | BUG_ON(minor >= N_DEVS); | |
1da177e4 | 118 | |
0c966214 EC |
119 | spin_lock_irqsave(&used_minors_lock, flags); |
120 | BUG_ON(!test_bit(minor, used_minors)); | |
121 | clear_bit(minor, used_minors); | |
122 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
1da177e4 LT |
123 | } |
124 | ||
0c966214 EC |
125 | /* |
126 | * Users who grab a pointer to the device with aoedev_by_aoeaddr | |
127 | * automatically get a reference count and must be responsible | |
128 | * for performing a aoedev_put. With the addition of async | |
129 | * kthread processing I'm no longer confident that we can | |
130 | * guarantee consistency in the face of device flushes. | |
131 | * | |
132 | * For the time being, we only bother to add extra references for | |
133 | * frames sitting on the iocq. When the kthreads finish processing | |
134 | * these frames, they will aoedev_put the device. | |
135 | */ | |
136 | ||
69cf2d85 EC |
137 | void |
138 | aoedev_put(struct aoedev *d) | |
139 | { | |
140 | ulong flags; | |
141 | ||
142 | spin_lock_irqsave(&devlist_lock, flags); | |
143 | d->ref--; | |
144 | spin_unlock_irqrestore(&devlist_lock, flags); | |
145 | } | |
146 | ||
3ae1c24e | 147 | static void |
0e0cc9df | 148 | dummy_timer(struct timer_list *t) |
3ae1c24e EC |
149 | { |
150 | struct aoedev *d; | |
151 | ||
0e0cc9df | 152 | d = from_timer(d, t, timer); |
3ae1c24e EC |
153 | if (d->flags & DEVFL_TKILL) |
154 | return; | |
155 | d->timer.expires = jiffies + HZ; | |
156 | add_timer(&d->timer); | |
157 | } | |
158 | ||
69cf2d85 EC |
/* Fail the device's in-process request, if any: fail the current
 * buf, error out every bio not yet handed to the lower layers, and
 * complete the request once its pending-bio count reaches zero.
 * NOTE(review): rq->special is (ab)used as an integer counter of
 * bios still outstanding on the request — each failed bio decrements
 * it; presumably the issue path incremented it per bio (set
 * elsewhere in the driver — confirm against aoeblk/aoecmd).
 */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;
	/* walk the chain of not-yet-issued bios, failing each */
	while ((bio = d->ip.nxbio)) {
		bio->bi_status = BLK_STS_IOERR;
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	/* all bios accounted for: complete the request with error */
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}
180 | ||
3fc9b032 EC |
181 | static void |
182 | downdev_frame(struct list_head *pos) | |
183 | { | |
184 | struct frame *f; | |
185 | ||
186 | f = list_entry(pos, struct frame, head); | |
187 | list_del(pos); | |
188 | if (f->buf) { | |
189 | f->buf->nframesout--; | |
190 | aoe_failbuf(f->t->d, f->buf); | |
191 | } | |
192 | aoe_freetframe(f); | |
193 | } | |
194 | ||
896831f5 EC |
/* Take the device down: clear DEVFL_UP, fail every outstanding and
 * queued frame and request, reset each target's congestion window,
 * and zero the gendisk capacity.  Caller holds d->lock.
 */
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	/* make the disk appear empty to upper layers */
	if (d->gd)
		set_capacity(d->gd, 0);
}
237 | ||
4ba9aa7f EC |
238 | /* return whether the user asked for this particular |
239 | * device to be flushed | |
240 | */ | |
241 | static int | |
242 | user_req(char *s, size_t slen, struct aoedev *d) | |
243 | { | |
a88c1f0c | 244 | const char *p; |
4ba9aa7f EC |
245 | size_t lim; |
246 | ||
247 | if (!d->gd) | |
248 | return 0; | |
a88c1f0c | 249 | p = kbasename(d->gd->disk_name); |
4ba9aa7f EC |
250 | lim = sizeof(d->gd->disk_name); |
251 | lim -= p - d->gd->disk_name; | |
252 | if (slen < lim) | |
253 | lim = slen; | |
254 | ||
255 | return !strncmp(s, p, lim); | |
256 | } | |
257 | ||
e52a2932 EC |
258 | static void |
259 | freedev(struct aoedev *d) | |
260 | { | |
261 | struct aoetgt **t, **e; | |
262 | int freeing = 0; | |
263 | unsigned long flags; | |
264 | ||
265 | spin_lock_irqsave(&d->lock, flags); | |
266 | if (d->flags & DEVFL_TKILL | |
267 | && !(d->flags & DEVFL_FREEING)) { | |
268 | d->flags |= DEVFL_FREEING; | |
269 | freeing = 1; | |
270 | } | |
271 | spin_unlock_irqrestore(&d->lock, flags); | |
272 | if (!freeing) | |
273 | return; | |
274 | ||
275 | del_timer_sync(&d->timer); | |
276 | if (d->gd) { | |
e8866cf2 | 277 | aoedisk_rm_debugfs(d); |
e52a2932 EC |
278 | aoedisk_rm_sysfs(d); |
279 | del_gendisk(d->gd); | |
280 | put_disk(d->gd); | |
281 | blk_cleanup_queue(d->blkq); | |
282 | } | |
283 | t = d->targets; | |
71114ec4 | 284 | e = t + d->ntargets; |
e52a2932 EC |
285 | for (; t < e && *t; t++) |
286 | freetgt(d, *t); | |
287 | if (d->bufpool) | |
288 | mempool_destroy(d->bufpool); | |
289 | skbpoolfree(d); | |
290 | minor_free(d->sysminor); | |
291 | ||
292 | spin_lock_irqsave(&d->lock, flags); | |
293 | d->flags |= DEVFL_FREED; | |
294 | spin_unlock_irqrestore(&d->lock, flags); | |
295 | } | |
296 | ||
enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};

/* Take down and free aoe devices.
 *
 * When exiting, every device is torn down unconditionally.
 * Otherwise a user-supplied string selects either all flushable
 * devices ("all"), a specific device by gendisk name, or — by
 * default — only devices that are down, unreferenced, unopened,
 * and not in a transitional state (skipflags).
 *
 * Three passes over devlist:
 *   1. under devlist_lock (no sleeping): mark doomed devices
 *      DEVFL_TKILL and aoedev_downdev() them;
 *   2. drop the locks to call freedev(), which may sleep,
 *      restarting the scan after each free;
 *   3. unlink and kfree devices now marked DEVFL_FREED.
 */
static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_scheduled_work();
	/* pass one: without sleeping, do aoedev_downdev */
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;	/* unlink from devlist */
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);	/* kfree(NULL) is a no-op */
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}
384 | ||
e52a2932 EC |
/* Public entry point for the user-initiated flush: parse the
 * user-space selection string and flush matching devices.
 */
int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
390 | ||
69cf2d85 EC |
391 | /* This has been confirmed to occur once with Tms=3*1000 due to the |
392 | * driver changing link and not processing its transmit ring. The | |
393 | * problem is hard enough to solve by returning an error that I'm | |
394 | * still punting on "solving" this. | |
395 | */ | |
9bb237b6 EC |
396 | static void |
397 | skbfree(struct sk_buff *skb) | |
398 | { | |
69cf2d85 | 399 | enum { Sms = 250, Tms = 30 * 1000}; |
9bb237b6 EC |
400 | int i = Tms / Sms; |
401 | ||
402 | if (skb == NULL) | |
403 | return; | |
404 | while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0) | |
405 | msleep(Sms); | |
94873111 | 406 | if (i < 0) { |
9bb237b6 EC |
407 | printk(KERN_ERR |
408 | "aoe: %s holds ref: %s\n", | |
409 | skb->dev ? skb->dev->name : "netif", | |
410 | "cannot free skb -- memory leaked."); | |
411 | return; | |
412 | } | |
3d5b0605 | 413 | skb->truesize -= skb->data_len; |
9bb237b6 EC |
414 | skb_shinfo(skb)->nr_frags = skb->data_len = 0; |
415 | skb_trim(skb, 0); | |
416 | dev_kfree_skb(skb); | |
417 | } | |
418 | ||
419 | static void | |
420 | skbpoolfree(struct aoedev *d) | |
421 | { | |
e9bb8fb0 | 422 | struct sk_buff *skb, *tmp; |
9bb237b6 | 423 | |
e9bb8fb0 | 424 | skb_queue_walk_safe(&d->skbpool, skb, tmp) |
9bb237b6 | 425 | skbfree(skb); |
e9bb8fb0 DM |
426 | |
427 | __skb_queue_head_init(&d->skbpool); | |
9bb237b6 EC |
428 | } |
429 | ||
0c966214 | 430 | /* find it or allocate it */ |
1da177e4 | 431 | struct aoedev * |
0c966214 | 432 | aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) |
1da177e4 LT |
433 | { |
434 | struct aoedev *d; | |
64a80f5a | 435 | int i; |
1da177e4 | 436 | ulong flags; |
10935d05 | 437 | ulong sysminor = 0; |
1da177e4 LT |
438 | |
439 | spin_lock_irqsave(&devlist_lock, flags); | |
440 | ||
441 | for (d=devlist; d; d=d->next) | |
0c966214 | 442 | if (d->aoemajor == maj && d->aoeminor == min) { |
e52a2932 EC |
443 | spin_lock(&d->lock); |
444 | if (d->flags & DEVFL_TKILL) { | |
445 | spin_unlock(&d->lock); | |
446 | d = NULL; | |
447 | goto out; | |
448 | } | |
69cf2d85 | 449 | d->ref++; |
e52a2932 | 450 | spin_unlock(&d->lock); |
1da177e4 | 451 | break; |
69cf2d85 | 452 | } |
4bcce1a3 | 453 | if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) |
68e0d42f EC |
454 | goto out; |
455 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); | |
456 | if (!d) | |
457 | goto out; | |
71114ec4 EC |
458 | d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC); |
459 | if (!d->targets) { | |
460 | kfree(d); | |
31279b14 | 461 | d = NULL; |
71114ec4 EC |
462 | goto out; |
463 | } | |
464 | d->ntargets = NTARGETS; | |
68e0d42f EC |
465 | INIT_WORK(&d->work, aoecmd_sleepwork); |
466 | spin_lock_init(&d->lock); | |
e9bb8fb0 | 467 | skb_queue_head_init(&d->skbpool); |
0e0cc9df | 468 | timer_setup(&d->timer, dummy_timer, 0); |
68e0d42f EC |
469 | d->timer.expires = jiffies + HZ; |
470 | add_timer(&d->timer); | |
471 | d->bufpool = NULL; /* defer to aoeblk_gdalloc */ | |
472 | d->tgt = d->targets; | |
69cf2d85 | 473 | d->ref = 1; |
64a80f5a EC |
474 | for (i = 0; i < NFACTIVE; i++) |
475 | INIT_LIST_HEAD(&d->factive[i]); | |
3a0c40d2 | 476 | INIT_LIST_HEAD(&d->rexmitq); |
68e0d42f | 477 | d->sysminor = sysminor; |
0c966214 EC |
478 | d->aoemajor = maj; |
479 | d->aoeminor = min; | |
3a0c40d2 EC |
480 | d->rttavg = RTTAVG_INIT; |
481 | d->rttdev = RTTDEV_INIT; | |
68e0d42f EC |
482 | d->next = devlist; |
483 | devlist = d; | |
484 | out: | |
3ae1c24e | 485 | spin_unlock_irqrestore(&devlist_lock, flags); |
1da177e4 LT |
486 | return d; |
487 | } | |
488 | ||
489 | static void | |
9bb237b6 | 490 | freetgt(struct aoedev *d, struct aoetgt *t) |
1da177e4 | 491 | { |
896831f5 EC |
492 | struct frame *f; |
493 | struct list_head *pos, *nx, *head; | |
1b86fda9 EC |
494 | struct aoeif *ifp; |
495 | ||
496 | for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) { | |
497 | if (!ifp->nd) | |
498 | break; | |
499 | dev_put(ifp->nd); | |
500 | } | |
e407a7f6 | 501 | |
896831f5 EC |
502 | head = &t->ffree; |
503 | list_for_each_safe(pos, nx, head) { | |
504 | list_del(pos); | |
505 | f = list_entry(pos, struct frame, head); | |
9bb237b6 | 506 | skbfree(f->skb); |
896831f5 EC |
507 | kfree(f); |
508 | } | |
68e0d42f EC |
509 | kfree(t); |
510 | } | |
511 | ||
1da177e4 LT |
/* Module-exit hook: drain scheduled work, then tear down and free
 * every device unconditionally.
 */
void
aoedev_exit(void)
{
	flush_scheduled_work();
	flush(NULL, 0, EXITING);
}
518 | ||
/* Module-init hook for the device layer; nothing to set up, since
 * the device list and locks are statically initialized.
 */
int __init
aoedev_init(void)
{
	return 0;
}