]>
Commit | Line | Data |
---|---|---|
fea05a26 | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
1da177e4 LT |
2 | /* |
3 | * aoedev.c | |
4 | * AoE device utility functions; maintains device list. | |
5 | */ | |
6 | ||
7 | #include <linux/hdreg.h> | |
8 | #include <linux/blkdev.h> | |
9 | #include <linux/netdevice.h> | |
9bb237b6 | 10 | #include <linux/delay.h> |
5a0e3ad6 | 11 | #include <linux/slab.h> |
0c966214 EC |
12 | #include <linux/bitmap.h> |
13 | #include <linux/kdev_t.h> | |
4bcce1a3 | 14 | #include <linux/moduleparam.h> |
1da177e4 LT |
15 | #include "aoe.h" |
16 | ||
262bf541 EC |
/* Forward declarations for helpers defined later in this file. */
static void dummy_timer(ulong);
static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

/* Module parameter: nonzero selects dynamic minor-number allocation
 * (see minor_get_dyn) instead of the legacy static scheme.
 */
static int aoe_dyndevs;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

/* Singly-linked list of all known AoE devices, guarded by devlist_lock. */
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated. So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

/* Bitmap of allocated device slots (one bit per whole-disk minor group),
 * guarded by used_minors_lock.
 */
static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
40 | ||
41 | static int | |
4bcce1a3 | 42 | minor_get_dyn(ulong *sysminor) |
1da177e4 | 43 | { |
1da177e4 | 44 | ulong flags; |
0c966214 EC |
45 | ulong n; |
46 | int error = 0; | |
47 | ||
48 | spin_lock_irqsave(&used_minors_lock, flags); | |
49 | n = find_first_zero_bit(used_minors, N_DEVS); | |
50 | if (n < N_DEVS) | |
51 | set_bit(n, used_minors); | |
52 | else | |
53 | error = -1; | |
54 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
55 | ||
4bcce1a3 | 56 | *sysminor = n * AOE_PARTITIONS; |
0c966214 EC |
57 | return error; |
58 | } | |
1da177e4 | 59 | |
4bcce1a3 EC |
/*
 * Allocate a system minor derived statically from the AoE address
 * (shelf aoemaj, slot aoemin), for backwards compatibility when
 * aoe_dyndevs is off.  Returns 0 on success, -1 if the address is out
 * of range or the computed slot is already in use.  The computed slot
 * index is stored in *sysminor (note: written even on the error paths;
 * callers check the return code first).
 */
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	/* static mapping: slot index = shelf * 16 + slot-on-shelf */
	n = aoemaj * NPERSHELF + aoemin;
	if (aoemin >= NPERSHELF || n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
	} else {
		spin_lock_irqsave(&used_minors_lock, flags);
		if (test_bit(n, used_minors)) {
			pr_err("aoe: %s %lu\n",
				"existing device already has static minor number",
				n);
			error = -1;
		} else
			set_bit(n, used_minors);
		spin_unlock_irqrestore(&used_minors_lock, flags);
	}

	*sysminor = n;
	return error;
}
93 | ||
94 | static int | |
95 | minor_get(ulong *sysminor, ulong aoemaj, int aoemin) | |
96 | { | |
97 | if (aoe_dyndevs) | |
98 | return minor_get_dyn(sysminor); | |
99 | else | |
100 | return minor_get_static(sysminor, aoemaj, aoemin); | |
101 | } | |
102 | ||
0c966214 EC |
/*
 * Release a system minor previously handed out by minor_get*().
 * Takes the first-partition minor, converts it back to a slot index,
 * and clears the corresponding bit in used_minors.  BUG()s on an
 * out-of-range or not-currently-allocated minor (double free).
 */
static void
minor_free(ulong minor)
{
	ulong flags;

	/* convert first-partition minor back to the device slot index */
	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}
116 | ||
0c966214 EC |
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put. With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq. When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
69cf2d85 EC |
/*
 * Drop one reference on @d.  The decrement is done under devlist_lock
 * because d->ref is read under that same lock in aoedev_flush(); no
 * freeing happens here -- actual teardown is driven by aoedev_flush()
 * or aoedev_exit().
 */
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}
138 | ||
3ae1c24e EC |
139 | static void |
140 | dummy_timer(ulong vp) | |
141 | { | |
142 | struct aoedev *d; | |
143 | ||
144 | d = (struct aoedev *)vp; | |
145 | if (d->flags & DEVFL_TKILL) | |
146 | return; | |
147 | d->timer.expires = jiffies + HZ; | |
148 | add_timer(&d->timer); | |
149 | } | |
150 | ||
69cf2d85 EC |
/*
 * Fail the in-process request on @d (if any): fail the current buffer,
 * then walk the not-yet-started bios of the request, marking each
 * !BIO_UPTODATE and decrementing the outstanding-bio counter that is
 * stashed in rq->special.  When the counter reaches zero the whole
 * request is completed with an error via aoe_end_request().
 */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;
	while ((bio = d->ip.nxbio)) {
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		d->ip.nxbio = bio->bi_next;
		/* rq->special holds the count of bios still outstanding */
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}
172 | ||
/*
 * Take device @d down: mark it not-UP, fail every frame on the active
 * lists, reset each target's congestion window, fail the in-process
 * request, fast-fail all requests still queued on the block queue, and
 * zero the gendisk capacity.  Caller holds d->lock.
 */
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct frame *f;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head) {
			f = list_entry(pos, struct frame, head);
			list_del(pos);
			if (f->buf) {
				f->buf->nframesout--;
				aoe_failbuf(d, f->buf);
			}
			aoe_freetframe(f);
		}
	}
	/* reset window dressings */
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && (t = *tt); tt++) {
		t->maxout = t->nframes;
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);
	d->htgt = NULL;

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
220 | ||
262bf541 EC |
/*
 * Final teardown of @d after it has been unlinked from devlist and its
 * timer stopped.  Must be called from a context that can sleep
 * (cancel_work_sync, del_gendisk, blk_cleanup_queue all may sleep).
 * Releases the gendisk/queue, all targets and their frames, the buf
 * mempool, the skb pool, the device's minor number, and finally the
 * aoedev itself.
 */
static void
aoedev_freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;

	cancel_work_sync(&d->work);
	if (d->gd) {
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + NTARGETS;
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);
	kfree(d);
}
243 | ||
/*
 * Flush (remove) idle devices from devlist.  If the user string is
 * "all", down devices are removed even if they are still marked UP;
 * otherwise only devices that are down, not mid-allocation, not open,
 * and unreferenced are removed.  Works in two phases: unlink
 * candidates onto a private list under devlist_lock, then free them
 * after the lock is dropped, since aoedev_freedev() must be able to
 * sleep.  Returns 0, or -EFAULT if the user buffer cannot be read.
 */
int
aoedev_flush(const char __user *str, size_t cnt)
{
	ulong flags;
	struct aoedev *d, **dd;
	struct aoedev *rmd = NULL;
	char buf[16];
	int all = 0;

	if (cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
	}

	spin_lock_irqsave(&devlist_lock, flags);
	dd = &devlist;
	while ((d = *dd)) {
		spin_lock(&d->lock);
		/* skip devices that are busy in any way */
		if ((!all && (d->flags & DEVFL_UP))
		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		|| d->nopen
		|| d->ref) {
			spin_unlock(&d->lock);
			dd = &d->next;
			continue;
		}
		/* unlink and queue for freeing outside the lock */
		*dd = d->next;
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock(&d->lock);
		d->next = rmd;
		rmd = d;
	}
	spin_unlock_irqrestore(&devlist_lock, flags);
	while ((d = rmd)) {
		rmd = d->next;
		del_timer_sync(&d->timer);
		aoedev_freedev(d);	/* must be able to sleep */
	}
	return 0;
}
288 | ||
69cf2d85 EC |
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
/*
 * Free @skb once no other holder references its data.  Polls dataref
 * every Sms milliseconds for up to Tms total; if something (e.g. a
 * network driver's tx ring) still holds a reference after the timeout,
 * the skb is deliberately leaked with an error message rather than
 * freed out from under the holder.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		/* timed out: someone still holds a data reference */
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	/* drop any attached page fragments before freeing */
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
316 | ||
/*
 * Free every skb in @d's skb pool (waiting out lingering references
 * via skbfree) and reinitialize the pool queue head to empty.
 */
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}
327 | ||
0c966214 | 328 | /* find it or allocate it */ |
1da177e4 | 329 | struct aoedev * |
0c966214 | 330 | aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) |
1da177e4 LT |
331 | { |
332 | struct aoedev *d; | |
64a80f5a | 333 | int i; |
1da177e4 | 334 | ulong flags; |
0c966214 | 335 | ulong sysminor; |
1da177e4 LT |
336 | |
337 | spin_lock_irqsave(&devlist_lock, flags); | |
338 | ||
339 | for (d=devlist; d; d=d->next) | |
0c966214 | 340 | if (d->aoemajor == maj && d->aoeminor == min) { |
69cf2d85 | 341 | d->ref++; |
1da177e4 | 342 | break; |
69cf2d85 | 343 | } |
4bcce1a3 | 344 | if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) |
68e0d42f EC |
345 | goto out; |
346 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); | |
347 | if (!d) | |
348 | goto out; | |
349 | INIT_WORK(&d->work, aoecmd_sleepwork); | |
350 | spin_lock_init(&d->lock); | |
e9bb8fb0 | 351 | skb_queue_head_init(&d->skbpool); |
68e0d42f EC |
352 | init_timer(&d->timer); |
353 | d->timer.data = (ulong) d; | |
354 | d->timer.function = dummy_timer; | |
355 | d->timer.expires = jiffies + HZ; | |
356 | add_timer(&d->timer); | |
357 | d->bufpool = NULL; /* defer to aoeblk_gdalloc */ | |
358 | d->tgt = d->targets; | |
69cf2d85 | 359 | d->ref = 1; |
64a80f5a EC |
360 | for (i = 0; i < NFACTIVE; i++) |
361 | INIT_LIST_HEAD(&d->factive[i]); | |
68e0d42f | 362 | d->sysminor = sysminor; |
0c966214 EC |
363 | d->aoemajor = maj; |
364 | d->aoeminor = min; | |
68e0d42f EC |
365 | d->mintimer = MINTIMER; |
366 | d->next = devlist; | |
367 | devlist = d; | |
368 | out: | |
3ae1c24e | 369 | spin_unlock_irqrestore(&devlist_lock, flags); |
1da177e4 LT |
370 | return d; |
371 | } | |
372 | ||
/*
 * Free target @t of device @d: drop the references held on its network
 * interfaces, free every frame (and skb) on its free-frame list, then
 * free the target itself.
 */
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	/* ifs entries are packed from the front; stop at first empty */
	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}
395 | ||
1da177e4 LT |
/*
 * Module-exit teardown: drain the I/O completion queue, then take down
 * and free every device remaining on devlist.  Runs when no new
 * devices can appear, so devlist is walked without devlist_lock; each
 * device's own lock is still taken around aoedev_downdev().
 */
void
aoedev_exit(void)
{
	struct aoedev *d;
	ulong flags;

	aoe_flush_iocq();
	while ((d = devlist)) {
		devlist = d->next;

		spin_lock_irqsave(&d->lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock_irqrestore(&d->lock, flags);

		del_timer_sync(&d->timer);
		aoedev_freedev(d);
	}
}
415 | ||
/*
 * Module-init hook for the device layer.  Nothing to set up here --
 * all state is statically initialized -- so always succeeds.
 */
int __init
aoedev_init(void)
{
	return 0;
}