]>
Commit | Line | Data |
---|---|---|
1 | /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ | |
2 | /* | |
3 | * aoeblk.c | |
4 | * block device routines | |
5 | */ | |
6 | ||
7 | #include <linux/kernel.h> | |
8 | #include <linux/hdreg.h> | |
9 | #include <linux/blk-mq.h> | |
10 | #include <linux/backing-dev.h> | |
11 | #include <linux/fs.h> | |
12 | #include <linux/ioctl.h> | |
13 | #include <linux/slab.h> | |
14 | #include <linux/ratelimit.h> | |
15 | #include <linux/genhd.h> | |
16 | #include <linux/netdevice.h> | |
17 | #include <linux/mutex.h> | |
18 | #include <linux/export.h> | |
19 | #include <linux/moduleparam.h> | |
20 | #include <linux/debugfs.h> | |
21 | #include <scsi/sg.h> | |
22 | #include "aoe.h" | |
23 | ||
24 | static DEFINE_MUTEX(aoeblk_mutex); | |
25 | static struct kmem_cache *buf_pool_cache; | |
26 | static struct dentry *aoe_debugfs_dir; | |
27 | ||
28 | /* GPFS needs a larger value than the default. */ | |
29 | static int aoe_maxsectors; | |
30 | module_param(aoe_maxsectors, int, 0644); | |
31 | MODULE_PARM_DESC(aoe_maxsectors, | |
32 | "When nonzero, set the maximum number of sectors per I/O request"); | |
33 | ||
34 | static ssize_t aoedisk_show_state(struct device *dev, | |
35 | struct device_attribute *attr, char *page) | |
36 | { | |
37 | struct gendisk *disk = dev_to_disk(dev); | |
38 | struct aoedev *d = disk->private_data; | |
39 | ||
40 | return snprintf(page, PAGE_SIZE, | |
41 | "%s%s\n", | |
42 | (d->flags & DEVFL_UP) ? "up" : "down", | |
43 | (d->flags & DEVFL_KICKME) ? ",kickme" : | |
44 | (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : ""); | |
45 | /* I'd rather see nopen exported so we can ditch closewait */ | |
46 | } | |
47 | static ssize_t aoedisk_show_mac(struct device *dev, | |
48 | struct device_attribute *attr, char *page) | |
49 | { | |
50 | struct gendisk *disk = dev_to_disk(dev); | |
51 | struct aoedev *d = disk->private_data; | |
52 | struct aoetgt *t = d->targets[0]; | |
53 | ||
54 | if (t == NULL) | |
55 | return snprintf(page, PAGE_SIZE, "none\n"); | |
56 | return snprintf(page, PAGE_SIZE, "%pm\n", t->addr); | |
57 | } | |
58 | static ssize_t aoedisk_show_netif(struct device *dev, | |
59 | struct device_attribute *attr, char *page) | |
60 | { | |
61 | struct gendisk *disk = dev_to_disk(dev); | |
62 | struct aoedev *d = disk->private_data; | |
63 | struct net_device *nds[8], **nd, **nnd, **ne; | |
64 | struct aoetgt **t, **te; | |
65 | struct aoeif *ifp, *e; | |
66 | char *p; | |
67 | ||
68 | memset(nds, 0, sizeof nds); | |
69 | nd = nds; | |
70 | ne = nd + ARRAY_SIZE(nds); | |
71 | t = d->targets; | |
72 | te = t + d->ntargets; | |
73 | for (; t < te && *t; t++) { | |
74 | ifp = (*t)->ifs; | |
75 | e = ifp + NAOEIFS; | |
76 | for (; ifp < e && ifp->nd; ifp++) { | |
77 | for (nnd = nds; nnd < nd; nnd++) | |
78 | if (*nnd == ifp->nd) | |
79 | break; | |
80 | if (nnd == nd && nd != ne) | |
81 | *nd++ = ifp->nd; | |
82 | } | |
83 | } | |
84 | ||
85 | ne = nd; | |
86 | nd = nds; | |
87 | if (*nd == NULL) | |
88 | return snprintf(page, PAGE_SIZE, "none\n"); | |
89 | for (p = page; nd < ne; nd++) | |
90 | p += snprintf(p, PAGE_SIZE - (p-page), "%s%s", | |
91 | p == page ? "" : ",", (*nd)->name); | |
92 | p += snprintf(p, PAGE_SIZE - (p-page), "\n"); | |
93 | return p-page; | |
94 | } | |
95 | /* firmware version */ | |
96 | static ssize_t aoedisk_show_fwver(struct device *dev, | |
97 | struct device_attribute *attr, char *page) | |
98 | { | |
99 | struct gendisk *disk = dev_to_disk(dev); | |
100 | struct aoedev *d = disk->private_data; | |
101 | ||
102 | return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver); | |
103 | } | |
104 | static ssize_t aoedisk_show_payload(struct device *dev, | |
105 | struct device_attribute *attr, char *page) | |
106 | { | |
107 | struct gendisk *disk = dev_to_disk(dev); | |
108 | struct aoedev *d = disk->private_data; | |
109 | ||
110 | return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt); | |
111 | } | |
112 | ||
113 | static int aoedisk_debugfs_show(struct seq_file *s, void *ignored) | |
114 | { | |
115 | struct aoedev *d; | |
116 | struct aoetgt **t, **te; | |
117 | struct aoeif *ifp, *ife; | |
118 | unsigned long flags; | |
119 | char c; | |
120 | ||
121 | d = s->private; | |
122 | seq_printf(s, "rttavg: %d rttdev: %d\n", | |
123 | d->rttavg >> RTTSCALE, | |
124 | d->rttdev >> RTTDSCALE); | |
125 | seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool)); | |
126 | seq_printf(s, "kicked: %ld\n", d->kicked); | |
127 | seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt); | |
128 | seq_printf(s, "ref: %ld\n", d->ref); | |
129 | ||
130 | spin_lock_irqsave(&d->lock, flags); | |
131 | t = d->targets; | |
132 | te = t + d->ntargets; | |
133 | for (; t < te && *t; t++) { | |
134 | c = '\t'; | |
135 | seq_printf(s, "falloc: %ld\n", (*t)->falloc); | |
136 | seq_printf(s, "ffree: %p\n", | |
137 | list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next); | |
138 | seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout, | |
139 | (*t)->maxout, (*t)->nframes); | |
140 | seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh); | |
141 | seq_printf(s, "\ttaint:%d\n", (*t)->taint); | |
142 | seq_printf(s, "\tr:%d\n", (*t)->rpkts); | |
143 | seq_printf(s, "\tw:%d\n", (*t)->wpkts); | |
144 | ifp = (*t)->ifs; | |
145 | ife = ifp + ARRAY_SIZE((*t)->ifs); | |
146 | for (; ifp->nd && ifp < ife; ifp++) { | |
147 | seq_printf(s, "%c%s", c, ifp->nd->name); | |
148 | c = ','; | |
149 | } | |
150 | seq_puts(s, "\n"); | |
151 | } | |
152 | spin_unlock_irqrestore(&d->lock, flags); | |
153 | ||
154 | return 0; | |
155 | } | |
156 | ||
157 | static int aoe_debugfs_open(struct inode *inode, struct file *file) | |
158 | { | |
159 | return single_open(file, aoedisk_debugfs_show, inode->i_private); | |
160 | } | |
161 | ||
162 | static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL); | |
163 | static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL); | |
164 | static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL); | |
165 | static struct device_attribute dev_attr_firmware_version = { | |
166 | .attr = { .name = "firmware-version", .mode = 0444 }, | |
167 | .show = aoedisk_show_fwver, | |
168 | }; | |
169 | static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL); | |
170 | ||
171 | static struct attribute *aoe_attrs[] = { | |
172 | &dev_attr_state.attr, | |
173 | &dev_attr_mac.attr, | |
174 | &dev_attr_netif.attr, | |
175 | &dev_attr_firmware_version.attr, | |
176 | &dev_attr_payload.attr, | |
177 | NULL, | |
178 | }; | |
179 | ||
180 | static const struct attribute_group aoe_attr_group = { | |
181 | .attrs = aoe_attrs, | |
182 | }; | |
183 | ||
184 | static const struct attribute_group *aoe_attr_groups[] = { | |
185 | &aoe_attr_group, | |
186 | NULL, | |
187 | }; | |
188 | ||
189 | static const struct file_operations aoe_debugfs_fops = { | |
190 | .open = aoe_debugfs_open, | |
191 | .read = seq_read, | |
192 | .llseek = seq_lseek, | |
193 | .release = single_release, | |
194 | }; | |
195 | ||
196 | static void | |
197 | aoedisk_add_debugfs(struct aoedev *d) | |
198 | { | |
199 | char *p; | |
200 | ||
201 | if (aoe_debugfs_dir == NULL) | |
202 | return; | |
203 | p = strchr(d->gd->disk_name, '/'); | |
204 | if (p == NULL) | |
205 | p = d->gd->disk_name; | |
206 | else | |
207 | p++; | |
208 | BUG_ON(*p == '\0'); | |
209 | d->debugfs = debugfs_create_file(p, 0444, aoe_debugfs_dir, d, | |
210 | &aoe_debugfs_fops); | |
211 | } | |
212 | void | |
213 | aoedisk_rm_debugfs(struct aoedev *d) | |
214 | { | |
215 | debugfs_remove(d->debugfs); | |
216 | d->debugfs = NULL; | |
217 | } | |
218 | ||
219 | static int | |
220 | aoeblk_open(struct block_device *bdev, fmode_t mode) | |
221 | { | |
222 | struct aoedev *d = bdev->bd_disk->private_data; | |
223 | ulong flags; | |
224 | ||
225 | if (!virt_addr_valid(d)) { | |
226 | pr_crit("aoe: invalid device pointer in %s\n", | |
227 | __func__); | |
228 | WARN_ON(1); | |
229 | return -ENODEV; | |
230 | } | |
231 | if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL) | |
232 | return -ENODEV; | |
233 | ||
234 | mutex_lock(&aoeblk_mutex); | |
235 | spin_lock_irqsave(&d->lock, flags); | |
236 | if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) { | |
237 | d->nopen++; | |
238 | spin_unlock_irqrestore(&d->lock, flags); | |
239 | mutex_unlock(&aoeblk_mutex); | |
240 | return 0; | |
241 | } | |
242 | spin_unlock_irqrestore(&d->lock, flags); | |
243 | mutex_unlock(&aoeblk_mutex); | |
244 | return -ENODEV; | |
245 | } | |
246 | ||
247 | static void | |
248 | aoeblk_release(struct gendisk *disk, fmode_t mode) | |
249 | { | |
250 | struct aoedev *d = disk->private_data; | |
251 | ulong flags; | |
252 | ||
253 | spin_lock_irqsave(&d->lock, flags); | |
254 | ||
255 | if (--d->nopen == 0) { | |
256 | spin_unlock_irqrestore(&d->lock, flags); | |
257 | aoecmd_cfg(d->aoemajor, d->aoeminor); | |
258 | return; | |
259 | } | |
260 | spin_unlock_irqrestore(&d->lock, flags); | |
261 | } | |
262 | ||
263 | static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx, | |
264 | const struct blk_mq_queue_data *bd) | |
265 | { | |
266 | struct aoedev *d = hctx->queue->queuedata; | |
267 | ||
268 | spin_lock_irq(&d->lock); | |
269 | ||
270 | if ((d->flags & DEVFL_UP) == 0) { | |
271 | pr_info_ratelimited("aoe: device %ld.%d is not up\n", | |
272 | d->aoemajor, d->aoeminor); | |
273 | spin_unlock_irq(&d->lock); | |
274 | blk_mq_start_request(bd->rq); | |
275 | return BLK_STS_IOERR; | |
276 | } | |
277 | ||
278 | list_add_tail(&bd->rq->queuelist, &d->rq_list); | |
279 | aoecmd_work(d); | |
280 | spin_unlock_irq(&d->lock); | |
281 | return BLK_STS_OK; | |
282 | } | |
283 | ||
284 | static int | |
285 | aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) | |
286 | { | |
287 | struct aoedev *d = bdev->bd_disk->private_data; | |
288 | ||
289 | if ((d->flags & DEVFL_UP) == 0) { | |
290 | printk(KERN_ERR "aoe: disk not up\n"); | |
291 | return -ENODEV; | |
292 | } | |
293 | ||
294 | geo->cylinders = d->geo.cylinders; | |
295 | geo->heads = d->geo.heads; | |
296 | geo->sectors = d->geo.sectors; | |
297 | return 0; | |
298 | } | |
299 | ||
300 | static int | |
301 | aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg) | |
302 | { | |
303 | struct aoedev *d; | |
304 | ||
305 | if (!arg) | |
306 | return -EINVAL; | |
307 | ||
308 | d = bdev->bd_disk->private_data; | |
309 | if ((d->flags & DEVFL_UP) == 0) { | |
310 | pr_err("aoe: disk not up\n"); | |
311 | return -ENODEV; | |
312 | } | |
313 | ||
314 | if (cmd == HDIO_GET_IDENTITY) { | |
315 | if (!copy_to_user((void __user *) arg, &d->ident, | |
316 | sizeof(d->ident))) | |
317 | return 0; | |
318 | return -EFAULT; | |
319 | } | |
320 | ||
321 | /* udev calls scsi_id, which uses SG_IO, resulting in noise */ | |
322 | if (cmd != SG_IO) | |
323 | pr_info("aoe: unknown ioctl 0x%x\n", cmd); | |
324 | ||
325 | return -ENOTTY; | |
326 | } | |
327 | ||
328 | static const struct block_device_operations aoe_bdops = { | |
329 | .open = aoeblk_open, | |
330 | .release = aoeblk_release, | |
331 | .ioctl = aoeblk_ioctl, | |
332 | .getgeo = aoeblk_getgeo, | |
333 | .owner = THIS_MODULE, | |
334 | }; | |
335 | ||
336 | static const struct blk_mq_ops aoeblk_mq_ops = { | |
337 | .queue_rq = aoeblk_queue_rq, | |
338 | }; | |
339 | ||
340 | /* alloc_disk and add_disk can sleep */ | |
341 | void | |
342 | aoeblk_gdalloc(void *vp) | |
343 | { | |
344 | struct aoedev *d = vp; | |
345 | struct gendisk *gd; | |
346 | mempool_t *mp; | |
347 | struct request_queue *q; | |
348 | struct blk_mq_tag_set *set; | |
349 | enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; | |
350 | ulong flags; | |
351 | int late = 0; | |
352 | int err; | |
353 | ||
354 | spin_lock_irqsave(&d->lock, flags); | |
355 | if (d->flags & DEVFL_GDALLOC | |
356 | && !(d->flags & DEVFL_TKILL) | |
357 | && !(d->flags & DEVFL_GD_NOW)) | |
358 | d->flags |= DEVFL_GD_NOW; | |
359 | else | |
360 | late = 1; | |
361 | spin_unlock_irqrestore(&d->lock, flags); | |
362 | if (late) | |
363 | return; | |
364 | ||
365 | gd = alloc_disk(AOE_PARTITIONS); | |
366 | if (gd == NULL) { | |
367 | pr_err("aoe: cannot allocate disk structure for %ld.%d\n", | |
368 | d->aoemajor, d->aoeminor); | |
369 | goto err; | |
370 | } | |
371 | ||
372 | mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab, | |
373 | buf_pool_cache); | |
374 | if (mp == NULL) { | |
375 | printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n", | |
376 | d->aoemajor, d->aoeminor); | |
377 | goto err_disk; | |
378 | } | |
379 | ||
380 | set = &d->tag_set; | |
381 | set->ops = &aoeblk_mq_ops; | |
382 | set->cmd_size = sizeof(struct aoe_req); | |
383 | set->nr_hw_queues = 1; | |
384 | set->queue_depth = 128; | |
385 | set->numa_node = NUMA_NO_NODE; | |
386 | set->flags = BLK_MQ_F_SHOULD_MERGE; | |
387 | err = blk_mq_alloc_tag_set(set); | |
388 | if (err) { | |
389 | pr_err("aoe: cannot allocate tag set for %ld.%d\n", | |
390 | d->aoemajor, d->aoeminor); | |
391 | goto err_mempool; | |
392 | } | |
393 | ||
394 | q = blk_mq_init_queue(set); | |
395 | if (IS_ERR(q)) { | |
396 | pr_err("aoe: cannot allocate block queue for %ld.%d\n", | |
397 | d->aoemajor, d->aoeminor); | |
398 | blk_mq_free_tag_set(set); | |
399 | goto err_mempool; | |
400 | } | |
401 | ||
402 | spin_lock_irqsave(&d->lock, flags); | |
403 | WARN_ON(!(d->flags & DEVFL_GD_NOW)); | |
404 | WARN_ON(!(d->flags & DEVFL_GDALLOC)); | |
405 | WARN_ON(d->flags & DEVFL_TKILL); | |
406 | WARN_ON(d->gd); | |
407 | WARN_ON(d->flags & DEVFL_UP); | |
408 | blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); | |
409 | q->backing_dev_info->name = "aoe"; | |
410 | q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE; | |
411 | d->bufpool = mp; | |
412 | d->blkq = gd->queue = q; | |
413 | q->queuedata = d; | |
414 | d->gd = gd; | |
415 | if (aoe_maxsectors) | |
416 | blk_queue_max_hw_sectors(q, aoe_maxsectors); | |
417 | gd->major = AOE_MAJOR; | |
418 | gd->first_minor = d->sysminor; | |
419 | gd->fops = &aoe_bdops; | |
420 | gd->private_data = d; | |
421 | set_capacity(gd, d->ssize); | |
422 | snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", | |
423 | d->aoemajor, d->aoeminor); | |
424 | ||
425 | d->flags &= ~DEVFL_GDALLOC; | |
426 | d->flags |= DEVFL_UP; | |
427 | ||
428 | spin_unlock_irqrestore(&d->lock, flags); | |
429 | ||
430 | device_add_disk(NULL, gd, aoe_attr_groups); | |
431 | aoedisk_add_debugfs(d); | |
432 | ||
433 | spin_lock_irqsave(&d->lock, flags); | |
434 | WARN_ON(!(d->flags & DEVFL_GD_NOW)); | |
435 | d->flags &= ~DEVFL_GD_NOW; | |
436 | spin_unlock_irqrestore(&d->lock, flags); | |
437 | return; | |
438 | ||
439 | err_mempool: | |
440 | mempool_destroy(mp); | |
441 | err_disk: | |
442 | put_disk(gd); | |
443 | err: | |
444 | spin_lock_irqsave(&d->lock, flags); | |
445 | d->flags &= ~DEVFL_GD_NOW; | |
446 | schedule_work(&d->work); | |
447 | spin_unlock_irqrestore(&d->lock, flags); | |
448 | } | |
449 | ||
450 | void | |
451 | aoeblk_exit(void) | |
452 | { | |
453 | debugfs_remove_recursive(aoe_debugfs_dir); | |
454 | aoe_debugfs_dir = NULL; | |
455 | kmem_cache_destroy(buf_pool_cache); | |
456 | } | |
457 | ||
458 | int __init | |
459 | aoeblk_init(void) | |
460 | { | |
461 | buf_pool_cache = kmem_cache_create("aoe_bufs", | |
462 | sizeof(struct buf), | |
463 | 0, 0, NULL); | |
464 | if (buf_pool_cache == NULL) | |
465 | return -ENOMEM; | |
466 | aoe_debugfs_dir = debugfs_create_dir("aoe", NULL); | |
467 | return 0; | |
468 | } | |
469 |